Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
waLBerla
waLBerla
Commits
7be319e9
Commit
7be319e9
authored
Aug 17, 2021
by
Helen Schottenhamml
Browse files
Merge branch 'FixFlowAroundSphere' into 'master'
Enable single precision for benchmark cases Closes
#154
See merge request
!475
parents
b4b0cb4b
ce49cdbf
Pipeline
#33806
passed with stages
in 407 minutes and 58 seconds
Changes
7
Pipelines
2
Show whitespace changes
Inline
Side-by-side
apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGen.cpp
View file @
7be319e9
...
@@ -70,21 +70,29 @@ auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage* const stora
...
@@ -70,21 +70,29 @@ auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage* const stora
};
};
auto
VelocityCallback
=
[](
const
Cell
&
pos
,
const
shared_ptr
<
StructuredBlockForest
>&
SbF
,
IBlock
&
block
,
auto
VelocityCallback
=
[](
const
Cell
&
pos
,
const
shared_ptr
<
StructuredBlockForest
>&
SbF
,
IBlock
&
block
,
real_t
inflow_velocity
)
{
real_t
inflow_velocity
,
const
bool
constant_inflow
=
true
)
{
if
(
constant_inflow
)
{
Vector3
<
real_t
>
result
(
inflow_velocity
,
0.0
,
0.0
);
return
result
;
}
else
{
Cell
globalCell
;
Cell
globalCell
;
CellInterval
domain
=
SbF
->
getDomainCellBB
();
CellInterval
domain
=
SbF
->
getDomainCellBB
();
real_t
h_y
=
domain
.
yMax
()
-
domain
.
y
Min
(
);
real_t
h_y
=
real_c
(
domain
.
y
Size
()
);
real_t
h_z
=
domain
.
zMax
()
-
domain
.
z
Min
(
);
real_t
h_z
=
real_c
(
domain
.
z
Size
()
);
SbF
->
transformBlockLocalToGlobalCell
(
globalCell
,
block
,
pos
);
SbF
->
transformBlockLocalToGlobalCell
(
globalCell
,
block
,
pos
);
real_t
y1
=
globalCell
[
1
]
-
(
h_y
/
2.0
+
0.5
);
real_t
y1
=
real_c
(
globalCell
[
1
]
-
(
h_y
/
2.0
-
0.5
)
)
;
real_t
z1
=
globalCell
[
2
]
-
(
h_z
/
2.0
+
0.5
);
real_t
z1
=
real_c
(
globalCell
[
2
]
-
(
h_z
/
2.0
-
0.5
)
)
;
real_t
u
=
(
inflow_velocity
*
16
)
/
(
h_y
*
h_y
*
h_z
*
h_z
)
*
(
h_y
/
2.0
-
y1
)
*
(
h_y
/
2
+
y1
)
*
(
h_z
/
2
-
z1
)
*
real_t
u
=
(
inflow_velocity
*
real_c
(
16
)
)
/
(
h_y
*
h_y
*
h_z
*
h_z
)
*
(
h_y
/
real_c
(
2.0
)
-
y1
)
*
(
h_z
/
2
+
z1
);
(
h_y
/
real_c
(
2.0
)
+
y1
)
*
(
h_z
/
real_c
(
2.0
)
-
z1
)
*
(
h_z
/
real_c
(
2.0
)
+
z1
);
Vector3
<
real_t
>
result
(
u
,
0.0
,
0.0
);
Vector3
<
real_t
>
result
(
u
,
0.0
,
0.0
);
return
result
;
return
result
;
}
};
};
class
AlternatingBeforeFunction
class
AlternatingBeforeFunction
...
@@ -147,6 +155,7 @@ int main(int argc, char** argv)
...
@@ -147,6 +155,7 @@ int main(int argc, char** argv)
const
real_t
u_max
=
parameters
.
getParameter
<
real_t
>
(
"u_max"
,
real_t
(
0.05
));
const
real_t
u_max
=
parameters
.
getParameter
<
real_t
>
(
"u_max"
,
real_t
(
0.05
));
const
real_t
reynolds_number
=
parameters
.
getParameter
<
real_t
>
(
"reynolds_number"
,
real_t
(
1000
));
const
real_t
reynolds_number
=
parameters
.
getParameter
<
real_t
>
(
"reynolds_number"
,
real_t
(
1000
));
const
uint_t
diameter_sphere
=
parameters
.
getParameter
<
uint_t
>
(
"diameter_sphere"
,
uint_t
(
5
));
const
uint_t
diameter_sphere
=
parameters
.
getParameter
<
uint_t
>
(
"diameter_sphere"
,
uint_t
(
5
));
const
bool
constant_inflow
=
parameters
.
getParameter
<
bool
>
(
"constant_inflow"
,
true
);
const
double
remainingTimeLoggerFrequency
=
const
double
remainingTimeLoggerFrequency
=
parameters
.
getParameter
<
double
>
(
"remainingTimeLoggerFrequency"
,
3.0
);
// in seconds
parameters
.
getParameter
<
double
>
(
"remainingTimeLoggerFrequency"
,
3.0
);
// in seconds
...
@@ -204,7 +213,7 @@ int main(int argc, char** argv)
...
@@ -204,7 +213,7 @@ int main(int argc, char** argv)
auto
boundariesConfig
=
config
->
getOneBlock
(
"Boundaries"
);
auto
boundariesConfig
=
config
->
getOneBlock
(
"Boundaries"
);
std
::
function
<
Vector3
<
real_t
>
(
const
Cell
&
,
const
shared_ptr
<
StructuredBlockForest
>&
,
IBlock
&
)
>
std
::
function
<
Vector3
<
real_t
>
(
const
Cell
&
,
const
shared_ptr
<
StructuredBlockForest
>&
,
IBlock
&
)
>
velocity_initialisation
=
std
::
bind
(
VelocityCallback
,
_1
,
_2
,
_3
,
u_max
);
velocity_initialisation
=
std
::
bind
(
VelocityCallback
,
_1
,
_2
,
_3
,
u_max
,
constant_inflow
);
#if defined(WALBERLA_BUILD_WITH_CUDA)
#if defined(WALBERLA_BUILD_WITH_CUDA)
lbm
::
FlowAroundSphereCodeGen_UBB
ubb
(
blocks
,
pdfFieldIDGPU
,
velocity_initialisation
);
lbm
::
FlowAroundSphereCodeGen_UBB
ubb
(
blocks
,
pdfFieldIDGPU
,
velocity_initialisation
);
...
@@ -236,10 +245,9 @@ int main(int argc, char** argv)
...
@@ -236,10 +245,9 @@ int main(int argc, char** argv)
AlternatingBeforeFunction
communication
(
evenComm
,
oddComm
,
tracker
);
AlternatingBeforeFunction
communication
(
evenComm
,
oddComm
,
tracker
);
// add LBM sweep and communication to time loop
// add LBM sweep and communication to time loop
timeloop
.
add
()
<<
BeforeFunction
(
communication
,
"communication"
)
timeloop
.
add
()
<<
BeforeFunction
(
communication
,
"communication"
)
<<
Sweep
(
ubb
.
getSweep
(
tracker
),
"ubb boundary"
);
<<
Sweep
(
noSlip
.
getSweep
(
tracker
),
"noSlip boundary"
);
timeloop
.
add
()
<<
Sweep
(
outflow
.
getSweep
(
tracker
),
"outflow boundary"
);
timeloop
.
add
()
<<
Sweep
(
outflow
.
getSweep
(
tracker
),
"outflow boundary"
);
timeloop
.
add
()
<<
Sweep
(
ubb
.
getSweep
(
tracker
),
"
ubb
boundary"
);
timeloop
.
add
()
<<
Sweep
(
noSlip
.
getSweep
(
tracker
),
"
noSlip
boundary"
);
timeloop
.
add
()
<<
BeforeFunction
(
tracker
->
getAdvancementFunction
(),
"Timestep Advancement"
)
timeloop
.
add
()
<<
BeforeFunction
(
tracker
->
getAdvancementFunction
(),
"Timestep Advancement"
)
<<
Sweep
(
lbSweep
.
getSweep
(
tracker
),
"LB update rule"
);
<<
Sweep
(
lbSweep
.
getSweep
(
tracker
),
"LB update rule"
);
...
...
apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGen.py
View file @
7be319e9
...
@@ -14,26 +14,30 @@ from lbmpy_walberla import generate_alternating_lbm_sweep, generate_alternating_
...
@@ -14,26 +14,30 @@ from lbmpy_walberla import generate_alternating_lbm_sweep, generate_alternating_
import
sympy
as
sp
import
sympy
as
sp
stencil
=
get_stencil
(
"D3Q27"
)
with
CodeGeneration
()
as
ctx
:
q
=
len
(
stencil
)
data_type
=
"float64"
if
ctx
.
double_accuracy
else
"float32"
dim
=
len
(
stencil
[
0
])
stencil
=
get_stencil
(
"D3Q27"
)
streaming_pattern
=
'esotwist'
q
=
len
(
stencil
)
timesteps
=
get_timesteps
(
streaming_pattern
)
dim
=
len
(
stencil
[
0
])
streaming_pattern
=
'esotwist'
pdfs
,
velocity_field
,
density_field
=
fields
(
f
"pdfs(
{
q
}
), velocity(
{
dim
}
), density(1) : double[
{
dim
}
D]"
,
layout
=
'fzyx'
)
timesteps
=
get_timesteps
(
streaming_pattern
)
omega
=
sp
.
Symbol
(
"omega"
)
u_max
=
sp
.
Symbol
(
"u_max"
)
pdfs
,
velocity_field
,
density_field
=
fields
(
f
"pdfs(
{
q
}
), velocity(
{
dim
}
), density(1) :
{
data_type
}
[
{
dim
}
D]"
,
layout
=
'fzyx'
)
output
=
{
omega
=
sp
.
Symbol
(
"omega"
)
u_max
=
sp
.
Symbol
(
"u_max"
)
output
=
{
'density'
:
density_field
,
'density'
:
density_field
,
'velocity'
:
velocity_field
'velocity'
:
velocity_field
}
}
opt
=
{
'symbolic_field'
:
pdfs
,
opt
=
{
'symbolic_field'
:
pdfs
,
'cse_global'
:
False
,
'cse_global'
:
False
,
'cse_pdfs'
:
False
}
'cse_pdfs'
:
False
,
'double_precision'
:
True
if
ctx
.
double_accuracy
else
False
}
method_params
=
{
'method'
:
'cumulant'
,
method_params
=
{
'method'
:
'cumulant'
,
'stencil'
:
stencil
,
'stencil'
:
stencil
,
'relaxation_rate'
:
omega
,
'relaxation_rate'
:
omega
,
'galilean_correction'
:
True
,
'galilean_correction'
:
True
,
...
@@ -42,23 +46,22 @@ method_params = {'method': 'cumulant',
...
@@ -42,23 +46,22 @@ method_params = {'method': 'cumulant',
'output'
:
output
,
'output'
:
output
,
'optimization'
:
opt
}
'optimization'
:
opt
}
collision_rule
=
create_lb_collision_rule
(
**
method_params
)
collision_rule
=
create_lb_collision_rule
(
**
method_params
)
lb_method
=
collision_rule
.
method
lb_method
=
collision_rule
.
method
# getter & setter
# getter & setter
setter_assignments
=
macroscopic_values_setter
(
lb_method
,
velocity
=
velocity_field
.
center_vector
,
setter_assignments
=
macroscopic_values_setter
(
lb_method
,
velocity
=
velocity_field
.
center_vector
,
pdfs
=
pdfs
,
density
=
1.0
,
pdfs
=
pdfs
,
density
=
1.0
,
streaming_pattern
=
streaming_pattern
,
streaming_pattern
=
streaming_pattern
,
previous_timestep
=
timesteps
[
0
])
previous_timestep
=
timesteps
[
0
])
# opt = {'instruction_set': 'sse', 'assume_aligned': True, 'nontemporal': False, 'assume_inner_stride_one': True}
# opt = {'instruction_set': 'sse', 'assume_aligned': True, 'nontemporal': False, 'assume_inner_stride_one': True}
stencil_typedefs
=
{
'Stencil_T'
:
stencil
}
stencil_typedefs
=
{
'Stencil_T'
:
stencil
}
field_typedefs
=
{
'PdfField_T'
:
pdfs
,
field_typedefs
=
{
'PdfField_T'
:
pdfs
,
'VelocityField_T'
:
velocity_field
,
'VelocityField_T'
:
velocity_field
,
'ScalarField_T'
:
density_field
}
'ScalarField_T'
:
density_field
}
with
CodeGeneration
()
as
ctx
:
if
ctx
.
cuda
:
if
ctx
.
cuda
:
target
=
'gpu'
target
=
'gpu'
else
:
else
:
...
...
apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGenParameters.py
View file @
7be319e9
...
@@ -4,13 +4,15 @@ from lbmpy.relaxationrates import relaxation_rate_from_lattice_viscosity
...
@@ -4,13 +4,15 @@ from lbmpy.relaxationrates import relaxation_rate_from_lattice_viscosity
class
Scenario
:
class
Scenario
:
def
__init__
(
self
):
def
__init__
(
self
):
self
.
timesteps
=
101
self
.
timesteps
=
10
0
1
self
.
vtkWriteFrequency
=
20
00
self
.
vtkWriteFrequency
=
1
00
self
.
cells
=
(
384
,
128
,
128
)
self
.
cells
=
(
384
,
128
,
128
)
self
.
blocks
=
(
1
,
1
,
1
)
self
.
blocks
=
(
1
,
1
,
1
)
self
.
periodic
=
(
0
,
0
,
0
)
self
.
periodic
=
(
0
,
0
,
0
)
self
.
constant_inflow
=
True
self
.
diameter_sphere
=
min
(
self
.
cells
)
//
2
self
.
diameter_sphere
=
min
(
self
.
cells
)
//
2
self
.
u_max
=
0.1
self
.
u_max
=
0.1
self
.
reynolds_number
=
1000000
self
.
reynolds_number
=
1000000
...
@@ -38,7 +40,8 @@ class Scenario:
...
@@ -38,7 +40,8 @@ class Scenario:
'omega'
:
self
.
omega
,
'omega'
:
self
.
omega
,
'u_max'
:
self
.
u_max
,
'u_max'
:
self
.
u_max
,
'reynolds_number'
:
self
.
reynolds_number
,
'reynolds_number'
:
self
.
reynolds_number
,
'diameter_sphere'
:
self
.
diameter_sphere
'diameter_sphere'
:
self
.
diameter_sphere
,
'constant_inflow'
:
self
.
constant_inflow
},
},
'Boundaries'
:
{
'Boundaries'
:
{
'Border'
:
[
'Border'
:
[
...
@@ -54,7 +57,7 @@ class Scenario:
...
@@ -54,7 +57,7 @@ class Scenario:
'midpoint'
:
(
int
(
0.40
*
self
.
total_cells
[
0
]),
self
.
total_cells
[
1
]
//
2
,
self
.
total_cells
[
2
]
//
2
),
'midpoint'
:
(
int
(
0.40
*
self
.
total_cells
[
0
]),
self
.
total_cells
[
1
]
//
2
,
self
.
total_cells
[
2
]
//
2
),
'radius'
:
self
.
diameter_sphere
//
2
,
'radius'
:
self
.
diameter_sphere
//
2
,
'flag'
:
'NoSlip'
}
'flag'
:
'NoSlip'
}
]
]
,
},
},
}
}
...
...
apps/benchmarks/UniformGridGPU/CMakeLists.txt
View file @
7be319e9
...
@@ -4,9 +4,9 @@ waLBerla_link_files_to_builddir( "*.py" )
...
@@ -4,9 +4,9 @@ waLBerla_link_files_to_builddir( "*.py" )
waLBerla_link_files_to_builddir
(
"simulation_setup"
)
waLBerla_link_files_to_builddir
(
"simulation_setup"
)
foreach
(
streaming_pattern
aa
)
# choose from {
pull
,
push
,
aa
,
esotwist
}
foreach
(
streaming_pattern pull push aa esotwist
)
foreach
(
stencil
d3q27
)
# choose from {
d3q19 d3q27
}
foreach
(
stencil d3q19 d3q27
)
foreach
(
collision_setup srt trt mrt cumulant
)
# choose from {srt trt mrt cumulant entropic smagorinsky}
foreach
(
collision_setup srt trt mrt cumulant
entropic smagorinsky mrt-overrelax cumulant-overrelax
)
set
(
config
${
stencil
}
_
${
streaming_pattern
}
_
${
collision_setup
}
)
set
(
config
${
stencil
}
_
${
streaming_pattern
}
_
${
collision_setup
}
)
waLBerla_generate_target_from_python
(
NAME UniformGridGPUGenerated_
${
config
}
waLBerla_generate_target_from_python
(
NAME UniformGridGPUGenerated_
${
config
}
FILE UniformGridGPU.py
FILE UniformGridGPU.py
...
@@ -17,6 +17,7 @@ foreach(streaming_pattern aa) # choose from {pull, push, aa, esotwist}
...
@@ -17,6 +17,7 @@ foreach(streaming_pattern aa) # choose from {pull, push, aa, esotwist}
UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
UniformGridGPU_MacroSetter.cu UniformGridGPU_MacroSetter.h
UniformGridGPU_MacroSetter.cu UniformGridGPU_MacroSetter.h
UniformGridGPU_StreamOnlyKernel.cu UniformGridGPU_StreamOnlyKernel.h
UniformGridGPU_InfoHeader.h
UniformGridGPU_InfoHeader.h
)
)
...
@@ -25,6 +26,16 @@ foreach(streaming_pattern aa) # choose from {pull, push, aa, esotwist}
...
@@ -25,6 +26,16 @@ foreach(streaming_pattern aa) # choose from {pull, push, aa, esotwist}
FILES UniformGridGPU.cpp
FILES UniformGridGPU.cpp
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk UniformGridGPUGenerated_
${
config
}
)
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk UniformGridGPUGenerated_
${
config
}
)
set_target_properties
(
UniformGridGPU_
${
config
}
PROPERTIES CXX_VISIBILITY_PRESET hidden
)
set_target_properties
(
UniformGridGPU_
${
config
}
PROPERTIES CXX_VISIBILITY_PRESET hidden
)
# all configs are excluded from all except for pull d3q27.
if
(
${
streaming_pattern
}
STREQUAL
"pull"
AND
${
stencil
}
STREQUAL
"d3q27"
)
set_target_properties
(
UniformGridGPUGenerated_
${
config
}
PROPERTIES EXCLUDE_FROM_ALL FALSE
)
set_target_properties
(
UniformGridGPU_
${
config
}
PROPERTIES EXCLUDE_FROM_ALL FALSE
)
else
()
set_target_properties
(
UniformGridGPUGenerated_
${
config
}
PROPERTIES EXCLUDE_FROM_ALL TRUE
)
set_target_properties
(
UniformGridGPU_
${
config
}
PROPERTIES EXCLUDE_FROM_ALL TRUE
)
endif
(
${
streaming_pattern
}
STREQUAL
"pull"
AND
${
stencil
}
STREQUAL
"d3q27"
)
endforeach
()
endforeach
()
endforeach
()
endforeach
()
endforeach
()
endforeach
()
\ No newline at end of file
apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
View file @
7be319e9
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file UniformGridGPU.cpp
//! \author Martin Bauer <martin.bauer@fau.de>
//! \author Frederik Hennig <frederik.hennig@fau.de>
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "blockforest/Initialization.h"
#include "core/Environment.h"
#include "core/Environment.h"
...
@@ -7,9 +29,9 @@
...
@@ -7,9 +29,9 @@
#include "cuda/AddGPUFieldToStorage.h"
#include "cuda/AddGPUFieldToStorage.h"
#include "cuda/DeviceSelectMPI.h"
#include "cuda/DeviceSelectMPI.h"
#include "cuda/FieldCopy.h"
#include "cuda/ParallelStreams.h"
#include "cuda/ParallelStreams.h"
#include "cuda/communication/UniformGPUScheme.h"
#include "cuda/communication/UniformGPUScheme.h"
#include "cuda/FieldCopy.h"
#include "cuda/lbm/CombinedInPlaceGpuPackInfo.h"
#include "cuda/lbm/CombinedInPlaceGpuPackInfo.h"
#include "field/AddToStorage.h"
#include "field/AddToStorage.h"
...
@@ -27,14 +49,13 @@
...
@@ -27,14 +49,13 @@
#include "timeloop/SweepTimeloop.h"
#include "timeloop/SweepTimeloop.h"
#include "InitShearVelocity.h"
#include <cmath>
#include <cmath>
#include "InitShearVelocity.h"
#include "UniformGridGPU_InfoHeader.h"
#include "UniformGridGPU_InfoHeader.h"
using
namespace
walberla
;
using
namespace
walberla
;
using
FlagField_T
=
FlagField
<
uint8_t
>
;
using
FlagField_T
=
FlagField
<
uint8_t
>
;
int
main
(
int
argc
,
char
**
argv
)
int
main
(
int
argc
,
char
**
argv
)
{
{
...
@@ -61,7 +82,7 @@ int main(int argc, char** argv)
...
@@ -61,7 +82,7 @@ int main(int argc, char** argv)
auto
parameters
=
config
->
getOneBlock
(
"Parameters"
);
auto
parameters
=
config
->
getOneBlock
(
"Parameters"
);
const
real_t
omega
=
parameters
.
getParameter
<
real_t
>
(
"omega"
,
real_c
(
1.4
));
const
real_t
omega
=
parameters
.
getParameter
<
real_t
>
(
"omega"
,
real_c
(
1.4
));
const
uint_t
timesteps
=
parameters
.
getParameter
<
uint_t
>
(
"timesteps"
,
uint_c
(
50
));
const
uint_t
timesteps
=
parameters
.
getParameter
<
uint_t
>
(
"timesteps"
,
uint_c
(
50
));
const
bool
initShearFlow
=
parameters
.
getParameter
<
bool
>
(
"initShearFlow"
,
true
);
const
bool
initShearFlow
=
parameters
.
getParameter
<
bool
>
(
"initShearFlow"
,
true
);
// Creating fields
// Creating fields
BlockDataID
pdfFieldCpuID
=
BlockDataID
pdfFieldCpuID
=
...
@@ -69,7 +90,8 @@ int main(int argc, char** argv)
...
@@ -69,7 +90,8 @@ int main(int argc, char** argv)
BlockDataID
velFieldCpuID
=
field
::
addToStorage
<
VelocityField_T
>
(
blocks
,
"vel"
,
real_t
(
0
),
field
::
fzyx
);
BlockDataID
velFieldCpuID
=
field
::
addToStorage
<
VelocityField_T
>
(
blocks
,
"vel"
,
real_t
(
0
),
field
::
fzyx
);
// Initialize velocity on cpu
// Initialize velocity on cpu
if
(
initShearFlow
){
if
(
initShearFlow
)
{
WALBERLA_LOG_INFO_ON_ROOT
(
"Initializing shear flow"
)
WALBERLA_LOG_INFO_ON_ROOT
(
"Initializing shear flow"
)
initShearVelocity
(
blocks
,
velFieldCpuID
);
initShearVelocity
(
blocks
,
velFieldCpuID
);
}
}
...
@@ -91,9 +113,7 @@ int main(int argc, char** argv)
...
@@ -91,9 +113,7 @@ int main(int argc, char** argv)
for
(
uint_t
i
=
0
;
i
<
3
;
++
i
)
for
(
uint_t
i
=
0
;
i
<
3
;
++
i
)
{
{
if
(
int_c
(
cellsPerBlock
[
i
])
<=
innerOuterSplit
[
i
]
*
2
)
if
(
int_c
(
cellsPerBlock
[
i
])
<=
innerOuterSplit
[
i
]
*
2
)
{
{
WALBERLA_ABORT_NO_DEBUG_INFO
(
"innerOuterSplit too large - make it smaller or increase cellsPerBlock"
)
}
WALBERLA_ABORT_NO_DEBUG_INFO
(
"innerOuterSplit too large - make it smaller or increase cellsPerBlock"
)
}
}
}
Cell
innerOuterSplitCell
(
innerOuterSplit
[
0
],
innerOuterSplit
[
1
],
innerOuterSplit
[
2
]);
Cell
innerOuterSplitCell
(
innerOuterSplit
[
0
],
innerOuterSplit
[
1
],
innerOuterSplit
[
2
]);
...
@@ -117,23 +137,26 @@ int main(int argc, char** argv)
...
@@ -117,23 +137,26 @@ int main(int argc, char** argv)
LbSweep
lbSweep
(
pdfFieldGpuID
,
omega
,
gpuBlockSize
[
0
],
gpuBlockSize
[
1
],
gpuBlockSize
[
2
],
innerOuterSplitCell
);
LbSweep
lbSweep
(
pdfFieldGpuID
,
omega
,
gpuBlockSize
[
0
],
gpuBlockSize
[
1
],
gpuBlockSize
[
2
],
innerOuterSplitCell
);
lbSweep
.
setOuterPriority
(
streamHighPriority
);
lbSweep
.
setOuterPriority
(
streamHighPriority
);
pystencils
::
UniformGridGPU_StreamOnlyKernel
StreamOnlyKernel
(
pdfFieldGpuID
,
gpuBlockSize
[
0
],
gpuBlockSize
[
1
],
gpuBlockSize
[
2
]);
// Boundaries
// Boundaries
const
FlagUID
fluidFlagUID
(
"Fluid"
);
const
FlagUID
fluidFlagUID
(
"Fluid"
);
BlockDataID
flagFieldID
=
field
::
addFlagFieldToStorage
<
FlagField_T
>
(
blocks
,
"Boundary Flag Field"
);
BlockDataID
flagFieldID
=
field
::
addFlagFieldToStorage
<
FlagField_T
>
(
blocks
,
"Boundary Flag Field"
);
auto
boundariesConfig
=
config
->
getBlock
(
"Boundaries"
);
auto
boundariesConfig
=
config
->
getBlock
(
"Boundaries"
);
bool
boundaries
=
false
;
bool
boundaries
=
false
;
if
(
boundariesConfig
)
if
(
boundariesConfig
)
{
{
boundaries
=
true
;
boundaries
=
true
;
geometry
::
initBoundaryHandling
<
FlagField_T
>
(
*
blocks
,
flagFieldID
,
boundariesConfig
);
geometry
::
initBoundaryHandling
<
FlagField_T
>
(
*
blocks
,
flagFieldID
,
boundariesConfig
);
geometry
::
setNonBoundaryCellsToDomain
<
FlagField_T
>
(
*
blocks
,
flagFieldID
,
fluidFlagUID
);
geometry
::
setNonBoundaryCellsToDomain
<
FlagField_T
>
(
*
blocks
,
flagFieldID
,
fluidFlagUID
);
}
}
lbm
::
UniformGridGPU_NoSlip
noSlip
(
blocks
,
pdfFieldGpuID
);
lbm
::
UniformGridGPU_NoSlip
noSlip
(
blocks
,
pdfFieldGpuID
);
noSlip
.
fillFromFlagField
<
FlagField_T
>
(
blocks
,
flagFieldID
,
FlagUID
(
"NoSlip"
),
fluidFlagUID
);
noSlip
.
fillFromFlagField
<
FlagField_T
>
(
blocks
,
flagFieldID
,
FlagUID
(
"NoSlip"
),
fluidFlagUID
);
lbm
::
UniformGridGPU_UBB
ubb
(
blocks
,
pdfFieldGpuID
);
lbm
::
UniformGridGPU_UBB
ubb
(
blocks
,
pdfFieldGpuID
);
ubb
.
fillFromFlagField
<
FlagField_T
>
(
blocks
,
flagFieldID
,
FlagUID
(
"UBB"
),
fluidFlagUID
);
ubb
.
fillFromFlagField
<
FlagField_T
>
(
blocks
,
flagFieldID
,
FlagUID
(
"UBB"
),
fluidFlagUID
);
// Initial setup is the post-collision state of an even time step
// Initial setup is the post-collision state of an even time step
auto
tracker
=
make_shared
<
lbm
::
TimestepTracker
>
(
0
);
auto
tracker
=
make_shared
<
lbm
::
TimestepTracker
>
(
0
);
...
@@ -143,7 +166,8 @@ int main(int argc, char** argv)
...
@@ -143,7 +166,8 @@ int main(int argc, char** argv)
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
UniformGPUScheme
<
Stencil_T
>
comm
(
blocks
,
cudaEnabledMPI
);
UniformGPUScheme
<
Stencil_T
>
comm
(
blocks
,
cudaEnabledMPI
);
auto
packInfo
=
make_shared
<
lbm
::
CombinedInPlaceGpuPackInfo
<
PackInfoEven
,
PackInfoOdd
>
>
(
tracker
,
pdfFieldGpuID
);
auto
packInfo
=
make_shared
<
lbm
::
CombinedInPlaceGpuPackInfo
<
PackInfoEven
,
PackInfoOdd
>
>
(
tracker
,
pdfFieldGpuID
);
comm
.
addPackInfo
(
packInfo
);
comm
.
addPackInfo
(
packInfo
);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
...
@@ -152,17 +176,17 @@ int main(int argc, char** argv)
...
@@ -152,17 +176,17 @@ int main(int argc, char** argv)
auto
defaultStream
=
cuda
::
StreamRAII
::
newPriorityStream
(
streamLowPriority
);
auto
defaultStream
=
cuda
::
StreamRAII
::
newPriorityStream
(
streamLowPriority
);
auto
boundarySweep
=
[
&
](
IBlock
*
block
,
uint8_t
t
,
cudaStream_t
stream
){
auto
boundarySweep
=
[
&
](
IBlock
*
block
,
uint8_t
t
,
cudaStream_t
stream
)
{
noSlip
.
run
(
block
,
t
,
stream
);
noSlip
.
run
(
block
,
t
,
stream
);
ubb
.
run
(
block
,
t
,
stream
);
ubb
.
run
(
block
,
t
,
stream
);
};
};
auto
boundaryInner
=
[
&
](
IBlock
*
block
,
uint8_t
t
,
cudaStream_t
stream
){
auto
boundaryInner
=
[
&
](
IBlock
*
block
,
uint8_t
t
,
cudaStream_t
stream
)
{
noSlip
.
inner
(
block
,
t
,
stream
);
noSlip
.
inner
(
block
,
t
,
stream
);
ubb
.
inner
(
block
,
t
,
stream
);
ubb
.
inner
(
block
,
t
,
stream
);
};
};
auto
boundaryOuter
=
[
&
](
IBlock
*
block
,
uint8_t
t
,
cudaStream_t
stream
){
auto
boundaryOuter
=
[
&
](
IBlock
*
block
,
uint8_t
t
,
cudaStream_t
stream
)
{
noSlip
.
outer
(
block
,
t
,
stream
);
noSlip
.
outer
(
block
,
t
,
stream
);
ubb
.
outer
(
block
,
t
,
stream
);
ubb
.
outer
(
block
,
t
,
stream
);
};
};
...
@@ -170,13 +194,15 @@ int main(int argc, char** argv)
...
@@ -170,13 +194,15 @@ int main(int argc, char** argv)
auto
simpleOverlapTimeStep
=
[
&
]()
{
auto
simpleOverlapTimeStep
=
[
&
]()
{
// Communicate post-collision values of previous timestep...
// Communicate post-collision values of previous timestep...
comm
.
startCommunication
(
defaultStream
);
comm
.
startCommunication
(
defaultStream
);
for
(
auto
&
block
:
*
blocks
){
for
(
auto
&
block
:
*
blocks
)
if
(
boundaries
)
boundaryInner
(
&
block
,
tracker
->
getCounter
(),
defaultStream
);
{
if
(
boundaries
)
boundaryInner
(
&
block
,
tracker
->
getCounter
(),
defaultStream
);
lbSweep
.
inner
(
&
block
,
tracker
->
getCounterPlusOne
(),
defaultStream
);
lbSweep
.
inner
(
&
block
,
tracker
->
getCounterPlusOne
(),
defaultStream
);
}
}
comm
.
wait
(
defaultStream
);
comm
.
wait
(
defaultStream
);
for
(
auto
&
block
:
*
blocks
){
for
(
auto
&
block
:
*
blocks
)
if
(
boundaries
)
boundaryOuter
(
&
block
,
tracker
->
getCounter
(),
defaultStream
);
{
if
(
boundaries
)
boundaryOuter
(
&
block
,
tracker
->
getCounter
(),
defaultStream
);
lbSweep
.
outer
(
&
block
,
tracker
->
getCounterPlusOne
(),
defaultStream
);
lbSweep
.
outer
(
&
block
,
tracker
->
getCounterPlusOne
(),
defaultStream
);
}
}
...
@@ -185,8 +211,9 @@ int main(int argc, char** argv)
...
@@ -185,8 +211,9 @@ int main(int argc, char** argv)
auto
normalTimeStep
=
[
&
]()
{
auto
normalTimeStep
=
[
&
]()
{
comm
.
communicate
(
defaultStream
);
comm
.
communicate
(
defaultStream
);
for
(
auto
&
block
:
*
blocks
){
for
(
auto
&
block
:
*
blocks
)
if
(
boundaries
)
boundarySweep
(
&
block
,
tracker
->
getCounter
(),
defaultStream
);
{
if
(
boundaries
)
boundarySweep
(
&
block
,
tracker
->
getCounter
(),
defaultStream
);
lbSweep
(
&
block
,
tracker
->
getCounterPlusOne
(),
defaultStream
);
lbSweep
(
&
block
,
tracker
->
getCounterPlusOne
(),
defaultStream
);
}
}
...
@@ -201,6 +228,12 @@ int main(int argc, char** argv)
...
@@ -201,6 +228,12 @@ int main(int argc, char** argv)
lbSweep
(
&
block
,
tracker
->
getCounter
(),
defaultStream
);
lbSweep
(
&
block
,
tracker
->
getCounter
(),
defaultStream
);
};
};
// Stream only function to test a streaming pattern without executing lbm operations inside
auto
StreamOnlyFunc
=
[
&
]()
{
for
(
auto
&
block
:
*
blocks
)
StreamOnlyKernel
(
&
block
,
defaultStream
);
};
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// TIME LOOP SETUP ///
/// TIME LOOP SETUP ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
...
@@ -221,6 +254,13 @@ int main(int argc, char** argv)
...
@@ -221,6 +254,13 @@ int main(int argc, char** argv)
comm
.
communicate
();
comm
.
communicate
();
timeStep
=
kernelOnlyFunc
;
timeStep
=
kernelOnlyFunc
;
}
}
else
if
(
timeStepStrategy
==
"StreamOnly"
)
{
WALBERLA_LOG_INFO_ON_ROOT
(
"Running only streaming kernel without LBM - this makes only sense for benchmarking!"
)
// Run initial communication once to provide any missing stream-in populations
timeStep
=
StreamOnlyFunc
;
}
else
else
{
{
WALBERLA_ABORT_NO_DEBUG_INFO
(
"Invalid value for 'timeStepStrategy'. Allowed values are 'noOverlap', "
WALBERLA_ABORT_NO_DEBUG_INFO
(
"Invalid value for 'timeStepStrategy'. Allowed values are 'noOverlap', "
...
@@ -239,7 +279,7 @@ int main(int argc, char** argv)
...
@@ -239,7 +279,7 @@ int main(int argc, char** argv)
vtkOutput
->
addCellDataWriter
(
velWriter
);