waLBerla / Commits / d48349d8

Commit d48349d8, authored Jul 07, 2021 by Dominik Thoennes

Merge branch 'master' into thoennes/add-oneapi-22

Parents: b7838d4b, 03b9f95f
Pipeline #33161 failed with stages in 247 minutes and 23 seconds
Changes: 66, Pipelines: 1
CMakeLists.txt

...
@@ -1011,11 +1011,18 @@ endif()
 option ( WALBERLA_THREAD_SAFE_LOGGING "Enables/Disables thread-safe logging" ON )

 if ( WALBERLA_BUILD_WITH_OPENMP )
+   if ( APPLE AND EXISTS /opt/local/lib/libomp AND EXISTS /opt/local/include/libomp )
+      # find libomp from MacPorts
+      set ( CMAKE_FRAMEWORK_PATH /opt/local/lib/libomp )
+      set ( CMAKE_INCLUDE_PATH /opt/local/include/libomp )
+   endif ()
    find_package ( OpenMP )
    if ( OpenMP_FOUND )
       add_flag ( CMAKE_C_FLAGS "${OpenMP_C_FLAGS}" )
       add_flag ( CMAKE_CXX_FLAGS "${OpenMP_CXX_FLAGS}" )
       list ( APPEND SERVICE_LIBS ${OpenMP_CXX_LIBRARIES} )
+      if ( OpenMP_CXX_INCLUDE_DIRS )
+         include_directories ( ${OpenMP_CXX_INCLUDE_DIRS} )
+      endif ()
    else ()
       #workarounds
       if ( WALBERLA_CXX_COMPILER_IS_NEC )
...
apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGen.py

 from pystencils.field import fields
-from lbmpy.advanced_streaming.utility import get_timesteps, Timestep
+from lbmpy.advanced_streaming.utility import get_timesteps
 from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
 from lbmpy.stencils import get_stencil
-from lbmpy.creationfunctions import create_lb_collision_rule, create_lb_method, create_lb_update_rule
+from lbmpy.creationfunctions import create_lb_collision_rule
 from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow
 from pystencils_walberla import CodeGeneration, generate_sweep, generate_info_header
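The imports above are the trimmed set the script still uses. For orientation, a minimal code generation script of the same shape could look as follows; this is a sketch rather than part of the commit, it assumes lbmpy and pystencils_walberla are installed and that the script is driven by waLBerla's CMake code generation, and the class name MinimalLBMSweep is made up:

# Minimal sketch of a waLBerla code generation script (not from this commit).
from lbmpy.creationfunctions import create_lb_update_rule
from pystencils_walberla import CodeGeneration, generate_sweep

with CodeGeneration() as ctx:
    # SRT collision model on a D3Q19 stencil, relaxation rate 1.8
    update_rule = create_lb_update_rule(stencil='D3Q19', method='srt', relaxation_rates=[1.8])
    # emits MinimalLBMSweep.h / MinimalLBMSweep.cpp for the CMake build to compile
    generate_sweep(ctx, 'MinimalLBMSweep', update_rule)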
...
apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp

...
@@ -86,7 +86,6 @@ using FlagField_T = FlagField< flag_t >;
 #if defined(WALBERLA_BUILD_WITH_CUDA)
 typedef cuda::GPUField< real_t > GPUField;
 #endif
-// using CommScheme_T = cuda::communication::UniformGPUScheme<stencil::D2Q9>;

 int main(int argc, char** argv)
 {
...
@@ -185,7 +184,7 @@ int main(int argc, char** argv)
    auto Comm_velocity_based_distributions = make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
-   auto generatedPackInfo_velocity_based_distributions = make_shared< pystencils::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
+   auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
    Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
    auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_gpu);
    Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field);
...
@@ -193,7 +192,7 @@ int main(int argc, char** argv)
    auto Comm_phase_field_distributions = make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
-   auto generatedPackInfo_phase_field_distributions = make_shared< pystencils::PackInfo_phase_field_distributions >(lb_phase_field_gpu);
+   auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field_gpu);
    Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions);
 #else
...
@@ -202,14 +201,14 @@ int main(int argc, char** argv)
    auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field);
-   auto generatedPackInfo_velocity_based_distributions = make_shared< pystencils::PackInfo_velocity_based_distributions >(lb_velocity_field);
+   auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field);
    Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_phase_field);
    Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_velocity_based_distributions);

    blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_phase_field_distributions(blocks);
-   auto generatedPackInfo_phase_field_distributions = make_shared< pystencils::PackInfo_phase_field_distributions >(lb_phase_field);
+   auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field);
    Comm_phase_field_distributions.addPackInfo(generatedPackInfo_phase_field_distributions);
 #endif
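The namespace switch from pystencils:: to lbm:: on the distribution-function pack infos mirrors the code generation change in multiphase_codegen.py below: the names generated by lbmpy_walberla's generate_lb_pack_info show up in namespace lbm, while generate_pack_info_for_field keeps emitting into namespace pystencils. A rough self-contained sketch of the generator side (an assumption drawn from the calls further down, with example field and stencil choices, not code from the commit):

# Sketch: which generator produces which C++ namespace.
from pystencils import fields
from lbmpy.stencils import get_stencil
from lbmpy_walberla import generate_lb_pack_info
from pystencils_walberla import CodeGeneration, generate_pack_info_for_field

with CodeGeneration() as ctx:
    g = fields("lb_velocity_field(19): [3D]", layout='fzyx')  # hydro distributions
    C = fields("phase_field: [3D]", layout='fzyx')            # scalar phase field
    # -> class lbm::PackInfo_velocity_based_distributions
    generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions',
                          get_stencil("D3Q19"), g, streaming_pattern='push', target='cpu')
    # -> class pystencils::PackInfo_phase_field
    generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target='cpu')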
...
apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py

...
@@ -5,11 +5,12 @@ from pystencils import AssignmentCollection
 from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule
 from lbmpy.stencils import get_stencil
-from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_from_kernel
+from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_for_field
+from lbmpy_walberla import generate_lb_pack_info
 from lbmpy.phasefield_allen_cahn.kernel_equations import initializer_kernel_phase_field_lb, \
     initializer_kernel_hydro_lb, interface_tracking_force, \
-    hydrodynamic_force, get_collision_assignments_hydro
+    hydrodynamic_force, get_collision_assignments_hydro, get_collision_assignments_phase
 from lbmpy.phasefield_allen_cahn.force_model import MultiphaseForceModel
...
@@ -52,6 +53,7 @@ w_c = 1.0 / (0.5 + (3.0 * M))
 u = fields(f"vel_field({dimensions}): [{dimensions}D]", layout='fzyx')
 # phase-field
 C = fields(f"phase_field: [{dimensions}D]", layout='fzyx')
+C_tmp = fields(f"phase_field_tmp: [{dimensions}D]", layout='fzyx')
 # phase-field distribution functions
 h = fields(f"lb_phase_field({q_phase}): [{dimensions}D]", layout='fzyx')
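The added C_tmp uses the same pystencils fields() spec-string API as its neighbours. As a reference for that syntax, a standalone sketch assuming only pystencils, with example sizes:

# fields() spec strings: "name(index_size): [<spatial>D]" (example sizes).
from pystencils import fields

dimensions = 3
q_phase = 15
u = fields(f"vel_field({dimensions}): [{dimensions}D]", layout='fzyx')              # vector field
C, C_tmp = fields(f"phase_field, phase_field_tmp: [{dimensions}D]", layout='fzyx')  # two scalar fields
h = fields(f"lb_phase_field({q_phase}): [{dimensions}D]", layout='fzyx')            # q values per cell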
...
@@ -88,32 +90,26 @@ h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W)
 g_updates = initializer_kernel_hydro_lb(g, u, method_hydro)

-force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W)]
+force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W, fd_stencil=get_stencil("D3Q27"))]
 force_model_h = MultiphaseForceModel(force=force_h)

-force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, body_force)
-
-h_tmp_symbol_list = [h_tmp.center(i) for i, _ in enumerate(stencil_phase)]
-sum_h = np.sum(h_tmp_symbol_list[:])
+force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, body_force,
+                             fd_stencil=get_stencil("D3Q27"))

 force_model_g = MultiphaseForceModel(force=force_g, rho=density)

 ####################
 # LBM UPDATE RULES #
 ####################

 method_phase.set_force_model(force_model_h)

-phase_field_LB_step = create_lb_update_rule(lb_method=method_phase,
-                                            velocity_input=u,
-                                            compressible=True,
-                                            optimization={"symbolic_field": h,
-                                                          "symbolic_temporary_field": h_tmp},
-                                            kernel_type='stream_pull_collide')
-
-phase_field_LB_step.set_main_assignments_from_dict({**phase_field_LB_step.main_assignments_dict, **{C.center: sum_h}})
-
-phase_field_LB_step = AssignmentCollection(main_assignments=phase_field_LB_step.main_assignments,
-                                           subexpressions=phase_field_LB_step.subexpressions)
+phase_field_LB_step = get_collision_assignments_phase(lb_method=method_phase,
+                                                      velocity_input=u,
+                                                      output={'density': C_tmp},
+                                                      force_model=force_model_h,
+                                                      symbolic_fields={"symbolic_field": h,
+                                                                       "symbolic_temporary_field": h_tmp},
+                                                      kernel_type='stream_pull_collide')

 phase_field_LB_step = sympy_cse(phase_field_LB_step)

 # ---------------------------------------------------------------------------------------------------------
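The rewritten phase-field step no longer sums the temporary populations into C by hand: output={'density': C_tmp} makes the generated kernel write the zeroth moment into the temporary field, which the sweeps below then exchange via field_swaps=[(h, h_tmp), (C, C_tmp)]. A toy numpy illustration of why the write goes to a temporary buffer that is swapped afterwards (not project code):

# Toy double-buffering illustration (plain numpy, not project code): writing
# the new values in place while neighbours still read the old ones would mix
# two time steps, so the kernel writes to a temporary and the buffers are swapped.
import numpy as np

C = np.random.rand(8, 8)   # current phase field
C_tmp = np.empty_like(C)   # write target for the next time step

# stand-in for the generated kernel: average the four direct neighbours
C_tmp[1:-1, 1:-1] = 0.25 * (C[:-2, 1:-1] + C[2:, 1:-1] + C[1:-1, :-2] + C[1:-1, 2:])

C, C_tmp = C_tmp, C        # the role of field_swaps=[(C, C_tmp)]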
...
@@ -121,18 +117,12 @@ phase_field_LB_step = sympy_cse(phase_field_LB_step)
 hydro_LB_step = get_collision_assignments_hydro(lb_method=method_hydro,
                                                 density=density,
                                                 velocity_input=u,
-                                                force=force_g,
-                                                sub_iterations=1,
+                                                force_model=force_model_g,
+                                                sub_iterations=2,
                                                 symbolic_fields={"symbolic_field": g,
                                                                  "symbolic_temporary_field": g_tmp},
                                                 kernel_type='collide_stream_push')

-# streaming of the hydrodynamic distribution
-stream_hydro = create_lb_update_rule(stencil=stencil_hydro,
-                                     optimization={"symbolic_field": g,
-                                                   "symbolic_temporary_field": g_tmp},
-                                     kernel_type='stream_pull_only')

 ###################
 # GENERATE SWEEPS #
 ###################
...
@@ -161,7 +151,7 @@ with CodeGeneration() as ctx:
     generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates)
     generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
-                   field_swaps=[(h, h_tmp)],
+                   field_swaps=[(h, h_tmp), (C, C_tmp)],
                    inner_outer_split=True,
                    cpu_vectorize_info=cpu_vec)
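field_swaps registers pairs of (field, temporary) that the generated sweep exchanges after each call, so the calling code never juggles the buffers itself. A minimal sketch with a made-up Jacobi kernel, assuming pystencils_walberla is available:

# Sketch of generate_sweep with field_swaps (hypothetical JacobiSweep kernel).
from pystencils import Assignment, fields
from pystencils_walberla import CodeGeneration, generate_sweep

with CodeGeneration() as ctx:
    src, dst = fields("src, dst: [2D]", layout='fzyx')
    jacobi = [Assignment(dst.center,
                         (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4)]
    # after every application the generated class swaps src and dst,
    # just like (h, h_tmp) and (C, C_tmp) above
    generate_sweep(ctx, 'JacobiSweep', jacobi, field_swaps=[(src, dst)])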
...
@@ -171,12 +161,13 @@ with CodeGeneration() as ctx:
                    cpu_vectorize_info=cpu_vec)

     # communication
-    generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field_distributions',
-                                   phase_field_LB_step.main_assignments, target='cpu')
-    generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field',
-                                   hydro_LB_step.all_assignments, target='cpu', kind='pull')
-    generate_pack_info_from_kernel(ctx, 'PackInfo_velocity_based_distributions',
-                                   hydro_LB_step.all_assignments, target='cpu', kind='push')
+    generate_lb_pack_info(ctx, 'PackInfo_phase_field_distributions', stencil_phase, h,
+                          streaming_pattern='pull', target='cpu')
+    generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
+                          streaming_pattern='push', target='cpu')
+    generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target='cpu')

     ctx.write_file("GenDefines.h", info_header)
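Besides generated kernels, the CodeGeneration context can emit arbitrary files, which is how GenDefines.h is written above. A self-contained sketch, where the header contents are illustrative only and not what this script actually writes:

# Sketch: writing a plain header through the CodeGeneration context.
from pystencils_walberla import CodeGeneration

with CodeGeneration() as ctx:
    info_header = "#pragma once\n#define GENERATED_WITH_LBMPY 1\n"  # example contents
    ctx.write_file("GenDefines.h", info_header)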
...
@@ -187,7 +178,7 @@ with CodeGeneration() as ctx:
                    g_updates, target='gpu')
     generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
-                   field_swaps=[(h, h_tmp)],
+                   field_swaps=[(h, h_tmp), (C, C_tmp)],
                    inner_outer_split=True,
                    target='gpu',
                    gpu_indexing_params=sweep_params,
...
@@ -200,12 +191,13 @@ with CodeGeneration() as ctx:
                    gpu_indexing_params=sweep_params,
                    varying_parameters=vp)

     # communication
-    generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field_distributions',
-                                   phase_field_LB_step.main_assignments, target='gpu')
-    generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field',
-                                   hydro_LB_step.all_assignments, target='gpu', kind='pull')
-    generate_pack_info_from_kernel(ctx, 'PackInfo_velocity_based_distributions',
-                                   hydro_LB_step.all_assignments, target='gpu', kind='push')
+    generate_lb_pack_info(ctx, 'PackInfo_phase_field_distributions', stencil_phase, h,
+                          streaming_pattern='pull', target='gpu')
+    generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
+                          streaming_pattern='push', target='gpu')
+    generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target='gpu')

     ctx.write_file("GenDefines.h", info_header)
...
apps/benchmarks/UniformGridGPU/CMakeLists.txt

...
@@ -4,49 +4,27 @@ waLBerla_link_files_to_builddir( "*.py" )
 waLBerla_link_files_to_builddir( "simulation_setup" )

-foreach( config srt trt mrt smagorinsky entropic smagorinsky_noopt entropic_kbc_n4
-         entropic_kbc_n4_noopt mrt_noopt mrt_full mrt_full_noopt
-         cumulant cumulant_d3q27
-         srt_d3q27 mrt_d3q27 mrt_d3q27_noopt smagorinsky_d3q27 smagorinsky_d3q27_noopt mrt_full_d3q27 mrt_full_d3q27_noopt )
-    waLBerla_generate_target_from_python( NAME UniformGridGPUGenerated_${config}
-          FILE UniformGridGPU.py
-          CODEGEN_CFG ${config}
-          OUT_FILES UniformGridGPU_LatticeModel.cpp UniformGridGPU_LatticeModel.h
-          UniformGridGPU_LbKernel.cu UniformGridGPU_LbKernel.h
-          UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
-          UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
-          UniformGridGPU_PackInfo.cu UniformGridGPU_PackInfo.h
-          UniformGridGPU_MacroSetter.cpp UniformGridGPU_MacroSetter.h
-          UniformGridGPU_MacroGetter.cpp UniformGridGPU_MacroGetter.h
-          UniformGridGPU_Defines.h )
-    waLBerla_add_executable( NAME UniformGridBenchmarkGPU_${config}
-          FILES UniformGridGPU.cpp
-          DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk gui UniformGridGPUGenerated_${config} )
-    set_target_properties( UniformGridBenchmarkGPU_${config} PROPERTIES CXX_VISIBILITY_PRESET hidden )
-endforeach()
-
-foreach( config srt trt mrt smagorinsky entropic )
-    waLBerla_generate_target_from_python( NAME UniformGridGPUGenerated_AA_${config}
-          FILE UniformGridGPU_AA.py
-          CODEGEN_CFG ${config}
-          OUT_FILES UniformGridGPU_AA_PackInfoPull.cu UniformGridGPU_AA_PackInfoPull.h
-          UniformGridGPU_AA_LbKernelOdd.cu UniformGridGPU_AA_LbKernelOdd.h
-          UniformGridGPU_AA_LbKernelEven.cu UniformGridGPU_AA_LbKernelEven.h
-          UniformGridGPU_AA_PackInfoPush.cu UniformGridGPU_AA_PackInfoPush.h
-          UniformGridGPU_AA_MacroSetter.cpp UniformGridGPU_AA_MacroSetter.h
-          UniformGridGPU_AA_MacroGetter.cpp UniformGridGPU_AA_MacroGetter.h
-          UniformGridGPU_AA_Defines.h )
-    waLBerla_add_executable( NAME UniformGridBenchmarkGPU_AA_${config}
-          FILES UniformGridGPU_AA.cpp
-          DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk gui UniformGridGPUGenerated_AA_${config} )
-    set_target_properties( UniformGridBenchmarkGPU_AA_${config} PROPERTIES CXX_VISIBILITY_PRESET hidden )
-endforeach()
+foreach( streaming_pattern aa )  # choose from {pull, push, aa, esotwist}
+    foreach( stencil d3q27 )  # choose from {d3q19 d3q27}
+        foreach( collision_setup srt trt mrt cumulant )  # choose from {srt trt mrt cumulant entropic smagorinsky}
+            set( config ${stencil}_${streaming_pattern}_${collision_setup} )
+            waLBerla_generate_target_from_python( NAME UniformGridGPUGenerated_${config}
+                  FILE UniformGridGPU.py
+                  CODEGEN_CFG ${config}
+                  OUT_FILES UniformGridGPU_LbKernel.cu UniformGridGPU_LbKernel.h
+                  UniformGridGPU_PackInfoEven.cu UniformGridGPU_PackInfoEven.h
+                  UniformGridGPU_PackInfoOdd.cu UniformGridGPU_PackInfoOdd.h
+                  UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
+                  UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
+                  UniformGridGPU_MacroSetter.cu UniformGridGPU_MacroSetter.h
+                  UniformGridGPU_InfoHeader.h )
+            waLBerla_add_executable( NAME UniformGridGPU_${config}
+                  FILES UniformGridGPU.cpp
+                  DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk UniformGridGPUGenerated_${config} )
+            set_target_properties( UniformGridGPU_${config} PROPERTIES CXX_VISIBILITY_PRESET hidden )
+        endforeach()
+    endforeach()
+endforeach()
\ No newline at end of file
apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp

#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "core/math/Random.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/timing/TimingPool.h"
#include "cuda/AddGPUFieldToStorage.h"
#include "cuda/DeviceSelectMPI.h"
#include "cuda/FieldCopy.h"
#include "cuda/HostFieldAllocator.h"
#include "cuda/NVTX.h"
#include "cuda/ParallelStreams.h"
#include "cuda/communication/GPUPackInfo.h"
#include "cuda/communication/UniformGPUScheme.h"
#include "cuda/lbm/CombinedInPlaceGpuPackInfo.h"
#include "domain_decomposition/SharedSweep.h"
#include "field/AddToStorage.h"
#include "field/FlagField.h"
#include "field/communication/PackInfo.h"
#include "field/vtk/VTKWriter.h"
#include "geometry/all.h"
#include "gui/Gui.h"
#include "lbm/PerformanceLogger.h"
#include "lbm/gui/Connection.h"
#include "python_coupling/CreateConfig.h"
#include "python_coupling/DictWrapper.h"
#include "python_coupling/PythonCallback.h"
#include "timeloop/all.h"
#include "UniformGridGPU_Communication.h"
#include "UniformGridGPU_Defines.h"
#include "UniformGridGPU_LatticeModel.h"
#include "UniformGridGPU_LbKernel.h"
#include "UniformGridGPU_MacroGetter.h"
#include "UniformGridGPU_MacroSetter.h"
#include "UniformGridGPU_NoSlip.h"
#include "UniformGridGPU_PackInfo.h"
#include "UniformGridGPU_UBB.h"
using namespace walberla;

+#include "geometry/InitBoundaryHandling.h"
+#include "lbm/inplace_streaming/TimestepTracker.h"
+#include "timeloop/SweepTimeloop.h"
+#include "InitShearVelocity.h"
-#include <cmath>

-using LatticeModel_T = lbm::UniformGridGPU_LatticeModel;
-const auto Q = LatticeModel_T::Stencil::Q;
-using Stencil_T              = LatticeModel_T::Stencil;
-using CommunicationStencil_T = LatticeModel_T::CommunicationStencil;
-using PdfField_T             = GhostLayerField< real_t, Q >;
-using CommScheme_T           = cuda::communication::UniformGPUScheme< CommunicationStencil_T >;
-using VelocityField_T        = GhostLayerField< real_t, 3 >;
-using flag_t                 = walberla::uint8_t;
-using FlagField_T            = FlagField< flag_t >;
-void initShearVelocity(const shared_ptr< StructuredBlockStorage >& blocks, BlockDataID velFieldID,
-                       const real_t xMagnitude = real_t(0.1), const real_t fluctuationMagnitude = real_t(0.05))
-{
-   math::seedRandomGenerator(0);
-   auto halfZ = blocks->getDomainCellBB().zMax() / 2;
-   for (auto& block : *blocks)
-   {
-      auto velField = block.getData< VelocityField_T >(velFieldID);
-      WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velField,
-         Cell globalCell;
-         blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
-         real_t randomReal = xMagnitude * math::realRandom< real_t >(-fluctuationMagnitude, fluctuationMagnitude);
-         velField->get(x, y, z, 1) = real_t(0);
-         velField->get(x, y, z, 2) = randomReal;
-         if (globalCell[2] >= halfZ) { velField->get(x, y, z, 0) = xMagnitude; }
-         else { velField->get(x, y, z, 0) = -xMagnitude; }
-      );
-   }
-}
+#include "UniformGridGPU_InfoHeader.h"
+using FlagField_T = FlagField< uint8_t >;
int main(int argc, char** argv)
{
   mpi::Environment env(argc, argv);
   cuda::selectDeviceBasedOnMpiRank();

   for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
   {
      WALBERLA_MPI_WORLD_BARRIER()
+     WALBERLA_CUDA_CHECK(cudaPeekAtLastError())

      //////////////////////////////////////////////////////////////////
      /// SETUP AND CONFIGURATION                                     ///
      //////////////////////////////////////////////////////////////////

      auto config = *cfg;
      logging::configureLogging(config);
      auto blocks = blockforest::createUniformBlockGridFromConfig(config);
      Vector3< uint_t > cellsPerBlock =
         config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock");

      // Reading parameters
      auto parameters = config->getOneBlock("Parameters");
      const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "normal");
      const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.4));
      const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50));
      const bool initShearFlow = parameters.getParameter< bool >("initShearFlow", true);
      // Creating fields
-     BlockDataID pdfFieldCpuID = field::addToStorage< PdfField_T >(blocks, "pdfs cpu", real_t(0), field::fzyx);
+     BlockDataID pdfFieldCpuID = field::addToStorage< PdfField_T >(blocks, "pdfs cpu", real_t(std::nan("")), field::fzyx);
      BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >(blocks, "vel", real_t(0), field::fzyx);

      // Initialize velocity on cpu
      if (initShearFlow)
      {
         WALBERLA_LOG_INFO_ON_ROOT("Initializing shear flow")
         initShearVelocity(blocks, velFieldCpuID);
      }

      BlockDataID pdfFieldGpuID = cuda::addGPUFieldToStorage< PdfField_T >(blocks, pdfFieldCpuID, "pdfs on GPU", true);
+     // Velocity field is copied to the GPU
+     BlockDataID velFieldGpuID = cuda::addGPUFieldToStorage< VelocityField_T >(blocks, velFieldCpuID, "velocity on GPU", true);
+     if (timeStepStrategy != "kernelOnlyNoInit")
+     {
+        pystencils::UniformGridGPU_MacroSetter setterSweep(pdfFieldGpuID, velFieldGpuID);
+        // Set up initial PDF values
+        for (auto& block : *blocks)
+           setterSweep(&block);
+     }

      Vector3< int > innerOuterSplit =
         parameters.getParameter< Vector3< int > >("innerOuterSplit", Vector3< int >(1, 1, 1));

-     pystencils::UniformGridGPU_MacroSetter setterSweep(pdfFieldCpuID, velFieldCpuID);
-     for (auto& block : *blocks)
-        setterSweep(&block);
-
-     // setter sweep only initializes interior of domain - for push schemes to work a first communication is required here
-     blockforest::communication::UniformBufferedScheme< CommunicationStencil_T > initialComm(blocks);
-     initialComm.addPackInfo(make_shared< field::communication::PackInfo< PdfField_T > >(pdfFieldCpuID));
-     initialComm();

      for (uint_t i = 0; i < 3; ++i)
      {
         if (int_c(cellsPerBlock[i]) <= innerOuterSplit[i] * 2)
         {
            WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock")
         }
      }
-     BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
+     Cell innerOuterSplitCell(innerOuterSplit[0], innerOuterSplit[1], innerOuterSplit[2]);
+     bool cudaEnabledMPI = parameters.getParameter< bool >("cudaEnabledMPI", false);
+     Vector3< int32_t > gpuBlockSize =
+        parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1));

+     int streamHighPriority = 0;
+     int streamLowPriority  = 0;
+     WALBERLA_CUDA_CHECK(cudaDeviceGetStreamPriorityRange(&streamLowPriority, &streamHighPriority))

      //////////////////////////////////////////////////////////////////
      /// LB SWEEPS AND BOUNDARY HANDLING                             ///
      //////////////////////////////////////////////////////////////////
+     using LbSweep      = lbm::UniformGridGPU_LbKernel;
+     using PackInfoEven = lbm::UniformGridGPU_PackInfoEven;
+     using PackInfoOdd  = lbm::UniformGridGPU_PackInfoOdd;
+     using cuda::communication::UniformGPUScheme;

+     LbSweep lbSweep(pdfFieldGpuID, omega, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2], innerOuterSplitCell);
+     lbSweep.setOuterPriority(streamHighPriority);
      // Boundaries
      const FlagUID fluidFlagUID("Fluid");
+     BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field");
      auto boundariesConfig = config->getBlock("Boundaries");
-     bool disableBoundaries = true;
+     bool boundaries = false;
      if (boundariesConfig)
      {
-        disableBoundaries = false;
+        boundaries = true;
         geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldID, boundariesConfig);
         geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID);
      }
      lbm::UniformGridGPU_UBB ubb(blocks, pdfFieldGpuID);
      lbm::UniformGridGPU_NoSlip noSlip(blocks, pdfFieldGpuID);

      noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldID, FlagUID("NoSlip"), fluidFlagUID);
      ubb.fillFromFlagField< FlagField_T >(blocks, flagFieldID, FlagUID("UBB"), fluidFlagUID);
      // Communication setup
-     const std::string communicationSchemeStr =
-        parameters.getParameter< std::string >("communicationScheme", "UniformGPUScheme_Baseline");
-     CommunicationSchemeType communicationScheme;
-     if (communicationSchemeStr == "GPUPackInfo_Baseline")
-        communicationScheme = GPUPackInfo_Baseline;
-     else if (communicationSchemeStr == "GPUPackInfo_Streams")
-        communicationScheme = GPUPackInfo_Streams;
-     else if (communicationSchemeStr == "UniformGPUScheme_Baseline")
-        communicationScheme = UniformGPUScheme_Baseline;
-     else if (communicationSchemeStr == "UniformGPUScheme_Memcpy")
-        communicationScheme = UniformGPUScheme_Memcpy;
-     else if (communicationSchemeStr == "MPIDatatypes")
-        communicationScheme = MPIDatatypes;
-     else if (communicationSchemeStr == "MPIDatatypesFull")
-        communicationScheme = MPIDatatypesFull;
-     else
-     {
-        WALBERLA_ABORT_NO_DEBUG_INFO("Invalid choice for communicationScheme")
-     }
+     // Initial setup is the post-collision state of an even time step
+     auto tracker = make_shared< lbm::TimestepTracker >(0);

-     WALBERLA_CHECK(gpuBlockSize[2] == 1);
-     pystencils::UniformGridGPU_LbKernel lbKernel(pdfFieldGpuID, omega, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7,
-                                                  gpuBlockSize[0], gpuBlockSize[1],
-                                                  Cell(innerOuterSplit[0], innerOuterSplit[1], innerOuterSplit[2]));
-     lbKernel.setOuterPriority(streamHighPriority);
-     UniformGridGPU_Communication< CommunicationStencil_T, cuda::GPUField< double > >
-        gpuComm(blocks, pdfFieldGpuID, (CommunicationSchemeType) communicationScheme, cudaEnabledMPI);

      auto defaultStream = cuda::StreamRAII::newPriorityStream(streamLowPriority);
      auto innerOuterStreams = cuda::ParallelStreams(streamHighPriority);