Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Jan Hönig
waLBerla
Commits
d48349d8
Commit
d48349d8
authored
Jul 07, 2021
by
Dominik Thoennes
Browse files
Merge branch 'master' into thoennes/add-oneapi-22
parents
b7838d4b
03b9f95f
Changes
66
Expand all
Hide whitespace changes
Inline
Side-by-side
CMakeLists.txt
View file @
d48349d8
...
...
@@ -1011,11 +1011,18 @@ endif()
option
(
WALBERLA_THREAD_SAFE_LOGGING
"Enables/Disables thread-safe logging"
ON
)
if
(
WALBERLA_BUILD_WITH_OPENMP
)
if
(
APPLE AND EXISTS /opt/local/lib/libomp AND EXISTS /opt/local/include/libomp
)
# find libomp from MacPorts
set
(
CMAKE_FRAMEWORK_PATH /opt/local/lib/libomp
)
set
(
CMAKE_INCLUDE_PATH /opt/local/include/libomp
)
endif
()
find_package
(
OpenMP
)
if
(
OpenMP_FOUND
)
add_flag
(
CMAKE_C_FLAGS
"
${
OpenMP_C_FLAGS
}
"
)
add_flag
(
CMAKE_CXX_FLAGS
"
${
OpenMP_CXX_FLAGS
}
"
)
list
(
APPEND SERVICE_LIBS
${
OpenMP_CXX_LIBRARIES
}
)
if
(
OpenMP_CXX_INCLUDE_DIRS
)
include_directories
(
${
OpenMP_CXX_INCLUDE_DIRS
}
)
endif
()
else
()
#workarounds
if
(
WALBERLA_CXX_COMPILER_IS_NEC
)
...
...
apps/benchmarks/FlowAroundSphereCodeGen/FlowAroundSphereCodeGen.py
View file @
d48349d8
from
pystencils.field
import
fields
from
lbmpy.advanced_streaming.utility
import
get_timesteps
,
Timestep
from
lbmpy.advanced_streaming.utility
import
get_timesteps
from
lbmpy.macroscopic_value_kernels
import
macroscopic_values_setter
from
lbmpy.stencils
import
get_stencil
from
lbmpy.creationfunctions
import
create_lb_collision_rule
,
create_lb_method
,
create_lb_update_rule
from
lbmpy.creationfunctions
import
create_lb_collision_rule
from
lbmpy.boundaries
import
NoSlip
,
UBB
,
ExtrapolationOutflow
from
pystencils_walberla
import
CodeGeneration
,
generate_sweep
,
generate_info_header
...
...
apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp
View file @
d48349d8
...
...
@@ -86,7 +86,6 @@ using FlagField_T = FlagField< flag_t >;
#if defined(WALBERLA_BUILD_WITH_CUDA)
typedef
cuda
::
GPUField
<
real_t
>
GPUField
;
#endif
// using CommScheme_T = cuda::communication::UniformGPUScheme<stencil::D2Q9>;
int
main
(
int
argc
,
char
**
argv
)
{
...
...
@@ -185,7 +184,7 @@ int main(int argc, char** argv)
auto
Comm_velocity_based_distributions
=
make_shared
<
cuda
::
communication
::
UniformGPUScheme
<
Stencil_hydro_T
>
>
(
blocks
,
0
);
auto
generatedPackInfo_velocity_based_distributions
=
make_shared
<
pystencils
::
PackInfo_velocity_based_distributions
>
(
lb_velocity_field_gpu
);
make_shared
<
lbm
::
PackInfo_velocity_based_distributions
>
(
lb_velocity_field_gpu
);
Comm_velocity_based_distributions
->
addPackInfo
(
generatedPackInfo_velocity_based_distributions
);
auto
generatedPackInfo_phase_field
=
make_shared
<
pystencils
::
PackInfo_phase_field
>
(
phase_field_gpu
);
Comm_velocity_based_distributions
->
addPackInfo
(
generatedPackInfo_phase_field
);
...
...
@@ -193,7 +192,7 @@ int main(int argc, char** argv)
auto
Comm_phase_field_distributions
=
make_shared
<
cuda
::
communication
::
UniformGPUScheme
<
Stencil_hydro_T
>
>
(
blocks
,
0
);
auto
generatedPackInfo_phase_field_distributions
=
make_shared
<
pystencils
::
PackInfo_phase_field_distributions
>
(
lb_phase_field_gpu
);
make_shared
<
lbm
::
PackInfo_phase_field_distributions
>
(
lb_phase_field_gpu
);
Comm_phase_field_distributions
->
addPackInfo
(
generatedPackInfo_phase_field_distributions
);
#else
...
...
@@ -202,14 +201,14 @@ int main(int argc, char** argv)
auto
generatedPackInfo_phase_field
=
make_shared
<
pystencils
::
PackInfo_phase_field
>
(
phase_field
);
auto
generatedPackInfo_velocity_based_distributions
=
make_shared
<
pystencils
::
PackInfo_velocity_based_distributions
>
(
lb_velocity_field
);
make_shared
<
lbm
::
PackInfo_velocity_based_distributions
>
(
lb_velocity_field
);
Comm_velocity_based_distributions
.
addPackInfo
(
generatedPackInfo_phase_field
);
Comm_velocity_based_distributions
.
addPackInfo
(
generatedPackInfo_velocity_based_distributions
);
blockforest
::
communication
::
UniformBufferedScheme
<
Stencil_hydro_T
>
Comm_phase_field_distributions
(
blocks
);
auto
generatedPackInfo_phase_field_distributions
=
make_shared
<
pystencils
::
PackInfo_phase_field_distributions
>
(
lb_phase_field
);
make_shared
<
lbm
::
PackInfo_phase_field_distributions
>
(
lb_phase_field
);
Comm_phase_field_distributions
.
addPackInfo
(
generatedPackInfo_phase_field_distributions
);
#endif
...
...
apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py
View file @
d48349d8
...
...
@@ -5,11 +5,12 @@ from pystencils import AssignmentCollection
from
lbmpy.creationfunctions
import
create_lb_method
,
create_lb_update_rule
from
lbmpy.stencils
import
get_stencil
from
pystencils_walberla
import
CodeGeneration
,
generate_sweep
,
generate_pack_info_from_kernel
from
pystencils_walberla
import
CodeGeneration
,
generate_sweep
,
generate_pack_info_for_field
from
lbmpy_walberla
import
generate_lb_pack_info
from
lbmpy.phasefield_allen_cahn.kernel_equations
import
initializer_kernel_phase_field_lb
,
\
initializer_kernel_hydro_lb
,
interface_tracking_force
,
\
hydrodynamic_force
,
get_collision_assignments_hydro
hydrodynamic_force
,
get_collision_assignments_hydro
,
get_collision_assignments_phase
from
lbmpy.phasefield_allen_cahn.force_model
import
MultiphaseForceModel
...
...
@@ -52,6 +53,7 @@ w_c = 1.0 / (0.5 + (3.0 * M))
u
=
fields
(
f
"vel_field(
{
dimensions
}
): [
{
dimensions
}
D]"
,
layout
=
'fzyx'
)
# phase-field
C
=
fields
(
f
"phase_field: [
{
dimensions
}
D]"
,
layout
=
'fzyx'
)
C_tmp
=
fields
(
f
"phase_field_tmp: [
{
dimensions
}
D]"
,
layout
=
'fzyx'
)
# phase-field distribution functions
h
=
fields
(
f
"lb_phase_field(
{
q_phase
}
): [
{
dimensions
}
D]"
,
layout
=
'fzyx'
)
...
...
@@ -88,32 +90,26 @@ h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W)
g_updates
=
initializer_kernel_hydro_lb
(
g
,
u
,
method_hydro
)
force_h
=
[
f
/
3
for
f
in
interface_tracking_force
(
C
,
stencil_phase
,
W
)]
force_h
=
[
f
/
3
for
f
in
interface_tracking_force
(
C
,
stencil_phase
,
W
,
fd_stencil
=
get_stencil
(
"D3Q27"
)
)]
force_model_h
=
MultiphaseForceModel
(
force
=
force_h
)
force_g
=
hydrodynamic_force
(
g
,
C
,
method_hydro
,
relaxation_time
,
density_liquid
,
density_gas
,
kappa
,
beta
,
body_force
)
force_g
=
hydrodynamic_force
(
g
,
C
,
method_hydro
,
relaxation_time
,
density_liquid
,
density_gas
,
kappa
,
beta
,
body_force
,
fd_stencil
=
get_stencil
(
"D3Q27"
))
h_tmp_symbol_list
=
[
h_tmp
.
center
(
i
)
for
i
,
_
in
enumerate
(
stencil_phase
)]
sum_h
=
np
.
sum
(
h_tmp_symbol_list
[:])
force_model_g
=
MultiphaseForceModel
(
force
=
force_g
,
rho
=
density
)
####################
# LBM UPDATE RULES #
####################
method_phase
.
set_force_model
(
force_model_h
)
phase_field_LB_step
=
get_collision_assignments_phase
(
lb_method
=
method_phase
,
velocity_input
=
u
,
output
=
{
'density'
:
C_tmp
},
force_model
=
force_model_h
,
symbolic_fields
=
{
"symbolic_field"
:
h
,
"symbolic_temporary_field"
:
h_tmp
},
kernel_type
=
'stream_pull_collide'
)
phase_field_LB_step
=
create_lb_update_rule
(
lb_method
=
method_phase
,
velocity_input
=
u
,
compressible
=
True
,
optimization
=
{
"symbolic_field"
:
h
,
"symbolic_temporary_field"
:
h_tmp
},
kernel_type
=
'stream_pull_collide'
)
phase_field_LB_step
.
set_main_assignments_from_dict
({
**
phase_field_LB_step
.
main_assignments_dict
,
**
{
C
.
center
:
sum_h
}})
phase_field_LB_step
=
AssignmentCollection
(
main_assignments
=
phase_field_LB_step
.
main_assignments
,
subexpressions
=
phase_field_LB_step
.
subexpressions
)
phase_field_LB_step
=
sympy_cse
(
phase_field_LB_step
)
# ---------------------------------------------------------------------------------------------------------
...
...
@@ -121,18 +117,12 @@ phase_field_LB_step = sympy_cse(phase_field_LB_step)
hydro_LB_step
=
get_collision_assignments_hydro
(
lb_method
=
method_hydro
,
density
=
density
,
velocity_input
=
u
,
force
=
force
_g
,
sub_iterations
=
1
,
force
_model
=
force_model
_g
,
sub_iterations
=
2
,
symbolic_fields
=
{
"symbolic_field"
:
g
,
"symbolic_temporary_field"
:
g_tmp
},
kernel_type
=
'collide_stream_push'
)
# streaming of the hydrodynamic distribution
stream_hydro
=
create_lb_update_rule
(
stencil
=
stencil_hydro
,
optimization
=
{
"symbolic_field"
:
g
,
"symbolic_temporary_field"
:
g_tmp
},
kernel_type
=
'stream_pull_only'
)
###################
# GENERATE SWEEPS #
###################
...
...
@@ -161,7 +151,7 @@ with CodeGeneration() as ctx:
generate_sweep
(
ctx
,
'initialize_velocity_based_distributions'
,
g_updates
)
generate_sweep
(
ctx
,
'phase_field_LB_step'
,
phase_field_LB_step
,
field_swaps
=
[(
h
,
h_tmp
)],
field_swaps
=
[(
h
,
h_tmp
)
,
(
C
,
C_tmp
)
],
inner_outer_split
=
True
,
cpu_vectorize_info
=
cpu_vec
)
...
...
@@ -171,12 +161,13 @@ with CodeGeneration() as ctx:
cpu_vectorize_info
=
cpu_vec
)
# communication
generate_pack_info_from_kernel
(
ctx
,
'PackInfo_phase_field_distributions'
,
phase_field_LB_step
.
main_assignments
,
target
=
'cpu'
)
generate_pack_info_from_kernel
(
ctx
,
'PackInfo_phase_field'
,
hydro_LB_step
.
all_assignments
,
target
=
'cpu'
,
kind
=
'pull'
)
generate_pack_info_from_kernel
(
ctx
,
'PackInfo_velocity_based_distributions'
,
hydro_LB_step
.
all_assignments
,
target
=
'cpu'
,
kind
=
'push'
)
generate_lb_pack_info
(
ctx
,
'PackInfo_phase_field_distributions'
,
stencil_phase
,
h
,
streaming_pattern
=
'pull'
,
target
=
'cpu'
)
generate_lb_pack_info
(
ctx
,
'PackInfo_velocity_based_distributions'
,
stencil_hydro
,
g
,
streaming_pattern
=
'push'
,
target
=
'cpu'
)
generate_pack_info_for_field
(
ctx
,
'PackInfo_phase_field'
,
C
,
target
=
'cpu'
)
ctx
.
write_file
(
"GenDefines.h"
,
info_header
)
...
...
@@ -187,7 +178,7 @@ with CodeGeneration() as ctx:
g_updates
,
target
=
'gpu'
)
generate_sweep
(
ctx
,
'phase_field_LB_step'
,
phase_field_LB_step
,
field_swaps
=
[(
h
,
h_tmp
)],
field_swaps
=
[(
h
,
h_tmp
)
,
(
C
,
C_tmp
)
],
inner_outer_split
=
True
,
target
=
'gpu'
,
gpu_indexing_params
=
sweep_params
,
...
...
@@ -200,12 +191,13 @@ with CodeGeneration() as ctx:
gpu_indexing_params
=
sweep_params
,
varying_parameters
=
vp
)
# communication
generate_pack_info_from_kernel
(
ctx
,
'PackInfo_phase_field_distributions'
,
phase_field_LB_step
.
main_assignments
,
target
=
'gpu'
)
generate_pack_info_from_kernel
(
ctx
,
'PackInfo_phase_field'
,
hydro_LB_step
.
all_assignments
,
target
=
'gpu'
,
kind
=
'pull'
)
generate_pack_info_from_kernel
(
ctx
,
'PackInfo_velocity_based_distributions'
,
hydro_LB_step
.
all_assignments
,
target
=
'gpu'
,
kind
=
'push'
)
generate_lb_pack_info
(
ctx
,
'PackInfo_phase_field_distributions'
,
stencil_phase
,
h
,
streaming_pattern
=
'pull'
,
target
=
'gpu'
)
generate_lb_pack_info
(
ctx
,
'PackInfo_velocity_based_distributions'
,
stencil_hydro
,
g
,
streaming_pattern
=
'push'
,
target
=
'gpu'
)
generate_pack_info_for_field
(
ctx
,
'PackInfo_phase_field'
,
C
,
target
=
'gpu'
)
ctx
.
write_file
(
"GenDefines.h"
,
info_header
)
...
...
apps/benchmarks/UniformGridGPU/CMakeLists.txt
View file @
d48349d8
...
...
@@ -4,49 +4,27 @@ waLBerla_link_files_to_builddir( "*.py" )
waLBerla_link_files_to_builddir
(
"simulation_setup"
)
foreach
(
config srt trt mrt smagorinsky entropic smagorinsky_noopt entropic_kbc_n4
entropic_kbc_n4_noopt mrt_noopt mrt_full mrt_full_noopt
cumulant cumulant_d3q27
srt_d3q27 mrt_d3q27 mrt_d3q27_noopt smagorinsky_d3q27 smagorinsky_d3q27_noopt mrt_full_d3q27 mrt_full_d3q27_noopt
)
waLBerla_generate_target_from_python
(
NAME UniformGridGPUGenerated_
${
config
}
FILE UniformGridGPU.py
CODEGEN_CFG
${
config
}
OUT_FILES UniformGridGPU_LatticeModel.cpp UniformGridGPU_LatticeModel.h
UniformGridGPU_LbKernel.cu UniformGridGPU_LbKernel.h
UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
UniformGridGPU_PackInfo.cu UniformGridGPU_PackInfo.h
UniformGridGPU_MacroSetter.cpp UniformGridGPU_MacroSetter.h
UniformGridGPU_MacroGetter.cpp UniformGridGPU_MacroGetter.h
UniformGridGPU_Defines.h
)
waLBerla_add_executable
(
NAME UniformGridBenchmarkGPU_
${
config
}
FILES UniformGridGPU.cpp
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk gui UniformGridGPUGenerated_
${
config
}
)
set_target_properties
(
UniformGridBenchmarkGPU_
${
config
}
PROPERTIES CXX_VISIBILITY_PRESET hidden
)
endforeach
()
foreach
(
config srt trt mrt smagorinsky entropic
)
waLBerla_generate_target_from_python
(
NAME UniformGridGPUGenerated_AA_
${
config
}
FILE UniformGridGPU_AA.py
CODEGEN_CFG
${
config
}
OUT_FILES UniformGridGPU_AA_PackInfoPull.cu UniformGridGPU_AA_PackInfoPull.h
UniformGridGPU_AA_LbKernelOdd.cu UniformGridGPU_AA_LbKernelOdd.h
UniformGridGPU_AA_LbKernelEven.cu UniformGridGPU_AA_LbKernelEven.h
UniformGridGPU_AA_PackInfoPush.cu UniformGridGPU_AA_PackInfoPush.h
UniformGridGPU_AA_MacroSetter.cpp UniformGridGPU_AA_MacroSetter.h
UniformGridGPU_AA_MacroGetter.cpp UniformGridGPU_AA_MacroGetter.h
UniformGridGPU_AA_Defines.h
)
waLBerla_add_executable
(
NAME UniformGridBenchmarkGPU_AA_
${
config
}
FILES UniformGridGPU_AA.cpp
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk gui UniformGridGPUGenerated_AA_
${
config
}
)
set_target_properties
(
UniformGridBenchmarkGPU_AA_
${
config
}
PROPERTIES CXX_VISIBILITY_PRESET hidden
)
endforeach
()
foreach
(
streaming_pattern aa
)
# choose from {pull, push, aa, esotwist}
foreach
(
stencil d3q27
)
# choose from {d3q19 d3q27}
foreach
(
collision_setup srt trt mrt cumulant
)
# choose from {srt trt mrt cumulant entropic smagorinsky}
set
(
config
${
stencil
}
_
${
streaming_pattern
}
_
${
collision_setup
}
)
waLBerla_generate_target_from_python
(
NAME UniformGridGPUGenerated_
${
config
}
FILE UniformGridGPU.py
CODEGEN_CFG
${
config
}
OUT_FILES UniformGridGPU_LbKernel.cu UniformGridGPU_LbKernel.h
UniformGridGPU_PackInfoEven.cu UniformGridGPU_PackInfoEven.h
UniformGridGPU_PackInfoOdd.cu UniformGridGPU_PackInfoOdd.h
UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
UniformGridGPU_MacroSetter.cu UniformGridGPU_MacroSetter.h
UniformGridGPU_InfoHeader.h
)
waLBerla_add_executable
(
NAME UniformGridGPU_
${
config
}
FILES UniformGridGPU.cpp
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk UniformGridGPUGenerated_
${
config
}
)
set_target_properties
(
UniformGridGPU_
${
config
}
PROPERTIES CXX_VISIBILITY_PRESET hidden
)
endforeach
()
endforeach
()
endforeach
()
\ No newline at end of file
apps/benchmarks/UniformGridGPU/UniformGridGPU.cpp
View file @
d48349d8
This diff is collapsed.
Click to expand it.
apps/benchmarks/UniformGridGPU/UniformGridGPU.py
View file @
d48349d8
import
sympy
as
sp
import
numpy
as
np
import
pystencils
as
ps
from
lbmpy.creationfunctions
import
create_lb_method
,
create_lb_update_rule
,
create_lb_collision_rule
from
lbmpy.boundaries
import
NoSlip
,
UBB
from
lbmpy.fieldaccess
import
StreamPullTwoFieldsAccessor
from
pystencils_walberla
import
generate_pack_info_from_kernel
from
lbmpy_walberla
import
generate_lattice_model
,
generate_boundary
from
pystencils_walberla
import
CodeGeneration
,
generate_sweep
from
pystencils.data_types
import
TypedSymbol
from
pystencils.fast_approximation
import
insert_fast_sqrts
,
insert_fast_divisions
from
lbmpy.macroscopic_value_kernels
import
macroscopic_values_getter
,
macroscopic_values_setter
from
lbmpy.advanced_streaming
import
Timestep
,
is_inplace
from
lbmpy.advanced_streaming.utility
import
streaming_patterns
from
lbmpy.boundaries
import
NoSlip
,
UBB
from
lbmpy.creationfunctions
import
create_lb_collision_rule
from
lbmpy.macroscopic_value_kernels
import
macroscopic_values_setter
from
lbmpy.stencils
import
get_stencil
from
pystencils_walberla
import
CodeGeneration
,
generate_info_header
,
generate_sweep
from
lbmpy_walberla
import
generate_alternating_lbm_sweep
,
generate_lb_pack_info
,
generate_alternating_lbm_boundary
omega
=
sp
.
symbols
(
"omega"
)
omega_free
=
sp
.
Symbol
(
"omega_free"
)
omega_fill
=
sp
.
symbols
(
"omega_:10"
)
compile_time_block_size
=
False
if
compile_time_block_size
:
...
...
@@ -21,156 +24,158 @@ if compile_time_block_size:
else
:
sweep_block_size
=
(
TypedSymbol
(
"cudaBlockSize0"
,
np
.
int32
),
TypedSymbol
(
"cudaBlockSize1"
,
np
.
int32
),
1
)
TypedSymbol
(
"cudaBlockSize2"
,
np
.
int32
)
)
sweep
_params
=
{
'block_size'
:
sweep_block_size
}
gpu_indexing
_params
=
{
'block_size'
:
sweep_block_size
}
options_dict
=
{
'srt'
:
{
'method'
:
'srt'
,
'stencil'
:
'D3Q19'
,
'relaxation_rate'
:
omega
,
'compressible'
:
False
,
},
'trt'
:
{
'method'
:
'trt'
,
'stencil'
:
'D3Q19'
,
'relaxation_rate'
:
omega
,
},
'mrt'
:
{
'method'
:
'mrt'
,
'stencil'
:
'D3Q19'
,
'relaxation_rates'
:
[
omega
,
1.3
,
1.4
,
1.2
,
1.1
,
1.15
,
1.234
,
1.4235
],
'relaxation_rates'
:
[
omega
,
1
,
1
,
1
,
1
,
1
,
1
],
},
'mrt
_full
'
:
{
'mrt
-overrelax
'
:
{
'method'
:
'mrt'
,
'stencil'
:
'D3Q19'
,
'relaxation_rates'
:
[
omega_fill
[
0
],
omega
,
omega_fill
[
1
],
omega_fill
[
2
],
omega_fill
[
3
],
omega_fill
[
4
],
omega_fill
[
5
]],
'relaxation_rates'
:
[
omega
,
1.3
,
1.4
,
omega
,
1.2
,
1.1
],
},
'
entropic
'
:
{
'method'
:
'
mr
t'
,
'
stencil'
:
'D3Q19'
,
'
cumulant
'
:
{
'method'
:
'
cumulan
t'
,
'
relaxation_rate'
:
omega
,
'compressible'
:
True
,
'relaxation_rates'
:
[
omega
,
omega
,
omega_free
,
omega_free
,
omega_free
,
omega_free
],
'entropic'
:
True
,
},
'
entropic_kbc_n4
'
:
{
'method'
:
'
trt-kbc-n4
'
,
'
stencil'
:
'D3Q27'
,
'
cumulant-overrelax
'
:
{
'method'
:
'
cumulant
'
,
'
relaxation_rates'
:
[
omega
]
+
[
1
+
x
*
1e-2
for
x
in
range
(
1
,
11
)]
,
'compressible'
:
True
,
'relaxation_rates'
:
[
omega
,
omega_free
],
},
'entropic'
:
{
'method'
:
'mrt'
,
'compressible'
:
True
,
'relaxation_rates'
:
[
omega
,
omega
,
omega_free
,
omega_free
,
omega_free
],
'entropic'
:
True
,
},
'smagorinsky'
:
{
'method'
:
'srt'
,
'stencil'
:
'D3Q19'
,
'smagorinsky'
:
True
,
'relaxation_rate'
:
omega
,
},
'cumulant'
:
{
'method'
:
'cumulant'
,
'stencil'
:
'D3Q19'
,
'compressible'
:
True
,
'relaxation_rate'
:
omega
,
},
}
}
info_header
=
"""
#include "stencil/D3Q{q}.h"
\n
using Stencil_T = walberla::stencil::D3Q{q};
const char * infoStencil = "{stencil}";
const char * infoConfigName = "{configName}";
const char * infoStreamingPattern = "{streaming_pattern}";
const char * infoCollisionSetup = "{collision_setup}";
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""
# DEFAULTS
optimize
=
True
with
CodeGeneration
()
as
ctx
:
accessor
=
StreamPullTwoFieldsAccessor
()
# accessor = StreamPushTwoFieldsAccessor()
assert
not
accessor
.
is_inplace
,
"This app does not work for inplace accessors"
config_tokens
=
ctx
.
config
.
split
(
'_'
)
assert
len
(
config_tokens
)
>=
3
stencil_str
=
config_tokens
[
0
]
streaming_pattern
=
config_tokens
[
1
]
collision_setup
=
config_tokens
[
2
]
if
len
(
config_tokens
)
>=
4
:
optimize
=
(
config_tokens
[
3
]
!=
'noopt'
)
stencil
=
get_stencil
(
stencil_str
)
assert
streaming_pattern
in
streaming_patterns
,
f
"Invalid streaming pattern:
{
streaming_pattern
}
"
options
=
options_dict
[
collision_setup
]
q
=
len
(
stencil
)
dim
=
len
(
stencil
[
0
])
assert
dim
==
3
,
"This app supports only three-dimensional stencils"
pdfs
,
pdfs_tmp
,
velocity_field
=
ps
.
fields
(
f
"pdfs(
{
q
}
), pdfs_tmp(
{
q
}
), velocity(3) : double[3D]"
,
layout
=
'fzyx'
)
common_options
=
{
'field_name'
:
'pdfs'
,
'temporary_field_name'
:
'pdfs_tmp'
,
'kernel_type'
:
accessor
,
'optimization'
:
{
'cse_global'
:
True
,
'cse_pdfs'
:
False
}
'stencil'
:
stencil
,
'field_name'
:
pdfs
.
name
,
'optimization'
:
{
'target'
:
'gpu'
,
'cse_global'
:
True
,
'cse_pdfs'
:
False
,
'symbolic_field'
:
pdfs
,
'field_layout'
:
'fzyx'
,
'gpu_indexing_params'
:
gpu_indexing_params
,
}
}
config_name
=
ctx
.
config
noopt
=
False
d3q27
=
False
if
config_name
.
endswith
(
"_noopt"
):
noopt
=
True
config_name
=
config_name
[:
-
len
(
"_noopt"
)]
if
config_name
.
endswith
(
"_d3q27"
):
d3q27
=
True
config_name
=
config_name
[:
-
len
(
"_d3q27"
)]
options
=
options_dict
[
config_name
]
options
.
update
(
common_options
)
options
=
options
.
copy
()
if
noopt
:
options
[
'optimization'
][
'cse_global'
]
=
False
options
[
'optimization'
][
'cse_pdfs'
]
=
False
if
d3q27
:
options
[
'stencil'
]
=
'D3Q27'
options
.
update
(
common_options
)
stencil_str
=
options
[
'stencil'
]
q
=
int
(
stencil_str
[
stencil_str
.
find
(
'Q'
)
+
1
:])
pdfs
,
velocity_field
=
ps
.
fields
(
"pdfs({q}), velocity(3) : double[3D]"
.
format
(
q
=
q
),
layout
=
'fzyx'
)
options
[
'optimization'
][
'symbolic_field'
]
=
pdfs
if
not
is_inplace
(
streaming_pattern
):
options
[
'optimization'
][
'symbolic_temporary_field'
]
=
pdfs_tmp
field_swaps
=
[(
pdfs
,
pdfs_tmp
)]
else
:
field_swaps
=
[]
vp
=
[
(
'double'
,
'omega_0'
),
(
'double'
,
'omega_1'
),
(
'double'
,
'omega_2'
),
(
'double'
,
'omega_3'
),
(
'double'
,
'omega_4'
),
(
'double'
,
'omega_5'
),
(
'double'
,
'omega_6'
),
(
'int32_t'
,
'cudaBlockSize0'
),
(
'int32_t'
,
'cudaBlockSize1'
),
(
'int32_t'
,
'cudaBlockSize2'
)
]
lb_method
=
create_lb_method
(
**
options
)
update_rule
=
create_lb_update_rule
(
lb_method
=
lb_method
,
**
options
)
if
not
noopt
:
update_rule
=
insert_fast_divisions
(
update_rule
)
update_rule
=
insert_fast_sqrts
(
update_rule
)
# CPU lattice model - required for macroscopic value computation, VTK output etc.
options_without_opt
=
options
.
copy
()
del
options_without_opt
[
'optimization'
]
generate_lattice_model
(
ctx
,
'UniformGridGPU_LatticeModel'
,
create_lb_collision_rule
(
lb_method
=
lb_method
,
**
options_without_opt
))
# gpu LB sweep & boundaries
generate_sweep
(
ctx
,
'UniformGridGPU_LbKernel'
,
update_rule
,
field_swaps
=
[(
'pdfs'
,
'pdfs_tmp'
)],
inner_outer_split
=
True
,
target
=
'gpu'
,
gpu_indexing_params
=
sweep_params
,
varying_parameters
=
vp
)
generate_boundary
(
ctx
,
'UniformGridGPU_NoSlip'
,
NoSlip
(),
lb_method
,
target
=
'gpu'
)
generate_boundary
(
ctx
,
'UniformGridGPU_UBB'
,
UBB
([
0.05
,
0
,
0
]),
lb_method
,
target
=
'gpu'
)
# LB Sweep
collision_rule
=
create_lb_collision_rule
(
**
options
)
if
optimize
:
collision_rule
=
insert_fast_divisions
(
collision_rule
)
collision_rule
=
insert_fast_sqrts
(
collision_rule
)
lb_method
=
collision_rule
.
method
generate_alternating_lbm_sweep
(
ctx
,
'UniformGridGPU_LbKernel'
,
collision_rule
,
streaming_pattern
,
optimization
=
options
[
'optimization'
],
inner_outer_split
=
True
,
varying_parameters
=
vp
,
field_swaps
=
field_swaps
)
# getter & setter
setter_assignments
=
macroscopic_values_setter
(
lb_method
,
velocity
=
velocity_field
.
center_vector
,
pdfs
=
pdfs
.
center_vector
,
density
=
1.0
)
getter_assignments
=
macroscopic_values_getter
(
lb_method
,
velocity
=
velocity_field
.
center_vector
,
pdfs
=
pdfs
.
center_vector
,
density
=
None
)
generate_sweep
(
ctx
,
'UniformGridGPU_MacroSetter'
,
setter_assignments
)
generate_sweep
(
ctx
,
'UniformGridGPU_MacroGetter'
,
getter_assignments
)
setter_assignments
=
macroscopic_values_setter
(
lb_method
,
density
=
1.0
,
velocity
=
velocity_field
.
center_vector
,
pdfs
=
pdfs
,
streaming_pattern
=
streaming_pattern
,
previous_timestep
=
Timestep
.
EVEN
)
generate_sweep
(
ctx
,
'UniformGridGPU_MacroSetter'
,
setter_assignments
,
target
=
'gpu'
)
# Boundaries
noslip
=
NoSlip
()
ubb
=
UBB
((
0.05
,
0
,
0
))
generate_alternating_lbm_boundary
(
ctx
,
'UniformGridGPU_NoSlip'
,
noslip
,
lb_method
,
field_name
=
pdfs
.
name
,
streaming_pattern
=
streaming_pattern
,
target
=
'gpu'
)
generate_alternating_lbm_boundary
(
ctx
,
'UniformGridGPU_UBB'
,
ubb
,
lb_method
,
field_name
=
pdfs
.
name
,
streaming_pattern
=
streaming_pattern
,
target
=
'gpu'
)
# communication
generate_pack_info_from_kernel
(
ctx
,
'UniformGridGPU_PackInfo'
,
update_rule
,
target
=
'gpu'
)
generate_lb_pack_info
(
ctx
,
'UniformGridGPU_PackInfo'
,
stencil
,
pdfs
,
streaming_pattern
=
streaming_pattern
,
target
=
'gpu'
,
always_generate_separate_classes
=
True
)
infoHeaderParams
=
{
'stencil'
:
stencil_str
,
'
q'
:
q
,
'co
nfigName'
:
ctx
.
config
,
'
streaming_pattern'
:
streaming_pattern
,
'co
llision_setup'
:
collision_setup
,
'cse_global'
:
int
(
options
[
'optimization'
][
'cse_global'
]),
'cse_pdfs'
:
int
(
options
[
'optimization'
][
'cse_pdfs'
]),
}
ctx
.
write_file
(
"UniformGridGPU_Defines.h"
,
info_header
.
format
(
**
infoHeaderParams
))
stencil_typedefs
=
{
'Stencil_T'
:
stencil
,
'CommunicationStencil_T'
:
stencil
}
field_typedefs
=
{
'PdfField_T'
:
pdfs
,
'VelocityField_T'
:
velocity_field
}
# Info header containing correct template definitions for stencil and field
generate_info_header
(
ctx
,
'UniformGridGPU_InfoHeader'
,
stencil_typedefs
=
stencil_typedefs
,
field_typedefs
=
field_typedefs
,
additional_code
=
info_header
.
format
(
**
infoHeaderParams
))
apps/benchmarks/UniformGridGPU/UniformGridGPU_AA.cpp
deleted
100644 → 0
View file @
b7838d4b
#include
"core/Environment.h"
#include
"core/logging/Initialization.h"
#include
"python_coupling/CreateConfig.h"