Commit d48349d8 authored by Dominik Thoennes's avatar Dominik Thoennes
Browse files

Merge branch 'master' into thoennes/add-oneapi-22

parents b7838d4b 03b9f95f
Pipeline #33161 failed with stages
in 247 minutes and 23 seconds
...@@ -1011,11 +1011,18 @@ endif() ...@@ -1011,11 +1011,18 @@ endif()
option ( WALBERLA_THREAD_SAFE_LOGGING "Enables/Disables thread-safe logging" ON ) option ( WALBERLA_THREAD_SAFE_LOGGING "Enables/Disables thread-safe logging" ON )
if ( WALBERLA_BUILD_WITH_OPENMP ) if ( WALBERLA_BUILD_WITH_OPENMP )
if( APPLE AND EXISTS /opt/local/lib/libomp AND EXISTS /opt/local/include/libomp ) # find libomp from MacPorts
set( CMAKE_FRAMEWORK_PATH /opt/local/lib/libomp )
set( CMAKE_INCLUDE_PATH /opt/local/include/libomp )
endif()
find_package( OpenMP ) find_package( OpenMP )
if (OpenMP_FOUND) if (OpenMP_FOUND)
add_flag ( CMAKE_C_FLAGS "${OpenMP_C_FLAGS}" ) add_flag ( CMAKE_C_FLAGS "${OpenMP_C_FLAGS}" )
add_flag ( CMAKE_CXX_FLAGS "${OpenMP_CXX_FLAGS}" ) add_flag ( CMAKE_CXX_FLAGS "${OpenMP_CXX_FLAGS}" )
list ( APPEND SERVICE_LIBS ${OpenMP_CXX_LIBRARIES} ) list ( APPEND SERVICE_LIBS ${OpenMP_CXX_LIBRARIES} )
if( OpenMP_CXX_INCLUDE_DIRS )
include_directories( ${OpenMP_CXX_INCLUDE_DIRS} )
endif()
else() else()
#workarounds #workarounds
if ( WALBERLA_CXX_COMPILER_IS_NEC ) if ( WALBERLA_CXX_COMPILER_IS_NEC )
......
from pystencils.field import fields from pystencils.field import fields
from lbmpy.advanced_streaming.utility import get_timesteps, Timestep from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil from lbmpy.stencils import get_stencil
from lbmpy.creationfunctions import create_lb_collision_rule, create_lb_method, create_lb_update_rule from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow
from pystencils_walberla import CodeGeneration, generate_sweep, generate_info_header from pystencils_walberla import CodeGeneration, generate_sweep, generate_info_header
......
...@@ -86,7 +86,6 @@ using FlagField_T = FlagField< flag_t >; ...@@ -86,7 +86,6 @@ using FlagField_T = FlagField< flag_t >;
#if defined(WALBERLA_BUILD_WITH_CUDA) #if defined(WALBERLA_BUILD_WITH_CUDA)
typedef cuda::GPUField< real_t > GPUField; typedef cuda::GPUField< real_t > GPUField;
#endif #endif
// using CommScheme_T = cuda::communication::UniformGPUScheme<stencil::D2Q9>;
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
...@@ -185,7 +184,7 @@ int main(int argc, char** argv) ...@@ -185,7 +184,7 @@ int main(int argc, char** argv)
auto Comm_velocity_based_distributions = auto Comm_velocity_based_distributions =
make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0); make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
auto generatedPackInfo_velocity_based_distributions = auto generatedPackInfo_velocity_based_distributions =
make_shared< pystencils::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu); make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions); Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_gpu); auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_gpu);
Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field); Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field);
...@@ -193,7 +192,7 @@ int main(int argc, char** argv) ...@@ -193,7 +192,7 @@ int main(int argc, char** argv)
auto Comm_phase_field_distributions = auto Comm_phase_field_distributions =
make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0); make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
auto generatedPackInfo_phase_field_distributions = auto generatedPackInfo_phase_field_distributions =
make_shared< pystencils::PackInfo_phase_field_distributions >(lb_phase_field_gpu); make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field_gpu);
Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions); Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions);
#else #else
...@@ -202,14 +201,14 @@ int main(int argc, char** argv) ...@@ -202,14 +201,14 @@ int main(int argc, char** argv)
auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field); auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field);
auto generatedPackInfo_velocity_based_distributions = auto generatedPackInfo_velocity_based_distributions =
make_shared< pystencils::PackInfo_velocity_based_distributions >(lb_velocity_field); make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field);
Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_phase_field); Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_phase_field);
Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_velocity_based_distributions); Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_velocity_based_distributions);
blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_phase_field_distributions(blocks); blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_phase_field_distributions(blocks);
auto generatedPackInfo_phase_field_distributions = auto generatedPackInfo_phase_field_distributions =
make_shared< pystencils::PackInfo_phase_field_distributions >(lb_phase_field); make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field);
Comm_phase_field_distributions.addPackInfo(generatedPackInfo_phase_field_distributions); Comm_phase_field_distributions.addPackInfo(generatedPackInfo_phase_field_distributions);
#endif #endif
......
...@@ -5,11 +5,12 @@ from pystencils import AssignmentCollection ...@@ -5,11 +5,12 @@ from pystencils import AssignmentCollection
from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule
from lbmpy.stencils import get_stencil from lbmpy.stencils import get_stencil
from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_from_kernel from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_for_field
from lbmpy_walberla import generate_lb_pack_info
from lbmpy.phasefield_allen_cahn.kernel_equations import initializer_kernel_phase_field_lb, \ from lbmpy.phasefield_allen_cahn.kernel_equations import initializer_kernel_phase_field_lb, \
initializer_kernel_hydro_lb, interface_tracking_force, \ initializer_kernel_hydro_lb, interface_tracking_force, \
hydrodynamic_force, get_collision_assignments_hydro hydrodynamic_force, get_collision_assignments_hydro, get_collision_assignments_phase
from lbmpy.phasefield_allen_cahn.force_model import MultiphaseForceModel from lbmpy.phasefield_allen_cahn.force_model import MultiphaseForceModel
...@@ -52,6 +53,7 @@ w_c = 1.0 / (0.5 + (3.0 * M)) ...@@ -52,6 +53,7 @@ w_c = 1.0 / (0.5 + (3.0 * M))
u = fields(f"vel_field({dimensions}): [{dimensions}D]", layout='fzyx') u = fields(f"vel_field({dimensions}): [{dimensions}D]", layout='fzyx')
# phase-field # phase-field
C = fields(f"phase_field: [{dimensions}D]", layout='fzyx') C = fields(f"phase_field: [{dimensions}D]", layout='fzyx')
C_tmp = fields(f"phase_field_tmp: [{dimensions}D]", layout='fzyx')
# phase-field distribution functions # phase-field distribution functions
h = fields(f"lb_phase_field({q_phase}): [{dimensions}D]", layout='fzyx') h = fields(f"lb_phase_field({q_phase}): [{dimensions}D]", layout='fzyx')
...@@ -88,32 +90,26 @@ h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W) ...@@ -88,32 +90,26 @@ h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W)
g_updates = initializer_kernel_hydro_lb(g, u, method_hydro) g_updates = initializer_kernel_hydro_lb(g, u, method_hydro)
force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W)] force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W, fd_stencil=get_stencil("D3Q27"))]
force_model_h = MultiphaseForceModel(force=force_h) force_model_h = MultiphaseForceModel(force=force_h)
force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, body_force) force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, body_force,
fd_stencil=get_stencil("D3Q27"))
h_tmp_symbol_list = [h_tmp.center(i) for i, _ in enumerate(stencil_phase)] force_model_g = MultiphaseForceModel(force=force_g, rho=density)
sum_h = np.sum(h_tmp_symbol_list[:])
#################### ####################
# LBM UPDATE RULES # # LBM UPDATE RULES #
#################### ####################
method_phase.set_force_model(force_model_h) phase_field_LB_step = get_collision_assignments_phase(lb_method=method_phase,
velocity_input=u,
output={'density': C_tmp},
force_model=force_model_h,
symbolic_fields={"symbolic_field": h,
"symbolic_temporary_field": h_tmp},
kernel_type='stream_pull_collide')
phase_field_LB_step = create_lb_update_rule(lb_method=method_phase,
velocity_input=u,
compressible=True,
optimization={"symbolic_field": h,
"symbolic_temporary_field": h_tmp},
kernel_type='stream_pull_collide')
phase_field_LB_step.set_main_assignments_from_dict({**phase_field_LB_step.main_assignments_dict, **{C.center: sum_h}})
phase_field_LB_step = AssignmentCollection(main_assignments=phase_field_LB_step.main_assignments,
subexpressions=phase_field_LB_step.subexpressions)
phase_field_LB_step = sympy_cse(phase_field_LB_step) phase_field_LB_step = sympy_cse(phase_field_LB_step)
# --------------------------------------------------------------------------------------------------------- # ---------------------------------------------------------------------------------------------------------
...@@ -121,18 +117,12 @@ phase_field_LB_step = sympy_cse(phase_field_LB_step) ...@@ -121,18 +117,12 @@ phase_field_LB_step = sympy_cse(phase_field_LB_step)
hydro_LB_step = get_collision_assignments_hydro(lb_method=method_hydro, hydro_LB_step = get_collision_assignments_hydro(lb_method=method_hydro,
density=density, density=density,
velocity_input=u, velocity_input=u,
force=force_g, force_model=force_model_g,
sub_iterations=1, sub_iterations=2,
symbolic_fields={"symbolic_field": g, symbolic_fields={"symbolic_field": g,
"symbolic_temporary_field": g_tmp}, "symbolic_temporary_field": g_tmp},
kernel_type='collide_stream_push') kernel_type='collide_stream_push')
# streaming of the hydrodynamic distribution
stream_hydro = create_lb_update_rule(stencil=stencil_hydro,
optimization={"symbolic_field": g,
"symbolic_temporary_field": g_tmp},
kernel_type='stream_pull_only')
################### ###################
# GENERATE SWEEPS # # GENERATE SWEEPS #
################### ###################
...@@ -161,7 +151,7 @@ with CodeGeneration() as ctx: ...@@ -161,7 +151,7 @@ with CodeGeneration() as ctx:
generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates) generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates)
generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step, generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
field_swaps=[(h, h_tmp)], field_swaps=[(h, h_tmp), (C, C_tmp)],
inner_outer_split=True, inner_outer_split=True,
cpu_vectorize_info=cpu_vec) cpu_vectorize_info=cpu_vec)
...@@ -171,12 +161,13 @@ with CodeGeneration() as ctx: ...@@ -171,12 +161,13 @@ with CodeGeneration() as ctx:
cpu_vectorize_info=cpu_vec) cpu_vectorize_info=cpu_vec)
# communication # communication
generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field_distributions', generate_lb_pack_info(ctx, 'PackInfo_phase_field_distributions', stencil_phase, h,
phase_field_LB_step.main_assignments, target='cpu') streaming_pattern='pull', target='cpu')
generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field',
hydro_LB_step.all_assignments, target='cpu', kind='pull') generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
generate_pack_info_from_kernel(ctx, 'PackInfo_velocity_based_distributions', streaming_pattern='push', target='cpu')
hydro_LB_step.all_assignments, target='cpu', kind='push')
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target='cpu')
ctx.write_file("GenDefines.h", info_header) ctx.write_file("GenDefines.h", info_header)
...@@ -187,7 +178,7 @@ with CodeGeneration() as ctx: ...@@ -187,7 +178,7 @@ with CodeGeneration() as ctx:
g_updates, target='gpu') g_updates, target='gpu')
generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step, generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
field_swaps=[(h, h_tmp)], field_swaps=[(h, h_tmp), (C, C_tmp)],
inner_outer_split=True, inner_outer_split=True,
target='gpu', target='gpu',
gpu_indexing_params=sweep_params, gpu_indexing_params=sweep_params,
...@@ -200,12 +191,13 @@ with CodeGeneration() as ctx: ...@@ -200,12 +191,13 @@ with CodeGeneration() as ctx:
gpu_indexing_params=sweep_params, gpu_indexing_params=sweep_params,
varying_parameters=vp) varying_parameters=vp)
# communication # communication
generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field_distributions', generate_lb_pack_info(ctx, 'PackInfo_phase_field_distributions', stencil_phase, h,
phase_field_LB_step.main_assignments, target='gpu') streaming_pattern='pull', target='gpu')
generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field',
hydro_LB_step.all_assignments, target='gpu', kind='pull') generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
generate_pack_info_from_kernel(ctx, 'PackInfo_velocity_based_distributions', streaming_pattern='push', target='gpu')
hydro_LB_step.all_assignments, target='gpu', kind='push')
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target='gpu')
ctx.write_file("GenDefines.h", info_header) ctx.write_file("GenDefines.h", info_header)
......
...@@ -4,49 +4,27 @@ waLBerla_link_files_to_builddir( "*.py" ) ...@@ -4,49 +4,27 @@ waLBerla_link_files_to_builddir( "*.py" )
waLBerla_link_files_to_builddir( "simulation_setup" ) waLBerla_link_files_to_builddir( "simulation_setup" )
foreach (config srt trt mrt smagorinsky entropic smagorinsky_noopt entropic_kbc_n4 foreach(streaming_pattern aa) # choose from {pull, push, aa, esotwist}
entropic_kbc_n4_noopt mrt_noopt mrt_full mrt_full_noopt foreach(stencil d3q27) # choose from {d3q19 d3q27}
cumulant cumulant_d3q27 foreach (collision_setup srt trt mrt cumulant) # choose from {srt trt mrt cumulant entropic smagorinsky}
srt_d3q27 mrt_d3q27 mrt_d3q27_noopt smagorinsky_d3q27 smagorinsky_d3q27_noopt mrt_full_d3q27 mrt_full_d3q27_noopt) set(config ${stencil}_${streaming_pattern}_${collision_setup})
waLBerla_generate_target_from_python(NAME UniformGridGPUGenerated_${config}
waLBerla_generate_target_from_python(NAME UniformGridGPUGenerated_${config} FILE UniformGridGPU.py
FILE UniformGridGPU.py CODEGEN_CFG ${config}
CODEGEN_CFG ${config} OUT_FILES UniformGridGPU_LbKernel.cu UniformGridGPU_LbKernel.h
OUT_FILES UniformGridGPU_LatticeModel.cpp UniformGridGPU_LatticeModel.h UniformGridGPU_PackInfoEven.cu UniformGridGPU_PackInfoEven.h
UniformGridGPU_LbKernel.cu UniformGridGPU_LbKernel.h UniformGridGPU_PackInfoOdd.cu UniformGridGPU_PackInfoOdd.h
UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
UniformGridGPU_UBB.cu UniformGridGPU_UBB.h UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
UniformGridGPU_PackInfo.cu UniformGridGPU_PackInfo.h UniformGridGPU_MacroSetter.cu UniformGridGPU_MacroSetter.h
UniformGridGPU_MacroSetter.cpp UniformGridGPU_MacroSetter.h UniformGridGPU_InfoHeader.h
UniformGridGPU_MacroGetter.cpp UniformGridGPU_MacroGetter.h )
UniformGridGPU_Defines.h
)
waLBerla_add_executable(NAME UniformGridGPU_${config}
FILES UniformGridGPU.cpp
waLBerla_add_executable(NAME UniformGridBenchmarkGPU_${config} DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk UniformGridGPUGenerated_${config})
FILES UniformGridGPU.cpp set_target_properties( UniformGridGPU_${config} PROPERTIES CXX_VISIBILITY_PRESET hidden)
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk gui UniformGridGPUGenerated_${config}) endforeach ()
set_target_properties( UniformGridBenchmarkGPU_${config} PROPERTIES CXX_VISIBILITY_PRESET hidden) endforeach()
endforeach () endforeach()
\ No newline at end of file
foreach (config srt trt mrt smagorinsky entropic)
waLBerla_generate_target_from_python(NAME UniformGridGPUGenerated_AA_${config}
FILE UniformGridGPU_AA.py
CODEGEN_CFG ${config}
OUT_FILES UniformGridGPU_AA_PackInfoPull.cu UniformGridGPU_AA_PackInfoPull.h
UniformGridGPU_AA_LbKernelOdd.cu UniformGridGPU_AA_LbKernelOdd.h
UniformGridGPU_AA_LbKernelEven.cu UniformGridGPU_AA_LbKernelEven.h
UniformGridGPU_AA_PackInfoPush.cu UniformGridGPU_AA_PackInfoPush.h
UniformGridGPU_AA_MacroSetter.cpp UniformGridGPU_AA_MacroSetter.h
UniformGridGPU_AA_MacroGetter.cpp UniformGridGPU_AA_MacroGetter.h
UniformGridGPU_AA_Defines.h
)
waLBerla_add_executable(NAME UniformGridBenchmarkGPU_AA_${config}
FILES UniformGridGPU_AA.cpp
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk gui UniformGridGPUGenerated_AA_${config})
set_target_properties( UniformGridBenchmarkGPU_AA_${config} PROPERTIES CXX_VISIBILITY_PRESET hidden)
endforeach ()
import sympy as sp import sympy as sp
import numpy as np import numpy as np
import pystencils as ps import pystencils as ps
from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule, create_lb_collision_rule
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
from pystencils_walberla import generate_pack_info_from_kernel
from lbmpy_walberla import generate_lattice_model, generate_boundary
from pystencils_walberla import CodeGeneration, generate_sweep
from pystencils.data_types import TypedSymbol from pystencils.data_types import TypedSymbol
from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from lbmpy.advanced_streaming import Timestep, is_inplace
from lbmpy.advanced_streaming.utility import streaming_patterns
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil
from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep
from lbmpy_walberla import generate_alternating_lbm_sweep, generate_lb_pack_info, generate_alternating_lbm_boundary
omega = sp.symbols("omega") omega = sp.symbols("omega")
omega_free = sp.Symbol("omega_free") omega_free = sp.Symbol("omega_free")
omega_fill = sp.symbols("omega_:10")
compile_time_block_size = False compile_time_block_size = False
if compile_time_block_size: if compile_time_block_size:
...@@ -21,156 +24,158 @@ if compile_time_block_size: ...@@ -21,156 +24,158 @@ if compile_time_block_size:
else: else:
sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32), sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
TypedSymbol("cudaBlockSize1", np.int32), TypedSymbol("cudaBlockSize1", np.int32),
1) TypedSymbol("cudaBlockSize2", np.int32))
sweep_params = {'block_size': sweep_block_size} gpu_indexing_params = {'block_size': sweep_block_size}
options_dict = { options_dict = {
'srt': { 'srt': {
'method': 'srt', 'method': 'srt',
'stencil': 'D3Q19',
'relaxation_rate': omega, 'relaxation_rate': omega,
'compressible': False, 'compressible': False,
}, },
'trt': { 'trt': {
'method': 'trt', 'method': 'trt',
'stencil': 'D3Q19',
'relaxation_rate': omega, 'relaxation_rate': omega,
}, },
'mrt': { 'mrt': {
'method': 'mrt', 'method': 'mrt',
'stencil': 'D3Q19', 'relaxation_rates': [omega, 1, 1, 1, 1, 1, 1],
'relaxation_rates': [omega, 1.3, 1.4, 1.2, 1.1, 1.15, 1.234, 1.4235],
}, },
'mrt_full': { 'mrt-overrelax': {
'method': 'mrt', 'method': 'mrt',
'stencil': 'D3Q19', 'relaxation_rates': [omega, 1.3, 1.4, omega, 1.2, 1.1],
'relaxation_rates': [omega_fill[0], omega, omega_fill[1], omega_fill[2],
omega_fill[3], omega_fill[4], omega_fill[5]],
}, },
'entropic': { 'cumulant': {
'method': 'mrt', 'method': 'cumulant',
'stencil': 'D3Q19', 'relaxation_rate': omega,
'compressible': True, 'compressible': True,
'relaxation_rates': [omega, omega, omega_free, omega_free, omega_free, omega_free],
'entropic': True,
}, },
'entropic_kbc_n4': { 'cumulant-overrelax': {
'method': 'trt-kbc-n4', 'method': 'cumulant',
'stencil': 'D3Q27', 'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
'compressible': True, 'compressible': True,
'relaxation_rates': [omega, omega_free], },
'entropic': {
'method': 'mrt',
'compressible': True,
'relaxation_rates': [omega, omega, omega_free, omega_free, omega_free],
'entropic': True, 'entropic': True,
}, },
'smagorinsky': { 'smagorinsky': {
'method': 'srt', 'method': 'srt',
'stencil': 'D3Q19',
'smagorinsky': True, 'smagorinsky': True,
'relaxation_rate': omega, 'relaxation_rate': omega,
}, }
'cumulant': {
'method': 'cumulant',
'stencil': 'D3Q19',
'compressible': True,
'relaxation_rate': omega,
},
} }
info_header = """ info_header = """
#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
const char * infoStencil = "{stencil}"; const char * infoStencil = "{stencil}";
const char * infoConfigName = "{configName}"; const char * infoStreamingPattern = "{streaming_pattern}";
const char * infoCollisionSetup = "{collision_setup}";
const bool infoCseGlobal = {cse_global}; const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs}; const bool infoCsePdfs = {cse_pdfs};
""" """
# DEFAULTS
optimize = True
with CodeGeneration() as ctx: with CodeGeneration() as ctx:
accessor = StreamPullTwoFieldsAccessor() config_tokens = ctx.config.split('_')
# accessor = StreamPushTwoFieldsAccessor()
assert not accessor.is_inplace, "This app does not work for inplace accessors" assert len(config_tokens) >= 3
stencil_str = config_tokens[0]
streaming_pattern = config_tokens[1]
collision_setup = config_tokens[2]
if len(config_tokens) >= 4:
optimize = (config_tokens[3] != 'noopt')
stencil = get_stencil(stencil_str)
assert streaming_pattern in streaming_patterns, f"Invalid streaming pattern: {streaming_pattern}"
options = options_dict[collision_setup]
q = len(stencil)
dim = len(stencil[0])
assert dim == 3, "This app supports only three-dimensional stencils"
pdfs, pdfs_tmp, velocity_field = ps.fields(f"pdfs({q}), pdfs_tmp({q}), velocity(3) : double[3D]", layout='fzyx')
common_options = { common_options = {
'field_name': 'pdfs', 'stencil': stencil,
'temporary_field_name': 'pdfs_tmp', 'field_name': pdfs.name,
'kernel_type': accessor, 'optimization': {
'optimization': {'cse_global': True, 'target': 'gpu',
'cse_pdfs': False} 'cse_global': True,
'cse_pdfs': False,
'symbolic_field': pdfs,
'field_layout': 'fzyx',
'gpu_indexing_params': gpu_indexing_params,
}
} }
config_name = ctx.config
noopt = False
d3q27 = False
if config_name.endswith("_noopt"):
noopt = True
config_name = config_name[:-len("_noopt")]
if config_name.endswith("_d3q27"):
d3q27 = True
config_name = config_name[:-len("_d3q27")]