Commit d48349d8 authored by Dominik Thoennes's avatar Dominik Thoennes
Browse files

Merge branch 'master' into thoennes/add-oneapi-22

parents b7838d4b 03b9f95f
......@@ -1011,11 +1011,18 @@ endif()
option ( WALBERLA_THREAD_SAFE_LOGGING "Enables/Disables thread-safe logging" ON )
if ( WALBERLA_BUILD_WITH_OPENMP )
if( APPLE AND EXISTS /opt/local/lib/libomp AND EXISTS /opt/local/include/libomp ) # find libomp from MacPorts
set( CMAKE_FRAMEWORK_PATH /opt/local/lib/libomp )
set( CMAKE_INCLUDE_PATH /opt/local/include/libomp )
endif()
find_package( OpenMP )
if (OpenMP_FOUND)
add_flag ( CMAKE_C_FLAGS "${OpenMP_C_FLAGS}" )
add_flag ( CMAKE_CXX_FLAGS "${OpenMP_CXX_FLAGS}" )
list ( APPEND SERVICE_LIBS ${OpenMP_CXX_LIBRARIES} )
if( OpenMP_CXX_INCLUDE_DIRS )
include_directories( ${OpenMP_CXX_INCLUDE_DIRS} )
endif()
else()
#workarounds
if ( WALBERLA_CXX_COMPILER_IS_NEC )
......
from pystencils.field import fields
from lbmpy.advanced_streaming.utility import get_timesteps, Timestep
from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil
from lbmpy.creationfunctions import create_lb_collision_rule, create_lb_method, create_lb_update_rule
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow
from pystencils_walberla import CodeGeneration, generate_sweep, generate_info_header
......
......@@ -86,7 +86,6 @@ using FlagField_T = FlagField< flag_t >;
#if defined(WALBERLA_BUILD_WITH_CUDA)
typedef cuda::GPUField< real_t > GPUField;
#endif
// using CommScheme_T = cuda::communication::UniformGPUScheme<stencil::D2Q9>;
int main(int argc, char** argv)
{
......@@ -185,7 +184,7 @@ int main(int argc, char** argv)
auto Comm_velocity_based_distributions =
make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
auto generatedPackInfo_velocity_based_distributions =
make_shared< pystencils::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_gpu);
Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field);
......@@ -193,7 +192,7 @@ int main(int argc, char** argv)
auto Comm_phase_field_distributions =
make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
auto generatedPackInfo_phase_field_distributions =
make_shared< pystencils::PackInfo_phase_field_distributions >(lb_phase_field_gpu);
make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field_gpu);
Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions);
#else
......@@ -202,14 +201,14 @@ int main(int argc, char** argv)
auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field);
auto generatedPackInfo_velocity_based_distributions =
make_shared< pystencils::PackInfo_velocity_based_distributions >(lb_velocity_field);
make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field);
Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_phase_field);
Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_velocity_based_distributions);
blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_phase_field_distributions(blocks);
auto generatedPackInfo_phase_field_distributions =
make_shared< pystencils::PackInfo_phase_field_distributions >(lb_phase_field);
make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field);
Comm_phase_field_distributions.addPackInfo(generatedPackInfo_phase_field_distributions);
#endif
......
......@@ -5,11 +5,12 @@ from pystencils import AssignmentCollection
from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule
from lbmpy.stencils import get_stencil
from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_from_kernel
from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_for_field
from lbmpy_walberla import generate_lb_pack_info
from lbmpy.phasefield_allen_cahn.kernel_equations import initializer_kernel_phase_field_lb, \
initializer_kernel_hydro_lb, interface_tracking_force, \
hydrodynamic_force, get_collision_assignments_hydro
hydrodynamic_force, get_collision_assignments_hydro, get_collision_assignments_phase
from lbmpy.phasefield_allen_cahn.force_model import MultiphaseForceModel
......@@ -52,6 +53,7 @@ w_c = 1.0 / (0.5 + (3.0 * M))
u = fields(f"vel_field({dimensions}): [{dimensions}D]", layout='fzyx')
# phase-field
C = fields(f"phase_field: [{dimensions}D]", layout='fzyx')
C_tmp = fields(f"phase_field_tmp: [{dimensions}D]", layout='fzyx')
# phase-field distribution functions
h = fields(f"lb_phase_field({q_phase}): [{dimensions}D]", layout='fzyx')
......@@ -88,32 +90,26 @@ h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W)
g_updates = initializer_kernel_hydro_lb(g, u, method_hydro)
force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W)]
force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W, fd_stencil=get_stencil("D3Q27"))]
force_model_h = MultiphaseForceModel(force=force_h)
force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, body_force)
force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, body_force,
fd_stencil=get_stencil("D3Q27"))
h_tmp_symbol_list = [h_tmp.center(i) for i, _ in enumerate(stencil_phase)]
sum_h = np.sum(h_tmp_symbol_list[:])
force_model_g = MultiphaseForceModel(force=force_g, rho=density)
####################
# LBM UPDATE RULES #
####################
method_phase.set_force_model(force_model_h)
phase_field_LB_step = get_collision_assignments_phase(lb_method=method_phase,
velocity_input=u,
output={'density': C_tmp},
force_model=force_model_h,
symbolic_fields={"symbolic_field": h,
"symbolic_temporary_field": h_tmp},
kernel_type='stream_pull_collide')
phase_field_LB_step = create_lb_update_rule(lb_method=method_phase,
velocity_input=u,
compressible=True,
optimization={"symbolic_field": h,
"symbolic_temporary_field": h_tmp},
kernel_type='stream_pull_collide')
phase_field_LB_step.set_main_assignments_from_dict({**phase_field_LB_step.main_assignments_dict, **{C.center: sum_h}})
phase_field_LB_step = AssignmentCollection(main_assignments=phase_field_LB_step.main_assignments,
subexpressions=phase_field_LB_step.subexpressions)
phase_field_LB_step = sympy_cse(phase_field_LB_step)
# ---------------------------------------------------------------------------------------------------------
......@@ -121,18 +117,12 @@ phase_field_LB_step = sympy_cse(phase_field_LB_step)
hydro_LB_step = get_collision_assignments_hydro(lb_method=method_hydro,
density=density,
velocity_input=u,
force=force_g,
sub_iterations=1,
force_model=force_model_g,
sub_iterations=2,
symbolic_fields={"symbolic_field": g,
"symbolic_temporary_field": g_tmp},
kernel_type='collide_stream_push')
# streaming of the hydrodynamic distribution
stream_hydro = create_lb_update_rule(stencil=stencil_hydro,
optimization={"symbolic_field": g,
"symbolic_temporary_field": g_tmp},
kernel_type='stream_pull_only')
###################
# GENERATE SWEEPS #
###################
......@@ -161,7 +151,7 @@ with CodeGeneration() as ctx:
generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates)
generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
field_swaps=[(h, h_tmp)],
field_swaps=[(h, h_tmp), (C, C_tmp)],
inner_outer_split=True,
cpu_vectorize_info=cpu_vec)
......@@ -171,12 +161,13 @@ with CodeGeneration() as ctx:
cpu_vectorize_info=cpu_vec)
# communication
generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field_distributions',
phase_field_LB_step.main_assignments, target='cpu')
generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field',
hydro_LB_step.all_assignments, target='cpu', kind='pull')
generate_pack_info_from_kernel(ctx, 'PackInfo_velocity_based_distributions',
hydro_LB_step.all_assignments, target='cpu', kind='push')
generate_lb_pack_info(ctx, 'PackInfo_phase_field_distributions', stencil_phase, h,
streaming_pattern='pull', target='cpu')
generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
streaming_pattern='push', target='cpu')
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target='cpu')
ctx.write_file("GenDefines.h", info_header)
......@@ -187,7 +178,7 @@ with CodeGeneration() as ctx:
g_updates, target='gpu')
generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
field_swaps=[(h, h_tmp)],
field_swaps=[(h, h_tmp), (C, C_tmp)],
inner_outer_split=True,
target='gpu',
gpu_indexing_params=sweep_params,
......@@ -200,12 +191,13 @@ with CodeGeneration() as ctx:
gpu_indexing_params=sweep_params,
varying_parameters=vp)
# communication
generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field_distributions',
phase_field_LB_step.main_assignments, target='gpu')
generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field',
hydro_LB_step.all_assignments, target='gpu', kind='pull')
generate_pack_info_from_kernel(ctx, 'PackInfo_velocity_based_distributions',
hydro_LB_step.all_assignments, target='gpu', kind='push')
generate_lb_pack_info(ctx, 'PackInfo_phase_field_distributions', stencil_phase, h,
streaming_pattern='pull', target='gpu')
generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
streaming_pattern='push', target='gpu')
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target='gpu')
ctx.write_file("GenDefines.h", info_header)
......
......@@ -4,49 +4,27 @@ waLBerla_link_files_to_builddir( "*.py" )
waLBerla_link_files_to_builddir( "simulation_setup" )
foreach (config srt trt mrt smagorinsky entropic smagorinsky_noopt entropic_kbc_n4
entropic_kbc_n4_noopt mrt_noopt mrt_full mrt_full_noopt
cumulant cumulant_d3q27
srt_d3q27 mrt_d3q27 mrt_d3q27_noopt smagorinsky_d3q27 smagorinsky_d3q27_noopt mrt_full_d3q27 mrt_full_d3q27_noopt)
waLBerla_generate_target_from_python(NAME UniformGridGPUGenerated_${config}
FILE UniformGridGPU.py
CODEGEN_CFG ${config}
OUT_FILES UniformGridGPU_LatticeModel.cpp UniformGridGPU_LatticeModel.h
UniformGridGPU_LbKernel.cu UniformGridGPU_LbKernel.h
UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
UniformGridGPU_PackInfo.cu UniformGridGPU_PackInfo.h
UniformGridGPU_MacroSetter.cpp UniformGridGPU_MacroSetter.h
UniformGridGPU_MacroGetter.cpp UniformGridGPU_MacroGetter.h
UniformGridGPU_Defines.h
)
waLBerla_add_executable(NAME UniformGridBenchmarkGPU_${config}
FILES UniformGridGPU.cpp
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk gui UniformGridGPUGenerated_${config})
set_target_properties( UniformGridBenchmarkGPU_${config} PROPERTIES CXX_VISIBILITY_PRESET hidden)
endforeach ()
foreach (config srt trt mrt smagorinsky entropic)
waLBerla_generate_target_from_python(NAME UniformGridGPUGenerated_AA_${config}
FILE UniformGridGPU_AA.py
CODEGEN_CFG ${config}
OUT_FILES UniformGridGPU_AA_PackInfoPull.cu UniformGridGPU_AA_PackInfoPull.h
UniformGridGPU_AA_LbKernelOdd.cu UniformGridGPU_AA_LbKernelOdd.h
UniformGridGPU_AA_LbKernelEven.cu UniformGridGPU_AA_LbKernelEven.h
UniformGridGPU_AA_PackInfoPush.cu UniformGridGPU_AA_PackInfoPush.h
UniformGridGPU_AA_MacroSetter.cpp UniformGridGPU_AA_MacroSetter.h
UniformGridGPU_AA_MacroGetter.cpp UniformGridGPU_AA_MacroGetter.h
UniformGridGPU_AA_Defines.h
)
waLBerla_add_executable(NAME UniformGridBenchmarkGPU_AA_${config}
FILES UniformGridGPU_AA.cpp
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk gui UniformGridGPUGenerated_AA_${config})
set_target_properties( UniformGridBenchmarkGPU_AA_${config} PROPERTIES CXX_VISIBILITY_PRESET hidden)
endforeach ()
foreach(streaming_pattern aa) # choose from {pull, push, aa, esotwist}
foreach(stencil d3q27) # choose from {d3q19 d3q27}
foreach (collision_setup srt trt mrt cumulant) # choose from {srt trt mrt cumulant entropic smagorinsky}
set(config ${stencil}_${streaming_pattern}_${collision_setup})
waLBerla_generate_target_from_python(NAME UniformGridGPUGenerated_${config}
FILE UniformGridGPU.py
CODEGEN_CFG ${config}
OUT_FILES UniformGridGPU_LbKernel.cu UniformGridGPU_LbKernel.h
UniformGridGPU_PackInfoEven.cu UniformGridGPU_PackInfoEven.h
UniformGridGPU_PackInfoOdd.cu UniformGridGPU_PackInfoOdd.h
UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
UniformGridGPU_MacroSetter.cu UniformGridGPU_MacroSetter.h
UniformGridGPU_InfoHeader.h
)
waLBerla_add_executable(NAME UniformGridGPU_${config}
FILES UniformGridGPU.cpp
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk UniformGridGPUGenerated_${config})
set_target_properties( UniformGridGPU_${config} PROPERTIES CXX_VISIBILITY_PRESET hidden)
endforeach ()
endforeach()
endforeach()
\ No newline at end of file
import sympy as sp
import numpy as np
import pystencils as ps
from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule, create_lb_collision_rule
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
from pystencils_walberla import generate_pack_info_from_kernel
from lbmpy_walberla import generate_lattice_model, generate_boundary
from pystencils_walberla import CodeGeneration, generate_sweep
from pystencils.data_types import TypedSymbol
from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from lbmpy.advanced_streaming import Timestep, is_inplace
from lbmpy.advanced_streaming.utility import streaming_patterns
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil
from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep
from lbmpy_walberla import generate_alternating_lbm_sweep, generate_lb_pack_info, generate_alternating_lbm_boundary
omega = sp.symbols("omega")
omega_free = sp.Symbol("omega_free")
omega_fill = sp.symbols("omega_:10")
compile_time_block_size = False
if compile_time_block_size:
......@@ -21,156 +24,158 @@ if compile_time_block_size:
else:
sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
TypedSymbol("cudaBlockSize1", np.int32),
1)
TypedSymbol("cudaBlockSize2", np.int32))
sweep_params = {'block_size': sweep_block_size}
gpu_indexing_params = {'block_size': sweep_block_size}
options_dict = {
'srt': {
'method': 'srt',
'stencil': 'D3Q19',
'relaxation_rate': omega,
'compressible': False,
},
'trt': {
'method': 'trt',
'stencil': 'D3Q19',
'relaxation_rate': omega,
},
'mrt': {
'method': 'mrt',
'stencil': 'D3Q19',
'relaxation_rates': [omega, 1.3, 1.4, 1.2, 1.1, 1.15, 1.234, 1.4235],
'relaxation_rates': [omega, 1, 1, 1, 1, 1, 1],
},
'mrt_full': {
'mrt-overrelax': {
'method': 'mrt',
'stencil': 'D3Q19',
'relaxation_rates': [omega_fill[0], omega, omega_fill[1], omega_fill[2],
omega_fill[3], omega_fill[4], omega_fill[5]],
'relaxation_rates': [omega, 1.3, 1.4, omega, 1.2, 1.1],
},
'entropic': {
'method': 'mrt',
'stencil': 'D3Q19',
'cumulant': {
'method': 'cumulant',
'relaxation_rate': omega,
'compressible': True,
'relaxation_rates': [omega, omega, omega_free, omega_free, omega_free, omega_free],
'entropic': True,
},
'entropic_kbc_n4': {
'method': 'trt-kbc-n4',
'stencil': 'D3Q27',
'cumulant-overrelax': {
'method': 'cumulant',
'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
'compressible': True,
'relaxation_rates': [omega, omega_free],
},
'entropic': {
'method': 'mrt',
'compressible': True,
'relaxation_rates': [omega, omega, omega_free, omega_free, omega_free],
'entropic': True,
},
'smagorinsky': {
'method': 'srt',
'stencil': 'D3Q19',
'smagorinsky': True,
'relaxation_rate': omega,
},
'cumulant': {
'method': 'cumulant',
'stencil': 'D3Q19',
'compressible': True,
'relaxation_rate': omega,
},
}
}
info_header = """
#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
const char * infoStencil = "{stencil}";
const char * infoConfigName = "{configName}";
const char * infoStreamingPattern = "{streaming_pattern}";
const char * infoCollisionSetup = "{collision_setup}";
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""
# DEFAULTS
optimize = True
with CodeGeneration() as ctx:
accessor = StreamPullTwoFieldsAccessor()
# accessor = StreamPushTwoFieldsAccessor()
assert not accessor.is_inplace, "This app does not work for inplace accessors"
config_tokens = ctx.config.split('_')
assert len(config_tokens) >= 3
stencil_str = config_tokens[0]
streaming_pattern = config_tokens[1]
collision_setup = config_tokens[2]
if len(config_tokens) >= 4:
optimize = (config_tokens[3] != 'noopt')
stencil = get_stencil(stencil_str)
assert streaming_pattern in streaming_patterns, f"Invalid streaming pattern: {streaming_pattern}"
options = options_dict[collision_setup]
q = len(stencil)
dim = len(stencil[0])
assert dim == 3, "This app supports only three-dimensional stencils"
pdfs, pdfs_tmp, velocity_field = ps.fields(f"pdfs({q}), pdfs_tmp({q}), velocity(3) : double[3D]", layout='fzyx')
common_options = {
'field_name': 'pdfs',
'temporary_field_name': 'pdfs_tmp',
'kernel_type': accessor,
'optimization': {'cse_global': True,
'cse_pdfs': False}
'stencil': stencil,
'field_name': pdfs.name,
'optimization': {
'target': 'gpu',
'cse_global': True,
'cse_pdfs': False,
'symbolic_field': pdfs,
'field_layout': 'fzyx',
'gpu_indexing_params': gpu_indexing_params,
}
}
config_name = ctx.config
noopt = False
d3q27 = False
if config_name.endswith("_noopt"):
noopt = True
config_name = config_name[:-len("_noopt")]
if config_name.endswith("_d3q27"):
d3q27 = True
config_name = config_name[:-len("_d3q27")]
options = options_dict[config_name]
options.update(common_options)
options = options.copy()
if noopt:
options['optimization']['cse_global'] = False
options['optimization']['cse_pdfs'] = False
if d3q27:
options['stencil'] = 'D3Q27'
options.update(common_options)
stencil_str = options['stencil']
q = int(stencil_str[stencil_str.find('Q') + 1:])
pdfs, velocity_field = ps.fields("pdfs({q}), velocity(3) : double[3D]".format(q=q), layout='fzyx')
options['optimization']['symbolic_field'] = pdfs
if not is_inplace(streaming_pattern):
options['optimization']['symbolic_temporary_field'] = pdfs_tmp
field_swaps = [(pdfs, pdfs_tmp)]
else:
field_swaps = []
vp = [
('double', 'omega_0'),
('double', 'omega_1'),
('double', 'omega_2'),
('double', 'omega_3'),
('double', 'omega_4'),
('double', 'omega_5'),
('double', 'omega_6'),
('int32_t', 'cudaBlockSize0'),
('int32_t', 'cudaBlockSize1'),
('int32_t', 'cudaBlockSize2')
]
lb_method = create_lb_method(**options)
update_rule = create_lb_update_rule(lb_method=lb_method, **options)
if not noopt:
update_rule = insert_fast_divisions(update_rule)
update_rule = insert_fast_sqrts(update_rule)
# CPU lattice model - required for macroscopic value computation, VTK output etc.
options_without_opt = options.copy()
del options_without_opt['optimization']
generate_lattice_model(ctx, 'UniformGridGPU_LatticeModel', create_lb_collision_rule(lb_method=lb_method,
**options_without_opt))
# gpu LB sweep & boundaries
generate_sweep(ctx, 'UniformGridGPU_LbKernel', update_rule,
field_swaps=[('pdfs', 'pdfs_tmp')],
inner_outer_split=True, target='gpu', gpu_indexing_params=sweep_params,
varying_parameters=vp)
generate_boundary(ctx, 'UniformGridGPU_NoSlip', NoSlip(), lb_method, target='gpu')
generate_boundary(ctx, 'UniformGridGPU_UBB', UBB([0.05, 0, 0]), lb_method, target='gpu')
# LB Sweep
collision_rule = create_lb_collision_rule(**options)
if optimize:
collision_rule = insert_fast_divisions(collision_rule)
collision_rule = insert_fast_sqrts(collision_rule)
lb_method = collision_rule.method
generate_alternating_lbm_sweep(ctx, 'UniformGridGPU_LbKernel', collision_rule, streaming_pattern,
optimization=options['optimization'],
inner_outer_split=True, varying_parameters=vp, field_swaps=field_swaps)
# getter & setter
setter_assignments = macroscopic_values_setter(lb_method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=1.0)
getter_assignments = macroscopic_values_getter(lb_method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=None)
generate_sweep(ctx, 'UniformGridGPU_MacroSetter', setter_assignments)
generate_sweep(ctx, 'UniformGridGPU_MacroGetter', getter_assignments)
setter_assignments = macroscopic_values_setter(lb_method, density=1.0, velocity=velocity_field.center_vector,
pdfs=pdfs,
streaming_pattern=streaming_pattern,
previous_timestep=Timestep.EVEN)
generate_sweep(ctx, 'UniformGridGPU_MacroSetter', setter_assignments, target='gpu')
# Boundaries
noslip = NoSlip()
ubb = UBB((0.05, 0, 0))
generate_alternating_lbm_boundary(ctx, 'UniformGridGPU_NoSlip', noslip, lb_method, field_name=pdfs.name,
streaming_pattern=streaming_pattern, target='gpu')
generate_alternating_lbm_boundary(ctx, 'UniformGridGPU_UBB', ubb, lb_method, field_name=pdfs.name,
streaming_pattern=streaming_pattern, target='gpu')
# communication
generate_pack_info_from_kernel(ctx, 'UniformGridGPU_PackInfo', update_rule, target='gpu')
generate_lb_pack_info(ctx, 'UniformGridGPU_PackInfo', stencil, pdfs,
streaming_pattern=streaming_pattern, target='gpu',
always_generate_separate_classes=True)
infoHeaderParams = {
'stencil': stencil_str,
'q': q,
'configName': ctx.config,
'streaming_pattern': streaming_pattern,
'collision_setup': collision_setup,
'cse_global': int(options['optimization']['cse_global']),
'cse_pdfs': int(options['optimization']['cse_pdfs']),
}
ctx.write_file("UniformGridGPU_Defines.h", info_header.format(**infoHeaderParams))
stencil_typedefs = {'Stencil_T': stencil,
'CommunicationStencil_T': stencil}
field_typedefs = {'PdfField_T': pdfs,
'VelocityField_T': velocity_field}
# Info header containing correct template definitions for stencil and field
generate_info_header(ctx, 'UniformGridGPU_InfoHeader',
stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs,
additional_code=info_header.format(**infoHeaderParams))