diff --git a/AUTHORS.txt b/AUTHORS.txt index b0dba6103dfb10677721c3b62fba9269f2f5ec72..2f956f1b169b71a921ce888d84a0d7f915891f51 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -13,6 +13,7 @@ Dominik Bartuschat Ehsan Fattahi Felix Winterhalter Florian Schornbaum +Frederik Hennig Grigorii Drozdov Helen Schottenhamml Igor Ostanin diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py index c1371278a713e88e186b28346c4c314813dd6347..1a7973f6d6114d375e05d8eccd27d5d4764615e3 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py @@ -69,11 +69,10 @@ options_dict = { 'relaxation_rate': omega, }, 'cumulant': { + 'method': 'cumulant', 'stencil': 'D3Q19', 'compressible': True, - 'method': 'mrt', - 'cumulant': True, - 'relaxation_rates': [0, omega, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + 'relaxation_rate': omega, }, } diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py index b9ca24ba16f406ef37e5f51349068878295edbc3..fc587c44546c21ecf4b33227dcee108cbe1d46c5 100644 --- a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py +++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py @@ -55,11 +55,10 @@ options_dict = { 'relaxation_rate': omega, }, 'cumulant': { + 'method': 'cumulant', 'stencil': 'D3Q19', 'compressible': True, - 'method': 'mrt', - 'cumulant': True, - 'relaxation_rates': [0, omega, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + 'relaxation_rate': omega, }, } diff --git a/apps/pythonmodule/PythonModule.cpp b/apps/pythonmodule/PythonModule.cpp index b32f103ddda25f9590a6f2d87ee1cae30bc2e47b..3059e3f059e2fa110dda1681859693cb6616fc44 100644 --- a/apps/pythonmodule/PythonModule.cpp +++ b/apps/pythonmodule/PythonModule.cpp @@ -40,11 +40,13 @@ using namespace walberla; Field<walberla::real_t,1>,\ Field<walberla::real_t,2>,\ Field<walberla::real_t,3>,\ + Field<walberla::real_t,4>,\ Field<walberla::real_t,9>,\ Field<walberla::real_t,15>,\ Field<walberla::real_t,19>,\ Field<walberla::real_t,27>,\ Field<walberla::int8_t,1>,\ + Field<walberla::int32_t,1>,\ Field<walberla::int64_t,1>,\ Field<walberla::int64_t,2>,\ Field<walberla::int64_t,3>,\ diff --git a/apps/showcases/PhaseFieldAllenCahn/CPU/CMakeLists.txt b/apps/showcases/PhaseFieldAllenCahn/CPU/CMakeLists.txt index 0e8f653f7601a82c69862e8631a4b5d033cce38f..adacc326397db495352f958afa90fb26ae63e5d4 100644 --- a/apps/showcases/PhaseFieldAllenCahn/CPU/CMakeLists.txt +++ b/apps/showcases/PhaseFieldAllenCahn/CPU/CMakeLists.txt @@ -17,5 +17,4 @@ waLBerla_add_executable(NAME multiphaseCPU FILES multiphase.cpp PythonExports.cpp InitializerFunctions.cpp contact.cpp CalculateNormals.cpp multiphase_codegen.py DEPENDS blockforest core field postprocessing lbm geometry timeloop gui PhaseFieldCodeGenCPU) - - +set_target_properties(multiphaseCPU PROPERTIES CXX_VISIBILITY_PRESET hidden) diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt b/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt index c65a87749aee1bfe721d738b1cb7aac751c29648..51c6922032a44744f513890ac6642185930ba3af 100644 --- a/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt +++ b/apps/showcases/PhaseFieldAllenCahn/GPU/CMakeLists.txt @@ -20,5 +20,4 @@ waLBerla_add_executable(NAME multiphaseGPU FILES multiphase.cpp PythonExports.cpp InitializerFunctions.cpp CalculateNormals.cpp contact.cu multiphase_codegen.py DEPENDS blockforest core cuda field postprocessing lbm geometry timeloop gui PhaseFieldCodeGenGPU) - - +set_target_properties(multiphaseGPU PROPERTIES CXX_VISIBILITY_PRESET hidden) diff --git a/apps/tutorials/codegen/03_AdvancedLBMCodegen.py b/apps/tutorials/codegen/03_AdvancedLBMCodegen.py index a397909805f5fb18f1f1d3a9235717219d7bdc7d..65a0602f2a4ff2916c87661a38bc5b90c262cec6 100644 --- a/apps/tutorials/codegen/03_AdvancedLBMCodegen.py +++ b/apps/tutorials/codegen/03_AdvancedLBMCodegen.py @@ -36,9 +36,8 @@ optimization = {'cse_global': True, # ================== lbm_params = {'stencil': stencil, - 'method': 'mrt_raw', - 'relaxation_rates': [0, 0, 0, omega, omega, omega, 1, 1, 1], - 'cumulant': True, + 'method': 'cumulant', + 'relaxation_rate': omega, 'compressible': True} lbm_update_rule = create_lb_update_rule(optimization=optimization, diff --git a/python/lbmpy_walberla/__init__.py b/python/lbmpy_walberla/__init__.py index f7b022ab058e6a83a05aafa5fa586fb59d8551fd..1a7b136a7935d13be6c47d8a101e9efc4c631f56 100644 --- a/python/lbmpy_walberla/__init__.py +++ b/python/lbmpy_walberla/__init__.py @@ -1,4 +1,5 @@ from .boundary import generate_boundary from .walberla_lbm_generation import RefinementScaling, generate_lattice_model +from .packinfo import generate_lb_pack_info -__all__ = ['generate_lattice_model', 'RefinementScaling', 'generate_boundary'] +__all__ = ['generate_lattice_model', 'RefinementScaling', 'generate_boundary', 'generate_lb_pack_info'] diff --git a/python/lbmpy_walberla/additional_data_handler.py b/python/lbmpy_walberla/additional_data_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..804c4953e2bd31f8b2cc905c62663e99bf26476b --- /dev/null +++ b/python/lbmpy_walberla/additional_data_handler.py @@ -0,0 +1,127 @@ +from pystencils.stencil import inverse_direction + +from lbmpy.advanced_streaming import AccessPdfValues, numeric_offsets, numeric_index +from lbmpy.boundaries import ExtrapolationOutflow, UBB + +from pystencils_walberla.additional_data_handler import AdditionalDataHandler + + +class UBBAdditionalDataHandler(AdditionalDataHandler): + def __init__(self, stencil, boundary_object): + assert isinstance(boundary_object, UBB) + self._boundary_object = boundary_object + super(UBBAdditionalDataHandler, self).__init__(stencil=stencil) + + @property + def constructor_arguments(self): + return ", std::function<Vector3<real_t>(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)>& " \ + "velocityCallback " + + @property + def initialiser_list(self): + return "elementInitaliser(velocityCallback)," + + @property + def additional_arguments_for_fill_function(self): + return "blocks, " + + @property + def additional_parameters_for_fill_function(self): + return " const shared_ptr<StructuredBlockForest> &blocks, " + + def data_initialisation(self, direction): + init_list = ["Vector3<real_t> InitialisatonAdditionalData = elementInitaliser(Cell(it.x(), it.y(), it.z()), " + "blocks, *block);", "element.vel_0 = InitialisatonAdditionalData[0];", + "element.vel_1 = InitialisatonAdditionalData[1];"] + if self._dim == 3: + init_list.append("element.vel_2 = InitialisatonAdditionalData[2];") + + return "\n".join(init_list) + + @property + def additional_member_variable(self): + return "std::function<Vector3<real_t>(const Cell &, const shared_ptr<StructuredBlockForest>&, IBlock&)> " \ + "elementInitaliser; " + + +class OutflowAdditionalDataHandler(AdditionalDataHandler): + def __init__(self, stencil, boundary_object, target='cpu', field_name='pdfs'): + assert isinstance(boundary_object, ExtrapolationOutflow) + self._boundary_object = boundary_object + self._stencil = boundary_object.stencil + self._lb_method = boundary_object.lb_method + self._normal_direction = boundary_object.normal_direction + self._field_name = field_name + self._target = target + super(OutflowAdditionalDataHandler, self).__init__(stencil=stencil) + + assert sum([a != 0 for a in self._normal_direction]) == 1,\ + "The outflow boundary is only implemented for straight walls at the moment." + + @property + def constructor_arguments(self): + return f", BlockDataID {self._field_name}CPUID_" if self._target == 'gpu' else "" + + @property + def initialiser_list(self): + return f"{self._field_name}CPUID({self._field_name}CPUID_)," if self._target == 'gpu' else "" + + @property + def additional_field_data(self): + identifier = "CPU" if self._target == "gpu" else "" + return f"auto {self._field_name} = block->getData< field::GhostLayerField<double, " \ + f"{len(self._stencil)}> >({self._field_name}{identifier}ID); " + + def data_initialisation(self, direction_index): + pdf_acc = AccessPdfValues(self._boundary_object.stencil, + streaming_pattern=self._boundary_object.streaming_pattern, + timestep=self._boundary_object.zeroth_timestep, + streaming_dir='out') + + init_list = [] + for key, value in self.get_init_dict(pdf_acc, direction_index).items(): + init_list.append(f"element.{key} = {self._field_name}->get({value});") + + return "\n".join(init_list) + + @property + def additional_member_variable(self): + return f"BlockDataID {self._field_name}CPUID;" + + @property + def stencil_info(self): + stencil_info = [] + for i, d in enumerate(self._stencil): + if any([a != 0 and b != 0 and a == b for a, b in zip(self._normal_direction, d)]): + direction = d if self._dim == 3 else d + (0,) + stencil_info.append((i, direction, ", ".join([str(e) for e in direction]))) + return stencil_info + + def get_init_dict(self, pdf_accessor, direction_index): + """The Extrapolation Outflow boundary needs additional data. This function provides a list of all values + which have to be initialised""" + position = ["it.x()", "it.y()", "it.z()"] + direction = self._stencil[direction_index] + inv_dir = self._stencil.index(inverse_direction(direction)) + + tangential_offset = tuple(offset - normal for offset, normal in zip(direction, self._normal_direction)) + + result = {} + pos = [] + offsets = numeric_offsets(pdf_accessor.accs[inv_dir]) + for p, o, t in zip(position, offsets, tangential_offset): + pos.append(p + f" + cell_idx_c({str(o + t)})") + if self._dim == 2: + pos.append("0") + pos.append(str(numeric_index(pdf_accessor.accs[inv_dir])[0])) + result[f'pdf'] = ', '.join(pos) + + pos = [] + for p, o, t in zip(position, offsets, tangential_offset): + pos.append(p + f" + cell_idx_c({str(o + t)})") + if self._dim == 2: + pos.append("0") + pos.append(str(numeric_index(pdf_accessor.accs[inv_dir])[0])) + result[f'pdf_nd'] = ', '.join(pos) + + return result diff --git a/python/lbmpy_walberla/boundary.py b/python/lbmpy_walberla/boundary.py index 8f751f5296d74923b58c57c35da1fd97b0df82e4..b0dafb7b3b9835073101acfa33a611c71c62095d 100644 --- a/python/lbmpy_walberla/boundary.py +++ b/python/lbmpy_walberla/boundary.py @@ -1,5 +1,6 @@ import pystencils_walberla.boundary from lbmpy.boundaries.boundaryhandling import create_lattice_boltzmann_boundary_kernel +from lbmpy.advanced_streaming import Timestep, is_inplace def generate_boundary(generation_context, @@ -7,15 +8,29 @@ def generate_boundary(generation_context, boundary_object, lb_method, field_name='pdfs', + streaming_pattern='pull', + always_generate_separate_classes=False, + additional_data_handler=None, **create_kernel_params): - def boundary_creation_function(field, index_field, stencil, boundary_functor, target='cpu', openmp=True, **kwargs): - return create_lattice_boltzmann_boundary_kernel(field, - index_field, - lb_method, - boundary_functor, - target=target, - **kwargs) + pargs = (field, index_field, lb_method, boundary_functor) + kwargs = {'target': target, **kwargs} + if is_inplace(streaming_pattern) or always_generate_separate_classes: + return { + 'EvenSweep': create_lattice_boltzmann_boundary_kernel(*pargs, + streaming_pattern=streaming_pattern, + prev_timestep=Timestep.EVEN, + **kwargs), + 'OddSweep': create_lattice_boltzmann_boundary_kernel(*pargs, + streaming_pattern=streaming_pattern, + prev_timestep=Timestep.ODD, + **kwargs) + } + else: + return create_lattice_boltzmann_boundary_kernel(*pargs, + streaming_pattern=streaming_pattern, + prev_timestep=Timestep.BOTH, + **kwargs) pystencils_walberla.boundary.generate_boundary(generation_context, class_name, @@ -25,4 +40,5 @@ def generate_boundary(generation_context, index_shape=[len(lb_method.stencil)], kernel_creation_function=boundary_creation_function, namespace='lbm', + additional_data_handler=additional_data_handler, **create_kernel_params) diff --git a/python/lbmpy_walberla/packinfo.py b/python/lbmpy_walberla/packinfo.py new file mode 100644 index 0000000000000000000000000000000000000000..54a4e675526ec1afb91ff24783397d954faadd17 --- /dev/null +++ b/python/lbmpy_walberla/packinfo.py @@ -0,0 +1,82 @@ +from collections import defaultdict +from lbmpy.advanced_streaming.utility import Timestep, get_accessor, get_timesteps +from lbmpy.advanced_streaming.communication import _extend_dir +from pystencils.stencil import inverse_direction +from pystencils_walberla.codegen import comm_directions, generate_pack_info +from pystencils import Assignment, Field + + +def generate_lb_pack_info(generation_context, + class_name_prefix: str, + stencil, + pdf_field, + streaming_pattern='pull', + lb_collision_rule=None, + always_generate_separate_classes=False, + namespace='lbm', + **create_kernel_params): + """Generates waLBerla MPI PackInfos for an LBM kernel, based on a given method + and streaming pattern. For in-place streaming patterns, two PackInfos are generated; + one for the even and another for the odd time steps. + + Args: + generation_context: see documentation of `generate_sweep` + class_name_prefix: Prefix of the desired class name which will be extended with + 'Even' or 'Odd' for in-place kernels + stencil: The tuple of directions specifying the employed LB stencil. + pdf_field: pdf field for which the pack info is created + streaming_pattern: The employed streaming pattern. + lb_collision_rule: Optional. The collision rule defining the lattice boltzmann kernel, as returned + by `create_lb_collision_rule`. If specified, it will be scanned for non-local + accesses to other fields other than the PDF fields (as might be required for + computing gradients in coupled simulations), whose communication will then + be included in the PackInfo. + always_generate_separate_classes: If True, generate a pair of Even/Odd PackInfos even for a two- + fields kernel (i.e. the pull/push patterns). Otherwise, for two-fields + kernels, only one PackInfo class will be generated without a + suffix to its name. + namespace: inner namespace of the generated class + **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel` + """ + timesteps = [Timestep.EVEN, Timestep.ODD] \ + if always_generate_separate_classes \ + else get_timesteps(streaming_pattern) + + common_spec = defaultdict(set) + + if lb_collision_rule is not None: + assignments = lb_collision_rule.all_assignments + reads = set() + for a in assignments: + if not isinstance(a, Assignment): + continue + reads.update(a.rhs.atoms(Field.Access)) + for fa in reads: + assert all(abs(e) <= 1 for e in fa.offsets) + if all(offset == 0 for offset in fa.offsets): + continue + comm_direction = inverse_direction(fa.offsets) + for comm_dir in comm_directions(comm_direction): + common_spec[(comm_dir,)].add(fa.field.center(*fa.index)) + + for t in timesteps: + spec = common_spec.copy() + write_accesses = get_accessor(streaming_pattern, t).write(pdf_field, stencil) + for comm_dir in stencil: + if all(d == 0 for d in comm_dir): + continue + + for streaming_dir in set(_extend_dir(comm_dir)) & set(stencil): + d = stencil.index(streaming_dir) + fa = write_accesses[d] + spec[(comm_dir,)].add(fa) + + if t == Timestep.EVEN: + class_name_suffix = 'Even' + elif t == Timestep.ODD: + class_name_suffix = 'Odd' + else: + class_name_suffix = '' + + class_name = class_name_prefix + class_name_suffix + generate_pack_info(generation_context, class_name, spec, namespace=namespace, **create_kernel_params) diff --git a/python/lbmpy_walberla/walberla_lbm_generation.py b/python/lbmpy_walberla/walberla_lbm_generation.py index a7563b76c50984da4893d7a4ce471d26794098db..82867d8b803ae5e2753d3286f8246b188ce4a5a1 100644 --- a/python/lbmpy_walberla/walberla_lbm_generation.py +++ b/python/lbmpy_walberla/walberla_lbm_generation.py @@ -6,7 +6,6 @@ from jinja2 import Environment, PackageLoader, StrictUndefined, Template from sympy.tensor import IndexedBase import pystencils as ps -# from lbmpy.creationfunctions import create_lb_update_rule, update_with_default_parameters from lbmpy.fieldaccess import CollideOnlyInplaceAccessor, StreamPullTwoFieldsAccessor from lbmpy.relaxationrates import relaxation_rate_scaling from lbmpy.stencils import get_stencil diff --git a/python/pystencils_walberla/additional_data_handler.py b/python/pystencils_walberla/additional_data_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..7abe7d5aa0365622fc3b870787c64810c52159e4 --- /dev/null +++ b/python/pystencils_walberla/additional_data_handler.py @@ -0,0 +1,55 @@ +from pystencils.stencil import inverse_direction + + +class AdditionalDataHandler: + """Base class that defines how to handle boundary conditions holding additional data.""" + + def __init__(self, stencil): + self._dim = len(stencil[0]) + + # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D + if self._dim == 2: + self._walberla_stencil = () + for d in stencil: + d = d + (0,) + self._walberla_stencil = self._walberla_stencil + (d,) + else: + self._walberla_stencil = stencil + + @property + def constructor_arguments(self): + return "" + + @property + def initialiser_list(self): + return "" + + @property + def additional_arguments_for_fill_function(self): + return "" + + @property + def additional_parameters_for_fill_function(self): + return "" + + @property + def additional_field_data(self): + return "" + + def data_initialisation(self, direction_index): + return "" + + @property + def additional_member_variable(self): + return "" + + @property + def stencil_info(self): + return [(i, d, ", ".join([str(e) for e in d])) for i, d in enumerate(self._walberla_stencil)] + + @property + def inverse_directions(self): + inv_dirs = [] + for direction in self._walberla_stencil: + inv_dirs.append(self._walberla_stencil.index(inverse_direction(direction))) + return inv_dirs diff --git a/python/pystencils_walberla/boundary.py b/python/pystencils_walberla/boundary.py index 73d429ec57589b12b63da813df28574044f9f846..0d251236c0f9400bc2aea424e775aa1eb6c7e5a2 100644 --- a/python/pystencils_walberla/boundary.py +++ b/python/pystencils_walberla/boundary.py @@ -1,3 +1,4 @@ +from collections import OrderedDict import numpy as np from jinja2 import Environment, PackageLoader, StrictUndefined from pystencils import Field, FieldType @@ -8,6 +9,7 @@ from pystencils.boundaries.createindexlist import ( from pystencils.data_types import TypedSymbol, create_type from pystencils_walberla.codegen import KernelInfo, default_create_kernel_parameters from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env +from pystencils_walberla.additional_data_handler import AdditionalDataHandler def generate_boundary(generation_context, @@ -20,7 +22,12 @@ def generate_boundary(generation_context, kernel_creation_function=None, target='cpu', namespace='pystencils', + additional_data_handler=None, **create_kernel_params): + + if boundary_object.additional_data and additional_data_handler is None: + raise ValueError("Boundary object has additional data but you have not provided an AdditionalDataHandler.") + struct_name = "IndexInfo" boundary_object.name = class_name dim = len(neighbor_stencil[0]) @@ -44,36 +51,44 @@ def generate_boundary(generation_context, if not kernel_creation_function: kernel_creation_function = create_boundary_kernel - kernel = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object, **create_kernel_params) - kernel.function_name = "boundary_" + boundary_object.name - kernel.assumed_inner_stride_one = False - - # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D - if dim == 2: - stencil = () - for d in neighbor_stencil: - d = d + (0,) - stencil = stencil + (d,) + kernels = kernel_creation_function(field, index_field, neighbor_stencil, boundary_object, **create_kernel_params) + if isinstance(kernels, dict): + sweep_to_kernel_info_dict = OrderedDict() + dummy_kernel_info = None + for sweep_class, sweep_kernel in kernels.items(): + sweep_kernel.function_name = "boundary_" + boundary_object.name + '_' + sweep_class + sweep_kernel.assumed_inner_stride_one = False + kernel_info = KernelInfo(sweep_kernel) + sweep_to_kernel_info_dict[sweep_class] = kernel_info + if dummy_kernel_info is None: + dummy_kernel_info = kernel_info + # elif not dummy_kernel_info.has_same_interface(kernel_info): + # raise ValueError("Multiple boundary sweeps must have the same kernel interface!") + multi_sweep = True else: - stencil = neighbor_stencil + multi_sweep = False + kernel = kernels + kernel.function_name = "boundary_" + boundary_object.name + kernel.assumed_inner_stride_one = False + kernel_info = KernelInfo(kernel) + sweep_to_kernel_info_dict = {'': kernel_info} + dummy_kernel_info = kernel_info - stencil_info = [(i, d, ", ".join([str(e) for e in d])) for i, d in enumerate(stencil)] - inv_dirs = [] - for direction in stencil: - inverse_dir = tuple([-i for i in direction]) - inv_dirs.append(stencil.index(inverse_dir)) + if additional_data_handler is None: + additional_data_handler = AdditionalDataHandler(stencil=neighbor_stencil) context = { 'class_name': boundary_object.name, + 'sweep_classes': sweep_to_kernel_info_dict, + 'multi_sweep': multi_sweep, + 'dummy_kernel_info': dummy_kernel_info, 'StructName': struct_name, 'StructDeclaration': struct_from_numpy_dtype(struct_name, index_struct_dtype), - 'kernel': KernelInfo(kernel), - 'stencil_info': stencil_info, - 'inverse_directions': inv_dirs, 'dim': dim, 'target': target, 'namespace': namespace, - 'inner_or_boundary': boundary_object.inner_or_boundary + 'inner_or_boundary': boundary_object.inner_or_boundary, + 'additional_data_handler': additional_data_handler } env = Environment(loader=PackageLoader('pystencils_walberla'), undefined=StrictUndefined) @@ -83,8 +98,8 @@ def generate_boundary(generation_context, source = env.get_template('Boundary.tmpl.cpp').render(**context) source_extension = "cpp" if target == "cpu" else "cu" - generation_context.write_file("{}.h".format(class_name), header) - generation_context.write_file("{}.{}".format(class_name, source_extension), source) + generation_context.write_file(f"{class_name}.h", header) + generation_context.write_file(f"{class_name}.{source_extension}", source) def generate_staggered_boundary(generation_context, class_name, boundary_object, @@ -102,23 +117,23 @@ def generate_staggered_flux_boundary(generation_context, class_name, boundary_ob def struct_from_numpy_dtype(struct_name, numpy_dtype): - result = "struct %s { \n" % (struct_name,) + result = f"struct {struct_name} {{ \n" equality_compare = [] constructor_params = [] constructor_initializer_list = [] for name, (sub_type, offset) in numpy_dtype.fields.items(): pystencils_type = create_type(sub_type) - result += " %s %s;\n" % (pystencils_type, name) + result += f" {pystencils_type} {name};\n" if name in boundary_index_array_coordinate_names or name == direction_member_name: - constructor_params.append("%s %s_" % (pystencils_type, name)) - constructor_initializer_list.append("%s(%s_)" % (name, name)) + constructor_params.append(f"{pystencils_type} {name}_") + constructor_initializer_list.append(f"{name}({name}_)") else: - constructor_initializer_list.append("%s()" % name) + constructor_initializer_list.append(f"{name}()") if pystencils_type.is_float(): - equality_compare.append("floatIsEqual(%s, o.%s)" % (name, name)) + equality_compare.append(f"floatIsEqual({name}, o.{name})") else: - equality_compare.append("%s == o.%s" % (name, name)) + equality_compare.append(f"{name} == o.{name}") result += " %s(%s) : %s {}\n" % \ (struct_name, ", ".join(constructor_params), ", ".join(constructor_initializer_list)) diff --git a/python/pystencils_walberla/codegen.py b/python/pystencils_walberla/codegen.py index 37013cba6203cdd9a45da7b044361dc71fa457d1..896a795cc2670179ca417d3ce05f88d05abb1cfe 100644 --- a/python/pystencils_walberla/codegen.py +++ b/python/pystencils_walberla/codegen.py @@ -20,7 +20,7 @@ __all__ = ['generate_sweep', 'generate_pack_info', 'generate_pack_info_for_field def generate_sweep(generation_context, class_name, assignments, namespace='pystencils', field_swaps=(), staggered=False, varying_parameters=(), - inner_outer_split=False, + inner_outer_split=False, ghost_layers_to_include=0, **create_kernel_params): """Generates a waLBerla sweep from a pystencils representation. @@ -44,6 +44,8 @@ def generate_sweep(generation_context, class_name, assignments, the C++ class constructor even if the kernel does not need them. inner_outer_split: if True generate a sweep that supports separate iteration over inner and outer regions to allow for communication hiding. + ghost_layers_to_include: determines how many ghost layers should be included for the Sweep. + This is relevant if a setter kernel should also set correct values to the ghost layers. **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel` """ create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params) @@ -78,6 +80,7 @@ def generate_sweep(generation_context, class_name, assignments, 'class_name': class_name, 'target': create_kernel_params.get("target", "cpu"), 'headers': get_headers(ast), + 'ghost_layers_to_include': ghost_layers_to_include } header = env.get_template("Sweep.tmpl.h").render(**jinja_context) source = env.get_template("Sweep.tmpl.cpp").render(**jinja_context) @@ -93,6 +96,7 @@ def generate_sweep(generation_context, class_name, assignments, 'target': create_kernel_params.get("target", "cpu"), 'field': representative_field, 'headers': get_headers(ast), + 'ghost_layers_to_include': 0 } header = env.get_template("SweepInnerOuter.tmpl.h").render(**jinja_context) source = env.get_template("SweepInnerOuter.tmpl.cpp").render(**jinja_context) @@ -132,7 +136,7 @@ def generate_pack_info_from_kernel(generation_context, class_name: str, assignme class_name: name of the generated class assignments: list of assignments from the compute kernel - generates PackInfo for "pull" part only i.e. the kernel is expected to only write to the center - kind: + kind: can either be pull or push **create_kernel_params: remaining keyword arguments are passed to `pystencils.create_kernel` """ assert kind in ('push', 'pull') @@ -240,7 +244,6 @@ def generate_pack_info(generation_context, class_name: str, pack_kernels[direction_strings] = KernelInfo(pack_ast) unpack_kernels[direction_strings] = KernelInfo(unpack_ast) elements_per_cell[direction_strings] = len(terms) - fused_kernel = create_kernel([Assignment(buffer.center, t) for t in all_accesses], **create_kernel_params) fused_kernel.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] @@ -333,6 +336,12 @@ class KernelInfo: self.varying_parameters = tuple(varying_parameters) self.parameters = ast.get_parameters() # cache parameters here + def has_same_interface(self, other): + return self.temporary_fields == other.temporary_fields \ + and self.field_swaps == other.field_swaps \ + and self.varying_parameters == other.varying_parameters \ + and self.parameters == other.parameters + def get_vectorize_instruction_set(generation_context): if generation_context.optimize_for_localhost: diff --git a/python/pystencils_walberla/jinja_filters.py b/python/pystencils_walberla/jinja_filters.py index 60903be9e73a3ce92a83b1ab1e3968c791b27e3b..7158f81685b5382f7516299af574fd0d374be47a 100644 --- a/python/pystencils_walberla/jinja_filters.py +++ b/python/pystencils_walberla/jinja_filters.py @@ -338,6 +338,26 @@ def generate_constructor_parameters(kernel_info, parameters_to_ignore=None): return ", ".join(parameter_list + varying_parameters) +def generate_constructor_call_arguments(kernel_info, parameters_to_ignore=None): + if parameters_to_ignore is None: + parameters_to_ignore = [] + + varying_parameters = [] + if hasattr(kernel_info, 'varying_parameters'): + varying_parameters = kernel_info.varying_parameters + varying_parameter_names = tuple(e[1] for e in varying_parameters) + parameters_to_ignore += kernel_info.temporary_fields + varying_parameter_names + + parameter_list = [] + for param in kernel_info.parameters: + if param.is_field_pointer and param.field_name not in parameters_to_ignore: + parameter_list.append("%sID" % (param.field_name, )) + elif not param.is_field_parameter and param.symbol.name not in parameters_to_ignore: + parameter_list.append(f'{param.symbol.name}_') + varying_parameters = ["%s_" % e for e in varying_parameter_names] + return ", ".join(parameter_list + varying_parameters) + + @jinja2.contextfilter def generate_members(ctx, kernel_info, parameters_to_ignore=(), only_fields=False): ast = kernel_info.ast @@ -382,14 +402,25 @@ def generate_destructor(kernel_info, class_name): return temporary_constructor.format(contents=contents, class_name=class_name) +@jinja2.contextfilter +def nested_class_method_definition_prefix(ctx, nested_class_name): + outer_class = ctx['class_name'] + if len(nested_class_name) == 0: + return outer_class + else: + return outer_class + '::' + nested_class_name + + def add_pystencils_filters_to_jinja_env(jinja_env): jinja_env.filters['generate_definition'] = generate_definition jinja_env.filters['generate_declaration'] = generate_declaration jinja_env.filters['generate_members'] = generate_members jinja_env.filters['generate_constructor_parameters'] = generate_constructor_parameters jinja_env.filters['generate_constructor_initializer_list'] = generate_constructor_initializer_list + jinja_env.filters['generate_constructor_call_arguments'] = generate_constructor_call_arguments jinja_env.filters['generate_call'] = generate_call jinja_env.filters['generate_block_data_to_field_extraction'] = generate_block_data_to_field_extraction jinja_env.filters['generate_swaps'] = generate_swaps jinja_env.filters['generate_refs_for_kernel_parameters'] = generate_refs_for_kernel_parameters jinja_env.filters['generate_destructor'] = generate_destructor + jinja_env.filters['nested_class_method_definition_prefix'] = nested_class_method_definition_prefix diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.cpp b/python/pystencils_walberla/templates/Boundary.tmpl.cpp index 33f799107424bc94b9e7db4cd3a1ab1dcb827754..ef1ec3b7b860f3ac0fd4b290e1e5fe33bd7f772e 100644 --- a/python/pystencils_walberla/templates/Boundary.tmpl.cpp +++ b/python/pystencils_walberla/templates/Boundary.tmpl.cpp @@ -51,7 +51,11 @@ namespace {{namespace}} { #endif -{{kernel|generate_definition}} +{% for sweep_class, sweep_kernel in sweep_classes.items() %} + +{{sweep_kernel|generate_definition}} + +{% endfor %} #ifdef __GNUC__ #pragma GCC diagnostic pop @@ -61,8 +65,10 @@ namespace {{namespace}} { #pragma pop #endif +{% for sweep_class, sweep_kernel in sweep_classes.items() %} + -void {{class_name}}::run( IBlock * block, IndexVectors::Type type {% if target == 'gpu'%}, cudaStream_t stream {%endif%}) +void {{sweep_class|nested_class_method_definition_prefix}}::run( IBlock * block, IndexVectors::Type type {% if target == 'gpu'%}, cudaStream_t stream {%endif%}) { auto * indexVectors = block->getData<IndexVectors>(indexVectorID); int64_t indexVectorSize = int64_c( indexVectors->indexVector(type).size() ); @@ -77,25 +83,27 @@ void {{class_name}}::run( IBlock * block, IndexVectors::Type type {% if target = uint8_t * _data_indexVector = reinterpret_cast<uint8_t*>(pointer); - {{kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize'])|indent(4)}} - {{kernel|generate_call(spatial_shape_symbols=['indexVectorSize'], stream='stream')|indent(4)}} + {{sweep_kernel|generate_block_data_to_field_extraction(['indexVector', 'indexVectorSize'])|indent(4)}} + {{sweep_kernel|generate_refs_for_kernel_parameters(prefix='', parameters_to_ignore=['indexVectorSize'], ignore_fields=True)|indent(4) }} + {{sweep_kernel|generate_call(spatial_shape_symbols=['indexVectorSize'], stream='stream')|indent(4)}} } -void {{class_name}}::operator() ( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} ) +void {{sweep_class|nested_class_method_definition_prefix}}::operator() ( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} ) { run( block, IndexVectors::ALL{% if target == 'gpu'%}, stream {%endif%}); } -void {{class_name}}::inner( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} ) +void {{sweep_class|nested_class_method_definition_prefix}}::inner( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} ) { run( block, IndexVectors::INNER{% if target == 'gpu'%}, stream {%endif%} ); } -void {{class_name}}::outer( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} ) +void {{sweep_class|nested_class_method_definition_prefix}}::outer( IBlock * block{% if target == 'gpu'%}, cudaStream_t stream {%endif%} ) { run( block, IndexVectors::OUTER{% if target == 'gpu'%}, stream {%endif%} ); } +{% endfor %} } // namespace {{namespace}} } // namespace walberla diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.h b/python/pystencils_walberla/templates/Boundary.tmpl.h index c856adceaa0a5b1e0cbc6470da5ee52c3dfee622..c0a39c32b0ae8c08202ae7f0641f03db0ad4e592 100644 --- a/python/pystencils_walberla/templates/Boundary.tmpl.h +++ b/python/pystencils_walberla/templates/Boundary.tmpl.h @@ -17,13 +17,13 @@ //! \\author pystencils //====================================================================================================================== - #include "core/DataTypes.h" {% if target is equalto 'cpu' -%} #include "field/GhostLayerField.h" {%- elif target is equalto 'gpu' -%} #include "cuda/GPUField.h" +#include "cuda/FieldCopy.h" {%- endif %} #include "domain_decomposition/BlockDataID.h" #include "domain_decomposition/IBlock.h" @@ -64,7 +64,7 @@ public: NUM_TYPES = 3 }; - IndexVectors() : cpuVectors_(NUM_TYPES) {} + IndexVectors() = default; bool operator==(IndexVectors & other) { return other.cpuVectors_ == cpuVectors_; } {% if target == 'gpu' -%} @@ -72,14 +72,14 @@ public: for( auto & gpuVec: gpuVectors_) cudaFree( gpuVec ); } - {% endif %} + {% endif -%} CpuIndexVector & indexVector(Type t) { return cpuVectors_[t]; } {{StructName}} * pointerCpu(Type t) { return &(cpuVectors_[t][0]); } {% if target == 'gpu' -%} {{StructName}} * pointerGpu(Type t) { return gpuVectors_[t]; } - {% endif %} + {% endif -%} void syncGPU() { @@ -100,39 +100,66 @@ public: } private: - std::vector<CpuIndexVector> cpuVectors_; + std::vector<CpuIndexVector> cpuVectors_{NUM_TYPES}; {% if target == 'gpu' -%} using GpuIndexVector = {{StructName}} *; std::vector<GpuIndexVector> gpuVectors_; - {% endif %} + {%- endif %} + }; + + {% if multi_sweep %} + {% for sweep_class_name, sweep_kernel_info in sweep_classes.items() %} + class {{sweep_class_name}} + { + public: + {{sweep_class_name}} ( {{sweep_kernel_info|generate_constructor_parameters(['indexVectorSize'])}} ) + : {{ sweep_kernel_info|generate_constructor_initializer_list(['indexVectorSize']) }} {}; + + void operator() ( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%}); + void inner( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%}); + void outer( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%}); + + private: + void run( IBlock * block, IndexVectors::Type type{% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%}); + + {{sweep_kernel_info|generate_members(['indexVectorSize'])|indent(12)}} }; + {{sweep_class_name}} get{{sweep_class_name}} () + { + return {{sweep_class_name}} ( {{sweep_kernel_info|generate_constructor_call_arguments(['indexVectorSize'])|indent(12)}} ); + } + {% endfor %} + {% endif %} {{class_name}}( const shared_ptr<StructuredBlockForest> & blocks, - {{kernel|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}} ) - : {{ kernel|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }} + {{dummy_kernel_info|generate_constructor_parameters(['indexVector', 'indexVectorSize'])}}{{additional_data_handler.constructor_arguments}}) + :{{additional_data_handler.initialiser_list}} {{ dummy_kernel_info|generate_constructor_initializer_list(['indexVector', 'indexVectorSize']) }} { auto createIdxVector = []( IBlock * const , StructuredBlockStorage * const ) { return new IndexVectors(); }; indexVectorID = blocks->addStructuredBlockData< IndexVectors >( createIdxVector, "IndexField_{{class_name}}"); }; + {% if not multi_sweep %} + void operator() ( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%}); void inner( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%}); void outer( IBlock * block {% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%}); + {% endif %} template<typename FlagField_T> void fillFromFlagField( const shared_ptr<StructuredBlockForest> & blocks, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID) { for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) - fillFromFlagField<FlagField_T>( &*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID ); + fillFromFlagField<FlagField_T>({{additional_data_handler.additional_arguments_for_fill_function}}&*blockIt, flagFieldID, boundaryFlagUID, domainFlagUID ); } template<typename FlagField_T> - void fillFromFlagField( IBlock * block, ConstBlockDataID flagFieldID, + void fillFromFlagField({{additional_data_handler.additional_parameters_for_fill_function}}IBlock * block, ConstBlockDataID flagFieldID, FlagUID boundaryFlagUID, FlagUID domainFlagUID ) { auto * indexVectors = block->getData< IndexVectors > ( indexVectorID ); @@ -140,8 +167,8 @@ public: auto & indexVectorInner = indexVectors->indexVector(IndexVectors::INNER); auto & indexVectorOuter = indexVectors->indexVector(IndexVectors::OUTER); - auto * flagField = block->getData< FlagField_T > ( flagFieldID ); + {{additional_data_handler.additional_field_data|indent(4)}} if( !(flagField->flagExists(boundaryFlagUID) && flagField->flagExists(domainFlagUID) )) return; @@ -152,7 +179,6 @@ public: auto inner = flagField->xyzSize(); inner.expand( cell_idx_t(-1) ); - indexVectorAll.clear(); indexVectorInner.clear(); indexVectorOuter.clear(); @@ -161,15 +187,15 @@ public: { if( ! isFlagSet(it, domainFlag) ) continue; - - {%- for dirIdx, dirVec, offset in stencil_info %} + {%- for dirIdx, dirVec, offset in additional_data_handler.stencil_info %} if ( isFlagSet( it.neighbor({{offset}} {%if dim == 3%}, 0 {%endif %}), boundaryFlag ) ) { {% if inner_or_boundary -%} auto element = {{StructName}}(it.x(), it.y(), {%if dim == 3%} it.z(), {%endif %} {{dirIdx}} ); {% else -%} - auto element = {{StructName}}(it.x() + cell_idx_c({{dirVec[0]}}), it.y() + cell_idx_c({{dirVec[1]}}), {%if dim == 3%} it.z() + cell_idx_c({{dirVec[2]}}), {%endif %} {{inverse_directions[dirIdx]}} ); + auto element = {{StructName}}(it.x() + cell_idx_c({{dirVec[0]}}), it.y() + cell_idx_c({{dirVec[1]}}), {%if dim == 3%} it.z() + cell_idx_c({{dirVec[2]}}), {%endif %} {{additional_data_handler.inverse_directions[dirIdx]}} ); {% endif -%} + {{additional_data_handler.data_initialisation(dirIdx)|indent(16)}} indexVectorAll.push_back( element ); if( inner.contains( it.x(), it.y(), it.z() ) ) indexVectorInner.push_back( element ); @@ -178,16 +204,18 @@ public: } {% endfor %} } - indexVectors->syncGPU(); } private: + {% if not multi_sweep %} void run( IBlock * block, IndexVectors::Type type{% if target == 'gpu'%}, cudaStream_t stream = 0 {%endif%}); + {% endif %} BlockDataID indexVectorID; + {{additional_data_handler.additional_member_variable|indent(4)}} public: - {{kernel|generate_members(('indexVector', 'indexVectorSize'))|indent(4)}} + {{dummy_kernel_info|generate_members(('indexVector', 'indexVectorSize'))|indent(4)}} }; diff --git a/python/pystencils_walberla/templates/Sweep.tmpl.cpp b/python/pystencils_walberla/templates/Sweep.tmpl.cpp index b3b12c6b39f93c71fc8a3a9fd3b4430e6a1cbb16..b26d9c6db71a12d941c329a3897b707067ae4892 100644 --- a/python/pystencils_walberla/templates/Sweep.tmpl.cpp +++ b/python/pystencils_walberla/templates/Sweep.tmpl.cpp @@ -59,7 +59,7 @@ void {{class_name}}::operator()( IBlock * block{%if target is equalto 'gpu'%} , { {{kernel|generate_block_data_to_field_extraction|indent(4)}} {{kernel|generate_refs_for_kernel_parameters(prefix='this->', ignore_fields=True)|indent(4) }} - {{kernel|generate_call(stream='stream')|indent(4)}} + {{kernel|generate_call(ghost_layers_to_include=ghost_layers_to_include, stream='stream')|indent(4)}} {{kernel|generate_swaps|indent(4)}} } diff --git a/src/lbm/field/QCriterion.h b/src/lbm/field/QCriterion.h index eff9b801523f5d5e696730306527f3239dd1507c..c681117781d1d6744f49703ccf4d3258da2bfeed 100644 --- a/src/lbm/field/QCriterion.h +++ b/src/lbm/field/QCriterion.h @@ -28,36 +28,40 @@ // You should never use these functions directly, always refer to the member functions // of PdfField or the free functions that can be found in MacroscopicValueCalculation.h -namespace walberla { -namespace lbm { - -struct QCriterion { +namespace walberla +{ +namespace lbm +{ +struct QCriterion +{ template< typename VelocityField_T, typename Filter_T > - static inline real_t get(const VelocityField_T & velocityField, const Filter_T & filter, - const cell_idx_t x, const cell_idx_t y, const cell_idx_t z, - real_t dx = real_t(1), real_t dy = real_t(1), real_t dz = real_t(1)) { + static inline real_t get(const VelocityField_T& velocityField, const Filter_T& filter, const cell_idx_t x, + const cell_idx_t y, const cell_idx_t z, real_t dx = real_t(1), real_t dy = real_t(1), + real_t dz = real_t(1)) + { const auto one = cell_idx_t(1); - if(filter(x,y,z) && filter(x+one,y,z) && filter(x-one,y,z) && filter(x,y+one,z) - && filter(x,y-one,z) && filter(x,y,z+one) && filter(x,y,z-one)) { - const Vector3<real_t> xa = velocityField.get(x+one,y,z); - const Vector3<real_t> xb = velocityField.get(x-one,y,z); - const Vector3<real_t> ya = velocityField.get(x,y+one,z); - const Vector3<real_t> yb = velocityField.get(x,y-one,z); - const Vector3<real_t> za = velocityField.get(x,y,z+one); - const Vector3<real_t> zb = velocityField.get(x,y,z-one); + auto f(velocityField.flattenedShallowCopy()); + + if (filter(x, y, z) && filter(x + one, y, z) && filter(x - one, y, z) && filter(x, y + one, z) && + filter(x, y - one, z) && filter(x, y, z + one) && filter(x, y, z - one)) + { + Vector3< real_t > xa(f->get(x + one, y, z, 0), f->get(x + one, y, z, 1), f->get(x + one, y, z, 2)); + Vector3< real_t > xb(f->get(x - one, y, z, 0), f->get(x - one, y, z, 1), f->get(x - one, y, z, 2)); + Vector3< real_t > ya(f->get(x, y + one, z, 0), f->get(x, y + one, z, 1), f->get(x, y + one, z, 2)); + Vector3< real_t > yb(f->get(x, y - one, z, 0), f->get(x, y - one, z, 1), f->get(x, y - one, z, 2)); + Vector3< real_t > za(f->get(x, y, z + one, 0), f->get(x, y, z + one, 1), f->get(x, y, z + one, 2)); + Vector3< real_t > zb(f->get(x, y, z - one, 0), f->get(x, y, z - one, 1), f->get(x, y, z - one, 2)); return calculate(xa, xb, ya, yb, za, zb, dx, dy, dz); } - return real_t(0); } - static inline real_t calculate(const Vector3<real_t> xa, const Vector3<real_t> xb, - const Vector3<real_t> ya, const Vector3<real_t> yb, - const Vector3<real_t> za, const Vector3<real_t> zb, - const real_t dx, const real_t dy, const real_t dz) { - + static inline real_t calculate(const Vector3< real_t > xa, const Vector3< real_t > xb, const Vector3< real_t > ya, + const Vector3< real_t > yb, const Vector3< real_t > za, const Vector3< real_t > zb, + const real_t dx, const real_t dy, const real_t dz) + { const auto halfInvDx = real_t(0.5) / dx; const auto halfInvDy = real_t(0.5) / dy; const auto halfInvDz = real_t(0.5) / dz; @@ -75,18 +79,17 @@ struct QCriterion { const real_t duzdz = (za[2] - zb[2]) * halfInvDz; // Q = 1/2 * (||W||² - ||S||²) - real_t sNormSq = duxdx*duxdx + duydy*duydy + duzdz*duzdz + - real_t(0.5)*(duxdy+duydx)*(duxdy+duydx) + real_t(0.5)*(duydz+duzdy)*(duydz+duzdy) + - real_t(0.5)*(duxdz+duzdx)*(duxdz+duzdx); + real_t sNormSq = duxdx * duxdx + duydy * duydy + duzdz * duzdz + real_t(0.5) * (duxdy + duydx) * (duxdy + duydx) + + real_t(0.5) * (duydz + duzdy) * (duydz + duzdy) + + real_t(0.5) * (duxdz + duzdx) * (duxdz + duzdx); - real_t omegaNormSq = real_t(0.5)*(duxdz-duzdx)*(duxdz-duzdx) + - real_t(0.5)*(duxdy-duydx)*(duxdy-duydx) + - real_t(0.5)*(duydz-duzdy)*(duydz-duzdy); + real_t omegaNormSq = real_t(0.5) * (duxdz - duzdx) * (duxdz - duzdx) + + real_t(0.5) * (duxdy - duydx) * (duxdy - duydx) + + real_t(0.5) * (duydz - duzdy) * (duydz - duzdy); return real_t(0.5) * (omegaNormSq - sNormSq); } }; - } // namespace lbm } // namespace walberla diff --git a/src/timeloop/SweepTimeloop.cpp b/src/timeloop/SweepTimeloop.cpp index 7fc72a38a9dd3f6c999a3d1fa37a8c677f88b667..15b40719112fceed4775077f5953d5102a12a509 100644 --- a/src/timeloop/SweepTimeloop.cpp +++ b/src/timeloop/SweepTimeloop.cpp @@ -54,10 +54,8 @@ void SweepTimeloop::doTimeStep(const Set<SUID> &selectors) } Sweep * selectedSweep = s.sweep.getUnique( selectors + bi->getState() ); - if( !selectedSweep ) - WALBERLA_ABORT("Selecting Sweep " << sweepIt->first << ": " << - "Ambiguous, or no sweep selected. Check your selector " << - selectors + bi->getState() << std::endl << s.sweep); + if (!selectedSweep) + continue; WALBERLA_LOG_PROGRESS_SECTION() { @@ -111,9 +109,7 @@ void SweepTimeloop::doTimeStep(const Set<SUID> &selectors, WcTimingPool &timing) Sweep * selectedSweep = s.sweep.getUnique( selectors + bi->getState(), sweepName ); if( !selectedSweep ) - WALBERLA_ABORT("Selecting Sweep " << sweepIt->first << ": " << - "Ambiguous, or no sweep selected. Check your selector " << - selectors + bi->getState() << std::endl << s.sweep); + continue; WALBERLA_LOG_PROGRESS("Running sweep \"" << sweepName << "\" on block " << bi->getId() ); diff --git a/tests/lbm/CMakeLists.txt b/tests/lbm/CMakeLists.txt index b17f65cfa5a194e4d0b600111f0813108930e74c..a2ddd66905fda5cbc57601163746d1d96ab58421 100644 --- a/tests/lbm/CMakeLists.txt +++ b/tests/lbm/CMakeLists.txt @@ -76,6 +76,21 @@ waLBerla_execute_test( NAME QCriterionTest ) # Code Generation if( WALBERLA_BUILD_WITH_CODEGEN ) +add_subdirectory(codegen) + +waLBerla_generate_target_from_python(NAME GeneratedOutflowBCGenerated + FILE codegen/GeneratedOutflowBC.py + OUT_FILES GeneratedOutflowBC_Sweep.cpp GeneratedOutflowBC_Sweep.h + GeneratedOutflowBC_MacroSetter.cpp GeneratedOutflowBC_MacroSetter.h + GeneratedOutflowBC_Dynamic_UBB.cpp GeneratedOutflowBC_Dynamic_UBB.h + GeneratedOutflowBC_Static_UBB.cpp GeneratedOutflowBC_Static_UBB.h + GeneratedOutflowBC_NoSlip.cpp GeneratedOutflowBC_NoSlip.h + GeneratedOutflowBC_Outflow.cpp GeneratedOutflowBC_Outflow.h + GeneratedOutflowBC_PackInfo.cpp GeneratedOutflowBC_PackInfo.h + GeneratedOutflowBC_InfoHeader.h) +waLBerla_compile_test( FILES codegen/GeneratedOutflowBC.cpp DEPENDS GeneratedOutflowBCGenerated) +waLBerla_execute_test( NAME GeneratedOutflowBC COMMAND $<TARGET_FILE:GeneratedOutflowBC> ${CMAKE_CURRENT_SOURCE_DIR}/codegen/GeneratedOutflowBC.prm ) + waLBerla_generate_target_from_python(NAME LbCodeGenerationExampleGenerated FILE codegen/LbCodeGenerationExample.py @@ -95,4 +110,13 @@ waLBerla_generate_target_from_python(NAME FieldLayoutAndVectorizationTestGenerat FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel.cpp FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel.h ) waLBerla_compile_test( FILES codegen/FieldLayoutAndVectorizationTest.cpp DEPENDS FieldLayoutAndVectorizationTestGenerated) -endif() \ No newline at end of file +waLBerla_generate_target_from_python(NAME LbmPackInfoGenerationTestCodegen FILE codegen/LbmPackInfoGenerationTest.py + OUT_FILES AccessorBasedPackInfoEven.cpp AccessorBasedPackInfoEven.h + AccessorBasedPackInfoOdd.cpp AccessorBasedPackInfoOdd.h + FromKernelPackInfoPull.cpp FromKernelPackInfoPull.h + FromKernelPackInfoPush.cpp FromKernelPackInfoPush.h) + +waLBerla_link_files_to_builddir( "diff_packinfos.sh" ) +waLBerla_execute_test( NAME LbmPackInfoGenerationDiffTest COMMAND bash diff_packinfos.sh ) + +endif() diff --git a/tests/lbm/codegen/CMakeLists.txt b/tests/lbm/codegen/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..b149eda3f67a65321c9583835277423d3f0db2f0 --- /dev/null +++ b/tests/lbm/codegen/CMakeLists.txt @@ -0,0 +1,8 @@ +############################################################################################################################# +# +# Tests for lbm module +# +############################################################################################################################# + +waLBerla_link_files_to_builddir( "*.prm" ) +waLBerla_link_files_to_builddir( "ChannelFlowCodeGenParameter.py" ) \ No newline at end of file diff --git a/tests/lbm/codegen/GeneratedOutflowBC.cpp b/tests/lbm/codegen/GeneratedOutflowBC.cpp new file mode 100644 index 0000000000000000000000000000000000000000..66311c7ebd98866bbb553e92be406fb4b8abc3ef --- /dev/null +++ b/tests/lbm/codegen/GeneratedOutflowBC.cpp @@ -0,0 +1,194 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file GeneratedOutflowBC.cpp +//! \author Markus Holzer <markus.holzer@fau.de> +//! \brief Shear flow with dynamic UBB on the left (W) with linear flow profile from zero to u_max (flow in x-direction) +//! along the y-direction. The upper wall (N) is a static UBB which applies u_max in x-direction. +//! On the right (E) an outflow boundary is used and the lower wall (S) is a No-Slip wall. +//! It is tested if the shear flow is correctly propagated through the entire domain after n timesteps. +// +//====================================================================================================================== +#include "blockforest/Initialization.h" +#include "blockforest/communication/UniformBufferedScheme.h" + +#include "core/Environment.h" +#include "core/timing/RemainingTimeLogger.h" + +#include "field/AddToStorage.h" +#include "field/vtk/VTKWriter.h" + +#include "geometry/InitBoundaryHandling.h" +#include "timeloop/SweepTimeloop.h" + +// Generated Files +#include "GeneratedOutflowBC_Dynamic_UBB.h" +#include "GeneratedOutflowBC_InfoHeader.h" +#include "GeneratedOutflowBC_MacroSetter.h" +#include "GeneratedOutflowBC_NoSlip.h" +#include "GeneratedOutflowBC_Outflow.h" +#include "GeneratedOutflowBC_PackInfo.h" +#include "GeneratedOutflowBC_Static_UBB.h" +#include "GeneratedOutflowBC_Sweep.h" + +using namespace walberla; + +using PackInfo_T = lbm::GeneratedOutflowBC_PackInfo; +using flag_t = walberla::uint8_t; +using FlagField_T = FlagField< flag_t >; + +auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage* const storage) { + return new PdfField_T(storage->getNumberOfXCells(*block), storage->getNumberOfYCells(*block), + storage->getNumberOfZCells(*block), uint_t(1), field::fzyx, + make_shared< field::AllocateAligned< real_t, 64 > >()); +}; + +//////////////////////////////////////////// +// Linear Velocity Profile for left wall // +////////////////////////////////////////// + +class ShearProfile +{ + public: + + ShearProfile( real_t inflow_velocity ) : + inflow_velocity_( inflow_velocity ) {} + + Vector3< real_t > operator()( const Cell& pos, const shared_ptr< StructuredBlockForest >& SbF, IBlock& block ) const; + + private: + + const real_t inflow_velocity_; +}; // class ShearProfile + +Vector3< real_t > ShearProfile::operator()( const Cell& pos, const shared_ptr< StructuredBlockForest >& SbF, IBlock& block ) const +{ + Cell globalCell; + CellInterval domain = SbF->getDomainCellBB(); + real_t h_y = domain.yMax() - domain.yMin(); + SbF->transformBlockLocalToGlobalCell(globalCell, block, pos); + + real_t u = inflow_velocity_ * (globalCell[1] / h_y); + + Vector3< real_t > result(u, 0.0, 0.0); + return result; +} + +////////// +// MAIN // +////////// + +int main(int argc, char** argv) +{ + walberla::Environment walberlaEnv(argc, argv); + + auto blocks = blockforest::createUniformBlockGridFromConfig(walberlaEnv.config()); + + // read parameters + auto parameters = walberlaEnv.config()->getOneBlock("Parameters"); + + const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.4)); + const real_t u_max = parameters.getParameter< real_t >("u_max", real_t(0.05)); + const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(10)); + + const double remainingTimeLoggerFrequency = + parameters.getParameter< double >("remainingTimeLoggerFrequency", 3.0); // in seconds + + // create fields + BlockDataID pdfFieldID = blocks->addStructuredBlockData< PdfField_T >(pdfFieldAdder, "PDFs"); + BlockDataID velFieldID = field::addToStorage< VelocityField_T >(blocks, "velocity", real_t(0), field::fzyx); + BlockDataID densityFieldID = field::addToStorage< ScalarField_T >(blocks, "density", real_t(0), field::fzyx); + + BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field"); + + pystencils::GeneratedOutflowBC_MacroSetter setterSweep(pdfFieldID, velFieldID); + for (auto& block : *blocks) + setterSweep(&block); + + // create and initialize boundary handling + const FlagUID fluidFlagUID("Fluid"); + + auto boundariesConfig = walberlaEnv.config()->getOneBlock("Boundaries"); + + ShearProfile velocityCallback{u_max}; + std::function< Vector3< real_t >(const Cell&, const shared_ptr< StructuredBlockForest >&, IBlock&) > + velocity_initialisation = velocityCallback; + + lbm::GeneratedOutflowBC_Dynamic_UBB ubb_dynamic(blocks, pdfFieldID, velocity_initialisation); + lbm::GeneratedOutflowBC_Static_UBB ubb_static(blocks, pdfFieldID, u_max); + lbm::GeneratedOutflowBC_NoSlip noSlip(blocks, pdfFieldID); + lbm::GeneratedOutflowBC_Outflow outflow(blocks, pdfFieldID); + + geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldId, boundariesConfig); + geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldId, fluidFlagUID); + + ubb_dynamic.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("UBB_Inflow"), fluidFlagUID); + ubb_static.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("UBB_Wall"), fluidFlagUID); + noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("NoSlip"), fluidFlagUID); + outflow.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("Outflow"), fluidFlagUID); + + // create time loop + SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps); + + // create communication for PdfField + blockforest::communication::UniformBufferedScheme< Stencil_T > communication(blocks); + communication.addPackInfo(make_shared< PackInfo_T >(pdfFieldID)); + + pystencils::GeneratedOutflowBC_Sweep UpdateSweep(densityFieldID, pdfFieldID, velFieldID, omega); + + // add LBM sweep and communication to time loop + timeloop.add() << BeforeFunction(communication, "communication") << Sweep(noSlip, "noSlip boundary"); + timeloop.add() << Sweep(ubb_dynamic, "ubb inflow"); + timeloop.add() << Sweep(ubb_static, "ubb wall"); + timeloop.add() << Sweep(outflow, "outflow boundary"); + timeloop.add() << Sweep(UpdateSweep, "LB stream & collide"); + + // log remaining time + timeloop.addFuncAfterTimeStep(timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency), + "remaining time logger"); + + // VTK Writer + uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0); + if (vtkWriteFrequency > 0) + { + auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "GeneratedOutflowBC_VTK", vtkWriteFrequency, 0, false, + "vtk_out", "simulation_step", false, true, true, false, 0); + + auto velWriter = make_shared< field::VTKWriter< VelocityField_T > >(velFieldID, "velocity"); + auto densityWriter = make_shared< field::VTKWriter< ScalarField_T > >(densityFieldID, "density"); + + vtkOutput->addCellDataWriter(velWriter); + vtkOutput->addCellDataWriter(densityWriter); + + timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); + } + timeloop.run(); + + CellInterval domain = blocks->getDomainCellBB(); + real_t h_y = domain.yMax() - domain.yMin(); + for (auto& block : *blocks) + { + auto velField = block.getData<VelocityField_T>(velFieldID); + WALBERLA_FOR_ALL_CELLS_XYZ + ( + velField, + Cell globalCell; + blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z)); + WALBERLA_CHECK_FLOAT_EQUAL_EPSILON(velField->get(x, y, z, 0), u_max * (globalCell[1] / h_y), 0.01) + ) + } + + return EXIT_SUCCESS; +} diff --git a/tests/lbm/codegen/GeneratedOutflowBC.prm b/tests/lbm/codegen/GeneratedOutflowBC.prm new file mode 100644 index 0000000000000000000000000000000000000000..b53043d9b3b81915cabf5fe17d408a613eee863d --- /dev/null +++ b/tests/lbm/codegen/GeneratedOutflowBC.prm @@ -0,0 +1,24 @@ + +Parameters +{ + omega 1.8; + timesteps 500; + u_max 0.05; + vtkWriteFrequency 0; + remainingTimeLoggerFrequency 3; // in seconds +} + +DomainSetup +{ + blocks < 1, 1, 1 >; + cellsPerBlock < 30, 30, 1 >; + periodic < 0, 0, 0 >; +} + +Boundaries +{ + Border { direction W; walldistance -1; flag UBB_Inflow; } + Border { direction E; walldistance -1; flag Outflow; } + Border { direction S; walldistance -1; flag NoSlip; } + Border { direction N; walldistance -1; flag UBB_Wall; } +} diff --git a/tests/lbm/codegen/GeneratedOutflowBC.py b/tests/lbm/codegen/GeneratedOutflowBC.py new file mode 100644 index 0000000000000000000000000000000000000000..59a916fec926acfc8d83ef170046034c407cb018 --- /dev/null +++ b/tests/lbm/codegen/GeneratedOutflowBC.py @@ -0,0 +1,92 @@ +from pystencils.field import fields +from lbmpy.macroscopic_value_kernels import macroscopic_values_setter +from lbmpy.stencils import get_stencil +from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule +from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow +from lbmpy_walberla.additional_data_handler import UBBAdditionalDataHandler, OutflowAdditionalDataHandler +from pystencils_walberla import CodeGeneration, generate_sweep +from lbmpy_walberla import RefinementScaling, generate_boundary, generate_lb_pack_info + +import sympy as sp + +stencil = get_stencil("D2Q9") +q = len(stencil) +dim = len(stencil[0]) + +pdfs, pdfs_tmp = fields(f"pdfs({q}), pdfs_tmp({q}): double[{dim}D]", layout='fzyx') +velocity_field, density_field = fields(f"velocity({dim}), density(1) : double[{dim}D]", layout='fzyx') +omega = sp.Symbol("omega") +u_max = sp.Symbol("u_max") + +output = { + 'density': density_field, + 'velocity': velocity_field +} + +options = {'method': 'cumulant', + 'stencil': stencil, + 'relaxation_rate': omega, + 'galilean_correction': len(stencil) == 27, + 'field_name': 'pdfs', + 'output': output, + 'optimization': {'symbolic_field': pdfs, + 'symbolic_temporary_field': pdfs_tmp, + 'cse_global': False, + 'cse_pdfs': False}} + +method = create_lb_method(**options) + +# getter & setter +setter_assignments = macroscopic_values_setter(method, velocity=velocity_field.center_vector, + pdfs=pdfs, density=1) + +# opt = {'instruction_set': 'sse', 'assume_aligned': True, 'nontemporal': False, 'assume_inner_stride_one': True} + +update_rule = create_lb_update_rule(lb_method=method, **options) + +info_header = f""" +using namespace walberla; +#include "stencil/D{dim}Q{q}.h" +using Stencil_T = walberla::stencil::D{dim}Q{q}; +using PdfField_T = GhostLayerField<real_t, {q}>; +using VelocityField_T = GhostLayerField<real_t, {dim}>; +using ScalarField_T = GhostLayerField<real_t, 1>; + """ + +stencil = method.stencil + +with CodeGeneration() as ctx: + # sweeps + generate_sweep(ctx, 'GeneratedOutflowBC_Sweep', update_rule, field_swaps=[(pdfs, pdfs_tmp)]) + generate_sweep(ctx, 'GeneratedOutflowBC_MacroSetter', setter_assignments) + + # boundaries + ubb_dynamic = UBB(lambda *args: None, dim=dim) + ubb_data_handler = UBBAdditionalDataHandler(stencil, ubb_dynamic) + + if dim == 2: + ubb_static = UBB([sp.Symbol("u_max"), 0]) + else: + ubb_static = UBB([sp.Symbol("u_max"), 0, 0]) + + outflow = ExtrapolationOutflow(stencil[4], method) + outflow_data_handler = OutflowAdditionalDataHandler(stencil, outflow) + + # Dynamic UBB which is used to produce a specific velocity profile at the inflow. + # Note that the additional data handler is needed for that kind of boundary. + generate_boundary(ctx, 'GeneratedOutflowBC_Dynamic_UBB', ubb_dynamic, method, + additional_data_handler=ubb_data_handler) + + # Static UBB which is used to apply a certain velocity u_max at the upper wall in x-direction + generate_boundary(ctx, 'GeneratedOutflowBC_Static_UBB', ubb_static, method) + + generate_boundary(ctx, 'GeneratedOutflowBC_NoSlip', NoSlip(), method) + + generate_boundary(ctx, 'GeneratedOutflowBC_Outflow', outflow, method, + additional_data_handler=outflow_data_handler) + + # communication + generate_lb_pack_info(ctx, 'GeneratedOutflowBC_PackInfo', stencil, pdfs) + + # Info header containing correct template definitions for stencil and field + ctx.write_file("GeneratedOutflowBC_InfoHeader.h", info_header) diff --git a/tests/lbm/codegen/LbmPackInfoGenerationTest.py b/tests/lbm/codegen/LbmPackInfoGenerationTest.py new file mode 100644 index 0000000000000000000000000000000000000000..5fab9c7abd75de925e619dd80254d66064576ec4 --- /dev/null +++ b/tests/lbm/codegen/LbmPackInfoGenerationTest.py @@ -0,0 +1,29 @@ +from lbmpy.creationfunctions import create_lb_collision_rule, create_lb_update_rule +from lbmpy.advanced_streaming import Timestep +from lbmpy.stencils import get_stencil +from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel +from lbmpy_walberla.packinfo import generate_lb_pack_info +from pystencils.field import Field + +with CodeGeneration() as ctx: + streaming_pattern = 'aa' + target = 'cpu' + stencil = get_stencil('D3Q19') + dim = len(stencil[0]) + values_per_cell = len(stencil) + collision_rule = create_lb_collision_rule(method='srt', stencil=stencil) + pdf_field = Field.create_generic('pdfs', dim, index_shape=(values_per_cell,), layout='fzyx') + optimization = { + 'symbolic_field': pdf_field, + 'target': target + } + + # Generate PackInfo specifically for streaming pattern + generate_lb_pack_info(ctx, 'AccessorBasedPackInfo', stencil, pdf_field, + streaming_pattern=streaming_pattern, target=target, namespace='pystencils') + + # Generate reference using the alternating pull/push approach + update_rule_odd = create_lb_update_rule(collision_rule=collision_rule, optimization=optimization, + streaming_pattern=streaming_pattern, timestep=Timestep.ODD) + generate_pack_info_from_kernel(ctx, 'FromKernelPackInfoPull', update_rule_odd, kind='pull', target=target) + generate_pack_info_from_kernel(ctx, 'FromKernelPackInfoPush', update_rule_odd, kind='push', target=target) diff --git a/tests/lbm/diff_packinfos.sh b/tests/lbm/diff_packinfos.sh new file mode 100755 index 0000000000000000000000000000000000000000..bfa89c5ef63477c61fefac60b7767fe22aaf4233 --- /dev/null +++ b/tests/lbm/diff_packinfos.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +REGEX='^((#include)|(void)|(uint_t))' +cd default_codegen +diff -u -B <(grep -vP "$REGEX" FromKernelPackInfoPull.cpp) <(grep -vP "$REGEX" AccessorBasedPackInfoEven.cpp) || exit 1 +diff -u -B <(grep -vP "$REGEX" FromKernelPackInfoPush.cpp) <(grep -vP "$REGEX" AccessorBasedPackInfoOdd.cpp) || exit 1 diff --git a/tests/timeloop/CMakeLists.txt b/tests/timeloop/CMakeLists.txt index 265f266fd7cb57080a837f54609711ba3401b1df..862c3b64811bc9ba69a28a88420bce355cf90662 100644 --- a/tests/timeloop/CMakeLists.txt +++ b/tests/timeloop/CMakeLists.txt @@ -5,5 +5,5 @@ ################################################################################################### -#waLBerla_compile_test( FILES TimeloopAndSweepRegister.cpp DEPENDS field blockforest ) -#waLBerla_execute_test(NAME TimeloopAndSweepRegister ) +waLBerla_compile_test( FILES TimeloopAndSweepRegister.cpp DEPENDS field blockforest ) +waLBerla_execute_test(NAME TimeloopAndSweepRegister ) diff --git a/tests/timeloop/TimeloopAndSweepRegister.cpp b/tests/timeloop/TimeloopAndSweepRegister.cpp index 028e5fad7f743d55ab8a79024952879425858990..94421082135cabe352a01fbb85c550e10a8b66ec 100644 --- a/tests/timeloop/TimeloopAndSweepRegister.cpp +++ b/tests/timeloop/TimeloopAndSweepRegister.cpp @@ -1,175 +1,129 @@ //====================================================================================================================== // -// This file is part of waLBerla. waLBerla is free software: you can +// This file is part of waLBerla. waLBerla is free software: you can // redistribute it and/or modify it under the terms of the GNU General Public -// License as published by the Free Software Foundation, either version 3 of +// License as published by the Free Software Foundation, either version 3 of // the License, or (at your option) any later version. -// -// waLBerla is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. -// +// // You should have received a copy of the GNU General Public License along // with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. // //! \file TimeloopAndSweepRegister.cpp //! \ingroup timeloop -//! \author Martin Bauer <martin.bauer@fau.de> +//! \author Markus Holzer <markus.holzer@fau.de> //! \brief test cases that test the registering of Sweeps at timeloop // //====================================================================================================================== -#include "timeloop/SweepTimeloop.h" +#include "blockforest/Initialization.h" + #include "core/DataTypes.h" +#include "core/Environment.h" #include "core/debug/TestSubsystem.h" -#include "core/logging/Logging.h" + +#include "field/Field.h" + +#include "timeloop/SweepTimeloop.h" #include <string> #include <vector> - -using namespace std; using namespace walberla; +using Field_T = Field< uint_t, 1 >; + +auto FieldAdder = [](IBlock* const block, StructuredBlockStorage* const storage) { + return new Field_T(storage->getNumberOfXCells(*block), storage->getNumberOfYCells(*block), + storage->getNumberOfZCells(*block), uint_t(0.0), field::fzyx, + make_shared< field::AllocateAligned< uint_t, 64 > >()); +}; -class GeneralSweep +class Sweep1 { - public: - GeneralSweep(const string & name, vector<string> & vec) - : myName(name), outVec(vec) - {} + public: + Sweep1(BlockDataID fieldID) : fieldID_(fieldID) {} + void operator()(IBlock* block) + { + auto field = block->getData< Field_T >(fieldID_); - void operator() (IBlock * ){ - outVec.push_back(myName); - } + for (auto iter = field->begin(); iter != field->end(); ++iter) + *iter += 1; + } - private: - string myName; - vector<string> & outVec; + private: + BlockDataID fieldID_; }; - -class GeneralFunction +class Sweep2 { - public: - GeneralFunction(const string & name, vector<string> & vec) - : myName(name), outVec(vec) - {} + public: + Sweep2(BlockDataID fieldID) : fieldID_(fieldID) {} - void operator() (){ - outVec.push_back(myName); - } - - private: - string myName; - vector<string> & outVec; -}; + void operator()(IBlock* block) + { + auto field = block->getData< Field_T >(fieldID_); + for (auto iter = field->begin(); iter != field->end(); ++iter) + *iter += 2; + } + private: + BlockDataID fieldID_; +}; -int main() +int main(int argc, char** argv) { debug::enterTestMode(); + mpi::Environment env(argc, argv); - vector<string> expectedSequence; - vector<string> sequence; - - SUID cpuSelect("CPU"); - SUID gpuSelect("GPU"); - - SUID srtSelect("SRT"); - SUID mrtSelect("MRT"); - - - //FIXME put a real test in here - shared_ptr<SweepTimeloop> tl = make_shared<SweepTimeloop>(shared_ptr<BlockStorage>(),100); - - typedef SweepTimeloop::SweepHandle SH; - - SH sweep1 = tl->addSweep( GeneralSweep("CPU1",sequence), cpuSelect); - tl->addSweep(sweep1, GeneralSweep("GPU1",sequence), gpuSelect); - - SH sweep2 = tl->addSweep( GeneralSweep("CPU2",sequence), cpuSelect); - tl->addSweep(sweep2, GeneralSweep("GPU2",sequence), gpuSelect); + std::vector<std::string> expectedSequence; + std::vector<std::string> sequence; + SUID sweepSelect1("Sweep1"); + SUID sweepSelect2("Sweep2"); - tl->addFuncBeforeSweep(sweep1,GeneralFunction("Pre1",sequence)); - tl->addFuncAfterSweep (sweep1,GeneralFunction("Post1",sequence)); + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid( + uint_c(4), uint_c(2), uint_c(2), uint_c(10), uint_c(10), uint_c(10), real_c(1), false, false, false, false); - tl->addFuncBeforeSweep(sweep2,GeneralFunction("Pre2",sequence)); - tl->addFuncAfterSweep (sweep2,GeneralFunction("Post2",sequence)); + BlockDataID fieldID = blocks->addStructuredBlockData< Field_T >(FieldAdder, "Test Field"); - typedef Timeloop::FctHandle FH; - FH preTs = tl->addFuncBeforeTimeStep( GeneralFunction("PreTimestepCPU",sequence),cpuSelect,srtSelect); - tl->addFuncBeforeTimeStep(preTs, GeneralFunction("PreTimestepGPU",sequence),gpuSelect,srtSelect); - FH postTs = tl->addFuncAfterTimeStep ( GeneralFunction("PostTimestepCPU",sequence),cpuSelect,mrtSelect); - tl->addFuncAfterTimeStep (postTs,GeneralFunction("PostTimestepGPU",sequence),gpuSelect,mrtSelect); + for (auto& block : *blocks) + { + if (block.getAABB().min()[0] < 20) + block.setState(sweepSelect1); + else + block.setState(sweepSelect2); + } + uint_t timesteps = 10; + SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps); - //---------- First Run - CPU Selector --------------------------------------------- + timeloop.add() << Sweep(Sweep1(fieldID), "Sweep 1", sweepSelect1, sweepSelect2); + timeloop.add() << Sweep(Sweep2(fieldID), "Sweep 2", sweepSelect2, sweepSelect1); - expectedSequence.push_back("PreTimestepCPU"); - expectedSequence.push_back("Pre1"); - expectedSequence.push_back("CPU1"); - expectedSequence.push_back("Post1"); - expectedSequence.push_back("Pre2"); - expectedSequence.push_back("CPU2"); - expectedSequence.push_back("Post2"); - expectedSequence.push_back("PostTimestepCPU"); + WcTimingPool timingPool; - tl->singleStep(cpuSelect); - - WALBERLA_CHECK_EQUAL(expectedSequence.size(), sequence.size()); - WALBERLA_CHECK( equal(expectedSequence.begin(),expectedSequence.end(), sequence.begin() ) ); - - expectedSequence.clear(); - sequence.clear(); - - - // ------------ Second Run - GPU Selector ------------------------------------------- - - expectedSequence.push_back("PreTimestepGPU"); - expectedSequence.push_back("Pre1"); - expectedSequence.push_back("GPU1"); - expectedSequence.push_back("Post1"); - expectedSequence.push_back("Pre2"); - expectedSequence.push_back("GPU2"); - expectedSequence.push_back("Post2"); - expectedSequence.push_back("PostTimestepGPU"); - - tl->singleStep(gpuSelect); - - WALBERLA_CHECK_EQUAL(expectedSequence.size(), sequence.size()); - WALBERLA_CHECK( equal(expectedSequence.begin(),expectedSequence.end(), sequence.begin() ) ); - - expectedSequence.clear(); - sequence.clear(); - - - // ------------ Second Run - GPU and SRT ------------------------------------------- - - - expectedSequence.push_back("Pre1"); - expectedSequence.push_back("GPU1"); - expectedSequence.push_back("Post1"); - expectedSequence.push_back("Pre2"); - expectedSequence.push_back("GPU2"); - expectedSequence.push_back("Post2"); - expectedSequence.push_back("PostTimestepGPU"); - - tl->singleStep(gpuSelect + srtSelect); - - WALBERLA_CHECK_EQUAL(expectedSequence.size(), sequence.size()); - WALBERLA_CHECK( equal(expectedSequence.begin(),expectedSequence.end(), sequence.begin() ) ); - - expectedSequence.clear(); - sequence.clear(); - - - return 0; + timeloop.run(timingPool); + for (auto& block : *blocks) + { + auto field = block.getData< Field_T >(fieldID); + if (block.getAABB().min()[0] < 20) + { + for (auto iter = field->begin(); iter != field->end(); ++iter) + WALBERLA_CHECK_EQUAL(*iter, timesteps) + } + else + { + for (auto iter = field->begin(); iter != field->end(); ++iter) + WALBERLA_CHECK_EQUAL(*iter, timesteps * 2) + } + } + + return EXIT_SUCCESS; } - - -