diff --git a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox index 3166cc5001965b1586ec3622f6dea04c7454cb65..51e697cd20af8d49ca7a557cd8ee8df14a44f977 100644 --- a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox +++ b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox @@ -64,6 +64,8 @@ with CodeGeneration() as ctx: Notice that, other than in \ref tutorial_codegen01, we did not need to define any fields. Both the source and destination PDF fields are created internally by lbmpy and `generate_lattice_model`. +Furthermore, if we optimise waLBerla for the machine it is compiled on by using the CMake flag `OPTIMIZE_FOR_LOCALHOST`, the code generator automatically introduces vector intrinsics in the kernel files. Available intrinsics sets are `sse`, `avx` and `avx512`. These sets can also be passed manually with the argument `cpu_vectorize_info`. More information on CPU optimisations available in `lbmpy` can be found <a href="https://pycodegen.pages.i10git.cs.fau.de/lbmpy/sphinx/kernelcreation.html" target="_blank">here</a>. By installing the `cpuinfo` package, it is also possible for `lbmpy` to automatically determine the supported intrinsics set of the hardware. + As a final touch, we still need to set up the CMake build target for the code generation script. This time, two distinct classes (the lattice model and the pack information) will be generated. Therefore, we need to list the header and source file names for both classes separately. 
\code diff --git a/python/pystencils_walberla/codegen.py b/python/pystencils_walberla/codegen.py index a314baf7827c730fdae8a144faad25605f386953..37013cba6203cdd9a45da7b044361dc71fa457d1 100644 --- a/python/pystencils_walberla/codegen.py +++ b/python/pystencils_walberla/codegen.py @@ -186,8 +186,14 @@ def generate_pack_info(generation_context, class_name: str, items = sorted(items, key=lambda e: e[0]) directions_to_pack_terms = OrderedDict(items) + if 'cpu_vectorize_info' in create_kernel_params: + vec_params = create_kernel_params['cpu_vectorize_info'] + if 'instruction_set' in vec_params and vec_params['instruction_set'] is not None: + raise NotImplementedError("Vectorisation of the pack info is not implemented.") + create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params) target = create_kernel_params.get('target', 'cpu') + create_kernel_params['cpu_vectorize_info']['instruction_set'] = None template_name = "CpuPackInfo.tmpl" if target == 'cpu' else 'GpuPackInfo.tmpl'