From 250bedb8024031a55a783de47882e3f2d9f35c7c Mon Sep 17 00:00:00 2001
From: markus holzer <markus.holzer@fau.de>
Date: Tue, 29 Sep 2020 17:36:12 +0200
Subject: [PATCH] Removed vectorisation of PackInfo and update tutorial 02

---
 apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox | 2 ++
 python/pystencils_walberla/codegen.py                   | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox
index 3166cc500..51e697cd2 100644
--- a/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox
+++ b/apps/tutorials/codegen/02_LBMLatticeModelGeneration.dox
@@ -64,6 +64,8 @@ with CodeGeneration() as ctx:
 
 Notice that, other than in \ref tutorial_codegen01, we did not need to define any fields. Both the source and destination PDF fields are created internally by lbmpy and `generate_lattice_model`.
 
+Furthermore, if we optimise waLBerla for the machine it is compiled on by setting the CMake flag `OPTIMIZE_FOR_LOCALHOST`, the code generator automatically introduces vector intrinsics in the kernel files. Available intrinsics sets are `sse`, `avx` and `avx512`. These sets can also be passed manually with the argument `cpu_vectorize_info`. More information on the CPU optimisations available in `lbmpy` can be found <a href="https://pycodegen.pages.i10git.cs.fau.de/lbmpy/sphinx/kernelcreation.html" target="_blank">here</a>. By installing the `cpu_vectorize_info` package, it is also possible for `lbmpy` to automatically determine the supported intrinsics set of the hardware.
+
 As a final touch, we still need to set up the CMake build target for the code generation script. This time, two distinct classes (the lattice model and the pack information) will be generated. Therefore, we need to list the header and source file names for both classes separately.
 
 \code
diff --git a/python/pystencils_walberla/codegen.py b/python/pystencils_walberla/codegen.py
index a314baf78..37013cba6 100644
--- a/python/pystencils_walberla/codegen.py
+++ b/python/pystencils_walberla/codegen.py
@@ -186,8 +186,14 @@ def generate_pack_info(generation_context, class_name: str,
     items = sorted(items, key=lambda e: e[0])
     directions_to_pack_terms = OrderedDict(items)
 
+    if 'cpu_vectorize_info' in create_kernel_params:
+        vec_params = create_kernel_params['cpu_vectorize_info']
+        if 'instruction_set' in vec_params and vec_params['instruction_set'] is not None:
+            raise NotImplementedError("Vectorisation of the pack info is not implemented.")
+
     create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params)
     target = create_kernel_params.get('target', 'cpu')
+    create_kernel_params['cpu_vectorize_info']['instruction_set'] = None
 
     template_name = "CpuPackInfo.tmpl" if target == 'cpu' else 'GpuPackInfo.tmpl'
 
-- 
GitLab