From c1c40069f81d9a8e742ea1ac31b902ee202608fc Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Mon, 18 Nov 2024 11:06:38 +0100
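Subject: [PATCH] Enable vectorization in some tests

With pystencils 2, `get_supported_instruction_sets()` no longer returns an
empty list; it derives the instruction-set names from
`Target.available_vector_cpu_targets()`, so tests guarded by
`skipif(not vector_isas)` are no longer skipped unconditionally.

`test_lbm_vectorization` is marked `xfail` because loop splitting is not
available yet, and `default_number_float` is only passed to the kernel
config when not running on pystencils 2. The remaining changes in
tests/test_vectorization.py are formatting only.
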
---
 src/lbmpy/_compat.py                          |  8 +-
 .../schaefer_turek/scenario_schaefer_turek.py |  2 +
 tests/test_vectorization.py                   | 98 +++++++++++++------
 3 files changed, 76 insertions(+), 32 deletions(-)

diff --git a/src/lbmpy/_compat.py b/src/lbmpy/_compat.py
index 52760cd2..b41cf60c 100644
--- a/src/lbmpy/_compat.py
+++ b/src/lbmpy/_compat.py
@@ -13,7 +13,13 @@ if PYSTENCILS_VERSION_MAJOR == 2:
         return DEFAULTS.spatial_counters[coord]
 
     def get_supported_instruction_sets():
-        return []
+        """Return the lower-cased last name token of each vector CPU target."""
+        from pystencils import Target
+        # A name such as "X86_AVX512" yields the entry "avx512".
+        return [
+            target.name.split("_")[-1].lower()
+            for target in Target.available_vector_cpu_targets()
+        ]
 
 else:
     from pystencils.backends.simd_instruction_sets import (
diff --git a/tests/full_scenarios/schaefer_turek/scenario_schaefer_turek.py b/tests/full_scenarios/schaefer_turek/scenario_schaefer_turek.py
index da012818..22d1c02e 100644
--- a/tests/full_scenarios/schaefer_turek/scenario_schaefer_turek.py
+++ b/tests/full_scenarios/schaefer_turek/scenario_schaefer_turek.py
@@ -10,6 +10,8 @@ import warnings
 import numpy as np
 import pytest
 
+from pystencils import Target
+
 from lbmpy._compat import get_supported_instruction_sets
 from lbmpy.boundaries.boundaryconditions import NoSlip
 from lbmpy.geometry import get_pipe_velocity_field
diff --git a/tests/test_vectorization.py b/tests/test_vectorization.py
index efda06fb..dcb01a65 100644
--- a/tests/test_vectorization.py
+++ b/tests/test_vectorization.py
@@ -1,8 +1,9 @@
 import numpy as np
 import pytest
+from dataclasses import replace
 
 import pystencils as ps
-from lbmpy._compat import get_supported_instruction_sets
+from lbmpy._compat import get_supported_instruction_sets, IS_PYSTENCILS_2
 from lbmpy.scenarios import create_lid_driven_cavity
 from lbmpy.creationfunctions import LBMConfig, LBMOptimisation
 
@@ -11,7 +12,7 @@ if vector_isas is None:
     vector_isas = []
 
 
-@pytest.mark.skipif(not vector_isas, reason='cannot detect CPU instruction set')
+@pytest.mark.skipif(not vector_isas, reason="cannot detect CPU instruction set")
 def test_lbm_vectorization_short():
     print("Computing reference solutions")
     size1 = (64, 32)
@@ -21,30 +22,45 @@ def test_lbm_vectorization_short():
     ldc1_ref.run(10)
 
     lbm_config = LBMConfig(relaxation_rate=relaxation_rate)
-    config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': get_supported_instruction_sets()[-1],
-                                                       'assume_aligned': True,
-                                                       'nontemporal': True,
-                                                       'assume_inner_stride_one': True,
-                                                       'assume_sufficient_line_padding': False,
-                                                       })
-    ldc1 = create_lid_driven_cavity(size1, lbm_config=lbm_config, config=config,
-                                    fixed_loop_sizes=False)
+    config = ps.CreateKernelConfig(
+        cpu_vectorize_info={
+            "instruction_set": get_supported_instruction_sets()[-1],
+            "assume_aligned": True,
+            "nontemporal": True,
+            "assume_inner_stride_one": True,
+            "assume_sufficient_line_padding": False,
+        }
+    )
+    ldc1 = create_lid_driven_cavity(
+        size1, lbm_config=lbm_config, config=config, fixed_loop_sizes=False
+    )
     ldc1.run(10)
 
 
-@pytest.mark.skipif(not vector_isas, reason='cannot detect CPU instruction set')
-@pytest.mark.parametrize('instruction_set', vector_isas)
-@pytest.mark.parametrize('aligned_and_padding', [[False, False], [True, False], [True, True]])
-@pytest.mark.parametrize('nontemporal', [False, True])
-@pytest.mark.parametrize('double_precision', [False, True])
-@pytest.mark.parametrize('fixed_loop_sizes', [False, True])
+@pytest.mark.skipif(not vector_isas, reason="cannot detect CPU instruction set")
+@pytest.mark.xfail(reason="Loop splitting is not available yet")
+@pytest.mark.parametrize("instruction_set", vector_isas)
+@pytest.mark.parametrize(
+    "aligned_and_padding", [[False, False], [True, False], [True, True]]
+)
+@pytest.mark.parametrize("nontemporal", [False, True])
+@pytest.mark.parametrize("double_precision", [False, True])
+@pytest.mark.parametrize("fixed_loop_sizes", [False, True])
 @pytest.mark.longrun
-def test_lbm_vectorization(instruction_set, aligned_and_padding, nontemporal, double_precision, fixed_loop_sizes):
-    vectorization_options = {'instruction_set': instruction_set,
-                             'assume_aligned': aligned_and_padding[0],
-                             'nontemporal': nontemporal,
-                             'assume_inner_stride_one': True,
-                             'assume_sufficient_line_padding': aligned_and_padding[1]}
+def test_lbm_vectorization(
+    instruction_set,
+    aligned_and_padding,
+    nontemporal,
+    double_precision,
+    fixed_loop_sizes,
+):
+    vectorization_options = {
+        "instruction_set": instruction_set,
+        "assume_aligned": aligned_and_padding[0],
+        "nontemporal": nontemporal,
+        "assume_inner_stride_one": True,
+        "assume_sufficient_line_padding": aligned_and_padding[1],
+    }
     time_steps = 100
     size1 = (64, 32)
     size2 = (666, 34)
@@ -57,20 +73,40 @@ def test_lbm_vectorization(instruction_set, aligned_and_padding, nontemporal, do
     ldc2_ref.run(time_steps)
 
     lbm_config = LBMConfig(relaxation_rate=relaxation_rate)
-    config = ps.CreateKernelConfig(data_type="float64" if double_precision else "float32",
-                                   default_number_float="float64" if double_precision else "float32",
-                                   cpu_vectorize_info=vectorization_options)
+    config = ps.CreateKernelConfig(
+        data_type="float64" if double_precision else "float32",
+        cpu_vectorize_info=vectorization_options,
+    )
+
+    if not IS_PYSTENCILS_2:
+        config = replace(
+            config,
+            default_number_float="float64" if double_precision else "float32",
+        )
+
     lbm_opt_split = LBMOptimisation(cse_global=True, split=True)
     lbm_opt = LBMOptimisation(cse_global=True, split=False)
 
-    print(f"Vectorization test, double precision {double_precision}, vectorization {vectorization_options}, "
-          f"fixed loop sizes {fixed_loop_sizes}")
-    ldc1 = create_lid_driven_cavity(size1, fixed_loop_sizes=fixed_loop_sizes,
-                                    lbm_config=lbm_config, lbm_optimisation=lbm_opt, config=config)
+    print(
+        f"Vectorization test, double precision {double_precision}, vectorization {vectorization_options}, "
+        f"fixed loop sizes {fixed_loop_sizes}"
+    )
+    ldc1 = create_lid_driven_cavity(
+        size1,
+        fixed_loop_sizes=fixed_loop_sizes,
+        lbm_config=lbm_config,
+        lbm_optimisation=lbm_opt,
+        config=config,
+    )
     ldc1.run(time_steps)
     np.testing.assert_almost_equal(ldc1_ref.velocity[:, :], ldc1.velocity[:, :])
 
-    ldc2 = create_lid_driven_cavity(size2, fixed_loop_sizes=fixed_loop_sizes,
-                                    lbm_config=lbm_config, lbm_optimisation=lbm_opt_split, config=config)
+    ldc2 = create_lid_driven_cavity(
+        size2,
+        fixed_loop_sizes=fixed_loop_sizes,
+        lbm_config=lbm_config,
+        lbm_optimisation=lbm_opt_split,
+        config=config,
+    )
     ldc2.run(time_steps)
     np.testing.assert_almost_equal(ldc2_ref.velocity[:, :], ldc2.velocity[:, :])
-- 
GitLab