diff --git a/src/lbmpy/_compat.py b/src/lbmpy/_compat.py index 52760cd2440ea470ebf1a6d4470749ef6b89c068..b41cf60cab9b25231b58dac55bf2771ae461185f 100644 --- a/src/lbmpy/_compat.py +++ b/src/lbmpy/_compat.py @@ -13,7 +13,13 @@ if PYSTENCILS_VERSION_MAJOR == 2: return DEFAULTS.spatial_counters[coord] def get_supported_instruction_sets(): - return [] + from pystencils import Target + vector_targets = Target.available_vector_cpu_targets() + isas = [] + for target in vector_targets: + tokens = target.name.split("_") + isas.append(tokens[-1].lower()) + return isas else: from pystencils.backends.simd_instruction_sets import ( diff --git a/tests/full_scenarios/schaefer_turek/scenario_schaefer_turek.py b/tests/full_scenarios/schaefer_turek/scenario_schaefer_turek.py index da0128180aae2b122cc998949fc9f448742407c0..22d1c02ed23bde4b4b578ef489a6d94448db4e9f 100644 --- a/tests/full_scenarios/schaefer_turek/scenario_schaefer_turek.py +++ b/tests/full_scenarios/schaefer_turek/scenario_schaefer_turek.py @@ -10,6 +10,8 @@ import warnings import numpy as np import pytest +from pystencils import Target + from lbmpy._compat import get_supported_instruction_sets from lbmpy.boundaries.boundaryconditions import NoSlip from lbmpy.geometry import get_pipe_velocity_field diff --git a/tests/test_vectorization.py b/tests/test_vectorization.py index efda06fb860f49938141595947eb4d02db2dda3a..dcb01a65a7fbe9ecbe297df10cab3f3ccbe3ce09 100644 --- a/tests/test_vectorization.py +++ b/tests/test_vectorization.py @@ -1,8 +1,9 @@ import numpy as np import pytest +from dataclasses import replace import pystencils as ps -from lbmpy._compat import get_supported_instruction_sets +from lbmpy._compat import get_supported_instruction_sets, IS_PYSTENCILS_2 from lbmpy.scenarios import create_lid_driven_cavity from lbmpy.creationfunctions import LBMConfig, LBMOptimisation @@ -11,7 +12,7 @@ if vector_isas is None: vector_isas = [] -@pytest.mark.skipif(not vector_isas, reason='cannot detect CPU instruction set') +@pytest.mark.skipif(not vector_isas, reason="cannot detect CPU instruction set") def test_lbm_vectorization_short(): print("Computing reference solutions") size1 = (64, 32) @@ -21,30 +22,45 @@ def test_lbm_vectorization_short(): ldc1_ref.run(10) lbm_config = LBMConfig(relaxation_rate=relaxation_rate) - config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': get_supported_instruction_sets()[-1], - 'assume_aligned': True, - 'nontemporal': True, - 'assume_inner_stride_one': True, - 'assume_sufficient_line_padding': False, - }) - ldc1 = create_lid_driven_cavity(size1, lbm_config=lbm_config, config=config, - fixed_loop_sizes=False) + config = ps.CreateKernelConfig( + cpu_vectorize_info={ + "instruction_set": get_supported_instruction_sets()[-1], + "assume_aligned": True, + "nontemporal": True, + "assume_inner_stride_one": True, + "assume_sufficient_line_padding": False, + } + ) + ldc1 = create_lid_driven_cavity( + size1, lbm_config=lbm_config, config=config, fixed_loop_sizes=False + ) ldc1.run(10) -@pytest.mark.skipif(not vector_isas, reason='cannot detect CPU instruction set') -@pytest.mark.parametrize('instruction_set', vector_isas) -@pytest.mark.parametrize('aligned_and_padding', [[False, False], [True, False], [True, True]]) -@pytest.mark.parametrize('nontemporal', [False, True]) -@pytest.mark.parametrize('double_precision', [False, True]) -@pytest.mark.parametrize('fixed_loop_sizes', [False, True]) +@pytest.mark.skipif(not vector_isas, reason="cannot detect CPU instruction set") +@pytest.mark.xfail(reason="Loop splitting is not available yet") +@pytest.mark.parametrize("instruction_set", vector_isas) +@pytest.mark.parametrize( + "aligned_and_padding", [[False, False], [True, False], [True, True]] +) +@pytest.mark.parametrize("nontemporal", [False, True]) +@pytest.mark.parametrize("double_precision", [False, True]) +@pytest.mark.parametrize("fixed_loop_sizes", [False, True]) @pytest.mark.longrun -def test_lbm_vectorization(instruction_set, aligned_and_padding, nontemporal, double_precision, fixed_loop_sizes): - vectorization_options = {'instruction_set': instruction_set, - 'assume_aligned': aligned_and_padding[0], - 'nontemporal': nontemporal, - 'assume_inner_stride_one': True, - 'assume_sufficient_line_padding': aligned_and_padding[1]} +def test_lbm_vectorization( + instruction_set, + aligned_and_padding, + nontemporal, + double_precision, + fixed_loop_sizes, +): + vectorization_options = { + "instruction_set": instruction_set, + "assume_aligned": aligned_and_padding[0], + "nontemporal": nontemporal, + "assume_inner_stride_one": True, + "assume_sufficient_line_padding": aligned_and_padding[1], + } time_steps = 100 size1 = (64, 32) size2 = (666, 34) @@ -57,20 +73,40 @@ def test_lbm_vectorization(instruction_set, aligned_and_padding, nontemporal, do ldc2_ref.run(time_steps) lbm_config = LBMConfig(relaxation_rate=relaxation_rate) - config = ps.CreateKernelConfig(data_type="float64" if double_precision else "float32", - default_number_float="float64" if double_precision else "float32", - cpu_vectorize_info=vectorization_options) + config = ps.CreateKernelConfig( + data_type="float64" if double_precision else "float32", + cpu_vectorize_info=vectorization_options, + ) + + if not IS_PYSTENCILS_2: + config = replace( + config, + default_number_float="float64" if double_precision else "float32", + ) + lbm_opt_split = LBMOptimisation(cse_global=True, split=True) lbm_opt = LBMOptimisation(cse_global=True, split=False) - print(f"Vectorization test, double precision {double_precision}, vectorization {vectorization_options}, " - f"fixed loop sizes {fixed_loop_sizes}") - ldc1 = create_lid_driven_cavity(size1, fixed_loop_sizes=fixed_loop_sizes, - lbm_config=lbm_config, lbm_optimisation=lbm_opt, config=config) + print( + f"Vectorization test, double precision {double_precision}, vectorization {vectorization_options}, " + f"fixed loop sizes {fixed_loop_sizes}" + ) + ldc1 = create_lid_driven_cavity( + size1, + fixed_loop_sizes=fixed_loop_sizes, + lbm_config=lbm_config, + lbm_optimisation=lbm_opt, + config=config, + ) ldc1.run(time_steps) np.testing.assert_almost_equal(ldc1_ref.velocity[:, :], ldc1.velocity[:, :]) - ldc2 = create_lid_driven_cavity(size2, fixed_loop_sizes=fixed_loop_sizes, - lbm_config=lbm_config, lbm_optimisation=lbm_opt_split, config=config) + ldc2 = create_lid_driven_cavity( + size2, + fixed_loop_sizes=fixed_loop_sizes, + lbm_config=lbm_config, + lbm_optimisation=lbm_opt_split, + config=config, + ) ldc2.run(time_steps) np.testing.assert_almost_equal(ldc2_ref.velocity[:, :], ldc2.velocity[:, :])