From 686a3ad8300540ccd41b219ea9fa14445406402b Mon Sep 17 00:00:00 2001 From: Michael Kuron <mkuron@icp.uni-stuttgart.de> Date: Tue, 20 Apr 2021 20:06:30 +0200 Subject: [PATCH] Vectorization tests: run with all available instruction sets, add test for maskStore --- pystencils/backends/cbackend.py | 12 +++++++-- pystencils/backends/x86_instruction_sets.py | 18 ++----------- pystencils_tests/test_conditional_vec.py | 23 ++++++++++++++++ pystencils_tests/test_vectorization.py | 26 +++++++++---------- .../test_vectorization_specific.py | 14 +++++++--- 5 files changed, 58 insertions(+), 35 deletions(-) diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index 5aabd83d6..988a8e518 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -263,8 +263,16 @@ class CBackend: if mask != True: # NOQA instr = 'maskStore' if aligned else 'maskStoreU' printed_mask = self.sympy_printer.doprint(mask) - if self._vector_instruction_set['dataTypePrefix']['double'] == '__mm256d': - printed_mask = f"_mm256_castpd_si256({printed_mask})" + if data_type.base_type.base_name == 'double': + if self._vector_instruction_set['double'] == '__m256d': + printed_mask = f"_mm256_castpd_si256({printed_mask})" + elif self._vector_instruction_set['double'] == '__m128d': + printed_mask = f"_mm_castpd_si128({printed_mask})" + elif data_type.base_type.base_name == 'float': + if self._vector_instruction_set['float'] == '__m256': + printed_mask = f"_mm256_castps_si256({printed_mask})" + elif self._vector_instruction_set['float'] == '__m128': + printed_mask = f"_mm_castps_si128({printed_mask})" rhs_type = get_type_of_expression(node.rhs) if type(rhs_type) is not VectorType: diff --git a/pystencils/backends/x86_instruction_sets.py b/pystencils/backends/x86_instruction_sets.py index 78515e1a2..50005c5ae 100644 --- a/pystencils/backends/x86_instruction_sets.py +++ b/pystencils/backends/x86_instruction_sets.py @@ -57,23 +57,9 @@ def get_vector_instruction_set_x86(data_type='double', instruction_set='avx'): 'storeU': 'storeu[0,1]', 'storeA': 'store[0,1]', 'stream': 'stream[0,1]', - 'maskstore': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]', - 'maskload': 'mask_load[0, 2, 1]' if instruction_set == 'avx512' else 'maskload[0, 2, 1]' + 'maskStore': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]', + 'maskStoreU': 'mask_storeu[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]', } - if instruction_set == 'avx512': - base_names.update({ - 'maskStore': 'mask_store[0, 2, 1]', - 'maskStoreU': 'mask_storeu[0, 2, 1]', - 'maskLoad': 'mask_load[2, 1, 0]', - 'maskLoadU': 'mask_loadu[2, 1, 0]' - }) - if instruction_set == 'avx': - base_names.update({ - 'maskStore': 'maskstore[0, 2, 1]', - 'maskStoreU': 'maskstore[0, 2, 1]', - 'maskLoad': 'maskload[0, 1]', - 'maskLoadU': 'maskloadu[0, 1]' - }) for comparison_op, constant in comparisons.items(): base_names[comparison_op] = f'cmp[0, 1, {constant}]' diff --git a/pystencils_tests/test_conditional_vec.py b/pystencils_tests/test_conditional_vec.py index 59f6367cd..1274aa674 100644 --- a/pystencils_tests/test_conditional_vec.py +++ b/pystencils_tests/test_conditional_vec.py @@ -75,3 +75,26 @@ def test_boolean_before_loop(): np.testing.assert_array_equal(g_arr, 1.0) kernel(f=f_arr, g=g_arr, t2=-1.0) np.testing.assert_array_equal(g_arr, 42.0) + + +@pytest.mark.parametrize('instruction_set', supported_instruction_sets) +@pytest.mark.parametrize('dtype', ('float', 'double')) +def test_vec_maskstore(instruction_set, dtype): + if instruction_set in ['neon', 'vsx']: + pytest.skip('no mask-store instructions available') + data_arr = np.zeros((16, 16), dtype=np.float64 if dtype == 'double' else np.float32) + data_arr[4:-4, 4:-4] = 1.0 + data = ps.fields(f"data: {dtype}[2D]", data=data_arr) + + c = [ + Conditional(data.center() < 1.0, Block([ + ps.Assignment(data.center(), 2.0) + ])) + ] + ast = ps.create_kernel(c, target='cpu', + cpu_vectorize_info={'instruction_set': instruction_set}) + ps.show_code(ast) + kernel = ast.compile() + kernel(data=data_arr) + np.testing.assert_equal(data_arr[0:4, :], 2.0) + np.testing.assert_equal(data_arr[4:-4, 4:-4], 1.0) diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py index f668c6b81..b7ee2e83b 100644 --- a/pystencils_tests/test_vectorization.py +++ b/pystencils_tests/test_vectorization.py @@ -14,7 +14,7 @@ else: instruction_set = None -def test_vector_type_propagation(): +def test_vector_type_propagation(instruction_set=instruction_set): a, b, c, d, e = sp.symbols("a b c d e") arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2)) arr *= 10.0 @@ -33,7 +33,7 @@ def test_vector_type_propagation(): np.testing.assert_equal(dst[1:-1, 1:-1], 2 * 10.0 + 3) -def test_aligned_and_nt_stores(openmp=False): +def test_aligned_and_nt_stores(instruction_set=instruction_set, openmp=False): domain_size = (24, 24) # create a datahandling object dh = ps.create_data_handling(domain_size, periodicity=(True, True), parallel=False, default_target='cpu') @@ -63,11 +63,11 @@ def test_aligned_and_nt_stores(openmp=False): dh.run_kernel(kernel) np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size)) -def test_aligned_and_nt_stores_openmp(): - test_aligned_and_nt_stores(True) +def test_aligned_and_nt_stores_openmp(instruction_set=instruction_set): + test_aligned_and_nt_stores(instruction_set, True) -def test_inplace_update(): +def test_inplace_update(instruction_set=instruction_set): shape = (9, 9, 3) arr = np.ones(shape, order='f') @@ -88,7 +88,7 @@ def test_inplace_update(): np.testing.assert_equal(arr, 2) -def test_vectorization_fixed_size(): +def test_vectorization_fixed_size(instruction_set=instruction_set): configurations = [] # Fixed size - multiple of four arr = np.ones((20 + 2, 24 + 2)) * 5.0 @@ -115,7 +115,7 @@ def test_vectorization_fixed_size(): np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0) -def test_vectorization_variable_size(): +def test_vectorization_variable_size(instruction_set=instruction_set): f, g = ps.fields("f, g : double[2D]") update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)] ast = ps.create_kernel(update_rule) @@ -131,7 +131,7 @@ def test_vectorization_variable_size(): np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0) -def test_piecewise1(): +def test_piecewise1(instruction_set=instruction_set): a, b, c, d, e = sp.symbols("a b c d e") arr = np.ones((2 ** 3 + 2, 2 ** 4 + 2)) * 5.0 @@ -149,7 +149,7 @@ def test_piecewise1(): np.testing.assert_equal(dst[1:-1, 1:-1], 5 + 3 + 5.0) -def test_piecewise2(): +def test_piecewise2(instruction_set=instruction_set): arr = np.zeros((20, 20)) @ps.kernel @@ -167,7 +167,7 @@ def test_piecewise2(): np.testing.assert_equal(arr, np.ones_like(arr)) -def test_piecewise3(): +def test_piecewise3(instruction_set=instruction_set): arr = np.zeros((22, 22)) @ps.kernel @@ -181,7 +181,7 @@ def test_piecewise3(): ast.compile() -def test_logical_operators(): +def test_logical_operators(instruction_set=instruction_set): arr = np.zeros((22, 22)) @ps.kernel @@ -220,7 +220,7 @@ def test_hardware_query(): any([iset.startswith('sve') for iset in supported_instruction_sets]) -def test_vectorised_pow(): +def test_vectorised_pow(instruction_set=instruction_set): arr = np.zeros((24, 24)) f, g = ps.fields(f=arr, g=arr) @@ -256,7 +256,7 @@ def test_vectorised_pow(): ast.compile() -def test_vectorised_fast_approximations(): +def test_vectorised_fast_approximations(instruction_set=instruction_set): arr = np.zeros((24, 24)) f, g = ps.fields(f=arr, g=arr) diff --git a/pystencils_tests/test_vectorization_specific.py b/pystencils_tests/test_vectorization_specific.py index fca50949e..df0b9d943 100644 --- a/pystencils_tests/test_vectorization_specific.py +++ b/pystencils_tests/test_vectorization_specific.py @@ -57,15 +57,13 @@ def test_vectorized_abs(instruction_set, dtype): @pytest.mark.parametrize('instruction_set', supported_instruction_sets) @pytest.mark.parametrize('gl_field, gl_kernel', [(1, 0), (0, 1), (1, 1)]) def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set, dtype): - itemsize = 8 if dtype == 'double' else 4 - alignment = get_vector_instruction_set(dtype, instruction_set)['width'] * itemsize dtype = np.float64 if dtype == 'double' else np.float32 domain_size = (128, 128) dh = ps.create_data_handling(domain_size, periodicity=(True, True), default_target='cpu') - src = dh.add_array("src", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=alignment) + src = dh.add_array("src", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=True) dh.fill(src.name, 1.0, ghost_layers=True) - dst = dh.add_array("dst", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=alignment) + dst = dh.add_array("dst", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=True) dh.fill(dst.name, 1.0, ghost_layers=True) update_rule = ps.Assignment(dst[0, 0], src[0, 0]) @@ -90,3 +88,11 @@ def test_cacheline_size(instruction_set): assert cacheline_size > 8 and cacheline_size < 0x100000, "Cache line size is implausible" assert cacheline_size % vector_size == 0, "Cache line size should be multiple of vector size" assert cacheline_size & (cacheline_size - 1) == 0, "Cache line size is not a power of 2" + + +# test_vectorization is not parametrized because it is supposed to run without pytest, so we parametrize it here +from pystencils_tests import test_vectorization +@pytest.mark.parametrize('instruction_set', set(supported_instruction_sets) - set([test_vectorization.instruction_set])) +@pytest.mark.parametrize('function', [f for f in test_vectorization.__dict__ if f.startswith('test_') and f != 'test_hardware_query']) +def test_vectorization_other(instruction_set, function): + test_vectorization.__dict__[function](instruction_set) -- GitLab