From 39c214af8042060797d7c56d338442729343c776 Mon Sep 17 00:00:00 2001
From: Michael Kuron <mkuron@icp.uni-stuttgart.de>
Date: Thu, 18 Feb 2021 16:20:43 +0100
Subject: [PATCH] vectorization: improve treatment of unary minus

---
 pystencils/backends/cbackend.py            |  2 ++
 pystencils/cpu/vectorization.py            | 24 +++++++++++++++----
 pystencils/data_types.py                   |  3 +++
 pystencils/include/myintrin.h              |  4 ++++
 pystencils_tests/test_random.py            |  2 +-
 pystencils_tests/test_vectorization.py     | 16 -------------
 .../test_vectorization_specific.py         | 21 ++++++++++++++++
 7 files changed, 50 insertions(+), 22 deletions(-)

diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py
index e489f3cc2..8c835fd1d 100644
--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -546,6 +546,8 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter):
         elif isinstance(expr, cast_func):
             arg, data_type = expr.args
             if type(data_type) is VectorType:
+                # vector_memory_access is a cast_func itself, so it shouldn't be directly inside a cast_func
+                assert not isinstance(arg, vector_memory_access)
                 if isinstance(arg, sp.Tuple):
                     is_boolean = get_type_of_expression(arg[0]) == create_type("bool")
                     is_integer = get_type_of_expression(arg[0]) == create_type("int")
diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py
index 6ecc87284..2f49d44a2 100644
--- a/pystencils/cpu/vectorization.py
+++ b/pystencils/cpu/vectorization.py
@@ -1,6 +1,7 @@
 import warnings
 from typing import Container, Union
 
+import numpy as np
 import sympy as sp
 from sympy.logic.boolalg import BooleanFunction
 
@@ -160,7 +161,6 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, vector_width, assume_a
 
 
 def mask_conditionals(loop_body):
-
     def visit_node(node, mask):
         if isinstance(node, ast.Conditional):
             cond = node.condition_expr
@@ -201,18 +201,32 @@ def insert_vector_casts(ast_node):
             new_arg = visit_expr(expr.args[0])
             base_type = get_type_of_expression(expr.args[0]).base_type if type(expr.args[0]) is vector_memory_access \
                 else get_type_of_expression(expr.args[0])
-            pw = sp.Piecewise((base_type.numpy_dtype.type(-1) * new_arg, new_arg < base_type.numpy_dtype.type(0)),
+            pw = sp.Piecewise((-new_arg, new_arg < base_type.numpy_dtype.type(0)),
                               (new_arg, True))
             return visit_expr(pw)
         elif expr.func in handled_functions or isinstance(expr, sp.Rel) or isinstance(expr, BooleanFunction):
+            default_type = 'double'
+            if expr.func is sp.Mul and expr.args[0] == -1:
+                # special treatment for the unary minus: make sure that the -1 has the same type as the argument
+                dtype = int
+                for arg in expr.args[1:]:
+                    if type(arg) is vector_memory_access and arg.dtype.base_type.is_float():
+                        dtype = arg.dtype.base_type.numpy_dtype.type
+                    elif type(arg) is TypedSymbol and type(arg.dtype) is VectorType and arg.dtype.base_type.is_float():
+                        dtype = arg.dtype.base_type.numpy_dtype.type
+                if dtype is not int:
+                    if dtype is np.float32:
+                        default_type = 'float'
+                    expr = sp.Mul(dtype(expr.args[0]), *expr.args[1:])
             new_args = [visit_expr(a) for a in expr.args]
-            arg_types = [get_type_of_expression(a) for a in new_args]
+            arg_types = [get_type_of_expression(a, default_float_type=default_type) for a in new_args]
             if not any(type(t) is VectorType for t in arg_types):
                 return expr
             else:
                 target_type = collate_types(arg_types)
-                casted_args = [cast_func(a, target_type) if t != target_type else a
-                               for a, t in zip(new_args, arg_types)]
+                casted_args = [
+                    cast_func(a, target_type) if t != target_type and not isinstance(a, vector_memory_access) else a
+                    for a, t in zip(new_args, arg_types)]
                 return expr.func(*casted_args)
         elif expr.func is sp.Pow:
             new_arg = visit_expr(expr.args[0])
diff --git a/pystencils/data_types.py b/pystencils/data_types.py
index 7300f01ea..28fc4b32c 100644
--- a/pystencils/data_types.py
+++ b/pystencils/data_types.py
@@ -502,6 +502,9 @@ def get_type_of_expression(expr,
     from pystencils.astnodes import ResolvedFieldAccess
     from pystencils.cpu.vectorization import vec_all, vec_any
 
+    if default_float_type == 'float':
+        default_float_type = 'float32'
+
     if not symbol_type_dict:
         symbol_type_dict = defaultdict(lambda: create_type('double'))
 
diff --git a/pystencils/include/myintrin.h b/pystencils/include/myintrin.h
index a94c316c4..6c1d9d4d0 100644
--- a/pystencils/include/myintrin.h
+++ b/pystencils/include/myintrin.h
@@ -36,6 +36,8 @@ QUALIFIERS __m128d _my_cvtepu64_pd(const __m128i x)
 {
 #ifdef __AVX512VL__
     return _mm_cvtepu64_pd(x);
+#elif defined(__clang__)
+    return __builtin_convertvector((uint64_t __attribute__((__vector_size__(16)))) x, __m128d);
 #else
     __m128i xH = _mm_srli_epi64(x, 32);
     xH = _mm_or_si128(xH, _mm_castpd_si128(_mm_set1_pd(19342813113834066795298816.)));  // 2^84
@@ -85,6 +87,8 @@ QUALIFIERS __m256d _my256_cvtepu64_pd(const __m256i x)
 {
 #ifdef __AVX512VL__
     return _mm256_cvtepu64_pd(x);
+#elif defined(__clang__)
+    return __builtin_convertvector((uint64_t __attribute__((__vector_size__(32)))) x, __m256d);
 #else
     __m256i xH = _mm256_srli_epi64(x, 32);
     xH = _mm256_or_si256(xH, _mm256_castpd_si256(_mm256_set1_pd(19342813113834066795298816.)));  // 2^84
diff --git a/pystencils_tests/test_random.py b/pystencils_tests/test_random.py
index 7feb9ced5..856bfd2c5 100644
--- a/pystencils_tests/test_random.py
+++ b/pystencils_tests/test_random.py
@@ -73,7 +73,7 @@ def test_rng(target, rng, precision, dtype, t=124, offsets=(0, 0), keys=(0, 0),
     for x in range(dh.shape[0]):
         for y in range(dh.shape[1]):
             r = Philox(counter=t + (x + offset_values[0]) * 2 ** 32 + (y + offset_values[1]) * 2 ** 64,
-                       key=keys[0] + keys[1] * 2 ** 32, number=4, width=32)
+                       key=keys[0] + keys[1] * 2 ** 32, number=4, width=32, mode="sequence")
             r.advance(-4, counter=False)
             int_reference[x, y, :] = r.random_raw(size=4)
 
diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py
index 640b8919a..7b1891a4d 100644
--- a/pystencils_tests/test_vectorization.py
+++ b/pystencils_tests/test_vectorization.py
@@ -33,22 +33,6 @@ def test_vector_type_propagation():
     np.testing.assert_equal(dst[1:-1, 1:-1], 2 * 10.0 + 3)
 
 
-def test_vectorized_abs():
-    arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2))
-    arr[-3:, :] = -1
-
-    f, g = ps.fields(f=arr, g=arr)
-    update_rule = [ps.Assignment(g.center(), sp.Abs(f.center()))]
-
-    ast = ps.create_kernel(update_rule)
-    vectorize(ast, instruction_set=instruction_set)
-
-    func = ast.compile()
-    dst = np.zeros_like(arr)
-    func(g=dst, f=arr)
-    np.testing.assert_equal(np.sum(dst[1:-1, 1:-1]), 2 ** 2 * 2 ** 3)
-
-
 def test_aligned_and_nt_stores():
     domain_size = (24, 24)
     # create a datahandling object
diff --git a/pystencils_tests/test_vectorization_specific.py b/pystencils_tests/test_vectorization_specific.py
index d5727d40f..9d9d4ac50 100644
--- a/pystencils_tests/test_vectorization_specific.py
+++ b/pystencils_tests/test_vectorization_specific.py
@@ -1,6 +1,7 @@
 import pytest
 
 import numpy as np
+import sympy as sp
 
 import pystencils as ps
 from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
@@ -28,3 +29,23 @@ def test_vectorisation_varying_arch(instruction_set):
     kernel = ast.compile()
     kernel(f=arr)
     np.testing.assert_equal(arr, 2)
+
+
+@pytest.mark.parametrize('dtype', ('float', 'double'))
+@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
+def test_vectorized_abs(instruction_set, dtype):
+    """Some instruction sets have abs, some don't.
+    Furthermore, the special treatment of unary minus makes this data type-sensitive too.
+    """
+    arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2), dtype=np.float64 if dtype == 'double' else np.float32)
+    arr[-3:, :] = -1
+
+    f, g = ps.fields(f=arr, g=arr)
+    update_rule = [ps.Assignment(g.center(), sp.Abs(f.center()))]
+
+    ast = ps.create_kernel(update_rule, cpu_vectorize_info={'instruction_set': instruction_set})
+
+    func = ast.compile()
+    dst = np.zeros_like(arr)
+    func(g=dst, f=arr)
+    np.testing.assert_equal(np.sum(dst[1:-1, 1:-1]), 2 ** 2 * 2 ** 3)
-- 
GitLab