diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index 9500fba899a052537ce979c60d655b22ac270609..b32f33d859188b4032e88782f986f97ddc406018 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -659,9 +659,9 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter): if isinstance(expr, vector_memory_access): arg, data_type, aligned, _, mask, stride = expr.args if stride != 1: - return self.instruction_set['loadS'].format("& " + self._print(arg), stride, **self._kwargs) + return self.instruction_set['loadS'].format(f"& {self._print(arg)}", stride, **self._kwargs) instruction = self.instruction_set['loadA'] if aligned else self.instruction_set['loadU'] - return instruction.format("& " + self._print(arg), **self._kwargs) + return instruction.format(f"& {self._print(arg)}", **self._kwargs) elif isinstance(expr, cast_func): arg, data_type = expr.args if type(data_type) is VectorType: diff --git a/pystencils/cpu/cpujit.py b/pystencils/cpu/cpujit.py index 15eac11db141119d9247e82f94c7f4c1fadda3e9..7144a58bdae9c69cae195039ef0a05a7365053af 100644 --- a/pystencils/cpu/cpujit.py +++ b/pystencils/cpu/cpujit.py @@ -373,8 +373,7 @@ def equal_size_check(fields): return "" ref_field = fields[0] - cond = ["(buffer_{field.name}.shape[{i}] == buffer_{ref_field.name}.shape[{i}])".format(ref_field=ref_field, - field=field_to_test, i=i) + cond = [f"(buffer_{field_to_test.name}.shape[{i}] == buffer_{ref_field.name}.shape[{i}])" for field_to_test in fields[1:] for i in range(fields[0].spatial_dimensions)] cond = " && ".join(cond) @@ -431,8 +430,7 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec if (np_dtype.isbuiltin and FieldType.is_generic(field) and not np.issubdtype(field.dtype.numpy_dtype, np.complexfloating)): - dtype_cond = "buffer_{name}.format[0] == '{format}'".format(name=field.name, - format=field.dtype.numpy_dtype.char) + dtype_cond = f"buffer_{field.name}.format[0] == '{field.dtype.numpy_dtype.char}'" pre_call_code += template_check_array.format(cond=dtype_cond, what="data type", name=field.name, expected=str(field.dtype.numpy_dtype)) @@ -461,8 +459,7 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec elif param.is_field_stride: field = param.fields[0] item_size = field.dtype.numpy_dtype.itemsize - parameters.append("buffer_{name}.strides[{i}] / {bytes}".format(bytes=item_size, i=param.symbol.coordinate, - name=field.name)) + parameters.append(f"buffer_{field.name}.strides[{param.symbol.coordinate}] / {item_size}") elif param.is_field_shape: parameters.append(f"buffer_{param.field_name}.shape[{param.symbol.coordinate}]") elif type(param.symbol) is CFunction: @@ -507,8 +504,8 @@ def load_kernel_from_file(module_name, function_name, path): except ImportError: import time import warnings - warnings.warn("Could not load " + path + ", trying on more time...") - time.sleep(1) + warnings.warn(f"Could not load {path}, trying on more time in 5 seconds ...") + time.sleep(5) spec = spec_from_file_location(name=module_name, location=path) mod = module_from_spec(spec) spec.loader.exec_module(mod) diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py index f9b35b14fbb5b1c178910ad0abc97415076a232e..c0511aa16c553d24944de5d59addab0dd5877015 100644 --- a/pystencils/cpu/vectorization.py +++ b/pystencils/cpu/vectorization.py @@ -121,8 +121,8 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best', "to differently typed floating point fields") float_size = field_float_dtypes.pop().numpy_dtype.itemsize assert float_size in (8, 4) - vector_is = get_vector_instruction_set('double' if float_size == 8 else 'float', - instruction_set=instruction_set) + default_float_type = 'double' if float_size == 8 else 'float' + vector_is = get_vector_instruction_set(default_float_type, instruction_set=instruction_set) vector_width = vector_is['width'] kernel_ast.instruction_set = vector_is @@ -130,7 +130,7 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best', keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned else 'storeU'] vectorize_inner_loops_and_adapt_load_stores(kernel_ast, vector_width, assume_aligned, nontemporal, strided, keep_loop_stop, assume_sufficient_line_padding) - insert_vector_casts(kernel_ast) + insert_vector_casts(kernel_ast, default_float_type) def vectorize_inner_loops_and_adapt_load_stores(ast_node, vector_width, assume_aligned, nontemporal_fields, @@ -242,25 +242,24 @@ def mask_conditionals(loop_body): visit_node(loop_body, mask=True) -def insert_vector_casts(ast_node): +def insert_vector_casts(ast_node, default_float_type='double'): """Inserts necessary casts from scalar values to vector values.""" handled_functions = (sp.Add, sp.Mul, fast_division, fast_sqrt, fast_inv_sqrt, vec_any, vec_all) - def visit_expr(expr): + def visit_expr(expr, default_type='double'): if isinstance(expr, vector_memory_access): - return vector_memory_access(*expr.args[0:4], visit_expr(expr.args[4]), *expr.args[5:]) + return vector_memory_access(*expr.args[0:4], visit_expr(expr.args[4], default_type), *expr.args[5:]) elif isinstance(expr, cast_func): return expr elif expr.func is sp.Abs and 'abs' not in ast_node.instruction_set: - new_arg = visit_expr(expr.args[0]) + new_arg = visit_expr(expr.args[0], default_type) base_type = get_type_of_expression(expr.args[0]).base_type if type(expr.args[0]) is vector_memory_access \ else get_type_of_expression(expr.args[0]) pw = sp.Piecewise((-new_arg, new_arg < base_type.numpy_dtype.type(0)), (new_arg, True)) - return visit_expr(pw) + return visit_expr(pw, default_type) elif expr.func in handled_functions or isinstance(expr, sp.Rel) or isinstance(expr, BooleanFunction): - default_type = 'double' if expr.func is sp.Mul and expr.args[0] == -1: # special treatment for the unary minus: make sure that the -1 has the same type as the argument dtype = int @@ -274,7 +273,7 @@ def insert_vector_casts(ast_node): if dtype is np.float32: default_type = 'float' expr = sp.Mul(dtype(expr.args[0]), *expr.args[1:]) - new_args = [visit_expr(a) for a in expr.args] + new_args = [visit_expr(a, default_type) for a in expr.args] arg_types = [get_type_of_expression(a, default_float_type=default_type) for a in new_args] if not any(type(t) is VectorType for t in arg_types): return expr @@ -285,11 +284,11 @@ def insert_vector_casts(ast_node): for a, t in zip(new_args, arg_types)] return expr.func(*casted_args) elif expr.func is sp.Pow: - new_arg = visit_expr(expr.args[0]) + new_arg = visit_expr(expr.args[0], default_type) return expr.func(new_arg, expr.args[1]) elif expr.func == sp.Piecewise: - new_results = [visit_expr(a[0]) for a in expr.args] - new_conditions = [visit_expr(a[1]) for a in expr.args] + new_results = [visit_expr(a[0], default_type) for a in expr.args] + new_conditions = [visit_expr(a[1], default_type) for a in expr.args] types_of_results = [get_type_of_expression(a) for a in new_results] types_of_conditions = [get_type_of_expression(a) for a in new_conditions] @@ -311,14 +310,14 @@ def insert_vector_casts(ast_node): else: return expr - def visit_node(node, substitution_dict): + def visit_node(node, substitution_dict, default_type='double'): substitution_dict = substitution_dict.copy() for arg in node.args: if isinstance(arg, ast.SympyAssignment): assignment = arg subs_expr = fast_subs(assignment.rhs, substitution_dict, skip=lambda e: isinstance(e, ast.ResolvedFieldAccess)) - assignment.rhs = visit_expr(subs_expr) + assignment.rhs = visit_expr(subs_expr, default_type) rhs_type = get_type_of_expression(assignment.rhs) if isinstance(assignment.lhs, TypedSymbol): lhs_type = assignment.lhs.dtype @@ -328,13 +327,13 @@ def insert_vector_casts(ast_node): substitution_dict[assignment.lhs] = new_lhs assignment.lhs = new_lhs elif isinstance(assignment.lhs, vector_memory_access): - assignment.lhs = visit_expr(assignment.lhs) + assignment.lhs = visit_expr(assignment.lhs, default_type) elif isinstance(arg, ast.Conditional): arg.condition_expr = fast_subs(arg.condition_expr, substitution_dict, skip=lambda e: isinstance(e, ast.ResolvedFieldAccess)) - arg.condition_expr = visit_expr(arg.condition_expr) - visit_node(arg, substitution_dict) + arg.condition_expr = visit_expr(arg.condition_expr, default_type) + visit_node(arg, substitution_dict, default_type) else: - visit_node(arg, substitution_dict) + visit_node(arg, substitution_dict, default_type) - visit_node(ast_node, {}) + visit_node(ast_node, {}, default_float_type) diff --git a/pystencils/data_types.py b/pystencils/data_types.py index 295afe0897b2cbb1b2f7dfa6c4ce5ea03df9f7d9..846a46b702ad12538eb0debb109eda35b651b021 100644 --- a/pystencils/data_types.py +++ b/pystencils/data_types.py @@ -634,10 +634,10 @@ class BasicType(Type): return 'ComplexDouble' elif name.startswith('int'): width = int(name[len("int"):]) - return "int%d_t" % (width,) + return f"int{width}_t" elif name.startswith('uint'): width = int(name[len("uint"):]) - return "uint%d_t" % (width,) + return f"uint{width}_t" elif name == 'bool': return 'bool' else: @@ -736,7 +736,7 @@ class VectorType(Type): def __str__(self): if self.instruction_set is None: - return "%s[%s]" % (self.base_type, self.width) + return f"{self.base_type}[{self.width}]" else: if self.base_type == create_type("int64") or self.base_type == create_type("int32"): return self.instruction_set['int'] diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py index 1bbc6a4a43b1cd7b8861243689c2d4d1f73966df..2fb216aa58df24f4c91ee0611a69e95aeb2c5f62 100644 --- a/pystencils_tests/test_vectorization.py +++ b/pystencils_tests/test_vectorization.py @@ -295,3 +295,4 @@ def test_issue40(*_): code = ps.get_code_str(ast) assert 'epi32' not in code + diff --git a/pystencils_tests/test_vectorization_specific.py b/pystencils_tests/test_vectorization_specific.py index 6c2f144a4273393efa5bca3b84c1aae0a840889e..b13d8bc28f4a4daf621aa23b75578a87175a826d 100644 --- a/pystencils_tests/test_vectorization_specific.py +++ b/pystencils_tests/test_vectorization_specific.py @@ -6,7 +6,7 @@ import sympy as sp import pystencils as ps from pystencils.backends.simd_instruction_sets import (get_cacheline_size, get_supported_instruction_sets, get_vector_instruction_set) -from pystencils.data_types import cast_func, VectorType +from . import test_vectorization from pystencils.enums import Target supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else [] @@ -116,15 +116,33 @@ def test_cacheline_size(instruction_set): pytest.skip() instruction_set = get_vector_instruction_set('double', instruction_set) vector_size = instruction_set['bytes'] - assert cacheline_size > 8 and cacheline_size < 0x100000, "Cache line size is implausible" + assert 8 < cacheline_size < 0x100000, "Cache line size is implausible" if type(vector_size) is int: assert cacheline_size % vector_size == 0, "Cache line size should be multiple of vector size" assert cacheline_size & (cacheline_size - 1) == 0, "Cache line size is not a power of 2" # test_vectorization is not parametrized because it is supposed to run without pytest, so we parametrize it here -from pystencils_tests import test_vectorization -@pytest.mark.parametrize('instruction_set', sorted(set(supported_instruction_sets) - set([test_vectorization.instruction_set]))) -@pytest.mark.parametrize('function', [f for f in test_vectorization.__dict__ if f.startswith('test_') and f != 'test_hardware_query']) +@pytest.mark.parametrize('instruction_set', + sorted(set(supported_instruction_sets) - {test_vectorization.instruction_set})) +@pytest.mark.parametrize('function', + [f for f in test_vectorization.__dict__ if f.startswith('test_') and f != 'test_hardware_query']) def test_vectorization_other(instruction_set, function): test_vectorization.__dict__[function](instruction_set) + + +@pytest.mark.parametrize('dtype', ('float', 'double')) +@pytest.mark.parametrize('instruction_set', supported_instruction_sets) +@pytest.mark.parametrize('field_layout', ('fzyx', 'zyxf')) +def test_square_root(dtype, instruction_set, field_layout): + config = ps.CreateKernelConfig(data_type=dtype, + cpu_vectorize_info={'instruction_set': instruction_set, + 'assume_inner_stride_one': True, + 'assume_aligned': False, 'nontemporal': False}) + + src_field = ps.Field.create_generic('pdfs', 2, dtype, index_dimensions=1, layout=field_layout, index_shape=(9,)) + + eq = [ps.Assignment(sp.Symbol("xi"), sum(src_field.center_vector)), + ps.Assignment(sp.Symbol("xi_2"), sp.Symbol("xi") * sp.sqrt(src_field.center))] + + ps.create_kernel(eq, config=config).compile()