Commit ec4e1c28 authored by Helen Schottenhamml's avatar Helen Schottenhamml
Browse files

Merge branch 'VectorisationBug' into 'master'

Vectorisation bug

Closes #41

See merge request pycodegen/pystencils!266
parents c8ea55af 8e9bded0
......@@ -659,9 +659,9 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter):
if isinstance(expr, vector_memory_access):
arg, data_type, aligned, _, mask, stride = expr.args
if stride != 1:
return self.instruction_set['loadS'].format("& " + self._print(arg), stride, **self._kwargs)
return self.instruction_set['loadS'].format(f"& {self._print(arg)}", stride, **self._kwargs)
instruction = self.instruction_set['loadA'] if aligned else self.instruction_set['loadU']
return instruction.format("& " + self._print(arg), **self._kwargs)
return instruction.format(f"& {self._print(arg)}", **self._kwargs)
elif isinstance(expr, cast_func):
arg, data_type = expr.args
if type(data_type) is VectorType:
......
......@@ -373,8 +373,7 @@ def equal_size_check(fields):
return ""
ref_field = fields[0]
cond = ["(buffer_{field.name}.shape[{i}] == buffer_{ref_field.name}.shape[{i}])".format(ref_field=ref_field,
field=field_to_test, i=i)
cond = [f"(buffer_{field_to_test.name}.shape[{i}] == buffer_{ref_field.name}.shape[{i}])"
for field_to_test in fields[1:]
for i in range(fields[0].spatial_dimensions)]
cond = " && ".join(cond)
......@@ -431,8 +430,7 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec
if (np_dtype.isbuiltin and FieldType.is_generic(field)
and not np.issubdtype(field.dtype.numpy_dtype, np.complexfloating)):
dtype_cond = "buffer_{name}.format[0] == '{format}'".format(name=field.name,
format=field.dtype.numpy_dtype.char)
dtype_cond = f"buffer_{field.name}.format[0] == '{field.dtype.numpy_dtype.char}'"
pre_call_code += template_check_array.format(cond=dtype_cond, what="data type", name=field.name,
expected=str(field.dtype.numpy_dtype))
......@@ -461,8 +459,7 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec
elif param.is_field_stride:
field = param.fields[0]
item_size = field.dtype.numpy_dtype.itemsize
parameters.append("buffer_{name}.strides[{i}] / {bytes}".format(bytes=item_size, i=param.symbol.coordinate,
name=field.name))
parameters.append(f"buffer_{field.name}.strides[{param.symbol.coordinate}] / {item_size}")
elif param.is_field_shape:
parameters.append(f"buffer_{param.field_name}.shape[{param.symbol.coordinate}]")
elif type(param.symbol) is CFunction:
......@@ -507,8 +504,8 @@ def load_kernel_from_file(module_name, function_name, path):
except ImportError:
import time
import warnings
warnings.warn("Could not load " + path + ", trying on more time...")
time.sleep(1)
warnings.warn(f"Could not load {path}, trying on more time in 5 seconds ...")
time.sleep(5)
spec = spec_from_file_location(name=module_name, location=path)
mod = module_from_spec(spec)
spec.loader.exec_module(mod)
......
......@@ -121,8 +121,8 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
"to differently typed floating point fields")
float_size = field_float_dtypes.pop().numpy_dtype.itemsize
assert float_size in (8, 4)
vector_is = get_vector_instruction_set('double' if float_size == 8 else 'float',
instruction_set=instruction_set)
default_float_type = 'double' if float_size == 8 else 'float'
vector_is = get_vector_instruction_set(default_float_type, instruction_set=instruction_set)
vector_width = vector_is['width']
kernel_ast.instruction_set = vector_is
......@@ -130,7 +130,7 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned else 'storeU']
vectorize_inner_loops_and_adapt_load_stores(kernel_ast, vector_width, assume_aligned, nontemporal,
strided, keep_loop_stop, assume_sufficient_line_padding)
insert_vector_casts(kernel_ast)
insert_vector_casts(kernel_ast, default_float_type)
def vectorize_inner_loops_and_adapt_load_stores(ast_node, vector_width, assume_aligned, nontemporal_fields,
......@@ -242,25 +242,24 @@ def mask_conditionals(loop_body):
visit_node(loop_body, mask=True)
def insert_vector_casts(ast_node):
def insert_vector_casts(ast_node, default_float_type='double'):
"""Inserts necessary casts from scalar values to vector values."""
handled_functions = (sp.Add, sp.Mul, fast_division, fast_sqrt, fast_inv_sqrt, vec_any, vec_all)
def visit_expr(expr):
def visit_expr(expr, default_type='double'):
if isinstance(expr, vector_memory_access):
return vector_memory_access(*expr.args[0:4], visit_expr(expr.args[4]), *expr.args[5:])
return vector_memory_access(*expr.args[0:4], visit_expr(expr.args[4], default_type), *expr.args[5:])
elif isinstance(expr, cast_func):
return expr
elif expr.func is sp.Abs and 'abs' not in ast_node.instruction_set:
new_arg = visit_expr(expr.args[0])
new_arg = visit_expr(expr.args[0], default_type)
base_type = get_type_of_expression(expr.args[0]).base_type if type(expr.args[0]) is vector_memory_access \
else get_type_of_expression(expr.args[0])
pw = sp.Piecewise((-new_arg, new_arg < base_type.numpy_dtype.type(0)),
(new_arg, True))
return visit_expr(pw)
return visit_expr(pw, default_type)
elif expr.func in handled_functions or isinstance(expr, sp.Rel) or isinstance(expr, BooleanFunction):
default_type = 'double'
if expr.func is sp.Mul and expr.args[0] == -1:
# special treatment for the unary minus: make sure that the -1 has the same type as the argument
dtype = int
......@@ -274,7 +273,7 @@ def insert_vector_casts(ast_node):
if dtype is np.float32:
default_type = 'float'
expr = sp.Mul(dtype(expr.args[0]), *expr.args[1:])
new_args = [visit_expr(a) for a in expr.args]
new_args = [visit_expr(a, default_type) for a in expr.args]
arg_types = [get_type_of_expression(a, default_float_type=default_type) for a in new_args]
if not any(type(t) is VectorType for t in arg_types):
return expr
......@@ -285,11 +284,11 @@ def insert_vector_casts(ast_node):
for a, t in zip(new_args, arg_types)]
return expr.func(*casted_args)
elif expr.func is sp.Pow:
new_arg = visit_expr(expr.args[0])
new_arg = visit_expr(expr.args[0], default_type)
return expr.func(new_arg, expr.args[1])
elif expr.func == sp.Piecewise:
new_results = [visit_expr(a[0]) for a in expr.args]
new_conditions = [visit_expr(a[1]) for a in expr.args]
new_results = [visit_expr(a[0], default_type) for a in expr.args]
new_conditions = [visit_expr(a[1], default_type) for a in expr.args]
types_of_results = [get_type_of_expression(a) for a in new_results]
types_of_conditions = [get_type_of_expression(a) for a in new_conditions]
......@@ -311,14 +310,14 @@ def insert_vector_casts(ast_node):
else:
return expr
def visit_node(node, substitution_dict):
def visit_node(node, substitution_dict, default_type='double'):
substitution_dict = substitution_dict.copy()
for arg in node.args:
if isinstance(arg, ast.SympyAssignment):
assignment = arg
subs_expr = fast_subs(assignment.rhs, substitution_dict,
skip=lambda e: isinstance(e, ast.ResolvedFieldAccess))
assignment.rhs = visit_expr(subs_expr)
assignment.rhs = visit_expr(subs_expr, default_type)
rhs_type = get_type_of_expression(assignment.rhs)
if isinstance(assignment.lhs, TypedSymbol):
lhs_type = assignment.lhs.dtype
......@@ -328,13 +327,13 @@ def insert_vector_casts(ast_node):
substitution_dict[assignment.lhs] = new_lhs
assignment.lhs = new_lhs
elif isinstance(assignment.lhs, vector_memory_access):
assignment.lhs = visit_expr(assignment.lhs)
assignment.lhs = visit_expr(assignment.lhs, default_type)
elif isinstance(arg, ast.Conditional):
arg.condition_expr = fast_subs(arg.condition_expr, substitution_dict,
skip=lambda e: isinstance(e, ast.ResolvedFieldAccess))
arg.condition_expr = visit_expr(arg.condition_expr)
visit_node(arg, substitution_dict)
arg.condition_expr = visit_expr(arg.condition_expr, default_type)
visit_node(arg, substitution_dict, default_type)
else:
visit_node(arg, substitution_dict)
visit_node(arg, substitution_dict, default_type)
visit_node(ast_node, {})
visit_node(ast_node, {}, default_float_type)
......@@ -634,10 +634,10 @@ class BasicType(Type):
return 'ComplexDouble'
elif name.startswith('int'):
width = int(name[len("int"):])
return "int%d_t" % (width,)
return f"int{width}_t"
elif name.startswith('uint'):
width = int(name[len("uint"):])
return "uint%d_t" % (width,)
return f"uint{width}_t"
elif name == 'bool':
return 'bool'
else:
......@@ -736,7 +736,7 @@ class VectorType(Type):
def __str__(self):
if self.instruction_set is None:
return "%s[%s]" % (self.base_type, self.width)
return f"{self.base_type}[{self.width}]"
else:
if self.base_type == create_type("int64") or self.base_type == create_type("int32"):
return self.instruction_set['int']
......
......@@ -295,3 +295,4 @@ def test_issue40(*_):
code = ps.get_code_str(ast)
assert 'epi32' not in code
......@@ -6,7 +6,7 @@ import sympy as sp
import pystencils as ps
from pystencils.backends.simd_instruction_sets import (get_cacheline_size, get_supported_instruction_sets,
get_vector_instruction_set)
from pystencils.data_types import cast_func, VectorType
from . import test_vectorization
from pystencils.enums import Target
supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else []
......@@ -116,15 +116,33 @@ def test_cacheline_size(instruction_set):
pytest.skip()
instruction_set = get_vector_instruction_set('double', instruction_set)
vector_size = instruction_set['bytes']
assert cacheline_size > 8 and cacheline_size < 0x100000, "Cache line size is implausible"
assert 8 < cacheline_size < 0x100000, "Cache line size is implausible"
if type(vector_size) is int:
assert cacheline_size % vector_size == 0, "Cache line size should be multiple of vector size"
assert cacheline_size & (cacheline_size - 1) == 0, "Cache line size is not a power of 2"
# test_vectorization is not parametrized because it is supposed to run without pytest, so we parametrize it here
from pystencils_tests import test_vectorization
@pytest.mark.parametrize('instruction_set', sorted(set(supported_instruction_sets) - set([test_vectorization.instruction_set])))
@pytest.mark.parametrize('function', [f for f in test_vectorization.__dict__ if f.startswith('test_') and f != 'test_hardware_query'])
@pytest.mark.parametrize('instruction_set',
sorted(set(supported_instruction_sets) - {test_vectorization.instruction_set}))
@pytest.mark.parametrize('function',
[f for f in test_vectorization.__dict__ if f.startswith('test_') and f != 'test_hardware_query'])
def test_vectorization_other(instruction_set, function):
    """Re-run a test from the test_vectorization module with a different instruction set.

    `function` is the name of a test function defined in test_vectorization; it is
    looked up dynamically and invoked with the parametrized instruction set.
    """
    test_fn = getattr(test_vectorization, function)
    test_fn(instruction_set)
@pytest.mark.parametrize('dtype', ('float', 'double'))
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('field_layout', ('fzyx', 'zyxf'))
def test_square_root(dtype, instruction_set, field_layout):
    """Smoke test: a kernel containing sp.sqrt must vectorize and compile.

    Builds a 2D field with 9 index entries, sums its center vector into a symbol,
    multiplies by the square root of the field's center value, and compiles the
    resulting kernel with vectorization enabled for the given instruction set.
    """
    vectorization_options = {'instruction_set': instruction_set,
                             'assume_inner_stride_one': True,
                             'assume_aligned': False,
                             'nontemporal': False}
    config = ps.CreateKernelConfig(data_type=dtype, cpu_vectorize_info=vectorization_options)

    src_field = ps.Field.create_generic('pdfs', 2, dtype, index_dimensions=1,
                                        layout=field_layout, index_shape=(9,))

    xi = sp.Symbol("xi")
    assignments = [ps.Assignment(xi, sum(src_field.center_vector)),
                   ps.Assignment(sp.Symbol("xi_2"), xi * sp.sqrt(src_field.center))]

    ps.create_kernel(assignments, config=config).compile()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment