Skip to content
Snippets Groups Projects
Commit 539b6be3 authored by Markus Holzer, committed by Michael Kuron
Browse files

[FIX] Alignment detection

parent 5d1ce2a5
Branches
Tags
No related merge requests found
......@@ -76,8 +76,6 @@ def create_kernel(assignments: NodeCollection,
base_pointer_spec = config.base_pointer_specification
if base_pointer_spec is None:
base_pointer_spec = []
if config.cpu_vectorize_info and config.cpu_vectorize_info.get('nontemporal'):
base_pointer_spec = [['spatialInner0'], ['spatialInner1']] if len(loop_order) >= 2 else [['spatialInner0']]
base_pointer_info = {field.name: parse_base_pointer_info(base_pointer_spec, loop_order,
field.spatial_dimensions, field.index_dimensions)
for field in fields_without_buffers}
......
......@@ -140,12 +140,18 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
strided, keep_loop_stop, assume_sufficient_line_padding,
default_float_type):
"""Goes over all innermost loops, changes increment to vector width and replaces field accesses by vector type."""
vector_width = ast_node.instruction_set['width']
all_loops = filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment)
all_loops = list(filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment))
inner_loops = [loop for loop in all_loops if loop.is_innermost_loop]
zero_loop_counters = {loop.loop_counter_symbol: 0 for loop in all_loops}
assert ast_node.instruction_set,\
"The ast needs to hold information about the instruction_set for the vectorisation"
vector_width = ast_node.instruction_set['width']
vector_int_width = ast_node.instruction_set['intwidth']
load_a = ast_node.instruction_set['loadA']
load_u = ast_node.instruction_set['loadU']
for loop_node in inner_loops:
loop_range = loop_node.stop - loop_node.start
......@@ -174,8 +180,18 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
for indexed in loop_node.atoms(sp.Indexed):
base, index = indexed.args
if loop_counter_symbol in index.atoms(sp.Symbol):
if not isinstance(vector_width, int) or load_a == load_u:
# When the vector width is not known during code generation, we cannot determine whether
# the access is aligned or not. None of the current sizeless vector ISAs (SVE and RISC-V-V)
# have separate load/store instructions for aligned and unaligned, so there is no disadvantage
# to falling back to unaligned here. When new ISAs become available, this may need to be revisited.
# On sized vector ISAs that do not have separate instructions for aligned and unaligned access,
# alignment does not matter here either
aligned_access = False
else:
aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) % vector_width == 0
loop_counter_is_offset = loop_counter_symbol not in (index - loop_counter_symbol).atoms()
aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) == 0
stride = sp.simplify(index.subs({loop_counter_symbol: loop_counter_symbol + 1}) - index)
if not loop_counter_is_offset and (not strided or loop_counter_symbol in stride.atoms()):
successful = False
......@@ -204,7 +220,6 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
loop_node.step = vector_width
loop_node.subs(substitutions)
vector_int_width = ast_node.instruction_set['intwidth']
arg_1 = CastFunc(loop_counter_symbol, VectorType(loop_counter_symbol.dtype, vector_int_width))
arg_2 = CastFunc(tuple(range(vector_int_width if type(vector_int_width) is int else 2)),
VectorType(loop_counter_symbol.dtype, vector_int_width))
......
......@@ -119,11 +119,14 @@ def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set
cpu_vectorize_info=opt, ghost_layers=gl_kernel)
ast = ps.create_kernel(update_rule, config=config)
kernel = ast.compile()
if gl_kernel != gl_field:
with pytest.raises(ValueError):
dh.run_kernel(kernel)
else:
if ast.instruction_set['loadA'] == ast.instruction_set['loadU']:
dh.run_kernel(kernel)
else:
if gl_kernel != gl_field:
with pytest.raises(ValueError):
dh.run_kernel(kernel)
else:
dh.run_kernel(kernel)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment