Skip to content
Snippets Groups Projects
Commit bb7a3cf2 authored by Michael Kuron's avatar Michael Kuron :mortar_board:
Browse files

Merge branch 'FixAlign' into 'master'

[FIX] Alignment detection

See merge request !351
parents 5d1ce2a5 539b6be3
Branches
Tags
1 merge request !351 [FIX] Alignment detection
Pipeline #55843 passed with stages
in 1 hour, 3 minutes, and 29 seconds
...@@ -76,8 +76,6 @@ def create_kernel(assignments: NodeCollection, ...@@ -76,8 +76,6 @@ def create_kernel(assignments: NodeCollection,
base_pointer_spec = config.base_pointer_specification base_pointer_spec = config.base_pointer_specification
if base_pointer_spec is None: if base_pointer_spec is None:
base_pointer_spec = [] base_pointer_spec = []
if config.cpu_vectorize_info and config.cpu_vectorize_info.get('nontemporal'):
base_pointer_spec = [['spatialInner0'], ['spatialInner1']] if len(loop_order) >= 2 else [['spatialInner0']]
base_pointer_info = {field.name: parse_base_pointer_info(base_pointer_spec, loop_order, base_pointer_info = {field.name: parse_base_pointer_info(base_pointer_spec, loop_order,
field.spatial_dimensions, field.index_dimensions) field.spatial_dimensions, field.index_dimensions)
for field in fields_without_buffers} for field in fields_without_buffers}
......
...@@ -140,12 +140,18 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem ...@@ -140,12 +140,18 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
strided, keep_loop_stop, assume_sufficient_line_padding, strided, keep_loop_stop, assume_sufficient_line_padding,
default_float_type): default_float_type):
"""Goes over all innermost loops, changes increment to vector width and replaces field accesses by vector type.""" """Goes over all innermost loops, changes increment to vector width and replaces field accesses by vector type."""
vector_width = ast_node.instruction_set['width'] all_loops = list(filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment))
all_loops = filtered_tree_iteration(ast_node, ast.LoopOverCoordinate, stop_type=ast.SympyAssignment)
inner_loops = [loop for loop in all_loops if loop.is_innermost_loop] inner_loops = [loop for loop in all_loops if loop.is_innermost_loop]
zero_loop_counters = {loop.loop_counter_symbol: 0 for loop in all_loops} zero_loop_counters = {loop.loop_counter_symbol: 0 for loop in all_loops}
assert ast_node.instruction_set,\
"The ast needs to hold information about the instruction_set for the vectorisation"
vector_width = ast_node.instruction_set['width']
vector_int_width = ast_node.instruction_set['intwidth']
load_a = ast_node.instruction_set['loadA']
load_u = ast_node.instruction_set['loadU']
for loop_node in inner_loops: for loop_node in inner_loops:
loop_range = loop_node.stop - loop_node.start loop_range = loop_node.stop - loop_node.start
...@@ -174,8 +180,18 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem ...@@ -174,8 +180,18 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
for indexed in loop_node.atoms(sp.Indexed): for indexed in loop_node.atoms(sp.Indexed):
base, index = indexed.args base, index = indexed.args
if loop_counter_symbol in index.atoms(sp.Symbol): if loop_counter_symbol in index.atoms(sp.Symbol):
if not isinstance(vector_width, int) or load_a == load_u:
# When the vector width is not known during code generation, we cannot determine whether
# the access is aligned or not. None of the current sizeless vector ISAs (SVE and RISC-V-V)
# have separate load/store instructions for aligned and unaligned, so there is no disadvantage
# to falling back to unaligned here. When new ISAs become available, this may need to be revisited.
# On sized vector ISAs that do not have separate instructions for aligned and unaligned access,
# alignment does not matter here either
aligned_access = False
else:
aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) % vector_width == 0
loop_counter_is_offset = loop_counter_symbol not in (index - loop_counter_symbol).atoms() loop_counter_is_offset = loop_counter_symbol not in (index - loop_counter_symbol).atoms()
aligned_access = (index - loop_counter_symbol).subs(zero_loop_counters) == 0
stride = sp.simplify(index.subs({loop_counter_symbol: loop_counter_symbol + 1}) - index) stride = sp.simplify(index.subs({loop_counter_symbol: loop_counter_symbol + 1}) - index)
if not loop_counter_is_offset and (not strided or loop_counter_symbol in stride.atoms()): if not loop_counter_is_offset and (not strided or loop_counter_symbol in stride.atoms()):
successful = False successful = False
...@@ -204,7 +220,6 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem ...@@ -204,7 +220,6 @@ def vectorize_inner_loops_and_adapt_load_stores(ast_node, assume_aligned, nontem
loop_node.step = vector_width loop_node.step = vector_width
loop_node.subs(substitutions) loop_node.subs(substitutions)
vector_int_width = ast_node.instruction_set['intwidth']
arg_1 = CastFunc(loop_counter_symbol, VectorType(loop_counter_symbol.dtype, vector_int_width)) arg_1 = CastFunc(loop_counter_symbol, VectorType(loop_counter_symbol.dtype, vector_int_width))
arg_2 = CastFunc(tuple(range(vector_int_width if type(vector_int_width) is int else 2)), arg_2 = CastFunc(tuple(range(vector_int_width if type(vector_int_width) is int else 2)),
VectorType(loop_counter_symbol.dtype, vector_int_width)) VectorType(loop_counter_symbol.dtype, vector_int_width))
......
...@@ -119,11 +119,14 @@ def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set ...@@ -119,11 +119,14 @@ def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set
cpu_vectorize_info=opt, ghost_layers=gl_kernel) cpu_vectorize_info=opt, ghost_layers=gl_kernel)
ast = ps.create_kernel(update_rule, config=config) ast = ps.create_kernel(update_rule, config=config)
kernel = ast.compile() kernel = ast.compile()
if gl_kernel != gl_field: if ast.instruction_set['loadA'] == ast.instruction_set['loadU']:
with pytest.raises(ValueError):
dh.run_kernel(kernel)
else:
dh.run_kernel(kernel) dh.run_kernel(kernel)
else:
if gl_kernel != gl_field:
with pytest.raises(ValueError):
dh.run_kernel(kernel)
else:
dh.run_kernel(kernel)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets) @pytest.mark.parametrize('instruction_set', supported_instruction_sets)
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment