Skip to content
Snippets Groups Projects
Commit a4d16275, authored by Michael Kuron :mortar_board:
Browse files

Fix nontemporal stores on non-x86 for fields with variable size

parent 205d0a39
Branches
Tags
1 merge request: !300 "Fix nontemporal stores on non-x86 for fields with variable size"
Pipeline #41195 failed with stages
in 4 minutes and 25 seconds
......@@ -619,7 +619,13 @@ class SympyAssignment(Node):
for i in range(len(symbol.offsets)):
loop_counters.add(LoopOverCoordinate.get_loop_counter_symbol(i))
result.update(loop_counters)
result.update(self._lhs_symbol.atoms(sp.Symbol))
sizes = set().union(*(a.field.shape for a in self._lhs_symbol.atoms(ResolvedFieldAccess)))
sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes)
result.update(sizes)
return result
@property
......
......@@ -75,6 +75,17 @@ def test_aligned_and_nt_stores(openmp, instruction_set=instruction_set):
np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size))
def test_nt_stores_symbolic_size(instruction_set=instruction_set):
    """Smoke test: a vectorized kernel with nontemporal stores must compile
    when the fields are declared without a fixed shape.

    Two 2D fields in 'fzyx' layout are zeroed at their center. The test makes
    no numerical assertion; it passes as soon as kernel creation and
    compilation finish without raising.
    """
    f, g = ps.fields('f, g: [2D]', layout='fzyx')
    zero_both_fields = [ps.Assignment(field.center(), 0.0) for field in (f, g)]

    # Vectorization options handed to the CPU backend.
    vectorize_info = {
        'instruction_set': instruction_set,
        'assume_aligned': True,
        'nontemporal': True,
        'assume_inner_stride_one': True,
    }
    kernel_config = pystencils.config.CreateKernelConfig(
        target=Target.CPU,
        cpu_vectorize_info=vectorize_info,
    )

    kernel_ast = ps.create_kernel(zero_both_fields, config=kernel_config)
    # ps.show_code(kernel_ast)  # uncomment to inspect the generated code
    kernel_ast.compile()
def test_inplace_update(instruction_set=instruction_set):
shape = (9, 9, 3)
arr = np.ones(shape, order='f')
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment