Commit 67c3f7bb authored by Michael Kuron
Browse files

Fix nontemporal stores on non-x86 for fields with variable size

parent f9ebdb5b
Pipeline #41194 failed with stages
in 3 minutes and 15 seconds
......@@ -619,7 +619,12 @@ class SympyAssignment(Node):
for i in range(len(symbol.offsets)):
loop_counters.add(LoopOverCoordinate.get_loop_counter_symbol(i))
result.update(loop_counters)
result.update(self._lhs_symbol.atoms(sp.Symbol))
sizes = set().union(*(i.field.shape for i in self._lhs_symbol.atoms(ResolvedFieldAccess)))
result.update(sizes)
return result
@property
......
......@@ -75,6 +75,17 @@ def test_aligned_and_nt_stores(openmp, instruction_set=instruction_set):
np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size))
def test_nt_stores_symbolic_size(instruction_set=instruction_set):
    """Create and compile a vectorized CPU kernel that uses nontemporal stores.

    Two fields are declared with the ``[2D]`` spec (no concrete shape is
    given) and both are set to zero at the center. The test contains no
    explicit assertions: it passes when kernel creation and compilation
    finish without raising.
    """
    f, g = ps.fields('f, g: [2D]', layout='fzyx')
    zero_assignments = [ps.Assignment(field.center(), 0.0) for field in (f, g)]

    vectorize_options = {
        'instruction_set': instruction_set,
        'assume_aligned': True,
        'nontemporal': True,
        'assume_inner_stride_one': True,
    }
    kernel_config = pystencils.config.CreateKernelConfig(
        target=Target.CPU,
        cpu_vectorize_info=vectorize_options,
    )

    kernel_ast = ps.create_kernel(zero_assignments, config=kernel_config)
    # Debugging aid, left disabled: ps.show_code(kernel_ast)
    kernel_ast.compile()
def test_inplace_update(instruction_set=instruction_set):
shape = (9, 9, 3)
arr = np.ones(shape, order='f')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment