diff --git a/pystencils/astnodes.py b/pystencils/astnodes.py index f3ed2711c844c0bbd8c75100e9a7d1a2d506b7b7..f4879ffe808be903dec78d353008aa77b44b0511 100644 --- a/pystencils/astnodes.py +++ b/pystencils/astnodes.py @@ -619,7 +619,14 @@ class SympyAssignment(Node): for i in range(len(symbol.offsets)): loop_counters.add(LoopOverCoordinate.get_loop_counter_symbol(i)) result.update(loop_counters) + result.update(self._lhs_symbol.atoms(sp.Symbol)) + + sizes = set().union(*(a.field.shape[:a.field.spatial_dimensions] + for a in self._lhs_symbol.atoms(ResolvedFieldAccess))) + sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes) + result.update(sizes) + return result @property diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index de9cb0d31b3be9903ac758991a0fca345f17a4a7..e06c298987650d507d6c8f8aa21ffc09001efda3 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -339,7 +339,9 @@ class CBackend: ptr, self.sympy_printer.doprint(rhs), **self._kwargs) + ';' code = f"{code}\nif ({flushcond}) {{\n\t{code2}\n}}" elif nontemporal and 'storeAAndFlushCacheline' in self._vector_instruction_set: - tmpvar = '_tmp_' + hashlib.sha1(self.sympy_printer.doprint(rhs).encode('ascii')).hexdigest()[:8] + lhs_hash = hashlib.sha1(self.sympy_printer.doprint(node.lhs).encode('ascii')).hexdigest()[:8] + rhs_hash = hashlib.sha1(self.sympy_printer.doprint(rhs).encode('ascii')).hexdigest()[:8] + tmpvar = f'_tmp_{lhs_hash}_{rhs_hash}' code = 'const ' + self._print(node.lhs.dtype).replace(' const', '') + ' ' + tmpvar + ' = ' \ + self.sympy_printer.doprint(rhs) + ';' code1 = self._vector_instruction_set[instr].format(ptr, tmpvar, printed_mask, **self._kwargs) + ';' diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py index f526341ec587b508f6c28f8dd2596125366f8ecc..7b97b7b0a8ae99a98993b8807744931a97a6ee78 100644 --- a/pystencils_tests/test_vectorization.py +++ 
b/pystencils_tests/test_vectorization.py
@@ -75,6 +75,17 @@ def test_aligned_and_nt_stores(openmp, instruction_set=instruction_set):
     np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size))
 
 
+def test_nt_stores_symbolic_size(instruction_set=instruction_set):
+    """Smoke test: two non-temporal stores of the same value into symbolic-size fields must compile."""
+    vectorize_info = {'instruction_set': instruction_set, 'assume_aligned': True,
+                      'nontemporal': True, 'assume_inner_stride_one': True}
+    kernel_config = pystencils.config.CreateKernelConfig(target=Target.CPU, cpu_vectorize_info=vectorize_info)
+    # Both fields receive the identical right-hand side; only the store targets differ.
+    dst_f, dst_g = ps.fields('f, g: [2D]', layout='fzyx')
+    zero_fill = [ps.Assignment(field.center(), 0.0) for field in (dst_f, dst_g)]
+    ps.create_kernel(zero_fill, config=kernel_config).compile()
+
+
 def test_inplace_update(instruction_set=instruction_set):
     shape = (9, 9, 3)
     arr = np.ones(shape, order='f')