diff --git a/pystencils/astnodes.py b/pystencils/astnodes.py index f4879ffe808be903dec78d353008aa77b44b0511..d91c1dad7036c8cc2f3e2664e047c5b9a13dbd12 100644 --- a/pystencils/astnodes.py +++ b/pystencils/astnodes.py @@ -193,6 +193,10 @@ class KernelFunction(Node): # function that compiles the node to a Python callable, is set by the backends self._compile_function = compile_function self.assignments = assignments + # If nontemporal stores are activated together with the Neon instruction set it results in cacheline zeroing + # For cacheline zeroing the information of the field size for each field is needed. Thus, in this case + # all field sizes are kernel parameters and not just the common field size used for the loops + self.use_all_written_field_sizes = False @property def target(self): @@ -233,7 +237,8 @@ class KernelFunction(Node): @property def fields_written(self) -> Set[Field]: assignments = self.atoms(SympyAssignment) - return {a.lhs.field for a in assignments if isinstance(a.lhs, ResolvedFieldAccess)} + return set().union(itertools.chain.from_iterable([f.field for f in a.lhs.free_symbols if hasattr(f, 'field')] + for a in assignments)) @property def fields_read(self) -> Set[Field]: @@ -247,6 +252,11 @@ class KernelFunction(Node): This function is expensive, cache the result where possible! """ field_map = {f.name: f for f in self.fields_accessed} + sizes = set() + + if self.use_all_written_field_sizes: + sizes = set().union(*(a.shape[:a.spatial_dimensions] for a in self.fields_written)) + sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes) def get_fields(symbol): if hasattr(symbol, 'field_name'): @@ -256,6 +266,7 @@ class KernelFunction(Node): return () argument_symbols = self._body.undefined_symbols - self.global_variables + argument_symbols.update(sizes) parameters = [self.Parameter(symbol, get_fields(symbol)) for symbol in argument_symbols] if hasattr(self, 'indexing'): parameters += [self.Parameter(s, []) for s in self.indexing.symbolic_parameters()] @@ -622,11 +633,6 @@ class SympyAssignment(Node): result.update(self._lhs_symbol.atoms(sp.Symbol)) - sizes = set().union(*(a.field.shape[:a.field.spatial_dimensions] - for a in self._lhs_symbol.atoms(ResolvedFieldAccess))) - sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes) - result.update(sizes) - return result @property diff --git a/pystencils/cpu/vectorization.py b/pystencils/cpu/vectorization.py index 3141eb4009579d4028a1a70a488a78f051519cdb..3adcbc31b0840ac5e4973c3c06cce850217e5890 100644 --- a/pystencils/cpu/vectorization.py +++ b/pystencils/cpu/vectorization.py @@ -127,6 +127,8 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best', vector_is = get_vector_instruction_set(default_float_type, instruction_set=instruction_set) kernel_ast.instruction_set = vector_is + if nontemporal and 'cachelineZero' in vector_is: + kernel_ast.use_all_written_field_sizes = True strided = 'storeS' in vector_is and 'loadS' in vector_is keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned else 'storeU'] vectorize_inner_loops_and_adapt_load_stores(kernel_ast, assume_aligned, nontemporal,