Commit 9d9919ff authored by Michael Kuron

Merge branch 'Regression' into 'master'

Fix Regression from !300

See merge request !303
parents b1308c07 082e5c2b
Pipeline #43348 passed with stages in 13 minutes and 57 seconds
@@ -193,6 +193,10 @@ class KernelFunction(Node):
         # function that compiles the node to a Python callable, is set by the backends
         self._compile_function = compile_function
         self.assignments = assignments
+        # If nontemporal stores are activated together with the Neon instruction set it results in cacheline zeroing
+        # For cacheline zeroing the information of the field size for each field is needed. Thus, in this case
+        # all field sizes are kernel parameters and not just the common field size used for the loops
+        self.use_all_written_field_sizes = False

     @property
     def target(self):
@@ -233,7 +237,8 @@ class KernelFunction(Node):
     @property
     def fields_written(self) -> Set[Field]:
         assignments = self.atoms(SympyAssignment)
-        return {a.lhs.field for a in assignments if isinstance(a.lhs, ResolvedFieldAccess)}
+        return set().union(itertools.chain.from_iterable([f.field for f in a.lhs.free_symbols if hasattr(f, 'field')]
+                                                         for a in assignments))

     @property
     def fields_read(self) -> Set[Field]:
@@ -247,6 +252,11 @@ class KernelFunction(Node):
         This function is expensive, cache the result where possible!
         """
         field_map = {f.name: f for f in self.fields_accessed}
+        sizes = set()
+        if self.use_all_written_field_sizes:
+            sizes = set().union(*(a.shape[:a.spatial_dimensions] for a in self.fields_written))
+            sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes)
+
         def get_fields(symbol):
             if hasattr(symbol, 'field_name'):
@@ -256,6 +266,7 @@ class KernelFunction(Node):
             return ()

         argument_symbols = self._body.undefined_symbols - self.global_variables
+        argument_symbols.update(sizes)
         parameters = [self.Parameter(symbol, get_fields(symbol)) for symbol in argument_symbols]
         if hasattr(self, 'indexing'):
             parameters += [self.Parameter(s, []) for s in self.indexing.symbolic_parameters()]
@@ -622,11 +633,6 @@ class SympyAssignment(Node):
         result.update(self._lhs_symbol.atoms(sp.Symbol))
-        sizes = set().union(*(a.field.shape[:a.field.spatial_dimensions]
-                              for a in self._lhs_symbol.atoms(ResolvedFieldAccess)))
-        sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes)
-        result.update(sizes)
         return result

     @property
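Below is a minimal, self-contained sketch of the two pieces changed in this file: fields_written now gathers fields from the free symbols of each assignment's left-hand side, and the new use_all_written_field_sizes flag makes the written fields' symbolic shape entries part of the kernel parameters. The Field, FieldShapeSymbol, Access and Assignment stand-ins below are illustrative only, not the pystencils classes.

# Minimal, self-contained sketch (stand-in classes, not the pystencils ones) of the
# logic moved into KernelFunction above.
import itertools
from collections import namedtuple

Field = namedtuple('Field', ['name', 'shape', 'spatial_dimensions'])    # stand-in for pystencils.Field
FieldShapeSymbol = namedtuple('FieldShapeSymbol', ['name'])             # stand-in for the symbolic size type
Access = namedtuple('Access', ['field'])                                # lhs symbol that carries a .field
Assignment = namedtuple('Assignment', ['lhs_free_symbols'])             # stand-in for SympyAssignment

def fields_written(assignments):
    # union of all fields referenced by symbols on the left-hand sides
    return set().union(itertools.chain.from_iterable(
        [s.field for s in a.lhs_free_symbols if hasattr(s, 'field')] for a in assignments))

def size_parameters(written_fields, use_all_written_field_sizes):
    sizes = set()
    if use_all_written_field_sizes:
        # spatial shape entries of every written field ...
        sizes = set().union(*(f.shape[:f.spatial_dimensions] for f in written_fields))
        # ... keeping only symbolic sizes; fixed integer shapes need no kernel parameter
        sizes = {s for s in sizes if isinstance(s, FieldShapeSymbol)}
    return sizes

# Hypothetical example: one symbolically sized field and one fixed-size field
f = Field('f', (FieldShapeSymbol('_size_f_0'), FieldShapeSymbol('_size_f_1'), 3), 2)
g = Field('g', (16, 16, 3), 2)
written = fields_written([Assignment((Access(f),)), Assignment((Access(g),))])
print(size_parameters(written, use_all_written_field_sizes=True))

The actual change keeps sizes as a lazy filter object and merges it into the argument symbols via argument_symbols.update(sizes); the sketch materializes a set for clarity.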
@@ -127,6 +127,8 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
     vector_is = get_vector_instruction_set(default_float_type, instruction_set=instruction_set)
     kernel_ast.instruction_set = vector_is
+    if nontemporal and 'cachelineZero' in vector_is:
+        kernel_ast.use_all_written_field_sizes = True
     strided = 'storeS' in vector_is and 'loadS' in vector_is
     keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned else 'storeU']
     vectorize_inner_loops_and_adapt_load_stores(kernel_ast, assume_aligned, nontemporal,
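A hedged usage sketch of how the new branch in vectorize would be exercised; the module path, the field setup, and passing assume_aligned=True together with nontemporal=True are assumptions based on this diff, not verified against the repository.

# Assumed usage sketch: build a simple kernel, vectorize it for an instruction set
# that (per the comment added above) provides cacheline zeroing, and inspect the
# kernel parameters afterwards.
import pystencils as ps
from pystencils.cpu.vectorization import vectorize   # assumed module path

src, dst = ps.fields("src, dst: double[2D]")
kernel_ast = ps.create_kernel([ps.Assignment(dst.center, 2 * src.center)])

# nontemporal stores plus a 'cachelineZero' entry in the instruction set should
# now set use_all_written_field_sizes on the kernel AST.
vectorize(kernel_ast, instruction_set='neon', assume_aligned=True, nontemporal=True)

print(kernel_ast.use_all_written_field_sizes)
# With the fix, the shape symbols of all written fields are included among the parameters.
print([p.symbol.name for p in kernel_ast.get_parameters()])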