From 8f92d147da830f0ebd83caea6279eadd403cf6c8 Mon Sep 17 00:00:00 2001
From: Michael Kuron <m.kuron@gmx.de>
Date: Fri, 2 Apr 2021 16:49:31 +0200
Subject: [PATCH] Remove 'stream' from instruction sets that lack a real streaming store

---
 pystencils/backends/arm_instruction_sets.py |  1 -
 pystencils/backends/cbackend.py             |  8 ++++----
 pystencils/backends/ppc_instruction_sets.py |  1 -
 pystencils_tests/test_vectorization.py      | 11 +++--------
 4 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/pystencils/backends/arm_instruction_sets.py b/pystencils/backends/arm_instruction_sets.py
index 4660ced01..a386253a0 100644
--- a/pystencils/backends/arm_instruction_sets.py
+++ b/pystencils/backends/arm_instruction_sets.py
@@ -28,7 +28,6 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
         'loadA': 'ld1[0]',
         'storeU': 'st1[0, 1]',
         'storeA': 'st1[0, 1]',
-        'stream': 'st1[0, 1]',
 
         'abs': 'abs[0]',
         '==': 'ceq[0, 1]',
diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py
index 9ff44bbef..325cb3cdd 100644
--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -259,7 +259,7 @@ class CBackend:
                 arg, data_type, aligned, nontemporal, mask = node.lhs.args
                 instr = 'storeU'
                 if aligned:
-                    instr = 'stream' if nontemporal else 'storeA'
+                    instr = 'stream' if nontemporal and 'stream' in self._vector_instruction_set else 'storeA'
                 if mask != True:  # NOQA
                     instr = 'maskStore' if aligned else 'maskStoreU'
                     printed_mask = self.sympy_printer.doprint(mask)
@@ -274,18 +274,18 @@ class CBackend:
 
                 ptr = "&" + self.sympy_printer.doprint(node.lhs.args[0])
                 pre_code = ''
-                if instr == 'stream' and 'cachelineZero' in self._vector_instruction_set:
+                if nontemporal and 'cachelineZero' in self._vector_instruction_set:
                     pre_code = f"if (((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == 0) " + "{\n\t" + \
                         self._vector_instruction_set['cachelineZero'].format(ptr) + ';\n}\n'
 
                 code = self._vector_instruction_set[instr].format(ptr, self.sympy_printer.doprint(rhs),
                                                                   printed_mask) + ';'
                 flushcond = f"((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) != {CachelineSize.last_symbol}"
-                if instr == 'stream' and 'flushCacheline' in self._vector_instruction_set:
+                if nontemporal and 'flushCacheline' in self._vector_instruction_set:
                     code2 = self._vector_instruction_set['flushCacheline'].format(
                         ptr, self.sympy_printer.doprint(rhs)) + ';'
                     code = f"{code}\nif ({flushcond}) {{\n\t{code2}\n}}"
-                elif instr == 'stream' and 'streamAndFlushCacheline' in self._vector_instruction_set:
+                elif nontemporal and 'streamAndFlushCacheline' in self._vector_instruction_set:
                     tmpvar = '_tmp_' + hashlib.sha1(self.sympy_printer.doprint(rhs).encode('ascii')).hexdigest()[:8]
                     code = 'const ' + self._print(node.lhs.dtype).replace(' const', '') + ' ' + tmpvar + ' = ' \
                         + self.sympy_printer.doprint(rhs) + ';'
diff --git a/pystencils/backends/ppc_instruction_sets.py b/pystencils/backends/ppc_instruction_sets.py
index 938677f6a..bd67cabc5 100644
--- a/pystencils/backends/ppc_instruction_sets.py
+++ b/pystencils/backends/ppc_instruction_sets.py
@@ -29,7 +29,6 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'):
         'loadA': 'ld[0x0, 0]',
         'storeU': 'xst[1, 0x0, 0]',
         'storeA': 'st[1, 0x0, 0]',
-        'stream': 'st[1, 0x0, 0]',
         'streamAndFlushCacheline': 'stl[1, 0x0, 0]',
 
         'abs': 'abs[0]',
diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py
index 783b9bb34..38cdcfd2c 100644
--- a/pystencils_tests/test_vectorization.py
+++ b/pystencils_tests/test_vectorization.py
@@ -48,14 +48,9 @@ def test_aligned_and_nt_stores(openmp=False):
            'assume_inner_stride_one': True}
     update_rule = [ps.Assignment(f.center(), 0.25 * (g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1]))]
     ast = ps.create_kernel(update_rule, target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp)
-    if 'streamFence' in ast.instruction_set:
-        assert ast.instruction_set['streamFence'] in ps.get_code_str(ast)
-    if 'cachelineZero' in ast.instruction_set:
-        assert ast.instruction_set['cachelineZero'].split('{')[0] in ps.get_code_str(ast)
-    if 'streamAndFlushCacheline' in ast.instruction_set:
-        assert ast.instruction_set['streamAndFlushCacheline'].split('{')[0] in ps.get_code_str(ast)
-    if 'flushCacheline' in ast.instruction_set:
-        assert ast.instruction_set['flushCacheline'].split('{')[0] in ps.get_code_str(ast)
+    for instruction in ['stream', 'streamFence', 'cachelineZero', 'streamAndFlushCacheline', 'flushCacheline']:
+        if instruction in ast.instruction_set:
+            assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast)
     kernel = ast.compile()
 
     dh.run_kernel(kernel)
-- 
GitLab