diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index 325cb3cddb0d07773a3ecaa1b382a49875a33414..9d7aa3cc30ba465fc97b97b75d7227776366ae51 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -280,19 +280,19 @@ class CBackend: code = self._vector_instruction_set[instr].format(ptr, self.sympy_printer.doprint(rhs), printed_mask) + ';' - flushcond = f"((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) != {CachelineSize.last_symbol}" + flushcond = f"((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == {CachelineSize.last_symbol}" if nontemporal and 'flushCacheline' in self._vector_instruction_set: code2 = self._vector_instruction_set['flushCacheline'].format( ptr, self.sympy_printer.doprint(rhs)) + ';' code = f"{code}\nif ({flushcond}) {{\n\t{code2}\n}}" - elif nontemporal and 'streamAndFlushCacheline' in self._vector_instruction_set: + elif nontemporal and 'storeAAndFlushCacheline' in self._vector_instruction_set: tmpvar = '_tmp_' + hashlib.sha1(self.sympy_printer.doprint(rhs).encode('ascii')).hexdigest()[:8] code = 'const ' + self._print(node.lhs.dtype).replace(' const', '') + ' ' + tmpvar + ' = ' \ + self.sympy_printer.doprint(rhs) + ';' - code1 = self._vector_instruction_set['stream'].format(ptr, tmpvar, printed_mask) + ';' - code2 = self._vector_instruction_set['streamAndFlushCacheline'].format(ptr, tmpvar, printed_mask) \ + code1 = self._vector_instruction_set[instr].format(ptr, tmpvar, printed_mask) + ';' + code2 = self._vector_instruction_set['storeAAndFlushCacheline'].format(ptr, tmpvar, printed_mask) \ + ';' - code += f"\nif ({flushcond}) {{\n\t{code1}\n}} else {{\n\t{code2}\n}}" + code += f"\nif ({flushcond}) {{\n\t{code2}\n}} else {{\n\t{code1}\n}}" return pre_code + code else: return f"{self.sympy_printer.doprint(node.lhs)} = {self.sympy_printer.doprint(node.rhs)};" diff --git a/pystencils/backends/ppc_instruction_sets.py b/pystencils/backends/ppc_instruction_sets.py index bd67cabc52a8c12919d402a3b49f89158e2de3fa..f792127618b025b1bc9cf272e9c2c59d93422318 100644 --- a/pystencils/backends/ppc_instruction_sets.py +++ b/pystencils/backends/ppc_instruction_sets.py @@ -29,7 +29,7 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'): 'loadA': 'ld[0x0, 0]', 'storeU': 'xst[1, 0x0, 0]', 'storeA': 'st[1, 0x0, 0]', - 'streamAndFlushCacheline': 'stl[1, 0x0, 0]', + 'storeAAndFlushCacheline': 'stl[1, 0x0, 0]', 'abs': 'abs[0]', '==': 'cmpeq[0, 1]', @@ -79,7 +79,7 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'): result['loadA'] = '(__vector double)' + result['loadA'].format('(float*) {0}') result['storeA'] = result['storeA'].format('(float*) {0}', '(__vector float) {1}') result['stream'] = result['stream'].format('(float*) {0}', '(__vector float) {1}') - result['streamAndFlushCacheline'] = result['streamAndFlushCacheline'].format('(float*) {0}', + result['storeAAndFlushCacheline'] = result['storeAAndFlushCacheline'].format('(float*) {0}', '(__vector float) {1}') result['+int'] = "vec_add({0}, {1})" diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py index 38cdcfd2c96269a27a87f8ec456c31518f7cc902..880a009a294569f004f7f8a0cba3a02b98ec5bd2 100644 --- a/pystencils_tests/test_vectorization.py +++ b/pystencils_tests/test_vectorization.py @@ -48,7 +48,14 @@ def test_aligned_and_nt_stores(openmp=False): 'assume_inner_stride_one': True} update_rule = [ps.Assignment(f.center(), 0.25 * (g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1]))] ast = ps.create_kernel(update_rule, target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp) - for instruction in ['stream', 'streamFence', 'cachelineZero', 'streamAndFlushCacheline', 'flushCacheline']: + if instruction_set in ['sse'] or instruction_set.startswith('avx'): + assert 'stream' in ast.instruction_set + assert 'streamFence' in ast.instruction_set + if instruction_set in ['neon', 'vsx'] or instruction_set.startswith('sve'): + assert 'cachelineZero' in ast.instruction_set + if instruction_set in ['vsx']: + assert 'storeAAndFlushCacheline' in ast.instruction_set + for instruction in ['stream', 'streamFence', 'cachelineZero', 'storeAAndFlushCacheline', 'flushCacheline']: if instruction in ast.instruction_set: assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast) kernel = ast.compile()