diff --git a/pystencils/astnodes.py b/pystencils/astnodes.py index a1f282b9de584ae0f1b452379f5acf99c9373ded..53731645797afeb62a4a5ff803c2ee7b4e5d8e16 100644 --- a/pystencils/astnodes.py +++ b/pystencils/astnodes.py @@ -859,14 +859,16 @@ class NontemporalFence(Node): class CachelineSize(Node): + symbol = sp.Symbol("_clsize") mask_symbol = sp.Symbol("_clsize_mask") + last_symbol = sp.Symbol("_cl_lastvec") def __init__(self): super(CachelineSize, self).__init__(parent=None) @property def symbols_defined(self): - return set([self.mask_symbol]) + return set([self.symbol, self.mask_symbol, self.last_symbol]) @property def undefined_symbols(self): diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index 58dff94e05ef141aeae50db79e62c99f9bcddd33..c14adca46032d4066352b1985677167c7359fb81 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -274,11 +274,20 @@ class CBackend: ptr = "&" + self.sympy_printer.doprint(node.lhs.args[0]) pre_code = '' if instr == 'stream' and 'cachelineZero' in self._vector_instruction_set: - pre_code = f"if (((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == 0) " + "\n\t" + \ - self._vector_instruction_set['cachelineZero'].format(ptr) + ';\n' + pre_code = f"if (((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == 0) " + "{\n\t" + \ + self._vector_instruction_set['cachelineZero'].format(ptr) + ';\n}\n' code = self._vector_instruction_set[instr].format(ptr, self.sympy_printer.doprint(rhs), printed_mask) + ';' + flushcond = f"((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) != {CachelineSize.last_symbol}" + if instr == 'stream' and 'flushCacheline' in self._vector_instruction_set: + code2 = self._vector_instruction_set['flushCacheline'].format( + ptr, self.sympy_printer.doprint(rhs)) + ';' + code = f"{code}\nif ({flushcond}) {{\n\t{code2}\n}}" + elif instr == 'stream' and 'streamAndFlushCacheline' in self._vector_instruction_set: + code2 = self._vector_instruction_set['streamAndFlushCacheline'].format( + ptr, self.sympy_printer.doprint(rhs), printed_mask) + ';' + code = f"if ({flushcond}) {{\n\t{code}\n}} else {{\n\t{code2}\n}}" return pre_code + code else: return f"{self.sympy_printer.doprint(node.lhs)} = {self.sympy_printer.doprint(node.rhs)};" @@ -291,7 +300,10 @@ class CBackend: def _print_CachelineSize(self, node): if 'cachelineSize' in self._vector_instruction_set: - return f'const size_t {node.mask_symbol} = {self._vector_instruction_set["cachelineSize"]} - 1;' + code = f'const size_t {node.symbol} = {self._vector_instruction_set["cachelineSize"]};\n' + code += f'const size_t {node.mask_symbol} = {node.symbol} - 1;\n' + code += f'const size_t {node.last_symbol} = {node.symbol} - 16;\n' # TODO: determine size from instruction set + return code else: return '' diff --git a/pystencils/backends/ppc_instruction_sets.py b/pystencils/backends/ppc_instruction_sets.py index a1c481ae41ca2036b90dc5c338cefd7c71dc9fc4..ff4209aac98decf788067aa47b5ffc0eb6e6c8f0 100644 --- a/pystencils/backends/ppc_instruction_sets.py +++ b/pystencils/backends/ppc_instruction_sets.py @@ -29,7 +29,8 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'): 'loadA': 'ld[0x0, 0]', 'storeU': 'xst[1, 0x0, 0]', 'storeA': 'st[1, 0x0, 0]', - 'stream': 'st[1, 0x0, 0]', # stl would flush the cacheline, which only makes sense for the last item + 'stream': 'st[1, 0x0, 0]', + 'streamAndFlushCacheline': 'stl[1, 0x0, 0]', 'abs': 'abs[0]', '==': 'cmpeq[0, 1]', diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py index d05c37c6bfd15f6935ec29523321fcd45dff86f1..c7ffa2f3e8eb3350601a301d4ed9a575f754555f 100644 --- a/pystencils_tests/test_vectorization.py +++ b/pystencils_tests/test_vectorization.py @@ -56,6 +56,10 @@ def test_aligned_and_nt_stores(openmp=False): assert ast.instruction_set['streamFence'] in ps.get_code_str(ast) if 'cachelineZero' in ast.instruction_set: assert ast.instruction_set['cachelineZero'].split('{0}')[0] in ps.get_code_str(ast) + if 'streamAndFlushCacheline' in ast.instruction_set: + assert ast.instruction_set['streamAndFlushCacheline'].split('{0}')[0] in ps.get_code_str(ast) + if 'flushCacheline' in ast.instruction_set: + assert ast.instruction_set['flushCacheline'].split('{0}')[0] in ps.get_code_str(ast) kernel = ast.compile() dh.run_kernel(kernel)