diff --git a/pystencils/backends/arm_instruction_sets.py b/pystencils/backends/arm_instruction_sets.py index c6da59dfda5621154f292c4c50b75a2d3dad7a5b..4660ced016f2ce2ad2a15f8134d65c9180616bed 100644 --- a/pystencils/backends/arm_instruction_sets.py +++ b/pystencils/backends/arm_instruction_sets.py @@ -47,6 +47,7 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'): suffix = f'q_f{bits[data_type]}' result = dict() + result['bytes'] = 16 for intrinsic_id, function_shortcut in base_names.items(): function_shortcut = function_shortcut.strip() diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index c14adca46032d4066352b1985677167c7359fb81..a924b67fd2d8991206ff0f22be4743ab51227d38 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -1,5 +1,6 @@ import re from collections import namedtuple +import hashlib from typing import Set import numpy as np @@ -285,9 +286,13 @@ class CBackend: ptr, self.sympy_printer.doprint(rhs)) + ';' code = f"{code}\nif ({flushcond}) {{\n\t{code2}\n}}" elif instr == 'stream' and 'streamAndFlushCacheline' in self._vector_instruction_set: - code2 = self._vector_instruction_set['streamAndFlushCacheline'].format( - ptr, self.sympy_printer.doprint(rhs), printed_mask) + ';' - code = f"if ({flushcond}) {{\n\t{code}\n}} else {{\n\t{code2}\n}}" + tmpvar = '_tmp_' + hashlib.sha1(self.sympy_printer.doprint(rhs).encode('ascii')).hexdigest()[:8] + code = 'const ' + self._print(node.lhs.dtype).replace(' const', '') + ' ' + tmpvar + ' = ' \ + + self.sympy_printer.doprint(rhs) + ';' + code1 = self._vector_instruction_set['stream'].format(ptr, tmpvar, printed_mask) + ';' + code2 = self._vector_instruction_set['streamAndFlushCacheline'].format(ptr, tmpvar, printed_mask) \ + + ';' + code += f"\nif ({flushcond}) {{\n\t{code1}\n}} else {{\n\t{code2}\n}}" return pre_code + code else: return f"{self.sympy_printer.doprint(node.lhs)} = {self.sympy_printer.doprint(node.rhs)};" @@ -302,18 +307,15 @@ class CBackend: if 'cachelineSize' in self._vector_instruction_set: code = f'const size_t {node.symbol} = {self._vector_instruction_set["cachelineSize"]};\n' code += f'const size_t {node.mask_symbol} = {node.symbol} - 1;\n' - code += f'const size_t {node.last_symbol} = {node.symbol} - 16;\n' # TODO: determine size from instruction set + vectorsize = self._vector_instruction_set['bytes'] + code += f'const size_t {node.last_symbol} = {node.symbol} - {vectorsize};\n' return code else: return '' def _print_TemporaryMemoryAllocation(self, node): - parent = node.parent - while parent.parent is not None: - parent = parent.parent - instruction_set = parent.instruction_set - if instruction_set: - align = instruction_set['width'] * node.symbol.dtype.base_type.numpy_dtype.type(0).nbytes + if self._vector_instruction_set: + align = self._vector_instruction_set['bytes'] else: align = node.symbol.dtype.base_type.numpy_dtype.type(0).nbytes @@ -336,12 +338,8 @@ class CBackend: align=align) def _print_TemporaryMemoryFree(self, node): - parent = node.parent - while parent.parent is not None: - parent = parent.parent - instruction_set = parent.instruction_set - if instruction_set: - align = instruction_set['width'] * node.symbol.dtype.base_type.numpy_dtype.type(0).nbytes + if self._vector_instruction_set: + align = self._vector_instruction_set['bytes'] else: align = node.symbol.dtype.base_type.numpy_dtype.type(0).nbytes diff --git a/pystencils/backends/ppc_instruction_sets.py b/pystencils/backends/ppc_instruction_sets.py index ff4209aac98decf788067aa47b5ffc0eb6e6c8f0..b8116fd6aa982fc7d6c7205a9b5c286eb7bc5a84 100644 --- a/pystencils/backends/ppc_instruction_sets.py +++ b/pystencils/backends/ppc_instruction_sets.py @@ -65,6 +65,7 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'): intwidth = 128 // bits['int'] result = dict() + result['bytes'] = 16 for intrinsic_id, function_shortcut in base_names.items(): function_shortcut = function_shortcut.strip() diff --git a/pystencils/backends/x86_instruction_sets.py b/pystencils/backends/x86_instruction_sets.py index 7809a89790e9f6b98cf70765a4124097adf40562..86196aad7047c8f2bcc16a94b487537d3147d36d 100644 --- a/pystencils/backends/x86_instruction_sets.py +++ b/pystencils/backends/x86_instruction_sets.py @@ -110,7 +110,8 @@ def get_vector_instruction_set_x86(data_type='double', instruction_set='avx'): result = { 'width': width[(data_type, instruction_set)], - 'intwidth': width[('int', instruction_set)] + 'intwidth': width[('int', instruction_set)], + 'bytes': 4 * width[("float", instruction_set)] } pre = prefix[instruction_set] for intrinsic_id, function_shortcut in base_names.items(): diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py index c7ffa2f3e8eb3350601a301d4ed9a575f754555f..6852490bfc7666d2338261f209e6cc7dad8d7c37 100644 --- a/pystencils_tests/test_vectorization.py +++ b/pystencils_tests/test_vectorization.py @@ -55,11 +55,11 @@ def test_aligned_and_nt_stores(openmp=False): if 'streamFence' in ast.instruction_set: assert ast.instruction_set['streamFence'] in ps.get_code_str(ast) if 'cachelineZero' in ast.instruction_set: - assert ast.instruction_set['cachelineZero'].split('{0}')[0] in ps.get_code_str(ast) + assert ast.instruction_set['cachelineZero'].split('{')[0] in ps.get_code_str(ast) if 'streamAndFlushCacheline' in ast.instruction_set: - assert ast.instruction_set['streamAndFlushCacheline'].split('{0}')[0] in ps.get_code_str(ast) + assert ast.instruction_set['streamAndFlushCacheline'].split('{')[0] in ps.get_code_str(ast) if 'flushCacheline' in ast.instruction_set: - assert ast.instruction_set['flushCacheline'].split('{0}')[0] in ps.get_code_str(ast) + assert ast.instruction_set['flushCacheline'].split('{')[0] in ps.get_code_str(ast) kernel = ast.compile() dh.run_kernel(kernel)