diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index 988a8e518186dd1f855d08795abc8ceea8196cdc..6705884681777c2a4eab517253bd8a4a11548509 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -261,7 +261,11 @@ class CBackend: if aligned: instr = 'stream' if nontemporal and 'stream' in self._vector_instruction_set else 'storeA' if mask != True: # NOQA - instr = 'maskStore' if aligned else 'maskStoreU' + instr = 'maskStoreA' if aligned else 'maskStoreU' + if instr not in self._vector_instruction_set: + self._vector_instruction_set[instr] = self._vector_instruction_set['store' + instr[-1]].format( + '{0}', self._vector_instruction_set['blendv'].format( + self._vector_instruction_set['load' + instr[-1]].format('{0}'), '{1}', '{2}')) printed_mask = self.sympy_printer.doprint(mask) if data_type.base_type.base_name == 'double': if self._vector_instruction_set['double'] == '__m256d': diff --git a/pystencils/backends/x86_instruction_sets.py b/pystencils/backends/x86_instruction_sets.py index 50005c5ae5963d4b48edbafafcd99d492eaff584..5cf049415ada04ad40eb8c7daad2bd6289748d44 100644 --- a/pystencils/backends/x86_instruction_sets.py +++ b/pystencils/backends/x86_instruction_sets.py @@ -57,7 +57,7 @@ def get_vector_instruction_set_x86(data_type='double', instruction_set='avx'): 'storeU': 'storeu[0,1]', 'storeA': 'store[0,1]', 'stream': 'stream[0,1]', - 'maskStore': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]', + 'maskStoreA': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]', 'maskStoreU': 'mask_storeu[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]', } diff --git a/pystencils_tests/test_conditional_vec.py b/pystencils_tests/test_conditional_vec.py index 1274aa6749eb4a40a94c393f1cc446a3a806ff82..959e20c2b848128d2fadfaecd928f0d22b6d8b3d 100644 --- a/pystencils_tests/test_conditional_vec.py +++ b/pystencils_tests/test_conditional_vec.py @@ -80,10 +80,8 @@ def test_boolean_before_loop(): @pytest.mark.parametrize('instruction_set', supported_instruction_sets) @pytest.mark.parametrize('dtype', ('float', 'double')) def test_vec_maskstore(instruction_set, dtype): - if instruction_set in ['neon', 'vsx']: - pytest.skip('no mask-store instructions available') data_arr = np.zeros((16, 16), dtype=np.float64 if dtype == 'double' else np.float32) - data_arr[4:-4, 4:-4] = 1.0 + data_arr[3:-3, 3:-3] = 1.0 data = ps.fields(f"data: {dtype}[2D]", data=data_arr) c = [ @@ -93,8 +91,10 @@ def test_vec_maskstore(instruction_set, dtype): ] ast = ps.create_kernel(c, target='cpu', cpu_vectorize_info={'instruction_set': instruction_set}) - ps.show_code(ast) kernel = ast.compile() kernel(data=data_arr) - np.testing.assert_equal(data_arr[0:4, :], 2.0) - np.testing.assert_equal(data_arr[4:-4, 4:-4], 1.0) + np.testing.assert_equal(data_arr[:3, :], 2.0) + np.testing.assert_equal(data_arr[-3:, :], 2.0) + np.testing.assert_equal(data_arr[:, :3], 2.0) + np.testing.assert_equal(data_arr[:, -3:], 2.0) + np.testing.assert_equal(data_arr[3:-3, 3:-3], 1.0)