Skip to content
Snippets Groups Projects
Commit c331d24f authored by Michael Kuron :mortar_board:
Browse files

maskStore improvements

- fix the aligned version
- make sure the test case is incommensurate with the vector width
- implement a fallback for instruction sets that don't support it natively
parent 059de5fb
Branches
Tags
1 merge request: !235 maskStore improvements
Pipeline #31670 passed with stage
in 8 minutes and 54 seconds
......@@ -261,7 +261,11 @@ class CBackend:
if aligned:
instr = 'stream' if nontemporal and 'stream' in self._vector_instruction_set else 'storeA'
if mask != True: # NOQA
instr = 'maskStore' if aligned else 'maskStoreU'
instr = 'maskStoreA' if aligned else 'maskStoreU'
if instr not in self._vector_instruction_set:
self._vector_instruction_set[instr] = self._vector_instruction_set['store' + instr[-1]].format(
'{0}', self._vector_instruction_set['blendv'].format(
self._vector_instruction_set['load' + instr[-1]].format('{0}'), '{1}', '{2}'))
printed_mask = self.sympy_printer.doprint(mask)
if data_type.base_type.base_name == 'double':
if self._vector_instruction_set['double'] == '__m256d':
......
......@@ -57,7 +57,7 @@ def get_vector_instruction_set_x86(data_type='double', instruction_set='avx'):
'storeU': 'storeu[0,1]',
'storeA': 'store[0,1]',
'stream': 'stream[0,1]',
'maskStore': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]',
'maskStoreA': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]',
'maskStoreU': 'mask_storeu[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]',
}
......
......@@ -80,10 +80,8 @@ def test_boolean_before_loop():
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_vec_maskstore(instruction_set, dtype):
if instruction_set in ['neon', 'vsx']:
pytest.skip('no mask-store instructions available')
data_arr = np.zeros((16, 16), dtype=np.float64 if dtype == 'double' else np.float32)
data_arr[4:-4, 4:-4] = 1.0
data_arr[3:-3, 3:-3] = 1.0
data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
c = [
......@@ -93,8 +91,10 @@ def test_vec_maskstore(instruction_set, dtype):
]
ast = ps.create_kernel(c, target='cpu',
cpu_vectorize_info={'instruction_set': instruction_set})
ps.show_code(ast)
kernel = ast.compile()
kernel(data=data_arr)
np.testing.assert_equal(data_arr[0:4, :], 2.0)
np.testing.assert_equal(data_arr[4:-4, 4:-4], 1.0)
np.testing.assert_equal(data_arr[:3, :], 2.0)
np.testing.assert_equal(data_arr[-3:, :], 2.0)
np.testing.assert_equal(data_arr[:, :3], 2.0)
np.testing.assert_equal(data_arr[:, -3:], 2.0)
np.testing.assert_equal(data_arr[3:-3, 3:-3], 1.0)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment