Commit 059de5fb authored by Markus Holzer's avatar Markus Holzer
Browse files

Merge branch 'vec_tests' into 'master'

Vectorization: improve test coverage

See merge request !233
parents b1522533 686a3ad8
Pipeline #31651 passed with stages
in 11 minutes and 36 seconds
......@@ -263,8 +263,16 @@ class CBackend:
if mask != True: # NOQA
instr = 'maskStore' if aligned else 'maskStoreU'
printed_mask = self.sympy_printer.doprint(mask)
if self._vector_instruction_set['dataTypePrefix']['double'] == '__mm256d':
printed_mask = f"_mm256_castpd_si256({printed_mask})"
if data_type.base_type.base_name == 'double':
if self._vector_instruction_set['double'] == '__m256d':
printed_mask = f"_mm256_castpd_si256({printed_mask})"
elif self._vector_instruction_set['double'] == '__m128d':
printed_mask = f"_mm_castpd_si128({printed_mask})"
elif data_type.base_type.base_name == 'float':
if self._vector_instruction_set['float'] == '__m256':
printed_mask = f"_mm256_castps_si256({printed_mask})"
elif self._vector_instruction_set['float'] == '__m128':
printed_mask = f"_mm_castps_si128({printed_mask})"
rhs_type = get_type_of_expression(node.rhs)
if type(rhs_type) is not VectorType:
......
......@@ -57,23 +57,9 @@ def get_vector_instruction_set_x86(data_type='double', instruction_set='avx'):
'storeU': 'storeu[0,1]',
'storeA': 'store[0,1]',
'stream': 'stream[0,1]',
'maskstore': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]',
'maskload': 'mask_load[0, 2, 1]' if instruction_set == 'avx512' else 'maskload[0, 2, 1]'
'maskStore': 'mask_store[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]',
'maskStoreU': 'mask_storeu[0, 2, 1]' if instruction_set == 'avx512' else 'maskstore[0, 2, 1]',
}
if instruction_set == 'avx512':
base_names.update({
'maskStore': 'mask_store[0, 2, 1]',
'maskStoreU': 'mask_storeu[0, 2, 1]',
'maskLoad': 'mask_load[2, 1, 0]',
'maskLoadU': 'mask_loadu[2, 1, 0]'
})
if instruction_set == 'avx':
base_names.update({
'maskStore': 'maskstore[0, 2, 1]',
'maskStoreU': 'maskstore[0, 2, 1]',
'maskLoad': 'maskload[0, 1]',
'maskLoadU': 'maskloadu[0, 1]'
})
for comparison_op, constant in comparisons.items():
base_names[comparison_op] = f'cmp[0, 1, {constant}]'
......
......@@ -75,3 +75,26 @@ def test_boolean_before_loop():
np.testing.assert_array_equal(g_arr, 1.0)
kernel(f=f_arr, g=g_arr, t2=-1.0)
np.testing.assert_array_equal(g_arr, 42.0)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_vec_maskstore(instruction_set, dtype):
    """Vectorized conditional assignment should compile to masked stores and
    only overwrite the lanes where the condition holds."""
    if instruction_set in ['neon', 'vsx']:
        pytest.skip('no mask-store instructions available')
    numpy_dtype = np.float32 if dtype == 'float' else np.float64
    data_arr = np.zeros((16, 16), dtype=numpy_dtype)
    # interior block is set to 1.0 and must survive the kernel untouched
    data_arr[4:-4, 4:-4] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
    assignments = [
        Conditional(data.center() < 1.0,
                    Block([ps.Assignment(data.center(), 2.0)]))
    ]
    ast = ps.create_kernel(assignments, target='cpu',
                           cpu_vectorize_info={'instruction_set': instruction_set})
    ps.show_code(ast)
    compiled = ast.compile()
    compiled(data=data_arr)
    # border cells started below 1.0 and were overwritten; interior kept its value
    np.testing.assert_equal(data_arr[0:4, :], 2.0)
    np.testing.assert_equal(data_arr[4:-4, 4:-4], 1.0)
......@@ -14,7 +14,7 @@ else:
instruction_set = None
def test_vector_type_propagation():
def test_vector_type_propagation(instruction_set=instruction_set):
a, b, c, d, e = sp.symbols("a b c d e")
arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2))
arr *= 10.0
......@@ -33,7 +33,7 @@ def test_vector_type_propagation():
np.testing.assert_equal(dst[1:-1, 1:-1], 2 * 10.0 + 3)
def test_aligned_and_nt_stores(openmp=False):
def test_aligned_and_nt_stores(instruction_set=instruction_set, openmp=False):
domain_size = (24, 24)
# create a datahandling object
dh = ps.create_data_handling(domain_size, periodicity=(True, True), parallel=False, default_target='cpu')
......@@ -63,11 +63,11 @@ def test_aligned_and_nt_stores(openmp=False):
dh.run_kernel(kernel)
np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size))
def test_aligned_and_nt_stores_openmp(instruction_set=instruction_set):
    """Run the aligned / non-temporal store test with OpenMP enabled.

    Thin wrapper around ``test_aligned_and_nt_stores``. The earlier
    zero-argument definition was shadowed by this one and is removed;
    the OpenMP flag is passed by keyword so it cannot be confused with
    the instruction-set argument.
    """
    test_aligned_and_nt_stores(instruction_set, openmp=True)
def test_inplace_update():
def test_inplace_update(instruction_set=instruction_set):
shape = (9, 9, 3)
arr = np.ones(shape, order='f')
......@@ -88,7 +88,7 @@ def test_inplace_update():
np.testing.assert_equal(arr, 2)
def test_vectorization_fixed_size():
def test_vectorization_fixed_size(instruction_set=instruction_set):
configurations = []
# Fixed size - multiple of four
arr = np.ones((20 + 2, 24 + 2)) * 5.0
......@@ -115,7 +115,7 @@ def test_vectorization_fixed_size():
np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
def test_vectorization_variable_size():
def test_vectorization_variable_size(instruction_set=instruction_set):
f, g = ps.fields("f, g : double[2D]")
update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
ast = ps.create_kernel(update_rule)
......@@ -131,7 +131,7 @@ def test_vectorization_variable_size():
np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
def test_piecewise1():
def test_piecewise1(instruction_set=instruction_set):
a, b, c, d, e = sp.symbols("a b c d e")
arr = np.ones((2 ** 3 + 2, 2 ** 4 + 2)) * 5.0
......@@ -149,7 +149,7 @@ def test_piecewise1():
np.testing.assert_equal(dst[1:-1, 1:-1], 5 + 3 + 5.0)
def test_piecewise2():
def test_piecewise2(instruction_set=instruction_set):
arr = np.zeros((20, 20))
@ps.kernel
......@@ -167,7 +167,7 @@ def test_piecewise2():
np.testing.assert_equal(arr, np.ones_like(arr))
def test_piecewise3():
def test_piecewise3(instruction_set=instruction_set):
arr = np.zeros((22, 22))
@ps.kernel
......@@ -181,7 +181,7 @@ def test_piecewise3():
ast.compile()
def test_logical_operators():
def test_logical_operators(instruction_set=instruction_set):
arr = np.zeros((22, 22))
@ps.kernel
......@@ -220,7 +220,7 @@ def test_hardware_query():
any([iset.startswith('sve') for iset in supported_instruction_sets])
def test_vectorised_pow():
def test_vectorised_pow(instruction_set=instruction_set):
arr = np.zeros((24, 24))
f, g = ps.fields(f=arr, g=arr)
......@@ -256,7 +256,7 @@ def test_vectorised_pow():
ast.compile()
def test_vectorised_fast_approximations():
def test_vectorised_fast_approximations(instruction_set=instruction_set):
arr = np.zeros((24, 24))
f, g = ps.fields(f=arr, g=arr)
......
......@@ -57,15 +57,13 @@ def test_vectorized_abs(instruction_set, dtype):
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('gl_field, gl_kernel', [(1, 0), (0, 1), (1, 1)])
def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set, dtype):
itemsize = 8 if dtype == 'double' else 4
alignment = get_vector_instruction_set(dtype, instruction_set)['width'] * itemsize
dtype = np.float64 if dtype == 'double' else np.float32
domain_size = (128, 128)
dh = ps.create_data_handling(domain_size, periodicity=(True, True), default_target='cpu')
src = dh.add_array("src", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=alignment)
src = dh.add_array("src", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=True)
dh.fill(src.name, 1.0, ghost_layers=True)
dst = dh.add_array("dst", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=alignment)
dst = dh.add_array("dst", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=True)
dh.fill(dst.name, 1.0, ghost_layers=True)
update_rule = ps.Assignment(dst[0, 0], src[0, 0])
......@@ -90,3 +88,11 @@ def test_cacheline_size(instruction_set):
assert cacheline_size > 8 and cacheline_size < 0x100000, "Cache line size is implausible"
assert cacheline_size % vector_size == 0, "Cache line size should be multiple of vector size"
assert cacheline_size & (cacheline_size - 1) == 0, "Cache line size is not a power of 2"
# test_vectorization is not parametrized because it is supposed to run without pytest, so we parametrize it here
from pystencils_tests import test_vectorization
@pytest.mark.parametrize('instruction_set',
                         set(supported_instruction_sets) - {test_vectorization.instruction_set})
@pytest.mark.parametrize('function',
                         [name for name in test_vectorization.__dict__
                          if name.startswith('test_') and name != 'test_hardware_query'])
def test_vectorization_other(instruction_set, function):
    """Re-run each test from ``test_vectorization`` for every instruction set
    not already exercised by that module's own default."""
    getattr(test_vectorization, function)(instruction_set)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment