# Newer
# Older
import numpy as np
import pystencils as ps
from pystencils.astnodes import Block, Conditional
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
from pystencils.cpu.vectorization import vec_all, vec_any
# Instruction sets reported for this machine. A falsy result from the
# detection call is normalised to an empty list so the value can be passed
# to ``pytest.mark.parametrize`` and indexed/truth-tested by the tests.
# The detection function is evaluated once instead of twice.
supported_instruction_sets = get_supported_instruction_sets() or []
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_vec_any(instruction_set, dtype):
    """Check ``vec_any`` both as an assigned value and as a branch condition.

    A ``(4*width, 4*width)`` array is zero except for rows ``3:9``, columns
    ``1:3*width-1``, which are set to 1.0. The kernel writes 2.0 to the
    centre cell under a ``vec_any(data > 0)`` condition. Afterwards rows
    ``3:9``, columns ``:3*width`` are expected to be 2.0 everywhere,
    including the zero cells at columns ``0`` and ``3*width-1``.
    """
    width = get_vector_instruction_set(dtype, instruction_set)['width']
    np_dtype = np.float64 if dtype == 'double' else np.float32

    data_arr = np.zeros((4 * width, 4 * width), dtype=np_dtype)
    data_arr[3:9, 1:3 * width - 1] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)

    c = [
        # Assigning the reduction to a symbol exercises vec_any outside of a
        # Conditional as well; t1 itself is not read afterwards.
        ps.Assignment(sp.Symbol("t1"), vec_any(data.center() > 0.0)),
        Conditional(vec_any(data.center() > 0.0), Block([
            ps.Assignment(data.center(), 2.0)
        ]))
    ]
    # The instruction set must be passed explicitly so that each parametrized
    # case vectorizes with the instruction set under test.
    ast = ps.create_kernel(c, target='cpu',
                           cpu_vectorize_info={'instruction_set': instruction_set})
    kernel = ast.compile()
    kernel(data=data_arr)

    np.testing.assert_equal(data_arr[3:9, :3 * width], 2.0)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_vec_all(instruction_set, dtype):
    """Check that a ``vec_all`` condition only fires for fully-positive vectors.

    A ``(4*width, 4*width)`` array is zero except for rows ``3:9``, columns
    ``1:3*width-1``, which are set to 1.0. The kernel writes 2.0 to the
    centre cell under a ``vec_all(data > 0)`` condition. Within rows ``3:9``
    only columns ``width:2*width`` are expected to become 2.0; all other
    columns must keep their initial 0.0 / 1.0 values.
    """
    width = get_vector_instruction_set(dtype, instruction_set)['width']
    np_dtype = np.float64 if dtype == 'double' else np.float32
    side = 4 * width

    arr = np.zeros((side, side), dtype=np_dtype)
    arr[3:9, 1:3 * width - 1] = 1.0
    field = ps.fields(f"data: {dtype}[2D]", data=arr)

    all_positive = vec_all(field.center() > 0.0)
    overwrite = Block([ps.Assignment(field.center(), 2.0)])
    assignments = [Conditional(all_positive, overwrite)]

    vectorize_info = {'instruction_set': instruction_set}
    ast = ps.create_kernel(assignments, target='cpu', cpu_vectorize_info=vectorize_info)
    kernel = ast.compile()
    kernel(data=arr)

    # Expected value per column range of the touched rows, left to right.
    rows = arr[3:9]
    np.testing.assert_equal(rows[:, :1], 0.0)
    np.testing.assert_equal(rows[:, 1:width], 1.0)
    np.testing.assert_equal(rows[:, width:2 * width], 2.0)
    np.testing.assert_equal(rows[:, 2 * width:3 * width - 1], 1.0)
    np.testing.assert_equal(rows[:, 3 * width - 1:], 0.0)
@pytest.mark.skipif(not supported_instruction_sets, reason='cannot detect CPU instruction set')
def test_boolean_before_loop():
    """Check a vectorized kernel whose Piecewise condition is a boolean symbol.

    ``t1`` is assigned ``t2 > 0`` where ``t2`` is a scalar kernel argument,
    and ``g`` receives ``f`` when ``t1`` holds and 42 otherwise. The kernel
    is run once with a positive and once with a negative ``t2``.
    """
    t1, t2 = sp.symbols('t1, t2')
    src_arr = np.ones((10, 10))
    dst_arr = np.zeros_like(src_arr)
    f, g = ps.fields("f, g : double[2D]", f=src_arr, g=dst_arr)

    flag_assignment = ps.Assignment(t1, t2 > 0)
    select_assignment = ps.Assignment(
        g[0, 0],
        sp.Piecewise((f[0, 0], t1), (42, True)),
    )
    assignments = [flag_assignment, select_assignment]

    # Vectorize with the last entry of the detected instruction sets.
    vectorize_info = {'instruction_set': supported_instruction_sets[-1]}
    ast = ps.create_kernel(assignments, cpu_vectorize_info=vectorize_info)
    kernel = ast.compile()

    # Positive t2: condition holds, g takes the value of f (all ones).
    kernel(f=src_arr, g=dst_arr, t2=1.0)
    # NOTE(review): prints the symbolic field g, not the result array;
    # looks like leftover debugging output -- confirm before removing.
    print(g)
    np.testing.assert_array_equal(dst_arr, 1.0)

    # Negative t2: condition fails, g takes the fallback value 42.
    kernel(f=src_arr, g=dst_arr, t2=-1.0)
    np.testing.assert_array_equal(dst_arr, 42.0)

# Michael Kuron
# committed
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_vec_maskstore(instruction_set, dtype):
    """Check a vectorized per-cell conditional store.

    A 16x16 array is zero except for the inner block ``[4:-4, 4:-4]``, which
    is 1.0. The kernel writes 2.0 to every cell whose value is below 1.0.
    Rows ``0:4`` are expected to be 2.0 and the inner block to stay 1.0.
    The test is skipped for the 'neon' and 'vsx' instruction sets.
    """
    if instruction_set in ('neon', 'vsx'):
        pytest.skip('no mask-store instructions available')

    np_dtype = np.float64 if dtype == 'double' else np.float32
    arr = np.zeros((16, 16), dtype=np_dtype)
    arr[4:-4, 4:-4] = 1.0
    field = ps.fields(f"data: {dtype}[2D]", data=arr)

    below_one = field.center() < 1.0
    overwrite = Block([ps.Assignment(field.center(), 2.0)])
    assignments = [Conditional(below_one, overwrite)]

    vectorize_info = {'instruction_set': instruction_set}
    ast = ps.create_kernel(assignments, target='cpu', cpu_vectorize_info=vectorize_info)
    # Display the generated code before compiling and running it.
    ps.show_code(ast)
    kernel = ast.compile()
    kernel(data=arr)

    np.testing.assert_equal(arr[0:4, :], 2.0)
    np.testing.assert_equal(arr[4:-4, 4:-4], 1.0)