import numpy as np
import sympy as sp
import pytest

import pystencils as ps
from pystencils.astnodes import Block, Conditional
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
from pystencils.cpu.vectorization import vec_all, vec_any

# Instruction sets reported by pystencils for this machine. The lookup is done
# once; a falsy result (e.g. None or an empty list) is replaced by an empty
# list so the name can always be handed to parametrize / skipif below.
supported_instruction_sets = get_supported_instruction_sets() or []

@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_vec_any(instruction_set, dtype):
    """Compile and run a vectorized kernel that branches on ``vec_any``.

    A ``(4*width, 4*width)`` array of zeros is seeded with ones in rows 3..8,
    columns ``1 .. 3*width-2``. The kernel stores 2.0 under the condition
    ``vec_any(data > 0)``. Afterwards the first ``3*width`` columns of rows
    3..8 must all equal 2.0 -- this includes columns ``0`` and ``3*width-1``,
    which were zero before the kernel ran.
    """
    np_dtype = np.float32 if dtype != 'double' else np.float64
    width = get_vector_instruction_set(dtype, instruction_set)['width']
    extent = 4 * width

    data_arr = np.zeros((extent, extent), dtype=np_dtype)
    data_arr[3:9, 1:3 * width - 1] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)

    # vec_any appears twice: once as the right-hand side of a plain
    # assignment and once as the condition guarding the store.
    flag_assignment = ps.Assignment(sp.Symbol("t1"), vec_any(data.center() > 0.0))
    guarded_store = Conditional(
        vec_any(data.center() > 0.0),
        Block([ps.Assignment(data.center(), 2.0)]),
    )

    vectorization = {'instruction_set': instruction_set}
    ast = ps.create_kernel([flag_assignment, guarded_store], target='cpu',
                           cpu_vectorize_info=vectorization)
    run = ast.compile()
    run(data=data_arr)

    touched = data_arr[3:9, :3 * width]
    np.testing.assert_equal(touched, 2.0)


@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_vec_all(instruction_set, dtype):
    """Compile and run a vectorized kernel that branches on ``vec_all``.

    The input is the same as in ``test_vec_any``: zeros everywhere except ones
    in rows 3..8, columns ``1 .. 3*width-2``. The kernel stores 2.0 under the
    condition ``vec_all(data > 0)``. In rows 3..8 only the column range
    ``width .. 2*width-1`` is expected to become 2.0; the remaining seeded
    columns must still be 1.0 and the unseeded columns must still be 0.0.
    """
    np_dtype = np.float32 if dtype != 'double' else np.float64
    width = get_vector_instruction_set(dtype, instruction_set)['width']
    extent = 4 * width

    data_arr = np.zeros((extent, extent), dtype=np_dtype)
    data_arr[3:9, 1:3 * width - 1] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)

    guarded_store = Conditional(
        vec_all(data.center() > 0.0),
        Block([ps.Assignment(data.center(), 2.0)]),
    )
    vectorization = {'instruction_set': instruction_set}
    ast = ps.create_kernel([guarded_store], target='cpu',
                           cpu_vectorize_info=vectorization)
    run = ast.compile()
    run(data=data_arr)

    # (column range, expected value) for rows 3..8, checked left to right.
    rows = slice(3, 9)
    expectations = [
        (slice(None, 1), 0.0),
        (slice(1, width), 1.0),
        (slice(width, 2 * width), 2.0),
        (slice(2 * width, 3 * width - 1), 1.0),
        (slice(3 * width - 1, None), 0.0),
    ]
    for cols, expected in expectations:
        np.testing.assert_equal(data_arr[rows, cols], expected)


@pytest.mark.skipif(not supported_instruction_sets, reason='cannot detect CPU instruction set')
def test_boolean_before_loop():
    """Check a vectorized kernel whose Piecewise condition is a boolean symbol.

    ``t1`` is assigned from ``t2 > 0``, an expression that involves no field
    access (presumably what lets it be evaluated before the loop, as the test
    name suggests -- not verified here). ``g`` then receives ``f`` where ``t1``
    holds and 42 otherwise. The kernel is run once with a positive and once
    with a negative ``t2``, and the whole of ``g_arr`` is checked each time.
    """
    t1, t2 = sp.symbols('t1, t2')
    f_arr = np.ones((10, 10))
    g_arr = np.zeros_like(f_arr)
    f, g = ps.fields("f, g : double[2D]", f=f_arr, g=g_arr)

    a = [
        ps.Assignment(t1, t2 > 0),
        ps.Assignment(g[0, 0],
                      sp.Piecewise((f[0, 0], t1), (42, True)))
    ]
    # Uses the last entry of the detected instruction sets.
    ast = ps.create_kernel(a, cpu_vectorize_info={'instruction_set': supported_instruction_sets[-1]})
    kernel = ast.compile()

    # t2 > 0: the Piecewise selects f, which is all ones.
    kernel(f=f_arr, g=g_arr, t2=1.0)
    np.testing.assert_array_equal(g_arr, 1.0)

    # t2 <= 0: the Piecewise falls through to the constant 42.
    kernel(f=f_arr, g=g_arr, t2=-1.0)
    np.testing.assert_array_equal(g_arr, 42.0)


@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float', 'double'))
def test_vec_maskstore(instruction_set, dtype):
    """Check a vectorized kernel with a per-element conditional store.

    A 16x16 array of zeros gets an 8x8 block of ones in its centre. The kernel
    stores 2.0 wherever the value is below 1.0, so afterwards every cell of
    the four-wide border must be 2.0 and the central block must still be 1.0.
    All four sides of the border are checked: the left/right columns of rows
    4..11 sit next to interior cells in the same row, so those rows mix cells
    that must be written with cells that must be left alone.

    Skipped for 'neon' and 'vsx', which are treated here as having no
    mask-store instructions.
    """
    if instruction_set in ['neon', 'vsx']:
        pytest.skip('no mask-store instructions available')
    data_arr = np.zeros((16, 16), dtype=np.float64 if dtype == 'double' else np.float32)
    data_arr[4:-4, 4:-4] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)

    c = [
        Conditional(data.center() < 1.0, Block([
            ps.Assignment(data.center(), 2.0)
        ]))
    ]
    ast = ps.create_kernel(c, target='cpu',
                           cpu_vectorize_info={'instruction_set': instruction_set})
    # Emits the generated kernel code, which helps when diagnosing a failure.
    ps.show_code(ast)
    kernel = ast.compile()
    kernel(data=data_arr)

    # Border cells started at 0.0 (< 1.0) and must all have been overwritten.
    np.testing.assert_equal(data_arr[:4, :], 2.0)
    np.testing.assert_equal(data_arr[-4:, :], 2.0)
    np.testing.assert_equal(data_arr[:, :4], 2.0)
    np.testing.assert_equal(data_arr[:, -4:], 2.0)
    # Interior cells started at 1.0 (not < 1.0) and must be untouched.
    np.testing.assert_equal(data_arr[4:-4, 4:-4], 1.0)