diff --git a/MANIFEST.in b/MANIFEST.in
index f32d4f3d17079502cccd556699d845504908e52a..5c16bb2001e59376e6a2a264cadec0f69c3c0b56 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,5 @@
 include README.md
 include COPYING.txt
 include RELEASE-VERSION
+global-include *.pyx
+global-exclude boundaries/createindexlistcython.c
diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py
index 3cc1be4f36b126baf19d074657b4394cdd2cefbb..057402c3362c508d877ed7e97e1c98165e771582 100644
--- a/pystencils_tests/test_vectorization.py
+++ b/pystencils_tests/test_vectorization.py
@@ -42,7 +42,7 @@ def test_inplace_update():
         f1 @= 2 * s.tmp0
         f2 @= 2 * s.tmp0
 
-    ast = ps.create_kernel(update_rule, cpu_vectorize_info={'instruction_set': 'avx'})
+    ast = ps.create_kernel(update_rule, cpu_vectorize_info=True)
     kernel = ast.compile()
     kernel(f=arr)
     np.testing.assert_equal(arr, 2)
diff --git a/pystencils_tests/test_vectorization_specific.py b/pystencils_tests/test_vectorization_specific.py
new file mode 100644
index 0000000000000000000000000000000000000000..6764f282b3e6edbb8ce1297b750162fd578306f2
--- /dev/null
+++ b/pystencils_tests/test_vectorization_specific.py
@@ -0,0 +1,36 @@
+import pytest
+
+import numpy as np
+import sympy as sp
+
+import pystencils as ps
+from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
+from pystencils.cpu.vectorization import vectorize
+from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
+from pystencils.transformations import replace_inner_stride_with_one
+
+# `or []` keeps this a list for parametrize even if the query returns a falsy value; the query runs only once.
+supported_instruction_sets = get_supported_instruction_sets() or []
+
+
+@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
+def test_vectorisation_varying_arch(instruction_set):
+    """Compile the in-place doubling kernel for the given instruction set and check that an all-ones array becomes 2."""
+    shape = (9, 9, 3)
+    arr = np.ones(shape, order='f')
+
+    @ps.kernel
+    def update_rule(s):
+        f = ps.fields("f(3) : [2D]", f=arr)
+        s.tmp0 @= f(0)
+        s.tmp1 @= f(1)
+        s.tmp2 @= f(2)
+        f0, f1, f2 = f(0), f(1), f(2)
+        f0 @= 2 * s.tmp0
+        f1 @= 2 * s.tmp0
+        f2 @= 2 * s.tmp0
+
+    ast = ps.create_kernel(update_rule, cpu_vectorize_info={'instruction_set': instruction_set})
+    kernel = ast.compile()
+    kernel(f=arr)
+    np.testing.assert_equal(arr, 2)
diff --git a/setup.py b/setup.py
index d917867c1da2b7f4518c60cac6d1a366c270390f..b209fcbb9990b3ceb8e097d37d03fb05260f1c6d 100644
--- a/setup.py
+++ b/setup.py
@@ -88,7 +88,8 @@ setuptools.setup(name='pystencils',
                  install_requires=['sympy>=1.1', 'numpy', 'appdirs', 'joblib'],
                  package_data={'pystencils': ['include/*.h',
                                               'backends/cuda_known_functions.txt',
-                                              'backends/opencl1.1_known_functions.txt']},
+                                              'backends/opencl1.1_known_functions.txt',
+                                              'boundaries/createindexlistcython.pyx']},
 
                  ext_modules=cython_extensions("pystencils.boundaries.createindexlistcython") if USE_CYTHON else [],
                  classifiers=[