"""Tests for the pystencils OpenCL backend: printing of generated OpenCL code and
JIT execution, with results compared against the CUDA backend on identical input data."""

import numpy as np
import pytest
import sympy as sp

import pystencils
from pystencils.backends.cuda_backend import CudaBackend
from pystencils.backends.opencl_backend import OpenClBackend
from pystencils.opencl.opencljit import make_python_function

try:
    import pyopencl as cl
    HAS_OPENCL = True
except Exception:
    HAS_OPENCL = False


def test_print_opencl():
    z, y, x = pystencils.fields("z, y, x: [2d]")

    assignments = pystencils.AssignmentCollection({
        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
    })
    print(assignments)

    ast = pystencils.create_kernel(assignments, target='gpu')
    print(ast)

    code = pystencils.show_code(ast, custom_backend=CudaBackend())
    print(code)

    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
    print(opencl_code)

    # The OpenCL version must use __global pointers and OpenCL work-item indexing.
    assert "__global double * RESTRICT const _data_x" in str(opencl_code)
    assert "__global double * RESTRICT" in str(opencl_code)
    assert "get_local_id(0)" in str(opencl_code)


@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit_fixed_size():
    z, y, x = pystencils.fields("z, y, x: [20,30]")

    assignments = pystencils.AssignmentCollection({
        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
    })
    print(assignments)

    ast = pystencils.create_kernel(assignments, target='gpu')
    print(ast)

    code = pystencils.show_code(ast, custom_backend=CudaBackend())
    print(code)
    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
    print(opencl_code)

    cuda_kernel = ast.compile()
    assert cuda_kernel is not None

    import pycuda.gpuarray as gpuarray

    x_cpu = np.random.rand(20, 30)
    y_cpu = np.random.rand(20, 30)
    z_cpu = np.random.rand(20, 30)

    # Run the kernel via the default CUDA JIT first ...
    x = gpuarray.to_gpu(x_cpu)
    y = gpuarray.to_gpu(y_cpu)
    z = gpuarray.to_gpu(z_cpu)

    cuda_kernel(x=x, y=y, z=z)
    result_cuda = z.get()

    # ... then via the OpenCL JIT on the same input data.
    import pyopencl.array as array

    ctx = cl.create_some_context(0)
    queue = cl.CommandQueue(ctx)

    x = array.to_device(queue, x_cpu)
    y = array.to_device(queue, y_cpu)
    z = array.to_device(queue, z_cpu)

    opencl_kernel = make_python_function(ast, queue, ctx)
    assert opencl_kernel is not None
    opencl_kernel(x=x, y=y, z=z)
    result_opencl = z.get(queue)

    # Both backends must agree.
    assert np.allclose(result_cuda, result_opencl)


@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit():
    z, y, x = pystencils.fields("z, y, x: [2d]")

    assignments = pystencils.AssignmentCollection({
        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
    })
    print(assignments)

    ast = pystencils.create_kernel(assignments, target='gpu')
    print(ast)

    code = pystencils.show_code(ast, custom_backend=CudaBackend())
    print(code)
    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
    print(opencl_code)

    cuda_kernel = ast.compile()
    assert cuda_kernel is not None

    import pycuda.gpuarray as gpuarray

    x_cpu = np.random.rand(20, 30)
    y_cpu = np.random.rand(20, 30)
    z_cpu = np.random.rand(20, 30)

    # Run the kernel via the default CUDA JIT first ...
    x = gpuarray.to_gpu(x_cpu)
    y = gpuarray.to_gpu(y_cpu)
    z = gpuarray.to_gpu(z_cpu)

    cuda_kernel(x=x, y=y, z=z)
    result_cuda = z.get()

    # ... then via the OpenCL JIT on the same input data.
    import pyopencl.array as array

    ctx = cl.create_some_context(0)
    queue = cl.CommandQueue(ctx)

    x = array.to_device(queue, x_cpu)
    y = array.to_device(queue, y_cpu)
    z = array.to_device(queue, z_cpu)

    opencl_kernel = make_python_function(ast, queue, ctx)
    assert opencl_kernel is not None
    opencl_kernel(x=x, y=y, z=z)
    result_opencl = z.get(queue)

    # Both backends must agree.
    assert np.allclose(result_cuda, result_opencl)


@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
def test_opencl_jit_with_parameter():
    z, y, x = pystencils.fields("z, y, x: [2d]")
    a = sp.Symbol('a')

    assignments = pystencils.AssignmentCollection({
        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) + a
    })
    print(assignments)

    ast = pystencils.create_kernel(assignments, target='gpu')
    print(ast)

    code = pystencils.show_code(ast, custom_backend=CudaBackend())
    print(code)
    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
    print(opencl_code)

    cuda_kernel = ast.compile()
    assert cuda_kernel is not None

    import pycuda.gpuarray as gpuarray

    x_cpu = np.random.rand(20, 30)
    y_cpu = np.random.rand(20, 30)
    z_cpu = np.random.rand(20, 30)

    # The free symbol `a` becomes a scalar kernel parameter for both backends.
    x = gpuarray.to_gpu(x_cpu)
    y = gpuarray.to_gpu(y_cpu)
    z = gpuarray.to_gpu(z_cpu)

    cuda_kernel(x=x, y=y, z=z, a=5.)
    result_cuda = z.get()

    import pyopencl.array as array

    ctx = cl.create_some_context(0)
    queue = cl.CommandQueue(ctx)

    x = array.to_device(queue, x_cpu)
    y = array.to_device(queue, y_cpu)
    z = array.to_device(queue, z_cpu)

    opencl_kernel = make_python_function(ast, queue, ctx)
    assert opencl_kernel is not None
    opencl_kernel(x=x, y=y, z=z, a=5.)
    result_opencl = z.get(queue)

    # Both backends must agree.
    assert np.allclose(result_cuda, result_opencl)


if __name__ == '__main__':
    test_opencl_jit()
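
# Usage note (the file name and location are an assumption; adjust to your checkout):
#
#     pytest -v test_opencl.py
#
# The JIT tests are skipped automatically when pyopencl is not installed. They
# additionally require pycuda and a CUDA-capable device, since each one compares
# the CUDA result against the OpenCL result of the same kernel.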