diff --git a/pystencils/display_utils.py b/pystencils/display_utils.py
index 638d1290acbbfc4d86bec12028dc59b37e2f98ea..610c404b709885e7445eba3be412f6811fca0241 100644
--- a/pystencils/display_utils.py
+++ b/pystencils/display_utils.py
@@ -45,7 +45,12 @@ def show_code(ast: KernelFunction, custom_backend=None):
     if isinstance(ast, KernelWrapper):
         ast = ast.ast
 
-    dialect = 'cuda' if ast.backend == 'gpucuda' else 'c'
+    if ast.backend == 'gpucuda':
+        dialect = 'cuda'
+    elif ast.backend == 'opencl':
+        dialect = 'opencl'
+    else:
+        dialect = 'c'
 
     class CodeDisplay:
         def __init__(self, ast_input):
diff --git a/pystencils/kernelcreation.py b/pystencils/kernelcreation.py
index 555196a623ce61fcfd2a9a40272f06c27fb70e54..16b0c64246e5540621210fe8502963be06db709f 100644
--- a/pystencils/kernelcreation.py
+++ b/pystencils/kernelcreation.py
@@ -12,6 +12,7 @@ from pystencils.simp.assignment_collection import AssignmentCollection
 from pystencils.stencil import direction_string_to_offset, inverse_direction_string
 from pystencils.transformations import (
     loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
+import functools
 
 
 def create_kernel(assignments,
@@ -27,13 +28,15 @@ def create_kernel(assignments,
                   gpu_indexing_params=MappingProxyType({}),
                   use_textures_for_interpolation=True,
                   cpu_prepend_optimizations=[],
-                  use_auto_for_assignments=False):
+                  use_auto_for_assignments=False,
+                  opencl_queue=None,
+                  opencl_ctx=None):
     """
     Creates abstract syntax tree (AST) of kernel, using a list of update equations.
 
     Args:
         assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
-        target: 'cpu', 'llvm' or 'gpu'
+        target: 'cpu', 'llvm', 'gpu' or 'opencl'
         data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol
                    name to type
         iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
@@ -108,13 +111,20 @@ def create_kernel(assignments,
         from pystencils.llvm import create_kernel
         ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
                             iteration_slice=iteration_slice, ghost_layers=ghost_layers)
-    elif target == 'gpu':
+    elif target == 'gpu' or target == 'opencl':
         from pystencils.gpucuda import create_cuda_kernel
         ast = create_cuda_kernel(assignments, type_info=data_type,
                                  indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
                                  iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                                  skip_independence_check=skip_independence_check,
                                  use_textures_for_interpolation=use_textures_for_interpolation)
+        if target == 'opencl':
+            from pystencils.opencl.opencljit import make_python_function
+            # reuse the CUDA AST, but mark it as OpenCL and compile it with the OpenCL JIT
+            ast._target = 'opencl'
+            ast._backend = 'opencl'
+            ast.compile = functools.partial(make_python_function, ast, opencl_queue, opencl_ctx)
+            return ast
     else:
-        raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))
+        raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu', 'llvm' or 'opencl'" % (target,))
 
diff --git a/pystencils/opencl/opencljit.py b/pystencils/opencl/opencljit.py
index 5526c954a1cc25438710576a0119abd65ab9854d..051dc1ec5e0d6baea989d0a42954ef154a6bcffb 100644
--- a/pystencils/opencl/opencljit.py
+++ b/pystencils/opencl/opencljit.py
@@ -3,10 +3,31 @@ import numpy as np
 from pystencils.backends.cbackend import generate_c, get_headers
 from pystencils.gpucuda.cudajit import _build_numpy_argument_list, _check_arguments
 from pystencils.include import get_pystencils_include_path
+from pystencils.kernel_wrapper import KernelWrapper
 
 USE_FAST_MATH = True
 
+_global_cl_ctx = None
+_global_cl_queue = None
+
+
+def get_global_cl_queue():
+    return _global_cl_queue
+
+
+def get_global_cl_ctx():
+    return _global_cl_ctx
+
+
+def init_globally(device_index=0):
+    import pyopencl as cl
+    global _global_cl_ctx
+    global _global_cl_queue
+    _global_cl_ctx = cl.create_some_context(device_index)
+    _global_cl_queue = cl.CommandQueue(_global_cl_ctx)
+
+
 
 def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
     """
     Creates a **OpenCL** kernel function from an abstract syntax tree which
@@ -24,6 +45,13 @@ def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argumen
         compiled kernel as Python function
     """
     import pyopencl as cl
+
+    # fall back to the globally created context and queue if none was given (see init_globally)
+    if not opencl_ctx:
+        opencl_ctx = _global_cl_ctx
+    if not opencl_queue:
+        opencl_queue = _global_cl_queue
+
     assert opencl_ctx, "No valid OpenCL context"
     assert opencl_queue, "No valid OpenCL queue"
 
@@ -90,4 +118,5 @@ def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argumen
 
     wrapper.ast = kernel_function_node
     wrapper.parameters = kernel_function_node.get_parameters()
+    wrapper = KernelWrapper(wrapper, parameters, kernel_function_node)
     return wrapper
diff --git a/pystencils_tests/test_opencl.py b/pystencils_tests/test_opencl.py
index adffeb4750a62f5bb08ded29fffe00ac5900f706..2fce04f4a2a1779abd11d1a6dfc0fa2bb30eb34d 100644
--- a/pystencils_tests/test_opencl.py
+++ b/pystencils_tests/test_opencl.py
@@ -5,7 +5,7 @@ import pystencils
 import sympy as sp
 from pystencils.backends.cuda_backend import CudaBackend
 from pystencils.backends.opencl_backend import OpenClBackend
-from pystencils.opencl.opencljit import make_python_function
+from pystencils.opencl.opencljit import make_python_function, init_globally, get_global_cl_queue
 
 try:
     import pyopencl as cl
@@ -233,3 +233,38 @@ def test_without_cuda():
     opencl_kernel = make_python_function(ast, queue, ctx)
     assert opencl_kernel is not None
     opencl_kernel(x=x, y=y, z=z)
+
+
+@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
+def test_kernel_creation():
+    z, y, x = pystencils.fields("z, y, x: [20,30]")
+
+    assignments = pystencils.AssignmentCollection({
+        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
+    })
+
+    print(assignments)
+
+    init_globally()
+    ast = pystencils.create_kernel(assignments, target='opencl')
+
+    print(ast.backend)
+
+    code = str(pystencils.show_code(ast))
+    print(code)
+    assert 'get_local_size' in code
+
+    opencl_kernel = ast.compile()
+    assert opencl_kernel is not None
+
+    x_cpu = np.random.rand(20, 30)
+    y_cpu = np.random.rand(20, 30)
+    z_cpu = np.random.rand(20, 30)
+
+    import pyopencl.array as array
+    assert get_global_cl_queue()
+    x = array.to_device(get_global_cl_queue(), x_cpu)
+    y = array.to_device(get_global_cl_queue(), y_cpu)
+    z = array.to_device(get_global_cl_queue(), z_cpu)
+
+    opencl_kernel(x=x, y=y, z=z)
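
For reviewers, a minimal usage sketch of the new 'opencl' target introduced by this diff (condensed from test_kernel_creation above; assumes pyopencl is installed and an OpenCL platform is available):

import numpy as np
import sympy as sp
import pystencils
from pystencils.opencl.opencljit import init_globally, get_global_cl_queue

z, y, x = pystencils.fields("z, y, x: [20,30]")
assignments = pystencils.AssignmentCollection({
    z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
})

init_globally()  # creates the module-level OpenCL context and command queue
ast = pystencils.create_kernel(assignments, target='opencl')
kernel = ast.compile()  # no queue/ctx passed: make_python_function falls back to the globals

import pyopencl.array as array
queue = get_global_cl_queue()
x_dev = array.to_device(queue, np.random.rand(20, 30))
y_dev = array.to_device(queue, np.random.rand(20, 30))
z_dev = array.to_device(queue, np.random.rand(20, 30))
kernel(x=x_dev, y=y_dev, z=z_dev)

Alternatively, an explicit context and queue can still be supplied via the new opencl_ctx/opencl_queue arguments of create_kernel, mirroring the signature of make_python_function.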