diff --git a/README.md b/README.md
index d079128a539b9c9244d39dc5fc0a6c5e88055227..60ab1dd3e0cebc9ba6948ace01382dc127176711 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,7 @@ Without `[interactive]` you get a minimal version with very little dependencies.
 
 All options:
 -  `gpu`: use this if an Nvidia GPU is available and CUDA is installed
+- `opencl`: basic OpenCL support (experimental), installs `pyopencl`
 - `alltrafos`: pulls in additional dependencies for loop simplification e.g. libisl
 - `bench_db`: functionality to store benchmark result in object databases
 - `interactive`: installs dependencies to work in Jupyter including image I/O, plotting etc.
diff --git a/pystencils/astnodes.py b/pystencils/astnodes.py
index b2f5727c4068852fd6f4ba1b755a1ba5d1215b91..393396aa2d073c77e8df6ff627860164c9cfe526 100644
--- a/pystencils/astnodes.py
+++ b/pystencils/astnodes.py
@@ -707,8 +707,7 @@ class DestructuringBindingsForFieldClass(Node):
         corresponding_field_names = {s.field_name for s in undefined_field_symbols if hasattr(s, 'field_name')}
         corresponding_field_names |= {s.field_names[0] for s in undefined_field_symbols if hasattr(s, 'field_names')}
         return {TypedSymbol(f, self.CLASS_NAME_TEMPLATE.format(dtype=field_map[f].dtype, ndim=field_map[f].ndim) + '&')
-                for f in corresponding_field_names} | \
-            (self.body.undefined_symbols - undefined_field_symbols)
+                for f in corresponding_field_names} | (self.body.undefined_symbols - undefined_field_symbols)
 
     def subs(self, subs_dict) -> None:
         """Inplace! substitute, similar to sympy's but modifies the AST inplace."""
diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py
index cac2b00a1b52b29a61ccaa0cd73e9e5d37a89e95..239f060eee7de833321f811ed95b05ff8f02adbb 100644
--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -59,6 +59,9 @@ def generate_c(ast_node: Node, signature_only: bool = False, dialect='c', custom
     elif dialect == 'cuda':
         from pystencils.backends.cuda_backend import CudaBackend
         printer = CudaBackend(signature_only=signature_only)
+    elif dialect == 'opencl':
+        from pystencils.backends.opencl_backend import OpenClBackend
+        printer = OpenClBackend(signature_only=signature_only)
     else:
         raise ValueError("Unknown dialect: " + str(dialect))
     code = printer(ast_node)
@@ -166,14 +169,21 @@
         return result
 
     def _print(self, node):
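+        # Nodes that are already strings are returned unchanged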
+        if isinstance(node, str):
+            return node
         for cls in type(node).__mro__:
             method_name = "_print_" + cls.__name__
             if hasattr(self, method_name):
                 return getattr(self, method_name)(node)
         raise NotImplementedError(self.__class__.__name__ + " does not support node of type " + node.__class__.__name__)
 
+    def _print_Type(self, node):
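+        # Fallback for data types; backends may override this, e.g. to add address space qualifiers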
+        return str(node)
+
     def _print_KernelFunction(self, node):
-        function_arguments = ["%s %s" % (str(s.symbol.dtype), s.symbol.name) for s in node.get_parameters()]
+        function_arguments = ["%s %s" % (self._print(s.symbol.dtype), s.symbol.name) for s in node.get_parameters()]
         launch_bounds = ""
         if self._dialect == 'cuda':
             max_threads = node.indexing.max_threads_per_block()
@@ -208,7 +218,11 @@
 
     def _print_SympyAssignment(self, node):
         if node.is_declaration:
-            data_type = "const " + str(node.lhs.dtype) + " " if node.is_const else str(node.lhs.dtype) + " "
+            if node.is_const:
+                prefix = 'const '
+            else:
+                prefix = ''
+            data_type = prefix + self._print(node.lhs.dtype) + " "
             return "%s%s = %s;" % (data_type, self.sympy_printer.doprint(node.lhs),
                                    self.sympy_printer.doprint(node.rhs))
         else:
@@ -277,10 +291,10 @@
                                   ]
         destructuring_bindings.sort()  # only for code aesthetics
         return "{\n" + self._indent + \
-            ("\n" + self._indent).join(destructuring_bindings) + \
-            "\n" + self._indent + \
-            ("\n" + self._indent).join(self._print(node.body).splitlines()) + \
-            "\n}"
+               ("\n" + self._indent).join(destructuring_bindings) + \
+               "\n" + self._indent + \
+               ("\n" + self._indent).join(self._print(node.body).splitlines()) + \
+               "\n}"
 
 
 # ------------------------------------------ Helper function & classes -------------------------------------------------
diff --git a/pystencils/backends/opencl1.1_known_functions.txt b/pystencils/backends/opencl1.1_known_functions.txt
new file mode 100644
index 0000000000000000000000000000000000000000..abeab29043e2c76b2b2df8e9024c00e7ac34de0f
--- /dev/null
+++ b/pystencils/backends/opencl1.1_known_functions.txt
@@ -0,0 +1,95 @@
+acos
+acosh
+acospi
+asin
+asinh
+asinpi
+atan
+atan2
+atanh
+atanpi
+atan2pi
+cbrt
+ceil
+copysign
+cos
+cosh
+cospi
+erfc
+erf
+exp
+exp2
+exp10
+expm1
+fabs
+fdim
+floor
+fma
+fmax
+fmin
+fmod
+fract
+frexp
+hypot
+ilogb
+ldexp
+lgamma
+lgamma_r
+log
+log2
+log10
+log1p
+logb
+mad
+maxmag
+minmag
+modf
+nextafter
+pow
+pown
+powr
+remquo
+rint
+rootn
+round
+rsqrt
+sin
+sincos
+sinh
+sinpi
+sqrt
+tan
+tanh
+tanpi
+tgamma
+trunc
+
+
+half_cos
+half_divide
+half_exp
+half_exp2
+half_exp10
+half_log
+half_log2
+half_log10
+half_powr
+half_recip
+half_rsqrt
+half_sin
+half_sqrt
+half_tan
+native_cos
+native_divide
+native_exp
+native_exp2
+native_exp10
+native_log
+native_log2
+native_log10
+native_powr
+native_recip
+native_rsqrt
+native_sin
+native_sqrt
+native_tan
diff --git a/pystencils/backends/opencl_backend.py b/pystencils/backends/opencl_backend.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2e40a69f05fe1181fc41110ca9a1b07763a75be
--- /dev/null
+++ b/pystencils/backends/opencl_backend.py
@@ -0,0 +1,102 @@
+from os.path import dirname, join
+
+import pystencils.data_types
+from pystencils.astnodes import Node
+from pystencils.backends.cbackend import CustomSympyPrinter, generate_c
+from pystencils.backends.cuda_backend import CudaBackend, CudaSympyPrinter
+from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
+
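+# Built-in math functions of OpenCL 1.1; the printer emits calls to any
+# function listed here under its own name, without translation.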
+with open(join(dirname(__file__), 'opencl1.1_known_functions.txt')) as f:
+    lines = f.readlines()
+    OPENCL_KNOWN_FUNCTIONS = {l.strip(): l.strip() for l in lines if l.strip()}
+
+
+def generate_opencl(astnode: Node, signature_only: bool = False) -> str:
+    """Prints an abstract syntax tree node (made for target 'gpu') as OpenCL code.
+
+    Args:
+        astnode: KernelFunction node to generate code for
+        signature_only: if True only the signature is printed
+
+    Returns:
+        C-like code for the ast node and its descendants
+    """
+    return generate_c(astnode, signature_only, dialect='opencl')
+
+
+class OpenClBackend(CudaBackend):
+
+    def __init__(self,
+                 sympy_printer=None,
+                 signature_only=False):
+        if not sympy_printer:
+            sympy_printer = OpenClSympyPrinter()
+
+        super().__init__(sympy_printer, signature_only)
+        self._dialect = 'opencl'
+
+    def _print_Type(self, node):
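+        # Field pointers reference buffers in OpenCL's global address space,
+        # which must be qualified explicitly (CUDA has no analogue)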
+        code = super()._print_Type(node)
+        if isinstance(node, pystencils.data_types.PointerType):
+            return "__global " + code
+        else:
+            return code
+
+    def _print_ThreadBlockSynchronization(self, node):
+        raise NotImplementedError()
+
+    def _print_TextureDeclaration(self, node):
+        raise NotImplementedError()
+
+
+class OpenClSympyPrinter(CudaSympyPrinter):
+    language = "OpenCL"
+
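+    # Translation tables from CUDA's thread indexing symbols (e.g. threadIdx.x)
+    # to the corresponding OpenCL work-item functions (e.g. get_local_id(0))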
+    DIMENSION_MAPPING = {
+        'x': '0',
+        'y': '1',
+        'z': '2'
+    }
+    INDEXING_FUNCTION_MAPPING = {
+        'blockIdx': 'get_group_id',
+        'threadIdx': 'get_local_id',
+        'blockDim': 'get_local_size',
+        'gridDim': 'get_global_size'
+    }
+
+    def __init__(self):
+        CustomSympyPrinter.__init__(self)
+        self.known_functions = OPENCL_KNOWN_FUNCTIONS
+
+    def _print_ThreadIndexingSymbol(self, node):
+        symbol_name: str = node.name
+        function_name, dimension = tuple(symbol_name.split("."))
+        dimension = self.DIMENSION_MAPPING[dimension]
+        function_name = self.INDEXING_FUNCTION_MAPPING[function_name]
+        return f"{function_name}({dimension})"
+
+    def _print_TextureAccess(self, node):
+        raise NotImplementedError()
+
+    # For math functions, OpenCL is more similar to the C++ printer CustomSympyPrinter
+    # since built-in math functions are generic.
+    # In CUDA, you have to differentiate between `sin` and `sinf`
+    _print_math_func = CustomSympyPrinter._print_math_func
+    _print_Pow = CustomSympyPrinter._print_Pow
+
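+    # pystencils' fast approximation nodes map to OpenCL's native_* functions,
+    # which trade accuracy for speed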
+    def _print_Function(self, expr):
+        if isinstance(expr, fast_division):
+            return "native_divide(%s, %s)" % tuple(self._print(a) for a in expr.args)
+        elif isinstance(expr, fast_sqrt):
+            return "native_sqrt(%s)" % tuple(self._print(a) for a in expr.args)
+        elif isinstance(expr, fast_inv_sqrt):
+            return "native_rsqrt(%s)" % tuple(self._print(a) for a in expr.args)
+        return CustomSympyPrinter._print_Function(self, expr)
diff --git a/pystencils/gpucuda/indexing.py b/pystencils/gpucuda/indexing.py
index f6f1fbe80c3da6c1dea78cf687fe96dc6810ec10..4c8701b2599dd9b4570c7d7f8875e0e6b58bef36 100644
--- a/pystencils/gpucuda/indexing.py
+++ b/pystencils/gpucuda/indexing.py
@@ -1,8 +1,8 @@
 import abc
 from functools import partial
-from typing import Tuple  # noqa
 
 import sympy as sp
+from sympy.core.cache import cacheit
 
 from pystencils.astnodes import Block, Conditional
 from pystencils.data_types import TypedSymbol, create_type
@@ -10,10 +10,30 @@ from pystencils.integer_functions import div_ceil, div_floor
 from pystencils.slicing import normalize_slice
 from pystencils.sympyextensions import is_integer_sequence, prod
 
-BLOCK_IDX = [TypedSymbol("blockIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
-THREAD_IDX = [TypedSymbol("threadIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
-BLOCK_DIM = [TypedSymbol("blockDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
-GRID_DIM = [TypedSymbol("gridDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
+
+class ThreadIndexingSymbol(TypedSymbol):
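+    """TypedSymbol that represents a thread or block index, e.g. ``threadIdx.x``.
+
+    A dedicated class lets backends recognize these symbols; the OpenCL printer,
+    for instance, rewrites them to work-item functions like get_local_id(0).
+    """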
+    def __new__(cls, *args, **kwds):
+        obj = ThreadIndexingSymbol.__xnew_cached_(cls, *args, **kwds)
+        return obj
+
+    def __new_stage2__(cls, name, dtype, *args, **kwargs):
+        obj = super(ThreadIndexingSymbol, cls).__xnew__(cls, name, dtype, *args, **kwargs)
+        return obj
+
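+    # standard sympy idiom for caching symbol construction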
+    __xnew__ = staticmethod(__new_stage2__)
+    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
+
+
+BLOCK_IDX = [ThreadIndexingSymbol("blockIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
+THREAD_IDX = [ThreadIndexingSymbol("threadIdx." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
+BLOCK_DIM = [ThreadIndexingSymbol("blockDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
+GRID_DIM = [ThreadIndexingSymbol("gridDim." + coord, create_type("int")) for coord in ('x', 'y', 'z')]
 
 
 class AbstractIndexing(abc.ABC):
@@ -69,6 +89,7 @@
     def symbolic_parameters(self):
         """Set of symbols required in call_parameters code"""
 
+
 # -------------------------------------------- Implementations ---------------------------------------------------------
 
 
@@ -82,6 +103,7 @@
                                                 gets the largest amount of threads
         compile_time_block_size: compile in concrete block size, otherwise the cuda variable 'blockDim' is used
     """
+
     def __init__(self, field, iteration_slice,
                  block_size=(16, 16, 1), permute_block_size_dependent_on_layout=True, compile_time_block_size=False,
                  maximum_block_size=(1024, 1024, 64)):
diff --git a/pystencils/include/opencl_stdint.h b/pystencils/include/opencl_stdint.h
new file mode 100644
index 0000000000000000000000000000000000000000..14ad55852fe87f42a569fb4159187a27f348e7ce
--- /dev/null
+++ b/pystencils/include/opencl_stdint.h
@@ -0,0 +1,18 @@
+#ifndef OPENCL_STDINT
+#define OPENCL_STDINT
+
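+/* OpenCL C lacks <stdint.h>, so the fixed-width integer types used by
+   generated kernels are defined here. */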
+typedef unsigned int      uint;
+typedef unsigned int      uint_t;
+
+typedef signed char       int8_t;
+typedef signed short      int16_t;
+typedef signed int        int32_t;
+typedef signed long int   int64_t;
+typedef unsigned char     uint8_t;
+typedef unsigned short    uint16_t;
+typedef unsigned int      uint32_t;
+typedef unsigned long int uint64_t;
+
+#endif
diff --git a/pystencils/opencl/opencljit.py b/pystencils/opencl/opencljit.py
new file mode 100644
index 0000000000000000000000000000000000000000..75076db53bd8dd37457fc4bb9c9dea833942d57f
--- /dev/null
+++ b/pystencils/opencl/opencljit.py
@@ -0,0 +1,85 @@
+import numpy as np
+
+from pystencils.backends.cbackend import generate_c, get_headers
+from pystencils.gpucuda.cudajit import _build_numpy_argument_list, _check_arguments
+from pystencils.include import get_pystencils_include_path
+
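+# Build OpenCL programs with aggressive floating-point optimizations,
+# trading strict IEEE semantics for speed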
+USE_FAST_MATH = True
+
+
+def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
+    """
+    Creates an **OpenCL** kernel function from an abstract syntax tree that
+    was created for ``target='gpu'``, e.g. by :func:`pystencils.gpucuda.create_cuda_kernel`
+    or :func:`pystencils.gpucuda.created_indexed_cuda_kernel`
+
+    Args:
+        opencl_queue: a valid :class:`pyopencl.CommandQueue`
+        opencl_ctx: a valid :class:`pyopencl.Context`
+        kernel_function_node: the abstract syntax tree
+        argument_dict: parameters passed here are already fixed. Remaining parameters have to be passed to the
+                       returned kernel functor.
+
+    Returns:
+        compiled kernel as Python function
+    """
+    import pyopencl as cl
+    assert opencl_ctx, "No valid OpenCL context"
+    assert opencl_queue, "No valid OpenCL queue"
+
+    if argument_dict is None:
+        argument_dict = {}
+
+    # The kernel has to be renamed: its default name "kernel" is a reserved OpenCL keyword and would not compile
+    kernel_function_node.function_name = "opencl_" + kernel_function_node.function_name
+    header_list = ['"opencl_stdint.h"'] + list(get_headers(kernel_function_node))
+    includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])
+
+    code = includes + "\n"
+    code += "#define FUNC_PREFIX __kernel\n"
+    code += "#define RESTRICT restrict\n\n"
+    code += str(generate_c(kernel_function_node, dialect='opencl', custom_backend=custom_backend))
+    options = []
+    if USE_FAST_MATH:
+        options.append("-cl-unsafe-math-optimizations -cl-mad-enable -cl-fast-relaxed-math -cl-finite-math-only")
+    options.append("-I \"" + get_pystencils_include_path() + "\"")
+    mod = cl.Program(opencl_ctx, code).build(options=options)
+    func = getattr(mod, kernel_function_node.function_name)
+
+    parameters = kernel_function_node.get_parameters()
+
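+    # launch configuration and argument list are cached per combination of
+    # argument shapes, strides and data pointers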
+    cache = {}
+    cache_values = []
+
+    def wrapper(**kwargs):
+        key = hash(tuple((k, v.ctypes.data, v.strides, v.shape) if isinstance(v, np.ndarray) else (k, id(v))
+                         for k, v in kwargs.items()))
+        try:
+            args, block_and_thread_numbers = cache[key]
+            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)
+        except KeyError:
+            full_arguments = argument_dict.copy()
+            full_arguments.update(kwargs)
+            shape = _check_arguments(parameters, full_arguments)
+
+            indexing = kernel_function_node.indexing
+            block_and_thread_numbers = indexing.call_parameters(shape)
+            block_and_thread_numbers['block'] = tuple(int(i) for i in block_and_thread_numbers['block'])
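+            # pyopencl expects the global work size, so the CUDA-style grid
+            # (given in blocks) is multiplied by the block size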
+            block_and_thread_numbers['grid'] = tuple(int(b * g) for (b, g) in zip(block_and_thread_numbers['block'],
+                                                                                  block_and_thread_numbers['grid']))
+
+            args = _build_numpy_argument_list(parameters, full_arguments)
+            args = [a.data if hasattr(a, 'data') else a for a in args]
+            cache[key] = (args, block_and_thread_numbers)
+            cache_values.append(kwargs)  # keep objects alive such that ids remain unique
+            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)
+
+    wrapper.ast = kernel_function_node
+    wrapper.parameters = kernel_function_node.get_parameters()
+    return wrapper
diff --git a/pystencils_tests/test_opencl.py b/pystencils_tests/test_opencl.py
new file mode 100644
index 0000000000000000000000000000000000000000..24ef56f9b5e868496d4240d33fe708aaf82b43ed
--- /dev/null
+++ b/pystencils_tests/test_opencl.py
@@ -0,0 +1,199 @@
+import numpy as np
+import pytest
+import sympy as sp
+
+import pystencils
+from pystencils.backends.cuda_backend import CudaBackend
+from pystencils.backends.opencl_backend import OpenClBackend
+from pystencils.opencl.opencljit import make_python_function
+
+try:
+    import pyopencl as cl
+    HAS_OPENCL = True
+except Exception:
+    HAS_OPENCL = False
+
+
+def test_print_opencl():
+    z, y, x = pystencils.fields("z, y, x: [2d]")
+
+    assignments = pystencils.AssignmentCollection({
+        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
+    })
+
+    print(assignments)
+
+    ast = pystencils.create_kernel(assignments, target='gpu')
+
+    print(ast)
+
+    code = pystencils.show_code(ast, custom_backend=CudaBackend())
+    print(code)
+
+    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
+    print(opencl_code)
+
+    assert "__global double * RESTRICT const _data_x" in str(opencl_code)
+    assert "__global double * RESTRICT" in str(opencl_code)
+    assert "get_local_id(0)" in str(opencl_code)
+
+
+@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
+def test_opencl_jit_fixed_size():
+    z, y, x = pystencils.fields("z, y, x: [20,30]")
+
+    assignments = pystencils.AssignmentCollection({
+        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
+    })
+
+    print(assignments)
+
+    ast = pystencils.create_kernel(assignments, target='gpu')
+
+    print(ast)
+
+    code = pystencils.show_code(ast, custom_backend=CudaBackend())
+    print(code)
+    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
+    print(opencl_code)
+
+    cuda_kernel = ast.compile()
+    assert cuda_kernel is not None
+
+    import pycuda.gpuarray as gpuarray
+
+    x_cpu = np.random.rand(20, 30)
+    y_cpu = np.random.rand(20, 30)
+    z_cpu = np.random.rand(20, 30)
+
+    x = gpuarray.to_gpu(x_cpu)
+    y = gpuarray.to_gpu(y_cpu)
+    z = gpuarray.to_gpu(z_cpu)
+    cuda_kernel(x=x, y=y, z=z)
+
+    result_cuda = z.get()
+
+    import pyopencl.array as array
+    ctx = cl.create_some_context(0)
+    queue = cl.CommandQueue(ctx)
+
+    x = array.to_device(queue, x_cpu)
+    y = array.to_device(queue, y_cpu)
+    z = array.to_device(queue, z_cpu)
+
+    opencl_kernel = make_python_function(ast, queue, ctx)
+    assert opencl_kernel is not None
+    opencl_kernel(x=x, y=y, z=z)
+
+    result_opencl = z.get(queue)
+
+    assert np.allclose(result_cuda, result_opencl)
+
+
+@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
+def test_opencl_jit():
+    z, y, x = pystencils.fields("z, y, x: [2d]")
+
+    assignments = pystencils.AssignmentCollection({
+        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
+    })
+
+    print(assignments)
+
+    ast = pystencils.create_kernel(assignments, target='gpu')
+
+    print(ast)
+
+    code = pystencils.show_code(ast, custom_backend=CudaBackend())
+    print(code)
+    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
+    print(opencl_code)
+
+    cuda_kernel = ast.compile()
+    assert cuda_kernel is not None
+
+    import pycuda.gpuarray as gpuarray
+
+    x_cpu = np.random.rand(20, 30)
+    y_cpu = np.random.rand(20, 30)
+    z_cpu = np.random.rand(20, 30)
+
+    x = gpuarray.to_gpu(x_cpu)
+    y = gpuarray.to_gpu(y_cpu)
+    z = gpuarray.to_gpu(z_cpu)
+    cuda_kernel(x=x, y=y, z=z)
+
+    result_cuda = z.get()
+
+    import pyopencl.array as array
+    ctx = cl.create_some_context(0)
+    queue = cl.CommandQueue(ctx)
+
+    x = array.to_device(queue, x_cpu)
+    y = array.to_device(queue, y_cpu)
+    z = array.to_device(queue, z_cpu)
+
+    opencl_kernel = make_python_function(ast, queue, ctx)
+    assert opencl_kernel is not None
+    opencl_kernel(x=x, y=y, z=z)
+
+    result_opencl = z.get(queue)
+
+    assert np.allclose(result_cuda, result_opencl)
+
+
+@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
+def test_opencl_jit_with_parameter():
+    z, y, x = pystencils.fields("z, y, x: [2d]")
+
+    a = sp.Symbol('a')
+    assignments = pystencils.AssignmentCollection({
+        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) + a
+    })
+
+    print(assignments)
+
+    ast = pystencils.create_kernel(assignments, target='gpu')
+
+    print(ast)
+
+    code = pystencils.show_code(ast, custom_backend=CudaBackend())
+    print(code)
+    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
+    print(opencl_code)
+
+    cuda_kernel = ast.compile()
+    assert cuda_kernel is not None
+
+    import pycuda.gpuarray as gpuarray
+
+    x_cpu = np.random.rand(20, 30)
+    y_cpu = np.random.rand(20, 30)
+    z_cpu = np.random.rand(20, 30)
+
+    x = gpuarray.to_gpu(x_cpu)
+    y = gpuarray.to_gpu(y_cpu)
+    z = gpuarray.to_gpu(z_cpu)
+    cuda_kernel(x=x, y=y, z=z, a=5.)
+
+    result_cuda = z.get()
+
+    import pyopencl.array as array
+    ctx = cl.create_some_context(0)
+    queue = cl.CommandQueue(ctx)
+
+    x = array.to_device(queue, x_cpu)
+    y = array.to_device(queue, y_cpu)
+    z = array.to_device(queue, z_cpu)
+
+    opencl_kernel = make_python_function(ast, queue, ctx)
+    assert opencl_kernel is not None
+    opencl_kernel(x=x, y=y, z=z, a=5.)
+
+    result_opencl = z.get(queue)
+
+    assert np.allclose(result_cuda, result_opencl)
+
+
+if __name__ == '__main__':
+    test_opencl_jit()
diff --git a/setup.py b/setup.py
index 6959f09c28b6d9508802c4d0b1c81999e1b4463c..da6b82856b17e5252941154d43310ec1888eaa39 100644
--- a/setup.py
+++ b/setup.py
@@ -88,7 +88,9 @@
       url='https://i10git.cs.fau.de/pycodegen/pystencils/',
       packages=['pystencils'] + ['pystencils.' + s for s in find_packages('pystencils')],
       install_requires=['sympy>=1.1', 'numpy', 'appdirs', 'joblib'],
-      package_data={'pystencils': ['include/*.h', 'backends/cuda_known_functions.txt']},
+      package_data={'pystencils': ['include/*.h',
+                                   'backends/cuda_known_functions.txt',
+                                   'backends/opencl1.1_known_functions.txt']},
       ext_modules=cython_extensions("pystencils.boundaries.createindexlistcython"),
       classifiers=[
           'Development Status :: 4 - Beta',
@@ -106,6 +108,7 @@
       },
       extras_require={
           'gpu': ['pycuda'],
+          'opencl': ['pyopencl'],
           'alltrafos': ['islpy', 'py-cpuinfo'],
           'bench_db': ['blitzdb', 'pymongo', 'pandas'],
           'interactive': ['matplotlib', 'ipy_table', 'imageio', 'jupyter', 'pyevtk'],