diff --git a/pystencils/datahandling/__init__.py b/pystencils/datahandling/__init__.py
index 583f121b09ffbfdc6dc2103ec6e300e639791891..a4fa55bdc7a52e1b9c2015e2210fcbb48aaeb2e1 100644
--- a/pystencils/datahandling/__init__.py
+++ b/pystencils/datahandling/__init__.py
@@ -20,7 +20,8 @@ def create_data_handling(domain_size: Tuple[int, ...],
                          default_layout: str = 'SoA',
                          default_target: str = 'cpu',
                          parallel: bool = False,
-                         default_ghost_layers: int = 1) -> DataHandling:
+                         default_ghost_layers: int = 1,
+                         opencl_queue=None) -> DataHandling:
     """Creates a data handling instance.
 
     Args:
@@ -33,6 +34,8 @@ def create_data_handling(domain_size: Tuple[int, ...],
         default_ghost_layers: default number of ghost layers if not overwritten in 'add_array'
+        opencl_queue: OpenCL queue to use for 'opencl' targets; only supported by the serial data handling
     """
     if parallel:
+        assert not opencl_queue, "OpenCL is only supported for SerialDataHandling"
         if wlb is None:
             raise ValueError("Cannot create parallel data handling because walberla module is not available")
 
@@ -56,8 +59,12 @@ def create_data_handling(domain_size: Tuple[int, ...],
         return ParallelDataHandling(blocks=block_storage, dim=dim, default_target=default_target,
                                     default_layout=default_layout, default_ghost_layers=default_ghost_layers)
     else:
-        return SerialDataHandling(domain_size, periodicity=periodicity, default_target=default_target,
-                                  default_layout=default_layout, default_ghost_layers=default_ghost_layers)
+        return SerialDataHandling(domain_size,
+                                  periodicity=periodicity,
+                                  default_target=default_target,
+                                  default_layout=default_layout,
+                                  default_ghost_layers=default_ghost_layers,
+                                  opencl_queue=opencl_queue)
 
 
 __all__ = ['create_data_handling']
diff --git a/pystencils/datahandling/pycuda.py b/pystencils/datahandling/pycuda.py
new file mode 100644
index 0000000000000000000000000000000000000000..30602a30ce6b87d0e25861b43c5291cda77ed570
--- /dev/null
+++ b/pystencils/datahandling/pycuda.py
@@ -0,0 +1,40 @@
+try:
+    import pycuda.gpuarray as gpuarray
+except ImportError:
+    gpuarray = None
+import numpy as np
+
+import pystencils
+
+
+class PyCudaArrayHandler:
+
+    def __init__(self):
+        import pycuda.autoinit  # NOQA
+
+    def zeros(self, shape, dtype=np.float64, order='C'):
+        return gpuarray.zeros(shape, dtype, order)
+
+    def ones(self, shape, dtype=np.float64, order='C'):
+        # fill on the host and transfer: gpuarray.ones is not available in all pycuda versions
+        return self.to_gpu(np.ones(shape, dtype, order))
+
+    def empty(self, shape, dtype=np.float64, layout=None):
+        if layout:
+            cpu_array = pystencils.field.create_numpy_array_with_layout(shape, dtype, layout)
+            return self.to_gpu(cpu_array)
+        else:
+            return gpuarray.empty(shape, dtype)
+
+    def to_gpu(self, array):
+        return gpuarray.to_gpu(array)
+
+    def upload(self, gpu_array, numpy_array):
+        gpu_array.set(numpy_array)
+
+    def download(self, gpu_array, numpy_array):
+        gpu_array.get(numpy_array)
+
+    def randn(self, shape, dtype=np.float64):
+        cpu_array = np.random.randn(*shape).astype(dtype)
+        return self.to_gpu(cpu_array)
diff --git a/pystencils/datahandling/pyopencl.py b/pystencils/datahandling/pyopencl.py
new file mode 100644
index 0000000000000000000000000000000000000000..05470ea9fec1b1128e88c9226d9a53a9ac1556d4
--- /dev/null
+++ b/pystencils/datahandling/pyopencl.py
@@ -0,0 +1,45 @@
+try:
+    import pyopencl.array as gpuarray
+except ImportError:
+    gpuarray = None
+
+import numpy as np
+
+import pystencils
+
+
+class PyOpenClArrayHandler:
+
+    def __init__(self, queue):
+        if not queue:
+            from pystencils.opencl.opencljit import get_global_cl_queue
+            queue = get_global_cl_queue()
+        assert queue, "OpenCL queue missing"
+        self.queue = queue
+
+    def zeros(self, shape, dtype=np.float64, order='C'):
+        return gpuarray.zeros(self.queue, shape, dtype, order)
+
+    def ones(self, shape, dtype=np.float64, order='C'):
+        # fill on the host and transfer: pyopencl.array provides no ones() in all versions
+        return self.to_gpu(np.ones(shape, dtype, order))
+
+    def empty(self, shape, dtype=np.float64, layout=None):
+        if layout:
+            cpu_array = pystencils.field.create_numpy_array_with_layout(shape, dtype, layout)
+            return self.to_gpu(cpu_array)
+        else:
+            return gpuarray.empty(self.queue, shape, dtype)
+
+    def to_gpu(self, array):
+        return gpuarray.to_device(self.queue, array)
+
+    def upload(self, gpu_array, numpy_array):
+        gpu_array.set(numpy_array, self.queue)
+
+    def download(self, gpu_array, numpy_array):
+        gpu_array.get(self.queue, numpy_array)
+
+    def randn(self, shape, dtype=np.float64):
+        cpu_array = np.random.randn(*shape).astype(dtype)
+        return self.to_gpu(cpu_array)
diff --git a/pystencils/datahandling/serial_datahandling.py b/pystencils/datahandling/serial_datahandling.py
index 697b6b6674a83b6786c8a6a3f687df3e49cd8a01..248b1e611b372e49094f925f510658c46c36bf15 100644
--- a/pystencils/datahandling/serial_datahandling.py
+++ b/pystencils/datahandling/serial_datahandling.py
@@ -6,22 +6,24 @@ import numpy as np
 
 from pystencils.datahandling.blockiteration import SerialBlock
 from pystencils.datahandling.datahandling_interface import DataHandling
+from pystencils.datahandling.pycuda import PyCudaArrayHandler
+from pystencils.datahandling.pyopencl import PyOpenClArrayHandler
 from pystencils.field import (
     Field, FieldType, create_numpy_array_with_layout, layout_string_to_tuple,
     spatial_layout_string_to_tuple)
 from pystencils.slicing import normalize_slice, remove_ghost_layers
 from pystencils.utils import DotDict
 
-try:
-    import pycuda.gpuarray as gpuarray
-    import pycuda.autoinit  # NOQA
-except ImportError:
-    gpuarray = None
-
 
 class SerialDataHandling(DataHandling):
 
-    def __init__(self, domain_size: Sequence[int], default_ghost_layers: int = 1, default_layout: str = 'SoA',
-                 periodicity: Union[bool, Sequence[bool]] = False, default_target: str = 'cpu') -> None:
+    def __init__(self,
+                 domain_size: Sequence[int],
+                 default_ghost_layers: int = 1,
+                 default_layout: str = 'SoA',
+                 periodicity: Union[bool, Sequence[bool]] = False,
+                 default_target: str = 'cpu',
+                 opencl_queue=None,
+                 array_handler=None) -> None:
         """
         Creates a data handling for single node simulations.
@@ -43,6 +45,18 @@ class SerialDataHandling(DataHandling):
         self.custom_data_gpu = DotDict()
         self._custom_data_transfer_functions = {}
 
+        if array_handler:
+            self.array_handler = array_handler
+        else:
+            try:
+                self.array_handler = PyCudaArrayHandler()
+            except Exception:
+                self.array_handler = None
+
+        if default_target == 'opencl' or opencl_queue:
+            default_target = 'gpu'
+            self.array_handler = PyOpenClArrayHandler(opencl_queue)
+
         if periodicity is None or periodicity is False:
             periodicity = [False] * self.dim
         if periodicity is True:
@@ -126,7 +140,7 @@ class SerialDataHandling(DataHandling):
         if gpu:
             if name in self.gpu_arrays:
                 raise ValueError("GPU Field with this name already exists")
-            self.gpu_arrays[name] = gpuarray.to_gpu(cpu_arr)
+            self.gpu_arrays[name] = self.array_handler.to_gpu(cpu_arr)
 
         assert all(f.name != name for f in self.fields.values()), "Symbolic field with this name already exists"
         self.fields[name] = Field.create_from_numpy_array(name, cpu_arr, index_dimensions=index_dimensions,
@@ -222,7 +236,7 @@ class SerialDataHandling(DataHandling):
             self.to_gpu(name)
 
     def run_kernel(self, kernel_function, **kwargs):
-        arrays = self.gpu_arrays if kernel_function.ast.backend == 'gpucuda' else self.cpu_arrays
+        arrays = self.gpu_arrays if kernel_function.ast.backend in ('gpucuda', 'opencl') else self.cpu_arrays
         kernel_function(**arrays, **kwargs)
 
     def get_kernel_kwargs(self, kernel_function, **kwargs):
@@ -236,14 +250,14 @@ class SerialDataHandling(DataHandling):
             transfer_func = self._custom_data_transfer_functions[name][1]
             transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
         else:
-            self.gpu_arrays[name].get(self.cpu_arrays[name])
+            self.array_handler.download(self.gpu_arrays[name], self.cpu_arrays[name])
 
     def to_gpu(self, name):
         if name in self._custom_data_transfer_functions:
             transfer_func = self._custom_data_transfer_functions[name][0]
             transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
         else:
-            self.gpu_arrays[name].set(self.cpu_arrays[name])
+            self.array_handler.upload(self.gpu_arrays[name], self.cpu_arrays[name])
 
     def is_on_gpu(self, name):
         return name in self.gpu_arrays
diff --git a/pystencils_tests/test_datahandling.py b/pystencils_tests/test_datahandling.py
index cdee059c218a67ad9adc824f41fa09ae3df2f19b..1af1a68e3bfdb3681e05443c2412d8bd24903b0d 100644
--- a/pystencils_tests/test_datahandling.py
+++ b/pystencils_tests/test_datahandling.py
@@ -6,6 +6,13 @@ import numpy as np
 import pystencils as ps
 from pystencils import create_data_handling, create_kernel
 
+try:
+    import pytest
+except ImportError:
+    import unittest.mock
+    pytest = unittest.mock.MagicMock()
+
+
 def basic_iteration(dh):
     dh.add_array('basic_iter_test_gl_default')
@@ -100,15 +107,9 @@ def synchronization(dh, test_gpu=False):
                 np.testing.assert_equal(42, b[field_name])
 
 
-def kernel_execution_jacobi(dh, test_gpu=False):
-    if test_gpu:
-        try:
-            from pycuda import driver
-            import pycuda.autoinit
-        except ImportError:
-            print("Skipping kernel_execution_jacobi GPU version, because pycuda not available")
-            return
+def kernel_execution_jacobi(dh, target):
+    test_gpu = target in ('gpu', 'opencl')
 
     dh.add_array('f', gpu=test_gpu)
     dh.add_array('tmp', gpu=test_gpu)
     stencil_2d = [(1, 0), (-1, 0), (0, 1), (0, -1)]
@@ -119,7 +120,7 @@ def kernel_execution_jacobi(dh, target):
     @ps.kernel
     def jacobi():
         dh.fields.tmp.center @= sum(dh.fields.f.neighbors(stencil)) / len(stencil)
 
-    kernel = create_kernel(jacobi, target='gpu' if test_gpu else 'cpu').compile()
+    kernel = create_kernel(jacobi, target).compile()
     for b in dh.iterate(ghost_layers=1):
         b['f'].fill(42)
     dh.run_kernel(kernel)
@@ -196,17 +197,32 @@ def test_access_and_gather():
 def test_kernel():
     for domain_shape in [(4, 5), (3, 4, 5)]:
         dh = create_data_handling(domain_size=domain_shape, periodicity=True)
-        kernel_execution_jacobi(dh, test_gpu=True)
+        kernel_execution_jacobi(dh, 'cpu')
         reduction(dh)
 
         try:
             import pycuda
             dh = create_data_handling(domain_size=domain_shape, periodicity=True)
-            kernel_execution_jacobi(dh, test_gpu=False)
+            kernel_execution_jacobi(dh, 'gpu')
         except ImportError:
             pass
 
 
+@pytest.mark.parametrize('target', ('cpu', 'gpu', 'opencl'))
+def test_kernel_param(target):
+    for domain_shape in [(4, 5), (3, 4, 5)]:
+        if target == 'gpu':
+            pytest.importorskip('pycuda')
+        if target == 'opencl':
+            pytest.importorskip('pyopencl')
+            from pystencils.opencl.opencljit import init_globally
+            init_globally()
+
+        dh = create_data_handling(domain_size=domain_shape, periodicity=True, default_target=target)
+        kernel_execution_jacobi(dh, target)
+        reduction(dh)
+
+
 def test_vtk_output():
     for domain_shape in [(4, 5), (3, 4, 5)]:
         dh = create_data_handling(domain_size=domain_shape, periodicity=True)
diff --git a/pystencils_tests/test_datahandling_parallel.py b/pystencils_tests/test_datahandling_parallel.py
index 52ccb6bce34b2ca115b19a8d00fa31da622b8ff4..0bfbfbd02fc86952e7fba84ce6aa080d2080ec0a 100644
--- a/pystencils_tests/test_datahandling_parallel.py
+++ b/pystencils_tests/test_datahandling_parallel.py
@@ -50,13 +50,13 @@ def test_kernel():
     # 3D
     blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False)
     dh = ParallelDataHandling(blocks)
-    kernel_execution_jacobi(dh, test_gpu=gpu)
+    kernel_execution_jacobi(dh, 'gpu' if gpu else 'cpu')
     reduction(dh)
 
     # 2D
     blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 1), cellsPerBlock=(3, 2, 1), oneBlockPerProcess=False)
     dh = ParallelDataHandling(blocks, dim=2)
-    kernel_execution_jacobi(dh, test_gpu=gpu)
+    kernel_execution_jacobi(dh, 'gpu' if gpu else 'cpu')
     reduction(dh)
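
A minimal usage sketch of the new OpenCL path, mirroring the flow of test_kernel_param above. It assumes pyopencl is installed and an OpenCL device is available; the domain size and field names are illustrative only.

    import pystencils as ps
    from pystencils import create_data_handling, create_kernel
    from pystencils.opencl.opencljit import init_globally

    init_globally()  # set up a global OpenCL context and queue for the JIT

    # With default_target='opencl' and no explicit queue, SerialDataHandling
    # falls back to the global queue via PyOpenClArrayHandler.
    dh = create_data_handling(domain_size=(4, 5), periodicity=True, default_target='opencl')
    dh.add_array('f', gpu=True)
    dh.add_array('tmp', gpu=True)

    stencil = [(1, 0), (-1, 0), (0, 1), (0, -1)]

    @ps.kernel
    def jacobi():
        dh.fields.tmp.center @= sum(dh.fields.f.neighbors(stencil)) / len(stencil)

    kernel = create_kernel(jacobi, 'opencl').compile()
    for b in dh.iterate(ghost_layers=1):
        b['f'].fill(42)
    dh.run_kernel(kernel)   # runs on the OpenCL device, since the arrays live there
    dh.all_to_cpu()         # copy results back through PyOpenClArrayHandler.download

Alternatively, an explicit queue can be passed as create_data_handling(..., opencl_queue=queue); SerialDataHandling then routes all device arrays through a PyOpenClArrayHandler bound to that queue and treats the target as 'gpu'.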