diff --git a/pystencils/astnodes.py b/pystencils/astnodes.py
index 10e855df2fd9a661f053adbdb43dbfb8d969fdf4..d62b4972e2b9fce5f5a70f7aa0cd6b97bc2d52cc 100644
--- a/pystencils/astnodes.py
+++ b/pystencils/astnodes.py
@@ -164,31 +164,20 @@ class KernelFunction(Node):
         def field_name(self):
             return self.fields[0].name
-    def __init__(self, body, target, backend, compile_function, ghost_layers, function_name="kernel"):
+    def __init__(self, body, target, compile_function, ghost_layers, function_name="kernel"):
         super(KernelFunction, self).__init__()
         self._body = body
         body.parent = self
         self.function_name = function_name
         self._body.parent = self
         self.ghost_layers = ghost_layers
-        self._target = target
-        self._backend = backend
+        self.target = target
         # these variables are assumed to be global, so no automatic parameter is generated for them
         self.global_variables = set()
         self.instruction_set = None  # used in `vectorize` function to tell the backend which i.s. (SSE,AVX) to use
         # function that compiles the node to a Python callable, is set by the backends
         self._compile_function = compile_function
-    @property
-    def target(self):
-        """Currently either 'cpu' or 'gpu' """
-        return self._target
-    @property
-    def backend(self):
-        """Backend for generating the code e.g. 'llvm', 'c', 'cuda' """
-        return self._backend
     def symbols_defined(self):
         return set()
diff --git a/pystencils/boundaries/boundaryhandling.py b/pystencils/boundaries/boundaryhandling.py
index e19e24ecb1d5cd5e59a53796fc7ae7f0c9aa30fc..42c4b14180655ec3568d45007616a278a359a51e 100644
--- a/pystencils/boundaries/boundaryhandling.py
+++ b/pystencils/boundaries/boundaryhandling.py
@@ -43,7 +43,7 @@ class FlagInterface:
             raise ValueError("There is already a boundary handling registered at the data handling."
                              "If you want to add multiple handling objects, choose a different name.")
-        self.flag_field = data_handling.add_array(self.flag_field_name, dtype=self.dtype, cpu=True, gpu=False)
+        self.flag_field = data_handling.add_array(self.flag_field_name, dtype=self.dtype, cpu=True, acc=False)
         ff_ghost_layers = data_handling.ghost_layers_of_field(self.flag_field_name)
         for b in data_handling.iterate(ghost_layers=ff_ghost_layers):
@@ -87,26 +87,23 @@ class BoundaryHandling:
         fi = flag_interface
         self.flag_interface = fi if fi is not None else FlagInterface(data_handling, name + "Flags")
-        gpu = self._target in self._data_handling._GPU_LIKE_TARGETS
-        class_ = self.IndexFieldBlockData
-        if self._target == 'opencl':
-            def opencl_to_device(gpu_version, cpu_version):
-                from pyopencl import array
-                gpu_version = gpu_version.boundary_object_to_index_list
-                cpu_version = cpu_version.boundary_object_to_index_list
-                for obj, cpu_arr in cpu_version.items():
-                    if obj not in gpu_version or gpu_version[obj].shape != cpu_arr.shape:
-                        from pystencils.opencl.opencljit import get_global_cl_queue
-                        queue = self._data_handling._opencl_queue or get_global_cl_queue()
-                        gpu_version[obj] = array.to_device(queue, cpu_arr)
-                    else:
-                        gpu_version[obj].set(cpu_arr)
-            class_ = type('opencl_class', (self.IndexFieldBlockData,), {
-                'to_gpu': opencl_to_device
-            })
-        data_handling.add_custom_class(self._index_array_name, class_, cpu=True, gpu=gpu)
+        def to_cpu(gpu_version, cpu_version):
+            """Transfer function accelerator -> CPU for the boundary index arrays of one block."""
+            gpu_version = gpu_version.boundary_object_to_index_list
+            cpu_version = cpu_version.boundary_object_to_index_list
+            for obj, cpu_arr in cpu_version.items():
+                # NOTE(review): `.get(cpu_arr)` is called on the device array directly, while the upload path
+                # goes through the array handler - confirm that every supported accelerator array type takes the
+                # destination array as first positional argument (pyopencl presumably expects a queue there)
+                gpu_version[obj].get(cpu_arr)
+        def to_acc(gpu_version, cpu_version):
+            """Transfer function CPU -> accelerator for the boundary index arrays of one block.
+
+            Both arguments are block-data objects exposing a ``boundary_object_to_index_list`` dict.
+            """
+            gpu_version = gpu_version.boundary_object_to_index_list
+            cpu_version = cpu_version.boundary_object_to_index_list
+            for obj, cpu_arr in cpu_version.items():
+                if obj not in gpu_version or gpu_version[obj].shape != cpu_arr.shape:
+                    # no device array of matching shape yet: create one from the host array;
+                    # the array handlers name this operation `to_acc` (there is no `to_gpu` any more)
+                    gpu_version[obj] = self._data_handling.array_handler.to_acc(cpu_arr)
+                else:
+                    # shapes match: keep the existing device array and hand it to the handler's `upload`
+                    self._data_handling.array_handler.upload(gpu_version[obj], cpu_arr)
+        creation_function = lambda: self.IndexFieldBlockData()
+        data_handling.add_custom_data(self._index_array_name, creation_function, creation_function, to_acc, to_cpu)
     def data_handling(self):
@@ -222,7 +219,7 @@ class BoundaryHandling:
         if self._dirty:
-        for b in self._data_handling.iterate(gpu=self._target in self._data_handling._GPU_LIKE_TARGETS):
+        for b in self._data_handling.iterate(acc=self._target in self._data_handling.ACCELERATOR_TARGETS):
             for b_obj, idx_arr in b[self._index_array_name].boundary_object_to_index_list.items():
                 kwargs[self._field_name] = b[self._field_name]
                 kwargs['indexField'] = idx_arr
@@ -237,7 +234,7 @@ class BoundaryHandling:
         if self._dirty:
-        for b in self._data_handling.iterate(gpu=self._target in self._data_handling._GPU_LIKE_TARGETS):
+        for b in self._data_handling.iterate(acc=self._target in self._data_handling.ACCELERATOR_TARGETS):
             for b_obj, idx_arr in b[self._index_array_name].boundary_object_to_index_list.items():
                 arguments = kwargs.copy()
                 arguments[self._field_name] = b[self._field_name]
@@ -320,8 +317,8 @@ class BoundaryHandling:
     def _boundary_data_initialization(self, boundary_obj, boundary_data_setter, **kwargs):
         if boundary_obj.additional_data_init_callback:
             boundary_obj.additional_data_init_callback(boundary_data_setter, **kwargs)
-        if self._target in self._data_handling._GPU_LIKE_TARGETS:
-            self._data_handling.to_gpu(self._index_array_name)
+        if self._target in self._data_handling.ACCELERATOR_TARGETS:
+            self._data_handling.to_acc(self._index_array_name)
     class BoundaryInfo(object):
         def __init__(self, boundary_obj, flag, kernel):
@@ -330,7 +327,7 @@ class BoundaryHandling:
             self.kernel = kernel
     class IndexFieldBlockData:
-        def __init__(self, *_1, **_2):
+        def __init__(self):
             self.boundary_object_to_index_list = {}
             self.boundary_object_to_data_setter = {}
@@ -338,25 +335,6 @@ class BoundaryHandling:
-        @staticmethod
-        def to_cpu(gpu_version, cpu_version):
-            gpu_version = gpu_version.boundary_object_to_index_list
-            cpu_version = cpu_version.boundary_object_to_index_list
-            for obj, cpu_arr in cpu_version.items():
-                gpu_version[obj].get(cpu_arr)
-        @staticmethod
-        def to_gpu(gpu_version, cpu_version):
-            from pycuda import gpuarray
-            gpu_version = gpu_version.boundary_object_to_index_list
-            cpu_version = cpu_version.boundary_object_to_index_list
-            for obj, cpu_arr in cpu_version.items():
-                if obj not in gpu_version or gpu_version[obj].shape != cpu_arr.shape:
-                    gpu_version[obj] = gpuarray.to_gpu(cpu_arr)
-                else:
-                    gpu_version[obj].set(cpu_arr)
 class BoundaryDataSetter:
     def __init__(self, index_array, offset, stencil, ghost_layers, pdf_array):
diff --git a/pystencils/cpu/kernelcreation.py b/pystencils/cpu/kernelcreation.py
index f351ce5a2bb03d723d22a8e1f772b25a934f7994..3a99220d55008d89a8897bc3280ebce0315fcca6 100644
--- a/pystencils/cpu/kernelcreation.py
+++ b/pystencils/cpu/kernelcreation.py
@@ -64,7 +64,7 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
     loop_order = get_optimal_loop_ordering(fields_without_buffers)
     loop_node, ghost_layer_info = make_loop_over_domain(body, iteration_slice=iteration_slice,
                                                         ghost_layers=ghost_layers, loop_order=loop_order)
-    ast_node = KernelFunction(loop_node, 'cpu', 'c', compile_function=make_python_function,
+    ast_node = KernelFunction(loop_node, 'cpu', compile_function=make_python_function,
                               ghost_layers=ghost_layer_info, function_name=function_name)
@@ -145,7 +145,7 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu
     function_body = Block([loop_node])
-    ast_node = KernelFunction(function_body, "cpu", "c", make_python_function,
+    ast_node = KernelFunction(function_body, "cpu", make_python_function,
                               ghost_layers=None, function_name=function_name)
     fixed_coordinate_mapping = {f.name: coordinate_typed_symbols for f in non_index_fields}
diff --git a/pystencils/datahandling/__init__.py b/pystencils/datahandling/__init__.py
index a4fa55bdc7a52e1b9c2015e2210fcbb48aaeb2e1..d0690cb7ac30131faa91d51a0e76ffbc7e820d5c 100644
--- a/pystencils/datahandling/__init__.py
+++ b/pystencils/datahandling/__init__.py
@@ -18,10 +18,9 @@ except ImportError:
 def create_data_handling(domain_size: Tuple[int, ...],
                          periodicity: Union[bool, Tuple[bool, ...]] = False,
                          default_layout: str = 'SoA',
-                         default_target: str = 'cpu',
+                         target: str = 'cpu',
                          parallel: bool = False,
-                         default_ghost_layers: int = 1,
-                         opencl_queue=None) -> DataHandling:
+                         default_ghost_layers: int = 1) -> DataHandling:
     """Creates a data handling instance.
@@ -29,12 +28,11 @@ def create_data_handling(domain_size: Tuple[int, ...],
         periodicity: either True, False for full or no periodicity or a tuple of booleans indicating periodicity
                      for each coordinate
         default_layout: default array layout, that is used if not explicitly specified in 'add_array'
-        default_target: either 'cpu' or 'gpu'
+        target: target where code should be run, e.g. 'cpu' or 'cuda' or 'opencl'
         parallel: if True a parallel domain is created using walberla - each MPI process gets a part of the domain
         default_ghost_layers: default number of ghost layers if not overwritten in 'add_array'
     if parallel:
-        assert not opencl_queue, "OpenCL is only supported for SerialDataHandling"
         if wlb is None:
             raise ValueError("Cannot create parallel data handling because walberla module is not available")
@@ -55,15 +53,14 @@ def create_data_handling(domain_size: Tuple[int, ...],
         # noinspection PyArgumentList
         block_storage = wlb.createUniformBlockGrid(cells=domain_size, periodic=periodicity)
-        return ParallelDataHandling(blocks=block_storage, dim=dim, default_target=default_target,
+        return ParallelDataHandling(blocks=block_storage, dim=dim, target=target,
                                     default_layout=default_layout, default_ghost_layers=default_ghost_layers)
         return SerialDataHandling(domain_size,
-                                  default_target=default_target,
+                                  target=target,
-                                  default_ghost_layers=default_ghost_layers,
-                                  opencl_queue=opencl_queue)
+                                  default_ghost_layers=default_ghost_layers)
 __all__ = ['create_data_handling']
diff --git a/pystencils/datahandling/datahandling_interface.py b/pystencils/datahandling/datahandling_interface.py
index af1a6ba1fc9d003042063023aa1ede5fc08665db..dc40b226490432c07a651fc0fdaec41a32955479 100644
--- a/pystencils/datahandling/datahandling_interface.py
+++ b/pystencils/datahandling/datahandling_interface.py
@@ -16,8 +16,7 @@ class DataHandling(ABC):
     'gather' function that has collects (parts of the) distributed data on a single process.
-    _GPU_LIKE_TARGETS = ['gpu', 'opencl']
-    _GPU_LIKE_BACKENDS = ['gpucuda', 'opencl']
+    ACCELERATOR_TARGETS = ['cuda', 'opencl', 'llvm_gpu']
     # ---------------------------- Adding and accessing data -----------------------------------------------------------
@@ -39,7 +38,7 @@ class DataHandling(ABC):
     def add_array(self, name: str, values_per_cell, dtype=np.float64,
                   latex_name: Optional[str] = None, ghost_layers: Optional[int] = None, layout: Optional[str] = None,
-                  cpu: bool = True, gpu: Optional[bool] = None, alignment=False, field_type=FieldType.GENERIC) -> Field:
+                  cpu: bool = True, acc: Optional[bool] = None, alignment=False, field_type=FieldType.GENERIC) -> Field:
         """Adds a (possibly distributed) array to the handling that can be accessed using the given name.
         For each array a symbolic field is available via the 'fields' dictionary
@@ -56,8 +55,11 @@ class DataHandling(ABC):
             layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'.
                     this is only important if values_per_cell > 1
             cpu: allocate field on the CPU
-            gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu'
+            acc: allocate field on an accelerator, if an accelerator target has been selected
+                 if None, an accelerator field is allocated only if the target selected when creating the data handling
+                 is not 'cpu'
             alignment: either False for no alignment, or the number of bytes to align to
+            field_type: change from generic to staggered or absolutely accessed fields, see field documentation
             pystencils field, that can be used to formulate symbolic kernels
@@ -67,7 +69,7 @@ class DataHandling(ABC):
         """Returns true if a field or custom data element with this name was added."""
-    def add_array_like(self, name, name_of_template_field, latex_name=None, cpu=True, gpu=None):
+    def add_array_like(self, name, name_of_template_field, latex_name=None, cpu=True, acc=None):
         Adds an array with the same parameters (number of ghost layers, values_per_cell, dtype) as existing array.
@@ -76,33 +78,33 @@ class DataHandling(ABC):
             name_of_template_field: name of array that is used as template
             latex_name: see 'add' method
             cpu: see 'add' method
-            gpu: see 'add' method
+            acc: see 'add' method
     def add_custom_data(self, name: str, cpu_creation_function,
-                        gpu_creation_function=None, cpu_to_gpu_transfer_func=None, gpu_to_cpu_transfer_func=None):
+                        acc_creation_function=None, cpu_to_acc_transfer_func=None, acc_to_cpu_transfer_func=None):
         """Adds custom (non-array) data to domain.
             name: name to access data
             cpu_creation_function: function returning a new instance of the data that should be stored
-            gpu_creation_function: optional, function returning a new instance, stored on GPU
-            cpu_to_gpu_transfer_func: function that transfers cpu to gpu version,
-                                      getting two parameters (gpu_instance, cpu_instance)
-            gpu_to_cpu_transfer_func: function that transfers gpu to cpu version, getting two parameters
-                                      (gpu_instance, cpu_instance)
+            acc_creation_function: optional, function returning a new instance, stored on the accelerator
+            cpu_to_acc_transfer_func: function that transfers cpu to accelerator version,
+                                      getting two parameters (acc_instance, cpu_instance)
+            acc_to_cpu_transfer_func: function that transfers accelerator to cpu version, getting two parameters
+                                      (acc_instance, cpu_instance)
-    def add_custom_class(self, name: str, class_obj, cpu: bool = True, gpu: bool = False):
-        """Adds non-array data by passing a class object with optional 'to_gpu' and 'to_cpu' member functions."""
-        cpu_to_gpu_transfer_func = class_obj.to_gpu if cpu and gpu and hasattr(class_obj, 'to_gpu') else None
-        gpu_to_cpu_transfer_func = class_obj.to_cpu if cpu and gpu and hasattr(class_obj, 'to_cpu') else None
+    def add_custom_class(self, name: str, class_obj, cpu: bool = True, acc: bool = False):
+        """Adds non-array data by passing a class object with optional 'to_acc' and 'to_cpu' member functions."""
+        cpu_to_acc_transfer_func = class_obj.to_acc if cpu and acc and hasattr(class_obj, 'to_acc') else None
+        acc_to_cpu_transfer_func = class_obj.to_cpu if cpu and acc and hasattr(class_obj, 'to_cpu') else None
                              cpu_creation_function=class_obj if cpu else None,
-                             gpu_creation_function=class_obj if gpu else None,
-                             cpu_to_gpu_transfer_func=cpu_to_gpu_transfer_func,
-                             gpu_to_cpu_transfer_func=gpu_to_cpu_transfer_func)
+                             acc_creation_function=class_obj if acc else None,
+                             cpu_to_acc_transfer_func=cpu_to_acc_transfer_func,
+                             acc_to_cpu_transfer_func=acc_to_cpu_transfer_func)
@@ -128,7 +130,7 @@ class DataHandling(ABC):
         """Returns values_per_cell of array."""
-    def iterate(self, slice_obj=None, gpu=False, ghost_layers=None,
+    def iterate(self, slice_obj=None, acc=False, ghost_layers=None,
                 inner_ghost_layers=True) -> Iterable['Block']:
         """Iterate over local part of potentially distributed data structure."""
@@ -157,32 +159,32 @@ class DataHandling(ABC):
-    def swap(self, name1, name2, gpu=False):
+    def swap(self, name1, name2, acc=False):
         """Swaps data of two arrays"""
-    # ------------------------------- CPU/GPU transfer -----------------------------------------------------------------
+    # ------------------------------- CPU/ACC transfer -----------------------------------------------------------------
     def to_cpu(self, name):
-        """Copies GPU data of array with specified name to CPU.
-        Works only if 'cpu=True' and 'gpu=True' has been used in 'add' method."""
+        """Copies accelerator data of array with specified name to CPU.
+        Works only if 'cpu=True' and 'acc=True' has been used in 'add' method."""
-    def to_gpu(self, name):
-        """Copies GPU data of array with specified name to GPU.
-        Works only if 'cpu=True' and 'gpu=True' has been used in 'add' method."""
+    def to_acc(self, name):
+        """Copies CPU data of array with specified name to accelerator.
+        Works only if 'cpu=True' and 'acc=True' has been used in 'add' method."""
     def all_to_cpu(self):
-        """Copies data from GPU to CPU for all arrays that have a CPU and a GPU representation."""
+        """Copies data from accelerator to CPU for all arrays that have a CPU and an accelerator representation."""
-    def all_to_gpu(self):
-        """Copies data from CPU to GPU for all arrays that have a CPU and a GPU representation."""
+    def all_to_acc(self):
+        """Copies data from CPU to accelerator for all arrays that have a CPU and an accelerator representation."""
-    def is_on_gpu(self, name):
-        """Checks if this data was also allocated on the GPU - does not check if this data item is in synced."""
+    def is_on_acc(self, name):
+        """Checks if this data was also allocated on the accelerator - does not check if this data item is in sync."""
     def create_vtk_writer(self, file_name, data_names, ghost_layers=False) -> Callable[[int], None]:
@@ -216,7 +218,7 @@ class DataHandling(ABC):
     # ------------------------------- Communication --------------------------------------------------------------------
-    def synchronization_function(self, names, stencil=None, target=None, **kwargs) -> Callable[[], None]:
+    def synchronization_function(self, names, stencil=None, acc=None, **kwargs) -> Callable[[], None]:
         """Synchronizes ghost layers for distributed arrays.
         For serial scenario this has to be called for correct periodicity handling
@@ -225,8 +227,9 @@ class DataHandling(ABC):
             names: what data to synchronize: name of array or sequence of names
             stencil: stencil as string defining which neighbors are synchronized e.g. 'D2Q9', 'D3Q19'
                      if None, a full synchronization (i.e. D2Q9 or D3Q27) is done
-            target: either 'cpu' or 'gpu
-            kwargs: implementation specific, optional optimization parameters for communication
+            acc: synchronize data on accelerator, if None use accelerator when target at construction is an
+                 accelerator target
+            kwargs: implementation specific
             function object to run the communication
diff --git a/pystencils/datahandling/parallel_datahandling.py b/pystencils/datahandling/parallel_datahandling.py
index 54f26806be318f6ef91a5ca11a9888a59524fb0c..f46b48ea717d22af2fe940b74cd95440717f193c 100644
--- a/pystencils/datahandling/parallel_datahandling.py
+++ b/pystencils/datahandling/parallel_datahandling.py
@@ -16,7 +16,7 @@ class ParallelDataHandling(DataHandling):
     GPU_DATA_PREFIX = "gpu_"
     VTK_COUNTER = 0
-    def __init__(self, blocks, default_ghost_layers=1, default_layout='SoA', dim=3, default_target='cpu'):
+    def __init__(self, blocks, default_ghost_layers=1, default_layout='SoA', dim=3, target='cpu'):
         Creates data handling based on walberla block storage
@@ -27,8 +27,7 @@ class ParallelDataHandling(DataHandling):
             dim: dimension of scenario,
                  walberla always uses three dimensions, so if dim=2 the extend of the
                  z coordinate of blocks has to be 1
-            default_target: either 'cpu' or 'gpu' . If set to 'gpu' for each array also a GPU version is allocated
-                           if not overwritten in add_array, and synchronization functions are for the GPU by default
+            target: either 'cpu' or 'cuda', other targets are not supported in parallel setup
         super(ParallelDataHandling, self).__init__()
         assert dim in (2, 3)
@@ -52,7 +51,8 @@ class ParallelDataHandling(DataHandling):
         if self._dim == 2:
             assert self.blocks.getDomainCellBB().size[2] == 1
-        self.default_target = default_target
+        assert target in ('cpu', 'cuda'), "ParallelDataHandling only support 'cpu' and 'cuda' target"
+        self.target = target
     def dim(self):
@@ -77,24 +77,24 @@ class ParallelDataHandling(DataHandling):
         return self._fieldInformation[name]['values_per_cell']
     def add_custom_data(self, name, cpu_creation_function,
-                        gpu_creation_function=None, cpu_to_gpu_transfer_func=None, gpu_to_cpu_transfer_func=None):
-        if cpu_creation_function and gpu_creation_function:
-            if cpu_to_gpu_transfer_func is None or gpu_to_cpu_transfer_func is None:
+                        acc_creation_function=None, cpu_to_acc_transfer_func=None, acc_to_cpu_transfer_func=None):
+        if cpu_creation_function and acc_creation_function:
+            if cpu_to_acc_transfer_func is None or acc_to_cpu_transfer_func is None:
                 raise ValueError("For GPU data, both transfer functions have to be specified")
-            self._custom_data_transfer_functions[name] = (cpu_to_gpu_transfer_func, gpu_to_cpu_transfer_func)
+            self._custom_data_transfer_functions[name] = (cpu_to_acc_transfer_func, acc_to_cpu_transfer_func)
         if cpu_creation_function:
             self.blocks.addBlockData(name, cpu_creation_function)
-        if gpu_creation_function:
-            self.blocks.addBlockData(self.GPU_DATA_PREFIX + name, gpu_creation_function)
+        if acc_creation_function:
+            self.blocks.addBlockData(self.GPU_DATA_PREFIX + name, acc_creation_function)
     def add_array(self, name, values_per_cell=1, dtype=np.float64, latex_name=None, ghost_layers=None,
-                  layout=None, cpu=True, gpu=None, alignment=False, field_type=FieldType.GENERIC):
+                  layout=None, cpu=True, acc=None, alignment=False, field_type=FieldType.GENERIC):
         if ghost_layers is None:
             ghost_layers = self.default_ghost_layers
-        if gpu is None:
-            gpu = self.default_target == 'gpu'
+        if acc is None:
+            acc = self.target == 'cuda'
         if layout is None:
             layout = self.default_layout
         if len(self.blocks) == 0:
@@ -122,13 +122,13 @@ class ParallelDataHandling(DataHandling):
         if cpu:
             wlb.field.addToStorage(self.blocks, name, dtype, fSize=values_per_cell, layout=layout_map[layout],
                                    ghostLayers=ghost_layers, alignment=alignment)
-        if gpu:
+        if acc:
             if alignment != 0:
                 raise ValueError("Alignment for walberla GPU fields not yet supported")
             wlb.cuda.addGpuFieldToStorage(self.blocks, self.GPU_DATA_PREFIX + name, dtype, fSize=values_per_cell,
                                           usePitchedMem=False, ghostLayers=ghost_layers, layout=layout_map[layout])
-        if cpu and gpu:
+        if cpu and acc:
             self._cpu_gpu_pairs.append((name, self.GPU_DATA_PREFIX + name))
         block_bb = self.blocks.getBlockCellBB(self.blocks[0])
@@ -144,7 +144,7 @@ class ParallelDataHandling(DataHandling):
         self.fields[name].latex_name = latex_name
         self._field_name_to_cpu_data_name[name] = name
-        if gpu:
+        if acc:
             self._field_name_to_gpu_data_name[name] = self.GPU_DATA_PREFIX + name
         return self.fields[name]
@@ -159,18 +159,18 @@ class ParallelDataHandling(DataHandling):
     def custom_data_names(self):
         return tuple(self._custom_data_names)
-    def add_array_like(self, name, name_of_template_field, latex_name=None, cpu=True, gpu=None):
-        return self.add_array(name, latex_name=latex_name, cpu=cpu, gpu=gpu,
+    def add_array_like(self, name, name_of_template_field, latex_name=None, cpu=True, acc=None):
+        return self.add_array(name, latex_name=latex_name, cpu=cpu, acc=acc,
-    def swap(self, name1, name2, gpu=False):
-        if gpu:
+    def swap(self, name1, name2, acc=False):
+        if acc:
             name1 = self.GPU_DATA_PREFIX + name1
             name2 = self.GPU_DATA_PREFIX + name2
         for block in self.blocks:
-    def iterate(self, slice_obj=None, gpu=False, ghost_layers=True, inner_ghost_layers=True):
+    def iterate(self, slice_obj=None, acc=False, ghost_layers=True, inner_ghost_layers=True):
         if ghost_layers is True:
             ghost_layers = self.default_ghost_layers
         elif ghost_layers is False:
@@ -185,7 +185,7 @@ class ParallelDataHandling(DataHandling):
         elif isinstance(ghost_layers, str):
             ghost_layers = self.ghost_layers_of_field(ghost_layers)
-        prefix = self.GPU_DATA_PREFIX if gpu else ""
+        prefix = self.GPU_DATA_PREFIX if acc else ""
         if slice_obj is not None:
             yield from sliced_block_iteration(self.blocks, slice_obj, inner_ghost_layers, ghost_layers,
                                               self.dim, prefix)
@@ -229,7 +229,8 @@ class ParallelDataHandling(DataHandling):
     def get_kernel_kwargs(self, kernel_function, **kwargs):
-        if kernel_function.ast.backend == 'gpucuda':
+        if kernel_function.ast.target in self.ACCELERATOR_TARGETS:
+            assert kernel_function.ast.target == 'cuda', 'ParallelDataHandling only supports CUDA and CPU'
             name_map = self._field_name_to_gpu_data_name
             to_array = wlb.cuda.toGpuArray
@@ -258,7 +259,7 @@ class ParallelDataHandling(DataHandling):
             wlb.cuda.copyFieldToCpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
-    def to_gpu(self, name):
+    def to_acc(self, name):
         if name in self._custom_data_transfer_functions:
             transfer_func = self._custom_data_transfer_functions[name][0]
             for block in self.blocks:
@@ -266,7 +267,7 @@ class ParallelDataHandling(DataHandling):
             wlb.cuda.copyFieldToGpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
-    def is_on_gpu(self, name):
+    def is_on_acc(self, name):
         return (name, self.GPU_DATA_PREFIX + name) in self._cpu_gpu_pairs
     def all_to_cpu(self):
@@ -275,21 +276,15 @@ class ParallelDataHandling(DataHandling):
         for name in self._custom_data_transfer_functions.keys():
-    def all_to_gpu(self):
+    def all_to_acc(self):
         for cpu_name, gpu_name in self._cpu_gpu_pairs:
             wlb.cuda.copyFieldToGpu(self.blocks, gpu_name, cpu_name)
         for name in self._custom_data_transfer_functions.keys():
-            self.to_gpu(name)
+            self.to_acc(name)
-    def synchronization_function_cpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_):
-        return self.synchronization_function(names, stencil, 'cpu', buffered, stencil_restricted)
-    def synchronization_function_gpu(self, names, stencil=None, buffered=True, stencil_restricted=False, **_):
-        return self.synchronization_function(names, stencil, 'gpu', buffered, stencil_restricted)
-    def synchronization_function(self, names, stencil=None, target=None, buffered=True, stencil_restricted=False):
-        if target is None:
-            target = self.default_target
+    def synchronization_function(self, names, stencil=None, acc=None, buffered=True, stencil_restricted=False):
+        # acc=None means "follow the target chosen at construction"; it has to be resolved to a bool here,
+        # because the `if not acc` check later in this method picks between CPU and accelerator pack infos
+        if acc is None:
+            acc = self.target in self.ACCELERATOR_TARGETS
         if stencil is None:
             stencil = 'D3Q27' if self.dim == 3 else 'D2Q9'
@@ -298,12 +293,12 @@ class ParallelDataHandling(DataHandling):
             names = [names]
         create_scheme = wlb.createUniformBufferedScheme if buffered else wlb.createUniformDirectScheme
-        if target == 'cpu':
+        if not acc:
             create_packing = wlb.field.createPackInfo if buffered else wlb.field.createMPIDatatypeInfo
             if not buffered and stencil_restricted:
                 create_packing = wlb.field.createStencilRestrictedPackInfo
-            assert target == 'gpu'
+            assert self.target == 'cuda'
             create_packing = wlb.cuda.createPackInfo if buffered else wlb.cuda.createMPIDatatypeInfo
             names = [self.GPU_DATA_PREFIX + name for name in names]
diff --git a/pystencils/datahandling/pycuda.py b/pystencils/datahandling/pycuda.py
index 30602a30ce6b87d0e25861b43c5291cda77ed570..954d9492afcf6c045697c1fd124ce63c855400c8 100644
--- a/pystencils/datahandling/pycuda.py
+++ b/pystencils/datahandling/pycuda.py
@@ -25,7 +25,7 @@ class PyCudaArrayHandler:
             return gpuarray.empty(shape, dtype)
-    def to_gpu(self, array):
+    def to_acc(self, array):
         return gpuarray.to_gpu(array)
     def upload(self, gpuarray, numpy_array):
diff --git a/pystencils/datahandling/pyopencl.py b/pystencils/datahandling/pyopencl.py
index 7b6f44088f60c47d0c57c5185f1afd16ef16bac7..f3e5428fd1d086edac412b86d6fa0333d1c627c2 100644
--- a/pystencils/datahandling/pyopencl.py
+++ b/pystencils/datahandling/pyopencl.py
@@ -10,7 +10,7 @@ import pystencils
 class PyOpenClArrayHandler:
-    def __init__(self, queue):
+    def __init__(self, queue=None):
         if not queue:
             from pystencils.opencl.opencljit import get_global_cl_queue
             queue = get_global_cl_queue()
@@ -31,7 +31,7 @@ class PyOpenClArrayHandler:
             return gpuarray.empty(self.queue, shape, dtype)
-    def to_gpu(self, array):
+    def to_acc(self, array):
         return gpuarray.to_device(self.queue, array)
     def upload(self, gpuarray, numpy_array):
diff --git a/pystencils/datahandling/serial_datahandling.py b/pystencils/datahandling/serial_datahandling.py
index ea708ae2bb1759d5a9766b278afa2dd7c235f2da..e13ecc263ab30319a12fad81da3ae37797fc3527 100644
--- a/pystencils/datahandling/serial_datahandling.py
+++ b/pystencils/datahandling/serial_datahandling.py
@@ -21,9 +21,7 @@ class SerialDataHandling(DataHandling):
                  default_ghost_layers: int = 1,
                  default_layout: str = 'SoA',
                  periodicity: Union[bool, Sequence[bool]] = False,
-                 default_target: str = 'cpu',
-                 opencl_queue=None,
-                 opencl_ctx=None,
+                 target: str = 'cpu',
                  array_handler=None) -> None:
         Creates a data handling for single node simulations.
@@ -32,8 +30,7 @@ class SerialDataHandling(DataHandling):
             domain_size: size of the spatial domain as tuple
             default_ghost_layers: default number of ghost layers used, if not overridden in add_array() method
             default_layout: default layout used, if  not overridden in add_array() method
-            default_target: either 'cpu' or 'gpu' . If set to 'gpu' for each array also a GPU version is allocated
-                            if not overwritten in add_array, and synchronization functions are for the GPU by default
+            target: one of the values of DataHandling.ACCELERATOR_TARGETS
         super(SerialDataHandling, self).__init__()
         self._domainSize = tuple(domain_size)
@@ -41,23 +38,20 @@ class SerialDataHandling(DataHandling):
         self.default_layout = default_layout
         self._fields = DotDict()
         self.cpu_arrays = DotDict()
-        self.gpu_arrays = DotDict()
+        self.acc_arrays = DotDict()
         self.custom_data_cpu = DotDict()
-        self.custom_data_gpu = DotDict()
+        self.custom_data_acc = DotDict()
         self._custom_data_transfer_functions = {}
-        self._opencl_queue = opencl_queue
-        self._opencl_ctx = opencl_ctx
-        if not array_handler:
-            try:
-                self.array_handler = PyCudaArrayHandler()
-            except Exception:
-                self.array_handler = None
-            if default_target == 'opencl' or opencl_queue:
-                self.array_handler = PyOpenClArrayHandler(opencl_queue)
-        else:
-            self.array_handler = array_handler
+        self.array_handler = array_handler
+        if self.array_handler is None:
+            if target == 'opencl':
+                self.array_handler = PyOpenClArrayHandler()
+            else:
+                try:
+                    self.array_handler = PyCudaArrayHandler()
+                except Exception:
+                    pass
         if periodicity is None or periodicity is False:
             periodicity = [False] * self.dim
@@ -66,8 +60,8 @@ class SerialDataHandling(DataHandling):
         self._periodicity = periodicity
         self._field_information = {}
-        self.default_target = default_target
         self._start_time = time.perf_counter()
+        self.target = target
     def dim(self):
@@ -91,14 +85,20 @@ class SerialDataHandling(DataHandling):
     def values_per_cell(self, name):
         return self._field_information[name]['values_per_cell']
+    def _default_acc_value(self, acc):
+        if acc is None:
+            return self.target in self.ACCELERATOR_TARGETS
+        else:
+            return acc
     def add_array(self, name, values_per_cell=1, dtype=np.float64, latex_name=None, ghost_layers=None, layout=None,
-                  cpu=True, gpu=None, alignment=False, field_type=FieldType.GENERIC):
+                  cpu=True, acc=None, alignment=False, field_type=FieldType.GENERIC):
+        acc = self._default_acc_value(acc)
         if ghost_layers is None:
             ghost_layers = self.default_ghost_layers
         if layout is None:
             layout = self.default_layout
-        if gpu is None:
-            gpu = self.default_target in self._GPU_LIKE_TARGETS
         kwargs = {
             'shape': tuple(s + 2 * ghost_layers for s in self._domainSize),
@@ -131,17 +131,17 @@ class SerialDataHandling(DataHandling):
         cpu_arr = create_numpy_array_with_layout(layout=layout_tuple, alignment=alignment,
                                                  byte_offset=byte_offset, **kwargs)
-        if alignment and gpu:
-            raise NotImplementedError("Alignment for GPU fields not supported")
+        if alignment and acc:
+            raise NotImplementedError("Alignment for accelerator fields not supported")
         if cpu:
             if name in self.cpu_arrays:
                 raise ValueError("CPU Field with this name already exists")
             self.cpu_arrays[name] = cpu_arr
-        if gpu:
-            if name in self.gpu_arrays:
-                raise ValueError("GPU Field with this name already exists")
-            self.gpu_arrays[name] = self.array_handler.to_gpu(cpu_arr)
+        if acc:
+            if name in self.acc_arrays:
+                raise ValueError("Accelerator Field with this name already exists")
+            self.acc_arrays[name] = self.array_handler.to_acc(cpu_arr)
         assert all(f.name != name for f in self.fields.values()), "Symbolic field with this name already exists"
         self.fields[name] = Field.create_from_numpy_array(name, cpu_arr, index_dimensions=index_dimensions,
@@ -150,30 +150,30 @@ class SerialDataHandling(DataHandling):
         return self.fields[name]
     def add_custom_data(self, name, cpu_creation_function,
-                        gpu_creation_function=None, cpu_to_gpu_transfer_func=None, gpu_to_cpu_transfer_func=None):
+                        acc_creation_function=None, cpu_to_acc_transfer_func=None, acc_to_cpu_transfer_func=None):
-        if cpu_creation_function and gpu_creation_function:
-            if cpu_to_gpu_transfer_func is None or gpu_to_cpu_transfer_func is None:
-                raise ValueError("For GPU data, both transfer functions have to be specified")
-            self._custom_data_transfer_functions[name] = (cpu_to_gpu_transfer_func, gpu_to_cpu_transfer_func)
+        if cpu_creation_function and acc_creation_function:
+            if cpu_to_acc_transfer_func is None or acc_to_cpu_transfer_func is None:
+                raise ValueError("For accelerator data, both transfer functions have to be specified")
+            self._custom_data_transfer_functions[name] = (cpu_to_acc_transfer_func, acc_to_cpu_transfer_func)
         assert name not in self.custom_data_cpu
         if cpu_creation_function:
             assert name not in self.cpu_arrays
             self.custom_data_cpu[name] = cpu_creation_function()
-        if gpu_creation_function:
-            assert name not in self.gpu_arrays
-            self.custom_data_gpu[name] = gpu_creation_function()
+        if acc_creation_function:
+            assert name not in self.acc_arrays
+            self.custom_data_acc[name] = acc_creation_function()
     def has_data(self, name):
         return name in self.fields
-    def add_array_like(self, name, name_of_template_field, latex_name=None, cpu=True, gpu=None):
-        return self.add_array(name, latex_name=latex_name, cpu=cpu, gpu=gpu,
+    def add_array_like(self, name, name_of_template_field, latex_name=None, cpu=True, acc=None):
+        return self.add_array(name, latex_name=latex_name, cpu=cpu, acc=acc,
-    def iterate(self, slice_obj=None, gpu=False, ghost_layers=True, inner_ghost_layers=True):
+    def iterate(self, slice_obj=None, acc=False, ghost_layers=True, inner_ghost_layers=True):
         if ghost_layers is True:
             ghost_layers = self.default_ghost_layers
         elif ghost_layers is False:
@@ -186,8 +186,8 @@ class SerialDataHandling(DataHandling):
         slice_obj = normalize_slice(slice_obj, tuple(s + 2 * ghost_layers for s in self._domainSize))
         slice_obj = tuple(s if type(s) is slice else slice(s, s + 1, None) for s in slice_obj)
-        arrays = self.gpu_arrays if gpu else self.cpu_arrays
-        custom_data_dict = self.custom_data_gpu if gpu else self.custom_data_cpu
+        arrays = self.acc_arrays if acc else self.cpu_arrays
+        custom_data_dict = self.custom_data_acc if acc else self.custom_data_cpu
         iter_dict = custom_data_dict.copy()
         for name, arr in arrays.items():
             field_gls = self._field_information[name]['ghost_layers']
@@ -222,61 +222,49 @@ class SerialDataHandling(DataHandling):
         arr.flags.writeable = False
         return arr
-    def swap(self, name1, name2, gpu=None):
-        if gpu is None:
-            gpu = self.default_target == "gpu"
-        arr = self.gpu_arrays if gpu else self.cpu_arrays
+    def swap(self, name1, name2, acc=None):  # acc=None: swap on the side selected by self.target
+        arr = self.acc_arrays if self._default_acc_value(acc) else self.cpu_arrays
         arr[name1], arr[name2] = arr[name2], arr[name1]
     def all_to_cpu(self):
-        for name in (self.cpu_arrays.keys() & self.gpu_arrays.keys()) | self._custom_data_transfer_functions.keys():
+        for name in (self.cpu_arrays.keys() & self.acc_arrays.keys()) | self._custom_data_transfer_functions.keys():
-    def all_to_gpu(self):
-        for name in (self.cpu_arrays.keys() & self.gpu_arrays.keys()) | self._custom_data_transfer_functions.keys():
-            self.to_gpu(name)
+    def all_to_acc(self):
+        for name in (self.cpu_arrays.keys() & self.acc_arrays.keys()) | self._custom_data_transfer_functions.keys():
+            self.to_acc(name)
     def run_kernel(self, kernel_function, **kwargs):
-        arrays = self.gpu_arrays if kernel_function.ast.backend in self._GPU_LIKE_BACKENDS else self.cpu_arrays
+        arrays = self.acc_arrays if kernel_function.ast.target in self.ACCELERATOR_TARGETS else self.cpu_arrays
         kernel_function(**arrays, **kwargs)
     def get_kernel_kwargs(self, kernel_function, **kwargs):
         result = {}
-        result.update(self.gpu_arrays if kernel_function.ast.backend in self._GPU_LIKE_BACKENDS else self.cpu_arrays)
+        result.update(self.acc_arrays if kernel_function.ast.target in self.ACCELERATOR_TARGETS else self.cpu_arrays)
         return [result]
     def to_cpu(self, name):
         if name in self._custom_data_transfer_functions:
             transfer_func = self._custom_data_transfer_functions[name][1]
-            transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
+            transfer_func(self.custom_data_acc[name], self.custom_data_cpu[name])
-            self.array_handler.download(self.gpu_arrays[name], self.cpu_arrays[name])
+            self.array_handler.download(self.acc_arrays[name], self.cpu_arrays[name])
-    def to_gpu(self, name):
+    def to_acc(self, name):
         if name in self._custom_data_transfer_functions:
             transfer_func = self._custom_data_transfer_functions[name][0]
-            transfer_func(self.custom_data_gpu[name], self.custom_data_cpu[name])
+            transfer_func(self.custom_data_acc[name], self.custom_data_cpu[name])
-            self.array_handler.upload(self.gpu_arrays[name], self.cpu_arrays[name])
-    def is_on_gpu(self, name):
-        return name in self.gpu_arrays
-    def synchronization_function_cpu(self, names, stencil_name=None, **_):
-        return self.synchronization_function(names, stencil_name, 'cpu')
+            self.array_handler.upload(self.acc_arrays[name], self.cpu_arrays[name])
-    def synchronization_function_gpu(self, names, stencil_name=None, **_):
-        return self.synchronization_function(names, stencil_name, 'gpu')
+    def is_on_acc(self, name):
+        return name in self.acc_arrays
-    def synchronization_function(self, names, stencil=None, target=None, **_):
-        if target is None:
-            target = self.default_target
-        if target == 'opencl':
-            target = 'gpu'
-        assert target in ('cpu', 'gpu')
+    def synchronization_function(self, names, stencil=None, acc=None, **kwargs):
         if not hasattr(names, '__len__') or type(names) is str:
             names = [names]
+        acc = self._default_acc_value(acc)
         filtered_stencil = []
         neighbors = [-1, 0, 1]
@@ -310,29 +298,27 @@ class SerialDataHandling(DataHandling):
                 raise NotImplementedError("Synchronization of this field is not supported: " + name)
             if len(filtered_stencil) > 0:
-                if target == 'cpu':
+                if not acc:
                     from pystencils.slicing import get_periodic_boundary_functor
                     result.append(get_periodic_boundary_functor(filtered_stencil, ghost_layers=gls))
                     from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as boundary_func
-                    target = 'gpu' if not isinstance(self.array_handler, PyOpenClArrayHandler) else 'opencl'
                     result.append(boundary_func(filtered_stencil, self._domainSize,
-                                                target=target,
-                                                opencl_queue=self._opencl_queue,
-                                                opencl_ctx=self._opencl_ctx))
+                                                target=self.target,
+                                                **kwargs))
-        if target == 'cpu':
+        if acc:
             def result_functor():
                 for arr_name, func in zip(names, result):
-                    func(pdfs=self.cpu_arrays[arr_name])
+                    func(pdfs=self.acc_arrays[arr_name])
             def result_functor():
                 for arr_name, func in zip(names, result):
-                    func(pdfs=self.gpu_arrays[arr_name])
+                    func(pdfs=self.cpu_arrays[arr_name])
         return result_functor
diff --git a/pystencils/display_utils.py b/pystencils/display_utils.py
index 610c404b709885e7445eba3be412f6811fca0241..bccdd875b9762ee97f82f64b4f8708fc52c3d703 100644
--- a/pystencils/display_utils.py
+++ b/pystencils/display_utils.py
@@ -35,7 +35,7 @@ def highlight_cpp(code: str):
     return HTML(highlight(code, CppLexer(), HtmlFormatter()))
-def show_code(ast: KernelFunction, custom_backend=None):
+def code(ast: KernelFunction, custom_backend=None):
     """Returns an object to display generated code (C/C++ or CUDA)
     Can either  be displayed as HTML in Jupyter notebooks or printed as normal string.
@@ -45,11 +45,8 @@ def show_code(ast: KernelFunction, custom_backend=None):
     if isinstance(ast, KernelWrapper):
         ast = ast.ast
-    if ast.backend == 'gpucuda':
-        dialect = 'cuda'
-    elif ast.backend == 'opencl':
-        dialect = 'opencl'
-    else:
+    dialect = ast.target
+    if dialect == 'cpu':
         dialect = 'c'
     class CodeDisplay:
@@ -65,3 +62,12 @@ def show_code(ast: KernelFunction, custom_backend=None):
         def __repr__(self):
             return generate_c(self.ast, dialect=dialect, custom_backend=custom_backend)
     return CodeDisplay(ast)
+def show_code(ast: KernelFunction, custom_backend=None):
+    code_display = code(ast, custom_backend)
+    try:
+        from IPython.display import display
+        display(code_display)
+    except ImportError:
+        print(code_display)
diff --git a/pystencils/gpucuda/kernelcreation.py b/pystencils/gpucuda/kernelcreation.py
index eecfe278e4378cdbed8738275d2cfe813d350d7b..df9a2644f45d63a217e615294f1dd790afbbf6ac 100644
--- a/pystencils/gpucuda/kernelcreation.py
+++ b/pystencils/gpucuda/kernelcreation.py
@@ -61,7 +61,7 @@ def create_cuda_kernel(assignments,
     block = indexing.guard(block, common_shape)
     unify_shape_symbols(block, common_shape=common_shape, fields=fields_without_buffers)
-    ast = KernelFunction(block, 'gpu', 'gpucuda', make_python_function, ghost_layers, function_name)
+    ast = KernelFunction(block, 'cuda', make_python_function, ghost_layers, function_name)
     implement_interpolations(ast, implement_by_texture_accesses=use_textures_for_interpolation)
@@ -136,7 +136,7 @@ def created_indexed_cuda_kernel(assignments,
     function_body = Block(coordinate_symbol_assignments + assignments)
     function_body = indexing.guard(function_body, get_common_shape(index_fields))
-    ast = KernelFunction(function_body, 'gpu', 'gpucuda', make_python_function, None, function_name)
+    ast = KernelFunction(function_body, 'cuda', make_python_function, None, function_name)
     implement_interpolations(ast, implement_by_texture_accesses=use_textures_for_interpolation)
diff --git a/pystencils/gpucuda/periodicity.py b/pystencils/gpucuda/periodicity.py
index 080ef44ebd995e1d8dcf4dbe1f0839e0a218fde6..02a5488c1d7af026991f3b06c7d48245c882c226 100644
--- a/pystencils/gpucuda/periodicity.py
+++ b/pystencils/gpucuda/periodicity.py
@@ -31,14 +31,14 @@ def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, in
 def get_periodic_boundary_functor(stencil, domain_size, index_dimensions=0, index_dim_shape=1, ghost_layers=1,
                                   thickness=None, dtype=float, target='gpu', opencl_queue=None, opencl_ctx=None):
-    assert target in ['gpu', 'opencl']
+    assert target in ['cuda', 'opencl']
     src_dst_slice_tuples = get_periodic_boundary_src_dst_slices(stencil, ghost_layers, thickness)
     kernels = []
     index_dimensions = index_dimensions
     for src_slice, dst_slice in src_dst_slice_tuples:
         ast = create_copy_kernel(domain_size, src_slice, dst_slice, index_dimensions, index_dim_shape, dtype)
-        if target == 'gpu':
+        if target == 'cuda':
             kernels.append(pystencils.opencl.make_python_function(ast, opencl_queue, opencl_ctx))
diff --git a/pystencils/kernelcreation.py b/pystencils/kernelcreation.py
index c4d5273d4f0b303e600a8ead6378878e1fdf403f..cdc7b1ce003f5f44f023f8831e30530bb2f8176d 100644
--- a/pystencils/kernelcreation.py
+++ b/pystencils/kernelcreation.py
@@ -100,12 +100,12 @@ def create_kernel(assignments,
                 raise ValueError("Invalid value for cpu_vectorize_info")
         return ast
-    elif target == 'llvm':
+    elif target == 'llvm_cpu' or target == 'llvm_gpu':
         from pystencils.llvm import create_kernel
         ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
                             iteration_slice=iteration_slice, ghost_layers=ghost_layers)
         return ast
-    elif target == 'gpu' or target == 'opencl':
+    elif target == 'cuda' or target == 'opencl':
         from pystencils.gpucuda import create_cuda_kernel
         ast = create_cuda_kernel(assignments, type_info=data_type,
                                  indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
@@ -114,10 +114,8 @@ def create_kernel(assignments,
         if target == 'opencl':
             from pystencils.opencl.opencljit import make_python_function
-            ast._backend = 'opencl'
             ast.compile = functools.partial(make_python_function, ast, opencl_queue, opencl_ctx)
-            ast._target = 'opencl'
-            ast._backend = 'opencl'
+            ast.target = 'opencl'
         return ast
         raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))
@@ -181,9 +179,9 @@ def create_indexed_kernel(assignments,
         if cpu_openmp:
             add_openmp(ast, num_threads=cpu_openmp)
         return ast
-    elif target == 'llvm':
+    elif target == 'llvm_cpu' or target == 'llvm_gpu':
         raise NotImplementedError("Indexed kernels are not yet supported in LLVM backend")
-    elif target == 'gpu' or target == 'opencl':
+    elif target == 'cuda' or target == 'opencl':
         from pystencils.gpucuda import created_indexed_cuda_kernel
         idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
         ast = created_indexed_cuda_kernel(assignments,
@@ -194,10 +192,8 @@ def create_indexed_kernel(assignments,
         if target == 'opencl':
             from pystencils.opencl.opencljit import make_python_function
-            ast._backend = 'opencl'
             ast.compile = functools.partial(make_python_function, ast, opencl_queue, opencl_ctx)
-            ast._target = 'opencl'
-            ast._backend = 'opencl'
+            ast.target = 'opencl'
         return ast
         raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
diff --git a/pystencils/llvm/kernelcreation.py b/pystencils/llvm/kernelcreation.py
index 57e5b73876b643196f5529d05df41a11a1ef01e5..663a421eff9e092e218456eccf16b527f3d5ae36 100644
--- a/pystencils/llvm/kernelcreation.py
+++ b/pystencils/llvm/kernelcreation.py
@@ -3,7 +3,7 @@ from pystencils.transformations import insert_casts
 def create_kernel(assignments, function_name="kernel", type_info=None, split_groups=(),
-                  iteration_slice=None, ghost_layers=None, target='cpu'):
+                  iteration_slice=None, ghost_layers=None, target='llvm_cpu'):
     Creates an abstract syntax tree for a kernel function, by taking a list of update rules.
@@ -25,21 +25,20 @@ def create_kernel(assignments, function_name="kernel", type_info=None, split_gro
     :return: :class:`pystencils.ast.KernelFunction` node
-    if target == 'cpu':
+    if target == 'llvm_cpu':
         from pystencils.cpu import create_kernel
         code = create_kernel(assignments, function_name, type_info, split_groups, iteration_slice, ghost_layers)
-        code._backend = 'llvm'
-    elif target == 'gpu':
+    elif target == 'llvm_gpu':
         from pystencils.gpucuda.kernelcreation import create_cuda_kernel
         code = create_cuda_kernel(assignments,
-        code._backend = 'llvm_gpu'
     code.body = insert_casts(code.body)
     code._compile_function = make_python_function
+    code.target = target
     return code
diff --git a/pystencils_tests/test_datahandling.py b/pystencils_tests/test_datahandling.py
index 1af1a68e3bfdb3681e05443c2412d8bd24903b0d..569cd3a42cc79af239b04dbfa4274cf85cb8437d 100644
--- a/pystencils_tests/test_datahandling.py
+++ b/pystencils_tests/test_datahandling.py
@@ -77,17 +77,10 @@ def access_and_gather(dh, domain_size):
                     assert full_arr[x, y] == x + y
-def synchronization(dh, test_gpu=False):
+def synchronization(dh):
     field_name = 'comm_field_test'
-    if test_gpu:
-        try:
-            from pycuda import driver
-            import pycuda.autoinit
-        except ImportError:
-            return
-        field_name += 'Gpu'
-    dh.add_array(field_name, ghost_layers=1, dtype=np.int32, cpu=True, gpu=test_gpu)
+    dh.add_array(field_name, ghost_layers=1, dtype=np.int32)
     # initialize everything with 1
     for b in dh.iterate(ghost_layers=1):
@@ -95,23 +88,17 @@ def synchronization(dh, test_gpu=False):
     for b in dh.iterate(ghost_layers=0):
-    if test_gpu:
-        dh.to_gpu(field_name)
-    dh.synchronization_function(field_name, target='gpu' if test_gpu else 'cpu')()
-    if test_gpu:
-        dh.to_cpu(field_name)
+    dh.all_to_acc()
+    dh.synchronization_function(field_name)()
+    dh.all_to_cpu()
     for b in dh.iterate(ghost_layers=1):
         np.testing.assert_equal(42, b[field_name])
-def kernel_execution_jacobi(dh, target):
-    test_gpu = target == 'gpu' or target == 'opencl'
-    dh.add_array('f', gpu=test_gpu)
-    dh.add_array('tmp', gpu=test_gpu)
+def kernel_execution_jacobi(dh):
+    dh.add_array('f')
+    dh.add_array('tmp')
     stencil_2d = [(1, 0), (-1, 0), (0, 1), (0, -1)]
     stencil_3d = [(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1)]
     stencil = stencil_2d if dh.dim == 2 else stencil_3d
@@ -120,10 +107,12 @@ def kernel_execution_jacobi(dh, target):
     def jacobi():
         dh.fields.tmp.center @= sum(dh.fields.f.neighbors(stencil)) / len(stencil)
-    kernel = create_kernel(jacobi, target).compile()
+    kernel = create_kernel(jacobi, dh.target).compile()
     for b in dh.iterate(ghost_layers=1):
+    dh.all_to_acc()
+    dh.all_to_cpu()
     for b in dh.iterate(ghost_layers=0):
         np.testing.assert_equal(b['f'], 42)
@@ -190,36 +179,36 @@ def test_access_and_gather():
     for domain_shape in [(2, 2, 3), (2, 3)]:
         dh = create_data_handling(domain_size=domain_shape, periodicity=True)
         access_and_gather(dh, domain_shape)
-        synchronization(dh, test_gpu=False)
-        synchronization(dh, test_gpu=True)
+        dh = create_data_handling(domain_size=domain_shape, periodicity=True, target='cpu')
+        synchronization(dh)
+        pytest.importorskip('pycuda')
+        dh = create_data_handling(domain_size=domain_shape, periodicity=True, target='cuda')
+        synchronization(dh)
 def test_kernel():
     for domain_shape in [(4, 5), (3, 4, 5)]:
         dh = create_data_handling(domain_size=domain_shape, periodicity=True)
-        kernel_execution_jacobi(dh, 'cpu')
+        kernel_execution_jacobi(dh)
-        try:
-            import pycuda
-            dh = create_data_handling(domain_size=domain_shape, periodicity=True)
-            kernel_execution_jacobi(dh, 'gpu')
-        except ImportError:
-            pass
+        pytest.importorskip('pycuda')
+        dh = create_data_handling(domain_size=domain_shape, periodicity=True, target='cuda')
+        kernel_execution_jacobi(dh)
-@pytest.mark.parametrize('target', ('cpu', 'gpu', 'opencl'))
+@pytest.mark.parametrize('target', ('cpu', 'cuda', 'opencl'))
 def test_kernel_param(target):
     for domain_shape in [(4, 5), (3, 4, 5)]:
-        if target == 'gpu':
+        if target == 'cuda':
         if target == 'opencl':
             from pystencils.opencl.opencljit import init_globally
-        dh = create_data_handling(domain_size=domain_shape, periodicity=True, default_target=target)
-        kernel_execution_jacobi(dh, target)
+        dh = create_data_handling(domain_size=domain_shape, periodicity=True, target=target)
+        kernel_execution_jacobi(dh)
diff --git a/pystencils_tests/test_datahandling_parallel.py b/pystencils_tests/test_datahandling_parallel.py
index 0bfbfbd02fc86952e7fba84ce6aa080d2080ec0a..4346ce61d895590d0600d0a13ad8c17dbf21d95f 100644
--- a/pystencils_tests/test_datahandling_parallel.py
+++ b/pystencils_tests/test_datahandling_parallel.py
@@ -28,7 +28,7 @@ def test_gpu():
     num_blocks = (3, 2, 1)
     blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False)
     dh = ParallelDataHandling(blocks, default_ghost_layers=2)
-    dh.add_array('v', values_per_cell=3, dtype=np.int64, ghost_layers=2, gpu=True)
+    dh.add_array('v', values_per_cell=3, dtype=np.int64, ghost_layers=2, acc=True)
     for b in dh.iterate():