diff --git a/astnodes.py b/astnodes.py
index d87b45555337d3889cd2d10a7188d4a67a752a18..cd39d26c169cc5a28bec056e953224cbe2c39dd7 100644
--- a/astnodes.py
+++ b/astnodes.py
@@ -155,7 +155,7 @@ class KernelFunction(Node):
         def __repr__(self):
             return '<{0} {1}>'.format(self.dtype, self.name)
 
-    def __init__(self, body, ghostLayers=None, functionName="kernel"):
+    def __init__(self, body, ghostLayers=None, functionName="kernel", backend=""):
         super(KernelFunction, self).__init__()
         self._body = body
         body.parent = self
@@ -166,6 +166,7 @@ class KernelFunction(Node):
         self.ghostLayers = ghostLayers
         # these variables are assumed to be global, so no automatic parameter is generated for them
         self.globalVariables = set()
+        self.backend = backend  # keep the backend identifier supplied by the caller
 
     @property
     def symbolsDefined(self):
diff --git a/cpu/cpujit.py b/cpu/cpujit.py
index 28184d6f366d20904eb3d958e3c98af18339a8e5..7fcfab610c353bcd1c0fc7c55f204447524d0493 100644
--- a/cpu/cpujit.py
+++ b/cpu/cpujit.py
@@ -439,4 +439,5 @@ def makePythonFunctionIncompleteParams(kernelFunctionNode, argumentDict, func):
             cache[key] = args
             cacheValues.append(kwargs)  # keep objects alive such that ids remain unique
             func(*args)
+    wrapper.ast = kernelFunctionNode
     return wrapper
diff --git a/cpu/kernelcreation.py b/cpu/kernelcreation.py
index 91b6dc3ebc296c3ba1ccd240a4081a0edc1019b6..eb5e4851176bb82db62d77ebb0100c092d2322f4 100644
--- a/cpu/kernelcreation.py
+++ b/cpu/kernelcreation.py
@@ -139,7 +139,7 @@ def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typ
         loopBody.append(assignment)
 
     functionBody = Block([loopNode])
-    ast = KernelFunction(functionBody, functionName=functionName)
+    ast = KernelFunction(functionBody, functionName=functionName, backend='cpu')  # 2nd positional is ghostLayers
 
     fixedCoordinateMapping = {f.name: coordinateTypedSymbols for f in nonIndexFields}
     resolveFieldAccesses(ast, set(['indexField']), fieldToFixedCoordinates=fixedCoordinateMapping)
diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py
index 3a0fe85f4ceec904438381b4104b29fcfb2f1d30..96087f1a48ce3478d321a00e63afb4895f2fcf38 100644
--- a/gpucuda/cudajit.py
+++ b/gpucuda/cudajit.py
@@ -52,6 +52,7 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}):
             cacheValues.append(kwargs)  # keep objects alive such that ids remain unique
             func(*args, **dictWithBlockAndThreadNumbers)
         #cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called
+    wrapper.ast = kernelFunctionNode
     return wrapper
 
 
diff --git a/gpucuda/kernelcreation.py b/gpucuda/kernelcreation.py
index 5bd2f5d7d96efda99bf4122483a50325cde7f2d3..7797b46a3620fa6fb24974c6d474158bb7b5e9e7 100644
--- a/gpucuda/kernelcreation.py
+++ b/gpucuda/kernelcreation.py
@@ -45,7 +45,7 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None,
 
     block = Block(assignments)
     block = indexing.guard(block, commonShape)
-    ast = KernelFunction(block, functionName=functionName, ghostLayers=ghostLayers)
+    ast = KernelFunction(block, functionName=functionName, ghostLayers=ghostLayers, backend='gpucuda')
     ast.globalVariables.update(indexing.indexVariables)
 
     coordMapping = indexing.coordinates
@@ -118,7 +118,7 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel"
 
     functionBody = Block(coordinateSymbolAssignments + assignments)
     functionBody = indexing.guard(functionBody, getCommonShape(indexFields))
-    ast = KernelFunction(functionBody, functionName=functionName)
+    ast = KernelFunction(functionBody, functionName=functionName, backend='gpucuda')
     ast.globalVariables.update(indexing.indexVariables)
 
     coordMapping = indexing.coordinates
diff --git a/llvm/kernelcreation.py b/llvm/kernelcreation.py
index 121dcdd924a84f90d94ca557f5ac19cf5a299802..aa690b957d687c2f2dcd695ca2396ef3d29f2550 100644
--- a/llvm/kernelcreation.py
+++ b/llvm/kernelcreation.py
@@ -89,7 +89,7 @@ def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typ
         loopBody.append(assignment)
 
     functionBody = Block([loopNode])
-    ast = KernelFunction(functionBody, allFields, functionName)
+    ast = KernelFunction(functionBody, None, functionName, backend='llvm')
 
     fixedCoordinateMapping = {f.name: coordinateTypedSymbols for f in nonIndexFields}
     resolveFieldAccesses(ast, set(['indexField']), fieldToFixedCoordinates=fixedCoordinateMapping)
diff --git a/parallel/datahandling.py b/parallel/datahandling.py
index 675c47a2c1659a67e5666f90e55c7af651b8dbad..f0553981609806f2978d2cb36338357a0062c9f4 100644
--- a/parallel/datahandling.py
+++ b/parallel/datahandling.py
@@ -141,6 +141,14 @@ class ParallelDataHandling(DataHandling):
                     array = array[:, :, 0]
                 yield array
 
+    def runKernel(self, kernelFunc, *args, **kwargs):
+        """Call kernelFunc once per block, filling its field arguments from the block (kwargs take precedence)."""
+        fieldArguments = [p.fieldName for p in kernelFunc.ast.parameters if p.isFieldPtrArgument]
+        for block in self.blocks:
+            fieldArgs = {name: wlb.field.toArray(block[name], withGhostLayers=True) for name in fieldArguments}
+            fieldArgs.update(kwargs)  # explicitly passed keyword arguments override the per-block field data
+            kernelFunc(*args, **fieldArgs)
+
     def toCpu(self, name):
         if name in self._customDataTransferFunctions:
             transferFunc = self._customDataTransferFunctions[name][1]
diff --git a/transformations/transformations.py b/transformations/transformations.py
index bf9426f326150b5d996305c3afc7b729f9bdd80e..73bc8361dbf0a5089433aea7d16ccf62cfd235fb 100644
--- a/transformations/transformations.py
+++ b/transformations/transformations.py
@@ -121,7 +121,7 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None
                 currentBody.insertFront(assignment)
 
     loopVars = [numBufferAccesses * var for var in loopVars]
-    astNode = ast.KernelFunction(currentBody, ghostLayers=ghostLayers, functionName=functionName)
+    astNode = ast.KernelFunction(currentBody, ghostLayers=ghostLayers, functionName=functionName, backend='cpu')
     return (astNode, loopStrides, loopVars)