diff --git a/gpucuda/__init__.py b/gpucuda/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..fb98db294e71205b7a3dc02bf8a9381c42d08a7b 100644
--- a/gpucuda/__init__.py
+++ b/gpucuda/__init__.py
@@ -0,0 +1,3 @@
+from pystencils.gpucuda.kernelcreation import createCUDAKernel
+from pystencils.gpucuda.cudajit import makePythonFunction
+from pystencils.backends.cbackend import generateCUDA
\ No newline at end of file
diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py
index 513b8eda0ac66860b5f16f7fc015f3cdb075ab45..077e755d0e79e7a887408ab461a687c555e5992e 100644
--- a/gpucuda/cudajit.py
+++ b/gpucuda/cudajit.py
@@ -2,6 +2,8 @@ import numpy as np
 import pycuda.driver as cuda
 import pycuda.autoinit
 from pycuda.compiler import SourceModule
+from pycuda.gpuarray import GPUArray
+from pystencils.backends.cbackend import generateCUDA
 
 
 def numpyTypeFromString(typename, includePointers=True):
@@ -54,14 +56,40 @@ def buildNumpyArgumentList(kernelFunctionNode, argumentDict):
 
 
 def makePythonFunction(kernelFunctionNode, argumentDict={}):
+    """Build a pycuda module from the kernel's CUDA code and return a launcher.
+
+    The returned function accepts keyword arguments, which are merged over
+    ``argumentDict``. At least one merged value has to be a ``GPUArray``, and
+    all ``GPUArray`` values have to share a single shape and a single stride
+    tuple; otherwise a ``ValueError`` is raised.
+    """
-    mod = SourceModule(str(kernelFunctionNode.generateC()))
+    mod = SourceModule(str(generateCUDA(kernelFunctionNode)))
     func = mod.get_function(kernelFunctionNode.functionName)
 
-    # 1) get argument list
-    args = buildNumpyArgumentList(kernelFunctionNode, argumentDict)
+    def wrapper(**kwargs):
+        # Work on a copy so the defaults in argumentDict are never modified.
+        fullArguments = dict(argumentDict)
+        fullArguments.update(kwargs)
 
-    # 2) determine block and grid tuples
-    dictWithBlockAndThreadNumbers = kernelFunctionNode.getCallParameters()
-    
-    # TODO prepare the function here
+        # Collect the distinct shapes and strides of all GPU arrays.
+        shapes = set()
+        strides = set()
+        for argValue in fullArguments.values():
+            if isinstance(argValue, GPUArray):
+                shapes.add(argValue.shape)
+                strides.add(argValue.strides)
+        if len(strides) == 0:
+            raise ValueError("No GPU arrays passed as argument")
+        # Raise instead of assert: assert statements are removed under "python -O".
+        if len(strides) > 1:
+            raise ValueError("All passed arrays have to have the same strides")
+        if len(shapes) > 1:
+            raise ValueError("All passed arrays have to have the same size")
+        # The common array shape is passed on to getCallParameters.
+        shape = list(shapes)[0]
+        dictWithBlockAndThreadNumbers = kernelFunctionNode.getCallParameters(shape)
+
+        args = buildNumpyArgumentList(kernelFunctionNode, fullArguments)
+        func(*args, **dictWithBlockAndThreadNumbers)
+    return wrapper
 
diff --git a/gpucuda/kernelcreation.py b/gpucuda/kernelcreation.py
index b01886bea3699f20ad7d09bdb506b1be0ed4188a..93f4233c64b4b3647022a6a41a177e7812320366 100644
--- a/gpucuda/kernelcreation.py
+++ b/gpucuda/kernelcreation.py
@@ -23,16 +23,12 @@ def getLinewiseCoordinates(field, ghostLayers):
             if cudaIdx not in result:
                 return 1
             else:
-                return arrShape[result.index[cudaIdx]] - 2 * ghostLayers
+                return arrShape[result.index(cudaIdx)] - 2 * ghostLayers
 
         return {'block': tuple([getShapeOfCudaIdx(idx) for idx in THREAD_IDX]),
                 'grid': tuple([getShapeOfCudaIdx(idx) for idx in BLOCK_IDX]) }
 
-    # add ghost layer offset
-    for i in range(len(result)):
-        result[i] += ghostLayers
-
-    return result, getCallParameters
+    return [i + ghostLayers for i in result], getCallParameters
 
 
 def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=defaultdict(lambda: "double")):
@@ -77,6 +73,5 @@ if __name__ == "__main__":
     from pycuda.compiler import SourceModule
     print(generateCUDA(kernel))
 
-
     mod = SourceModule(str(generateCUDA(kernel)))
     func = mod.get_function("kernel")