diff --git a/gpucuda/__init__.py b/gpucuda/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..fb98db294e71205b7a3dc02bf8a9381c42d08a7b 100644 --- a/gpucuda/__init__.py +++ b/gpucuda/__init__.py @@ -0,0 +1,3 @@ +from pystencils.gpucuda.kernelcreation import createCUDAKernel +from pystencils.gpucuda.cudajit import makePythonFunction +from pystencils.backends.cbackend import generateCUDA \ No newline at end of file diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py index 513b8eda0ac66860b5f16f7fc015f3cdb075ab45..077e755d0e79e7a887408ab461a687c555e5992e 100644 --- a/gpucuda/cudajit.py +++ b/gpucuda/cudajit.py @@ -2,6 +2,8 @@ import numpy as np import pycuda.driver as cuda import pycuda.autoinit from pycuda.compiler import SourceModule +from pycuda.gpuarray import GPUArray +from pystencils.backends.cbackend import generateCUDA def numpyTypeFromString(typename, includePointers=True): @@ -54,14 +56,28 @@ def buildNumpyArgumentList(kernelFunctionNode, argumentDict): def makePythonFunction(kernelFunctionNode, argumentDict={}): - mod = SourceModule(str(kernelFunctionNode.generateC())) + mod = SourceModule(str(generateCUDA(kernelFunctionNode))) func = mod.get_function(kernelFunctionNode.functionName) - # 1) get argument list - args = buildNumpyArgumentList(kernelFunctionNode, argumentDict) + def wrapper(**kwargs): + from copy import copy + fullArguments = copy(argumentDict) + fullArguments.update(kwargs) - # 2) determine block and grid tuples - dictWithBlockAndThreadNumbers = kernelFunctionNode.getCallParameters() - - # TODO prepare the function here + shapes = set() + strides = set() + for argValue in fullArguments.values(): + if isinstance(argValue, GPUArray): + shapes.add(argValue.shape) + strides.add(argValue.strides) + if len(strides) == 0: + raise ValueError("No GPU arrays passed as argument") + assert len(strides) < 2, "All passed arrays have to have the same strides" + assert len(shapes) < 2, "All passed arrays have to have the same size" + shape = list(shapes)[0] + dictWithBlockAndThreadNumbers = kernelFunctionNode.getCallParameters(shape) + + args = buildNumpyArgumentList(kernelFunctionNode, fullArguments) + func(*args, **dictWithBlockAndThreadNumbers) + return wrapper diff --git a/gpucuda/kernelcreation.py b/gpucuda/kernelcreation.py index b01886bea3699f20ad7d09bdb506b1be0ed4188a..93f4233c64b4b3647022a6a41a177e7812320366 100644 --- a/gpucuda/kernelcreation.py +++ b/gpucuda/kernelcreation.py @@ -23,16 +23,12 @@ def getLinewiseCoordinates(field, ghostLayers): if cudaIdx not in result: return 1 else: - return arrShape[result.index[cudaIdx]] - 2 * ghostLayers + return arrShape[result.index(cudaIdx)] - 2 * ghostLayers return {'block': tuple([getShapeOfCudaIdx(idx) for idx in THREAD_IDX]), 'grid': tuple([getShapeOfCudaIdx(idx) for idx in BLOCK_IDX]) } - # add ghost layer offset - for i in range(len(result)): - result[i] += ghostLayers - - return result, getCallParameters + return [i + ghostLayers for i in result], getCallParameters def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=defaultdict(lambda: "double")): @@ -77,6 +73,5 @@ if __name__ == "__main__": from pycuda.compiler import SourceModule print(generateCUDA(kernel)) - mod = SourceModule(str(generateCUDA(kernel))) func = mod.get_function("kernel")