From 0735ea854b4d80312082dc1ccc7bf42a7da0a238 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Sat, 5 Nov 2016 15:18:09 +0100 Subject: [PATCH] CUDA jit, first test working --- gpucuda/__init__.py | 3 +++ gpucuda/cudajit.py | 30 +++++++++++++++++++++++------- gpucuda/kernelcreation.py | 9 ++------- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/gpucuda/__init__.py b/gpucuda/__init__.py index e69de29bb..fb98db294 100644 --- a/gpucuda/__init__.py +++ b/gpucuda/__init__.py @@ -0,0 +1,3 @@ +from pystencils.gpucuda.kernelcreation import createCUDAKernel +from pystencils.gpucuda.cudajit import makePythonFunction +from pystencils.backends.cbackend import generateCUDA \ No newline at end of file diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py index 513b8eda0..077e755d0 100644 --- a/gpucuda/cudajit.py +++ b/gpucuda/cudajit.py @@ -2,6 +2,8 @@ import numpy as np import pycuda.driver as cuda import pycuda.autoinit from pycuda.compiler import SourceModule +from pycuda.gpuarray import GPUArray +from pystencils.backends.cbackend import generateCUDA def numpyTypeFromString(typename, includePointers=True): @@ -54,14 +56,28 @@ def buildNumpyArgumentList(kernelFunctionNode, argumentDict): def makePythonFunction(kernelFunctionNode, argumentDict={}): - mod = SourceModule(str(kernelFunctionNode.generateC())) + mod = SourceModule(str(generateCUDA(kernelFunctionNode))) func = mod.get_function(kernelFunctionNode.functionName) - # 1) get argument list - args = buildNumpyArgumentList(kernelFunctionNode, argumentDict) + def wrapper(**kwargs): + from copy import copy + fullArguments = copy(argumentDict) + fullArguments.update(kwargs) - # 2) determine block and grid tuples - dictWithBlockAndThreadNumbers = kernelFunctionNode.getCallParameters() - - # TODO prepare the function here + shapes = set() + strides = set() + for argValue in fullArguments.values(): + if isinstance(argValue, GPUArray): + shapes.add(argValue.shape) + strides.add(argValue.strides) + if len(strides) == 0: + raise ValueError("No GPU arrays passed as argument") + assert len(strides) < 2, "All passed arrays have to have the same strides" + assert len(shapes) < 2, "All passed arrays have to have the same size" + shape = list(shapes)[0] + dictWithBlockAndThreadNumbers = kernelFunctionNode.getCallParameters(shape) + + args = buildNumpyArgumentList(kernelFunctionNode, fullArguments) + func(*args, **dictWithBlockAndThreadNumbers) + return wrapper diff --git a/gpucuda/kernelcreation.py b/gpucuda/kernelcreation.py index b01886bea..93f4233c6 100644 --- a/gpucuda/kernelcreation.py +++ b/gpucuda/kernelcreation.py @@ -23,16 +23,12 @@ def getLinewiseCoordinates(field, ghostLayers): if cudaIdx not in result: return 1 else: - return arrShape[result.index[cudaIdx]] - 2 * ghostLayers + return arrShape[result.index(cudaIdx)] - 2 * ghostLayers return {'block': tuple([getShapeOfCudaIdx(idx) for idx in THREAD_IDX]), 'grid': tuple([getShapeOfCudaIdx(idx) for idx in BLOCK_IDX]) } - # add ghost layer offset - for i in range(len(result)): - result[i] += ghostLayers - - return result, getCallParameters + return [i + ghostLayers for i in result], getCallParameters def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=defaultdict(lambda: "double")): @@ -77,6 +73,5 @@ if __name__ == "__main__": from pycuda.compiler import SourceModule print(generateCUDA(kernel)) - mod = SourceModule(str(generateCUDA(kernel))) func = mod.get_function("kernel") -- GitLab