Skip to content
Snippets Groups Projects
Commit 0735ea85 authored by Martin Bauer's avatar Martin Bauer
Browse files

CUDA jit, first test working

parent 8c693cd1
Branches
Tags
No related merge requests found
from pystencils.gpucuda.kernelcreation import createCUDAKernel
from pystencils.gpucuda.cudajit import makePythonFunction
from pystencils.backends.cbackend import generateCUDA
\ No newline at end of file
......@@ -2,6 +2,8 @@ import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
from pycuda.gpuarray import GPUArray
from pystencils.backends.cbackend import generateCUDA
def numpyTypeFromString(typename, includePointers=True):
......@@ -54,14 +56,28 @@ def buildNumpyArgumentList(kernelFunctionNode, argumentDict):
def makePythonFunction(kernelFunctionNode, argumentDict=None):
    """Compile a kernel to CUDA and return a Python callable that launches it.

    The kernel is translated with ``generateCUDA``, compiled once with
    ``SourceModule`` and looked up by ``kernelFunctionNode.functionName``.
    Launch parameters are determined at call time from the arrays that are
    actually passed, so the same compiled kernel can be reused for different
    array sizes.

    :param kernelFunctionNode: kernel description; has to provide
        ``functionName`` and ``getCallParameters(shape)``
    :param argumentDict: optional mapping of default kernel arguments, used
        for every call unless overridden by a keyword argument of the
        returned function
    :return: function taking the kernel arguments as keyword arguments
    :raises ValueError: (from the returned function) if no ``GPUArray`` is
        passed or if the passed ``GPUArray`` objects differ in shape or strides
    """
    # Compile exactly once, up front - the returned wrapper only launches.
    mod = SourceModule(str(generateCUDA(kernelFunctionNode)))
    func = mod.get_function(kernelFunctionNode.functionName)
    defaultArguments = {} if argumentDict is None else argumentDict

    def wrapper(**kwargs):
        # Work on a copy so per-call arguments never leak into the defaults.
        fullArguments = dict(defaultArguments)
        fullArguments.update(kwargs)

        # Collect the distinct shapes/strides of all GPU arrays; a single
        # launch configuration is derived from them, so they have to agree.
        shapes = set()
        strides = set()
        for argValue in fullArguments.values():
            if isinstance(argValue, GPUArray):
                shapes.add(argValue.shape)
                strides.add(argValue.strides)

        # Explicit exceptions instead of asserts: the checks validate caller
        # input and must not vanish when Python runs with -O.
        if not strides:
            raise ValueError("No GPU arrays passed as argument")
        if len(strides) > 1:
            raise ValueError("All passed arrays have to have the same strides")
        if len(shapes) > 1:
            raise ValueError("All passed arrays have to have the same size")
        shape = next(iter(shapes))

        # Presumably yields the 'block'/'grid' launch keywords - confirm
        # against getCallParameters in kernelcreation.
        dictWithBlockAndThreadNumbers = kernelFunctionNode.getCallParameters(shape)

        args = buildNumpyArgumentList(kernelFunctionNode, fullArguments)
        func(*args, **dictWithBlockAndThreadNumbers)

    return wrapper
......@@ -23,16 +23,12 @@ def getLinewiseCoordinates(field, ghostLayers):
if cudaIdx not in result:
return 1
else:
return arrShape[result.index[cudaIdx]] - 2 * ghostLayers
return arrShape[result.index(cudaIdx)] - 2 * ghostLayers
return {'block': tuple([getShapeOfCudaIdx(idx) for idx in THREAD_IDX]),
'grid': tuple([getShapeOfCudaIdx(idx) for idx in BLOCK_IDX]) }
# add ghost layer offset
for i in range(len(result)):
result[i] += ghostLayers
return result, getCallParameters
return [i + ghostLayers for i in result], getCallParameters
def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=defaultdict(lambda: "double")):
......@@ -77,6 +73,5 @@ if __name__ == "__main__":
from pycuda.compiler import SourceModule
print(generateCUDA(kernel))
mod = SourceModule(str(generateCUDA(kernel)))
func = mod.get_function("kernel")
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment