From 3f45aed6ba232d0aabfad09d3ca96da26cb39d9a Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Sun, 19 Mar 2017 22:04:11 +0100 Subject: [PATCH] GPU bugfixes and lbmpy GPU support - bugfix for CUDA kernels with variable field sizes - extended tests for pystencils gpu kernels --- gpucuda/cudajit.py | 11 +++++------ gpucuda/kernelcreation.py | 17 ++++------------- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py index c754afcd6..a0355d26c 100644 --- a/gpucuda/cudajit.py +++ b/gpucuda/cudajit.py @@ -4,7 +4,7 @@ import pycuda.autoinit from pycuda.compiler import SourceModule from pystencils.backends.cbackend import generateC from pystencils.transformations import symbolNameToVariableName -from pystencils.types import StructType +from pystencils.types import StructType, getBaseType def makePythonFunction(kernelFunctionNode, argumentDict={}): @@ -36,6 +36,7 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}): args = _buildNumpyArgumentList(kernelFunctionNode, fullArguments) func(*args, **dictWithBlockAndThreadNumbers) + # cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called return wrapper @@ -47,12 +48,10 @@ def _buildNumpyArgumentList(kernelFunctionNode, argumentDict): field = argumentDict[arg.fieldName] if arg.isFieldPtrArgument: result.append(field.gpudata) - elif arg.isFieldShapeArgument: - strideArr = np.array(field.strides, dtype=np.int32) / field.dtype.itemsize - result.append(cuda.In(strideArr)) elif arg.isFieldStrideArgument: - shapeArr = np.array(field.shape, dtype=np.int32) - result.append(cuda.In(shapeArr)) + dtype = getBaseType(arg.dtype).numpyDtype + strideArr = np.array(field.strides, dtype=dtype) // field.dtype.itemsize + result.append(cuda.In(strideArr)) else: assert False else: diff --git a/gpucuda/kernelcreation.py b/gpucuda/kernelcreation.py index 512334898..664752935 100644 --- a/gpucuda/kernelcreation.py +++ b/gpucuda/kernelcreation.py @@ -1,8 +1,7 @@ import sympy as sp -from pystencils.transformations import resolveFieldAccesses, typeAllEquations, \ - parseBasePointerInfo, typingFromSympyInspection -from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment +from pystencils.transformations import resolveFieldAccesses, typeAllEquations, parseBasePointerInfo +from pystencils.astnodes import Block, KernelFunction, SympyAssignment from pystencils import Field from pystencils.types import TypedSymbol, BasicType, StructType @@ -82,20 +81,12 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel" coordinateSymbolAssignments = [getCoordinateSymbolAssignment(n) for n in coordinateNames[:spatialCoordinates]] coordinateTypedSymbols = [eq.lhs for eq in coordinateSymbolAssignments] - assignments = coordinateSymbolAssignments + assignments - # make 1D loop over index fields - loopBody = Block([]) - loopNode = LoopOverCoordinate(loopBody, coordinateToLoopOver=0, start=0, stop=indexFields[0].shape[0]) - - for assignment in assignments: - loopBody.append(assignment) - - functionBody = Block([loopNode]) + functionBody = Block(coordinateSymbolAssignments + assignments) ast = KernelFunction(functionBody, allFields, functionName) ast.globalVariables.update(BLOCK_IDX + THREAD_IDX) - coordMapping, getCallParameters = getLinewiseCoordinates(list(fieldsRead)[0], ghostLayers=0) + coordMapping, getCallParameters = getLinewiseCoordinates(list(indexFields)[0], ghostLayers=0) basePointerInfo = [['spatialInner0']] basePointerInfos = {f.name: parseBasePointerInfo(basePointerInfo, [2, 1, 0], f) for f in allFields} -- GitLab