Commit 3f45aed6 authored by Martin Bauer
Browse files

GPU bugfixes and lbmpy GPU support

- bugfix for CUDA kernels with variable field sizes
- extended tests for pystencils gpu kernels
parent cb511afe
......@@ -4,7 +4,7 @@ import pycuda.autoinit
from pycuda.compiler import SourceModule
from pystencils.backends.cbackend import generateC
from pystencils.transformations import symbolNameToVariableName
from pystencils.types import StructType
from pystencils.types import StructType, getBaseType
def makePythonFunction(kernelFunctionNode, argumentDict={}):
......@@ -36,6 +36,7 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}):
args = _buildNumpyArgumentList(kernelFunctionNode, fullArguments)
func(*args, **dictWithBlockAndThreadNumbers)
# cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called
return wrapper
......@@ -47,12 +48,10 @@ def _buildNumpyArgumentList(kernelFunctionNode, argumentDict):
field = argumentDict[arg.fieldName]
if arg.isFieldPtrArgument:
result.append(field.gpudata)
elif arg.isFieldShapeArgument:
strideArr = np.array(field.strides, dtype=np.int32) / field.dtype.itemsize
result.append(cuda.In(strideArr))
elif arg.isFieldStrideArgument:
shapeArr = np.array(field.shape, dtype=np.int32)
result.append(cuda.In(shapeArr))
dtype = getBaseType(arg.dtype).numpyDtype
strideArr = np.array(field.strides, dtype=dtype) // field.dtype.itemsize
result.append(cuda.In(strideArr))
else:
assert False
else:
......
import sympy as sp
from pystencils.transformations import resolveFieldAccesses, typeAllEquations, \
parseBasePointerInfo, typingFromSympyInspection
from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment
from pystencils.transformations import resolveFieldAccesses, typeAllEquations, parseBasePointerInfo
from pystencils.astnodes import Block, KernelFunction, SympyAssignment
from pystencils import Field
from pystencils.types import TypedSymbol, BasicType, StructType
......@@ -82,20 +81,12 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel"
coordinateSymbolAssignments = [getCoordinateSymbolAssignment(n) for n in coordinateNames[:spatialCoordinates]]
coordinateTypedSymbols = [eq.lhs for eq in coordinateSymbolAssignments]
assignments = coordinateSymbolAssignments + assignments
# make 1D loop over index fields
loopBody = Block([])
loopNode = LoopOverCoordinate(loopBody, coordinateToLoopOver=0, start=0, stop=indexFields[0].shape[0])
for assignment in assignments:
loopBody.append(assignment)
functionBody = Block([loopNode])
functionBody = Block(coordinateSymbolAssignments + assignments)
ast = KernelFunction(functionBody, allFields, functionName)
ast.globalVariables.update(BLOCK_IDX + THREAD_IDX)
coordMapping, getCallParameters = getLinewiseCoordinates(list(fieldsRead)[0], ghostLayers=0)
coordMapping, getCallParameters = getLinewiseCoordinates(list(indexFields)[0], ghostLayers=0)
basePointerInfo = [['spatialInner0']]
basePointerInfos = {f.name: parseBasePointerInfo(basePointerInfo, [2, 1, 0], f) for f in allFields}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment