GPU bugfixes and lbmpy GPU support

- Bugfix for CUDA kernels with variable field sizes
- Extended tests for pystencils GPU kernels
......@@ -4,7 +4,7 @@ import pycuda.autoinit
from pycuda.compiler import SourceModule
from pystencils.backends.cbackend import generateC
from pystencils.transformations import symbolNameToVariableName
from pystencils.types import StructType
from pystencils.types import StructType, getBaseType
def makePythonFunction(kernelFunctionNode, argumentDict={}):
......@@ -36,6 +36,7 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}):
args = _buildNumpyArgumentList(kernelFunctionNode, fullArguments)
func(*args, **dictWithBlockAndThreadNumbers)
# cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called
return wrapper
......@@ -47,12 +48,10 @@ def _buildNumpyArgumentList(kernelFunctionNode, argumentDict):
field = argumentDict[arg.fieldName]
if arg.isFieldPtrArgument:
elif arg.isFieldShapeArgument:
strideArr = np.array(field.strides, dtype=np.int32) / field.dtype.itemsize
elif arg.isFieldStrideArgument:
shapeArr = np.array(field.shape, dtype=np.int32)
dtype = getBaseType(arg.dtype).numpyDtype
strideArr = np.array(field.strides, dtype=dtype) // field.dtype.itemsize
assert False
import sympy as sp
from pystencils.transformations import resolveFieldAccesses, typeAllEquations, \
parseBasePointerInfo, typingFromSympyInspection
from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment
from pystencils.transformations import resolveFieldAccesses, typeAllEquations, parseBasePointerInfo
from pystencils.astnodes import Block, KernelFunction, SympyAssignment
from pystencils import Field
from pystencils.types import TypedSymbol, BasicType, StructType
......@@ -82,20 +81,12 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel"
coordinateSymbolAssignments = [getCoordinateSymbolAssignment(n) for n in coordinateNames[:spatialCoordinates]]
coordinateTypedSymbols = [eq.lhs for eq in coordinateSymbolAssignments]
assignments = coordinateSymbolAssignments + assignments
# make 1D loop over index fields
loopBody = Block([])
loopNode = LoopOverCoordinate(loopBody, coordinateToLoopOver=0, start=0, stop=indexFields[0].shape[0])
for assignment in assignments:
functionBody = Block([loopNode])
functionBody = Block(coordinateSymbolAssignments + assignments)
ast = KernelFunction(functionBody, allFields, functionName)
ast.globalVariables.update(BLOCK_IDX + THREAD_IDX)
coordMapping, getCallParameters = getLinewiseCoordinates(list(fieldsRead)[0], ghostLayers=0)
coordMapping, getCallParameters = getLinewiseCoordinates(list(indexFields)[0], ghostLayers=0)
basePointerInfo = [['spatialInner0']]
basePointerInfos = { parseBasePointerInfo(basePointerInfo, [2, 1, 0], f) for f in allFields}
