GPU bugfixes and lbmpy GPU support

- Bugfix for CUDA kernels with variable field sizes
- Extended tests for pystencils GPU kernels

GPU bugfixes and lbmpy GPU support
- Bugfix for CUDA kernels with variable field sizes
- Extended tests for pystencils GPU kernels
3f45aed6 · Martin Bauer · cb511afe · 3f45aed6 · 3f45aed6
Commit 3f45aed6 authored 8 years ago by Martin Bauer
--- a/gpucuda/cudajit.py
+++ b/gpucuda/cudajit.py
@@ -4,7 +4,7 @@ import pycuda.autoinit
 from pycuda.compiler import SourceModule
 from pystencils.backends.cbackend import generateC
 from pystencils.transformations import symbolNameToVariableName
-from pystencils.types import StructType
+from pystencils.types import StructType, getBaseType
 def makePythonFunction(kernelFunctionNode, argumentDict={}):
@@ -36,6 +36,7 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}):
        args = _buildNumpyArgumentList(kernelFunctionNode, fullArguments)
        func(*args, **dictWithBlockAndThreadNumbers)
+        # cuda.Context.synchronize() #  useful for debugging, to get errors right after kernel was called
    return wrapper
@@ -47,12 +48,10 @@ def _buildNumpyArgumentList(kernelFunctionNode, argumentDict):
            field = argumentDict[arg.fieldName]
            if arg.isFieldPtrArgument:
                result.append(field.gpudata)
-            elif arg.isFieldShapeArgument:
-                strideArr = np.array(field.strides, dtype=np.int32) / field.dtype.itemsize
-                result.append(cuda.In(strideArr))
            elif arg.isFieldStrideArgument:
-                shapeArr = np.array(field.shape, dtype=np.int32)
+                dtype = getBaseType(arg.dtype).numpyDtype
-                result.append(cuda.In(shapeArr))
+                strideArr = np.array(field.strides, dtype=dtype) // field.dtype.itemsize
+                result.append(cuda.In(strideArr))
            else:
                assert False
        else:

--- a/gpucuda/kernelcreation.py
+++ b/gpucuda/kernelcreation.py
 import sympy as sp
-from pystencils.transformations import resolveFieldAccesses, typeAllEquations, \
+from pystencils.transformations import resolveFieldAccesses, typeAllEquations, parseBasePointerInfo
-    parseBasePointerInfo, typingFromSympyInspection
+from pystencils.astnodes import Block, KernelFunction, SympyAssignment
-from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment
 from pystencils import Field
 from pystencils.types import TypedSymbol, BasicType, StructType
@@ -82,20 +81,12 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel"
    coordinateSymbolAssignments = [getCoordinateSymbolAssignment(n) for n in coordinateNames[:spatialCoordinates]]
    coordinateTypedSymbols = [eq.lhs for eq in coordinateSymbolAssignments]
-    assignments = coordinateSymbolAssignments + assignments
-    # make 1D loop over index fields
+    functionBody = Block(coordinateSymbolAssignments + assignments)
-    loopBody = Block([])
-    loopNode = LoopOverCoordinate(loopBody, coordinateToLoopOver=0, start=0, stop=indexFields[0].shape[0])
-    for assignment in assignments:
-        loopBody.append(assignment)
-    functionBody = Block([loopNode])
    ast = KernelFunction(functionBody, allFields, functionName)
    ast.globalVariables.update(BLOCK_IDX + THREAD_IDX)
-    coordMapping, getCallParameters = getLinewiseCoordinates(list(fieldsRead)[0], ghostLayers=0)
+    coordMapping, getCallParameters = getLinewiseCoordinates(list(indexFields)[0], ghostLayers=0)
    basePointerInfo = [['spatialInner0']]
    basePointerInfos = {f.name: parseBasePointerInfo(basePointerInfo, [2, 1, 0], f) for f in allFields}