diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py index c754afcd6cf795cb88dd0f5f386a08a378371bb1..a0355d26cf0c325dd938dbf8de360963acb69ea2 100644 --- a/gpucuda/cudajit.py +++ b/gpucuda/cudajit.py @@ -4,7 +4,7 @@ import pycuda.autoinit from pycuda.compiler import SourceModule from pystencils.backends.cbackend import generateC from pystencils.transformations import symbolNameToVariableName -from pystencils.types import StructType +from pystencils.types import StructType, getBaseType def makePythonFunction(kernelFunctionNode, argumentDict={}): @@ -36,6 +36,7 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}): args = _buildNumpyArgumentList(kernelFunctionNode, fullArguments) func(*args, **dictWithBlockAndThreadNumbers) + # cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called return wrapper @@ -47,12 +48,10 @@ def _buildNumpyArgumentList(kernelFunctionNode, argumentDict): field = argumentDict[arg.fieldName] if arg.isFieldPtrArgument: result.append(field.gpudata) - elif arg.isFieldShapeArgument: - strideArr = np.array(field.strides, dtype=np.int32) / field.dtype.itemsize - result.append(cuda.In(strideArr)) elif arg.isFieldStrideArgument: - shapeArr = np.array(field.shape, dtype=np.int32) - result.append(cuda.In(shapeArr)) + dtype = getBaseType(arg.dtype).numpyDtype + strideArr = np.array(field.strides, dtype=dtype) // field.dtype.itemsize + result.append(cuda.In(strideArr)) else: assert False else: diff --git a/gpucuda/kernelcreation.py b/gpucuda/kernelcreation.py index 512334898583749c2ab7ddb861c186e26c82c8f6..664752935bbcd807d0e493ce832014a426d01cef 100644 --- a/gpucuda/kernelcreation.py +++ b/gpucuda/kernelcreation.py @@ -1,8 +1,7 @@ import sympy as sp -from pystencils.transformations import resolveFieldAccesses, typeAllEquations, \ - parseBasePointerInfo, typingFromSympyInspection -from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment +from pystencils.transformations import resolveFieldAccesses, typeAllEquations, parseBasePointerInfo +from pystencils.astnodes import Block, KernelFunction, SympyAssignment from pystencils import Field from pystencils.types import TypedSymbol, BasicType, StructType @@ -82,20 +81,12 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel" coordinateSymbolAssignments = [getCoordinateSymbolAssignment(n) for n in coordinateNames[:spatialCoordinates]] coordinateTypedSymbols = [eq.lhs for eq in coordinateSymbolAssignments] - assignments = coordinateSymbolAssignments + assignments - # make 1D loop over index fields - loopBody = Block([]) - loopNode = LoopOverCoordinate(loopBody, coordinateToLoopOver=0, start=0, stop=indexFields[0].shape[0]) - - for assignment in assignments: - loopBody.append(assignment) - - functionBody = Block([loopNode]) + functionBody = Block(coordinateSymbolAssignments + assignments) ast = KernelFunction(functionBody, allFields, functionName) ast.globalVariables.update(BLOCK_IDX + THREAD_IDX) - coordMapping, getCallParameters = getLinewiseCoordinates(list(fieldsRead)[0], ghostLayers=0) + coordMapping, getCallParameters = getLinewiseCoordinates(list(indexFields)[0], ghostLayers=0) basePointerInfo = [['spatialInner0']] basePointerInfos = {f.name: parseBasePointerInfo(basePointerInfo, [2, 1, 0], f) for f in allFields}