Restructuring: moved to pystencils

61541046 · Martin Bauer · 3414de0a · 61541046 · 61541046 · 61541046
Commit 61541046 authored 8 years ago by Martin Bauer
--- a/cuda.py
+++ b/cuda.py
+from collections import defaultdict
+
+import sympy as sp
+
+from pystencils.generator import resolveFieldAccesses
+from pystencils.generator import typeAllEquations, Block, KernelFunction, parseBasePointerInfo
+
+BLOCK_IDX = list(sp.symbols("blockIdx.x blockIdx.y blockIdx.z"))
+THREAD_IDX = list(sp.symbols("threadIdx.x threadIdx.y threadIdx.z"))
+
+"""
+GPU Access Patterns
+
+- knows about the iteration range
+- know about mapping of field indices to CUDA block and thread indices
+- iterates over spatial coordinates - constructed with a specific number of coordinates
+- can
+"""
+
+
+def getLinewiseCoordinateAccessExpression(field, indexCoordinate):
+    availableIndices = [THREAD_IDX[0]] + BLOCK_IDX
+    fastestCoordinate = field.layout[-1]
+    availableIndices[fastestCoordinate], availableIndices[0] = availableIndices[0], availableIndices[fastestCoordinate]
+    cudaIndices = availableIndices[:field.spatialDimensions]
+
+    offsetToCell = sum([cudaIdx * stride for cudaIdx, stride in zip(cudaIndices, field.spatialStrides)])
+    indexOffset = sum([idx * indexStride for idx, indexStride in zip(indexCoordinate, field.indexStrides)])
+    return sp.simplify(offsetToCell + indexOffset)
+
+
+def getLinewiseCoordinates(field):
+    availableIndices = [THREAD_IDX[0]] + BLOCK_IDX
+    d = field.spatialDimensions + field.indexDimensions
+    fastestCoordinate = field.layout[-1]
+    result = availableIndices[:d]
+    result[0], result[fastestCoordinate] = result[fastestCoordinate], result[0]
+    return result
+
+
+def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=defaultdict(lambda: "double")):
+    fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol)
+    for f in fieldsRead - fieldsWritten:
+        f.setReadOnly()
+
+    code = KernelFunction(Block(assignments), functionName)
+    code.qualifierPrefix = "__global__ "
+    code.variablesToIgnore.update(BLOCK_IDX + THREAD_IDX)
+
+    coordMapping = getLinewiseCoordinates(list(fieldsRead)[0])
+    allFields = fieldsRead.union(fieldsWritten)
+    basePointerInfo = [['spatialInner0']]
+    basePointerInfos = {f.name: parseBasePointerInfo(basePointerInfo, [0, 1, 2], f) for f in allFields}
+    resolveFieldAccesses(code, fieldToFixedCoordinates={'src': coordMapping, 'dst': coordMapping},
+                         fieldToBasePointerInfo=basePointerInfos)
+    return code
+
+
+if __name__ == "__main__":
+    import sympy as sp
+    from lbmpy.stencils import getStencil
+    from lbmpy.collisionoperator import makeSRT
+    from lbmpy.lbmgenerator import createLbmEquations
+
+    latticeModel = makeSRT(getStencil("D2Q9"), order=2, compressible=False)
+    r = createLbmEquations(latticeModel, doCSE=True)
+    kernel = createCUDAKernel(r)
+    print(kernel.generateC())
+
+    from pycuda.compiler import SourceModule
+
+    mod = SourceModule(str(kernel.generateC()))
+    func = mod.get_function("kernel")
--- a/cudajit.py
+++ b/cudajit.py
+import numpy as np
+import pycuda.driver as cuda
+from pycuda.compiler import SourceModule
+
+
+def numpyTypeFromString(typename, includePointers=True):
+    import ctypes as ct
+
+    typename = typename.replace("*", " * ")
+    typeComponents = typename.split()
+
+    basicTypeMap = {
+        'double': np.float64,
+        'float': np.float32,
+        'int': np.int32,
+        'long': np.int64,
+    }
+
+    resultType = None
+    for typeComponent in typeComponents:
+        typeComponent = typeComponent.strip()
+        if typeComponent == "const" or typeComponent == "restrict" or typeComponent == "volatile":
+            continue
+        if typeComponent in basicTypeMap:
+            resultType = basicTypeMap[typeComponent]
+        elif typeComponent == "*" and includePointers:
+            assert resultType is not None
+            resultType = ct.POINTER(resultType)
+
+    return resultType
+
+
+def buildNumpyArgumentList(kernelFunctionNode, argumentDict):
+    result = []
+    for arg in kernelFunctionNode.parameters:
+        if arg.isFieldArgument:
+            field = argumentDict[arg.fieldName]
+            if arg.isFieldPtrArgument:
+                result.append(field.gpudata)
+            elif arg.isFieldShapeArgument:
+                strideArr = np.array(field.strides, dtype=np.int32) / field.dtype.itemsize
+                result.append(cuda.In(strideArr))
+            elif arg.isFieldStrideArgument:
+                shapeArr = np.array(field.shape, dtype=np.int32)
+                result.append(cuda.In(shapeArr))
+            else:
+                assert False
+        else:
+            param = argumentDict[arg.name]
+            expectedType = numpyTypeFromString(arg.dtype)
+            result.append(expectedType(param))
+    return result
+
+
+def makePythonFunction(kernelFunctionNode, argumentDict):
+    mod = SourceModule(str(kernelFunctionNode.generateC()))
+    func = mod.get_function(kernelFunctionNode.functionName)
+
+    # 1) get argument list
+    args = buildNumpyArgumentList(kernelFunctionNode, argumentDict)
+
+    # 2) determine block and grid tuples
+        
+    # TODO prepare the function here
+
--- a/field.py
+++ b/field.py
+from itertools import chain
+import numpy as np
+import sympy as sp
+from sympy.core.cache import cacheit
+from sympy.tensor import IndexedBase
+from pystencils.typedsymbol import TypedSymbol
+
+
+def getLayoutFromNumpyArray(arr):
+    """
+    Returns a list indicating the memory layout (linearization order) of the numpy array.
+    Example:
+        >>> getLayoutFromNumpyArray(np.zeros([3,3,3]))
+        [0, 1, 2]
+    In this example the loop over the zeroth coordinate should be the outermost loop,
+    followed by the first and second. Elements arr[x,y,0] and arr[x,y,1] are adjacent in memory.
+    Normally constructed numpy arrays have this order, however by stride tricks or other frameworks, arrays
+    with different memory layout can be created.
+    """
+    coordinates = list(range(len(arr.shape)))
+    return [x for (y, x) in sorted(zip(arr.strides, coordinates), key=lambda pair: pair[0], reverse=True)]
+
+
+def numpyDataTypeToC(dtype):
+    """Mapping numpy data types to C data types"""
+    if dtype == np.float64:
+        return "double"
+    elif dtype == np.float32:
+        return "float"
+    elif dtype == np.int32:
+        return "int"
+    raise NotImplementedError()
+
+
+def offsetComponentToDirectionString(coordinateId, value):
+    """
+    Translates numerical offset to string notation.
+    x offsets are labeled with east 'E' and 'W',
+    y offsets with north 'N' and 'S' and
+    z offsets with top 'T' and bottom 'B'
+    If the absolute value of the offset is bigger than 1, this number is prefixed.
+    :param coordinateId: integer 0, 1 or 2 standing for x,y and z
+    :param value: integer offset
+
+    Example:
+    >>> offsetComponentToDirectionString(0, 1)
+    'E'
+    >>> offsetComponentToDirectionString(1, 2)
+    '2N'
+    """
+    nameComponents = (('W', 'E'),  # west, east
+                      ('S', 'N'),  # south, north
+                      ('B', 'T'),  # bottom, top
+                      )
+    if value == 0:
+        result = ""
+    elif value < 0:
+        result = nameComponents[coordinateId][0]
+    else:
+        result = nameComponents[coordinateId][1]
+    if abs(value) > 1:
+        result = "%d%s" % (abs(value), result)
+    return result
+
+
+def offsetToDirectionString(offsetTuple):
+    """
+    Translates numerical offset to string notation.
+    For details see :func:`offsetComponentToDirectionString`
+    :param offsetTuple: 3-tuple with x,y,z offset
+
+    Example:
+    >>> offsetToDirectionString([1, -1, 0])
+    'SE'
+    >>> offsetToDirectionString(([-3, 0, -2]))
+    '2B3W'
+    """
+    names = ["", "", ""]
+    for i in range(len(offsetTuple)):
+        names[i] = offsetComponentToDirectionString(i, offsetTuple[i])
+    name = "".join(reversed(names))
+    if name == "":
+        name = "C"
+    return name
+
+
+def directionStringToOffset(directionStr, dim=3):
+    """
+    Reverse mapping of :func:`offsetToDirectionString`
+    :param directionStr: string representation of offset
+    :param dim: dimension of offset, i.e the length of the returned list
+
+    >>> directionStringToOffset('NW', dim=3)
+    array([-1,  1,  0])
+    >>> directionStringToOffset('NW', dim=2)
+    array([-1,  1])
+    >>> directionStringToOffset(offsetToDirectionString([3,-2,1]))
+    array([ 3, -2,  1])
+    """
+    offsetMap = {
+        'C': np.array([0, 0, 0]),
+
+        'W': np.array([-1, 0, 0]),
+        'E': np.array([1, 0, 0]),
+
+        'S': np.array([0, -1, 0]),
+        'N': np.array([0, 1, 0]),
+
+        'B': np.array([0, 0, -1]),
+        'T': np.array([0, 0, 1]),
+    }
+    offset = np.array([0, 0, 0])
+
+    while len(directionStr) > 0:
+        factor = 1
+        firstNonDigit = 0
+        while directionStr[firstNonDigit].isdigit():
+            firstNonDigit += 1
+        if firstNonDigit > 0:
+            factor = int(directionStr[:firstNonDigit])
+            directionStr = directionStr[firstNonDigit:]
+        curOffset = offsetMap[directionStr[0]]
+        offset += factor * curOffset
+        directionStr = directionStr[1:]
+    return offset[:dim]
+
+
+class Field:
+    """
+    With fields one can formulate stencil-like update rules on structured grids.
+    This Field class knows about the dimension, memory layout (strides) and optionally about the size of an array.
+
+    To create a field use one of the static create* members. There are two options:
+        1. create a kernel with fixed loop sizes i.e. the shape of the array is already known. This is usually the
+           case if just-in-time compilation directly from Python is done. (see Field.createFromNumpyArray)
+        2. create a more general kernel that works for variable array sizes. This can be used to create kernels
+           beforehand for a library. (see Field.createGeneric)
+
+    Dimensions:
+        A field has spatial and index dimensions, where the spatial dimensions come first.
+        The interpretation is that the field has multiple cells in (usually) two or three dimensional space which are
+        looped over. Additionally  N values are stored per cell. In this case spatialDimensions is two or three,
+        and indexDimensions equals N. If you want to store a matrix on each point in a two dimensional grid, there
+        are four dimensions, two spatial and two index dimensions. len(arr.shape) == spatialDims + indexDims
+
+    Indexing:
+        When accessing (indexing) a field the result is a FieldAccess which is derived from sympy Symbol.
+        First specify the spatial offsets in [], then in case indexDimension>0 the indices in ()
+        e.g. f[-1,0,0](7)
+
+    Example without index dimensions:
+        >>> a = np.zeros([10, 10])
+        >>> f = Field.createFromNumpyArray("f", a, indexDimensions=0)
+        >>> jacobi = ( f[-1,0] + f[1,0] + f[0,-1] + f[0,1] ) / 4
+
+    Example with index dimensions: LBM D2Q9 stream pull
+        >>> stencil = np.array([[0,0], [0,1], [0,-1]])
+        >>> src = Field.createGeneric("src", spatialDimensions=2, indexDimensions=1)
+        >>> dst = Field.createGeneric("dst", spatialDimensions=2, indexDimensions=1)
+        >>> for i, offset in enumerate(stencil):
+        ...     sp.Eq(dst[0,0](i), src[-offset](i))
+        Eq(dst_C^0, src_C^0)
+        Eq(dst_C^1, src_S^1)
+        Eq(dst_C^2, src_N^2)
+    """
+    @staticmethod
+    def createFromNumpyArray(fieldName, npArray, indexDimensions=0):
+        """
+        Creates a field based on the layout, data type, and shape of a given numpy array.
+        Kernels created for these kind of fields can only be called with arrays of the same layout, shape and type.
+        :param fieldName: symbolic name for the field
+        :param npArray: numpy array
+        :param indexDimensions: see documentation of Field
+        """
+        spatialDimensions = len(npArray.shape) - indexDimensions
+        if spatialDimensions < 1:
+            raise ValueError("Too many index dimensions. At least one spatial dimension required")
+
+        fullLayout = getLayoutFromNumpyArray(npArray)
+        spatialLayout = tuple([i for i in fullLayout if i < spatialDimensions])
+        assert len(spatialLayout) == spatialDimensions
+
+        strides = tuple([s // np.dtype(npArray.dtype).itemsize for s in npArray.strides])
+        shape = tuple([int(s) for s in npArray.shape])
+
+        return Field(fieldName, npArray.dtype, spatialLayout, shape, strides)
+
+    @staticmethod
+    def createGeneric(fieldName, spatialDimensions, dtype=np.float64, indexDimensions=0, layout=None):
+        """
+        Creates a generic field where the field size is not fixed i.e. can be called with arrays of different sizes
+        :param fieldName: symbolic name for the field
+        :param dtype: numpy data type of the array the kernel is called with later
+        :param spatialDimensions: see documentation of Field
+        :param indexDimensions: see documentation of Field
+        :param layout: tuple specifying the loop ordering of the spatial dimensions e.g. (2, 1, 0 ) means that
+                       the outer loop loops over dimension 2, the second outer over dimension 1, and the inner loop
+                       over dimension 0
+        """
+        if not layout:
+            layout = tuple(reversed(range(spatialDimensions)))
+        if len(layout) != spatialDimensions:
+            raise ValueError("Layout")
+        shapeSymbol = IndexedBase(TypedSymbol(Field.SHAPE_PREFIX + fieldName, Field.SHAPE_DTYPE), shape=(1,))
+        strideSymbol = IndexedBase(TypedSymbol(Field.STRIDE_PREFIX + fieldName, Field.STRIDE_DTYPE), shape=(1,))
+        totalDimensions = spatialDimensions + indexDimensions
+        shape = tuple([shapeSymbol[i] for i in range(totalDimensions)])
+        strides = tuple([strideSymbol[i] for i in range(totalDimensions)])
+        return Field(fieldName, dtype, layout, shape, strides)
+
+    def __init__(self, fieldName, dtype, layout, shape, strides):
+        """Do not use directly. Use static create* methods"""
+        self._fieldName = fieldName
+        self._dtype = numpyDataTypeToC(dtype)
+        self._layout = layout
+        self._shape = shape
+        self._strides = strides
+        self._readonly = False
+
+    @property
+    def spatialDimensions(self):
+        return len(self._layout)
+
+    @property
+    def indexDimensions(self):
+        return len(self._shape) - len(self._layout)
+
+    @property
+    def layout(self):
+        return self._layout
+
+    @property
+    def name(self):
+        return self._fieldName
+
+    @property
+    def shape(self):
+        return self._shape
+
+    @property
+    def spatialShape(self):
+        return self._shape[:self.spatialDimensions]
+
+    @property
+    def indexShape(self):
+        return self._shape[self.spatialDimensions:]
+
+    @property
+    def spatialStrides(self):
+        return self._strides[:self.spatialDimensions]
+
+    @property
+    def indexStrides(self):
+        return self._strides[self.spatialDimensions:]
+
+    @property
+    def strides(self):
+        return self._strides
+
+    @property
+    def dtype(self):
+        return self._dtype
+
+    @property
+    def readOnly(self):
+        return self._readonly
+
+    def setReadOnly(self, value=True):
+        self._readonly = value
+
+    def __repr__(self):
+        return self._fieldName
+
+    def __getitem__(self, offset):
+        if type(offset) is np.ndarray:
+            offset = tuple(offset)
+        if type(offset) is str:
+            offset = tuple(directionStringToOffset(offset, self.spatialDimensions))
+        if type(offset) is not tuple:
+            offset = (offset,)
+        if len(offset) != self.spatialDimensions:
+            raise ValueError("Wrong number of spatial indices: "
+                             "Got %d, expected %d" % (len(offset), self.spatialDimensions))
+        return Field.Access(self, offset)
+
+    def __call__(self, *args, **kwargs):
+        center = tuple([0]*self.spatialDimensions)
+        return Field.Access(self, center)(*args, **kwargs)
+
+    def __hash__(self):
+        return hash((self._layout, self._shape, self._strides, self._dtype, self._fieldName))
+
+    def __eq__(self, other):
+        selfTuple = (self.shape, self.strides, self.name, self.dtype)
+        otherTuple = (other.shape, other.strides, other.name, other.dtype)
+        return selfTuple == otherTuple
+
+    PREFIX = "f"
+    STRIDE_PREFIX = PREFIX + "stride_"
+    SHAPE_PREFIX = PREFIX + "shape_"
+    STRIDE_DTYPE = "const int *"
+    SHAPE_DTYPE = "const int *"
+
+    class Access(sp.Symbol):
+        def __new__(cls, name, *args, **kwargs):
+            obj = Field.Access.__xnew_cached_(cls, name, *args, **kwargs)
+            return obj
+
+        def __new_stage2__(self, field, offsets=(0, 0, 0), idx=None):
+            fieldName = field.name
+            offsetsAndIndex = chain(offsets, idx) if idx is not None else offsets
+            constantOffsets = not any([isinstance(o, sp.Basic) for o in offsetsAndIndex])
+
+            if not idx:
+                idx = tuple([0] * field.indexDimensions)
+
+            if constantOffsets:
+                offsetName = offsetToDirectionString(offsets)
+
+                if field.indexDimensions == 0:
+                    obj = super(Field.Access, self).__xnew__(self, fieldName + "_" + offsetName)
+                elif field.indexDimensions == 1:
+                    obj = super(Field.Access, self).__xnew__(self, fieldName + "_" + offsetName + "^" + str(idx[0]))
+                else:
+                    idxStr = ",".join([str(e) for e in idx])
+                    obj = super(Field.Access, self).__xnew__(self, fieldName + "_" + offsetName + "^" + idxStr)
+
+            else:
+                offsetName = "%0.10X" % (abs(hash(tuple(offsetsAndIndex))))
+                obj = super(Field.Access, self).__xnew__(self, fieldName + "_" + offsetName)
+
+            obj._field = field
+            obj._offsets = []
+            for o in offsets:
+                if isinstance(o, sp.Basic):
+                    obj._offsets.append(o)
+                else:
+                    obj._offsets.append(int(o))
+            obj._offsetName = offsetName
+            obj._index = idx
+
+            return obj
+
+        __xnew__ = staticmethod(__new_stage2__)
+        __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
+
+        def __call__(self, *idx):
+            if self._index != tuple([0]*self.field.indexDimensions):
+                print(self._index, tuple([0]*self.field.indexDimensions))
+                raise ValueError("Indexing an already indexed Field.Access")
+
+            idx = tuple(idx)
+            if len(idx) != self.field.indexDimensions and idx != (0,):
+                raise ValueError("Wrong number of indices: "
+                                 "Got %d, expected %d" % (len(idx), self.field.indexDimensions))
+            return Field.Access(self.field, self._offsets, idx)
+
+        @property
+        def field(self):
+            return self._field
+
+        @property
+        def offsets(self):
+            return self._offsets
+
+        @property
+        def requiredGhostLayers(self):
+            return int(np.max(np.abs(self._offsets)))
+
+        @property
+        def nrOfCoordinates(self):
+            return len(self._offsets)
+
+        @property
+        def offsetName(self):
+            return self._offsetName
+
+        @property
+        def index(self):
+            return self._index
+
+        def _hashable_content(self):
+            superClassContents = list(super(Field.Access, self)._hashable_content())
+            t = tuple([*superClassContents, hash(self._field), self._index] + self._offsets)
+            return t
--- a/finitedifferences.py
+++ b/finitedifferences.py
-import sympy as sp
 import numpy as np
-from lbmpy.generator import Field
+import sympy as sp
+
+from pystencils.generator import Field


 def __upDownOffsets(d, dim):

--- a/generator.py
+++ b/generator.py
--- a/jit.py
+++ b/jit.py
+import os
+import subprocess
+from ctypes import cdll, c_double, c_float, sizeof
+from tempfile import TemporaryDirectory
+
+import numpy as np
+
+
+CONFIG_GCC = {
+    'compiler': 'g++',
+    'flags': '-Ofast -DNDEBUG -fPIC -shared -march=native -fopenmp',
+}
+CONFIG_INTEL = {
+    'compiler': '/software/intel/2017/bin/icpc',
+    'flags': '-Ofast -DNDEBUG -fPIC -shared -march=native -fopenmp -Wl,-rpath=/software/intel/2017/lib/intel64',
+    'env': {
+        'INTEL_LICENSE_FILE': '1713@license4.rrze.uni-erlangen.de',
+        'LM_PROJECT': 'iwia',
+    }
+}
+CONFIG_CLANG = {
+    'compiler': 'clang++',
+    'flags': '-Ofast -DNDEBUG -fPIC -shared -march=native -fopenmp',
+}
+CONFIG = CONFIG_INTEL
+
+
+def ctypeFromString(typename, includePointers=True):
+    import ctypes as ct
+
+    typename = typename.replace("*", " * ")
+    typeComponents = typename.split()
+
+    basicTypeMap = {
+        'double': ct.c_double,
+        'float': ct.c_float,
+        'int': ct.c_int,
+        'long': ct.c_long,
+    }
+
+    resultType = None
+    for typeComponent in typeComponents:
+        typeComponent = typeComponent.strip()
+        if typeComponent == "const" or typeComponent == "restrict" or typeComponent == "volatile":
+            continue
+        if typeComponent in basicTypeMap:
+            resultType = basicTypeMap[typeComponent]
+        elif typeComponent == "*" and includePointers:
+            assert resultType is not None
+            resultType = ct.POINTER(resultType)
+
+    return resultType
+
+
+def ctypeFromNumpyType(numpyType):
+    typeMap = {
+        np.dtype('float64'): c_double,
+        np.dtype('float32'): c_float,
+    }
+    return typeMap[numpyType]
+
+
+def compileAndLoad(kernelFunctionNode):
+    with TemporaryDirectory() as tmpDir:
+        srcFile = os.path.join(tmpDir, 'source.cpp')
+        with open(srcFile, 'w') as sourceFile:
+            print('#include <iostream>', file=sourceFile)
+            print("#include <cmath>", file=sourceFile)
+            print('extern "C" { ', file=sourceFile)
+            print(kernelFunctionNode.generateC(), file=sourceFile)
+            print('}', file=sourceFile)
+
+        compilerCmd = [CONFIG['compiler']] + CONFIG['flags'].split()
+        libFile = os.path.join(tmpDir, "jit.so")
+        compilerCmd += [srcFile, '-o', libFile]
+        configEnv = CONFIG['env'] if 'env' in CONFIG else {}
+        env = os.environ.copy()
+        env.update(configEnv)
+        subprocess.call(compilerCmd, env=env)
+
+        showAssembly = False
+        if showAssembly:
+            assemblyFile = os.path.join(tmpDir, "assembly.s")
+            compilerCmd = [CONFIG['compiler'], '-S', '-o', assemblyFile, srcFile] + CONFIG['flags'].split()
+            subprocess.call(compilerCmd, env=env)
+            assembly = open(assemblyFile, 'r').read()
+            kernelFunctionNode.assembly = assembly
+        loadedJitLib = cdll.LoadLibrary(libFile)
+
+    return loadedJitLib
+
+
+def buildCTypeArgumentList(kernelFunctionNode, argumentDict):
+    ctArguments = []
+    for arg in kernelFunctionNode.parameters:
+        if arg.isFieldArgument:
+            field = argumentDict[arg.fieldName]
+            if arg.isFieldPtrArgument:
+                ctArguments.append(field.ctypes.data_as(ctypeFromString(arg.dtype)))
+            elif arg.isFieldShapeArgument:
+                dataType = ctypeFromString(arg.dtype, includePointers=False)
+                ctArguments.append(field.ctypes.shape_as(dataType))
+            elif arg.isFieldStrideArgument:
+                dataType = ctypeFromString(arg.dtype, includePointers=False)
+                baseFieldType = ctypeFromNumpyType(field.dtype)
+                strides = field.ctypes.strides_as(dataType)
+                for i in range(len(field.shape)):
+                    assert strides[i] % sizeof(baseFieldType) == 0
+                    strides[i] //= sizeof(baseFieldType)
+                ctArguments.append(strides)
+            else:
+                assert False
+        else:
+            param = argumentDict[arg.name]
+            expectedType = ctypeFromString(arg.dtype)
+            ctArguments.append(expectedType(param))
+    return ctArguments
+
+
+def makePythonFunctionIncompleteParams(kernelFunctionNode, argumentDict):
+    func = compileAndLoad(kernelFunctionNode)[kernelFunctionNode.functionName]
+    func.restype = None
+
+    def wrapper(**kwargs):
+        from copy import copy
+        fullArguments = copy(argumentDict)
+        fullArguments.update(kwargs)
+        args = buildCTypeArgumentList(kernelFunctionNode, fullArguments)
+        func(*args)
+    return wrapper
+
+
+def makePythonFunction(kernelFunctionNode, argumentDict={}):
+    # build up list of CType arguments
+    try:
+        args = buildCTypeArgumentList(kernelFunctionNode, argumentDict)
+    except KeyError:
+        # not all parameters specified yet
+        return makePythonFunctionIncompleteParams(kernelFunctionNode, argumentDict)
+    func = compileAndLoad(kernelFunctionNode)[kernelFunctionNode.functionName]
+    func.restype = None
+    return lambda: func(*args)
+
+
+
+
+
+
+
--- a/typedsymbol.py
+++ b/typedsymbol.py
+import sympy as sp
+from sympy.core.cache import cacheit
+
+
+class TypedSymbol(sp.Symbol):
+
+    def __new__(cls, name, *args, **kwds):
+        obj = TypedSymbol.__xnew_cached_(cls, name, *args, **kwds)
+        return obj
+
+    def __new_stage2__(cls, name, dtype):
+        obj = super(TypedSymbol, cls).__xnew__(cls, name)
+        obj._dtype = dtype
+        return obj
+
+    __xnew__ = staticmethod(__new_stage2__)
+    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
+
+    @property
+    def dtype(self):
+        return self._dtype
+
+    def _hashable_content(self):
+        superClassContents = list(super(TypedSymbol, self)._hashable_content())
+        t = tuple([*superClassContents, hash(self._dtype)])
+        return t