diff --git a/__init__.py b/__init__.py index 513808744da84e549813b49a97970783bdb67d05..b78f17d66f89640855bc6950f7ddd752d2bdf838 100644 --- a/__init__.py +++ b/__init__.py @@ -1,3 +1,5 @@ from pystencils.field import Field, extractCommonSubexpressions from pystencils.data_types import TypedSymbol from pystencils.slicing import makeSlice +from pystencils.kernelcreation import createKernel, createIndexedKernel +from pystencils.display_utils import showCode diff --git a/astnodes.py b/astnodes.py index df48c6d4a4e30266506d7f6936f460ce4b14f22f..d87b45555337d3889cd2d10a7188d4a67a752a18 100644 --- a/astnodes.py +++ b/astnodes.py @@ -1,48 +1,10 @@ import sympy as sp from sympy.tensor import IndexedBase from pystencils.field import Field -from pystencils.data_types import TypedSymbol, createType, get_type_from_sympy, createTypeFromString, castFunc +from pystencils.data_types import TypedSymbol, createType, castFunc from pystencils.sympyextensions import fastSubs -class ResolvedFieldAccess(sp.Indexed): - def __new__(cls, base, linearizedIndex, field, offsets, idxCoordinateValues): - if not isinstance(base, IndexedBase): - base = IndexedBase(base, shape=(1,)) - obj = super(ResolvedFieldAccess, cls).__new__(cls, base, linearizedIndex) - obj.field = field - obj.offsets = offsets - obj.idxCoordinateValues = idxCoordinateValues - return obj - - def _eval_subs(self, old, new): - return ResolvedFieldAccess(self.args[0], - self.args[1].subs(old, new), - self.field, self.offsets, self.idxCoordinateValues) - - def fastSubs(self, subsDict): - if self in subsDict: - return subsDict[self] - return ResolvedFieldAccess(self.args[0].subs(subsDict), - self.args[1].subs(subsDict), - self.field, self.offsets, self.idxCoordinateValues) - - def _hashable_content(self): - superClassContents = super(ResolvedFieldAccess, self)._hashable_content() - return superClassContents + tuple(self.offsets) + (repr(self.idxCoordinateValues), hash(self.field)) - - @property - def typedSymbol(self): - return self.base.label - - def __str__(self): - top = super(ResolvedFieldAccess, self).__str__() - return "%s (%s)" % (top, self.typedSymbol.dtype) - - def __getnewargs__(self): - return self.base, self.indices[0], self.field, self.offsets, self.idxCoordinateValues - - class Node(object): """Base class for all AST nodes""" @@ -200,6 +162,7 @@ class KernelFunction(Node): self._parameters = None self.functionName = functionName self._body.parent = self + self.compile = None self.ghostLayers = ghostLayers # these variables are assumed to be global, so no automatic parameter is generated for them self.globalVariables = set() @@ -318,16 +281,21 @@ class Block(Node): return result - definedSymbols def __str__(self): - return ''.join('{!s}\n'.format(node) for node in self._nodes) + return "Block " + ''.join('{!s}\n'.format(node) for node in self._nodes) def __repr__(self): - return ''.join('{!r}'.format(node) for node in self._nodes) + return "Block" class PragmaBlock(Block): def __init__(self, pragmaLine, listOfNodes): super(PragmaBlock, self).__init__(listOfNodes) self.pragmaLine = pragmaLine + for n in listOfNodes: + n.parent = self + + def __repr__(self): + return self.pragmaLine class LoopOverCoordinate(Node): @@ -400,7 +368,7 @@ class LoopOverCoordinate(Node): prefix = LoopOverCoordinate.LOOP_COUNTER_NAME_PREFIX if not symbol.name.startswith(prefix): return None - if symbol.dtype != createTypeFromString('int'): + if symbol.dtype != createType('int'): return None coordinate = int(symbol.name[len(prefix)+1:]) return coordinate @@ -505,6 +473,44 @@ class SympyAssignment(Node): return repr(self.lhs) + " = " + repr(self.rhs) +class ResolvedFieldAccess(sp.Indexed): + def __new__(cls, base, linearizedIndex, field, offsets, idxCoordinateValues): + if not isinstance(base, IndexedBase): + base = IndexedBase(base, shape=(1,)) + obj = super(ResolvedFieldAccess, cls).__new__(cls, base, linearizedIndex) + obj.field = field + obj.offsets = offsets + obj.idxCoordinateValues = idxCoordinateValues + return obj + + def _eval_subs(self, old, new): + return ResolvedFieldAccess(self.args[0], + self.args[1].subs(old, new), + self.field, self.offsets, self.idxCoordinateValues) + + def fastSubs(self, subsDict): + if self in subsDict: + return subsDict[self] + return ResolvedFieldAccess(self.args[0].subs(subsDict), + self.args[1].subs(subsDict), + self.field, self.offsets, self.idxCoordinateValues) + + def _hashable_content(self): + superClassContents = super(ResolvedFieldAccess, self)._hashable_content() + return superClassContents + tuple(self.offsets) + (repr(self.idxCoordinateValues), hash(self.field)) + + @property + def typedSymbol(self): + return self.base.label + + def __str__(self): + top = super(ResolvedFieldAccess, self).__str__() + return "%s (%s)" % (top, self.typedSymbol.dtype) + + def __getnewargs__(self): + return self.base, self.indices[0], self.field, self.offsets, self.idxCoordinateValues + + class TemporaryMemoryAllocation(Node): def __init__(self, typedSymbol, size): self.symbol = typedSymbol diff --git a/backends/dot.py b/backends/dot.py index a36c8a5a3f6aa9d038639c9b62991c2f748da2f7..85d684d0c7d6116cef19d58d97b706bfe2af5148 100644 --- a/backends/dot.py +++ b/backends/dot.py @@ -15,23 +15,25 @@ class DotPrinter(Printer): self.dot.quote_edge = lang.quote def _print_KernelFunction(self, function): - self.dot.node(self._nodeToStrFunction(function), style='filled', fillcolor='#E69F00') + self.dot.node(self._nodeToStrFunction(function), style='filled', fillcolor='#a056db', label="Function") self._print(function.body) + self.dot.edge(self._nodeToStrFunction(function), self._nodeToStrFunction(function.body)) def _print_LoopOverCoordinate(self, loop): - self.dot.node(self._nodeToStrFunction(loop), style='filled', fillcolor='#56B4E9') + self.dot.node(self._nodeToStrFunction(loop), style='filled', fillcolor='#3498db') self._print(loop.body) + self.dot.edge(self._nodeToStrFunction(loop), self._nodeToStrFunction(loop.body)) def _print_Block(self, block): for node in block.args: self._print(node) - parent = block.parent + + self.dot.node(self._nodeToStrFunction(block), style='filled', fillcolor='#dbc256', label=repr(block)) for node in block.args: - self.dot.edge(self._nodeToStrFunction(parent), self._nodeToStrFunction(node)) - #parent = node + self.dot.edge(self._nodeToStrFunction(block), self._nodeToStrFunction(node)) def _print_SympyAssignment(self, assignment): - self.dot.node(self._nodeToStrFunction(assignment)) + self.dot.node(self._nodeToStrFunction(assignment), style='filled', fillcolor='#56db7f') if self.full: for node in assignment.args: self._print(node) @@ -54,7 +56,7 @@ class DotPrinter(Printer): def __shortened(node): - from pystencils.astnodes import LoopOverCoordinate, KernelFunction, SympyAssignment + from pystencils.astnodes import LoopOverCoordinate, KernelFunction, SympyAssignment, Block if isinstance(node, LoopOverCoordinate): return "Loop over dim %d" % (node.coordinateToLoopOver,) elif isinstance(node, KernelFunction): @@ -62,7 +64,11 @@ def __shortened(node): params += [p.name for p in node.parameters if not p.isFieldArgument] return "Func: %s (%s)" % (node.functionName, ",".join(params)) elif isinstance(node, SympyAssignment): - return "Assignment: " + repr(node.lhs) + return repr(node.lhs) + elif isinstance(node, Block): + return "Block" + str(id(node)) + else: + raise NotImplementedError("Cannot handle node type %s" % (type(node),)) def dotprint(node, view=False, short=False, full=False, **kwargs): @@ -102,6 +108,6 @@ if __name__ == "__main__": updateRule = sp.Eq(dstField[0, 0], sobelX) updateRule - from pystencils.cpu import createKernel + from pystencils import createKernel ast = createKernel([updateRule]) print(dotprint(ast, short=True)) diff --git a/cpu/kernelcreation.py b/cpu/kernelcreation.py index 616c89253d368c8e4ce21dd3f84b29b57ea2b34c..68f38ef44ad437e90b06e649fefb0b3b360acf72 100644 --- a/cpu/kernelcreation.py +++ b/cpu/kernelcreation.py @@ -1,4 +1,7 @@ import sympy as sp +from functools import partial + +from collections import defaultdict from pystencils.astnodes import SympyAssignment, Block, LoopOverCoordinate, KernelFunction from pystencils.transformations import resolveFieldAccesses, makeLoopOverDomain, \ @@ -7,9 +10,10 @@ from pystencils.transformations import resolveFieldAccesses, makeLoopOverDomain, from pystencils.data_types import TypedSymbol, BasicType, StructType, createType from pystencils.field import Field import pystencils.astnodes as ast +from pystencils.cpu.cpujit import makePythonFunction -def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, splitGroups=(), +def createKernel(listOfEquations, functionName="kernel", typeForSymbol='double', splitGroups=(), iterationSlice=None, ghostLayers=None): """ Creates an abstract syntax tree for a kernel function, by taking a list of update rules. @@ -31,8 +35,6 @@ def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, spl :return: :class:`pystencils.ast.KernelFunction` node """ - if typeForSymbol is None: - typeForSymbol = 'double' def typeSymbol(term): if isinstance(term, Field.Access) or isinstance(term, TypedSymbol): @@ -53,6 +55,7 @@ def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, spl loopOrder = getOptimalLoopOrdering(allFields) code = makeLoopOverDomain(body, functionName, iterationSlice=iterationSlice, ghostLayers=ghostLayers, loopOrder=loopOrder) + code.target = 'cpu' if splitGroups: typedSplitGroups = [[typeSymbol(s) for s in splitGroup] for splitGroup in splitGroups] @@ -64,7 +67,7 @@ def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, spl resolveFieldAccesses(code, readOnlyFields, fieldToBasePointerInfo=basePointerInfos) substituteArrayAccessesWithConstants(code) moveConstantsBeforeLoop(code) - + code.compile = partial(makePythonFunction, code) return code @@ -120,12 +123,13 @@ def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typ loopBody.append(assignment) functionBody = Block([loopNode]) - ast = KernelFunction(functionBody, functionName=functionName) + ast = KernelFunction(functionBody, "cpu", functionName=functionName) fixedCoordinateMapping = {f.name: coordinateTypedSymbols for f in nonIndexFields} resolveFieldAccesses(ast, set(['indexField']), fieldToFixedCoordinates=fixedCoordinateMapping) substituteArrayAccessesWithConstants(ast) moveConstantsBeforeLoop(ast) + ast.compile = partial(makePythonFunction, ast) return ast diff --git a/data_types.py b/data_types.py index 7b084f6846d02e9767b27647f99be22828d89a19..2e0b5e0e7e67e964a571df6329b543a381b0cc23 100644 --- a/data_types.py +++ b/data_types.py @@ -73,8 +73,6 @@ def createType(specification): """ if isinstance(specification, Type): return specification - elif isinstance(specification, str): - return createTypeFromString(specification) else: npDataType = np.dtype(specification) if npDataType.fields is None: @@ -84,7 +82,7 @@ def createType(specification): @memorycache(maxsize=64) -def createTypeFromString(specification): +def createCompositeTypeFromString(specification): """ Creates a new Type object from a c-like string specification :param specification: Specification string @@ -111,11 +109,7 @@ def createTypeFromString(specification): if basePart[0][-1] == "*": basePart[0] = basePart[0][:-1] parts.append('*') - try: - baseType = BasicType(basePart[0], const) - except TypeError: - baseType = BasicType(createTypeFromString.map[basePart[0]], const) - currentType = baseType + currentType = BasicType(np.dtype(basePart[0]), const) # Parse pointer parts for part in parts: restrict = False @@ -130,13 +124,6 @@ def createTypeFromString(specification): currentType = PointerType(currentType, const, restrict) return currentType -createTypeFromString.map = { - 'i64': np.int64, - 'i32': np.int32, - 'i16': np.int16, - 'i8': np.int8, -} - def getBaseType(type): while type.baseType is not None: @@ -282,9 +269,9 @@ def getTypeOfExpression(expr): from pystencils.astnodes import ResolvedFieldAccess expr = sp.sympify(expr) if isinstance(expr, sp.Integer): - return createTypeFromString("int") + return createType("int") elif isinstance(expr, sp.Rational) or isinstance(expr, sp.Float): - return createTypeFromString("double") + return createType("double") elif isinstance(expr, ResolvedFieldAccess): return expr.field.dtype elif isinstance(expr, TypedSymbol): @@ -304,7 +291,7 @@ def getTypeOfExpression(expr): return typedSymbol.dtype.baseType elif isinstance(expr, sp.boolalg.Boolean) or isinstance(expr, sp.boolalg.BooleanFunction): # if any arg is of vector type return a vector boolean, else return a normal scalar boolean - result = createTypeFromString("bool") + result = createType("bool") vecArgs = [getTypeOfExpression(a) for a in expr.args if isinstance(getTypeOfExpression(a), VectorType)] if vecArgs: result = VectorType(result, width=vecArgs[0].width) @@ -454,13 +441,13 @@ class VectorType(Type): if self.instructionSet is None: return "%s[%d]" % (self.baseType, self.width) else: - if self.baseType == createTypeFromString("int64"): + if self.baseType == createType("int64"): return self.instructionSet['int'] - elif self.baseType == createTypeFromString("double"): + elif self.baseType == createType("float64"): return self.instructionSet['double'] - elif self.baseType == createTypeFromString("float"): + elif self.baseType == createType("float32"): return self.instructionSet['float'] - elif self.baseType == createTypeFromString("bool"): + elif self.baseType == createType("bool"): return self.instructionSet['bool'] else: raise NotImplementedError() diff --git a/display_utils.py b/display_utils.py index 11e5b426b0df087a769597986050fbd84eddf867..f8cfc7de24e6dac063fc7e45a93848e5cbaa7171 100644 --- a/display_utils.py +++ b/display_utils.py @@ -30,6 +30,24 @@ def highlightCpp(code): return HTML(highlight(code, CppLexer(), HtmlFormatter())) +def showCode(ast): + from pystencils.cpu import generateC + + class CodeDisplay: + def __init__(self, astInput): + self.ast = astInput + + def _repr_html_(self): + return highlightCpp(generateC(self.ast)).__html__() + + def __str__(self): + return generateC(self.ast) + + def __repr__(self): + return generateC(self.ast) + return CodeDisplay(ast) + + def debugGUI(ast): app = QApplication.instance() if app is None: diff --git a/field.py b/field.py index b0f61e5973d0bb544e3347db45c66ab83f09e65e..5bced7f0e96d8f98a6a6812cc4e9e8ca5036c79a 100644 --- a/field.py +++ b/field.py @@ -3,7 +3,7 @@ import numpy as np import sympy as sp from sympy.core.cache import cacheit from sympy.tensor import IndexedBase -from pystencils.data_types import TypedSymbol, createType +from pystencils.data_types import TypedSymbol, createType, createCompositeTypeFromString from pystencils.sympyextensions import isIntegerSequence @@ -249,8 +249,8 @@ class Field(object): PREFIX = "f" STRIDE_PREFIX = PREFIX + "stride_" SHAPE_PREFIX = PREFIX + "shape_" - STRIDE_DTYPE = "const int *" - SHAPE_DTYPE = "const int *" + STRIDE_DTYPE = createCompositeTypeFromString("const int *") + SHAPE_DTYPE = createCompositeTypeFromString("const int *") DATA_PREFIX = PREFIX + "d_" class Access(sp.Symbol): diff --git a/gpucuda/indexing.py b/gpucuda/indexing.py index d61d5353bfc6383ab060123b415b3218af18f4bb..99db1efc974c11b0d3a5f911bf86bdf51094a4cc 100644 --- a/gpucuda/indexing.py +++ b/gpucuda/indexing.py @@ -4,12 +4,13 @@ import sympy as sp from pystencils.astnodes import Conditional, Block from pystencils.slicing import normalizeSlice -from pystencils.data_types import TypedSymbol, createTypeFromString +from pystencils.data_types import TypedSymbol, createType +from functools import partial AUTO_BLOCKSIZE_LIMITING = True -BLOCK_IDX = [TypedSymbol("blockIdx." + coord, createTypeFromString("int")) for coord in ('x', 'y', 'z')] -THREAD_IDX = [TypedSymbol("threadIdx." + coord, createTypeFromString("int")) for coord in ('x', 'y', 'z')] +BLOCK_IDX = [TypedSymbol("blockIdx." + coord, createType("int")) for coord in ('x', 'y', 'z')] +THREAD_IDX = [TypedSymbol("threadIdx." + coord, createType("int")) for coord in ('x', 'y', 'z')] class AbstractIndexing(abc.ABCMeta('ABC', (object,), {})): @@ -281,3 +282,17 @@ def _getEndFromSlice(iterationSlice, arrShape): res.append(sliceComponent + 1) return res + +def indexingCreatorFromParams(gpuIndexing, gpuIndexingParams): + if isinstance(gpuIndexing, str): + if gpuIndexing == 'block': + indexingCreator = BlockIndexing + elif gpuIndexing == 'line': + indexingCreator = LineIndexing + else: + raise ValueError("Unknown GPU indexing %s. Valid values are 'block' and 'line'" % (gpuIndexing,)) + if gpuIndexingParams: + indexingCreator = partial(indexingCreator, **gpuIndexingParams) + return indexingCreator + else: + return gpuIndexing diff --git a/gpucuda/kernelcreation.py b/gpucuda/kernelcreation.py index 959a94b49dcb6591105ed10d48ffb3f1c1953122..7387093620757f4966f1c77f2e758834eba185ac 100644 --- a/gpucuda/kernelcreation.py +++ b/gpucuda/kernelcreation.py @@ -1,9 +1,12 @@ +from functools import partial + from pystencils.gpucuda.indexing import BlockIndexing from pystencils.transformations import resolveFieldAccesses, typeAllEquations, parseBasePointerInfo, getCommonShape, \ substituteArrayAccessesWithConstants from pystencils.astnodes import Block, KernelFunction, SympyAssignment, LoopOverCoordinate from pystencils.data_types import TypedSymbol, BasicType, StructType from pystencils import Field +from pystencils.gpucuda.cudajit import makePythonFunction def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, indexingCreator=BlockIndexing, @@ -59,6 +62,7 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, ast.body.insertFront(SympyAssignment(loopCounter, indexing.coordinates[i])) ast.indexing = indexing + ast.compile = partial(makePythonFunction, ast) return ast @@ -111,4 +115,5 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel" # add the function which determines #blocks and #threads as additional member to KernelFunction node # this is used by the jit ast.indexing = indexing + ast.compile = partial(makePythonFunction, ast) return ast diff --git a/kernelcreation.py b/kernelcreation.py new file mode 100644 index 0000000000000000000000000000000000000000..0823c017cf7e6a881d8618f2f53ab9ae849d6e42 --- /dev/null +++ b/kernelcreation.py @@ -0,0 +1,104 @@ +from pystencils.equationcollection import EquationCollection +from pystencils.gpucuda.indexing import indexingCreatorFromParams + + +def createKernel(equations, target='cpu', dataType="double", iterationSlice=None, ghostLayers=None, + cpuOpenMP=True, cpuVectorizeInfo=None, + gpuIndexing='block', gpuIndexingParams={}): + """ + Creates abstract syntax tree (AST) of kernel, using a list of update equations. + :param equations: either be a plain list of equations or a EquationCollection object + :param target: 'cpu', 'llvm' or 'gpu' + :param dataType: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name + to type + :param iterationSlice: rectangular subset to iterate over, if not specified the complete non-ghost layer part of the + field is iterated over + :param ghostLayers: if left to default, the number of necessary ghost layers is determined automatically + a single integer specifies the ghost layer count at all borders, can also be a sequence of + pairs [(xLowerGl, xUpperGl), .... ] + + CPU specific Parameters: + :param cpuOpenMP: True or number of threads for OpenMP parallelization, False for no OpenMP + :param cpuVectorizeInfo: pair of instruction set name ('sse, 'avx', 'avx512') and data type ('float', 'double') + + GPU specific Parameters + :param gpuIndexing: either 'block' or 'line' , or custom indexing class (see gpucuda/indexing.py) + :param gpuIndexingParams: dict with indexing parameters (constructor parameters of indexing class) + e.g. for 'block' one can specify {'blockSize': (20, 20, 10) } + + :return: abstract syntax tree object, that can either be printed as source code or can be compiled with + through its compile() function + """ + + # ---- Normalizing parameters + splitGroups = () + if isinstance(equations, EquationCollection): + if 'splitGroups' in equations.simplificationHints: + splitGroups = equations.simplificationHints['splitGroups'] + equations = equations.allEquations + + # ---- Creating ast + if target == 'cpu': + from pystencils.cpu import createKernel + from pystencils.cpu import addOpenMP + ast = createKernel(equations, typeForSymbol=dataType, splitGroups=splitGroups, + iterationSlice=iterationSlice, ghostLayers=ghostLayers) + if cpuOpenMP: + addOpenMP(ast, numThreads=cpuOpenMP) + if cpuVectorizeInfo: + import pystencils.backends.simd_instruction_sets as vec + from pystencils.vectorization import vectorize + vecParams = cpuVectorizeInfo + vec.selectedInstructionSet = vec.x86VectorInstructionSet(instructionSet=vecParams[0], dataType=vecParams[1]) + vectorize(ast) + return ast + elif target == 'llvm': + from pystencils.llvm import createKernel + ast = createKernel(equations, typeForSymbol=dataType, splitGroups=splitGroups, + iterationSlice=iterationSlice, ghostLayers=ghostLayers) + return ast + elif target == 'gpu': + from pystencils.gpucuda import createCUDAKernel + ast = createCUDAKernel(equations, typeForSymbol=dataType, + indexingCreator=indexingCreatorFromParams(gpuIndexing, gpuIndexingParams), + iterationSlice=iterationSlice, ghostLayers=ghostLayers) + return ast + else: + raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,)) + + +def createIndexedKernel(equations, indexFields, target='cpu', dataType="double", coordinateNames=('x', 'y', 'z'), + cpuOpenMP=True, + gpuIndexing='block', gpuIndexingParams={}): + """ + Similar to :func:`createKernel`, but here not all cells of a field are updated but only cells with + coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling. + + The coordinates are stored in a separated indexField, which is a one dimensional array with struct data type. + This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the + 'coordinateNames' parameter. The struct can have also other fields that can be read and written in the kernel, for + example boundary parameters. + + indexFields: list of index fields, i.e. 1D fields with struct data type + coordinateNames: name of the coordinate fields in the struct data type + """ + + if isinstance(equations, EquationCollection): + equations = equations.allEquations + if target == 'cpu': + from pystencils.cpu import createIndexedKernel + from pystencils.cpu import addOpenMP + ast = createIndexedKernel(equations, indexField=indexFields, typeForSymbol=dataType, + coordinateNames=coordinateNames) + if cpuOpenMP: + addOpenMP(ast, numThreads=cpuOpenMP) + return ast + elif target == 'llvm': + raise NotImplementedError("Indexed kernels are not yet supported in LLVM backend") + elif target == 'gpu': + from pystencils.gpucuda import createdIndexedCUDAKernel + ast = createdIndexedCUDAKernel(equations, indexFields, typeForSymbol=dataType, coordinateNames=coordinateNames, + indexingCreator=indexingCreatorFromParams(gpuIndexing, gpuIndexingParams)) + return ast + else: + raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,)) diff --git a/llvm/kernelcreation.py b/llvm/kernelcreation.py index e0c16f9a190dec35735035625aa92d6fe27a1203..c90d63cc1be84deff45a3e815003141ca1cb02d2 100644 --- a/llvm/kernelcreation.py +++ b/llvm/kernelcreation.py @@ -6,6 +6,8 @@ from pystencils.transformations import resolveFieldAccesses, makeLoopOverDomain, from pystencils.data_types import TypedSymbol, BasicType, StructType from pystencils.field import Field import pystencils.astnodes as ast +from functools import partial +from pystencils.llvm.llvmjit import makePythonFunction def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, splitGroups=(), @@ -30,42 +32,42 @@ def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, spl :return: :class:`pystencils.ast.KernelFunction` node """ - if not typeForSymbol: - typeForSymbol = typingFromSympyInspection(listOfEquations, "double") - - def typeSymbol(term): - if isinstance(term, Field.Access) or isinstance(term, TypedSymbol): - return term - elif isinstance(term, sp.Symbol): - return TypedSymbol(term.name, typeForSymbol[term.name]) - else: - raise ValueError("Term has to be field access or symbol") - - fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol) - allFields = fieldsRead.union(fieldsWritten) - - readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten]) - - body = ast.Block(assignments) - loopOrder = getOptimalLoopOrdering(allFields) - code = makeLoopOverDomain(body, functionName, iterationSlice=iterationSlice, - ghostLayers=ghostLayers, loopOrder=loopOrder) - - if splitGroups: - typedSplitGroups = [[typeSymbol(s) for s in splitGroup] for splitGroup in splitGroups] - splitInnerLoop(code, typedSplitGroups) - - basePointerInfo = [] - for i in range(len(loopOrder)): - basePointerInfo.append(['spatialInner%d' % i]) - basePointerInfos = {field.name: parseBasePointerInfo(basePointerInfo, loopOrder, field) for field in allFields} - - resolveFieldAccesses(code, readOnlyFields, fieldToBasePointerInfo=basePointerInfos) - moveConstantsBeforeLoop(code) - - print(code) + #if not typeForSymbol: + # typeForSymbol = typingFromSympyInspection(listOfEquations, "double") + # + #def typeSymbol(term): + # if isinstance(term, Field.Access) or isinstance(term, TypedSymbol): + # return term + # elif isinstance(term, sp.Symbol): + # return TypedSymbol(term.name, typeForSymbol[term.name]) + # else: + # raise ValueError("Term has to be field access or symbol") + # + #fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol) + #allFields = fieldsRead.union(fieldsWritten) + # + #readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten]) + # + #body = ast.Block(assignments) + #loopOrder = getOptimalLoopOrdering(allFields) + #code = makeLoopOverDomain(body, functionName, iterationSlice=iterationSlice, + # ghostLayers=ghostLayers, loopOrder=loopOrder) + # + #if splitGroups: + # typedSplitGroups = [[typeSymbol(s) for s in splitGroup] for splitGroup in splitGroups] + # splitInnerLoop(code, typedSplitGroups) + # + #basePointerInfo = [] + #for i in range(len(loopOrder)): + # basePointerInfo.append(['spatialInner%d' % i]) + #basePointerInfos = {field.name: parseBasePointerInfo(basePointerInfo, loopOrder, field) for field in allFields} + # + #resolveFieldAccesses(code, readOnlyFields, fieldToBasePointerInfo=basePointerInfos) + #moveConstantsBeforeLoop(code) + from pystencils.cpu import createKernel + code = createKernel(listOfEquations, functionName, typeForSymbol, splitGroups, iterationSlice, ghostLayers) code = insertCasts(code) - print(code) + code.compile = partial(makePythonFunction, code) return code @@ -129,5 +131,6 @@ def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typ desympy_ast(ast) insert_casts(ast) + ast.compile = partial(makePythonFunction, ast) return ast diff --git a/llvm/llvm.py b/llvm/llvm.py index b33e4e57ae5a5aa5de43b30e743a965ea09dd906..6529c699eef065cc4e8e9abe960df8a5c6bcfdd2 100644 --- a/llvm/llvm.py +++ b/llvm/llvm.py @@ -6,7 +6,8 @@ from sympy import S # S is numbers? from pystencils.llvm.control_flow import Loop -from pystencils.data_types import createType, to_llvm_type, getTypeOfExpression, collateTypes +from pystencils.data_types import createType, to_llvm_type, getTypeOfExpression, collateTypes, \ + createCompositeTypeFromString from sympy import Indexed from sympy.codegen.ast import Assignment @@ -210,18 +211,25 @@ class LLVMPrinter(Printer): from_dtype = getTypeOfExpression(conversion.args[0]) # (From, to) decision = { - (createType("int"), createType("double")): functools.partial(self.builder.sitofp, node, self.fp_type), - (createType("double"), createType("int")): functools.partial(self.builder.fptosi, node, self.integer), - (createType("double *"), createType("int")): functools.partial(self.builder.ptrtoint, node, self.integer), - (createType("int"), createType("double *")): functools.partial(self.builder.inttoptr, node, + (createCompositeTypeFromString("int"), createCompositeTypeFromString("double")): functools.partial( + self.builder.sitofp, node, self.fp_type), + (createCompositeTypeFromString("double"), createCompositeTypeFromString("int")): functools.partial( + self.builder.fptosi, node, self.integer), + (createCompositeTypeFromString("double *"), createCompositeTypeFromString("int")): functools.partial( + self.builder.ptrtoint, node, self.integer), + (createCompositeTypeFromString("int"), createCompositeTypeFromString("double *")): functools.partial(self.builder.inttoptr, node, self.fp_pointer), - (createType("double * restrict"), createType("int")): functools.partial(self.builder.ptrtoint, node, - self.integer), - (createType("int"), createType("double * restrict")): functools.partial(self.builder.inttoptr, node, + (createCompositeTypeFromString("double * restrict"), createCompositeTypeFromString("int")): functools.partial( + self.builder.ptrtoint, node, + self.integer), + (createCompositeTypeFromString("int"), + createCompositeTypeFromString("double * restrict")): functools.partial(self.builder.inttoptr, node, self.fp_pointer), - (createType("double * restrict const"), createType("int")): functools.partial(self.builder.ptrtoint, node, - self.integer), - (createType("int"), createType("double * restrict const")): functools.partial(self.builder.inttoptr, node, + (createCompositeTypeFromString("double * restrict const"), + createCompositeTypeFromString("int")): functools.partial(self.builder.ptrtoint, node, + self.integer), + (createCompositeTypeFromString("int"), + createCompositeTypeFromString("double * restrict const")): functools.partial(self.builder.inttoptr, node, self.fp_pointer), } # TODO float, TEST: const, restrict diff --git a/llvm/llvmjit.py b/llvm/llvmjit.py index 710439e8b353a0a91af82a6c444f8b9ece0a8a69..bd848671a39ed6e1b566b4195779c0dfbec7e06d 100644 --- a/llvm/llvmjit.py +++ b/llvm/llvmjit.py @@ -5,7 +5,8 @@ import ctypes as ct import subprocess import shutil -from ..data_types import toCtypes, createType, ctypes_from_llvm +from pystencils.data_types import createCompositeTypeFromString +from ..data_types import toCtypes, ctypes_from_llvm from .llvm import generateLLVM from ..cpu.cpujit import buildCTypeArgumentList, makePythonFunctionIncompleteParams @@ -127,8 +128,8 @@ class Jit(object): if not function.is_declaration: return_type = None if function.ftype.return_type != ir.VoidType(): - return_type = toCtypes(createType(str(function.ftype.return_type))) - args = [toCtypes(createType(str(arg))) for arg in function.ftype.args] + return_type = toCtypes(createCompositeTypeFromString(str(function.ftype.return_type))) + args = [ctypes_from_llvm(arg) for arg in function.ftype.args] function_address = self.ee.get_function_address(function.name) fptr[function.name] = ct.CFUNCTYPE(return_type, *args)(function_address) self.fptr = fptr