class ResolvedFieldAccess(sp.Indexed):
    """Indexed expression that remembers the field access it was created for.

    Behaves like ``sp.Indexed(base, linearizedIndex)`` and additionally stores
    ``field``, ``offsets`` and ``idxCoordinateValues`` as plain attributes on
    the instance.
    """

    def __new__(cls, base, linearizedIndex, field, offsets, idxCoordinateValues):
        """Create the indexed expression and attach the field information.

        :param base: base of the indexed expression, forwarded to ``sp.Indexed``
        :param linearizedIndex: the single index expression, forwarded to ``sp.Indexed``
        :param field: object stored as ``self.field``
        :param offsets: iterable stored as ``self.offsets``
        :param idxCoordinateValues: object stored as ``self.idxCoordinateValues``
        """
        obj = super(ResolvedFieldAccess, cls).__new__(cls, base, linearizedIndex)
        obj.field = field
        obj.offsets = offsets
        obj.idxCoordinateValues = idxCoordinateValues
        return obj

    def _hashable_content(self):
        """Extend the content sympy uses for hashing/comparison by the extra attributes."""
        superClassContents = super(ResolvedFieldAccess, self)._hashable_content()
        return superClassContents + tuple(self.offsets) + (repr(self.idxCoordinateValues), hash(self.field))

    def __getnewargs__(self):
        """Return the arguments that have to be passed to ``__new__`` to rebuild this object."""
        # The first argument of __new__ is the base. ``self.name`` is the string form of the
        # complete indexed expression, so passing it back would create a different base.
        return self.base, self.indices[0], self.field, self.offsets, self.idxCoordinateValues
897cbb6a4a0dacc9f7b51809b8e54aca594af14a..d466f2e9157aeedb37516a085054ed0019ef607a 100644 --- a/cpu/kernelcreation.py +++ b/cpu/kernelcreation.py @@ -118,7 +118,7 @@ def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typ loopBody.append(assignment) functionBody = Block([loopNode]) - ast = KernelFunction(functionBody, allFields, functionName) + ast = KernelFunction(functionBody, functionName) fixedCoordinateMapping = {f.name: coordinateTypedSymbols for f in nonIndexFields} resolveFieldAccesses(ast, set(['indexField']), fieldToFixedCoordinates=fixedCoordinateMapping) diff --git a/gpucuda/kernelcreation.py b/gpucuda/kernelcreation.py index 4aa87969d4878a5200da5d7a364683011d7a7d82..84c00ba262f218c14e541406a8a10f4f58304c84 100644 --- a/gpucuda/kernelcreation.py +++ b/gpucuda/kernelcreation.py @@ -34,7 +34,7 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, block = Block(assignments) block = indexing.guard(block, commonShape) - ast = KernelFunction(block, allFields, functionName) + ast = KernelFunction(block, functionName) ast.globalVariables.update(indexing.indexVariables) coordMapping = indexing.coordinates @@ -91,7 +91,7 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel" functionBody = Block(coordinateSymbolAssignments + assignments) functionBody = indexing.guard(functionBody, getCommonShape(indexFields)) - ast = KernelFunction(functionBody, allFields, functionName) + ast = KernelFunction(functionBody, functionName) ast.globalVariables.update(indexing.indexVariables) coordMapping = indexing.coordinates diff --git a/kerncraft/2d-5pt.c_compilable.c b/kerncraft/2d-5pt.c_compilable.c deleted file mode 100644 index f22f12800191a00dacb823427c9944bb35cff20c..0000000000000000000000000000000000000000 --- a/kerncraft/2d-5pt.c_compilable.c +++ /dev/null @@ -1,24 +0,0 @@ -#include <stdlib.h> - -#define M 512*512 -#define N 512*512 - -void twodfivept(double s, int n, double a[M][n], double 
b[M][n]) { - for (int j = 1; j < (M - 1); ++j) - for (int i = 1; i < (n - 1); ++i) - b[j][i] = (a[j][i-1] + a[j][i+1] + a[j-1][i] + a[j+1][i]) * (s ? a[j][i-1] > 0 : 1.0); -} - -int main(int argc, char **argv) -{ - double a[M][N]; - double b[M][N]; - - for(int i=0; i<M; ++i) - for(int j=0; j<N; ++j) - a[i][j] = b[i][j] = 42.0; - - twodfivept(0.23, N, a, b); - - return 0; -} diff --git a/kerncraft/2d-5pt.c_compilable.c___twodfivept___1.yml b/kerncraft/2d-5pt.c_compilable.c___twodfivept___1.yml deleted file mode 100644 index 6f805a938454a25dc461f4d6b74e9339755fafab..0000000000000000000000000000000000000000 --- a/kerncraft/2d-5pt.c_compilable.c___twodfivept___1.yml +++ /dev/null @@ -1,46 +0,0 @@ - -general: - analysis: - tool: LLVM (3.9.0svn) - build: Apr 19 2016 (14:38:27) - kernel: - file: 2d-5pt.c_compilable.c - function: twodfivept - scop number: 0 - -arrays: - a: - type: double - dimension: [*, n] - b: - type: double - dimension: [*, n] - -loops: - - - index: i - start: 0 - stop: 262142 - step: 1 - - - index: j - start: 0 - stop: (-2+n) - step: 1 - -data sources: - a: - - [1+i, j] - - [1+i, 2+j] - - [i, 1+j] - - [2+i, 1+j] - -data destinations: - b: - - [1+i, 1+j] - -flops: - "+": 3 - "*": 1 - "f+": 4 - "f*": 2 diff --git a/kerncraft/2d-5pt.yml b/kerncraft/2d-5pt.yml deleted file mode 100644 index 4404ae67e29e7115212da14db068ac5d624f5778..0000000000000000000000000000000000000000 --- a/kerncraft/2d-5pt.yml +++ /dev/null @@ -1,34 +0,0 @@ -arrays: - a: - type: double - dimension: [30, 50] - b: - type: double - dimension: [30, 50] - -loops: - - - index: i - start: 0 - stop: 262142 - step: 1 - - - index: j - start: 0 - stop: (-2+20) - step: 1 - -data sources: - a: - - [1+i, j] - - [1+i, 2+j] - - [i, 1+j] - - [2+i, 1+j] - -data destinations: - b: - - [1+i, 1+j] - -flops: - "+": 3 - "*": 1 diff --git a/kerncraft/__init__.py b/kerncraft/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d5a970aaf1fb0c206832f29b8705cd3b0c4d5305 100644 --- a/kerncraft/__init__.py 
+++ b/kerncraft/__init__.py @@ -0,0 +1 @@ +from .kernel import PyStencilsKerncraftKernel, KerncraftParameters diff --git a/kerncraft/kerncraft_test.py b/kerncraft/kerncraft_test.py deleted file mode 100644 index ac810c0d33b08f74a4ed40a0432fea5cc26b0ef0..0000000000000000000000000000000000000000 --- a/kerncraft/kerncraft_test.py +++ /dev/null @@ -1,120 +0,0 @@ -import kerncraft -from kerncraft.kernel import KernelCode -from kernel import KernelDescription -from pystencils.astnodes import LoopOverCoordinate -from pystencils.cpu import createKernel -from pystencils.field import getLayoutFromStrides -from pystencils.sympyextensions import countNumberOfOperations -from pystencils.transformations import typeAllEquations -from pystencils import Field -from collections import defaultdict - - -class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel): - - def __init__(self, listOfEquations, typeForSymbol=None): - super(PyStencilsKerncraftKernel, self).__init__() - - pystencilsAst = createKernel(listOfEquations, typeForSymbol=typeForSymbol) - self.ast = pystencilsAst - fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol) - allFields = fieldsRead.union(fieldsWritten) - - # Loops - innerLoops = [l for l in pystencilsAst.atoms(LoopOverCoordinate) if l.isInnermostLoop] - if len(innerLoops) == 0: - raise ValueError("No loop found in pystencils AST") - elif len(innerLoops) > 1: - raise ValueError("pystencils AST contains multiple inner loops - only one can be analyzed") - else: - innerLoop = innerLoops[0] - - self._loop_stack = [] - curNode = innerLoop - while curNode is not None: - if isinstance(curNode, LoopOverCoordinate): - loopInfo = (curNode.loopCounterSymbol.name, curNode.start, curNode.stop, curNode.step) - self._loop_stack.append(loopInfo) - curNode = curNode.parent - self._loop_stack = list(reversed(self._loop_stack)) - - # Data sources & destinations - self._sources = defaultdict(list) - self._destinations = defaultdict(list) - for eq 
in listOfEquations: - for accessesDict, expr in [(self._destinations, eq.lhs), (self._sources, eq.rhs)]: - for fa in expr.atoms(Field.Access): - coord = [sp.Symbol(LoopOverCoordinate.getLoopCounterName(i)) + off for i, off in enumerate(fa.offsets)] - coord += list(fa.index) - layout = getLayoutFromStrides(fa.field.strides) - permutedCoord = [coord[i] for i in layout] - accessesDict[fa.field.name].append(permutedCoord) - - # Variables (arrays) - for field in allFields: - layout = getLayoutFromStrides(field.strides) - permutedShape = list(field.shape[i] for i in layout) - self.set_variable(field.name, str(field.dtype), permutedShape) - for param in pystencilsAst.parameters: - if not param.isFieldArgument: - self.set_variable(param.name, str(param.dtype), None) - self._sources[param.name] = [None] - - # Datatype - self.datatype = list(self.variables.values())[0][0] - - # Flops - operationCount = countNumberOfOperations(listOfEquations) - self._flops = { - '+': operationCount['adds'], - '*': operationCount['muls'], - '/': operationCount['divs'], - } - - self.check() - -from kerncraft.iaca_marker import find_asm_blocks, userselect_block, select_best_block -from kerncraft.models import ECM, ECMData -from kerncraft.machinemodel import MachineModel -from ruamel import yaml - -if __name__ == "__main__": - from pystencils import Field - import sympy as sp - import numpy as np - from pystencils.cpu import generateC - - arr = np.zeros([80, 40], order='c') - #arr = np.zeros([40, 80, 3], order='f') - a = Field.createFromNumpyArray('a', arr, indexDimensions=0) - b = Field.createFromNumpyArray('b', arr, indexDimensions=0) - - s = sp.Symbol("s") - rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0] - updateRule = sp.Eq(b[0, 0], s*rhs) - k = PyStencilsKerncraftKernel([updateRule]) - print(generateC(k.ast)) - kernelFile = "2d-5pt.c" - #k = KernelCode(open("/home/martin/dev/kerncraft/examples/kernels/" + kernelFile).read())] - descr = 
import sympy as sp
from collections import defaultdict
import kerncraft.kernel
from pystencils.cpu import createKernel
from pystencils.transformations import typeAllEquations
from pystencils.astnodes import LoopOverCoordinate, SympyAssignment, ResolvedFieldAccess
from pystencils.field import Field, getLayoutFromStrides
from pystencils.sympyextensions import countNumberOfOperations, prod, countNumberOfOperationsInAst
from pystencils.utils import DotDict


class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel):
    """
    Implementation of kerncraft's kernel interface for pystencils CPU kernels.

    Works on an already created pystencils AST: loop information, data accesses and
    operation counts are all collected from the single innermost loop of that AST.
    """
    def __init__(self, ast):
        """
        :param ast: pystencils AST, has to contain exactly one innermost loop
        :raises ValueError: if the AST contains no loop or more than one innermost loop
        """
        super(PyStencilsKerncraftKernel, self).__init__()

        self.ast = ast

        # Loops
        innerLoop = _getSingleInnermostLoop(ast)
        self._loop_stack = _loopStackFromInnermostLoop(innerLoop)

        # Data sources & destinations
        self._sources = defaultdict(list)
        self._destinations = defaultdict(list)

        reads, writes = searchResolvedFieldAccessesInAst(innerLoop)
        for accesses, targetDict in [(reads, self._sources), (writes, self._destinations)]:
            for fa in accesses:
                permutedCoord = _permutedAccessCoordinates(fa.field, fa.offsets, fa.idxCoordinateValues)
                targetDict[fa.field.name].append(permutedCoord)

        # Variables (arrays)
        fieldsAccessed = ast.fieldsAccessed
        for field in fieldsAccessed:
            layout = getLayoutFromStrides(field.strides)
            permutedShape = list(field.shape[i] for i in layout)
            self.set_variable(field.name, str(field.dtype), permutedShape)

        # Non-field parameters are registered as variables without a shape
        for param in ast.parameters:
            if not param.isFieldArgument:
                self.set_variable(param.name, str(param.dtype), None)
                self._sources[param.name] = [None]

        # data type: taken from the first registered variable
        self.datatype = list(self.variables.values())[0][0]

        # flops
        operationCount = countNumberOfOperationsInAst(innerLoop)
        self._flops = {
            '+': operationCount['adds'],
            '*': operationCount['muls'],
            '/': operationCount['divs'],
        }

        self.check()


class KerncraftParameters(DotDict):
    """
    Dictionary with dot access that is pre-filled with default values for the keys
    'asm_block', 'asm_increment', 'cores', 'cache_predictor' and 'verbose'.

    NOTE(review): presumably used in place of kerncraft's parsed command line
    arguments - confirm against the callers.
    """
    def __init__(self, **kwargs):
        """
        :param kwargs: optional entries that are added to / override the defaults
        """
        super(KerncraftParameters, self).__init__()
        self['asm_block'] = 'auto'
        self['asm_increment'] = 0
        self['cores'] = 1
        self['cache_predictor'] = 'SIM'
        self['verbose'] = 0
        self.update(kwargs)


class PyStencilsKerncraftKernelOld(kerncraft.kernel.Kernel):
    """
    Implementation of kerncraft's kernel interface for pystencils CPU kernels.
    Analyses a list of equations assuming they will be executed on a CPU

    - right now it uses only the equations, not the optimized form e.g. expressions
      that have been pulled in front of the loop
    """
    def __init__(self, listOfEquations, typeForSymbol=None):
        """
        :param listOfEquations: equations that are passed to createKernel and analysed
        :param typeForSymbol: forwarded to createKernel and typeAllEquations
        :raises ValueError: if the created AST contains no loop or more than one innermost loop
        """
        # has to name this class: an instance of it is not an instance of PyStencilsKerncraftKernel
        super(PyStencilsKerncraftKernelOld, self).__init__()

        ast = createKernel(listOfEquations, typeForSymbol=typeForSymbol)
        self.ast = ast
        fieldsRead, fieldsWritten, _ = typeAllEquations(listOfEquations, typeForSymbol)
        allFields = fieldsRead.union(fieldsWritten)

        # Loops
        innerLoop = _getSingleInnermostLoop(ast)
        self._loop_stack = _loopStackFromInnermostLoop(innerLoop)

        # Data sources & destinations
        self._sources = defaultdict(list)
        self._destinations = defaultdict(list)
        for eq in listOfEquations:
            for accessesDict, expr in [(self._destinations, eq.lhs), (self._sources, eq.rhs)]:
                for fa in expr.atoms(Field.Access):
                    permutedCoord = _permutedAccessCoordinates(fa.field, fa.offsets, fa.index)
                    accessesDict[fa.field.name].append(permutedCoord)

        # Variables (arrays)
        for field in allFields:
            layout = getLayoutFromStrides(field.strides)
            permutedShape = list(field.shape[i] for i in layout)
            self.set_variable(field.name, str(field.dtype), permutedShape)
        for param in ast.parameters:
            if not param.isFieldArgument:
                self.set_variable(param.name, str(param.dtype), None)
                self._sources[param.name] = [None]

        # Datatype: taken from the first registered variable
        self.datatype = list(self.variables.values())[0][0]

        # Flops
        operationCount = countNumberOfOperations(listOfEquations)
        self._flops = {
            '+': operationCount['adds'],
            '*': operationCount['muls'],
            '/': operationCount['divs'],
        }

        self.check()


# ------------------------------------------- Helper functions ---------------------------------------------------------


def _getSingleInnermostLoop(ast):
    """Returns the only innermost LoopOverCoordinate of the AST, raises ValueError if there is none or several."""
    innerLoops = [loop for loop in ast.atoms(LoopOverCoordinate) if loop.isInnermostLoop]
    if len(innerLoops) == 0:
        raise ValueError("No loop found in pystencils AST")
    if len(innerLoops) > 1:
        raise ValueError("pystencils AST contains multiple inner loops - only one can be analyzed")
    return innerLoops[0]


def _loopStackFromInnermostLoop(innerLoop):
    """Walks up the parent chain and returns (counter name, start, stop, step) tuples, outermost loop first."""
    loopStack = []
    curNode = innerLoop
    while curNode is not None:
        if isinstance(curNode, LoopOverCoordinate):
            loopCounterSym = curNode.loopCounterSymbol
            loopStack.append((loopCounterSym.name, curNode.start, curNode.stop, curNode.step))
        curNode = curNode.parent
    # nodes were collected from the innermost loop outwards
    loopStack.reverse()
    return loopStack


def _permutedAccessCoordinates(field, offsets, indexValues):
    """Builds 'loop counter + offset' coordinates followed by the index values, reordered by the field layout."""
    coord = [sp.Symbol(LoopOverCoordinate.getLoopCounterName(i), positive=True) + off
             for i, off in enumerate(offsets)]
    coord += list(indexValues)
    layout = getLayoutFromStrides(field.strides)
    return [coord[i] for i in layout]


def searchResolvedFieldAccessesInAst(ast):
    """
    Collects all ResolvedFieldAccess objects below the given node.

    :param ast: node to start the search at
    :return: tuple (readAccesses, writeAccesses) of sets; accesses found on the right hand side
             of a SympyAssignment are reads, accesses found on the left hand side are writes
    """
    def visit(node, reads, writes):
        if not isinstance(node, SympyAssignment):
            # descend until an assignment is reached
            for a in node.args:
                visit(a, reads, writes)
            return

        for expr, accesses in [(node.lhs, writes), (node.rhs, reads)]:
            accesses.update(expr.atoms(ResolvedFieldAccess))

    readAccesses = set()
    writeAccesses = set()
    visit(ast, readAccesses, writeAccesses)
    return readAccesses, writeAccesses
['parameterDict', 'weight']) - class DotDict(dict): - """Normal dict with additional dot access for all keys""" - __getattr__ = dict.get - __setattr__ = dict.__setitem__ - __delattr__ = dict.__delitem__ - def __init__(self, runFunction, listOfRuns=[], databaseFile='./db'): self.listOfRuns = listOfRuns self.runFunction = runFunction @@ -35,7 +29,7 @@ class ParameterStudy(object): for valueTuple in itertools.product(*parameterValues): paramsDict = deepcopy(defaultParamsDict) paramsDict.update({name: value for name, value in zip(parameterNames, valueTuple)}) - params = self.DotDict(paramsDict) + params = DotDict(paramsDict) if filterFunction: params = filterFunction(params) if params is None: diff --git a/transformations.py b/transformations.py index ce3a1da257f72822003482e84439514d28769421..1c9732eb0d50715e287816fd07114695e6f432d0 100644 --- a/transformations.py +++ b/transformations.py @@ -100,7 +100,7 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None assignment = ast.SympyAssignment(ast.LoopOverCoordinate.getLoopCounterSymbol(loopCoordinate), sp.sympify(sliceComponent)) currentBody.insertFront(assignment) - return ast.KernelFunction(currentBody, fields, functionName) + return ast.KernelFunction(currentBody, functionName) def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr): @@ -231,6 +231,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn counters to index the field these symbols are used as coordinates :return: transformed AST """ + fieldToBasePointerInfo = OrderedDict(sorted(fieldToBasePointerInfo.items(), key=lambda pair: pair[0])) fieldToFixedCoordinates = OrderedDict(sorted(fieldToFixedCoordinates.items(), key=lambda pair: pair[0])) @@ -279,16 +280,17 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn coordDict = createCoordinateDict(basePointerInfo[0]) _, offset = createIntermediateBasePointer(fieldAccess, coordDict, lastPointer) baseArr = 
IndexedBase(lastPointer, shape=(1,)) - result = baseArr[offset] + result = ast.ResolvedFieldAccess(baseArr, offset, fieldAccess.field, fieldAccess.offsets, fieldAccess.index) castFunc = sp.Function("cast") if isinstance(getBaseType(fieldAccess.field.dtype), StructType): newType = fieldAccess.field.dtype.getElementType(fieldAccess.index[0]) result = castFunc(result, newType) - return visitSympyExpr(result, enclosingBlock, sympyAssignment) else: - newArgs = [visitSympyExpr(e, enclosingBlock, sympyAssignment) for e in expr.args] + if isinstance(expr, ast.ResolvedFieldAccess): + return expr + newArgs = [visitSympyExpr(e, enclosingBlock, sympyAssignment) for e in expr.args] kwargs = {'evaluate': False} if type(expr) in (sp.Add, sp.Mul, sp.Piecewise) else {} return expr.func(*newArgs, **kwargs) if newArgs else expr diff --git a/types.py b/types.py index 43373f0cb4e63a69d3481f6574a4fb76b79209ed..7c204cdcb423b7b5dfc965a63b540b68e88c31fb 100644 --- a/types.py +++ b/types.py @@ -6,8 +6,8 @@ from sympy.core.cache import cacheit class TypedSymbol(sp.Symbol): - def __new__(cls, name, *args, **kwds): - obj = TypedSymbol.__xnew_cached_(cls, name, *args, **kwds) + def __new__(cls, *args, **kwds): + obj = TypedSymbol.__xnew_cached_(cls, *args, **kwds) return obj def __new_stage2__(cls, name, dtype): @@ -30,25 +30,6 @@ class TypedSymbol(sp.Symbol): return self.name, self.dtype -#class IndexedWithCast(sp.tensor.Indexed): -# def __new__(cls, base, castTo, *args): -# obj = super(IndexedWithCast, cls).__new__(cls, base, *args) -# obj._castTo = castTo -# return obj -# -# @property -# def castTo(self): -# return self._castTo -# -# def _hashable_content(self): -# superClassContents = list(super(IndexedWithCast, self)._hashable_content()) -# t = tuple(superClassContents + [hash(repr(self._castTo))]) -# return t -# -# def __getnewargs__(self): -# return self.base, self.castTo - - def createType(specification): """ Create a subclass of Type according to a string or an object of subclass Type 
class DotDict(dict):
    """Normal dict with additional dot access for all keys

    Reading a key that does not exist via dot access yields None instead of raising.
    Assigning and deleting attributes is mapped to setting and deleting dict items.
    """

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails. Special (dunder) names have to
        # raise AttributeError: returning None for them makes hasattr() report every
        # protocol hook (e.g. __getstate__, __deepcopy__) as present, which confuses
        # copy, pickle and similar machinery that then tries to call the None.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        return self.get(name)

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__