Commit 3b4deebe authored by Martin Bauer's avatar Martin Bauer
Browse files

Kerncraft coupling

- pystencils can now create a non-compilable kernel that can be
  analyzed by kerncraft
parent f1cc4051
...@@ -4,6 +4,22 @@ from pystencils.field import Field ...@@ -4,6 +4,22 @@ from pystencils.field import Field
from pystencils.types import TypedSymbol, createType, get_type_from_sympy, createTypeFromString from pystencils.types import TypedSymbol, createType, get_type_from_sympy, createTypeFromString
class ResolvedFieldAccess(sp.Indexed):
    """Indexed expression ``base[linearizedIndex]`` that remembers the field access it stands for.

    In addition to what ``sp.Indexed`` stores, every instance carries three attributes:

    - ``field``: the field object that is accessed
    - ``offsets``: the offsets of the access
    - ``idxCoordinateValues``: the index coordinate values of the access
    """

    def __new__(cls, base, linearizedIndex, field, offsets, idxCoordinateValues):
        """Create the indexed expression and attach the field-access information to it.

        :param base: base of the indexed expression (forwarded to ``sp.Indexed``)
        :param linearizedIndex: the single index expression (forwarded to ``sp.Indexed``)
        :param field: field that is accessed
        :param offsets: iterable of offsets of the access
        :param idxCoordinateValues: index coordinate values of the access
        """
        obj = super(ResolvedFieldAccess, cls).__new__(cls, base, linearizedIndex)
        obj.field = field
        obj.offsets = offsets
        obj.idxCoordinateValues = idxCoordinateValues
        return obj

    def _hashable_content(self):
        """Extend the hashable content of ``sp.Indexed`` by the field-access information.

        Two accesses with the same base and index but different field, offsets or
        index coordinate values must not be treated as the same expression.
        """
        superClassContents = super(ResolvedFieldAccess, self)._hashable_content()
        return superClassContents + tuple(self.offsets) + (repr(self.idxCoordinateValues), hash(self.field))

    def __getnewargs__(self):
        """Return the positional arguments that recreate this object through ``__new__``."""
        # The first argument of __new__ is the base object, so hand back ``self.base``.
        # ``self.name`` is not that object and would not rebuild the same expression.
        return self.base, self.indices[0], self.field, self.offsets, self.idxCoordinateValues
class Node(object): class Node(object):
"""Base class for all AST nodes""" """Base class for all AST nodes"""
...@@ -112,14 +128,13 @@ class KernelFunction(Node): ...@@ -112,14 +128,13 @@ class KernelFunction(Node):
def __repr__(self): def __repr__(self):
return '<{0} {1}>'.format(self.dtype, self.name) return '<{0} {1}>'.format(self.dtype, self.name)
def __init__(self, body, fieldsAccessed, functionName="kernel"): def __init__(self, body, functionName="kernel"):
super(KernelFunction, self).__init__() super(KernelFunction, self).__init__()
self._body = body self._body = body
body.parent = self body.parent = self
self._parameters = None self._parameters = None
self.functionName = functionName self.functionName = functionName
self._body.parent = self self._body.parent = self
self._fieldsAccessed = fieldsAccessed
# these variables are assumed to be global, so no automatic parameter is generated for them # these variables are assumed to be global, so no automatic parameter is generated for them
self.globalVariables = set() self.globalVariables = set()
...@@ -147,7 +162,7 @@ class KernelFunction(Node): ...@@ -147,7 +162,7 @@ class KernelFunction(Node):
@property @property
def fieldsAccessed(self): def fieldsAccessed(self):
"""Set of Field instances: fields which are accessed inside this kernel function""" """Set of Field instances: fields which are accessed inside this kernel function"""
return self._fieldsAccessed return set(o.field for o in self.atoms(ResolvedFieldAccess))
def _updateParameters(self): def _updateParameters(self):
undefinedSymbols = self._body.undefinedSymbols - self.globalVariables undefinedSymbols = self._body.undefinedSymbols - self.globalVariables
......
...@@ -118,7 +118,7 @@ def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typ ...@@ -118,7 +118,7 @@ def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typ
loopBody.append(assignment) loopBody.append(assignment)
functionBody = Block([loopNode]) functionBody = Block([loopNode])
ast = KernelFunction(functionBody, allFields, functionName) ast = KernelFunction(functionBody, functionName)
fixedCoordinateMapping = {f.name: coordinateTypedSymbols for f in nonIndexFields} fixedCoordinateMapping = {f.name: coordinateTypedSymbols for f in nonIndexFields}
resolveFieldAccesses(ast, set(['indexField']), fieldToFixedCoordinates=fixedCoordinateMapping) resolveFieldAccesses(ast, set(['indexField']), fieldToFixedCoordinates=fixedCoordinateMapping)
......
...@@ -34,7 +34,7 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, ...@@ -34,7 +34,7 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None,
block = Block(assignments) block = Block(assignments)
block = indexing.guard(block, commonShape) block = indexing.guard(block, commonShape)
ast = KernelFunction(block, allFields, functionName) ast = KernelFunction(block, functionName)
ast.globalVariables.update(indexing.indexVariables) ast.globalVariables.update(indexing.indexVariables)
coordMapping = indexing.coordinates coordMapping = indexing.coordinates
...@@ -91,7 +91,7 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel" ...@@ -91,7 +91,7 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel"
functionBody = Block(coordinateSymbolAssignments + assignments) functionBody = Block(coordinateSymbolAssignments + assignments)
functionBody = indexing.guard(functionBody, getCommonShape(indexFields)) functionBody = indexing.guard(functionBody, getCommonShape(indexFields))
ast = KernelFunction(functionBody, allFields, functionName) ast = KernelFunction(functionBody, functionName)
ast.globalVariables.update(indexing.indexVariables) ast.globalVariables.update(indexing.indexVariables)
coordMapping = indexing.coordinates coordMapping = indexing.coordinates
......
#include <stdlib.h>
#define M 512*512
#define N 512*512
/*
 * Five-point 2D stencil sweep over the interior of a, writing into b.
 *
 * For every j in [1, M-2] and i in [1, n-2] the four direct neighbours of
 * a[j][i] (left, right, upper, lower) are summed and multiplied by a factor.
 *
 *   s  selects the factor (see the comment on the loop body)
 *   n  extent of the inner dimension of a and b
 *   a  array that is read
 *   b  array that is written; rows/columns 0 and the last ones are not touched
 */
void twodfivept(double s, int n, double a[M][n], double b[M][n]) {
for (int j = 1; j < (M - 1); ++j)
for (int i = 1; i < (n - 1); ++i)
/* factor: if s is non-zero it is the 0/1 result of (a[j][i-1] > 0), otherwise 1.0 */
b[j][i] = (a[j][i-1] + a[j][i+1] + a[j-1][i] + a[j+1][i]) * (s ? a[j][i-1] > 0 : 1.0);
}
/*
 * Driver: fills a and b with 42.0 and runs one stencil sweep with s = 0.23
 * and n = N. argc and argv are not used.
 *
 * NOTE(review): a and b are M x N arrays of double with automatic storage.
 * With M = N = 512*512 that is 2^36 elements (2^39 bytes) per array, so this
 * presumably serves as input for static analysis rather than being executed
 * as is - confirm before running it.
 */
int main(int argc, char **argv)
{
double a[M][N];
double b[M][N];
/* initialise every element of both arrays to the same value */
for(int i=0; i<M; ++i)
for(int j=0; j<N; ++j)
a[i][j] = b[i][j] = 42.0;
twodfivept(0.23, N, a, b);
return 0;
}
general:
analysis:
tool: LLVM (3.9.0svn)
build: Apr 19 2016 (14:38:27)
kernel:
file: 2d-5pt.c_compilable.c
function: twodfivept
scop number: 0
arrays:
a:
type: double
dimension: [*, n]
b:
type: double
dimension: [*, n]
loops:
-
index: i
start: 0
stop: 262142
step: 1
-
index: j
start: 0
stop: (-2+n)
step: 1
data sources:
a:
- [1+i, j]
- [1+i, 2+j]
- [i, 1+j]
- [2+i, 1+j]
data destinations:
b:
- [1+i, 1+j]
flops:
"+": 3
"*": 1
"f+": 4
"f*": 2
arrays:
a:
type: double
dimension: [30, 50]
b:
type: double
dimension: [30, 50]
loops:
-
index: i
start: 0
stop: 262142
step: 1
-
index: j
start: 0
stop: (-2+20)
step: 1
data sources:
a:
- [1+i, j]
- [1+i, 2+j]
- [i, 1+j]
- [2+i, 1+j]
data destinations:
b:
- [1+i, 1+j]
flops:
"+": 3
"*": 1
from .kernel import PyStencilsKerncraftKernel, KerncraftParameters
import kerncraft import sympy as sp
from kerncraft.kernel import KernelCode from collections import defaultdict
from kernel import KernelDescription import kerncraft.kernel
from pystencils.astnodes import LoopOverCoordinate
from pystencils.cpu import createKernel from pystencils.cpu import createKernel
from pystencils.field import getLayoutFromStrides
from pystencils.sympyextensions import countNumberOfOperations
from pystencils.transformations import typeAllEquations from pystencils.transformations import typeAllEquations
from pystencils import Field from pystencils.astnodes import LoopOverCoordinate, SympyAssignment, ResolvedFieldAccess
from collections import defaultdict from pystencils.field import Field, getLayoutFromStrides
from pystencils.sympyextensions import countNumberOfOperations, prod, countNumberOfOperationsInAst
from pystencils.utils import DotDict
class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel):
    """
    Implementation of kerncraft's kernel interface for pystencils CPU kernels.

    Loop nest, field reads and writes, variables and operation counts are all
    extracted from a pystencils AST.
    """

    def __init__(self, ast):
        """
        :param ast: pystencils AST; it has to contain exactly one innermost loop
        :raises ValueError: if the AST contains no loop or more than one innermost loop
        """
        super(PyStencilsKerncraftKernel, self).__init__()
        self.ast = ast

        # --- loops: exactly one innermost loop is supported
        innermostLoops = [loop for loop in ast.atoms(LoopOverCoordinate) if loop.isInnermostLoop]
        if not innermostLoops:
            raise ValueError("No loop found in pystencils AST")
        if len(innermostLoops) > 1:
            raise ValueError("pystencils AST contains multiple inner loops - only one can be analyzed")
        innerLoop = innermostLoops[0]

        # walk from the innermost loop up through the parents, then flip to outermost-first order
        loopsInnerToOuter = []
        node = innerLoop
        while node is not None:
            if isinstance(node, LoopOverCoordinate):
                loopsInnerToOuter.append((node.loopCounterSymbol.name, node.start, node.stop, node.step))
            node = node.parent
        self._loop_stack = loopsInnerToOuter[::-1]

        # --- data sources & destinations: one coordinate list per resolved field access
        self._sources = defaultdict(list)
        self._destinations = defaultdict(list)
        readAccesses, writeAccesses = searchResolvedFieldAccessesInAst(innerLoop)
        for fieldAccesses, accessDict in ((readAccesses, self._sources), (writeAccesses, self._destinations)):
            for access in fieldAccesses:
                coordinates = []
                for dim, offset in enumerate(access.offsets):
                    counter = sp.Symbol(LoopOverCoordinate.getLoopCounterName(dim), positive=True)
                    coordinates.append(counter + offset)
                coordinates.extend(access.idxCoordinateValues)
                # reorder the coordinates according to the layout derived from the field strides
                fieldLayout = getLayoutFromStrides(access.field.strides)
                accessDict[access.field.name].append([coordinates[d] for d in fieldLayout])

        # --- variables: arrays first (shape permuted like the coordinates), then non-field parameters
        for field in ast.fieldsAccessed:
            fieldLayout = getLayoutFromStrides(field.strides)
            self.set_variable(field.name, str(field.dtype), [field.shape[d] for d in fieldLayout])
        for param in ast.parameters:
            if param.isFieldArgument:
                continue
            self.set_variable(param.name, str(param.dtype), None)
            self._sources[param.name] = [None]

        # --- data type: taken from the first registered variable
        firstVariable = list(self.variables.values())[0]
        self.datatype = firstVariable[0]

        # --- flops: operation counts of the innermost loop
        opCount = countNumberOfOperationsInAst(innerLoop)
        self._flops = {symbol: opCount[key] for symbol, key in (('+', 'adds'), ('*', 'muls'), ('/', 'divs'))}

        self.check()
class KerncraftParameters(DotDict):
    """DotDict that is pre-filled with a fixed set of kerncraft option values."""

    def __init__(self):
        """Populate the five option keys; no arguments are accepted."""
        self.update(
            asm_block='auto',
            asm_increment=0,
            cores=1,
            cache_predictor='SIM',
            verbose=0,
        )
class PyStencilsKerncraftKernelOld(kerncraft.kernel.Kernel):
"""
Implementation of kerncraft's kernel interface for pystencils CPU kernels.
Analyses a list of equations assuming they will be executed on a CPU
- right now it uses only the equations, not the optimized form e.g. expressions
that have been pulled in front of the loop
-
"""
def __init__(self, listOfEquations, typeForSymbol=None): def __init__(self, listOfEquations, typeForSymbol=None):
super(PyStencilsKerncraftKernel, self).__init__() super(PyStencilsKerncraftKernel, self).__init__()
pystencilsAst = createKernel(listOfEquations, typeForSymbol=typeForSymbol) ast = createKernel(listOfEquations, typeForSymbol=typeForSymbol)
self.ast = pystencilsAst self.ast = ast
fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol) fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol)
allFields = fieldsRead.union(fieldsWritten) allFields = fieldsRead.union(fieldsWritten)
# Loops # Loops
innerLoops = [l for l in pystencilsAst.atoms(LoopOverCoordinate) if l.isInnermostLoop] innerLoops = [l for l in ast.atoms(LoopOverCoordinate) if l.isInnermostLoop]
if len(innerLoops) == 0: if len(innerLoops) == 0:
raise ValueError("No loop found in pystencils AST") raise ValueError("No loop found in pystencils AST")
elif len(innerLoops) > 1: elif len(innerLoops) > 1:
...@@ -44,7 +128,8 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel): ...@@ -44,7 +128,8 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel):
for eq in listOfEquations: for eq in listOfEquations:
for accessesDict, expr in [(self._destinations, eq.lhs), (self._sources, eq.rhs)]: for accessesDict, expr in [(self._destinations, eq.lhs), (self._sources, eq.rhs)]:
for fa in expr.atoms(Field.Access): for fa in expr.atoms(Field.Access):
coord = [sp.Symbol(LoopOverCoordinate.getLoopCounterName(i)) + off for i, off in enumerate(fa.offsets)] coord = [sp.Symbol(LoopOverCoordinate.getLoopCounterName(i), positive=True) + off
for i, off in enumerate(fa.offsets)]
coord += list(fa.index) coord += list(fa.index)
layout = getLayoutFromStrides(fa.field.strides) layout = getLayoutFromStrides(fa.field.strides)
permutedCoord = [coord[i] for i in layout] permutedCoord = [coord[i] for i in layout]
...@@ -55,7 +140,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel): ...@@ -55,7 +140,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel):
layout = getLayoutFromStrides(field.strides) layout = getLayoutFromStrides(field.strides)
permutedShape = list(field.shape[i] for i in layout) permutedShape = list(field.shape[i] for i in layout)
self.set_variable(field.name, str(field.dtype), permutedShape) self.set_variable(field.name, str(field.dtype), permutedShape)
for param in pystencilsAst.parameters: for param in ast.parameters:
if not param.isFieldArgument: if not param.isFieldArgument:
self.set_variable(param.name, str(param.dtype), None) self.set_variable(param.name, str(param.dtype), None)
self._sources[param.name] = [None] self._sources[param.name] = [None]
...@@ -73,48 +158,21 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel): ...@@ -73,48 +158,21 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel):
self.check() self.check()
from kerncraft.iaca_marker import find_asm_blocks, userselect_block, select_best_block
from kerncraft.models import ECM, ECMData # ------------------------------------------- Helper functions ---------------------------------------------------------
from kerncraft.machinemodel import MachineModel
from ruamel import yaml
def searchResolvedFieldAccessesInAst(ast):
if __name__ == "__main__": def visit(node, reads, writes):
from pystencils import Field if not isinstance(node, SympyAssignment):
import sympy as sp for a in node.args:
import numpy as np visit(a, reads, writes)
from pystencils.cpu import generateC return
arr = np.zeros([80, 40], order='c') for expr, accesses in [(node.lhs, writes), (node.rhs, reads)]:
#arr = np.zeros([40, 80, 3], order='f') accesses.update(expr.atoms(ResolvedFieldAccess))
a = Field.createFromNumpyArray('a', arr, indexDimensions=0)
b = Field.createFromNumpyArray('b', arr, indexDimensions=0) readAccesses = set()
writeAccesses = set()
s = sp.Symbol("s") visit(ast, readAccesses, writeAccesses)
rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0] return readAccesses, writeAccesses
updateRule = sp.Eq(b[0, 0], s*rhs) \ No newline at end of file
k = PyStencilsKerncraftKernel([updateRule])
print(generateC(k.ast))
kernelFile = "2d-5pt.c"
#k = KernelCode(open("/home/martin/dev/kerncraft/examples/kernels/" + kernelFile).read())]
descr = yaml.load(open("/home/martin/dev/pystencils/pystencils/kerncraft/2d-5pt.yml").read())
k = KernelDescription(descr)
k.print_kernel_info()
k.print_variables_info()
offsets = list(k.compile_global_offsets(1000))
print(offsets)
machineFilePath = "/home/martin/dev/kerncraft/examples/machine-files/emmy.yaml"
machine = MachineModel(path_to_yaml=machineFilePath)
#exit(0)
from kerncraft.kerncraft import create_parser
parser = create_parser()
parserArgs = parser.parse_args(["-m", machineFilePath, "-p", "ECMData", machineFilePath])
model = ECMData(k, machine, parserArgs)
model.analyze()
model.report()
#blocks = find_asm_blocks(open("/home/martin/dev/kerncraft/2d-5pt.c_compilable.s").readlines())
#userselect_block(blocks)
##select_
#bestBlock = select_best_block(blocks)
#print(bestBlock)
...@@ -7,18 +7,12 @@ from copy import deepcopy ...@@ -7,18 +7,12 @@ from copy import deepcopy
from collections import namedtuple from collections import namedtuple
from time import sleep from time import sleep
from pystencils.runhelper import Database from pystencils.runhelper import Database
from pystencils.utils import DotDict
class ParameterStudy(object): class ParameterStudy(object):
Run = namedtuple("Run", ['parameterDict', 'weight']) Run = namedtuple("Run", ['parameterDict', 'weight'])
class DotDict(dict):
"""Normal dict with additional dot access for all keys"""
__getattr__ = dict.get
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
def __init__(self, runFunction, listOfRuns=[], databaseFile='./db'): def __init__(self, runFunction, listOfRuns=[], databaseFile='./db'):
self.listOfRuns = listOfRuns self.listOfRuns = listOfRuns
self.runFunction = runFunction self.runFunction = runFunction
...@@ -35,7 +29,7 @@ class ParameterStudy(object): ...@@ -35,7 +29,7 @@ class ParameterStudy(object):
for valueTuple in itertools.product(*parameterValues): for valueTuple in itertools.product(*parameterValues):
paramsDict = deepcopy(defaultParamsDict) paramsDict = deepcopy(defaultParamsDict)
paramsDict.update({name: value for name, value in zip(parameterNames, valueTuple)}) paramsDict.update({name: value for name, value in zip(parameterNames, valueTuple)})
params = self.DotDict(paramsDict) params = DotDict(paramsDict)
if filterFunction: if filterFunction:
params = filterFunction(params) params = filterFunction(params)
if params is None: if params is None:
......
...@@ -100,7 +100,7 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None ...@@ -100,7 +100,7 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None
assignment = ast.SympyAssignment(ast.LoopOverCoordinate.getLoopCounterSymbol(loopCoordinate), assignment = ast.SympyAssignment(ast.LoopOverCoordinate.getLoopCounterSymbol(loopCoordinate),
sp.sympify(sliceComponent)) sp.sympify(sliceComponent))
currentBody.insertFront(assignment) currentBody.insertFront(assignment)
return ast.KernelFunction(currentBody, fields, functionName) return ast.KernelFunction(currentBody, functionName)
def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr): def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr):
...@@ -231,6 +231,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn ...@@ -231,6 +231,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn
counters to index the field these symbols are used as coordinates counters to index the field these symbols are used as coordinates
:return: transformed AST :return: transformed AST
""" """
fieldToBasePointerInfo = OrderedDict(sorted(fieldToBasePointerInfo.items(), key=lambda pair: pair[0])) fieldToBasePointerInfo = OrderedDict(sorted(fieldToBasePointerInfo.items(), key=lambda pair: pair[0]))
fieldToFixedCoordinates = OrderedDict(sorted(fieldToFixedCoordinates.items(), key=lambda pair: pair[0])) fieldToFixedCoordinates = OrderedDict(sorted(fieldToFixedCoordinates.items(), key=lambda pair: pair[0]))
...@@ -279,16 +280,17 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn ...@@ -279,16 +280,17 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn
coordDict = createCoordinateDict(basePointerInfo[0]) coordDict = createCoordinateDict(basePointerInfo[0])
_, offset = createIntermediateBasePointer(fieldAccess, coordDict, lastPointer) _, offset = createIntermediateBasePointer(fieldAccess, coordDict, lastPointer)
baseArr = IndexedBase(lastPointer, shape=(1,)) baseArr = IndexedBase(lastPointer, shape=(1,))
result = baseArr[offset] result = ast.ResolvedFieldAccess(baseArr, offset, fieldAccess.field, fieldAccess.offsets, fieldAccess.index)
castFunc = sp.Function("cast") castFunc = sp.Function("cast")
if isinstance(getBaseType(fieldAccess.field.dtype), StructType): if isinstance(getBaseType(fieldAccess.field.dtype), StructType):
newType = fieldAccess.field.dtype.getElementType(fieldAccess.index[0]) newType = fieldAccess.field.dtype.getElementType(fieldAccess.index[0])
result = castFunc(result, newType) result = castFunc(result, newType)
return visitSympyExpr(result, enclosingBlock, sympyAssignment) return visitSympyExpr(result, enclosingBlock, sympyAssignment)
else: else:
newArgs = [visitSympyExpr(e, enclosingBlock, sympyAssignment) for e in expr.args] if isinstance(expr, ast.ResolvedFieldAccess):
return expr
newArgs = [visitSympyExpr(e, enclosingBlock, sympyAssignment) for e in expr.args]
kwargs = {'evaluate': False} if type(expr) in (sp.Add, sp.Mul, sp.Piecewise) else {} kwargs = {'evaluate': False} if type(expr) in (sp.Add, sp.Mul, sp.Piecewise) else {}
return expr.func(*newArgs, **kwargs) if newArgs else expr return expr.func(*newArgs, **kwargs) if newArgs else expr
......
...@@ -6,8 +6,8 @@ from sympy.core.cache import cacheit ...@@ -6,8 +6,8 @@ from sympy.core.cache import cacheit
class TypedSymbol(sp.Symbol): class TypedSymbol(sp.Symbol):
def __new__(cls, name, *args, **kwds): def __new__(cls, *args, **kwds):
obj = TypedSymbol.__xnew_cached_(cls, name, *args, **kwds) obj = TypedSymbol.__xnew_cached_(cls, *args, **kwds)
return obj return obj
def __new_stage2__(cls, name, dtype): def __new_stage2__(cls, name, dtype):
...@@ -30,25 +30,6 @@ class TypedSymbol(sp.Symbol): ...@@ -30,25 +30,6 @@ class TypedSymbol(sp.Symbol):
return self.name, self.dtype return self.name, self.dtype
#class IndexedWithCast(sp.tensor.Indexed):
# def __new__(cls, base, castTo, *args):
# obj = super(IndexedWithCast, cls).__new__(cls, base, *args)
# obj._castTo = castTo
# return obj
#
# @property
# def castTo(self):
# return self._castTo
#
# def _hashable_content(self):
# superClassContents = list(super(IndexedWithCast, self)._hashable_content())
# t = tuple(superClassContents + [hash(repr(self._castTo))])
# return t
#
# def __getnewargs__(self):
# return self.base, self.castTo
def createType(specification): def createType(specification):
""" """
Create a subclass of Type according to a string or an object of subclass Type Create a subclass of Type according to a string or an object of subclass Type
......
class DotDict(dict):
    """Normal dict with additional dot access for all keys

    Reading a key that does not exist via dot access returns ``None`` (same as
    ``dict.get``). Names of the form ``__xxx__`` are never looked up in the dict
    and raise ``AttributeError`` when they are not real attributes.
    """

    def __getattr__(self, name):
        """Return ``self[name]`` or ``None`` if the key is missing.

        :raises AttributeError: for missing double-underscore names
        """
        # __getattr__ is only called after normal attribute lookup has failed.
        # Protocol probes such as __getstate__ or __deepcopy__ must then see a
        # missing attribute, not a None that the caller would try to call.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        return self.get(name)

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__