Commit 3b4deebe authored by Martin Bauer's avatar Martin Bauer
Browse files

Kerncraft coupling

- pystencils can now create a non-compilable kernel that can be
  analyzed by kerncraft
parent f1cc4051
......@@ -4,6 +4,22 @@ from pystencils.field import Field
from pystencils.types import TypedSymbol, createType, get_type_from_sympy, createTypeFromString
class ResolvedFieldAccess(sp.Indexed):
    """sympy ``Indexed`` that additionally stores information about the field access it stands for.

    Besides the ``base`` and the linearized index handled by ``sp.Indexed``, every instance carries
    the ``field``, the ``offsets`` and the ``idxCoordinateValues`` that were passed at construction.
    """

    def __new__(cls, base, linearizedIndex, field, offsets, idxCoordinateValues):
        """Create the indexed object and attach the additional field information to it.

        :param base: base object forwarded to ``sp.Indexed``
        :param linearizedIndex: single index expression forwarded to ``sp.Indexed``
        :param field: object stored as ``field``; it has to be hashable (see ``_hashable_content``)
        :param offsets: iterable stored as ``offsets``
        :param idxCoordinateValues: object stored as ``idxCoordinateValues``
        """
        obj = super(ResolvedFieldAccess, cls).__new__(cls, base, linearizedIndex)
        obj.field = field
        obj.offsets = offsets
        obj.idxCoordinateValues = idxCoordinateValues
        return obj

    def _hashable_content(self):
        # The extra attributes take part in hashing / comparison, so that two accesses with the
        # same base and index but different field information are not treated as the same object.
        superClassContents = super(ResolvedFieldAccess, self)._hashable_content()
        return superClassContents + tuple(self.offsets) + (repr(self.idxCoordinateValues), hash(self.field))

    def __getnewargs__(self):
        # Has to mirror the signature of __new__: the first entry is the base object itself,
        # not a name string, otherwise a re-created object would get a different base.
        return self.base, self.indices[0], self.field, self.offsets, self.idxCoordinateValues
class Node(object):
"""Base class for all AST nodes"""
......@@ -112,14 +128,13 @@ class KernelFunction(Node):
def __repr__(self):
return '<{0} {1}>'.format(self.dtype, self.name)
def __init__(self, body, fieldsAccessed, functionName="kernel"):
def __init__(self, body, functionName="kernel"):
super(KernelFunction, self).__init__()
self._body = body
body.parent = self
self._parameters = None
self.functionName = functionName
self._body.parent = self
self._fieldsAccessed = fieldsAccessed
# these variables are assumed to be global, so no automatic parameter is generated for them
self.globalVariables = set()
......@@ -147,7 +162,7 @@ class KernelFunction(Node):
@property
def fieldsAccessed(self):
"""Set of Field instances: fields which are accessed inside this kernel function"""
return self._fieldsAccessed
return set(o.field for o in self.atoms(ResolvedFieldAccess))
def _updateParameters(self):
undefinedSymbols = self._body.undefinedSymbols - self.globalVariables
......
......@@ -118,7 +118,7 @@ def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typ
loopBody.append(assignment)
functionBody = Block([loopNode])
ast = KernelFunction(functionBody, allFields, functionName)
ast = KernelFunction(functionBody, functionName)
fixedCoordinateMapping = {f.name: coordinateTypedSymbols for f in nonIndexFields}
resolveFieldAccesses(ast, set(['indexField']), fieldToFixedCoordinates=fixedCoordinateMapping)
......
......@@ -34,7 +34,7 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None,
block = Block(assignments)
block = indexing.guard(block, commonShape)
ast = KernelFunction(block, allFields, functionName)
ast = KernelFunction(block, functionName)
ast.globalVariables.update(indexing.indexVariables)
coordMapping = indexing.coordinates
......@@ -91,7 +91,7 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel"
functionBody = Block(coordinateSymbolAssignments + assignments)
functionBody = indexing.guard(functionBody, getCommonShape(indexFields))
ast = KernelFunction(functionBody, allFields, functionName)
ast = KernelFunction(functionBody, functionName)
ast.globalVariables.update(indexing.indexVariables)
coordMapping = indexing.coordinates
......
#include <stdlib.h>
/* Parenthesised so that each macro expands to a single value inside larger expressions
   (e.g. x / M or M % k). */
#define M (512*512)
#define N (512*512)
/*
 * For j in [1, M-2] and i in [1, n-2], stores into b[j][i] the sum
 *   a[j][i-1] + a[j][i+1] + a[j-1][i] + a[j+1][i]
 * multiplied by a factor that depends on s:
 *   - s == 0: the factor is 1.0
 *   - s != 0: the factor is the result of the comparison a[j][i-1] > 0, i.e. 0 or 1
 * NOTE(review): s only selects the factor, its value is never multiplied in - confirm this is intended.
 *
 * s: selector for the factor, see above
 * n: length of the second array dimension
 * a: input array, declared as M x n
 * b: output array, declared as M x n; only the interior points are written
 */
void twodfivept(double s, int n, double a[M][n], double b[M][n]) {
for (int j = 1; j < (M - 1); ++j)
for (int i = 1; i < (n - 1); ++i)
b[j][i] = (a[j][i-1] + a[j][i+1] + a[j-1][i] + a[j+1][i]) * (s ? a[j][i-1] > 0 : 1.0);
}
/*
 * Driver: declares two M x N arrays of double, sets every element of both to 42.0 and
 * calls twodfivept once with s = 0.23 and n = N. argc and argv are not used.
 *
 * NOTE(review): with M = N = 512*512 each array has 2^36 elements and is an automatic
 * variable of main, so this file is presumably meant to be compiled / analysed rather
 * than executed - confirm.
 */
int main(int argc, char **argv)
{
double a[M][N];
double b[M][N];
/* initialise both arrays with the same constant */
for(int i=0; i<M; ++i)
for(int j=0; j<N; ++j)
a[i][j] = b[i][j] = 42.0;
twodfivept(0.23, N, a, b);
return 0;
}
general:
analysis:
tool: LLVM (3.9.0svn)
build: Apr 19 2016 (14:38:27)
kernel:
file: 2d-5pt.c_compilable.c
function: twodfivept
scop number: 0
arrays:
a:
type: double
dimension: [*, n]
b:
type: double
dimension: [*, n]
loops:
-
index: i
start: 0
stop: 262142
step: 1
-
index: j
start: 0
stop: (-2+n)
step: 1
data sources:
a:
- [1+i, j]
- [1+i, 2+j]
- [i, 1+j]
- [2+i, 1+j]
data destinations:
b:
- [1+i, 1+j]
flops:
"+": 3
"*": 1
"f+": 4
"f*": 2
arrays:
a:
type: double
dimension: [30, 50]
b:
type: double
dimension: [30, 50]
loops:
-
index: i
start: 0
stop: 262142
step: 1
-
index: j
start: 0
stop: (-2+20)
step: 1
data sources:
a:
- [1+i, j]
- [1+i, 2+j]
- [i, 1+j]
- [2+i, 1+j]
data destinations:
b:
- [1+i, 1+j]
flops:
"+": 3
"*": 1
from .kernel import PyStencilsKerncraftKernel, KerncraftParameters
import kerncraft
from kerncraft.kernel import KernelCode
from kernel import KernelDescription
from pystencils.astnodes import LoopOverCoordinate
import sympy as sp
from collections import defaultdict
import kerncraft.kernel
from pystencils.cpu import createKernel
from pystencils.field import getLayoutFromStrides
from pystencils.sympyextensions import countNumberOfOperations
from pystencils.transformations import typeAllEquations
from pystencils import Field
from collections import defaultdict
from pystencils.astnodes import LoopOverCoordinate, SympyAssignment, ResolvedFieldAccess
from pystencils.field import Field, getLayoutFromStrides
from pystencils.sympyextensions import countNumberOfOperations, prod, countNumberOfOperationsInAst
from pystencils.utils import DotDict
class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel):
    """
    kerncraft kernel object that is filled from a pystencils AST.

    The constructor reads the loop nest around the single innermost loop, the resolved field
    accesses inside that loop, the accessed fields, the non-field parameters and the operation
    counts from the AST and stores them on the kernel object.
    """

    def __init__(self, ast):
        """
        :param ast: pystencils AST; it has to contain exactly one innermost loop
        :raises ValueError: if the AST contains no loop or more than one innermost loop
        """
        super(PyStencilsKerncraftKernel, self).__init__()
        self.ast = ast

        # --- loops: locate the one innermost loop
        candidates = [loop for loop in ast.atoms(LoopOverCoordinate) if loop.isInnermostLoop]
        if not candidates:
            raise ValueError("No loop found in pystencils AST")
        if len(candidates) > 1:
            raise ValueError("pystencils AST contains multiple inner loops - only one can be analyzed")
        innerLoop = candidates[0]

        # walk from the innermost loop up through the parents; the stack is stored outermost loop first
        enclosingLoops = []
        node = innerLoop
        while node is not None:
            if isinstance(node, LoopOverCoordinate):
                enclosingLoops.append((node.loopCounterSymbol.name, node.start, node.stop, node.step))
            node = node.parent
        enclosingLoops.reverse()
        self._loop_stack = enclosingLoops

        # --- data sources & destinations
        self._sources = defaultdict(list)
        self._destinations = defaultdict(list)

        readAccesses, writeAccesses = searchResolvedFieldAccessesInAst(innerLoop)
        for accessSet, target in ((readAccesses, self._sources), (writeAccesses, self._destinations)):
            for access in accessSet:
                # one entry per offset (loop counter symbol + offset), followed by the index coordinates
                coordinates = []
                for dim, offset in enumerate(access.offsets):
                    counter = sp.Symbol(LoopOverCoordinate.getLoopCounterName(dim), positive=True)
                    coordinates.append(counter + offset)
                coordinates.extend(access.idxCoordinateValues)

                # reorder the entries according to the layout computed from the field strides
                fieldLayout = getLayoutFromStrides(access.field.strides)
                target[access.field.name].append([coordinates[d] for d in fieldLayout])

        # --- variables: arrays first, then all parameters that are not field arguments
        for field in ast.fieldsAccessed:
            fieldLayout = getLayoutFromStrides(field.strides)
            self.set_variable(field.name, str(field.dtype), [field.shape[d] for d in fieldLayout])

        for param in ast.parameters:
            if param.isFieldArgument:
                continue
            self.set_variable(param.name, str(param.dtype), None)
            self._sources[param.name] = [None]

        # --- data type: taken from the first registered variable
        self.datatype = list(self.variables.values())[0][0]

        # --- flops of the innermost loop
        counts = countNumberOfOperationsInAst(innerLoop)
        self._flops = {'+': counts['adds'], '*': counts['muls'], '/': counts['divs']}

        self.check()
class KerncraftParameters(DotDict):
    """DotDict that is pre-filled with a set of default entries.

    Defaults: ``asm_block='auto'``, ``asm_increment=0``, ``cores=1``, ``cache_predictor='SIM'``
    and ``verbose=0``.
    """

    def __init__(self, **kwargs):
        """
        :param kwargs: optional entries that override or extend the defaults;
                       calling without arguments yields exactly the defaults
        """
        super(KerncraftParameters, self).__init__()
        self['asm_block'] = 'auto'
        self['asm_increment'] = 0
        self['cores'] = 1
        self['cache_predictor'] = 'SIM'
        self['verbose'] = 0
        # applied last so that explicitly passed values win over the defaults
        self.update(kwargs)
class PyStencilsKerncraftKernelOld(kerncraft.kernel.Kernel):
"""
Implementation of kerncraft's kernel interface for pystencils CPU kernels.
Analyses a list of equations assuming they will be executed on a CPU
- right now it uses only the equations, not the optimized form e.g. expressions
that have been pulled in front of the loop
-
"""
def __init__(self, listOfEquations, typeForSymbol=None):
super(PyStencilsKerncraftKernel, self).__init__()
pystencilsAst = createKernel(listOfEquations, typeForSymbol=typeForSymbol)
self.ast = pystencilsAst
ast = createKernel(listOfEquations, typeForSymbol=typeForSymbol)
self.ast = ast
fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol)
allFields = fieldsRead.union(fieldsWritten)
# Loops
innerLoops = [l for l in pystencilsAst.atoms(LoopOverCoordinate) if l.isInnermostLoop]
innerLoops = [l for l in ast.atoms(LoopOverCoordinate) if l.isInnermostLoop]
if len(innerLoops) == 0:
raise ValueError("No loop found in pystencils AST")
elif len(innerLoops) > 1:
......@@ -44,7 +128,8 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel):
for eq in listOfEquations:
for accessesDict, expr in [(self._destinations, eq.lhs), (self._sources, eq.rhs)]:
for fa in expr.atoms(Field.Access):
coord = [sp.Symbol(LoopOverCoordinate.getLoopCounterName(i)) + off for i, off in enumerate(fa.offsets)]
coord = [sp.Symbol(LoopOverCoordinate.getLoopCounterName(i), positive=True) + off
for i, off in enumerate(fa.offsets)]
coord += list(fa.index)
layout = getLayoutFromStrides(fa.field.strides)
permutedCoord = [coord[i] for i in layout]
......@@ -55,7 +140,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel):
layout = getLayoutFromStrides(field.strides)
permutedShape = list(field.shape[i] for i in layout)
self.set_variable(field.name, str(field.dtype), permutedShape)
for param in pystencilsAst.parameters:
for param in ast.parameters:
if not param.isFieldArgument:
self.set_variable(param.name, str(param.dtype), None)
self._sources[param.name] = [None]
......@@ -73,48 +158,21 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.Kernel):
self.check()
from kerncraft.iaca_marker import find_asm_blocks, userselect_block, select_best_block
from kerncraft.models import ECM, ECMData
from kerncraft.machinemodel import MachineModel
from ruamel import yaml
if __name__ == "__main__":
from pystencils import Field
import sympy as sp
import numpy as np
from pystencils.cpu import generateC
arr = np.zeros([80, 40], order='c')
#arr = np.zeros([40, 80, 3], order='f')
a = Field.createFromNumpyArray('a', arr, indexDimensions=0)
b = Field.createFromNumpyArray('b', arr, indexDimensions=0)
s = sp.Symbol("s")
rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
updateRule = sp.Eq(b[0, 0], s*rhs)
k = PyStencilsKerncraftKernel([updateRule])
print(generateC(k.ast))
kernelFile = "2d-5pt.c"
#k = KernelCode(open("/home/martin/dev/kerncraft/examples/kernels/" + kernelFile).read())]
descr = yaml.load(open("/home/martin/dev/pystencils/pystencils/kerncraft/2d-5pt.yml").read())
k = KernelDescription(descr)
k.print_kernel_info()
k.print_variables_info()
offsets = list(k.compile_global_offsets(1000))
print(offsets)
machineFilePath = "/home/martin/dev/kerncraft/examples/machine-files/emmy.yaml"
machine = MachineModel(path_to_yaml=machineFilePath)
#exit(0)
from kerncraft.kerncraft import create_parser
parser = create_parser()
parserArgs = parser.parse_args(["-m", machineFilePath, "-p", "ECMData", machineFilePath])
model = ECMData(k, machine, parserArgs)
model.analyze()
model.report()
#blocks = find_asm_blocks(open("/home/martin/dev/kerncraft/2d-5pt.c_compilable.s").readlines())
#userselect_block(blocks)
##select_
#bestBlock = select_best_block(blocks)
#print(bestBlock)
# ------------------------------------------- Helper functions ---------------------------------------------------------
def searchResolvedFieldAccessesInAst(ast):
    """Collects the ResolvedFieldAccess objects used by all assignments at or below a node.

    Nodes that are not a SympyAssignment are descended into via their ``args``. For every
    SympyAssignment the accesses found in its left hand side count as writes, the accesses
    found in its right hand side as reads.

    :param ast: node where the search starts
    :return: tuple ``(readAccesses, writeAccesses)``, both sets of ResolvedFieldAccess
    """
    readAccesses, writeAccesses = set(), set()

    # explicit work list instead of recursion; results are sets, so the visiting order is irrelevant
    pending = [ast]
    while pending:
        node = pending.pop()
        if isinstance(node, SympyAssignment):
            writeAccesses.update(node.lhs.atoms(ResolvedFieldAccess))
            readAccesses.update(node.rhs.atoms(ResolvedFieldAccess))
        else:
            pending.extend(node.args)

    return readAccesses, writeAccesses
\ No newline at end of file
......@@ -7,18 +7,12 @@ from copy import deepcopy
from collections import namedtuple
from time import sleep
from pystencils.runhelper import Database
from pystencils.utils import DotDict
class ParameterStudy(object):
Run = namedtuple("Run", ['parameterDict', 'weight'])
class DotDict(dict):
"""Normal dict with additional dot access for all keys"""
__getattr__ = dict.get
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
def __init__(self, runFunction, listOfRuns=[], databaseFile='./db'):
self.listOfRuns = listOfRuns
self.runFunction = runFunction
......@@ -35,7 +29,7 @@ class ParameterStudy(object):
for valueTuple in itertools.product(*parameterValues):
paramsDict = deepcopy(defaultParamsDict)
paramsDict.update({name: value for name, value in zip(parameterNames, valueTuple)})
params = self.DotDict(paramsDict)
params = DotDict(paramsDict)
if filterFunction:
params = filterFunction(params)
if params is None:
......
......@@ -100,7 +100,7 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None
assignment = ast.SympyAssignment(ast.LoopOverCoordinate.getLoopCounterSymbol(loopCoordinate),
sp.sympify(sliceComponent))
currentBody.insertFront(assignment)
return ast.KernelFunction(currentBody, fields, functionName)
return ast.KernelFunction(currentBody, functionName)
def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr):
......@@ -231,6 +231,7 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn
counters to index the field these symbols are used as coordinates
:return: transformed AST
"""
fieldToBasePointerInfo = OrderedDict(sorted(fieldToBasePointerInfo.items(), key=lambda pair: pair[0]))
fieldToFixedCoordinates = OrderedDict(sorted(fieldToFixedCoordinates.items(), key=lambda pair: pair[0]))
......@@ -279,16 +280,17 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn
coordDict = createCoordinateDict(basePointerInfo[0])
_, offset = createIntermediateBasePointer(fieldAccess, coordDict, lastPointer)
baseArr = IndexedBase(lastPointer, shape=(1,))
result = baseArr[offset]
result = ast.ResolvedFieldAccess(baseArr, offset, fieldAccess.field, fieldAccess.offsets, fieldAccess.index)
castFunc = sp.Function("cast")
if isinstance(getBaseType(fieldAccess.field.dtype), StructType):
newType = fieldAccess.field.dtype.getElementType(fieldAccess.index[0])
result = castFunc(result, newType)
return visitSympyExpr(result, enclosingBlock, sympyAssignment)
else:
newArgs = [visitSympyExpr(e, enclosingBlock, sympyAssignment) for e in expr.args]
if isinstance(expr, ast.ResolvedFieldAccess):
return expr
newArgs = [visitSympyExpr(e, enclosingBlock, sympyAssignment) for e in expr.args]
kwargs = {'evaluate': False} if type(expr) in (sp.Add, sp.Mul, sp.Piecewise) else {}
return expr.func(*newArgs, **kwargs) if newArgs else expr
......
......@@ -6,8 +6,8 @@ from sympy.core.cache import cacheit
class TypedSymbol(sp.Symbol):
def __new__(cls, name, *args, **kwds):
obj = TypedSymbol.__xnew_cached_(cls, name, *args, **kwds)
def __new__(cls, *args, **kwds):
obj = TypedSymbol.__xnew_cached_(cls, *args, **kwds)
return obj
def __new_stage2__(cls, name, dtype):
......@@ -30,25 +30,6 @@ class TypedSymbol(sp.Symbol):
return self.name, self.dtype
#class IndexedWithCast(sp.tensor.Indexed):
# def __new__(cls, base, castTo, *args):
# obj = super(IndexedWithCast, cls).__new__(cls, base, *args)
# obj._castTo = castTo
# return obj
#
# @property
# def castTo(self):
# return self._castTo
#
# def _hashable_content(self):
# superClassContents = list(super(IndexedWithCast, self)._hashable_content())
# t = tuple(superClassContents + [hash(repr(self._castTo))])
# return t
#
# def __getnewargs__(self):
# return self.base, self.castTo
def createType(specification):
"""
Create a subclass of Type according to a string or an object of subclass Type
......
class DotDict(dict):
    """Normal dict with additional dot access for all keys.

    ``d.key`` returns ``d['key']``, or None if the key does not exist. ``d.key = value`` and
    ``del d.key`` set and delete the corresponding item.

    Missing special names of the form ``__name__`` raise AttributeError instead of returning
    None. Python probes objects for optional protocol methods (e.g. while copying or pickling)
    via attribute lookup, and a None result would be taken for an existing method.
    """

    def __getattr__(self, name):
        # only called when the normal attribute lookup has failed
        if name in self:
            return self[name]
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        return None

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment