Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 2016 additions and 1374 deletions
import sympy as sp
from pystencils.astnodes import SympyAssignment, Block, LoopOverCoordinate, KernelFunction
from pystencils.transformations import resolveFieldAccesses, makeLoopOverDomain, typingFromSympyInspection, \
typeAllEquations, getOptimalLoopOrdering, parseBasePointerInfo, moveConstantsBeforeLoop, splitInnerLoop, insertCasts#, \
#desympy_ast, insert_casts
from pystencils.data_types import TypedSymbol, BasicType, StructType
from pystencils.field import Field
import pystencils.astnodes as ast
from functools import partial
from pystencils.llvm.llvmjit import makePythonFunction
def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, splitGroups=(),
iterationSlice=None, ghostLayers=None):
"""
Creates an abstract syntax tree for a kernel function, by taking a list of update rules.
Loops are created according to the field accesses in the equations.
:param listOfEquations: list of sympy equations, containing accesses to :class:`pystencils.field.Field`.
Defining the update rules of the kernel
:param functionName: name of the generated function - only important if generated code is written out
:param typeForSymbol: a map from symbol name to a C type specifier. If not specified all symbols are assumed to
be of type 'double' except symbols which occur on the left hand side of equations where the
right hand side is a sympy Boolean which are assumed to be 'bool' .
:param splitGroups: Specification on how to split up inner loop into multiple loops. For details see
transformation :func:`pystencils.transformation.splitInnerLoop`
:param iterationSlice: if not None, iteration is done only over this slice of the field
:param ghostLayers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
if None, the number of ghost layers is determined automatically and assumed to be equal for a
all dimensions
:return: :class:`pystencils.ast.KernelFunction` node
"""
#if not typeForSymbol:
# typeForSymbol = typingFromSympyInspection(listOfEquations, "double")
#
#def typeSymbol(term):
# if isinstance(term, Field.Access) or isinstance(term, TypedSymbol):
# return term
# elif isinstance(term, sp.Symbol):
# return TypedSymbol(term.name, typeForSymbol[term.name])
# else:
# raise ValueError("Term has to be field access or symbol")
#
#fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol)
#allFields = fieldsRead.union(fieldsWritten)
#
#readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten])
#
#body = ast.Block(assignments)
#loopOrder = getOptimalLoopOrdering(allFields)
#code = makeLoopOverDomain(body, functionName, iterationSlice=iterationSlice,
# ghostLayers=ghostLayers, loopOrder=loopOrder)
#
#if splitGroups:
# typedSplitGroups = [[typeSymbol(s) for s in splitGroup] for splitGroup in splitGroups]
# splitInnerLoop(code, typedSplitGroups)
#
#basePointerInfo = []
#for i in range(len(loopOrder)):
# basePointerInfo.append(['spatialInner%d' % i])
#basePointerInfos = {field.name: parseBasePointerInfo(basePointerInfo, loopOrder, field) for field in allFields}
#
#resolveFieldAccesses(code, readOnlyFields, fieldToBasePointerInfo=basePointerInfos)
#moveConstantsBeforeLoop(code)
from pystencils.cpu import createKernel
code = createKernel(listOfEquations, functionName, typeForSymbol, splitGroups, iterationSlice, ghostLayers)
code = insertCasts(code)
code.compile = partial(makePythonFunction, code)
return code
def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typeForSymbol=None,
coordinateNames=('x', 'y', 'z')):
"""
Similar to :func:`createKernel`, but here not all cells of a field are updated but only cells with
coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.
The coordinates are stored in a separated indexField, which is a one dimensional array with struct data type.
This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the
'coordinateNames' parameter. The struct can have also other fields that can be read and written in the kernel, for
example boundary parameters.
:param listOfEquations: list of update equations or AST nodes
:param indexFields: list of index fields, i.e. 1D fields with struct data type
:param typeForSymbol: see documentation of :func:`createKernel`
:param functionName: see documentation of :func:`createKernel`
:param coordinateNames: name of the coordinate fields in the struct data type
:return: abstract syntax tree
"""
fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol)
allFields = fieldsRead.union(fieldsWritten)
for indexField in indexFields:
indexField.isIndexField = True
assert indexField.spatialDimensions == 1, "Index fields have to be 1D"
nonIndexFields = [f for f in allFields if f not in indexFields]
spatialCoordinates = {f.spatialDimensions for f in nonIndexFields}
assert len(spatialCoordinates) == 1, "Non-index fields do not have the same number of spatial coordinates"
spatialCoordinates = list(spatialCoordinates)[0]
def getCoordinateSymbolAssignment(name):
for indexField in indexFields:
assert isinstance(indexField.dtype, StructType), "Index fields have to have a struct datatype"
dataType = indexField.dtype
if dataType.hasElement(name):
rhs = indexField[0](name)
lhs = TypedSymbol(name, BasicType(dataType.getElementType(name)))
return SympyAssignment(lhs, rhs)
raise ValueError("Index %s not found in any of the passed index fields" % (name,))
coordinateSymbolAssignments = [getCoordinateSymbolAssignment(n) for n in coordinateNames[:spatialCoordinates]]
coordinateTypedSymbols = [eq.lhs for eq in coordinateSymbolAssignments]
assignments = coordinateSymbolAssignments + assignments
# make 1D loop over index fields
loopBody = Block([])
loopNode = LoopOverCoordinate(loopBody, coordinateToLoopOver=0, start=0, stop=indexFields[0].shape[0])
for assignment in assignments:
loopBody.append(assignment)
functionBody = Block([loopNode])
ast = KernelFunction(functionBody, allFields, functionName)
fixedCoordinateMapping = {f.name: coordinateTypedSymbols for f in nonIndexFields}
resolveFieldAccesses(ast, set(['indexField']), fieldToFixedCoordinates=fixedCoordinateMapping)
moveConstantsBeforeLoop(ast)
desympy_ast(ast)
insert_casts(ast)
ast.compile = partial(makePythonFunction, ast)
return ast
import llvmlite.ir as ir
import functools
from sympy.printing.printer import Printer
from sympy import S
# S is numbers?
from pystencils.llvm.control_flow import Loop
from pystencils.data_types import createType, to_llvm_type, getTypeOfExpression, collateTypes, \
createCompositeTypeFromString
from sympy import Indexed
from sympy.codegen.ast import Assignment
def generateLLVM(ast_node, module=None, builder=None):
"""
Prints the ast as llvm code
"""
if module is None:
module = ir.Module()
if builder is None:
builder = ir.IRBuilder()
printer = LLVMPrinter(module, builder)
return printer._print(ast_node) # TODO use doprint() instead???
class LLVMPrinter(Printer):
"""Convert expressions to LLVM IR"""
def __init__(self, module, builder, fn=None, *args, **kwargs):
self.func_arg_map = kwargs.pop("func_arg_map", {})
super(LLVMPrinter, self).__init__(*args, **kwargs)
self.fp_type = ir.DoubleType()
self.fp_pointer = self.fp_type.as_pointer()
self.integer = ir.IntType(64)
self.integer_pointer = self.integer.as_pointer()
self.void = ir.VoidType()
self.module = module
self.builder = builder
self.fn = fn
self.ext_fn = {} # keep track of wrappers to external functions
self.tmp_var = {}
def _add_tmp_var(self, name, value):
self.tmp_var[name] = value
def _remove_tmp_var(self, name):
del self.tmp_var[name]
def _print_Number(self, n):
if getTypeOfExpression(n) == createType("int"):
return ir.Constant(self.integer, int(n))
elif getTypeOfExpression(n) == createType("double"):
return ir.Constant(self.fp_type, float(n))
else:
raise NotImplementedError("Numbers can only have int and double", n)
def _print_Float(self, expr):
return ir.Constant(self.fp_type, float(expr))
def _print_Integer(self, expr):
return ir.Constant(self.integer, int(expr))
def _print_int(self, i):
return ir.Constant(self.integer, i)
def _print_Symbol(self, s):
val = self.tmp_var.get(s)
if not val:
# look up parameter with name s
val = self.func_arg_map.get(s.name)
if not val:
raise LookupError("Symbol not found: %s" % s)
return val
def _print_Pow(self, expr):
base0 = self._print(expr.base)
if expr.exp == S.NegativeOne:
return self.builder.fdiv(ir.Constant(self.fp_type, 1.0), base0)
if expr.exp == S.Half:
fn = self.ext_fn.get("sqrt")
if not fn:
fn_type = ir.FunctionType(self.fp_type, [self.fp_type])
fn = ir.Function(self.module, fn_type, "sqrt")
self.ext_fn["sqrt"] = fn
return self.builder.call(fn, [base0], "sqrt")
if expr.exp == 2:
return self.builder.fmul(base0, base0)
elif expr.exp == 3:
return self.builder.fmul(self.builder.fmul(base0, base0), base0)
exp0 = self._print(expr.exp)
fn = self.ext_fn.get("pow")
if not fn:
fn_type = ir.FunctionType(self.fp_type, [self.fp_type, self.fp_type])
fn = ir.Function(self.module, fn_type, "pow")
self.ext_fn["pow"] = fn
return self.builder.call(fn, [base0, exp0], "pow")
def _print_Mul(self, expr):
nodes = [self._print(a) for a in expr.args]
e = nodes[0]
if getTypeOfExpression(expr) == createType('double'):
mul = self.builder.fmul
else: # int TODO unsigned/signed
mul = self.builder.mul
for node in nodes[1:]:
e = mul(e, node)
return e
def _print_Add(self, expr):
nodes = [self._print(a) for a in expr.args]
e = nodes[0]
if getTypeOfExpression(expr) == createType('double'):
add = self.builder.fadd
else: # int TODO unsigned/signed
add = self.builder.add
for node in nodes[1:]:
e = add(e, node)
return e
def _print_Or(self, expr):
nodes = [self._print(a) for a in expr.args]
e = nodes[0]
for node in nodes[1:]:
e = self.builder.or_(e, node)
return e
def _print_And(self, expr):
nodes = [self._print(a) for a in expr.args]
e = nodes[0]
for node in nodes[1:]:
e = self.builder.and_(e, node)
return e
def _print_StrictLessThan(self, expr):
return self._comparison('<', expr)
def _print_LessThan(self, expr):
return self._comparison('<=', expr)
def _print_StrictGreaterThan(self, expr):
return self._comparison('>', expr)
def _print_GreaterThan(self, expr):
return self._comparison('>=', expr)
def _print_Unequality(self, expr):
return self._comparison('!=', expr)
def _print_Equality(self, expr):
return self._comparison('==', expr)
def _comparison(self, cmpop, expr):
if collateTypes([getTypeOfExpression(arg) for arg in expr.args]) == createType('double'):
comparison = self.builder.fcmp_unordered
else:
comparison = self.builder.icmp_signed
return comparison(cmpop, self._print(expr.lhs), self._print(expr.rhs))
def _print_KernelFunction(self, function):
# KernelFunction does not posses a return type
return_type = self.void
parameter_type = []
for parameter in function.parameters:
parameter_type.append(to_llvm_type(parameter.dtype))
func_type = ir.FunctionType(return_type, tuple(parameter_type))
name = function.functionName
fn = ir.Function(self.module, func_type, name)
self.ext_fn[name] = fn
# set proper names to arguments
for i, arg in enumerate(fn.args):
arg.name = function.parameters[i].name
self.func_arg_map[function.parameters[i].name] = arg
# func.attributes.add("inlinehint")
# func.attributes.add("argmemonly")
block = fn.append_basic_block(name="entry")
self.builder = ir.IRBuilder(block) # TODO use goto_block instead
self._print(function.body)
self.builder.ret_void()
self.fn = fn
return fn
def _print_Block(self, block):
for node in block.args:
self._print(node)
def _print_LoopOverCoordinate(self, loop):
with Loop(self.builder, self._print(loop.start), self._print(loop.stop), self._print(loop.step),
loop.loopCounterName, loop.loopCounterSymbol.name) as i:
self._add_tmp_var(loop.loopCounterSymbol, i)
self._print(loop.body)
self._remove_tmp_var(loop.loopCounterSymbol)
def _print_SympyAssignment(self, assignment):
expr = self._print(assignment.rhs)
lhs = assignment.lhs
if isinstance(lhs, Indexed):
ptr = self._print(lhs.base.label)
index = self._print(lhs.args[1])
gep = self.builder.gep(ptr, [index])
return self.builder.store(expr, gep)
self.func_arg_map[assignment.lhs.name] = expr
return expr
def _print_castFunc(self, conversion):
node = self._print(conversion.args[0])
to_dtype = getTypeOfExpression(conversion)
from_dtype = getTypeOfExpression(conversion.args[0])
# (From, to)
decision = {
(createCompositeTypeFromString("int"), createCompositeTypeFromString("double")): functools.partial(
self.builder.sitofp, node, self.fp_type),
(createCompositeTypeFromString("double"), createCompositeTypeFromString("int")): functools.partial(
self.builder.fptosi, node, self.integer),
(createCompositeTypeFromString("double *"), createCompositeTypeFromString("int")): functools.partial(
self.builder.ptrtoint, node, self.integer),
(createCompositeTypeFromString("int"), createCompositeTypeFromString("double *")): functools.partial(self.builder.inttoptr, node,
self.fp_pointer),
(createCompositeTypeFromString("double * restrict"), createCompositeTypeFromString("int")): functools.partial(
self.builder.ptrtoint, node,
self.integer),
(createCompositeTypeFromString("int"),
createCompositeTypeFromString("double * restrict")): functools.partial(self.builder.inttoptr, node,
self.fp_pointer),
(createCompositeTypeFromString("double * restrict const"),
createCompositeTypeFromString("int")): functools.partial(self.builder.ptrtoint, node,
self.integer),
(createCompositeTypeFromString("int"),
createCompositeTypeFromString("double * restrict const")): functools.partial(self.builder.inttoptr, node,
self.fp_pointer),
}
# TODO float, TEST: const, restrict
# TODO bitcast, addrspacecast
# TODO unsigned/signed fills
# print([x for x in decision.keys()])
# print("Types:")
# print([(type(x), type(y)) for (x, y) in decision.keys()])
# print("Cast:")
# print((from_dtype, to_dtype))
return decision[(from_dtype, to_dtype)]()
def _print_pointerArithmeticFunc(self, pointer):
ptr = self._print(pointer.args[0])
index = self._print(pointer.args[1])
return self.builder.gep(ptr, [index])
def _print_Indexed(self, indexed):
ptr = self._print(indexed.base.label)
index = self._print(indexed.args[1])
gep = self.builder.gep(ptr, [index])
return self.builder.load(gep, name=indexed.base.label.name)
def _print_Piecewise(self, piece):
if not piece.args[-1].cond:
# We need the last conditional to be a True, otherwise the resulting
# function may not return a result.
raise ValueError("All Piecewise expressions must contain an "
"(expr, True) statement to be used as a default "
"condition. Without one, the generated "
"expression may not evaluate to anything under "
"some condition.")
if piece.has(Assignment):
raise NotImplementedError('The llvm-backend does not support assignments'
'in the Piecewise function. It is questionable'
'whether to implement it. So far there is no'
'use-case to test it.')
else:
phiData = []
after_block = self.builder.append_basic_block()
for (expr, condition) in piece.args:
if condition == True: # Don't use 'is' use '=='!
phiData.append((self._print(expr), self.builder.block))
self.builder.branch(after_block)
self.builder.position_at_end(after_block)
else:
cond = self._print(condition)
trueBlock = self.builder.append_basic_block()
falseBlock = self.builder.append_basic_block()
self.builder.cbranch(cond, trueBlock, falseBlock)
self.builder.position_at_end(trueBlock)
phiData.append((self._print(expr), trueBlock))
self.builder.branch(after_block)
self.builder.position_at_end(falseBlock)
phi = self.builder.phi(to_llvm_type(getTypeOfExpression(piece)))
for (val, block) in phiData:
phi.add_incoming(val, block)
return phi
# Should have a list of math library functions to validate this.
# TODO function calls to libs
def _print_Function(self, expr):
name = expr.func.__name__
e0 = self._print(expr.args[0])
fn = self.ext_fn.get(name)
if not fn:
fn_type = ir.FunctionType(self.fp_type, [self.fp_type])
fn = ir.Function(self.module, fn_type, name)
self.ext_fn[name] = fn
return self.builder.call(fn, [e0], name)
def emptyPrinter(self, expr):
try:
import inspect
mro = inspect.getmro(expr)
except AttributeError:
mro = "None"
raise TypeError("Unsupported type for LLVM JIT conversion: Expression:\"%s\", Type:\"%s\", MRO:%s"
% (expr, type(expr), mro))
import llvmlite.ir as ir
import llvmlite.binding as llvm
import numpy as np
import ctypes as ct
import subprocess
import shutil
from pystencils.data_types import createCompositeTypeFromString
from ..data_types import toCtypes, ctypes_from_llvm
from .llvm import generateLLVM
from ..cpu.cpujit import buildCTypeArgumentList, makePythonFunctionIncompleteParams
def generate_and_jit(ast):
gen = generateLLVM(ast)
if isinstance(gen, ir.Module):
return compileLLVM(gen)
else:
return compileLLVM(gen.module)
def makePythonFunction(ast, argumentDict={}, func=None):
if func is None:
jit = generate_and_jit(ast)
func = jit.get_function_ptr(ast.functionName)
try:
args = buildCTypeArgumentList(ast.parameters, argumentDict)
except KeyError:
# not all parameters specified yet
return makePythonFunctionIncompleteParams(ast, argumentDict, func)
return lambda: func(*args)
def compileLLVM(module):
jit = Jit()
jit.parse(module)
jit.optimize()
jit.compile()
return jit
class Jit(object):
def __init__(self):
llvm.initialize()
llvm.initialize_all_targets()
llvm.initialize_native_target()
llvm.initialize_native_asmprinter()
self.module = None
self._llvmmod = llvm.parse_assembly("")
self.target = llvm.Target.from_default_triple()
self.cpu = llvm.get_host_cpu_name()
self.cpu_features = llvm.get_host_cpu_features()
self.target_machine = self.target.create_target_machine(cpu=self.cpu, features=self.cpu_features.flatten(), opt=2)
llvm.check_jit_execution()
self.ee = llvm.create_mcjit_compiler(self.llvmmod, self.target_machine)
self.ee.finalize_object()
self.fptr = None
@property
def llvmmod(self):
return self._llvmmod
@llvmmod.setter
def llvmmod(self, mod):
self.ee.remove_module(self.llvmmod)
self.ee.add_module(mod)
self.ee.finalize_object()
self.compile()
self._llvmmod = mod
def parse(self, module):
self.module = module
llvmmod = llvm.parse_assembly(str(module))
llvmmod.verify()
llvmmod.triple = self.target.triple
llvmmod.name = 'module'
self.llvmmod = llvmmod
def write_ll(self, file):
with open(file, 'w') as f:
f.write(str(self.llvmmod))
def write_assembly(self, file):
with open(file, 'w') as f:
f.write(self.target_machine.emit_assembly(self.llvmmod))
def write_object_file(self, file):
with open(file, 'wb') as f:
f.write(self.target_machine.emit_object(self.llvmmod))
def optimize(self):
pmb = llvm.create_pass_manager_builder()
pmb.opt_level = 2
pmb.disable_unit_at_a_time = False
pmb.loop_vectorize = True
pmb.slp_vectorize = True
# TODO possible to pass for functions
pm = llvm.create_module_pass_manager()
pm.add_instruction_combining_pass()
pm.add_function_attrs_pass()
pm.add_constant_merge_pass()
pm.add_licm_pass()
pmb.populate(pm)
pm.run(self.llvmmod)
def optimize_polly(self, opt):
if shutil.which(opt) is None:
print('Path to the executable is wrong')
return
canonicalize = subprocess.Popen([opt, '-polly-canonicalize'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
analyze = subprocess.Popen(
[opt, '-polly-codegen', '-polly-vectorizer=polly', '-polly-parallel', '-polly-process-unprofitable', '-f'],
stdin=canonicalize.stdout, stdout=subprocess.PIPE)
canonicalize.communicate(input=self.llvmmod.as_bitcode())
optimize = subprocess.Popen([opt, '-O3', '-f'], stdin=analyze.stdout, stdout=subprocess.PIPE)
opts, _ = optimize.communicate()
llvmmod = llvm.parse_bitcode(opts)
llvmmod.verify()
self.llvmmod = llvmmod
def compile(self):
fptr = {}
for function in self.module.functions:
if not function.is_declaration:
return_type = None
if function.ftype.return_type != ir.VoidType():
return_type = toCtypes(createCompositeTypeFromString(str(function.ftype.return_type)))
args = [ctypes_from_llvm(arg) for arg in function.ftype.args]
function_address = self.ee.get_function_address(function.name)
fptr[function.name] = ct.CFUNCTYPE(return_type, *args)(function_address)
self.fptr = fptr
def __call__(self, function, *args, **kwargs):
target_function = next(f for f in self.module.functions if f.name == function)
arg_types = [ctypes_from_llvm(arg.type) for arg in target_function.args]
transformed_args = []
for i, arg in enumerate(args):
if isinstance(arg, np.ndarray):
transformed_args.append(arg.ctypes.data_as(arg_types[i]))
else:
transformed_args.append(arg)
self.fptr[function](*transformed_args)
def print_functions(self):
for f in self.module.functions:
print(f.ftype.return_type, f.name, f.args)
def get_function_ptr(self, name):
fptr = self.fptr[name]
fptr.jit = self
return fptr
import numpy as np
import waLBerla as wlb
from pystencils.slicing import normalizeSlice
class BlockIterationInfo:
def __init__(self, block, offset, localSlice):
self._block = block
self._offset = offset
self._localSlice = localSlice
@property
def block(self):
return self._block
@property
def offset(self):
return self._offset
@property
def shape(self):
return tuple(s.stop - s.start for s in self._localSlice)
@property
def localSlice(self):
"""Slice object of intersection between current block and iteration interval in local coordinates"""
return self._localSlice
@property
def midpointArrays(self):
"""Global coordinate meshgrid of cell midpoints which are shifted by 0.5 compared to cell indices"""
meshGridParams = [offset + 0.5 + np.arange(width, dtype=float)
for offset, width in zip(self.offset, self.shape)]
return np.meshgrid(*meshGridParams, indexing='ij', copy=False)
@property
def cellIndexArrays(self):
"""Global coordinate meshgrid of cell coordinates. Cell indices start at 0 at the first inner cell,
ghost layers have negative indices"""
meshGridParams = [offset + np.arange(width, dtype=np.int32)
for offset, width in zip(self.offset, self.shape)]
return np.meshgrid(*meshGridParams, indexing='ij', copy=False)
def slicedBlockIteration(blocks, sliceObj=None, innerGhostLayers=1, sliceNormalizationGhostLayers=1):
"""
Iterates of all blocks that have an intersection with the given slice object.
For these blocks a BlockIterationInfo object is yielded
:param blocks: waLBerla block data structure
:param sliceObj: a slice (i.e. rectangular subregion), can be created with makeSlice[]
:param innerGhostLayers: how many ghost layers are included in the local slice and the optional index arrays
:param sliceNormalizationGhostLayers: slices can have relative coordinates e.g. makeSlice[0.2, :, :]
when computing absolute values, the domain size is needed. This parameter
specifies how many ghost layers are taken into account for this operation.
Example: assume no slice is given, then sliceNormalizationGhostLayers effectively sets how much ghost layers
at the border of the domain are included. The innerGhostLayers parameter specifies how many inner ghost layers are
included
"""
if sliceObj is None:
sliceObj = [slice(None, None, None)] * 3
domainCellBB = blocks.getDomainCellBB()
domainExtent = [s + 2 * sliceNormalizationGhostLayers for s in domainCellBB.size]
sliceObj = normalizeSlice(sliceObj, domainExtent)
targetCellBB = wlb.CellInterval.fromSlice(sliceObj)
targetCellBB.shift(*[a - sliceNormalizationGhostLayers for a in domainCellBB.min])
for block in blocks:
intersection = blocks.getBlockCellBB(block).getExpanded(innerGhostLayers)
intersection.intersect(targetCellBB)
if intersection.empty():
continue
localTargetBB = blocks.transformGlobalToLocal(block, intersection)
localTargetBB.shift(innerGhostLayers, innerGhostLayers, innerGhostLayers)
localSlice = localTargetBB.toSlice(False)
yield BlockIterationInfo(block, intersection.min, localSlice)
import numpy as np
from pystencils import Field, makeSlice
from pystencils.datahandling import DataHandling
from pystencils.parallel.blockiteration import slicedBlockIteration
from pystencils.utils import DotDict
import waLBerla as wlb
class ParallelDataHandling(DataHandling):
GPU_DATA_PREFIX = "gpu_"
def __init__(self, blocks, defaultGhostLayers=1, defaultLayout='SoA', dim=3):
"""
Creates data handling based on waLBerla block storage
:param blocks: waLBerla block storage
:param defaultGhostLayers: nr of ghost layers used if not specified in add() method
:param defaultLayout: layout used if no layout is given to add() method
:param dim: dimension of scenario,
waLBerla always uses three dimensions, so if dim=2 the extend of the
z coordinate of blocks has to be 1
"""
super(ParallelDataHandling, self).__init__()
assert dim in (2, 3)
self.blocks = blocks
self.defaultGhostLayers = defaultGhostLayers
self.defaultLayout = defaultLayout
self._fields = DotDict() # maps name to symbolic pystencils field
self.dataNames = set()
self._dim = dim
self._fieldInformation = {}
self._cpuGpuPairs = []
self._customDataTransferFunctions = {}
if self._dim == 2:
assert self.blocks.getDomainCellBB().size[2] == 1
@property
def dim(self):
return self._dim
@property
def fields(self):
return self._fields
def addCustomData(self, name, cpuCreationFunction,
gpuCreationFunction=None, cpuToGpuTransferFunc=None, gpuToCpuTransferFunc=None):
self.blocks.addBlockData(name, cpuCreationFunction)
if gpuCreationFunction:
self.blocks.addBlockData(self.GPU_DATA_PREFIX + name, gpuCreationFunction)
if cpuToGpuTransferFunc is None or gpuToCpuTransferFunc is None:
raise ValueError("For GPU data, both transfer functions have to be specified")
self._customDataTransferFunctions[name] = (cpuToGpuTransferFunc, gpuToCpuTransferFunc)
def addArray(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None, cpu=True, gpu=False):
if ghostLayers is None:
ghostLayers = self.defaultGhostLayers
if layout is None:
layout = self.defaultLayout
if latexName is None:
latexName = name
if len(self.blocks) == 0:
raise ValueError("Data handling expects that each process has at least one block")
if hasattr(dtype, 'type'):
dtype = dtype.type
if name in self.blocks[0] or self.GPU_DATA_PREFIX + name in self.blocks[0]:
raise ValueError("Data with this name has already been added")
self._fieldInformation[name] = {'ghostLayers': ghostLayers,
'fSize': fSize,
'layout': layout,
'dtype': dtype}
layoutMap = {'fzyx': wlb.field.Layout.fzyx, 'zyxf': wlb.field.Layout.zyxf,
'SoA': wlb.field.Layout.fzyx, 'AoS': wlb.field.Layout.zyxf}
if cpu:
wlb.field.addToStorage(self.blocks, name, dtype, fSize=fSize, layout=layoutMap[layout],
ghostLayers=ghostLayers)
if gpu:
wlb.cuda.addGpuFieldToStorage(self.blocks, self.GPU_DATA_PREFIX+name, dtype, fSize=fSize,
usePitchedMem=False, ghostLayers=ghostLayers, layout=layoutMap[layout])
if cpu and gpu:
self._cpuGpuPairs.append((name, self.GPU_DATA_PREFIX + name))
blockBB = self.blocks.getBlockCellBB(self.blocks[0])
shape = tuple(s + 2 * ghostLayers for s in blockBB.size)
indexDimensions = 1 if fSize > 1 else 0
if indexDimensions == 1:
shape += (fSize, )
assert all(f.name != latexName for f in self.fields.values()), "Symbolic field with this name already exists"
self.fields[name] = Field.createFixedSize(latexName, shape, indexDimensions, dtype, layout)
def hasData(self, name):
return name in self._fields
def addArrayLike(self, name, nameOfTemplateField, latexName=None, cpu=True, gpu=False):
self.addArray(name, latexName=latexName, cpu=cpu, gpu=gpu, **self._fieldInformation[nameOfTemplateField])
def swap(self, name1, name2, gpu=False):
if gpu:
name1 = self.GPU_DATA_PREFIX + name1
name2 = self.GPU_DATA_PREFIX + name2
for block in self.blocks:
block[name1].swapDataPointers(block[name2])
def accessArray(self, name, sliceObj=None, innerGhostLayers='all', outerGhostLayers='all'):
fieldInfo = self._fieldInformation[name]
with self.accessWrapper(name):
if innerGhostLayers is 'all':
innerGhostLayers = fieldInfo['ghostLayers']
if outerGhostLayers is 'all':
outerGhostLayers = fieldInfo['ghostLayers']
for iterInfo in slicedBlockIteration(self.blocks, sliceObj, innerGhostLayers, outerGhostLayers):
arr = wlb.field.toArray(iterInfo.block[name], withGhostLayers=innerGhostLayers)[iterInfo.localSlice]
if self.fields[name].indexDimensions == 0:
arr = arr[..., 0]
if self.dim == 2:
arr = arr[:, :, 0]
yield arr, iterInfo
def accessCustomData(self, name):
with self.accessWrapper(name):
for block in self.blocks:
data = block[name]
cellBB = self.blocks.getBlockCellBB(block)
min = cellBB.min[:self.dim]
max = tuple(e + 1 for e in cellBB.max[:self.dim])
yield data, (min, max)
def gatherArray(self, name, sliceObj=None, allGather=False):
with self.accessWrapper(name):
if sliceObj is None:
sliceObj = makeSlice[:, :, :]
for array in wlb.field.gatherGenerator(self.blocks, name, sliceObj, allGather):
if self.fields[name].indexDimensions == 0:
array = array[..., 0]
if self.dim == 2:
array = array[:, :, 0]
yield array
def toCpu(self, name):
if name in self._customDataTransferFunctions:
transferFunc = self._customDataTransferFunctions[name][1]
for block in self.blocks:
transferFunc(block[self.GPU_DATA_PREFIX + name], block[name])
else:
wlb.cuda.copyFieldToCpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
def toGpu(self, name):
if name in self._customDataTransferFunctions:
transferFunc = self._customDataTransferFunctions[name][0]
for block in self.blocks:
transferFunc(block[self.GPU_DATA_PREFIX + name], block[name])
else:
wlb.cuda.copyFieldToGpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
def allToCpu(self):
for cpuName, gpuName in self._cpuGpuPairs:
wlb.cuda.copyFieldToCpu(self.blocks, gpuName, cpuName)
for name in self._customDataTransferFunctions.keys():
self.toCpu(name)
def allToGpu(self):
for cpuName, gpuName in self._cpuGpuPairs:
wlb.cuda.copyFieldToGpu(self.blocks, gpuName, cpuName)
for name in self._customDataTransferFunctions.keys():
self.toGpu(name)
def synchronizationFunctionCPU(self, names, stencil=None, buffered=True, **kwargs):
return self._synchronizationFunction(names, stencil, buffered, 'cpu')
def synchronizationFunctionGPU(self, names, stencil=None, buffered=True, **kwargs):
return self._synchronizationFunction(names, stencil, buffered, 'gpu')
def _synchronizationFunction(self, names, stencil, buffered, target):
if stencil is None:
stencil = 'D3Q27' if self.dim == 3 else 'D2Q9'
if not hasattr(names, '__len__') or type(names) is str:
names = [names]
createScheme = wlb.createUniformBufferedScheme if buffered else wlb.createUniformDirectScheme
if target == 'cpu':
createPacking = wlb.field.createPackInfo if buffered else wlb.field.createMPIDatatypeInfo
elif target == 'gpu':
createPacking = wlb.cuda.createPackInfo if buffered else wlb.cuda.createMPIDatatypeInfo
names = [self.GPU_DATA_PREFIX + name for name in names]
syncFunction = createScheme(self.blocks, stencil)
for name in names:
syncFunction.addDataToCommunicate(createPacking(self.blocks, name))
return syncFunction
from matplotlib.pyplot import *
def vectorField(field, step=2, **kwargs):
"""
Plot given vector field as quiver (arrow) plot.
:param field: numpy array with 3 dimensions, first two are spatial x,y coordinate, the last
coordinate should have shape 2 and stores the 2 velocity components
:param step: plots only every steps's cell
:param kwargs: keyword arguments passed to :func:`matplotlib.pyplot.quiver`
"""
veln = field.swapaxes(0, 1)
res = quiver(veln[::step, ::step, 0], veln[::step, ::step, 1], **kwargs)
axis('equal')
return res
def vectorFieldMagnitude(field, **kwargs):
"""
Plots the magnitude of a vector field as colormap
:param field: numpy array with 3 dimensions, first two are spatial x,y coordinate, the last
coordinate should have shape 2 and stores the 2 velocity components
:param kwargs: keyword arguments passed to :func:`matplotlib.pyplot.imshow`
"""
from numpy.linalg import norm
norm = norm(field, axis=2, ord=2)
if hasattr(field, 'mask'):
norm = np.ma.masked_array(norm, mask=field.mask[:, :, 0])
return scalarField(norm, **kwargs)
def scalarField(field, **kwargs):
"""
Plots field values as colormap
:param field: two dimensional numpy array
:param kwargs: keyword arguments passed to :func:`matplotlib.pyplot.imshow`
"""
import numpy as np
field = np.swapaxes(field, 0, 1)
res = imshow(field, origin='lower', **kwargs)
axis('equal')
return res
def scalarFieldContour(field, **kwargs):
field = np.swapaxes(field, 0, 1)
res = contour(field, **kwargs)
axis('equal')
return res
def multipleScalarFields(field, **kwargs):
subPlots = field.shape[-1]
for i in range(subPlots):
subplot(1, subPlots, i + 1)
title(str(i))
scalarField(field[..., i])
colorbar()
# ------------------------------------------- Animations ---------------------------------------------------------------
def vectorFieldAnimation(runFunction, step=2, rescale=True, plotSetupFunction=lambda: None,
plotUpdateFunction=lambda: None, interval=30, frames=180, **kwargs):
import matplotlib.animation as animation
from numpy.linalg import norm
fig = gcf()
im = None
field = runFunction()
if rescale:
maxNorm = np.max(norm(field, axis=2, ord=2))
field /= maxNorm
if 'scale' not in kwargs:
kwargs['scale'] = 1.0
quiverPlot = vectorField(field, step=step, **kwargs)
plotSetupFunction()
def updatefig(*args):
f = runFunction()
f = np.swapaxes(f, 0, 1)
if rescale:
maxNorm = np.max(norm(f, axis=2, ord=2))
f /= maxNorm
u, v = f[::step, ::step, 0], f[::step, ::step, 1]
quiverPlot.set_UVC(u, v)
plotUpdateFunction()
return im,
return animation.FuncAnimation(fig, updatefig, interval=interval, frames=frames)
def vectorFieldMagnitudeAnimation(runFunction, plotSetupFunction=lambda: None,
plotUpdateFunction=lambda: None, interval=30, frames=180, **kwargs):
import matplotlib.animation as animation
from numpy.linalg import norm
fig = gcf()
im = None
field = runFunction()
im = vectorFieldMagnitude(field, **kwargs)
plotSetupFunction()
def updatefig(*args):
f = runFunction()
normed = norm(f, axis=2, ord=2)
if hasattr(f, 'mask'):
normed = np.ma.masked_array(normed, mask=f.mask[:, :, 0])
normed = np.swapaxes(normed, 0, 1)
im.set_array(normed)
plotUpdateFunction()
return im,
return animation.FuncAnimation(fig, updatefig, interval=interval, frames=frames)
\ No newline at end of file
[project]
name = "pystencils"
description = "Speeding up stencil computations on CPUs and GPUs"
dynamic = ["version"]
readme = "README.md"
authors = [
{ name = "Martin Bauer" },
{ name = "Jan Hönig " },
{ name = "Markus Holzer" },
{ name = "Frederik Hennig" },
{ email = "cs10-codegen@fau.de" },
]
license = { file = "COPYING.txt" }
requires-python = ">=3.10"
dependencies = ["sympy>=1.9,<=1.12.1", "numpy>=1.8.0", "appdirs", "joblib", "pyyaml", "fasteners"]
classifiers = [
"Development Status :: 4 - Beta",
"Framework :: Jupyter",
"Topic :: Software Development :: Code Generators",
"Topic :: Scientific/Engineering :: Physics",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
]
[project.urls]
"Bug Tracker" = "https://i10git.cs.fau.de/pycodegen/pystencils/-/issues"
"Documentation" = "https://pycodegen.pages.i10git.cs.fau.de/pystencils/"
"Source Code" = "https://i10git.cs.fau.de/pycodegen/pystencils"
[project.optional-dependencies]
gpu = ['cupy']
alltrafos = ['islpy', 'py-cpuinfo']
bench_db = ['blitzdb', 'pymongo', 'pandas']
interactive = [
'matplotlib',
'ipy_table',
'imageio',
'jupyter',
'pyevtk',
'rich',
'graphviz',
]
use_cython = [
'Cython'
]
doc = [
'sphinx',
'sphinx_rtd_theme',
'nbsphinx',
'sphinxcontrib-bibtex',
'sphinx_autodoc_typehints',
'pandoc',
]
tests = [
'pytest',
'pytest-cov',
'pytest-html',
'ansi2html',
'pytest-xdist',
'flake8',
'nbformat',
'nbconvert',
'ipython',
'matplotlib',
'py-cpuinfo',
'randomgen>=1.18',
]
[build-system]
requires = [
"setuptools>=61",
"versioneer[toml]>=0.29",
# 'Cython'
]
build-backend = "setuptools.build_meta"
[tool.setuptools.package-data]
pystencils = [
"include/*.h",
"boundaries/createindexlistcython.pyx"
]
[tool.setuptools.packages.find]
where = ["src"]
include = ["pystencils", "pystencils.*"]
namespaces = false
[tool.versioneer]
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
# resulting files.
VCS = "git"
style = "pep440"
versionfile_source = "src/pystencils/_version.py"
versionfile_build = "pystencils/_version.py"
tag_prefix = "release/"
parentdir_prefix = "pystencils-"
[pytest]
testpaths = src tests doc/notebooks
pythonpath = src
python_files = test_*.py *_test.py scenario_*.py
norecursedirs = *.egg-info .git .cache .ipynb_checkpoints htmlcov
addopts = --doctest-modules --durations=20 --cov-config pytest.ini
markers =
longrun: tests only run at night since they have large execution time
notebook: mark for notebooks
# these warnings all come from third party libraries.
filterwarnings =
ignore:an integer is required:DeprecationWarning
ignore:\s*load will be removed, use:PendingDeprecationWarning
ignore:the imp module is deprecated in favour of importlib:DeprecationWarning
ignore:.*is a deprecated alias for the builtin `bool`:DeprecationWarning
ignore:'contextfilter' is renamed to 'pass_context':DeprecationWarning
ignore:Using or importing the ABCs from 'collections' instead of from 'collections.abc':DeprecationWarning
ignore:Animation was deleted without rendering anything:UserWarning
[run]
branch = True
source = src/pystencils
tests
omit = doc/*
tests/*
setup.py
quicktest.py
conftest.py
versioneer.py
src/pystencils/jupytersetup.py
src/pystencils/cpu/msvc_detection.py
src/pystencils/sympy_gmpy_bug_workaround.py
src/pystencils/cache.py
src/pystencils/pacxx/benchmark.py
src/pystencils/_version.py
venv/
[report]
exclude_lines =
# Have to re-enable the standard pragma
pragma: no cover
def __repr__
def _repr_html_
# Don't complain if tests don't hit defensive assertion code:
raise AssertionError
raise NotImplementedError
NotImplementedError()
#raise ValueError
# Don't complain if non-runnable code isn't run:
if 0:
if False:
if __name__ == .__main__.:
skip_covered = True
fail_under = 85
[html]
directory = coverage_report
import sys
from PyQt5.QtWidgets import QWidget, QApplication, QTreeWidget, QTreeWidgetItem, QHBoxLayout
from pystencils.astnodes import Block, LoopOverCoordinate, KernelFunction
def debugGUI(ast):
app = QApplication.instance()
if app is None:
app = QApplication(sys.argv)
else:
print('QApplication instance already exists: %s' % str(app))
ex = DebugTree()
ex.insert_ast(ast)
app.exec_()
class DebugTree(QWidget):
def __init__(self):
super().__init__()
self.initUI()
def initUI(self):
self.tree = QTreeWidget(self)
self.tree.setColumnCount(1)
self.tree.setHeaderLabel('repr')
hbox = QHBoxLayout()
hbox.stretch(1)
hbox.addWidget(self.tree)
self.setWindowTitle('Debug')
self.setLayout(hbox)
self.show()
def insert_ast(self, node, parent=None):
if parent is None:
parent = self.tree
if isinstance(node, Block): # Blocks are represented with the tree structure
item = parent
else:
item = QTreeWidgetItem(parent)
item.setText(0, repr(node))
if node.func in [LoopOverCoordinate, KernelFunction]:
self.tree.expandItem(item)
for child in node.args:
self.insert_ast(child, item)
#!/usr/bin/env python3
from contextlib import redirect_stdout
import io
from tests.test_quicktests import (
test_basic_kernel,
test_basic_blocking_staggered,
test_basic_vectorization,
)
quick_tests = [
test_basic_kernel,
test_basic_blocking_staggered,
test_basic_vectorization,
]
if __name__ == "__main__":
print("Running pystencils quicktests")
for qt in quick_tests:
print(f" -> {qt.__name__}")
with redirect_stdout(io.StringIO()):
qt()
#!/bin/bash
echo "Existing versions"
git tag -l | grep release
echo "Enter the next version"
read new_version
git tag -s release/${new_version}
git push origin master release/${new_version}
rm -rf dist
python setup.py sdist
twine upload dist/*
import time
import socket
import blitzdb
from pystencils.cpu.cpujit import getCompilerConfig
def removeConstantColumns(df):
import pandas as pd
remainingDf = df.loc[:, df.apply(pd.Series.nunique) > 1]
constants = df.loc[:, df.apply(pd.Series.nunique) <= 1].iloc[0]
return remainingDf, constants
def removeColumnsByPrefix(df, prefixes, inplace=False):
if not inplace:
df = df.copy()
for columnName in df.columns:
for prefix in prefixes:
if columnName.startswith(prefix):
del df[columnName]
return df
def removePrefixInColumnName(df, inplace=False):
if not inplace:
df = df.copy()
newColumnNames = []
for columnName in df.columns:
if '.' in columnName:
newColumnNames.append(columnName[columnName.index('.') + 1:])
else:
newColumnNames.append(columnName)
df.columns = newColumnNames
return df
class Database(object):
class SimulationResult(blitzdb.Document):
pass
def __init__(self, file):
if file.startswith("mongo://"):
from pymongo import MongoClient
dbName = file[len("mongo://"):]
c = MongoClient()
self.backend = blitzdb.MongoBackend(c[dbName])
else:
self.backend = blitzdb.FileBackend(file)
self.backend.autocommit = True
@staticmethod
def getEnv():
return {
'timestamp': time.mktime(time.gmtime()),
'hostname': socket.gethostname(),
'cpuCompilerConfig': getCompilerConfig(),
}
def save(self, params, result, env=None, **kwargs):
documentDict = {
'params': params,
'result': result,
'env': env if env else self.getEnv(),
}
documentDict.update(kwargs)
document = Database.SimulationResult(documentDict, backend=self.backend)
document.save()
self.backend.commit()
def filter(self, *args, **kwargs):
return self.backend.filter(Database.SimulationResult, *args, **kwargs)
def filterParams(self, query, *args, **kwargs):
query = {'params.' + k: v for k, v in query.items()}
return self.filter(query, *args, **kwargs)
def alreadySimulated(self, parameters):
return len(self.filter({'params': parameters})) > 0
# Columns with these prefixes are not included in pandas result
pandasColumnsToIgnore = ['changedParams.', 'env.']
def toPandas(self, parameterQuery, removePrefix=True, dropConstantColumns=False):
import pandas as pd
queryResult = self.filterParams(parameterQuery)
if len(queryResult) == 0:
return
df = pd.io.json.json_normalize([e.attributes for e in queryResult])
df.set_index('pk', inplace=True)
if self.pandasColumnsToIgnore:
removeColumnsByPrefix(df, self.pandasColumnsToIgnore, inplace=True)
if removePrefix:
removePrefixInColumnName(df, inplace=True)
if dropConstantColumns:
df, _ = removeConstantColumns(df)
return df
import json
import datetime
import os
import socket
import itertools
from copy import deepcopy
from collections import namedtuple
from time import sleep
from pystencils.runhelper import Database
from pystencils.utils import DotDict
class ParameterStudy(object):
Run = namedtuple("Run", ['parameterDict', 'weight'])
def __init__(self, runFunction, listOfRuns=[], databaseFile='./db'):
self.listOfRuns = listOfRuns
self.runFunction = runFunction
self.db = Database(databaseFile)
def addRun(self, parameterDict, weight=1):
self.listOfRuns.append(self.Run(parameterDict, weight))
def addCombinations(self, degreesOfFreedom, constantParameters=None, filterFunction=None, weightFunction=None):
parameterNames = [e[0] for e in degreesOfFreedom]
parameterValues = [e[1] for e in degreesOfFreedom]
defaultParamsDict = {} if constantParameters is None else constantParameters
for valueTuple in itertools.product(*parameterValues):
paramsDict = deepcopy(defaultParamsDict)
paramsDict.update({name: value for name, value in zip(parameterNames, valueTuple)})
params = DotDict(paramsDict)
if filterFunction:
params = filterFunction(params)
if params is None:
continue
weight = 1 if not weightFunction else weightFunction(params)
self.addRun(params, weight)
def filterAlreadySimulated(self, allRuns):
return [r for r in allRuns if not self.db.alreadySimulated(r.parameterDict)]
@staticmethod
def distributeRuns(allRuns, process, numProcesses):
sortedRuns = sorted(allRuns, key=lambda e: e.weight, reverse=True)
result = sortedRuns[process::numProcesses]
result.reverse() # start with faster scenarios
return result
def runServer(self, ip="0.0.0.0", port=8342):
from http.server import BaseHTTPRequestHandler, HTTPServer
filteredRuns = self.filterAlreadySimulated(self.listOfRuns)
if not filteredRuns:
print("No Scenarios to simulate")
return
class ParameterStudyServer(BaseHTTPRequestHandler):
parameterStudy = self
allRuns = filteredRuns
runs = filteredRuns.copy()
currentlyRunning = {}
finishedRuns = []
def nextScenario(self, receivedJsonData):
clientName = receivedJsonData['clientName']
if len(self.runs) > 0:
runStatus = "%d/%d" % (len(self.finishedRuns), len(self.allRuns))
workStatus = "%d/%d" % (sum(r.weight for r in self.finishedRuns),
sum(r.weight for r in self.allRuns))
formatArgs = {
'remaining': len(self.runs),
'time': datetime.datetime.now().strftime("%H:%M:%S"),
'clientName': clientName,
'runStatus': runStatus,
'workStatus': workStatus,
}
scenario = self.runs.pop(0)
print(" {time} {clientName} fetched scenario. Scenarios: {runStatus}, Work: {workStatus}"
.format(**formatArgs))
self.currentlyRunning[clientName] = scenario
return {'status': 'ok', 'params': scenario.parameterDict}
else:
return {'status': 'finished'}
def result(self, receivedJsonData):
clientName = receivedJsonData['clientName']
run = self.currentlyRunning[clientName]
self.finishedRuns.append(run)
del self.currentlyRunning[clientName]
d = receivedJsonData
def hash_dict(d):
import hashlib
return hashlib.sha1(json.dumps(d, sort_keys=True).encode()).hexdigest()
assert hash_dict(d['params']) == hash_dict(run.parameterDict)
self.parameterStudy.db.save(run.parameterDict, d['result'], d['env'], changedParams=d['changedParams'])
return {}
def do_POST(self):
mapping = {'/nextScenario': self.nextScenario,
'/result': self.result}
if self.path in mapping.keys():
data = self.rfile.read(int(self.headers['Content-Length']))
self.send_response(200)
self.send_header("Content-type", "application/json")
self.end_headers()
jsonData = json.loads(data.decode())
response = mapping[self.path](jsonData)
self.wfile.write(json.dumps(response).encode())
else:
self.send_response(400)
def do_GET(self):
return self.do_POST()
def log_message(self, format, *args):
return
print("Listening to connections on {}:{}. Scenarios to simulate: {}".format(ip, port, len(filteredRuns)))
server = HTTPServer((ip, port), ParameterStudyServer)
while len(ParameterStudyServer.currentlyRunning) > 0 or len(ParameterStudyServer.runs) > 0:
server.handle_request()
server.handle_request()
def runClient(self, clientName="{hostname}_{pid}", server='localhost', port=8342, parameterUpdate={}):
from urllib.request import urlopen, URLError
url = "http://{}:{}".format(server, port)
clientName = clientName.format(hostname=socket.gethostname(), pid=os.getpid())
while True:
try:
httpResponse = urlopen(url + "/nextScenario",
data=json.dumps({'clientName': clientName}).encode())
scenario = json.loads(httpResponse.read().decode())
if scenario['status'] != 'ok':
break
originalParams = scenario['params'].copy()
scenario['params'].update(parameterUpdate)
result = self.runFunction(**scenario['params'])
answer = {'params': originalParams,
'changedParams': parameterUpdate,
'result': result,
'env': Database.getEnv(),
'clientName': clientName}
urlopen(url + '/result', data=json.dumps(answer).encode())
except URLError:
print("Cannot connect to server {} retrying in 5 seconds...".format(url))
sleep(5)
def run(self, process, numProcesses, parameterUpdate={}):
ownRuns = self.distributeRuns(self.listOfRuns, process, numProcesses)
for run in ownRuns:
parameterDict = run.parameterDict.copy()
parameterDict.update(parameterUpdate)
result = self.runFunction(**parameterDict)
self.db.save(run.parameterDict, result, None, changedParams=parameterUpdate)
def runScenariosNotInDatabase(self, parameterUpdate={}):
filteredRuns = self.filterAlreadySimulated(self.listOfRuns)
for run in filteredRuns:
parameterDict = run.parameterDict.copy()
parameterDict.update(parameterUpdate)
result = self.runFunction(**parameterDict)
self.db.save(run.parameterDict, result, None, changedParams=parameterUpdate)
def runFromCommandLine(self, argv=None):
from argparse import ArgumentParser
def server(a):
if a.database:
self.db = Database(a.database)
self.runServer(a.host, a.port)
def client(a):
self.runClient(a.clientName, a.host, a.port, json.loads(a.parameterOverride))
def local(a):
if a.database:
self.db = Database(a.database)
self.runScenariosNotInDatabase(json.loads(a.parameterOverride))
parser = ArgumentParser()
subparsers = parser.add_subparsers()
localParser = subparsers.add_parser('local', aliases=['l'],
help="Run scenarios locally which are not yet in database",)
localParser.add_argument("-d", "--database", type=str, default="")
localParser.add_argument("-P", "--parameterOverride", type=str, default="{}",
help="JSON: the parameter dictionary is updated with these parameters. Use this to "
"set host specific options like GPU call parameters. Enclose in \" ")
localParser.set_defaults(func=local)
serverParser = subparsers.add_parser('server', aliases=['serv', 's'],
help="Runs server to distribute different scenarios to workers",)
serverParser.add_argument("-p", "--port", type=int, default=8342, help="Port to listen on")
serverParser.add_argument("-H", "--host", type=str, default="0.0.0.0", help="IP/Hostname to listen on")
serverParser.add_argument("-d", "--database", type=str, default="")
serverParser.set_defaults(func=server)
clientParser = subparsers.add_parser('client', aliases=['c'],
help="Runs a worker client connection to scenario distribution server")
clientParser.add_argument("-p", "--port", type=int, default=8342, help="Port to connect to")
clientParser.add_argument("-H", "--host", type=str, default="localhost", help="Host or IP to connect to")
clientParser.add_argument("-n", "--clientName", type=str, default="{hostname}_{pid}",
help="Unique client name, you can use {hostname} and {pid} as placeholder")
clientParser.add_argument("-P", "--parameterOverride", type=str, default="{}",
help="JSON: the parameter dictionary is updated with these parameters. Use this to "
"set host specific options like GPU call parameters. Enclose in \" ")
clientParser.set_defaults(func=client)
args = parser.parse_args(argv)
if not len(vars(args)):
parser.print_help()
else:
args.func(args)
from setuptools import setup, __version__ as setuptools_version
if int(setuptools_version.split('.')[0]) < 61:
raise Exception(
"[ERROR] pystencils requires at least setuptools version 61 to install.\n"
"If this error occurs during an installation via pip, it is likely that there is a conflict between "
"versions of setuptools installed by pip and the system package manager. "
"In this case, it is recommended to install pystencils into a virtual environment instead."
)
import versioneer
def get_cmdclass():
return versioneer.get_cmdclass()
setup(
version=versioneer.get_version(),
cmdclass=get_cmdclass(),
)
"""Module to generate stencil kernels in C or CUDA using sympy expressions and call them as Python functions"""
from .enums import Backend, Target
from . import fd
from . import stencil as stencil
from .assignment import Assignment, AddAugmentedAssignment, assignment_from_stencil
from .typing.typed_sympy import TypedSymbol
from .display_utils import get_code_obj, get_code_str, show_code, to_dot
from .field import Field, FieldType, fields
from .config import CreateKernelConfig
from .cache import clear_cache
from .kernel_decorator import kernel, kernel_config
from .kernelcreation import create_kernel, create_staggered_kernel
from .simp import AssignmentCollection
from .slicing import make_slice
from .spatial_coordinates import x_, x_staggered, x_staggered_vector, x_vector, y_, y_staggered, z_, z_staggered
from .sympyextensions import SymbolCreator
from .datahandling import create_data_handling
__all__ = ['Field', 'FieldType', 'fields',
'TypedSymbol',
'make_slice',
'CreateKernelConfig',
'create_kernel', 'create_staggered_kernel',
'Target', 'Backend',
'show_code', 'to_dot', 'get_code_obj', 'get_code_str',
'AssignmentCollection',
'Assignment', 'AddAugmentedAssignment',
'assignment_from_stencil',
'SymbolCreator',
'create_data_handling',
'clear_cache',
'kernel', 'kernel_config',
'x_', 'y_', 'z_',
'x_staggered', 'y_staggered', 'z_staggered',
'x_vector', 'x_staggered_vector',
'fd',
'stencil']
from . import _version
__version__ = _version.get_versions()['version']
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
# This file is released into the public domain.
# Generated by versioneer-0.29
# https://github.com/python-versioneer/python-versioneer
"""Git implementation of _version.py."""
import errno
import os
import re
import subprocess
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple
import functools
def get_keywords() -> Dict[str, str]:
"""Get the keywords needed to look up the version information."""
# these strings will be replaced by git during git-archive.
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
git_refnames = "$Format:%d$"
git_full = "$Format:%H$"
git_date = "$Format:%ci$"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
class VersioneerConfig:
"""Container for Versioneer configuration parameters."""
VCS: str
style: str
tag_prefix: str
parentdir_prefix: str
versionfile_source: str
verbose: bool
def get_config() -> VersioneerConfig:
"""Create, populate and return the VersioneerConfig() object."""
# these strings are filled in when 'setup.py versioneer' creates
# _version.py
cfg = VersioneerConfig()
cfg.VCS = "git"
cfg.style = "pep440"
cfg.tag_prefix = "release/"
cfg.parentdir_prefix = "pystencils-"
cfg.versionfile_source = "src/pystencils/_version.py"
cfg.verbose = False
return cfg
class NotThisMethod(Exception):
"""Exception raised if a method is not valid for the current scenario."""
LONG_VERSION_PY: Dict[str, str] = {}
HANDLERS: Dict[str, Dict[str, Callable]] = {}
def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator
"""Create decorator to mark a method as the handler of a VCS."""
def decorate(f: Callable) -> Callable:
"""Store f in HANDLERS[vcs][method]."""
if vcs not in HANDLERS:
HANDLERS[vcs] = {}
HANDLERS[vcs][method] = f
return f
return decorate
def run_command(
commands: List[str],
args: List[str],
cwd: Optional[str] = None,
verbose: bool = False,
hide_stderr: bool = False,
env: Optional[Dict[str, str]] = None,
) -> Tuple[Optional[str], Optional[int]]:
"""Call the given command(s)."""
assert isinstance(commands, list)
process = None
popen_kwargs: Dict[str, Any] = {}
if sys.platform == "win32":
# This hides the console window if pythonw.exe is used
startupinfo = subprocess.STARTUPINFO()
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
popen_kwargs["startupinfo"] = startupinfo
for command in commands:
try:
dispcmd = str([command] + args)
# remember shell=False, so use git.cmd on windows, not just git
process = subprocess.Popen([command] + args, cwd=cwd, env=env,
stdout=subprocess.PIPE,
stderr=(subprocess.PIPE if hide_stderr
else None), **popen_kwargs)
break
except OSError as e:
if e.errno == errno.ENOENT:
continue
if verbose:
print("unable to run %s" % dispcmd)
print(e)
return None, None
else:
if verbose:
print("unable to find command, tried %s" % (commands,))
return None, None
stdout = process.communicate()[0].strip().decode()
if process.returncode != 0:
if verbose:
print("unable to run %s (error)" % dispcmd)
print("stdout was %s" % stdout)
return None, process.returncode
return stdout, process.returncode
def versions_from_parentdir(
parentdir_prefix: str,
root: str,
verbose: bool,
) -> Dict[str, Any]:
"""Try to determine the version from the parent directory name.
Source tarballs conventionally unpack into a directory that includes both
the project name and a version string. We will also support searching up
two directory levels for an appropriately named parent directory
"""
rootdirs = []
for _ in range(3):
dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix):
return {"version": dirname[len(parentdir_prefix):],
"full-revisionid": None,
"dirty": False, "error": None, "date": None}
rootdirs.append(root)
root = os.path.dirname(root) # up a level
if verbose:
print("Tried directories %s but none started with prefix %s" %
(str(rootdirs), parentdir_prefix))
raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
"""Extract version information from the given file."""
# the code embedded in _version.py can just fetch the value of these
# keywords. When used from setup.py, we don't want to import _version.py,
# so we do it with a regexp instead. This function is not used from
# _version.py.
keywords: Dict[str, str] = {}
try:
with open(versionfile_abs, "r") as fobj:
for line in fobj:
if line.strip().startswith("git_refnames ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["refnames"] = mo.group(1)
if line.strip().startswith("git_full ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["full"] = mo.group(1)
if line.strip().startswith("git_date ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["date"] = mo.group(1)
except OSError:
pass
return keywords
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(
keywords: Dict[str, str],
tag_prefix: str,
verbose: bool,
) -> Dict[str, Any]:
"""Get version information from git keywords."""
if "refnames" not in keywords:
raise NotThisMethod("Short version file found")
date = keywords.get("date")
if date is not None:
# Use only the last line. Previous lines may contain GPG signature
# information.
date = date.splitlines()[-1]
# git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
# datestamp. However we prefer "%ci" (which expands to an "ISO-8601
# -like" string, which we must then edit to make compliant), because
# it's been around since git-1.5.3, and it's too difficult to
# discover which version we're using, or to work around using an
# older one.
date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
refnames = keywords["refnames"].strip()
if refnames.startswith("$Format"):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = {r.strip() for r in refnames.strip("()").split(",")}
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
# expansion behaves like git log --decorate=short and strips out the
# refs/heads/ and refs/tags/ prefixes that would let us distinguish
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = {r for r in refs if re.search(r'\d', r)}
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose:
print("likely tags: %s" % ",".join(sorted(tags)))
for ref in sorted(tags):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
r = ref[len(tag_prefix):]
# Filter out refs that exactly match prefix or that don't start
# with a number once the prefix is stripped (mostly a concern
# when prefix is '')
if not re.match(r'\d', r):
continue
if verbose:
print("picking %s" % r)
return {"version": r,
"full-revisionid": keywords["full"].strip(),
"dirty": False, "error": None,
"date": date}
# no suitable tags, so version is "0+unknown", but full hex is still there
if verbose:
print("no suitable tags, using unknown + full revision id")
return {"version": "0+unknown",
"full-revisionid": keywords["full"].strip(),
"dirty": False, "error": "no suitable tags", "date": None}
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(
tag_prefix: str,
root: str,
verbose: bool,
runner: Callable = run_command
) -> Dict[str, Any]:
"""Get version from 'git describe' in the root of the source tree.
This only gets called if the git-archive 'subst' keywords were *not*
expanded, and _version.py hasn't already been rewritten with a short
version string, meaning we're inside a checked out source tree.
"""
GITS = ["git"]
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]
# GIT_DIR can interfere with correct operation of Versioneer.
# It may be intended to be passed to the Versioneer-versioned project,
# but that should not change where we get our version from.
env = os.environ.copy()
env.pop("GIT_DIR", None)
runner = functools.partial(runner, env=env)
_, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
hide_stderr=not verbose)
if rc != 0:
if verbose:
print("Directory %s not under git control" % root)
raise NotThisMethod("'git rev-parse --git-dir' returned error")
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
describe_out, rc = runner(GITS, [
"describe", "--tags", "--dirty", "--always", "--long",
"--match", f"{tag_prefix}[[:digit:]]*"
], cwd=root)
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
describe_out = describe_out.strip()
full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
if full_out is None:
raise NotThisMethod("'git rev-parse' failed")
full_out = full_out.strip()
pieces: Dict[str, Any] = {}
pieces["long"] = full_out
pieces["short"] = full_out[:7] # maybe improved later
pieces["error"] = None
branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
cwd=root)
# --abbrev-ref was added in git-1.6.3
if rc != 0 or branch_name is None:
raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
branch_name = branch_name.strip()
if branch_name == "HEAD":
# If we aren't exactly on a branch, pick a branch which represents
# the current commit. If all else fails, we are on a branchless
# commit.
branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
# --contains was added in git-1.5.4
if rc != 0 or branches is None:
raise NotThisMethod("'git branch --contains' returned error")
branches = branches.split("\n")
# Remove the first line if we're running detached
if "(" in branches[0]:
branches.pop(0)
# Strip off the leading "* " from the list of branches.
branches = [branch[2:] for branch in branches]
if "master" in branches:
branch_name = "master"
elif not branches:
branch_name = None
else:
# Pick the first branch that is returned. Good or bad.
branch_name = branches[0]
pieces["branch"] = branch_name
# parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
# TAG might have hyphens.
git_describe = describe_out
# look for -dirty suffix
dirty = git_describe.endswith("-dirty")
pieces["dirty"] = dirty
if dirty:
git_describe = git_describe[:git_describe.rindex("-dirty")]
# now we have TAG-NUM-gHEX or HEX
if "-" in git_describe:
# TAG-NUM-gHEX
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
if not mo:
# unparsable. Maybe git-describe is misbehaving?
pieces["error"] = ("unable to parse git-describe output: '%s'"
% describe_out)
return pieces
# tag
full_tag = mo.group(1)
if not full_tag.startswith(tag_prefix):
if verbose:
fmt = "tag '%s' doesn't start with prefix '%s'"
print(fmt % (full_tag, tag_prefix))
pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
% (full_tag, tag_prefix))
return pieces
pieces["closest-tag"] = full_tag[len(tag_prefix):]
# distance: number of commits since tag
pieces["distance"] = int(mo.group(2))
# commit: short hex revision ID
pieces["short"] = mo.group(3)
else:
# HEX: no tags
pieces["closest-tag"] = None
out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
pieces["distance"] = len(out.split()) # total number of commits
# commit date: see ISO-8601 comment in git_versions_from_keywords()
date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
# Use only the last line. Previous lines may contain GPG signature
# information.
date = date.splitlines()[-1]
pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
return pieces
def plus_or_dot(pieces: Dict[str, Any]) -> str:
"""Return a + if we don't already have one, else return a ."""
if "+" in pieces.get("closest-tag", ""):
return "."
return "+"
def render_pep440(pieces: Dict[str, Any]) -> str:
"""Build up version string, with post-release "local version identifier".
Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
Exceptions:
1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += plus_or_dot(pieces)
rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = "0+untagged.%d.g%s" % (pieces["distance"],
pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
def render_pep440_branch(pieces: Dict[str, Any]) -> str:
"""TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
The ".dev0" means not master branch. Note that .dev0 sorts backwards
(a feature branch will appear "older" than the master branch).
Exceptions:
1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = "0"
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += "+untagged.%d.g%s" % (pieces["distance"],
pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
"""Split pep440 version string at the post-release segment.
Returns the release segments before the post-release and the
post-release version number (or -1 if no post-release segment is present).
"""
vc = str.split(ver, ".post")
return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
def render_pep440_pre(pieces: Dict[str, Any]) -> str:
"""TAG[.postN.devDISTANCE] -- No -dirty.
Exceptions:
1: no tags. 0.post0.devDISTANCE
"""
if pieces["closest-tag"]:
if pieces["distance"]:
# update the post release segment
tag_version, post_version = pep440_split_post(pieces["closest-tag"])
rendered = tag_version
if post_version is not None:
rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
else:
rendered += ".post0.dev%d" % (pieces["distance"])
else:
# no commits, use the tag as the version
rendered = pieces["closest-tag"]
else:
# exception #1
rendered = "0.post0.dev%d" % pieces["distance"]
return rendered
def render_pep440_post(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]+gHEX] .
The ".dev0" means dirty. Note that .dev0 sorts backwards
(a dirty tree will appear "older" than the corresponding clean one),
but you shouldn't be releasing software with -dirty anyways.
Exceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += ".post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += "g%s" % pieces["short"]
else:
# exception #1
rendered = "0.post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
rendered += "+g%s" % pieces["short"]
return rendered
def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
The ".dev0" means not master branch.
Exceptions:
1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += ".post%d" % pieces["distance"]
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += "g%s" % pieces["short"]
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = "0.post%d" % pieces["distance"]
if pieces["branch"] != "master":
rendered += ".dev0"
rendered += "+g%s" % pieces["short"]
if pieces["dirty"]:
rendered += ".dirty"
return rendered
def render_pep440_old(pieces: Dict[str, Any]) -> str:
"""TAG[.postDISTANCE[.dev0]] .
The ".dev0" means dirty.
Exceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += ".post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
else:
# exception #1
rendered = "0.post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
return rendered
def render_git_describe(pieces: Dict[str, Any]) -> str:
"""TAG[-DISTANCE-gHEX][-dirty].
Like 'git describe --tags --dirty --always'.
Exceptions:
1: no tags. HEX[-dirty] (note: no 'g' prefix)
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"]:
rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
else:
# exception #1
rendered = pieces["short"]
if pieces["dirty"]:
rendered += "-dirty"
return rendered
def render_git_describe_long(pieces: Dict[str, Any]) -> str:
"""TAG-DISTANCE-gHEX[-dirty].
Like 'git describe --tags --dirty --always -long'.
The distance/hash is unconditional.
Exceptions:
1: no tags. HEX[-dirty] (note: no 'g' prefix)
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
else:
# exception #1
rendered = pieces["short"]
if pieces["dirty"]:
rendered += "-dirty"
return rendered
def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
"""Render the given version pieces into the requested style."""
if pieces["error"]:
return {"version": "unknown",
"full-revisionid": pieces.get("long"),
"dirty": None,
"error": pieces["error"],
"date": None}
if not style or style == "default":
style = "pep440" # the default
if style == "pep440":
rendered = render_pep440(pieces)
elif style == "pep440-branch":
rendered = render_pep440_branch(pieces)
elif style == "pep440-pre":
rendered = render_pep440_pre(pieces)
elif style == "pep440-post":
rendered = render_pep440_post(pieces)
elif style == "pep440-post-branch":
rendered = render_pep440_post_branch(pieces)
elif style == "pep440-old":
rendered = render_pep440_old(pieces)
elif style == "git-describe":
rendered = render_git_describe(pieces)
elif style == "git-describe-long":
rendered = render_git_describe_long(pieces)
else:
raise ValueError("unknown style '%s'" % style)
return {"version": rendered, "full-revisionid": pieces["long"],
"dirty": pieces["dirty"], "error": None,
"date": pieces.get("date")}
def get_versions() -> Dict[str, Any]:
"""Get version information or return default if unable to do so."""
# I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
# __file__, we can work backwards from there to the root. Some
# py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
# case we can only use expanded keywords.
cfg = get_config()
verbose = cfg.verbose
try:
return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
verbose)
except NotThisMethod:
pass
try:
root = os.path.realpath(__file__)
# versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
for _ in cfg.versionfile_source.split('/'):
root = os.path.dirname(root)
except NameError:
return {"version": "0+unknown", "full-revisionid": None,
"dirty": None,
"error": "unable to find root of source tree",
"date": None}
try:
pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
return render(pieces, cfg.style)
except NotThisMethod:
pass
try:
if cfg.parentdir_prefix:
return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
except NotThisMethod:
pass
return {"version": "0+unknown", "full-revisionid": None,
"dirty": None,
"error": "unable to compute version", "date": None}
import numpy as np
def aligned_empty(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, order='C', align_inner_coordinate=True):
"""
Creates an aligned empty numpy array
Args:
shape: size of the array
byte_alignment: alignment in bytes, for the start address of the array holds (a % byte_alignment) == 0
By default, use the maximum required by the CPU (or 512 bits if this cannot be detected).
When 'cacheline' is specified, the size of a cache line is used.
dtype: numpy data type
byte_offset: offset in bytes for position that should be aligned i.e. (a+byte_offset) % byte_alignment == 0
typically used to align first inner cell instead of ghost layer
order: storage linearization order
align_inner_coordinate: if True, the start of the innermost coordinate lines are aligned as well
"""
if byte_alignment is True or byte_alignment == 'cacheline':
from pystencils.backends.simd_instruction_sets import (get_supported_instruction_sets, get_cacheline_size,
get_vector_instruction_set)
instruction_sets = get_supported_instruction_sets()
if instruction_sets is None:
byte_alignment = 64
elif byte_alignment == 'cacheline':
cacheline_sizes = [get_cacheline_size(is_name) for is_name in instruction_sets]
if all([s is None for s in cacheline_sizes]) or \
max([s for s in cacheline_sizes if s is not None]) > 0x100000:
widths = [get_vector_instruction_set(dtype, is_name)['width'] * np.dtype(dtype).itemsize
for is_name in instruction_sets
if type(get_vector_instruction_set(dtype, is_name)['width']) is int]
byte_alignment = 64 if all([s is None for s in widths]) else max(widths)
else:
byte_alignment = max([s for s in cacheline_sizes if s is not None])
elif not any([type(get_vector_instruction_set(dtype, is_name)['width']) is int
for is_name in instruction_sets]):
byte_alignment = 64
else:
byte_alignment = max([get_vector_instruction_set(dtype, is_name)['width'] * np.dtype(dtype).itemsize
for is_name in instruction_sets
if type(get_vector_instruction_set(dtype, is_name)['width']) is int])
if (not align_inner_coordinate) or (not hasattr(shape, '__len__')):
size = np.prod(shape)
d = np.dtype(dtype)
# 2 * byte_alignment instead of 1 * byte_alignment to have slack in the end such that
# vectorized loops can access vector_width elements further and don't require a tail loop
tmp = np.empty(size * d.itemsize + 2 * byte_alignment, dtype=np.uint8)
address = tmp.__array_interface__['data'][0]
offset = (byte_alignment - (address + byte_offset) % byte_alignment) % byte_alignment
return tmp[offset:offset + size * d.itemsize].view(dtype=d).reshape(shape, order=order)
else:
if order == 'C':
dim0_size = shape[-1]
dim0 = -1
dim1_size = np.prod(shape[:-1])
else:
dim0_size = shape[0]
dim0 = 0
dim1_size = np.prod(shape[1:])
d = np.dtype(dtype)
assert byte_alignment >= d.itemsize and byte_alignment % d.itemsize == 0
padding = (byte_alignment - ((dim0_size * d.itemsize) % byte_alignment)) % byte_alignment
size = dim1_size * padding + np.prod(shape) * d.itemsize
tmp = aligned_empty(size, byte_alignment=byte_alignment, dtype=np.uint8, byte_offset=byte_offset)
tmp = tmp.view(dtype=dtype)
shape_in_bytes = [i for i in shape]
shape_in_bytes[dim0] = dim0_size + padding // d.itemsize
tmp = tmp.reshape(shape_in_bytes, order=order)
if tmp.flags['C_CONTIGUOUS']:
tmp = tmp[..., :shape[-1]]
else:
tmp = tmp[:shape[0], ...]
return tmp
def aligned_zeros(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, order='C', align_inner_coordinate=True):
arr = aligned_empty(shape, dtype=dtype, byte_offset=byte_offset,
order=order, byte_alignment=byte_alignment, align_inner_coordinate=align_inner_coordinate)
x = np.zeros((), arr.dtype)
arr[...] = x
return arr
def aligned_ones(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, order='C', align_inner_coordinate=True):
arr = aligned_empty(shape, dtype=dtype, byte_offset=byte_offset,
order=order, byte_alignment=byte_alignment, align_inner_coordinate=align_inner_coordinate)
x = np.ones((), arr.dtype)
arr[...] = x
return arr
import numpy as np
import sympy as sp
from sympy.codegen.ast import Assignment, AugmentedAssignment, AddAugmentedAssignment
from sympy.printing.latex import LatexPrinter
__all__ = ['Assignment', 'AugmentedAssignment', 'AddAugmentedAssignment', 'assignment_from_stencil']
def print_assignment_latex(printer, expr):
binop = f"{expr.binop}=" if isinstance(expr, AugmentedAssignment) else ''
"""sympy cannot print Assignments as Latex. Thus, this function is added to the sympy Latex printer"""
printed_lhs = printer.doprint(expr.lhs)
printed_rhs = printer.doprint(expr.rhs)
return fr"{printed_lhs} \leftarrow_{{{binop}}} {printed_rhs}"
def assignment_str(assignment):
op = f"{assignment.binop}=" if isinstance(assignment, AugmentedAssignment) else ''
return fr"{assignment.lhs} {op} {assignment.rhs}"
_old_new = sp.codegen.ast.Assignment.__new__
# TODO Typing Part2 add default type, defult_float_type, default_int_type and use sane defaults
def _Assignment__new__(cls, lhs, rhs, *args, **kwargs):
if isinstance(lhs, (list, tuple, sp.Matrix)) and isinstance(rhs, (list, tuple, sp.Matrix)):
assert len(lhs) == len(rhs), f'{lhs} and {rhs} must have same length when performing vector assignment!'
return tuple(_old_new(cls, a, b, *args, **kwargs) for a, b in zip(lhs, rhs))
return _old_new(cls, lhs, rhs, *args, **kwargs)
Assignment.__str__ = assignment_str
Assignment.__new__ = _Assignment__new__
LatexPrinter._print_Assignment = print_assignment_latex
AugmentedAssignment.__str__ = assignment_str
LatexPrinter._print_AugmentedAssignment = print_assignment_latex
sp.MutableDenseMatrix.__hash__ = lambda self: hash(tuple(self))
def assignment_from_stencil(stencil_array, input_field, output_field,
normalization_factor=None, order='visual') -> Assignment:
"""Creates an assignment
Args:
stencil_array: nested list of numpy array defining the stencil weights
input_field: field or field access, defining where the stencil should be applied to
output_field: field or field access where the result is written to
normalization_factor: optional normalization factor for the stencil
order: defines how the stencil_array is interpreted. Possible values are 'visual' and 'numpy'.
For details see examples
Returns:
Assignment that can be used to create a kernel
Examples:
>>> import pystencils as ps
>>> f, g = ps.fields("f, g: [2D]")
>>> stencil = [[0, 2, 0],
... [3, 4, 5],
... [0, 6, 0]]
By default 'visual ordering is used - i.e. the stencil is applied as the nested lists are written down
>>> expected_output = Assignment(g[0, 0], 3*f[-1, 0] + 6*f[0, -1] + 4*f[0, 0] + 2*f[0, 1] + 5*f[1, 0])
>>> assignment_from_stencil(stencil, f, g, order='visual') == expected_output
True
'numpy' ordering uses the first coordinate of the stencil array for x offset, second for y offset etc.
>>> expected_output = Assignment(g[0, 0], 2*f[-1, 0] + 3*f[0, -1] + 4*f[0, 0] + 5*f[0, 1] + 6*f[1, 0])
>>> assignment_from_stencil(stencil, f, g, order='numpy') == expected_output
True
You can also pass field accesses to apply the stencil at an already shifted position:
>>> expected_output = Assignment(g[2, 0], 3*f[0, 0] + 6*f[1, -1] + 4*f[1, 0] + 2*f[1, 1] + 5*f[2, 0])
>>> assignment_from_stencil(stencil, f[1, 0], g[2, 0]) == expected_output
True
"""
from pystencils.field import Field
stencil_array = np.array(stencil_array)
if order == 'visual':
stencil_array = np.swapaxes(stencil_array, 0, 1)
stencil_array = np.flip(stencil_array, axis=1)
elif order == 'numpy':
pass
else:
raise ValueError("'order' has to be either 'visual' or 'numpy'")
if isinstance(input_field, Field):
input_field = input_field.center
if isinstance(output_field, Field):
output_field = output_field.center
rhs = 0
offset = tuple(s // 2 for s in stencil_array.shape)
for index, factor in np.ndenumerate(stencil_array):
shift = tuple(i - o for i, o in zip(index, offset))
rhs += factor * input_field.get_shifted(*shift)
if normalization_factor:
rhs *= normalization_factor
return Assignment(output_field, rhs)
import collections.abc
import itertools
import uuid
from typing import Any, List, Optional, Sequence, Set, Union
import sympy as sp
from pystencils.assignment import Assignment
from pystencils.enums import Target, Backend
from pystencils.field import Field
from pystencils.sympyextensions import fast_subs
from pystencils.typing import (create_type, get_next_parent_of_type,
FieldPointerSymbol, FieldShapeSymbol, FieldStrideSymbol, TypedSymbol, CFunction)
NodeOrExpr = Union['Node', sp.Expr]
class Node:
"""Base class for all AST nodes."""
def __init__(self, parent: Optional['Node'] = None):
self.parent = parent
@property
def args(self) -> List[NodeOrExpr]:
"""Returns all arguments/children of this node."""
raise NotImplementedError()
@property
def symbols_defined(self) -> Set[sp.Symbol]:
"""Set of symbols which are defined by this node."""
raise NotImplementedError()
@property
def undefined_symbols(self) -> Set[sp.Symbol]:
"""Symbols which are used but are not defined inside this node."""
raise NotImplementedError()
def subs(self, subs_dict) -> None:
"""Inplace! Substitute, similar to sympy's but modifies the AST inplace."""
for i, a in enumerate(self.args):
result = a.subs(subs_dict)
if isinstance(a, sp.Expr): # sympy expressions' subs is out-of-place
self.args[i] = result
else: # all other should be in-place
assert result is None
@property
def func(self):
return self.__class__
def atoms(self, arg_type) -> Set[Any]:
"""Returns a set of all descendants recursively, which are an instance of the given type."""
result = set()
for arg in self.args:
if isinstance(arg, arg_type):
result.add(arg)
result.update(arg.atoms(arg_type))
return result
class Conditional(Node):
"""Conditional that maps to a 'if' statement in C/C++.
Try to avoid using this node inside of loops, since currently this construction can not be vectorized.
Consider using assignments with sympy.Piecewise in this case.
Args:
condition_expr: sympy relational expression
true_block: block which is run if conditional is true
false_block: optional block which is run if conditional is false
"""
def __init__(self, condition_expr: sp.Basic, true_block: Union['Block', 'SympyAssignment'],
false_block: Optional['Block'] = None) -> None:
super(Conditional, self).__init__(parent=None)
self.condition_expr = condition_expr
def handle_child(c):
if c is None:
return None
if not isinstance(c, Block):
c = Block([c])
c.parent = self
return c
self.true_block = handle_child(true_block)
self.false_block = handle_child(false_block)
def subs(self, subs_dict):
self.true_block.subs(subs_dict)
if self.false_block:
self.false_block.subs(subs_dict)
self.condition_expr = self.condition_expr.subs(subs_dict)
@property
def args(self):
result = [self.condition_expr, self.true_block]
if self.false_block:
result.append(self.false_block)
return result
@property
def symbols_defined(self):
return set()
@property
def undefined_symbols(self):
result = self.true_block.undefined_symbols
if self.false_block:
result.update(self.false_block.undefined_symbols)
if hasattr(self.condition_expr, 'atoms'):
result.update(self.condition_expr.atoms(sp.Symbol))
return result
def __str__(self):
return self.__repr__()
def __repr__(self):
result = f'if:({self.condition_expr!r}) '
if self.true_block:
result += f'\n\t{self.true_block}) '
if self.false_block:
result = 'else: '
result += f'\n\t{self.false_block} '
return result
def replace_by_true_block(self):
"""Replaces the conditional by its True block"""
self.parent.replace(self, [self.true_block])
def replace_by_false_block(self):
"""Replaces the conditional by its False block"""
self.parent.replace(self, [self.false_block] if self.false_block else [])
class KernelFunction(Node):
class Parameter:
"""Function parameter.
Each undefined symbol in a `KernelFunction` node becomes a parameter to the function.
Parameters are either symbols introduced by the user that never occur on the left hand side of an
Assignment, or are related to fields/arrays passed to the function.
A parameter consists of the typed symbol (symbol property). For field related parameters this is a symbol
defined in pystencils.kernelparameters.
If the parameter is related to one or multiple fields, these fields are referenced in the fields property.
"""
def __init__(self, symbol, fields):
self.symbol = symbol # type: TypedSymbol
self.fields = fields # type: Sequence[Field]
def __repr__(self):
return repr(self.symbol)
@property
def is_field_stride(self):
return isinstance(self.symbol, FieldStrideSymbol)
@property
def is_field_shape(self):
return isinstance(self.symbol, FieldShapeSymbol)
@property
def is_field_pointer(self):
return isinstance(self.symbol, FieldPointerSymbol)
@property
def is_field_parameter(self):
return self.is_field_pointer or self.is_field_shape or self.is_field_stride
@property
def field_name(self):
return self.fields[0].name
def __init__(self, body, target: Target, backend: Backend, compile_function, ghost_layers,
function_name: str = "kernel",
assignments=None):
super(KernelFunction, self).__init__()
self._body = body
body.parent = self
self.function_name = function_name
self._body.parent = self
self.ghost_layers = ghost_layers
self._target = target
self._backend = backend
# these variables are assumed to be global, so no automatic parameter is generated for them
self.global_variables = set()
self.instruction_set = None # used in `vectorize` function to tell the backend which i.s. (SSE,AVX) to use
# function that compiles the node to a Python callable, is set by the backends
self._compile_function = compile_function
self.assignments = assignments
# If nontemporal stores are activated together with the Neon instruction set it results in cacheline zeroing
# For cacheline zeroing the information of the field size for each field is needed. Thus, in this case
# all field sizes are kernel parameters and not just the common field size used for the loops
self.use_all_written_field_sizes = False
@property
def target(self):
"""See pystencils.Target"""
return self._target
@property
def backend(self):
"""Backend for generating the code: `Backend`"""
return self._backend
@property
def symbols_defined(self):
return set()
@property
def undefined_symbols(self):
return set()
@property
def body(self):
return self._body
@body.setter
def body(self, value):
self._body = value
self._body.parent = self
@property
def args(self):
return self._body,
@property
def fields_accessed(self) -> Set[Field]:
"""Set of Field instances: fields which are accessed inside this kernel function"""
return set(o.field for o in itertools.chain(self.atoms(ResolvedFieldAccess)))
@property
def fields_written(self) -> Set[Field]:
assignments = self.atoms(SympyAssignment)
return set().union(itertools.chain.from_iterable([f.field for f in a.lhs.free_symbols if hasattr(f, 'field')]
for a in assignments))
@property
def fields_read(self) -> Set[Field]:
assignments = self.atoms(SympyAssignment)
return set().union(itertools.chain.from_iterable([f.field for f in a.rhs.free_symbols if hasattr(f, 'field')]
for a in assignments))
def get_parameters(self) -> Sequence['KernelFunction.Parameter']:
"""Returns list of parameters for this function.
This function is expensive, cache the result where possible!
"""
field_map = {f.name: f for f in self.fields_accessed}
sizes = set()
if self.use_all_written_field_sizes:
sizes = set().union(*(a.shape[:a.spatial_dimensions] for a in self.fields_written))
sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes)
def get_fields(symbol):
if hasattr(symbol, 'field_name'):
return field_map[symbol.field_name],
elif hasattr(symbol, 'field_names'):
return tuple(field_map[fn] for fn in symbol.field_names)
return ()
argument_symbols = self._body.undefined_symbols - self.global_variables
argument_symbols.update(sizes)
parameters = [self.Parameter(symbol, get_fields(symbol)) for symbol in argument_symbols]
if hasattr(self, 'indexing'):
parameters += [self.Parameter(s, []) for s in self.indexing.symbolic_parameters()]
# Exclude paramters of type CFunction. These parameters will result in a C function call that will be handled
# by including a respective header file in the compute kernel. Hence, it is not a free parameter.
parameters = [p for p in parameters if not isinstance(p.symbol, CFunction)]
parameters.sort(key=lambda p: p.symbol.name)
return parameters
def __str__(self):
params = [p.symbol for p in self.get_parameters()]
return '{0} {1}({2})\n{3}'.format(type(self).__name__, self.function_name, params,
("\t" + "\t".join(str(self.body).splitlines(True))))
def __repr__(self):
params = [p.symbol for p in self.get_parameters()]
return f'{type(self).__name__} {self.function_name}({params})'
def compile(self, *args, **kwargs):
if self._compile_function is None:
raise ValueError("No compile-function provided for this KernelFunction node")
return self._compile_function(self, *args, **kwargs)
class SkipIteration(Node):
@property
def args(self):
return []
@property
def symbols_defined(self):
return set()
@property
def undefined_symbols(self):
return set()
class Block(Node):
def __init__(self, nodes: Union[Node, List[Node]]):
super(Block, self).__init__()
if not isinstance(nodes, list):
nodes = [nodes]
self._nodes = nodes
self.parent = None
for n in self._nodes:
try:
n.parent = self
except AttributeError:
pass
@property
def args(self):
return self._nodes
def subs(self, subs_dict) -> None:
for a in self.args:
a.subs(subs_dict)
def fast_subs(self, subs_dict, skip=None):
self._nodes = [fast_subs(a, subs_dict, skip) for a in self._nodes]
return self
def insert_front(self, node, if_not_exists=False):
if if_not_exists and len(self._nodes) > 0 and self._nodes[0] == node:
return
if isinstance(node, collections.abc.Iterable):
node = list(node)
for n in node:
n.parent = self
self._nodes = node + self._nodes
else:
node.parent = self
self._nodes.insert(0, node)
def insert_before(self, new_node, insert_before, if_not_exists=False):
new_node.parent = self
assert self._nodes.count(insert_before) == 1
idx = self._nodes.index(insert_before)
if not if_not_exists or self._nodes[idx] != new_node:
self._nodes.insert(idx, new_node)
def insert_after(self, new_node, insert_after, if_not_exists=False):
new_node.parent = self
assert self._nodes.count(insert_after) == 1
idx = self._nodes.index(insert_after) + 1
if not if_not_exists or not (self._nodes[idx - 1] == new_node
or (idx < len(self._nodes) and self._nodes[idx] == new_node)):
self._nodes.insert(idx, new_node)
def append(self, node):
if isinstance(node, list) or isinstance(node, tuple):
for n in node:
n.parent = self
self._nodes.append(n)
else:
node.parent = self
self._nodes.append(node)
def take_child_nodes(self):
tmp = self._nodes
self._nodes = []
return tmp
def replace(self, child, replacements):
assert self._nodes.count(child) == 1
idx = self._nodes.index(child)
del self._nodes[idx]
if type(replacements) is list:
for e in replacements:
e.parent = self
self._nodes = self._nodes[:idx] + replacements + self._nodes[idx:]
else:
replacements.parent = self
self._nodes.insert(idx, replacements)
@property
def symbols_defined(self):
result = set()
for a in self.args:
if isinstance(a, Assignment):
result.update(a.free_symbols)
else:
result.update(a.symbols_defined)
return result
@property
def undefined_symbols(self):
result = set()
defined_symbols = set()
for a in self.args:
if isinstance(a, Assignment):
result.update(a.free_symbols)
defined_symbols.update({a.lhs})
else:
result.update(a.undefined_symbols)
defined_symbols.update(a.symbols_defined)
return result - defined_symbols
def __str__(self):
return "Block " + ''.join('{!s}\n'.format(node) for node in self._nodes)
def __repr__(self):
return "Block"
class PragmaBlock(Block):
def __init__(self, pragma_line, nodes):
super(PragmaBlock, self).__init__(nodes)
self.pragma_line = pragma_line
for n in nodes:
n.parent = self
def __repr__(self):
return self.pragma_line
class LoopOverCoordinate(Node):
LOOP_COUNTER_NAME_PREFIX = "ctr"
BLOCK_LOOP_COUNTER_NAME_PREFIX = "_blockctr"
def __init__(self, body, coordinate_to_loop_over, start, stop, step=1, is_block_loop=False, custom_loop_ctr=None):
super(LoopOverCoordinate, self).__init__(parent=None)
self.body = body
body.parent = self
self.coordinate_to_loop_over = coordinate_to_loop_over
self.start = start
self.stop = stop
self.step = step
self.body.parent = self
self.prefix_lines = []
self.is_block_loop = is_block_loop
self.custom_loop_ctr = custom_loop_ctr
def new_loop_with_different_body(self, new_body):
result = LoopOverCoordinate(new_body, self.coordinate_to_loop_over, self.start, self.stop,
self.step, self.is_block_loop, self.custom_loop_ctr)
result.prefix_lines = [prefix_line for prefix_line in self.prefix_lines]
return result
def subs(self, subs_dict):
self.body.subs(subs_dict)
if hasattr(self.start, "subs"):
self.start = self.start.subs(subs_dict)
if hasattr(self.stop, "subs"):
self.stop = self.stop.subs(subs_dict)
if hasattr(self.step, "subs"):
self.step = self.step.subs(subs_dict)
def fast_subs(self, subs_dict, skip=None):
self.body = fast_subs(self.body, subs_dict, skip)
if isinstance(self.start, sp.Basic):
self.start = fast_subs(self.start, subs_dict, skip)
if isinstance(self.stop, sp.Basic):
self.stop = fast_subs(self.stop, subs_dict, skip)
if isinstance(self.step, sp.Basic):
self.step = fast_subs(self.step, subs_dict, skip)
return self
@property
def args(self):
result = [self.body]
for e in [self.start, self.stop, self.step]:
if hasattr(e, "args"):
result.append(e)
return result
def replace(self, child, replacement):
if child == self.body:
self.body = replacement
elif child == self.start:
self.start = replacement
elif child == self.step:
self.step = replacement
elif child == self.stop:
self.stop = replacement
@property
def symbols_defined(self):
return {self.loop_counter_symbol}
@property
def undefined_symbols(self):
result = self.body.undefined_symbols
for possible_symbol in [self.start, self.stop, self.step]:
if isinstance(possible_symbol, Node) or isinstance(possible_symbol, sp.Basic):
result.update(possible_symbol.atoms(sp.Symbol))
return result - {self.loop_counter_symbol}
@staticmethod
def get_loop_counter_name(coordinate_to_loop_over):
return f"{LoopOverCoordinate.LOOP_COUNTER_NAME_PREFIX}_{coordinate_to_loop_over}"
@staticmethod
def get_block_loop_counter_name(coordinate_to_loop_over):
return f"{LoopOverCoordinate.BLOCK_LOOP_COUNTER_NAME_PREFIX}_{coordinate_to_loop_over}"
@property
def loop_counter_name(self):
if self.custom_loop_ctr:
return self.custom_loop_ctr.name
else:
if self.is_block_loop:
return LoopOverCoordinate.get_block_loop_counter_name(self.coordinate_to_loop_over)
else:
return LoopOverCoordinate.get_loop_counter_name(self.coordinate_to_loop_over)
@staticmethod
def is_loop_counter_symbol(symbol):
prefix = LoopOverCoordinate.LOOP_COUNTER_NAME_PREFIX
if not symbol.name.startswith(prefix):
return None
if symbol.dtype != create_type('int'):
return None
coordinate = int(symbol.name[len(prefix) + 1:])
return coordinate
@staticmethod
def get_loop_counter_symbol(coordinate_to_loop_over):
return TypedSymbol(LoopOverCoordinate.get_loop_counter_name(coordinate_to_loop_over), 'int', nonnegative=True)
@staticmethod
def get_block_loop_counter_symbol(coordinate_to_loop_over):
return TypedSymbol(LoopOverCoordinate.get_block_loop_counter_name(coordinate_to_loop_over),
'int',
nonnegative=True)
@property
def loop_counter_symbol(self):
if self.custom_loop_ctr:
return self.custom_loop_ctr
else:
if self.is_block_loop:
return self.get_block_loop_counter_symbol(self.coordinate_to_loop_over)
else:
return self.get_loop_counter_symbol(self.coordinate_to_loop_over)
@property
def is_outermost_loop(self):
return get_next_parent_of_type(self, LoopOverCoordinate) is None
@property
def is_innermost_loop(self):
return len(self.atoms(LoopOverCoordinate)) == 0
def __str__(self):
return 'for({!s}={!s}; {!s}<{!s}; {!s}+={!s})\n{!s}'.format(self.loop_counter_name, self.start,
self.loop_counter_name, self.stop,
self.loop_counter_name, self.step,
("\t" + "\t".join(str(self.body).splitlines(True))))
def __repr__(self):
return 'for({!s}={!s}; {!s}<{!s}; {!s}+={!s})'.format(self.loop_counter_name, self.start,
self.loop_counter_name, self.stop,
self.loop_counter_name, self.step)
class SympyAssignment(Node):
def __init__(self, lhs_symbol, rhs_expr, is_const=True, use_auto=False):
super(SympyAssignment, self).__init__(parent=None)
self._lhs_symbol = sp.sympify(lhs_symbol)
self._rhs = sp.sympify(rhs_expr)
self._is_const = is_const
self._is_declaration = self.__is_declaration()
self._use_auto = use_auto
def __is_declaration(self):
from pystencils.typing import CastFunc
if isinstance(self._lhs_symbol, CastFunc):
return False
if any(isinstance(self._lhs_symbol, c) for c in (Field.Access, sp.Indexed, TemporaryMemoryAllocation)):
return False
return True
@property
def lhs(self):
return self._lhs_symbol
@property
def rhs(self):
return self._rhs
@lhs.setter
def lhs(self, new_value):
self._lhs_symbol = new_value
self._is_declaration = self.__is_declaration()
@rhs.setter
def rhs(self, new_rhs_expr):
self._rhs = new_rhs_expr
def subs(self, subs_dict):
self.lhs = fast_subs(self.lhs, subs_dict)
self.rhs = fast_subs(self.rhs, subs_dict)
def fast_subs(self, subs_dict, skip=None):
self.lhs = fast_subs(self.lhs, subs_dict, skip)
self.rhs = fast_subs(self.rhs, subs_dict, skip)
return self
def optimize(self, optimizations):
try:
from sympy.codegen.rewriting import optimize
self.rhs = optimize(self.rhs, optimizations)
except Exception:
pass
@property
def args(self):
return [self._lhs_symbol, self.rhs]
@property
def symbols_defined(self):
if not self._is_declaration:
return set()
return {self._lhs_symbol}
@property
def undefined_symbols(self):
result = {s for s in self.rhs.free_symbols if not isinstance(s, sp.Indexed)}
# Add loop counters if there a field accesses
loop_counters = set()
for symbol in result:
if isinstance(symbol, Field.Access):
for i in range(len(symbol.offsets)):
loop_counters.add(LoopOverCoordinate.get_loop_counter_symbol(i))
result.update(loop_counters)
result.update(self._lhs_symbol.atoms(sp.Symbol))
return result
@property
def is_declaration(self):
return self._is_declaration
@property
def is_const(self):
return self._is_const
@property
def use_auto(self):
return self._use_auto
def replace(self, child, replacement):
if child == self.lhs:
replacement.parent = self
self.lhs = replacement
elif child == self.rhs:
replacement.parent = self
self.rhs = replacement
else:
raise ValueError(f'{replacement} is not in args of {self.__class__}')
def __repr__(self):
return repr(self.lhs) + "" + repr(self.rhs)
def _repr_html_(self):
printed_lhs = sp.latex(self.lhs)
printed_rhs = sp.latex(self.rhs)
return f"${printed_lhs} \\leftarrow {printed_rhs}$"
def __hash__(self):
return hash((self.lhs, self.rhs))
def __eq__(self, other):
return type(self) is type(other) and (self.lhs, self.rhs) == (other.lhs, other.rhs)
class ResolvedFieldAccess(sp.Indexed):
def __new__(cls, base, linearized_index, field, offsets, idx_coordinate_values):
if not isinstance(base, sp.IndexedBase):
assert isinstance(base, TypedSymbol)
base = sp.IndexedBase(base, shape=(1,))
assert isinstance(base.label, TypedSymbol)
obj = super(ResolvedFieldAccess, cls).__new__(cls, base, linearized_index)
obj.field = field
obj.offsets = offsets
obj.idx_coordinate_values = idx_coordinate_values
return obj
def _eval_subs(self, old, new):
return ResolvedFieldAccess(self.args[0],
self.args[1].subs(old, new),
self.field, self.offsets, self.idx_coordinate_values)
def fast_subs(self, substitutions, skip=None):
if self in substitutions:
return substitutions[self]
return ResolvedFieldAccess(self.args[0].subs(substitutions),
self.args[1].subs(substitutions),
self.field, self.offsets, self.idx_coordinate_values)
def _hashable_content(self):
super_class_contents = super(ResolvedFieldAccess, self)._hashable_content()
return super_class_contents + tuple(self.offsets) + (repr(self.idx_coordinate_values), hash(self.field))
@property
def typed_symbol(self):
return self.base.label
def __str__(self):
top = super(ResolvedFieldAccess, self).__str__()
return f"{top} ({self.typed_symbol.dtype})"
def __getnewargs__(self):
return self.base, self.indices[0], self.field, self.offsets, self.idx_coordinate_values
def __getnewargs_ex__(self):
return (self.base, self.indices[0], self.field, self.offsets, self.idx_coordinate_values), {}
class TemporaryMemoryAllocation(Node):
"""Node for temporary memory buffer allocation.
Always allocates aligned memory.
Args:
typed_symbol: symbol used as pointer (has to be typed)
size: number of elements to allocate
align_offset: the align_offset's element is aligned
"""
def __init__(self, typed_symbol: TypedSymbol, size, align_offset):
super(TemporaryMemoryAllocation, self).__init__(parent=None)
self.symbol = typed_symbol
self.size = size
self.headers = ['<stdlib.h>']
self._align_offset = align_offset
@property
def symbols_defined(self):
return {self.symbol}
@property
def undefined_symbols(self):
if isinstance(self.size, sp.Basic):
return self.size.atoms(sp.Symbol)
else:
return set()
@property
def args(self):
return [self.symbol]
def offset(self, byte_alignment):
"""Number of ELEMENTS to skip for a pointer that is aligned to byte_alignment."""
np_dtype = self.symbol.dtype.base_type.numpy_dtype
assert byte_alignment % np_dtype.itemsize == 0
return -self._align_offset % (byte_alignment / np_dtype.itemsize)
class TemporaryMemoryFree(Node):
def __init__(self, alloc_node):
super(TemporaryMemoryFree, self).__init__(parent=None)
self.alloc_node = alloc_node
@property
def symbol(self):
return self.alloc_node.symbol
def offset(self, byte_alignment):
return self.alloc_node.offset(byte_alignment)
@property
def symbols_defined(self):
return set()
@property
def undefined_symbols(self):
return set()
@property
def args(self):
return []
def early_out(condition):
from pystencils.cpu.vectorization import vec_all
return Conditional(vec_all(condition), Block([SkipIteration()]))
def get_dummy_symbol(dtype='bool'):
return TypedSymbol(f'dummy{uuid.uuid4().hex}', create_type(dtype))
class SourceCodeComment(Node):
def __init__(self, text):
self.text = text
@property
def args(self):
return []
@property
def symbols_defined(self):
return set()
@property
def undefined_symbols(self):
return set()
def __str__(self):
return "/* " + self.text + " */"
def __repr__(self):
return self.__str__()
class EmptyLine(Node):
def __init__(self):
pass
@property
def args(self):
return []
@property
def symbols_defined(self):
return set()
@property
def undefined_symbols(self):
return set()
def __str__(self):
return ""
def __repr__(self):
return self.__str__()
class ConditionalFieldAccess(sp.Function):
"""
:class:`pystencils.Field.Access` that is only executed if a certain condition is met.
Can be used, for instance, for out-of-bound checks.
"""
def __new__(cls, field_access, outofbounds_condition, outofbounds_value=0):
return sp.Function.__new__(cls, field_access, outofbounds_condition, sp.S(outofbounds_value))
@property
def access(self):
return self.args[0]
@property
def outofbounds_condition(self):
return self.args[1]
@property
def outofbounds_value(self):
return self.args[2]
def __getnewargs__(self):
return self.access, self.outofbounds_condition, self.outofbounds_value
def __getnewargs_ex__(self):
return (self.access, self.outofbounds_condition, self.outofbounds_value), {}
from .cbackend import generate_c
__all__ = ['generate_c']
try:
from .dot import print_dot # NOQA
__all__.append('print_dot')
except ImportError:
pass