dd95c69d · dd95c69d · dd95c69d · dd95c69d · dd95c69d · dd95c69d
--- a/llvm/kernelcreation.py
+++ b/llvm/kernelcreation.py
-import sympy as sp
-from pystencils.astnodes import SympyAssignment, Block, LoopOverCoordinate, KernelFunction
-from pystencils.transformations import resolveFieldAccesses, makeLoopOverDomain, typingFromSympyInspection, \
-    typeAllEquations, getOptimalLoopOrdering, parseBasePointerInfo, moveConstantsBeforeLoop, splitInnerLoop, insertCasts#, \
-    #desympy_ast, insert_casts
-from pystencils.data_types import TypedSymbol, BasicType, StructType
-from pystencils.field import Field
-import pystencils.astnodes as ast
-from functools import partial
-from pystencils.llvm.llvmjit import makePythonFunction
-
-
-def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, splitGroups=(),
-                 iterationSlice=None, ghostLayers=None):
-    """
-    Creates an abstract syntax tree for a kernel function, by taking a list of update rules.
-
-    Loops are created according to the field accesses in the equations.
-
-    :param listOfEquations: list of sympy equations, containing accesses to :class:`pystencils.field.Field`.
-           Defining the update rules of the kernel
-    :param functionName: name of the generated function - only important if generated code is written out
-    :param typeForSymbol: a map from symbol name to a C type specifier. If not specified all symbols are assumed to
-           be of type 'double' except symbols which occur on the left hand side of equations where the
-           right hand side is a sympy Boolean which are assumed to be 'bool' .
-    :param splitGroups: Specification on how to split up inner loop into multiple loops. For details see
-           transformation :func:`pystencils.transformation.splitInnerLoop`
-    :param iterationSlice: if not None, iteration is done only over this slice of the field
-    :param ghostLayers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
-                        if None, the number of ghost layers is determined automatically and assumed to be equal for a
-                        all dimensions
-
-    :return: :class:`pystencils.ast.KernelFunction` node
-    """
-    #if not typeForSymbol:
-    #    typeForSymbol = typingFromSympyInspection(listOfEquations, "double")
-    #
-    #def typeSymbol(term):
-    #    if isinstance(term, Field.Access) or isinstance(term, TypedSymbol):
-    #        return term
-    #    elif isinstance(term, sp.Symbol):
-    #        return TypedSymbol(term.name, typeForSymbol[term.name])
-    #    else:
-    #        raise ValueError("Term has to be field access or symbol")
-    #
-    #fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol)
-    #allFields = fieldsRead.union(fieldsWritten)
-    #
-    #readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten])
-    #
-    #body = ast.Block(assignments)
-    #loopOrder = getOptimalLoopOrdering(allFields)
-    #code = makeLoopOverDomain(body, functionName, iterationSlice=iterationSlice,
-    #                          ghostLayers=ghostLayers, loopOrder=loopOrder)
-    #
-    #if splitGroups:
-    #    typedSplitGroups = [[typeSymbol(s) for s in splitGroup] for splitGroup in splitGroups]
-    #    splitInnerLoop(code, typedSplitGroups)
-    #
-    #basePointerInfo = []
-    #for i in range(len(loopOrder)):
-    #    basePointerInfo.append(['spatialInner%d' % i])
-    #basePointerInfos = {field.name: parseBasePointerInfo(basePointerInfo, loopOrder, field) for field in allFields}
-    #
-    #resolveFieldAccesses(code, readOnlyFields, fieldToBasePointerInfo=basePointerInfos)
-    #moveConstantsBeforeLoop(code)
-    from pystencils.cpu import createKernel
-    code = createKernel(listOfEquations, functionName, typeForSymbol, splitGroups, iterationSlice, ghostLayers)
-    code = insertCasts(code)
-    code.compile = partial(makePythonFunction, code)
-    return code
-
-
-def createIndexedKernel(listOfEquations, indexFields, functionName="kernel", typeForSymbol=None,
-                        coordinateNames=('x', 'y', 'z')):
-    """
-    Similar to :func:`createKernel`, but here not all cells of a field are updated but only cells with
-    coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.
-
-    The coordinates are stored in a separated indexField, which is a one dimensional array with struct data type.
-    This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the
-    'coordinateNames' parameter. The struct can have also other fields that can be read and written in the kernel, for
-    example boundary parameters.
-
-    :param listOfEquations: list of update equations or AST nodes
-    :param indexFields: list of index fields, i.e. 1D fields with struct data type
-    :param typeForSymbol: see documentation of :func:`createKernel`
-    :param functionName: see documentation of :func:`createKernel`
-    :param coordinateNames: name of the coordinate fields in the struct data type
-    :return: abstract syntax tree
-    """
-    fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol)
-    allFields = fieldsRead.union(fieldsWritten)
-
-    for indexField in indexFields:
-        indexField.isIndexField = True
-        assert indexField.spatialDimensions == 1, "Index fields have to be 1D"
-
-    nonIndexFields = [f for f in allFields if f not in indexFields]
-    spatialCoordinates = {f.spatialDimensions for f in nonIndexFields}
-    assert len(spatialCoordinates) == 1, "Non-index fields do not have the same number of spatial coordinates"
-    spatialCoordinates = list(spatialCoordinates)[0]
-
-    def getCoordinateSymbolAssignment(name):
-        for indexField in indexFields:
-            assert isinstance(indexField.dtype, StructType), "Index fields have to have a struct datatype"
-            dataType = indexField.dtype
-            if dataType.hasElement(name):
-                rhs = indexField[0](name)
-                lhs = TypedSymbol(name, BasicType(dataType.getElementType(name)))
-                return SympyAssignment(lhs, rhs)
-        raise ValueError("Index %s not found in any of the passed index fields" % (name,))
-
-    coordinateSymbolAssignments = [getCoordinateSymbolAssignment(n) for n in coordinateNames[:spatialCoordinates]]
-    coordinateTypedSymbols = [eq.lhs for eq in coordinateSymbolAssignments]
-    assignments = coordinateSymbolAssignments + assignments
-
-    # make 1D loop over index fields
-    loopBody = Block([])
-    loopNode = LoopOverCoordinate(loopBody, coordinateToLoopOver=0, start=0, stop=indexFields[0].shape[0])
-
-    for assignment in assignments:
-        loopBody.append(assignment)
-
-    functionBody = Block([loopNode])
-    ast = KernelFunction(functionBody, allFields, functionName)
-
-    fixedCoordinateMapping = {f.name: coordinateTypedSymbols for f in nonIndexFields}
-    resolveFieldAccesses(ast, set(['indexField']), fieldToFixedCoordinates=fixedCoordinateMapping)
-    moveConstantsBeforeLoop(ast)
-
-    desympy_ast(ast)
-    insert_casts(ast)
-    ast.compile = partial(makePythonFunction, ast)
-
-    return ast
--- a/llvm/llvm.py
+++ b/llvm/llvm.py
-import llvmlite.ir as ir
-import functools
-
-from sympy.printing.printer import Printer
-from sympy import S
-# S is numbers?
-
-from pystencils.llvm.control_flow import Loop
-from pystencils.data_types import createType, to_llvm_type, getTypeOfExpression, collateTypes, \
-    createCompositeTypeFromString
-from sympy import Indexed
-from sympy.codegen.ast import Assignment
-
-
-def generateLLVM(ast_node, module=None, builder=None):
-    """
-    Prints the ast as llvm code
-    """
-    if module is None:
-        module = ir.Module()
-    if builder is None:
-        builder = ir.IRBuilder()
-    printer = LLVMPrinter(module, builder)
-    return printer._print(ast_node)  # TODO use doprint() instead???
-
-
-class LLVMPrinter(Printer):
-    """Convert expressions to LLVM IR"""
-
-    def __init__(self, module, builder, fn=None, *args, **kwargs):
-        self.func_arg_map = kwargs.pop("func_arg_map", {})
-        super(LLVMPrinter, self).__init__(*args, **kwargs)
-        self.fp_type = ir.DoubleType()
-        self.fp_pointer = self.fp_type.as_pointer()
-        self.integer = ir.IntType(64)
-        self.integer_pointer = self.integer.as_pointer()
-        self.void = ir.VoidType()
-        self.module = module
-        self.builder = builder
-        self.fn = fn
-        self.ext_fn = {}  # keep track of wrappers to external functions
-        self.tmp_var = {}
-
-    def _add_tmp_var(self, name, value):
-        self.tmp_var[name] = value
-
-    def _remove_tmp_var(self, name):
-        del self.tmp_var[name]
-
-    def _print_Number(self, n):
-        if getTypeOfExpression(n) == createType("int"):
-            return ir.Constant(self.integer, int(n))
-        elif getTypeOfExpression(n) == createType("double"):
-            return ir.Constant(self.fp_type, float(n))
-        else:
-            raise NotImplementedError("Numbers can only have int and double", n)
-
-    def _print_Float(self, expr):
-        return ir.Constant(self.fp_type, float(expr))
-
-    def _print_Integer(self, expr):
-        return ir.Constant(self.integer, int(expr))
-
-    def _print_int(self, i):
-        return ir.Constant(self.integer, i)
-
-    def _print_Symbol(self, s):
-        val = self.tmp_var.get(s)
-        if not val:
-            # look up parameter with name s
-            val = self.func_arg_map.get(s.name)
-        if not val:
-            raise LookupError("Symbol not found: %s" % s)
-        return val
-
-    def _print_Pow(self, expr):
-        base0 = self._print(expr.base)
-        if expr.exp == S.NegativeOne:
-            return self.builder.fdiv(ir.Constant(self.fp_type, 1.0), base0)
-        if expr.exp == S.Half:
-            fn = self.ext_fn.get("sqrt")
-            if not fn:
-                fn_type = ir.FunctionType(self.fp_type, [self.fp_type])
-                fn = ir.Function(self.module, fn_type, "sqrt")
-                self.ext_fn["sqrt"] = fn
-            return self.builder.call(fn, [base0], "sqrt")
-        if expr.exp == 2:
-            return self.builder.fmul(base0, base0)
-        elif expr.exp == 3:
-            return self.builder.fmul(self.builder.fmul(base0, base0), base0)
-
-        exp0 = self._print(expr.exp)
-        fn = self.ext_fn.get("pow")
-        if not fn:
-            fn_type = ir.FunctionType(self.fp_type, [self.fp_type, self.fp_type])
-            fn = ir.Function(self.module, fn_type, "pow")
-            self.ext_fn["pow"] = fn
-        return self.builder.call(fn, [base0, exp0], "pow")
-
-    def _print_Mul(self, expr):
-        nodes = [self._print(a) for a in expr.args]
-        e = nodes[0]
-        if getTypeOfExpression(expr) == createType('double'):
-            mul = self.builder.fmul
-        else:  # int TODO unsigned/signed
-            mul = self.builder.mul
-        for node in nodes[1:]:
-            e = mul(e, node)
-        return e
-
-    def _print_Add(self, expr):
-        nodes = [self._print(a) for a in expr.args]
-        e = nodes[0]
-        if getTypeOfExpression(expr) == createType('double'):
-            add = self.builder.fadd
-        else:  # int TODO unsigned/signed
-            add = self.builder.add
-        for node in nodes[1:]:
-            e = add(e, node)
-        return e
-
-    def _print_Or(self, expr):
-        nodes = [self._print(a) for a in expr.args]
-        e = nodes[0]
-        for node in nodes[1:]:
-            e = self.builder.or_(e, node)
-        return e
-
-    def _print_And(self, expr):
-        nodes = [self._print(a) for a in expr.args]
-        e = nodes[0]
-        for node in nodes[1:]:
-            e = self.builder.and_(e, node)
-        return e
-
-    def _print_StrictLessThan(self, expr):
-        return self._comparison('<', expr)
-
-    def _print_LessThan(self, expr):
-        return self._comparison('<=', expr)
-
-    def _print_StrictGreaterThan(self, expr):
-        return self._comparison('>', expr)
-
-    def _print_GreaterThan(self, expr):
-        return self._comparison('>=', expr)
-
-    def _print_Unequality(self, expr):
-        return self._comparison('!=', expr)
-
-    def _print_Equality(self, expr):
-        return self._comparison('==', expr)
-
-    def _comparison(self, cmpop, expr):
-        if collateTypes([getTypeOfExpression(arg) for arg in expr.args]) == createType('double'):
-            comparison = self.builder.fcmp_unordered
-        else:
-            comparison = self.builder.icmp_signed
-        return comparison(cmpop, self._print(expr.lhs), self._print(expr.rhs))
-
-    def _print_KernelFunction(self, function):
-        # KernelFunction does not posses a return type
-        return_type = self.void
-        parameter_type = []
-        for parameter in function.parameters:
-            parameter_type.append(to_llvm_type(parameter.dtype))
-        func_type = ir.FunctionType(return_type, tuple(parameter_type))
-        name = function.functionName
-        fn = ir.Function(self.module, func_type, name)
-        self.ext_fn[name] = fn
-
-        # set proper names to arguments
-        for i, arg in enumerate(fn.args):
-            arg.name = function.parameters[i].name
-            self.func_arg_map[function.parameters[i].name] = arg
-
-        # func.attributes.add("inlinehint")
-        # func.attributes.add("argmemonly")
-        block = fn.append_basic_block(name="entry")
-        self.builder = ir.IRBuilder(block)  # TODO use goto_block instead
-        self._print(function.body)
-        self.builder.ret_void()
-        self.fn = fn
-        return fn
-
-    def _print_Block(self, block):
-        for node in block.args:
-            self._print(node)
-
-    def _print_LoopOverCoordinate(self, loop):
-        with Loop(self.builder, self._print(loop.start), self._print(loop.stop), self._print(loop.step),
-                  loop.loopCounterName, loop.loopCounterSymbol.name) as i:
-            self._add_tmp_var(loop.loopCounterSymbol, i)
-            self._print(loop.body)
-            self._remove_tmp_var(loop.loopCounterSymbol)
-
-    def _print_SympyAssignment(self, assignment):
-        expr = self._print(assignment.rhs)
-        lhs = assignment.lhs
-        if isinstance(lhs, Indexed):
-            ptr = self._print(lhs.base.label)
-            index = self._print(lhs.args[1])
-            gep = self.builder.gep(ptr, [index])
-            return self.builder.store(expr, gep)
-        self.func_arg_map[assignment.lhs.name] = expr
-        return expr
-
-    def _print_castFunc(self, conversion):
-        node = self._print(conversion.args[0])
-        to_dtype = getTypeOfExpression(conversion)
-        from_dtype = getTypeOfExpression(conversion.args[0])
-        # (From, to)
-        decision = {
-            (createCompositeTypeFromString("int"), createCompositeTypeFromString("double")): functools.partial(
-                self.builder.sitofp, node, self.fp_type),
-            (createCompositeTypeFromString("double"), createCompositeTypeFromString("int")): functools.partial(
-                self.builder.fptosi, node, self.integer),
-            (createCompositeTypeFromString("double *"), createCompositeTypeFromString("int")): functools.partial(
-                self.builder.ptrtoint, node, self.integer),
-            (createCompositeTypeFromString("int"), createCompositeTypeFromString("double *")): functools.partial(self.builder.inttoptr, node,
-                                                                           self.fp_pointer),
-            (createCompositeTypeFromString("double * restrict"), createCompositeTypeFromString("int")): functools.partial(
-                self.builder.ptrtoint, node,
-                self.integer),
-            (createCompositeTypeFromString("int"),
-             createCompositeTypeFromString("double * restrict")): functools.partial(self.builder.inttoptr, node,
-                                                                                    self.fp_pointer),
-            (createCompositeTypeFromString("double * restrict const"),
-             createCompositeTypeFromString("int")): functools.partial(self.builder.ptrtoint, node,
-                                                                      self.integer),
-            (createCompositeTypeFromString("int"),
-             createCompositeTypeFromString("double * restrict const")): functools.partial(self.builder.inttoptr, node,
-                                                                                          self.fp_pointer),
-        }
-        # TODO float, TEST: const, restrict
-        # TODO bitcast, addrspacecast
-        # TODO unsigned/signed fills
-        # print([x for x in decision.keys()])
-        # print("Types:")
-        # print([(type(x), type(y)) for (x, y) in decision.keys()])
-        # print("Cast:")
-        # print((from_dtype, to_dtype))
-        return decision[(from_dtype, to_dtype)]()
-
-    def _print_pointerArithmeticFunc(self, pointer):
-        ptr = self._print(pointer.args[0])
-        index = self._print(pointer.args[1])
-        return self.builder.gep(ptr, [index])
-
-    def _print_Indexed(self, indexed):
-        ptr = self._print(indexed.base.label)
-        index = self._print(indexed.args[1])
-        gep = self.builder.gep(ptr, [index])
-        return self.builder.load(gep, name=indexed.base.label.name)
-
-    def _print_Piecewise(self, piece):
-        if not piece.args[-1].cond:
-            # We need the last conditional to be a True, otherwise the resulting
-            # function may not return a result.
-            raise ValueError("All Piecewise expressions must contain an "
-                             "(expr, True) statement to be used as a default "
-                             "condition. Without one, the generated "
-                             "expression may not evaluate to anything under "
-                             "some condition.")
-        if piece.has(Assignment):
-            raise NotImplementedError('The llvm-backend does not support assignments'
-                                      'in the Piecewise function. It is questionable'
-                                      'whether to implement it. So far there is no'
-                                      'use-case to test it.')
-        else:
-            phiData = []
-            after_block = self.builder.append_basic_block()
-            for (expr, condition) in piece.args:
-                if condition == True:  # Don't use 'is' use '=='!
-                    phiData.append((self._print(expr), self.builder.block))
-                    self.builder.branch(after_block)
-                    self.builder.position_at_end(after_block)
-                else:
-                    cond = self._print(condition)
-                    trueBlock = self.builder.append_basic_block()
-                    falseBlock = self.builder.append_basic_block()
-                    self.builder.cbranch(cond, trueBlock, falseBlock)
-                    self.builder.position_at_end(trueBlock)
-                    phiData.append((self._print(expr), trueBlock))
-                    self.builder.branch(after_block)
-                    self.builder.position_at_end(falseBlock)
-
-            phi = self.builder.phi(to_llvm_type(getTypeOfExpression(piece)))
-            for (val, block) in phiData:
-                phi.add_incoming(val, block)
-            return phi
-
-    # Should have a list of math library functions to validate this.
-    # TODO function calls to libs
-    def _print_Function(self, expr):
-        name = expr.func.__name__
-        e0 = self._print(expr.args[0])
-        fn = self.ext_fn.get(name)
-        if not fn:
-            fn_type = ir.FunctionType(self.fp_type, [self.fp_type])
-            fn = ir.Function(self.module, fn_type, name)
-            self.ext_fn[name] = fn
-        return self.builder.call(fn, [e0], name)
-
-    def emptyPrinter(self, expr):
-        try:
-            import inspect
-            mro = inspect.getmro(expr)
-        except AttributeError:
-            mro = "None"
-        raise TypeError("Unsupported type for LLVM JIT conversion: Expression:\"%s\", Type:\"%s\", MRO:%s"
-                        % (expr, type(expr), mro))
--- a/llvm/llvmjit.py
+++ b/llvm/llvmjit.py
-import llvmlite.ir as ir
-import llvmlite.binding as llvm
-import numpy as np
-import ctypes as ct
-import subprocess
-import shutil
-
-from pystencils.data_types import createCompositeTypeFromString
-from ..data_types import toCtypes, ctypes_from_llvm
-from .llvm import generateLLVM
-from ..cpu.cpujit import buildCTypeArgumentList, makePythonFunctionIncompleteParams
-
-
-def generate_and_jit(ast):
-    gen = generateLLVM(ast)
-    if isinstance(gen, ir.Module):
-        return compileLLVM(gen)
-    else:
-        return compileLLVM(gen.module)
-
-
-def makePythonFunction(ast, argumentDict={}, func=None):
-    if func is None:
-        jit = generate_and_jit(ast)
-        func = jit.get_function_ptr(ast.functionName)
-    try:
-        args = buildCTypeArgumentList(ast.parameters, argumentDict)
-    except KeyError:
-        # not all parameters specified yet
-        return makePythonFunctionIncompleteParams(ast, argumentDict, func)
-    return lambda: func(*args)
-
-
-def compileLLVM(module):
-    jit = Jit()
-    jit.parse(module)
-    jit.optimize()
-    jit.compile()
-    return jit
-
-
-class Jit(object):
-    def __init__(self):
-        llvm.initialize()
-        llvm.initialize_all_targets()
-        llvm.initialize_native_target()
-        llvm.initialize_native_asmprinter()
-
-        self.module = None
-        self._llvmmod = llvm.parse_assembly("")
-        self.target = llvm.Target.from_default_triple()
-        self.cpu = llvm.get_host_cpu_name()
-        self.cpu_features = llvm.get_host_cpu_features()
-        self.target_machine = self.target.create_target_machine(cpu=self.cpu, features=self.cpu_features.flatten(), opt=2)
-        llvm.check_jit_execution()
-        self.ee = llvm.create_mcjit_compiler(self.llvmmod, self.target_machine)
-        self.ee.finalize_object()
-        self.fptr = None
-
-    @property
-    def llvmmod(self):
-        return self._llvmmod
-
-    @llvmmod.setter
-    def llvmmod(self, mod):
-        self.ee.remove_module(self.llvmmod)
-        self.ee.add_module(mod)
-        self.ee.finalize_object()
-        self.compile()
-        self._llvmmod = mod
-
-    def parse(self, module):
-        self.module = module
-        llvmmod = llvm.parse_assembly(str(module))
-        llvmmod.verify()
-        llvmmod.triple = self.target.triple
-        llvmmod.name = 'module'
-        self.llvmmod = llvmmod
-
-    def write_ll(self, file):
-        with open(file, 'w') as f:
-            f.write(str(self.llvmmod))
-
-    def write_assembly(self, file):
-        with open(file, 'w') as f:
-            f.write(self.target_machine.emit_assembly(self.llvmmod))
-
-    def write_object_file(self, file):
-        with open(file, 'wb') as f:
-            f.write(self.target_machine.emit_object(self.llvmmod))
-
-    def optimize(self):
-        pmb = llvm.create_pass_manager_builder()
-        pmb.opt_level = 2
-        pmb.disable_unit_at_a_time = False
-        pmb.loop_vectorize = True
-        pmb.slp_vectorize = True
-        # TODO possible to pass for functions
-        pm = llvm.create_module_pass_manager()
-        pm.add_instruction_combining_pass()
-        pm.add_function_attrs_pass()
-        pm.add_constant_merge_pass()
-        pm.add_licm_pass()
-        pmb.populate(pm)
-        pm.run(self.llvmmod)
-
-    def optimize_polly(self, opt):
-        if shutil.which(opt) is None:
-            print('Path to the executable is wrong')
-            return
-        canonicalize = subprocess.Popen([opt, '-polly-canonicalize'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-
-        analyze = subprocess.Popen(
-            [opt, '-polly-codegen', '-polly-vectorizer=polly', '-polly-parallel', '-polly-process-unprofitable', '-f'],
-            stdin=canonicalize.stdout, stdout=subprocess.PIPE)
-
-        canonicalize.communicate(input=self.llvmmod.as_bitcode())
-
-        optimize = subprocess.Popen([opt, '-O3', '-f'], stdin=analyze.stdout, stdout=subprocess.PIPE)
-        opts, _ = optimize.communicate()
-        llvmmod = llvm.parse_bitcode(opts)
-        llvmmod.verify()
-        self.llvmmod = llvmmod
-
-    def compile(self):
-        fptr = {}
-        for function in self.module.functions:
-            if not function.is_declaration:
-                return_type = None
-                if function.ftype.return_type != ir.VoidType():
-                    return_type = toCtypes(createCompositeTypeFromString(str(function.ftype.return_type)))
-                args = [ctypes_from_llvm(arg) for arg in function.ftype.args]
-                function_address = self.ee.get_function_address(function.name)
-                fptr[function.name] = ct.CFUNCTYPE(return_type, *args)(function_address)
-        self.fptr = fptr
-
-    def __call__(self, function, *args, **kwargs):
-        target_function = next(f for f in self.module.functions if f.name == function)
-        arg_types = [ctypes_from_llvm(arg.type) for arg in target_function.args]
-
-        transformed_args = []
-        for i, arg in enumerate(args):
-            if isinstance(arg, np.ndarray):
-                transformed_args.append(arg.ctypes.data_as(arg_types[i]))
-            else:
-                transformed_args.append(arg)
-
-        self.fptr[function](*transformed_args)
-
-    def print_functions(self):
-        for f in self.module.functions:
-            print(f.ftype.return_type, f.name, f.args)
-
-    def get_function_ptr(self, name):
-        fptr = self.fptr[name]
-        fptr.jit = self
-        return fptr
-
-
-
--- a/parallel/blockiteration.py
+++ b/parallel/blockiteration.py
-import numpy as np
-import waLBerla as wlb
-from pystencils.slicing import normalizeSlice
-
-
-class BlockIterationInfo:
-    def __init__(self, block, offset, localSlice):
-        self._block = block
-        self._offset = offset
-        self._localSlice = localSlice
-
-    @property
-    def block(self):
-        return self._block
-
-    @property
-    def offset(self):
-        return self._offset
-
-    @property
-    def shape(self):
-        return tuple(s.stop - s.start for s in self._localSlice)
-
-    @property
-    def localSlice(self):
-        """Slice object of intersection between current block and iteration interval in local coordinates"""
-        return self._localSlice
-
-    @property
-    def midpointArrays(self):
-        """Global coordinate meshgrid of cell midpoints which are shifted by 0.5 compared to cell indices"""
-        meshGridParams = [offset + 0.5 + np.arange(width, dtype=float)
-                          for offset, width in zip(self.offset, self.shape)]
-        return np.meshgrid(*meshGridParams, indexing='ij', copy=False)
-
-    @property
-    def cellIndexArrays(self):
-        """Global coordinate meshgrid of cell coordinates. Cell indices start at 0 at the first inner cell,
-        ghost layers have negative indices"""
-        meshGridParams = [offset + np.arange(width, dtype=np.int32)
-                          for offset, width in zip(self.offset, self.shape)]
-        return np.meshgrid(*meshGridParams, indexing='ij', copy=False)
-
-
-def slicedBlockIteration(blocks, sliceObj=None, innerGhostLayers=1, sliceNormalizationGhostLayers=1):
-    """
-    Iterates of all blocks that have an intersection with the given slice object.
-    For these blocks a BlockIterationInfo object is yielded
-    
-    :param blocks: waLBerla block data structure
-    :param sliceObj: a slice (i.e. rectangular subregion), can be created with makeSlice[]
-    :param innerGhostLayers: how many ghost layers are included in the local slice and the optional index arrays
-    :param sliceNormalizationGhostLayers: slices can have relative coordinates e.g. makeSlice[0.2, :, :]
-                                          when computing absolute values, the domain size is needed. This parameter 
-                                          specifies how many ghost layers are taken into account for this operation.
-
-    Example: assume no slice is given, then sliceNormalizationGhostLayers effectively sets how much ghost layers
-    at the border of the domain are included. The innerGhostLayers parameter specifies how many inner ghost layers are
-    included
-    """
-    if sliceObj is None:
-        sliceObj = [slice(None, None, None)] * 3
-
-    domainCellBB = blocks.getDomainCellBB()
-    domainExtent = [s + 2 * sliceNormalizationGhostLayers for s in domainCellBB.size]
-    sliceObj = normalizeSlice(sliceObj, domainExtent)
-    targetCellBB = wlb.CellInterval.fromSlice(sliceObj)
-    targetCellBB.shift(*[a - sliceNormalizationGhostLayers for a in domainCellBB.min])
-
-    for block in blocks:
-        intersection = blocks.getBlockCellBB(block).getExpanded(innerGhostLayers)
-        intersection.intersect(targetCellBB)
-        if intersection.empty():
-            continue
-
-        localTargetBB = blocks.transformGlobalToLocal(block, intersection)
-        localTargetBB.shift(innerGhostLayers, innerGhostLayers, innerGhostLayers)
-        localSlice = localTargetBB.toSlice(False)
-        yield BlockIterationInfo(block, intersection.min, localSlice)
-
--- a/parallel/datahandling.py
+++ b/parallel/datahandling.py
-import numpy as np
-from pystencils import Field, makeSlice
-from pystencils.datahandling import DataHandling
-from pystencils.parallel.blockiteration import slicedBlockIteration
-from pystencils.utils import DotDict
-import waLBerla as wlb
-
-
-class ParallelDataHandling(DataHandling):
-    GPU_DATA_PREFIX = "gpu_"
-
-    def __init__(self, blocks, defaultGhostLayers=1, defaultLayout='SoA', dim=3):
-        """
-        Creates data handling based on waLBerla block storage
-
-        :param blocks: waLBerla block storage
-        :param defaultGhostLayers: nr of ghost layers used if not specified in add() method
-        :param defaultLayout: layout used if no layout is given to add() method
-        :param dim: dimension of scenario,
-                    waLBerla always uses three dimensions, so if dim=2 the extend of the
-                    z coordinate of blocks has to be 1
-        """
-        super(ParallelDataHandling, self).__init__()
-        assert dim in (2, 3)
-        self.blocks = blocks
-        self.defaultGhostLayers = defaultGhostLayers
-        self.defaultLayout = defaultLayout
-        self._fields = DotDict()  # maps name to symbolic pystencils field
-        self.dataNames = set()
-        self._dim = dim
-        self._fieldInformation = {}
-        self._cpuGpuPairs = []
-        self._customDataTransferFunctions = {}
-        if self._dim == 2:
-            assert self.blocks.getDomainCellBB().size[2] == 1
-
-    @property
-    def dim(self):
-        return self._dim
-
-    @property
-    def fields(self):
-        return self._fields
-
-    def addCustomData(self, name, cpuCreationFunction,
-                      gpuCreationFunction=None, cpuToGpuTransferFunc=None, gpuToCpuTransferFunc=None):
-        self.blocks.addBlockData(name, cpuCreationFunction)
-        if gpuCreationFunction:
-            self.blocks.addBlockData(self.GPU_DATA_PREFIX + name, gpuCreationFunction)
-            if cpuToGpuTransferFunc is None or gpuToCpuTransferFunc is None:
-                raise ValueError("For GPU data, both transfer functions have to be specified")
-            self._customDataTransferFunctions[name] = (cpuToGpuTransferFunc, gpuToCpuTransferFunc)
-
-    def addArray(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None, cpu=True, gpu=False):
-        if ghostLayers is None:
-            ghostLayers = self.defaultGhostLayers
-        if layout is None:
-            layout = self.defaultLayout
-        if latexName is None:
-            latexName = name
-        if len(self.blocks) == 0:
-            raise ValueError("Data handling expects that each process has at least one block")
-        if hasattr(dtype, 'type'):
-            dtype = dtype.type
-        if name in self.blocks[0] or self.GPU_DATA_PREFIX + name in self.blocks[0]:
-            raise ValueError("Data with this name has already been added")
-
-        self._fieldInformation[name] = {'ghostLayers': ghostLayers,
-                                        'fSize': fSize,
-                                        'layout': layout,
-                                        'dtype': dtype}
-
-        layoutMap = {'fzyx': wlb.field.Layout.fzyx, 'zyxf': wlb.field.Layout.zyxf,
-                     'SoA': wlb.field.Layout.fzyx,  'AoS': wlb.field.Layout.zyxf}
-
-        if cpu:
-            wlb.field.addToStorage(self.blocks, name, dtype, fSize=fSize, layout=layoutMap[layout],
-                                   ghostLayers=ghostLayers)
-        if gpu:
-            wlb.cuda.addGpuFieldToStorage(self.blocks, self.GPU_DATA_PREFIX+name, dtype, fSize=fSize,
-                                          usePitchedMem=False, ghostLayers=ghostLayers, layout=layoutMap[layout])
-
-        if cpu and gpu:
-            self._cpuGpuPairs.append((name, self.GPU_DATA_PREFIX + name))
-
-        blockBB = self.blocks.getBlockCellBB(self.blocks[0])
-        shape = tuple(s + 2 * ghostLayers for s in blockBB.size)
-        indexDimensions = 1 if fSize > 1 else 0
-        if indexDimensions == 1:
-            shape += (fSize, )
-
-        assert all(f.name != latexName for f in self.fields.values()), "Symbolic field with this name already exists"
-        self.fields[name] = Field.createFixedSize(latexName, shape, indexDimensions, dtype, layout)
-
-    def hasData(self, name):
-        return name in self._fields
-
-    def addArrayLike(self, name, nameOfTemplateField, latexName=None, cpu=True, gpu=False):
-        self.addArray(name, latexName=latexName, cpu=cpu, gpu=gpu, **self._fieldInformation[nameOfTemplateField])
-
-    def swap(self, name1, name2, gpu=False):
-        if gpu:
-            name1 = self.GPU_DATA_PREFIX + name1
-            name2 = self.GPU_DATA_PREFIX + name2
-        for block in self.blocks:
-            block[name1].swapDataPointers(block[name2])
-
-    def accessArray(self, name, sliceObj=None, innerGhostLayers='all', outerGhostLayers='all'):
-        fieldInfo = self._fieldInformation[name]
-        with self.accessWrapper(name):
-            if innerGhostLayers is 'all':
-                innerGhostLayers = fieldInfo['ghostLayers']
-            if outerGhostLayers is 'all':
-                outerGhostLayers = fieldInfo['ghostLayers']
-
-            for iterInfo in slicedBlockIteration(self.blocks, sliceObj, innerGhostLayers, outerGhostLayers):
-                arr = wlb.field.toArray(iterInfo.block[name], withGhostLayers=innerGhostLayers)[iterInfo.localSlice]
-                if self.fields[name].indexDimensions == 0:
-                    arr = arr[..., 0]
-                if self.dim == 2:
-                    arr = arr[:, :, 0]
-                yield arr, iterInfo
-
-    def accessCustomData(self, name):
-        with self.accessWrapper(name):
-            for block in self.blocks:
-                data = block[name]
-                cellBB = self.blocks.getBlockCellBB(block)
-                min = cellBB.min[:self.dim]
-                max = tuple(e + 1 for e in cellBB.max[:self.dim])
-                yield data, (min, max)
-
-    def gatherArray(self, name, sliceObj=None, allGather=False):
-        with self.accessWrapper(name):
-            if sliceObj is None:
-                sliceObj = makeSlice[:, :, :]
-            for array in wlb.field.gatherGenerator(self.blocks, name, sliceObj, allGather):
-                if self.fields[name].indexDimensions == 0:
-                    array = array[..., 0]
-                if self.dim == 2:
-                    array = array[:, :, 0]
-                yield array
-
-    def toCpu(self, name):
-        if name in self._customDataTransferFunctions:
-            transferFunc = self._customDataTransferFunctions[name][1]
-            for block in self.blocks:
-                transferFunc(block[self.GPU_DATA_PREFIX + name], block[name])
-        else:
-            wlb.cuda.copyFieldToCpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
-
-    def toGpu(self, name):
-        if name in self._customDataTransferFunctions:
-            transferFunc = self._customDataTransferFunctions[name][0]
-            for block in self.blocks:
-                transferFunc(block[self.GPU_DATA_PREFIX + name], block[name])
-        else:
-            wlb.cuda.copyFieldToGpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
-
-    def allToCpu(self):
-        for cpuName, gpuName in self._cpuGpuPairs:
-            wlb.cuda.copyFieldToCpu(self.blocks, gpuName, cpuName)
-        for name in self._customDataTransferFunctions.keys():
-            self.toCpu(name)
-
-    def allToGpu(self):
-        for cpuName, gpuName in self._cpuGpuPairs:
-            wlb.cuda.copyFieldToGpu(self.blocks, gpuName, cpuName)
-        for name in self._customDataTransferFunctions.keys():
-            self.toGpu(name)
-
-    def synchronizationFunctionCPU(self, names, stencil=None, buffered=True, **kwargs):
-        return self._synchronizationFunction(names, stencil, buffered, 'cpu')
-
-    def synchronizationFunctionGPU(self, names, stencil=None, buffered=True, **kwargs):
-        return self._synchronizationFunction(names, stencil, buffered, 'gpu')
-
-    def _synchronizationFunction(self, names, stencil, buffered, target):
-        if stencil is None:
-            stencil = 'D3Q27' if self.dim == 3 else 'D2Q9'
-
-        if not hasattr(names, '__len__') or type(names) is str:
-            names = [names]
-
-        createScheme = wlb.createUniformBufferedScheme if buffered else wlb.createUniformDirectScheme
-        if target == 'cpu':
-            createPacking = wlb.field.createPackInfo if buffered else wlb.field.createMPIDatatypeInfo
-        elif target == 'gpu':
-            createPacking = wlb.cuda.createPackInfo if buffered else wlb.cuda.createMPIDatatypeInfo
-            names = [self.GPU_DATA_PREFIX + name for name in names]
-
-        syncFunction = createScheme(self.blocks, stencil)
-        for name in names:
-            syncFunction.addDataToCommunicate(createPacking(self.blocks, name))
-
-        return syncFunction
--- a/plot2d.py
+++ b/plot2d.py
-from matplotlib.pyplot import *
-
-
-def vectorField(field, step=2, **kwargs):
-    """
-    Plot given vector field as quiver (arrow) plot.
-
-    :param field: numpy array with 3 dimensions, first two are spatial x,y coordinate, the last
-                  coordinate should have shape 2 and stores the 2 velocity components
-    :param step: plots only every steps's cell
-    :param kwargs: keyword arguments passed to :func:`matplotlib.pyplot.quiver`
-    """
-    veln = field.swapaxes(0, 1)
-    res = quiver(veln[::step, ::step, 0], veln[::step, ::step, 1], **kwargs)
-    axis('equal')
-    return res
-
-
-def vectorFieldMagnitude(field, **kwargs):
-    """
-    Plots the magnitude of a vector field as colormap
-    :param field: numpy array with 3 dimensions, first two are spatial x,y coordinate, the last
-                  coordinate should have shape 2 and stores the 2 velocity components
-    :param kwargs: keyword arguments passed to :func:`matplotlib.pyplot.imshow`
-    """
-    from numpy.linalg import norm
-    norm = norm(field, axis=2, ord=2)
-    if hasattr(field, 'mask'):
-        norm = np.ma.masked_array(norm, mask=field.mask[:, :, 0])
-    return scalarField(norm, **kwargs)
-
-
-def scalarField(field, **kwargs):
-    """
-    Plots field values as colormap
-
-    :param field: two dimensional numpy array
-    :param kwargs: keyword arguments passed to :func:`matplotlib.pyplot.imshow`
-    """
-    import numpy as np
-    field = np.swapaxes(field, 0, 1)
-    res = imshow(field, origin='lower', **kwargs)
-    axis('equal')
-    return res
-
-
-def scalarFieldContour(field, **kwargs):
-    field = np.swapaxes(field, 0, 1)
-    res = contour(field, **kwargs)
-    axis('equal')
-    return res
-
-
-def multipleScalarFields(field, **kwargs):
-    subPlots = field.shape[-1]
-    for i in range(subPlots):
-        subplot(1, subPlots, i + 1)
-        title(str(i))
-        scalarField(field[..., i])
-        colorbar()
-
-
-# ------------------------------------------- Animations ---------------------------------------------------------------
-
-
-def vectorFieldAnimation(runFunction, step=2, rescale=True, plotSetupFunction=lambda: None,
-                         plotUpdateFunction=lambda: None, interval=30, frames=180, **kwargs):
-    import matplotlib.animation as animation
-    from numpy.linalg import norm
-
-    fig = gcf()
-    im = None
-    field = runFunction()
-    if rescale:
-        maxNorm = np.max(norm(field, axis=2, ord=2))
-        field /= maxNorm
-        if 'scale' not in kwargs:
-            kwargs['scale'] = 1.0
-
-    quiverPlot = vectorField(field, step=step, **kwargs)
-    plotSetupFunction()
-
-    def updatefig(*args):
-        f = runFunction()
-        f = np.swapaxes(f, 0, 1)
-        if rescale:
-            maxNorm = np.max(norm(f, axis=2, ord=2))
-            f /= maxNorm
-        u, v = f[::step, ::step, 0], f[::step, ::step, 1]
-        quiverPlot.set_UVC(u, v)
-        plotUpdateFunction()
-        return im,
-
-    return animation.FuncAnimation(fig, updatefig, interval=interval, frames=frames)
-
-
-def vectorFieldMagnitudeAnimation(runFunction, plotSetupFunction=lambda: None,
-                                  plotUpdateFunction=lambda: None, interval=30, frames=180, **kwargs):
-    import matplotlib.animation as animation
-    from numpy.linalg import norm
-
-    fig = gcf()
-    im = None
-    field = runFunction()
-    im = vectorFieldMagnitude(field, **kwargs)
-    plotSetupFunction()
-
-    def updatefig(*args):
-        f = runFunction()
-        normed = norm(f, axis=2, ord=2)
-        if hasattr(f, 'mask'):
-            normed = np.ma.masked_array(normed, mask=f.mask[:, :, 0])
-        normed = np.swapaxes(normed, 0, 1)
-        im.set_array(normed)
-        plotUpdateFunction()
-        return im,
-
-    return animation.FuncAnimation(fig, updatefig, interval=interval, frames=frames)
\ No newline at end of file
--- a/pyproject.toml
+++ b/pyproject.toml
+[project]
+name = "pystencils"
+description = "Speeding up stencil computations on CPUs and GPUs"
+dynamic = ["version"]
+readme = "README.md"
+authors = [
+    { name = "Martin Bauer" },
+    { name = "Jan Hönig" },
+    { name = "Markus Holzer" },
+    { name = "Frederik Hennig" },
+    { email = "cs10-codegen@fau.de" },
+]
+license = { file = "COPYING.txt" }
+requires-python = ">=3.10"
+dependencies = ["sympy>=1.9,<=1.12.1", "numpy>=1.8.0", "appdirs", "joblib", "pyyaml", "fasteners"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Framework :: Jupyter",
+    "Topic :: Software Development :: Code Generators",
+    "Topic :: Scientific/Engineering :: Physics",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
+]
+
+[project.urls]
+"Bug Tracker" = "https://i10git.cs.fau.de/pycodegen/pystencils/-/issues"
+"Documentation" = "https://pycodegen.pages.i10git.cs.fau.de/pystencils/"
+"Source Code" = "https://i10git.cs.fau.de/pycodegen/pystencils"
+
+[project.optional-dependencies]
+gpu = ['cupy']
+alltrafos = ['islpy', 'py-cpuinfo']
+bench_db = ['blitzdb', 'pymongo', 'pandas']
+interactive = [
+    'matplotlib',
+    'ipy_table',
+    'imageio',
+    'jupyter',
+    'pyevtk',
+    'rich',
+    'graphviz',
+]
+use_cython = [
+    'Cython'
+]
+doc = [
+    'sphinx',
+    'sphinx_rtd_theme',
+    'nbsphinx',
+    'sphinxcontrib-bibtex',
+    'sphinx_autodoc_typehints',
+    'pandoc',
+]
+tests = [
+    'pytest',
+    'pytest-cov',
+    'pytest-html',
+    'ansi2html',
+    'pytest-xdist',
+    'flake8',
+    'nbformat',
+    'nbconvert',
+    'ipython',
+    'matplotlib',
+    'py-cpuinfo',
+    'randomgen>=1.18',
+]
+
+[build-system]
+requires = [
+    "setuptools>=61",
+    "versioneer[toml]>=0.29",
+    # 'Cython'
+]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.package-data]
+pystencils = [
+    "include/*.h",
+    "boundaries/createindexlistcython.pyx"
+]
+
+[tool.setuptools.packages.find]
+where = ["src"]
+include = ["pystencils", "pystencils.*"]
+namespaces = false
+
+[tool.versioneer]
+# See the docstring in versioneer.py for instructions. Note that you must
+# re-run 'versioneer.py setup' after changing this section, and commit the
+# resulting files.
+VCS = "git"
+style = "pep440"
+versionfile_source = "src/pystencils/_version.py"
+versionfile_build = "pystencils/_version.py"
+tag_prefix = "release/"
+parentdir_prefix = "pystencils-"
--- a/pytest.ini
+++ b/pytest.ini
+[pytest]
+testpaths = src tests doc/notebooks
+pythonpath = src
+python_files = test_*.py *_test.py scenario_*.py
+norecursedirs = *.egg-info .git .cache .ipynb_checkpoints htmlcov
+addopts = --doctest-modules --durations=20  --cov-config pytest.ini
+markers =
+       longrun: tests only run at night since they have large execution time
+       notebook: mark for notebooks
+# These warnings all come from third-party libraries.
+filterwarnings =
+       ignore:an integer is required:DeprecationWarning
+       ignore:\s*load will be removed, use:PendingDeprecationWarning
+       ignore:the imp module is deprecated in favour of importlib:DeprecationWarning
+       ignore:.*is a deprecated alias for the builtin `bool`:DeprecationWarning
+       ignore:'contextfilter' is renamed to 'pass_context':DeprecationWarning
+       ignore:Using or importing the ABCs from 'collections' instead of from 'collections.abc':DeprecationWarning
+       ignore:Animation was deleted without rendering anything:UserWarning
+
+[run]
+branch = True
+source = src/pystencils
+         tests
+
+omit = doc/*
+       tests/*
+       setup.py
+       quicktest.py
+       conftest.py
+       versioneer.py
+       src/pystencils/jupytersetup.py
+       src/pystencils/cpu/msvc_detection.py
+       src/pystencils/sympy_gmpy_bug_workaround.py
+       src/pystencils/cache.py
+       src/pystencils/pacxx/benchmark.py
+       src/pystencils/_version.py
+       venv/
+
+[report]
+exclude_lines =
+       # Have to re-enable the standard pragma
+       pragma: no cover
+
+       def __repr__
+       def _repr_html_
+
+       # Don't complain if tests don't hit defensive assertion code:
+       raise AssertionError
+       raise NotImplementedError
+       NotImplementedError()
+       #raise ValueError
+
+       # Don't complain if non-runnable code isn't run:
+       if 0:
+       if False:
+       if __name__ == .__main__.:
+
+skip_covered = True
+fail_under = 85
+
+[html]
+directory = coverage_report
--- a/qtgui.py
+++ b/qtgui.py
-import sys
-from PyQt5.QtWidgets import QWidget, QApplication, QTreeWidget, QTreeWidgetItem, QHBoxLayout
-from pystencils.astnodes import Block, LoopOverCoordinate, KernelFunction
-
-
-def debugGUI(ast):
-    app = QApplication.instance()
-    if app is None:
-        app = QApplication(sys.argv)
-    else:
-        print('QApplication instance already exists: %s' % str(app))
-    ex = DebugTree()
-    ex.insert_ast(ast)
-    app.exec_()
-
-
-class DebugTree(QWidget):
-    def __init__(self):
-        super().__init__()
-        self.initUI()
-
-    def initUI(self):
-        self.tree = QTreeWidget(self)
-        self.tree.setColumnCount(1)
-        self.tree.setHeaderLabel('repr')
-
-        hbox = QHBoxLayout()
-        hbox.stretch(1)
-        hbox.addWidget(self.tree)
-
-        self.setWindowTitle('Debug')
-        self.setLayout(hbox)
-        self.show()
-
-    def insert_ast(self, node, parent=None):
-        if parent is None:
-            parent = self.tree
-        if isinstance(node, Block):  # Blocks are represented with the tree structure
-            item = parent
-        else:
-            item = QTreeWidgetItem(parent)
-            item.setText(0, repr(node))
-
-        if node.func in [LoopOverCoordinate, KernelFunction]:
-            self.tree.expandItem(item)
-
-        for child in node.args:
-            self.insert_ast(child, item)
--- a/quicktest.py
+++ b/quicktest.py
+#!/usr/bin/env python3
+
+from contextlib import redirect_stdout
+import io
+from tests.test_quicktests import (
+    test_basic_kernel,
+    test_basic_blocking_staggered,
+    test_basic_vectorization,
+)
+
+quick_tests = [
+    test_basic_kernel,
+    test_basic_blocking_staggered,
+    test_basic_vectorization,
+]
+
+if __name__ == "__main__":
+    print("Running pystencils quicktests")
+    for qt in quick_tests:
+        print(f"   -> {qt.__name__}")
+        with redirect_stdout(io.StringIO()):
+            qt()
--- a/release.sh
+++ b/release.sh
+#!/bin/bash
+
+echo "Existing versions"
+git tag -l | grep release
+
+echo "Enter the next version"
+read new_version
+
+git tag -s release/${new_version}
+git push origin master release/${new_version}
+rm -rf dist
+python setup.py sdist
+twine upload dist/*
--- a/runhelper/db.py
+++ b/runhelper/db.py
-import time
-import socket
-import blitzdb
-from pystencils.cpu.cpujit import getCompilerConfig
-
-
-def removeConstantColumns(df):
-    import pandas as pd
-    remainingDf = df.loc[:, df.apply(pd.Series.nunique) > 1]
-    constants = df.loc[:, df.apply(pd.Series.nunique) <= 1].iloc[0]
-    return remainingDf, constants
-
-
-def removeColumnsByPrefix(df, prefixes, inplace=False):
-    if not inplace:
-        df = df.copy()
-
-    for columnName in df.columns:
-        for prefix in prefixes:
-            if columnName.startswith(prefix):
-                del df[columnName]
-    return df
-
-
-def removePrefixInColumnName(df, inplace=False):
-    if not inplace:
-        df = df.copy()
-
-    newColumnNames = []
-    for columnName in df.columns:
-        if '.' in columnName:
-            newColumnNames.append(columnName[columnName.index('.') + 1:])
-        else:
-            newColumnNames.append(columnName)
-    df.columns = newColumnNames
-    return df
-
-
-class Database(object):
-    class SimulationResult(blitzdb.Document):
-        pass
-
-    def __init__(self, file):
-        if file.startswith("mongo://"):
-            from pymongo import MongoClient
-            dbName = file[len("mongo://"):]
-            c = MongoClient()
-            self.backend = blitzdb.MongoBackend(c[dbName])
-        else:
-            self.backend = blitzdb.FileBackend(file)
-
-        self.backend.autocommit = True
-
-    @staticmethod
-    def getEnv():
-        return {
-            'timestamp': time.mktime(time.gmtime()),
-            'hostname': socket.gethostname(),
-            'cpuCompilerConfig': getCompilerConfig(),
-        }
-
-    def save(self, params, result, env=None, **kwargs):
-        documentDict = {
-            'params': params,
-            'result': result,
-            'env': env if env else self.getEnv(),
-        }
-        documentDict.update(kwargs)
-        document = Database.SimulationResult(documentDict, backend=self.backend)
-        document.save()
-        self.backend.commit()
-
-    def filter(self, *args, **kwargs):
-        return self.backend.filter(Database.SimulationResult, *args, **kwargs)
-
-    def filterParams(self, query, *args, **kwargs):
-        query = {'params.' + k: v for k, v in query.items()}
-        return self.filter(query, *args, **kwargs)
-
-    def alreadySimulated(self, parameters):
-        return len(self.filter({'params': parameters})) > 0
-
-    # Columns with these prefixes are not included in pandas result
-    pandasColumnsToIgnore = ['changedParams.', 'env.']
-
-    def toPandas(self, parameterQuery, removePrefix=True, dropConstantColumns=False):
-        import pandas as pd
-
-        queryResult = self.filterParams(parameterQuery)
-        if len(queryResult) == 0:
-            return
-
-        df = pd.io.json.json_normalize([e.attributes for e in queryResult])
-        df.set_index('pk', inplace=True)
-
-        if self.pandasColumnsToIgnore:
-            removeColumnsByPrefix(df, self.pandasColumnsToIgnore, inplace=True)
-        if removePrefix:
-            removePrefixInColumnName(df, inplace=True)
-        if dropConstantColumns:
-            df, _ = removeConstantColumns(df)
-
-        return df
--- a/runhelper/parameterstudy.py
+++ b/runhelper/parameterstudy.py
-import json
-import datetime
-import os
-import socket
-import itertools
-from copy import deepcopy
-from collections import namedtuple
-from time import sleep
-from pystencils.runhelper import Database
-from pystencils.utils import DotDict
-
-
-class ParameterStudy(object):
-    Run = namedtuple("Run", ['parameterDict', 'weight'])
-
-    def __init__(self, runFunction, listOfRuns=[], databaseFile='./db'):
-        self.listOfRuns = listOfRuns
-        self.runFunction = runFunction
-        self.db = Database(databaseFile)
-
-    def addRun(self, parameterDict, weight=1):
-        self.listOfRuns.append(self.Run(parameterDict, weight))
-
-    def addCombinations(self, degreesOfFreedom, constantParameters=None, filterFunction=None, weightFunction=None):
-        parameterNames = [e[0] for e in degreesOfFreedom]
-        parameterValues = [e[1] for e in degreesOfFreedom]
-
-        defaultParamsDict = {} if constantParameters is None else constantParameters
-        for valueTuple in itertools.product(*parameterValues):
-            paramsDict = deepcopy(defaultParamsDict)
-            paramsDict.update({name: value for name, value in zip(parameterNames, valueTuple)})
-            params = DotDict(paramsDict)
-            if filterFunction:
-                params = filterFunction(params)
-                if params is None:
-                    continue
-            weight = 1 if not weightFunction else weightFunction(params)
-            self.addRun(params, weight)
-
-    def filterAlreadySimulated(self, allRuns):
-        return [r for r in allRuns if not self.db.alreadySimulated(r.parameterDict)]
-
-    @staticmethod
-    def distributeRuns(allRuns, process, numProcesses):
-        sortedRuns = sorted(allRuns, key=lambda e: e.weight, reverse=True)
-        result = sortedRuns[process::numProcesses]
-        result.reverse()  # start with faster scenarios
-        return result
-
-    def runServer(self, ip="0.0.0.0", port=8342):
-        from http.server import BaseHTTPRequestHandler, HTTPServer
-        filteredRuns = self.filterAlreadySimulated(self.listOfRuns)
-
-        if not filteredRuns:
-            print("No Scenarios to simulate")
-            return
-
-        class ParameterStudyServer(BaseHTTPRequestHandler):
-            parameterStudy = self
-            allRuns = filteredRuns
-            runs = filteredRuns.copy()
-            currentlyRunning = {}
-            finishedRuns = []
-
-            def nextScenario(self, receivedJsonData):
-                clientName = receivedJsonData['clientName']
-                if len(self.runs) > 0:
-                    runStatus = "%d/%d" % (len(self.finishedRuns), len(self.allRuns))
-                    workStatus = "%d/%d" % (sum(r.weight for r in self.finishedRuns),
-                                            sum(r.weight for r in self.allRuns))
-                    formatArgs = {
-                        'remaining': len(self.runs),
-                        'time': datetime.datetime.now().strftime("%H:%M:%S"),
-                        'clientName': clientName,
-                        'runStatus': runStatus,
-                        'workStatus': workStatus,
-                    }
-
-                    scenario = self.runs.pop(0)
-                    print(" {time} {clientName} fetched scenario. Scenarios: {runStatus}, Work: {workStatus}"
-                          .format(**formatArgs))
-                    self.currentlyRunning[clientName] = scenario
-                    return {'status': 'ok', 'params': scenario.parameterDict}
-                else:
-                    return {'status': 'finished'}
-
-            def result(self, receivedJsonData):
-                clientName = receivedJsonData['clientName']
-                run = self.currentlyRunning[clientName]
-                self.finishedRuns.append(run)
-                del self.currentlyRunning[clientName]
-                d = receivedJsonData
-
-                def hash_dict(d):
-                    import hashlib
-                    return hashlib.sha1(json.dumps(d, sort_keys=True).encode()).hexdigest()
-
-                assert hash_dict(d['params']) == hash_dict(run.parameterDict)
-                self.parameterStudy.db.save(run.parameterDict, d['result'], d['env'], changedParams=d['changedParams'])
-                return {}
-
-            def do_POST(self):
-                mapping = {'/nextScenario': self.nextScenario,
-                           '/result': self.result}
-                if self.path in mapping.keys():
-                    data = self.rfile.read(int(self.headers['Content-Length']))
-                    self.send_response(200)
-                    self.send_header("Content-type", "application/json")
-                    self.end_headers()
-                    jsonData = json.loads(data.decode())
-                    response = mapping[self.path](jsonData)
-                    self.wfile.write(json.dumps(response).encode())
-                else:
-                    self.send_response(400)
-
-            def do_GET(self):
-                return self.do_POST()
-
-            def log_message(self, format, *args):
-                return
-
-        print("Listening to connections on {}:{}. Scenarios to simulate: {}".format(ip, port, len(filteredRuns)))
-        server = HTTPServer((ip, port), ParameterStudyServer)
-        while len(ParameterStudyServer.currentlyRunning) > 0 or len(ParameterStudyServer.runs) > 0:
-            server.handle_request()
-        server.handle_request()
-
-    def runClient(self, clientName="{hostname}_{pid}", server='localhost', port=8342, parameterUpdate={}):
-        from urllib.request import urlopen, URLError
-        url = "http://{}:{}".format(server, port)
-        clientName = clientName.format(hostname=socket.gethostname(), pid=os.getpid())
-        while True:
-            try:
-                httpResponse = urlopen(url + "/nextScenario",
-                                       data=json.dumps({'clientName': clientName}).encode())
-                scenario = json.loads(httpResponse.read().decode())
-                if scenario['status'] != 'ok':
-                    break
-                originalParams = scenario['params'].copy()
-                scenario['params'].update(parameterUpdate)
-                result = self.runFunction(**scenario['params'])
-
-                answer = {'params': originalParams,
-                          'changedParams': parameterUpdate,
-                          'result': result,
-                          'env': Database.getEnv(),
-                          'clientName': clientName}
-                urlopen(url + '/result', data=json.dumps(answer).encode())
-            except URLError:
-                print("Cannot connect to server {}  retrying in 5 seconds...".format(url))
-                sleep(5)
-
-    def run(self, process, numProcesses, parameterUpdate={}):
-        ownRuns = self.distributeRuns(self.listOfRuns, process, numProcesses)
-        for run in ownRuns:
-            parameterDict = run.parameterDict.copy()
-            parameterDict.update(parameterUpdate)
-            result = self.runFunction(**parameterDict)
-
-            self.db.save(run.parameterDict, result, None, changedParams=parameterUpdate)
-
-    def runScenariosNotInDatabase(self, parameterUpdate={}):
-        filteredRuns = self.filterAlreadySimulated(self.listOfRuns)
-        for run in filteredRuns:
-            parameterDict = run.parameterDict.copy()
-            parameterDict.update(parameterUpdate)
-            result = self.runFunction(**parameterDict)
-
-            self.db.save(run.parameterDict, result, None, changedParams=parameterUpdate)
-
-    def runFromCommandLine(self, argv=None):
-        from argparse import ArgumentParser
-
-        def server(a):
-            if a.database:
-                self.db = Database(a.database)
-            self.runServer(a.host, a.port)
-
-        def client(a):
-            self.runClient(a.clientName, a.host, a.port, json.loads(a.parameterOverride))
-
-        def local(a):
-            if a.database:
-                self.db = Database(a.database)
-            self.runScenariosNotInDatabase(json.loads(a.parameterOverride))
-
-        parser = ArgumentParser()
-        subparsers = parser.add_subparsers()
-
-        localParser = subparsers.add_parser('local', aliases=['l'],
-                                            help="Run scenarios locally which are not yet in database",)
-        localParser.add_argument("-d", "--database", type=str, default="")
-        localParser.add_argument("-P", "--parameterOverride", type=str, default="{}",
-                                 help="JSON: the parameter dictionary is updated with these parameters. Use this to "
-                                      "set host specific options like GPU call parameters. Enclose in \" ")
-        localParser.set_defaults(func=local)
-
-        serverParser = subparsers.add_parser('server', aliases=['serv', 's'],
-                                             help="Runs server to distribute different scenarios to workers",)
-        serverParser.add_argument("-p", "--port", type=int, default=8342, help="Port to listen on")
-        serverParser.add_argument("-H", "--host", type=str, default="0.0.0.0", help="IP/Hostname to listen on")
-        serverParser.add_argument("-d", "--database", type=str, default="")
-        serverParser.set_defaults(func=server)
-
-        clientParser = subparsers.add_parser('client', aliases=['c'],
-                                             help="Runs a worker client connection to scenario distribution server")
-        clientParser.add_argument("-p", "--port", type=int, default=8342, help="Port to connect to")
-        clientParser.add_argument("-H", "--host", type=str, default="localhost", help="Host or IP to connect to")
-        clientParser.add_argument("-n", "--clientName", type=str, default="{hostname}_{pid}",
-                                  help="Unique client name, you can use {hostname} and {pid} as placeholder")
-        clientParser.add_argument("-P", "--parameterOverride", type=str, default="{}",
-                                  help="JSON: the parameter dictionary is updated with these parameters. Use this to "
-                                       "set host specific options like GPU call parameters. Enclose in \" ")
-        clientParser.set_defaults(func=client)
-
-        args = parser.parse_args(argv)
-        if not len(vars(args)):
-            parser.print_help()
-        else:
-            args.func(args)
-
--- a/setup.py
+++ b/setup.py
+from setuptools import setup, __version__ as setuptools_version
+
+if int(setuptools_version.split('.')[0]) < 61:
+    raise Exception(
+        "[ERROR] pystencils requires at least setuptools version 61 to install.\n"
+        "If this error occurs during an installation via pip, it is likely that there is a conflict between "
+        "versions of setuptools installed by pip and the system package manager. "
+        "In this case, it is recommended to install pystencils into a virtual environment instead."
+    )
+
+import versioneer
+
+
+def get_cmdclass():
+    """Return the command-class mapping produced by ``versioneer.get_cmdclass()``."""
+    return versioneer.get_cmdclass()
+
+
+# Only the version and the command classes are supplied here, both via versioneer.
+# NOTE(review): the remaining package metadata presumably lives in a declarative
+# config file (pyproject.toml / setup.cfg) -- confirm.
+setup(
+    version=versioneer.get_version(),
+    cmdclass=get_cmdclass(),
+)
--- a/src/pystencils/__init__.py
+++ b/src/pystencils/__init__.py
+"""Module to generate stencil kernels in C or CUDA using sympy expressions and call them as Python functions"""
+from .enums import Backend, Target
+from . import fd
+from . import stencil as stencil
+from .assignment import Assignment, AddAugmentedAssignment, assignment_from_stencil
+from .typing.typed_sympy import TypedSymbol
+from .display_utils import get_code_obj, get_code_str, show_code, to_dot
+from .field import Field, FieldType, fields
+from .config import CreateKernelConfig
+from .cache import clear_cache
+from .kernel_decorator import kernel, kernel_config
+from .kernelcreation import create_kernel, create_staggered_kernel
+from .simp import AssignmentCollection
+from .slicing import make_slice
+from .spatial_coordinates import x_, x_staggered, x_staggered_vector, x_vector, y_, y_staggered, z_, z_staggered
+from .sympyextensions import SymbolCreator
+from .datahandling import create_data_handling
+
+# Names re-exported as the public package-level API; each one is imported above.
+__all__ = ['Field', 'FieldType', 'fields',
+           'TypedSymbol',
+           'make_slice',
+           'CreateKernelConfig',
+           'create_kernel', 'create_staggered_kernel',
+           'Target', 'Backend',
+           'show_code', 'to_dot', 'get_code_obj', 'get_code_str',
+           'AssignmentCollection',
+           'Assignment', 'AddAugmentedAssignment',
+           'assignment_from_stencil',
+           'SymbolCreator',
+           'create_data_handling',
+           'clear_cache',
+           'kernel', 'kernel_config',
+           'x_', 'y_', 'z_',
+           'x_staggered', 'y_staggered', 'z_staggered',
+           'x_vector', 'x_staggered_vector',
+           'fd',
+           'stencil']
+
+# The version string is computed at import time by the versioneer-generated
+# _version module (git keywords, 'git describe' or parent directory name).
+from . import _version
+__version__ = _version.get_versions()['version']
--- a/src/pystencils/_version.py
+++ b/src/pystencils/_version.py
+
+# This file helps to compute a version number in source trees obtained from
+# git-archive tarball (such as those provided by GitHub's download-from-tag
+# feature). Distribution tarballs (built by setup.py sdist) and build
+# directories (produced by setup.py build) will contain a much shorter file
+# that just contains the computed version number.
+
+# This file is released into the public domain.
+# Generated by versioneer-0.29
+# https://github.com/python-versioneer/python-versioneer
+
+"""Git implementation of _version.py."""
+
+import errno
+import os
+import re
+import subprocess
+import sys
+from typing import Any, Callable, Dict, List, Optional, Tuple
+import functools
+
+
+def get_keywords() -> Dict[str, str]:
+    """Get the keywords needed to look up the version information.
+
+    Returns:
+        A dict with the keys ``"refnames"``, ``"full"`` and ``"date"`` holding
+        the three keyword strings assigned below.
+    """
+    # these strings will be replaced by git during git-archive.
+    # setup.py/versioneer.py will grep for the variable names, so they must
+    # each be defined on a line of their own. _version.py will just call
+    # get_keywords().
+    git_refnames = "$Format:%d$"
+    git_full = "$Format:%H$"
+    git_date = "$Format:%ci$"
+    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
+    return keywords
+
+
+class VersioneerConfig:
+    """Container for Versioneer configuration parameters."""
+
+    # version control system identifier; get_config() sets it to "git"
+    VCS: str
+    # name of the rendering style that get_versions() hands to render()
+    style: str
+    # prefix that version tags must carry; it is stripped from the tag name
+    tag_prefix: str
+    # directory-name prefix checked by versions_from_parentdir()
+    parentdir_prefix: str
+    # path of this file relative to the source root, used to locate the root
+    versionfile_source: str
+    # when True, the helper functions print diagnostic messages
+    verbose: bool
+
+
+def get_config() -> VersioneerConfig:
+    """Create, populate and return the VersioneerConfig() object.
+
+    Returns:
+        A fresh ``VersioneerConfig`` whose attributes are set to the fixed
+        values below.
+    """
+    # these strings are filled in when 'setup.py versioneer' creates
+    # _version.py
+    cfg = VersioneerConfig()
+    cfg.VCS = "git"
+    cfg.style = "pep440"
+    cfg.tag_prefix = "release/"
+    cfg.parentdir_prefix = "pystencils-"
+    cfg.versionfile_source = "src/pystencils/_version.py"
+    cfg.verbose = False
+    return cfg
+
+
+class NotThisMethod(Exception):
+    """Exception raised if a method is not valid for the current scenario.
+
+    The version-lookup helpers in this module raise it, and get_versions()
+    catches it to move on to the next lookup strategy.
+    """
+
+
+# NOTE(review): not referenced anywhere in the visible part of this module --
+# presumably kept for parity with versioneer.py; confirm before removing.
+LONG_VERSION_PY: Dict[str, str] = {}
+# Registry filled by register_vcs_handler(): HANDLERS[vcs][method] -> function.
+HANDLERS: Dict[str, Dict[str, Callable]] = {}
+
+
+def register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator
+    """Create decorator to mark a method as the handler of a VCS."""
+    def decorate(f: Callable) -> Callable:
+        """Store f in HANDLERS[vcs][method]."""
+        if vcs not in HANDLERS:
+            HANDLERS[vcs] = {}
+        HANDLERS[vcs][method] = f
+        return f
+    return decorate
+
+
+def run_command(
+    commands: List[str],
+    args: List[str],
+    cwd: Optional[str] = None,
+    verbose: bool = False,
+    hide_stderr: bool = False,
+    env: Optional[Dict[str, str]] = None,
+) -> Tuple[Optional[str], Optional[int]]:
+    """Call the given command(s)."""
+    assert isinstance(commands, list)
+    process = None
+
+    popen_kwargs: Dict[str, Any] = {}
+    if sys.platform == "win32":
+        # This hides the console window if pythonw.exe is used
+        startupinfo = subprocess.STARTUPINFO()
+        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+        popen_kwargs["startupinfo"] = startupinfo
+
+    for command in commands:
+        try:
+            dispcmd = str([command] + args)
+            # remember shell=False, so use git.cmd on windows, not just git
+            process = subprocess.Popen([command] + args, cwd=cwd, env=env,
+                                       stdout=subprocess.PIPE,
+                                       stderr=(subprocess.PIPE if hide_stderr
+                                               else None), **popen_kwargs)
+            break
+        except OSError as e:
+            if e.errno == errno.ENOENT:
+                continue
+            if verbose:
+                print("unable to run %s" % dispcmd)
+                print(e)
+            return None, None
+    else:
+        if verbose:
+            print("unable to find command, tried %s" % (commands,))
+        return None, None
+    stdout = process.communicate()[0].strip().decode()
+    if process.returncode != 0:
+        if verbose:
+            print("unable to run %s (error)" % dispcmd)
+            print("stdout was %s" % stdout)
+        return None, process.returncode
+    return stdout, process.returncode
+
+
+def versions_from_parentdir(
+    parentdir_prefix: str,
+    root: str,
+    verbose: bool,
+) -> Dict[str, Any]:
+    """Try to determine the version from the parent directory name.
+
+    Source tarballs conventionally unpack into a directory that includes both
+    the project name and a version string. We will also support searching up
+    two directory levels for an appropriately named parent directory
+    """
+    rootdirs = []
+
+    for _ in range(3):
+        dirname = os.path.basename(root)
+        if dirname.startswith(parentdir_prefix):
+            return {"version": dirname[len(parentdir_prefix):],
+                    "full-revisionid": None,
+                    "dirty": False, "error": None, "date": None}
+        rootdirs.append(root)
+        root = os.path.dirname(root)  # up a level
+
+    if verbose:
+        print("Tried directories %s but none started with prefix %s" %
+              (str(rootdirs), parentdir_prefix))
+    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
+
+
+@register_vcs_handler("git", "get_keywords")
+def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
+    """Extract version information from the given file."""
+    # the code embedded in _version.py can just fetch the value of these
+    # keywords. When used from setup.py, we don't want to import _version.py,
+    # so we do it with a regexp instead. This function is not used from
+    # _version.py.
+    keywords: Dict[str, str] = {}
+    try:
+        with open(versionfile_abs, "r") as fobj:
+            for line in fobj:
+                if line.strip().startswith("git_refnames ="):
+                    mo = re.search(r'=\s*"(.*)"', line)
+                    if mo:
+                        keywords["refnames"] = mo.group(1)
+                if line.strip().startswith("git_full ="):
+                    mo = re.search(r'=\s*"(.*)"', line)
+                    if mo:
+                        keywords["full"] = mo.group(1)
+                if line.strip().startswith("git_date ="):
+                    mo = re.search(r'=\s*"(.*)"', line)
+                    if mo:
+                        keywords["date"] = mo.group(1)
+    except OSError:
+        pass
+    return keywords
+
+
+@register_vcs_handler("git", "keywords")
+def git_versions_from_keywords(
+    keywords: Dict[str, str],
+    tag_prefix: str,
+    verbose: bool,
+) -> Dict[str, Any]:
+    """Get version information from git keywords."""
+    if "refnames" not in keywords:
+        raise NotThisMethod("Short version file found")
+    date = keywords.get("date")
+    if date is not None:
+        # Use only the last line.  Previous lines may contain GPG signature
+        # information.
+        date = date.splitlines()[-1]
+
+        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
+        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
+        # -like" string, which we must then edit to make compliant), because
+        # it's been around since git-1.5.3, and it's too difficult to
+        # discover which version we're using, or to work around using an
+        # older one.
+        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+    refnames = keywords["refnames"].strip()
+    if refnames.startswith("$Format"):
+        if verbose:
+            print("keywords are unexpanded, not using")
+        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
+    refs = {r.strip() for r in refnames.strip("()").split(",")}
+    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
+    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
+    TAG = "tag: "
+    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
+    if not tags:
+        # Either we're using git < 1.8.3, or there really are no tags. We use
+        # a heuristic: assume all version tags have a digit. The old git %d
+        # expansion behaves like git log --decorate=short and strips out the
+        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
+        # between branches and tags. By ignoring refnames without digits, we
+        # filter out many common branch names like "release" and
+        # "stabilization", as well as "HEAD" and "master".
+        tags = {r for r in refs if re.search(r'\d', r)}
+        if verbose:
+            print("discarding '%s', no digits" % ",".join(refs - tags))
+    if verbose:
+        print("likely tags: %s" % ",".join(sorted(tags)))
+    for ref in sorted(tags):
+        # sorting will prefer e.g. "2.0" over "2.0rc1"
+        if ref.startswith(tag_prefix):
+            r = ref[len(tag_prefix):]
+            # Filter out refs that exactly match prefix or that don't start
+            # with a number once the prefix is stripped (mostly a concern
+            # when prefix is '')
+            if not re.match(r'\d', r):
+                continue
+            if verbose:
+                print("picking %s" % r)
+            return {"version": r,
+                    "full-revisionid": keywords["full"].strip(),
+                    "dirty": False, "error": None,
+                    "date": date}
+    # no suitable tags, so version is "0+unknown", but full hex is still there
+    if verbose:
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
+            "full-revisionid": keywords["full"].strip(),
+            "dirty": False, "error": "no suitable tags", "date": None}
+
+
+@register_vcs_handler("git", "pieces_from_vcs")
+def git_pieces_from_vcs(
+    tag_prefix: str,
+    root: str,
+    verbose: bool,
+    runner: Callable = run_command
+) -> Dict[str, Any]:
+    """Get version from 'git describe' in the root of the source tree.
+
+    This only gets called if the git-archive 'subst' keywords were *not*
+    expanded, and _version.py hasn't already been rewritten with a short
+    version string, meaning we're inside a checked out source tree.
+    """
+    GITS = ["git"]
+    if sys.platform == "win32":
+        GITS = ["git.cmd", "git.exe"]
+
+    # GIT_DIR can interfere with correct operation of Versioneer.
+    # It may be intended to be passed to the Versioneer-versioned project,
+    # but that should not change where we get our version from.
+    env = os.environ.copy()
+    env.pop("GIT_DIR", None)
+    runner = functools.partial(runner, env=env)
+
+    _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
+                   hide_stderr=not verbose)
+    if rc != 0:
+        if verbose:
+            print("Directory %s not under git control" % root)
+        raise NotThisMethod("'git rev-parse --git-dir' returned error")
+
+    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
+    # if there isn't one, this yields HEX[-dirty] (no NUM)
+    describe_out, rc = runner(GITS, [
+        "describe", "--tags", "--dirty", "--always", "--long",
+        "--match", f"{tag_prefix}[[:digit:]]*"
+    ], cwd=root)
+    # --long was added in git-1.5.5
+    if describe_out is None:
+        raise NotThisMethod("'git describe' failed")
+    describe_out = describe_out.strip()
+    full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
+    if full_out is None:
+        raise NotThisMethod("'git rev-parse' failed")
+    full_out = full_out.strip()
+
+    pieces: Dict[str, Any] = {}
+    pieces["long"] = full_out
+    pieces["short"] = full_out[:7]  # maybe improved later
+    pieces["error"] = None
+
+    branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
+                             cwd=root)
+    # --abbrev-ref was added in git-1.6.3
+    if rc != 0 or branch_name is None:
+        raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
+    branch_name = branch_name.strip()
+
+    if branch_name == "HEAD":
+        # If we aren't exactly on a branch, pick a branch which represents
+        # the current commit. If all else fails, we are on a branchless
+        # commit.
+        branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
+        # --contains was added in git-1.5.4
+        if rc != 0 or branches is None:
+            raise NotThisMethod("'git branch --contains' returned error")
+        branches = branches.split("\n")
+
+        # Remove the first line if we're running detached
+        if "(" in branches[0]:
+            branches.pop(0)
+
+        # Strip off the leading "* " from the list of branches.
+        branches = [branch[2:] for branch in branches]
+        if "master" in branches:
+            branch_name = "master"
+        elif not branches:
+            branch_name = None
+        else:
+            # Pick the first branch that is returned. Good or bad.
+            branch_name = branches[0]
+
+    pieces["branch"] = branch_name
+
+    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
+    # TAG might have hyphens.
+    git_describe = describe_out
+
+    # look for -dirty suffix
+    dirty = git_describe.endswith("-dirty")
+    pieces["dirty"] = dirty
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" in git_describe:
+        # TAG-NUM-gHEX
+        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+        if not mo:
+            # unparsable. Maybe git-describe is misbehaving?
+            pieces["error"] = ("unable to parse git-describe output: '%s'"
+                               % describe_out)
+            return pieces
+
+        # tag
+        full_tag = mo.group(1)
+        if not full_tag.startswith(tag_prefix):
+            if verbose:
+                fmt = "tag '%s' doesn't start with prefix '%s'"
+                print(fmt % (full_tag, tag_prefix))
+            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
+                               % (full_tag, tag_prefix))
+            return pieces
+        pieces["closest-tag"] = full_tag[len(tag_prefix):]
+
+        # distance: number of commits since tag
+        pieces["distance"] = int(mo.group(2))
+
+        # commit: short hex revision ID
+        pieces["short"] = mo.group(3)
+
+    else:
+        # HEX: no tags
+        pieces["closest-tag"] = None
+        out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
+        pieces["distance"] = len(out.split())  # total number of commits
+
+    # commit date: see ISO-8601 comment in git_versions_from_keywords()
+    date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
+    # Use only the last line.  Previous lines may contain GPG signature
+    # information.
+    date = date.splitlines()[-1]
+    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+
+    return pieces
+
+
+def plus_or_dot(pieces: Dict[str, Any]) -> str:
+    """Return a + if we don't already have one, else return a ."""
+    if "+" in pieces.get("closest-tag", ""):
+        return "."
+    return "+"
+
+
+def render_pep440(pieces: Dict[str, Any]) -> str:
+    """Build up version string, with post-release "local version identifier".
+
+    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
+    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
+
+    Exceptions:
+    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_branch(pieces: Dict[str, Any]) -> str:
+    """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
+
+    The ".dev0" means not master branch. Note that .dev0 sorts backwards
+    (a feature branch will appear "older" than the master branch).
+
+    Exceptions:
+    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0"
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
+    """Split pep440 version string at the post-release segment.
+
+    Returns the release segments before the post-release and the
+    post-release version number (or -1 if no post-release segment is present).
+    """
+    vc = str.split(ver, ".post")
+    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
+
+
+def render_pep440_pre(pieces: Dict[str, Any]) -> str:
+    """TAG[.postN.devDISTANCE] -- No -dirty.
+
+    Exceptions:
+    1: no tags. 0.post0.devDISTANCE
+    """
+    if pieces["closest-tag"]:
+        if pieces["distance"]:
+            # update the post release segment
+            tag_version, post_version = pep440_split_post(pieces["closest-tag"])
+            rendered = tag_version
+            if post_version is not None:
+                rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
+            else:
+                rendered += ".post0.dev%d" % (pieces["distance"])
+        else:
+            # no commits, use the tag as the version
+            rendered = pieces["closest-tag"]
+    else:
+        # exception #1
+        rendered = "0.post0.dev%d" % pieces["distance"]
+    return rendered
+
+
+def render_pep440_post(pieces: Dict[str, Any]) -> str:
+    """TAG[.postDISTANCE[.dev0]+gHEX] .
+
+    The ".dev0" means dirty. Note that .dev0 sorts backwards
+    (a dirty tree will appear "older" than the corresponding clean one),
+    but you shouldn't be releasing software with -dirty anyways.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+    return rendered
+
+
+def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
+    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+
+    The ".dev0" means not master branch.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_old(pieces: Dict[str, Any]) -> str:
+    """TAG[.postDISTANCE[.dev0]] .
+
+    The ".dev0" means dirty.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces: Dict[str, Any]) -> str:
+    """TAG[-DISTANCE-gHEX][-dirty].
+
+    Like 'git describe --tags --dirty --always'.
+
+    Exceptions:
+    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces: Dict[str, Any]) -> str:
+    """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always -long'.
+    The distance/hash is unconditional.
+
+    Exceptions:
+    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
+    """Render the given version pieces into the requested style."""
+    if pieces["error"]:
+        return {"version": "unknown",
+                "full-revisionid": pieces.get("long"),
+                "dirty": None,
+                "error": pieces["error"],
+                "date": None}
+
+    if not style or style == "default":
+        style = "pep440"  # the default
+
+    if style == "pep440":
+        rendered = render_pep440(pieces)
+    elif style == "pep440-branch":
+        rendered = render_pep440_branch(pieces)
+    elif style == "pep440-pre":
+        rendered = render_pep440_pre(pieces)
+    elif style == "pep440-post":
+        rendered = render_pep440_post(pieces)
+    elif style == "pep440-post-branch":
+        rendered = render_pep440_post_branch(pieces)
+    elif style == "pep440-old":
+        rendered = render_pep440_old(pieces)
+    elif style == "git-describe":
+        rendered = render_git_describe(pieces)
+    elif style == "git-describe-long":
+        rendered = render_git_describe_long(pieces)
+    else:
+        raise ValueError("unknown style '%s'" % style)
+
+    return {"version": rendered, "full-revisionid": pieces["long"],
+            "dirty": pieces["dirty"], "error": None,
+            "date": pieces.get("date")}
+
+
+def get_versions() -> Dict[str, Any]:
+    """Get version information or return default if unable to do so.
+
+    The lookup strategies are tried in this order, each one being skipped
+    when it raises NotThisMethod: expanded git-archive keywords, the git
+    working tree ('git describe'), and the parent directory name.
+
+    Returns:
+        A dict with the keys ``"version"``, ``"full-revisionid"``,
+        ``"dirty"``, ``"error"`` and ``"date"``. If every strategy fails the
+        version is ``"0+unknown"`` and ``"error"`` describes the failure.
+    """
+    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
+    # __file__, we can work backwards from there to the root. Some
+    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
+    # case we can only use expanded keywords.
+
+    cfg = get_config()
+    verbose = cfg.verbose
+
+    try:
+        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
+                                          verbose)
+    except NotThisMethod:
+        pass
+
+    try:
+        root = os.path.realpath(__file__)
+        # versionfile_source is the relative path from the top of the source
+        # tree (where the .git directory might live) to this file. Invert
+        # this to find the root from __file__.
+        for _ in cfg.versionfile_source.split('/'):
+            root = os.path.dirname(root)
+    except NameError:
+        # __file__ is not defined in this environment
+        return {"version": "0+unknown", "full-revisionid": None,
+                "dirty": None,
+                "error": "unable to find root of source tree",
+                "date": None}
+
+    try:
+        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
+        return render(pieces, cfg.style)
+    except NotThisMethod:
+        pass
+
+    try:
+        if cfg.parentdir_prefix:
+            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+    except NotThisMethod:
+        pass
+
+    return {"version": "0+unknown", "full-revisionid": None,
+            "dirty": None,
+            "error": "unable to compute version", "date": None}
--- a/src/pystencils/alignedarray.py
+++ b/src/pystencils/alignedarray.py
+import numpy as np
+
+
+def aligned_empty(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, order='C', align_inner_coordinate=True):
+    """
+    Creates an aligned empty numpy array
+
+    Args:
+        shape: size of the array
+        byte_alignment: alignment in bytes, for the start address of the array holds (a % byte_alignment) == 0
+                        By default, use the maximum required by the CPU (or 512 bits if this cannot be detected).
+                        When 'cacheline' is specified, the size of a cache line is used.
+        dtype: numpy data type
+        byte_offset: offset in bytes for position that should be aligned i.e. (a+byte_offset) % byte_alignment == 0
+                    typically used to align first inner cell instead of ghost layer
+        order: storage linearization order
+        align_inner_coordinate: if True, the start of the innermost coordinate lines are aligned as well
+    """
+    if byte_alignment is True or byte_alignment == 'cacheline':
+        from pystencils.backends.simd_instruction_sets import (get_supported_instruction_sets, get_cacheline_size,
+                                                               get_vector_instruction_set)
+
+        instruction_sets = get_supported_instruction_sets()
+        if instruction_sets is None:
+            byte_alignment = 64
+        elif byte_alignment == 'cacheline':
+            cacheline_sizes = [get_cacheline_size(is_name) for is_name in instruction_sets]
+            if all([s is None for s in cacheline_sizes]) or \
+                    max([s for s in cacheline_sizes if s is not None]) > 0x100000:
+                widths = [get_vector_instruction_set(dtype, is_name)['width'] * np.dtype(dtype).itemsize
+                          for is_name in instruction_sets
+                          if type(get_vector_instruction_set(dtype, is_name)['width']) is int]
+                byte_alignment = 64 if all([s is None for s in widths]) else max(widths)
+            else:
+                byte_alignment = max([s for s in cacheline_sizes if s is not None])
+        elif not any([type(get_vector_instruction_set(dtype, is_name)['width']) is int
+                      for is_name in instruction_sets]):
+            byte_alignment = 64
+        else:
+            byte_alignment = max([get_vector_instruction_set(dtype, is_name)['width'] * np.dtype(dtype).itemsize
+                                  for is_name in instruction_sets
+                                  if type(get_vector_instruction_set(dtype, is_name)['width']) is int])
+    if (not align_inner_coordinate) or (not hasattr(shape, '__len__')):
+        size = np.prod(shape)
+        d = np.dtype(dtype)
+        # 2 * byte_alignment instead of 1 * byte_alignment to have slack in the end such that
+        # vectorized loops can access vector_width elements further and don't require a tail loop
+        tmp = np.empty(size * d.itemsize + 2 * byte_alignment, dtype=np.uint8)
+        address = tmp.__array_interface__['data'][0]
+        offset = (byte_alignment - (address + byte_offset) % byte_alignment) % byte_alignment
+        return tmp[offset:offset + size * d.itemsize].view(dtype=d).reshape(shape, order=order)
+    else:
+        if order == 'C':
+            dim0_size = shape[-1]
+            dim0 = -1
+            dim1_size = np.prod(shape[:-1])
+        else:
+            dim0_size = shape[0]
+            dim0 = 0
+            dim1_size = np.prod(shape[1:])
+        d = np.dtype(dtype)
+
+        assert byte_alignment >= d.itemsize and byte_alignment % d.itemsize == 0
+        padding = (byte_alignment - ((dim0_size * d.itemsize) % byte_alignment)) % byte_alignment
+
+        size = dim1_size * padding + np.prod(shape) * d.itemsize
+        tmp = aligned_empty(size, byte_alignment=byte_alignment, dtype=np.uint8, byte_offset=byte_offset)
+        tmp = tmp.view(dtype=dtype)
+        shape_in_bytes = [i for i in shape]
+        shape_in_bytes[dim0] = dim0_size + padding // d.itemsize
+        tmp = tmp.reshape(shape_in_bytes, order=order)
+        if tmp.flags['C_CONTIGUOUS']:
+            tmp = tmp[..., :shape[-1]]
+        else:
+            tmp = tmp[:shape[0], ...]
+
+        return tmp
+
+
+def aligned_zeros(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, order='C', align_inner_coordinate=True):
+    arr = aligned_empty(shape, dtype=dtype, byte_offset=byte_offset,
+                        order=order, byte_alignment=byte_alignment, align_inner_coordinate=align_inner_coordinate)
+    x = np.zeros((), arr.dtype)
+    arr[...] = x
+    return arr
+
+
+def aligned_ones(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, order='C', align_inner_coordinate=True):
+    arr = aligned_empty(shape, dtype=dtype, byte_offset=byte_offset,
+                        order=order, byte_alignment=byte_alignment, align_inner_coordinate=align_inner_coordinate)
+    x = np.ones((), arr.dtype)
+    arr[...] = x
+    return arr
--- a/src/pystencils/assignment.py
+++ b/src/pystencils/assignment.py
+import numpy as np
+import sympy as sp
+from sympy.codegen.ast import Assignment, AugmentedAssignment, AddAugmentedAssignment
+from sympy.printing.latex import LatexPrinter
+
+__all__ = ['Assignment', 'AugmentedAssignment', 'AddAugmentedAssignment', 'assignment_from_stencil']
+
+
+def print_assignment_latex(printer, expr):
+    binop = f"{expr.binop}=" if isinstance(expr, AugmentedAssignment) else ''
+    """sympy cannot print Assignments as Latex. Thus, this function is added to the sympy Latex printer"""
+    printed_lhs = printer.doprint(expr.lhs)
+    printed_rhs = printer.doprint(expr.rhs)
+    return fr"{printed_lhs} \leftarrow_{{{binop}}} {printed_rhs}"
+
+
+def assignment_str(assignment):
+    op = f"{assignment.binop}=" if isinstance(assignment, AugmentedAssignment) else '←'
+    return fr"{assignment.lhs} {op} {assignment.rhs}"
+
+
+# Original sympy constructor, captured before it is replaced below so that the replacement can delegate to it.
+_old_new = sp.codegen.ast.Assignment.__new__
+
+
+# TODO Typing Part2 add default type, default_float_type, default_int_type and use sane defaults
+def _Assignment__new__(cls, lhs, rhs, *args, **kwargs):
+    """Constructor replacement that additionally supports element-wise "vector" assignments.
+
+    If both ``lhs`` and ``rhs`` are a list, tuple or sympy Matrix they must have the same length; one
+    assignment is created per pair of elements and the assignments are returned as a tuple.
+    In every other case the call is forwarded unchanged to the original constructor.
+    """
+    if isinstance(lhs, (list, tuple, sp.Matrix)) and isinstance(rhs, (list, tuple, sp.Matrix)):
+        assert len(lhs) == len(rhs), f'{lhs} and {rhs} must have same length when performing vector assignment!'
+        return tuple(_old_new(cls, a, b, *args, **kwargs) for a, b in zip(lhs, rhs))
+    return _old_new(cls, lhs, rhs, *args, **kwargs)
+
+
+# Install the string form, the extended constructor and the LaTeX printing on sympy's Assignment.
+Assignment.__str__ = assignment_str
+Assignment.__new__ = _Assignment__new__
+LatexPrinter._print_Assignment = print_assignment_latex
+
+# Augmented assignments get the same string form and LaTeX printing.
+AugmentedAssignment.__str__ = assignment_str
+LatexPrinter._print_AugmentedAssignment = print_assignment_latex
+
+# Hash mutable dense matrices by the tuple of their entries.
+sp.MutableDenseMatrix.__hash__ = lambda self: hash(tuple(self))
+
+
+def assignment_from_stencil(stencil_array, input_field, output_field,
+                            normalization_factor=None, order='visual') -> Assignment:
+    """Creates an assignment
+
+    Args:
+        stencil_array: nested list of numpy array defining the stencil weights
+        input_field: field or field access, defining where the stencil should be applied to
+        output_field: field or field access where the result is written to
+        normalization_factor: optional normalization factor for the stencil
+        order: defines how the stencil_array is interpreted. Possible values are 'visual' and 'numpy'.
+               For details see examples
+
+    Returns:
+        Assignment that can be used to create a kernel
+
+    Examples:
+        >>> import pystencils as ps
+        >>> f, g = ps.fields("f, g: [2D]")
+        >>> stencil = [[0, 2, 0],
+        ...            [3, 4, 5],
+        ...            [0, 6, 0]]
+
+        By default 'visual ordering is used - i.e. the stencil is applied as the nested lists are written down
+        >>> expected_output = Assignment(g[0, 0], 3*f[-1, 0] + 6*f[0, -1] + 4*f[0, 0] + 2*f[0, 1] + 5*f[1, 0])
+        >>> assignment_from_stencil(stencil, f, g, order='visual') == expected_output
+        True
+
+        'numpy' ordering uses the first coordinate of the stencil array for x offset, second for y offset etc.
+        >>> expected_output = Assignment(g[0, 0], 2*f[-1, 0] + 3*f[0, -1] + 4*f[0, 0] + 5*f[0, 1] + 6*f[1, 0])
+        >>> assignment_from_stencil(stencil, f, g, order='numpy') == expected_output
+        True
+
+        You can also pass field accesses to apply the stencil at an already shifted position:
+        >>> expected_output = Assignment(g[2, 0], 3*f[0, 0] + 6*f[1, -1] + 4*f[1, 0] + 2*f[1, 1] + 5*f[2, 0])
+        >>> assignment_from_stencil(stencil, f[1, 0], g[2, 0]) == expected_output
+        True
+    """
+    from pystencils.field import Field
+
+    stencil_array = np.array(stencil_array)
+    if order == 'visual':
+        stencil_array = np.swapaxes(stencil_array, 0, 1)
+        stencil_array = np.flip(stencil_array, axis=1)
+    elif order == 'numpy':
+        pass
+    else:
+        raise ValueError("'order' has to be either 'visual' or 'numpy'")
+
+    if isinstance(input_field, Field):
+        input_field = input_field.center
+    if isinstance(output_field, Field):
+        output_field = output_field.center
+
+    rhs = 0
+    offset = tuple(s // 2 for s in stencil_array.shape)
+
+    for index, factor in np.ndenumerate(stencil_array):
+        shift = tuple(i - o for i, o in zip(index, offset))
+        rhs += factor * input_field.get_shifted(*shift)
+
+    if normalization_factor:
+        rhs *= normalization_factor
+
+    return Assignment(output_field, rhs)
--- a/src/pystencils/astnodes.py
+++ b/src/pystencils/astnodes.py
+import collections.abc
+import itertools
+import uuid
+from typing import Any, List, Optional, Sequence, Set, Union
+
+import sympy as sp
+
+from pystencils.assignment import Assignment
+from pystencils.enums import Target, Backend
+from pystencils.field import Field
+from pystencils.sympyextensions import fast_subs
+from pystencils.typing import (create_type, get_next_parent_of_type,
+                               FieldPointerSymbol, FieldShapeSymbol, FieldStrideSymbol, TypedSymbol, CFunction)
+
+NodeOrExpr = Union['Node', sp.Expr]
+
+
+class Node:
+    """Base class for all AST nodes."""
+
+    def __init__(self, parent: Optional['Node'] = None):
+        self.parent = parent
+
+    @property
+    def args(self) -> List[NodeOrExpr]:
+        """Returns all arguments/children of this node."""
+        raise NotImplementedError()
+
+    @property
+    def symbols_defined(self) -> Set[sp.Symbol]:
+        """Set of symbols which are defined by this node."""
+        raise NotImplementedError()
+
+    @property
+    def undefined_symbols(self) -> Set[sp.Symbol]:
+        """Symbols which are used but are not defined inside this node."""
+        raise NotImplementedError()
+
+    def subs(self, subs_dict) -> None:
+        """Inplace! Substitute, similar to sympy's but modifies the AST inplace."""
+        for i, a in enumerate(self.args):
+            result = a.subs(subs_dict)
+            if isinstance(a, sp.Expr):  # sympy expressions' subs is out-of-place
+                self.args[i] = result
+            else:  # all other should be in-place
+                assert result is None
+
+    @property
+    def func(self):
+        return self.__class__
+
+    def atoms(self, arg_type) -> Set[Any]:
+        """Returns a set of all descendants recursively, which are an instance of the given type."""
+        result = set()
+        for arg in self.args:
+            if isinstance(arg, arg_type):
+                result.add(arg)
+            result.update(arg.atoms(arg_type))
+        return result
+
+
+class Conditional(Node):
+    """Conditional that maps to a 'if' statement in C/C++.
+
+    Try to avoid using this node inside of loops, since currently this construction can not be vectorized.
+    Consider using assignments with sympy.Piecewise in this case.
+
+    Args:
+        condition_expr: sympy relational expression
+        true_block: block which is run if conditional is true
+        false_block: optional block which is run if conditional is false
+    """
+
+    def __init__(self, condition_expr: sp.Basic, true_block: Union['Block', 'SympyAssignment'],
+                 false_block: Optional['Block'] = None) -> None:
+        super(Conditional, self).__init__(parent=None)
+
+        self.condition_expr = condition_expr
+
+        def handle_child(c):
+            if c is None:
+                return None
+            if not isinstance(c, Block):
+                c = Block([c])
+            c.parent = self
+            return c
+
+        self.true_block = handle_child(true_block)
+        self.false_block = handle_child(false_block)
+
+    def subs(self, subs_dict):
+        self.true_block.subs(subs_dict)
+        if self.false_block:
+            self.false_block.subs(subs_dict)
+        self.condition_expr = self.condition_expr.subs(subs_dict)
+
+    @property
+    def args(self):
+        result = [self.condition_expr, self.true_block]
+        if self.false_block:
+            result.append(self.false_block)
+        return result
+
+    @property
+    def symbols_defined(self):
+        return set()
+
+    @property
+    def undefined_symbols(self):
+        result = self.true_block.undefined_symbols
+        if self.false_block:
+            result.update(self.false_block.undefined_symbols)
+        if hasattr(self.condition_expr, 'atoms'):
+            result.update(self.condition_expr.atoms(sp.Symbol))
+        return result
+
+    def __str__(self):
+        return self.__repr__()
+
+    def __repr__(self):
+        result = f'if:({self.condition_expr!r}) '
+        if self.true_block:
+            result += f'\n\t{self.true_block}) '
+        if self.false_block:
+            result = 'else: '
+            result += f'\n\t{self.false_block} '
+
+        return result
+
+    def replace_by_true_block(self):
+        """Replaces the conditional by its True block"""
+        self.parent.replace(self, [self.true_block])
+
+    def replace_by_false_block(self):
+        """Replaces the conditional by its False block"""
+        self.parent.replace(self, [self.false_block] if self.false_block else [])
+
+
+class KernelFunction(Node):
+    class Parameter:
+        """Function parameter.
+
+        Each undefined symbol in a `KernelFunction` node becomes a parameter to the function.
+        Parameters are either symbols introduced by the user that never occur on the left hand side of an
+        Assignment, or are related to fields/arrays passed to the function.
+
+        A parameter consists of the typed symbol (symbol property). For field related parameters this is a symbol
+        defined in pystencils.kernelparameters.
+        If the parameter is related to one or multiple fields, these fields are referenced in the fields property.
+        """
+
+        def __init__(self, symbol, fields):
+            self.symbol = symbol  # type: TypedSymbol
+            self.fields = fields  # type: Sequence[Field]
+
+        def __repr__(self):
+            return repr(self.symbol)
+
+        @property
+        def is_field_stride(self):
+            return isinstance(self.symbol, FieldStrideSymbol)
+
+        @property
+        def is_field_shape(self):
+            return isinstance(self.symbol, FieldShapeSymbol)
+
+        @property
+        def is_field_pointer(self):
+            return isinstance(self.symbol, FieldPointerSymbol)
+
+        @property
+        def is_field_parameter(self):
+            return self.is_field_pointer or self.is_field_shape or self.is_field_stride
+
+        @property
+        def field_name(self):
+            return self.fields[0].name
+
+    def __init__(self, body, target: Target, backend: Backend, compile_function, ghost_layers,
+                 function_name: str = "kernel",
+                 assignments=None):
+        super(KernelFunction, self).__init__()
+        self._body = body
+        body.parent = self
+        self.function_name = function_name
+        self._body.parent = self
+        self.ghost_layers = ghost_layers
+        self._target = target
+        self._backend = backend
+        # these variables are assumed to be global, so no automatic parameter is generated for them
+        self.global_variables = set()
+        self.instruction_set = None  # used in `vectorize` function to tell the backend which i.s. (SSE,AVX) to use
+        # function that compiles the node to a Python callable, is set by the backends
+        self._compile_function = compile_function
+        self.assignments = assignments
+        # If nontemporal stores are activated together with the Neon instruction set it results in cacheline zeroing
+        # For cacheline zeroing the information of the field size for each field is needed. Thus, in this case
+        # all field sizes are kernel parameters and not just the common field size used for the loops
+        self.use_all_written_field_sizes = False
+
+    @property
+    def target(self):
+        """See pystencils.Target"""
+        return self._target
+
+    @property
+    def backend(self):
+        """Backend for generating the code: `Backend`"""
+        return self._backend
+
+    @property
+    def symbols_defined(self):
+        return set()
+
+    @property
+    def undefined_symbols(self):
+        return set()
+
+    @property
+    def body(self):
+        return self._body
+
+    @body.setter
+    def body(self, value):
+        self._body = value
+        self._body.parent = self
+
+    @property
+    def args(self):
+        return self._body,
+
+    @property
+    def fields_accessed(self) -> Set[Field]:
+        """Set of Field instances: fields which are accessed inside this kernel function"""
+        return set(o.field for o in itertools.chain(self.atoms(ResolvedFieldAccess)))
+
+    @property
+    def fields_written(self) -> Set[Field]:
+        assignments = self.atoms(SympyAssignment)
+        return set().union(itertools.chain.from_iterable([f.field for f in a.lhs.free_symbols if hasattr(f, 'field')]
+                                                         for a in assignments))
+
+    @property
+    def fields_read(self) -> Set[Field]:
+        assignments = self.atoms(SympyAssignment)
+        return set().union(itertools.chain.from_iterable([f.field for f in a.rhs.free_symbols if hasattr(f, 'field')]
+                                                         for a in assignments))
+
+    def get_parameters(self) -> Sequence['KernelFunction.Parameter']:
+        """Returns list of parameters for this function.
+
+        This function is expensive, cache the result where possible!
+        """
+        field_map = {f.name: f for f in self.fields_accessed}
+        sizes = set()
+
+        if self.use_all_written_field_sizes:
+            sizes = set().union(*(a.shape[:a.spatial_dimensions] for a in self.fields_written))
+            sizes = filter(lambda s: isinstance(s, FieldShapeSymbol), sizes)
+
+        def get_fields(symbol):
+            if hasattr(symbol, 'field_name'):
+                return field_map[symbol.field_name],
+            elif hasattr(symbol, 'field_names'):
+                return tuple(field_map[fn] for fn in symbol.field_names)
+            return ()
+
+        argument_symbols = self._body.undefined_symbols - self.global_variables
+        argument_symbols.update(sizes)
+        parameters = [self.Parameter(symbol, get_fields(symbol)) for symbol in argument_symbols]
+        if hasattr(self, 'indexing'):
+            parameters += [self.Parameter(s, []) for s in self.indexing.symbolic_parameters()]
+        # Exclude paramters of type CFunction. These parameters will result in a C function call that will be handled
+        # by including a respective header file in the compute kernel. Hence, it is not a free parameter.
+        parameters = [p for p in parameters if not isinstance(p.symbol, CFunction)]
+        parameters.sort(key=lambda p: p.symbol.name)
+        return parameters
+
+    def __str__(self):
+        params = [p.symbol for p in self.get_parameters()]
+        return '{0} {1}({2})\n{3}'.format(type(self).__name__, self.function_name, params,
+                                          ("\t" + "\t".join(str(self.body).splitlines(True))))
+
+    def __repr__(self):
+        params = [p.symbol for p in self.get_parameters()]
+        return f'{type(self).__name__} {self.function_name}({params})'
+
+    def compile(self, *args, **kwargs):
+        if self._compile_function is None:
+            raise ValueError("No compile-function provided for this KernelFunction node")
+        return self._compile_function(self, *args, **kwargs)
+
+
+class SkipIteration(Node):
+    @property
+    def args(self):
+        return []
+
+    @property
+    def symbols_defined(self):
+        return set()
+
+    @property
+    def undefined_symbols(self):
+        return set()
+
+
+class Block(Node):
+    def __init__(self, nodes: Union[Node, List[Node]]):
+        super(Block, self).__init__()
+        if not isinstance(nodes, list):
+            nodes = [nodes]
+        self._nodes = nodes
+        self.parent = None
+        for n in self._nodes:
+            try:
+                n.parent = self
+            except AttributeError:
+                pass
+
+    @property
+    def args(self):
+        return self._nodes
+
+    def subs(self, subs_dict) -> None:
+        for a in self.args:
+            a.subs(subs_dict)
+
+    def fast_subs(self, subs_dict, skip=None):
+        self._nodes = [fast_subs(a, subs_dict, skip) for a in self._nodes]
+        return self
+
+    def insert_front(self, node, if_not_exists=False):
+        if if_not_exists and len(self._nodes) > 0 and self._nodes[0] == node:
+            return
+        if isinstance(node, collections.abc.Iterable):
+            node = list(node)
+            for n in node:
+                n.parent = self
+
+            self._nodes = node + self._nodes
+        else:
+            node.parent = self
+            self._nodes.insert(0, node)
+
+    def insert_before(self, new_node, insert_before, if_not_exists=False):
+        new_node.parent = self
+        assert self._nodes.count(insert_before) == 1
+        idx = self._nodes.index(insert_before)
+
+        if not if_not_exists or self._nodes[idx] != new_node:
+            self._nodes.insert(idx, new_node)
+
+    def insert_after(self, new_node, insert_after, if_not_exists=False):
+        new_node.parent = self
+        assert self._nodes.count(insert_after) == 1
+        idx = self._nodes.index(insert_after) + 1
+
+        if not if_not_exists or not (self._nodes[idx - 1] == new_node
+                                     or (idx < len(self._nodes) and self._nodes[idx] == new_node)):
+            self._nodes.insert(idx, new_node)
+
+    def append(self, node):
+        if isinstance(node, list) or isinstance(node, tuple):
+            for n in node:
+                n.parent = self
+                self._nodes.append(n)
+        else:
+            node.parent = self
+            self._nodes.append(node)
+
+    def take_child_nodes(self):
+        tmp = self._nodes
+        self._nodes = []
+        return tmp
+
+    def replace(self, child, replacements):
+        assert self._nodes.count(child) == 1
+        idx = self._nodes.index(child)
+        del self._nodes[idx]
+        if type(replacements) is list:
+            for e in replacements:
+                e.parent = self
+            self._nodes = self._nodes[:idx] + replacements + self._nodes[idx:]
+        else:
+            replacements.parent = self
+            self._nodes.insert(idx, replacements)
+
+    @property
+    def symbols_defined(self):
+        result = set()
+        for a in self.args:
+            if isinstance(a, Assignment):
+                result.update(a.free_symbols)
+            else:
+                result.update(a.symbols_defined)
+        return result
+
+    @property
+    def undefined_symbols(self):
+        result = set()
+        defined_symbols = set()
+        for a in self.args:
+            if isinstance(a, Assignment):
+                result.update(a.free_symbols)
+                defined_symbols.update({a.lhs})
+            else:
+                result.update(a.undefined_symbols)
+                defined_symbols.update(a.symbols_defined)
+        return result - defined_symbols
+
+    def __str__(self):
+        return "Block " + ''.join('{!s}\n'.format(node) for node in self._nodes)
+
+    def __repr__(self):
+        return "Block"
+
+
+class PragmaBlock(Block):
+    def __init__(self, pragma_line, nodes):
+        super(PragmaBlock, self).__init__(nodes)
+        self.pragma_line = pragma_line
+        for n in nodes:
+            n.parent = self
+
+    def __repr__(self):
+        return self.pragma_line
+
+
+class LoopOverCoordinate(Node):
+    LOOP_COUNTER_NAME_PREFIX = "ctr"
+    BLOCK_LOOP_COUNTER_NAME_PREFIX = "_blockctr"
+
+    def __init__(self, body, coordinate_to_loop_over, start, stop, step=1, is_block_loop=False, custom_loop_ctr=None):
+        super(LoopOverCoordinate, self).__init__(parent=None)
+        self.body = body
+        body.parent = self
+        self.coordinate_to_loop_over = coordinate_to_loop_over
+        self.start = start
+        self.stop = stop
+        self.step = step
+        self.body.parent = self
+        self.prefix_lines = []
+        self.is_block_loop = is_block_loop
+        self.custom_loop_ctr = custom_loop_ctr
+
+    def new_loop_with_different_body(self, new_body):
+        result = LoopOverCoordinate(new_body, self.coordinate_to_loop_over, self.start, self.stop,
+                                    self.step, self.is_block_loop, self.custom_loop_ctr)
+        result.prefix_lines = [prefix_line for prefix_line in self.prefix_lines]
+        return result
+
+    def subs(self, subs_dict):
+        self.body.subs(subs_dict)
+        if hasattr(self.start, "subs"):
+            self.start = self.start.subs(subs_dict)
+        if hasattr(self.stop, "subs"):
+            self.stop = self.stop.subs(subs_dict)
+        if hasattr(self.step, "subs"):
+            self.step = self.step.subs(subs_dict)
+
+    def fast_subs(self, subs_dict, skip=None):
+        self.body = fast_subs(self.body, subs_dict, skip)
+        if isinstance(self.start, sp.Basic):
+            self.start = fast_subs(self.start, subs_dict, skip)
+        if isinstance(self.stop, sp.Basic):
+            self.stop = fast_subs(self.stop, subs_dict, skip)
+        if isinstance(self.step, sp.Basic):
+            self.step = fast_subs(self.step, subs_dict, skip)
+        return self
+
+    @property
+    def args(self):
+        result = [self.body]
+        for e in [self.start, self.stop, self.step]:
+            if hasattr(e, "args"):
+                result.append(e)
+        return result
+
+    def replace(self, child, replacement):
+        if child == self.body:
+            self.body = replacement
+        elif child == self.start:
+            self.start = replacement
+        elif child == self.step:
+            self.step = replacement
+        elif child == self.stop:
+            self.stop = replacement
+
+    @property
+    def symbols_defined(self):
+        return {self.loop_counter_symbol}
+
+    @property
+    def undefined_symbols(self):
+        result = self.body.undefined_symbols
+        for possible_symbol in [self.start, self.stop, self.step]:
+            if isinstance(possible_symbol, Node) or isinstance(possible_symbol, sp.Basic):
+                result.update(possible_symbol.atoms(sp.Symbol))
+        return result - {self.loop_counter_symbol}
+
+    @staticmethod
+    def get_loop_counter_name(coordinate_to_loop_over):
+        return f"{LoopOverCoordinate.LOOP_COUNTER_NAME_PREFIX}_{coordinate_to_loop_over}"
+
+    @staticmethod
+    def get_block_loop_counter_name(coordinate_to_loop_over):
+        return f"{LoopOverCoordinate.BLOCK_LOOP_COUNTER_NAME_PREFIX}_{coordinate_to_loop_over}"
+
+    @property
+    def loop_counter_name(self):
+        if self.custom_loop_ctr:
+            return self.custom_loop_ctr.name
+        else:
+            if self.is_block_loop:
+                return LoopOverCoordinate.get_block_loop_counter_name(self.coordinate_to_loop_over)
+            else:
+                return LoopOverCoordinate.get_loop_counter_name(self.coordinate_to_loop_over)
+
+    @staticmethod
+    def is_loop_counter_symbol(symbol):
+        prefix = LoopOverCoordinate.LOOP_COUNTER_NAME_PREFIX
+        if not symbol.name.startswith(prefix):
+            return None
+        if symbol.dtype != create_type('int'):
+            return None
+        coordinate = int(symbol.name[len(prefix) + 1:])
+        return coordinate
+
+    @staticmethod
+    def get_loop_counter_symbol(coordinate_to_loop_over):
+        return TypedSymbol(LoopOverCoordinate.get_loop_counter_name(coordinate_to_loop_over), 'int', nonnegative=True)
+
+    @staticmethod
+    def get_block_loop_counter_symbol(coordinate_to_loop_over):
+        return TypedSymbol(LoopOverCoordinate.get_block_loop_counter_name(coordinate_to_loop_over),
+                           'int',
+                           nonnegative=True)
+
+    @property
+    def loop_counter_symbol(self):
+        if self.custom_loop_ctr:
+            return self.custom_loop_ctr
+        else:
+            if self.is_block_loop:
+                return self.get_block_loop_counter_symbol(self.coordinate_to_loop_over)
+            else:
+                return self.get_loop_counter_symbol(self.coordinate_to_loop_over)
+
+    @property
+    def is_outermost_loop(self):
+        return get_next_parent_of_type(self, LoopOverCoordinate) is None
+
+    @property
+    def is_innermost_loop(self):
+        return len(self.atoms(LoopOverCoordinate)) == 0
+
+    def __str__(self):
+        return 'for({!s}={!s}; {!s}<{!s}; {!s}+={!s})\n{!s}'.format(self.loop_counter_name, self.start,
+                                                                    self.loop_counter_name, self.stop,
+                                                                    self.loop_counter_name, self.step,
+                                                                    ("\t" + "\t".join(str(self.body).splitlines(True))))
+
+    def __repr__(self):
+        return 'for({!s}={!s}; {!s}<{!s}; {!s}+={!s})'.format(self.loop_counter_name, self.start,
+                                                              self.loop_counter_name, self.stop,
+                                                              self.loop_counter_name, self.step)
+
+
+class SympyAssignment(Node):
+    def __init__(self, lhs_symbol, rhs_expr, is_const=True, use_auto=False):
+        super(SympyAssignment, self).__init__(parent=None)
+        self._lhs_symbol = sp.sympify(lhs_symbol)
+        self._rhs = sp.sympify(rhs_expr)
+        self._is_const = is_const
+        self._is_declaration = self.__is_declaration()
+        self._use_auto = use_auto
+
+    def __is_declaration(self):
+        from pystencils.typing import CastFunc
+        if isinstance(self._lhs_symbol, CastFunc):
+            return False
+        if any(isinstance(self._lhs_symbol, c) for c in (Field.Access, sp.Indexed, TemporaryMemoryAllocation)):
+            return False
+        return True
+
+    @property
+    def lhs(self):
+        return self._lhs_symbol
+
+    @property
+    def rhs(self):
+        return self._rhs
+
+    @lhs.setter
+    def lhs(self, new_value):
+        self._lhs_symbol = new_value
+        self._is_declaration = self.__is_declaration()
+
+    @rhs.setter
+    def rhs(self, new_rhs_expr):
+        self._rhs = new_rhs_expr
+
+    def subs(self, subs_dict):
+        self.lhs = fast_subs(self.lhs, subs_dict)
+        self.rhs = fast_subs(self.rhs, subs_dict)
+
+    def fast_subs(self, subs_dict, skip=None):
+        self.lhs = fast_subs(self.lhs, subs_dict, skip)
+        self.rhs = fast_subs(self.rhs, subs_dict, skip)
+        return self
+
+    def optimize(self, optimizations):
+        try:
+            from sympy.codegen.rewriting import optimize
+            self.rhs = optimize(self.rhs, optimizations)
+        except Exception:
+            pass
+
+    @property
+    def args(self):
+        return [self._lhs_symbol, self.rhs]
+
+    @property
+    def symbols_defined(self):
+        if not self._is_declaration:
+            return set()
+        return {self._lhs_symbol}
+
+    @property
+    def undefined_symbols(self):
+        result = {s for s in self.rhs.free_symbols if not isinstance(s, sp.Indexed)}
+        # Add loop counters if there a field accesses
+        loop_counters = set()
+        for symbol in result:
+            if isinstance(symbol, Field.Access):
+                for i in range(len(symbol.offsets)):
+                    loop_counters.add(LoopOverCoordinate.get_loop_counter_symbol(i))
+        result.update(loop_counters)
+        
+        result.update(self._lhs_symbol.atoms(sp.Symbol))
+        
+        return result
+
+    @property
+    def is_declaration(self):
+        return self._is_declaration
+
+    @property
+    def is_const(self):
+        return self._is_const
+
+    @property
+    def use_auto(self):
+        return self._use_auto
+
+    def replace(self, child, replacement):
+        if child == self.lhs:
+            replacement.parent = self
+            self.lhs = replacement
+        elif child == self.rhs:
+            replacement.parent = self
+            self.rhs = replacement
+        else:
+            raise ValueError(f'{replacement} is not in args of {self.__class__}')
+
+    def __repr__(self):
+        return repr(self.lhs) + " ← " + repr(self.rhs)
+
+    def _repr_html_(self):
+        printed_lhs = sp.latex(self.lhs)
+        printed_rhs = sp.latex(self.rhs)
+        return f"${printed_lhs} \\leftarrow {printed_rhs}$"
+
+    def __hash__(self):
+        return hash((self.lhs, self.rhs))
+
+    def __eq__(self, other):
+        return type(self) is type(other) and (self.lhs, self.rhs) == (other.lhs, other.rhs)
+
+
+class ResolvedFieldAccess(sp.Indexed):
+    """`sp.Indexed` with a single linearized index that additionally remembers the field it belongs to.
+
+    Besides base and index, the attributes ``field``, ``offsets`` and ``idx_coordinate_values`` are stored
+    on the object exactly as passed to the constructor.
+    """
+
+    def __new__(cls, base, linearized_index, field, offsets, idx_coordinate_values):
+        if not isinstance(base, sp.IndexedBase):
+            # a bare TypedSymbol is wrapped into an IndexedBase of shape (1,)
+            assert isinstance(base, TypedSymbol)
+            base = sp.IndexedBase(base, shape=(1,))
+            assert isinstance(base.label, TypedSymbol)
+        obj = super(ResolvedFieldAccess, cls).__new__(cls, base, linearized_index)
+        obj.field = field
+        obj.offsets = offsets
+        obj.idx_coordinate_values = idx_coordinate_values
+        return obj
+
+    def _eval_subs(self, old, new):
+        # only the linearized index (args[1]) is substituted; base and field information are carried over
+        return ResolvedFieldAccess(self.args[0],
+                                   self.args[1].subs(old, new),
+                                   self.field, self.offsets, self.idx_coordinate_values)
+
+    def fast_subs(self, substitutions, skip=None):
+        """Return the substitute for this access itself, otherwise substitute in base and index.
+
+        The ``skip`` argument is not used by this implementation.
+        """
+        if self in substitutions:
+            return substitutions[self]
+        return ResolvedFieldAccess(self.args[0].subs(substitutions),
+                                   self.args[1].subs(substitutions),
+                                   self.field, self.offsets, self.idx_coordinate_values)
+
+    def _hashable_content(self):
+        # extend the content of the super class by the offsets, the repr of the index coordinate values
+        # and the hash of the field
+        super_class_contents = super(ResolvedFieldAccess, self)._hashable_content()
+        return super_class_contents + tuple(self.offsets) + (repr(self.idx_coordinate_values), hash(self.field))
+
+    @property
+    def typed_symbol(self):
+        """The label of the base."""
+        return self.base.label
+
+    def __str__(self):
+        top = super(ResolvedFieldAccess, self).__str__()
+        return f"{top} ({self.typed_symbol.dtype})"
+
+    # NOTE(review): presumably needed so that pickling/copying calls __new__ with all five arguments - confirm
+    def __getnewargs__(self):
+        return self.base, self.indices[0], self.field, self.offsets, self.idx_coordinate_values
+
+    def __getnewargs_ex__(self):
+        return (self.base, self.indices[0], self.field, self.offsets, self.idx_coordinate_values), {}
+
+
+class TemporaryMemoryAllocation(Node):
+    """Node for temporary memory buffer allocation.
+
+    Always allocates aligned memory.
+
+    Args:
+        typed_symbol: symbol used as pointer (has to be typed)
+        size: number of elements to allocate
+        align_offset: the align_offset's element is aligned
+    """
+
+    def __init__(self, typed_symbol: TypedSymbol, size, align_offset):
+        super(TemporaryMemoryAllocation, self).__init__(parent=None)
+        self.symbol = typed_symbol
+        self.size = size
+        self.headers = ['<stdlib.h>']
+        self._align_offset = align_offset
+
+    @property
+    def symbols_defined(self):
+        return {self.symbol}
+
+    @property
+    def undefined_symbols(self):
+        if isinstance(self.size, sp.Basic):
+            return self.size.atoms(sp.Symbol)
+        else:
+            return set()
+
+    @property
+    def args(self):
+        return [self.symbol]
+
+    def offset(self, byte_alignment):
+        """Number of ELEMENTS to skip for a pointer that is aligned to byte_alignment."""
+        np_dtype = self.symbol.dtype.base_type.numpy_dtype
+        assert byte_alignment % np_dtype.itemsize == 0
+        return -self._align_offset % (byte_alignment / np_dtype.itemsize)
+
+
+class TemporaryMemoryFree(Node):
+    def __init__(self, alloc_node):
+        super(TemporaryMemoryFree, self).__init__(parent=None)
+        self.alloc_node = alloc_node
+
+    @property
+    def symbol(self):
+        return self.alloc_node.symbol
+
+    def offset(self, byte_alignment):
+        return self.alloc_node.offset(byte_alignment)
+
+    @property
+    def symbols_defined(self):
+        return set()
+
+    @property
+    def undefined_symbols(self):
+        return set()
+
+    @property
+    def args(self):
+        return []
+
+
+def early_out(condition):
+    from pystencils.cpu.vectorization import vec_all
+    return Conditional(vec_all(condition), Block([SkipIteration()]))
+
+
+def get_dummy_symbol(dtype='bool'):
+    return TypedSymbol(f'dummy{uuid.uuid4().hex}', create_type(dtype))
+
+
+class SourceCodeComment(Node):
+    def __init__(self, text):
+        self.text = text
+
+    @property
+    def args(self):
+        return []
+
+    @property
+    def symbols_defined(self):
+        return set()
+
+    @property
+    def undefined_symbols(self):
+        return set()
+
+    def __str__(self):
+        return "/* " + self.text + " */"
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class EmptyLine(Node):
+    def __init__(self):
+        pass
+
+    @property
+    def args(self):
+        return []
+
+    @property
+    def symbols_defined(self):
+        return set()
+
+    @property
+    def undefined_symbols(self):
+        return set()
+
+    def __str__(self):
+        return ""
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class ConditionalFieldAccess(sp.Function):
+    """
+    :class:`pystencils.Field.Access` that is only executed if a certain condition is met.
+    Can be used, for instance, for out-of-bound checks.
+    """
+
+    def __new__(cls, field_access, outofbounds_condition, outofbounds_value=0):
+        return sp.Function.__new__(cls, field_access, outofbounds_condition, sp.S(outofbounds_value))
+
+    @property
+    def access(self):
+        return self.args[0]
+
+    @property
+    def outofbounds_condition(self):
+        return self.args[1]
+
+    @property
+    def outofbounds_value(self):
+        return self.args[2]
+
+    def __getnewargs__(self):
+        return self.access, self.outofbounds_condition, self.outofbounds_value
+
+    def __getnewargs_ex__(self):
+        return (self.access, self.outofbounds_condition, self.outofbounds_value), {}
--- a/src/pystencils/backends/__init__.py
+++ b/src/pystencils/backends/__init__.py
+from .cbackend import generate_c
+
+# Names exported by this package; extended below when the optional dot module imports cleanly.
+__all__ = ['generate_c']
+try:
+    # print_dot is only exported if importing the dot module succeeds.
+    from .dot import print_dot  # NOQA
+    __all__.append('print_dot')
+except ImportError:
+    # Importing .dot failed, so print_dot is simply left out of __all__.
+    pass
No results found