Compare revisions

Showing 705 additions and 1557 deletions. (Three file diffs are collapsed and omitted here.)
@@ -5,6 +5,7 @@ API Reference
:maxdepth: 3
kernel_compile_and_call.rst
enums.rst
simplifications.rst
datahandling.rst
configuration.rst
************
Enumerations
************
.. automodule:: pystencils.enums
:members:
@@ -8,9 +8,14 @@ Creating kernels
.. autofunction:: pystencils.create_kernel
.. autofunction:: pystencils.create_indexed_kernel
.. autoclass:: pystencils.CreateKernelConfig
:members:
.. autofunction:: pystencils.create_staggered_kernel
.. autofunction:: pystencils.kernelcreation.create_domain_kernel
.. autofunction:: pystencils.kernelcreation.create_indexed_kernel
.. autofunction:: pystencils.kernelcreation.create_staggered_kernel
Code printing
@@ -22,11 +27,11 @@ Code printing
GPU Indexing
-------------
.. autoclass:: pystencils.gpucuda.AbstractIndexing
.. autoclass:: pystencils.gpu.AbstractIndexing
:members:
.. autoclass:: pystencils.gpucuda.BlockIndexing
.. autoclass:: pystencils.gpu.BlockIndexing
:members:
.. autoclass:: pystencils.gpucuda.LineIndexing
.. autoclass:: pystencils.gpu.LineIndexing
:members:
@@ -10,13 +10,27 @@ AssignmentCollection
:members:
SimplificationStrategy
======================
.. autoclass:: pystencils.simp.SimplificationStrategy
:members:
Simplifications
===============
.. automodule:: pystencils.simp
:members:
.. automodule:: pystencils.simp.simplifications
:members:
Subexpression insertion
=======================
The subexpression insertion functions inline subexpressions whose presence does not reduce the number of FLOPs.
For example, a constant kept as a subexpression leads to an extra variable in the generated code that occupies
a register slot; likewise, a subexpression consisting of a single variable can simply be inserted into all assignments that use it.
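
For instance, a constant subexpression can be inlined like this (a minimal sketch; ``insert_constants`` is
assumed to be one of the functions documented below):

.. code-block:: python

    import sympy as sp
    import pystencils as ps
    from pystencils.simp import insert_constants  # assumed import path

    x, y, c = sp.symbols("x, y, c")
    ac = ps.AssignmentCollection(main_assignments=[ps.Assignment(y, c * x + c)],
                                 subexpressions=[ps.Assignment(c, 3.0)])
    ac = insert_constants(ac)  # c is inlined into the main assignment, freeing a register
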
.. automodule:: pystencils.simp.subexpression_insertion
:members:
import subprocess
def version_number_from_git(tag_prefix='release/', sha_length=10, version_format="{version}.dev{commits}+{sha}"):
    """Derives a version string from git tags of the form '<tag_prefix><version>'.

    If HEAD is exactly on the latest release tag, the release version is returned; otherwise a
    development version string with the number of commits since that tag and the current SHA is
    built, and '.dirty' is appended if the working tree has uncommitted changes.
    """
def get_released_versions():
tags = sorted(subprocess.getoutput('git tag').split('\n'))
versions = [t[len(tag_prefix):] for t in tags if t.startswith(tag_prefix)]
return versions
def tag_from_version(v):
return tag_prefix + v
def increment_version(v):
parsed_version = [int(i) for i in v.split('.')]
parsed_version[-1] += 1
return '.'.join(str(i) for i in parsed_version)
latest_release = get_released_versions()[-1]
commits_since_tag = subprocess.getoutput('git rev-list {}..HEAD --count'.format(tag_from_version(latest_release)))
sha = subprocess.getoutput('git rev-parse HEAD')[:sha_length]
is_dirty = len(subprocess.getoutput("git status --untracked-files=no -s")) > 0
if int(commits_since_tag) == 0:
version_string = latest_release
else:
next_version = increment_version(latest_release)
version_string = version_format.format(version=next_version, commits=commits_since_tag, sha=sha)
if is_dirty:
version_string += ".dirty"
return version_string
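
if __name__ == "__main__":
    # Usage sketch: print the derived version string. This assumes the script is
    # executed inside a git checkout that has at least one tag starting with the
    # given tag_prefix (default 'release/').
    print(version_number_from_git())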
[project]
name = "pystencils"
description = "Speeding up stencil computations on CPUs and GPUs"
dynamic = ["version"]
readme = "README.md"
authors = [
{ name = "Martin Bauer" },
{ name = "Jan Hönig " },
{ name = "Markus Holzer" },
{ name = "Frederik Hennig" },
{ email = "cs10-codegen@fau.de" },
]
license = { file = "COPYING.txt" }
requires-python = ">=3.10"
dependencies = ["sympy>=1.9,<=1.12.1", "numpy>=1.8.0", "appdirs", "joblib", "pyyaml", "fasteners"]
classifiers = [
"Development Status :: 4 - Beta",
"Framework :: Jupyter",
"Topic :: Software Development :: Code Generators",
"Topic :: Scientific/Engineering :: Physics",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
]
[project.urls]
"Bug Tracker" = "https://i10git.cs.fau.de/pycodegen/pystencils/-/issues"
"Documentation" = "https://pycodegen.pages.i10git.cs.fau.de/pystencils/"
"Source Code" = "https://i10git.cs.fau.de/pycodegen/pystencils"
[project.optional-dependencies]
gpu = ['cupy']
alltrafos = ['islpy', 'py-cpuinfo']
bench_db = ['blitzdb', 'pymongo', 'pandas']
interactive = [
'matplotlib',
'ipy_table',
'imageio',
'jupyter',
'pyevtk',
'rich',
'graphviz',
]
use_cython = [
'Cython'
]
doc = [
'sphinx',
'sphinx_rtd_theme',
'nbsphinx',
'sphinxcontrib-bibtex',
'sphinx_autodoc_typehints',
'pandoc',
]
tests = [
'pytest',
'pytest-cov',
'pytest-html',
'ansi2html',
'pytest-xdist',
'flake8',
'nbformat',
'nbconvert',
'ipython',
'matplotlib',
'py-cpuinfo',
'randomgen>=1.18',
]
[build-system]
requires = [
"setuptools>=61",
"versioneer[toml]>=0.29",
# 'Cython'
]
build-backend = "setuptools.build_meta"
[tool.setuptools.package-data]
pystencils = [
"include/*.h",
"boundaries/createindexlistcython.pyx"
]
[tool.setuptools.packages.find]
where = ["src"]
include = ["pystencils", "pystencils.*"]
namespaces = false
[tool.versioneer]
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
# resulting files.
VCS = "git"
style = "pep440"
versionfile_source = "src/pystencils/_version.py"
versionfile_build = "pystencils/_version.py"
tag_prefix = "release/"
parentdir_prefix = "pystencils-"
# noinspection SpellCheckingInspection
def get_vector_instruction_set(data_type='double', instruction_set='avx'):
    """Returns a dictionary mapping abstract SIMD operation names to x86 intrinsic code templates.

    Args:
        data_type: 'double' or 'float'
        instruction_set: 'sse', 'avx' or 'avx512'
    """
comparisons = {
'==': '_CMP_EQ_UQ',
'!=': '_CMP_NEQ_UQ',
'>=': '_CMP_GE_OQ',
'<=': '_CMP_LE_OQ',
'<': '_CMP_NGE_UQ',
'>': '_CMP_NLE_UQ',
}
base_names = {
'+': 'add[0, 1]',
'-': 'sub[0, 1]',
'*': 'mul[0, 1]',
'/': 'div[0, 1]',
'&': 'and[0, 1]',
'|': 'or[0, 1]',
'blendv': 'blendv[0, 1, 2]',
'sqrt': 'sqrt[0]',
'makeVec': 'set[]',
'makeZero': 'setzero[]',
'loadU': 'loadu[0]',
'loadA': 'load[0]',
'storeU': 'storeu[0,1]',
'storeA': 'store[0,1]',
'stream': 'stream[0,1]',
}
for comparison_op, constant in comparisons.items():
base_names[comparison_op] = 'cmp[0, 1, %s]' % (constant,)
headers = {
'avx512': ['<immintrin.h>'],
'avx': ['<immintrin.h>'],
'sse': ['<immintrin.h>', '<xmmintrin.h>', '<emmintrin.h>', '<pmmintrin.h>',
'<tmmintrin.h>', '<smmintrin.h>', '<nmmintrin.h>']
}
suffix = {
'double': 'pd',
'float': 'ps',
}
prefix = {
'sse': '_mm',
'avx': '_mm256',
'avx512': '_mm512',
}
width = {
("double", "sse"): 2,
("float", "sse"): 4,
("double", "avx"): 4,
("float", "avx"): 8,
("double", "avx512"): 8,
("float", "avx512"): 16,
}
result = {
'width': width[(data_type, instruction_set)],
}
pre = prefix[instruction_set]
suf = suffix[data_type]
for intrinsic_id, function_shortcut in base_names.items():
function_shortcut = function_shortcut.strip()
name = function_shortcut[:function_shortcut.index('[')]
if intrinsic_id == 'makeVec':
arg_string = "({})".format(",".join(["{0}"] * result['width']))
else:
args = function_shortcut[function_shortcut.index('[') + 1: -1]
arg_string = "("
for arg in args.split(","):
arg = arg.strip()
if not arg:
continue
if arg in ('0', '1', '2', '3', '4', '5'):
arg_string += "{" + arg + "},"
else:
arg_string += arg + ","
arg_string = arg_string[:-1] + ")"
mask_suffix = '_mask' if instruction_set == 'avx512' and intrinsic_id in comparisons.keys() else ''
result[intrinsic_id] = pre + "_" + name + "_" + suf + mask_suffix + arg_string
result['dataTypePrefix'] = {
'double': "_" + pre + 'd',
'float': "_" + pre,
}
result['rsqrt'] = None
bit_width = result['width'] * (64 if data_type == 'double' else 32)
result['double'] = "__m%dd" % (bit_width,)
result['float'] = "__m%d" % (bit_width,)
result['int'] = "__m%di" % (bit_width,)
result['bool'] = "__m%dd" % (bit_width,)
result['headers'] = headers[instruction_set]
result['any'] = "%s_movemask_%s({0}) > 0" % (pre, suf)
result['all'] = "%s_movemask_%s({0}) == 0xF" % (pre, suf)
if instruction_set == 'avx512':
size = 8 if data_type == 'double' else 16
result['&'] = '_kand_mask%d({0}, {1})' % (size,)
result['|'] = '_kor_mask%d({0}, {1})' % (size,)
result['any'] = '!_ktestz_mask%d_u8({0}, {0})' % (size, )
result['all'] = '_kortestc_mask%d_u8({0}, {0})' % (size, )
result['blendv'] = '%s_mask_blend_%s({2}, {0}, {1})' % (pre, suf)
result['rsqrt'] = "_mm512_rsqrt14_%s({0})" % (suf,)
result['bool'] = "__mmask%d" % (size,)
if instruction_set == 'avx' and data_type == 'float':
result['rsqrt'] = "_mm256_rsqrt_ps({0})"
return result
def get_supported_instruction_sets():
"""List of supported instruction sets on current hardware, or None if query failed."""
try:
from cpuinfo import get_cpu_info
except ImportError:
return None
result = []
required_sse_flags = {'sse', 'sse2', 'ssse3', 'sse4_1', 'sse4_2'}
required_avx_flags = {'avx'}
required_avx512_flags = {'avx512f'}
flags = set(get_cpu_info()['flags'])
if flags.issuperset(required_sse_flags):
result.append("sse")
if flags.issuperset(required_avx_flags):
result.append("avx")
if flags.issuperset(required_avx512_flags):
result.append("avx512")
return result
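
if __name__ == "__main__":
    # Usage sketch: query the host CPU (requires py-cpuinfo) and build the intrinsics
    # mapping for the widest instruction set that was detected.
    available = get_supported_instruction_sets()  # e.g. ['sse', 'avx']
    if available:
        intrinsics = get_vector_instruction_set('double', available[-1])
        print(available[-1], intrinsics['width'], intrinsics['+'])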
import os
try:
from functools import lru_cache as memorycache
except ImportError:
from backports.functools_lru_cache import lru_cache as memorycache
try:
from joblib import Memory
from appdirs import user_cache_dir
if 'PYSTENCILS_CACHE_DIR' in os.environ:
cache_dir = os.environ['PYSTENCILS_CACHE_DIR']
else:
cache_dir = user_cache_dir('pystencils')
disk_cache = Memory(cachedir=cache_dir, verbose=False).cache
disk_cache_no_fallback = disk_cache
except ImportError:
# fallback to in-memory caching if joblib is not available
disk_cache = memorycache(maxsize=64)
def disk_cache_no_fallback(o):
return o
# Disable memory cache:
# disk_cache = lambda o: o
# disk_cache_no_fallback = lambda o: o
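
# Usage sketch: both cache objects are used as decorators on pure functions, e.g.
#
#     @disk_cache
#     def expensive_pure_function(argument):
#         ...
#
# With joblib installed the results are persisted below the cache directory,
# otherwise they are kept in an in-memory LRU cache.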
import ctypes
import sympy as sp
import numpy as np
try:
import llvmlite.ir as ir
except ImportError as e:
ir = None
_ir_importerror = e
from sympy.core.cache import cacheit
from pystencils.cache import memorycache
from pystencils.utils import all_equal
from sympy.logic.boolalg import Boolean
# noinspection PyPep8Naming
class cast_func(sp.Function):
is_Atom = True
def __new__(cls, *args, **kwargs):
        # To work in the conditions of sp.Piecewise, cast_func has to be of type Boolean as well.
        # However, a cast_func should only be a Boolean if its argument is a Boolean; otherwise this
        # leads to problems, e.g. when comparing cast_funcs for equality:
        #
        #   lhs = bitwise_and(a, cast_func(1, 'int'))
        #   rhs = cast_func(0, 'int')
        #   print(sp.Ne(lhs, rhs))  # would give True if all cast_funcs were Booleans
        #
        # -> thus a separate class boolean_cast_func is introduced
if isinstance(args[0], Boolean):
cls = boolean_cast_func
return sp.Function.__new__(cls, *args, **kwargs)
@property
def canonical(self):
if hasattr(self.args[0], 'canonical'):
return self.args[0].canonical
else:
raise NotImplementedError()
@property
def is_commutative(self):
return self.args[0].is_commutative
@property
def dtype(self):
return self.args[1]
# noinspection PyPep8Naming
class boolean_cast_func(cast_func, Boolean):
pass
# noinspection PyPep8Naming
class vector_memory_access(cast_func):
nargs = (4,)
# noinspection PyPep8Naming
class reinterpret_cast_func(cast_func):
pass
# noinspection PyPep8Naming
class pointer_arithmetic_func(sp.Function, Boolean):
@property
def canonical(self):
if hasattr(self.args[0], 'canonical'):
return self.args[0].canonical
else:
raise NotImplementedError()
class TypedSymbol(sp.Symbol):
def __new__(cls, *args, **kwds):
obj = TypedSymbol.__xnew_cached_(cls, *args, **kwds)
return obj
def __new_stage2__(cls, name, dtype):
obj = super(TypedSymbol, cls).__xnew__(cls, name)
try:
obj._dtype = create_type(dtype)
except TypeError:
# on error keep the string
obj._dtype = dtype
return obj
__xnew__ = staticmethod(__new_stage2__)
__xnew_cached_ = staticmethod(cacheit(__new_stage2__))
@property
def dtype(self):
return self._dtype
def _hashable_content(self):
return super()._hashable_content(), hash(self._dtype)
def __getnewargs__(self):
return self.name, self.dtype
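
# Example (sketch): a TypedSymbol behaves like a sympy Symbol but carries a data type
# created via create_type below.
#
#     >>> a = TypedSymbol("a", "float64")
#     >>> a.dtype
#     double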
def create_type(specification):
"""Creates a subclass of Type according to a string or an object of subclass Type.
Args:
specification: Type object, or a string
Returns:
Type object, or a new Type object parsed from the string
"""
if isinstance(specification, Type):
return specification
else:
numpy_dtype = np.dtype(specification)
if numpy_dtype.fields is None:
return BasicType(numpy_dtype, const=False)
else:
return StructType(numpy_dtype, const=False)
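
# Example (sketch): strings and numpy dtype specifications are both accepted.
#
#     >>> create_type('float32')
#     float
#     >>> str(create_type('uint64'))
#     'uint64_t'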
@memorycache(maxsize=64)
def create_composite_type_from_string(specification):
"""Creates a new Type object from a c-like string specification.
Args:
specification: Specification string
Returns:
Type object
"""
specification = specification.lower().split()
parts = []
current = []
for s in specification:
if s == '*':
parts.append(current)
current = [s]
else:
current.append(s)
if len(current) > 0:
parts.append(current)
# Parse native part
base_part = parts.pop(0)
const = False
if 'const' in base_part:
const = True
base_part.remove('const')
assert len(base_part) == 1
if base_part[0][-1] == "*":
base_part[0] = base_part[0][:-1]
parts.append('*')
current_type = BasicType(np.dtype(base_part[0]), const)
# Parse pointer parts
for part in parts:
restrict = False
const = False
if 'restrict' in part:
restrict = True
part.remove('restrict')
if 'const' in part:
const = True
part.remove("const")
assert len(part) == 1 and part[0] == '*'
current_type = PointerType(current_type, const, restrict)
return current_type
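
# Example (sketch): a C-like declaration string is parsed into nested Type objects.
#
#     >>> str(create_composite_type_from_string("const double * restrict"))
#     'double const * RESTRICT'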
def get_base_type(data_type):
while data_type.base_type is not None:
data_type = data_type.base_type
return data_type
def to_ctypes(data_type):
"""
Transforms a given Type into ctypes
:param data_type: Subclass of Type
:return: ctypes type object
"""
if isinstance(data_type, PointerType):
return ctypes.POINTER(to_ctypes(data_type.base_type))
elif isinstance(data_type, StructType):
return ctypes.POINTER(ctypes.c_uint8)
else:
return to_ctypes.map[data_type.numpy_dtype]
to_ctypes.map = {
np.dtype(np.int8): ctypes.c_int8,
np.dtype(np.int16): ctypes.c_int16,
np.dtype(np.int32): ctypes.c_int32,
np.dtype(np.int64): ctypes.c_int64,
np.dtype(np.uint8): ctypes.c_uint8,
np.dtype(np.uint16): ctypes.c_uint16,
np.dtype(np.uint32): ctypes.c_uint32,
np.dtype(np.uint64): ctypes.c_uint64,
np.dtype(np.float32): ctypes.c_float,
np.dtype(np.float64): ctypes.c_double,
}
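
# Example (sketch): scalar pystencils types map onto their ctypes equivalents.
#
#     >>> to_ctypes(create_type('float64')) is ctypes.c_double
#     True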
def ctypes_from_llvm(data_type):
if not ir:
raise _ir_importerror
if isinstance(data_type, ir.PointerType):
ctype = ctypes_from_llvm(data_type.pointee)
if ctype is None:
return ctypes.c_void_p
else:
return ctypes.POINTER(ctype)
elif isinstance(data_type, ir.IntType):
if data_type.width == 8:
return ctypes.c_int8
elif data_type.width == 16:
return ctypes.c_int16
elif data_type.width == 32:
return ctypes.c_int32
elif data_type.width == 64:
return ctypes.c_int64
else:
raise ValueError("Int width %d is not supported" % data_type.width)
elif isinstance(data_type, ir.FloatType):
return ctypes.c_float
elif isinstance(data_type, ir.DoubleType):
return ctypes.c_double
elif isinstance(data_type, ir.VoidType):
return None # Void type is not supported by ctypes
else:
raise NotImplementedError('Data type %s of %s is not supported yet' % (type(data_type), data_type))
def to_llvm_type(data_type):
"""
    Transforms a given Type into an llvmlite type
:param data_type: Subclass of Type
:return: llvmlite type object
"""
if not ir:
raise _ir_importerror
if isinstance(data_type, PointerType):
return to_llvm_type(data_type.base_type).as_pointer()
else:
return to_llvm_type.map[data_type.numpy_dtype]
if ir:
to_llvm_type.map = {
np.dtype(np.int8): ir.IntType(8),
np.dtype(np.int16): ir.IntType(16),
np.dtype(np.int32): ir.IntType(32),
np.dtype(np.int64): ir.IntType(64),
np.dtype(np.uint8): ir.IntType(8),
np.dtype(np.uint16): ir.IntType(16),
np.dtype(np.uint32): ir.IntType(32),
np.dtype(np.uint64): ir.IntType(64),
np.dtype(np.float32): ir.FloatType(),
np.dtype(np.float64): ir.DoubleType(),
}
def peel_off_type(dtype, type_to_peel_off):
while type(dtype) is type_to_peel_off:
dtype = dtype.base_type
return dtype
def collate_types(types):
"""
Takes a sequence of types and returns their "common type" e.g. (float, double, float) -> double
Uses the collation rules from numpy.
"""
# Pointer arithmetic case i.e. pointer + integer is allowed
if any(type(t) is PointerType for t in types):
pointer_type = None
for t in types:
if type(t) is PointerType:
if pointer_type is not None:
raise ValueError("Cannot collate the combination of two pointer types")
pointer_type = t
elif type(t) is BasicType:
if not (t.is_int() or t.is_uint()):
raise ValueError("Invalid pointer arithmetic")
else:
raise ValueError("Invalid pointer arithmetic")
return pointer_type
    # peel off vector types; if at least one vector type occurred, the result will also be a vector type
vector_type = [t for t in types if type(t) is VectorType]
if not all_equal(t.width for t in vector_type):
raise ValueError("Collation failed because of vector types with different width")
types = [peel_off_type(t, VectorType) for t in types]
# now we should have a list of basic types - struct types are not yet supported
assert all(type(t) is BasicType for t in types)
if any(t.is_float() for t in types):
types = tuple(t for t in types if t.is_float())
    # use numpy collation -> create a type from the numpy type -> and wrap it in a vector type if necessary
result_numpy_type = np.result_type(*(t.numpy_dtype for t in types))
result = BasicType(result_numpy_type)
if vector_type:
result = VectorType(result, vector_type[0].width)
return result
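
# Example (sketch): mixed float widths collate to the widest type, following numpy's
# promotion rules.
#
#     >>> collate_types([create_type('float32'), create_type('float64')])
#     double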
@memorycache(maxsize=2048)
def get_type_of_expression(expr):
from pystencils.astnodes import ResolvedFieldAccess
from pystencils.cpu.vectorization import vec_all, vec_any
expr = sp.sympify(expr)
if isinstance(expr, sp.Integer):
return create_type("int")
elif isinstance(expr, sp.Rational) or isinstance(expr, sp.Float):
return create_type("double")
elif isinstance(expr, ResolvedFieldAccess):
return expr.field.dtype
elif isinstance(expr, TypedSymbol):
return expr.dtype
elif isinstance(expr, sp.Symbol):
raise ValueError("All symbols inside this expression have to be typed! ", str(expr))
elif isinstance(expr, cast_func):
return expr.args[1]
elif isinstance(expr, vec_any) or isinstance(expr, vec_all):
return create_type("bool")
elif hasattr(expr, 'func') and expr.func == sp.Piecewise:
collated_result_type = collate_types(tuple(get_type_of_expression(a[0]) for a in expr.args))
collated_condition_type = collate_types(tuple(get_type_of_expression(a[1]) for a in expr.args))
if type(collated_condition_type) is VectorType and type(collated_result_type) is not VectorType:
collated_result_type = VectorType(collated_result_type, width=collated_condition_type.width)
return collated_result_type
elif isinstance(expr, sp.Indexed):
typed_symbol = expr.base.label
return typed_symbol.dtype.base_type
elif isinstance(expr, sp.boolalg.Boolean) or isinstance(expr, sp.boolalg.BooleanFunction):
# if any arg is of vector type return a vector boolean, else return a normal scalar boolean
result = create_type("bool")
vec_args = [get_type_of_expression(a) for a in expr.args if isinstance(get_type_of_expression(a), VectorType)]
if vec_args:
result = VectorType(result, width=vec_args[0].width)
return result
elif isinstance(expr, sp.Pow):
return get_type_of_expression(expr.args[0])
elif isinstance(expr, sp.Expr):
types = tuple(get_type_of_expression(a) for a in expr.args)
return collate_types(types)
raise NotImplementedError("Could not determine type for", expr, type(expr))
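
# Example (sketch): the type of a compound expression is collated from the types of
# its typed leaves.
#
#     >>> get_type_of_expression(TypedSymbol('a', 'float32') + TypedSymbol('b', 'float64'))
#     double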
class Type(sp.Basic):
is_Atom = True
def __new__(cls, *args, **kwargs):
return sp.Basic.__new__(cls)
def _sympystr(self, *args, **kwargs):
return str(self)
class BasicType(Type):
@staticmethod
def numpy_name_to_c(name):
if name == 'float64':
return 'double'
elif name == 'float32':
return 'float'
elif name.startswith('int'):
width = int(name[len("int"):])
return "int%d_t" % (width,)
elif name.startswith('uint'):
width = int(name[len("uint"):])
return "uint%d_t" % (width,)
elif name == 'bool':
return 'bool'
else:
raise NotImplementedError("Can map numpy to C name for %s" % (name,))
def __init__(self, dtype, const=False):
self.const = const
if isinstance(dtype, Type):
self._dtype = dtype.numpy_dtype
else:
self._dtype = np.dtype(dtype)
assert self._dtype.fields is None, "Tried to initialize NativeType with a structured type"
assert self._dtype.hasobject is False
assert self._dtype.subdtype is None
def __getnewargs__(self):
return self.numpy_dtype, self.const
@property
def base_type(self):
return None
@property
def numpy_dtype(self):
return self._dtype
@property
def item_size(self):
return 1
def is_int(self):
return self.numpy_dtype in np.sctypes['int']
def is_float(self):
return self.numpy_dtype in np.sctypes['float']
def is_uint(self):
return self.numpy_dtype in np.sctypes['uint']
def is_complex(self):
return self.numpy_dtype in np.sctypes['complex']
def is_other(self):
return self.numpy_dtype in np.sctypes['others']
@property
def base_name(self):
return BasicType.numpy_name_to_c(str(self._dtype))
def __str__(self):
result = BasicType.numpy_name_to_c(str(self._dtype))
if self.const:
result += " const"
return result
def __repr__(self):
return str(self)
def __eq__(self, other):
if not isinstance(other, BasicType):
return False
else:
return (self.numpy_dtype, self.const) == (other.numpy_dtype, other.const)
def __hash__(self):
return hash(str(self))
class VectorType(Type):
instruction_set = None
def __init__(self, base_type, width=4):
self._base_type = base_type
self.width = width
@property
def base_type(self):
return self._base_type
@property
def item_size(self):
return self.width * self.base_type.item_size
def __eq__(self, other):
if not isinstance(other, VectorType):
return False
else:
return (self.base_type, self.width) == (other.base_type, other.width)
def __str__(self):
if self.instruction_set is None:
return "%s[%d]" % (self.base_type, self.width)
else:
if self.base_type == create_type("int64"):
return self.instruction_set['int']
elif self.base_type == create_type("float64"):
return self.instruction_set['double']
elif self.base_type == create_type("float32"):
return self.instruction_set['float']
elif self.base_type == create_type("bool"):
return self.instruction_set['bool']
else:
raise NotImplementedError()
def __hash__(self):
return hash((self.base_type, self.width))
def __getnewargs__(self):
return self._base_type, self.width
class PointerType(Type):
def __init__(self, base_type, const=False, restrict=True):
self._base_type = base_type
self.const = const
self.restrict = restrict
def __getnewargs__(self):
return self.base_type, self.const, self.restrict
@property
def alias(self):
return not self.restrict
@property
def base_type(self):
return self._base_type
@property
def item_size(self):
return self.base_type.item_size
def __eq__(self, other):
if not isinstance(other, PointerType):
return False
else:
return (self.base_type, self.const, self.restrict) == (other.base_type, other.const, other.restrict)
def __str__(self):
components = [str(self.base_type), '*']
if self.restrict:
components.append('RESTRICT')
if self.const:
components.append("const")
return " ".join(components)
def __repr__(self):
return str(self)
def __hash__(self):
return hash((self._base_type, self.const, self.restrict))
class StructType:
def __init__(self, numpy_type, const=False):
self.const = const
self._dtype = np.dtype(numpy_type)
def __getnewargs__(self):
return self.numpy_dtype, self.const
@property
def base_type(self):
return None
@property
def numpy_dtype(self):
return self._dtype
@property
def item_size(self):
return self.numpy_dtype.itemsize
def get_element_offset(self, element_name):
return self.numpy_dtype.fields[element_name][1]
def get_element_type(self, element_name):
np_element_type = self.numpy_dtype.fields[element_name][0]
return BasicType(np_element_type, self.const)
def has_element(self, element_name):
return element_name in self.numpy_dtype.fields
def __eq__(self, other):
if not isinstance(other, StructType):
return False
else:
return (self.numpy_dtype, self.const) == (other.numpy_dtype, other.const)
def __str__(self):
# structs are handled byte-wise
result = "uint8_t"
if self.const:
result += " const"
return result
def __repr__(self):
return str(self)
def __hash__(self):
return hash((self.numpy_dtype, self.const))
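
# Example (sketch): index fields of indexed kernels are described by a numpy structured
# dtype; in generated code the struct is addressed byte-wise, hence str() yields uint8_t.
#
#     >>> t = StructType(np.dtype([('x', np.float64), ('idx', np.int32)]))
#     >>> t.get_element_type('idx')
#     int32_t
#     >>> str(t)
#     'uint8_t'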
#include <cstdint>
#ifndef __CUDA_ARCH__
#define QUALIFIERS inline
#else
#define QUALIFIERS static __forceinline__ __device__
#endif
#define PHILOX_W32_0 (0x9E3779B9)
#define PHILOX_W32_1 (0xBB67AE85)
#define PHILOX_M4x32_0 (0xD2511F53)
#define PHILOX_M4x32_1 (0xCD9E8D57)
#define TWOPOW53_INV_DOUBLE (1.1102230246251565e-16)
#define TWOPOW32_INV_FLOAT (2.3283064e-10f)
typedef std::uint32_t uint32;
typedef std::uint64_t uint64;
QUALIFIERS uint32 mulhilo32(uint32 a, uint32 b, uint32* hip)
{
#ifndef __CUDA_ARCH__
// host code
uint64 product = ((uint64)a) * ((uint64)b);
*hip = product >> 32;
return (uint32)product;
#else
// device code
*hip = __umulhi(a,b);
return a*b;
#endif
}
QUALIFIERS void _philox4x32round(uint32* ctr, uint32* key)
{
uint32 hi0;
uint32 hi1;
uint32 lo0 = mulhilo32(PHILOX_M4x32_0, ctr[0], &hi0);
uint32 lo1 = mulhilo32(PHILOX_M4x32_1, ctr[2], &hi1);
ctr[0] = hi1^ctr[1]^key[0];
ctr[1] = lo1;
ctr[2] = hi0^ctr[3]^key[1];
ctr[3] = lo0;
}
QUALIFIERS void _philox4x32bumpkey(uint32* key)
{
key[0] += PHILOX_W32_0;
key[1] += PHILOX_W32_1;
}
QUALIFIERS double _uniform_double_hq(uint32 x, uint32 y)
{
unsigned long long z = (unsigned long long)x ^
((unsigned long long)y << (53 - 32));
return z * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0);
}
QUALIFIERS void philox_double2(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
uint32 key0, uint32 key1, double & rnd1, double & rnd2)
{
uint32 key[2] = {key0, key1};
uint32 ctr[4] = {ctr0, ctr1, ctr2, ctr3};
_philox4x32round(ctr, key); // 1
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 2
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 3
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 4
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 5
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 6
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 7
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 8
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 9
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 10
rnd1 = _uniform_double_hq(ctr[0], ctr[1]);
rnd2 = _uniform_double_hq(ctr[2], ctr[3]);
}
QUALIFIERS void philox_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
uint32 key0, uint32 key1,
float & rnd1, float & rnd2, float & rnd3, float & rnd4)
{
uint32 key[2] = {key0, key1};
uint32 ctr[4] = {ctr0, ctr1, ctr2, ctr3};
_philox4x32round(ctr, key); // 1
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 2
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 3
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 4
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 5
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 6
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 7
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 8
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 9
_philox4x32bumpkey(key); _philox4x32round(ctr, key); // 10
rnd1 = ctr[0] * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f);
rnd2 = ctr[1] * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f);
rnd3 = ctr[2] * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f);
rnd4 = ctr[3] * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f);
}
\ No newline at end of file
from .kerncraft_interface import PyStencilsKerncraftKernel, KerncraftParameters
__all__ = ['PyStencilsKerncraftKernel', 'KerncraftParameters']
from jinja2 import Template
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.sympyextensions import prod
from pystencils.data_types import get_base_type
from pystencils.astnodes import PragmaBlock
benchmark_template = Template("""
#include "kerncraft.h"
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
{{ includes }}
{%- if likwid %}
#include <likwid.h>
{%- endif %}
#define RESTRICT __restrict__
#define FUNC_PREFIX
void dummy(double *);
extern int var_false;
{{kernel_code}}
int main(int argc, char **argv)
{
{%- if likwid %}
likwid_markerInit();
{%- endif %}
{%- for field_name, dataType, size in fields %}
// Initialization {{field_name}}
double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
for (unsigned long long i = 0; i < {{size}}; ++i)
{{field_name}}[i] = 0.23;
if(var_false)
dummy({{field_name}});
{%- endfor %}
{%- for constantName, dataType in constants %}
// Constant {{constantName}}
{{dataType}} {{constantName}};
{{constantName}} = 0.23;
if(var_false)
dummy(& {{constantName}});
{%- endfor %}
{%- if likwid and openmp %}
#pragma omp parallel
{
likwid_markerRegisterRegion("loop");
#pragma omp barrier
{%- elif likwid %}
likwid_markerRegisterRegion("loop");
{%- endif %}
for(int warmup = 1; warmup >= 0; --warmup) {
int repeat = 2;
if(warmup == 0) {
repeat = atoi(argv[1]);
{%- if likwid %}
likwid_markerStartRegion("loop");
{%- endif %}
}
for (; repeat > 0; --repeat)
{
{{kernelName}}({{call_argument_list}});
// Dummy calls
{%- for field_name, dataType, size in fields %}
if(var_false) dummy({{field_name}});
{%- endfor %}
{%- for constantName, dataType in constants %}
if(var_false) dummy(&{{constantName}});
{%- endfor %}
}
}
{%- if likwid %}
likwid_markerStopRegion("loop");
{%- if openmp %}
}
{%- endif %}
{%- endif %}
{%- if likwid %}
likwid_markerClose();
{%- endif %}
}
""")
def generate_benchmark(ast, likwid=False, openmp=False):
    """Renders a standalone C benchmark (a main function) for the given kernel AST.

    Args:
        ast: pystencils kernel AST; only kernels with a fixed loop size (fields of fixed shape) are supported
        likwid: if True, instrument the timed loop with likwid marker API calls
        openmp: if True (together with likwid), wrap the likwid markers in an OpenMP parallel region
    """
accessed_fields = {f.name: f for f in ast.fields_accessed}
constants = []
fields = []
call_parameters = []
for p in ast.get_parameters():
if not p.is_field_parameter:
constants.append((p.symbol.name, str(p.symbol.dtype)))
call_parameters.append(p.symbol.name)
else:
assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
field = accessed_fields[p.field_name]
dtype = str(get_base_type(p.symbol.dtype))
fields.append((p.field_name, dtype, prod(field.shape)))
call_parameters.append(p.field_name)
header_list = get_headers(ast)
includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])
# Strip "#pragma omp parallel" from within kernel, because main function takes care of that
# when likwid and openmp are enabled
if likwid and openmp:
if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
ast.body.args[0].pragma_line = ''
args = {
'likwid': likwid,
'openmp': openmp,
'kernel_code': generate_c(ast, dialect='c'),
'kernelName': ast.function_name,
'fields': fields,
'constants': constants,
'call_argument_list': ",".join(call_parameters),
'includes': includes,
}
return benchmark_template.render(**args)
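
# Usage sketch (assumptions: pystencils is importable and the kernel has a fixed loop
# size, i.e. its fields have a fixed shape, as required by the assertion above):
#
#     import pystencils as ps
#     src, dst = ps.fields("src, dst: double[20, 20]")
#     kernel_ast = ps.create_kernel([ps.Assignment(dst.center, src.center)])
#     c_code = generate_benchmark(kernel_ast, likwid=False, openmp=False)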