Compare revisions

Showing with 492 additions and 42813 deletions
@@ -5,6 +5,7 @@ API Reference
:maxdepth: 3
kernel_compile_and_call.rst
enums.rst
simplifications.rst
datahandling.rst
configuration.rst
************
Enumerations
************
.. automodule:: pystencils.enums
:members:
@@ -8,9 +8,14 @@ Creating kernels
.. autofunction:: pystencils.create_kernel
.. autofunction:: pystencils.create_indexed_kernel
.. autoclass:: pystencils.CreateKernelConfig
:members:
.. autofunction:: pystencils.create_staggered_kernel
.. autofunction:: pystencils.kernelcreation.create_domain_kernel
.. autofunction:: pystencils.kernelcreation.create_indexed_kernel
.. autofunction:: pystencils.kernelcreation.create_staggered_kernel
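A minimal usage sketch (the exact ``CreateKernelConfig`` fields and the location of the ``Target`` enum may differ slightly between pystencils versions):

.. code-block:: python

    import numpy as np
    import pystencils as ps

    # symbolic fields and a simple four-point average as update rule
    src, dst = ps.fields("src, dst: float64[2D]")
    update = ps.Assignment(dst[0, 0],
                           (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4)

    # configure, generate and compile the kernel, then run it on numpy arrays
    config = ps.CreateKernelConfig(target=ps.Target.CPU)
    kernel = ps.create_kernel(update, config=config).compile()

    src_arr = np.random.rand(30, 30)
    dst_arr = np.zeros_like(src_arr)
    kernel(src=src_arr, dst=dst_arr)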
Code printing
@@ -22,11 +27,11 @@ Code printing
GPU Indexing
-------------
.. autoclass:: pystencils.gpucuda.AbstractIndexing
.. autoclass:: pystencils.gpu.AbstractIndexing
:members:
.. autoclass:: pystencils.gpucuda.BlockIndexing
.. autoclass:: pystencils.gpu.BlockIndexing
:members:
.. autoclass:: pystencils.gpucuda.LineIndexing
.. autoclass:: pystencils.gpu.LineIndexing
:members:
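A hedged sketch of selecting the GPU backend with explicit block indexing; the ``gpu_indexing_params`` key name is an assumption and may be spelled differently in your version:

.. code-block:: python

    import pystencils as ps

    config = ps.CreateKernelConfig(
        target=ps.Target.GPU,
        gpu_indexing='block',                            # use BlockIndexing rather than LineIndexing
        gpu_indexing_params={'block_size': (32, 8, 1)},  # key name may vary between versions
    )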
@@ -10,13 +10,27 @@ AssignmentCollection
:members:
SimplificationStrategy
======================
.. autoclass:: pystencils.simp.SimplificationStrategy
:members:
Simplifications
===============
.. automodule:: pystencils.simp
:members:
.. automodule:: pystencils.simp.simplifications
:members:
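A short sketch of composing a strategy from the building blocks listed above (assuming ``apply_to_all_assignments`` and ``sympy_cse`` are exported by ``pystencils.simp``, as in recent versions):

.. code-block:: python

    import sympy as sp
    from pystencils.simp import SimplificationStrategy, apply_to_all_assignments, sympy_cse

    strategy = SimplificationStrategy()
    strategy.add(apply_to_all_assignments(sp.expand))  # expand every right-hand side
    strategy.add(sympy_cse)                            # common subexpression elimination

    simplified = strategy(assignment_collection)       # an AssignmentCollection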
Subexpression insertion
=======================
The subexpression insertion routines inline subexpressions whose extraction does not actually save FLOPs. For example, a constant kept as a subexpression merely introduces an extra variable in the generated code that occupies a register slot, and a subexpression that is just a single variable can simply be inserted into all assignments that use it.
.. automodule:: pystencils.simp.subexpression_insertion
:members:
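A usage sketch; the helper names below (``insert_aliases``, ``insert_constants``) are assumptions based on the module documented above and may differ:

.. code-block:: python

    from pystencils.simp import insert_aliases, insert_constants

    ac = insert_aliases(assignment_collection)  # inline subexpressions that are plain symbols
    ac = insert_constants(ac)                   # inline subexpressions that are constants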
import subprocess


def version_number_from_git(tag_prefix='release/', sha_length=10, version_format="{version}.dev{commits}+{sha}"):

    def get_released_versions():
        tags = sorted(subprocess.getoutput('git tag').split('\n'))
        versions = [t[len(tag_prefix):] for t in tags if t.startswith(tag_prefix)]
        return versions

    def tag_from_version(v):
        return tag_prefix + v

    def increment_version(v):
        parsed_version = [int(i) for i in v.split('.')]
        parsed_version[-1] += 1
        return '.'.join(str(i) for i in parsed_version)

    latest_release = get_released_versions()[-1]
    commits_since_tag = subprocess.getoutput('git rev-list {}..HEAD --count'.format(tag_from_version(latest_release)))
    sha = subprocess.getoutput('git rev-parse HEAD')[:sha_length]
    is_dirty = len(subprocess.getoutput("git status --untracked-files=no -s")) > 0

    if int(commits_since_tag) == 0:
        version_string = latest_release
    else:
        next_version = increment_version(latest_release)
        version_string = version_format.format(version=next_version, commits=commits_since_tag, sha=sha)

    if is_dirty:
        version_string += ".dirty"
    return version_string
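# Example (sketch): with the latest tag release/1.2, five commits on top of it at SHA
# abc123def0, and a clean working tree, this returns "1.3.dev5+abc123def0";
# a dirty working tree appends ".dirty".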
[project]
name = "pystencils"
description = "Speeding up stencil computations on CPUs and GPUs"
dynamic = ["version"]
readme = "README.md"
authors = [
{ name = "Martin Bauer" },
{ name = "Jan Hönig " },
{ name = "Markus Holzer" },
{ name = "Frederik Hennig" },
{ email = "cs10-codegen@fau.de" },
]
license = { file = "COPYING.txt" }
requires-python = ">=3.10"
dependencies = ["sympy>=1.9,<=1.12.1", "numpy>=1.8.0", "appdirs", "joblib", "pyyaml", "fasteners"]
classifiers = [
"Development Status :: 4 - Beta",
"Framework :: Jupyter",
"Topic :: Software Development :: Code Generators",
"Topic :: Scientific/Engineering :: Physics",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
]
[project.urls]
"Bug Tracker" = "https://i10git.cs.fau.de/pycodegen/pystencils/-/issues"
"Documentation" = "https://pycodegen.pages.i10git.cs.fau.de/pystencils/"
"Source Code" = "https://i10git.cs.fau.de/pycodegen/pystencils"
[project.optional-dependencies]
gpu = ['cupy']
alltrafos = ['islpy', 'py-cpuinfo']
bench_db = ['blitzdb', 'pymongo', 'pandas']
interactive = [
'matplotlib',
'ipy_table',
'imageio',
'jupyter',
'pyevtk',
'rich',
'graphviz',
]
use_cython = [
'Cython'
]
doc = [
'sphinx',
'sphinx_rtd_theme',
'nbsphinx',
'sphinxcontrib-bibtex',
'sphinx_autodoc_typehints',
'pandoc',
]
tests = [
'pytest',
'pytest-cov',
'pytest-html',
'ansi2html',
'pytest-xdist',
'flake8',
'nbformat',
'nbconvert',
'ipython',
'matplotlib',
'py-cpuinfo',
'randomgen>=1.18',
]
[build-system]
requires = [
"setuptools>=61",
"versioneer[toml]>=0.29",
# 'Cython'
]
build-backend = "setuptools.build_meta"
[tool.setuptools.package-data]
pystencils = [
"include/*.h",
"boundaries/createindexlistcython.pyx"
]
[tool.setuptools.packages.find]
where = ["src"]
include = ["pystencils", "pystencils.*"]
namespaces = false
[tool.versioneer]
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
# resulting files.
VCS = "git"
style = "pep440"
versionfile_source = "src/pystencils/_version.py"
versionfile_build = "pystencils/_version.py"
tag_prefix = "release/"
parentdir_prefix = "pystencils-"
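# Usage note (sketch): the optional dependency groups above can be combined at install
# time, e.g. `pip install pystencils[gpu,interactive]`.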
"""
Provides tools for generation of auto-differentiable operations.
See https://github.com/theHamsta/pystencils_autodiff
Installation:
.. code-block:: bash
pip install pystencils-autodiff
"""
import os
if 'CI' not in os.environ:
raise NotImplementedError('pystencils-autodiff is not installed. Run `pip install pystencils-autodiff`')
__prof_trigger
printf
__syncthreads
__syncthreads_count
__syncthreads_and
__syncthreads_or
__syncwarp
__threadfence
__threadfence_block
__threadfence_system
atomicAdd
atomicSub
atomicExch
atomicMin
atomicMax
atomicInc
atomicDec
atomicAnd
atomicOr
atomicXor
atomicCAS
__all_sync
__any_sync
__ballot_sync
__active_mask
__shfl_sync
__shfl_up_sync
__shfl_down_sync
__shfl_xor_sync
__match_any_sync
__match_all_sync
__isGlobal
__isShared
__isConstant
__isLocal
tex1Dfetch
tex1D
tex2D
tex3D
rsqrtf
cbrtf
rcbrtf
hypotf
rhypotf
norm3df
rnorm3df
norm4df
rnorm4df
normf
rnormf
expf
exp2f
exp10f
expm1f
logf
log2f
log10f
log1pf
sinf
cosf
tanf
sincosf
sinpif
cospif
sincospif
asinf
acosf
atanf
atan2f
sinhf
coshf
tanhf
asinhf
acoshf
atanhf
powf
erff
erfcf
erfinvf
erfcinvf
erfcxf
normcdff
normcdfinvf
lgammaf
tgammaf
fmaf
frexpf
ldexpf
scalbnf
scalblnf
logbf
ilogbf
j0f
j1f
jnf
y0f
y1f
ynf
cyl_bessel_i0f
cyl_bessel_i1f
fmodf
remainderf
remquof
modff
fdimf
truncf
roundf
rintf
nearbyintf
ceilf
floorf
lrintf
lroundf
llrintf
llroundf
sqrt
rsqrt
cbrt
rcbrt
hypot
rhypot
norm3d
rnorm3d
norm4d
rnorm4d
norm
rnorm
exp
exp2
exp10
expm1
log
log2
log10
log1p
sin
cos
tan
sincos
sinpi
cospi
sincospi
asin
acos
atan
atan2
sinh
cosh
tanh
asinh
acosh
atanh
pow
erf
erfc
erfinv
erfcinv
erfcx
normcdf
normcdfinv
lgamma
tgamma
fma
frexp
ldexp
scalbn
scalbln
logb
ilogb
j0
j1
jn
y0
y1
yn
cyl_bessel_i0
cyl_bessel_i1
fmod
remainder
remquo
mod
fdim
trunc
round
rint
nearbyint
ceil
floor
lrint
lround
llrint
llround
__fdividef
__sinf
__cosf
__tanf
__sincosf
__logf
__log2f
__log10f
__expf
__exp10f
__powf
__fadd_rn
__fsub_rn
__fmul_rn
__fmaf_rn
__frcp_rn
__fsqrt_rn
__frsqrt_rn
__fdiv_rn
__fadd_rz
__fsub_rz
__fmul_rz
__fmaf_rz
__frcp_rz
__fsqrt_rz
__frsqrt_rz
__fdiv_rz
__fadd_ru
__fsub_ru
__fmul_ru
__fmaf_ru
__frcp_ru
__fsqrt_ru
__frsqrt_ru
__fdiv_ru
__fadd_rd
__fsub_rd
__fmul_rd
__fmaf_rd
__frcp_rd
__fsqrt_rd
__frsqrt_rd
__fdiv_rd
__fdividef
__expf
__exp10f
__logf
__log2f
__log10f
__sinf
__cosf
__sincosf
__tanf
__powf
__dadd_rn
__dsub_rn
__dmul_rn
__fma_rn
__ddiv_rn
__drcp_rn
__dsqrt_rn
__dadd_rz
__dsub_rz
__dmul_rz
__fma_rz
__ddiv_rz
__drcp_rz
__dsqrt_rz
__dadd_ru
__dsub_ru
__dmul_ru
__fma_ru
__ddiv_ru
__drcp_ru
__dsqrt_ru
__dadd_rd
__dsub_rd
__dmul_rd
__fma_rd
__ddiv_rd
__drcp_rd
__dsqrt_rd
# noinspection SpellCheckingInspection
def get_vector_instruction_set(data_type='double', instruction_set='avx'):
comparisons = {
'==': '_CMP_EQ_UQ',
'!=': '_CMP_NEQ_UQ',
'>=': '_CMP_GE_OQ',
'<=': '_CMP_LE_OQ',
'<': '_CMP_NGE_UQ',
'>': '_CMP_NLE_UQ',
}
base_names = {
'+': 'add[0, 1]',
'-': 'sub[0, 1]',
'*': 'mul[0, 1]',
'/': 'div[0, 1]',
'&': 'and[0, 1]',
'|': 'or[0, 1]',
'blendv': 'blendv[0, 1, 2]',
'sqrt': 'sqrt[0]',
'makeVec': 'set[]',
'makeZero': 'setzero[]',
'loadU': 'loadu[0]',
'loadA': 'load[0]',
'storeU': 'storeu[0,1]',
'storeA': 'store[0,1]',
'stream': 'stream[0,1]',
}
for comparison_op, constant in comparisons.items():
base_names[comparison_op] = 'cmp[0, 1, %s]' % (constant,)
headers = {
'avx512': ['<immintrin.h>'],
'avx': ['<immintrin.h>'],
'sse': ['<immintrin.h>', '<xmmintrin.h>', '<emmintrin.h>', '<pmmintrin.h>',
'<tmmintrin.h>', '<smmintrin.h>', '<nmmintrin.h>']
}
suffix = {
'double': 'pd',
'float': 'ps',
}
prefix = {
'sse': '_mm',
'avx': '_mm256',
'avx512': '_mm512',
}
width = {
("double", "sse"): 2,
("float", "sse"): 4,
("double", "avx"): 4,
("float", "avx"): 8,
("double", "avx512"): 8,
("float", "avx512"): 16,
}
result = {
'width': width[(data_type, instruction_set)],
}
pre = prefix[instruction_set]
suf = suffix[data_type]
for intrinsic_id, function_shortcut in base_names.items():
function_shortcut = function_shortcut.strip()
name = function_shortcut[:function_shortcut.index('[')]
if intrinsic_id == 'makeVec':
arg_string = "({})".format(",".join(["{0}"] * result['width']))
else:
args = function_shortcut[function_shortcut.index('[') + 1: -1]
arg_string = "("
for arg in args.split(","):
arg = arg.strip()
if not arg:
continue
if arg in ('0', '1', '2', '3', '4', '5'):
arg_string += "{" + arg + "},"
else:
arg_string += arg + ","
arg_string = arg_string[:-1] + ")"
mask_suffix = '_mask' if instruction_set == 'avx512' and intrinsic_id in comparisons.keys() else ''
result[intrinsic_id] = pre + "_" + name + "_" + suf + mask_suffix + arg_string
result['dataTypePrefix'] = {
'double': "_" + pre + 'd',
'float': "_" + pre,
}
result['rsqrt'] = None
bit_width = result['width'] * (64 if data_type == 'double' else 32)
result['double'] = "__m%dd" % (bit_width,)
result['float'] = "__m%d" % (bit_width,)
result['int'] = "__m%di" % (bit_width,)
result['bool'] = "__m%dd" % (bit_width,)
result['headers'] = headers[instruction_set]
result['any'] = "%s_movemask_%s({0}) > 0" % (pre, suf)
result['all'] = "%s_movemask_%s({0}) == 0xF" % (pre, suf)
if instruction_set == 'avx512':
size = 8 if data_type == 'double' else 16
result['&'] = '_kand_mask%d({0}, {1})' % (size,)
result['|'] = '_kor_mask%d({0}, {1})' % (size,)
result['any'] = '!_ktestz_mask%d_u8({0}, {0})' % (size, )
result['all'] = '_kortestc_mask%d_u8({0}, {0})' % (size, )
result['blendv'] = '%s_mask_blend_%s({2}, {0}, {1})' % (pre, suf)
result['rsqrt'] = "_mm512_rsqrt14_%s({0})" % (suf,)
result['bool'] = "__mmask%d" % (size,)
if instruction_set == 'avx' and data_type == 'float':
result['rsqrt'] = "_mm256_rsqrt_ps({0})"
return result
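# Example (sketch) of the generated mapping for double precision on AVX:
#   vis = get_vector_instruction_set('double', 'avx')
#   vis['width']   -> 4
#   vis['+']       -> '_mm256_add_pd({0},{1})'
#   vis['double']  -> '__m256d'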
def get_supported_instruction_sets():
"""List of supported instruction sets on current hardware, or None if query failed."""
try:
from cpuinfo import get_cpu_info
except ImportError:
return None
result = []
required_sse_flags = {'sse', 'sse2', 'ssse3', 'sse4_1', 'sse4_2'}
required_avx_flags = {'avx'}
required_avx512_flags = {'avx512f'}
flags = set(get_cpu_info()['flags'])
if flags.issuperset(required_sse_flags):
result.append("sse")
if flags.issuperset(required_avx_flags):
result.append("avx")
if flags.issuperset(required_avx512_flags):
result.append("avx512")
return result
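# Example (sketch): on a typical AVX2 machine without AVX-512 this returns ['sse', 'avx'];
# if py-cpuinfo is not installed, the query fails and None is returned.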
import os

try:
    from functools import lru_cache as memorycache
except ImportError:
    from backports.functools_lru_cache import lru_cache as memorycache

try:
    from joblib import Memory
    from appdirs import user_cache_dir
    if 'PYSTENCILS_CACHE_DIR' in os.environ:
        cache_dir = os.environ['PYSTENCILS_CACHE_DIR']
    else:
        cache_dir = user_cache_dir('pystencils')
    disk_cache = Memory(cache_dir, verbose=False).cache
    disk_cache_no_fallback = disk_cache
except ImportError:
    # fall back to in-memory caching if joblib is not available
    disk_cache = memorycache(maxsize=64)

    def disk_cache_no_fallback(o):
        return o

# Disable memory cache:
# disk_cache = lambda o: o
# disk_cache_no_fallback = lambda o: o
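# Usage sketch: both backends act as decorators on pure functions, e.g.
#   @disk_cache
#   def expensive_computation(n):
#       return sum(i * i for i in range(n))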
import ctypes
import numpy as np
import sympy as sp
from sympy.core.cache import cacheit
from sympy.logic.boolalg import Boolean
from pystencils.cache import memorycache
from pystencils.utils import all_equal
try:
import llvmlite.ir as ir
except ImportError as e:
ir = None
_ir_importerror = e
# noinspection PyPep8Naming
class address_of(sp.Function):
is_Atom = True
def __new__(cls, arg):
obj = sp.Function.__new__(cls, arg)
return obj
@property
def canonical(self):
if hasattr(self.args[0], 'canonical'):
return self.args[0].canonical
else:
raise NotImplementedError()
@property
def is_commutative(self):
return self.args[0].is_commutative
@property
def dtype(self):
if hasattr(self.args[0], 'dtype'):
return PointerType(self.args[0].dtype, restrict=True)
else:
return PointerType('void', restrict=True)
# noinspection PyPep8Naming
class cast_func(sp.Function):
is_Atom = True
def __new__(cls, *args, **kwargs):
if len(args) != 2:
pass
expr, dtype, *other_args = args
if not isinstance(dtype, Type):
dtype = create_type(dtype)
# to work in conditions of sp.Piecewise cast_func has to be of type Boolean as well
# however, a cast_function should only be a boolean if its argument is a boolean, otherwise this leads
# to problems when for example comparing cast_func's for equality
#
# lhs = bitwise_and(a, cast_func(1, 'int'))
# rhs = cast_func(0, 'int')
# print( sp.Ne(lhs, rhs) ) # would give true if all cast_funcs are booleans
# -> thus a separate class boolean_cast_func is introduced
if isinstance(expr, Boolean):
cls = boolean_cast_func
return sp.Function.__new__(cls, expr, dtype, *other_args, **kwargs)
@property
def canonical(self):
if hasattr(self.args[0], 'canonical'):
return self.args[0].canonical
else:
raise NotImplementedError()
@property
def is_commutative(self):
return self.args[0].is_commutative
def _eval_evalf(self, *args, **kwargs):
return self.args[0].evalf()
@property
def dtype(self):
return self.args[1]
# noinspection PyPep8Naming
class boolean_cast_func(cast_func, Boolean):
pass
# noinspection PyPep8Naming
class vector_memory_access(cast_func):
nargs = (4,)
# noinspection PyPep8Naming
class reinterpret_cast_func(cast_func):
pass
# noinspection PyPep8Naming
class pointer_arithmetic_func(sp.Function, Boolean):
@property
def canonical(self):
if hasattr(self.args[0], 'canonical'):
return self.args[0].canonical
else:
raise NotImplementedError()
class TypedSymbol(sp.Symbol):
def __new__(cls, *args, **kwds):
obj = TypedSymbol.__xnew_cached_(cls, *args, **kwds)
return obj
def __new_stage2__(cls, name, dtype, *args, **kwargs):
obj = super(TypedSymbol, cls).__xnew__(cls, name, *args, **kwargs)
try:
obj._dtype = create_type(dtype)
except (TypeError, ValueError):
# on error keep the string
obj._dtype = dtype
return obj
__xnew__ = staticmethod(__new_stage2__)
__xnew_cached_ = staticmethod(cacheit(__new_stage2__))
@property
def dtype(self):
return self._dtype
def _hashable_content(self):
return super()._hashable_content(), hash(self._dtype)
def __getnewargs__(self):
return self.name, self.dtype
# For reference: Numpy type hierarchy https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html
@property
def is_integer(self):
if hasattr(self.dtype, 'numpy_dtype'):
return np.issubdtype(self.dtype.numpy_dtype, np.integer) or super().is_integer
else:
return super().is_integer
@property
def is_negative(self):
if hasattr(self.dtype, 'numpy_dtype'):
if np.issubdtype(self.dtype.numpy_dtype, np.unsignedinteger):
return False
return super().is_negative
@property
def is_nonnegative(self):
if self.is_negative is False:
return True
else:
return super().is_nonnegative
@property
def is_real(self):
if hasattr(self.dtype, 'numpy_dtype'):
return np.issubdtype(self.dtype.numpy_dtype, np.integer) or \
np.issubdtype(self.dtype.numpy_dtype, np.floating) or \
super().is_real
else:
return super().is_real
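# Example (sketch): a TypedSymbol behaves like a sympy Symbol but carries a dtype
#   v = TypedSymbol('v', 'float64')
#   v.dtype    -> double
#   v.is_real  -> True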
def create_type(specification):
"""Creates a subclass of Type according to a string or an object of subclass Type.
Args:
specification: Type object, or a string
Returns:
Type object, or a new Type object parsed from the string
"""
if isinstance(specification, Type):
return specification
else:
numpy_dtype = np.dtype(specification)
if numpy_dtype.fields is None:
return BasicType(numpy_dtype, const=False)
else:
return StructType(numpy_dtype, const=False)
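# Example (sketch): create_type accepts type name strings, numpy dtypes, or Type objects
#   create_type('double')   -> double   (a BasicType)
#   create_type(np.int32)   -> int32_t
#   a Type instance is returned unchanged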
@memorycache(maxsize=64)
def create_composite_type_from_string(specification):
"""Creates a new Type object from a c-like string specification.
Args:
specification: Specification string
Returns:
Type object
"""
specification = specification.lower().split()
parts = []
current = []
for s in specification:
if s == '*':
parts.append(current)
current = [s]
else:
current.append(s)
if len(current) > 0:
parts.append(current)
# Parse native part
base_part = parts.pop(0)
const = False
if 'const' in base_part:
const = True
base_part.remove('const')
assert len(base_part) == 1
if base_part[0][-1] == "*":
base_part[0] = base_part[0][:-1]
parts.append('*')
current_type = BasicType(np.dtype(base_part[0]), const)
# Parse pointer parts
for part in parts:
restrict = False
const = False
if 'restrict' in part:
restrict = True
part.remove('restrict')
if 'const' in part:
const = True
part.remove("const")
assert len(part) == 1 and part[0] == '*'
current_type = PointerType(current_type, const, restrict)
return current_type
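# Example (sketch): parsing a C-like specification
#   create_composite_type_from_string('const double * restrict')
#     -> a PointerType printed as 'double const * RESTRICT'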
def get_base_type(data_type):
while data_type.base_type is not None:
data_type = data_type.base_type
return data_type
def to_ctypes(data_type):
"""
Transforms a given Type into ctypes
:param data_type: Subclass of Type
:return: ctypes type object
"""
if isinstance(data_type, PointerType):
return ctypes.POINTER(to_ctypes(data_type.base_type))
elif isinstance(data_type, StructType):
return ctypes.POINTER(ctypes.c_uint8)
else:
return to_ctypes.map[data_type.numpy_dtype]
to_ctypes.map = {
np.dtype(np.int8): ctypes.c_int8,
np.dtype(np.int16): ctypes.c_int16,
np.dtype(np.int32): ctypes.c_int32,
np.dtype(np.int64): ctypes.c_int64,
np.dtype(np.uint8): ctypes.c_uint8,
np.dtype(np.uint16): ctypes.c_uint16,
np.dtype(np.uint32): ctypes.c_uint32,
np.dtype(np.uint64): ctypes.c_uint64,
np.dtype(np.float32): ctypes.c_float,
np.dtype(np.float64): ctypes.c_double,
}
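# Example (sketch):
#   to_ctypes(create_type('float64'))             -> ctypes.c_double
#   to_ctypes(PointerType(create_type('int32')))  -> ctypes.POINTER(ctypes.c_int32)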
def ctypes_from_llvm(data_type):
if not ir:
raise _ir_importerror
if isinstance(data_type, ir.PointerType):
ctype = ctypes_from_llvm(data_type.pointee)
if ctype is None:
return ctypes.c_void_p
else:
return ctypes.POINTER(ctype)
elif isinstance(data_type, ir.IntType):
if data_type.width == 8:
return ctypes.c_int8
elif data_type.width == 16:
return ctypes.c_int16
elif data_type.width == 32:
return ctypes.c_int32
elif data_type.width == 64:
return ctypes.c_int64
else:
raise ValueError("Int width %d is not supported" % data_type.width)
elif isinstance(data_type, ir.FloatType):
return ctypes.c_float
elif isinstance(data_type, ir.DoubleType):
return ctypes.c_double
elif isinstance(data_type, ir.VoidType):
return None # Void type is not supported by ctypes
else:
raise NotImplementedError('Data type %s of %s is not supported yet' % (type(data_type), data_type))
def to_llvm_type(data_type):
"""
Transforms a given Type into an llvmlite type
:param data_type: Subclass of Type
:return: llvmlite type object
"""
if not ir:
raise _ir_importerror
if isinstance(data_type, PointerType):
return to_llvm_type(data_type.base_type).as_pointer()
else:
return to_llvm_type.map[data_type.numpy_dtype]
if ir:
to_llvm_type.map = {
np.dtype(np.int8): ir.IntType(8),
np.dtype(np.int16): ir.IntType(16),
np.dtype(np.int32): ir.IntType(32),
np.dtype(np.int64): ir.IntType(64),
np.dtype(np.uint8): ir.IntType(8),
np.dtype(np.uint16): ir.IntType(16),
np.dtype(np.uint32): ir.IntType(32),
np.dtype(np.uint64): ir.IntType(64),
np.dtype(np.float32): ir.FloatType(),
np.dtype(np.float64): ir.DoubleType(),
}
def peel_off_type(dtype, type_to_peel_off):
while type(dtype) is type_to_peel_off:
dtype = dtype.base_type
return dtype
def collate_types(types):
"""
Takes a sequence of types and returns their "common type" e.g. (float, double, float) -> double
Uses the collation rules from numpy.
"""
# Pointer arithmetic case i.e. pointer + integer is allowed
if any(type(t) is PointerType for t in types):
pointer_type = None
for t in types:
if type(t) is PointerType:
if pointer_type is not None:
raise ValueError("Cannot collate the combination of two pointer types")
pointer_type = t
elif type(t) is BasicType:
if not (t.is_int() or t.is_uint()):
raise ValueError("Invalid pointer arithmetic")
else:
raise ValueError("Invalid pointer arithmetic")
return pointer_type
# peel off vector types; if at least one vector type occurred, the result will also be a vector type
vector_type = [t for t in types if type(t) is VectorType]
if not all_equal(t.width for t in vector_type):
raise ValueError("Collation failed because of vector types with different width")
types = [peel_off_type(t, VectorType) for t in types]
# now we should have a list of basic types - struct types are not yet supported
assert all(type(t) is BasicType for t in types)
if any(t.is_float() for t in types):
types = tuple(t for t in types if t.is_float())
# use numpy collation -> create type from numpy type -> and, put vector type around if necessary
result_numpy_type = np.result_type(*(t.numpy_dtype for t in types))
result = BasicType(result_numpy_type)
if vector_type:
result = VectorType(result, vector_type[0].width)
return result
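# Example (sketch): collation follows numpy promotion rules, e.g.
#   collate_types([create_type('float32'), create_type('float64')])  -> double
# and a pointer type combined with an integer type yields the pointer type.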
@memorycache(maxsize=2048)
def get_type_of_expression(expr):
from pystencils.astnodes import ResolvedFieldAccess
from pystencils.cpu.vectorization import vec_all, vec_any
expr = sp.sympify(expr)
if isinstance(expr, sp.Integer):
return create_type("int")
elif isinstance(expr, sp.Rational) or isinstance(expr, sp.Float):
return create_type("double")
elif isinstance(expr, ResolvedFieldAccess):
return expr.field.dtype
elif isinstance(expr, TypedSymbol):
return expr.dtype
elif isinstance(expr, sp.Symbol):
raise ValueError("All symbols inside this expression have to be typed! ", str(expr))
elif isinstance(expr, cast_func):
return expr.args[1]
elif isinstance(expr, vec_any) or isinstance(expr, vec_all):
return create_type("bool")
elif hasattr(expr, 'func') and expr.func == sp.Piecewise:
collated_result_type = collate_types(tuple(get_type_of_expression(a[0]) for a in expr.args))
collated_condition_type = collate_types(tuple(get_type_of_expression(a[1]) for a in expr.args))
if type(collated_condition_type) is VectorType and type(collated_result_type) is not VectorType:
collated_result_type = VectorType(collated_result_type, width=collated_condition_type.width)
return collated_result_type
elif isinstance(expr, sp.Indexed):
typed_symbol = expr.base.label
return typed_symbol.dtype.base_type
elif isinstance(expr, sp.boolalg.Boolean) or isinstance(expr, sp.boolalg.BooleanFunction):
# if any arg is of vector type return a vector boolean, else return a normal scalar boolean
result = create_type("bool")
vec_args = [get_type_of_expression(a) for a in expr.args if isinstance(get_type_of_expression(a), VectorType)]
if vec_args:
result = VectorType(result, width=vec_args[0].width)
return result
elif isinstance(expr, sp.Pow):
return get_type_of_expression(expr.args[0])
elif isinstance(expr, sp.Expr):
types = tuple(get_type_of_expression(a) for a in expr.args)
return collate_types(types)
raise NotImplementedError("Could not determine type for", expr, type(expr))
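# Example (sketch):
#   x = TypedSymbol('x', 'float32')
#   get_type_of_expression(x + 3)  -> float   (the integer literal is collated away)
#   get_type_of_expression(x > 0)  -> bool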
class Type(sp.Basic):
is_Atom = True
def __new__(cls, *args, **kwargs):
return sp.Basic.__new__(cls)
def _sympystr(self, *args, **kwargs):
return str(self)
class BasicType(Type):
@staticmethod
def numpy_name_to_c(name):
if name == 'float64':
return 'double'
elif name == 'float32':
return 'float'
elif name.startswith('int'):
width = int(name[len("int"):])
return "int%d_t" % (width,)
elif name.startswith('uint'):
width = int(name[len("uint"):])
return "uint%d_t" % (width,)
elif name == 'bool':
return 'bool'
else:
raise NotImplementedError("Can map numpy to C name for %s" % (name,))
def __init__(self, dtype, const=False):
self.const = const
if isinstance(dtype, Type):
self._dtype = dtype.numpy_dtype
else:
self._dtype = np.dtype(dtype)
assert self._dtype.fields is None, "Tried to initialize NativeType with a structured type"
assert self._dtype.hasobject is False
assert self._dtype.subdtype is None
def __getnewargs__(self):
return self.numpy_dtype, self.const
@property
def base_type(self):
return None
@property
def numpy_dtype(self):
return self._dtype
@property
def item_size(self):
return 1
def is_int(self):
return self.numpy_dtype in np.sctypes['int'] or self.numpy_dtype in np.sctypes['uint']
def is_float(self):
return self.numpy_dtype in np.sctypes['float']
def is_uint(self):
return self.numpy_dtype in np.sctypes['uint']
def is_complex(self):
return self.numpy_dtype in np.sctypes['complex']
def is_other(self):
return self.numpy_dtype in np.sctypes['others']
@property
def base_name(self):
return BasicType.numpy_name_to_c(str(self._dtype))
def __str__(self):
result = BasicType.numpy_name_to_c(str(self._dtype))
if self.const:
result += " const"
return result
def __repr__(self):
return str(self)
def __eq__(self, other):
if not isinstance(other, BasicType):
return False
else:
return (self.numpy_dtype, self.const) == (other.numpy_dtype, other.const)
def __hash__(self):
return hash(str(self))
class VectorType(Type):
instruction_set = None
def __init__(self, base_type, width=4):
self._base_type = base_type
self.width = width
@property
def base_type(self):
return self._base_type
@property
def item_size(self):
return self.width * self.base_type.item_size
def __eq__(self, other):
if not isinstance(other, VectorType):
return False
else:
return (self.base_type, self.width) == (other.base_type, other.width)
def __str__(self):
if self.instruction_set is None:
return "%s[%d]" % (self.base_type, self.width)
else:
if self.base_type == create_type("int64"):
return self.instruction_set['int']
elif self.base_type == create_type("float64"):
return self.instruction_set['double']
elif self.base_type == create_type("float32"):
return self.instruction_set['float']
elif self.base_type == create_type("bool"):
return self.instruction_set['bool']
else:
raise NotImplementedError()
def __hash__(self):
return hash((self.base_type, self.width))
def __getnewargs__(self):
return self._base_type, self.width
class PointerType(Type):
def __init__(self, base_type, const=False, restrict=True):
self._base_type = base_type
self.const = const
self.restrict = restrict
def __getnewargs__(self):
return self.base_type, self.const, self.restrict
@property
def alias(self):
return not self.restrict
@property
def base_type(self):
return self._base_type
@property
def item_size(self):
return self.base_type.item_size
def __eq__(self, other):
if not isinstance(other, PointerType):
return False
else:
return (self.base_type, self.const, self.restrict) == (other.base_type, other.const, other.restrict)
def __str__(self):
components = [str(self.base_type), '*']
if self.restrict:
components.append('RESTRICT')
if self.const:
components.append("const")
return " ".join(components)
def __repr__(self):
return str(self)
def __hash__(self):
return hash((self._base_type, self.const, self.restrict))
class StructType:
def __init__(self, numpy_type, const=False):
self.const = const
self._dtype = np.dtype(numpy_type)
def __getnewargs__(self):
return self.numpy_dtype, self.const
@property
def base_type(self):
return None
@property
def numpy_dtype(self):
return self._dtype
@property
def item_size(self):
return self.numpy_dtype.itemsize
def get_element_offset(self, element_name):
return self.numpy_dtype.fields[element_name][1]
def get_element_type(self, element_name):
np_element_type = self.numpy_dtype.fields[element_name][0]
return BasicType(np_element_type, self.const)
def has_element(self, element_name):
return element_name in self.numpy_dtype.fields
def __eq__(self, other):
if not isinstance(other, StructType):
return False
else:
return (self.numpy_dtype, self.const) == (other.numpy_dtype, other.const)
def __str__(self):
# structs are handled byte-wise
result = "uint8_t"
if self.const:
result += " const"
return result
def __repr__(self):
return str(self)
def __hash__(self):
return hash((self.numpy_dtype, self.const))
#pragma once

extern "C++" {
#ifdef __CUDA_ARCH__
template <typename DTYPE_T, std::size_t DIMENSION> struct PyStencilsField {
  DTYPE_T *data;
  DTYPE_T shape[DIMENSION];
  DTYPE_T stride[DIMENSION];
};
#else
#include <array>
template <typename DTYPE_T, std::size_t DIMENSION> struct PyStencilsField {
  DTYPE_T *data;
  std::array<DTYPE_T, DIMENSION> shape;
  std::array<DTYPE_T, DIMENSION> stride;
};
#endif
}
#if !defined(__AES__) || !defined(__SSE2__)
#error AES-NI and SSE2 need to be enabled
#endif
#include <emmintrin.h> // SSE2
#include <wmmintrin.h> // AES
#ifdef __AVX512VL__
#include <immintrin.h> // AVX*
#endif
#include <cstdint>
#define QUALIFIERS inline
#define TWOPOW53_INV_DOUBLE (1.1102230246251565e-16)
#define TWOPOW32_INV_FLOAT (2.3283064e-10f)
typedef std::uint32_t uint32;
typedef std::uint64_t uint64;
QUALIFIERS __m128i aesni1xm128i(const __m128i & in, const __m128i & k) {
__m128i x = _mm_xor_si128(k, in);
x = _mm_aesenc_si128(x, k); // 1
x = _mm_aesenc_si128(x, k); // 2
x = _mm_aesenc_si128(x, k); // 3
x = _mm_aesenc_si128(x, k); // 4
x = _mm_aesenc_si128(x, k); // 5
x = _mm_aesenc_si128(x, k); // 6
x = _mm_aesenc_si128(x, k); // 7
x = _mm_aesenc_si128(x, k); // 8
x = _mm_aesenc_si128(x, k); // 9
x = _mm_aesenclast_si128(x, k); // 10
return x;
}
QUALIFIERS __m128 _my_cvtepu32_ps(const __m128i v)
{
#ifdef __AVX512VL__
return _mm_cvtepu32_ps(v);
#else
__m128i v2 = _mm_srli_epi32(v, 1);
__m128i v1 = _mm_and_si128(v, _mm_set1_epi32(1));
__m128 v2f = _mm_cvtepi32_ps(v2);
__m128 v1f = _mm_cvtepi32_ps(v1);
return _mm_add_ps(_mm_add_ps(v2f, v2f), v1f);
#endif
}
QUALIFIERS __m128d _my_cvtepu64_pd(const __m128i x)
{
#ifdef __AVX512VL__
return _mm_cvtepu64_pd(x);
#else
uint64 r[2];
_mm_storeu_si128((__m128i*)r, x);
return _mm_set_pd((double)r[1], (double)r[0]);
#endif
}
QUALIFIERS void aesni_double2(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
uint32 key0, uint32 key1, uint32 key2, uint32 key3,
double & rnd1, double & rnd2)
{
// pack input and call AES
__m128i c128 = _mm_set_epi32(ctr3, ctr2, ctr1, ctr0);
__m128i k128 = _mm_set_epi32(key3, key2, key1, key0);
c128 = aesni1xm128i(c128, k128);
// convert 32 to 64 bit and put 0th and 2nd element into x, 1st and 3rd element into y
__m128i x = _mm_and_si128(c128, _mm_set_epi32(0, 0xffffffff, 0, 0xffffffff));
__m128i y = _mm_and_si128(c128, _mm_set_epi32(0xffffffff, 0, 0xffffffff, 0));
y = _mm_srli_si128(y, 4);
// calculate z = x ^ (y << (53 - 32))
__m128i z = _mm_sll_epi64(y, _mm_set_epi64x(53 - 32, 53 - 32));
z = _mm_xor_si128(x, z);
// convert uint64 to double
__m128d rs = _my_cvtepu64_pd(z);
// calculate rs * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0)
rs = _mm_mul_pd(rs, _mm_set_pd1(TWOPOW53_INV_DOUBLE));
rs = _mm_add_pd(rs, _mm_set_pd1(TWOPOW53_INV_DOUBLE/2.0));
// store result
double rr[2];
_mm_storeu_pd(rr, rs);
rnd1 = rr[0];
rnd2 = rr[1];
}
QUALIFIERS void aesni_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
uint32 key0, uint32 key1, uint32 key2, uint32 key3,
float & rnd1, float & rnd2, float & rnd3, float & rnd4)
{
// pack input and call AES
__m128i c128 = _mm_set_epi32(ctr3, ctr2, ctr1, ctr0);
__m128i k128 = _mm_set_epi32(key3, key2, key1, key0);
c128 = aesni1xm128i(c128, k128);
// convert uint32 to float
__m128 rs = _my_cvtepu32_ps(c128);
// calculate rs * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f)
rs = _mm_mul_ps(rs, _mm_set_ps1(TWOPOW32_INV_FLOAT));
rs = _mm_add_ps(rs, _mm_set_ps1(TWOPOW32_INV_FLOAT/2.0f));
// store result
float r[4];
_mm_storeu_ps(r, rs);
rnd1 = r[0];
rnd2 = r[1];
rnd3 = r[2];
rnd4 = r[3];
}
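// Usage sketch (ctr0..ctr3 and key0..key3 are caller-supplied counter and key words):
//   double r1, r2;
//   aesni_double2(ctr0, ctr1, ctr2, ctr3, key0, key1, key2, key3, r1, r2);
//   // r1 and r2 are uniformly distributed in (0, 1)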