# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""Interpolators for non-integer field accesses: software interpolation via
stencils on CPU, hardware-accelerated texture fetches on GPU."""
import hashlib
import itertools
from enum import Enum
from typing import Set

import sympy as sp
from sympy.core.cache import cacheit

import pystencils
from pystencils.astnodes import Node
from pystencils.data_types import TypedSymbol, cast_func, create_type

try:
    import pycuda.driver
except Exception:
    pass

_hash = hashlib.md5


class InterpolationMode(str, Enum):
    NEAREST_NEIGHBOR = "nearest_neighbour"
    NN = NEAREST_NEIGHBOR  # alias
    LINEAR = "linear"
    CUBIC_SPLINE = "cubic_spline"


class _InterpolationSymbol(TypedSymbol):
    # Follows sympy's cached-__new__ idiom: __new__ delegates to a cached
    # second stage so equal symbols are interned like other sympy objects.

    def __new__(cls, name, field, interpolator):
        obj = cls.__xnew_cached_(cls, name, field, interpolator)
        return obj

    def __new_stage2__(cls, name, field, interpolator):
        obj = super().__xnew__(cls, name, 'dummy_symbol_carrying_field' + field.name)
        obj.field = field
        obj.interpolator = interpolator
        return obj

    def __getnewargs__(self):
        return self.name, self.field, self.interpolator

    def __getnewargs_ex__(self):
        return (self.name, self.field, self.interpolator), {}

    # noinspection SpellCheckingInspection
    __xnew__ = staticmethod(__new_stage2__)
    # noinspection SpellCheckingInspection
    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
class Interpolator(object):
    """
    Implements non-integer accesses on fields using linear interpolation.

    On GPU, this interpolator can be implemented by a :class:`.TextureCachedField` for hardware acceleration.

    Address modes are the available boundary handlings; the choices mirror those of CUDA textures:

    **CLAMP**

        The signal c[k] is continued outside k=0,...,M-1 so that c[k] = c[0] for k < 0, and c[k] = c[M-1] for k >= M.

    **BORDER**

        The signal c[k] is continued outside k=0,...,M-1 so that c[k] = 0 for k < 0 and for k >= M.

    To describe the last two address modes, we are forced to consider normalized coordinates,
    so that the 1D input signal samples are assumed to be c[k / M], with k=0,...,M-1.

    **WRAP**

        The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period M.
        In other words, c[(k + p * M) / M] = c[k / M] for any (positive, negative or vanishing) integer p.

    **MIRROR**

        The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period 2 * M - 2.
        In other words, c[l / M] = c[k / M] for any l and k such that (l + k) mod (2 * M - 2) = 0.

    Explanations from https://stackoverflow.com/questions/19020963/the-different-addressing-modes-of-cuda-textures
    """
    required_global_declarations = []

    def __init__(self,
                 parent_field,
                 interpolation_mode: InterpolationMode,
                 address_mode='BORDER',
                 use_normalized_coordinates=False,
                 allow_textures=True):
        super().__init__()

        self.field = parent_field
        self.field.field_type = pystencils.field.FieldType.CUSTOM
        self.address_mode = address_mode
        self.use_normalized_coordinates = use_normalized_coordinates
        self.interpolation_mode = interpolation_mode
        self.hash_str = hashlib.md5(
            f'{self.field}_{address_mode}_{self.field.dtype}_{interpolation_mode}'.encode()).hexdigest()
        self.symbol = _InterpolationSymbol(str(self), parent_field, self)
        self.allow_textures = allow_textures

    @property
    def ndim(self):
        return self.field.ndim

    @property
    def _hashable_contents(self):
        return (str(self.address_mode),
                str(type(self)),
                self.hash_str,
                self.use_normalized_coordinates)

    def at(self, offset):
        return InterpolatorAccess(self.symbol, *[sp.S(o) for o in offset])

    def __getitem__(self, offset):
        return InterpolatorAccess(self.symbol, *[sp.S(o) for o in offset])

    def __str__(self):
        return f'{self.field.name}_interpolator_{self.reproducible_hash}'

    def __repr__(self):
        return self.__str__()

    def __hash__(self):
        return hash(self._hashable_contents)

    def __eq__(self, other):
        return hash(self) == hash(other)

    @property
    def reproducible_hash(self):
        return _hash(str(self._hashable_contents).encode()).hexdigest()
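

# --- Hedged sketch (editor's addition, not in the original source) ---------
# How the four address modes documented above translate to plain index
# arithmetic for a 1D signal of length M, following the docstring's
# conventions. The function is illustrative only; the generated-code variant
# lives in InterpolatorAccess.implementation_with_stencils below.
def _address_mode_sketch(c, k, mode='BORDER'):
    """Fetch sample k from the sequence c under the given address mode."""
    M = len(c)
    if mode == 'CLAMP':
        return c[min(max(k, 0), M - 1)]
    if mode == 'BORDER':
        return c[k] if 0 <= k < M else 0
    if mode == 'WRAP':
        return c[k % M]
    if mode == 'MIRROR':
        saw_tooth = abs(k) % (2 * M - 2)  # period 2*M - 2, as in the docstring
        return c[saw_tooth if saw_tooth < M else 2 * M - 2 - saw_tooth]
    raise NotImplementedError(mode)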
class LinearInterpolator(Interpolator):

    def __init__(self,
                 parent_field: pystencils.Field,
                 address_mode='BORDER',
                 use_normalized_coordinates=False):
        super().__init__(parent_field,
                         InterpolationMode.LINEAR,
                         address_mode,
                         use_normalized_coordinates)


class NearestNeighborInterpolator(Interpolator):

    def __init__(self,
                 parent_field: pystencils.Field,
                 address_mode='BORDER',
                 use_normalized_coordinates=False):
        super().__init__(parent_field,
                         InterpolationMode.NN,
                         address_mode,
                         use_normalized_coordinates)
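

# --- Hedged usage sketch (editor's addition, not in the original source) ---
# Creating an interpolator for a pystencils field and accessing it at
# non-integer, possibly symbolic, coordinates. Field and symbol names are
# made up for illustration.
def _interpolator_usage_sketch():
    src = pystencils.fields('src: float32[2D]')
    x, y = sp.symbols('x, y')
    interp = LinearInterpolator(src, address_mode='CLAMP')
    # at() and __getitem__ are equivalent; offsets may be sympy expressions
    return interp.at((x + 0.5, y + 0.5))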
class InterpolatorAccess(TypedSymbol):

    def __new__(cls, symbol, *offsets):
        obj = InterpolatorAccess.__xnew_cached_(cls, symbol, *offsets)
        return obj

    def __new_stage2__(cls, symbol, *offsets):
        assert offsets is not None
        obj = super().__xnew__(cls, '%s_interpolator_%s' %
                               (symbol.field.name, _hash(str(tuple(offsets)).encode()).hexdigest()),
                               symbol.field.dtype)
        obj.offsets = offsets
        obj.symbol = symbol
        obj.field = symbol.field
        obj.interpolator = symbol.interpolator
        return obj

    def _hashable_content(self):
        return super()._hashable_content() + (self.symbol, self.field, tuple(self.offsets), self.symbol.interpolator)

    def __str__(self):
        return f"{self.field.name}_interpolator({', '.join(str(o) for o in self.offsets)})"

    def __repr__(self):
        return self.__str__()

    def _latex(self, printer, *_):
        n = self.field.latex_name if self.field.latex_name else self.field.name
        foo = ", ".join(str(printer.doprint(o)) for o in self.offsets)
        return f'{n}_{{interpolator}}\\left({foo}\\right)'

    @property
    def ndim(self):
        return len(self.offsets)

    @property
    def is_texture(self):
        return isinstance(self.interpolator, TextureCachedField)

    def atoms(self, *types):
        if self.offsets:
            offsets = set(o for o in self.offsets if isinstance(o, types))
            if isinstance(self, types):
                offsets.update([self])
            for o in self.offsets:
                if hasattr(o, 'atoms'):
                    offsets.update(set(o.atoms(*types)))
            return offsets
        else:
            return set()

    def neighbor(self, coord_id, offset):
        offset_list = list(self.offsets)
        offset_list[coord_id] += offset
        return self.interpolator.at(tuple(offset_list))

    @property
    def free_symbols(self):
        symbols = set()
        if self.offsets is not None:
            for o in self.offsets:
                if hasattr(o, 'free_symbols'):
                    symbols.update(set(o.free_symbols))
        return symbols

    @property
    def required_global_declarations(self):
        required_global_declarations = self.symbol.interpolator.required_global_declarations
        if required_global_declarations:
            required_global_declarations[0]._symbols_defined.add(self)
        return required_global_declarations

    @property
    def args(self):
        return [self.symbol, *self.offsets]

    @property
    def symbols_defined(self) -> Set[sp.Symbol]:
        return {self}

    @property
    def interpolation_mode(self):
        return self.interpolator.interpolation_mode

    @property
    def _diff_interpolation_vec(self):
        return sp.Matrix([DiffInterpolatorAccess(self.symbol, i, *self.offsets)
                          for i in range(len(self.offsets))])

    def diff(self, *symbols, **kwargs):
        if symbols == (self,):
            return 1
        # Chain rule: gradient w.r.t. the offsets times derivative of the offsets
        rtn = self._diff_interpolation_vec.T * sp.Matrix(self.offsets).diff(*symbols, **kwargs)
        if rtn.shape == (1, 1):
            rtn = rtn[0, 0]
        return rtn

    def implementation_with_stencils(self):
        field = self.field
        default_int_type = create_type('int64')
        use_textures = isinstance(self.interpolator, TextureCachedField)
        if use_textures:
            def absolute_access(x, _):
                return self.symbol.interpolator.at(tuple(x))
        else:
            absolute_access = field.absolute_access

        sum = [0, ] * (field.shape[0] if field.index_dimensions else 1)

        offsets = self.offsets
        rounding_functions = (sp.floor, lambda x: sp.floor(x) + 1)

        for channel_idx in range(field.shape[0] if field.index_dimensions else 1):
            if self.interpolation_mode == InterpolationMode.NN:
                if use_textures:
                    sum[channel_idx] = self
                else:
                    sum[channel_idx] = absolute_access([sp.floor(i + 0.5) for i in offsets], channel_idx)

            elif self.interpolation_mode == InterpolationMode.LINEAR:
                # TODO optimization: implement via lerp: https://devblogs.nvidia.com/lerp-faster-cuda/
                for c in itertools.product(rounding_functions, repeat=field.spatial_dimensions):
                    weight = sp.Mul(*[1 - sp.Abs(f(offset) - offset) for (f, offset) in zip(c, offsets)])
                    index = [f(offset) for (f, offset) in zip(c, offsets)]
                    if use_textures:
                        # Hardware boundary handling on GPU
                        sum[channel_idx] += \
                            weight * absolute_access(index, channel_idx if field.index_dimensions else ())
                    # else: boundary handling in software
                    elif str(self.interpolator.address_mode).lower() == 'border':
                        is_inside_field = sp.And(
                            *itertools.chain([i >= 0 for i in index],
                                             [idx < field.shape[dim] for (dim, idx) in enumerate(index)]))
                        index = [cast_func(i, default_int_type) for i in index]
                        sum[channel_idx] += sp.Piecewise(
                            (weight * absolute_access(index, channel_idx if field.index_dimensions else ()),
                             is_inside_field),
                            (sp.simplify(0), True)
                        )
                    elif str(self.interpolator.address_mode).lower() == 'clamp':
                        index = [sp.Min(sp.Max(0, cast_func(i, default_int_type)), field.spatial_shape[dim] - 1)
                                 for (dim, i) in enumerate(index)]
                        sum[channel_idx] += weight * \
                            absolute_access(index, channel_idx if field.index_dimensions else ())
                    elif str(self.interpolator.address_mode).lower() == 'wrap':
                        index = [sp.Mod(cast_func(i, default_int_type), field.shape[dim] - 1)
                                 for (dim, i) in enumerate(index)]
                        index = [cast_func(sp.Piecewise((i, i > 0),
                                                        (sp.Abs(cast_func(field.shape[dim] - 1 + i, default_int_type)),
                                                         True)), default_int_type)
                                 for (dim, i) in enumerate(index)]
                        sum[channel_idx] += weight * \
                            absolute_access(index, channel_idx if field.index_dimensions else ())
                    elif str(self.interpolator.address_mode).lower() == 'mirror':
                        def triangle_fun(x, half_period):
                            saw_tooth = cast_func(sp.Abs(cast_func(x, 'int32')), 'int32') % (
                                cast_func(2 * half_period, create_type('int32')))
                            return sp.Piecewise((saw_tooth, saw_tooth < half_period),
                                                (2 * half_period - 1 - saw_tooth, True))
                        index = [cast_func(triangle_fun(i, field.shape[dim]),
                                           default_int_type) for (dim, i) in enumerate(index)]
                        sum[channel_idx] += weight * \
                            absolute_access(index, channel_idx if field.index_dimensions else ())
                    else:
                        raise NotImplementedError()

            elif self.interpolation_mode == InterpolationMode.CUBIC_SPLINE:
                raise NotImplementedError("only works with HW interpolation for float32")

        sum = [sp.factor(s) for s in sum]

        if field.index_dimensions:
            return sp.Matrix(sum)
        else:
            return sum[0]

    # noinspection SpellCheckingInspection
    __xnew__ = staticmethod(__new_stage2__)
    # noinspection SpellCheckingInspection
    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))

    def __getnewargs__(self):
        return (self.symbol, *self.offsets)

    def __getnewargs_ex__(self):
        return (self.symbol, *self.offsets), {}
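

# --- Hedged sketch (editor's addition, not in the original source) ---------
# The corner/weight construction that implementation_with_stencils uses for
# LINEAR mode, written out for the 2D (bilinear) case with plain sympy:
# each of the 4 integer corners around (x, y) is weighted by
# (1 - |corner - coordinate|) per axis, and the weights sum to 1.
def _bilinear_weights_sketch():
    x, y = sp.symbols('x, y')
    rounding_functions = (sp.floor, lambda v: sp.floor(v) + 1)
    corners = []
    for fx, fy in itertools.product(rounding_functions, repeat=2):
        weight = (1 - sp.Abs(fx(x) - x)) * (1 - sp.Abs(fy(y) - y))
        corners.append(((fx(x), fy(y)), weight))
    return corners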
class DiffInterpolatorAccess(InterpolatorAccess):

    def __new__(cls, symbol, diff_coordinate_idx, *offsets):
        if symbol.interpolator.interpolation_mode == InterpolationMode.LINEAR:
            from pystencils.fd import Diff, Discretization2ndOrder
            return Discretization2ndOrder(1)(Diff(symbol.interpolator.at(offsets), diff_coordinate_idx))
        obj = DiffInterpolatorAccess.__xnew_cached_(cls, symbol, diff_coordinate_idx, *offsets)
        return obj

    def __new_stage2__(cls, symbol: sp.Symbol, diff_coordinate_idx, *offsets):
        assert offsets is not None
        obj = super().__xnew__(cls, symbol, *offsets)
        obj.diff_coordinate_idx = diff_coordinate_idx
        return obj

    def __hash__(self):
        return hash((self.symbol, self.field, self.diff_coordinate_idx, tuple(self.offsets), self.interpolator))

    def __str__(self):
        return '%s_diff%i_interpolator(%s)' % (self.field.name, self.diff_coordinate_idx,
                                               ', '.join(str(o) for o in self.offsets))

    def __repr__(self):
        return str(self)

    @property
    def args(self):
        return [self.symbol, self.diff_coordinate_idx, *self.offsets]

    @property
    def symbols_defined(self) -> Set[sp.Symbol]:
        return {self}

    @property
    def interpolation_mode(self):
        return self.interpolator.interpolation_mode

    # noinspection SpellCheckingInspection
    __xnew__ = staticmethod(__new_stage2__)
    # noinspection SpellCheckingInspection
    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))

    def __getnewargs__(self):
        return (self.symbol, self.diff_coordinate_idx, *self.offsets)

    def __getnewargs_ex__(self):
        return (self.symbol, self.diff_coordinate_idx, *self.offsets), {}
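

# --- Hedged sketch (editor's addition, not in the original source) ---------
# What InterpolatorAccess.diff computes: for an access whose offsets depend
# on a symbol s, the chain rule gives
#   d f / d s = sum_i (d f / d offset_i) * (d offset_i / d s),
# where each d f / d offset_i is a DiffInterpolatorAccess. Demonstrated here
# with plain sympy stand-ins for the gradient entries.
def _chain_rule_sketch():
    s = sp.Symbol('s')
    offsets = sp.Matrix([2 * s, s ** 2])
    grad = sp.Matrix(sp.symbols('df_dx df_dy'))  # stand-ins for DiffInterpolatorAccess
    return (grad.T * offsets.diff(s))[0, 0]  # == 2*df_dx + 2*s*df_dy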
##########################################################################################
# GPU-specific fast specializations (for precision, GPUs can also use the above symbols) #
##########################################################################################
class TextureCachedField(Interpolator):

    def __init__(self, parent_field,
                 address_mode=None,
                 filter_mode=None,
                 interpolation_mode: InterpolationMode = InterpolationMode.LINEAR,
                 use_normalized_coordinates=False,
                 read_as_integer=False):
        # Resolve defaults before calling super().__init__, which already
        # uses address_mode for its hash and stores it
        if address_mode is None:
            address_mode = 'border'
        if filter_mode is None:
            filter_mode = pycuda.driver.filter_mode.LINEAR
        super().__init__(parent_field, interpolation_mode, address_mode, use_normalized_coordinates)
        self.filter_mode = filter_mode  # previously computed but never stored
        self.read_as_integer = read_as_integer
        self.required_global_declarations = [TextureDeclaration(self)]

    @property
    def ndim(self):
        return self.field.ndim

    @classmethod
    def from_interpolator(cls, interpolator: LinearInterpolator):
        if (isinstance(interpolator, cls)
                or (hasattr(interpolator, 'allow_textures') and not interpolator.allow_textures)):
            return interpolator
        obj = cls(interpolator.field, interpolator.address_mode, interpolation_mode=interpolator.interpolation_mode)
        return obj

    def __str__(self):
        return f'{self.field.name}_texture_{self.reproducible_hash}'

    def __repr__(self):
        return self.__str__()

    @property
    def reproducible_hash(self):
        return _hash(str(self._hashable_contents).encode()).hexdigest()
class TextureDeclaration(Node):
    """
    A global declaration of a texture, visible both to device and host code.

    .. code:: cpp

        // This Node represents the following global declaration
        texture<float, cudaTextureType2D, cudaReadModeElementType> x_texture_5acc9fced7b0dc3e;

        __device__ kernel(...) {
            // kernel accesses x_texture_5acc9fced7b0dc3e with tex2d(...)
        }

        __host__ launch_kernel(...) {
            // Host needs to bind the texture
            cudaBindTexture(0, x_texture_5acc9fced7b0dc3e, buffer, N*sizeof(float));
        }

    This kind of declaration has been deprecated by CUDA in favor of :class:`.TextureObject`,
    but texture objects are not yet supported by PyCUDA (https://github.com/inducer/pycuda/pull/174).
    """

    def __init__(self, parent_texture):
        self.texture = parent_texture
        self._symbols_defined = {self.texture.symbol}

    @property
    def symbols_defined(self) -> Set[sp.Symbol]:
        return self._symbols_defined

    @property
    def args(self) -> Set[sp.Symbol]:
        return set()

    @property
    def headers(self):
        headers = ['"pycuda-helpers.hpp"']
        if self.texture.interpolation_mode == InterpolationMode.CUBIC_SPLINE:
            headers.append('"cubicTex%iD.cu"' % self.texture.ndim)
        return headers

    def __str__(self):
        from pystencils.backends.cuda_backend import CudaBackend
        return CudaBackend()(self)

    def __repr__(self):
        return str(self)
class TextureObject(TextureDeclaration):
    """
    A CUDA texture object. As opposed to :class:`.TextureDeclaration`, it is not declared globally
    but passed as a function argument to the kernel call.

    Like :class:`.TextureDeclaration` it defines :class:`.TextureAccess` symbols;
    only the printed representation differs.
    """
    pass
def dtype_supports_textures(dtype):
    """
    Returns whether CUDA natively supports texture fetches of this numpy dtype.

    The maximum word size for a texture fetch is four bytes. Larger dtypes can
    still be fetched with the helper trick in
    https://github.com/inducer/pycuda/blob/master/pycuda/cuda/pycuda-helpers.hpp
    """
    if hasattr(dtype, 'numpy_dtype'):
        dtype = dtype.numpy_dtype

    if isinstance(dtype, type):
        return dtype().itemsize <= 4

    return dtype.itemsize <= 4
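

# --- Hedged example (editor's addition, not in the original source) --------
# The four-byte word-size check in practice:
def _dtype_supports_textures_sketch():
    import numpy as np
    assert dtype_supports_textures(np.float32)         # 4 bytes: native fetch
    assert dtype_supports_textures(np.dtype('uint8'))  # 1 byte: native fetch
    assert not dtype_supports_textures(np.float64)     # 8 bytes: needs the pycuda-helpers trick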
from .generate_benchmark import generate_benchmark, run_c_benchmark
from .kerncraft_interface import KerncraftParameters, PyStencilsKerncraftKernel
__all__ = ['PyStencilsKerncraftKernel', 'KerncraftParameters', 'generate_benchmark', 'run_c_benchmark']
import subprocess
import tempfile
import warnings
from pathlib import Path

import numpy as np
from jinja2 import Environment, PackageLoader, StrictUndefined

from pystencils.astnodes import PragmaBlock
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.cpu.cpujit import get_compiler_config, run_compile_step
from pystencils.data_types import get_base_type
from pystencils.include import get_pystencils_include_path
from pystencils.integer_functions import modulo_ceil
from pystencils.sympyextensions import prod
def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
    """Return C code of a benchmark program for the given kernel.

    Args:
        ast: the pystencils AST object as returned by create_kernel
        likwid: if True, likwid markers are added to the code
        openmp: relevant only if likwid=True, to generate correct likwid initialization code
        timing: add timing output to the code, prints time per iteration to stdout

    Returns:
        C code as string
    """
    accessed_fields = {f.name: f for f in ast.fields_accessed}
    constants = []
    fields = []
    call_parameters = []
    for p in ast.get_parameters():
        if not p.is_field_parameter:
            constants.append((p.symbol.name, str(p.symbol.dtype)))
            call_parameters.append(p.symbol.name)
        else:
            assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
            field = accessed_fields[p.field_name]
            dtype = str(get_base_type(p.symbol.dtype))
            np_dtype = get_base_type(p.symbol.dtype).numpy_dtype
            size_data_type = np_dtype.itemsize

            dim0_size = field.shape[-1]
            dim1_size = np.prod(field.shape[:-1])
            elements = prod(field.shape)

            if ast.instruction_set:
                # Pad the innermost dimension so each row stays aligned to the SIMD width
                align = ast.instruction_set['width'] * size_data_type
                padding_elements = modulo_ceil(dim0_size, ast.instruction_set['width']) - dim0_size
                padding_bytes = padding_elements * size_data_type
                ghost_layers = max(max(ast.ghost_layers))
                size = dim1_size * padding_bytes + np.prod(field.shape) * size_data_type
                assert align % np_dtype.itemsize == 0
                offset = ((dim0_size + padding_elements + ghost_layers) % ast.instruction_set['width']) * size_data_type

                fields.append((p.field_name, dtype, elements, size, offset, align))
                call_parameters.append(p.field_name)
            else:
                size = elements * size_data_type
                fields.append((p.field_name, dtype, elements, size, 0, 0))
                call_parameters.append(p.field_name)

    header_list = get_headers(ast)
    includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])

    # Strip "#pragma omp parallel" from within the kernel, because the main function
    # takes care of that when likwid and openmp are enabled
    if likwid and openmp:
        if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
            ast.body.args[0].pragma_line = ''

    jinja_context = {
        'likwid': likwid,
        'openmp': openmp,
        'kernel_code': generate_c(ast, dialect='c'),
        'kernelName': ast.function_name,
        'fields': fields,
        'constants': constants,
        'call_argument_list': ",".join(call_parameters),
        'includes': includes,
        'timing': timing,
    }

    env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined)
    return env.get_template('benchmark.c').render(**jinja_context)
def run_c_benchmark(ast, inner_iterations, outer_iterations=3, path=None):
    """Runs the given kernel with an outer loop in C.

    Args:
        ast: pystencils AST which is used to compile the benchmark file
        inner_iterations: timings are recorded around this many iterations
        outer_iterations: number of timings recorded
        path: path where the benchmark file is stored. If None, a temporary folder is created

    Returns:
        list of times per iteration for each outer iteration
    """
    import kerncraft

    benchmark_code = generate_benchmark(ast, timing=True)

    if path is None:
        path = tempfile.mkdtemp()

    if isinstance(path, str):
        path = Path(path)

    with open(path / 'bench.c', 'w') as f:
        f.write(benchmark_code)

    kerncraft_path = Path(kerncraft.__file__).parent

    extra_flags = ['-I' + get_pystencils_include_path(),
                   '-I' + str(kerncraft_path / 'headers')]

    compiler_config = get_compiler_config()
    compile_cmd = [compiler_config['command']] + compiler_config['flags'].split()
    compile_cmd += [*extra_flags,
                    str(kerncraft_path / 'headers' / 'timing.c'),
                    str(kerncraft_path / 'headers' / 'dummy.c'),
                    str(path / 'bench.c'),
                    '-o', str(path / 'bench'),
                    ]
    run_compile_step(compile_cmd)

    # Estimate per-iteration time with a short run, to warn about overly long benchmarks
    time_pre_estimation_per_iteration = float(subprocess.check_output(['./' / path / 'bench', str(10)]))
    benchmark_time_limit = 20
    if benchmark_time_limit / time_pre_estimation_per_iteration < inner_iterations:
        warn = (f"A benchmark run with {inner_iterations} inner_iterations will probably take longer than "
                f"{benchmark_time_limit} seconds for this kernel")
        warnings.warn(warn)

    results = []
    for _ in range(outer_iterations):
        benchmark_time = float(subprocess.check_output(['./' / path / 'bench', str(inner_iterations)]))
        results.append(benchmark_time)

    return results
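

# --- Hedged usage sketch (editor's addition, not in the original source) ---
# Generating and running the C benchmark for a simple kernel. Field names are
# made up; run_c_benchmark additionally needs kerncraft and a working C
# compiler configured for pystencils.
def _benchmark_usage_sketch():
    import pystencils as ps
    src, dst = ps.fields('src, dst: double[2D]')
    update = [ps.Assignment(dst[0, 0], (src[1, 0] + src[-1, 0]) * 0.5)]
    ast = ps.create_kernel(update)
    code = generate_benchmark(ast, timing=True)  # C source as a string
    times = run_c_benchmark(ast, inner_iterations=100, outer_iterations=3)
    return code, times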
#include "kerncraft.h"
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include <stdio.h>
{{ includes }}
#define RESTRICT __restrict__
#define FUNC_PREFIX
void dummy(void *);
void timing(double* wcTime, double* cpuTime);
extern int var_false;
{{kernel_code}}
#define FUNC_PREFIX
{{function_signature}}
from .kernelcreation import create_kernel
from .llvmjit import make_python_function
__all__ = ['create_kernel', 'make_python_function']
"""
"""
from pystencils.opencl.opencljit import (
clear_global_ctx, init_globally, init_globally_with_context, make_python_function)
__all__ = ['init_globally', 'init_globally_with_context', 'clear_global_ctx', 'make_python_function']
"""
Automatically initializes OpenCL context using any device.
Use `pystencils.opencl.{init_globally_with_context,init_globally}` if you want to use a specific device.
"""
from pystencils.opencl.opencljit import (
clear_global_ctx, init_globally, init_globally_with_context, make_python_function)
__all__ = ['init_globally', 'init_globally_with_context', 'clear_global_ctx', 'make_python_function']
try:
    init_globally()
except Exception as e:
    import warnings
    warnings.warn(str(e))
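
# --- Hedged usage sketch (editor's addition, not in the original source) ---
# Importing this module initializes a global OpenCL context on any available
# device; a specific device can be chosen via init_globally_with_context.
# Whether create_kernel accepts target='opencl' in this version is an
# assumption, so the sketch below is kept as an illustrative comment only:
#
#   import pystencils as ps
#   src, dst = ps.fields('src, dst: float32[2D]')
#   ast = ps.create_kernel([ps.Assignment(dst[0, 0], src[0, 0])], target='opencl')
#   kernel = make_python_function(ast)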