147f6901 · 147f6901 · 147f6901 · 147f6901 · 147f6901 · 147f6901
--- a/pystencils_tests/test_kerncraft_coupling.py
+++ b/pystencils_tests/test_kerncraft_coupling.py
-import numpy as np
-import pytest
-import sympy as sp
-from pathlib import Path
-from kerncraft.kernel import KernelCode
-from kerncraft.machinemodel import MachineModel
-from kerncraft.models import ECM, ECMData, Benchmark
-import pystencils as ps
-from pystencils import Assignment, Field
-from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
-from pystencils.cpu import create_kernel
-from pystencils.datahandling import create_data_handling
-from pystencils.kerncraft_coupling import KerncraftParameters, PyStencilsKerncraftKernel
-from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark, run_c_benchmark
-from pystencils.timeloop import TimeLoop
-SCRIPT_FOLDER = Path(__file__).parent
-INPUT_FOLDER = SCRIPT_FOLDER / "kerncraft_inputs"
-@pytest.mark.kerncraft
-def test_compilation():
-    machine_file_path = INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml"
-    machine = MachineModel(path_to_yaml=machine_file_path)
-    kernel_file_path = INPUT_FOLDER / "2d-5pt.c"
-    with open(kernel_file_path) as kernel_file:
-        reference_kernel = KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
-        reference_kernel.get_kernel_header(name='test_kernel')
-        reference_kernel.get_kernel_code(name='test_kernel')
-        reference_kernel.get_main_code(kernel_function_name='test_kernel')
-    size = [30, 50, 3]
-    arr = np.zeros(size)
-    a = Field.create_from_numpy_array('a', arr, index_dimensions=1)
-    b = Field.create_from_numpy_array('b', arr, index_dimensions=1)
-    s = sp.Symbol("s")
-    rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
-    update_rule = Assignment(b[0, 0], s * rhs)
-    ast = create_kernel([update_rule])
-    mine = generate_benchmark(ast, likwid=False)
-    print(mine)
-@pytest.mark.kerncraft
-def analysis(kernel, machine, model='ecmdata'):
-    if model == 'ecmdata':
-        model = ECMData(kernel, machine, KerncraftParameters())
-    elif model == 'ecm':
-        model = ECM(kernel, machine, KerncraftParameters())
-    elif model == 'benchmark':
-        model = Benchmark(kernel, machine, KerncraftParameters())
-    else:
-        model = ECM(kernel, machine, KerncraftParameters())
-    model.analyze()
-    return model
-@pytest.mark.kerncraft
-def test_3d_7pt_osaca():
-    size = [20, 200, 200]
-    kernel_file_path = INPUT_FOLDER / "3d-7pt.c"
-    machine_file_path = INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml"
-    machine_model = MachineModel(path_to_yaml=machine_file_path)
-    with open(kernel_file_path) as kernel_file:
-        reference_kernel = KernelCode(kernel_file.read(), machine=machine_model, filename=kernel_file_path)
-    reference_kernel.set_constant('M', size[0])
-    reference_kernel.set_constant('N', size[1])
-    assert size[1] == size[2]
-    analysis(reference_kernel, machine_model, model='ecm')
-    arr = np.zeros(size)
-    a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
-    b = Field.create_from_numpy_array('b', arr, index_dimensions=0)
-    s = sp.Symbol("s")
-    rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
-    update_rule = Assignment(b[0, 0, 0], s * rhs)
-    ast = create_kernel([update_rule])
-    k = PyStencilsKerncraftKernel(ast, machine=machine_model, debug_print=True)
-    analysis(k, machine_model, model='ecm')
-    assert reference_kernel._flops == k._flops
-    path, lock = k.get_kernel_code(openmp=True)
-    with open(path) as kernel_file:
-        assert "#pragma omp parallel" in kernel_file.read()
-    path, lock = k.get_main_code()
-    with open(path) as kernel_file:
-        assert "likwid_markerInit();" in kernel_file.read()
-@pytest.mark.kerncraft
-def test_2d_5pt():
-    machine_file_path = INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml"
-    machine = MachineModel(path_to_yaml=machine_file_path)
-    size = [30, 50, 3]
-    kernel_file_path = INPUT_FOLDER / "2d-5pt.c"
-    with open(kernel_file_path) as kernel_file:
-        reference_kernel = KernelCode(kernel_file.read(), machine=machine, 
-                                      filename=kernel_file_path)
-    reference = analysis(reference_kernel, machine)
-    arr = np.zeros(size)
-    a = Field.create_from_numpy_array('a', arr, index_dimensions=1)
-    b = Field.create_from_numpy_array('b', arr, index_dimensions=1)
-    s = sp.Symbol("s")
-    rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
-    update_rule = Assignment(b[0, 0], s * rhs)
-    ast = create_kernel([update_rule])
-    k = PyStencilsKerncraftKernel(ast, machine)
-    result = analysis(k, machine)
-    for e1, e2 in zip(reference.results['cycles'], result.results['cycles']):
-        assert e1 == e2
-@pytest.mark.kerncraft
-def test_3d_7pt():
-    machine_file_path = INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml"
-    machine = MachineModel(path_to_yaml=machine_file_path)
-    size = [30, 50, 50]
-    kernel_file_path = INPUT_FOLDER / "3d-7pt.c"
-    with open(kernel_file_path) as kernel_file:
-        reference_kernel = KernelCode(kernel_file.read(), machine=machine,
-                                      filename=kernel_file_path)
-    reference_kernel.set_constant('M', size[0])
-    reference_kernel.set_constant('N', size[1])
-    assert size[1] == size[2]
-    reference = analysis(reference_kernel, machine)
-    arr = np.zeros(size)
-    a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
-    b = Field.create_from_numpy_array('b', arr, index_dimensions=0)
-    s = sp.Symbol("s")
-    rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
-    update_rule = Assignment(b[0, 0, 0], s * rhs)
-    ast = create_kernel([update_rule])
-    k = PyStencilsKerncraftKernel(ast, machine)
-    result = analysis(k, machine)
-    for e1, e2 in zip(reference.results['cycles'], result.results['cycles']):
-        assert e1 == e2
-@pytest.mark.kerncraft
-def test_benchmark():
-    size = [30, 50, 50]
-    arr = np.zeros(size)
-    a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
-    b = Field.create_from_numpy_array('b', arr, index_dimensions=0)
-    s = sp.Symbol("s")
-    rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
-    update_rule = Assignment(b[0, 0, 0], s * rhs)
-    ast = create_kernel([update_rule])
-    c_benchmark_run = run_c_benchmark(ast, inner_iterations=1000, outer_iterations=1)
-    kernel = ast.compile()
-    a = np.full(size, fill_value=0.23)
-    b = np.full(size, fill_value=0.23)
-    timeloop = TimeLoop(steps=1)
-    timeloop.add_call(kernel, {'a': a, 'b': b, 's': 0.23})
-    timeloop_time = timeloop.benchmark(number_of_time_steps_for_estimation=1)
-    np.testing.assert_almost_equal(c_benchmark_run, timeloop_time, decimal=4)
-@pytest.mark.kerncraft
-def test_benchmark_vectorized():
-    instruction_sets = get_supported_instruction_sets()
-    if not instruction_sets:
-        pytest.skip("cannot detect CPU instruction set")
-    for vec in instruction_sets:
-        dh = create_data_handling((20, 20, 20), periodicity=True)
-        width = get_vector_instruction_set(instruction_set=vec)['width'] * 8
-        a = dh.add_array("a", values_per_cell=1, alignment=width)
-        b = dh.add_array("b", values_per_cell=1, alignment=width)
-        rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
-        update_rule = Assignment(b[0, 0, 0], rhs)
-        opt = {'instruction_set': vec, 'assume_aligned': True, 'nontemporal': True, 'assume_inner_stride_one': True}
-        ast = ps.create_kernel(update_rule, cpu_vectorize_info=opt)
-        run_c_benchmark(ast, 5)
--- a/pystencils_tests/test_llvm.py
+++ b/pystencils_tests/test_llvm.py
-import pytest
-try:
-    from pystencils.llvm.llvmjit import generate_and_jit
-    from pystencils.llvm import create_kernel, make_python_function
-    from pystencils.cpu.cpujit import get_llc_command
-    from pystencils import Assignment, Field, show_code
-    import numpy as np
-    import sympy as sp
-except ModuleNotFoundError:
-    pytest.importorskip("llvmlite")
-def test_jacobi_fixed_field_size():
-    size = (30, 20)
-    src_field_llvm = np.random.rand(*size)
-    src_field_py = np.copy(src_field_llvm)
-    dst_field_llvm = np.zeros(size)
-    dst_field_py = np.zeros(size)
-    f = Field.create_from_numpy_array("f", src_field_llvm)
-    d = Field.create_from_numpy_array("d", dst_field_llvm)
-    jacobi = Assignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
-    ast = create_kernel([jacobi])
-    for x in range(1, size[0] - 1):
-        for y in range(1, size[1] - 1):
-            dst_field_py[x, y] = 0.25 * (src_field_py[x - 1, y] + src_field_py[x + 1, y] +
-                                         src_field_py[x, y - 1] + src_field_py[x, y + 1])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    error = np.sum(np.abs(dst_field_py - dst_field_llvm))
-    np.testing.assert_almost_equal(error, 0.0)
-@pytest.mark.skipif(not get_llc_command(), reason="Tests requires llc in $PATH")
-def test_jacobi_fixed_field_size_gpu():
-    pytest.importorskip("pycuda")
-    size = (30, 20)
-    import pycuda.autoinit  # noqa
-    from pycuda.gpuarray import to_gpu
-    src_field_llvm = np.random.rand(*size)
-    src_field_py = np.copy(src_field_llvm)
-    dst_field_llvm = np.zeros(size)
-    dst_field_py = np.zeros(size)
-    f = Field.create_from_numpy_array("f", src_field_py)
-    d = Field.create_from_numpy_array("d", dst_field_py)
-    src_field_llvm = to_gpu(src_field_llvm)
-    dst_field_llvm = to_gpu(dst_field_llvm)
-    jacobi = Assignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
-    ast = create_kernel([jacobi], target='gpu')
-    show_code(ast)
-    for x in range(1, size[0] - 1):
-        for y in range(1, size[1] - 1):
-            dst_field_py[x, y] = 0.25 * (src_field_py[x - 1, y] + src_field_py[x + 1, y] +
-                                         src_field_py[x, y - 1] + src_field_py[x, y + 1])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    error = np.sum(np.abs(dst_field_py - dst_field_llvm.get()))
-    np.testing.assert_almost_equal(error, 0.0)
-def test_jacobi_variable_field_size():
-    size = (3, 3, 3)
-    f = Field.create_generic("f", 3)
-    d = Field.create_generic("d", 3)
-    jacobi = Assignment(d[0, 0, 0], (f[1, 0, 0] + f[-1, 0, 0] + f[0, 1, 0] + f[0, -1, 0]) / 4)
-    ast = create_kernel([jacobi])
-    src_field_llvm = np.random.rand(*size)
-    src_field_py = np.copy(src_field_llvm)
-    dst_field_llvm = np.zeros(size)
-    dst_field_py = np.zeros(size)
-    for x in range(1, size[0] - 1):
-        for y in range(1, size[1] - 1):
-            for z in range(1, size[2] - 1):
-                dst_field_py[x, y, z] = 0.25 * (src_field_py[x - 1, y, z] + src_field_py[x + 1, y, z] +
-                                                src_field_py[x, y - 1, z] + src_field_py[x, y + 1, z])
-    kernel = make_python_function(ast, {'f': src_field_llvm, 'd': dst_field_llvm})
-    kernel()
-    error = np.sum(np.abs(dst_field_py - dst_field_llvm))
-    np.testing.assert_almost_equal(error, 0.0)
-def test_pow_llvm():
-    size = (30, 20)
-    src_field_llvm = 4 * np.ones(size)
-    dst_field_llvm = np.zeros(size)
-    f = Field.create_from_numpy_array("f", src_field_llvm)
-    d = Field.create_from_numpy_array("d", dst_field_llvm)
-    ur = Assignment(d[0, 0], sp.Pow(f[0, 0], -1.0))
-    ast = create_kernel([ur])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    assert np.all(0.25 == dst_field_llvm)
-    ur = Assignment(d[0, 0], sp.Pow(f[0, 0], 0.5))
-    ast = create_kernel([ur])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    assert np.all(2.0 == dst_field_llvm)
-    ur = Assignment(d[0, 0], sp.Pow(f[0, 0], 2.0))
-    ast = create_kernel([ur])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    assert np.all(16.0 == dst_field_llvm)
-    ur = Assignment(d[0, 0], sp.Pow(f[0, 0], 3.0))
-    ast = create_kernel([ur])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    assert np.all(64.0 == dst_field_llvm)
-    ur = Assignment(d[0, 0], sp.Pow(f[0, 0], 4.0))
-    ast = create_kernel([ur])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    assert np.all(256.0 == dst_field_llvm)
-def test_piecewise_llvm():
-    size = (30, 20)
-    src_field_llvm = np.zeros(size)
-    dst_field_llvm = np.zeros(size)
-    src_field_llvm[0:15, :] = 10.0
-    f = Field.create_from_numpy_array("f", src_field_llvm)
-    d = Field.create_from_numpy_array("d", dst_field_llvm)
-    picewise_test_strict_less_than = Assignment(d[0, 0], sp.Piecewise((1.0, f[0, 0] > 10), (0.0, True)))
-    ast = create_kernel([picewise_test_strict_less_than])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    assert (np.all(dst_field_llvm[:, :] == 0.0))
-    src_field_llvm = np.zeros(size)
-    dst_field_llvm = np.zeros(size)
-    src_field_llvm[0:15, :] = 10.0
-    picewise_test_less_than = Assignment(d[0, 0], sp.Piecewise((1.0, f[0, 0] >= 10), (0.0, True)))
-    ast = create_kernel([picewise_test_less_than])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    assert (np.all(dst_field_llvm[0:15, :] == 1.0))
-    src_field_llvm = np.zeros(size)
-    dst_field_llvm = np.zeros(size)
-    src_field_llvm[0:15, :] = 10.0
-    picewise_test_strict_greater_than = Assignment(d[0, 0], sp.Piecewise((1.0, f[0, 0] < 5), (0.0, True)))
-    ast = create_kernel([picewise_test_strict_greater_than])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    assert (np.all(dst_field_llvm[15:, :] == 1.0))
-    src_field_llvm = np.zeros(size)
-    dst_field_llvm = np.zeros(size)
-    src_field_llvm[0:15, :] = 10.0
-    picewise_test_greater_than = Assignment(d[0, 0], sp.Piecewise((1.0, f[0, 0] <= 10), (0.0, True)))
-    ast = create_kernel([picewise_test_greater_than])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    assert (np.all(dst_field_llvm[:, :] == 1.0))
-    src_field_llvm = np.zeros(size)
-    dst_field_llvm = np.zeros(size)
-    src_field_llvm[0:15, :] = 10.0
-    picewise_test_equality = Assignment(d[0, 0], sp.Piecewise((1.0, sp.Equality(f[0, 0], 10.0)), (0.0, True)))
-    ast = create_kernel([picewise_test_equality])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    assert (np.all(dst_field_llvm[0:15, :] == 1.0))
-    src_field_llvm = np.zeros(size)
-    dst_field_llvm = np.zeros(size)
-    src_field_llvm[0:15, :] = 10.0
-    picewise_test_unequality = Assignment(d[0, 0], sp.Piecewise((1.0, sp.Unequality(f[0, 0], 10.0)), (0.0, True)))
-    ast = create_kernel([picewise_test_unequality])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    assert (np.all(dst_field_llvm[15:, :] == 1.0))
-def test_piecewise_or_llvm():
-    size = (30, 20)
-    src_field_llvm = np.zeros(size)
-    dst_field_llvm = np.zeros(size)
-    src_field_llvm[0:15, :] = 10.5
-    f = Field.create_from_numpy_array("f", src_field_llvm)
-    d = Field.create_from_numpy_array("d", dst_field_llvm)
-    picewise_test_or = Assignment(d[0, 0], sp.Piecewise((1.0, sp.Or(f[0, 0] > 11, f[0, 0] < 10)), (0.0, True)))
-    ast = create_kernel([picewise_test_or])
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    assert (np.all(dst_field_llvm[0:15, :] == 0.0))
-def test_print_function_llvm():
-    size = (30, 20)
-    src_field_llvm = np.zeros(size)
-    dst_field_llvm = np.zeros(size)
-    src_field_llvm[0:15, :] = 0.0
-    f = Field.create_from_numpy_array("f", src_field_llvm)
-    d = Field.create_from_numpy_array("d", dst_field_llvm)
-    up = Assignment(d[0, 0], sp.sin(f[0, 0]))
-    ast = create_kernel([up])
-    # kernel = make_python_function(ast, {'f': src_field_llvm, 'd': dst_field_llvm})
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    assert (np.all(dst_field_llvm[:, :] == 0.0))
-if __name__ == "__main__":
-    test_jacobi_fixed_field_size_gpu()
--- a/pystencils_tests/test_opencl.py
+++ b/pystencils_tests/test_opencl.py
-import numpy as np
-import pytest
-import sympy as sp
-import pystencils
-from pystencils.backends.cuda_backend import CudaBackend
-from pystencils.backends.opencl_backend import OpenClBackend
-from pystencils.opencl.opencljit import get_global_cl_queue, make_python_function
-try:
-    import pyopencl as cl
-    HAS_OPENCL = True
-    import pystencils.opencl.autoinit
-except Exception:
-    HAS_OPENCL = False
-def test_print_opencl():
-    z, y, x = pystencils.fields("z, y, x: [2d]")
-    assignments = pystencils.AssignmentCollection({
-        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
-    })
-    print(assignments)
-    ast = pystencils.create_kernel(assignments, target='gpu')
-    print(ast)
-    pystencils.show_code(ast, custom_backend=CudaBackend())
-    opencl_code = pystencils.get_code_str(ast, custom_backend=OpenClBackend())
-    print(opencl_code)
-    assert "__global double * RESTRICT const _data_x" in str(opencl_code)
-    assert "__global double * RESTRICT" in str(opencl_code)
-    assert "get_local_id(0)" in str(opencl_code)
-@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
-def test_opencl_jit_fixed_size():
-    pytest.importorskip('pycuda')
-    z, y, x = pystencils.fields("z, y, x: [20,30]")
-    assignments = pystencils.AssignmentCollection({
-        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
-    })
-    print(assignments)
-    ast = pystencils.create_kernel(assignments, target='gpu')
-    print(ast)
-    code = pystencils.show_code(ast, custom_backend=CudaBackend())
-    print(code)
-    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
-    print(opencl_code)
-    cuda_kernel = ast.compile()
-    assert cuda_kernel is not None
-    import pycuda.gpuarray as gpuarray
-    x_cpu = np.random.rand(20, 30)
-    y_cpu = np.random.rand(20, 30)
-    z_cpu = np.random.rand(20, 30)
-    x = gpuarray.to_gpu(x_cpu)
-    y = gpuarray.to_gpu(y_cpu)
-    z = gpuarray.to_gpu(z_cpu)
-    cuda_kernel(x=x, y=y, z=z)
-    result_cuda = z.get()
-    import pyopencl.array as array
-    ctx = cl.create_some_context(0)
-    queue = cl.CommandQueue(ctx)
-    x = array.to_device(queue, x_cpu)
-    y = array.to_device(queue, y_cpu)
-    z = array.to_device(queue, z_cpu)
-    opencl_kernel = make_python_function(ast, queue, ctx)
-    assert opencl_kernel is not None
-    opencl_kernel(x=x, y=y, z=z)
-    result_opencl = z.get(queue)
-    assert np.allclose(result_cuda, result_opencl)
-@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
-def test_opencl_jit():
-    pytest.importorskip('pycuda')
-    z, y, x = pystencils.fields("z, y, x: [2d]")
-    assignments = pystencils.AssignmentCollection({
-        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
-    })
-    print(assignments)
-    ast = pystencils.create_kernel(assignments, target='gpu')
-    print(ast)
-    pystencils.show_code(ast, custom_backend=CudaBackend())
-    pystencils.show_code(ast, custom_backend=OpenClBackend())
-    cuda_kernel = ast.compile()
-    assert cuda_kernel is not None
-    import pycuda.gpuarray as gpuarray
-    x_cpu = np.random.rand(20, 30)
-    y_cpu = np.random.rand(20, 30)
-    z_cpu = np.random.rand(20, 30)
-    x = gpuarray.to_gpu(x_cpu)
-    y = gpuarray.to_gpu(y_cpu)
-    z = gpuarray.to_gpu(z_cpu)
-    cuda_kernel(x=x, y=y, z=z)
-    result_cuda = z.get()
-    import pyopencl.array as array
-    ctx = cl.create_some_context(0)
-    queue = cl.CommandQueue(ctx)
-    x = array.to_device(queue, x_cpu)
-    y = array.to_device(queue, y_cpu)
-    z = array.to_device(queue, z_cpu)
-    opencl_kernel = make_python_function(ast, queue, ctx)
-    assert opencl_kernel is not None
-    opencl_kernel(x=x, y=y, z=z)
-    result_opencl = z.get(queue)
-    assert np.allclose(result_cuda, result_opencl)
-@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
-def test_opencl_jit_with_parameter():
-    pytest.importorskip('pycuda')
-    z, y, x = pystencils.fields("z, y, x: [2d]")
-    a = sp.Symbol('a')
-    assignments = pystencils.AssignmentCollection({
-        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) + a
-    })
-    print(assignments)
-    ast = pystencils.create_kernel(assignments, target='gpu')
-    print(ast)
-    code = pystencils.show_code(ast, custom_backend=CudaBackend())
-    print(code)
-    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
-    print(opencl_code)
-    cuda_kernel = ast.compile()
-    assert cuda_kernel is not None
-    import pycuda.gpuarray as gpuarray
-    x_cpu = np.random.rand(20, 30)
-    y_cpu = np.random.rand(20, 30)
-    z_cpu = np.random.rand(20, 30)
-    x = gpuarray.to_gpu(x_cpu)
-    y = gpuarray.to_gpu(y_cpu)
-    z = gpuarray.to_gpu(z_cpu)
-    cuda_kernel(x=x, y=y, z=z, a=5.)
-    result_cuda = z.get()
-    import pyopencl.array as array
-    ctx = cl.create_some_context(0)
-    queue = cl.CommandQueue(ctx)
-    x = array.to_device(queue, x_cpu)
-    y = array.to_device(queue, y_cpu)
-    z = array.to_device(queue, z_cpu)
-    opencl_kernel = make_python_function(ast, queue, ctx)
-    assert opencl_kernel is not None
-    opencl_kernel(x=x, y=y, z=z, a=5.)
-    result_opencl = z.get(queue)
-    assert np.allclose(result_cuda, result_opencl)
-@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
-def test_without_cuda():
-    z, y, x = pystencils.fields("z, y, x: [20,30]")
-    assignments = pystencils.AssignmentCollection({
-        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
-    })
-    print(assignments)
-    ast = pystencils.create_kernel(assignments, target='gpu')
-    print(ast)
-    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
-    print(opencl_code)
-    x_cpu = np.random.rand(20, 30)
-    y_cpu = np.random.rand(20, 30)
-    z_cpu = np.random.rand(20, 30)
-    import pyopencl.array as array
-    ctx = cl.create_some_context(0)
-    queue = cl.CommandQueue(ctx)
-    x = array.to_device(queue, x_cpu)
-    y = array.to_device(queue, y_cpu)
-    z = array.to_device(queue, z_cpu)
-    opencl_kernel = make_python_function(ast, queue, ctx)
-    assert opencl_kernel is not None
-    opencl_kernel(x=x, y=y, z=z)
-@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
-def test_kernel_creation():
-    global pystencils
-    z, y, x = pystencils.fields("z, y, x: [20,30]")
-    assignments = pystencils.AssignmentCollection({
-        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
-    })
-    print(assignments)
-    import pystencils.opencl.autoinit
-    ast = pystencils.create_kernel(assignments, target='opencl')
-    print(ast.backend)
-    code = pystencils.get_code_str(ast)
-    print(code)
-    assert 'get_local_size' in code
-    opencl_kernel = ast.compile()
-    x_cpu = np.random.rand(20, 30)
-    y_cpu = np.random.rand(20, 30)
-    z_cpu = np.random.rand(20, 30)
-    import pyopencl.array as array
-    assert get_global_cl_queue()
-    x = array.to_device(get_global_cl_queue(), x_cpu)
-    y = array.to_device(get_global_cl_queue(), y_cpu)
-    z = array.to_device(get_global_cl_queue(), z_cpu)
-    assert opencl_kernel is not None
-    opencl_kernel(x=x, y=y, z=z)
--- a/pystencils_tests/test_phasefield_dentritic_3D.ipynb
+++ b/pystencils_tests/test_phasefield_dentritic_3D.ipynb
-%% Cell type:code id: tags:
-``` python
-import pytest
-pytest.importorskip('pycuda')
-```
-%% Cell type:code id: tags:
-``` python
-from pystencils.session import *
-sp.init_printing()
-frac = sp.Rational
-```
-%% Cell type:markdown id: tags:
-# Phase-field simulation of dentritic solidification in 3D
-This notebook tests the model presented in the dentritic growth tutorial in 3D.
-%% Cell type:code id: tags:
-``` python
-target = 'gpu'
-gpu = target == 'gpu'
-domain_size = (25, 25, 25) if 'is_test_run' in globals() else (300, 300, 300)
-dh = ps.create_data_handling(domain_size=domain_size, periodicity=True, default_target=target)
-φ_field = dh.add_array('phi', latex_name='φ')
-φ_delta_field = dh.add_array('phidelta', latex_name='φ_D')
-t_field = dh.add_array('T')
-```
-%% Cell type:code id: tags:
-``` python
-ε, m, δ, j, θzero, α, γ, Teq, κ, τ = sp.symbols("ε m δ j θ_0 α γ T_eq κ τ")
-εb = sp.Symbol("\\bar{\\epsilon}")
-discretize = ps.fd.Discretization2ndOrder(dx=0.03, dt=1e-5)
-φ = φ_field.center
-T = t_field.center
-d = ps.fd.Diff
-def f(φ, m):
-    return φ**4 / 4 - (frac(1, 2) - m/3) * φ**3 + (frac(1,4)-m/2)*φ**2
-bulk_free_energy_density = f(φ, m)
-interface_free_energy_density = ε ** 2 / 2 * (d(φ, 0) ** 2 + d(φ, 1) ** 2 + d(φ, 2) ** 2)
-```
-%% Cell type:markdown id: tags:
-Here comes the major change, that has to be made for the 3D model: $\epsilon$ depends on the interface normal, which can not be computed simply as atan() as in the 2D case
-%% Cell type:code id: tags:
-``` python
-n = sp.Matrix([d(φ, i) for i in range(3)])
-nLen = sp.sqrt(sum(n_i**2 for n_i in n))
-n = n / nLen
-nVal = sum(n_i**4 for n_i in n)
-σ = δ * nVal
-εVal = εb * (1 + σ)
-εVal
-```
-%% Output
-    $\displaystyle \bar{\epsilon} \left(δ \left(\frac{{\partial_{0} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{1} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{2} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}}\right) + 1\right)$
-                   ⎛  ⎛                            4
-                   ⎜  ⎜                 D(φ[0,0,0])
-    \bar{\epsilon}⋅⎜δ⋅⎜───────────────────────────────────────────── + ───────────
-                   ⎜  ⎜                                            2
-                   ⎜  ⎜⎛           2              2              2⎞    ⎛
-                   ⎝  ⎝⎝D(φ[0,0,0])  + D(φ[0,0,0])  + D(φ[0,0,0]) ⎠    ⎝D(φ[0,0,0]
-                     4                                               4
-          D(φ[0,0,0])                                     D(φ[0,0,0])
-    ────────────────────────────────── + ─────────────────────────────────────────
-                                     2
-     2              2              2⎞    ⎛           2              2
-    )  + D(φ[0,0,0])  + D(φ[0,0,0]) ⎠    ⎝D(φ[0,0,0])  + D(φ[0,0,0])  + D(φ[0,0,0]
-        ⎞    ⎞
-        ⎟    ⎟
-    ────⎟ + 1⎟
-       2⎟    ⎟
-     2⎞ ⎟    ⎟
-    ) ⎠ ⎠    ⎠
-%% Cell type:code id: tags:
-``` python
-def m_func(temperature):
-    return (α / sp.pi) * sp.atan(γ * (Teq - temperature))
-```
-%% Cell type:code id: tags:
-``` python
-substitutions = {m: m_func(T),
-                 ε: εVal}
-fe_i = interface_free_energy_density.subs(substitutions)
-fe_b = bulk_free_energy_density.subs(substitutions)
-μ_if = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_i, φ), functions=[φ])
-μ_b = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_b, φ), functions=[φ])
-```
-%% Cell type:code id: tags:
-``` python
-dF_dφ = μ_b + sp.Piecewise((μ_if, nLen**2 > 1e-10), (0, True))
-```
-%% Cell type:code id: tags:
-``` python
-parameters = {
-    τ: 0.0003,
-    κ: 1.8,
-    εb: 0.01,
-    δ: 0.3,
-    γ: 10,
-    j: 6,
-    α: 0.9,
-    Teq: 1.0,
-    θzero: 0.2,
-    sp.pi: sp.pi.evalf()
-}
-parameters
-```
-%% Output
-    $\displaystyle \left\{ \pi : 3.14159265358979, \  T_{eq} : 1.0, \  \bar{\epsilon} : 0.01, \  j : 6, \  α : 0.9, \  γ : 10, \  δ : 0.3, \  θ_{0} : 0.2, \  κ : 1.8, \  τ : 0.0003\right\}$
-    {π: 3.14159265358979, T_eq: 1.0, \bar{\epsilon}: 0.01, j: 6, α: 0.9, γ: 10, δ:
-     0.3, θ₀: 0.2, κ: 1.8, τ: 0.0003}
-%% Cell type:code id: tags:
-``` python
-dφ_dt = - dF_dφ / τ
-assignments = [
-    ps.Assignment(φ_delta_field.center, discretize(dφ_dt.subs(parameters))),
-]
-φEqs = ps.simp.sympy_cse_on_assignment_list(assignments)
-φEqs.append(ps.Assignment(φ, discretize(ps.fd.transient(φ) - φ_delta_field.center)))
-temperatureEvolution = -ps.fd.transient(T) + ps.fd.diffusion(T, 1) + κ * φ_delta_field.center
-temperatureEqs = [
-    ps.Assignment(T, discretize(temperatureEvolution.subs(parameters)))
-]
-```
-%% Cell type:code id: tags:
-``` python
-temperatureEqs
-```
-%% Output
-    $\displaystyle \left[ {{T}_{(0,0,0)}} \leftarrow 0.0111111111111111 {{T}_{(-1,0,0)}} + 0.0111111111111111 {{T}_{(0,-1,0)}} + 0.0111111111111111 {{T}_{(0,0,-1)}} + 0.933333333333333 {{T}_{(0,0,0)}} + 0.0111111111111111 {{T}_{(0,0,1)}} + 0.0111111111111111 {{T}_{(0,1,0)}} + 0.0111111111111111 {{T}_{(1,0,0)}} + 1.8 \cdot 10^{-5} {{φ_D}_{(0,0,0)}}\right]$
-    [T_C := 0.0111111111111111⋅T_W + 0.0111111111111111⋅T_S + 0.0111111111111111⋅T
-    _B + 0.933333333333333⋅T_C + 0.0111111111111111⋅T_T + 0.0111111111111111⋅T_N +
-     0.0111111111111111⋅T_E + 1.8e-5⋅phidelta_C]
-%% Cell type:code id: tags:
-``` python
-φ_kernel = ps.create_kernel(φEqs, cpu_openmp=4, target=target).compile()
-temperatureKernel = ps.create_kernel(temperatureEqs, cpu_openmp=4, target=target).compile()
-```
-%% Cell type:code id: tags:
-``` python
-def time_loop(steps):
-    φ_sync = dh.synchronization_function(['phi'], target=target)
-    temperature_sync = dh.synchronization_function(['T'], target=target)
-    dh.all_to_gpu()
-    for t in range(steps):
-        φ_sync()
-        dh.run_kernel(φ_kernel)
-        temperature_sync()
-        dh.run_kernel(temperatureKernel)
-    dh.all_to_cpu()
-def init(nucleus_size=np.sqrt(5)):
-    for b in dh.iterate():
-        x, y, z = b.cell_index_arrays
-        x, y, z = x - b.shape[0] // 2, y - b.shape[1] // 2, z - b.shape[2] // 2
-        b['phi'].fill(0)
-        b['phi'][(x ** 2 + y ** 2 + z ** 2) < nucleus_size ** 2] = 1.0
-        b['T'].fill(0.0)
-def plot(slice_obj=ps.make_slice[:, :, 0.5]):
-    plt.subplot(1, 3, 1)
-    plt.scalar_field(dh.gather_array('phi', slice_obj).squeeze())
-    plt.title("φ")
-    plt.colorbar()
-    plt.subplot(1, 3, 2)
-    plt.title("T")
-    plt.scalar_field(dh.gather_array('T', slice_obj).squeeze())
-    plt.colorbar()
-    plt.subplot(1, 3, 3)
-    plt.title("∂φ")
-    plt.scalar_field(dh.gather_array('phidelta', slice_obj).squeeze())
-    plt.colorbar()
-```
-%% Cell type:code id: tags:
-``` python
-init()
-plot()
-print(dh)
-```
-%% Output
-        Name|      Inner (min/max)|     WithGl (min/max)
-    ----------------------------------------------------
-           T|            (  0,  0)|            (  0,  0)
-         phi|            (  0,  1)|            (  0,  1)
-    phidelta|            (  0,  0)|            (  0,  0)
-%% Cell type:code id: tags:
-``` python
-if 'is_test_run' in globals():
-    time_loop(2)
-    assert np.isfinite(dh.max('phi'))
-    assert np.isfinite(dh.max('T'))
-    assert np.isfinite(dh.max('phidelta'))
-else:
-    from time import perf_counter
-    vtk_writer = dh.create_vtk_writer('dentritic_growth_large', ['phi'])
-    last = perf_counter()
-    for i in range(300):
-        time_loop(100)
-        vtk_writer(i)
-        print("Step ", i, perf_counter() - last, dh.max('phi'))
-        last = perf_counter()
-```
-%% Cell type:code id: tags:
-``` python
-import pytest
-pytest.importorskip('pycuda')
-```
-%% Cell type:code id: tags:
-``` python
-from pystencils.session import *
-sp.init_printing()
-frac = sp.Rational
-```
-%% Cell type:markdown id: tags:
-# Phase-field simulation of dentritic solidification in 3D
-This notebook tests the model presented in the dentritic growth tutorial in 3D.
-%% Cell type:code id: tags:
-``` python
-target = 'gpu'
-gpu = target == 'gpu'
-domain_size = (25, 25, 25) if 'is_test_run' in globals() else (300, 300, 300)
-dh = ps.create_data_handling(domain_size=domain_size, periodicity=True, default_target=target)
-φ_field = dh.add_array('phi', latex_name='φ')
-φ_delta_field = dh.add_array('phidelta', latex_name='φ_D')
-t_field = dh.add_array('T')
-```
-%% Cell type:code id: tags:
-``` python
-ε, m, δ, j, θzero, α, γ, Teq, κ, τ = sp.symbols("ε m δ j θ_0 α γ T_eq κ τ")
-εb = sp.Symbol("\\bar{\\epsilon}")
-discretize = ps.fd.Discretization2ndOrder(dx=0.03, dt=1e-5)
-φ = φ_field.center
-T = t_field.center
-d = ps.fd.Diff
-def f(φ, m):
-    return φ**4 / 4 - (frac(1, 2) - m/3) * φ**3 + (frac(1,4)-m/2)*φ**2
-bulk_free_energy_density = f(φ, m)
-interface_free_energy_density = ε ** 2 / 2 * (d(φ, 0) ** 2 + d(φ, 1) ** 2 + d(φ, 2) ** 2)
-```
-%% Cell type:markdown id: tags:
-Here comes the major change, that has to be made for the 3D model: $\epsilon$ depends on the interface normal, which can not be computed simply as atan() as in the 2D case
-%% Cell type:code id: tags:
-``` python
-n = sp.Matrix([d(φ, i) for i in range(3)])
-nLen = sp.sqrt(sum(n_i**2 for n_i in n))
-n = n / nLen
-nVal = sum(n_i**4 for n_i in n)
-σ = δ * nVal
-εVal = εb * (1 + σ)
-εVal
-```
-%% Output
-    $\displaystyle \bar{\epsilon} \left(δ \left(\frac{{\partial_{0} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{1} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{2} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}}\right) + 1\right)$
-                   ⎛  ⎛                            4
-                   ⎜  ⎜                 D(φ[0,0,0])
-    \bar{\epsilon}⋅⎜δ⋅⎜───────────────────────────────────────────── + ───────────
-                   ⎜  ⎜                                            2
-                   ⎜  ⎜⎛           2              2              2⎞    ⎛
-                   ⎝  ⎝⎝D(φ[0,0,0])  + D(φ[0,0,0])  + D(φ[0,0,0]) ⎠    ⎝D(φ[0,0,0]
-                     4                                               4
-          D(φ[0,0,0])                                     D(φ[0,0,0])
-    ────────────────────────────────── + ─────────────────────────────────────────
-                                     2
-     2              2              2⎞    ⎛           2              2
-    )  + D(φ[0,0,0])  + D(φ[0,0,0]) ⎠    ⎝D(φ[0,0,0])  + D(φ[0,0,0])  + D(φ[0,0,0]
-        ⎞    ⎞
-        ⎟    ⎟
-    ────⎟ + 1⎟
-       2⎟    ⎟
-     2⎞ ⎟    ⎟
-    ) ⎠ ⎠    ⎠
-%% Cell type:code id: tags:
-``` python
-def m_func(temperature):
-    return (α / sp.pi) * sp.atan(γ * (Teq - temperature))
-```
-%% Cell type:code id: tags:
-``` python
-substitutions = {m: m_func(T),
-                 ε: εVal}
-fe_i = interface_free_energy_density.subs(substitutions)
-fe_b = bulk_free_energy_density.subs(substitutions)
-μ_if = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_i, φ), functions=[φ])
-μ_b = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_b, φ), functions=[φ])
-```
-%% Cell type:code id: tags:
-``` python
-dF_dφ = μ_b + sp.Piecewise((μ_if, nLen**2 > 1e-10), (0, True))
-```
-%% Cell type:code id: tags:
-``` python
-parameters = {
-    τ: 0.0003,
-    κ: 1.8,
-    εb: 0.01,
-    δ: 0.3,
-    γ: 10,
-    j: 6,
-    α: 0.9,
-    Teq: 1.0,
-    θzero: 0.2,
-    sp.pi: sp.pi.evalf()
-}
-parameters
-```
-%% Output
-    $\displaystyle \left\{ \pi : 3.14159265358979, \  T_{eq} : 1.0, \  \bar{\epsilon} : 0.01, \  j : 6, \  α : 0.9, \  γ : 10, \  δ : 0.3, \  θ_{0} : 0.2, \  κ : 1.8, \  τ : 0.0003\right\}$
-    {π: 3.14159265358979, T_eq: 1.0, \bar{\epsilon}: 0.01, j: 6, α: 0.9, γ: 10, δ:
-     0.3, θ₀: 0.2, κ: 1.8, τ: 0.0003}
-%% Cell type:code id: tags:
-``` python
-dφ_dt = - dF_dφ / τ
-assignments = [
-    ps.Assignment(φ_delta_field.center, discretize(dφ_dt.subs(parameters))),
-]
-φEqs = ps.simp.sympy_cse_on_assignment_list(assignments)
-φEqs.append(ps.Assignment(φ, discretize(ps.fd.transient(φ) - φ_delta_field.center)))
-temperatureEvolution = -ps.fd.transient(T) + ps.fd.diffusion(T, 1) + κ * φ_delta_field.center
-temperatureEqs = [
-    ps.Assignment(T, discretize(temperatureEvolution.subs(parameters)))
-]
-```
-%% Cell type:code id: tags:
-``` python
-temperatureEqs
-```
-%% Output
-    $\displaystyle \left[ {{T}_{(0,0,0)}} \leftarrow 0.0111111111111111 {{T}_{(-1,0,0)}} + 0.0111111111111111 {{T}_{(0,-1,0)}} + 0.0111111111111111 {{T}_{(0,0,-1)}} + 0.933333333333333 {{T}_{(0,0,0)}} + 0.0111111111111111 {{T}_{(0,0,1)}} + 0.0111111111111111 {{T}_{(0,1,0)}} + 0.0111111111111111 {{T}_{(1,0,0)}} + 1.8 \cdot 10^{-5} {{φ_D}_{(0,0,0)}}\right]$
-    [T_C := 0.0111111111111111⋅T_W + 0.0111111111111111⋅T_S + 0.0111111111111111⋅T
-    _B + 0.933333333333333⋅T_C + 0.0111111111111111⋅T_T + 0.0111111111111111⋅T_N +
-     0.0111111111111111⋅T_E + 1.8e-5⋅phidelta_C]
-%% Cell type:code id: tags:
-``` python
-φ_kernel = ps.create_kernel(φEqs, cpu_openmp=4, target=target).compile()
-temperatureKernel = ps.create_kernel(temperatureEqs, cpu_openmp=4, target=target).compile()
-```
-%% Cell type:code id: tags:
-``` python
-def time_loop(steps):
-    φ_sync = dh.synchronization_function(['phi'], target=target)
-    temperature_sync = dh.synchronization_function(['T'], target=target)
-    dh.all_to_gpu()
-    for t in range(steps):
-        φ_sync()
-        dh.run_kernel(φ_kernel)
-        temperature_sync()
-        dh.run_kernel(temperatureKernel)
-    dh.all_to_cpu()
-def init(nucleus_size=np.sqrt(5)):
-    for b in dh.iterate():
-        x, y, z = b.cell_index_arrays
-        x, y, z = x - b.shape[0] // 2, y - b.shape[1] // 2, z - b.shape[2] // 2
-        b['phi'].fill(0)
-        b['phi'][(x ** 2 + y ** 2 + z ** 2) < nucleus_size ** 2] = 1.0
-        b['T'].fill(0.0)
-def plot(slice_obj=ps.make_slice[:, :, 0.5]):
-    plt.subplot(1, 3, 1)
-    plt.scalar_field(dh.gather_array('phi', slice_obj).squeeze())
-    plt.title("φ")
-    plt.colorbar()
-    plt.subplot(1, 3, 2)
-    plt.title("T")
-    plt.scalar_field(dh.gather_array('T', slice_obj).squeeze())
-    plt.colorbar()
-    plt.subplot(1, 3, 3)
-    plt.title("∂φ")
-    plt.scalar_field(dh.gather_array('phidelta', slice_obj).squeeze())
-    plt.colorbar()
-```
-%% Cell type:code id: tags:
-``` python
-init()
-plot()
-print(dh)
-```
-%% Output
-        Name|      Inner (min/max)|     WithGl (min/max)
-    ----------------------------------------------------
-           T|            (  0,  0)|            (  0,  0)
-         phi|            (  0,  1)|            (  0,  1)
-    phidelta|            (  0,  0)|            (  0,  0)
-%% Cell type:code id: tags:
-``` python
-if 'is_test_run' in globals():
-    time_loop(2)
-    assert np.isfinite(dh.max('phi'))
-    assert np.isfinite(dh.max('T'))
-    assert np.isfinite(dh.max('phidelta'))
-else:
-    from time import perf_counter
-    vtk_writer = dh.create_vtk_writer('dentritic_growth_large', ['phi'])
-    last = perf_counter()
-    for i in range(300):
-        time_loop(100)
-        vtk_writer(i)
-        print("Step ", i, perf_counter() - last, dh.max('phi'))
-        last = perf_counter()
-```
--- a/pystencils_tests/test_print_infinity.py
+++ b/pystencils_tests/test_print_infinity.py
-import pytest
-import pystencils
-from sympy import oo
-@pytest.mark.parametrize('type', ('float32', 'float64', 'int64'))
-@pytest.mark.parametrize('negative', (False, 'Negative'))
-@pytest.mark.parametrize('target', ('cpu', 'gpu'))
-def test_print_infinity(type, negative, target):
-    x = pystencils.fields(f'x:  {type}[1d]')
-    if negative:
-        assignment = pystencils.Assignment(x.center, -oo)
-    else:
-        assignment = pystencils.Assignment(x.center, oo)
-    ast = pystencils.create_kernel(assignment, data_type=type, target=target)
-    if target == 'gpu':
-        pytest.importorskip('pycuda')
-    ast.compile()
-    print(ast.compile().code)
--- a/pystencils_tests/test_print_unsupported_node.py
+++ b/pystencils_tests/test_print_unsupported_node.py
-# -*- coding: utf-8 -*-
-#
-# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
-#
-# Distributed under terms of the GPLv3 license.
-"""
-"""
-import pytest
-import pystencils
-from pystencils.backends.cbackend import CBackend
-class UnsupportedNode(pystencils.astnodes.Node):
-    def __init__(self):
-        super().__init__()
-def test_print_unsupported_node():
-    with pytest.raises(NotImplementedError, match='CBackend does not support node of type UnsupportedNode'):
-        CBackend()(UnsupportedNode())
--- a/pystencils_tests/test_size_and_layout_checks_llvm.py
+++ b/pystencils_tests/test_size_and_layout_checks_llvm.py
-import numpy as np
-import pytest
-from pystencils import Assignment, Field
-try:
-    from pystencils.llvm import create_kernel, make_python_function
-except ModuleNotFoundError:
-    pytest.importorskip("llvmlite")
-def test_size_check():
-    """Kernel with two fixed-sized fields creating with same size but calling with wrong size"""
-    src = np.zeros((20, 21, 9))
-    dst = np.zeros_like(src)
-    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
-    sym_dst = Field.create_from_numpy_array("dst", dst, index_dimensions=1)
-    update_rule = Assignment(sym_dst(0),
-                             sym_src[-1, 1](1) + sym_src[1, -1](2))
-    ast = create_kernel([update_rule])
-    func = make_python_function(ast)
-    # change size of src field
-    new_shape = [a - 7 for a in src.shape]
-    src = np.zeros(new_shape)
-    dst = np.zeros(new_shape)
-    try:
-        func(src=src, dst=dst)
-        assert False, "Expected ValueError because fields with different sized where passed"
-    except ValueError:
-        pass
-def test_fixed_size_mismatch_check():
-    """Create kernel with two differently sized but constant fields """
-    src = np.zeros((20, 21, 9))
-    dst = np.zeros((21, 21, 9))
-    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
-    sym_dst = Field.create_from_numpy_array("dst", dst, index_dimensions=1)
-    update_rule = Assignment(sym_dst(0),
-                             sym_src[-1, 1](1) + sym_src[1, -1](2))
-    try:
-        create_kernel([update_rule])
-        assert False, "Expected ValueError because fields with different sized where passed"
-    except ValueError:
-        pass
-def test_fixed_and_variable_field_check():
-    """Create kernel with two variable sized fields - calling them with different sizes"""
-    src = np.zeros((20, 21, 9))
-    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
-    sym_dst = Field.create_generic("dst", spatial_dimensions=2, index_dimensions=1)
-    update_rule = Assignment(sym_dst(0),
-                             sym_src[-1, 1](1) + sym_src[1, -1](2))
-    try:
-        create_kernel([update_rule])
-        assert False, "Expected ValueError because fields with different sized where passed"
-    except ValueError:
-        pass
-def test_two_variable_shaped_fields():
-    src = np.zeros((20, 21, 9))
-    dst = np.zeros((22, 21, 9))
-    sym_src = Field.create_generic("src", spatial_dimensions=2, index_dimensions=1)
-    sym_dst = Field.create_generic("dst", spatial_dimensions=2, index_dimensions=1)
-    update_rule = Assignment(sym_dst(0),
-                             sym_src[-1, 1](1) + sym_src[1, -1](2))
-    ast = create_kernel([update_rule])
-    func = make_python_function(ast)
-    try:
-        func(src=src, dst=dst)
-        assert False, "Expected ValueError because fields with different sized where passed"
-    except ValueError:
-        pass
--- a/pystencils_tests/test_sliced_iteration.py
+++ b/pystencils_tests/test_sliced_iteration.py
-import numpy as np
-import sympy as sp
-from pystencils import Assignment, Field, TypedSymbol, create_kernel, make_slice
-from pystencils.simp import sympy_cse_on_assignment_list
-def test_sliced_iteration():
-    size = (4, 4)
-    src_arr = np.ones(size)
-    dst_arr = np.zeros_like(src_arr)
-    src_field = Field.create_from_numpy_array('src', src_arr)
-    dst_field = Field.create_from_numpy_array('dst', dst_arr)
-    a, b = sp.symbols("a b")
-    update_rule = Assignment(dst_field[0, 0],
-                             (a * src_field[0, 1] + a * src_field[0, -1] +
-                              b * src_field[1, 0] + b * src_field[-1, 0]) / 4)
-    x_end = TypedSymbol("x_end", "int")
-    s = make_slice[1:x_end, 1]
-    x_end_value = size[1] - 1
-    kernel = create_kernel(sympy_cse_on_assignment_list([update_rule]), iteration_slice=s).compile()
-    kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
-    expected_result = np.zeros(size)
-    expected_result[1:x_end_value, 1] = 1
-    np.testing.assert_almost_equal(expected_result, dst_arr)
-def test_sliced_iteration_llvm():
-    import pytest
-    pytest.importorskip("llvmlite")
-    size = (4, 4)
-    src_arr = np.ones(size)
-    dst_arr = np.zeros_like(src_arr)
-    src_field = Field.create_from_numpy_array('src', src_arr)
-    dst_field = Field.create_from_numpy_array('dst', dst_arr)
-    a, b = sp.symbols("a b")
-    update_rule = Assignment(dst_field[0, 0],
-                             (a * src_field[0, 1] + a * src_field[0, -1] +
-                              b * src_field[1, 0] + b * src_field[-1, 0]) / 4)
-    x_end = TypedSymbol("x_end", "int")
-    s = make_slice[1:x_end, 1]
-    x_end_value = size[1] - 1
-    import pystencils.llvm as llvm_generator
-    ast = llvm_generator.create_kernel(sympy_cse_on_assignment_list([update_rule]), iteration_slice=s)
-    kernel = llvm_generator.make_python_function(ast)
-    kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
-    expected_result = np.zeros(size)
-    expected_result[1:x_end_value, 1] = 1
-    np.testing.assert_almost_equal(expected_result, dst_arr)
--- a/pystencils_tests/test_small_block_benchmark.ipynb
+++ b/pystencils_tests/test_small_block_benchmark.ipynb
-%% Cell type:code id: tags:
-``` python
-import pytest
-pytest.importorskip('waLBerla')
-```
-%% Cell type:code id: tags:
-``` python
-from pystencils.session import *
-from time import perf_counter
-from statistics import median
-from functools import partial
-```
-%% Cell type:markdown id: tags:
-## Benchmark for Python call overhead
-%% Cell type:code id: tags:
-``` python
-inner_repeats = 100
-outer_repeats = 5
-sizes = [2**i for i in range(1, 8)]
-sizes
-```
-%% Output
-    $\displaystyle \left[ 2, \  4, \  8, \  16, \  32, \  64, \  128\right]$
-    [2, 4, 8, 16, 32, 64, 128]
-%% Cell type:code id: tags:
-``` python
-def benchmark_pure(domain_size, extract_first=False):
-    src = np.zeros(domain_size)
-    dst = np.zeros_like(src)
-    f_src, f_dst = ps.fields("src, dst", src=src, dst=dst)
-    kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()
-    if extract_first:
-        kernel = kernel.kernel
-        start = perf_counter()
-        for i in range(inner_repeats):
-            kernel(src=src, dst=dst)
-            src, dst = dst, src
-        end = perf_counter()
-    else:
-        start = perf_counter()
-        for i in range(inner_repeats):
-            kernel(src=src, dst=dst)
-            src, dst = dst, src
-        end = perf_counter()
-    return (end - start) / inner_repeats
-def benchmark_datahandling(domain_size, parallel=False):
-    dh = ps.create_data_handling(domain_size, parallel=parallel)
-    f_src = dh.add_array('src')
-    f_dst = dh.add_array('dst')
-    kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()
-    start = perf_counter()
-    for i in range(inner_repeats):
-        dh.run_kernel(kernel)
-        dh.swap('src', 'dst')
-    end = perf_counter()
-    return (end - start) / inner_repeats
-name_to_func = {
-    'pure_extract': partial(benchmark_pure, extract_first=True),
-    'pure_no_extract': partial(benchmark_pure, extract_first=False),
-    'dh_serial': partial(benchmark_datahandling, parallel=False),
-    'dh_parallel': partial(benchmark_datahandling, parallel=True),
-}
-```
-%% Cell type:code id: tags:
-``` python
-result = {'block_size': [],
-          'name': [],
-          'time': []}
-for bs in sizes:
-    print("Computing size ", bs)
-    for name, func in name_to_func.items():
-        for i in range(outer_repeats):
-            time = func((bs, bs))
-            result['block_size'].append(bs)
-            result['name'].append(name)
-            result['time'].append(time)
-```
-%% Output
-    Computing size  2
-    Computing size  4
-    Computing size  8
-    Computing size  16
-    Computing size  32
-    Computing size  64
-    Computing size  128
-%% Cell type:code id: tags:
-``` python
-if 'is_test_run' not in globals():
-    import pandas as pd
-    import seaborn as sns
-    data = pd.DataFrame.from_dict(result)
-    plt.subplot(1,2,1)
-    sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
-    plt.yscale('log')
-    plt.subplot(1,2,2)
-    data = pd.DataFrame.from_dict(result)
-    sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
-```
-%% Output
-%% Cell type:code id: tags:
-``` python
-import pytest
-pytest.importorskip('waLBerla')
-```
-%% Cell type:code id: tags:
-``` python
-from pystencils.session import *
-from time import perf_counter
-from statistics import median
-from functools import partial
-```
-%% Cell type:markdown id: tags:
-## Benchmark for Python call overhead
-%% Cell type:code id: tags:
-``` python
-inner_repeats = 100
-outer_repeats = 5
-sizes = [2**i for i in range(1, 8)]
-sizes
-```
-%% Output
-    $\displaystyle \left[ 2, \  4, \  8, \  16, \  32, \  64, \  128\right]$
-    [2, 4, 8, 16, 32, 64, 128]
-%% Cell type:code id: tags:
-``` python
-def benchmark_pure(domain_size, extract_first=False):
-    src = np.zeros(domain_size)
-    dst = np.zeros_like(src)
-    f_src, f_dst = ps.fields("src, dst", src=src, dst=dst)
-    kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()
-    if extract_first:
-        kernel = kernel.kernel
-        start = perf_counter()
-        for i in range(inner_repeats):
-            kernel(src=src, dst=dst)
-            src, dst = dst, src
-        end = perf_counter()
-    else:
-        start = perf_counter()
-        for i in range(inner_repeats):
-            kernel(src=src, dst=dst)
-            src, dst = dst, src
-        end = perf_counter()
-    return (end - start) / inner_repeats
-def benchmark_datahandling(domain_size, parallel=False):
-    dh = ps.create_data_handling(domain_size, parallel=parallel)
-    f_src = dh.add_array('src')
-    f_dst = dh.add_array('dst')
-    kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()
-    start = perf_counter()
-    for i in range(inner_repeats):
-        dh.run_kernel(kernel)
-        dh.swap('src', 'dst')
-    end = perf_counter()
-    return (end - start) / inner_repeats
-name_to_func = {
-    'pure_extract': partial(benchmark_pure, extract_first=True),
-    'pure_no_extract': partial(benchmark_pure, extract_first=False),
-    'dh_serial': partial(benchmark_datahandling, parallel=False),
-    'dh_parallel': partial(benchmark_datahandling, parallel=True),
-}
-```
-%% Cell type:code id: tags:
-``` python
-result = {'block_size': [],
-          'name': [],
-          'time': []}
-for bs in sizes:
-    print("Computing size ", bs)
-    for name, func in name_to_func.items():
-        for i in range(outer_repeats):
-            time = func((bs, bs))
-            result['block_size'].append(bs)
-            result['name'].append(name)
-            result['time'].append(time)
-```
-%% Output
-    Computing size  2
-    Computing size  4
-    Computing size  8
-    Computing size  16
-    Computing size  32
-    Computing size  64
-    Computing size  128
-%% Cell type:code id: tags:
-``` python
-if 'is_test_run' not in globals():
-    import pandas as pd
-    import seaborn as sns
-    data = pd.DataFrame.from_dict(result)
-    plt.subplot(1,2,1)
-    sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
-    plt.yscale('log')
-    plt.subplot(1,2,2)
-    data = pd.DataFrame.from_dict(result)
-    sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
-```
-%% Output
--- a/pystencils_tests/test_sum_prod.py
+++ b/pystencils_tests/test_sum_prod.py
-# -*- coding: utf-8 -*-
-#
-# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
-#
-# Distributed under terms of the GPLv3 license.
-"""
-"""
-import numpy as np
-import sympy
-from sympy.abc import k
-import pystencils
-from pystencils.data_types import create_type
-def test_sum():
-    sum = sympy.Sum(k, (k, 1, 100))
-    expanded_sum = sum.doit()
-    print(sum)
-    print(expanded_sum)
-    x = pystencils.fields('x: float32[1d]')
-    assignments = pystencils.AssignmentCollection({
-        x.center(): sum
-    })
-    ast = pystencils.create_kernel(assignments)
-    code = str(pystencils.get_code_obj(ast))
-    kernel = ast.compile()
-    print(code)
-    assert 'double sum' in code
-    array = np.zeros((10,), np.float32)
-    kernel(x=array)
-    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
-def test_sum_use_float():
-    sum = sympy.Sum(k, (k, 1, 100))
-    expanded_sum = sum.doit()
-    print(sum)
-    print(expanded_sum)
-    x = pystencils.fields('x: float32[1d]')
-    assignments = pystencils.AssignmentCollection({
-        x.center(): sum
-    })
-    ast = pystencils.create_kernel(assignments, data_type=create_type('float32'))
-    code = str(pystencils.get_code_obj(ast))
-    kernel = ast.compile()
-    print(code)
-    print(pystencils.get_code_obj(ast))
-    assert 'float sum' in code
-    array = np.zeros((10,), np.float32)
-    kernel(x=array)
-    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
-def test_product():
-    k = pystencils.TypedSymbol('k', create_type('int64'))
-    sum = sympy.Product(k, (k, 1, 10))
-    expanded_sum = sum.doit()
-    print(sum)
-    print(expanded_sum)
-    x = pystencils.fields('x: int64[1d]')
-    assignments = pystencils.AssignmentCollection({
-        x.center(): sum
-    })
-    ast = pystencils.create_kernel(assignments)
-    code = pystencils.get_code_str(ast)
-    kernel = ast.compile()
-    print(code)
-    assert 'int64_t product' in code
-    array = np.zeros((10,), np.int64)
-    kernel(x=array)
-    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
-def test_prod_var_limit():
-    k = pystencils.TypedSymbol('k', create_type('int64'))
-    limit = pystencils.TypedSymbol('limit', create_type('int64'))
-    sum = sympy.Sum(k, (k, 1, limit))
-    expanded_sum = sum.replace(limit, 100).doit()
-    print(sum)
-    print(expanded_sum)
-    x = pystencils.fields('x: int64[1d]')
-    assignments = pystencils.AssignmentCollection({
-        x.center(): sum
-    })
-    ast = pystencils.create_kernel(assignments)
-    pystencils.show_code(ast)
-    kernel = ast.compile()
-    array = np.zeros((10,), np.int64)
-    kernel(x=array, limit=100)
-    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
--- a/pystencils_tests/test_sympy_optimizations.py
+++ b/pystencils_tests/test_sympy_optimizations.py
-import pytest
-import sympy as sp
-import pystencils
-from pystencils.math_optimizations import HAS_REWRITING, optimize_assignments, optims_pystencils_cpu, optimize_ast
-@pytest.mark.skipif(not HAS_REWRITING, reason="need sympy.codegen.rewriting")
-def test_sympy_optimizations():
-    for target in ('cpu', 'gpu'):
-        for op_ast in (True, False):
-            x, y, z = pystencils.fields('x, y, z:  float32[2d]')
-            # Triggers Sympy's expm1 optimization
-            # Sympy's expm1 optimization is tedious to use and the behaviour is highly depended on the sympy version. In
-            # some cases the exp expression has to be encapsulated in brackets or multiplied with 1 or 1.0
-            # for sympy to work properly ...
-            assignments = pystencils.AssignmentCollection({
-                x[0, 0]: 1.0 * (sp.exp(y[0, 0]) - 1)
-            })
-            if not op_ast:
-                assignments = optimize_assignments(assignments, optims_pystencils_cpu)
-            print(assignments)
-            ast = pystencils.create_kernel(assignments, target=target)
-            if op_ast:
-                optimize_ast(ast, optims_pystencils_cpu)
-            code = pystencils.get_code_str(ast)
-            assert 'expm1(' in code
-@pytest.mark.skipif(not HAS_REWRITING, reason="need sympy.codegen.rewriting")
-def test_evaluate_constant_terms():
-    for target in ('cpu', 'gpu'):
-        x, y, z = pystencils.fields('x, y, z:  float32[2d]')
-        # Triggers Sympy's cos optimization
-        assignments = pystencils.AssignmentCollection({
-            x[0, 0]: -sp.cos(1) + y[0, 0]
-        })
-        assignments = optimize_assignments(assignments, optims_pystencils_cpu)
-        ast = pystencils.create_kernel(assignments, target=target)
-        code = pystencils.get_code_str(ast)
-        assert 'cos(' not in code
-        print(code)
-@pytest.mark.skipif(not HAS_REWRITING, reason="need sympy.codegen.rewriting")
-def test_do_not_evaluate_constant_terms():
-    optimizations = pystencils.math_optimizations.optims_pystencils_cpu
-    optimizations.remove(pystencils.math_optimizations.evaluate_constant_terms)
-    for target in ('cpu', 'gpu'):
-        x, y, z = pystencils.fields('x, y, z:  float32[2d]')
-        assignments = pystencils.AssignmentCollection({
-            x[0, 0]: -sp.cos(1) + y[0, 0]
-        })
-        ast = pystencils.create_kernel(assignments, target=target)
-        code = pystencils.get_code_str(ast)
-        assert 'cos(' in code
-        print(code)
--- a/pystencils_tests/test_transformations.py
+++ b/pystencils_tests/test_transformations.py
-import pystencils as ps
-from pystencils import TypedSymbol
-from pystencils.astnodes import LoopOverCoordinate, SympyAssignment
-from pystencils.data_types import create_type
-from pystencils.transformations import filtered_tree_iteration, get_loop_hierarchy, get_loop_counter_symbol_hierarchy
-def test_loop_information():
-    f, g = ps.fields("f, g: double[2D]")
-    update_rule = ps.Assignment(g[0, 0], f[0, 0])
-    ast = ps.create_kernel(update_rule)
-    inner_loops = [l for l in filtered_tree_iteration(ast, LoopOverCoordinate, stop_type=SympyAssignment)
-                   if l.is_innermost_loop]
-    loop_order = []
-    for i in get_loop_hierarchy(inner_loops[0].args[0]):
-        loop_order.append(i)
-    assert loop_order == [0, 1]
-    loop_symbols = get_loop_counter_symbol_hierarchy(inner_loops[0].args[0])
-    assert loop_symbols == [TypedSymbol("ctr_1", create_type("int"), nonnegative=True),
-                            TypedSymbol("ctr_0", create_type("int"), nonnegative=True)]
--- a/pystencils_tests/test_type_interference.py
+++ b/pystencils_tests/test_type_interference.py
-from sympy.abc import a, b, c, d, e, f
-import pystencils
-from pystencils.data_types import cast_func, create_type
-def test_type_interference():
-    x = pystencils.fields('x:  float32[3d]')
-    assignments = pystencils.AssignmentCollection({
-        a: cast_func(10, create_type('float64')),
-        b: cast_func(10, create_type('uint16')),
-        e: 11,
-        c: b,
-        f: c + b,
-        d: c + b + x.center + e,
-        x.center: c + b + x.center
-    })
-    ast = pystencils.create_kernel(assignments)
-    code = str(pystencils.get_code_str(ast))
-    assert 'double a' in code
-    assert 'uint16_t b' in code
-    assert 'uint16_t f' in code
-    assert 'int64_t e' in code
--- a/pystencils_tests/test_vectorization_specific.py
+++ b/pystencils_tests/test_vectorization_specific.py
-import pytest
-import numpy as np
-import sympy as sp
-import pystencils as ps
-from pystencils.backends.simd_instruction_sets import (get_cacheline_size, get_supported_instruction_sets,
-                                                       get_vector_instruction_set)
-from pystencils.data_types import cast_func, VectorType
-supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else []
-@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
-def test_vectorisation_varying_arch(instruction_set):
-    shape = (9, 9, 3)
-    arr = np.ones(shape, order='f')
-    @ps.kernel
-    def update_rule(s):
-        f = ps.fields("f(3) : [2D]", f=arr)
-        s.tmp0 @= f(0)
-        s.tmp1 @= f(1)
-        s.tmp2 @= f(2)
-        f0, f1, f2 = f(0), f(1), f(2)
-        f0 @= 2 * s.tmp0
-        f1 @= 2 * s.tmp0
-        f2 @= 2 * s.tmp0
-    ast = ps.create_kernel(update_rule, cpu_vectorize_info={'instruction_set': instruction_set})
-    kernel = ast.compile()
-    kernel(f=arr)
-    np.testing.assert_equal(arr, 2)
-@pytest.mark.parametrize('dtype', ('float', 'double'))
-@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
-def test_vectorized_abs(instruction_set, dtype):
-    """Some instructions sets have abs, some don't.
-       Furthermore, the special treatment of unary minus makes this data type-sensitive too.
-    """
-    arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2), dtype=np.float64 if dtype == 'double' else np.float32)
-    arr[-3:, :] = -1
-    f, g = ps.fields(f=arr, g=arr)
-    update_rule = [ps.Assignment(g.center(), sp.Abs(f.center()))]
-    ast = ps.create_kernel(update_rule, cpu_vectorize_info={'instruction_set': instruction_set})
-    func = ast.compile()
-    dst = np.zeros_like(arr)
-    func(g=dst, f=arr)
-    np.testing.assert_equal(np.sum(dst[1:-1, 1:-1]), 2 ** 2 * 2 ** 3)
-@pytest.mark.parametrize('dtype', ('float', 'double'))
-@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
-@pytest.mark.parametrize('gl_field, gl_kernel', [(1, 0), (0, 1), (1, 1)])
-def test_alignment_and_correct_ghost_layers(gl_field, gl_kernel, instruction_set, dtype):
-    itemsize = 8 if dtype == 'double' else 4
-    alignment = get_vector_instruction_set(dtype, instruction_set)['width'] * itemsize
-    dtype = np.float64 if dtype == 'double' else np.float32
-    domain_size = (128, 128)
-    dh = ps.create_data_handling(domain_size, periodicity=(True, True), default_target='cpu')
-    src = dh.add_array("src", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=alignment)
-    dh.fill(src.name, 1.0, ghost_layers=True)
-    dst = dh.add_array("dst", values_per_cell=1, dtype=dtype, ghost_layers=gl_field, alignment=alignment)
-    dh.fill(dst.name, 1.0, ghost_layers=True)
-    update_rule = ps.Assignment(dst[0, 0], src[0, 0])
-    opt = {'instruction_set': instruction_set, 'assume_aligned': True,
-           'nontemporal': True, 'assume_inner_stride_one': True}
-    ast = ps.create_kernel(update_rule, target=dh.default_target, cpu_vectorize_info=opt, ghost_layers=gl_kernel)
-    kernel = ast.compile()
-    if gl_kernel != gl_field:
-        with pytest.raises(ValueError):
-            dh.run_kernel(kernel)
-    else:
-        dh.run_kernel(kernel)
-@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
-def test_cacheline_size(instruction_set):
-    cacheline_size = get_cacheline_size(instruction_set)
-    if cacheline_size is None:
-        pytest.skip()
-    instruction_set = get_vector_instruction_set('double', instruction_set)
-    vector_size = instruction_set['bytes']
-    assert cacheline_size > 8 and cacheline_size < 0x100000, "Cache line size is implausible"
-    assert cacheline_size % vector_size == 0, "Cache line size should be multiple of vector size"
-    assert cacheline_size & (cacheline_size - 1) == 0, "Cache line size is not a power of 2"
--- a/pystencils_tests/test_version_string.py
+++ b/pystencils_tests/test_version_string.py
-import pystencils as ps
-from pathlib import Path
-def test_version_string():
-    file_path = Path(__file__).parent
-    release_version = file_path.parent.absolute() / 'RELEASE-VERSION'
-    if release_version.exists ():
-        with open(release_version, "r") as f:
-            version = f.read()
-        assert ps.__version__ == version
-    else:
-        assert ps.__version__ == "development"
--- a/pytest.ini
+++ b/pytest.ini
 [pytest]
+testpaths = src tests doc/notebooks
+pythonpath = src
 python_files = test_*.py *_test.py scenario_*.py
 norecursedirs = *.egg-info .git .cache .ipynb_checkpoints htmlcov
 addopts = --doctest-modules --durations=20  --cov-config pytest.ini
 markers =
-       kerncraft: tests depending on kerncraft
+       longrun: tests that only run at night because they have a long execution time
       notebook: mark for notebooks
+# These warnings all originate in third-party libraries, so they are ignored here.
+filterwarnings =
+       ignore:an integer is required:DeprecationWarning
+       ignore:\s*load will be removed, use:PendingDeprecationWarning
+       ignore:the imp module is deprecated in favour of importlib:DeprecationWarning
+       ignore:.*is a deprecated alias for the builtin `bool`:DeprecationWarning
+       ignore:'contextfilter' is renamed to 'pass_context':DeprecationWarning
+       ignore:Using or importing the ABCs from 'collections' instead of from 'collections.abc':DeprecationWarning
+       ignore:Animation was deleted without rendering anything:UserWarning
 [run]
 branch = True
-source = pystencils
+source = src/pystencils
-         pystencils_tests
+         tests
 omit = doc/*
-       pystencils_tests/*
+       tests/*
       setup.py
+       quicktest.py
       conftest.py
-       pystencils/jupytersetup.py
+       versioneer.py
-       pystencils/cpu/msvc_detection.py
+       src/pystencils/jupytersetup.py
-       pystencils/sympy_gmpy_bug_workaround.py
+       src/pystencils/cpu/msvc_detection.py
-       pystencils/cache.py
+       src/pystencils/sympy_gmpy_bug_workaround.py
-       pystencils/pacxx/benchmark.py
+       src/pystencils/cache.py
+       src/pystencils/pacxx/benchmark.py
+       src/pystencils/_version.py
+       venv/
 [report]
 exclude_lines =
@@ -41,7 +56,7 @@ exclude_lines =
       if __name__ == .__main__.:
 skip_covered = True
-fail_under = 88
+fail_under = 85
 [html]
 directory = coverage_report
--- a/quicktest.py
+++ b/quicktest.py
+#!/usr/bin/env python3
+from contextlib import redirect_stdout
+import io
+from tests.test_quicktests import (
+    test_basic_kernel,
+    test_basic_blocking_staggered,
+    test_basic_vectorization,
+)
+# Test functions executed by this script, in the order listed.
+quick_tests = [
+    test_basic_kernel,
+    test_basic_blocking_staggered,
+    test_basic_vectorization,
+]
+if __name__ == "__main__":
+    print("Running pystencils quicktests")
+    for qt in quick_tests:
+        print(f"   -> {qt.__name__}")
+        # Capture whatever the test writes to stdout in a throwaway buffer.
+        with redirect_stdout(io.StringIO()):
+            qt()
--- a/setup.py
+++ b/setup.py
-import distutils
+from setuptools import setup, __version__ as setuptools_version
-import io
-import os
-import sys
-from contextlib import redirect_stdout
-from importlib import import_module
-import setuptools
+# Abort early with an explanatory message when the major version of the
+# installed setuptools is below 61.
+if int(setuptools_version.split('.')[0]) < 61:
+    raise Exception(
+        "[ERROR] pystencils requires at least setuptools version 61 to install.\n"
+        "If this error occurs during an installation via pip, it is likely that there is a conflict between "
+        "versions of setuptools installed by pip and the system package manager. "
+        "In this case, it is recommended to install pystencils into a virtual environment instead."
+    )
-try:
+import versioneer
-    import cython  # noqa
-    USE_CYTHON = True
-except ImportError:
-    USE_CYTHON = False
-quick_tests = [
-    'test_datahandling.test_kernel',
-    'test_blocking_staggered.test_blocking_staggered',
-    'test_blocking_staggered.test_blocking_staggered',
-    'test_vectorization.test_vectorization_variable_size',
-]
+def get_cmdclass():
+    """Return the command classes supplied by ``versioneer.get_cmdclass()``."""
+    return versioneer.get_cmdclass()
-class SimpleTestRunner(distutils.cmd.Command):
-    """A custom command to run selected tests"""
-    description = 'run some quick tests'
+setup(
-    user_options = []
+    version=versioneer.get_version(),
+    cmdclass=get_cmdclass(),
-    @staticmethod
+)
-    def _run_tests_in_module(test):
-        """Short test runner function - to work also if py.test is not installed."""
-        test = 'pystencils_tests.' + test
-        mod, function_name = test.rsplit('.', 1)
-        if isinstance(mod, str):
-            mod = import_module(mod)
-        func = getattr(mod, function_name)
-        print("   -> %s in %s" % (function_name, mod.__name__))
-        with redirect_stdout(io.StringIO()):
-            func()
-    def initialize_options(self):
-        pass
-    def finalize_options(self):
-        pass
-    def run(self):
-        """Run command."""
-        for test in quick_tests:
-            self._run_tests_in_module(test)
-def readme():
-    with open('README.md') as f:
-        return f.read()
-def cython_extensions(*extensions):
-    from distutils.extension import Extension
-    if USE_CYTHON:
-        ext = '.pyx'
-        result = [Extension(e, [os.path.join(*e.split(".")) + ext]) for e in extensions]
-        from Cython.Build import cythonize
-        result = cythonize(result, language_level=3)
-        return result
-    elif all([os.path.exists(os.path.join(*e.split(".")) + '.c') for e in extensions]):
-        ext = '.c'
-        result = [Extension(e, [os.path.join(*e.split(".")) + ext]) for e in extensions]
-        return result
-    else:
-        return None
-try:
-    sys.path.insert(0, os.path.abspath('doc'))
-    from version_from_git import version_number_from_git
-    version = version_number_from_git()
-    with open("RELEASE-VERSION", "w") as f:
-        f.write(version)
-except ImportError:
-    version = open('RELEASE-VERSION', 'r').read()
-setuptools.setup(name='pystencils',
-                 description='Speeding up stencil computations on CPUs and GPUs',
-                 version=version,
-                 long_description=readme(),
-                 long_description_content_type="text/markdown",
-                 author='Martin Bauer',
-                 license='AGPLv3',
-                 author_email='martin.bauer@fau.de',
-                 url='https://i10git.cs.fau.de/pycodegen/pystencils/',
-                 packages=['pystencils'] + ['pystencils.' + s for s in setuptools.find_packages('pystencils')],
-                 install_requires=['sympy>=1.2', 'numpy>=1.8.0', 'appdirs', 'joblib'],
-                 package_data={'pystencils': ['include/*.h',
-                                              'kerncraft_coupling/templates/*',
-                                              'backends/cuda_known_functions.txt',
-                                              'backends/opencl1.1_known_functions.txt',
-                                              'boundaries/createindexlistcython.c',
-                                              'boundaries/createindexlistcython.pyx']},
-                 ext_modules=cython_extensions("pystencils.boundaries.createindexlistcython"),
-                 classifiers=[
-                     'Development Status :: 4 - Beta',
-                     'Framework :: Jupyter',
-                     'Topic :: Software Development :: Code Generators',
-                     'Topic :: Scientific/Engineering :: Physics',
-                     'Intended Audience :: Developers',
-                     'Intended Audience :: Science/Research',
-                     'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
-                 ],
-                 project_urls={
-                     "Bug Tracker": "https://i10git.cs.fau.de/pycodegen/pystencils/issues",
-                     "Documentation": "http://pycodegen.pages.walberla.net/pystencils/",
-                     "Source Code": "https://i10git.cs.fau.de/pycodegen/pystencils",
-                 },
-                 extras_require={
-                     'gpu': ['pycuda'],
-                     'opencl': ['pyopencl'],
-                     'alltrafos': ['islpy', 'py-cpuinfo'],
-                     'bench_db': ['blitzdb', 'pymongo', 'pandas'],
-                     'interactive': ['matplotlib', 'ipy_table', 'imageio', 'jupyter', 'pyevtk', 'rich', 'graphviz'],
-                     'autodiff': ['pystencils-autodiff'],
-                     'doc': ['sphinx', 'sphinx_rtd_theme', 'nbsphinx',
-                             'sphinxcontrib-bibtex', 'sphinx_autodoc_typehints', 'pandoc'],
-                     'use_cython': ['Cython']
-                 },
-                 tests_require=['pytest',
-                                'pytest-cov',
-                                'pytest-html',
-                                'ansi2html',
-                                'pytest-xdist',
-                                'flake8',
-                                'nbformat',
-                                'nbconvert',
-                                'ipython',
-                                'randomgen>=1.18'],
-                 python_requires=">=3.6",
-                 cmdclass={
-                     'quicktest': SimpleTestRunner
-                 },
-                 )
--- a/pystencils/__init__.py
+++ b/pystencils/__init__.py
 """Module to generate stencil kernels in C or CUDA using sympy expressions and call them as Python functions"""
-from . import sympy_gmpy_bug_workaround  # NOQA
+from .enums import Backend, Target
 from . import fd
 from . import stencil as stencil
-from .assignment import Assignment, assignment_from_stencil
+from .assignment import Assignment, AddAugmentedAssignment, assignment_from_stencil
-from .data_types import TypedSymbol
+from .typing.typed_sympy import TypedSymbol
-from .datahandling import create_data_handling
+from .display_utils import get_code_obj, get_code_str, show_code, to_dot
-from .display_utils import show_code, get_code_obj, get_code_str, to_dot
 from .field import Field, FieldType, fields
-from .kernel_decorator import kernel
+from .config import CreateKernelConfig
-from .kernelcreation import create_indexed_kernel, create_kernel, create_staggered_kernel
+from .cache import clear_cache
+from .kernel_decorator import kernel, kernel_config
+from .kernelcreation import create_kernel, create_staggered_kernel
 from .simp import AssignmentCollection
 from .slicing import make_slice
+from .spatial_coordinates import x_, x_staggered, x_staggered_vector, x_vector, y_, y_staggered, z_, z_staggered
 from .sympyextensions import SymbolCreator
-from .spatial_coordinates import (x_, x_staggered, x_staggered_vector, x_vector,
+from .datahandling import create_data_handling
-                                  y_, y_staggered, z_, z_staggered)
-try:
-    import pystencils_autodiff
-    autodiff = pystencils_autodiff
-except ImportError:
-    pass
-def _get_release_file():
-    import os.path
-    file_path = os.path.abspath(os.path.dirname(__file__))
-    return os.path.join(file_path, '..', 'RELEASE-VERSION')
-try:
-    __version__ = open(_get_release_file(), 'r').read()
-except IOError:
-    __version__ = 'development'
 __all__ = ['Field', 'FieldType', 'fields',
           'TypedSymbol',
           'make_slice',
-           'create_kernel', 'create_indexed_kernel', 'create_staggered_kernel',
+           'CreateKernelConfig',
+           'create_kernel', 'create_staggered_kernel',
+           'Target', 'Backend',
           'show_code', 'to_dot', 'get_code_obj', 'get_code_str',
           'AssignmentCollection',
-           'Assignment',
+           'Assignment', 'AddAugmentedAssignment',
           'assignment_from_stencil',
           'SymbolCreator',
           'create_data_handling',
-           'kernel',
+           'clear_cache',
+           'kernel', 'kernel_config',
           'x_', 'y_', 'z_',
           'x_staggered', 'y_staggered', 'z_staggered',
           'x_vector', 'x_staggered_vector',
           'fd',
           'stencil']
+from . import _version
+__version__ = _version.get_versions()['version']
--- a/src/pystencils/_version.py
+++ b/src/pystencils/_version.py
+# This file helps to compute a version number in source trees obtained from
+# git-archive tarball (such as those provided by githubs download-from-tag
+# feature). Distribution tarballs (built by setup.py sdist) and build
+# directories (produced by setup.py build) will contain a much shorter file
+# that just contains the computed version number.
+# This file is released into the public domain.
+# Generated by versioneer-0.29
+# https://github.com/python-versioneer/python-versioneer
+"""Git implementation of _version.py."""
+import errno
+import os
+import re
+import subprocess
+import sys
+from typing import Any, Callable, Dict, List, Optional, Tuple
+import functools
+def get_keywords() -> Dict[str, str]:
+    """Get the keywords needed to look up the version information.
+    Returns a dict with the keys "refnames", "full" and "date", each mapped
+    to the corresponding string assigned below.
+    """
+    # these strings will be replaced by git during git-archive.
+    # setup.py/versioneer.py will grep for the variable names, so they must
+    # each be defined on a line of their own. _version.py will just call
+    # get_keywords().
+    git_refnames = "$Format:%d$"
+    git_full = "$Format:%H$"
+    git_date = "$Format:%ci$"
+    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
+    return keywords
+class VersioneerConfig:
+    """Container for Versioneer configuration parameters.
+    Only attribute annotations are declared here; the class body assigns no
+    values, so instances must be populated by the caller.
+    """
+    VCS: str
+    style: str
+    tag_prefix: str
+    parentdir_prefix: str
+    versionfile_source: str
+    verbose: bool
+def get_config() -> VersioneerConfig:
+    """Create, populate and return the VersioneerConfig() object.
+    Every call builds a fresh instance and sets all six attributes to the
+    fixed values written below.
+    """
+    # these strings are filled in when 'setup.py versioneer' creates
+    # _version.py
+    cfg = VersioneerConfig()
+    cfg.VCS = "git"
+    cfg.style = "pep440"
+    cfg.tag_prefix = "release/"
+    cfg.parentdir_prefix = "pystencils-"
+    cfg.versionfile_source = "src/pystencils/_version.py"
+    cfg.verbose = False
+    return cfg
+class NotThisMethod(Exception):
+    """Exception raised if a method is not valid for the current scenario.
+    The version lookup functions in this file raise it to signal that their
+    strategy cannot produce a version.
+    """
+# NOTE(review): nothing in the visible part of this file writes to this dict.
+LONG_VERSION_PY: Dict[str, str] = {}
+# Registry filled by register_vcs_handler: HANDLERS[vcs][method] -> function.
+HANDLERS: Dict[str, Dict[str, Callable]] = {}
+def register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator
+    """Build a decorator that records a function as the handler for (vcs, method)."""
+    def decorate(f: Callable) -> Callable:
+        """Register f under HANDLERS[vcs][method] and return it unchanged."""
+        # setdefault creates the per-VCS table on first use.
+        HANDLERS.setdefault(vcs, {})[method] = f
+        return f
+    return decorate
+def run_command(
+    commands: List[str],
+    args: List[str],
+    cwd: Optional[str] = None,
+    verbose: bool = False,
+    hide_stderr: bool = False,
+    env: Optional[Dict[str, str]] = None,
+) -> Tuple[Optional[str], Optional[int]]:
+    """Call the given command(s).
+    Each entry of ``commands`` is tried in turn as the executable, with
+    ``args`` appended; the first one that can be started is used.
+    Returns a ``(stdout, returncode)`` tuple:
+    * ``(text, 0)`` with the stripped, decoded stdout on success,
+    * ``(None, returncode)`` when the process exits with a non-zero code,
+    * ``(None, None)`` when no command could be started.
+    Diagnostics are printed only when ``verbose`` is true.
+    """
+    assert isinstance(commands, list)
+    process = None
+    popen_kwargs: Dict[str, Any] = {}
+    if sys.platform == "win32":
+        # This hides the console window if pythonw.exe is used
+        startupinfo = subprocess.STARTUPINFO()
+        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+        popen_kwargs["startupinfo"] = startupinfo
+    for command in commands:
+        try:
+            dispcmd = str([command] + args)
+            # remember shell=False, so use git.cmd on windows, not just git
+            process = subprocess.Popen([command] + args, cwd=cwd, env=env,
+                                       stdout=subprocess.PIPE,
+                                       stderr=(subprocess.PIPE if hide_stderr
+                                               else None), **popen_kwargs)
+            break
+        except OSError as e:
+            # ENOENT: this executable name does not exist, try the next one.
+            if e.errno == errno.ENOENT:
+                continue
+            if verbose:
+                print("unable to run %s" % dispcmd)
+                print(e)
+            return None, None
+    else:
+        # Loop finished without break: none of the candidates could be found.
+        if verbose:
+            print("unable to find command, tried %s" % (commands,))
+        return None, None
+    stdout = process.communicate()[0].strip().decode()
+    if process.returncode != 0:
+        if verbose:
+            print("unable to run %s (error)" % dispcmd)
+            print("stdout was %s" % stdout)
+        return None, process.returncode
+    return stdout, process.returncode
+def versions_from_parentdir(
+    parentdir_prefix: str,
+    root: str,
+    verbose: bool,
+) -> Dict[str, Any]:
+    """Derive the version from the name of a parent directory.
+    Starting at ``root`` and walking up at most two further levels, the first
+    directory whose basename starts with ``parentdir_prefix`` supplies the
+    version (the remainder of the name after the prefix). Raises
+    NotThisMethod when none of the three directories matches.
+    """
+    tried = []
+    candidate = root
+    levels_left = 3
+    while levels_left > 0:
+        name = os.path.basename(candidate)
+        if name.startswith(parentdir_prefix):
+            version = name[len(parentdir_prefix):]
+            return {"version": version,
+                    "full-revisionid": None,
+                    "dirty": False, "error": None, "date": None}
+        tried.append(candidate)
+        candidate = os.path.dirname(candidate)  # up a level
+        levels_left -= 1
+    if verbose:
+        print("Tried directories %s but none started with prefix %s" %
+              (str(tried), parentdir_prefix))
+    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
+@register_vcs_handler("git", "get_keywords")
+def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
+    """Read the git keyword assignments out of the given version file.
+    The file is scanned line by line with a regexp rather than imported.
+    Returns a dict holding whichever of "refnames", "full" and "date" were
+    found; an unreadable file yields an empty dict.
+    """
+    # the code embedded in _version.py can just fetch the value of these
+    # keywords. When used from setup.py, we don't want to import _version.py,
+    # so we do it with a regexp instead. This function is not used from
+    # _version.py.
+    markers = (
+        ("git_refnames =", "refnames"),
+        ("git_full =", "full"),
+        ("git_date =", "date"),
+    )
+    found: Dict[str, str] = {}
+    try:
+        with open(versionfile_abs, "r") as fobj:
+            for line in fobj:
+                stripped = line.strip()
+                for marker, key in markers:
+                    if not stripped.startswith(marker):
+                        continue
+                    mo = re.search(r'=\s*"(.*)"', line)
+                    if mo:
+                        found[key] = mo.group(1)
+    except OSError:
+        pass
+    return found
+@register_vcs_handler("git", "keywords")
+def git_versions_from_keywords(
+    keywords: Dict[str, str],
+    tag_prefix: str,
+    verbose: bool,
+) -> Dict[str, Any]:
+    """Get version information from git keywords.
+    Raises NotThisMethod when ``keywords`` has no "refnames" entry or when
+    the refnames still start with the unexpanded "$Format" placeholder.
+    Otherwise returns a version dict. The version is the first tag (in
+    sorted order) that starts with ``tag_prefix`` followed by a digit, with
+    the prefix removed; if no such tag exists the version is "0+unknown" and
+    "error" is set to "no suitable tags".
+    """
+    if "refnames" not in keywords:
+        raise NotThisMethod("Short version file found")
+    date = keywords.get("date")
+    if date is not None:
+        # Use only the last line.  Previous lines may contain GPG signature
+        # information.
+        date = date.splitlines()[-1]
+        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
+        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
+        # -like" string, which we must then edit to make compliant), because
+        # it's been around since git-1.5.3, and it's too difficult to
+        # discover which version we're using, or to work around using an
+        # older one.
+        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+    refnames = keywords["refnames"].strip()
+    if refnames.startswith("$Format"):
+        if verbose:
+            print("keywords are unexpanded, not using")
+        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
+    refs = {r.strip() for r in refnames.strip("()").split(",")}
+    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
+    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
+    TAG = "tag: "
+    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
+    if not tags:
+        # Either we're using git < 1.8.3, or there really are no tags. We use
+        # a heuristic: assume all version tags have a digit. The old git %d
+        # expansion behaves like git log --decorate=short and strips out the
+        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
+        # between branches and tags. By ignoring refnames without digits, we
+        # filter out many common branch names like "release" and
+        # "stabilization", as well as "HEAD" and "master".
+        tags = {r for r in refs if re.search(r'\d', r)}
+        if verbose:
+            print("discarding '%s', no digits" % ",".join(refs - tags))
+    if verbose:
+        print("likely tags: %s" % ",".join(sorted(tags)))
+    for ref in sorted(tags):
+        # sorting will prefer e.g. "2.0" over "2.0rc1"
+        if ref.startswith(tag_prefix):
+            r = ref[len(tag_prefix):]
+            # Filter out refs that exactly match prefix or that don't start
+            # with a number once the prefix is stripped (mostly a concern
+            # when prefix is '')
+            if not re.match(r'\d', r):
+                continue
+            if verbose:
+                print("picking %s" % r)
+            return {"version": r,
+                    "full-revisionid": keywords["full"].strip(),
+                    "dirty": False, "error": None,
+                    "date": date}
+    # no suitable tags, so version is "0+unknown", but full hex is still there
+    if verbose:
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
+            "full-revisionid": keywords["full"].strip(),
+            "dirty": False, "error": "no suitable tags", "date": None}
+@register_vcs_handler("git", "pieces_from_vcs")
+def git_pieces_from_vcs(
+    tag_prefix: str,
+    root: str,
+    verbose: bool,
+    runner: Callable = run_command
+) -> Dict[str, Any]:
+    """Get version from 'git describe' in the root of the source tree.
+    This only gets called if the git-archive 'subst' keywords were *not*
+    expanded, and _version.py hasn't already been rewritten with a short
+    version string, meaning we're inside a checked out source tree.
+    ``runner`` is called like run_command and must return a
+    ``(stdout, returncode)`` tuple.
+    Returns a dict with the keys "long", "short", "error", "branch" and
+    "dirty" and, unless "error" is set, also "closest-tag", "distance" and
+    "date".
+    Raises NotThisMethod whenever one of the git invocations fails, so that
+    the caller can fall back to another way of determining the version.
+    """
+    GITS = ["git"]
+    if sys.platform == "win32":
+        GITS = ["git.cmd", "git.exe"]
+    # GIT_DIR can interfere with correct operation of Versioneer.
+    # It may be intended to be passed to the Versioneer-versioned project,
+    # but that should not change where we get our version from.
+    env = os.environ.copy()
+    env.pop("GIT_DIR", None)
+    runner = functools.partial(runner, env=env)
+    _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
+                   hide_stderr=not verbose)
+    if rc != 0:
+        if verbose:
+            print("Directory %s not under git control" % root)
+        raise NotThisMethod("'git rev-parse --git-dir' returned error")
+    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
+    # if there isn't one, this yields HEX[-dirty] (no NUM)
+    describe_out, rc = runner(GITS, [
+        "describe", "--tags", "--dirty", "--always", "--long",
+        "--match", f"{tag_prefix}[[:digit:]]*"
+    ], cwd=root)
+    # --long was added in git-1.5.5
+    if describe_out is None:
+        raise NotThisMethod("'git describe' failed")
+    describe_out = describe_out.strip()
+    full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
+    if full_out is None:
+        raise NotThisMethod("'git rev-parse' failed")
+    full_out = full_out.strip()
+    pieces: Dict[str, Any] = {}
+    pieces["long"] = full_out
+    pieces["short"] = full_out[:7]  # maybe improved later
+    pieces["error"] = None
+    branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
+                             cwd=root)
+    # --abbrev-ref was added in git-1.6.3
+    if rc != 0 or branch_name is None:
+        raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
+    branch_name = branch_name.strip()
+    if branch_name == "HEAD":
+        # If we aren't exactly on a branch, pick a branch which represents
+        # the current commit. If all else fails, we are on a branchless
+        # commit.
+        branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
+        # --contains was added in git-1.5.4
+        if rc != 0 or branches is None:
+            raise NotThisMethod("'git branch --contains' returned error")
+        branches = branches.split("\n")
+        # Remove the first line if we're running detached
+        if "(" in branches[0]:
+            branches.pop(0)
+        # Strip off the leading "* " from the list of branches.
+        branches = [branch[2:] for branch in branches]
+        if "master" in branches:
+            branch_name = "master"
+        elif not branches:
+            branch_name = None
+        else:
+            # Pick the first branch that is returned. Good or bad.
+            branch_name = branches[0]
+    pieces["branch"] = branch_name
+    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
+    # TAG might have hyphens.
+    git_describe = describe_out
+    # look for -dirty suffix
+    dirty = git_describe.endswith("-dirty")
+    pieces["dirty"] = dirty
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+    # now we have TAG-NUM-gHEX or HEX
+    if "-" in git_describe:
+        # TAG-NUM-gHEX
+        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+        if not mo:
+            # unparsable. Maybe git-describe is misbehaving?
+            pieces["error"] = ("unable to parse git-describe output: '%s'"
+                               % describe_out)
+            return pieces
+        # tag
+        full_tag = mo.group(1)
+        if not full_tag.startswith(tag_prefix):
+            if verbose:
+                fmt = "tag '%s' doesn't start with prefix '%s'"
+                print(fmt % (full_tag, tag_prefix))
+            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
+                               % (full_tag, tag_prefix))
+            return pieces
+        pieces["closest-tag"] = full_tag[len(tag_prefix):]
+        # distance: number of commits since tag
+        pieces["distance"] = int(mo.group(2))
+        # commit: short hex revision ID
+        pieces["short"] = mo.group(3)
+    else:
+        # HEX: no tags
+        pieces["closest-tag"] = None
+        out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
+        # The runner returns None on failure; report that as "method not
+        # applicable" instead of crashing with an AttributeError.
+        if out is None:
+            raise NotThisMethod("'git rev-list' failed")
+        pieces["distance"] = len(out.split())  # total number of commits
+    # commit date: see ISO-8601 comment in git_versions_from_keywords()
+    date_out, rc = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"],
+                          cwd=root)
+    # Same guard as above; empty output would also break splitlines()[-1].
+    if date_out is None or not date_out.strip():
+        raise NotThisMethod("'git show' failed")
+    date = date_out.strip()
+    # Use only the last line.  Previous lines may contain GPG signature
+    # information.
+    date = date.splitlines()[-1]
+    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+    return pieces
+def plus_or_dot(pieces: Dict[str, Any]) -> str:
+    """Return a + if we don't already have one, else return a .
+    The decision is based on ``pieces["closest-tag"]``: "." is returned when
+    that tag already contains a "+", otherwise "+". A missing key and a value
+    of None are both treated as "no tag".
+    """
+    # "closest-tag" is stored as None for untagged checkouts, so a plain
+    # .get(key, "") default is not enough: it only covers the missing key.
+    if "+" in (pieces.get("closest-tag") or ""):
+        return "."
+    return "+"
+def render_pep440(pieces: Dict[str, Any]) -> str:
+    """Render the version with a post-release "local version identifier".
+    Result: TAG[+DISTANCE.gHEX[.dirty]] . A tagged build that was dirtied
+    afterwards renders as TAG+0.gHEX.dirty
+    Exceptions:
+    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
+    """
+    tag = pieces["closest-tag"]
+    if not tag:
+        # exception #1
+        untagged = "0+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        return (untagged + ".dirty") if pieces["dirty"] else untagged
+    if not (pieces["distance"] or pieces["dirty"]):
+        # exactly on a clean tag: the tag is the version
+        return tag
+    segments = [
+        tag,
+        plus_or_dot(pieces),
+        "%d.g%s" % (pieces["distance"], pieces["short"]),
+    ]
+    if pieces["dirty"]:
+        segments.append(".dirty")
+    return "".join(segments)
+def render_pep440_branch(pieces: Dict[str, Any]) -> str:
+    """Render TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
+    ".dev0" marks a build that is not on the master branch. Note that .dev0
+    sorts backwards (a feature branch will appear "older" than master).
+    Exceptions:
+    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
+    """
+    tag = pieces["closest-tag"]
+    if tag and not (pieces["distance"] or pieces["dirty"]):
+        # exactly on a clean tag: the tag is the version
+        return tag
+    parts = [tag] if tag else ["0"]
+    if pieces["branch"] != "master":
+        parts.append(".dev0")
+    if tag:
+        parts.append(plus_or_dot(pieces))
+        parts.append("%d.g%s" % (pieces["distance"], pieces["short"]))
+    else:
+        # exception #1
+        parts.append("+untagged.%d.g%s" % (pieces["distance"],
+                                           pieces["short"]))
+    if pieces["dirty"]:
+        parts.append(".dirty")
+    return "".join(parts)
+def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
+    """Split a pep440 version string at the ".post" segment.
+    Returns the text before the first ".post" together with the post-release
+    number. The number is None unless the string splits into exactly two
+    parts at ".post"; an empty number after ".post" counts as 0.
+    """
+    chunks = ver.split(".post")
+    if len(chunks) != 2:
+        return chunks[0], None
+    return chunks[0], int(chunks[1] or 0)
+def render_pep440_pre(pieces: Dict[str, Any]) -> str:
+    """Render TAG[.postN.devDISTANCE] -- No -dirty.
+    Exceptions:
+    1: no tags. 0.post0.devDISTANCE
+    """
+    tag = pieces["closest-tag"]
+    if not tag:
+        # exception #1
+        return "0.post0.dev%d" % pieces["distance"]
+    if not pieces["distance"]:
+        # no commits, use the tag as the version
+        return tag
+    # update the post release segment
+    base, post_number = pep440_split_post(tag)
+    if post_number is None:
+        return base + ".post0.dev%d" % (pieces["distance"])
+    return base + ".post%d.dev%d" % (post_number + 1, pieces["distance"])
+def render_pep440_post(pieces: Dict[str, Any]) -> str:
+    """Render TAG[.postDISTANCE[.dev0]+gHEX] .
+    ".dev0" marks a dirty tree. Note that .dev0 sorts backwards
+    (a dirty tree will appear "older" than the corresponding clean one),
+    but you shouldn't be releasing software with -dirty anyways.
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    tag = pieces["closest-tag"]
+    if tag and not (pieces["distance"] or pieces["dirty"]):
+        # exactly on a clean tag: the tag is the version
+        return tag
+    post = ".post%d" % pieces["distance"]
+    dev = ".dev0" if pieces["dirty"] else ""
+    if tag:
+        return tag + post + dev + plus_or_dot(pieces) + "g%s" % pieces["short"]
+    # exception #1
+    return "0" + post + dev + "+g%s" % pieces["short"]
+def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
+    """Render TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+    ".dev0" marks a build that is not on the master branch.
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+    """
+    tag = pieces["closest-tag"]
+    if tag and not (pieces["distance"] or pieces["dirty"]):
+        # exactly on a clean tag: the tag is the version
+        return tag
+    # exception #1 (no tag) starts from "0" instead of the tag
+    parts = [tag or "0", ".post%d" % pieces["distance"]]
+    if pieces["branch"] != "master":
+        parts.append(".dev0")
+    parts.append(plus_or_dot(pieces) if tag else "+")
+    parts.append("g%s" % pieces["short"])
+    if pieces["dirty"]:
+        parts.append(".dirty")
+    return "".join(parts)
+def render_pep440_old(pieces: Dict[str, Any]) -> str:
+    """Render TAG[.postDISTANCE[.dev0]] .
+    ".dev0" marks a dirty tree.
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    tag = pieces["closest-tag"]
+    if tag and not (pieces["distance"] or pieces["dirty"]):
+        # exactly on a clean tag: the tag is the version
+        return tag
+    suffix = ".post%d" % pieces["distance"]
+    if pieces["dirty"]:
+        suffix += ".dev0"
+    # exception #1 (no tag) starts from "0" instead of the tag
+    return (tag or "0") + suffix
+def render_git_describe(pieces: Dict[str, Any]) -> str:
+    """Render TAG[-DISTANCE-gHEX][-dirty].
+    Like 'git describe --tags --dirty --always'.
+    Exceptions:
+    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
+    """
+    tag = pieces["closest-tag"]
+    if not tag:
+        # exception #1
+        described = pieces["short"]
+    elif pieces["distance"]:
+        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        described = tag
+    return (described + "-dirty") if pieces["dirty"] else described
+def render_git_describe_long(pieces: Dict[str, Any]) -> str:
+    """Render TAG-DISTANCE-gHEX[-dirty].
+    Like 'git describe --tags --dirty --always --long'.
+    The distance/hash is unconditional.
+    Exceptions:
+    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
+    """
+    tag = pieces["closest-tag"]
+    # exception #1: without a tag only the short hash is used
+    described = (
+        tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
+        if tag else pieces["short"]
+    )
+    return (described + "-dirty") if pieces["dirty"] else described
+def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
+    """Render the given version pieces into the requested style.
+    If ``pieces["error"]`` is set, an "unknown" version dict carrying that
+    error is returned and no renderer is called. An empty style or
+    "default" selects "pep440". Raises ValueError for an unrecognised style.
+    """
+    failure = pieces["error"]
+    if failure:
+        return {"version": "unknown",
+                "full-revisionid": pieces.get("long"),
+                "dirty": None,
+                "error": failure,
+                "date": None}
+    # Style name -> function that turns the pieces into a version string.
+    renderers = {
+        "pep440": render_pep440,
+        "pep440-branch": render_pep440_branch,
+        "pep440-pre": render_pep440_pre,
+        "pep440-post": render_pep440_post,
+        "pep440-post-branch": render_pep440_post_branch,
+        "pep440-old": render_pep440_old,
+        "git-describe": render_git_describe,
+        "git-describe-long": render_git_describe_long,
+    }
+    if not style or style == "default":
+        chosen = "pep440"  # the default
+    else:
+        chosen = style
+    renderer = renderers.get(chosen)
+    if renderer is None:
+        raise ValueError("unknown style '%s'" % chosen)
+    version_string = renderer(pieces)
+    return {"version": version_string, "full-revisionid": pieces["long"],
+            "dirty": pieces["dirty"], "error": None,
+            "date": pieces.get("date")}
+def get_versions() -> Dict[str, Any]:
+    """Get version information or return default if unable to do so.
+    Sources are tried in order: expanded keywords, the VCS pieces for the
+    source tree root derived from __file__, then the parent directory name
+    (only when a parentdir prefix is configured). A source that raises
+    NotThisMethod is skipped; if none succeeds a "0+unknown" dict is
+    returned with an explanatory "error" entry.
+    """
+    def unknown(reason: str) -> Dict[str, Any]:
+        # Shared fallback result; only the error text differs per call site.
+        return {"version": "0+unknown", "full-revisionid": None,
+                "dirty": None,
+                "error": reason, "date": None}
+    # This file lives at ROOT/VERSIONFILE_SOURCE. With __file__ available we
+    # can walk back up to the root; some py2exe/bbfreeze/non-CPython
+    # implementations do not provide __file__, leaving only the expanded
+    # keywords to work with.
+    config = get_config()
+    be_verbose = config.verbose
+    try:
+        return git_versions_from_keywords(get_keywords(), config.tag_prefix,
+                                          be_verbose)
+    except NotThisMethod:
+        pass
+    try:
+        tree_root = os.path.realpath(__file__)
+        # versionfile_source is the relative path from the top of the source
+        # tree (where the .git directory might live) to this file, so strip
+        # one path component per segment to get from __file__ to the root.
+        for _ in config.versionfile_source.split('/'):
+            tree_root = os.path.dirname(tree_root)
+    except NameError:
+        return unknown("unable to find root of source tree")
+    try:
+        return render(
+            git_pieces_from_vcs(config.tag_prefix, tree_root, be_verbose),
+            config.style)
+    except NotThisMethod:
+        pass
+    try:
+        if config.parentdir_prefix:
+            return versions_from_parentdir(config.parentdir_prefix,
+                                           tree_root, be_verbose)
+    except NotThisMethod:
+        pass
+    return unknown("unable to compute version")
No results found