e6a30483 · e6a30483 · e6a30483 · e6a30483 · e6a30483 · e6a30483
--- a/pystencils_tests/test_interpolation.py
+++ b/pystencils_tests/test_interpolation.py
-# -*- coding: utf-8 -*-
-#
-# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
-#
-# Distributed under terms of the GPLv3 license.
-
-"""
-
-"""
-import itertools
-from os.path import dirname, join
-
-import numpy as np
-import pytest
-import sympy
-
-import pycuda.autoinit  # NOQA
-import pycuda.gpuarray as gpuarray
-import pystencils
-from pystencils.interpolation_astnodes import LinearInterpolator
-from pystencils.spatial_coordinates import x_, y_
-
-type_map = {
-    np.float32: 'float32',
-    np.float64: 'float64',
-    np.int32: 'int32',
-}
-
-try:
-    import pyconrad.autoinit
-except Exception:
-    import unittest.mock
-    pyconrad = unittest.mock.MagicMock()
-
-LENNA_FILE = join(dirname(__file__), 'test_data', 'lenna.png')
-
-try:
-    import skimage.io
-    lenna = skimage.io.imread(LENNA_FILE, as_gray=True).astype(np.float64)
-    pyconrad.imshow(lenna)
-except Exception:
-    lenna = np.random.rand(20, 30)
-
-
-def test_interpolation():
-    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
-
-    assignments = pystencils.AssignmentCollection({
-        y_f.center(): LinearInterpolator(x_f).at([x_ + 2.7, y_ + 7.2])
-    })
-    print(assignments)
-    ast = pystencils.create_kernel(assignments)
-    print(ast)
-    print(pystencils.show_code(ast))
-    kernel = ast.compile()
-
-    pyconrad.imshow(lenna)
-
-    out = np.zeros_like(lenna)
-    kernel(x=lenna, y=out)
-    pyconrad.imshow(out, "out")
-
-
-def test_scale_interpolation():
-    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
-
-    for address_mode in ['border', 'wrap', 'clamp', 'mirror']:
-        assignments = pystencils.AssignmentCollection({
-            y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at([0.5 * x_ + 2.7, 0.25 * y_ + 7.2])
-        })
-        print(assignments)
-        ast = pystencils.create_kernel(assignments)
-        print(ast)
-        print(pystencils.show_code(ast))
-        kernel = ast.compile()
-
-        out = np.zeros_like(lenna)
-        kernel(x=lenna, y=out)
-        pyconrad.imshow(out, "out " + address_mode)
-
-
-def test_rotate_interpolation():
-    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
-
-    rotation_angle = sympy.pi / 5
-
-    for address_mode in ['border', 'wrap', 'clamp', 'mirror']:
-
-        transformed = sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_))
-        assignments = pystencils.AssignmentCollection({
-            y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
-        })
-        print(assignments)
-        ast = pystencils.create_kernel(assignments)
-        print(ast)
-        print(pystencils.show_code(ast))
-        kernel = ast.compile()
-
-        out = np.zeros_like(lenna)
-        kernel(x=lenna, y=out)
-        pyconrad.imshow(out, "out " + address_mode)
-
-
-def test_rotate_interpolation_gpu():
-
-    rotation_angle = sympy.pi / 5
-    scale = 1
-
-    for address_mode in ['border', 'wrap', 'clamp', 'mirror']:
-        previous_result = None
-        for dtype in [np.int32, np.float32, np.float64]:
-            if dtype == np.int32:
-                lenna_gpu = gpuarray.to_gpu(
-                    np.ascontiguousarray(lenna * 255, dtype))
-            else:
-                lenna_gpu = gpuarray.to_gpu(
-                    np.ascontiguousarray(lenna, dtype))
-            for use_textures in [True, False]:
-                x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0)
-
-                transformed = scale * \
-                    sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) - sympy.Matrix([2, 2])
-                assignments = pystencils.AssignmentCollection({
-                    y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
-                })
-                print(assignments)
-                ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures)
-                print(ast)
-                print(pystencils.show_code(ast))
-                kernel = ast.compile()
-
-                out = gpuarray.zeros_like(lenna_gpu)
-                kernel(x=lenna_gpu, y=out)
-                pyconrad.imshow(out,
-                                f"out {address_mode} texture:{use_textures} {type_map[dtype]}")
-                skimage.io.imsave(f"/tmp/out {address_mode} texture:{use_textures} {type_map[dtype]}.tif",
-                                  np.ascontiguousarray(out.get(), np.float32))
-                if previous_result is not None:
-                    try:
-                        assert np.allclose(previous_result[4:-4, 4:-4], out.get()[4:-4, 4:-4], rtol=100, atol=1e-3)
-                    except AssertionError as e:  # NOQA
-                        print("Max error: %f" % np.max(previous_result - out.get()))
-                        # pyconrad.imshow(previous_result - out.get(), "Difference image")
-                        # raise e
-                previous_result = out.get()
-
-
-def test_shift_interpolation_gpu():
-
-    rotation_angle = 0  # sympy.pi / 5
-    scale = 1
-    # shift = - sympy.Matrix([1.5, 1.5])
-    shift = sympy.Matrix((0.0, 0.0))
-
-    for address_mode in ['border', 'wrap', 'clamp', 'mirror']:
-        previous_result = None
-        for dtype in [np.float64, np.float32, np.int32]:
-            if dtype == np.int32:
-                lenna_gpu = gpuarray.to_gpu(
-                    np.ascontiguousarray(lenna * 255, dtype))
-            else:
-                lenna_gpu = gpuarray.to_gpu(
-                    np.ascontiguousarray(lenna, dtype))
-            for use_textures in [True, False]:
-                x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0)
-
-                if use_textures:
-                    transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift
-                else:
-                    transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift
-                assignments = pystencils.AssignmentCollection({
-                    y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
-                })
-                # print(assignments)
-                ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures)
-                # print(ast)
-                print(pystencils.show_code(ast))
-                kernel = ast.compile()
-
-                out = gpuarray.zeros_like(lenna_gpu)
-                kernel(x=lenna_gpu, y=out)
-                pyconrad.imshow(out,
-                                f"out {address_mode} texture:{use_textures} {type_map[dtype]}")
-                skimage.io.imsave(f"/tmp/out {address_mode} texture:{use_textures} {type_map[dtype]}.tif",
-                                  np.ascontiguousarray(out.get(), np.float32))
-                # if not (use_single_precision and use_textures):
-                # if previous_result is not None:
-                # try:
-                # assert np.allclose(previous_result[4:-4, 4:-4], out.get()
-                # [4:-4, 4:-4], rtol=1e-3, atol=1e-2)
-                # except AssertionError as e:
-                # print("Max error: %f" % np.max(np.abs(previous_result[4:-4, 4:-4] - out.get()[4:-4, 4:-4])))
-                # pyconrad.imshow(previous_result[4:-4, 4:-4] - out.get()[4:-4, 4:-4], "Difference image")
-                # raise e
-                # previous_result = out.get()
-
-
-def test_rotate_interpolation_size_change():
-    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
-
-    rotation_angle = sympy.pi / 5
-
-    for address_mode in ['border', 'wrap', 'clamp', 'mirror']:
-
-        transformed = sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_))
-        assignments = pystencils.AssignmentCollection({
-            y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
-        })
-        print(assignments)
-        ast = pystencils.create_kernel(assignments)
-        print(ast)
-        print(pystencils.show_code(ast))
-        kernel = ast.compile()
-
-        out = np.zeros((100, 150), np.float64)
-        kernel(x=lenna, y=out)
-        pyconrad.imshow(out, "small out " + address_mode)
-
-
-@pytest.mark.parametrize('address_mode, target',
-                         itertools.product(['border', 'wrap', 'clamp', 'mirror'], ['cpu', 'gpu']))
-def test_field_interpolated(address_mode, target):
-    x_f, y_f = pystencils.fields('x,y: float64 [2d]')
-
-    assignments = pystencils.AssignmentCollection({
-        y_f.center(): x_f.interpolated_access([0.5 * x_ + 2.7, 0.25 * y_ + 7.2], address_mode=address_mode)
-    })
-    print(assignments)
-    ast = pystencils.create_kernel(assignments)
-    print(ast)
-    print(pystencils.show_code(ast))
-    kernel = ast.compile()
-
-    out = np.zeros_like(lenna)
-    kernel(x=lenna, y=out)
-    pyconrad.imshow(out, "out " + address_mode)
--- a/pystencils_tests/test_jacobi_llvm.py
+++ b/pystencils_tests/test_jacobi_llvm.py
-import numpy as np
-import pytest
-
-from pystencils import Assignment, Field, show_code
-from pystencils.cpu.cpujit import get_llc_command
-from pystencils.llvm import create_kernel, make_python_function
-from pystencils.llvm.llvmjit import generate_and_jit
-
-
-def test_jacobi_fixed_field_size():
-    size = (30, 20)
-
-    src_field_llvm = np.random.rand(*size)
-    src_field_py = np.copy(src_field_llvm)
-    dst_field_llvm = np.zeros(size)
-    dst_field_py = np.zeros(size)
-
-    f = Field.create_from_numpy_array("f", src_field_llvm)
-    d = Field.create_from_numpy_array("d", dst_field_llvm)
-
-    jacobi = Assignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
-    ast = create_kernel([jacobi])
-
-    for x in range(1, size[0] - 1):
-        for y in range(1, size[1] - 1):
-            dst_field_py[x, y] = 0.25 * (src_field_py[x - 1, y] + src_field_py[x + 1, y] +
-                                         src_field_py[x, y - 1] + src_field_py[x, y + 1])
-
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    error = np.sum(np.abs(dst_field_py - dst_field_llvm))
-    np.testing.assert_almost_equal(error, 0.0)
-
-
-@pytest.mark.skipif(not get_llc_command(), reason="Tests requires llc in $PATH")
-def test_jacobi_fixed_field_size_gpu():
-    size = (30, 20)
-
-    import pycuda.autoinit  # noqa
-    from pycuda.gpuarray import to_gpu
-
-    src_field_llvm = np.random.rand(*size)
-    src_field_py = np.copy(src_field_llvm)
-    dst_field_llvm = np.zeros(size)
-    dst_field_py = np.zeros(size)
-
-    f = Field.create_from_numpy_array("f", src_field_py)
-    d = Field.create_from_numpy_array("d", dst_field_py)
-
-    src_field_llvm = to_gpu(src_field_llvm)
-    dst_field_llvm = to_gpu(dst_field_llvm)
-
-    jacobi = Assignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
-    ast = create_kernel([jacobi], target='gpu')
-    print(show_code(ast))
-
-    for x in range(1, size[0] - 1):
-        for y in range(1, size[1] - 1):
-            dst_field_py[x, y] = 0.25 * (src_field_py[x - 1, y] + src_field_py[x + 1, y] +
-                                         src_field_py[x, y - 1] + src_field_py[x, y + 1])
-
-    jit = generate_and_jit(ast)
-    jit('kernel', dst_field_llvm, src_field_llvm)
-    error = np.sum(np.abs(dst_field_py - dst_field_llvm.get()))
-    np.testing.assert_almost_equal(error, 0.0)
-
-
-def test_jacobi_variable_field_size():
-    size = (3, 3, 3)
-    f = Field.create_generic("f", 3)
-    d = Field.create_generic("d", 3)
-    jacobi = Assignment(d[0, 0, 0], (f[1, 0, 0] + f[-1, 0, 0] + f[0, 1, 0] + f[0, -1, 0]) / 4)
-    ast = create_kernel([jacobi])
-
-    src_field_llvm = np.random.rand(*size)
-    src_field_py = np.copy(src_field_llvm)
-    dst_field_llvm = np.zeros(size)
-    dst_field_py = np.zeros(size)
-
-    for x in range(1, size[0] - 1):
-        for y in range(1, size[1] - 1):
-            for z in range(1, size[2] - 1):
-                dst_field_py[x, y, z] = 0.25 * (src_field_py[x - 1, y, z] + src_field_py[x + 1, y, z] +
-                                                src_field_py[x, y - 1, z] + src_field_py[x, y + 1, z])
-
-    kernel = make_python_function(ast, {'f': src_field_llvm, 'd': dst_field_llvm})
-    kernel()
-    error = np.sum(np.abs(dst_field_py - dst_field_llvm))
-    np.testing.assert_almost_equal(error, 0.0)
-
-
-if __name__ == "__main__":
-    test_jacobi_fixed_field_size_gpu()
--- a/pystencils_tests/test_kerncraft_coupling.py
+++ b/pystencils_tests/test_kerncraft_coupling.py
-import os
-
-import numpy as np
-import pytest
-import sympy as sp
-import kerncraft
-
-from pystencils import Assignment, Field
-from pystencils.cpu import create_kernel
-from pystencils.kerncraft_coupling import KerncraftParameters, PyStencilsKerncraftKernel
-from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark
-
-SCRIPT_FOLDER = os.path.dirname(os.path.realpath(__file__))
-INPUT_FOLDER = os.path.join(SCRIPT_FOLDER, "kerncraft_inputs")
-
-
-@pytest.mark.kernkraft
-def test_compilation():
-    machine_file_path = os.path.join(INPUT_FOLDER, "default_machine_file.yaml")
-    machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path)
-
-    kernel_file_path = os.path.join(INPUT_FOLDER, "2d-5pt.c")
-    with open(kernel_file_path) as kernel_file:
-        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
-        reference_kernel.as_code('likwid')
-
-    size = [30, 50, 3]
-    arr = np.zeros(size)
-    a = Field.create_from_numpy_array('a', arr, index_dimensions=1)
-    b = Field.create_from_numpy_array('b', arr, index_dimensions=1)
-    s = sp.Symbol("s")
-    rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
-    update_rule = Assignment(b[0, 0], s * rhs)
-    ast = create_kernel([update_rule])
-    mine = generate_benchmark(ast, likwid=False)
-    print(mine)
-
-
-@pytest.mark.kernkraft
-def analysis(kernel, model='ecmdata'):
-    machine_file_path = os.path.join(INPUT_FOLDER, "default_machine_file.yaml")
-    machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path)
-    if model == 'ecmdata':
-        model = kerncraft.models.ECMData(kernel, machine, KerncraftParameters())
-    elif model == 'ecm':
-        model = kerncraft.models.ECM(kernel, machine, KerncraftParameters())
-        # model.analyze()
-        # model.plot()
-    elif model == 'benchmark':
-        model = kerncraft.models.Benchmark(kernel, machine, KerncraftParameters())
-    else:
-        model = kerncraft.models.ECM(kernel, machine, KerncraftParameters())
-    model.analyze()
-    return model
-
-
-@pytest.mark.kernkraft
-def test_3d_7pt_iaca():
-    # Make sure you use the intel compiler
-    size = [20, 200, 200]
-    kernel_file_path = os.path.join(INPUT_FOLDER, "3d-7pt.c")
-    machine_file_path = os.path.join(INPUT_FOLDER, "default_machine_file.yaml")
-    machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path)
-    with open(kernel_file_path) as kernel_file:
-        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
-    reference_kernel.set_constant('M', size[0])
-    reference_kernel.set_constant('N', size[1])
-    assert size[1] == size[2]
-    analysis(reference_kernel, model='ecm')
-
-    arr = np.zeros(size)
-    a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
-    b = Field.create_from_numpy_array('b', arr, index_dimensions=0)
-    s = sp.Symbol("s")
-    rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
-
-    update_rule = Assignment(b[0, 0, 0], s * rhs)
-    ast = create_kernel([update_rule])
-    k = PyStencilsKerncraftKernel(ast, machine)
-    analysis(k, model='ecm')
-    assert reference_kernel._flops == k._flops
-    # assert reference.results['cl throughput'] == analysis.results['cl throughput']
-
-
-@pytest.mark.kernkraft
-def test_2d_5pt():
-    size = [30, 50, 3]
-    kernel_file_path = os.path.join(INPUT_FOLDER, "2d-5pt.c")
-    with open(kernel_file_path) as kernel_file:
-        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path)
-    reference = analysis(reference_kernel)
-
-    arr = np.zeros(size)
-    a = Field.create_from_numpy_array('a', arr, index_dimensions=1)
-    b = Field.create_from_numpy_array('b', arr, index_dimensions=1)
-    s = sp.Symbol("s")
-    rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
-    update_rule = Assignment(b[0, 0], s * rhs)
-    ast = create_kernel([update_rule])
-    k = PyStencilsKerncraftKernel(ast)
-    result = analysis(k)
-
-    for e1, e2 in zip(reference.results['cycles'], result.results['cycles']):
-        assert e1 == e2
-
-
-@pytest.mark.kernkraft
-def test_3d_7pt():
-    size = [30, 50, 50]
-    kernel_file_path = os.path.join(INPUT_FOLDER, "3d-7pt.c")
-    with open(kernel_file_path) as kernel_file:
-        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path)
-    reference_kernel.set_constant('M', size[0])
-    reference_kernel.set_constant('N', size[1])
-    assert size[1] == size[2]
-    reference = analysis(reference_kernel)
-
-    arr = np.zeros(size)
-    a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
-    b = Field.create_from_numpy_array('b', arr, index_dimensions=0)
-    s = sp.Symbol("s")
-    rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
-
-    update_rule = Assignment(b[0, 0, 0], s * rhs)
-    ast = create_kernel([update_rule])
-    k = PyStencilsKerncraftKernel(ast)
-    result = analysis(k)
-
-    for e1, e2 in zip(reference.results['cycles'], result.results['cycles']):
-        assert e1 == e2
--- a/pystencils_tests/test_opencl.py
+++ b/pystencils_tests/test_opencl.py
-import numpy as np
-import pytest
-
-import pystencils
-import sympy as sp
-from pystencils.backends.cuda_backend import CudaBackend
-from pystencils.backends.opencl_backend import OpenClBackend
-from pystencils.opencl.opencljit import make_python_function, init_globally, get_global_cl_queue
-
-try:
-    import pyopencl as cl
-    HAS_OPENCL = True
-except Exception:
-    HAS_OPENCL = False
-
-
-def test_print_opencl():
-    z, y, x = pystencils.fields("z, y, x: [2d]")
-
-    assignments = pystencils.AssignmentCollection({
-        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
-    })
-
-    print(assignments)
-
-    ast = pystencils.create_kernel(assignments, target='gpu')
-
-    print(ast)
-
-    code = pystencils.show_code(ast, custom_backend=CudaBackend())
-    print(code)
-
-    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
-    print(opencl_code)
-
-    assert "__global double * RESTRICT const _data_x" in str(opencl_code)
-    assert "__global double * RESTRICT" in str(opencl_code)
-    assert "get_local_id(0)" in str(opencl_code)
-
-
-@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
-def test_opencl_jit_fixed_size():
-    pytest.importorskip('pycuda')
-
-    z, y, x = pystencils.fields("z, y, x: [20,30]")
-
-    assignments = pystencils.AssignmentCollection({
-        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
-    })
-
-    print(assignments)
-
-    ast = pystencils.create_kernel(assignments, target='gpu')
-
-    print(ast)
-
-    code = pystencils.show_code(ast, custom_backend=CudaBackend())
-    print(code)
-    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
-    print(opencl_code)
-
-    cuda_kernel = ast.compile()
-    assert cuda_kernel is not None
-
-    import pycuda.gpuarray as gpuarray
-
-    x_cpu = np.random.rand(20, 30)
-    y_cpu = np.random.rand(20, 30)
-    z_cpu = np.random.rand(20, 30)
-
-    x = gpuarray.to_gpu(x_cpu)
-    y = gpuarray.to_gpu(y_cpu)
-    z = gpuarray.to_gpu(z_cpu)
-    cuda_kernel(x=x, y=y, z=z)
-
-    result_cuda = z.get()
-
-    import pyopencl.array as array
-    ctx = cl.create_some_context(0)
-    queue = cl.CommandQueue(ctx)
-
-    x = array.to_device(queue, x_cpu)
-    y = array.to_device(queue, y_cpu)
-    z = array.to_device(queue, z_cpu)
-
-    opencl_kernel = make_python_function(ast, queue, ctx)
-    assert opencl_kernel is not None
-    opencl_kernel(x=x, y=y, z=z)
-
-    result_opencl = z.get(queue)
-
-    assert np.allclose(result_cuda, result_opencl)
-
-
-@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
-def test_opencl_jit():
-    pytest.importorskip('pycuda')
-
-    z, y, x = pystencils.fields("z, y, x: [2d]")
-
-    assignments = pystencils.AssignmentCollection({
-        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
-    })
-
-    print(assignments)
-
-    ast = pystencils.create_kernel(assignments, target='gpu')
-
-    print(ast)
-
-    code = pystencils.show_code(ast, custom_backend=CudaBackend())
-    print(code)
-    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
-    print(opencl_code)
-
-    cuda_kernel = ast.compile()
-    assert cuda_kernel is not None
-
-    import pycuda.gpuarray as gpuarray
-
-    x_cpu = np.random.rand(20, 30)
-    y_cpu = np.random.rand(20, 30)
-    z_cpu = np.random.rand(20, 30)
-
-    x = gpuarray.to_gpu(x_cpu)
-    y = gpuarray.to_gpu(y_cpu)
-    z = gpuarray.to_gpu(z_cpu)
-    cuda_kernel(x=x, y=y, z=z)
-
-    result_cuda = z.get()
-
-    import pyopencl.array as array
-    ctx = cl.create_some_context(0)
-    queue = cl.CommandQueue(ctx)
-
-    x = array.to_device(queue, x_cpu)
-    y = array.to_device(queue, y_cpu)
-    z = array.to_device(queue, z_cpu)
-
-    opencl_kernel = make_python_function(ast, queue, ctx)
-    assert opencl_kernel is not None
-    opencl_kernel(x=x, y=y, z=z)
-
-    result_opencl = z.get(queue)
-
-    assert np.allclose(result_cuda, result_opencl)
-
-
-@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
-def test_opencl_jit_with_parameter():
-    pytest.importorskip('pycuda')
-
-    z, y, x = pystencils.fields("z, y, x: [2d]")
-
-    a = sp.Symbol('a')
-    assignments = pystencils.AssignmentCollection({
-        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0]) + a
-    })
-
-    print(assignments)
-
-    ast = pystencils.create_kernel(assignments, target='gpu')
-
-    print(ast)
-
-    code = pystencils.show_code(ast, custom_backend=CudaBackend())
-    print(code)
-    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
-    print(opencl_code)
-
-    cuda_kernel = ast.compile()
-    assert cuda_kernel is not None
-
-    import pycuda.gpuarray as gpuarray
-
-    x_cpu = np.random.rand(20, 30)
-    y_cpu = np.random.rand(20, 30)
-    z_cpu = np.random.rand(20, 30)
-
-    x = gpuarray.to_gpu(x_cpu)
-    y = gpuarray.to_gpu(y_cpu)
-    z = gpuarray.to_gpu(z_cpu)
-    cuda_kernel(x=x, y=y, z=z, a=5.)
-
-    result_cuda = z.get()
-
-    import pyopencl.array as array
-    ctx = cl.create_some_context(0)
-    queue = cl.CommandQueue(ctx)
-
-    x = array.to_device(queue, x_cpu)
-    y = array.to_device(queue, y_cpu)
-    z = array.to_device(queue, z_cpu)
-
-    opencl_kernel = make_python_function(ast, queue, ctx)
-    assert opencl_kernel is not None
-    opencl_kernel(x=x, y=y, z=z, a=5.)
-
-    result_opencl = z.get(queue)
-
-    assert np.allclose(result_cuda, result_opencl)
-
-
-@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
-def test_without_cuda():
-    z, y, x = pystencils.fields("z, y, x: [20,30]")
-
-    assignments = pystencils.AssignmentCollection({
-        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
-    })
-
-    print(assignments)
-
-    ast = pystencils.create_kernel(assignments, target='gpu')
-
-    print(ast)
-
-    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
-    print(opencl_code)
-
-    x_cpu = np.random.rand(20, 30)
-    y_cpu = np.random.rand(20, 30)
-    z_cpu = np.random.rand(20, 30)
-
-    import pyopencl.array as array
-    ctx = cl.create_some_context(0)
-    queue = cl.CommandQueue(ctx)
-
-    x = array.to_device(queue, x_cpu)
-    y = array.to_device(queue, y_cpu)
-    z = array.to_device(queue, z_cpu)
-
-    opencl_kernel = make_python_function(ast, queue, ctx)
-    assert opencl_kernel is not None
-    opencl_kernel(x=x, y=y, z=z)
-
-
-
-@pytest.mark.skipif(not HAS_OPENCL, reason="Test requires pyopencl")
-def test_kernel_creation():
-    z, y, x = pystencils.fields("z, y, x: [20,30]")
-
-    assignments = pystencils.AssignmentCollection({
-        z[0, 0]: x[0, 0] * sp.log(x[0, 0] * y[0, 0])
-    })
-
-    print(assignments)
-
-
-    init_globally()
-    ast = pystencils.create_kernel(assignments, target='opencl')
-
-    print(ast.backend)
-
-    code = str(pystencils.show_code(ast))
-    print(code)
-    assert 'get_local_size' in code
-
-    opencl_kernel = ast.compile()
-
-    x_cpu = np.random.rand(20, 30)
-    y_cpu = np.random.rand(20, 30)
-    z_cpu = np.random.rand(20, 30)
-
-    import pyopencl.array as array
-    assert get_global_cl_queue()
-    x = array.to_device(get_global_cl_queue(), x_cpu)
-    y = array.to_device(get_global_cl_queue(), y_cpu)
-    z = array.to_device(get_global_cl_queue(), z_cpu)
-
-    assert opencl_kernel is not None
-    opencl_kernel(x=x, y=y, z=z)
-
--- a/pystencils_tests/test_phasefield_dentritic_3D.ipynb
+++ b/pystencils_tests/test_phasefield_dentritic_3D.ipynb
-%% Cell type:code id: tags:
-
-``` python
-from pystencils.session import *
-sp.init_printing()
-frac = sp.Rational
-```
-
-%% Cell type:markdown id: tags:
-
-# Phase-field simulation of dentritic solidification in 3D
-
-This notebook tests the model presented in the dentritic growth tutorial in 3D.
-
-%% Cell type:code id: tags:
-
-``` python
-target = 'gpu'
-gpu = target == 'gpu'
-domain_size = (25, 25, 25) if 'is_test_run' in globals() else (300, 300, 300)
-
-dh = ps.create_data_handling(domain_size=domain_size, periodicity=True, default_target=target)
-φ_field = dh.add_array('phi', latex_name='φ')
-φ_delta_field = dh.add_array('phidelta', latex_name='φ_D')
-t_field = dh.add_array('T')
-```
-
-%% Cell type:code id: tags:
-
-``` python
-ε, m, δ, j, θzero, α, γ, Teq, κ, τ = sp.symbols("ε m δ j θ_0 α γ T_eq κ τ")
-εb = sp.Symbol("\\bar{\\epsilon}")
-discretize = ps.fd.Discretization2ndOrder(dx=0.03, dt=1e-5)
-
-φ = φ_field.center
-T = t_field.center
-d = ps.fd.Diff
-
-def f(φ, m):
-    return φ**4 / 4 - (frac(1, 2) - m/3) * φ**3 + (frac(1,4)-m/2)*φ**2
-
-
-
-bulk_free_energy_density = f(φ, m)
-interface_free_energy_density = ε ** 2 / 2 * (d(φ, 0) ** 2 + d(φ, 1) ** 2 + d(φ, 2) ** 2)
-```
-
-%% Cell type:markdown id: tags:
-
-Here comes the major change, that has to be made for the 3D model: $\epsilon$ depends on the interface normal, which can not be computed simply as atan() as in the 2D case
-
-%% Cell type:code id: tags:
-
-``` python
-n = sp.Matrix([d(φ, i) for i in range(3)])
-nLen = sp.sqrt(sum(n_i**2 for n_i in n))
-n = n / nLen
-nVal = sum(n_i**4 for n_i in n)
-σ = δ * nVal
-
-εVal = εb * (1 + σ)
-εVal
-```
-
-%% Output
-
-    $\displaystyle \bar{\epsilon} \left(δ \left(\frac{{\partial_{0} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{1} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{2} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}}\right) + 1\right)$
-                   ⎛  ⎛                            4
-                   ⎜  ⎜                 D(φ[0,0,0])
-    \bar{\epsilon}⋅⎜δ⋅⎜───────────────────────────────────────────── + ───────────
-                   ⎜  ⎜                                            2
-                   ⎜  ⎜⎛           2              2              2⎞    ⎛
-                   ⎝  ⎝⎝D(φ[0,0,0])  + D(φ[0,0,0])  + D(φ[0,0,0]) ⎠    ⎝D(φ[0,0,0]
-    
-                     4                                               4
-          D(φ[0,0,0])                                     D(φ[0,0,0])
-    ────────────────────────────────── + ─────────────────────────────────────────
-                                     2
-     2              2              2⎞    ⎛           2              2
-    )  + D(φ[0,0,0])  + D(φ[0,0,0]) ⎠    ⎝D(φ[0,0,0])  + D(φ[0,0,0])  + D(φ[0,0,0]
-    
-        ⎞    ⎞
-        ⎟    ⎟
-    ────⎟ + 1⎟
-       2⎟    ⎟
-     2⎞ ⎟    ⎟
-    ) ⎠ ⎠    ⎠
-
-%% Cell type:code id: tags:
-
-``` python
-def m_func(temperature):
-    return (α / sp.pi) * sp.atan(γ * (Teq - temperature))
-```
-
-%% Cell type:code id: tags:
-
-``` python
-substitutions = {m: m_func(T),
-                 ε: εVal}
-
-fe_i = interface_free_energy_density.subs(substitutions)
-fe_b = bulk_free_energy_density.subs(substitutions)
-
-μ_if = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_i, φ), functions=[φ])
-μ_b = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_b, φ), functions=[φ])
-```
-
-%% Cell type:code id: tags:
-
-``` python
-dF_dφ = μ_b + sp.Piecewise((μ_if, nLen**2 > 1e-10), (0, True))
-```
-
-%% Cell type:code id: tags:
-
-``` python
-parameters = {
-    τ: 0.0003,
-    κ: 1.8,
-    εb: 0.01,
-    δ: 0.3,
-    γ: 10,
-    j: 6,
-    α: 0.9,
-    Teq: 1.0,
-    θzero: 0.2,
-    sp.pi: sp.pi.evalf()
-}
-parameters
-```
-
-%% Output
-
-
-    $\displaystyle \left\{ \pi : 3.14159265358979, \  T_{eq} : 1.0, \  \bar{\epsilon} : 0.01, \  j : 6, \  α : 0.9, \  γ : 10, \  δ : 0.3, \  θ_{0} : 0.2, \  κ : 1.8, \  τ : 0.0003\right\}$
-    {π: 3.14159265358979, T_eq: 1.0, \bar{\epsilon}: 0.01, j: 6, α: 0.9, γ: 10, δ:
-     0.3, θ₀: 0.2, κ: 1.8, τ: 0.0003}
-
-%% Cell type:code id: tags:
-
-``` python
-dφ_dt = - dF_dφ / τ
-assignments = [
-    ps.Assignment(φ_delta_field.center, discretize(dφ_dt.subs(parameters))),
-]
-φEqs = ps.simp.sympy_cse_on_assignment_list(assignments)
-φEqs.append(ps.Assignment(φ, discretize(ps.fd.transient(φ) - φ_delta_field.center)))
-
-
-temperatureEvolution = -ps.fd.transient(T) + ps.fd.diffusion(T, 1) + κ * φ_delta_field.center
-temperatureEqs = [
-    ps.Assignment(T, discretize(temperatureEvolution.subs(parameters)))
-]
-```
-
-%% Cell type:code id: tags:
-
-``` python
-temperatureEqs
-```
-
-%% Output
-
-    $\displaystyle \left[ {{T}_{(0,0,0)}} \leftarrow 0.0111111111111111 {{T}_{(-1,0,0)}} + 0.0111111111111111 {{T}_{(0,-1,0)}} + 0.0111111111111111 {{T}_{(0,0,-1)}} + 0.933333333333333 {{T}_{(0,0,0)}} + 0.0111111111111111 {{T}_{(0,0,1)}} + 0.0111111111111111 {{T}_{(0,1,0)}} + 0.0111111111111111 {{T}_{(1,0,0)}} + 1.8 \cdot 10^{-5} {{φ_D}_{(0,0,0)}}\right]$
-    [T_C := 0.0111111111111111⋅T_W + 0.0111111111111111⋅T_S + 0.0111111111111111⋅T
-    _B + 0.933333333333333⋅T_C + 0.0111111111111111⋅T_T + 0.0111111111111111⋅T_N +
-     0.0111111111111111⋅T_E + 1.8e-5⋅phidelta_C]
-
-%% Cell type:code id: tags:
-
-``` python
-φ_kernel = ps.create_kernel(φEqs, cpu_openmp=4, target=target).compile()
-temperatureKernel = ps.create_kernel(temperatureEqs, cpu_openmp=4, target=target).compile()
-```
-
-%% Cell type:code id: tags:
-
-``` python
-def time_loop(steps):
-    φ_sync = dh.synchronization_function(['phi'], target=target)
-    temperature_sync = dh.synchronization_function(['T'], target=target)
-    dh.all_to_gpu()
-    for t in range(steps):
-        φ_sync()
-        dh.run_kernel(φ_kernel)
-        temperature_sync()
-        dh.run_kernel(temperatureKernel)
-    dh.all_to_cpu()
-
-
-def init(nucleus_size=np.sqrt(5)):
-    for b in dh.iterate():
-        x, y, z = b.cell_index_arrays
-        x, y, z = x - b.shape[0] // 2, y - b.shape[1] // 2, z - b.shape[2] // 2
-        b['phi'].fill(0)
-        b['phi'][(x ** 2 + y ** 2 + z ** 2) < nucleus_size ** 2] = 1.0
-        b['T'].fill(0.0)
-
-
-def plot(slice_obj=ps.make_slice[:, :, 0.5]):
-    plt.subplot(1, 3, 1)
-    plt.scalar_field(dh.gather_array('phi', slice_obj).squeeze())
-    plt.title("φ")
-    plt.colorbar()
-    plt.subplot(1, 3, 2)
-    plt.title("T")
-    plt.scalar_field(dh.gather_array('T', slice_obj).squeeze())
-    plt.colorbar()
-    plt.subplot(1, 3, 3)
-    plt.title("∂φ")
-    plt.scalar_field(dh.gather_array('phidelta', slice_obj).squeeze())
-    plt.colorbar()
-```
-
-%% Cell type:code id: tags:
-
-``` python
-init()
-plot()
-print(dh)
-```
-
-%% Output
-
-        Name|      Inner (min/max)|     WithGl (min/max)
-    ----------------------------------------------------
-           T|            (  0,  0)|            (  0,  0)
-         phi|            (  0,  1)|            (  0,  1)
-    phidelta|            (  0,  0)|            (  0,  0)
-    
-
-
-
-%% Cell type:code id: tags:
-
-``` python
-if 'is_test_run' in globals():
-    time_loop(2)
-    assert np.isfinite(dh.max('phi'))
-    assert np.isfinite(dh.max('T'))
-    assert np.isfinite(dh.max('phidelta'))
-else:
-    from time import perf_counter
-    vtk_writer = dh.create_vtk_writer('dentritic_growth_large', ['phi'])
-    last = perf_counter()
-    for i in range(300):
-        time_loop(100)
-        vtk_writer(i)
-        print("Step ", i, perf_counter() - last, dh.max('phi'))
-        last = perf_counter()
-```
-%% Cell type:code id: tags:
-
-``` python
-from pystencils.session import *
-sp.init_printing()
-frac = sp.Rational
-```
-
-%% Cell type:markdown id: tags:
-
-# Phase-field simulation of dentritic solidification in 3D
-
-This notebook tests the model presented in the dentritic growth tutorial in 3D.
-
-%% Cell type:code id: tags:
-
-``` python
-target = 'gpu'
-gpu = target == 'gpu'
-domain_size = (25, 25, 25) if 'is_test_run' in globals() else (300, 300, 300)
-
-dh = ps.create_data_handling(domain_size=domain_size, periodicity=True, default_target=target)
-φ_field = dh.add_array('phi', latex_name='φ')
-φ_delta_field = dh.add_array('phidelta', latex_name='φ_D')
-t_field = dh.add_array('T')
-```
-
-%% Cell type:code id: tags:
-
-``` python
-ε, m, δ, j, θzero, α, γ, Teq, κ, τ = sp.symbols("ε m δ j θ_0 α γ T_eq κ τ")
-εb = sp.Symbol("\\bar{\\epsilon}")
-discretize = ps.fd.Discretization2ndOrder(dx=0.03, dt=1e-5)
-
-φ = φ_field.center
-T = t_field.center
-d = ps.fd.Diff
-
-def f(φ, m):
-    return φ**4 / 4 - (frac(1, 2) - m/3) * φ**3 + (frac(1,4)-m/2)*φ**2
-
-
-
-bulk_free_energy_density = f(φ, m)
-interface_free_energy_density = ε ** 2 / 2 * (d(φ, 0) ** 2 + d(φ, 1) ** 2 + d(φ, 2) ** 2)
-```
-
-%% Cell type:markdown id: tags:
-
-Here comes the major change, that has to be made for the 3D model: $\epsilon$ depends on the interface normal, which can not be computed simply as atan() as in the 2D case
-
-%% Cell type:code id: tags:
-
-``` python
-n = sp.Matrix([d(φ, i) for i in range(3)])
-nLen = sp.sqrt(sum(n_i**2 for n_i in n))
-n = n / nLen
-nVal = sum(n_i**4 for n_i in n)
-σ = δ * nVal
-
-εVal = εb * (1 + σ)
-εVal
-```
-
-%% Output
-
-    $\displaystyle \bar{\epsilon} \left(δ \left(\frac{{\partial_{0} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{1} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}} + \frac{{\partial_{2} {{φ}_{(0,0,0)}}}^{4}}{\left({\partial_{0} {{φ}_{(0,0,0)}}}^{2} + {\partial_{1} {{φ}_{(0,0,0)}}}^{2} + {\partial_{2} {{φ}_{(0,0,0)}}}^{2}\right)^{2}}\right) + 1\right)$
-                   ⎛  ⎛                            4
-                   ⎜  ⎜                 D(φ[0,0,0])
-    \bar{\epsilon}⋅⎜δ⋅⎜───────────────────────────────────────────── + ───────────
-                   ⎜  ⎜                                            2
-                   ⎜  ⎜⎛           2              2              2⎞    ⎛
-                   ⎝  ⎝⎝D(φ[0,0,0])  + D(φ[0,0,0])  + D(φ[0,0,0]) ⎠    ⎝D(φ[0,0,0]
-    
-                     4                                               4
-          D(φ[0,0,0])                                     D(φ[0,0,0])
-    ────────────────────────────────── + ─────────────────────────────────────────
-                                     2
-     2              2              2⎞    ⎛           2              2
-    )  + D(φ[0,0,0])  + D(φ[0,0,0]) ⎠    ⎝D(φ[0,0,0])  + D(φ[0,0,0])  + D(φ[0,0,0]
-    
-        ⎞    ⎞
-        ⎟    ⎟
-    ────⎟ + 1⎟
-       2⎟    ⎟
-     2⎞ ⎟    ⎟
-    ) ⎠ ⎠    ⎠
-
-%% Cell type:code id: tags:
-
-``` python
-def m_func(temperature):
-    return (α / sp.pi) * sp.atan(γ * (Teq - temperature))
-```
-
-%% Cell type:code id: tags:
-
-``` python
-substitutions = {m: m_func(T),
-                 ε: εVal}
-
-fe_i = interface_free_energy_density.subs(substitutions)
-fe_b = bulk_free_energy_density.subs(substitutions)
-
-μ_if = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_i, φ), functions=[φ])
-μ_b = ps.fd.expand_diff_full(ps.fd.functional_derivative(fe_b, φ), functions=[φ])
-```
-
-%% Cell type:code id: tags:
-
-``` python
-dF_dφ = μ_b + sp.Piecewise((μ_if, nLen**2 > 1e-10), (0, True))
-```
-
-%% Cell type:code id: tags:
-
-``` python
-parameters = {
-    τ: 0.0003,
-    κ: 1.8,
-    εb: 0.01,
-    δ: 0.3,
-    γ: 10,
-    j: 6,
-    α: 0.9,
-    Teq: 1.0,
-    θzero: 0.2,
-    sp.pi: sp.pi.evalf()
-}
-parameters
-```
-
-%% Output
-
-
-    $\displaystyle \left\{ \pi : 3.14159265358979, \  T_{eq} : 1.0, \  \bar{\epsilon} : 0.01, \  j : 6, \  α : 0.9, \  γ : 10, \  δ : 0.3, \  θ_{0} : 0.2, \  κ : 1.8, \  τ : 0.0003\right\}$
-    {π: 3.14159265358979, T_eq: 1.0, \bar{\epsilon}: 0.01, j: 6, α: 0.9, γ: 10, δ:
-     0.3, θ₀: 0.2, κ: 1.8, τ: 0.0003}
-
-%% Cell type:code id: tags:
-
-``` python
-dφ_dt = - dF_dφ / τ
-assignments = [
-    ps.Assignment(φ_delta_field.center, discretize(dφ_dt.subs(parameters))),
-]
-φEqs = ps.simp.sympy_cse_on_assignment_list(assignments)
-φEqs.append(ps.Assignment(φ, discretize(ps.fd.transient(φ) - φ_delta_field.center)))
-
-
-temperatureEvolution = -ps.fd.transient(T) + ps.fd.diffusion(T, 1) + κ * φ_delta_field.center
-temperatureEqs = [
-    ps.Assignment(T, discretize(temperatureEvolution.subs(parameters)))
-]
-```
-
-%% Cell type:code id: tags:
-
-``` python
-temperatureEqs
-```
-
-%% Output
-
-    $\displaystyle \left[ {{T}_{(0,0,0)}} \leftarrow 0.0111111111111111 {{T}_{(-1,0,0)}} + 0.0111111111111111 {{T}_{(0,-1,0)}} + 0.0111111111111111 {{T}_{(0,0,-1)}} + 0.933333333333333 {{T}_{(0,0,0)}} + 0.0111111111111111 {{T}_{(0,0,1)}} + 0.0111111111111111 {{T}_{(0,1,0)}} + 0.0111111111111111 {{T}_{(1,0,0)}} + 1.8 \cdot 10^{-5} {{φ_D}_{(0,0,0)}}\right]$
-    [T_C := 0.0111111111111111⋅T_W + 0.0111111111111111⋅T_S + 0.0111111111111111⋅T
-    _B + 0.933333333333333⋅T_C + 0.0111111111111111⋅T_T + 0.0111111111111111⋅T_N +
-     0.0111111111111111⋅T_E + 1.8e-5⋅phidelta_C]
-
-%% Cell type:code id: tags:
-
-``` python
-φ_kernel = ps.create_kernel(φEqs, cpu_openmp=4, target=target).compile()
-temperatureKernel = ps.create_kernel(temperatureEqs, cpu_openmp=4, target=target).compile()
-```
-
-%% Cell type:code id: tags:
-
-``` python
-def time_loop(steps):
-    φ_sync = dh.synchronization_function(['phi'], target=target)
-    temperature_sync = dh.synchronization_function(['T'], target=target)
-    dh.all_to_gpu()
-    for t in range(steps):
-        φ_sync()
-        dh.run_kernel(φ_kernel)
-        temperature_sync()
-        dh.run_kernel(temperatureKernel)
-    dh.all_to_cpu()
-
-
-def init(nucleus_size=np.sqrt(5)):
-    for b in dh.iterate():
-        x, y, z = b.cell_index_arrays
-        x, y, z = x - b.shape[0] // 2, y - b.shape[1] // 2, z - b.shape[2] // 2
-        b['phi'].fill(0)
-        b['phi'][(x ** 2 + y ** 2 + z ** 2) < nucleus_size ** 2] = 1.0
-        b['T'].fill(0.0)
-
-
-def plot(slice_obj=ps.make_slice[:, :, 0.5]):
-    plt.subplot(1, 3, 1)
-    plt.scalar_field(dh.gather_array('phi', slice_obj).squeeze())
-    plt.title("φ")
-    plt.colorbar()
-    plt.subplot(1, 3, 2)
-    plt.title("T")
-    plt.scalar_field(dh.gather_array('T', slice_obj).squeeze())
-    plt.colorbar()
-    plt.subplot(1, 3, 3)
-    plt.title("∂φ")
-    plt.scalar_field(dh.gather_array('phidelta', slice_obj).squeeze())
-    plt.colorbar()
-```
-
-%% Cell type:code id: tags:
-
-``` python
-init()
-plot()
-print(dh)
-```
-
-%% Output
-
-        Name|      Inner (min/max)|     WithGl (min/max)
-    ----------------------------------------------------
-           T|            (  0,  0)|            (  0,  0)
-         phi|            (  0,  1)|            (  0,  1)
-    phidelta|            (  0,  0)|            (  0,  0)
-    
-
-
-
-%% Cell type:code id: tags:
-
-``` python
-if 'is_test_run' in globals():
-    time_loop(2)
-    assert np.isfinite(dh.max('phi'))
-    assert np.isfinite(dh.max('T'))
-    assert np.isfinite(dh.max('phidelta'))
-else:
-    from time import perf_counter
-    vtk_writer = dh.create_vtk_writer('dentritic_growth_large', ['phi'])
-    last = perf_counter()
-    for i in range(300):
-        time_loop(100)
-        vtk_writer(i)
-        print("Step ", i, perf_counter() - last, dh.max('phi'))
-        last = perf_counter()
-```
--- a/pystencils_tests/test_print_infinity.py
+++ b/pystencils_tests/test_print_infinity.py
-import pytest
-
-import pystencils
-from sympy import oo
-
-
-@pytest.mark.parametrize('type', ('float32', 'float64', 'int64'))
-@pytest.mark.parametrize('negative', (False, 'Negative'))
-@pytest.mark.parametrize('target', ('cpu', 'gpu'))
-def test_print_infinity(type, negative, target):
-
-    x = pystencils.fields(f'x:  {type}[1d]')
-
-    if negative:
-        assignment = pystencils.Assignment(x.center, -oo)
-    else:
-        assignment = pystencils.Assignment(x.center, oo)
-    ast = pystencils.create_kernel(assignment, data_type=type, target=target)
-
-    ast.compile()
-
-    print(ast.compile().code)
--- a/pystencils_tests/test_print_unsupported_node.py
+++ b/pystencils_tests/test_print_unsupported_node.py
-# -*- coding: utf-8 -*-
-#
-# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
-#
-# Distributed under terms of the GPLv3 license.
-
-"""
-
-"""
-import pytest
-
-import pystencils
-from pystencils.backends.cbackend import CBackend
-
-
-class UnsupportedNode(pystencils.astnodes.Node):
-
-    def __init__(self):
-        super().__init__()
-
-
-def test_print_unsupported_node():
-    with pytest.raises(NotImplementedError, match='CBackend does not support node of type UnsupportedNode'):
-        CBackend()(UnsupportedNode())
--- a/pystencils_tests/test_random.py
+++ b/pystencils_tests/test_random.py
-import numpy as np
-
-import pystencils as ps
-from pystencils.rng import PhiloxFourFloats, PhiloxTwoDoubles, AESNIFourFloats, AESNITwoDoubles
-
-
-# curand_Philox4x32_10(make_uint4(124, i, j, 0), make_uint2(0, 0))
-philox_reference = np.array([[[3576608082, 1252663339, 1987745383,  348040302],
-                              [1032407765,  970978240, 2217005168, 2424826293]],
-                             [[2958765206, 3725192638, 2623672781, 1373196132],
-                              [ 850605163, 1694561295, 3285694973, 2799652583]]])
-
-def test_philox_double():
-    for target in ('cpu', 'gpu'):
-        dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
-        f = dh.add_array("f", values_per_cell=2)
-
-        dh.fill('f', 42.0)
-
-        philox_node = PhiloxTwoDoubles(dh.dim)
-        assignments = [philox_node,
-                       ps.Assignment(f(0), philox_node.result_symbols[0]),
-                       ps.Assignment(f(1), philox_node.result_symbols[1])]
-        kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
-
-        dh.all_to_gpu()
-        dh.run_kernel(kernel, time_step=124)
-        dh.all_to_cpu()
-
-        arr = dh.gather_array('f')
-        assert np.logical_and(arr <= 1.0, arr >= 0).all()
-
-        x = philox_reference[:,:,0::2]
-        y = philox_reference[:,:,1::2]
-        z = x ^ y << (53 - 32)
-        double_reference = z * 2.**-53 + 2.**-54
-        assert(np.allclose(arr, double_reference, rtol=0, atol=np.finfo(np.float64).eps))
-
-
-def test_philox_float():
-    for target in ('cpu', 'gpu'):
-        dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
-        f = dh.add_array("f", values_per_cell=4)
-
-        dh.fill('f', 42.0)
-
-        philox_node = PhiloxFourFloats(dh.dim)
-        assignments = [philox_node] + [ps.Assignment(f(i), philox_node.result_symbols[i]) for i in range(4)]
-        kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
-
-        dh.all_to_gpu()
-        dh.run_kernel(kernel, time_step=124)
-        dh.all_to_cpu()
-        arr = dh.gather_array('f')
-        assert np.logical_and(arr <= 1.0, arr >= 0).all()
-
-        float_reference = philox_reference * 2.**-32 + 2.**-33
-        assert(np.allclose(arr, float_reference, rtol=0, atol=np.finfo(np.float32).eps))
-
-def test_aesni_double():
-    dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target="cpu")
-    f = dh.add_array("f", values_per_cell=2)
-
-    dh.fill('f', 42.0)
-
-    aesni_node = AESNITwoDoubles(dh.dim)
-    assignments = [aesni_node,
-                   ps.Assignment(f(0), aesni_node.result_symbols[0]),
-                   ps.Assignment(f(1), aesni_node.result_symbols[1])]
-    kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
-
-    dh.all_to_gpu()
-    dh.run_kernel(kernel, time_step=124)
-    dh.all_to_cpu()
-
-    arr = dh.gather_array('f')
-    assert np.logical_and(arr <= 1.0, arr >= 0).all()
-
-
-def test_aesni_float():
-    dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target="cpu")
-    f = dh.add_array("f", values_per_cell=4)
-
-    dh.fill('f', 42.0)
-
-    aesni_node = AESNIFourFloats(dh.dim)
-    assignments = [aesni_node] + [ps.Assignment(f(i), aesni_node.result_symbols[i]) for i in range(4)]
-    kernel = ps.create_kernel(assignments, target=dh.default_target).compile()
-
-    dh.all_to_gpu()
-    dh.run_kernel(kernel, time_step=124)
-    dh.all_to_cpu()
-    arr = dh.gather_array('f')
-    assert np.logical_and(arr <= 1.0, arr >= 0).all()
\ No newline at end of file
--- a/pystencils_tests/test_size_and_layout_checks_llvm.py
+++ b/pystencils_tests/test_size_and_layout_checks_llvm.py
-import numpy as np
-
-from pystencils import Assignment, Field
-from pystencils.llvm import create_kernel, make_python_function
-
-
-def test_size_check():
-    """Kernel with two fixed-sized fields creating with same size but calling with wrong size"""
-    src = np.zeros((20, 21, 9))
-    dst = np.zeros_like(src)
-
-    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
-    sym_dst = Field.create_from_numpy_array("dst", dst, index_dimensions=1)
-    update_rule = Assignment(sym_dst(0),
-                             sym_src[-1, 1](1) + sym_src[1, -1](2))
-    ast = create_kernel([update_rule])
-    func = make_python_function(ast)
-
-    # change size of src field
-    new_shape = [a - 7 for a in src.shape]
-    src = np.zeros(new_shape)
-    dst = np.zeros(new_shape)
-
-    try:
-        func(src=src, dst=dst)
-        assert False, "Expected ValueError because fields with different sized where passed"
-    except ValueError:
-        pass
-
-
-def test_fixed_size_mismatch_check():
-    """Create kernel with two differently sized but constant fields """
-    src = np.zeros((20, 21, 9))
-    dst = np.zeros((21, 21, 9))
-
-    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
-    sym_dst = Field.create_from_numpy_array("dst", dst, index_dimensions=1)
-    update_rule = Assignment(sym_dst(0),
-                             sym_src[-1, 1](1) + sym_src[1, -1](2))
-
-    try:
-        create_kernel([update_rule])
-        assert False, "Expected ValueError because fields with different sized where passed"
-    except ValueError:
-        pass
-
-
-def test_fixed_and_variable_field_check():
-    """Create kernel with two variable sized fields - calling them with different sizes"""
-    src = np.zeros((20, 21, 9))
-
-    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
-    sym_dst = Field.create_generic("dst", spatial_dimensions=2, index_dimensions=1)
-
-    update_rule = Assignment(sym_dst(0),
-                             sym_src[-1, 1](1) + sym_src[1, -1](2))
-
-    try:
-        create_kernel([update_rule])
-        assert False, "Expected ValueError because fields with different sized where passed"
-    except ValueError:
-        pass
-
-
-def test_two_variable_shaped_fields():
-    src = np.zeros((20, 21, 9))
-    dst = np.zeros((22, 21, 9))
-
-    sym_src = Field.create_generic("src", spatial_dimensions=2, index_dimensions=1)
-    sym_dst = Field.create_generic("dst", spatial_dimensions=2, index_dimensions=1)
-    update_rule = Assignment(sym_dst(0),
-                             sym_src[-1, 1](1) + sym_src[1, -1](2))
-
-    ast = create_kernel([update_rule])
-    func = make_python_function(ast)
-
-    try:
-        func(src=src, dst=dst)
-        assert False, "Expected ValueError because fields with different sized where passed"
-    except ValueError:
-        pass
--- a/pystencils_tests/test_sliced_iteration.py
+++ b/pystencils_tests/test_sliced_iteration.py
-import numpy as np
-import sympy as sp
-
-from pystencils import Assignment, Field, TypedSymbol, create_kernel, make_slice
-from pystencils.simp import sympy_cse_on_assignment_list
-
-
-def test_sliced_iteration():
-    size = (4, 4)
-    src_arr = np.ones(size)
-    dst_arr = np.zeros_like(src_arr)
-    src_field = Field.create_from_numpy_array('src', src_arr)
-    dst_field = Field.create_from_numpy_array('dst', dst_arr)
-
-    a, b = sp.symbols("a b")
-    update_rule = Assignment(dst_field[0, 0],
-                             (a * src_field[0, 1] + a * src_field[0, -1] +
-                              b * src_field[1, 0] + b * src_field[-1, 0]) / 4)
-
-    x_end = TypedSymbol("x_end", "int")
-    s = make_slice[1:x_end, 1]
-    x_end_value = size[1] - 1
-    kernel = create_kernel(sympy_cse_on_assignment_list([update_rule]), iteration_slice=s).compile()
-
-    kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
-
-    expected_result = np.zeros(size)
-    expected_result[1:x_end_value, 1] = 1
-    np.testing.assert_almost_equal(expected_result, dst_arr)
-
-
-def test_sliced_iteration_llvm():
-    size = (4, 4)
-    src_arr = np.ones(size)
-    dst_arr = np.zeros_like(src_arr)
-    src_field = Field.create_from_numpy_array('src', src_arr)
-    dst_field = Field.create_from_numpy_array('dst', dst_arr)
-
-    a, b = sp.symbols("a b")
-    update_rule = Assignment(dst_field[0, 0],
-                             (a * src_field[0, 1] + a * src_field[0, -1] +
-                              b * src_field[1, 0] + b * src_field[-1, 0]) / 4)
-
-    x_end = TypedSymbol("x_end", "int")
-    s = make_slice[1:x_end, 1]
-    x_end_value = size[1] - 1
-    import pystencils.llvm as llvm_generator
-    ast = llvm_generator.create_kernel(sympy_cse_on_assignment_list([update_rule]), iteration_slice=s)
-    kernel = llvm_generator.make_python_function(ast)
-
-    kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
-
-    expected_result = np.zeros(size)
-    expected_result[1:x_end_value, 1] = 1
-    np.testing.assert_almost_equal(expected_result, dst_arr)
--- a/pystencils_tests/test_small_block_benchmark.ipynb
+++ b/pystencils_tests/test_small_block_benchmark.ipynb
-%% Cell type:code id: tags:
-
-``` python
-from pystencils.session import *
-from time import perf_counter
-from statistics import median
-from functools import partial
-```
-
-%% Cell type:markdown id: tags:
-
-## Benchmark for Python call overhead
-
-%% Cell type:code id: tags:
-
-``` python
-inner_repeats = 100
-outer_repeats = 5
-sizes = [2**i for i in range(1, 8)]
-sizes
-```
-
-%% Output
-
-
-    $$\left [ 2, \quad 4, \quad 8, \quad 16, \quad 32, \quad 64, \quad 128\right ]$$
-    [2, 4, 8, 16, 32, 64, 128]
-
-%% Cell type:code id: tags:
-
-``` python
-def benchmark_pure(domain_size, extract_first=False):
-    src = np.zeros(domain_size)
-    dst = np.zeros_like(src)
-    f_src, f_dst = ps.fields("src, dst", src=src, dst=dst)
-    kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()
-    if extract_first:
-        kernel = kernel.kernel
-        start = perf_counter()
-        for i in range(inner_repeats):
-            kernel(src=src, dst=dst)
-            src, dst = dst, src
-        end = perf_counter()
-    else:
-        start = perf_counter()
-        for i in range(inner_repeats):
-            kernel(src=src, dst=dst)
-            src, dst = dst, src
-        end = perf_counter()
-    return (end - start) / inner_repeats
-
-def benchmark_datahandling(domain_size, parallel=False):
-    dh = ps.create_data_handling(domain_size, parallel=parallel)
-    f_src = dh.add_array('src')
-    f_dst = dh.add_array('dst')
-    kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()
-    start = perf_counter()
-    for i in range(inner_repeats):
-        dh.run_kernel(kernel)
-        dh.swap('src', 'dst')
-    end = perf_counter()
-    return (end - start) / inner_repeats
-
-
-name_to_func = {
-    'pure_extract': partial(benchmark_pure, extract_first=True),
-    'pure_no_extract': partial(benchmark_pure, extract_first=False),
-    'dh_serial': partial(benchmark_datahandling, parallel=False),
-    'dh_parallel': partial(benchmark_datahandling, parallel=True),
-}
-```
-
-%% Cell type:code id: tags:
-
-``` python
-result = {'block_size': [],
-          'name': [],
-          'time': []}
-
-for bs in sizes:
-    print("Computing size ", bs)
-    for name, func in name_to_func.items():
-        for i in range(outer_repeats):
-            time = func((bs, bs))
-            result['block_size'].append(bs)
-            result['name'].append(name)
-            result['time'].append(time)
-```
-
-%% Output
-
-    Computing size  2
-    Computing size  4
-    Computing size  8
-    Computing size  16
-    Computing size  32
-    Computing size  64
-    Computing size  128
-
-%% Cell type:code id: tags:
-
-``` python
-if 'is_test_run' not in globals():
-    import pandas as pd
-    import seaborn as sns
-
-    data = pd.DataFrame.from_dict(result)
-
-    plt.subplot(1,2,1)
-    sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
-    plt.yscale('log')
-
-    plt.subplot(1,2,2)
-    data = pd.DataFrame.from_dict(result)
-    sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
-```
-
-%% Output
-
-
-%% Cell type:code id: tags:
-
-``` python
-from pystencils.session import *
-from time import perf_counter
-from statistics import median
-from functools import partial
-```
-
-%% Cell type:markdown id: tags:
-
-## Benchmark for Python call overhead
-
-%% Cell type:code id: tags:
-
-``` python
-inner_repeats = 100
-outer_repeats = 5
-sizes = [2**i for i in range(1, 8)]
-sizes
-```
-
-%% Output
-
-
-    $$\left [ 2, \quad 4, \quad 8, \quad 16, \quad 32, \quad 64, \quad 128\right ]$$
-    [2, 4, 8, 16, 32, 64, 128]
-
-%% Cell type:code id: tags:
-
-``` python
-def benchmark_pure(domain_size, extract_first=False):
-    src = np.zeros(domain_size)
-    dst = np.zeros_like(src)
-    f_src, f_dst = ps.fields("src, dst", src=src, dst=dst)
-    kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()
-    if extract_first:
-        kernel = kernel.kernel
-        start = perf_counter()
-        for i in range(inner_repeats):
-            kernel(src=src, dst=dst)
-            src, dst = dst, src
-        end = perf_counter()
-    else:
-        start = perf_counter()
-        for i in range(inner_repeats):
-            kernel(src=src, dst=dst)
-            src, dst = dst, src
-        end = perf_counter()
-    return (end - start) / inner_repeats
-
-def benchmark_datahandling(domain_size, parallel=False):
-    dh = ps.create_data_handling(domain_size, parallel=parallel)
-    f_src = dh.add_array('src')
-    f_dst = dh.add_array('dst')
-    kernel = ps.create_kernel(ps.Assignment(f_dst.center, f_src.center)).compile()
-    start = perf_counter()
-    for i in range(inner_repeats):
-        dh.run_kernel(kernel)
-        dh.swap('src', 'dst')
-    end = perf_counter()
-    return (end - start) / inner_repeats
-
-
-name_to_func = {
-    'pure_extract': partial(benchmark_pure, extract_first=True),
-    'pure_no_extract': partial(benchmark_pure, extract_first=False),
-    'dh_serial': partial(benchmark_datahandling, parallel=False),
-    'dh_parallel': partial(benchmark_datahandling, parallel=True),
-}
-```
-
-%% Cell type:code id: tags:
-
-``` python
-result = {'block_size': [],
-          'name': [],
-          'time': []}
-
-for bs in sizes:
-    print("Computing size ", bs)
-    for name, func in name_to_func.items():
-        for i in range(outer_repeats):
-            time = func((bs, bs))
-            result['block_size'].append(bs)
-            result['name'].append(name)
-            result['time'].append(time)
-```
-
-%% Output
-
-    Computing size  2
-    Computing size  4
-    Computing size  8
-    Computing size  16
-    Computing size  32
-    Computing size  64
-    Computing size  128
-
-%% Cell type:code id: tags:
-
-``` python
-if 'is_test_run' not in globals():
-    import pandas as pd
-    import seaborn as sns
-
-    data = pd.DataFrame.from_dict(result)
-
-    plt.subplot(1,2,1)
-    sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
-    plt.yscale('log')
-
-    plt.subplot(1,2,2)
-    data = pd.DataFrame.from_dict(result)
-    sns.barplot(x='block_size', y='time', hue='name', data=data, alpha=0.6)
-```
-
-%% Output
-
-
--- a/pystencils_tests/test_stencils.py
+++ b/pystencils_tests/test_stencils.py
-import pystencils as ps
--- a/pystencils_tests/test_sum_prod.py
+++ b/pystencils_tests/test_sum_prod.py
-# -*- coding: utf-8 -*-
-#
-# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
-#
-# Distributed under terms of the GPLv3 license.
-
-"""
-
-"""
-import numpy as np
-import sympy
-from sympy.abc import k
-
-import pystencils
-from pystencils.data_types import create_type
-
-
-def test_sum():
-
-    sum = sympy.Sum(k, (k, 1, 100))
-    expanded_sum = sum.doit()
-
-    print(sum)
-    print(expanded_sum)
-
-    x = pystencils.fields('x: float32[1d]')
-
-    assignments = pystencils.AssignmentCollection({
-        x.center(): sum
-    })
-
-    ast = pystencils.create_kernel(assignments)
-    code = str(pystencils.show_code(ast))
-    kernel = ast.compile()
-
-    print(code)
-    assert 'double sum' in code
-
-    array = np.zeros((10,), np.float32)
-
-    kernel(x=array)
-
-    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
-
-
-def test_sum_use_float():
-
-    sum = sympy.Sum(k, (k, 1, 100))
-    expanded_sum = sum.doit()
-
-    print(sum)
-    print(expanded_sum)
-
-    x = pystencils.fields('x: float32[1d]')
-
-    assignments = pystencils.AssignmentCollection({
-        x.center(): sum
-    })
-
-    ast = pystencils.create_kernel(assignments, data_type=create_type('float32'))
-    code = str(pystencils.show_code(ast))
-    kernel = ast.compile()
-
-    print(code)
-    print(pystencils.show_code(ast))
-    assert 'float sum' in code
-
-    array = np.zeros((10,), np.float32)
-
-    kernel(x=array)
-
-    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
-
-
-def test_product():
-
-    k = pystencils.TypedSymbol('k', create_type('int64'))
-
-    sum = sympy.Product(k, (k, 1, 10))
-    expanded_sum = sum.doit()
-
-    print(sum)
-    print(expanded_sum)
-
-    x = pystencils.fields('x: int64[1d]')
-
-    assignments = pystencils.AssignmentCollection({
-        x.center(): sum
-    })
-
-    ast = pystencils.create_kernel(assignments)
-    code = str(pystencils.show_code(ast))
-    kernel = ast.compile()
-
-    print(code)
-    assert 'int64_t product' in code
-
-    array = np.zeros((10,), np.int64)
-
-    kernel(x=array)
-
-    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
-
-
-def test_prod_var_limit():
-
-    k = pystencils.TypedSymbol('k', create_type('int64'))
-    limit = pystencils.TypedSymbol('limit', create_type('int64'))
-
-    sum = sympy.Sum(k, (k, 1, limit))
-    expanded_sum = sum.replace(limit, 100).doit()
-
-    print(sum)
-    print(expanded_sum)
-
-    x = pystencils.fields('x: int64[1d]')
-
-    assignments = pystencils.AssignmentCollection({
-        x.center(): sum
-    })
-
-    ast = pystencils.create_kernel(assignments)
-    code = str(pystencils.show_code(ast))
-    kernel = ast.compile()
-
-    print(code)
-
-    array = np.zeros((10,), np.int64)
-
-    kernel(x=array, limit=100)
-
-    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
--- a/pystencils_tests/test_sympy_optimizations.py
+++ b/pystencils_tests/test_sympy_optimizations.py
-import pytest
-import sympy as sp
-
-import pystencils
-from pystencils.math_optimizations import HAS_REWRITING, optimize_assignments, optims_pystencils_cpu
-
-
-@pytest.mark.skipif(not HAS_REWRITING, reason="need sympy.codegen.rewriting")
-def test_sympy_optimizations():
-    for target in ('cpu', 'gpu'):
-        x, y, z = pystencils.fields('x, y, z:  float32[2d]')
-
-        # Triggers Sympy's expm1 optimization
-        assignments = pystencils.AssignmentCollection({
-            x[0, 0]: sp.exp(y[0, 0]) - 1
-        })
-
-        assignments = optimize_assignments(assignments, optims_pystencils_cpu)
-
-        ast = pystencils.create_kernel(assignments, target=target)
-        code = str(pystencils.show_code(ast))
-        assert 'expm1(' in code
-
-
-@pytest.mark.skipif(not HAS_REWRITING, reason="need sympy.codegen.rewriting")
-def test_evaluate_constant_terms():
-    for target in ('cpu', 'gpu'):
-        x, y, z = pystencils.fields('x, y, z:  float32[2d]')
-
-        # Triggers Sympy's expm1 optimization
-        assignments = pystencils.AssignmentCollection({
-            x[0, 0]: -sp.cos(1) + y[0, 0]
-        })
-
-        assignments = optimize_assignments(assignments, optims_pystencils_cpu)
-
-        ast = pystencils.create_kernel(assignments, target=target)
-        code = str(pystencils.show_code(ast))
-        assert 'cos(' not in code
-        print(code)
-
-
-@pytest.mark.skipif(not HAS_REWRITING, reason="need sympy.codegen.rewriting")
-def test_do_not_evaluate_constant_terms():
-    optimizations = pystencils.math_optimizations.optims_pystencils_cpu
-    optimizations.remove(pystencils.math_optimizations.evaluate_constant_terms)
-
-    for target in ('cpu', 'gpu'):
-        x, y, z = pystencils.fields('x, y, z:  float32[2d]')
-
-        assignments = pystencils.AssignmentCollection({
-            x[0, 0]: -sp.cos(1) + y[0, 0]
-        })
-
-        optimize_assignments(assignments, optimizations)
-
-        ast = pystencils.create_kernel(assignments, target=target)
-        code = str(pystencils.show_code(ast))
-        assert 'cos(' in code
-        print(code)
--- a/pystencils_tests/test_types.py
+++ b/pystencils_tests/test_types.py
-import sympy as sp
-import numpy as np
-import pystencils as ps
-from pystencils import data_types
-from pystencils.data_types import TypedSymbol, get_type_of_expression, VectorType, collate_types, create_type
-
-
-def test_parsing():
-    assert str(data_types.create_composite_type_from_string("const double *")) == "double const *"
-    assert str(data_types.create_composite_type_from_string("double const *")) == "double const *"
-
-    t1 = data_types.create_composite_type_from_string("const double * const * const restrict")
-    t2 = data_types.create_composite_type_from_string(str(t1))
-    assert t1 == t2
-
-
-def test_collation():
-    double_type = create_type("double")
-    float_type = create_type("float32")
-    double4_type = VectorType(double_type, 4)
-    float4_type = VectorType(float_type, 4)
-    assert collate_types([double_type, float_type]) == double_type
-    assert collate_types([double4_type, float_type]) == double4_type
-    assert collate_types([double4_type, float4_type]) == double4_type
-
-
-def test_dtype_of_constants():
-    # Some come constants are neither of type Integer,Float,Rational and don't have args
-    # >>> isinstance(pi, Integer)
-    # False
-    # >>> isinstance(pi, Float)
-    # False
-    # >>> isinstance(pi, Rational)
-    # False
-    # >>> pi.args
-    # ()
-    get_type_of_expression(sp.pi)
-
-
-def test_assumptions():
-    x = ps.fields('x:  float32[3d]')
-
-    assert x.shape[0].is_nonnegative
-    assert (2 * x.shape[0]).is_nonnegative
-    assert (2 * x.shape[0]).is_integer
-    assert (TypedSymbol('a', create_type('uint64'))).is_nonnegative
-    assert (TypedSymbol('a', create_type('uint64'))).is_positive is None
-    assert (TypedSymbol('a', create_type('uint64')) + 1).is_positive
-    assert (x.shape[0] + 1).is_real
-
-
-def test_sqrt_of_integer():
-    """Regression test for bug where sqrt(3) was classified as integer"""
-    f = ps.fields("f: [1D]")
-    tmp = sp.symbols("tmp")
-
-    assignments = [ps.Assignment(tmp, sp.sqrt(3)),
-                   ps.Assignment(f[0], tmp)]
-    arr = np.array([1], dtype=np.float64)
-    kernel = ps.create_kernel(assignments).compile()
-    kernel(f=arr)
-    assert 1.7 < arr[0] < 1.8
--- a/pystencils_tests/test_vectorization.py
+++ b/pystencils_tests/test_vectorization.py
-import numpy as np
-import sympy as sp
-
-import pystencils as ps
-from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
-from pystencils.cpu.vectorization import vectorize
-from pystencils.transformations import replace_inner_stride_with_one
-
-
-def test_vector_type_propagation():
-    a, b, c, d, e = sp.symbols("a b c d e")
-    arr = np.ones((2 ** 2 + 2, 2 ** 3 + 2))
-    arr *= 10.0
-
-    f, g = ps.fields(f=arr, g=arr)
-    update_rule = [ps.Assignment(a, f[1, 0]),
-                   ps.Assignment(b, a),
-                   ps.Assignment(g[0, 0], b + 3 + f[0, 1])]
-
-    ast = ps.create_kernel(update_rule)
-    vectorize(ast)
-
-    func = ast.compile()
-    dst = np.zeros_like(arr)
-    func(g=dst, f=arr)
-    np.testing.assert_equal(dst[1:-1, 1:-1], 2 * 10.0 + 3)
-
-
-def test_inplace_update():
-    shape = (9, 9, 3)
-    arr = np.ones(shape, order='f')
-
-    @ps.kernel
-    def update_rule(s):
-        f = ps.fields("f(3) : [2D]", f=arr)
-        s.tmp0 @= f(0)
-        s.tmp1 @= f(1)
-        s.tmp2 @= f(2)
-        f0, f1, f2 = f(0), f(1), f(2)
-        f0 @= 2 * s.tmp0
-        f1 @= 2 * s.tmp0
-        f2 @= 2 * s.tmp0
-
-    ast = ps.create_kernel(update_rule, cpu_vectorize_info={'instruction_set': 'avx'})
-    kernel = ast.compile()
-    kernel(f=arr)
-    np.testing.assert_equal(arr, 2)
-
-
-def test_vectorization_fixed_size():
-    configurations = []
-    # Fixed size - multiple of four
-    arr = np.ones((20 + 2, 24 + 2)) * 5.0
-    f, g = ps.fields(f=arr, g=arr)
-    configurations.append((arr, f, g))
-    # Fixed size - no multiple of four
-    arr = np.ones((21 + 2, 25 + 2)) * 5.0
-    f, g = ps.fields(f=arr, g=arr)
-    configurations.append((arr, f, g))
-    # Fixed size - different remainder
-    arr = np.ones((23 + 2, 17 + 2)) * 5.0
-    f, g = ps.fields(f=arr, g=arr)
-    configurations.append((arr, f, g))
-
-    for arr, f, g in configurations:
-        update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
-
-        ast = ps.create_kernel(update_rule)
-        vectorize(ast)
-
-        func = ast.compile()
-        dst = np.zeros_like(arr)
-        func(g=dst, f=arr)
-        np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
-
-
-def test_vectorization_variable_size():
-    f, g = ps.fields("f, g : double[2D]")
-    update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
-    ast = ps.create_kernel(update_rule)
-
-    replace_inner_stride_with_one(ast)
-    vectorize(ast)
-    func = ast.compile()
-
-    arr = np.ones((23 + 2, 17 + 2)) * 5.0
-    dst = np.zeros_like(arr)
-
-    func(g=dst, f=arr)
-    np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
-
-
-def test_piecewise1():
-    a, b, c, d, e = sp.symbols("a b c d e")
-    arr = np.ones((2 ** 3 + 2, 2 ** 4 + 2)) * 5.0
-
-    f, g = ps.fields(f=arr, g=arr)
-    update_rule = [ps.Assignment(a, f[1, 0]),
-                   ps.Assignment(b, a),
-                   ps.Assignment(c, f[0, 0] > 0.0),
-                   ps.Assignment(g[0, 0], sp.Piecewise((b + 3 + f[0, 1], c), (0.0, True)))]
-
-    ast = ps.create_kernel(update_rule)
-    vectorize(ast)
-    func = ast.compile()
-    dst = np.zeros_like(arr)
-    func(g=dst, f=arr)
-    np.testing.assert_equal(dst[1:-1, 1:-1], 5 + 3 + 5.0)
-
-
-def test_piecewise2():
-
-    arr = np.zeros((20, 20))
-
-    @ps.kernel
-    def test_kernel(s):
-        f, g = ps.fields(f=arr, g=arr)
-
-        s.condition @= f[0, 0] > 1
-        s.result    @= 0.0 if s.condition else 1.0
-        g[0, 0]     @= s.result
-
-    ast = ps.create_kernel(test_kernel)
-    vectorize(ast)
-    func = ast.compile()
-    func(f=arr, g=arr)
-    np.testing.assert_equal(arr, np.ones_like(arr))
-
-
-def test_piecewise3():
-
-    arr = np.zeros((22, 22))
-
-    @ps.kernel
-    def test_kernel(s):
-        f, g = ps.fields(f=arr, g=arr)
-        s.b     @= f[0, 1]
-        g[0, 0] @= 1.0 / (s.b + s.k) if f[0, 0] > 0.0 else 1.0
-
-    ast = ps.create_kernel(test_kernel)
-    vectorize(ast)
-    ast.compile()
-
-
-def test_logical_operators():
-    arr = np.zeros((22, 22))
-
-    @ps.kernel
-    def test_kernel(s):
-        f, g = ps.fields(f=arr, g=arr)
-        s.c @= sp.And(f[0, 1] < 0.0, f[1, 0] < 0.0)
-        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])
-
-    ast = ps.create_kernel(test_kernel)
-    vectorize(ast)
-    ast.compile()
-
-
-def test_hardware_query():
-    instruction_sets = get_supported_instruction_sets()
-    assert 'sse' in instruction_sets
--- a/pytest.ini
+++ b/pytest.ini
 [pytest]
+testpaths = src tests doc/notebooks
+pythonpath = src
 python_files = test_*.py *_test.py scenario_*.py
 norecursedirs = *.egg-info .git .cache .ipynb_checkpoints htmlcov
 addopts = --doctest-modules --durations=20  --cov-config pytest.ini
 markers =
-       gpu: test that require a gpu
-       kerncraft: tests depending on kerncraft
+       longrun: tests only run at night since they have large execution time
+       notebook: mark for notebooks
+# these warnings all come from third party libraries.
+filterwarnings =
+       ignore:an integer is required:DeprecationWarning
+       ignore:\s*load will be removed, use:PendingDeprecationWarning
+       ignore:the imp module is deprecated in favour of importlib:DeprecationWarning
+       ignore:.*is a deprecated alias for the builtin `bool`:DeprecationWarning
+       ignore:'contextfilter' is renamed to 'pass_context':DeprecationWarning
+       ignore:Using or importing the ABCs from 'collections' instead of from 'collections.abc':DeprecationWarning
+       ignore:Animation was deleted without rendering anything:UserWarning

 [run]
 branch = True
-source = pystencils
-         pystencils_tests
+source = src/pystencils
+         tests

 omit = doc/*
-       pystencils_tests/*
+       tests/*
       setup.py
+       quicktest.py
       conftest.py
-       pystencils/jupytersetup.py
-       pystencils/cpu/msvc_detection.py
-       pystencils/sympy_gmpy_bug_workaround.py
-       pystencils/cache.py
-       pystencils/pacxx/benchmark.py
+       versioneer.py
+       src/pystencils/jupytersetup.py
+       src/pystencils/cpu/msvc_detection.py
+       src/pystencils/sympy_gmpy_bug_workaround.py
+       src/pystencils/cache.py
+       src/pystencils/pacxx/benchmark.py
+       src/pystencils/_version.py
+       venv/

 [report]
 exclude_lines =
@@ -27,10 +42,12 @@ exclude_lines =
       pragma: no cover

       def __repr__
+       def _repr_html_

       # Don't complain if tests don't hit defensive assertion code:
       raise AssertionError
       raise NotImplementedError
+       NotImplementedError()
       #raise ValueError

       # Don't complain if non-runnable code isn't run:
@@ -39,7 +56,7 @@ exclude_lines =
       if __name__ == .__main__.:

 skip_covered = True
-fail_under = 74
+fail_under = 85

 [html]
 directory = coverage_report
--- a/quicktest.py
+++ b/quicktest.py
+#!/usr/bin/env python3
+
+from contextlib import redirect_stdout
+import io
+from tests.test_quicktests import (
+    test_basic_kernel,
+    test_basic_blocking_staggered,
+    test_basic_vectorization,
+)
+
+quick_tests = [
+    test_basic_kernel,
+    test_basic_blocking_staggered,
+    test_basic_vectorization,
+]
+
+if __name__ == "__main__":
+    print("Running pystencils quicktests")
+    for qt in quick_tests:
+        print(f"   -> {qt.__name__}")
+        with redirect_stdout(io.StringIO()):
+            qt()
--- a/release.sh
+++ b/release.sh
@@ -8,6 +8,6 @@ read new_version

 git tag -s release/${new_version}
 git push origin master release/${new_version}
-python setup.py sdist bdist_wheel
 rm -rf dist
-twine upload dist/*
\ No newline at end of file
+python setup.py sdist
+twine upload dist/*
--- a/setup.py
+++ b/setup.py
-import distutils
-import io
-import os
-import sys
-from contextlib import redirect_stdout
-from importlib import import_module
+from setuptools import setup, __version__ as setuptools_version

-from setuptools import find_packages, setup
+if int(setuptools_version.split('.')[0]) < 61:
+    raise Exception(
+        "[ERROR] pystencils requires at least setuptools version 61 to install.\n"
+        "If this error occurs during an installation via pip, it is likely that there is a conflict between "
+        "versions of setuptools installed by pip and the system package manager. "
+        "In this case, it is recommended to install pystencils into a virtual environment instead."
+    )

-if '--use-cython' in sys.argv:
-    USE_CYTHON = True
-    sys.argv.remove('--use-cython')
-else:
-    USE_CYTHON = False
+import versioneer

-quick_tests = [
-    'test_datahandling.test_kernel',
-    'test_blocking_staggered.test_blocking_staggered',
-    'test_blocking_staggered.test_blocking_staggered',
-    'test_vectorization.test_vectorization_variable_size',
-]

+def get_cmdclass():
+    return versioneer.get_cmdclass()

-class SimpleTestRunner(distutils.cmd.Command):
-    """A custom command to run selected tests"""

-    description = 'run some quick tests'
-    user_options = []
-
-    @staticmethod
-    def _run_tests_in_module(test):
-        """Short test runner function - to work also if py.test is not installed."""
-        test = 'pystencils_tests.' + test
-        mod, function_name = test.rsplit('.', 1)
-        if isinstance(mod, str):
-            mod = import_module(mod)
-
-        func = getattr(mod, function_name)
-        print("   -> %s in %s" % (function_name, mod.__name__))
-        with redirect_stdout(io.StringIO()):
-            func()
-
-    def initialize_options(self):
-        pass
-
-    def finalize_options(self):
-        pass
-
-    def run(self):
-        """Run command."""
-        for test in quick_tests:
-            self._run_tests_in_module(test)
-
-
-def readme():
-    with open('README.md') as f:
-        return f.read()
-
-
-def cython_extensions(*extensions):
-    from distutils.extension import Extension
-    ext = '.pyx' if USE_CYTHON else '.c'
-    result = [Extension(e, [e.replace('.', '/') + ext]) for e in extensions]
-    if USE_CYTHON:
-        from Cython.Build import cythonize
-        result = cythonize(result, language_level=3)
-    return result
-
-
-try:
-    sys.path.insert(0, os.path.abspath('doc'))
-    from version_from_git import version_number_from_git
-
-    version = version_number_from_git()
-    with open("RELEASE-VERSION", "w") as f:
-        f.write(version)
-except ImportError:
-    version = open('RELEASE-VERSION', 'r').read()
-
-setup(name='pystencils',
-      description='Speeding up stencil computations on CPUs and GPUs',
-      version=version,
-      long_description=readme(),
-      long_description_content_type="text/markdown",
-      author='Martin Bauer',
-      license='AGPLv3',
-      author_email='martin.bauer@fau.de',
-      url='https://i10git.cs.fau.de/pycodegen/pystencils/',
-      packages=['pystencils'] + ['pystencils.' + s for s in find_packages('pystencils')],
-      install_requires=['sympy>=1.1', 'numpy', 'appdirs', 'joblib'],
-      package_data={'pystencils': ['include/*.h',
-                                   'backends/cuda_known_functions.txt',
-                                   'backends/opencl1.1_known_functions.txt']},
-
-      ext_modules=cython_extensions("pystencils.boundaries.createindexlistcython"),
-      classifiers=[
-          'Development Status :: 4 - Beta',
-          'Framework :: Jupyter',
-          'Topic :: Software Development :: Code Generators',
-          'Topic :: Scientific/Engineering :: Physics',
-          'Intended Audience :: Developers',
-          'Intended Audience :: Science/Research',
-          'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
-      ],
-      project_urls={
-          "Bug Tracker": "https://i10git.cs.fau.de/pycodegen/pystencils/issues",
-          "Documentation": "http://pycodegen.pages.walberla.net/pystencils/",
-          "Source Code": "https://i10git.cs.fau.de/pycodegen/pystencils",
-      },
-      extras_require={
-          'gpu': ['pycuda'],
-          'opencl': ['pyopencl'],
-          'alltrafos': ['islpy', 'py-cpuinfo'],
-          'bench_db': ['blitzdb', 'pymongo', 'pandas'],
-          'interactive': ['matplotlib', 'ipy_table', 'imageio', 'jupyter', 'pyevtk'],
-          'autodiff': ['pystencils-autodiff'],
-          'doc': ['sphinx', 'sphinx_rtd_theme', 'nbsphinx',
-                  'sphinxcontrib-bibtex', 'sphinx_autodoc_typehints', 'pandoc'],
-      },
-      tests_require=['pytest',
-                     'pytest-cov',
-                     'pytest-html',
-                     'ansi2html',
-                     'pytest-xdist',
-                     'flake8',
-                     'nbformat',
-                     'nbconvert',
-                     'ipython'],
-
-      python_requires=">=3.6",
-      cmdclass={
-          'quicktest': SimpleTestRunner
-      },
-      )
+setup(
+    version=versioneer.get_version(),
+    cmdclass=get_cmdclass(),
+)
No results found