Showing with 1258 additions and 6 deletions
import numpy as np
import sympy as sp
import pytest
import pystencils as ps
from pystencils.alignedarray import aligned_zeros
from pystencils.astnodes import Block, Conditional, SympyAssignment
from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets, get_vector_instruction_set
from pystencils.enums import Target
from pystencils.cpu.vectorization import vec_all, vec_any
from pystencils.node_collection import NodeCollection
supported_instruction_sets = get_supported_instruction_sets() or []
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
def test_vec_any(instruction_set, dtype):
if instruction_set in ['sve', 'sve2', 'sme', 'rvv']:
width = 4 # we don't know the actual value
else:
width = get_vector_instruction_set(dtype, instruction_set)['width']
data_arr = np.zeros((4 * width, 4 * width), dtype=dtype)
data_arr[3:9, 1:3 * width - 1] = 1.0
data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
c = [
SympyAssignment(sp.Symbol("t1"), vec_any(data.center() > 0.0)),
Conditional(vec_any(data.center() > 0.0), Block([SympyAssignment(data.center(), 2.0)]))
]
    assignments = NodeCollection(c)
    ast = ps.create_kernel(assignments=assignments, target=ps.Target.CPU,
cpu_vectorize_info={'instruction_set': instruction_set})
kernel = ast.compile()
kernel(data=data_arr)
if instruction_set in ['sve', 'sve2', 'sme', 'rvv']:
# we only know that the first value has changed
np.testing.assert_equal(data_arr[3:9, :3 * width - 1], 2.0)
else:
np.testing.assert_equal(data_arr[3:9, :3 * width], 2.0)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
def test_vec_all(instruction_set, dtype):
if instruction_set in ['sve', 'sve2', 'sme', 'rvv']:
width = 1000 # we don't know the actual value, need something guaranteed larger than vector
else:
width = get_vector_instruction_set(dtype, instruction_set)['width']
data_arr = np.zeros((4 * width, 4 * width), dtype=dtype)
data_arr[3:9, 1:3 * width - 1] = 1.0
data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
c = [Conditional(vec_all(data.center() > 0.0), Block([SympyAssignment(data.center(), 2.0)]))]
    assignments = NodeCollection(c)
    ast = ps.create_kernel(assignments, target=Target.CPU,
cpu_vectorize_info={'instruction_set': instruction_set})
kernel = ast.compile()
kernel(data=data_arr)
if instruction_set in ['sve', 'sve2', 'sme', 'rvv']:
# we only know that some values in the middle have been replaced
assert np.all(data_arr[3:9, :2] <= 1.0)
assert np.any(data_arr[3:9, 2:] == 2.0)
else:
np.testing.assert_equal(data_arr[3:9, :1], 0.0)
np.testing.assert_equal(data_arr[3:9, 1:width], 1.0)
np.testing.assert_equal(data_arr[3:9, width:2 * width], 2.0)
np.testing.assert_equal(data_arr[3:9, 2 * width:3 * width - 1], 1.0)
np.testing.assert_equal(data_arr[3:9, 3 * width - 1:], 0.0)
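# A scalar reference model for the SIMD semantics exercised above (illustrative
# only, not pystencils API): vec_any/vec_all evaluate the condition once per
# chunk of `width` lanes, so the conditional's store affects whole chunks at a
# time, which is why entire width-sized column blocks flip to 2.0.
def _reference_vec_any(row, width):
    """One bool per width-sized chunk: does any lane satisfy x > 0?"""
    chunks = row[:row.size // width * width].reshape(-1, width)
    return np.any(chunks > 0.0, axis=1)


def _reference_vec_all(row, width):
    """One bool per width-sized chunk: do all lanes satisfy x > 0?"""
    chunks = row[:row.size // width * width].reshape(-1, width)
    return np.all(chunks > 0.0, axis=1)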
@pytest.mark.skipif(not supported_instruction_sets, reason='cannot detect CPU instruction set')
def test_boolean_before_loop():
t1, t2 = sp.symbols('t1, t2')
f_arr = np.ones((10, 10))
g_arr = np.zeros_like(f_arr)
f, g = ps.fields("f, g : double[2D]", f=f_arr, g=g_arr)
a = [
ps.Assignment(t1, t2 > 0),
ps.Assignment(g[0, 0],
sp.Piecewise((f[0, 0], t1), (42, True)))
]
ast = ps.create_kernel(a, cpu_vectorize_info={'instruction_set': supported_instruction_sets[-1]})
kernel = ast.compile()
kernel(f=f_arr, g=g_arr, t2=1.0)
# print(g)
np.testing.assert_array_equal(g_arr, 1.0)
kernel(f=f_arr, g=g_arr, t2=-1.0)
np.testing.assert_array_equal(g_arr, 42.0)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('nontemporal', [False, True])
@pytest.mark.parametrize('aligned', [False, True])
def test_vec_maskstore(instruction_set, dtype, nontemporal, aligned):
data_arr = (aligned_zeros if aligned else np.zeros)((16, 16), dtype=dtype)
data_arr[3:-3, 3:-3] = 1.0
data = ps.fields(f"data: {dtype}[2D]", data=data_arr)
c = [Conditional(data.center() < 1.0, Block([SympyAssignment(data.center(), 2.0)]))]
    assignments = NodeCollection(c)
config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set,
'nontemporal': nontemporal,
'assume_aligned': aligned},
default_number_float=dtype)
    ast = ps.create_kernel(assignments, config=config)
if 'maskStore' in ast.instruction_set:
instruction = 'maskStream' if nontemporal and 'maskStream' in ast.instruction_set else (
'maskStoreA' if aligned and 'maskStoreA' in ast.instruction_set else 'maskStore')
assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast)
print(ps.get_code_str(ast))
kernel = ast.compile()
kernel(data=data_arr)
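    # Expectation: the masked store writes 2.0 exactly where data < 1.0 held,
    # i.e. the 3-cell border, and leaves the 1.0 interior untouched.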
np.testing.assert_equal(data_arr[:3, :], 2.0)
np.testing.assert_equal(data_arr[-3:, :], 2.0)
np.testing.assert_equal(data_arr[:, :3], 2.0)
np.testing.assert_equal(data_arr[:, -3:], 2.0)
np.testing.assert_equal(data_arr[3:-3, 3:-3], 1.0)
@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
@pytest.mark.parametrize('dtype', ('float32', 'float64'))
@pytest.mark.parametrize('nontemporal', [False, True])
def test_vec_maskscatter(instruction_set, dtype, nontemporal):
data_arr = np.zeros((16, 16), dtype=dtype)
data_arr[3:-3, 3:-3] = 1.0
data = ps.fields(f"data: {dtype}[2D]")
c = [Conditional(data.center() < 1.0, Block([SympyAssignment(data.center(), 2.0)]))]
    assignments = NodeCollection(c)
config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set,
'nontemporal': nontemporal},
default_number_float=dtype)
if 'maskStoreS' not in get_vector_instruction_set(dtype, instruction_set) \
and not instruction_set.startswith('sve'):
with pytest.warns(UserWarning) as warn:
            ast = ps.create_kernel(assignments, config=config)
assert 'Could not vectorize loop' in warn[0].message.args[0]
else:
with pytest.warns(None) as warn:
            ast = ps.create_kernel(assignments, config=config)
assert len(warn) == 0
instruction = 'maskStreamS' if nontemporal and 'maskStreamS' in ast.instruction_set else 'maskStoreS'
assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast)
print(ps.get_code_str(ast))
kernel = ast.compile()
kernel(data=data_arr)
np.testing.assert_equal(data_arr[:3, :], 2.0)
np.testing.assert_equal(data_arr[-3:, :], 2.0)
np.testing.assert_equal(data_arr[:, :3], 2.0)
np.testing.assert_equal(data_arr[:, -3:], 2.0)
np.testing.assert_equal(data_arr[3:-3, 3:-3], 1.0)
from collections import defaultdict
import numpy as np
import pytest
from pystencils import CreateKernelConfig, Target, Backend
from pystencils.typing import BasicType
def test_config():
# targets
config = CreateKernelConfig(target=Target.CPU)
assert config.target == Target.CPU
assert config.backend == Backend.C
config = CreateKernelConfig(target=Target.GPU)
assert config.target == Target.GPU
assert config.backend == Backend.CUDA
# typing
config = CreateKernelConfig(data_type=np.float64)
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float64')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type=np.float32)
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float32')
assert config.default_number_float == BasicType('float32')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type=np.float32, default_number_float=np.float64)
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float32')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type=np.float32, default_number_float=np.float64, default_number_int=np.int16)
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float32')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int16')
config = CreateKernelConfig(data_type='float64')
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float64')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type={'a': np.float64, 'b': np.float32})
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float64')
assert config.default_number_float == BasicType('float64')
assert config.default_number_int == BasicType('int64')
config = CreateKernelConfig(data_type={'a': np.float32, 'b': np.int32})
assert isinstance(config.data_type, defaultdict)
assert config.data_type.default_factory() == BasicType('float32')
assert config.default_number_float == BasicType('float32')
assert config.default_number_int == BasicType('int64')
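# Illustrative follow-up (a sketch using only the API exercised above): since
# config.data_type is a defaultdict, a field name that was never listed still
# resolves to the derived default type.
def test_config_data_type_lookup():
    config = CreateKernelConfig(data_type={'a': np.float64, 'b': np.float32})
    # 'c' is a hypothetical, unmapped field name; the defaultdict falls back to
    # its default factory, which the assertions above pin to float64.
    assert config.data_type['c'] == BasicType('float64')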
def test_config_target_as_string():
with pytest.raises(ValueError):
CreateKernelConfig(target='cpu')
def test_config_backend_as_string():
with pytest.raises(ValueError):
CreateKernelConfig(backend='C')
def test_config_python_types():
with pytest.raises(ValueError):
CreateKernelConfig(data_type=float)
def test_config_python_types2():
with pytest.raises(ValueError):
CreateKernelConfig(data_type={'a': float})
def test_config_python_types3():
with pytest.raises(ValueError):
CreateKernelConfig(default_number_float=float)
def test_config_python_types4():
with pytest.raises(ValueError):
CreateKernelConfig(default_number_int=int)
def test_config_python_types5():
with pytest.raises(ValueError):
CreateKernelConfig(data_type="float")
def test_config_python_types6():
with pytest.raises(ValueError):
CreateKernelConfig(default_number_float="float")
def test_config_python_types7():
dtype = defaultdict(lambda: 'float', {'a': np.float64, 'b': np.int64})
with pytest.raises(ValueError):
CreateKernelConfig(data_type=dtype)
def test_config_python_types8():
dtype = defaultdict(lambda: float, {'a': np.float64, 'b': np.int64})
with pytest.raises(ValueError):
CreateKernelConfig(data_type=dtype)
def test_config_python_types9():
dtype = defaultdict(lambda: 'float32', {'a': 'float', 'b': np.int64})
with pytest.raises(ValueError):
CreateKernelConfig(data_type=dtype)
def test_config_python_types10():
dtype = defaultdict(lambda: 'float32', {'a': float, 'b': np.int64})
with pytest.raises(ValueError):
CreateKernelConfig(data_type=dtype)
import numpy as np
import sympy as sp
import pystencils as ps
import pystencils.config
def test_create_kernel_config():
c = pystencils.config.CreateKernelConfig()
assert c.backend == ps.Backend.C
assert c.target == ps.Target.CPU
c = pystencils.config.CreateKernelConfig(target=ps.Target.GPU)
assert c.backend == ps.Backend.CUDA
c = pystencils.config.CreateKernelConfig(backend=ps.Backend.CUDA)
assert c.target == ps.Target.CPU
assert c.backend == ps.Backend.CUDA
def test_kernel_decorator_config():
config = pystencils.config.CreateKernelConfig()
a, b, c = ps.fields(a=np.ones(100), b=np.ones(100), c=np.ones(100))
@ps.kernel_config(config)
def test():
a[0] @= b[0] + c[0]
ps.create_kernel(**test)
def test_kernel_decorator2():
h = sp.symbols("h")
dtype = "float64"
src, dst = ps.fields(f"src, src_tmp: {dtype}[3D]")
@ps.kernel
def kernel_func():
dst[0, 0, 0] @= (src[1, 0, 0] + src[-1, 0, 0]
+ src[0, 1, 0] + src[0, -1, 0]
+ src[0, 0, 1] + src[0, 0, -1]) / (6 * h ** 2)
# assignments = ps.assignment_from_stencil(stencil, src, dst, normalization_factor=2)
ast = ps.create_kernel(kernel_func)
code = ps.get_code_str(ast)
from subprocess import CalledProcessError
import pytest
import sympy
import pystencils
import pystencils.cpu.cpujit
from pystencils.backends.cbackend import CBackend
from pystencils.backends.cuda_backend import CudaBackend
from pystencils.enums import Target
class ScreamingBackend(CBackend):
@@ -25,26 +23,29 @@ class ScreamingGpuBackend(CudaBackend):
return normal_code.upper()
def test_custom_backends_cpu():
    z, y, x = pystencils.fields("z, y, x: [2d]")

    normal_assignments = pystencils.AssignmentCollection([pystencils.Assignment(
        z[0, 0], x[0, 0] * x[0, 0] * y[0, 0])], [])

    ast = pystencils.create_kernel(normal_assignments, target=Target.CPU)
    pystencils.show_code(ast, ScreamingBackend())
    with pytest.raises(CalledProcessError):
        pystencils.cpu.cpujit.make_python_function(ast, custom_backend=ScreamingBackend())


def test_custom_backends_gpu():
    pytest.importorskip('cupy')
    import cupy
    import pystencils.gpu.gpujit

    z, y, x = pystencils.fields("z, y, x: [2d]")

    normal_assignments = pystencils.AssignmentCollection([pystencils.Assignment(
        z[0, 0], x[0, 0] * x[0, 0] * y[0, 0])], [])

    ast = pystencils.create_kernel(normal_assignments, target=Target.GPU)
    pystencils.show_code(ast, ScreamingGpuBackend())
    with pytest.raises((cupy.cuda.compiler.JitifyException, cupy.cuda.compiler.CompileException)):
        pystencils.gpu.gpujit.make_python_function(ast, custom_backend=ScreamingGpuBackend())
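# The backend bodies are collapsed in this diff; for context, a minimal sketch
# of such an override (the `_print` hook name is an assumption inferred from
# the `normal_code.upper()` line above, not confirmed by this diff):
class _SketchScreamingBackend(CBackend):
    def _print(self, node):
        normal_code = super()._print(node)
        # Uppercased C still prints fine but no longer compiles, which is what
        # the CalledProcessError / compiler-error checks above rely on.
        return normal_code.upper()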
Binary test-data files added (6 files), including tests/test_data/lenna.png (463 KiB) and tests/test_data/test_vessel2d_mask.png (7.43 KiB).
import os
from tempfile import TemporaryDirectory
from pathlib import Path
import numpy as np
import pystencils as ps
from pystencils import create_data_handling, create_kernel
from pystencils.gpu.gpu_array_handler import GPUArrayHandler
from pystencils.enums import Target
try:
import pytest
except ImportError:
import unittest.mock
pytest = unittest.mock.MagicMock()
try:
import cupy.cuda.runtime
device_numbers = range(cupy.cuda.runtime.getDeviceCount())
except ImportError:
device_numbers = []
SCRIPT_FOLDER = Path(__file__).parent.absolute()
INPUT_FOLDER = SCRIPT_FOLDER / "test_data"
def basic_iteration(dh):
@@ -17,7 +35,7 @@ def basic_iteration(dh):
def access_and_gather(dh, domain_size):
    dh.add_array('f1', dtype=np.dtype(np.int8))
dh.add_array_like('f2', 'f1')
dh.add_array('v1', values_per_cell=3, dtype=np.int64, ghost_layers=2)
dh.add_array_like('v2', 'v1')
@@ -28,7 +46,7 @@ def access_and_gather(dh, domain_size):
# Check symbolic field properties
assert dh.fields.f1.index_dimensions == 0
assert dh.fields.f1.spatial_dimensions == len(domain_size)
    assert dh.fields.f1.dtype.numpy_dtype == np.int8
assert dh.fields.v1.index_dimensions == 1
assert dh.fields.v1.spatial_dimensions == len(domain_size)
@@ -73,14 +91,10 @@ def access_and_gather(dh, domain_size):
def synchronization(dh, test_gpu=False):
field_name = 'comm_field_test'
    if test_gpu:
        pytest.importorskip("cupy")
        field_name += 'Gpu'

    dh.add_array(field_name, ghost_layers=1, dtype=np.int8, cpu=True, gpu=test_gpu)
# initialize everything with 1
for b in dh.iterate(ghost_layers=1):
@@ -90,8 +104,10 @@ def synchronization(dh, test_gpu=False):
    if test_gpu:
        dh.to_gpu(field_name)

    dh.synchronization_function(field_name, target='gpu' if test_gpu else 'cpu')()

    if test_gpu:
        dh.to_cpu(field_name)
@@ -100,17 +116,16 @@ def synchronization(dh, test_gpu=False):
np.testing.assert_equal(42, b[field_name])
def kernel_execution_jacobi(dh, target):
    test_gpu = target == Target.GPU
dh.add_array('f', gpu=test_gpu)
dh.add_array('tmp', gpu=test_gpu)
if test_gpu:
assert dh.is_on_gpu('f')
assert dh.is_on_gpu('tmp')
stencil_2d = [(1, 0), (-1, 0), (0, 1), (0, -1)]
stencil_3d = [(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1)]
stencil = stencil_2d if dh.dim == 2 else stencil_3d
@@ -119,7 +134,7 @@ def kernel_execution_jacobi(dh, test_gpu=False):
def jacobi():
dh.fields.tmp.center @= sum(dh.fields.f.neighbors(stencil)) / len(stencil)
    kernel = create_kernel(jacobi, config=ps.CreateKernelConfig(target=target)).compile()
for b in dh.iterate(ghost_layers=1):
b['f'].fill(42)
dh.run_kernel(kernel)
@@ -128,6 +143,7 @@ def kernel_execution_jacobi(dh, test_gpu=False):
def vtk_output(dh):
pytest.importorskip('pyevtk')
dh.add_array('scalar_field')
dh.add_array('vector_field', values_per_cell=dh.dim)
dh.add_array('multiple_scalar_field', values_per_cell=9)
@@ -196,18 +212,200 @@ def test_access_and_gather():
def test_kernel():
    for domain_shape in [(4, 5), (3, 4, 5)]:
        dh = create_data_handling(domain_size=domain_shape, periodicity=True)
        assert all(dh.periodicity)
        kernel_execution_jacobi(dh, Target.CPU)
        reduction(dh)

        try:
            import cupy
            dh = create_data_handling(domain_size=domain_shape, periodicity=True)
            kernel_execution_jacobi(dh, Target.GPU)
        except ImportError:
            pass
@pytest.mark.parametrize('target', (Target.CPU, Target.GPU))
def test_kernel_param(target):
for domain_shape in [(4, 5), (3, 4, 5)]:
if target == Target.GPU:
pytest.importorskip('cupy')
dh = create_data_handling(domain_size=domain_shape, periodicity=True, default_target=target)
kernel_execution_jacobi(dh, target)
reduction(dh)
def test_vtk_output():
pytest.importorskip('pyevtk')
for domain_shape in [(4, 5), (3, 4, 5)]:
dh = create_data_handling(domain_size=domain_shape, periodicity=True)
vtk_output(dh)
def test_add_arrays():
domain_shape = (3, 4, 5)
field_description = 'x, y(9)'
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=0, default_layout='numpy')
x_, y_ = dh.add_arrays(field_description)
x, y = ps.fields(field_description + ': [3,4,5]')
assert x_ == x
assert y_ == y
assert x == dh.fields['x']
assert y == dh.fields['y']
@pytest.mark.parametrize('shape', [(17, 12), (7, 11, 18)])
@pytest.mark.parametrize('layout', ['zyxf', 'fzyx'])
def test_add_arrays_with_layout(shape, layout):
pytest.importorskip('cupy')
dh = create_data_handling(domain_size=shape, default_layout=layout, default_target=ps.Target.GPU)
f1 = dh.add_array("f1", values_per_cell=19)
dh.fill(f1.name, 1.0)
assert dh.cpu_arrays[f1.name].shape == dh.gpu_arrays[f1.name].shape
assert dh.cpu_arrays[f1.name].strides == dh.gpu_arrays[f1.name].strides
assert dh.cpu_arrays[f1.name].dtype == dh.gpu_arrays[f1.name].dtype
def test_get_kwarg():
domain_shape = (10, 10)
field_description = 'src, dst'
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
src, dst = dh.add_arrays(field_description)
dh.fill("src", 1.0, ghost_layers=True)
dh.fill("dst", 0.0, ghost_layers=True)
with pytest.raises(ValueError):
dh.add_array('src', values_per_cell=1)
ur = ps.Assignment(src.center, dst.center)
kernel = ps.create_kernel(ur).compile()
kw = dh.get_kernel_kwargs(kernel)
assert np.all(kw[0]['src'] == dh.cpu_arrays['src'])
assert np.all(kw[0]['dst'] == dh.cpu_arrays['dst'])
def test_add_custom_data():
pytest.importorskip('cupy')
import cupy as cp
def cpu_data_create_func():
return np.ones((2, 2), dtype=np.float64)
def gpu_data_create_func():
return cp.zeros((2, 2), dtype=np.float64)
def cpu_to_gpu_transfer_func(gpuarr, cpuarray):
gpuarr.set(cpuarray)
def gpu_to_cpu_transfer_func(gpuarr, cpuarray):
cpuarray[:] = gpuarr.get()
dh = create_data_handling(domain_size=(10, 10))
dh.add_custom_data('custom_data',
cpu_data_create_func,
gpu_data_create_func,
cpu_to_gpu_transfer_func,
gpu_to_cpu_transfer_func)
assert np.all(dh.custom_data_cpu['custom_data'] == 1)
assert np.all(dh.custom_data_gpu['custom_data'].get() == 0)
dh.to_cpu(name='custom_data')
dh.to_gpu(name='custom_data')
assert 'custom_data' in dh.custom_data_names
def test_log():
dh = create_data_handling(domain_size=(10, 10))
dh.log_on_root()
assert dh.is_root
assert dh.world_rank == 0
def test_save_data():
domain_shape = (2, 2)
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
dh.add_array("src", values_per_cell=9)
dh.fill("src", 1.0, ghost_layers=True)
dh.add_array("dst", values_per_cell=9)
dh.fill("dst", 1.0, ghost_layers=True)
dh.save_all(str(INPUT_FOLDER) + '/datahandling_save_test')
def test_load_data():
domain_shape = (2, 2)
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
dh.add_array("src", values_per_cell=9)
dh.fill("src", 0.0, ghost_layers=True)
dh.add_array("dst", values_per_cell=9)
dh.fill("dst", 0.0, ghost_layers=True)
dh.load_all(str(INPUT_FOLDER) + '/datahandling_load_test')
assert np.all(dh.cpu_arrays['src']) == 1
assert np.all(dh.cpu_arrays['dst']) == 1
domain_shape = (3, 3)
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
dh.add_array("src", values_per_cell=9)
dh.fill("src", 0.0, ghost_layers=True)
dh.add_array("dst", values_per_cell=9)
dh.fill("dst", 0.0, ghost_layers=True)
dh.add_array("dst2", values_per_cell=9)
dh.fill("dst2", 0.0, ghost_layers=True)
dh.load_all(str(INPUT_FOLDER) + '/datahandling_load_test')
assert np.all(dh.cpu_arrays['src']) == 0
assert np.all(dh.cpu_arrays['dst']) == 0
assert np.all(dh.cpu_arrays['dst2']) == 0
@pytest.mark.parametrize("device_number", device_numbers)
def test_array_handler(device_number):
size = (2, 2)
pytest.importorskip('cupy')
array_handler = GPUArrayHandler(device_number)
zero_array = array_handler.zeros(size)
cpu_array = np.empty(size)
array_handler.download(zero_array, cpu_array)
assert np.all(cpu_array) == 0
ones_array = array_handler.ones(size)
cpu_array = np.empty(size)
array_handler.download(ones_array, cpu_array)
assert np.all(cpu_array) == 1
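    # A C-contiguous (2, 2) float64 array has byte strides (2 * 8, 8);
    # Fortran order flips them.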
empty = array_handler.empty(size)
assert empty.strides == (16, 8)
empty = array_handler.empty(shape=size, order="F")
assert empty.strides == (8, 16)
random_array = array_handler.randn(size)
cpu_array = np.empty((20, 40), dtype=np.float64)
gpu_array = array_handler.to_gpu(cpu_array)
assert cpu_array.base is None
assert gpu_array.base is None
assert gpu_array.strides == cpu_array.strides
cpu_array2 = np.empty((20, 40), dtype=np.float64)
cpu_array2 = cpu_array2.swapaxes(0, 1)
gpu_array2 = array_handler.to_gpu(cpu_array2)
assert cpu_array2.base is not None
assert gpu_array2.base is not None
assert gpu_array2.strides == cpu_array2.strides
import numpy as np
import waLBerla as wlb
import pystencils
from pystencils import make_slice
from pathlib import Path
from pystencils.boundaries import BoundaryHandling, Neumann
from pystencils.slicing import slice_from_direction
from pystencils.datahandling.parallel_datahandling import ParallelDataHandling
from pystencils.datahandling import create_data_handling
from tests.test_datahandling import (
access_and_gather, kernel_execution_jacobi, reduction, synchronization, vtk_output)
SCRIPT_FOLDER = Path(__file__).parent.absolute()
INPUT_FOLDER = SCRIPT_FOLDER / "test_data"
try:
import pytest
except ImportError:
import unittest.mock
pytest = unittest.mock.MagicMock()
def test_access_and_gather():
block_size = (4, 7, 1)
num_blocks = (3, 2, 1)
cells = tuple(a * b for a, b in zip(block_size, num_blocks))
blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False,
periodic=(1, 1, 1))
dh = ParallelDataHandling(blocks, default_ghost_layers=2)
access_and_gather(dh, cells)
synchronization(dh, test_gpu=False)
if hasattr(wlb, 'gpu'):
synchronization(dh, test_gpu=True)
def test_gpu():
pytest.importorskip('waLBerla.gpu')
block_size = (4, 7, 1)
num_blocks = (3, 2, 1)
blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, default_ghost_layers=2)
dh.add_array('v', values_per_cell=3, dtype=np.int64, ghost_layers=2, gpu=True)
for b in dh.iterate():
b['v'].fill(42)
dh.all_to_gpu()
for b in dh.iterate():
b['v'].fill(0)
dh.to_cpu('v')
for b in dh.iterate():
np.testing.assert_equal(b['v'], 42)
@pytest.mark.parametrize('target', (pystencils.Target.CPU, pystencils.Target.GPU))
def test_kernel(target):
if target == pystencils.Target.GPU:
pytest.importorskip('waLBerla.gpu')
# 3D
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, default_target=target)
kernel_execution_jacobi(dh, target)
reduction(dh)
# 2D
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 1), cellsPerBlock=(3, 2, 1), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, dim=2, default_target=target)
kernel_execution_jacobi(dh, target)
reduction(dh)
def test_vtk_output():
blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks)
vtk_output(dh)
def test_block_iteration():
block_size = (16, 16, 16)
num_blocks = (2, 2, 2)
blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, default_ghost_layers=2)
dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2)
for b in dh.iterate():
b['v'].fill(1)
s = 0
for b in dh.iterate():
s += np.sum(b['v'])
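    # Each block iterates 16 + 2 * 2 = 20 cells per dimension including ghost
    # layers, and 2 blocks per dimension give 40**3 cells in total.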
assert s == 40*40*40
sl = make_slice[0:18, 0:18, 0:18]
for b in dh.iterate(slice_obj=sl):
b['v'].fill(0)
s = 0
for b in dh.iterate():
s += np.sum(b['v'])
assert s == 40*40*40 - 20*20*20
def test_getter_setter():
pytest.importorskip('waLBerla.gpu')
block_size = (2, 2, 2)
num_blocks = (2, 2, 2)
blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False)
dh = ParallelDataHandling(blocks, default_ghost_layers=2, default_target=pystencils.Target.GPU)
dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2, gpu=True)
assert dh.shape == (4, 4, 4)
assert dh.periodicity == (False, False, False)
assert dh.values_per_cell('v') == 1
assert dh.has_data('v') is True
assert 'v' in dh.array_names
dh.log_on_root()
assert dh.is_root is True
assert dh.world_rank == 0
dh.to_gpu('v')
assert dh.is_on_gpu('v') is True
dh.all_to_cpu()
def test_parallel_datahandling_boundary_conditions():
pytest.importorskip('waLBerla.gpu')
dh = create_data_handling(domain_size=(7, 7), periodicity=True, parallel=True,
default_target=pystencils.Target.GPU)
src = dh.add_array('src', values_per_cell=1)
dh.fill(src.name, 0.0, ghost_layers=True)
dh.fill(src.name, 1.0, ghost_layers=False)
src2 = dh.add_array('src2', values_per_cell=1)
src_cpu = dh.add_array('src_cpu', values_per_cell=1, gpu=False)
dh.fill(src_cpu.name, 0.0, ghost_layers=True)
dh.fill(src_cpu.name, 1.0, ghost_layers=False)
boundary_stencil = [(1, 0), (-1, 0), (0, 1), (0, -1)]
boundary_handling_cpu = BoundaryHandling(dh, src_cpu.name, boundary_stencil,
name="boundary_handling_cpu", target=pystencils.Target.CPU)
boundary_handling = BoundaryHandling(dh, src.name, boundary_stencil,
name="boundary_handling_gpu", target=pystencils.Target.GPU)
neumann = Neumann()
for d in ('N', 'S', 'W', 'E'):
boundary_handling.set_boundary(neumann, slice_from_direction(d, dim=2))
boundary_handling_cpu.set_boundary(neumann, slice_from_direction(d, dim=2))
boundary_handling.prepare()
boundary_handling_cpu.prepare()
boundary_handling_cpu()
dh.all_to_gpu()
boundary_handling()
dh.all_to_cpu()
for block in dh.iterate():
np.testing.assert_almost_equal(block[src_cpu.name], block[src.name])
assert dh.custom_data_names == ('boundary_handling_cpuIndexArrays', 'boundary_handling_gpuIndexArrays')
dh.swap(src.name, src2.name, gpu=True)
def test_save_data():
domain_shape = (2, 2)
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1, parallel=True)
dh.add_array("src", values_per_cell=9)
dh.fill("src", 1.0, ghost_layers=True)
dh.add_array("dst", values_per_cell=9)
dh.fill("dst", 1.0, ghost_layers=True)
dh.save_all(str(INPUT_FOLDER) + '/datahandling_parallel_save_test')
def test_load_data():
domain_shape = (2, 2)
dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1, parallel=True)
dh.add_array("src", values_per_cell=9)
dh.fill("src", 0.0, ghost_layers=True)
dh.add_array("dst", values_per_cell=9)
dh.fill("dst", 0.0, ghost_layers=True)
dh.load_all(str(INPUT_FOLDER) + '/datahandling_parallel_load_test')
    assert np.all(dh.gather_array('src')) == 1
    assert np.all(dh.gather_array('dst')) == 1
%% Cell type:code id: tags:
``` python
from pystencils.session import *
from pystencils.astnodes import Block, Conditional, SympyAssignment
```
%% Cell type:code id: tags:
``` python
src, dst = ps.fields("src, dst: double[2D]", layout='c')
true_block = Block([SympyAssignment(dst[0, 0], src[-1, 0])])
false_block = Block([SympyAssignment(dst[0, 0], src[1, 0])])
ur = [true_block, Conditional(dst.center() > 0.0, true_block, false_block)]
ast = ps.create_kernel(ur)
```
%% Cell type:code id: tags:
``` python
ps.show_code(ast)
```
%% Output
%% Cell type:code id: tags:
``` python
```
import pystencils as ps
from pystencils.astnodes import Block, Conditional, SympyAssignment
def test_dot_print():
src, dst = ps.fields("src, dst: double[2D]", layout='c')
true_block = Block([SympyAssignment(dst[0, 0], src[-1, 0])])
false_block = Block([SympyAssignment(dst[0, 0], src[1, 0])])
ur = [true_block, Conditional(dst.center() > 0.0, true_block, false_block)]
ast = ps.create_kernel(ur)
import pytest
import sympy as sp
import pystencils as ps
@@ -6,13 +7,15 @@ from pystencils.fast_approximation import (
def test_fast_sqrt():
pytest.importorskip('cupy')
f, g = ps.fields("f, g: double[2D]")
expr = sp.sqrt(f[0, 0] + f[1, 0])
assert len(insert_fast_sqrts(expr).atoms(fast_sqrt)) == 1
assert len(insert_fast_sqrts([expr])[0].atoms(fast_sqrt)) == 1
    ast_gpu = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_sqrts(expr)), target=ps.Target.GPU)
    ast_gpu.compile()

    code_str = ps.get_code_str(ast_gpu)
assert '__fsqrt_rn' in code_str
expr = ps.Assignment(sp.Symbol("tmp"), 3 / sp.sqrt(f[0, 0] + f[1, 0]))
@@ -20,12 +23,14 @@ def test_fast_sqrt():
ac = ps.AssignmentCollection([expr], [])
assert len(insert_fast_sqrts(ac).main_assignments[0].atoms(fast_inv_sqrt)) == 1
    ast_gpu = ps.create_kernel(insert_fast_sqrts(ac), target=ps.Target.GPU)
    ast_gpu.compile()

    code_str = ps.get_code_str(ast_gpu)
assert '__frsqrt_rn' in code_str
def test_fast_divisions():
pytest.importorskip('cupy')
f, g = ps.fields("f, g: double[2D]")
expr = f[0, 0] / f[1, 0]
assert len(insert_fast_divisions(expr).atoms(fast_division)) == 1
@@ -33,6 +38,7 @@ def test_fast_divisions():
expr = 1 / f[0, 0] * 2 / f[0, 1]
assert len(insert_fast_divisions(expr).atoms(fast_division)) == 1
    ast = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_divisions(expr)), target=ps.Target.GPU)
    ast.compile()
    code_str = ps.get_code_str(ast)
assert '__fdividef' in code_str
%% Cell type:code id: tags:
``` python
from pystencils.session import *
from pystencils.fd.derivation import *
```
%% Cell type:markdown id: tags:
# 2D standard stencils
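The first derivation below should recover the classic second-order central difference for $\partial_x^2$,
$$f''(x) \approx \frac{f(x-h) - 2 f(x) + f(x+h)}{h^2},$$
i.e. the one-dimensional stencil $[1, -2, 1]$ embedded in a $3 \times 3$ neighborhood.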
%% Cell type:code id: tags:
``` python
stencil = [(-1, 0), (1, 0), (0, -1), (0, 1), (0, 0)]
standard_2d_00 = FiniteDifferenceStencilDerivation((0,0), stencil)
f = ps.fields("f: [2D]")
standard_2d_00_res = standard_2d_00.get_stencil()
res = standard_2d_00_res.apply(f.center)
expected = f[-1, 0] - 2 * f[0, 0] + f[1, 0]
assert res == expected
```
%% Cell type:code id: tags:
``` python
assert standard_2d_00_res.accuracy == 2
assert not standard_2d_00_res.is_isotropic
standard_2d_00_res
```
%% Output
Finite difference stencil of accuracy 2, isotropic error: False
%% Cell type:code id: tags:
``` python
standard_2d_00.get_stencil().as_array()
```
%% Output
$\displaystyle \left[\begin{matrix}0 & 0 & 0\\1 & -2 & 1\\0 & 0 & 0\end{matrix}\right]$
[[0, 0, 0], [1, -2, 1], [0, 0, 0]]
%% Cell type:markdown id: tags:
# 2D isotropic stencils
## second x-derivative
%% Cell type:code id: tags:
``` python
stencil = [(i, j) for i in (-1, 0, 1) for j in (-1, 0, 1)]
isotropic_2d_00 = FiniteDifferenceStencilDerivation((0,0), stencil)
isotropic_2d_00_res = isotropic_2d_00.get_stencil(isotropic=True)
assert isotropic_2d_00_res.is_isotropic
assert isotropic_2d_00_res.accuracy == 2
isotropic_2d_00_res
```
%% Output
Finite difference stencil of accuracy 2, isotropic error: True
%% Cell type:code id: tags:
``` python
isotropic_2d_00_res.as_array()
```
%% Output
$\displaystyle \left[\begin{matrix}\frac{1}{12} & - \frac{1}{6} & \frac{1}{12}\\\frac{5}{6} & - \frac{5}{3} & \frac{5}{6}\\\frac{1}{12} & - \frac{1}{6} & \frac{1}{12}\end{matrix}\right]$
[[1/12, -1/6, 1/12], [5/6, -5/3, 5/6], [1/12, -1/6, 1/12]]
%% Cell type:code id: tags:
``` python
plt.figure(figsize=(2,2))
isotropic_2d_00_res.visualize()
```
%% Output
%% Cell type:code id: tags:
``` python
expected_result = sp.Array([[1, -2, 1], [10, -20, 10], [1, -2, 1]]) / 12
assert expected_result == isotropic_2d_00_res.as_array()
```
%% Cell type:markdown id: tags:
## Isotropic laplacian
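Summing the isotropic $\partial_x^2$ and $\partial_y^2$ stencils should yield the well-known isotropic nine-point Laplacian $\frac{1}{6}\begin{pmatrix}1 & 4 & 1\\4 & -20 & 4\\1 & 4 & 1\end{pmatrix}$, which the cells below verify.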
%% Cell type:code id: tags:
``` python
isotropic_2d_11 = FiniteDifferenceStencilDerivation((1,1), stencil)
isotropic_2d_11_res = isotropic_2d_11.get_stencil(isotropic=True)
iso_laplacian = isotropic_2d_00_res.as_array() + isotropic_2d_11_res.as_array()
iso_laplacian
```
%% Output
$\displaystyle \left[\begin{matrix}\frac{1}{6} & \frac{2}{3} & \frac{1}{6}\\\frac{2}{3} & - \frac{10}{3} & \frac{2}{3}\\\frac{1}{6} & \frac{2}{3} & \frac{1}{6}\end{matrix}\right]$
[[1/6, 2/3, 1/6], [2/3, -10/3, 2/3], [1/6, 2/3, 1/6]]
%% Cell type:code id: tags:
``` python
expected_result = sp.Array([[1, 4, 1], [4, -20, 4], [1, 4, 1]]) / 6
assert iso_laplacian == expected_result
```
%% Cell type:markdown id: tags:
# stencils for staggered fields
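Staggered stencils are evaluated at cell faces rather than cell centers, so their points carry half-integer offsets (the $\pm\tfrac{1}{2}$ entries constructed below).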
%% Cell type:code id: tags:
``` python
half = sp.Rational(1, 2)
fd_points_ex = (
(half, 0),
(-half, 0),
(half, 1),
(half, -1),
(-half, 1),
(-half, -1)
)
assert set(fd_points_ex) == set(FiniteDifferenceStaggeredStencilDerivation("E", 2).stencil)
fd_points_ey = (
(0, half),
(0, -half),
(-1,-half),
(-1, half),
(1, -half),
(1, half)
)
assert set(fd_points_ey) == set(FiniteDifferenceStaggeredStencilDerivation("N",2).stencil)
fd_points_c = (
(half, half),
(-half, -half),
(half, -half),
(-half, half)
)
assert set(fd_points_c) == set(FiniteDifferenceStaggeredStencilDerivation("NE",2).stencil)
assert len(FiniteDifferenceStaggeredStencilDerivation("E",3).points) == 10
assert len(FiniteDifferenceStaggeredStencilDerivation("NE",3).points) == 12
assert len(FiniteDifferenceStaggeredStencilDerivation("TNE",3).points) == 8
```
%% Cell type:code id: tags:
``` python
c = ps.fields("c: [2D]")
c3 = ps.fields("c3: [3D]")
assert FiniteDifferenceStaggeredStencilDerivation("E", 2, (0,)).apply(c.center) == c[1, 0] - c[0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("W", 2, (0,)).apply(c.center) == c[0, 0] - c[-1, 0]
assert FiniteDifferenceStaggeredStencilDerivation("N", 2, (1,)).apply(c.center) == c[0, 1] - c[0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("S", 2, (1,)).apply(c.center) == c[0, 0] - c[0, -1]
assert FiniteDifferenceStaggeredStencilDerivation("E", 3, (0,)).apply(c3.center) == c3[1, 0, 0] - c3[0, 0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("W", 3, (0,)).apply(c3.center) == c3[0, 0, 0] - c3[-1, 0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("N", 3, (1,)).apply(c3.center) == c3[0, 1, 0] - c3[0, 0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("S", 3, (1,)).apply(c3.center) == c3[0, 0, 0] - c3[0, -1, 0]
assert FiniteDifferenceStaggeredStencilDerivation("T", 3, (2,)).apply(c3.center) == c3[0, 0, 1] - c3[0, 0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("B", 3, (2,)).apply(c3.center) == c3[0, 0, 0] - c3[0, 0, -1]
assert FiniteDifferenceStaggeredStencilDerivation("S", 2, (0,)).apply(c.center) == \
(c[1, 0] + c[1, -1] - c[-1, 0] - c[-1, -1])/4
assert FiniteDifferenceStaggeredStencilDerivation("NE", 2, (0,)).apply(c.center) + \
FiniteDifferenceStaggeredStencilDerivation("NE", 2, (1,)).apply(c.center) == c[1, 1] - c[0, 0]
assert FiniteDifferenceStaggeredStencilDerivation("NE", 3, (0,)).apply(c3.center) + \
FiniteDifferenceStaggeredStencilDerivation("NE", 3, (1,)).apply(c3.center) == c3[1, 1, 0] - c3[0, 0, 0]
```
%% Cell type:code id: tags:
``` python
d = FiniteDifferenceStaggeredStencilDerivation("NE", 2, (0, 1))
assert d.apply(c.center) == c[0,0] + c[1,1] - c[1,0] - c[0,1]
d.visualize()
```
%% Output
%% Cell type:code id: tags:
``` python
v3 = ps.fields("v(3): [3D]")
for i in range(*v3.index_shape):
assert FiniteDifferenceStaggeredStencilDerivation("E", 3, (0,)).apply(v3.center_vector[i]) == \
v3[1,0,0](i) - v3[0,0,0](i)
```