diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index c49077bb27a2af158624e7222d999973487f0951..449e5e2227c63121d1aeb6c824423c4fbb4e54aa 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -65,16 +65,21 @@ minimal-windows:
     - python -c "import numpy"
     - python setup.py quicktest
 
-minimal-ubuntu:
+ubuntu:
   stage: test
   except:
     variables:
       - $ENABLE_NIGHTLY_BUILDS
-  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/minimal_ubuntu
+  image: i10git.cs.fau.de:5005/pycodegen/pycodegen/ubuntu
   script:
-    - python3 setup.py quicktest
+    - mkdir -p ~/.config/matplotlib
+    - echo "backend:template" > ~/.config/matplotlib/matplotlibrc
+    - sed -i 's/--doctest-modules //g' pytest.ini
+    - pytest-3 -v -m "not longrun"
   tags:
     - docker
+    - cuda
+    - AVX
 
 minimal-conda:
   stage: test
diff --git a/conftest.py b/conftest.py
index c20bde40916e7f7f5163c20e1c48c7b0f3d3b3a2..272f42303e620cd13fc7f26648bc03d437b1fda6 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,8 +1,8 @@
 import os
 import runpy
 import sys
-import warnings
 import tempfile
+import warnings
 
 import nbformat
 import pytest
@@ -34,7 +34,7 @@ def add_path_to_ignore(path):
 
 
 collect_ignore = [os.path.join(SCRIPT_FOLDER, "doc", "conf.py"),
-        os.path.join(SCRIPT_FOLDER, "pystencils", "opencl", "opencl.autoinit")]
+                  os.path.join(SCRIPT_FOLDER, "pystencils", "opencl", "opencl.autoinit")]
 add_path_to_ignore('pystencils_tests/benchmark')
 add_path_to_ignore('_local_tmp')
 
@@ -44,7 +44,7 @@ collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/autodiff.py")]
 try:
     import pycuda
 except ImportError:
-    collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/pystencils_tests/test_cudagpu.py")]
+    collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils_tests/test_cudagpu.py")]
     add_path_to_ignore('pystencils/gpucuda')
 
 try:
@@ -73,7 +73,22 @@ try:
     import blitzdb
 except ImportError:
     add_path_to_ignore('pystencils/runhelper')
+    collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils_tests/test_parameterstudy.py")]
+
+try:
+    import islpy
+except ImportError:
+    collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/integer_set_analysis.py")]
+
+try:
+    import graphviz
+except ImportError:
+    collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/backends/dot.py")]
 
+try:
+    import pyevtk
+except ImportError:
+    collect_ignore += [os.path.join(SCRIPT_FOLDER, "pystencils/datahandling/vtk.py")]
 
 collect_ignore += [os.path.join(SCRIPT_FOLDER, 'setup.py')]
 
@@ -83,8 +98,6 @@ for root, sub_dirs, files in os.walk('.'):
             collect_ignore.append(f)
 
 
-
-
 class IPythonMockup:
     def run_line_magic(self, *args, **kwargs):
         pass
@@ -131,7 +144,7 @@ class IPyNbFile(pytest.File):
         exporter.exclude_markdown = True
         exporter.exclude_input_prompt = True
 
-        notebook_contents = self.fspath.open()
+        notebook_contents = self.fspath.open(encoding='utf-8')
 
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore", "IPython.core.inputsplitter is deprecated")
diff --git a/doc/notebooks/01_tutorial_getting_started.ipynb b/doc/notebooks/01_tutorial_getting_started.ipynb
index 97ff73da1cf3bb63e19fa6fac122499120d71cee..70b3c2a8a3e86d81fb8972ad50ffbdd0ad4bcb89 100644
--- a/doc/notebooks/01_tutorial_getting_started.ipynb
+++ b/doc/notebooks/01_tutorial_getting_started.ipynb
@@ -932,14 +932,18 @@
     }
    ],
    "source": [
-    "import requests\n",
-    "import imageio\n",
-    "from io import BytesIO\n",
+    "try:\n",
+    "    import requests\n",
+    "    import imageio\n",
+    "    from io import BytesIO\n",
     "\n",
-    "response = requests.get(\"https://www.python.org/static/img/python-logo.png\")\n",
-    "img = imageio.imread(BytesIO(response.content)).astype(np.double)\n",
-    "img /= img.max()\n",
-    "plt.imshow(img);"
+    "    response = requests.get(\"https://www.python.org/static/img/python-logo.png\")\n",
+    "    img = imageio.imread(BytesIO(response.content)).astype(np.double)\n",
+    "    img /= img.max()\n",
+    "    plt.imshow(img);\n",
+    "except ImportError:\n",
+    "    print(\"No requests installed\")\n",
+    "    img = np.random.random((82, 290, 4))"
    ]
   },
   {
diff --git a/doc/notebooks/demo_plotting_and_animation.ipynb b/doc/notebooks/demo_plotting_and_animation.ipynb
index 07c980788f7617b1bfb8bfad8a13f2f2d7b5c33d..13d658a9cb4980a78cc984caeeb78e220b543553 100644
--- a/doc/notebooks/demo_plotting_and_animation.ipynb
+++ b/doc/notebooks/demo_plotting_and_animation.ipynb
@@ -6,7 +6,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from pystencils.session import *"
+    "from pystencils.session import *\n",
+    "\n",
+    "import shutil"
    ]
   },
   {
@@ -408,9 +410,12 @@
     }
    ],
    "source": [
-    "plt.figure()\n",
-    "animation = plt.scalar_field_animation(run_func, frames=60)\n",
-    "ps.jupyter.display_as_html_video(animation)"
+    "if shutil.which(\"ffmpeg\") is not None:\n",
+    "    plt.figure()\n",
+    "    animation = plt.scalar_field_animation(run_func, frames=60)\n",
+    "    ps.jupyter.display_as_html_video(animation)\n",
+    "else:\n",
+    "    print(\"No ffmpeg installed\")"
    ]
   },
   {
@@ -474,8 +479,11 @@
     }
    ],
    "source": [
-    "animation = plt.surface_plot_animation(run_func, frames=60)\n",
-    "ps.jupyter.display_as_html_video(animation)"
+    "if shutil.which(\"ffmpeg\") is not None:\n",
+    "    animation = plt.surface_plot_animation(run_func, frames=60)\n",
+    "    ps.jupyter.display_as_html_video(animation)\n",
+    "else:\n",
+    "    print(\"No ffmpeg installed\")"
    ]
   },
   {
@@ -636,9 +644,12 @@
     }
    ],
    "source": [
-    "plt.figure()\n",
-    "animation = plt.vector_field_animation(run_func, frames=60)\n",
-    "ps.jupyter.display_as_html_video(animation)"
+    "if shutil.which(\"ffmpeg\") is not None:\n",
+    "    plt.figure()\n",
+    "    animation = plt.vector_field_animation(run_func, frames=60)\n",
+    "    ps.jupyter.display_as_html_video(animation)\n",
+    "else:\n",
+    "    print(\"No ffmpeg installed\")"
    ]
   },
   {
@@ -671,8 +682,11 @@
     }
    ],
    "source": [
-    "animation = plt.vector_field_magnitude_animation(run_func, frames=60)\n",
-    "ps.jupyter.display_as_html_video(animation)"
+    "if shutil.which(\"ffmpeg\") is not None:\n",
+    "    animation = plt.vector_field_magnitude_animation(run_func, frames=60)\n",
+    "    ps.jupyter.display_as_html_video(animation)\n",
+    "else:\n",
+    "    print(\"No ffmpeg installed\")"
    ]
   },
   {
diff --git a/doc/notebooks/demo_wave_equation.ipynb b/doc/notebooks/demo_wave_equation.ipynb
index a4d5a9ff7c1cf5df33b83d3d48c3272f214a4d39..2aa083d96b33e6b2c69bee0783e1c55b4ab616fd 100644
--- a/doc/notebooks/demo_wave_equation.ipynb
+++ b/doc/notebooks/demo_wave_equation.ipynb
@@ -8,7 +8,9 @@
    },
    "outputs": [],
    "source": [
-    "from pystencils.session import *"
+    "from pystencils.session import *\n",
+    "\n",
+    "import shutil"
    ]
   },
   {
@@ -397,8 +399,11 @@
     }
    ],
    "source": [
-    "ani = plt.surface_plot_animation(run, zlim=(-1, 1))\n",
-    "ps.jupyter.display_as_html_video(ani)"
+    "if shutil.which(\"ffmpeg\") is not None:\n",
+    "    ani = plt.surface_plot_animation(run, zlim=(-1, 1))\n",
+    "    ps.jupyter.display_as_html_video(ani)\n",
+    "else:\n",
+    "    print(\"No ffmpeg installed\")"
    ]
   },
   {
@@ -466,9 +471,12 @@
     "            u_arrays[0], u_arrays[1], u_arrays[2] = u_arrays[1], u_arrays[2], u_arrays[0]\n",
     "        return u_arrays[2]\n",
     "    \n",
-    "    ani = plt.surface_plot_animation(run_LLVM, zlim=(-1, 1))\n",
     "    assert np.isfinite(np.max(u_arrays[2]))\n",
-    "ps.jupyter.display_as_html_video(ani)"
+    "    if shutil.which(\"ffmpeg\") is not None:\n",
+    "        ani = plt.surface_plot_animation(run_LLVM, zlim=(-1, 1))\n",
+    "        ps.jupyter.display_as_html_video(ani)\n",
+    "    else:\n",
+    "        print(\"No ffmpeg installed\")"
    ]
   },
   {
diff --git a/pystencils/__init__.py b/pystencils/__init__.py
index ed08ff1327c68a1a116edb21547a86f8b27a8c81..18679382f0014ba12844c72fa965a3ad4fe6e196 100644
--- a/pystencils/__init__.py
+++ b/pystencils/__init__.py
@@ -5,7 +5,7 @@ from . import stencil as stencil
 from .assignment import Assignment, assignment_from_stencil
 from .data_types import TypedSymbol
 from .datahandling import create_data_handling
-from .display_utils import show_code, to_dot
+from .display_utils import show_code, get_code_obj, get_code_str, to_dot
 from .field import Field, FieldType, fields
 from .kernel_decorator import kernel
 from .kernelcreation import create_indexed_kernel, create_kernel, create_staggered_kernel
@@ -25,7 +25,7 @@ __all__ = ['Field', 'FieldType', 'fields',
            'TypedSymbol',
            'make_slice',
            'create_kernel', 'create_indexed_kernel', 'create_staggered_kernel',
-           'show_code', 'to_dot',
+           'show_code', 'to_dot', 'get_code_obj', 'get_code_str',
            'AssignmentCollection',
            'Assignment',
            'assignment_from_stencil',
diff --git a/pystencils/assignment.py b/pystencils/assignment.py
index 0bf68799491be29886d215d5ff76010c034bd174..92027f7773d2202765ab2ef09c5269654098060e 100644
--- a/pystencils/assignment.py
+++ b/pystencils/assignment.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import numpy as np
 import sympy as sp
 from sympy.printing.latex import LatexPrinter
@@ -24,9 +23,20 @@ def assignment_str(assignment):
 
 if Assignment:
 
+    _old_new = sp.codegen.ast.Assignment.__new__
+
+    def _Assignment__new__(cls, lhs, rhs, *args, **kwargs):
+        if isinstance(lhs, (list, tuple, sp.Matrix)) and isinstance(rhs, (list, tuple, sp.Matrix)):
+            assert len(lhs) == len(rhs), f'{lhs} and {rhs} must have same length when performing vector assignment!'
+            return tuple(_old_new(cls, a, b, *args, **kwargs) for a, b in zip(lhs, rhs))
+        return _old_new(cls, lhs, rhs, *args, **kwargs)
+
     Assignment.__str__ = assignment_str
+    Assignment.__new__ = _Assignment__new__
     LatexPrinter._print_Assignment = print_assignment_latex
 
+    sp.MutableDenseMatrix.__hash__ = lambda self: hash(tuple(self))
+
 else:
     # back port for older sympy versions that don't have Assignment  yet
 
diff --git a/pystencils/astnodes.py b/pystencils/astnodes.py
index a9c7c98f2a685112a249b03a7c2bd8be0afcd363..8ec80917af20aefc5883c68ac51f360005b4350b 100644
--- a/pystencils/astnodes.py
+++ b/pystencils/astnodes.py
@@ -110,10 +110,17 @@ class Conditional(Node):
         return result
 
     def __str__(self):
-        return 'if:({!s}) '.format(self.condition_expr)
+        return self.__repr__()
 
     def __repr__(self):
-        return 'if:({!r}) '.format(self.condition_expr)
+        repr = 'if:({!r}) '.format(self.condition_expr)
+        if self.true_block:
+            repr += '\n\t{}) '.format(self.true_block)
+        if self.false_block:
+            repr = 'else: '.format(self.false_block)
+            repr += '\n\t{} '.format(self.false_block)
+
+        return repr
 
     def replace_by_true_block(self):
         """Replaces the conditional by its True block"""
@@ -284,7 +291,10 @@ class Block(Node):
         self._nodes = nodes
         self.parent = None
         for n in self._nodes:
-            n.parent = self
+            try:
+                n.parent = self
+            except AttributeError:
+                pass
 
     @property
     def args(self):
diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py
index cf58431309e1ddb0ba963fa808f9db5261448091..46d9e3a77e6a163acec4fbfcf1ddf0f9a15f8712 100644
--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -1,3 +1,4 @@
+import re
 from collections import namedtuple
 from typing import Set
 
@@ -10,11 +11,12 @@ from sympy.printing.ccode import C89CodePrinter
 from pystencils.astnodes import KernelFunction, Node
 from pystencils.cpu.vectorization import vec_all, vec_any
 from pystencils.data_types import (
-    PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression, reinterpret_cast_func,
-    vector_memory_access)
+    PointerType, VectorType, address_of, cast_func, create_type, get_type_of_expression,
+    reinterpret_cast_func, vector_memory_access)
 from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
 from pystencils.integer_functions import (
-    bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor, int_div, int_power_of_2, modulo_ceil)
+    bit_shift_left, bit_shift_right, bitwise_and, bitwise_or, bitwise_xor,
+    int_div, int_power_of_2, modulo_ceil)
 
 try:
     from sympy.printing.ccode import C99CodePrinter as CCodePrinter
@@ -23,6 +25,9 @@ except ImportError:
 
 __all__ = ['generate_c', 'CustomCodeNode', 'PrintNode', 'get_headers', 'CustomSympyPrinter']
 
+
+HEADER_REGEX = re.compile(r'^[<"].*[">]$')
+
 KERNCRAFT_NO_TERNARY_MODE = False
 
 
@@ -91,7 +96,7 @@ def get_global_declarations(ast):
 
     visit_node(ast)
 
-    return sorted(set(global_declarations), key=lambda x: str(x))
+    return sorted(set(global_declarations), key=str)
 
 
 def get_headers(ast_node: Node) -> Set[str]:
@@ -111,6 +116,9 @@ def get_headers(ast_node: Node) -> Set[str]:
         if isinstance(g, Node):
             headers.update(get_headers(g))
 
+    for h in headers:
+        assert HEADER_REGEX.match(h), f'header /{h}/ does not follow the pattern /"..."/ or /<...>/'
+
     return sorted(headers)
 
 
@@ -392,6 +400,13 @@ class CustomSympyPrinter(CCodePrinter):
             return self._print(expr.args[0])
         elif isinstance(expr, fast_inv_sqrt):
             return "({})".format(self._print(1 / sp.sqrt(expr.args[0])))
+        elif isinstance(expr, sp.Abs):
+            return "abs({})".format(self._print(expr.args[0]))
+        elif isinstance(expr, sp.Mod):
+            if expr.args[0].is_integer and expr.args[1].is_integer:
+                return "({} % {})".format(self._print(expr.args[0]), self._print(expr.args[1]))
+            else:
+                return "fmod({}, {})".format(self._print(expr.args[0]), self._print(expr.args[1]))
         elif expr.func in infix_functions:
             return "(%s %s %s)" % (self._print(expr.args[0]), infix_functions[expr.func], self._print(expr.args[1]))
         elif expr.func == int_power_of_2:
diff --git a/pystencils/backends/cuda_backend.py b/pystencils/backends/cuda_backend.py
index 8bc58468901a006bc7ef8f278e3e7a544234de52..d590d65b4082e72745658f0b06eb152d64872944 100644
--- a/pystencils/backends/cuda_backend.py
+++ b/pystencils/backends/cuda_backend.py
@@ -3,7 +3,7 @@ from os.path import dirname, join
 from pystencils.astnodes import Node
 from pystencils.backends.cbackend import CBackend, CustomSympyPrinter, generate_c
 from pystencils.fast_approximation import fast_division, fast_inv_sqrt, fast_sqrt
-from pystencils.interpolation_astnodes import InterpolationMode
+from pystencils.interpolation_astnodes import DiffInterpolatorAccess, InterpolationMode
 
 with open(join(dirname(__file__), 'cuda_known_functions.txt')) as f:
     lines = f.readlines()
@@ -70,10 +70,13 @@ class CudaSympyPrinter(CustomSympyPrinter):
         super(CudaSympyPrinter, self).__init__()
         self.known_functions.update(CUDA_KNOWN_FUNCTIONS)
 
-    def _print_TextureAccess(self, node):
-        dtype = node.texture.field.dtype.numpy_dtype
+    def _print_InterpolatorAccess(self, node):
+        dtype = node.interpolator.field.dtype.numpy_dtype
 
-        if node.texture.interpolation_mode == InterpolationMode.CUBIC_SPLINE:
+        if type(node) == DiffInterpolatorAccess:
+            # cubicTex3D_1st_derivative_x(texture tex, float3 coord)
+            template = f"cubicTex%iD_1st_derivative_{list(reversed('xyz'[:node.ndim]))[node.diff_coordinate_idx]}(%s, %s)"  # noqa
+        elif node.interpolator.interpolation_mode == InterpolationMode.CUBIC_SPLINE:
             template = "cubicTex%iDSimple(%s, %s)"
         else:
             if dtype.itemsize > 4:
@@ -84,8 +87,8 @@ class CudaSympyPrinter(CustomSympyPrinter):
                 template = "tex%iD(%s, %s)"
 
         code = template % (
-            node.texture.field.spatial_dimensions,
-            str(node.texture),
+            node.interpolator.field.spatial_dimensions,
+            str(node.interpolator),
             # + 0.5 comes from Nvidia's staggered indexing
             ', '.join(self._print(o + 0.5) for o in reversed(node.offsets))
         )
diff --git a/pystencils/boundaries/boundaryhandling.py b/pystencils/boundaries/boundaryhandling.py
index 94d12619663190104f80894efebdc0955d5ac592..0a33fde2e2cee7a2f8f96d59d119f272b89d5b6e 100644
--- a/pystencils/boundaries/boundaryhandling.py
+++ b/pystencils/boundaries/boundaryhandling.py
@@ -8,6 +8,8 @@ from pystencils.boundaries.createindexlist import (
     create_boundary_index_array, numpy_data_type_for_boundary_object)
 from pystencils.cache import memorycache
 from pystencils.data_types import TypedSymbol, create_type
+from pystencils.datahandling import ParallelDataHandling
+from pystencils.datahandling.pycuda import PyCudaArrayHandler
 from pystencils.field import Field
 from pystencils.kernelparameters import FieldPointerSymbol
 
@@ -96,16 +98,23 @@ class BoundaryHandling:
         def to_gpu(gpu_version, cpu_version):
             gpu_version = gpu_version.boundary_object_to_index_list
             cpu_version = cpu_version.boundary_object_to_index_list
+
+            if ParallelDataHandling and isinstance(self.data_handling, ParallelDataHandling):
+                array_handler = PyCudaArrayHandler()
+            else:
+                array_handler = self.data_handling.array_handler
+
             for obj, cpu_arr in cpu_version.items():
                 if obj not in gpu_version or gpu_version[obj].shape != cpu_arr.shape:
-                    gpu_version[obj] = self.data_handling.array_handler.to_gpu(cpu_arr)
+                    gpu_version[obj] = array_handler.to_gpu(cpu_arr)
                 else:
-                    self.data_handling.array_handler.upload(gpu_version[obj], cpu_arr)
+                    array_handler.upload(gpu_version[obj], cpu_arr)
 
         class_ = self.IndexFieldBlockData
         class_.to_cpu = to_cpu
         class_.to_gpu = to_gpu
-        data_handling.add_custom_class(self._index_array_name, class_)
+        gpu = self._target in data_handling._GPU_LIKE_TARGETS
+        data_handling.add_custom_class(self._index_array_name, class_, cpu=True, gpu=gpu)
 
     @property
     def data_handling(self):
@@ -253,11 +262,13 @@ class BoundaryHandling:
         """
         Writes a VTK field where each cell with the given boundary is marked with 1, other cells are 0
         This can be used to display the simulation geometry in Paraview
-        :param file_name: vtk filename
-        :param boundaries: boundary object, or special string 'domain' for domain cells or special string 'all' for all
-                         boundary conditions.
-                         can also  be a sequence, to write multiple boundaries to VTK file
-        :param ghost_layers: number of ghost layers to write, or True for all, False for none
+
+        Params:
+            file_name: vtk filename
+            boundaries: boundary object, or special string 'domain' for domain cells or special string 'all' for all
+                      boundary conditions.
+                      can also  be a sequence, to write multiple boundaries to VTK file
+            ghost_layers: number of ghost layers to write, or True for all, False for none
         """
         if boundaries == 'all':
             boundaries = list(self._boundary_object_to_boundary_info.keys()) + ['domain']
@@ -329,7 +340,7 @@ class BoundaryHandling:
             self.kernel = kernel
 
     class IndexFieldBlockData:
-        def __init__(self):
+        def __init__(self, *args, **kwargs):
             self.boundary_object_to_index_list = {}
             self.boundary_object_to_data_setter = {}
 
diff --git a/pystencils/cpu/cpujit.py b/pystencils/cpu/cpujit.py
index 6376ffb85bff4dca347e0dde7044a1d92ad7bab7..07a8a84d9c6d10ed8580a7ef83d6720b522cfd98 100644
--- a/pystencils/cpu/cpujit.py
+++ b/pystencils/cpu/cpujit.py
@@ -270,7 +270,8 @@ if( PyErr_Occurred() ) {{ return NULL; }}
 template_extract_complex = """
 PyObject * obj_{name} = PyDict_GetItemString(kwargs, "{name}");
 if( obj_{name} == NULL) {{  PyErr_SetString(PyExc_TypeError, "Keyword argument '{name}' missing"); return NULL; }};
-{target_type} {name}{{ {extract_function_real}( obj_{name} ), {extract_function_imag}( obj_{name} ) }};
+{target_type} {name}{{ ({real_type}) {extract_function_real}( obj_{name} ),
+                       ({real_type}) {extract_function_imag}( obj_{name} ) }};
 if( PyErr_Occurred() ) {{ return NULL; }}
 """
 
@@ -409,6 +410,8 @@ def create_function_boilerplate_code(parameter_info, name, insert_checks=True):
                 pre_call_code += template_extract_complex.format(extract_function_real=extract_function[0],
                                                                  extract_function_imag=extract_function[1],
                                                                  target_type=target_type,
+                                                                 real_type="float" if target_type == "ComplexFloat"
+                                                                           else "double",
                                                                  name=param.symbol.name)
             else:
                 pre_call_code += template_extract_scalar.format(extract_function=extract_function,
diff --git a/pystencils/data_types.py b/pystencils/data_types.py
index 5d553006bcca4c1cccbddb681d8710a92b75c437..786351f528573179543900bde760f675ddfe10a6 100644
--- a/pystencils/data_types.py
+++ b/pystencils/data_types.py
@@ -575,9 +575,7 @@ def get_type_of_expression(expr,
     raise NotImplementedError("Could not determine type for", expr, type(expr))
 
 
-class Type(sp.Basic):
-    is_Atom = True
-
+class Type(sp.Atom):
     def __new__(cls, *args, **kwargs):
         return sp.Basic.__new__(cls)
 
diff --git a/pystencils/display_utils.py b/pystencils/display_utils.py
index 610c404b709885e7445eba3be412f6811fca0241..cb09197bf175b43c4896072693ea11253976691f 100644
--- a/pystencils/display_utils.py
+++ b/pystencils/display_utils.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 
 import sympy as sp
 
@@ -35,10 +35,10 @@ def highlight_cpp(code: str):
     return HTML(highlight(code, CppLexer(), HtmlFormatter()))
 
 
-def show_code(ast: KernelFunction, custom_backend=None):
+def get_code_obj(ast: Union[KernelFunction, KernelWrapper], custom_backend=None):
     """Returns an object to display generated code (C/C++ or CUDA)
 
-    Can either  be displayed as HTML in Jupyter notebooks or printed as normal string.
+    Can either be displayed as HTML in Jupyter notebooks or printed as normal string.
     """
     from pystencils.backends.cbackend import generate_c
 
@@ -65,3 +65,37 @@ def show_code(ast: KernelFunction, custom_backend=None):
         def __repr__(self):
             return generate_c(self.ast, dialect=dialect, custom_backend=custom_backend)
     return CodeDisplay(ast)
+
+
+def get_code_str(ast, custom_backend=None):
+    return str(get_code_obj(ast, custom_backend))
+
+
+def _isnotebook():
+    try:
+        shell = get_ipython().__class__.__name__
+        if shell == 'ZMQInteractiveShell':
+            return True   # Jupyter notebook or qtconsole
+        elif shell == 'TerminalInteractiveShell':
+            return False  # Terminal running IPython
+        else:
+            return False  # Other type (?)
+    except NameError:
+        return False
+
+
+def show_code(ast: Union[KernelFunction, KernelWrapper], custom_backend=None):
+    code = get_code_obj(ast, custom_backend)
+
+    if _isnotebook():
+        from IPython.display import display
+        display(code)
+    else:
+        try:
+            import rich.syntax
+            import rich.console
+            syntax = rich.syntax.Syntax(str(code), "c++", theme="monokai", line_numbers=True)
+            console = rich.console.Console()
+            console.print(syntax)
+        except ImportError:
+            print(code)
diff --git a/pystencils/fd/derivation.py b/pystencils/fd/derivation.py
index 4af4f3e3653e78668ad5b84e9ce9f19e7586b63c..2b67b89cf6ef0176ee3912960c8be5e859d733bc 100644
--- a/pystencils/fd/derivation.py
+++ b/pystencils/fd/derivation.py
@@ -1,6 +1,6 @@
+import itertools
 import warnings
 from collections import defaultdict
-import itertools
 
 import numpy as np
 import sympy as sp
@@ -184,6 +184,9 @@ class FiniteDifferenceStencilDerivation:
                 result[max_offset - direction[1], max_offset + direction[0]] = weight
             return result
 
+        def __array__(self):
+            return np.array(self.as_array().tolist())
+
         def as_array(self):
             dim = len(self.stencil[0])
             assert (dim == 2 or dim == 3), "Only 2D or 3D matrix representations are available"
@@ -205,12 +208,12 @@ class FiniteDifferenceStencilDerivation:
 
             return result
 
-        def rotate_weights_and_apply(self, field_access: Field.Access, axis):
+        def rotate_weights_and_apply(self, field_access: Field.Access, axes):
             """derive gradient weights of other direction with already calculated weights of one direction
                via rotation and apply them to a field."""
             dim = len(self.stencil[0])
             assert (dim == 2 or dim == 3), "This function is only for 2D or 3D stencils available"
-            rotated_weights = np.rot90(np.array(self.as_array()).reshape(self.as_array().shape), 1, axis)
+            rotated_weights = np.rot90(np.array(self), 1, axes)
 
             result = []
             max_offset = max(max(abs(e) for e in direction) for direction in self.stencil)
diff --git a/pystencils/fd/derivative.py b/pystencils/fd/derivative.py
index 7acd245059615ded27e2c1fe023e344b48c34a6e..31daf4294f4ffa70c766632c4126dd7403690251 100644
--- a/pystencils/fd/derivative.py
+++ b/pystencils/fd/derivative.py
@@ -111,6 +111,16 @@ class Diff(sp.Expr):
     def __str__(self):
         return "D(%s)" % self.arg
 
+    def interpolated_access(self, offset, **kwargs):
+        """Represents an interpolated access on a spatially differentiated field
+
+        Args:
+            offset (Tuple[sympy.Expr]): Absolute position to determine the value of the spatial derivative
+        """
+        from pystencils.interpolation_astnodes import DiffInterpolatorAccess
+        assert isinstance(self.arg.field, Field), "Must be field to enable interpolated accesses"
+        return DiffInterpolatorAccess(self.arg.field.interpolated_access(offset, **kwargs).symbol, self.target, *offset)
+
 
 class DiffOperator(sp.Expr):
     """Un-applied differential, i.e. differential operator
@@ -306,7 +316,8 @@ def expand_diff_full(expr, functions=None, constants=None):
             functions.difference_update(constants)
 
     def visit(e):
-        e = e.expand()
+        if not isinstance(e, sp.Tuple):
+            e = e.expand()
 
         if e.func == Diff:
             result = 0
@@ -331,6 +342,9 @@ def expand_diff_full(expr, functions=None, constants=None):
             return result
         elif isinstance(e, sp.Piecewise):
             return sp.Piecewise(*((expand_diff_full(a, functions, constants), b) for a, b in e.args))
+        elif isinstance(expr, sp.Tuple):
+            new_args = [visit(arg) for arg in e.args]
+            return sp.Tuple(*new_args)
         else:
             new_args = [visit(arg) for arg in e.args]
             return e.func(*new_args) if new_args else e
@@ -370,6 +384,9 @@ def expand_diff_linear(expr, functions=None, constants=None):
                 return diff.split_linear(functions)
     elif isinstance(expr, sp.Piecewise):
         return sp.Piecewise(*((expand_diff_linear(a, functions, constants), b) for a, b in expr.args))
+    elif isinstance(expr, sp.Tuple):
+        new_args = [expand_diff_linear(e, functions) for e in expr.args]
+        return sp.Tuple(*new_args)
     else:
         new_args = [expand_diff_linear(e, functions) for e in expr.args]
         result = sp.expand(expr.func(*new_args) if new_args else expr)
diff --git a/pystencils/fd/finitedifferences.py b/pystencils/fd/finitedifferences.py
index d5bce66e96d8aa5d296df68906c8871d40c08bba..5b6b15f95bebec40939d54a8ff2ad6c58c169f58 100644
--- a/pystencils/fd/finitedifferences.py
+++ b/pystencils/fd/finitedifferences.py
@@ -109,7 +109,9 @@ class Discretization2ndOrder:
             return self._discretize_advection(e)
         elif isinstance(e, Diff):
             arg, *indices = diff_args(e)
-            if not isinstance(arg, Field.Access):
+            from pystencils.interpolation_astnodes import InterpolatorAccess
+
+            if not isinstance(arg, (Field.Access, InterpolatorAccess)):
                 raise ValueError("Only derivatives with field or field accesses as arguments can be discretized")
             return self.spatial_stencil(indices, self.dx, arg)
         else:
diff --git a/pystencils/field.py b/pystencils/field.py
index 0bf80b211348b6e5c1975a86f4d51fc50cb9fdd4..6b85dead11778bf5aa6a820d4f34e8b547a95df4 100644
--- a/pystencils/field.py
+++ b/pystencils/field.py
@@ -332,7 +332,7 @@ class Field(AbstractField):
         self.latex_name: Optional[str] = None
         self.coordinate_origin: tuple[float, sp.Symbol] = sp.Matrix(tuple(
             0 for _ in range(self.spatial_dimensions)
-        ))  # type
+        ))
         self.coordinate_transform = sp.eye(self.spatial_dimensions)
         if field_type == FieldType.STAGGERED:
             assert self.staggered_stencil
diff --git a/pystencils/gpucuda/cudajit.py b/pystencils/gpucuda/cudajit.py
index 28ec47d0ba22e92cd4064b3400a0d055a87be7cb..86f251c97b95f04c52aaa803398ddb78ba42a2c1 100644
--- a/pystencils/gpucuda/cudajit.py
+++ b/pystencils/gpucuda/cudajit.py
@@ -5,13 +5,20 @@ from pystencils.data_types import StructType
 from pystencils.field import FieldType
 from pystencils.gpucuda.texture_utils import ndarray_to_tex
 from pystencils.include import get_pycuda_include_path, get_pystencils_include_path
-from pystencils.interpolation_astnodes import TextureAccess
+from pystencils.interpolation_astnodes import InterpolatorAccess, TextureCachedField
 from pystencils.kernel_wrapper import KernelWrapper
 from pystencils.kernelparameters import FieldPointerSymbol
 
 USE_FAST_MATH = True
 
 
+def get_cubic_interpolation_include_paths():
+    from os.path import join, dirname
+
+    return [join(dirname(__file__), "CubicInterpolationCUDA", "code"),
+            join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]
+
+
 def make_python_function(kernel_function_node, argument_dict=None, custom_backend=None):
     """
     Creates a kernel function from an abstract syntax tree which
@@ -39,23 +46,28 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
     code += "#define FUNC_PREFIX __global__\n"
     code += "#define RESTRICT __restrict__\n\n"
     code += str(generate_c(kernel_function_node, dialect='cuda', custom_backend=custom_backend))
-    textures = set(d.texture for d in kernel_function_node.atoms(TextureAccess))
+    textures = set(d.interpolator for d in kernel_function_node.atoms(
+        InterpolatorAccess) if isinstance(d.interpolator, TextureCachedField))
 
     nvcc_options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"]
     if USE_FAST_MATH:
         nvcc_options.append("-use_fast_math")
 
-    # Code for
-    # if any(t.interpolation_mode == InterpolationMode.CUBIC_SPLINE for t in textures):
-        # assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \
-        # "Submodule CubicInterpolationCUDA does not exist"
-        # nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")]
-        # nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]
-
-        # needed_dims = set(t.field.spatial_dimensions for t in textures
-        # if t.interpolation_mode == InterpolationMode.CUBIC_SPLINE)
-        # for i in needed_dims:
-        # code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code
+    # Code for CubicInterpolationCUDA
+    from pystencils.interpolation_astnodes import InterpolationMode
+    from os.path import join, dirname, isdir
+
+    if any(t.interpolation_mode == InterpolationMode.CUBIC_SPLINE for t in textures):
+        assert isdir(join(dirname(__file__), ("CubicInterpolationCUDA", "code")),
+                     "Submodule CubicInterpolationCUDA does not exist.\n"
+                     + "Clone https://github.com/theHamsta/CubicInterpolationCUDA into pystencils.gpucuda")
+        nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")]
+        nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]
+
+        needed_dims = set(t.field.spatial_dimensions for t in textures
+                          if t.interpolation_mode == InterpolationMode.CUBIC_SPLINE)
+        for i in needed_dims:
+            code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code
 
     mod = SourceModule(code, options=nvcc_options, include_dirs=[
                        get_pystencils_include_path(), get_pycuda_include_path()])
diff --git a/pystencils/gpucuda/texture_utils.py b/pystencils/gpucuda/texture_utils.py
index d29d862929530b27f840da445467a2bae44b1bfb..ff3db430fc841f4121b9602b57a1c1aa08ef86f9 100644
--- a/pystencils/gpucuda/texture_utils.py
+++ b/pystencils/gpucuda/texture_utils.py
@@ -15,6 +15,7 @@ import numpy as np
 try:
     import pycuda.driver as cuda
     from pycuda import gpuarray
+    import pycuda
 except Exception:
     pass
 
@@ -35,6 +36,8 @@ def ndarray_to_tex(tex_ref,
                    use_normalized_coordinates=False,
                    read_as_integer=False):
 
+    if isinstance(address_mode, str):
+        address_mode = getattr(pycuda.driver.address_mode, address_mode.upper())
     if address_mode is None:
         address_mode = cuda.address_mode.BORDER
     if filter_mode is None:
diff --git a/pystencils/include/cuda_complex.hpp b/pystencils/include/cuda_complex.hpp
index ad555264a87881d8eaee6b2476c482039d606f71..535aa52e30781a03e170298e8e5ebbed872585dc 100644
--- a/pystencils/include/cuda_complex.hpp
+++ b/pystencils/include/cuda_complex.hpp
@@ -1173,53 +1173,53 @@ operator<<(std::basic_ostream<_CharT, _Traits> &__os, const complex<_Tp> &__x) {
 template <class U, class V>
 CUDA_CALLABLE_MEMBER auto operator*(const complex<U> &complexNumber,
                                     const V &scalar) -> complex<U> {
-  return complex<U>{real(complexNumber) * scalar, imag(complexNumber) * scalar};
+  return complex<U>(real(complexNumber) * scalar, imag(complexNumber) * scalar);
 }
 
 template <class U, class V>
 CUDA_CALLABLE_MEMBER auto operator*(const V &scalar,
                                     const complex<U> &complexNumber)
     -> complex<U> {
-  return complex<U>{real(complexNumber) * scalar, imag(complexNumber) * scalar};
+  return complex<U>(real(complexNumber) * scalar, imag(complexNumber) * scalar);
 }
 
 template <class U, class V>
 CUDA_CALLABLE_MEMBER auto operator+(const complex<U> &complexNumber,
                                     const V &scalar) -> complex<U> {
-  return complex<U>{real(complexNumber) + scalar, imag(complexNumber)};
+  return complex<U>(real(complexNumber) + scalar, imag(complexNumber));
 }
 
 template <class U, class V>
 CUDA_CALLABLE_MEMBER auto operator+(const V &scalar,
                                     const complex<U> &complexNumber)
     -> complex<U> {
-  return complex<U>{real(complexNumber) + scalar, imag(complexNumber)};
+  return complex<U>(real(complexNumber) + scalar, imag(complexNumber));
 }
 
 template <class U, class V>
 CUDA_CALLABLE_MEMBER auto operator-(const complex<U> &complexNumber,
                                     const V &scalar) -> complex<U> {
-  return complex<U>{real(complexNumber) - scalar, imag(complexNumber)};
+  return complex<U>(real(complexNumber) - scalar, imag(complexNumber));
 }
 
 template <class U, class V>
 CUDA_CALLABLE_MEMBER auto operator-(const V &scalar,
                                     const complex<U> &complexNumber)
     -> complex<U> {
-  return complex<U>{scalar - real(complexNumber), imag(complexNumber)};
+  return complex<U>(scalar - real(complexNumber), imag(complexNumber));
 }
 
 template <class U, class V>
 CUDA_CALLABLE_MEMBER auto operator/(const complex<U> &complexNumber,
                                     const V scalar) -> complex<U> {
-  return complex<U>{real(complexNumber) / scalar, imag(complexNumber) / scalar};
+  return complex<U>(real(complexNumber) / scalar, imag(complexNumber) / scalar);
 }
 
 template <class U, class V>
 CUDA_CALLABLE_MEMBER auto operator/(const V scalar,
                                     const complex<U> &complexNumber)
     -> complex<U> {
-  return complex<U>{scalar, 0} / complexNumber;
+  return complex<U>(scalar, 0) / complexNumber;
 }
 
 using ComplexDouble = complex<double>;
diff --git a/pystencils/interpolation_astnodes.py b/pystencils/interpolation_astnodes.py
index 62d291b08e634cc0ad57eb37dca7a779581d88e1..76bd340b7e2c9bd66d9c82f77bc0399208017e52 100644
--- a/pystencils/interpolation_astnodes.py
+++ b/pystencils/interpolation_astnodes.py
@@ -35,6 +35,27 @@ class InterpolationMode(str, Enum):
     CUBIC_SPLINE = "cubic_spline"
 
 
+class _InterpolationSymbol(TypedSymbol):
+
+    def __new__(cls, name, field, interpolator):
+        obj = cls.__xnew_cached_(cls, name, field, interpolator)
+        return obj
+
+    def __new_stage2__(cls, name, field, interpolator):
+        obj = super().__xnew__(cls, name, 'dummy_symbol_carrying_field' + field.name)
+        obj.field = field
+        obj.interpolator = interpolator
+        return obj
+
+    def __getnewargs__(self):
+        return self.name, self.field, self.interpolator
+
+    # noinspection SpellCheckingInspection
+    __xnew__ = staticmethod(__new_stage2__)
+    # noinspection SpellCheckingInspection
+    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
+
+
 class Interpolator(object):
     """
     Implements non-integer accesses on fields using linear interpolation.
@@ -77,24 +98,25 @@ class Interpolator(object):
                  allow_textures=True):
         super().__init__()
 
-        self.field = parent_field.new_field_with_different_name(parent_field.name)
+        self.field = parent_field
         self.field.field_type = pystencils.field.FieldType.CUSTOM
         self.address_mode = address_mode
         self.use_normalized_coordinates = use_normalized_coordinates
-        hash_str = "%x" % abs(hash(self.field) + hash(address_mode))
-        self.symbol = TypedSymbol('dummy_symbol_carrying_field' + self.field.name + hash_str,
-                                  'dummy_symbol_carrying_field' + self.field.name + hash_str)
-        self.symbol.field = self.field
-        self.symbol.interpolator = self
-        self.allow_textures = allow_textures
         self.interpolation_mode = interpolation_mode
+        self.hash_str = hashlib.md5(
+            f'{self.field}_{address_mode}_{self.field.dtype}_{interpolation_mode}'.encode()).hexdigest()
+        self.symbol = _InterpolationSymbol(str(self), parent_field, self)
+        self.allow_textures = allow_textures
+
+    @property
+    def ndim(self):
+        return self.field.ndim
 
     @property
     def _hashable_contents(self):
         return (str(self.address_mode),
                 str(type(self)),
-                self.symbol,
-                self.address_mode,
+                self.hash_str,
                 self.use_normalized_coordinates)
 
     def at(self, offset):
@@ -112,6 +134,9 @@ class Interpolator(object):
     def __hash__(self):
         return hash(self._hashable_contents)
 
+    def __eq__(self, other):
+        return hash(self) == hash(other)
+
     @property
     def reproducible_hash(self):
         return _hash(str(self._hashable_contents).encode()).hexdigest()
@@ -142,29 +167,43 @@ class NearestNeightborInterpolator(Interpolator):
 
 
 class InterpolatorAccess(TypedSymbol):
-    def __new__(cls, field, offsets, *args, **kwargs):
-        obj = TextureAccess.__xnew_cached_(cls, field, offsets, *args, **kwargs)
+    def __new__(cls, field, *offsets, **kwargs):
+        obj = InterpolatorAccess.__xnew_cached_(cls, field, *offsets, **kwargs)
         return obj
 
-    def __new_stage2__(self, symbol, *offsets):
+    def __new_stage2__(cls, symbol, *offsets):
         assert offsets is not None
-        obj = super().__xnew__(self, '%s_interpolator_%x' %
-                               (symbol.field.name, abs(hash(tuple(offsets)))), symbol.field.dtype)
+        obj = super().__xnew__(cls, '%s_interpolator_%s' %
+                               (symbol.field.name, _hash(str(tuple(offsets)).encode()).hexdigest()),
+                               symbol.field.dtype)
         obj.offsets = offsets
         obj.symbol = symbol
         obj.field = symbol.field
         obj.interpolator = symbol.interpolator
         return obj
 
-    def __hash__(self):
-        return hash((self.symbol, self.field, tuple(self.offsets), self.interpolator))
+    def _hashable_contents(self):
+        return super()._hashable_content() + ((self.symbol, self.field, tuple(self.offsets), self.symbol.interpolator))
 
     def __str__(self):
-        return '%s_interpolator(%s)' % (self.field.name, ','.join(str(o) for o in self.offsets))
+        return '%s_interpolator(%s)' % (self.field.name, ', '.join(str(o) for o in self.offsets))
 
     def __repr__(self):
         return self.__str__()
 
+    def _latex(self, printer, *_):
+        n = self.field.latex_name if self.field.latex_name else self.field.name
+        foo = ", ".join(str(printer.doprint(o)) for o in self.offsets)
+        return f'{n}_{{interpolator}}\\left({foo}\\right)'
+
+    @property
+    def ndim(self):
+        return len(self.offsets)
+
+    @property
+    def is_texture(self):
+        return isinstance(self.interpolator, TextureCachedField)
+
     def atoms(self, *types):
         if self.offsets:
             offsets = set(o for o in self.offsets if isinstance(o, types))
@@ -177,6 +216,11 @@ class InterpolatorAccess(TypedSymbol):
         else:
             return set()
 
+    def neighbor(self, coord_id, offset):
+        offset_list = list(self.offsets)
+        offset_list[coord_id] += offset
+        return self.interpolator.at(tuple(offset_list))
+
     @property
     def free_symbols(self):
         symbols = set()
@@ -189,6 +233,13 @@ class InterpolatorAccess(TypedSymbol):
 
         return symbols
 
+    @property
+    def required_global_declarations(self):
+        required_global_declarations = self.symbol.interpolator.required_global_declarations
+        if required_global_declarations:
+            required_global_declarations[0]._symbols_defined.add(self)
+        return required_global_declarations
+
     @property
     def args(self):
         return [self.symbol, *self.offsets]
@@ -201,14 +252,27 @@ class InterpolatorAccess(TypedSymbol):
     def interpolation_mode(self):
         return self.interpolator.interpolation_mode
 
+    @property
+    def _diff_interpolation_vec(self):
+        return sp.Matrix([DiffInterpolatorAccess(self.symbol, i, *self.offsets)
+                          for i in range(len(self.offsets))])
+
+    def diff(self, *symbols, **kwargs):
+        if symbols == (self,):
+            return 1
+        rtn = self._diff_interpolation_vec.T * sp.Matrix(self.offsets).diff(*symbols, **kwargs)
+        if rtn.shape == (1, 1):
+            rtn = rtn[0, 0]
+        return rtn
+
     def implementation_with_stencils(self):
         field = self.field
 
         default_int_type = create_type('int64')
-        use_textures = isinstance(self, TextureAccess)
+        use_textures = isinstance(self.interpolator, TextureCachedField)
         if use_textures:
             def absolute_access(x, _):
-                return self.texture.at((o for o in x))
+                return self.symbol.interpolator.at((o for o in x))
         else:
             absolute_access = field.absolute_access
 
@@ -255,7 +319,7 @@ class InterpolatorAccess(TypedSymbol):
                                  for (dim, i) in enumerate(index)]
                         index = [cast_func(sp.Piecewise((i, i > 0),
                                                         (sp.Abs(cast_func(field.shape[dim] - 1 + i, default_int_type)),
-                                                            True)), default_int_type)
+                                                         True)), default_int_type)
                                  for (dim, i) in enumerate(index)]
                         sum[channel_idx] += weight * \
                             absolute_access(index, channel_idx if field.index_dimensions else ())
@@ -287,12 +351,61 @@ class InterpolatorAccess(TypedSymbol):
     # noinspection SpellCheckingInspection
     __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
 
+    def __getnewargs__(self):
+        return tuple(self.symbol, *self.offsets)
+
+
+class DiffInterpolatorAccess(InterpolatorAccess):
+    def __new__(cls, symbol, diff_coordinate_idx, *offsets, **kwargs):
+        if symbol.interpolator.interpolation_mode == InterpolationMode.LINEAR:
+            from pystencils.fd import Diff, Discretization2ndOrder
+            return Discretization2ndOrder(1)(Diff(symbol.interpolator.at(offsets), diff_coordinate_idx))
+        obj = DiffInterpolatorAccess.__xnew_cached_(cls, symbol, diff_coordinate_idx, *offsets, **kwargs)
+        return obj
+
+    def __new_stage2__(self, symbol: sp.Symbol, diff_coordinate_idx, *offsets):
+        assert offsets is not None
+        obj = super().__xnew__(self, symbol, *offsets)
+        obj.diff_coordinate_idx = diff_coordinate_idx
+        return obj
+
+    def __hash__(self):
+        return hash((self.symbol, self.field, self.diff_coordinate_idx, tuple(self.offsets), self.interpolator))
+
+    def __str__(self):
+        return '%s_diff%i_interpolator(%s)' % (self.field.name, self.diff_coordinate_idx,
+                                               ', '.join(str(o) for o in self.offsets))
+
+    def __repr__(self):
+        return str(self)
+
+    @property
+    def args(self):
+        return [self.symbol, self.diff_coordinate_idx, *self.offsets]
+
+    @property
+    def symbols_defined(self) -> Set[sp.Symbol]:
+        return {self}
+
+    @property
+    def interpolation_mode(self):
+        return self.interpolator.interpolation_mode
+
+    # noinspection SpellCheckingInspection
+    __xnew__ = staticmethod(__new_stage2__)
+    # noinspection SpellCheckingInspection
+    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
+
+    def __getnewargs__(self):
+        return tuple(self.symbol, self.diff_coordinate_idx, *self.offsets)
+
+
 ##########################################################################################
 # GPU-specific fast specializations (for precision GPUs can also use above nodes/symbols #
 ##########################################################################################
 
 
-class TextureCachedField:
+class TextureCachedField(Interpolator):
 
     def __init__(self, parent_field,
                  address_mode=None,
@@ -301,28 +414,19 @@ class TextureCachedField:
                  use_normalized_coordinates=False,
                  read_as_integer=False
                  ):
-        if isinstance(address_mode, str):
-            address_mode = getattr(pycuda.driver.address_mode, address_mode.upper())
+        super().__init__(parent_field, interpolation_mode, address_mode, use_normalized_coordinates)
 
         if address_mode is None:
-            address_mode = pycuda.driver.address_mode.BORDER
+            address_mode = 'border'
         if filter_mode is None:
             filter_mode = pycuda.driver.filter_mode.LINEAR
 
-        # self, field_name, field_type, dtype, layout, shape, strides
-        self.field = parent_field
-        self.address_mode = address_mode
-        self.filter_mode = filter_mode
         self.read_as_integer = read_as_integer
-        self.use_normalized_coordinates = use_normalized_coordinates
-        self.interpolation_mode = interpolation_mode
-        self.symbol = TypedSymbol(str(self), self.field.dtype.numpy_dtype)
-        self.symbol.interpolator = self
-        self.symbol.field = self.field
         self.required_global_declarations = [TextureDeclaration(self)]
 
-        # assert str(self.field.dtype) != 'double', "CUDA does not support double textures!"
-        # assert dtype_supports_textures(self.field.dtype), "CUDA only supports texture types with 32 bits or less"
+    @property
+    def ndim(self):
+        return self.field.ndim
 
     @classmethod
     def from_interpolator(cls, interpolator: LinearInterpolator):
@@ -332,59 +436,17 @@ class TextureCachedField:
         obj = cls(interpolator.field, interpolator.address_mode, interpolation_mode=interpolator.interpolation_mode)
         return obj
 
-    def at(self, offset):
-        return TextureAccess(self.symbol, *offset)
-
-    def __getitem__(self, offset):
-        return TextureAccess(self.symbol, *offset)
-
     def __str__(self):
         return '%s_texture_%s' % (self.field.name, self.reproducible_hash)
 
     def __repr__(self):
         return self.__str__()
 
-    @property
-    def _hashable_contents(self):
-        return (type(self),
-                self.address_mode,
-                self.filter_mode,
-                self.read_as_integer,
-                self.interpolation_mode,
-                self.use_normalized_coordinates)
-
-    def __hash__(self):
-        return hash(self._hashable_contents)
-
     @property
     def reproducible_hash(self):
         return _hash(str(self._hashable_contents).encode()).hexdigest()
 
 
-class TextureAccess(InterpolatorAccess):
-    def __new__(cls, texture_symbol, offsets, *args, **kwargs):
-        obj = TextureAccess.__xnew_cached_(cls, texture_symbol, offsets, *args, **kwargs)
-        return obj
-
-    def __new_stage2__(self, symbol, *offsets):
-        obj = super().__xnew__(self, symbol, *offsets)
-        obj.required_global_declarations = symbol.interpolator.required_global_declarations
-        obj.required_global_declarations[0]._symbols_defined.add(obj)
-        return obj
-
-    def __str__(self):
-        return '%s_texture(%s)' % (self.interpolator.field.name, ','.join(str(o) for o in self.offsets))
-
-    @property
-    def texture(self):
-        return self.interpolator
-
-    # noinspection SpellCheckingInspection
-    __xnew__ = staticmethod(__new_stage2__)
-    # noinspection SpellCheckingInspection
-    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))
-
-
 class TextureDeclaration(Node):
     """
     A global declaration of a texture. Visible both for device and host code.
@@ -421,12 +483,18 @@ class TextureDeclaration(Node):
 
     @property
     def headers(self):
-        return ['"pycuda-helpers.hpp"']
+        headers = ['"pycuda-helpers.hpp"']
+        if self.texture.interpolation_mode == InterpolationMode.CUBIC_SPLINE:
+            headers.append('"cubicTex%iD.cu"' % self.texture.ndim)
+        return headers
 
     def __str__(self):
         from pystencils.backends.cuda_backend import CudaBackend
         return CudaBackend()(self)
 
+    def __repr__(self):
+        return str(self)
+
 
 class TextureObject(TextureDeclaration):
     """
diff --git a/pystencils/kernel_wrapper.py b/pystencils/kernel_wrapper.py
index 0e327711e5a355219cc2664ac9a6c8a02d88bc09..b76ec4e3d79c1b345c9f0a8b2bd8a97b59866b8d 100644
--- a/pystencils/kernel_wrapper.py
+++ b/pystencils/kernel_wrapper.py
@@ -1,11 +1,14 @@
-"""
-Light-weight wrapper around a compiled kernel
-"""
 import pystencils
 
 
 class KernelWrapper:
-    def __init__(self, kernel, parameters, ast_node):
+    """
+    Light-weight wrapper around a compiled kernel.
+
+    Can be called while still providing access to underlying AST.
+    """
+
+    def __init__(self, kernel, parameters, ast_node: pystencils.astnodes.KernelFunction):
         self.kernel = kernel
         self.parameters = parameters
         self.ast = ast_node
@@ -16,4 +19,4 @@ class KernelWrapper:
 
     @property
     def code(self):
-        return str(pystencils.show_code(self.ast))
+        return pystencils.get_code_str(self.ast)
diff --git a/pystencils/kernelcreation.py b/pystencils/kernelcreation.py
index aaca6d3124c4303ae289e831fdb7b7680369c408..2c8aabaa530f985f7b645a41a29a9537a655fa07 100644
--- a/pystencils/kernelcreation.py
+++ b/pystencils/kernelcreation.py
@@ -79,14 +79,14 @@ def create_kernel(assignments,
                [0., 0., 0., 0., 0.]])
     """
     # ----  Normalizing parameters
+    if isinstance(assignments, Assignment):
+        assignments = [assignments]
     assert assignments, "Assignments must not be empty!"
     split_groups = ()
     if isinstance(assignments, AssignmentCollection):
         if 'split_groups' in assignments.simplification_hints:
             split_groups = assignments.simplification_hints['split_groups']
         assignments = assignments.all_assignments
-    if isinstance(assignments, Assignment):
-        assignments = [assignments]
 
     # ----  Creating ast
     if target == 'cpu':
diff --git a/pystencils/math_optimizations.py b/pystencils/math_optimizations.py
index ad0114782e15977fb329baacfb6ad4b6bac1e33b..b9420318f1a0a5b7a4aa538c35b6866672282bea 100644
--- a/pystencils/math_optimizations.py
+++ b/pystencils/math_optimizations.py
@@ -44,3 +44,13 @@ def optimize_assignments(assignments, optimizations):
             a.optimize(optimizations)
 
     return assignments
+
+
+def optimize_ast(ast, optimizations):
+
+    if HAS_REWRITING:
+        assignments_nodes = ast.atoms(SympyAssignment)
+        for a in assignments_nodes:
+            a.optimize(optimizations)
+
+    return ast
diff --git a/pystencils/simp/assignment_collection.py b/pystencils/simp/assignment_collection.py
index 3a8bb2bd3b315e23fb9e11ef6385f264428c7bd5..706008c859bd0cdc9f2f3b892557242a5aa01184 100644
--- a/pystencils/simp/assignment_collection.py
+++ b/pystencils/simp/assignment_collection.py
@@ -1,3 +1,4 @@
+import itertools
 from copy import copy
 from typing import Any, Dict, Iterable, Iterator, List, Optional, Sequence, Set, Union
 
@@ -43,6 +44,11 @@ class AssignmentCollection:
             subexpressions = [Assignment(k, v)
                               for k, v in subexpressions.items()]
 
+        main_assignments = list(itertools.chain.from_iterable(
+            [(a if isinstance(a, Iterable) else [a]) for a in main_assignments]))
+        subexpressions = list(itertools.chain.from_iterable(
+            [(a if isinstance(a, Iterable) else [a]) for a in subexpressions]))
+
         self.main_assignments = main_assignments
         self.subexpressions = subexpressions
 
@@ -150,6 +156,9 @@ class AssignmentCollection:
         """See :func:`count_operations` """
         return count_operations(self.all_assignments, only_type=None)
 
+    def atoms(self, *args):
+        return set().union(*[a.atoms(*args) for a in self.all_assignments])
+
     def dependent_symbols(self, symbols: Iterable[sp.Symbol]) -> Set[sp.Symbol]:
         """Returns all symbols that depend on one of the passed symbols.
 
diff --git a/pystencils/test_type_interference.py b/pystencils/test_type_interference.py
index 0daa6f9d2a36948184d537809457b4aaf8001d29..953b87742304b2d629a1bd564fc23e0982d4f6d9 100644
--- a/pystencils/test_type_interference.py
+++ b/pystencils/test_type_interference.py
@@ -18,8 +18,7 @@ def test_type_interference():
 
     ast = pystencils.create_kernel(assignments)
 
-    code = str(pystencils.show_code(ast))
-    print(code)
+    code = str(pystencils.get_code_str(ast))
     assert 'double a' in code
     assert 'uint16_t b' in code
     assert 'uint16_t f' in code
diff --git a/pystencils/transformations.py b/pystencils/transformations.py
index bb089ccb91ea5dfd678d69d6990981fdd28793fc..448991a842f2bd9cee6a47fc559fe16bbf8996ed 100644
--- a/pystencils/transformations.py
+++ b/pystencils/transformations.py
@@ -14,8 +14,8 @@ import pystencils.astnodes as ast
 import pystencils.integer_functions
 from pystencils.assignment import Assignment
 from pystencils.data_types import (
-    PointerType, StructType, TypedImaginaryUnit, TypedSymbol, cast_func, collate_types, create_type, get_base_type,
-    get_type_of_expression, pointer_arithmetic_func, reinterpret_cast_func)
+    PointerType, StructType, TypedImaginaryUnit, TypedSymbol, cast_func, collate_types, create_type,
+    get_base_type, get_type_of_expression, pointer_arithmetic_func, reinterpret_cast_func)
 from pystencils.field import AbstractField, Field, FieldType
 from pystencils.kernelparameters import FieldPointerSymbol
 from pystencils.simp.assignment_collection import AssignmentCollection
@@ -1314,7 +1314,7 @@ def implement_interpolations(ast_node: ast.Node,
                              implement_by_texture_accesses: bool = False,
                              vectorize: bool = False,
                              use_hardware_interpolation_for_f32=True):
-    from pystencils.interpolation_astnodes import InterpolatorAccess, TextureAccess, TextureCachedField
+    from pystencils.interpolation_astnodes import (InterpolatorAccess, TextureCachedField)
     # TODO: perform this function on assignments, when unify_shape_symbols allows differently sized fields
 
     assert not(implement_by_texture_accesses and vectorize), \
@@ -1322,37 +1322,42 @@ def implement_interpolations(ast_node: ast.Node,
     FLOAT32_T = create_type('float32')
 
     interpolation_accesses = ast_node.atoms(InterpolatorAccess)
+    if not interpolation_accesses:
+        return ast_node
 
     def can_use_hw_interpolation(i):
-        return use_hardware_interpolation_for_f32 and i.dtype == FLOAT32_T and isinstance(i, TextureAccess)
+        return (use_hardware_interpolation_for_f32
+                and implement_by_texture_accesses
+                and i.dtype == FLOAT32_T
+                and isinstance(i.symbol.interpolator, TextureCachedField))
 
     if implement_by_texture_accesses:
 
-        interpolators = {a.symbol.interpolator for a in interpolation_accesses}
-        to_texture_map = {i: TextureCachedField.from_interpolator(i) for i in interpolators}
+        for i in interpolation_accesses:
+            from pystencils.interpolation_astnodes import _InterpolationSymbol
 
-        substitutions = {i: to_texture_map[i.symbol.interpolator].at(
-            [o for o in i.offsets]) for i in interpolation_accesses}
-
-        try:
-            import pycuda.driver as cuda
-            for texture in substitutions.values():
-                if can_use_hw_interpolation(texture):
+            try:
+                import pycuda.driver as cuda
+                texture = TextureCachedField.from_interpolator(i.interpolator)
+                if can_use_hw_interpolation(i):
                     texture.filter_mode = cuda.filter_mode.LINEAR
                 else:
                     texture.filter_mode = cuda.filter_mode.POINT
                     texture.read_as_integer = True
-        except Exception:
-            pass
+            except Exception as e:
+                raise e
+            i.symbol = _InterpolationSymbol(str(texture), i.symbol.field, texture)
 
-        if isinstance(ast_node, AssignmentCollection):
-            ast_node = ast_node.subs(substitutions)
-        else:
-            ast_node.subs(substitutions)
+    # from pystencils.math_optimizations import ReplaceOptim, optimize_ast
 
-        # Update after replacements
-        interpolation_accesses = ast_node.atoms(InterpolatorAccess)
+    # ImplementInterpolationByStencils = ReplaceOptim(lambda e: isinstance(e, InterpolatorAccess)
+            # and not can_use_hw_interpolation(i),
+            # lambda e: e.implementation_with_stencils()
+            # )
 
+    # RemoveConjugate = ReplaceOptim(lambda e: isinstance(e, sp.conjugate),
+            # lambda e: e.args[0]
+            # )
     if vectorize:
         # TODO can be done in _interpolator_access_to_stencils field.absolute_access == simd_gather
         raise NotImplementedError()
diff --git a/pystencils_tests/test_address_of.py b/pystencils_tests/test_address_of.py
index b4c46c1c6045520e30826fe84ac91ae76339ad1b..717f6e43720a4ddddfca8c7a3fce3ff9e15982ff 100644
--- a/pystencils_tests/test_address_of.py
+++ b/pystencils_tests/test_address_of.py
@@ -17,16 +17,14 @@ def test_address_of():
     }, {})
 
     ast = pystencils.create_kernel(assignments)
-    code = pystencils.show_code(ast)
-    print(code)
+    pystencils.show_code(ast)
 
     assignments = pystencils.AssignmentCollection({
         y[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64'))
     }, {})
 
     ast = pystencils.create_kernel(assignments)
-    code = pystencils.show_code(ast)
-    print(code)
+    pystencils.show_code(ast)
 
 
 def test_address_of_with_cse():
@@ -39,9 +37,8 @@ def test_address_of_with_cse():
     }, {})
 
     ast = pystencils.create_kernel(assignments)
-    code = pystencils.show_code(ast)
+    pystencils.show_code(ast)
     assignments_cse = sympy_cse(assignments)
 
     ast = pystencils.create_kernel(assignments_cse)
-    code = pystencils.show_code(ast)
-    print(code)
+    pystencils.show_code(ast)
diff --git a/pystencils_tests/test_assignment_collection.py b/pystencils_tests/test_assignment_collection.py
index 42e3791ab331cc111343bfe410bc59e3f52ba80b..a16d51db44bb722ee1a9da9a20ad4192d4f6188e 100644
--- a/pystencils_tests/test_assignment_collection.py
+++ b/pystencils_tests/test_assignment_collection.py
@@ -1,3 +1,4 @@
+import pytest
 import sympy as sp
 
 from pystencils import Assignment, AssignmentCollection
@@ -40,3 +41,39 @@ def test_free_and_defined_symbols():
 
     print(ac)
     print(ac.__repr__)
+
+
+def test_vector_assignments():
+    """From #17 (https://i10git.cs.fau.de/pycodegen/pystencils/issues/17)"""
+
+    import pystencils as ps
+    import sympy as sp
+    a, b, c = sp.symbols("a b c")
+    assignments = ps.Assignment(sp.Matrix([a,b,c]), sp.Matrix([1,2,3]))
+    print(assignments)
+
+
+def test_wrong_vector_assignments():
+    """From #17 (https://i10git.cs.fau.de/pycodegen/pystencils/issues/17)"""
+
+    import pystencils as ps
+    import sympy as sp
+    a, b = sp.symbols("a b")
+    
+    with pytest.raises(AssertionError,
+            match=r'Matrix(.*) and Matrix(.*) must have same length when performing vector assignment!'):
+        ps.Assignment(sp.Matrix([a,b]), sp.Matrix([1,2,3]))
+
+
+def test_vector_assignment_collection():
+    """From #17 (https://i10git.cs.fau.de/pycodegen/pystencils/issues/17)"""
+
+    import pystencils as ps
+    import sympy as sp
+    a, b, c = sp.symbols("a b c")
+    y, x = sp.Matrix([a,b,c]), sp.Matrix([1,2,3])
+    assignments = ps.AssignmentCollection({y: x})
+    print(assignments)
+
+    assignments = ps.AssignmentCollection([ps.Assignment(y,x)])
+    print(assignments)
diff --git a/pystencils_tests/test_boundary.py b/pystencils_tests/test_boundary.py
index 096b1348fc59181e14eb15042d8db0098e71c521..23770c8ef6d61c15110f94875b0000c3e7d11fac 100644
--- a/pystencils_tests/test_boundary.py
+++ b/pystencils_tests/test_boundary.py
@@ -3,6 +3,8 @@ from tempfile import TemporaryDirectory
 
 import numpy as np
 
+import pytest
+
 from pystencils import Assignment, create_kernel
 from pystencils.boundaries import BoundaryHandling, Neumann, add_neumann_boundary
 from pystencils.datahandling import SerialDataHandling
@@ -83,5 +85,6 @@ def test_kernel_vs_copy_boundary():
     np.testing.assert_almost_equal(python_copy_result, handling_result)
 
     with TemporaryDirectory() as tmp_dir:
+        pytest.importorskip('pyevtk')
         boundary_handling.geometry_to_vtk(file_name=os.path.join(tmp_dir, 'test_output1'), ghost_layers=False)
         boundary_handling.geometry_to_vtk(file_name=os.path.join(tmp_dir, 'test_output2'), ghost_layers=True)
diff --git a/pystencils_tests/test_buffer_gpu.py b/pystencils_tests/test_buffer_gpu.py
index fc27a2331be1690b8bb1d0da043b47b3ba6fcfc9..f9ee96e9b19c147d95f006de4221980900375cb1 100644
--- a/pystencils_tests/test_buffer_gpu.py
+++ b/pystencils_tests/test_buffer_gpu.py
@@ -22,6 +22,7 @@ FIELD_SIZES = [(4, 3), (9, 3, 7)]
 
 
 def _generate_fields(dt=np.uint8, stencil_directions=1, layout='numpy'):
+    pytest.importorskip('pycuda')
     field_sizes = FIELD_SIZES
     if stencil_directions > 1:
         field_sizes = [s + (stencil_directions,) for s in field_sizes]
@@ -44,7 +45,6 @@ def _generate_fields(dt=np.uint8, stencil_directions=1, layout='numpy'):
     return fields
 
 
-@pytest.mark.gpu
 def test_full_scalar_field():
     """Tests fully (un)packing a scalar field (from)to a GPU buffer."""
     fields = _generate_fields()
@@ -73,7 +73,6 @@ def test_full_scalar_field():
         np.testing.assert_equal(src_arr, dst_arr)
 
 
-@pytest.mark.gpu
 def test_field_slice():
     """Tests (un)packing slices of a scalar field (from)to a buffer."""
     fields = _generate_fields()
@@ -109,7 +108,6 @@ def test_field_slice():
             np.testing.assert_equal(src_arr[pack_slice], dst_arr[unpack_slice])
 
 
-@pytest.mark.gpu
 def test_all_cell_values():
     """Tests (un)packing all cell values of the a field (from)to a buffer."""
     num_cell_values = 7
@@ -148,7 +146,6 @@ def test_all_cell_values():
         np.testing.assert_equal(src_arr, dst_arr)
 
 
-@pytest.mark.gpu
 def test_subset_cell_values():
     """Tests (un)packing a subset of cell values of the a field (from)to a buffer."""
     num_cell_values = 7
@@ -190,7 +187,6 @@ def test_subset_cell_values():
         np.testing.assert_equal(dst_arr, mask_arr.filled(int(0)))
 
 
-@pytest.mark.gpu
 def test_field_layouts():
     num_cell_values = 7
     for layout_str in ['numpy', 'fzyx', 'zyxf', 'reverse_numpy']:
diff --git a/pystencils_tests/test_complex_numbers.py b/pystencils_tests/test_complex_numbers.py
index 9f9164640f50e417348ee635af620dd891249d77..41dc76a40411129c4ee68f0ee42028b57875d71e 100644
--- a/pystencils_tests/test_complex_numbers.py
+++ b/pystencils_tests/test_complex_numbers.py
@@ -52,11 +52,14 @@ def test_complex_numbers(assignment, scalar_dtypes, target):
     ast = pystencils.create_kernel(assignment,
                                    target=target,
                                    data_type=scalar_dtypes)
-    code = str(pystencils.show_code(ast))
+    code = pystencils.get_code_str(ast)
 
     print(code)
     assert "Not supported" not in code
 
+    if target == 'gpu':
+        pytest.importorskip('pycuda')
+
     kernel = ast.compile()
     assert kernel is not None
 
@@ -95,11 +98,14 @@ def test_complex_numbers_64(assignment, target):
     ast = pystencils.create_kernel(assignment,
                                    target=target,
                                    data_type='double')
-    code = str(pystencils.show_code(ast))
+    code = pystencils.get_code_str(ast)
 
     print(code)
     assert "Not supported" not in code
 
+    if target == 'gpu':
+        pytest.importorskip('pycuda')
+
     kernel = ast.compile()
     assert kernel is not None
 
@@ -125,6 +131,7 @@ def test_complex_execution(dtype, target, with_complex_argument):
     })
 
     if target == 'gpu':
+        pytest.importorskip('pycuda')
         from pycuda.gpuarray import zeros
         x_arr = zeros((20, 30), complex_dtype)
         y_arr = zeros((20, 30), complex_dtype)
diff --git a/pystencils_tests/test_conditional_field_access.py b/pystencils_tests/test_conditional_field_access.py
index f68b34679ae5de0996a09a30921c85ec49bece58..a4bd53228476ea49f977e08f71acfd1d596231fe 100644
--- a/pystencils_tests/test_conditional_field_access.py
+++ b/pystencils_tests/test_conditional_field_access.py
@@ -63,7 +63,7 @@ def test_boundary_check(with_cse):
 
     print(assignments)
     kernel_checked = ps.create_kernel(assignments, ghost_layers=0).compile()
-    print(ps.show_code(kernel_checked))
+    ps.show_code(kernel_checked)
 
     # No SEGFAULT, please!!
     kernel_checked(f=f_arr, g=g_arr)
diff --git a/pystencils_tests/test_cuda_known_functions.py b/pystencils_tests/test_cuda_known_functions.py
index fe33b98df27cc6a6c2ec77eda49e704b9afbec5e..dcdabeaef6934e06b289351127b168d77d803be9 100644
--- a/pystencils_tests/test_cuda_known_functions.py
+++ b/pystencils_tests/test_cuda_known_functions.py
@@ -1,5 +1,7 @@
 import sympy
 
+import pytest
+
 import pystencils
 from pystencils.astnodes import get_dummy_symbol
 from pystencils.backends.cuda_backend import CudaSympyPrinter
@@ -18,7 +20,8 @@ def test_cuda_known_functions():
     })
 
     ast = pystencils.create_kernel(assignments, 'gpu')
-    print(pystencils.show_code(ast))
+    pytest.importorskip('pycuda')
+    pystencils.show_code(ast)
     kernel = ast.compile()
     assert(kernel is not None)
 
@@ -32,7 +35,7 @@ def test_cuda_but_not_c():
     })
 
     ast = pystencils.create_kernel(assignments, 'cpu')
-    print(pystencils.show_code(ast))
+    pystencils.show_code(ast)
 
 
 def test_cuda_unknown():
@@ -43,5 +46,4 @@ def test_cuda_unknown():
     })
 
     ast = pystencils.create_kernel(assignments, 'gpu')
-    code = str(pystencils.show_code(ast))
-    print(code)
+    pystencils.show_code(ast)
diff --git a/pystencils_tests/test_custom_backends.py b/pystencils_tests/test_custom_backends.py
index 1ff775d41355b79c1714a935125f27c261045011..6500076ce278bff4fc2a81f65c55553bd9523d78 100644
--- a/pystencils_tests/test_custom_backends.py
+++ b/pystencils_tests/test_custom_backends.py
@@ -1,9 +1,9 @@
 from subprocess import CalledProcessError
 
-import pycuda.driver
 import pytest
 import sympy
 
+import pycuda.driver
 import pystencils
 import pystencils.cpu.cpujit
 import pystencils.gpucuda.cudajit
@@ -25,18 +25,28 @@ class ScreamingGpuBackend(CudaBackend):
         return normal_code.upper()
 
 
-def test_custom_backends():
+def test_custom_backends_cpu():
     z, x, y = pystencils.fields("z, y, x: [2d]")
 
     normal_assignments = pystencils.AssignmentCollection([pystencils.Assignment(
         z[0, 0], x[0, 0] * sympy.log(x[0, 0] * y[0, 0]))], [])
 
     ast = pystencils.create_kernel(normal_assignments, target='cpu')
-    print(pystencils.show_code(ast, ScreamingBackend()))
+    pystencils.show_code(ast, ScreamingBackend())
     with pytest.raises(CalledProcessError):
         pystencils.cpu.cpujit.make_python_function(ast, custom_backend=ScreamingBackend())
 
+
+def test_custom_backends_gpu():
+    pytest.importorskip('pycuda')
+    import pycuda.driver
+
+    z, x, y = pystencils.fields("z, y, x: [2d]")
+
+    normal_assignments = pystencils.AssignmentCollection([pystencils.Assignment(
+        z[0, 0], x[0, 0] * sympy.log(x[0, 0] * y[0, 0]))], [])
+
     ast = pystencils.create_kernel(normal_assignments, target='gpu')
-    print(pystencils.show_code(ast, ScreamingGpuBackend()))
+    pystencils.show_code(ast, ScreamingGpuBackend())
     with pytest.raises(pycuda.driver.CompileError):
         pystencils.gpucuda.cudajit.make_python_function(ast, custom_backend=ScreamingGpuBackend())
diff --git a/pystencils_tests/test_datahandling.py b/pystencils_tests/test_datahandling.py
index 1b79fdbec5aa8bbbfb90450d2014d058065e998b..7f95fe1f04d1bbf72770d2837bf6723681c1f50d 100644
--- a/pystencils_tests/test_datahandling.py
+++ b/pystencils_tests/test_datahandling.py
@@ -128,6 +128,7 @@ def kernel_execution_jacobi(dh, target):
 
 
 def vtk_output(dh):
+    pytest.importorskip('pyevtk')
     dh.add_array('scalar_field')
     dh.add_array('vector_field', values_per_cell=dh.dim)
     dh.add_array('multiple_scalar_field', values_per_cell=9)
@@ -223,6 +224,7 @@ def test_kernel_param(target):
 
 
 def test_vtk_output():
+    pytest.importorskip('pyevtk')
     for domain_shape in [(4, 5), (3, 4, 5)]:
         dh = create_data_handling(domain_size=domain_shape, periodicity=True)
         vtk_output(dh)
diff --git a/pystencils_tests/test_fast_approximation.py b/pystencils_tests/test_fast_approximation.py
index 76bef174eb7451234dcf347818fdb2d456ce7005..6c9539f64ee2a911123a8fd9f10845a3c67dcbea 100644
--- a/pystencils_tests/test_fast_approximation.py
+++ b/pystencils_tests/test_fast_approximation.py
@@ -12,7 +12,7 @@ def test_fast_sqrt():
     assert len(insert_fast_sqrts(expr).atoms(fast_sqrt)) == 1
     assert len(insert_fast_sqrts([expr])[0].atoms(fast_sqrt)) == 1
     ast = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_sqrts(expr)), target='gpu')
-    code_str = str(ps.show_code(ast))
+    code_str = ps.get_code_str(ast)
     assert '__fsqrt_rn' in code_str
 
     expr = ps.Assignment(sp.Symbol("tmp"), 3 / sp.sqrt(f[0, 0] + f[1, 0]))
@@ -21,7 +21,7 @@ def test_fast_sqrt():
     ac = ps.AssignmentCollection([expr], [])
     assert len(insert_fast_sqrts(ac).main_assignments[0].atoms(fast_inv_sqrt)) == 1
     ast = ps.create_kernel(insert_fast_sqrts(ac), target='gpu')
-    code_str = str(ps.show_code(ast))
+    code_str = ps.get_code_str(ast)
     assert '__frsqrt_rn' in code_str
 
 
@@ -34,5 +34,5 @@ def test_fast_divisions():
     assert len(insert_fast_divisions(expr).atoms(fast_division)) == 1
 
     ast = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_divisions(expr)), target='gpu')
-    code_str = str(ps.show_code(ast))
+    code_str = ps.get_code_str(ast)
     assert '__fdividef' in code_str
diff --git a/pystencils_tests/test_field.py b/pystencils_tests/test_field.py
index a2813e34f9c72c8bbcd848d21993c181cf31fc43..253ca9f26547d78e6b23b2c6dbc582196b49960d 100644
--- a/pystencils_tests/test_field.py
+++ b/pystencils_tests/test_field.py
@@ -44,7 +44,7 @@ def test_error_handling():
         Field.create_generic('f', spatial_dimensions=2, index_dimensions=1, dtype=struct_dtype)
     assert 'index dimension' in str(e.value)
 
-    arr = np.array([[1, 2.0, 3], [1, 2.0, 3]], dtype=struct_dtype)
+    arr = np.array([[[(1,)*3, (2,)*3, (3,)*3]]*2], dtype=struct_dtype)
     Field.create_from_numpy_array('f', arr, index_dimensions=0)
     with pytest.raises(ValueError) as e:
         Field.create_from_numpy_array('f', arr, index_dimensions=1)
diff --git a/pystencils_tests/test_helpful_errors.py b/pystencils_tests/test_helpful_errors.py
new file mode 100644
index 0000000000000000000000000000000000000000..a13178afebd424c0f3e5e3355f8b0ecf97eab870
--- /dev/null
+++ b/pystencils_tests/test_helpful_errors.py
@@ -0,0 +1,37 @@
+"""
+
+"""
+
+import pytest
+
+from pystencils.astnodes import Block
+from pystencils.backends.cbackend import CustomCodeNode, get_headers
+
+
+def test_headers_have_quotes_or_brackets():
+    class ErrorNode1(CustomCodeNode):
+
+        def __init__(self):
+            super().__init__("", [], [])
+            self.headers = ["iostream"]
+
+    class ErrorNode2(CustomCodeNode):
+        headers = ["<iostream>", "foo"]
+
+        def __init__(self):
+            super().__init__("", [], [])
+            self.headers = ["<iostream>", "foo"]
+
+    class OkNode3(CustomCodeNode):
+
+        def __init__(self):
+            super().__init__("", [], [])
+            self.headers = ["<iostream>", '"foo"']
+
+    with pytest.raises(AssertionError, match='.* does not follow the pattern .*'):
+        get_headers(Block([ErrorNode1()]))
+
+    with pytest.raises(AssertionError, match='.* does not follow the pattern .*'):
+        get_headers(ErrorNode2())
+
+    get_headers(OkNode3())
diff --git a/pystencils_tests/test_interpolation.py b/pystencils_tests/test_interpolation.py
index dd4ee06f4ba9197111a04af572df6cdaf2695ab5..477765bb31289dbfdc48927e3cc55f10d49f16a0 100644
--- a/pystencils_tests/test_interpolation.py
+++ b/pystencils_tests/test_interpolation.py
@@ -11,8 +11,6 @@ import itertools
 from os.path import dirname, join
 
 import numpy as np
-import pycuda.autoinit  # NOQA
-import pycuda.gpuarray as gpuarray
 import pytest
 import sympy
 
@@ -83,9 +81,9 @@ def test_scale_interpolation():
                          ['border',
                           'clamp',
                           pytest.param('warp', marks=pytest.mark.xfail(
-                              reason="Fails on newer SymPy version due to complex conjugate()")),
+                              reason="requires interpolation-refactoring branch")),
                           pytest.param('mirror', marks=pytest.mark.xfail(
-                              reason="Fails on newer SymPy version due to complex conjugate()")),
+                              reason="requires interpolation-refactoring branch")),
                           ])
 def test_rotate_interpolation(address_mode):
     """
@@ -110,87 +108,90 @@ def test_rotate_interpolation(address_mode):
     pyconrad.imshow(out, "out " + address_mode)
 
 
-@pytest.mark.parametrize('address_mode', ['border', 'wrap', 'clamp', 'mirror'])
-def test_rotate_interpolation_gpu(address_mode):
+@pytest.mark.parametrize('dtype', (np.int32, np.float32, np.float64))
+@pytest.mark.parametrize('address_mode', ('border', 'wrap', 'clamp', 'mirror'))
+@pytest.mark.parametrize('use_textures', ('use_textures', False))
+def test_rotate_interpolation_gpu(dtype, address_mode, use_textures):
+    pytest.importorskip('pycuda')
 
+    import pycuda.gpuarray as gpuarray
+    import pycuda.autoinit  # noqa
     rotation_angle = sympy.pi / 5
     scale = 1
 
-    previous_result = None
-    for dtype in [np.int32, np.float32, np.float64]:
-        if dtype == np.int32:
-            lenna_gpu = gpuarray.to_gpu(
-                np.ascontiguousarray(lenna * 255, dtype))
-        else:
-            lenna_gpu = gpuarray.to_gpu(
-                np.ascontiguousarray(lenna, dtype))
-        for use_textures in [True, False]:
-            x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0)
-
-            transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * \
-                sympy.Matrix((x_, y_)) - sympy.Matrix([2, 2])
-            assignments = pystencils.AssignmentCollection({
-                y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
-            })
-            print(assignments)
-            ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures)
-            print(ast)
-            print(pystencils.show_code(ast))
-            kernel = ast.compile()
-
-            out = gpuarray.zeros_like(lenna_gpu)
-            kernel(x=lenna_gpu, y=out)
-            pyconrad.imshow(out,
-                            f"out {address_mode} texture:{use_textures} {type_map[dtype]}")
-            skimage.io.imsave(f"/tmp/out {address_mode} texture:{use_textures} {type_map[dtype]}.tif",
-                              np.ascontiguousarray(out.get(), np.float32))
-            if previous_result is not None:
-                try:
-                    assert np.allclose(previous_result[4:-4, 4:-4], out.get()[4:-4, 4:-4], rtol=100, atol=1e-3)
-                except AssertionError:  # NOQA
-                    print("Max error: %f" % np.max(previous_result - out.get()))
-                    # pyconrad.imshow(previous_result - out.get(), "Difference image")
-                    # raise e
-            previous_result = out.get()
-
-
-@pytest.mark.parametrize('address_mode', ['border', 'wrap', 'clamp', 'mirror'])
-def test_shift_interpolation_gpu(address_mode):
+    if dtype == np.int32:
+        lenna_gpu = gpuarray.to_gpu(
+            np.ascontiguousarray(lenna * 255, dtype))
+    else:
+        lenna_gpu = gpuarray.to_gpu(
+            np.ascontiguousarray(lenna, dtype))
+    x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0)
+
+    transformed = scale * \
+        sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) - sympy.Matrix([2, 2])
+    assignments = pystencils.AssignmentCollection({
+        y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
+    })
+    print(assignments)
+    ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures)
+    print(ast)
+    print(pystencils.show_code(ast))
+    kernel = ast.compile()
+
+    out = gpuarray.zeros_like(lenna_gpu)
+    kernel(x=lenna_gpu, y=out)
+    pyconrad.imshow(out,
+                    f"out {address_mode} texture:{use_textures} {type_map[dtype]}")
+
+
+@pytest.mark.parametrize('address_mode', ['border', 'wrap',
+                                          pytest.param('warp', marks=pytest.mark.xfail(
+                                              reason="% printed as fmod on old sympy")),
+                                          pytest.param('mirror', marks=pytest.mark.xfail(
+                                              reason="% printed as fmod on old sympy")),
+                                          ])
+@pytest.mark.parametrize('dtype', [np.float64, np.float32, np.int32])
+@pytest.mark.parametrize('use_textures', ('use_textures', False,))
+def test_shift_interpolation_gpu(address_mode, dtype, use_textures):
+    sver = sympy.__version__.split(".")
+    if (int(sver[0]) == 1 and int(sver[1]) < 2) and address_mode in ['mirror', 'warp']:
+        pytest.skip()
+    pytest.importorskip('pycuda')
+
+    import pycuda.gpuarray as gpuarray
+    import pycuda.autoinit  # noqa
 
     rotation_angle = 0  # sympy.pi / 5
     scale = 1
     # shift = - sympy.Matrix([1.5, 1.5])
     shift = sympy.Matrix((0.0, 0.0))
 
-    for dtype in [np.float64, np.float32, np.int32]:
-        if dtype == np.int32:
-            lenna_gpu = gpuarray.to_gpu(
-                np.ascontiguousarray(lenna * 255, dtype))
-        else:
-            lenna_gpu = gpuarray.to_gpu(
-                np.ascontiguousarray(lenna, dtype))
-        for use_textures in [True, False]:
-            x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0)
-
-            if use_textures:
-                transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift
-            else:
-                transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift
-            assignments = pystencils.AssignmentCollection({
-                y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
-            })
-            # print(assignments)
-            ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures)
-            # print(ast)
-            print(pystencils.show_code(ast))
-            kernel = ast.compile()
-
-            out = gpuarray.zeros_like(lenna_gpu)
-            kernel(x=lenna_gpu, y=out)
-            pyconrad.imshow(out,
-                            f"out {address_mode} texture:{use_textures} {type_map[dtype]}")
-            skimage.io.imsave(f"/tmp/out {address_mode} texture:{use_textures} {type_map[dtype]}.tif",
-                              np.ascontiguousarray(out.get(), np.float32))
+    if dtype == np.int32:
+        lenna_gpu = gpuarray.to_gpu(
+            np.ascontiguousarray(lenna * 255, dtype))
+    else:
+        lenna_gpu = gpuarray.to_gpu(
+            np.ascontiguousarray(lenna, dtype))
+
+    x_f, y_f = pystencils.fields('x,y: %s [2d]' % type_map[dtype], ghost_layers=0)
+
+    if use_textures:
+        transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift
+    else:
+        transformed = scale * sympy.rot_axis3(rotation_angle)[:2, :2] * sympy.Matrix((x_, y_)) + shift
+    assignments = pystencils.AssignmentCollection({
+        y_f.center(): LinearInterpolator(x_f, address_mode=address_mode).at(transformed)
+    })
+    # print(assignments)
+    ast = pystencils.create_kernel(assignments, target='gpu', use_textures_for_interpolation=use_textures)
+    # print(ast)
+    print(pystencils.show_code(ast))
+    kernel = ast.compile()
+
+    out = gpuarray.zeros_like(lenna_gpu)
+    kernel(x=lenna_gpu, y=out)
+    pyconrad.imshow(out,
+                    f"out {address_mode} texture:{use_textures} {type_map[dtype]}")
 
 
 @pytest.mark.parametrize('address_mode', ['border', 'clamp'])
@@ -218,7 +219,7 @@ def test_rotate_interpolation_size_change(address_mode):
 
 
 @pytest.mark.parametrize('address_mode, target',
-                         itertools.product(['border', 'wrap', 'clamp', 'mirror'], ['cpu', 'gpu']))
+                         itertools.product(['border', 'wrap', 'clamp', 'mirror'], ['cpu']))
 def test_field_interpolated(address_mode, target):
     x_f, y_f = pystencils.fields('x,y: float64 [2d]')
 
@@ -226,7 +227,7 @@ def test_field_interpolated(address_mode, target):
         y_f.center(): x_f.interpolated_access([0.5 * x_ + 2.7, 0.25 * y_ + 7.2], address_mode=address_mode)
     })
     print(assignments)
-    ast = pystencils.create_kernel(assignments)
+    ast = pystencils.create_kernel(assignments, target=target)
     print(ast)
     print(pystencils.show_code(ast))
     kernel = ast.compile()
@@ -234,5 +235,11 @@ def test_field_interpolated(address_mode, target):
     out = np.zeros_like(lenna)
     kernel(x=lenna, y=out)
     pyconrad.imshow(out, "out " + address_mode)
-    kernel(x=lenna, y=out)
-    pyconrad.imshow(out, "out " + address_mode)
+
+
+def test_spatial_derivative():
+    x, y = pystencils.fields('x, y:  float32[2d]')
+    tx, ty = pystencils.fields('t_x, t_y: float32[2d]')
+
+    diff = sympy.diff(x.interpolated_access((tx.center, ty.center)), tx.center)
+    print("diff: " + str(diff))
diff --git a/pystencils_tests/test_jacobi_cbackend.py b/pystencils_tests/test_jacobi_cbackend.py
index 9206aeedaf4bb035444f9280cf58968c9ca385c9..6d86ecb05f920f7a6c54b018ea5dd3508172b727 100644
--- a/pystencils_tests/test_jacobi_cbackend.py
+++ b/pystencils_tests/test_jacobi_cbackend.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from pystencils import show_code
+from pystencils import get_code_obj
 from pystencils.astnodes import Block, KernelFunction, SympyAssignment
 from pystencils.cpu import make_python_function
 from pystencils.field import Field
@@ -36,7 +36,7 @@ def test_jacobi_fixed_field_size():
     error = np.sum(np.abs(dst_field_py - dst_field_c))
     np.testing.assert_allclose(error, 0.0, atol=1e-13)
 
-    code_display = show_code(ast_node)
+    code_display = get_code_obj(ast_node)
     assert 'for' in str(code_display)
     assert 'for' in code_display._repr_html_()
 
diff --git a/pystencils_tests/test_jacobi_llvm.py b/pystencils_tests/test_jacobi_llvm.py
index 13b22aa4b0235bb98b4789e3b9266bf672985111..95bd52d2edd706fc3e8b6c595c7ad9855ae5e4f6 100644
--- a/pystencils_tests/test_jacobi_llvm.py
+++ b/pystencils_tests/test_jacobi_llvm.py
@@ -52,7 +52,7 @@ def test_jacobi_fixed_field_size_gpu():
 
     jacobi = Assignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4)
     ast = create_kernel([jacobi], target='gpu')
-    print(show_code(ast))
+    show_code(ast)
 
     for x in range(1, size[0] - 1):
         for y in range(1, size[1] - 1):
diff --git a/pystencils_tests/test_loop_cutting.py b/pystencils_tests/test_loop_cutting.py
index cd89f37f6f365b4223e1463db68874f50e81c46d..0291074b87c051bcf381883e474078c663274ac1 100644
--- a/pystencils_tests/test_loop_cutting.py
+++ b/pystencils_tests/test_loop_cutting.py
@@ -1,6 +1,8 @@
 import numpy as np
 import sympy as sp
 
+import pytest
+
 import pystencils as ps
 import pystencils.astnodes as ast
 from pystencils.field import Field, FieldType
@@ -57,6 +59,7 @@ def test_staggered_iteration():
                                sum(f[o] for o in offsets_in_plane(d, -1, dim)))
         assignments = [ps.Assignment(s.staggered_access(d), expressions[i]) for i, d in enumerate(s.staggered_stencil)]
         func_optimized = create_staggered_kernel(assignments).compile()
+        pytest.importorskip('islpy')
         assert not func_optimized.ast.atoms(Conditional), "Loop cutting optimization did not work"
 
         func(f=f_arr, s=s_arr_ref)
@@ -99,6 +102,7 @@ def test_staggered_iteration_manual():
     move_constants_before_loop(kernel_ast.body)
     cleanup_blocks(kernel_ast.body)
 
+    pytest.importorskip('islpy')
     assert not kernel_ast.atoms(Conditional), "Loop cutting optimization did not work"
 
     func_optimized = make_python_function(kernel_ast)
diff --git a/pystencils_tests/test_opencl.py b/pystencils_tests/test_opencl.py
index ea5b1ecbc83f21672731b2ecbcb54c78faa1603b..6c013cefed7387827888385feeb11352b96cc8e6 100644
--- a/pystencils_tests/test_opencl.py
+++ b/pystencils_tests/test_opencl.py
@@ -5,11 +5,13 @@ import sympy as sp
 import pystencils
 from pystencils.backends.cuda_backend import CudaBackend
 from pystencils.backends.opencl_backend import OpenClBackend
-from pystencils.opencl.opencljit import get_global_cl_queue, init_globally, make_python_function
+from pystencils.opencl.opencljit import get_global_cl_queue, make_python_function
 
 try:
     import pyopencl as cl
     HAS_OPENCL = True
+    import pystencils.opencl.autoinit
+
 except Exception:
     HAS_OPENCL = False
 
@@ -27,10 +29,9 @@ def test_print_opencl():
 
     print(ast)
 
-    code = pystencils.show_code(ast, custom_backend=CudaBackend())
-    print(code)
+    pystencils.show_code(ast, custom_backend=CudaBackend())
 
-    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
+    opencl_code = pystencils.get_code_str(ast, custom_backend=OpenClBackend())
     print(opencl_code)
 
     assert "__global double * RESTRICT const _data_x" in str(opencl_code)
@@ -108,10 +109,9 @@ def test_opencl_jit():
 
     print(ast)
 
-    code = pystencils.show_code(ast, custom_backend=CudaBackend())
-    print(code)
-    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
-    print(opencl_code)
+    pystencils.show_code(ast, custom_backend=CudaBackend())
+
+    pystencils.show_code(ast, custom_backend=OpenClBackend())
 
     cuda_kernel = ast.compile()
     assert cuda_kernel is not None
@@ -246,14 +246,12 @@ def test_kernel_creation():
 
     print(assignments)
 
-    pystencils.opencl.clear_global_ctx()
-
     import pystencils.opencl.autoinit
     ast = pystencils.create_kernel(assignments, target='opencl')
 
     print(ast.backend)
 
-    code = str(pystencils.show_code(ast))
+    code = pystencils.get_code_str(ast)
     print(code)
     assert 'get_local_size' in code
 
diff --git a/pystencils_tests/test_phasefield_dentritic_3D.ipynb b/pystencils_tests/test_phasefield_dentritic_3D.ipynb
index aebdeefdddf53e9736feb7d8673c09bc5d8fa6d0..1a01f103891fb229385d8192f263fb53dd837a78 100644
--- a/pystencils_tests/test_phasefield_dentritic_3D.ipynb
+++ b/pystencils_tests/test_phasefield_dentritic_3D.ipynb
@@ -1,5 +1,15 @@
 {
  "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pytest\n",
+    "pytest.importorskip('pycuda')"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
diff --git a/pystencils_tests/test_plot.py b/pystencils_tests/test_plot.py
index 6234334bb91352c80c643c474c76c15b968ce1c7..4c9207216266e48d4f2b0b14db1a5e160732c5cb 100644
--- a/pystencils_tests/test_plot.py
+++ b/pystencils_tests/test_plot.py
@@ -1,5 +1,8 @@
 import os
 from tempfile import TemporaryDirectory
+import shutil
+
+import pytest
 
 import numpy as np
 
@@ -20,6 +23,7 @@ def example_vector_field(t=0, shape=(40, 40)):
     return result
 
 
+@pytest.mark.skipif(shutil.which('ffmpeg') is None, reason="ffmpeg not available")
 def test_animation():
     t = 0
 
diff --git a/pystencils_tests/test_print_infinity.py b/pystencils_tests/test_print_infinity.py
index 4b1e488822163e684dcad4009fca1b72c7d87d2c..9e2dbd29b18389265b206513ec6c4625e0722a10 100644
--- a/pystencils_tests/test_print_infinity.py
+++ b/pystencils_tests/test_print_infinity.py
@@ -17,6 +17,9 @@ def test_print_infinity(type, negative, target):
         assignment = pystencils.Assignment(x.center, oo)
     ast = pystencils.create_kernel(assignment, data_type=type, target=target)
 
+    if target == 'gpu':
+        pytest.importorskip('pycuda')
+
     ast.compile()
 
     print(ast.compile().code)
diff --git a/pystencils_tests/test_random.py b/pystencils_tests/test_random.py
index 473aa3d1215449f51e70aa3b09f2c605ed13c313..1b3f89f2f81aa6e079f7bc030f27da74e778dd13 100644
--- a/pystencils_tests/test_random.py
+++ b/pystencils_tests/test_random.py
@@ -1,5 +1,7 @@
 import numpy as np
 
+import pytest
+
 import pystencils as ps
 from pystencils.rng import PhiloxFourFloats, PhiloxTwoDoubles, AESNIFourFloats, AESNITwoDoubles
 
@@ -12,6 +14,9 @@ philox_reference = np.array([[[3576608082, 1252663339, 1987745383,  348040302],
 
 def test_philox_double():
     for target in ('cpu', 'gpu'):
+        if target == 'gpu':
+            pytest.importorskip('pycuda')
+
         dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
         f = dh.add_array("f", values_per_cell=2)
 
@@ -39,6 +44,9 @@ def test_philox_double():
 
 def test_philox_float():
     for target in ('cpu', 'gpu'):
+        if target == 'gpu':
+            pytest.importorskip('pycuda')
+
         dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
         f = dh.add_array("f", values_per_cell=4)
 
diff --git a/pystencils_tests/test_source_code_comment.py b/pystencils_tests/test_source_code_comment.py
index 6cc66feb6f7f92174029e274feee22dfc4455b8d..16e6e5a9647843db4483b951aa631b591240dac5 100644
--- a/pystencils_tests/test_source_code_comment.py
+++ b/pystencils_tests/test_source_code_comment.py
@@ -27,4 +27,5 @@ def test_source_code_comment():
     print(ast)
     compiled = ast.compile()
     assert compiled is not None
-    print(pystencils.show_code(ast))
+
+    pystencils.show_code(ast)
diff --git a/pystencils_tests/test_staggered_kernel.py b/pystencils_tests/test_staggered_kernel.py
index 310220d21442dc1d1a24b0e74e50f3e068374b67..c4f491393d2b4d08fbb9b4c99356abdc7e7199b6 100644
--- a/pystencils_tests/test_staggered_kernel.py
+++ b/pystencils_tests/test_staggered_kernel.py
@@ -1,6 +1,8 @@
 import numpy as np
 import sympy as sp
 
+import pytest
+
 import pystencils as ps
 
 
@@ -88,6 +90,7 @@ def test_staggered_subexpressions():
 
 
 def test_staggered_loop_cutting():
+    pytest.importorskip('islpy')
     dh = ps.create_data_handling((4, 4), periodicity=True, default_target='cpu')
     j = dh.add_array('j', values_per_cell=4, field_type=ps.FieldType.STAGGERED)
     assignments = [ps.Assignment(j.staggered_access("SW"), 1)]
diff --git a/pystencils_tests/test_sum_prod.py b/pystencils_tests/test_sum_prod.py
index 4fa5c0618612b013edda4d164dd035dafdd2438a..4b4cd7131ad4b860c754865cd35f3960399e9b5a 100644
--- a/pystencils_tests/test_sum_prod.py
+++ b/pystencils_tests/test_sum_prod.py
@@ -30,7 +30,7 @@ def test_sum():
     })
 
     ast = pystencils.create_kernel(assignments)
-    code = str(pystencils.show_code(ast))
+    code = str(pystencils.get_code_obj(ast))
     kernel = ast.compile()
 
     print(code)
@@ -58,11 +58,11 @@ def test_sum_use_float():
     })
 
     ast = pystencils.create_kernel(assignments, data_type=create_type('float32'))
-    code = str(pystencils.show_code(ast))
+    code = str(pystencils.get_code_obj(ast))
     kernel = ast.compile()
 
     print(code)
-    print(pystencils.show_code(ast))
+    print(pystencils.get_code_obj(ast))
     assert 'float sum' in code
 
     array = np.zeros((10,), np.float32)
@@ -89,7 +89,7 @@ def test_product():
     })
 
     ast = pystencils.create_kernel(assignments)
-    code = str(pystencils.show_code(ast))
+    code = pystencils.get_code_str(ast)
     kernel = ast.compile()
 
     print(code)
@@ -120,11 +120,9 @@ def test_prod_var_limit():
     })
 
     ast = pystencils.create_kernel(assignments)
-    code = str(pystencils.show_code(ast))
+    pystencils.show_code(ast)
     kernel = ast.compile()
 
-    print(code)
-
     array = np.zeros((10,), np.int64)
 
     kernel(x=array, limit=100)
diff --git a/pystencils_tests/test_sympy_optimizations.py b/pystencils_tests/test_sympy_optimizations.py
index 745b936ea7d0e3ce5da6c684dee42c56c09dd835..b22ad9d7fb07c84b88388455b45e76f6b5decc3a 100644
--- a/pystencils_tests/test_sympy_optimizations.py
+++ b/pystencils_tests/test_sympy_optimizations.py
@@ -16,9 +16,10 @@ def test_sympy_optimizations():
         })
 
         assignments = optimize_assignments(assignments, optims_pystencils_cpu)
+        print(assignments)
 
         ast = pystencils.create_kernel(assignments, target=target)
-        code = str(pystencils.show_code(ast))
+        code = pystencils.get_code_str(ast)
         assert 'expm1(' in code
 
 
@@ -35,7 +36,7 @@ def test_evaluate_constant_terms():
         assignments = optimize_assignments(assignments, optims_pystencils_cpu)
 
         ast = pystencils.create_kernel(assignments, target=target)
-        code = str(pystencils.show_code(ast))
+        code = pystencils.get_code_str(ast)
         assert 'cos(' not in code
         print(code)
 
@@ -55,6 +56,6 @@ def test_do_not_evaluate_constant_terms():
         optimize_assignments(assignments, optimizations)
 
         ast = pystencils.create_kernel(assignments, target=target)
-        code = str(pystencils.show_code(ast))
+        code = pystencils.get_code_str(ast)
         assert 'cos(' in code
         print(code)
diff --git a/pytest.ini b/pytest.ini
index 4e9f5caddfef20639da974ad9327e53fa8b6ced4..0070425966626449378fcd1ff9eabe468256a112 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,7 +3,6 @@ python_files = test_*.py *_test.py scenario_*.py
 norecursedirs = *.egg-info .git .cache .ipynb_checkpoints htmlcov
 addopts = --doctest-modules --durations=20  --cov-config pytest.ini
 markers =
-       gpu: test that require a gpu
        kerncraft: tests depending on kerncraft
 
 [run]
diff --git a/setup.py b/setup.py
index 83a76f870f02f64d8a371f294c1224c15c919061..8eeeff7b2e4a324feb7dc31fc9b8605254ae2da0 100644
--- a/setup.py
+++ b/setup.py
@@ -112,7 +112,7 @@ setup(name='pystencils',
           'opencl': ['pyopencl'],
           'alltrafos': ['islpy', 'py-cpuinfo'],
           'bench_db': ['blitzdb', 'pymongo', 'pandas'],
-          'interactive': ['matplotlib', 'ipy_table', 'imageio', 'jupyter', 'pyevtk'],
+          'interactive': ['matplotlib', 'ipy_table', 'imageio', 'jupyter', 'pyevtk', 'rich'],
           'autodiff': ['pystencils-autodiff'],
           'doc': ['sphinx', 'sphinx_rtd_theme', 'nbsphinx',
                   'sphinxcontrib-bibtex', 'sphinx_autodoc_typehints', 'pandoc'],