diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py
index 94f3477290ecf79fcd8a095389a752d1505d67e2..aca5e5da1e36774dbd7b69b1a0f0f57012bc4dd6 100644
--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -298,7 +298,7 @@ class CBackend:
         return node.get_code(self._dialect, self._vector_instruction_set)
 
     def _print_SourceCodeComment(self, node):
-        return "/* " + node.text + " */"
+        return f"/* {node.text } */"
 
     def _print_EmptyLine(self, node):
         return ""
@@ -316,7 +316,7 @@ class CBackend:
         result = f"if ({condition_expr})\n{true_block} "
         if node.false_block:
             false_block = self._print_Block(node.false_block)
-            result += "else " + false_block
+            result += f"else {false_block}"
         return result
 
 
@@ -336,7 +336,7 @@ class CustomSympyPrinter(CCodePrinter):
             return self._typed_number(expr.evalf(), get_type_of_expression(expr))
 
         if expr.exp.is_integer and expr.exp.is_number and 0 < expr.exp < 8:
-            return "(" + self._print(sp.Mul(*[expr.base] * expr.exp, evaluate=False)) + ")"
+            return f"({self._print(sp.Mul(*[expr.base] * expr.exp, evaluate=False))})"
         elif expr.exp.is_integer and expr.exp.is_number and - 8 < expr.exp < 0:
             return f"1 / ({self._print(sp.Mul(*([expr.base] * -expr.exp), evaluate=False))})"
         else:
@@ -589,9 +589,6 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter):
             result = self.instruction_set['&'].format(result, item)
         return result
 
-    def _print_Max(self, expr):
-        return "test"
-
     def _print_Or(self, expr):
         result = self._scalarFallback('_print_Or', expr)
         if result:
diff --git a/pystencils/backends/cuda_backend.py b/pystencils/backends/cuda_backend.py
index b43cfb4ededa3ea5c84ff2eef9d434d602f358b2..2d7dc579e932aee814e5207801971246122a9a88 100644
--- a/pystencils/backends/cuda_backend.py
+++ b/pystencils/backends/cuda_backend.py
@@ -104,7 +104,6 @@ class CudaSympyPrinter(CustomSympyPrinter):
             assert len(expr.args) == 1, f"__fsqrt_rn has one argument, but {len(expr.args)} where given"
             return f"__fsqrt_rn({self._print(expr.args[0])})"
         elif isinstance(expr, fast_inv_sqrt):
-            print(len(expr.args) == 1)
             assert len(expr.args) == 1, f"__frsqrt_rn has one argument, but {len(expr.args)} where given"
             return f"__frsqrt_rn({self._print(expr.args[0])})"
         return super()._print_Function(expr)
diff --git a/pystencils/datahandling/datahandling_interface.py b/pystencils/datahandling/datahandling_interface.py
index 272f1660ce1c2a6623fb1bb069b37b16d68042f3..0eb101815ec39788cb2b1b9e720f2628bf087edf 100644
--- a/pystencils/datahandling/datahandling_interface.py
+++ b/pystencils/datahandling/datahandling_interface.py
@@ -86,6 +86,13 @@ class DataHandling(ABC):
         Args:
             description (str): String description of the fields to add
             dtype: data type of the array as numpy data type
+            ghost_layers: number of ghost layers - if not specified a default value specified in the constructor
+                         is used
+            layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'.
+                    this is only important if values_per_cell > 1
+            cpu: allocate field on the CPU
+            gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu'
+            alignment: either False for no alignment, or the number of bytes to align to
         Returns:
             Fields representing the just created arrays
         """
@@ -200,6 +207,10 @@ class DataHandling(ABC):
         directly passed to the kernel function and override possible parameters from the DataHandling
         """
 
+    @abstractmethod
+    def get_kernel_kwargs(self, kernel_function, **kwargs):
+        """Returns the input arguments of a kernel"""
+
     @abstractmethod
     def swap(self, name1, name2, gpu=False):
         """Swaps data of two arrays"""
diff --git a/pystencils/datahandling/serial_datahandling.py b/pystencils/datahandling/serial_datahandling.py
index 7352951d22393d58ca6f6bef52522e5760ba71cb..ce4629f6ab7f35c96cec437ab76b778dfec83e04 100644
--- a/pystencils/datahandling/serial_datahandling.py
+++ b/pystencils/datahandling/serial_datahandling.py
@@ -266,10 +266,10 @@ class SerialDataHandling(DataHandling):
         return name in self.gpu_arrays
 
     def synchronization_function_cpu(self, names, stencil_name=None, **_):
-        return self.synchronization_function(names, stencil_name, 'cpu')
+        return self.synchronization_function(names, stencil_name, target='cpu')
 
     def synchronization_function_gpu(self, names, stencil_name=None, **_):
-        return self.synchronization_function(names, stencil_name, 'gpu')
+        return self.synchronization_function(names, stencil_name, target='gpu')
 
     def synchronization_function(self, names, stencil=None, target=None, **_):
         if target is None:
@@ -425,14 +425,15 @@ class SerialDataHandling(DataHandling):
         np.savez_compressed(file, **self.cpu_arrays)
 
     def load_all(self, file):
+        if '.npz' not in file:
+            file += '.npz'
         file_contents = np.load(file)
         for arr_name, arr_contents in self.cpu_arrays.items():
             if arr_name not in file_contents:
                 print(f"Skipping read data {arr_name} because there is no data with this name in data handling")
                 continue
             if file_contents[arr_name].shape != arr_contents.shape:
-                print("Skipping read data {} because shapes don't match. "
-                      "Read array shape {}, existing array shape {}".format(arr_name, file_contents[arr_name].shape,
-                                                                            arr_contents.shape))
+                print(f"Skipping read data {arr_name} because shapes don't match. "
+                      f"Read array shape {file_contents[arr_name].shape}, existing array shape {arr_contents.shape}")
                 continue
             np.copyto(arr_contents, file_contents[arr_name])
diff --git a/pystencils/fd/derivative.py b/pystencils/fd/derivative.py
index 0e2890ec558db92a364c6912b19b26cb676f7217..c119d1e2ec34c32c67f18f7837c43dee05cfc65b 100644
--- a/pystencils/fd/derivative.py
+++ b/pystencils/fd/derivative.py
@@ -228,7 +228,9 @@ def diff_terms(expr):
 
     Example:
         >>> x, y = sp.symbols("x, y")
-        >>> diff_terms( diff(x, 0, 0)  )
+        >>> diff_terms( diff(x, 0, 0) )
+        {Diff(Diff(x, 0, -1), 0, -1)}
+        >>> diff_terms( diff(x, 0, 0) + y )
         {Diff(Diff(x, 0, -1), 0, -1)}
     """
     result = set()
diff --git a/pystencils/simp/__init__.py b/pystencils/simp/__init__.py
index ab0d608fb8efcd659d8811ef0e274f28e72c1cf0..dadaa7911a536ed36eafa75b720d2cddfae6a6d9 100644
--- a/pystencils/simp/__init__.py
+++ b/pystencils/simp/__init__.py
@@ -1,7 +1,7 @@
 from .assignment_collection import AssignmentCollection
 from .simplifications import (
     add_subexpressions_for_divisions, add_subexpressions_for_field_reads,
-    apply_on_all_subexpressions, apply_to_all_assignments,
+    add_subexpressions_for_sums, apply_on_all_subexpressions, apply_to_all_assignments,
     subexpression_substitution_in_existing_subexpressions,
     subexpression_substitution_in_main_assignments, sympy_cse, sympy_cse_on_assignment_list)
 from .simplificationstrategy import SimplificationStrategy
@@ -10,4 +10,4 @@ __all__ = ['AssignmentCollection', 'SimplificationStrategy',
            'sympy_cse', 'sympy_cse_on_assignment_list', 'apply_to_all_assignments',
            'apply_on_all_subexpressions', 'subexpression_substitution_in_existing_subexpressions',
            'subexpression_substitution_in_main_assignments', 'add_subexpressions_for_divisions',
-           'add_subexpressions_for_field_reads']
+           'add_subexpressions_for_sums', 'add_subexpressions_for_field_reads']
diff --git a/pystencils/simp/assignment_collection.py b/pystencils/simp/assignment_collection.py
index 696038dd59f7faad83e74dbeecdfcfb038ea127a..6bd1c66021c129e35288101c24cdcca96fcebd46 100644
--- a/pystencils/simp/assignment_collection.py
+++ b/pystencils/simp/assignment_collection.py
@@ -6,8 +6,7 @@ import sympy as sp
 
 import pystencils
 from pystencils.assignment import Assignment
-from pystencils.simp.simplifications import (
-    sort_assignments_topologically, transform_lhs_and_rhs, transform_rhs)
+from pystencils.simp.simplifications import (sort_assignments_topologically, transform_lhs_and_rhs, transform_rhs)
 from pystencils.sympyextensions import count_operations, fast_subs
 
 
@@ -263,7 +262,7 @@ class AssignmentCollection:
         own_definitions = set([e.lhs for e in self.main_assignments])
         other_definitions = set([e.lhs for e in other.main_assignments])
         assert len(own_definitions.intersection(other_definitions)) == 0, \
-            "Cannot new_merged, since both collection define the same symbols"
+            "Cannot merge collections, since both define the same symbols"
 
         own_subexpression_symbols = {e.lhs: e.rhs for e in self.subexpressions}
         substitution_dict = {}
@@ -334,7 +333,7 @@ class AssignmentCollection:
         kept_subexpressions = []
         if self.subexpressions[0].lhs in subexpressions_to_keep:
             substitution_dict = {}
-            kept_subexpressions = self.subexpressions[0]
+            kept_subexpressions.append(self.subexpressions[0])
         else:
             substitution_dict = {self.subexpressions[0].lhs: self.subexpressions[0].rhs}
 
diff --git a/pystencils/simp/simplifications.py b/pystencils/simp/simplifications.py
index 5d9b819d59dc877349ccb703d771582a0c150ffa..234b7a373bcc5ad4dc7939f7cdb0044b50bb0c6d 100644
--- a/pystencils/simp/simplifications.py
+++ b/pystencils/simp/simplifications.py
@@ -18,7 +18,7 @@ def sort_assignments_topologically(assignments: Sequence[Union[Assignment, Node]
         elif isinstance(e1, Node):
             symbols = e1.symbols_defined
         else:
-            raise NotImplementedError("Cannot sort topologically. Object of type " + type(e1) + " cannot be handled.")
+            raise NotImplementedError(f"Cannot sort topologically. Object of type {type(e1)} cannot be handled.")
 
         for lhs in symbols:
             for c2, e2 in enumerate(assignments):
@@ -112,14 +112,14 @@ def add_subexpressions_for_sums(ac):
     addends = []
 
     def contains_sum(term):
-        if term.func == sp.add.Add:
+        if term.func == sp.Add:
             return True
         if term.is_Atom:
             return False
         return any([contains_sum(a) for a in term.args])
 
     def search_addends(term):
-        if term.func == sp.add.Add:
+        if term.func == sp.Add:
             if all([not contains_sum(a) for a in term.args]):
                 addends.extend(term.args)
         for a in term.args:
diff --git a/pystencils/stencil.py b/pystencils/stencil.py
index 359a0a29492c537957b8e24a952fa7e052ee898f..28d179c50d2716e4aa1530b2409abb77e6b10e66 100644
--- a/pystencils/stencil.py
+++ b/pystencils/stencil.py
@@ -34,6 +34,8 @@ def is_valid(stencil, max_neighborhood=None):
         True
         >>> is_valid([(2, 0), (1, 0)], max_neighborhood=1)
         False
+        >>> is_valid([(2, 0), (1, 0)], max_neighborhood=2)
+        True
     """
     expected_dim = len(stencil[0])
     for d in stencil:
@@ -67,8 +69,11 @@ def have_same_entries(s1, s2):
     Examples:
         >>> stencil1 = [(1, 0), (-1, 0), (0, 1), (0, -1)]
         >>> stencil2 = [(-1, 0), (0, -1), (1, 0), (0, 1)]
+        >>> stencil3 = [(-1, 0), (0, -1), (1, 0)]
         >>> have_same_entries(stencil1, stencil2)
         True
+        >>> have_same_entries(stencil1, stencil3)
+        False
     """
     if len(s1) != len(s2):
         return False
diff --git a/pystencils/sympyextensions.py b/pystencils/sympyextensions.py
index cd9519d0668f842879ae57eafb1a5aec34878a2c..31e42224ab19f14895e99d7ba9685b5c33a2ef54 100644
--- a/pystencils/sympyextensions.py
+++ b/pystencils/sympyextensions.py
@@ -272,7 +272,7 @@ def subs_additive(expr: sp.Expr, replacement: sp.Expr, subexpression: sp.Expr,
 def replace_second_order_products(expr: sp.Expr, search_symbols: Iterable[sp.Symbol],
                                   positive: Optional[bool] = None,
                                   replace_mixed: Optional[List[Assignment]] = None) -> sp.Expr:
-    """Replaces second order mixed terms like x*y by 2*( (x+y)**2 - x**2 - y**2 ).
+    """Replaces second order mixed terms like 4*x*y by 2*( (x+y)**2 - x**2 - y**2 ).
 
     This makes the term longer - simplify usually is undoing these - however this
     transformation can be done to find more common sub-expressions
@@ -293,7 +293,7 @@ def replace_second_order_products(expr: sp.Expr, search_symbols: Iterable[sp.Sym
     if expr.is_Mul:
         distinct_search_symbols = set()
         nr_of_search_terms = 0
-        other_factors = 1
+        other_factors = sp.Integer(1)
         for t in expr.args:
             if t in search_symbols:
                 nr_of_search_terms += 1
@@ -509,13 +509,14 @@ def count_operations(term: Union[sp.Expr, List[sp.Expr]],
                     if t.exp >= 0:
                         result['muls'] += int(t.exp) - 1
                     else:
-                        result['muls'] -= 1
+                        if result['muls'] > 0:
+                            result['muls'] -= 1
                         result['divs'] += 1
                         result['muls'] += (-int(t.exp)) - 1
                 elif sp.nsimplify(t.exp) == sp.Rational(1, 2):
                     result['sqrts'] += 1
                 else:
-                    warnings.warn("Cannot handle exponent", t.exp, " of sp.Pow node")
+                    warnings.warn(f"Cannot handle exponent {t.exp} of sp.Pow node")
             else:
                 warnings.warn("Counting operations: only integer exponents are supported in Pow, "
                               "counting will be inaccurate")
@@ -526,7 +527,7 @@ def count_operations(term: Union[sp.Expr, List[sp.Expr]],
         elif isinstance(t, sp.Rel):
             pass
         else:
-            warnings.warn("Unknown sympy node of type " + str(t.func) + " counting will be inaccurate")
+            warnings.warn(f"Unknown sympy node of type {str(t.func)} counting will be inaccurate")
 
         if visit_children:
             for a in t.args:
diff --git a/pystencils/transformations.py b/pystencils/transformations.py
index b3f9431bbf3035aaabd25f6eb430c738dddaf3a7..5e306f2de168994575562a92156408f128ab447c 100644
--- a/pystencils/transformations.py
+++ b/pystencils/transformations.py
@@ -1206,13 +1206,13 @@ def get_loop_hierarchy(ast_node):
     return reversed(result)
 
 
-def get_loop_counter_symbol_hierarchy(astNode):
+def get_loop_counter_symbol_hierarchy(ast_node):
     """Determines the loop counter symbols around a given AST node.
-    :param astNode: the AST node
+    :param ast_node: the AST node
     :return: list of loop counter symbols, where the first list entry is the symbol of the innermost loop
     """
     result = []
-    node = astNode
+    node = ast_node
     while node is not None:
         node = get_next_parent_of_type(node, ast.LoopOverCoordinate)
         if node:
diff --git a/pystencils/utils.py b/pystencils/utils.py
index 0c8f11ee3531a84a3477197a8427f88a7da9941f..bdeab95363c651e4a7b31348521726d82e82c646 100644
--- a/pystencils/utils.py
+++ b/pystencils/utils.py
@@ -1,4 +1,5 @@
 import os
+import itertools
 from collections import Counter
 from contextlib import contextmanager
 from tempfile import NamedTemporaryFile
@@ -96,16 +97,21 @@ def fully_contains(l1, l2):
 
 
 def boolean_array_bounding_box(boolean_array):
-    """Returns bounding box around "true" area of boolean array"""
-    dim = len(boolean_array.shape)
+    """Returns bounding box around "true" area of boolean array
+
+    >>> a = np.zeros((4, 4), dtype=bool)
+    >>> a[1:-1, 1:-1] = True
+    >>> boolean_array_bounding_box(a)
+    [(1, 3), (1, 3)]
+    """
+    dim = boolean_array.ndim
+    shape = boolean_array.shape
+    assert 0 not in shape, "Shape must not contain zero"
     bounds = []
-    for i in range(dim):
-        for j in range(dim):
-            if i != j:
-                arr_1d = np.any(boolean_array, axis=j)
-        begin = np.argmax(arr_1d)
-        end = begin + np.argmin(arr_1d[begin:])
-        bounds.append((begin, end))
+    for ax in itertools.combinations(reversed(range(dim)), dim - 1):
+        nonzero = np.any(boolean_array, axis=ax)
+        t = np.where(nonzero)[0][[0, -1]]
+        bounds.append((t[0], t[1] + 1))
     return bounds
 
 
@@ -217,7 +223,8 @@ class LinearEquationSystem:
                 return 'multiple'
 
     def solution(self):
-        """Solves the system if it has a single solution. Returns a dictionary mapping symbol to solution value."""
+        """Solves the system. Under- and overdetermined systems are supported.
+        Returns a dictionary mapping symbol to solution value."""
         return sp.solve_linear_system(self._matrix, *self.unknowns)
 
     def _resize_if_necessary(self, new_rows=1):
@@ -233,8 +240,3 @@ class LinearEquationSystem:
                 break
             result -= 1
         self.next_zero_row = result
-
-
-def find_unique_solutions_with_zeros(system: LinearEquationSystem):
-    if not system.solution_structure() != 'multiple':
-        raise ValueError("Function works only for underdetermined systems")
diff --git a/pystencils_tests/test_assignment_collection.py b/pystencils_tests/test_assignment_collection.py
index a16d51db44bb722ee1a9da9a20ad4192d4f6188e..f0c1f2a91e93739c1f40d8e2223641bd2f8b9bbb 100644
--- a/pystencils_tests/test_assignment_collection.py
+++ b/pystencils_tests/test_assignment_collection.py
@@ -1,15 +1,19 @@
 import pytest
 import sympy as sp
+import pystencils as ps
 
 from pystencils import Assignment, AssignmentCollection
 from pystencils.astnodes import Conditional
 from pystencils.simp.assignment_collection import SymbolGen
 
+a, b, c = sp.symbols("a b c")
+x, y, z, t = sp.symbols("x y z t")
+symbol_gen = SymbolGen("a")
+f = ps.fields("f(2) : [2D]")
+d = ps.fields("d(2) : [2D]")
 
-def test_assignment_collection():
-    x, y, z, t = sp.symbols("x y z t")
-    symbol_gen = SymbolGen("a")
 
+def test_assignment_collection():
     ac = AssignmentCollection([Assignment(z, x + y)],
                               [], subexpression_symbol_generator=symbol_gen)
 
@@ -32,10 +36,6 @@ def test_assignment_collection():
 
 
 def test_free_and_defined_symbols():
-    x, y, z, t = sp.symbols("x y z t")
-    a, b = sp.symbols("a b")
-    symbol_gen = SymbolGen("a")
-
     ac = AssignmentCollection([Assignment(z, x + y), Conditional(t > 0, Assignment(a, b+1), Assignment(a, b+2))],
                               [], subexpression_symbol_generator=symbol_gen)
 
@@ -45,35 +45,128 @@ def test_free_and_defined_symbols():
 
 def test_vector_assignments():
     """From #17 (https://i10git.cs.fau.de/pycodegen/pystencils/issues/17)"""
-
-    import pystencils as ps
-    import sympy as sp
-    a, b, c = sp.symbols("a b c")
-    assignments = ps.Assignment(sp.Matrix([a,b,c]), sp.Matrix([1,2,3]))
+    assignments = ps.Assignment(sp.Matrix([a, b, c]), sp.Matrix([1, 2, 3]))
     print(assignments)
 
 
 def test_wrong_vector_assignments():
     """From #17 (https://i10git.cs.fau.de/pycodegen/pystencils/issues/17)"""
-
-    import pystencils as ps
-    import sympy as sp
-    a, b = sp.symbols("a b")
-    
     with pytest.raises(AssertionError,
-            match=r'Matrix(.*) and Matrix(.*) must have same length when performing vector assignment!'):
-        ps.Assignment(sp.Matrix([a,b]), sp.Matrix([1,2,3]))
+                       match=r'Matrix(.*) and Matrix(.*) must have same length when performing vector assignment!'):
+        ps.Assignment(sp.Matrix([a, b]), sp.Matrix([1, 2, 3]))
 
 
 def test_vector_assignment_collection():
     """From #17 (https://i10git.cs.fau.de/pycodegen/pystencils/issues/17)"""
 
-    import pystencils as ps
-    import sympy as sp
-    a, b, c = sp.symbols("a b c")
-    y, x = sp.Matrix([a,b,c]), sp.Matrix([1,2,3])
-    assignments = ps.AssignmentCollection({y: x})
+    y_m, x_m = sp.Matrix([a, b, c]), sp.Matrix([1, 2, 3])
+    assignments = ps.AssignmentCollection({y_m: x_m})
     print(assignments)
 
-    assignments = ps.AssignmentCollection([ps.Assignment(y,x)])
+    assignments = ps.AssignmentCollection([ps.Assignment(y_m, x_m)])
     print(assignments)
+
+
+def test_new_with_substitutions():
+    a1 = ps.Assignment(f[0, 0](0), a * b)
+    a2 = ps.Assignment(f[0, 0](1), b * c)
+
+    ac = ps.AssignmentCollection([a1, a2], subexpressions=[])
+    subs_dict = {f[0, 0](0): d[0, 0](0), f[0, 0](1): d[0, 0](1)}
+    subs_ac = ac.new_with_substitutions(subs_dict,
+                                        add_substitutions_as_subexpressions=False,
+                                        substitute_on_lhs=True,
+                                        sort_topologically=True)
+
+    assert subs_ac.main_assignments[0].lhs == d[0, 0](0)
+    assert subs_ac.main_assignments[1].lhs == d[0, 0](1)
+
+    subs_ac = ac.new_with_substitutions(subs_dict,
+                                        add_substitutions_as_subexpressions=False,
+                                        substitute_on_lhs=False,
+                                        sort_topologically=True)
+
+    assert subs_ac.main_assignments[0].lhs == f[0, 0](0)
+    assert subs_ac.main_assignments[1].lhs == f[0, 0](1)
+
+    subs_dict = {a * b: sp.symbols('xi')}
+    subs_ac = ac.new_with_substitutions(subs_dict,
+                                        add_substitutions_as_subexpressions=False,
+                                        substitute_on_lhs=False,
+                                        sort_topologically=True)
+
+    assert subs_ac.main_assignments[0].rhs == sp.symbols('xi')
+    assert len(subs_ac.subexpressions) == 0
+
+    subs_ac = ac.new_with_substitutions(subs_dict,
+                                        add_substitutions_as_subexpressions=True,
+                                        substitute_on_lhs=False,
+                                        sort_topologically=True)
+
+    assert subs_ac.main_assignments[0].rhs == sp.symbols('xi')
+    assert len(subs_ac.subexpressions) == 1
+    assert subs_ac.subexpressions[0].lhs == sp.symbols('xi')
+
+
+def test_copy():
+    a1 = ps.Assignment(f[0, 0](0), a * b)
+    a2 = ps.Assignment(f[0, 0](1), b * c)
+
+    ac = ps.AssignmentCollection([a1, a2], subexpressions=[])
+    ac2 = ac.copy()
+    assert ac2 == ac
+
+
+def test_set_expressions():
+    a1 = ps.Assignment(f[0, 0](0), a * b)
+    a2 = ps.Assignment(f[0, 0](1), b * c)
+
+    ac = ps.AssignmentCollection([a1, a2], subexpressions=[])
+
+    ac.set_main_assignments_from_dict({d[0, 0](0): b * c})
+    assert len(ac.main_assignments) == 1
+    assert ac.main_assignments[0] == ps.Assignment(d[0, 0](0), b * c)
+
+    ac.set_sub_expressions_from_dict({sp.symbols('xi'): a * b})
+    assert len(ac.subexpressions) == 1
+    assert ac.subexpressions[0] == ps.Assignment(sp.symbols('xi'), a * b)
+
+    ac = ac.new_without_subexpressions(subexpressions_to_keep={sp.symbols('xi')})
+    assert ac.subexpressions[0] == ps.Assignment(sp.symbols('xi'), a * b)
+
+    ac = ac.new_without_unused_subexpressions()
+    assert len(ac.subexpressions) == 0
+
+    ac2 = ac.new_without_subexpressions()
+    assert ac == ac2
+
+
+def test_free_and_bound_symbols():
+    a1 = ps.Assignment(a, d[0, 0](0))
+    a2 = ps.Assignment(f[0, 0](1), b * c)
+
+    ac = ps.AssignmentCollection([a2], subexpressions=[a1])
+    assert f[0, 0](1) in ac.bound_symbols
+    assert d[0, 0](0) in ac.free_symbols
+
+
+def test_new_merged():
+    a1 = ps.Assignment(a, b * c)
+    a2 = ps.Assignment(a, x * y)
+    a3 = ps.Assignment(t, x ** 2)
+
+    # main assignments
+    a4 = ps.Assignment(f[0, 0](0), a)
+    a5 = ps.Assignment(d[0, 0](0), a)
+
+    ac = ps.AssignmentCollection([a4], subexpressions=[a1])
+    ac2 = ps.AssignmentCollection([a5], subexpressions=[a2, a3])
+
+    merged_ac = ac.new_merged(ac2)
+
+    assert len(merged_ac.subexpressions) == 3
+    assert len(merged_ac.main_assignments) == 2
+    assert ps.Assignment(sp.symbols('xi_0'), x * y) in merged_ac.subexpressions
+    assert ps.Assignment(d[0, 0](0), sp.symbols('xi_0')) in merged_ac.main_assignments
+    assert a1 in merged_ac.subexpressions
+    assert a3 in merged_ac.subexpressions
diff --git a/pystencils_tests/test_astnodes.py b/pystencils_tests/test_astnodes.py
new file mode 100644
index 0000000000000000000000000000000000000000..98c755efd4d5b57e0b33f47ffe5d7fb2e11df573
--- /dev/null
+++ b/pystencils_tests/test_astnodes.py
@@ -0,0 +1,100 @@
+import sympy as sp
+import pystencils as ps
+
+from pystencils import Assignment
+from pystencils.astnodes import Block, SkipIteration, LoopOverCoordinate, SympyAssignment
+from sympy.codegen.rewriting import optims_c99
+
+dst = ps.fields('dst(8): double[2D]')
+s = sp.symbols('s_:8')
+x = sp.symbols('x')
+y = sp.symbols('y')
+
+
+def test_kernel_function():
+    assignments = [
+        Assignment(dst[0, 0](0), s[0]),
+        Assignment(x, dst[0, 0](2))
+    ]
+
+    ast_node = ps.create_kernel(assignments)
+
+    assert ast_node.target == 'cpu'
+    assert ast_node.backend == 'c'
+    # symbols_defined and undefined_symbols will always return an emtpy set
+    assert ast_node.symbols_defined == set()
+    assert ast_node.undefined_symbols == set()
+    assert ast_node.fields_written == {dst}
+    assert ast_node.fields_read == {dst}
+
+
+def test_skip_iteration():
+    # skip iteration is an object which should give back empty data structures.
+    skipped = SkipIteration()
+    assert skipped.args == []
+    assert skipped.symbols_defined == set()
+    assert skipped.undefined_symbols == set()
+
+
+def test_block():
+    assignments = [
+        Assignment(dst[0, 0](0), s[0]),
+        Assignment(x, dst[0, 0](2))
+    ]
+    bl = Block(assignments)
+    assert bl.symbols_defined == {dst[0, 0](0), dst[0, 0](2), s[0], x}
+
+    bl.append([Assignment(y, 10)])
+    assert bl.symbols_defined == {dst[0, 0](0), dst[0, 0](2), s[0], x, y}
+    assert len(bl.args) == 3
+
+    list_iterator = iter([Assignment(s[1], 11)])
+    bl.insert_front(list_iterator)
+
+    assert bl.args[0] == Assignment(s[1], 11)
+
+
+def test_loop_over_coordinate():
+    assignments = [
+        Assignment(dst[0, 0](0), s[0]),
+        Assignment(x, dst[0, 0](2))
+    ]
+
+    body = Block(assignments)
+    loop = LoopOverCoordinate(body, coordinate_to_loop_over=0, start=0, stop=10, step=1)
+
+    assert loop.body == body
+
+    new_body = Block([assignments[0]])
+    loop = loop.new_loop_with_different_body(new_body)
+    assert loop.body == new_body
+
+    assert loop.start == 0
+    assert loop.stop == 10
+    assert loop.step == 1
+
+    loop.replace(loop.start, 2)
+    loop.replace(loop.stop, 20)
+    loop.replace(loop.step, 2)
+
+    assert loop.start == 2
+    assert loop.stop == 20
+    assert loop.step == 2
+
+
+def test_sympy_assignment():
+
+    assignment = SympyAssignment(dst[0, 0](0), sp.log(x + 3) / sp.log(2) + sp.log(x ** 2 + 1))
+    assignment.optimize(optims_c99)
+
+    ast = ps.create_kernel([assignment])
+    code = ps.get_code_str(ast)
+
+    assert 'log1p' in code
+    assert 'log2' in code
+
+    assignment.replace(assignment.lhs, dst[0, 0](1))
+    assignment.replace(assignment.rhs, sp.log(2))
+
+    assert assignment.lhs == dst[0, 0](1)
+    assert assignment.rhs == sp.log(2)
diff --git a/pystencils_tests/test_data/datahandling_load_test.npz b/pystencils_tests/test_data/datahandling_load_test.npz
new file mode 100644
index 0000000000000000000000000000000000000000..d363a8a0aba1bb78a06314a19b887eb4c4975334
Binary files /dev/null and b/pystencils_tests/test_data/datahandling_load_test.npz differ
diff --git a/pystencils_tests/test_data/datahandling_save_test.npz b/pystencils_tests/test_data/datahandling_save_test.npz
new file mode 100644
index 0000000000000000000000000000000000000000..d363a8a0aba1bb78a06314a19b887eb4c4975334
Binary files /dev/null and b/pystencils_tests/test_data/datahandling_save_test.npz differ
diff --git a/pystencils_tests/test_datahandling.py b/pystencils_tests/test_datahandling.py
index 7f95fe1f04d1bbf72770d2837bf6723681c1f50d..949f067896d3374a7fcb9ecb1369fe5a37eb1f9a 100644
--- a/pystencils_tests/test_datahandling.py
+++ b/pystencils_tests/test_datahandling.py
@@ -1,5 +1,6 @@
 import os
 from tempfile import TemporaryDirectory
+from pathlib import Path
 
 import numpy as np
 
@@ -12,6 +13,9 @@ except ImportError:
     import unittest.mock
     pytest = unittest.mock.MagicMock()
 
+SCRIPT_FOLDER = Path(__file__).parent.absolute()
+INPUT_FOLDER = SCRIPT_FOLDER / "test_data"
+
 
 def basic_iteration(dh):
     dh.add_array('basic_iter_test_gl_default')
@@ -111,6 +115,11 @@ def kernel_execution_jacobi(dh, target):
     test_gpu = target == 'gpu' or target == 'opencl'
     dh.add_array('f', gpu=test_gpu)
     dh.add_array('tmp', gpu=test_gpu)
+
+    if test_gpu:
+        assert dh.is_on_gpu('f')
+        assert dh.is_on_gpu('tmp')
+
     stencil_2d = [(1, 0), (-1, 0), (0, 1), (0, -1)]
     stencil_3d = [(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1)]
     stencil = stencil_2d if dh.dim == 2 else stencil_3d
@@ -197,6 +206,7 @@ def test_access_and_gather():
 def test_kernel():
     for domain_shape in [(4, 5), (3, 4, 5)]:
         dh = create_data_handling(domain_size=domain_shape, periodicity=True)
+        assert all(dh.periodicity)
         kernel_execution_jacobi(dh, 'cpu')
         reduction(dh)
 
@@ -243,3 +253,105 @@ def test_add_arrays():
     assert y_ == y
     assert x == dh.fields['x']
     assert y == dh.fields['y']
+
+
+def test_get_kwarg():
+    domain_shape = (10, 10)
+    field_description = 'src, dst'
+
+    dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
+    src, dst = dh.add_arrays(field_description)
+    dh.fill("src", 1.0, ghost_layers=True)
+    dh.fill("dst", 0.0, ghost_layers=True)
+
+    with pytest.raises(ValueError):
+        dh.add_array('src')
+
+    ur = ps.Assignment(src.center, dst.center)
+    kernel = ps.create_kernel(ur).compile()
+
+    kw = dh.get_kernel_kwargs(kernel)
+    assert np.all(kw[0]['src'] == dh.cpu_arrays['src'])
+    assert np.all(kw[0]['dst'] == dh.cpu_arrays['dst'])
+
+
+def test_add_custom_data():
+    pytest.importorskip('pycuda')
+
+    import pycuda.gpuarray as gpuarray
+    import pycuda.autoinit  # noqa
+
+    def cpu_data_create_func():
+        return np.ones((2, 2), dtype=np.float64)
+
+    def gpu_data_create_func():
+        return gpuarray.zeros((2, 2), dtype=np.float64)
+
+    def cpu_to_gpu_transfer_func(gpuarr, cpuarray):
+        gpuarr.set(cpuarray)
+
+    def gpu_to_cpu_transfer_func(gpuarr, cpuarray):
+        gpuarr.get(cpuarray)
+
+    dh = create_data_handling(domain_size=(10, 10))
+    dh.add_custom_data('custom_data',
+                       cpu_data_create_func,
+                       gpu_data_create_func,
+                       cpu_to_gpu_transfer_func,
+                       gpu_to_cpu_transfer_func)
+
+    assert np.all(dh.custom_data_cpu['custom_data'] == 1)
+    assert np.all(dh.custom_data_gpu['custom_data'].get() == 0)
+
+    dh.to_cpu(name='custom_data')
+    dh.to_gpu(name='custom_data')
+
+    assert 'custom_data' in dh.custom_data_names
+
+
+def test_log():
+    dh = create_data_handling(domain_size=(10, 10))
+    dh.log_on_root()
+    assert dh.is_root
+    assert dh.world_rank == 0
+
+
+def test_save_data():
+    domain_shape = (2, 2)
+
+    dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
+    dh.add_array("src", values_per_cell=9)
+    dh.fill("src", 1.0, ghost_layers=True)
+    dh.add_array("dst", values_per_cell=9)
+    dh.fill("dst", 1.0, ghost_layers=True)
+
+    dh.save_all(str(INPUT_FOLDER) + '/datahandling_save_test')
+
+
+def test_load_data():
+    domain_shape = (2, 2)
+
+    dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
+    dh.add_array("src", values_per_cell=9)
+    dh.fill("src", 0.0, ghost_layers=True)
+    dh.add_array("dst", values_per_cell=9)
+    dh.fill("dst", 0.0, ghost_layers=True)
+
+    dh.load_all(str(INPUT_FOLDER) + '/datahandling_load_test')
+    assert np.all(dh.cpu_arrays['src']) == 1
+    assert np.all(dh.cpu_arrays['dst']) == 1
+
+    domain_shape = (3, 3)
+
+    dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
+    dh.add_array("src", values_per_cell=9)
+    dh.fill("src", 0.0, ghost_layers=True)
+    dh.add_array("dst", values_per_cell=9)
+    dh.fill("dst", 0.0, ghost_layers=True)
+    dh.add_array("dst2", values_per_cell=9)
+    dh.fill("dst2", 0.0, ghost_layers=True)
+
+    dh.load_all(str(INPUT_FOLDER) + '/datahandling_load_test')
+    assert np.all(dh.cpu_arrays['src']) == 0
+    assert np.all(dh.cpu_arrays['dst']) == 0
+    assert np.all(dh.cpu_arrays['dst2']) == 0
diff --git a/pystencils_tests/test_datahandling_parallel.py b/pystencils_tests/test_datahandling_parallel.py
index 0bfbfbd02fc86952e7fba84ce6aa080d2080ec0a..efe106dc0090bf69dff0d187008c1a8653c4a98e 100644
--- a/pystencils_tests/test_datahandling_parallel.py
+++ b/pystencils_tests/test_datahandling_parallel.py
@@ -1,10 +1,17 @@
 import numpy as np
 import waLBerla as wlb
+from pystencils import make_slice
 
 from pystencils.datahandling.parallel_datahandling import ParallelDataHandling
 from pystencils_tests.test_datahandling import (
     access_and_gather, kernel_execution_jacobi, reduction, synchronization, vtk_output)
 
+try:
+    import pytest
+except ImportError:
+    import unittest.mock
+    pytest = unittest.mock.MagicMock()
+
 
 def test_access_and_gather():
     block_size = (4, 7, 1)
@@ -64,3 +71,51 @@ def test_vtk_output():
     blocks = wlb.createUniformBlockGrid(blocks=(3, 2, 4), cellsPerBlock=(3, 2, 5), oneBlockPerProcess=False)
     dh = ParallelDataHandling(blocks)
     vtk_output(dh)
+
+
+def test_block_iteration():
+    block_size = (16, 16, 16)
+    num_blocks = (2, 2, 2)
+    blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False)
+    dh = ParallelDataHandling(blocks, default_ghost_layers=2)
+    dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2, gpu=True)
+
+    for b in dh.iterate():
+        b['v'].fill(1)
+
+    s = 0
+    for b in dh.iterate():
+        s += np.sum(b['v'])
+
+    assert s == 40*40*40
+
+    sl = make_slice[0:18, 0:18, 0:18]
+    for b in dh.iterate(slice_obj=sl):
+        b['v'].fill(0)
+
+    s = 0
+    for b in dh.iterate():
+        s += np.sum(b['v'])
+
+    assert s == 40*40*40 - 20*20*20
+
+
+def test_getter_setter():
+    block_size = (2, 2, 2)
+    num_blocks = (2, 2, 2)
+    blocks = wlb.createUniformBlockGrid(blocks=num_blocks, cellsPerBlock=block_size, oneBlockPerProcess=False)
+    dh = ParallelDataHandling(blocks, default_ghost_layers=2)
+    dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2, gpu=True)
+
+    assert dh.shape == (4, 4, 4)
+    assert dh.periodicity == (False, False, False)
+    assert dh.values_per_cell('v') == 1
+    assert dh.has_data('v') is True
+    assert 'v' in dh.array_names
+    dh.log_on_root()
+    assert dh.is_root is True
+    assert dh.world_rank == 0
+
+    dh.to_gpu('v')
+    assert dh.is_on_gpu('v') is True
+    dh.all_to_cpu()
diff --git a/pystencils_tests/test_fast_approximation.py b/pystencils_tests/test_fast_approximation.py
index f4d19fa19fa615aaa76a7b1bb445fa7bdb886237..ccd2d7b8ea15a4c36069a58bc1197b314cce2ca1 100644
--- a/pystencils_tests/test_fast_approximation.py
+++ b/pystencils_tests/test_fast_approximation.py
@@ -11,9 +11,9 @@ def test_fast_sqrt():
 
     assert len(insert_fast_sqrts(expr).atoms(fast_sqrt)) == 1
     assert len(insert_fast_sqrts([expr])[0].atoms(fast_sqrt)) == 1
-    ast = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_sqrts(expr)), target='gpu')
-    ast.compile()
-    code_str = ps.get_code_str(ast)
+    ast_gpu = ps.create_kernel(ps.Assignment(g[0, 0], insert_fast_sqrts(expr)), target='gpu')
+    ast_gpu.compile()
+    code_str = ps.get_code_str(ast_gpu)
     assert '__fsqrt_rn' in code_str
 
     expr = ps.Assignment(sp.Symbol("tmp"), 3 / sp.sqrt(f[0, 0] + f[1, 0]))
@@ -21,9 +21,9 @@ def test_fast_sqrt():
 
     ac = ps.AssignmentCollection([expr], [])
     assert len(insert_fast_sqrts(ac).main_assignments[0].atoms(fast_inv_sqrt)) == 1
-    ast = ps.create_kernel(insert_fast_sqrts(ac), target='gpu')
-    ast.compile()
-    code_str = ps.get_code_str(ast)
+    ast_gpu = ps.create_kernel(insert_fast_sqrts(ac), target='gpu')
+    ast_gpu.compile()
+    code_str = ps.get_code_str(ast_gpu)
     assert '__frsqrt_rn' in code_str
 
 
diff --git a/pystencils_tests/test_fd_derivation.py b/pystencils_tests/test_fd_derivation.py
deleted file mode 100644
index c2bb1aa08e7007e024aaf0867c15764c5e0880ce..0000000000000000000000000000000000000000
--- a/pystencils_tests/test_fd_derivation.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import pytest
-import sympy as sp
-
-from pystencils.utils import LinearEquationSystem
-
-
-def test_linear_equation_system():
-    unknowns = sp.symbols("x_:3")
-    x, y, z = unknowns
-    m = LinearEquationSystem(unknowns)
-    m.add_equation(x + y - 2)
-    m.add_equation(x - y - 1)
-    assert m.solution_structure() == 'multiple'
-    m.set_unknown_zero(2)
-    assert m.solution_structure() == 'single'
-    solution = m.solution()
-    assert solution[unknowns[2]] == 0
-    assert solution[unknowns[1]] == sp.Rational(1, 2)
-    assert solution[unknowns[0]] == sp.Rational(3, 2)
-
-    m.set_unknown_zero(0)
-    assert m.solution_structure() == 'none'
-
-    # special case where less rows than unknowns, but no solution
-    m = LinearEquationSystem(unknowns)
-    m.add_equation(x - 3)
-    m.add_equation(x - 4)
-    assert m.solution_structure() == 'none'
-    m.add_equation(y - 4)
-    assert m.solution_structure() == 'none'
-
-    with pytest.raises(ValueError) as e:
-        m.add_equation(x**2 - 1)
-    assert 'Not a linear equation' in str(e.value)
diff --git a/pystencils_tests/test_fd_derivative.py b/pystencils_tests/test_fd_derivative.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc2fca655440574659f001048f340a1c008028f5
--- /dev/null
+++ b/pystencils_tests/test_fd_derivative.py
@@ -0,0 +1,24 @@
+import sympy as sp
+from pystencils import fields
+from pystencils.fd import Diff, diff, collect_diffs
+from pystencils.fd.derivative import replace_generic_laplacian
+
+
+def test_fs():
+    f = sp.Symbol("f", commutative=False)
+
+    a = Diff(Diff(Diff(f, 1), 0), 0)
+    assert a.is_commutative is False
+    print(str(a))
+
+    assert diff(f) == f
+
+    x, y = sp.symbols("x, y")
+    collected_terms = collect_diffs(diff(x, 0, 0))
+    assert collected_terms == Diff(Diff(x, 0, -1), 0, -1)
+
+    src = fields("src : double[2D]")
+    expr = sp.Add(Diff(Diff(src[0, 0])), 10)
+    expected = Diff(Diff(src[0, 0], 0, -1), 0, -1) + Diff(Diff(src[0, 0], 1, -1), 1, -1) + 10
+    result = replace_generic_laplacian(expr, 3)
+    assert result == expected
\ No newline at end of file
diff --git a/pystencils_tests/test_kerncraft_coupling.py b/pystencils_tests/test_kerncraft_coupling.py
index 653ed34d90e6ecd45a7a5785fb71d522cc3734f5..aeb4b7acb6f62730f0896b10d6b8fa6d655c871d 100644
--- a/pystencils_tests/test_kerncraft_coupling.py
+++ b/pystencils_tests/test_kerncraft_coupling.py
@@ -7,7 +7,7 @@ from kerncraft.kernel import KernelCode
 from kerncraft.machinemodel import MachineModel
 from kerncraft.models import ECM, ECMData, Benchmark
 
-from pystencils import Assignment, Field
+from pystencils import Assignment, Field, fields
 from pystencils.cpu import create_kernel
 from pystencils.kerncraft_coupling import KerncraftParameters, PyStencilsKerncraftKernel
 from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark, run_c_benchmark
@@ -159,3 +159,15 @@ def test_benchmark():
     timeloop_time = timeloop.benchmark(number_of_time_steps_for_estimation=1)
 
     np.testing.assert_almost_equal(c_benchmark_run, timeloop_time, decimal=4)
+
+
+@pytest.mark.kerncraft
+def test_kerncraft_generic_field():
+    a = fields('a: double[3D]')
+    b = fields('b: double[3D]')
+    s = sp.Symbol("s")
+    rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
+
+    update_rule = Assignment(b[0, 0, 0], s * rhs)
+    ast = create_kernel([update_rule])
+    k = PyStencilsKerncraftKernel(ast, debug_print=True)
diff --git a/pystencils_tests/test_simplification_strategy.py b/pystencils_tests/test_simplification_strategy.py
index 189482c006197160e1f49771dc534206f8d0ef9e..5176ae5f49aa45ed952882cb0313d5e5f7754177 100644
--- a/pystencils_tests/test_simplification_strategy.py
+++ b/pystencils_tests/test_simplification_strategy.py
@@ -1,5 +1,6 @@
 import sympy as sp
 
+import pystencils as ps
 from pystencils import Assignment, AssignmentCollection
 from pystencils.simp import (
     SimplificationStrategy, apply_on_all_subexpressions,
@@ -43,3 +44,39 @@ def test_simplification_strategy():
     assert 'Adds' in report._repr_html_()
 
     assert 'factor' in str(strategy)
+
+
+def test_split_inner_loop():
+    dst = ps.fields('dst(8): double[2D]')
+    s = sp.symbols('s_:8')
+    x = sp.symbols('x')
+    subexpressions = []
+    main = [
+        Assignment(dst[0, 0](0), s[0]),
+        Assignment(dst[0, 0](1), s[1]),
+        Assignment(dst[0, 0](2), s[2]),
+        Assignment(dst[0, 0](3), s[3]),
+        Assignment(dst[0, 0](4), s[4]),
+        Assignment(dst[0, 0](5), s[5]),
+        Assignment(dst[0, 0](6), s[6]),
+        Assignment(dst[0, 0](7), s[7]),
+        Assignment(x, sum(s))
+    ]
+    ac = AssignmentCollection(main, subexpressions)
+    split_groups = [[dst[0, 0](0), dst[0, 0](1)],
+                    [dst[0, 0](2), dst[0, 0](3)],
+                    [dst[0, 0](4), dst[0, 0](5)],
+                    [dst[0, 0](6), dst[0, 0](7), x]]
+    ac.simplification_hints['split_groups'] = split_groups
+    ast = ps.create_kernel(ac)
+
+    code = ps.get_code_str(ast)
+    # we have four inner loops as indicated in split groups (4 elements) plus one outer loop
+    assert code.count('for') == 5
+
+    ac = AssignmentCollection(main, subexpressions)
+    ast = ps.create_kernel(ac)
+
+    code = ps.get_code_str(ast)
+    # one inner loop and one outer loop
+    assert code.count('for') == 2
diff --git a/pystencils_tests/test_simplifications.py b/pystencils_tests/test_simplifications.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9f9cc8a16308702df1cf1274c62ca086ef72d3f
--- /dev/null
+++ b/pystencils_tests/test_simplifications.py
@@ -0,0 +1,97 @@
+import sympy as sp
+
+from pystencils.simp import subexpression_substitution_in_main_assignments
+from pystencils.simp import add_subexpressions_for_divisions
+from pystencils.simp import add_subexpressions_for_sums
+from pystencils.simp import add_subexpressions_for_field_reads
+from pystencils import Assignment, AssignmentCollection, fields
+
+a, b, c, d, x, y, z = sp.symbols("a b c d x y z")
+s0, s1, s2, s3 = sp.symbols("s_:4")
+f = sp.symbols("f_:9")
+
+
+def test_subexpression_substitution_in_main_assignments():
+    subexpressions = [
+        Assignment(s0, 2 * a + 2 * b),
+        Assignment(s1, 2 * a + 2 * b + 2 * c),
+        Assignment(s2, 2 * a + 2 * b + 2 * c + 2 * d),
+        Assignment(s3, 2 * a + 2 * b * c),
+        Assignment(x, s1 + s2 + s0 + s3)
+    ]
+    main = [
+        Assignment(f[0], s1 + s2 + s0 + s3),
+        Assignment(f[1], s1 + s2 + s0 + s3),
+        Assignment(f[2], s1 + s2 + s0 + s3),
+        Assignment(f[3], s1 + s2 + s0 + s3),
+        Assignment(f[4], s1 + s2 + s0 + s3)
+    ]
+    ac = AssignmentCollection(main, subexpressions)
+    ac = subexpression_substitution_in_main_assignments(ac)
+    for i in range(0, len(ac.main_assignments)):
+        assert ac.main_assignments[i].rhs == x
+
+
+def test_add_subexpressions_for_divisions():
+    subexpressions = [
+        Assignment(s0, 2 / a + 2 / b),
+        Assignment(s1, 2 / a + 2 / b + 2 / c),
+        Assignment(s2, 2 / a + 2 / b + 2 / c + 2 / d),
+        Assignment(s3, 2 / a + 2 / b / c),
+        Assignment(x, s1 + s2 + s0 + s3)
+    ]
+    main = [
+        Assignment(f[0], s1 + s2 + s0 + s3)
+    ]
+    ac = AssignmentCollection(main, subexpressions)
+    divs_before_optimisation = ac.operation_count["divs"]
+    ac = add_subexpressions_for_divisions(ac)
+    divs_after_optimisation = ac.operation_count["divs"]
+    assert divs_before_optimisation - divs_after_optimisation == 8
+    rhs = []
+    for i in range(len(ac.subexpressions)):
+        rhs.append(ac.subexpressions[i].rhs)
+
+    assert 1/a in rhs
+    assert 1/b in rhs
+    assert 1/c in rhs
+    assert 1/d in rhs
+
+
+def test_add_subexpressions_for_sums():
+    subexpressions = [
+        Assignment(s0, a + b + c + d),
+        Assignment(s1, 3 * a * sp.sqrt(x) + 4 * b + c),
+        Assignment(s2, 3 * a * sp.sqrt(x) + 4 * b + c),
+        Assignment(s3, 3 * a * sp.sqrt(x) + 4 * b + c)
+    ]
+    main = [
+        Assignment(f[0], s1 + s2 + s0 + s3)
+    ]
+    ac = AssignmentCollection(main, subexpressions)
+    ops_before_optimisation = ac.operation_count
+    ac = add_subexpressions_for_sums(ac)
+    ops_after_optimisation = ac.operation_count
+    assert ops_after_optimisation["adds"] == ops_before_optimisation["adds"]
+    assert ops_after_optimisation["muls"] < ops_before_optimisation["muls"]
+    assert ops_after_optimisation["sqrts"] < ops_before_optimisation["sqrts"]
+
+    rhs = []
+    for i in range(len(ac.subexpressions)):
+        rhs.append(ac.subexpressions[i].rhs)
+
+    assert a + b + c + d in rhs
+    assert 3 * a * sp.sqrt(x) in rhs
+
+
+def test_add_subexpressions_for_field_reads():
+    s, v = fields("s(5), v(5): double[2D]")
+    subexpressions = []
+    main = [
+        Assignment(s[0, 0](0), 3 * v[0, 0](0)),
+        Assignment(s[0, 0](1), 10 * v[0, 0](1))
+    ]
+    ac = AssignmentCollection(main, subexpressions)
+    assert len(ac.subexpressions) == 0
+    ac = add_subexpressions_for_field_reads(ac)
+    assert len(ac.subexpressions) == 2
diff --git a/pystencils_tests/test_slicing.py b/pystencils_tests/test_slicing.py
new file mode 100644
index 0000000000000000000000000000000000000000..79e36576bfb622cae3f4f9ed865a8b2f8308430b
--- /dev/null
+++ b/pystencils_tests/test_slicing.py
@@ -0,0 +1,73 @@
+import numpy as np
+from pystencils import create_data_handling
+from pystencils.slicing import SlicedGetter, make_slice, SlicedGetterDataHandling, shift_slice, slice_intersection
+
+
+def test_sliced_getter():
+    def get_slice(slice_obj=None):
+        arr = np.ones((10, 10))
+        if slice_obj is None:
+            slice_obj = make_slice[:, :]
+
+        return arr[slice_obj]
+
+    sli = SlicedGetter(get_slice)
+
+    test = make_slice[2:-2, 2:-2]
+    assert sli[test].shape == (6, 6)
+
+
+def test_sliced_getter_data_handling():
+    domain_shape = (10, 10)
+
+    dh = create_data_handling(domain_size=domain_shape, default_ghost_layers=1)
+    dh.add_array("src", values_per_cell=1)
+    dh.fill("src", 1.0, ghost_layers=True)
+
+    dh.add_array("dst", values_per_cell=1)
+    dh.fill("dst", 0.0, ghost_layers=True)
+
+    sli = SlicedGetterDataHandling(dh, 'dst')
+    slice_obj = make_slice[2:-2, 2:-2]
+    assert np.sum(sli[slice_obj]) == 0
+
+    sli = SlicedGetterDataHandling(dh, 'src')
+    slice_obj = make_slice[2:-2, 2:-2]
+    assert np.sum(sli[slice_obj]) == 36
+
+
+def test_shift_slice():
+
+    sh = shift_slice(make_slice[2:-2, 2:-2], [1, 2])
+    assert sh[0] == slice(3, -1, None)
+    assert sh[1] == slice(4, 0, None)
+
+    sh = shift_slice(make_slice[2:-2, 2:-2], 1)
+    assert sh[0] == slice(3, -1, None)
+    assert sh[1] == slice(3, -1, None)
+
+    sh = shift_slice([2, 4], 1)
+    assert sh[0] == 3
+    assert sh[1] == 5
+
+    sh = shift_slice([2, None], 1)
+    assert sh[0] == 3
+    assert sh[1] is None
+
+    sh = shift_slice([1.5, 1.5], 1)
+    assert sh[0] == 1.5
+    assert sh[1] == 1.5
+
+
+def test_slice_intersection():
+    sl1 = make_slice[1:10, 1:10]
+    sl2 = make_slice[5:15, 5:15]
+
+    intersection = slice_intersection(sl1, sl2)
+    assert intersection[0] == slice(5, 10, None)
+    assert intersection[1] == slice(5, 10, None)
+
+    sl2 = make_slice[12:15, 12:15]
+
+    intersection = slice_intersection(sl1, sl2)
+    assert intersection is None
diff --git a/pystencils_tests/test_staggered_kernel.py b/pystencils_tests/test_staggered_kernel.py
index c4f491393d2b4d08fbb9b4c99356abdc7e7199b6..eecbf706c5538ad544d29a6b37366944aa504680 100644
--- a/pystencils_tests/test_staggered_kernel.py
+++ b/pystencils_tests/test_staggered_kernel.py
@@ -4,6 +4,7 @@ import sympy as sp
 import pytest
 
 import pystencils as ps
+from pystencils import x_staggered_vector, TypedSymbol
 
 
 class TestStaggeredDiffusion:
@@ -96,3 +97,12 @@ def test_staggered_loop_cutting():
     assignments = [ps.Assignment(j.staggered_access("SW"), 1)]
     ast = ps.create_staggered_kernel(assignments, target=dh.default_target)
     assert not ast.atoms(ps.astnodes.Conditional)
+
+
+def test_staggered_vector():
+    dim = 2
+    v = x_staggered_vector(dim)
+    ctr0 = TypedSymbol('ctr_0', 'int', nonnegative=True)
+    ctr1 = TypedSymbol('ctr_1', 'int', nonnegative=True)
+    expected_result = sp.Matrix(tuple((ctr0 + 0.5, ctr1 + 0.5)))
+    assert v == expected_result
\ No newline at end of file
diff --git a/pystencils_tests/test_stencils.py b/pystencils_tests/test_stencils.py
index bf1418e053d41ebeb80b6ae623f1834b24a8a713..fa205ad7f04a446e1e16d5ea95e679cea70ecde4 100644
--- a/pystencils_tests/test_stencils.py
+++ b/pystencils_tests/test_stencils.py
@@ -1 +1,30 @@
 import pystencils as ps
+import sympy as sp
+
+from pystencils.stencil import coefficient_list, plot_expression
+
+
+def test_coefficient_list():
+    f = ps.fields("f: double[1D]")
+    expr = 2 * f[1] + 3 * f[-1]
+    coff = coefficient_list(expr)
+    assert coff == [3, 0, 2]
+    plot_expression(expr, matrix_form=True)
+
+    f = ps.fields("f: double[3D]")
+    expr = 2 * f[1, 0, 0] + 3 * f[0, -1, 0]
+    coff = coefficient_list(expr)
+    assert coff == [[[0, 3, 0], [0, 0, 2], [0, 0, 0]]]
+
+    expr = 2 * f[1, 0, 0] + 3 * f[0, -1, 0] + 4 * f[0, 0, 1]
+    coff = coefficient_list(expr, matrix_form=True)
+    assert coff[0] == sp.zeros(3, 3)
+
+    # in 3D plot only works if there are entries on every of the three 2D planes. In the above examples z-1 was empty
+    expr = 2 * f[1, 0, 0] + 1 * f[0, -1, 0] + 1 * f[0, 0, 1] + f[0, 0, -1]
+    plot_expression(expr)
+
+
+def test_plot_expression():
+    f = ps.fields("f: double[2D]")
+    plot_expression(2 * f[1, 0] + 3 * f[0, -1], matrix_form=True)
\ No newline at end of file
diff --git a/pystencils_tests/test_sympyextensions.py b/pystencils_tests/test_sympyextensions.py
new file mode 100644
index 0000000000000000000000000000000000000000..2135ee88e7ee5d9375ce5213a2bdb953bea3d5b8
--- /dev/null
+++ b/pystencils_tests/test_sympyextensions.py
@@ -0,0 +1,140 @@
+import sympy
+import pystencils
+
+from pystencils.sympyextensions import replace_second_order_products
+from pystencils.sympyextensions import remove_higher_order_terms
+from pystencils.sympyextensions import complete_the_squares_in_exp
+from pystencils.sympyextensions import extract_most_common_factor
+from pystencils.sympyextensions import count_operations
+from pystencils.sympyextensions import common_denominator
+from pystencils.sympyextensions import get_symmetric_part
+
+from pystencils import Assignment
+from pystencils.fast_approximation import (fast_division, fast_inv_sqrt, fast_sqrt,
+                                           insert_fast_divisions, insert_fast_sqrts)
+
+
+def test_replace_second_order_products():
+    x, y = sympy.symbols('x y')
+    expr = 4 * x * y
+    expected_expr_positive = 2 * ((x + y) ** 2 - x ** 2 - y ** 2)
+    expected_expr_negative = 2 * (-(x - y) ** 2 + x ** 2 + y ** 2)
+
+    result = replace_second_order_products(expr, search_symbols=[x, y], positive=True)
+    assert result == expected_expr_positive
+    assert (result - expected_expr_positive).simplify() == 0
+
+    result = replace_second_order_products(expr, search_symbols=[x, y], positive=False)
+    assert result == expected_expr_negative
+    assert (result - expected_expr_negative).simplify() == 0
+
+    result = replace_second_order_products(expr, search_symbols=[x, y], positive=None)
+    assert result == expected_expr_positive
+
+    a = [Assignment(sympy.symbols('z'), x + y)]
+    replace_second_order_products(expr, search_symbols=[x, y], positive=True, replace_mixed=a)
+    assert len(a) == 2
+
+
+def test_remove_higher_order_terms():
+    x, y = sympy.symbols('x y')
+
+    expr = sympy.Mul(x, y)
+
+    result = remove_higher_order_terms(expr, order=1, symbols=[x, y])
+    assert result == 0
+    result = remove_higher_order_terms(expr, order=2, symbols=[x, y])
+    assert result == expr
+
+    expr = sympy.Pow(x, 3)
+
+    result = remove_higher_order_terms(expr, order=2, symbols=[x, y])
+    assert result == 0
+    result = remove_higher_order_terms(expr, order=3, symbols=[x, y])
+    assert result == expr
+
+
+def test_complete_the_squares_in_exp():
+    a, b, c, s, n = sympy.symbols('a b c s n')
+    expr = a * s ** 2 + b * s + c
+    result = complete_the_squares_in_exp(expr, symbols_to_complete=[s])
+    assert result == expr
+
+    expr = sympy.exp(a * s ** 2 + b * s + c)
+    expected_result = sympy.exp(a*s**2 + c - b**2 / (4*a))
+    result = complete_the_squares_in_exp(expr, symbols_to_complete=[s])
+    assert result == expected_result
+
+
+def test_extract_most_common_factor():
+    x, y = sympy.symbols('x y')
+    expr = 1 / (x + y) + 3 / (x + y) + 3 / (x + y)
+    most_common_factor = extract_most_common_factor(expr)
+
+    assert most_common_factor[0] == 7
+    assert sympy.prod(most_common_factor) == expr
+
+    expr = 1 / x + 3 / (x + y) + 3 / y
+    most_common_factor = extract_most_common_factor(expr)
+
+    assert most_common_factor[0] == 3
+    assert sympy.prod(most_common_factor) == expr
+
+    expr = 1 / x
+    most_common_factor = extract_most_common_factor(expr)
+
+    assert most_common_factor[0] == 1
+    assert sympy.prod(most_common_factor) == expr
+    assert most_common_factor[1] == expr
+
+
+def test_count_operations():
+    x, y, z = sympy.symbols('x y z')
+    expr = 1/x + y * sympy.sqrt(z)
+    ops = count_operations(expr, only_type=None)
+    assert ops['adds'] == 1
+    assert ops['muls'] == 1
+    assert ops['divs'] == 1
+    assert ops['sqrts'] == 1
+
+    expr = sympy.sqrt(x + y)
+    expr = insert_fast_sqrts(expr).atoms(fast_sqrt)
+    ops = count_operations(*expr, only_type=None)
+    assert ops['fast_sqrts'] == 1
+
+    expr = sympy.sqrt(x / y)
+    expr = insert_fast_divisions(expr).atoms(fast_division)
+    ops = count_operations(*expr, only_type=None)
+    assert ops['fast_div'] == 1
+
+    expr = pystencils.Assignment(sympy.Symbol('tmp'), 3 / sympy.sqrt(x + y))
+    expr = insert_fast_sqrts(expr).atoms(fast_inv_sqrt)
+    ops = count_operations(*expr, only_type=None)
+    assert ops['fast_inv_sqrts'] == 1
+
+    expr = sympy.Piecewise((1.0, x > 0), (0.0, True)) + y * z
+    ops = count_operations(expr, only_type=None)
+    assert ops['adds'] == 1
+
+    expr = sympy.Pow(1/x + y * sympy.sqrt(z), 100)
+    ops = count_operations(expr, only_type=None)
+    assert ops['adds'] == 1
+    assert ops['muls'] == 99
+    assert ops['divs'] == 1
+    assert ops['sqrts'] == 1
+
+
+def test_common_denominator():
+    x = sympy.symbols('x')
+    expr = sympy.Rational(1, 2) + x * sympy.Rational(2, 3)
+    cm = common_denominator(expr)
+    assert cm == 6
+
+
+def test_get_symmetric_part():
+    x, y, z = sympy.symbols('x y z')
+    expr = x / 9 - y ** 2 / 6 + z ** 2 / 3 + z / 3
+    expected_result = x / 9 - y ** 2 / 6 + z ** 2 / 3
+    sym_part = get_symmetric_part(expr, sympy.symbols(f'y z'))
+
+    assert sym_part == expected_result
diff --git a/pystencils_tests/test_timeloop.py b/pystencils_tests/test_timeloop.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4e0d92a39205fe8c627e30ff3b3db3b66e2a9e1
--- /dev/null
+++ b/pystencils_tests/test_timeloop.py
@@ -0,0 +1,62 @@
+import time
+import numpy as np
+
+from pystencils import Assignment
+from pystencils import create_kernel
+from pystencils.datahandling import create_data_handling
+from pystencils.timeloop import TimeLoop
+
+
+def test_timeloop():
+    dh = create_data_handling(domain_size=(2, 2), periodicity=True)
+
+    pre = dh.add_array('pre_run_field', values_per_cell=1)
+    dh.fill("pre_run_field", 0.0, ghost_layers=True)
+    f = dh.add_array('field', values_per_cell=1)
+    dh.fill("field", 0.0, ghost_layers=True)
+    post = dh.add_array('post_run_field', values_per_cell=1)
+    dh.fill("post_run_field", 0.0, ghost_layers=True)
+    single_step = dh.add_array('single_step_field', values_per_cell=1)
+    dh.fill("single_step_field", 0.0, ghost_layers=True)
+
+    pre_assignments = Assignment(pre.center, pre.center + 1)
+    pre_kernel = create_kernel(pre_assignments).compile()
+    assignments = Assignment(f.center, f.center + 1)
+    kernel = create_kernel(assignments).compile()
+    post_assignments = Assignment(post.center, post.center + 1)
+    post_kernel = create_kernel(post_assignments).compile()
+    single_step_assignments = Assignment(single_step.center, single_step.center + 1)
+    single_step_kernel = create_kernel(single_step_assignments).compile()
+
+    fixed_steps = 2
+    timeloop = TimeLoop(steps=fixed_steps)
+    assert timeloop.fixed_steps == fixed_steps
+
+    def pre_run():
+        dh.run_kernel(pre_kernel)
+
+    def post_run():
+        dh.run_kernel(post_kernel)
+
+    def single_step_run():
+        dh.run_kernel(single_step_kernel)
+
+    timeloop.add_pre_run_function(pre_run)
+    timeloop.add_post_run_function(post_run)
+    timeloop.add_single_step_function(single_step_run)
+    timeloop.add_call(kernel, {'field': dh.cpu_arrays["field"]})
+
+    # the timeloop is initialised with 2 steps. This means a single time step consists of two steps.
+    # Therefore, we have 2 main iterations and one single step iteration in this configuration
+    timeloop.run(time_steps=5)
+    assert np.all(dh.cpu_arrays["pre_run_field"] == 1.0)
+    assert np.all(dh.cpu_arrays["field"] == 2.0)
+    assert np.all(dh.cpu_arrays["single_step_field"] == 1.0)
+    assert np.all(dh.cpu_arrays["post_run_field"] == 1.0)
+
+    seconds = 2
+    start = time.perf_counter()
+    timeloop.run_time_span(seconds=seconds)
+    end = time.perf_counter()
+
+    np.testing.assert_almost_equal(seconds, end - start, decimal=3)
diff --git a/pystencils/test_type_interference.py b/pystencils_tests/test_type_interference.py
similarity index 100%
rename from pystencils/test_type_interference.py
rename to pystencils_tests/test_type_interference.py
diff --git a/pystencils_tests/test_utils.py b/pystencils_tests/test_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..3085ef61a3a33f599a53d5c7233d892a59367518
--- /dev/null
+++ b/pystencils_tests/test_utils.py
@@ -0,0 +1,81 @@
+import pytest
+import sympy as sp
+from pystencils.utils import LinearEquationSystem
+from pystencils.utils import DotDict
+
+
+def test_linear_equation_system():
+    unknowns = sp.symbols("x_:3")
+    x, y, z = unknowns
+    m = LinearEquationSystem(unknowns)
+    m.add_equation(x + y - 2)
+    m.add_equation(x - y - 1)
+    assert m.solution_structure() == 'multiple'
+    m.set_unknown_zero(2)
+    assert m.solution_structure() == 'single'
+    solution = m.solution()
+    assert solution[unknowns[2]] == 0
+    assert solution[unknowns[1]] == sp.Rational(1, 2)
+    assert solution[unknowns[0]] == sp.Rational(3, 2)
+
+    m.set_unknown_zero(0)
+    assert m.solution_structure() == 'none'
+
+    # special case where less rows than unknowns, but no solution
+    m = LinearEquationSystem(unknowns)
+    m.add_equation(x - 3)
+    m.add_equation(x - 4)
+    assert m.solution_structure() == 'none'
+    m.add_equation(y - 4)
+    assert m.solution_structure() == 'none'
+
+    with pytest.raises(ValueError) as e:
+        m.add_equation(x**2 - 1)
+    assert 'Not a linear equation' in str(e.value)
+
+    x, y, z = sp.symbols("x, y, z")
+    les = LinearEquationSystem([x, y, z])
+    les.add_equation(1 * x + 2 * y - 1 * z + 4)
+    les.add_equation(2 * x + 1 * y + 1 * z - 2)
+    les.add_equation(1 * x + 2 * y + 1 * z + 2)
+
+    # usually reduce is not necessary since almost every function of LinearEquationSystem calls reduce beforehand
+    les.reduce()
+
+    expected_matrix = sp.Matrix([[1, 0, 0, sp.Rational(5, 3)],
+                                 [0, 1, 0, sp.Rational(-7, 3)],
+                                 [0, 0, 1, sp.Integer(1)]])
+    assert les.matrix == expected_matrix
+    assert les.rank == 3
+
+    sol = les.solution()
+    assert sol[x] == sp.Rational(5, 3)
+    assert sol[y] == sp.Rational(-7, 3)
+    assert sol[z] == sp.Integer(1)
+
+    les = LinearEquationSystem([x, y])
+    assert les.solution_structure() == 'multiple'
+
+    les.add_equation(x + 1)
+    assert les.solution_structure() == 'multiple'
+
+    les.add_equation(y + 2)
+    assert les.solution_structure() == 'single'
+
+    les.add_equation(x + y + 5)
+    assert les.solution_structure() == 'none'
+
+
+def test_dot_dict():
+    d = {'a': {'c': 7}, 'b': 6}
+    t = DotDict(d)
+    assert t.a.c == 7
+    assert t.b == 6
+    assert len(t) == 2
+
+    delattr(t, 'b')
+    assert len(t) == 1
+
+    t.b = 6
+    assert len(t) == 2
+    assert t.b == 6
diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py
index 1fa1812eb8fced19d114ef3cb32900ec9b93f995..3cc1be4f36b126baf19d074657b4394cdd2cefbb 100644
--- a/pystencils_tests/test_vectorization.py
+++ b/pystencils_tests/test_vectorization.py
@@ -4,6 +4,7 @@ import sympy as sp
 import pystencils as ps
 from pystencils.backends.simd_instruction_sets import get_supported_instruction_sets
 from pystencils.cpu.vectorization import vectorize
+from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
 from pystencils.transformations import replace_inner_stride_with_one
 
 
@@ -109,7 +110,6 @@ def test_piecewise1():
 
 
 def test_piecewise2():
-
     arr = np.zeros((20, 20))
 
     @ps.kernel
@@ -128,7 +128,6 @@ def test_piecewise2():
 
 
 def test_piecewise3():
-
     arr = np.zeros((22, 22))
 
     @ps.kernel
@@ -146,12 +145,32 @@ def test_logical_operators():
     arr = np.zeros((22, 22))
 
     @ps.kernel
-    def test_kernel(s):
+    def kernel_and(s):
         f, g = ps.fields(f=arr, g=arr)
         s.c @= sp.And(f[0, 1] < 0.0, f[1, 0] < 0.0)
         g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])
 
-    ast = ps.create_kernel(test_kernel)
+    ast = ps.create_kernel(kernel_and)
+    vectorize(ast)
+    ast.compile()
+
+    @ps.kernel
+    def kernel_or(s):
+        f, g = ps.fields(f=arr, g=arr)
+        s.c @= sp.Or(f[0, 1] < 0.0, f[1, 0] < 0.0)
+        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])
+
+    ast = ps.create_kernel(kernel_or)
+    vectorize(ast)
+    ast.compile()
+
+    @ps.kernel
+    def kernel_equal(s):
+        f, g = ps.fields(f=arr, g=arr)
+        s.c @= sp.Eq(f[0, 1], 2.0)
+        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])
+
+    ast = ps.create_kernel(kernel_equal)
     vectorize(ast)
     ast.compile()
 
@@ -159,3 +178,61 @@ def test_logical_operators():
 def test_hardware_query():
     instruction_sets = get_supported_instruction_sets()
     assert 'sse' in instruction_sets
+
+
+def test_vectorised_pow():
+    arr = np.zeros((24, 24))
+    f, g = ps.fields(f=arr, g=arr)
+
+    as1 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], 2))
+    as2 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], 0.5))
+    as3 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], -0.5))
+    as4 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], 4))
+    as5 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], -4))
+    as6 = ps.Assignment(g[0, 0], sp.Pow(f[0, 0], -1))
+
+    ast = ps.create_kernel(as1)
+    vectorize(ast)
+    ast.compile()
+
+    ast = ps.create_kernel(as2)
+    vectorize(ast)
+    ast.compile()
+
+    ast = ps.create_kernel(as3)
+    vectorize(ast)
+    ast.compile()
+
+    ast = ps.create_kernel(as4)
+    vectorize(ast)
+    ast.compile()
+
+    ast = ps.create_kernel(as5)
+    vectorize(ast)
+    ast.compile()
+
+    ast = ps.create_kernel(as6)
+    vectorize(ast)
+    ast.compile()
+
+
+def test_vectorised_fast_approximations():
+    arr = np.zeros((24, 24))
+    f, g = ps.fields(f=arr, g=arr)
+
+    expr = sp.sqrt(f[0, 0] + f[1, 0])
+    assignment = ps.Assignment(g[0, 0], insert_fast_sqrts(expr))
+    ast = ps.create_kernel(assignment)
+    vectorize(ast)
+    ast.compile()
+
+    expr = f[0, 0] / f[1, 0]
+    assignment = ps.Assignment(g[0, 0], insert_fast_divisions(expr))
+    ast = ps.create_kernel(assignment)
+    vectorize(ast)
+    ast.compile()
+
+    assignment = ps.Assignment(sp.Symbol("tmp"), 3 / sp.sqrt(f[0, 0] + f[1, 0]))
+    ast = ps.create_kernel(insert_fast_sqrts(assignment))
+    vectorize(ast)
+    ast.compile()
diff --git a/pytest.ini b/pytest.ini
index 2795fb9d85e18838ddc963f307d232ef1a6365bf..5cdf16be1c51127d7b0a93c8f2a2a86a9f6d3933 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -39,7 +39,7 @@ exclude_lines =
        if __name__ == .__main__.:
 
 skip_covered = True
-fail_under = 75
+fail_under = 83
 
 [html]
 directory = coverage_report