diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py
index 92a6080c73389a815163157efefac2186aeee09e..4e503c4acad55919f353a0cdb2d30f44f4f4742b 100644
--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -164,6 +164,13 @@ class CBackend:
         return "%s%s\n%s" % (prefix, loop_str, self._print(node.body))
 
     def _print_SympyAssignment(self, node):
+        if self._dialect == 'cuda' and isinstance(node.lhs, sp.Symbol) and node.lhs.name.startswith("shmemslot"):
+            result = "__shared__ volatile double %s[512]; %s[threadIdx.z * " \
+                     "blockDim.x*blockDim.y + threadIdx.y * " \
+                     "blockDim.x + threadIdx.x] = %s;" % \
+                     (node.lhs.name, node.lhs.name, self.sympy_printer.doprint(node.rhs))
+            return result
+
         if node.is_declaration:
             data_type = "const " + str(node.lhs.dtype) + " " if node.is_const else str(node.lhs.dtype) + " "
             return "%s%s = %s;" % (data_type, self.sympy_printer.doprint(node.lhs),
@@ -254,6 +261,12 @@ class CustomSympyPrinter(CCodePrinter):
         res = str(expr.evalf().num)
         return res
 
+    def _print_Symbol(self, expr):
+        if self._dialect == 'cuda' and expr.name.startswith("shmemslot"):  # NOTE(review): assumes `_dialect` is set on this printer - confirm
+            return expr.name + "[threadIdx.z * blockDim.x*blockDim.y + threadIdx.y * blockDim.x + threadIdx.x]"
+        else:  # every other symbol goes through the inherited printing
+            return super(CustomSympyPrinter, self)._print_Symbol(expr)
+
     def _print_Equality(self, expr):
         """Equality operator is not printable in default printer"""
         return '((' + self._print(expr.lhs) + ") == (" + self._print(expr.rhs) + '))'
diff --git a/pystencils/simp/liveness_opts.py b/pystencils/simp/liveness_opts.py
index 3bee292def73128da0c4db1124087877d69d91c3..370887dadd1c1f34a182ed814b4a796eb22e7b2b 100644
--- a/pystencils/simp/liveness_opts.py
+++ b/pystencils/simp/liveness_opts.py
@@ -1,69 +1,51 @@
-from sympy import Symbol, Dummy
-
-from pystencils import Field, Assignment
-
+import sympy as sp
 import random
 import copy
+from typing import List
+from pystencils import Field, Assignment
 
-
-def get_usage(atoms):
-    reg_usage = {}
-    for atom in atoms:
-        reg_usage[atom.lhs] = 0
-    for atom in atoms:
-        for arg in atom.rhs.atoms():
-            if isinstance(arg, Symbol) and not isinstance(arg, Field.Access):
-                if arg in reg_usage:
-                    reg_usage[arg] += 1
-                else:
-                    print(str(arg) + " is unsatisfied")
-    return reg_usage
-
-
-def get_definitions(eqs):
-    definitions = {}
-    for eq in eqs:
-        definitions[eq.lhs] = eq
-    return definitions
+fa_symbol_iter = sp.numbered_symbols("fa_")
 
 
-def get_roots(eqs):
-    roots = []
-    for eq in eqs:
-        if isinstance(eq.lhs, Field.Access):
-            roots.append(eq.lhs)
-    if not roots:
-        roots.append(eqs[-1].lhs)
-    return roots
-
-
-def merge_field_accesses(eqs):
+def merge_field_accesses(assignments):
+    """Transformation that introduces symbols for all read field accesses
+    for multiple read accesses only one symbol is introduced"""
     field_accesses = {}
 
-    for eq in eqs:
-        for arg in eq.rhs.atoms():
+    new_eqs = copy.copy(assignments)
+    for assignment in new_eqs:
+        for arg in assignment.rhs.atoms():
             if isinstance(arg, Field.Access) and arg not in field_accesses:
-                field_accesses[arg] = Dummy()
+                field_accesses[arg] = next(fa_symbol_iter)
 
-    for i in range(0, len(eqs)):
+    for i in range(0, len(new_eqs)):
         for f, s in field_accesses.items():
-            if f in eqs[i].atoms():
-                eqs[i] = eqs[i].subs(f, s)
+            if f in new_eqs[i].atoms():
+                new_eqs[i] = new_eqs[i].subs(f, s)
 
     for f, s in field_accesses.items():
-        eqs.insert(0, Assignment(s, f))
+        new_eqs.insert(0, Assignment(s, f))
+
+    return new_eqs
 
-    return eqs
 
+def fuse_eqs(input_eqs, max_depth=1, max_usage=1):
+    """Inserts subexpressions that are used not more than `max_usage`
 
-def refuse_eqs(input_eqs, max_depth=0, max_usage=1):
+    Args:
+        max_depth: complexity metric for the subexpression to insert
+                   if max_depth is larger than the expression tree of the subexpression
+                   the subexpressions is not inserted
+
+    Somewhat the inverse of common subexpression elimination.
+    """
     eqs = copy.copy(input_eqs)
     usages = get_usage(eqs)
     definitions = get_definitions(eqs)
 
     def inline_trivially_schedulable(sym, depth):
 
-        if sym not in usages or usages[sym] > max_usage or depth > max_depth:
+        if sym not in definitions or sym not in usages or usages[sym] > max_usage or depth > max_depth:
             return sym
 
         rhs = definitions[sym].rhs
@@ -74,13 +56,13 @@ def refuse_eqs(input_eqs, max_depth=0, max_usage=1):
 
     for idx, eq in enumerate(eqs):
         if usages[eq.lhs] > 1 or isinstance(eq.lhs, Field.Access):
-            if not isinstance(eq.rhs, Symbol):
-
-                eqs[idx] = Assignment(eq.lhs,
-                                      eq.rhs.func(*[inline_trivially_schedulable(arg, 0) for arg in eq.rhs.args]))
+            if not isinstance(eq.rhs, sp.Symbol):
+                eqs[idx] = Assignment(
+                    eq.lhs,
+                    eq.rhs.func(*[inline_trivially_schedulable(arg, 0) for arg in eq.rhs.args]))
 
     count = 0
-    while (len(eqs) != count):
+    while len(eqs) != count:
         count = len(eqs)
         usages = get_usage(eqs)
         eqs = [eq for eq in eqs if usages[eq.lhs] > 0 or isinstance(eq.lhs, Field.Access)]
@@ -88,16 +70,26 @@ def refuse_eqs(input_eqs, max_depth=0, max_usage=1):
     return eqs
 
 
-def schedule_eqs(eqs, candidate_count=20):
+def schedule_eqs(assignments: List[Assignment], candidate_count=20):
+    """Changes order of assignments to save registers.
+
+    Args:
+        assignments:
+        candidate_count: tuning parameter, small means fast, but bad scheduling quality
+                         1 corresponds to full greedy search
+
+    Returns:
+        list of re-ordered assignments
+    """
     if candidate_count == 0:
-        return eqs
+        return assignments
 
-    definitions = get_definitions(eqs)
+    definitions = get_definitions(assignments)
     definition_atoms = {}
     for sym, definition in definitions.items():
-        definition_atoms[sym] = list(definition.rhs.atoms(Symbol))
-    roots = get_roots(eqs)
-    initial_usages = get_usage(eqs)
+        definition_atoms[sym] = list(definition.rhs.atoms(sp.Symbol))
+    roots = get_roots(assignments)
+    initial_usages = get_usage(assignments)
 
     level = 0
     current_level_set = set([frozenset(roots)])
@@ -111,12 +103,18 @@ def schedule_eqs(eqs, candidate_count=20):
 
         min_regs = min([len(current_usages[dec_set]) for dec_set in current_level_set])
         max_regs = max(max_regs, min_regs)
-        candidates = [(dec_set, len(current_usages[dec_set])) for dec_set in current_level_set]
+
+        def score_dec_set(dec_set):
+            score = len(current_usages[dec_set])  # current_schedules[dec_set][0]
+            return dec_set, score
+
+        candidates = [score_dec_set(dec_set) for dec_set in current_level_set]
 
         random.shuffle(candidates)
         candidates.sort(key=lambda d: d[1])
 
         for dec_set, regs in candidates[:candidate_count]:
+
             for dec in dec_set:
                 new_dec_set = set(dec_set)
                 new_dec_set.remove(dec)
@@ -126,7 +124,7 @@ def schedule_eqs(eqs, candidate_count=20):
                 for arg in atoms:
                     if not isinstance(arg, Field.Access):
                         argu = usage.get(arg, initial_usages[arg]) - 1
-                        if argu == 0:
+                        if argu == 0 and arg in definitions:
                             new_dec_set.add(arg)
                         usage[arg] = argu
                 frozen_new_dec_set = frozenset(new_dec_set)
@@ -134,7 +132,6 @@ def schedule_eqs(eqs, candidate_count=20):
                 max_reg_count = max(len(usage), schedule[0])
 
                 if frozen_new_dec_set not in new_schedules or max_reg_count < new_schedules[frozen_new_dec_set][0]:
-
                     new_schedule = list(schedule[1])
                     new_schedule.append(definitions[dec])
                     new_schedules[frozen_new_dec_set] = (max_reg_count, new_schedule)
@@ -150,9 +147,77 @@ def schedule_eqs(eqs, candidate_count=20):
         level += 1
 
     schedule = current_schedules[frozenset()]
+
     schedule[1].reverse()
-    return (schedule[1])
+    return schedule[1]
 
 
 def liveness_opt_transformation(eqs):
-    return refuse_eqs(merge_field_accesses(schedule_eqs(eqs, 3)), 1, 3)
+    return fuse_eqs(merge_field_accesses(schedule_eqs(eqs, 30)), 1, 3)
+
+
+# ---------- Utilities   -----------------------------------------------------------------------------------------
+
+
+def get_usage(assignments: List[Assignment]):
+    """Count number of reads for all symbols in list of assignments
+
+    Every left-hand side gets an entry (0 if it is never read), so the result can be indexed by `eq.lhs`.
+
+    Returns:
+        dictionary mapping symbol to number of its reads
+    """
+    # Seed with all left-hand sides: callers such as fuse_eqs look up usages[eq.lhs] unconditionally
+    reg_usage = {assignment.lhs: 0 for assignment in assignments}
+    for assignment in assignments:
+        for arg in assignment.rhs.atoms():
+            if isinstance(arg, sp.Symbol) and not isinstance(arg, Field.Access):
+                reg_usage[arg] = reg_usage.get(arg, 0) + 1
+    return reg_usage
+
+
+def get_definitions(assignments: List[Assignment]):
+    """Returns dictionary mapping symbol to its defining assignment
+
+    If a symbol is assigned more than once, the last assignment wins.
+    """
+    return {assignment.lhs: assignment for assignment in assignments}
+
+
+def get_roots(eqs):
+    """Returns the left-hand sides of all assignments that write to a field (stores).
+
+    If no assignment stores to a field, the left-hand side of the last assignment
+    is used as the single root (TODO try if necessary).
+    """
+    stores = [eq.lhs for eq in eqs if isinstance(eq.lhs, Field.Access)]
+    if stores:
+        return stores
+    # No stores at all: fall back to the final assignment
+    return [eqs[-1].lhs]
+
+
+# ---------- Staggered kernels -----------------------------------------------------------------------------------------
+
+def unpack_staggered_eqs(field, expressions, subexpressions):
+    eqs = copy.deepcopy(subexpressions)  # the stores are appended to a deep copy of the subexpressions
+    for dim, dim_expressions in enumerate(expressions):
+        for vec in range(len(dim_expressions)):
+            eqs.append(Assignment(Field.Access(field, (0, 0, 0, dim, vec)), dim_expressions[vec]))
+    return eqs
+
+
+def pack_staggered_eqs(eqs, field, expressions, subexpressions):
+    """Collects the field stores in `eqs` into one column matrix per `field.shape[-2]` entry.
+
+    Returns (field, list of matrices, remaining non-store assignments); slots never stored to stay 0.
+    The passed `expressions` and `subexpressions` are not read."""
+    rows, cols = field.shape[-2], field.shape[-1]
+    flat = [0] * (cols * rows)
+    remaining = []
+    for eq in eqs:
+        if isinstance(eq.lhs, Field.Access):
+            flat[eq.lhs.offsets[-2] * cols + eq.lhs.offsets[-1]] = eq.rhs
+        else:
+            remaining.append(eq)
+    return (field, [sp.Matrix(cols, 1, flat[dim * cols:(dim + 1) * cols]) for dim in range(rows)], remaining)
diff --git a/pystencils/simp/liveness_opts_exp.py b/pystencils/simp/liveness_opts_exp.py
new file mode 100644
index 0000000000000000000000000000000000000000..735ab1f5f8f5d9ef006fb7286ee800894f14143d
--- /dev/null
+++ b/pystencils/simp/liveness_opts_exp.py
@@ -0,0 +1,972 @@
+import sympy
+import itertools
+from sympy import Symbol, Piecewise, Number, postorder_traversal, numbered_symbols
+from pystencils.simp.liveness_opts import *
+
+atom_symbol_iter = numbered_symbols("atom_")
+
+
+def three_operand_form(assignments):
+    """Transforms list of assignments in three operand form"""
+    # atomize() appends the temporaries needed for `expr` to `atoms` and returns the symbol holding its value
+    def atomize(expr, atoms):
+        if len(expr.args) == 0:
+            return expr
+        # A fresh `atom_` name is drawn for every non-leaf node of the expression tree
+        atom = next(atom_symbol_iter)
+        if len(expr.args) == 1:
+            atoms.append(Assignment(atom, expr.func(atomize(expr.args[0], atoms))))
+            return atom
+        # Piecewise: only the branch values are atomized, the conditions are passed through unchanged
+        if isinstance(expr, Piecewise):
+            atoms.append(
+                Assignment(
+                    atom,
+                    Piecewise(*[(atomize(expr.expr, atoms), expr.cond) for expr in expr.args])))
+            return atom
+        # n-ary node: combine the first two arguments, then fold in the remaining ones one at a time
+        atoms.append(Assignment(atom, expr.func(atomize(expr.args[0], atoms), atomize(expr.args[1], atoms))))
+
+        current_atom = atom
+        for i in range(2, len(expr.args)):
+            atom = next(atom_symbol_iter)
+            atoms.append(Assignment(atom, expr.func(atomize(expr.args[i], atoms), current_atom)))
+            current_atom = atom
+
+        return current_atom
+
+    atoms = []
+    for eq in assignments:
+        new_atoms = []
+        atomize(eq.rhs, new_atoms)
+        if len(new_atoms) > 0:
+            new_atoms[-1] = Assignment(eq.lhs, new_atoms[-1].rhs)  # last temporary holds eq.rhs: store it in eq.lhs
+        else:
+            new_atoms.append(eq)  # rhs is a leaf, the assignment is kept as it is
+        atoms.extend(new_atoms)
+
+    return atoms
+
+
+def var_to_shmem(eqs, var_count=8):
+    """Renames the `var_count` most frequently read symbols to `shmemslot<i>`.
+
+    `eqs` is returned unchanged if var_count > 8 or a `shmemslot*` symbol is already assigned.
+    """
+    if var_count > 8:
+        return eqs
+    if var_count == 0:
+        return copy.copy(eqs)
+    for eq in eqs:
+        if eq.lhs.name.startswith("shmemslot"):
+            return eqs
+
+    usage = get_usage(eqs)
+    # Most-read first (stable sort); slicing caps the count at the symbols that exist,
+    # the former index loop raised IndexError when fewer than var_count symbols were read
+    most_used = sorted(usage, key=lambda s: -usage[s])[:max(var_count, 0)]
+    replacements = [(sym, Symbol("shmemslot" + str(i))) for i, sym in enumerate(most_used)]
+    return [eq.subs(replacements) for eq in eqs]
+
+
+def shift_fa_eqs(eqs, direction=1):
+    """Returns a new list in which every field access of `eqs` is replaced by
+    `access.neighbor(0, direction)`; all other nodes are rebuilt unchanged."""
+    def shift_fa(expr, direction):
+        if isinstance(expr, Field.Access):
+            return expr.neighbor(0, direction)
+        if not expr.args:
+            return expr
+        shifted_args = [shift_fa(arg, direction) for arg in expr.args]
+        return expr.func(*shifted_args)
+
+    # The assignments themselves are rebuilt too, so both sides are shifted
+    return [shift_fa(eq, direction) for eq in eqs]
+
+
+def get_steal_list(eqs, shifted_eqs):
+    def is_equal_arg(left_arg, right_arg, steal_list, left_def, right_def, verbose=False):
+        # Leaf comparison: numbers and field accesses must be equal, anything else must already be paired
+        if verbose: print("is_equal_arg: IN left_arg " + str(left_arg))
+        if verbose: print("is_equal_arg: IN right_arg " + str(right_arg))
+        # NOTE(review): the second "SUB left_arg" message below actually prints right_arg
+        if verbose: print("is_equal_arg: SUB left_arg " + str(left_arg))
+        if verbose: print("is_equal_arg: SUB left_arg " + str(right_arg))
+
+        if isinstance(left_arg, Number): return left_arg == right_arg
+        if isinstance(left_arg, Field.Access): return left_arg == right_arg
+
+        if left_arg not in steal_list: return False
+
+        if verbose: print("is_equal_arg: stolen" + str(steal_list[left_arg]))
+
+        return steal_list[left_arg] == right_arg
+
+    def is_equal_expr(left_expr, right_expr, steal_list, left_def, right_def, verbose=False):
+        # Same node type and arity, and the direct arguments match (via is_equal_arg) in some order
+        # print(str(left_expr) + " =?= " + str(right_expr))
+
+        if type(left_expr) != type(right_expr): return False
+
+        if left_expr.func != right_expr.func or len(left_expr.args) != len(right_expr.args):
+            return False
+
+        if len(left_expr.args) == 0:
+            return is_equal_arg(left_expr, right_expr, steal_list, left_def, right_def, verbose)
+        # Every ordering of the left arguments is tried against the right arguments in their given order
+        for left_arg_perm in itertools.permutations(left_expr.args):
+            equal_args = True
+            for idx, left_arg in enumerate(left_arg_perm):
+                if not is_equal_arg(left_arg, right_expr.args[idx], steal_list, left_def, right_def,
+                                    verbose):
+                    equal_args = False
+                    break
+            if equal_args: return True
+
+        return False
+    # Result: maps a sub-expression of `eqs` to the sub-expression of `shifted_eqs` found equal to it
+    steal_from_e = {}
+    left_def = get_definitions(eqs)
+    right_def = get_definitions(shifted_eqs)
+    for lidx, asgn_left in enumerate(eqs):
+        verbose = False
+
+        for left_subexpr in sympy.postorder_traversal(asgn_left.rhs):
+            if isinstance(left_subexpr, sympy.Number) or isinstance(
+                    left_subexpr, Field.Access) or isinstance(left_subexpr, Assignment):
+                continue
+            for ridx, asgn_right in enumerate(shifted_eqs):
+                # Symbols are compared through their definitions; NOTE(review): a symbol without one raises KeyError
+                for right_subexpr in sympy.postorder_traversal(asgn_right.rhs):
+                    left_arg = left_subexpr
+                    right_arg = right_subexpr
+                    if isinstance(left_subexpr,
+                                  Symbol) and not isinstance(left_subexpr, Field.Access):
+                        left_arg = left_def[left_subexpr].rhs
+                    if isinstance(right_subexpr,
+                                  Symbol) and not isinstance(right_subexpr, Field.Access):
+                        right_arg = right_def[right_subexpr].rhs
+
+                    if is_equal_expr(left_arg, right_arg, steal_from_e, left_def, right_def,
+                                     verbose):
+                        steal_from_e[left_subexpr] = right_subexpr
+                        # NOTE(review): the match is printed unconditionally, the `verbose` check is disabled
+                        print(str(left_subexpr) + " ==  " + str(right_subexpr))
+
+    return steal_from_e
+
+
+def find_symbol(eqs, name):
+    """Returns the first left-hand side in `eqs` whose name equals `name`, or None if there is none."""
+    return next((eq.lhs for eq in eqs if eq.lhs.name == name), None)
+
+
+def find_expr(eqs, expr):
+    """Returns (index, matching node, assignment) for the first node in `eqs` equal to `expr`, else None."""
+    matches = ((idx, node, eq) for idx, eq in enumerate(eqs)
+               for node in postorder_traversal(eq) if node == expr)
+    return next(matches, None)
+
+
+def left_steal(eqs, steal_count=2):
+    shifted_eqs = shift_fa_eqs(eqs)
+    steal_from_e = get_steal_list(eqs, shifted_eqs)
+    # usage/definitions are computed once from the input and not refreshed while new_eqs is edited
+    usage = get_usage(eqs)
+    definitions = get_definitions(eqs)
+    # Size of the expression tree below `node`, not descending into arguments that are read more than once
+    def count_nodes_up(node):
+        if isinstance(node, Field.Access):
+            return 1
+
+        if node in definitions:
+            node = definitions[node].rhs
+
+        node_count = 0
+        for arg in node.args:
+            if not (arg in usage and usage[arg] > 1):
+                node_count += count_nodes_up(arg)
+        return node_count + 1
+
+    new_eqs = copy.copy(eqs)
+    for i in range(0, steal_count):
+        # Candidates are the symbols that have a match in the shifted equations, largest tree first
+        scores = [(s, count_nodes_up(s)) for s in steal_from_e if isinstance(s, Symbol)]
+        scores.sort(key=lambda s: s[1], reverse=True)
+
+        print(scores[0:10])
+        # NOTE(review): scores[0] raises IndexError when no symbol candidate is left - confirm callers avoid this
+        sym_xi = scores[0][0]
+        print(sym_xi)
+        print(steal_from_e[sym_xi])
+        steal_src = find_expr(new_eqs, shift_fa_eqs([steal_from_e[sym_xi]], -1)[0])
+        shmem_var = Symbol("shmemslot" + str(i))
+        # The donor expression is stored in `shmemslot<i>` right after the assignment that contains it
+        new_eqs.insert(steal_src[0] + 1, Assignment(shmem_var, steal_src[1]))
+
+        steal_dst = find_expr(new_eqs, sym_xi)
+
+        print(steal_dst)
+        print(steal_src)
+        print()
+        # All reads of the stolen symbol are redirected to the slot, then the assignment found for it is dropped
+        for idx, eq in enumerate(new_eqs):
+            if steal_dst[1] in eq.atoms():
+                new_eqs[idx] = Assignment(new_eqs[idx].lhs, new_eqs[idx].rhs.subs(
+                    steal_dst[1], shmem_var))
+
+        new_eqs.pop(steal_dst[0])
+
+        # Ancestors of donated value cannot be stolen, therefore remove from steal list
+        def get_ancestor_nodes(node, definitions):
+            ancestors = [node]
+            if isinstance(node, sympy.Number):
+                return []
+            if node in definitions:
+                ancestors.extend(get_ancestor_nodes(definitions[node].rhs, definitions))
+            for arg in node.args:
+                ancestors.extend(get_ancestor_nodes(arg, definitions))
+            return ancestors
+
+        for a in get_ancestor_nodes(steal_from_e[sym_xi], definitions):
+            if a in steal_from_e:
+                steal_from_e.pop(a)
+
+        # Remove value just stolen from steal list
+        steal_from_e.pop(sym_xi)
+
+    return new_eqs
+
+    # eqs = atomize_eqs(eqs)
+
+
+def move_forward(atoms):
+    """Sinks assignments that end no live range to just before their first use (in place),
+    but only if that first reader is more than 5 positions further down."""
+    reg_usage = get_usage(atoms)
+    i = 0
+    while i < len(atoms):
+        atom = atoms[i]
+        killed_regs = 0
+        for arg in atom.rhs.atoms():
+            if isinstance(arg, Field.Access) or not isinstance(arg, Symbol):
+                continue
+            reg_usage[arg] -= 1
+            if reg_usage[arg] == 0:
+                killed_regs += 1
+        if killed_regs == 0:
+            first_usage = i
+            # The former `range(...) or len(...)` operand was dead code: this range is never empty
+            for n in range(i, len(atoms)):
+                if atom.lhs in atoms[n].rhs.atoms():
+                    first_usage = n
+                    break
+            if first_usage - i > 5:
+                atoms.insert(first_usage - 1, atoms.pop(i))
+                # Undo the read counting: the moved assignment is visited again at its new position
+                for arg in atom.rhs.atoms():
+                    if isinstance(arg, Field.Access) or not isinstance(arg, Symbol):
+                        continue
+                    reg_usage[arg] += 1
+                i -= 1
+        i += 1
+    return atoms
+
+
+def move_backward(atoms):
+    reg_usage = get_usage(atoms)  # remaining reads per symbol, counted down while walking the list
+    i = 0
+    while i < len(atoms):
+        atom = atoms[i]
+        killed_regs = 0  # number of symbols whose last remaining read is in this assignment
+        for arg in atom.rhs.atoms():
+            if isinstance(arg, Field.Access) or not isinstance(arg, Symbol):
+                continue
+            reg_usage[arg] -= 1
+            if reg_usage[arg] == 0:
+                killed_regs += 1
+        if killed_regs > 1:
+            last_defined = 0
+            for n in range(i - 1, 0, -1):  # backwards search; index 0 is never inspected, it is the fallback
+                if len([x for x in atoms[n].rhs.atoms() if x in atoms[i].rhs.atoms()
+                        ]) != 0 or atoms[n].lhs in atom.rhs.atoms():
+                    last_defined = n
+                    break
+            if i - last_defined > 5:
+                atoms.insert(last_defined + 1, atoms.pop(i))
+                # The assignment now sits right behind the closest earlier one that shares an rhs atom
+                # with it or defines one of its operands (plain `atoms()`, so not only symbols count)
+        i += 1
+
+    return atoms
+
+
+def liveness_analysis(atoms):
+    """Walks `atoms` in order and tracks which symbols are live after each assignment.
+
+    Returns (symbols live at the peak, peak number of live symbols)."""
+    remaining_reads = get_usage(atoms)
+    live = []
+    peak_live, peak_count = [], 0
+    for atom in atoms:
+        if not isinstance(atom.lhs, Field.Access):
+            live.append(atom.lhs)
+        for arg in atom.rhs.atoms():
+            if not isinstance(arg, Symbol) or isinstance(arg, Field.Access):
+                continue
+            if arg in live:
+                remaining_reads[arg] -= 1
+                if remaining_reads[arg] == 0:
+                    live.remove(arg)  # last read: the symbol dies here
+            else:
+                print("_referenced Symbol " + str(arg) + " is not alive")
+        if len(live) > peak_count:
+            peak_count = len(live)
+            peak_live = list(live)
+
+    return (peak_live, peak_count)
+
+
+def schedule_eqs1(eqs):
+    """Re-orders assignments depth-first from the roots, visiting higher-labelled arguments first.
+
+    Only assignments reachable from `get_roots(eqs)` are returned.
+    """
+    definitions = get_definitions(eqs)
+    roots = get_roots(eqs)
+
+    def label_eqs(sym, labels):
+        if sym not in definitions:
+            return (0, 0)
+        if sym in labels:
+            return labels[sym]
+        if isinstance(definitions[sym].rhs, Field.Access):
+            labels[sym] = (1, 1)
+            return labels[sym]
+        reg_counts = []
+        for arg in definitions[sym].rhs.atoms():
+            reg_counts.append(label_eqs(arg, labels))
+        if len(reg_counts) == 1:
+            labels[sym] = reg_counts[0]
+            return labels[sym]
+        reg_counts.sort(key=lambda x: x[0])
+        if reg_counts[-1] == reg_counts[-2]:
+            label = reg_counts[-1][0] + 1
+        else:
+            label = reg_counts[-1][0]
+        labels[sym] = (label, 1)
+        return labels[sym]
+
+    labels = {}
+    for root in roots:
+        label_eqs(root, labels)  # roots are already left-hand sides, they have no `.lhs`
+
+    def schedule_sub_tree(sym, eqs, labels):
+        expr = definitions[sym]
+        if expr in eqs:
+            return
+        # Only recurse into symbols defined in the input; free symbols have nothing to schedule
+        args = [arg for arg in expr.rhs.atoms()
+                if isinstance(arg, Symbol) and not isinstance(arg, Field.Access) and arg in definitions]
+        args.sort(key=lambda arg: -labels[arg][0])
+        for arg in args:
+            schedule_sub_tree(arg, eqs, labels)
+        eqs.append(expr)
+
+    rescheduled_eqs = []
+    for root in roots:
+        schedule_sub_tree(root, rescheduled_eqs, labels)
+
+    return rescheduled_eqs
+
+
+def schedule_eqs2(eqs, target=168, branches=2):
+    """Backtracking scheduler: returns the re-ordered assignments, or None if no order is found.
+
+    A branch is given up once more than `target` symbols are pending; `branches` candidates are tried per step."""
+    definitions = get_definitions(eqs)
+    roots = get_roots(eqs)
+
+    def recursive_schedule(definitions, needed_syms, usages, target, depth):
+        if len(needed_syms) > target:
+            return None
+        if len(needed_syms) == 0:
+            return []
+        score_list = []
+        for sym, u in needed_syms.items():
+            if u != 0:
+                continue
+            score = 0
+            for arg in definitions[sym].rhs.atoms():
+                if arg not in needed_syms and arg in usages:
+                    score += 1
+
+            score_list.append((sym, score))
+        score_list.sort(key=lambda x: x[1])
+
+        for sym, score in score_list[0:branches]:
+            needed_syms.pop(sym)
+            for arg in definitions[sym].rhs.atoms():
+                if isinstance(arg, Symbol) and not isinstance(arg, Field.Access):
+                    if arg not in needed_syms:
+                        needed_syms[arg] = usages[arg]
+                    needed_syms[arg] -= 1
+
+            instrs = recursive_schedule(definitions, needed_syms, usages, target, depth + 1)
+            if instrs is not None:
+                instrs.append(definitions[sym])
+                return instrs
+            # Dead end: undo the bookkeeping for `sym` before the next candidate is tried
+            for arg in definitions[sym].rhs.atoms():
+                if isinstance(arg, Symbol) and not isinstance(arg, Field.Access):
+                    needed_syms[arg] += 1
+                    if needed_syms[arg] == usages[arg]:
+                        needed_syms.pop(arg)
+            needed_syms[sym] = 0
+        return None
+
+    usages = get_usage(eqs)
+    needed_syms = {u: 0 for u in roots}
+    return recursive_schedule(definitions, needed_syms, usages, target, 0)
+
+
+def schedule_eqs3(eqs, peak_alive=[]):
+    """Variant of `schedule_eqs` that keeps 40 candidates per level and adds `peak_alive` to the score."""
+    peak_alive_set = set(peak_alive)  # the default list is only read, never mutated
+    definitions = get_definitions(eqs)
+    definition_atoms = {}
+    for sym, definition in definitions.items():
+        definition_atoms[sym] = list(definition.rhs.atoms(Symbol))
+    roots = get_roots(eqs)
+    initial_usages = get_usage(eqs)
+
+    # State key: frozenset of symbols that can be scheduled next (the schedule is built back to front)
+    level = 0
+    current_level_set = set([frozenset(roots)])
+    current_usages = {frozenset(roots): {u: 0 for u in roots}}
+    current_schedules = {frozenset(roots): (0, [])}
+    max_regs = 0
+    while len(current_level_set) > 0:
+        new_usages = dict()
+        new_schedules = dict()
+        new_level_set = set()
+        min_regs = min([len(current_usages[dec_set]) for dec_set in current_level_set])
+        max_regs = max(max_regs, min_regs)
+        candidates = [(dec_set, len(current_usages[dec_set]) +
+                       len(peak_alive_set.union(set(current_usages[dec_set].keys()))) * 0.1)
+                      for dec_set in current_level_set]
+        candidates.sort(key=lambda d: d[1])
+
+        for dec_set, regs in candidates[:40]:
+            for dec in dec_set:
+                new_dec_set = set(dec_set)
+                new_dec_set.remove(dec)
+                usage = dict(current_usages[dec_set])
+                usage.pop(dec)
+                atoms = definition_atoms[dec]
+                for arg in atoms:
+                    if not isinstance(arg, Field.Access):
+                        argu = usage.get(arg, initial_usages[arg]) - 1
+                        if argu == 0 and arg in definitions:  # same guard as schedule_eqs: free symbols are not scheduled
+                            new_dec_set.add(arg)
+                        usage[arg] = argu
+                frozen_new_dec_set = frozenset(new_dec_set)
+                schedule = current_schedules[dec_set]
+                max_reg_count = max(len(usage), schedule[0])
+                if frozen_new_dec_set not in new_schedules or max_reg_count < new_schedules[
+                    frozen_new_dec_set][0]:
+                    new_schedule = list(schedule[1])
+                    new_schedule.append(definitions[dec])
+                    new_schedules[frozen_new_dec_set] = (max_reg_count, new_schedule)
+
+                if len(frozen_new_dec_set) > 0:
+                    new_level_set.add(frozen_new_dec_set)
+                new_usages[frozen_new_dec_set] = usage
+
+        current_schedules = new_schedules
+        current_usages = new_usages
+        current_level_set = new_level_set
+        # Next level: the states reached by scheduling one more assignment
+        level += 1
+
+    schedule = current_schedules[frozenset()]
+    schedule[1].reverse()
+    return (schedule[1])
+
+
+split_symbol_iter = numbered_symbols("split_")
+
+
+def split_live_range(eqs, splits=2):
+    for i in range(0, splits):  # one pass per split; `eqs` is modified in place
+        max_alive_regs = 0
+        reg_usage = get_usage(eqs)
+        definitions = get_definitions(eqs)
+        alive_atoms = []
+        alive_at_peak = []
+        usage_at_peak = []
+        # First find the assignment after which the most symbols are alive
+        for atom in eqs:
+            if not isinstance(atom.lhs, Field.Access):
+                alive_atoms.append(atom.lhs)
+            for arg in atom.rhs.atoms():
+                if isinstance(
+                        arg, Field.Access) or not isinstance(arg, Symbol) or arg not in alive_atoms:
+                    continue
+
+                else:
+                    reg_usage[arg] -= 1
+                    if reg_usage[arg] == 0:
+                        alive_atoms.remove(arg)
+            if max_alive_regs < len(alive_atoms):
+                max_alive_regs = len(alive_atoms)
+                alive_at_peak = list(alive_atoms)
+                usage_at_peak = {u: reg_usage[u] for u in alive_at_peak}
+                peak_eq = atom
+        peak_idx = eqs.index(peak_eq)  # NOTE(review): peak_eq is unbound if no peak was recorded (e.g. empty eqs)
+        for sym in alive_at_peak:
+            dependent = False
+            for arg in definitions[sym].rhs.atoms():
+                if arg in definitions:
+                    dependent = True
+            if dependent:  # only symbols whose definition reads no other defined symbol are split
+                continue
+            next_occurence = 0
+            for i in range(peak_idx, len(eqs)):  # NOTE: re-uses the name `i` of the outer loop
+                if sym == eqs[i].lhs or sym in eqs[i].rhs.atoms():
+                    next_occurence = i
+                    break
+            new_sym = next(split_symbol_iter)  # a name is drawn even if the split is skipped below
+            if next_occurence == 0:
+                continue
+            eqs.insert(next_occurence, Assignment(new_sym, definitions[sym].rhs))
+            # From the peak onwards the original symbol is replaced by the recomputed copy
+            for i in range(peak_idx, len(eqs)):
+                if sym in eqs[i].rhs.atoms():
+                    eqs[i] = eqs[i].subs(sym, new_sym)
+
+            break
+    return eqs
+
+
+def duplicate_trivial_ops(eqs, nonTrivialLength=3, trivialSymbolLength=1):
+    definitions = get_definitions(eqs)
+    eq_list = list(eqs)  # work on a copy, the input list is not modified
+    idx = 0
+    while (idx < len(eq_list)):
+        eq = eq_list[idx]
+        if isinstance(eq.lhs, Field.Access):  # assignments to field accesses are always kept
+            idx += 1
+            continue
+        # Trivial: fewer than nonTrivialLength rhs atoms and no rhs atom defined in `eqs` ...
+        trivial = True
+        if len(eq.rhs.atoms()) >= nonTrivialLength:
+            trivial = False
+
+        for arg in eq.rhs.atoms():
+            if arg in definitions:
+                trivial = False
+                break
+        # ... or exactly trivialSymbolLength symbols on the rhs, which overrides both checks above
+        if len(eq.rhs.atoms(Symbol)) == trivialSymbolLength:
+            trivial = True
+
+        if trivial:
+            for i in range(idx + 1, len(eq_list)):
+                if eq.lhs in eq_list[i].rhs.atoms():
+                    eq_list[i] = Assignment(eq_list[i].lhs, eq_list[i].rhs.subs({eq.lhs: eq.rhs}))
+            # The rhs was inlined into all later readers, so the assignment itself is removed
+            eq_list.remove(eq)
+        else:
+            idx += 1
+    return eq_list
+
+
+def scramble_eqs(eqs, attempts=1000):  # Randomly moves equations a few slots later in ``eqs`` (in place); returns ``eqs``.
+    max_alive_regs = 0
+    reg_usage = get_usage(eqs)
+    alive_atoms = []
+    alive_at_eq = {}
+    for atom in eqs:
+        alive_at_eq[atom] = {u: reg_usage[u] for u in alive_atoms}
+        # The snapshot above is taken before this equation itself is accounted for.
+        if not isinstance(atom.lhs, Field.Access):
+            alive_atoms.append(atom.lhs)
+        for arg in atom.rhs.atoms():
+            if isinstance(arg, Field.Access) or not isinstance(arg, Symbol):
+                continue
+            if arg not in alive_atoms:
+                print("_referenced Symbol " + str(arg) + " is not alive")
+            else:
+                reg_usage[arg] -= 1
+                if reg_usage[arg] == 0:
+                    alive_atoms.remove(arg)
+        if max_alive_regs < len(alive_atoms):
+            max_alive_regs = len(alive_atoms)
+    # NOTE(review): random.randint(0, len(eqs) - 10) below raises ValueError when len(eqs) < 10.
+    orig_usage = get_usage(eqs)
+    for i in range(0, attempts):
+        a = random.randint(0, len(eqs) - 10)
+        if a + 2 >= len(eqs) - 10: continue
+        b = random.randint(a + 2, min(len(eqs) - 10, a + 20))
+        eqa = eqs[a]
+        eqb = eqs[b]
+        if max_alive_regs - len(alive_at_eq[eqs[a]]) < 15:  # skip slots within 15 of the peak live count
+            continue
+        # Move only if the count stored for eqa.lhs before eqs[a + 1] equals the one before eqb.
+        if alive_at_eq[eqs[a + 1]][eqa.lhs] == alive_at_eq[eqb].get(eqa.lhs, 0):
+            # NOTE(review): the [eqa.lhs] lookup above raises KeyError when eqa.lhs is missing
+            # from that snapshot (a Field.Access lhs is never added to it) - confirm.
+            # NOTE(review): for a == 0, eqs[a - 1] below is eqs[-1], i.e. the last equation,
+            # and range(a - 1, b + 1) starts at -1 - confirm this is intended.
+
+            usage = alive_at_eq[eqs[a - 1]]
+            eqs.insert(b - 1, eqs.pop(a))
+            for n in range(a - 1, b + 1):
+                atom = eqs[n]
+                # Rebuild the snapshot of every equation in the affected window.
+                alive_at_eq[atom] = dict(usage)
+                if not isinstance(atom.lhs, Field.Access):
+                    usage[atom.lhs] = orig_usage[atom.lhs]
+                for arg in atom.rhs.atoms():
+                    if isinstance(arg, Field.Access) or not isinstance(arg, Symbol):
+                        continue
+                    if arg not in usage:
+                        pass
+                    else:
+                        usage[arg] -= 1
+                        if usage[arg] == 0:
+                            usage.pop(arg)
+    return eqs
+
+
+def scheduling_iteration(eqs):  # Atomizes eqs, then applies split_live_range (5x) and scramble_eqs (10x) to that list.
+    atomized_eqs = three_operand_form(eqs)
+    eqs = duplicate_trivial_ops(eqs)
+    rescheduled_eqs = schedule_eqs3(eqs)
+    alive_at_peak = liveness_analysis(rescheduled_eqs)
+    # NOTE(review): only atomized_eqs is returned; rescheduled_eqs never reaches the result - confirm intent.
+    for n in range(0, 5):
+        split_live_range(atomized_eqs)
+    rescheduled_eqs = schedule_eqs3(rescheduled_eqs, alive_at_peak)
+    for i in range(0, 10):
+        scramble_eqs(atomized_eqs)
+    return atomized_eqs
+
+
+def fuse_subs(eqs):  # Inline negations ``lhs = -1 * x`` into their users and drop them; returns a new list.
+    new_eqs, pos = copy.copy(eqs), 0
+    while pos < len(new_eqs):  # walk the *current* list so chained negations see earlier rewrites
+        eq = new_eqs[pos]
+        is_negation = isinstance(eq.rhs, sympy.Mul) and len(eq.rhs.args) == 2 and -1 in eq.rhs.args
+        if isinstance(eq.lhs, Field.Access) or not is_negation:  # never drop a field write
+            pos += 1
+            continue
+        new_eqs = [Assignment(e.lhs, e.rhs.subs(eq.lhs, eq.rhs)) if eq.lhs in e.atoms() else e for i, e in enumerate(new_eqs) if i != pos]
+    return new_eqs
+
+
+def fuse_f_m_as(eqs, max_usage=1):
+    """Fold two-factor products into the first sum that uses them.
+
+    Candidates are assignments with a two-argument ``sympy.Mul`` right-hand
+    side and a ``get_usage`` count of at most ``max_usage``.  The product is
+    substituted only if no summand of that sum is a ``Mul``; the defining
+    assignment is removed once its count reaches zero.
+    """
+    usage, new_eqs = get_usage(eqs), copy.copy(eqs)
+    for eq in eqs:
+        if not (isinstance(eq.rhs, sympy.Mul) and len(eq.rhs.args) == 2 and usage[eq.lhs] <= max_usage):
+            continue
+        k = next((i for i, c in enumerate(new_eqs) if isinstance(c.rhs, sympy.Add) and eq.lhs in c.rhs.atoms()), None)
+        if k is not None and not any(isinstance(s, sympy.Mul) for s in new_eqs[k].rhs.args):
+            new_eqs[k] = new_eqs[k].subs(eq.lhs, eq.rhs)
+            usage[eq.lhs] -= 1
+            if usage[eq.lhs] == 0:
+                new_eqs.remove(eq)
+    return new_eqs
+
+
+def remove_sqrt(input_eqs):
+    """Return a new list in which every power with 0.5 as base or exponent
+    is replaced by the plain product of its (rewritten) arguments.
+
+    Note that this changes the value of the expressions, not just their form.
+    """
+    def remove_in_expr(expr):
+        if not expr.args:
+            return expr
+        new_args = [remove_in_expr(a) for a in expr.args]
+        # Fixed: the power class is ``sympy.Pow``; ``sympy._pow`` does not exist.
+        is_sqrt = expr.func == sympy.Pow and (expr.args[0] == 0.5 or expr.args[1] == 0.5)
+        return sympy.Mul(*new_args) if is_sqrt else expr.func(*new_args)
+    return [remove_in_expr(eq) for eq in input_eqs]
+
+
+def remove_div(input_eqs):
+    """Return a new list in which every power with -1 as base or exponent
+    is replaced by the plain product of its (rewritten) arguments.
+
+    Note that this changes the value of the expressions, not just their form.
+    """
+    def remove_in_expr(expr):
+        if not expr.args:
+            return expr
+        new_args = [remove_in_expr(a) for a in expr.args]
+        # Fixed: the power class is ``sympy.Pow``; ``sympy._pow`` does not exist.
+        is_div = expr.func == sympy.Pow and (expr.args[0] == -1 or expr.args[1] == -1)
+        return sympy.Mul(*new_args) if is_div else expr.func(*new_args)
+    return [remove_in_expr(eq) for eq in input_eqs]
+
+
+def remove_piecewise(input_eqs):
+    """Rewrite every ``sympy.Piecewise`` in ``input_eqs`` as a sum of products.
+
+    Each ``(value, condition)`` pair contributes ``weight * value * condition``
+    where ``weight`` starts at 1.0 and is multiplied by ``1 - condition`` after
+    each pair; ``sympy.true`` / ``sympy.false`` leaves become 1.0 / 0.0.
+    Returns a new list with the rewritten equations.
+    """
+    def remove_in_expr(expr):
+        if expr.args:
+            if expr.func != sympy.Piecewise:
+                return expr.func(*[remove_in_expr(child) for child in expr.args])
+            weight = 1.0
+            terms = []
+            for pair in expr.args:
+                value = remove_in_expr(pair[0])
+                condition = remove_in_expr(pair[1])
+                terms.append(sympy.Mul(weight, value, condition))
+                weight = weight * (1 - condition)
+            return sympy.Add(*terms)
+        if expr == sympy.true:
+            return 1.0
+        if expr == sympy.false:
+            return 0.0
+        return expr
+
+    rewritten = []
+    for equation in input_eqs:
+        rewritten.append(remove_in_expr(equation))
+    return rewritten
+
+
+def option_none(eqs):  # Baseline option: hands the equations back unchanged.
+    return eqs
+
+
+def option_atomize(eqs):  # Option: three_operand_form only.
+    return three_operand_form(eqs)
+
+
+def option_reschedule(eqs):  # Option: schedule_eqs only.
+    return schedule_eqs(eqs)
+
+
+def option_reschedule_shmem(eqs):  # Option: var_to_shmem(eqs, 8), then schedule_eqs.
+    return schedule_eqs(var_to_shmem(eqs, 8))
+
+
+def option_liveness_opt_transformation(eqs):  # Option wrapper around liveness_opt_transformation.
+    return liveness_opt_transformation(eqs)
+
+
+def liveness_opt_transformation_shmem(eqs):  # schedule_eqs(eqs, 20) -> duplicate_trivial_ops -> var_to_shmem(..., 6).
+    return var_to_shmem(duplicate_trivial_ops(schedule_eqs(eqs, 20)), 6)
+
+
+def liveness_opt_transformation_shmem2(eqs):  # var_to_shmem(eqs, 6) -> duplicate_trivial_ops -> schedule_eqs(..., 40) -> scramble_eqs.
+    return scramble_eqs(schedule_eqs(duplicate_trivial_ops(var_to_shmem(eqs, 6)), 40))
+
+
+def option_liveness_opt_transformation_shmem(eqs):  # Option wrapper around liveness_opt_transformation_shmem.
+    return liveness_opt_transformation_shmem(eqs)
+
+
+def option_liveness_opt_transformation_shmem2(eqs):  # Option wrapper around liveness_opt_transformation_shmem2.
+    return liveness_opt_transformation_shmem2(eqs)
+
+
+def option_atomize_reschedule_no_sqrt(eqs):
+    # Pipeline: three_operand_form -> remove_sqrt -> schedule_eqs.
+    return schedule_eqs(remove_sqrt(three_operand_form(eqs)))
+
+
+def option_atomize_reschedule_no_div(eqs):
+    # Pipeline: three_operand_form -> remove_div -> schedule_eqs.
+    return schedule_eqs(remove_div(three_operand_form(eqs)))
+
+
+def option_atomize_reschedule_no_piecewise(eqs):
+    # Pipeline: three_operand_form -> remove_piecewise -> schedule_eqs.
+    return schedule_eqs(remove_piecewise(three_operand_form(eqs)))
+
+
+def option_atomize_reschedule_no_sqrt_div(eqs):
+    # Pipeline: three_operand_form -> remove_sqrt -> remove_div -> schedule_eqs.
+    return schedule_eqs(remove_div(remove_sqrt(three_operand_form(eqs))))
+
+
+def option_atomize_reschedule_no_all(eqs):
+    # Pipeline: three_operand_form -> remove_sqrt -> remove_div -> remove_piecewise -> schedule_eqs.
+    return schedule_eqs(remove_piecewise(remove_div(remove_sqrt(three_operand_form(eqs)))))
+
+
+def option_atomize_reschedule(eqs):  # Option: three_operand_form, then schedule_eqs.
+    return schedule_eqs(three_operand_form(eqs))
+
+
+def option_reschedule_atomize(eqs):  # Option: schedule_eqs, then three_operand_form.
+    return three_operand_form(schedule_eqs(eqs))
+
+
+def option_reschedule_atomize_scramble(eqs):
+    atomized = three_operand_form(schedule_eqs(eqs))
+    for _ in range(10):  # ten in-place scrambling passes over the atomized list
+        scramble_eqs(atomized)
+    return atomized
+
+
+def option_dupl_reschedule(eqs):
+    # Pipeline: duplicate_trivial_ops -> schedule_eqs.
+    return schedule_eqs(duplicate_trivial_ops(eqs))
+
+
+def option_dupl_atomize_reschedule(eqs):
+    # Pipeline: duplicate_trivial_ops -> three_operand_form -> schedule_eqs.
+    return schedule_eqs(three_operand_form(duplicate_trivial_ops(eqs)))
+
+
+def option_dupl_atomize_refuse_reschedule(eqs):
+    # Pipeline: duplicate_trivial_ops -> three_operand_form -> fuse_eqs -> schedule_eqs.
+    atomized = three_operand_form(duplicate_trivial_ops(eqs))
+    return schedule_eqs(fuse_eqs(atomized))
+
+
+def option_dupl_reschedule_atomize(eqs):
+    # Pipeline: duplicate_trivial_ops -> schedule_eqs -> three_operand_form.
+    return three_operand_form(schedule_eqs(duplicate_trivial_ops(eqs)))
+
+
+def option_dupl_reschedule_atomize_scramble(eqs):
+    # Pipeline: duplicate_trivial_ops -> schedule_eqs -> three_operand_form, then ten scrambling passes.
+    atomized = three_operand_form(schedule_eqs(duplicate_trivial_ops(eqs)))
+    for _ in range(10):
+        scramble_eqs(atomized)
+    return atomized
+
+
+def option_sched_iteration(eqs):  # Option wrapper around scheduling_iteration.
+    return scheduling_iteration(eqs)
+
+
+def option_fuse_subs(eqs):  # Option: fuse_subs, then schedule_eqs.
+    atomized_eqs = three_operand_form(eqs)  # NOTE(review): never used; fuse_subs below gets the raw eqs - intended?
+    fused_eqs = fuse_subs(eqs)
+    return schedule_eqs(fused_eqs)
+
+
+def option_fuse_f_m_as(eqs):  # Option: fuse_f_m_as, then schedule_eqs.
+    atomized_eqs = three_operand_form(eqs)  # NOTE(review): never used; fuse_f_m_as below gets the raw eqs - intended?
+    fused_eqs = fuse_f_m_as(eqs)
+    return schedule_eqs(fused_eqs)
+
+
+def option_fuse_both(eqs):  # Option: fuse_subs, then fuse_f_m_as, then schedule_eqs.
+    atomized_eqs = three_operand_form(eqs)  # NOTE(review): never used; the fusions below get the raw eqs - intended?
+    fused_eqs = fuse_f_m_as(fuse_subs(eqs))
+    return schedule_eqs(fused_eqs)
+
+
+all_sched_options = []  # list of option functions; only the uncommented appends below take effect
+# all_sched_options.append(option_none)
+# all_sched_options.append(option_atomize)
+# all_sched_options.append(option_reschedule)
+# all_sched_options.append(option_atomize_reschedule)
+# all_sched_options.append(option_fuse_subs)
+# all_sched_options.append(option_fuse_f_m_as)
+# all_sched_options.append(option_fuse_both)
+# all_sched_options.append(option_reschedule_atomize)
+# all_sched_options.append(option_reschedule_atomize_scramble)
+# all_sched_options.append(option_dupl_atomize_reschedule)
+# all_sched_options.append(option_dupl_atomize_refuse_reschedule)
+# all_sched_options.append(option_dupl_reschedule_atomize)
+# all_sched_options.append(option_dupl_reschedule_atomize_scramble)
+# all_sched_options.append(option_sched_iteration)
+# all_sched_options.append(option_atomize_reschedule_no_piecewise)
+# all_sched_options.append(option_atomize_reschedule_no_sqrt)
+# all_sched_options.append(option_atomize_reschedule_no_div)
+# all_sched_options.append(option_atomize_reschedule_no_sqrt_div)
+# all_sched_options.append(option_atomize_reschedule_no_all)
+# all_sched_options.append(option_liveness_opt_transformation)
+all_sched_options.append(option_reschedule_shmem)
+all_sched_options.append(option_liveness_opt_transformation_shmem)
+all_sched_options.append(option_liveness_opt_transformation_shmem2)
+
+
+def replace_accesses(eqs):
+    """Route every field access read on a right-hand side through a ``Dummy``.
+
+    The result starts with one ``Assignment(dummy, access)`` per distinct
+    ``Field.Access`` atom found on the right-hand sides, followed by the input
+    assignments with those accesses replaced by their dummies.  Left-hand
+    sides are left as they are.
+    """
+    access_vars = {}
+    for eq in eqs:
+        for atom in eq.rhs.atoms():
+            if isinstance(atom, Field.Access) and atom not in access_vars:
+                access_vars[atom] = Dummy()
+
+    # Rebuild an expression bottom-up, swapping argument-less accesses for their dummies.
+    def replace_accesses_in_expr(expr):
+        if expr.args:
+            return expr.func(*[replace_accesses_in_expr(child) for child in expr.args])
+        return access_vars[expr] if isinstance(expr, Field.Access) else expr
+
+    # The loads come first so that each dummy is assigned before it is read.
+    loads = [Assignment(dummy, access) for access, dummy in access_vars.items()]
+    rewritten = [Assignment(eq.lhs, replace_accesses_in_expr(eq.rhs)) for eq in eqs]
+    return loads + rewritten
+
+
+def shifted_equations():  # Placeholder: the body does nothing.
+    pass
+
+
+def get_used_nodes(eqs):
+    """Map each left-hand side to the list of ``Symbol`` atoms of its right-hand side.
+
+    A left-hand side assigned more than once keeps only the atoms of its last assignment.
+    """
+    # Building the dict item by item lets a later assignment overwrite an earlier one.
+    return {eq.lhs: list(eq.rhs.atoms(Symbol)) for eq in eqs}
+
+
+def get_used_by(eqs):
+    """Map each right-hand-side atom to the list of left-hand sides that use it."""
+    users = {}
+    for lhs, operand in ((eq.lhs, atom) for eq in eqs for atom in eq.rhs.atoms()):
+        users.setdefault(operand, []).append(lhs)
+    return users
+
+# shifted_equations = copy.deepcopy(equations)
+
+# def shift_accesses(expr):
+#    new_args = list(expr.args)
+#    print(expr.args)
+#    for i in range(0, len(new_args)):
+#        if isinstance(new_args[i], Field.Access):
+#            new_args[i] = new_args[i].get_shifted(2, 0, 0)
+#        if isinstance(new_args[i], sp._expr):
+#            shift_accesses(new_args[i])
+#    expr._args = new_args
+#    print(expr.args)
+
+# for eq in shifted_equations:
+#    shift_accesses(eq.rhs)
+
+# for lhs in equations:
+#    for rhs in shifted_equations:
+#        if lhs == rhs:
+#            print(str(lhs.lhs) + " " + str(rhs.lhs))
diff --git a/pystencils/simp/liveness_permutations.py b/pystencils/simp/liveness_permutations.py
new file mode 100644
index 0000000000000000000000000000000000000000..b16c8f56cb96966b922a1618107703c86dd6efff
--- /dev/null
+++ b/pystencils/simp/liveness_permutations.py
@@ -0,0 +1,190 @@
+from pygrandchem.grandchem import StaggeredKernelParams
+
+from pystencils.simp.liveness_opts import *
+from pystencils.simp.liveness_opts_exp import *
+import random
+import pycuda.driver as drv
+
+import pystencils as ps
+
+from pystencils import show_code
+from timeit import default_timer as timer
+
+import copy
+
+optSequenceCache = {}  # sequence -> [list of averaged scores, number of ratings], filled by rateSequence
+# Pool of [function, argument list] entries that mutateOptSequence appends to a sequence.
+all_opts = [[atomize_eqs, []], [schedule_eqs, [2]], [duplicate_trivial_ops, [3, 1]],
+            [merge_field_accesses, []], [refuse_eqs, [1, 1]], [var_to_shmem, [4]],
+            [var_to_shmem_lt, [4]]]
+
+
+def mutateOptSequence(seq):
+    """Return a randomly mutated deep copy of ``seq``; ``seq`` is not modified.
+
+    A mutation kind is drawn until one applies: append an entry of ``all_opts``,
+    swap two entries, remove an entry, rescale one argument of an entry, or
+    double/halve one dimension of ``blockSize`` (product capped at 512).
+    """
+    new_seq = copy.deepcopy(seq)
+    changed = False
+    while not changed:
+        choice = random.randint(0, 4)
+        if choice == 0:
+            # Copy the template so later argument changes never alias all_opts.
+            new_seq.opts.append(copy.deepcopy(random.choice(all_opts)))
+            changed = True
+        elif choice == 1:
+            if len(new_seq.opts) > 1:
+                a = random.randint(0, len(new_seq.opts) - 1)
+                b = random.randint(0, len(new_seq.opts) - 1)
+                new_seq.opts[a], new_seq.opts[b] = new_seq.opts[b], new_seq.opts[a]
+                changed = True
+        elif choice == 2:
+            if new_seq.opts:
+                new_seq.opts.remove(random.choice(new_seq.opts))
+                changed = True
+        elif choice == 3:
+            if new_seq.opts:
+                opt = random.choice(new_seq.opts)
+                change = random.choice([-1, 1])
+                factor = random.uniform(0.3, 1.0) if change < 0 else random.uniform(1.0, 3.0)
+                if opt[1]:
+                    arg = random.randint(0, len(opt[1]) - 1)
+                    opt[1][arg] = int(max(0, opt[1][arg] * factor + change))
+                    changed = True
+        else:
+            dim = random.randint(0, 2)
+            old_size = tuple(new_seq.blockSize)
+            new_size = list(old_size)
+            if random.randint(0, 1) == 0:
+                new_size[dim] = min(512, new_size[dim] * 2)
+            else:
+                new_size[dim] = max(1, new_size[dim] // 2)
+            # The first dimension must be >= 32 unless it did not shrink.
+            if new_size[0] * new_size[1] * new_size[2] <= 512 and (
+                    new_size[0] >= 32 or new_size[0] >= old_size[0]):
+                # Fixed: store on the copy; the original wrote to the input ``seq``.
+                new_seq.blockSize = tuple(new_size)
+                changed = True
+    return new_seq
+
+
+def evolvePopulation(pop, eqs_set, dhs, staggered_params=None):
+    # One generation: mutate the leading entries of pop, rate everything, return the survivors. Appends to pop.
+    pop.append(livenessOptSequence())
+    once_mutated = [mutateOptSequence(seq) for seq in pop[0:6]]
+    twice_mutated = [mutateOptSequence(mutateOptSequence(seq)) for seq in pop[0:4]]
+    thrice_mutated = [
+        mutateOptSequence(mutateOptSequence(mutateOptSequence(seq))) for seq in pop[0:3]
+    ]
+    # set() drops duplicates; this relies on the sequences being hashable.
+    new_pop = list(set(pop + once_mutated + twice_mutated + thrice_mutated))
+    # Each entry becomes (sequence, scores, registers) as returned by rateSequence.
+    scores = []
+    for seq in new_pop:
+        scores.append((seq, *rateSequence(seq, eqs_set, dhs, staggered_params)))
+    # Also consider the best cached sequence that is not part of this generation.
+    old_scores = []
+    for s in optSequenceCache:
+        if s not in new_pop:
+            if s not in optSequenceCache:  # NOTE(review): s is a key of the dict being iterated; reachable only if its hash changed?
+                print("Not in optSequenceCache: ")
+                print(s)
+                print(hash(s))
+            old_scores.append((s, optSequenceCache[s][0], [0, 0]))
+    old_scores.sort(key=lambda s: sum(s[1]))
+
+    if len(old_scores) > 0: scores.append(old_scores[0])
+
+    print()
+    scores.sort(key=lambda s: sum(s[1]))
+    new_pop = []
+    count_old_seqs = 0
+    # Survivors are taken in ascending score order while (kept < 4 or old < 3) and kept < 10; "old" = rated more than 3 times.
+    for score in scores:
+        if score[0] not in optSequenceCache:  # debug output only; the cache lookups below would raise KeyError in this case
+            print("Everything in scores: ")
+            for s in scores:
+                print(s[0])
+
+            print("Not in optSequenceCache: ")
+            print(score[0])
+            print(hash(score[0]))
+
+        survive = False
+        if (len(new_pop) < 4 or count_old_seqs < 3) and len(new_pop) < 10:
+            if optSequenceCache[score[0]][1] > 3:
+                count_old_seqs += 1
+            new_pop.append(score[0])
+            survive = True
+
+        print("".join(["{:6.2f} ".format(sc) for sc in score[1]]) + "(" +
+              "".join(["{:3d} ".format(sc) for sc in score[2]]) + "): " +
+              "{:2d}".format(optSequenceCache[score[0]][1]) + (" * " if survive else "   ") +
+              str(score[0]))
+    print()
+
+    return new_pop
+
+
+def rateSequence(seq, eqs_set, dh, staggered_params=None):
+    """Benchmark ``seq`` on every equation set and fold the result into the cache.
+
+    ``optSequenceCache[seq]`` holds ``[running mean of the score list, number
+    of ratings]``.  The score list is the kernel times followed by the same
+    times weighted by ``max(0.0, (len(seq.opts) - 3) * 0.1)``.  After more
+    than 10 ratings the cached scores are returned without benchmarking.
+
+    Returns ``(scores, registers)``; ``registers`` is ``[0, 0]`` on the
+    cached path.
+    """
+    cache_entry = optSequenceCache.setdefault(seq, [[], 0])
+    scores, runs = cache_entry
+    if runs > 10:
+        return (scores, [0, 0])
+
+    print(runs, end=" ")
+    print(seq)
+
+    transformed_eqs_set = [seq.applyOpts(eqs) for eqs in eqs_set]
+    kernel_results = [bench_kernel(eqs, dh, seq.blockSize, staggered_params) for eqs in transformed_eqs_set]
+    times = [k[0] for k in kernel_results]
+    # Second half: the same times scaled by a penalty for sequences longer than three opts.
+    penalty = max(0.0, (len(seq.opts) - 3) * 0.1)
+    result = times + [t * penalty for t in times]
+
+    if runs == 0:
+        cache_entry[0] = result
+    else:
+        for i, value in enumerate(result):  # incremental mean, updated in place
+            scores[i] = (scores[i] * runs + value) / (runs + 1)
+    cache_entry[1] = runs + 1
+    return cache_entry[0], [k[1] for k in kernel_results]
+
+
+def bench_kernel(eqs, dh, blockSize=(64, 2, 1), staggered_params=None):
+    """Compile ``eqs`` as a GPU kernel and time two back-to-back runs.
+
+    With ``staggered_params`` the equations go through ``pack_staggered_eqs``
+    and ``ps.create_staggered_kernel``; otherwise ``ps.create_kernel`` is used.
+
+    Returns ``(mean milliseconds per run, kernel.num_regs)``.
+    """
+    indexing = {"block_size": blockSize}
+    if staggered_params is not None:
+        ast = ps.create_staggered_kernel(
+            *pack_staggered_eqs(eqs, *staggered_params), target="gpu", gpu_indexing_params=indexing)
+    else:
+        ast = ps.create_kernel(eqs, target="gpu", gpu_indexing_params=indexing)
+    kernel = ast.compile()
+
+    # Bracket the two launches with CUDA events and wait for the closing one.
+    start, end = drv.Event(), drv.Event()
+    start.record()
+    for _ in range(2):
+        dh.run_kernel(kernel, timestep=1)
+    end.record()
+    end.synchronize()
+    elapsed_ms = start.time_till(end)
+    return elapsed_ms / 2, kernel.num_regs
diff --git a/pystencils_tests/liveness_opts/compare_seqs.py b/pystencils_tests/liveness_opts/compare_seqs.py
new file mode 100644
index 0000000000000000000000000000000000000000..0604cabf82bfcd93fc69006b77cdbf87fe10a4b0
--- /dev/null
+++ b/pystencils_tests/liveness_opts/compare_seqs.py
@@ -0,0 +1,181 @@
+# coding: utf-8
+
+# In[32]:
+
+import pickle
+import warnings
+import pystencils as ps
+from pygrandchem.grandchem import GrandChemGenerator
+from pygrandchem.scenarios import system_4_2, system_3_1
+from pygrandchem.initialization import init_boxes, smooth_fields
+from pygrandchem.scenarios import benchmark_configs
+
+from sympy import Number, Symbol, Expr, preorder_traversal, postorder_traversal, Function, Piecewise, relational
+from pystencils.simp import sympy_cse_on_assignment_list
+from pystencils.simp.liveness_opts import *
+from pystencils.simp.liveness_opts_exp import *
+
+from pystencils.simp.liveness_permutations import *
+
+import pycuda
+
+import sys
+from subprocess import run, PIPE
+
+from pystencils import show_code
+import pycuda.driver as drv
+
+import importlib
+
+configs = benchmark_configs()  # evaluated once, when the script starts
+
+
+def get_config(name):  # look up one benchmark configuration by its key
+    return configs[name]
+
+
+domain_size = (512, 512, 128)  # passed to ps.create_data_handling for every scenario
+periodicity = (True, True, False)
+
+optimization = {'gpu_indexing_params': {"block_size": (32, 4, 2)}}  # used for the two reference kernels
+#bestSeqs = pickle.load(open('best_seq.pickle', 'rb'))
+
+scenarios = ["42_varT_freeEnergy", "31_varT_aniso_rot"]
+kernel_types = ["phi_full", "phi_partial1", "phi_partial2", "mu_full", "mu_partial1", "mu_partial2"]
+# The transformation sequences are read from a module that is imported by name at run time.
+liveness_trans_seqs = importlib.import_module(
+    "gpu_liveness_trans_sequences").gpu_liveness_trans_sequences
+
+for scenario in scenarios:
+    # Per scenario: build fields and generators, time the reference kernels, then every kernel type.
+    config = get_config(scenario)
+
+    phases, components = config['Parameters']['phases'], config['Parameters']['components']
+    format_args = {'p': phases, 'c': components, 's': ','.join(str(e) for e in domain_size)}
+
+    # Adding fields
+    dh = ps.create_data_handling(domain_size, periodicity=periodicity, default_target='gpu')
+    f = dh.fields
+    phi_src = dh.add_array(
+        'phi_src',
+        values_per_cell=config['Parameters']['phases'],
+        layout='fzyx',
+        latex_name='phi_s')
+    mu_src = dh.add_array(
+        'mu_src',
+        values_per_cell=config['Parameters']['components'],
+        layout='fzyx',
+        latex_name="mu_s")
+    mu_stag = dh.add_array(
+        'mu_stag', values_per_cell=(dh.dim, config['Parameters']['components']), layout='f')
+    phi_stag = dh.add_array('phi_stag', values_per_cell=(dh.dim, phases), layout='f')
+
+    phi_dst = dh.add_array_like('phi_dst', 'phi_src')
+    mu_dst = dh.add_array_like('mu_dst', 'mu_src')
+
+    gc = GrandChemGenerator(
+        phi_src,
+        phi_dst,
+        mu_src,
+        mu_dst,
+        config['FreeEnergy'],
+        config['Parameters'],
+        #conc=c,
+        mu_staggered=mu_stag,
+        phi_staggered=phi_stag,
+        use_block_offsets=False,
+        compile_kernel=False)
+
+    mu_full_eqs = gc.mu_full()
+    phi_full_eqs = gc.phi_full()
+    # Reference kernels, compiled with the fixed block size from `optimization`.
+    phi_kernel = ps.create_kernel(phi_full_eqs, target='gpu', **optimization).compile()
+    mu_kernel = ps.create_kernel(mu_full_eqs, target='gpu', **optimization).compile()
+
+    c = dh.add_array(
+        'c', values_per_cell=config['Parameters']['components'], layout='fzyx', gpu=False)
+
+    init_boxes(dh)
+    #initialize_concentration_field(dh, free_energy, config['Parameters']['initial_concentration'])
+    smooth_fields(dh, sigma=0.4, iterations=5, dim=dh.dim)
+    dh.synchronization_function(['phi_src', 'phi_dst', 'mu_src', 'mu_dst'])()
+
+    staggered_params = None
+
+    def bench_kernels(mu_kernel, phi_kernel):
+        # For each kernel: one untimed launch, then the mean time of two timed launches.
+        start = drv.Event()
+        end = drv.Event()
+
+        dh.run_kernel(mu_kernel, timestep=1)
+        start.record()
+        dh.run_kernel(mu_kernel, timestep=1)
+        dh.run_kernel(mu_kernel, timestep=1)
+        end.record()
+        end.synchronize()
+        msec = start.time_till(end) / 2
+        print("mu_kernel: {}  {:5.3f} ms".format(mu_kernel.num_regs, msec))
+
+        dh.run_kernel(phi_kernel, timestep=1)
+        start.record()
+        dh.run_kernel(phi_kernel, timestep=1)
+        dh.run_kernel(phi_kernel, timestep=1)
+        end.record()
+        end.synchronize()
+        msec = start.time_till(end) / 2
+        print("phi_kernel: {}  {:5.3f} ms".format(phi_kernel.num_regs, msec))
+
+    print("warmup")
+    bench_kernels(mu_kernel, phi_kernel)
+    dh.swap('mu_src', 'mu_dst')
+    dh.swap('phi_src', 'phi_dst')
+    print()
+
+    for kernel_type in kernel_types:
+        print(scenario + " " + kernel_type)
+        for div_sqrt_approx in [True, False]:
+            print("Approximations for div/sqrt: " + str(div_sqrt_approx))
+            for liveness_trans in [True, False]:
+                # A fresh generator per combination; only the last three keyword arguments vary.
+                gc = GrandChemGenerator(
+                    phi_src,
+                    phi_dst,
+                    mu_src,
+                    mu_dst,
+                    config['FreeEnergy'],
+                    config['Parameters'],
+                    #conc=c,
+                    mu_staggered=mu_stag,
+                    phi_staggered=phi_stag,
+                    use_block_offsets=False,
+                    compile_kernel=False,
+                    fast_divisions=div_sqrt_approx,
+                    fast_sqrts=div_sqrt_approx,
+                    gpu_liveness_trans_sequences=(liveness_trans_seqs[scenario]
+                                                  if liveness_trans else None))
+                staggered_params = None  # fix: a value left over from an earlier staggered kernel type must not be reused
+                if kernel_type == "phi_full":
+                    eqs = gc.phi_full()
+                elif kernel_type == "mu_full":
+                    eqs = gc.mu_full()
+                elif kernel_type == "mu_partial1":
+                    staggered_params = gc.mu_partial1()
+                elif kernel_type == "mu_partial2":
+                    eqs = gc.mu_partial2()
+                elif kernel_type == "phi_partial1":
+                    staggered_params = gc.phi_partial1()
+                elif kernel_type == "phi_partial2":
+                    eqs = gc.phi_partial2()
+                else:
+                    print("Specified kernel does not exist")
+                    exit()
+
+                if staggered_params is not None:
+                    eqs = unpack_staggered_eqs(*staggered_params)
+
+                print(
+                    bench_kernel(
+                        eqs, dh, liveness_trans_seqs[scenario][(kernel_type,
+                                                                 liveness_trans)].blockSize,
+                        staggered_params))
+                print()
diff --git a/pystencils_tests/liveness_opts/count_ops.ipynb b/pystencils_tests/liveness_opts/count_ops.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..b48a14d07cd1392701777c22badeaf5e1d6e494b
--- /dev/null
+++ b/pystencils_tests/liveness_opts/count_ops.ipynb
@@ -0,0 +1,2150 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys \n",
+    "sys.path.append('..')\n",
+    "\n",
+    "\n",
+    "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 1\n",
+    "%aimport pystencils.simp.liveness_opts\n",
+    "%aimport pystencils.simp.liveness_opts_exp\n",
+    "%aimport pystencils.shmemvar\n",
+    "%aimport pystencils.backends.cbackend\n",
+    "%aimport pystencils.transformations\n",
+    "%aimport pygrandchem.grandchem_generation\n",
+    "\n",
+    "\n",
+    "%load_ext line_profiler\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lbmpy.session import *\n",
+    "from scipy.ndimage.filters import gaussian_filter\n",
+    "from pygrandchem.grandchem_generation import *\n",
+    "from pygrandchem.chemicalpotential import free_energy_from_config_object, FreeEnergy\n",
+    "from pygrandchem.initialization import *\n",
+    "from pygrandchem_tests.config_anisotropic import get_system\n",
+    "from pystencils.boundaries import *\n",
+    "\n",
+    "from pystencils.simp import sympy_cse_on_assignment_list\n",
+    "from pystencils.simp.liveness_opts import *\n",
+    "from pystencils.simp.liveness_opts_exp import *\n",
+    "\n",
+    "from pystencils.shmemvar import *\n",
+    "import graphviz\n",
+    "\n",
+    "\n",
+    "import pycuda.compiler\n",
+    "from pycuda.compiler import SourceModule\n",
+    "\n",
+    "import sys\n",
+    "from subprocess import run, PIPE\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Compiling and simplifying φ update equations - this may take a while\n",
+      "Compiling and simplifying μ update equations - this may take a while\n",
+      "Compiling and simplifying μ update equations - this may take a while\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "domain_size = (128, 128, 128)\n",
+    "periodicity = (True, True, False)\n",
+    "fast_simplex_projection = True\n",
+    "optimization = {'gpu_indexing_params': {\"block_size\": (32, 4, 4)}}\n",
+    "config = get_system(dim=len(domain_size))\n",
+    "\n",
+    "phases = config['Parameters']['phases']\n",
+    "components = config['Parameters']['components']\n",
+    "diffusion_matrices = config['Parameters']['diffusion']\n",
+    "free_energy = config['FreeEnergy']\n",
+    "\n",
+    "# Adding fields\n",
+    "dh = create_data_handling(domain_size, periodicity=periodicity, default_target='gpu')\n",
+    "f = dh.fields\n",
+    "phi_src = dh.add_array('phi_src', values_per_cell=phases, layout='fzyx', latex_name='phi_s')\n",
+    "mu_src = dh.add_array('mu_src', values_per_cell=components, layout='fzyx', latex_name=\"mu_s\")\n",
+    "mu_stag = dh.add_array('mu_stag', values_per_cell=(dh.dim, components), layout='f')\n",
+    "phi_dst = dh.add_array_like('phi_dst', 'phi_src')\n",
+    "mu_dst = dh.add_array_like('mu_dst', 'mu_src')\n",
+    "\n",
+    "c = dh.add_array('c', values_per_cell=components, layout='fzyx', gpu=False)\n",
+    "f = dh.fields\n",
+    "\n",
+    "\n",
+    "phi_update_eqs = create_phi_update_equations(\n",
+    "    phi_src, phi_dst, mu_src, free_energy, config['Parameters'], simplex_projection=fast_simplex_projection)\n",
+    "\n",
+    "mu_update_eqs = create_mu_update_equations(phi_src, phi_dst, mu_src, mu_dst, free_energy, diffusion_matrices,\n",
+    "                                           config['Parameters'])\n",
+    "\n",
+    "mu_stag_update_eqs = create_mu_update_equations(\n",
+    "    phi_src,\n",
+    "    phi_dst,\n",
+    "    mu_src,\n",
+    "    mu_dst,\n",
+    "    free_energy,\n",
+    "    diffusion_matrices,\n",
+    "    config['Parameters'],\n",
+    "    mu_staggered_field=mu_stag)\n",
+    "\n",
+    "\n",
+    "mu_stag_precomp_eqs = create_mu_update_staggered_eqs(\n",
+    "    phi_src,\n",
+    "    phi_dst,\n",
+    "    mu_src,\n",
+    "    mu_stag,\n",
+    "    free_energy,\n",
+    "    diffusion_matrices,\n",
+    "    config['Parameters'])\n",
+    "\n",
+    "        \n",
+    "mu_stag_precomp_kernel = create_mu_update_staggered_ast(\n",
+    "    phi_src,\n",
+    "    phi_dst,\n",
+    "    mu_src,\n",
+    "    mu_stag,\n",
+    "    free_energy,\n",
+    "    diffusion_matrices,\n",
+    "    config['Parameters'],\n",
+    "    target='gpu')\n",
+    "\n",
+    "\n",
+    "\n",
+    "#phi_eqs = create_phi_update_equations(\n",
+    "#    f['phi_src'], f['phi_dst'], f['mu_src'], free_energy, config['Parameters'], simplex_projection=True)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "243\n",
+      "xi_0 ← phi_src_C^0**2\n",
+      "xi_1 ← phi_src_C^1**2\n",
+      "xi_2 ← phi_src_C^2**2\n",
+      "xi_3 ← xi_0 + xi_1 + xi_2\n",
+      "xi_4 ← phi_dst_C^1**2\n",
+      "xi_5 ← phi_dst_C^0**2\n",
+      "xi_6 ← phi_dst_C^2**2\n",
+      "xi_7 ← 32.0/(xi_4 + xi_5 + xi_6)\n",
+      "xi_8 ← 32.0/xi_3\n",
+      "xi_9 ← 2.0*mu_src_C\n",
+      "xi_10 ← phi_src_C^0/2\n",
+      "xi_11 ← phi_src_W^0/2 + xi_10\n",
+      "xi_12 ← xi_11**2\n",
+      "xi_13 ← phi_src_C^1/2\n",
+      "xi_14 ← phi_src_W^1/2 + xi_13\n",
+      "xi_15 ← xi_14**2\n",
+      "xi_16 ← phi_src_C^2/2\n",
+      "xi_17 ← phi_src_W^2/2 + xi_16\n",
+      "xi_18 ← xi_17**2\n",
+      "xi_19 ← 1/(xi_12 + xi_15 + xi_18)\n",
+      "xi_20 ← xi_12*xi_19\n",
+      "xi_21 ← xi_15*xi_19\n",
+      "xi_22 ← xi_18*xi_19\n",
+      "xi_23 ← 2.0*phi_src_C^2\n",
+      "xi_24 ← -2.0*phi_src_W^2 + xi_23\n",
+      "xi_25 ← sqrt(xi_11*xi_17)\n",
+      "xi_26 ← 2.0*phi_src_C^0\n",
+      "xi_27 ← -2.0*phi_src_W^0 + xi_26\n",
+      "xi_28 ← 0.5*phi_src_NW^0\n",
+      "xi_29 ← -0.5*phi_src_SW^0\n",
+      "xi_30 ← -0.5*phi_src_S^0 + 0.5*phi_src_N^0\n",
+      "xi_31 ← xi_28 + xi_29 + xi_30\n",
+      "xi_32 ← 0.5*phi_src_TW^0\n",
+      "xi_33 ← -0.5*phi_src_BW^0\n",
+      "xi_34 ← -0.5*phi_src_B^0 + 0.5*phi_src_T^0\n",
+      "xi_35 ← xi_32 + xi_33 + xi_34\n",
+      "xi_36 ← sqrt(xi_27**2 + xi_31**2 + xi_35**2)\n",
+      "xi_37 ← 0.5*phi_src_NW^2\n",
+      "xi_38 ← -0.5*phi_src_SW^2\n",
+      "xi_39 ← -0.5*phi_src_S^2 + 0.5*phi_src_N^2\n",
+      "xi_40 ← xi_37 + xi_38 + xi_39\n",
+      "xi_41 ← 0.5*phi_src_TW^2\n",
+      "xi_42 ← -0.5*phi_src_BW^2\n",
+      "xi_43 ← -0.5*phi_src_B^2 + 0.5*phi_src_T^2\n",
+      "xi_44 ← xi_41 + xi_42 + xi_43\n",
+      "xi_45 ← xi_24**2 + xi_40**2 + xi_44**2\n",
+      "xi_46 ← sqrt(xi_45)\n",
+      "xi_47 ← 0.0726237719428938*mu_src_C + 7.27037126746791\n",
+      "xi_48 ← xi_22*(0.0726237719428938*mu_src_W + xi_47)\n",
+      "xi_49 ← 0.0138651175277251*mu_src_C + 1.37922908109611\n",
+      "xi_50 ← 16.0*phi_dst_C^0 - 16.0*phi_src_C^0\n",
+      "xi_51 ← xi_22/xi_45\n",
+      "xi_52 ← sqrt(xi_14*xi_17)\n",
+      "xi_53 ← 2.0*phi_src_C^1\n",
+      "xi_54 ← -2.0*phi_src_W^1 + xi_53\n",
+      "xi_55 ← 0.5*phi_src_NW^1\n",
+      "xi_56 ← -0.5*phi_src_SW^1\n",
+      "xi_57 ← -0.5*phi_src_S^1 + 0.5*phi_src_N^1\n",
+      "xi_58 ← xi_55 + xi_56 + xi_57\n",
+      "xi_59 ← 0.5*phi_src_TW^1\n",
+      "xi_60 ← -0.5*phi_src_BW^1\n",
+      "xi_61 ← -0.5*phi_src_B^1 + 0.5*phi_src_T^1\n",
+      "xi_62 ← xi_59 + xi_60 + xi_61\n",
+      "xi_63 ← sqrt(xi_54**2 + xi_58**2 + xi_62**2)\n",
+      "xi_64 ← 0.00672506491627283*mu_src_C + 0.974209571226215\n",
+      "xi_65 ← 16.0*phi_dst_C^1 - 16.0*phi_src_C^1\n",
+      "xi_66 ← -xi_9\n",
+      "xi_67 ← phi_src_E^0/2 + xi_10\n",
+      "xi_68 ← xi_67**2\n",
+      "xi_69 ← phi_src_E^1/2 + xi_13\n",
+      "xi_70 ← xi_69**2\n",
+      "xi_71 ← phi_src_E^2/2 + xi_16\n",
+      "xi_72 ← xi_71**2\n",
+      "xi_73 ← 1/(xi_68 + xi_70 + xi_72)\n",
+      "xi_74 ← xi_68*xi_73\n",
+      "xi_75 ← xi_70*xi_73\n",
+      "xi_76 ← xi_72*xi_73\n",
+      "xi_77 ← -xi_23\n",
+      "xi_78 ← 2.0*phi_src_E^2 + xi_77\n",
+      "xi_79 ← sqrt(xi_67*xi_71)\n",
+      "xi_80 ← -xi_26\n",
+      "xi_81 ← 2.0*phi_src_E^0 + xi_80\n",
+      "xi_82 ← 0.5*phi_src_NE^0\n",
+      "xi_83 ← 0.5*phi_src_SE^0\n",
+      "xi_84 ← xi_30 + xi_82 - xi_83\n",
+      "xi_85 ← 0.5*phi_src_TE^0\n",
+      "xi_86 ← 0.5*phi_src_BE^0\n",
+      "xi_87 ← xi_34 + xi_85 - xi_86\n",
+      "xi_88 ← sqrt(xi_81**2 + xi_84**2 + xi_87**2)\n",
+      "xi_89 ← 0.5*phi_src_NE^2\n",
+      "xi_90 ← 0.5*phi_src_SE^2\n",
+      "xi_91 ← xi_39 + xi_89 - xi_90\n",
+      "xi_92 ← 0.5*phi_src_TE^2\n",
+      "xi_93 ← 0.5*phi_src_BE^2\n",
+      "xi_94 ← xi_43 + xi_92 - xi_93\n",
+      "xi_95 ← xi_78**2 + xi_91**2 + xi_94**2\n",
+      "xi_96 ← sqrt(xi_95)\n",
+      "xi_97 ← xi_76*(0.0726237719428938*mu_src_E + xi_47)\n",
+      "xi_98 ← xi_76/xi_95\n",
+      "xi_99 ← sqrt(xi_69*xi_71)\n",
+      "xi_100 ← -xi_53\n",
+      "xi_101 ← 2.0*phi_src_E^1 + xi_100\n",
+      "xi_102 ← 0.5*phi_src_NE^1\n",
+      "xi_103 ← 0.5*phi_src_SE^1\n",
+      "xi_104 ← xi_102 - xi_103 + xi_57\n",
+      "xi_105 ← 0.5*phi_src_TE^1\n",
+      "xi_106 ← 0.5*phi_src_BE^1\n",
+      "xi_107 ← xi_105 - xi_106 + xi_61\n",
+      "xi_108 ← sqrt(xi_101**2 + xi_104**2 + xi_107**2)\n",
+      "xi_109 ← phi_src_S^0/2 + xi_10\n",
+      "xi_110 ← xi_109**2\n",
+      "xi_111 ← phi_src_S^1/2 + xi_13\n",
+      "xi_112 ← xi_111**2\n",
+      "xi_113 ← phi_src_S^2/2 + xi_16\n",
+      "xi_114 ← xi_113**2\n",
+      "xi_115 ← 1/(xi_110 + xi_112 + xi_114)\n",
+      "xi_116 ← xi_110*xi_115\n",
+      "xi_117 ← xi_112*xi_115\n",
+      "xi_118 ← xi_114*xi_115\n",
+      "xi_119 ← -2.0*phi_src_S^2 + xi_23\n",
+      "xi_120 ← sqrt(xi_109*xi_113)\n",
+      "xi_121 ← -2.0*phi_src_S^0 + xi_26\n",
+      "xi_122 ← -0.5*phi_src_W^0 + 0.5*phi_src_E^0\n",
+      "xi_123 ← xi_122 + xi_29 + xi_83\n",
+      "xi_124 ← 0.5*phi_src_TS^0\n",
+      "xi_125 ← -0.5*phi_src_BS^0\n",
+      "xi_126 ← xi_124 + xi_125 + xi_34\n",
+      "xi_127 ← sqrt(xi_121**2 + xi_123**2 + xi_126**2)\n",
+      "xi_128 ← -0.5*phi_src_W^2 + 0.5*phi_src_E^2\n",
+      "xi_129 ← xi_128 + xi_38 + xi_90\n",
+      "xi_130 ← 0.5*phi_src_TS^2\n",
+      "xi_131 ← -0.5*phi_src_BS^2\n",
+      "xi_132 ← xi_130 + xi_131 + xi_43\n",
+      "xi_133 ← xi_119**2 + xi_129**2 + xi_132**2\n",
+      "xi_134 ← sqrt(xi_133)\n",
+      "xi_135 ← xi_118*(0.0726237719428938*mu_src_S + xi_47)\n",
+      "xi_136 ← xi_118/xi_133\n",
+      "xi_137 ← sqrt(xi_111*xi_113)\n",
+      "xi_138 ← -2.0*phi_src_S^1 + xi_53\n",
+      "xi_139 ← -0.5*phi_src_W^1 + 0.5*phi_src_E^1\n",
+      "xi_140 ← xi_103 + xi_139 + xi_56\n",
+      "xi_141 ← 0.5*phi_src_TS^1\n",
+      "xi_142 ← -0.5*phi_src_BS^1\n",
+      "xi_143 ← xi_141 + xi_142 + xi_61\n",
+      "xi_144 ← sqrt(xi_138**2 + xi_140**2 + xi_143**2)\n",
+      "xi_145 ← phi_src_N^0/2 + xi_10\n",
+      "xi_146 ← xi_145**2\n",
+      "xi_147 ← phi_src_N^1/2 + xi_13\n",
+      "xi_148 ← xi_147**2\n",
+      "xi_149 ← phi_src_N^2/2 + xi_16\n",
+      "xi_150 ← xi_149**2\n",
+      "xi_151 ← 1/(xi_146 + xi_148 + xi_150)\n",
+      "xi_152 ← xi_146*xi_151\n",
+      "xi_153 ← xi_148*xi_151\n",
+      "xi_154 ← xi_150*xi_151\n",
+      "xi_155 ← 2.0*phi_src_N^2 + xi_77\n",
+      "xi_156 ← sqrt(xi_145*xi_149)\n",
+      "xi_157 ← 2.0*phi_src_N^0 + xi_80\n",
+      "xi_158 ← xi_122 - xi_28 + xi_82\n",
+      "xi_159 ← 0.5*phi_src_TN^0\n",
+      "xi_160 ← 0.5*phi_src_BN^0\n",
+      "xi_161 ← xi_159 - xi_160 + xi_34\n",
+      "xi_162 ← sqrt(xi_157**2 + xi_158**2 + xi_161**2)\n",
+      "xi_163 ← xi_128 - xi_37 + xi_89\n",
+      "xi_164 ← 0.5*phi_src_TN^2\n",
+      "xi_165 ← 0.5*phi_src_BN^2\n",
+      "xi_166 ← xi_164 - xi_165 + xi_43\n",
+      "xi_167 ← xi_155**2 + xi_163**2 + xi_166**2\n",
+      "xi_168 ← sqrt(xi_167)\n",
+      "xi_169 ← xi_154*(0.0726237719428938*mu_src_N + xi_47)\n",
+      "xi_170 ← xi_154/xi_167\n",
+      "xi_171 ← sqrt(xi_147*xi_149)\n",
+      "xi_172 ← 2.0*phi_src_N^1 + xi_100\n",
+      "xi_173 ← xi_102 + xi_139 - xi_55\n",
+      "xi_174 ← 0.5*phi_src_TN^1\n",
+      "xi_175 ← 0.5*phi_src_BN^1\n",
+      "xi_176 ← xi_174 - xi_175 + xi_61\n",
+      "xi_177 ← sqrt(xi_172**2 + xi_173**2 + xi_176**2)\n",
+      "xi_178 ← phi_src_B^0/2 + xi_10\n",
+      "xi_179 ← xi_178**2\n",
+      "xi_180 ← phi_src_B^1/2 + xi_13\n",
+      "xi_181 ← xi_180**2\n",
+      "xi_182 ← phi_src_B^2/2 + xi_16\n",
+      "xi_183 ← xi_182**2\n",
+      "xi_184 ← 1/(xi_179 + xi_181 + xi_183)\n",
+      "xi_185 ← xi_179*xi_184\n",
+      "xi_186 ← xi_181*xi_184\n",
+      "xi_187 ← xi_183*xi_184\n",
+      "xi_188 ← -2.0*phi_src_B^2 + xi_23\n",
+      "xi_189 ← sqrt(xi_178*xi_182)\n",
+      "xi_190 ← -2.0*phi_src_B^0 + xi_26\n",
+      "xi_191 ← xi_122 + xi_33 + xi_86\n",
+      "xi_192 ← xi_125 + xi_160 + xi_30\n",
+      "xi_193 ← sqrt(xi_190**2 + xi_191**2 + xi_192**2)\n",
+      "xi_194 ← xi_128 + xi_42 + xi_93\n",
+      "xi_195 ← xi_131 + xi_165 + xi_39\n",
+      "xi_196 ← xi_188**2 + xi_194**2 + xi_195**2\n",
+      "xi_197 ← sqrt(xi_196)\n",
+      "xi_198 ← xi_187*(0.0726237719428938*mu_src_B + xi_47)\n",
+      "xi_199 ← xi_187/xi_196\n",
+      "xi_200 ← sqrt(xi_180*xi_182)\n",
+      "xi_201 ← -2.0*phi_src_B^1 + xi_53\n",
+      "xi_202 ← xi_106 + xi_139 + xi_60\n",
+      "xi_203 ← xi_142 + xi_175 + xi_57\n",
+      "xi_204 ← sqrt(xi_201**2 + xi_202**2 + xi_203**2)\n",
+      "xi_205 ← phi_src_T^0/2 + xi_10\n",
+      "xi_206 ← xi_205**2\n",
+      "xi_207 ← phi_src_T^1/2 + xi_13\n",
+      "xi_208 ← xi_207**2\n",
+      "xi_209 ← phi_src_T^2/2 + xi_16\n",
+      "xi_210 ← xi_209**2\n",
+      "xi_211 ← 1/(xi_206 + xi_208 + xi_210)\n",
+      "xi_212 ← xi_206*xi_211\n",
+      "xi_213 ← xi_208*xi_211\n",
+      "xi_214 ← xi_210*xi_211\n",
+      "xi_215 ← 2.0*phi_src_T^2 + xi_77\n",
+      "xi_216 ← sqrt(xi_205*xi_209)\n",
+      "xi_217 ← 2.0*phi_src_T^0 + xi_80\n",
+      "xi_218 ← xi_122 - xi_32 + xi_85\n",
+      "xi_219 ← -xi_124 + xi_159 + xi_30\n",
+      "xi_220 ← sqrt(xi_217**2 + xi_218**2 + xi_219**2)\n",
+      "xi_221 ← xi_128 - xi_41 + xi_92\n",
+      "xi_222 ← -xi_130 + xi_164 + xi_39\n",
+      "xi_223 ← xi_215**2 + xi_221**2 + xi_222**2\n",
+      "xi_224 ← sqrt(xi_223)\n",
+      "xi_225 ← xi_214*(0.0726237719428938*mu_src_T + xi_47)\n",
+      "xi_226 ← xi_214/xi_223\n",
+      "xi_227 ← sqrt(xi_207*xi_209)\n",
+      "xi_228 ← 2.0*phi_src_T^1 + xi_100\n",
+      "xi_229 ← xi_105 + xi_139 - xi_59\n",
+      "xi_230 ← -xi_141 + xi_174 + xi_57\n",
+      "xi_231 ← sqrt(xi_228**2 + xi_229**2 + xi_230**2)\n",
+      "dc_dmu_0_0 ← xi_3/(0.0277302350554502*xi_0 + 0.0134501298325457*xi_1 + 0.145247543885788*xi_2)\n",
+      "dc_dphi_dt_0 ← (0.0134501298325457*mu_src_C + 0.974209571226215)*(-xi_1*xi_8 + xi_4*xi_7) + (0.0277302350554502*mu_src_C + 1.37922908109611)*(-xi_0*xi_8 + xi_5*xi_7) + (0.145247543885788*mu_src_C + 7.27037126746791)*(-xi_2*xi_8 + xi_6*xi_7)\n",
+      "dc_dT_dt_0 ← 0\n",
+      "staggered_down_0_0 ← -xi_24*(3.92699081698724*Piecewise((0, (xi_25 < 1.0e-9) | (xi_36*xi_46 < 1.0e-9)), (xi_11*xi_51*(-xi_20*(0.0138651175277251*mu_src_W + xi_49) + xi_48)*(16.0*phi_dst_W^0 - 16.0*phi_src_W^0 + xi_50)*(xi_24*xi_27 + xi_31*xi_40 + xi_35*xi_44)/(xi_25*xi_36), True)) + 3.92699081698724*Piecewise((0, (xi_52 < 1.0e-9) | (xi_46*xi_63 < 1.0e-9)), (xi_14*xi_51*(-xi_21*(0.00672506491627283*mu_src_W + xi_64) + xi_48)*(16.0*phi_dst_W^1 - 16.0*phi_src_W^1 + xi_65)*(xi_24*xi_54 + xi_40*xi_58 + xi_44*xi_62)/(xi_52*xi_63), True))) + (-2.0*mu_src_W + xi_9)*(2.77302350554502e-5*xi_20 + 1.34501298325457e-5*xi_21 + 0.145247543885788*xi_22)\n",
+      "staggered_up_0_0 ← -xi_78*(3.92699081698724*Piecewise((0, (xi_79 < 1.0e-9) | (xi_88*xi_96 < 1.0e-9)), (xi_67*xi_98*(-xi_74*(0.0138651175277251*mu_src_E + xi_49) + xi_97)*(16.0*phi_dst_E^0 - 16.0*phi_src_E^0 + xi_50)*(xi_78*xi_81 + xi_84*xi_91 + xi_87*xi_94)/(xi_79*xi_88), True)) + 3.92699081698724*Piecewise((0, (xi_99 < 1.0e-9) | (xi_108*xi_96 < 1.0e-9)), (xi_69*xi_98*(-xi_75*(0.00672506491627283*mu_src_E + xi_64) + xi_97)*(16.0*phi_dst_E^1 - 16.0*phi_src_E^1 + xi_65)*(xi_101*xi_78 + xi_104*xi_91 + xi_107*xi_94)/(xi_108*xi_99), True))) + (2.0*mu_src_E + xi_66)*(2.77302350554502e-5*xi_74 + 1.34501298325457e-5*xi_75 + 0.145247543885788*xi_76)\n",
+      "staggered_down_1_0 ← -xi_119*(3.92699081698724*Piecewise((0, (xi_120 < 1.0e-9) | (xi_127*xi_134 < 1.0e-9)), (xi_109*xi_136*(-xi_116*(0.0138651175277251*mu_src_S + xi_49) + xi_135)*(16.0*phi_dst_S^0 - 16.0*phi_src_S^0 + xi_50)*(xi_119*xi_121 + xi_123*xi_129 + xi_126*xi_132)/(xi_120*xi_127), True)) + 3.92699081698724*Piecewise((0, (xi_137 < 1.0e-9) | (xi_134*xi_144 < 1.0e-9)), (xi_111*xi_136*(-xi_117*(0.00672506491627283*mu_src_S + xi_64) + xi_135)*(16.0*phi_dst_S^1 - 16.0*phi_src_S^1 + xi_65)*(xi_119*xi_138 + xi_129*xi_140 + xi_132*xi_143)/(xi_137*xi_144), True))) + (-2.0*mu_src_S + xi_9)*(2.77302350554502e-5*xi_116 + 1.34501298325457e-5*xi_117 + 0.145247543885788*xi_118)\n",
+      "staggered_up_1_0 ← -xi_155*(3.92699081698724*Piecewise((0, (xi_156 < 1.0e-9) | (xi_162*xi_168 < 1.0e-9)), (xi_145*xi_170*(-xi_152*(0.0138651175277251*mu_src_N + xi_49) + xi_169)*(16.0*phi_dst_N^0 - 16.0*phi_src_N^0 + xi_50)*(xi_155*xi_157 + xi_158*xi_163 + xi_161*xi_166)/(xi_156*xi_162), True)) + 3.92699081698724*Piecewise((0, (xi_171 < 1.0e-9) | (xi_168*xi_177 < 1.0e-9)), (xi_147*xi_170*(-xi_153*(0.00672506491627283*mu_src_N + xi_64) + xi_169)*(16.0*phi_dst_N^1 - 16.0*phi_src_N^1 + xi_65)*(xi_155*xi_172 + xi_163*xi_173 + xi_166*xi_176)/(xi_171*xi_177), True))) + (2.0*mu_src_N + xi_66)*(2.77302350554502e-5*xi_152 + 1.34501298325457e-5*xi_153 + 0.145247543885788*xi_154)\n",
+      "staggered_down_2_0 ← -xi_188*(3.92699081698724*Piecewise((0, (xi_189 < 1.0e-9) | (xi_193*xi_197 < 1.0e-9)), (xi_178*xi_199*(-xi_185*(0.0138651175277251*mu_src_B + xi_49) + xi_198)*(16.0*phi_dst_B^0 - 16.0*phi_src_B^0 + xi_50)*(xi_188*xi_190 + xi_191*xi_194 + xi_192*xi_195)/(xi_189*xi_193), True)) + 3.92699081698724*Piecewise((0, (xi_200 < 1.0e-9) | (xi_197*xi_204 < 1.0e-9)), (xi_180*xi_199*(-xi_186*(0.00672506491627283*mu_src_B + xi_64) + xi_198)*(16.0*phi_dst_B^1 - 16.0*phi_src_B^1 + xi_65)*(xi_188*xi_201 + xi_194*xi_202 + xi_195*xi_203)/(xi_200*xi_204), True))) + (-2.0*mu_src_B + xi_9)*(2.77302350554502e-5*xi_185 + 1.34501298325457e-5*xi_186 + 0.145247543885788*xi_187)\n",
+      "staggered_up_2_0 ← -xi_215*(3.92699081698724*Piecewise((0, (xi_216 < 1.0e-9) | (xi_220*xi_224 < 1.0e-9)), (xi_205*xi_226*(-xi_212*(0.0138651175277251*mu_src_T + xi_49) + xi_225)*(16.0*phi_dst_T^0 - 16.0*phi_src_T^0 + xi_50)*(xi_215*xi_217 + xi_218*xi_221 + xi_219*xi_222)/(xi_216*xi_220), True)) + 3.92699081698724*Piecewise((0, (xi_227 < 1.0e-9) | (xi_224*xi_231 < 1.0e-9)), (xi_207*xi_226*(-xi_213*(0.00672506491627283*mu_src_T + xi_64) + xi_225)*(16.0*phi_dst_T^1 - 16.0*phi_src_T^1 + xi_65)*(xi_215*xi_228 + xi_221*xi_229 + xi_222*xi_230)/(xi_227*xi_231), True))) + (2.0*mu_src_T + xi_66)*(2.77302350554502e-5*xi_212 + 1.34501298325457e-5*xi_213 + 0.145247543885788*xi_214)\n",
+      "divMgradmu_0 ← -2.0*staggered_down_0_0 - 2.0*staggered_down_1_0 - 2.0*staggered_down_2_0 + 2.0*staggered_up_0_0 + 2.0*staggered_up_1_0 + 2.0*staggered_up_2_0\n",
+      "mu_dst[0,0,0] ← mu_src_C + 0.03125*dc_dmu_0_0*(-dc_dT_dt_0 - dc_dphi_dt_0 + divMgradmu_0)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Gather every node stored in the `true_block` of each entry of\n",
+    "# mu_stag_precomp_eqs into one flat list.\n",
+    "stag_eqs = []\n",
+    "for block in mu_stag_precomp_eqs:\n",
+    "    stag_eqs.extend(block.true_block._nodes)\n",
+    "\n",
+    "# Print the number of entries in mu_update_eqs, then each entry in turn.\n",
+    "print(len(mu_update_eqs))\n",
+    "for eq in mu_update_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "163\n",
+      "201\n",
+      "435\n",
+      "69\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Operation tallies; notebook-level names that are reset each time this cell runs.\n",
+    "fas = adds = muls = mufus = 0\n",
+    "\n",
+    "\n",
+    "def count_ops(expr):\n",
+    "    # Visit the children of expr first, then classify expr itself and bump\n",
+    "    # the matching global counter:\n",
+    "    #   fas   - Field.Access nodes\n",
+    "    #   adds  - sympy.Add nodes\n",
+    "    #   muls  - sympy.Mul nodes, plus any sympy.Pow not counted under mufus\n",
+    "    #   mufus - sympy.Pow nodes whose exponent compares equal to -1 or 0.5\n",
+    "    global fas, adds, muls, mufus\n",
+    "    for child in expr.args:\n",
+    "        count_ops(child)\n",
+    "\n",
+    "    if isinstance(expr, Field.Access):\n",
+    "        fas += 1\n",
+    "    elif isinstance(expr, sympy.Add):\n",
+    "        adds += 1\n",
+    "    elif isinstance(expr, sympy.Mul):\n",
+    "        muls += 1\n",
+    "    elif isinstance(expr, sympy.Pow):\n",
+    "        if expr.exp == -1 or expr.exp == 0.5:\n",
+    "            mufus += 1\n",
+    "        else:\n",
+    "            muls += 1\n",
+    "\n",
+    "\n",
+    "for eq in mu_update_eqs:\n",
+    "    count_ops(eq)\n",
+    "\n",
+    "print(fas)\n",
+    "print(adds)\n",
+    "print(muls)\n",
+    "print(mufus)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "FUNC_PREFIX void kernel(double * const _data_mu_src, double * _data_mu_stag, double * const _data_phi_dst, double * const _data_phi_src)\n",
+      "{\n",
+      "   if (blockDim.x*blockIdx.x + threadIdx.x + 1 < 130 && blockDim.y*blockIdx.y + threadIdx.y + 1 < 130 && blockDim.z*blockIdx.z + threadIdx.z + 1 < 130)\n",
+      "   {\n",
+      "      int64_t ctr_0 = blockDim.x*blockIdx.x + threadIdx.x + 1;\n",
+      "      int64_t ctr_1 = blockDim.y*blockIdx.y + threadIdx.y + 1;\n",
+      "      int64_t ctr_2 = blockDim.z*blockIdx.z + threadIdx.z + 1;\n",
+      "      if (ctr_1 < 129 && ctr_2 < 129)\n",
+      "      {\n",
+      "         double * const _data_mu_src_10_20 = _data_mu_src + 130*ctr_1 + 16900*ctr_2;\n",
+      "         double x0 = 2.0*_data_mu_src_10_20[ctr_0];\n",
+      "         double * const _data_phi_src_10_20_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2;\n",
+      "         double x1 = 0.5*_data_phi_src_10_20_30[ctr_0];\n",
+      "         double x2 = x1 + 0.5*_data_phi_src_10_20_30[ctr_0 - 1];\n",
+      "         double x3 = (x2*x2);\n",
+      "         double * const _data_phi_src_10_20_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2197000;\n",
+      "         double x4 = 0.5*_data_phi_src_10_20_31[ctr_0];\n",
+      "         double x5 = x4 + 0.5*_data_phi_src_10_20_31[ctr_0 - 1];\n",
+      "         double x6 = (x5*x5);\n",
+      "         double * const _data_phi_src_10_20_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4394000;\n",
+      "         double x7 = 0.5*_data_phi_src_10_20_32[ctr_0];\n",
+      "         double x8 = x7 + 0.5*_data_phi_src_10_20_32[ctr_0 - 1];\n",
+      "         double x9 = (x8*x8);\n",
+      "         double x10 = (float) __frcp_rn( (float) x3 + x6 + x9);\n",
+      "         double x11 = x10*x3;\n",
+      "         double x12 = x10*x6;\n",
+      "         double x13 = x10*x9;\n",
+      "         double x14 = 2.0*_data_phi_src_10_20_32[ctr_0];\n",
+      "         double x15 = x14 - 2.0*_data_phi_src_10_20_32[ctr_0 - 1];\n",
+      "         double x16 = (float) sqrtf((float) x2*x8);\n",
+      "         double x17 = 2.0*_data_phi_src_10_20_30[ctr_0];\n",
+      "         double x18 = x17 - 2.0*_data_phi_src_10_20_30[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_1m1_20_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 - 130;\n",
+      "         double x19 = -0.5*_data_phi_src_1m1_20_30[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_11_20_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 130;\n",
+      "         double x20 = -0.5*_data_phi_src_1m1_20_30[ctr_0] + 0.5*_data_phi_src_11_20_30[ctr_0];\n",
+      "         double x21 = x19 + x20 + 0.5*_data_phi_src_11_20_30[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_10_2m1_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 - 16900;\n",
+      "         double x22 = -0.5*_data_phi_src_10_2m1_30[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_10_21_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 16900;\n",
+      "         double x23 = -0.5*_data_phi_src_10_2m1_30[ctr_0] + 0.5*_data_phi_src_10_21_30[ctr_0];\n",
+      "         double x24 = x22 + x23 + 0.5*_data_phi_src_10_21_30[ctr_0 - 1];\n",
+      "         double x25 = (float) sqrtf((float) (x18*x18) + (x21*x21) + (x24*x24));\n",
+      "         double * const _data_phi_src_1m1_20_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4393870;\n",
+      "         double x26 = -0.5*_data_phi_src_1m1_20_32[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_11_20_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4394130;\n",
+      "         double x27 = -0.5*_data_phi_src_1m1_20_32[ctr_0] + 0.5*_data_phi_src_11_20_32[ctr_0];\n",
+      "         double x28 = x26 + x27 + 0.5*_data_phi_src_11_20_32[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_10_2m1_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4377100;\n",
+      "         double x29 = -0.5*_data_phi_src_10_2m1_32[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_10_21_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4410900;\n",
+      "         double x30 = -0.5*_data_phi_src_10_2m1_32[ctr_0] + 0.5*_data_phi_src_10_21_32[ctr_0];\n",
+      "         double x31 = x29 + x30 + 0.5*_data_phi_src_10_21_32[ctr_0 - 1];\n",
+      "         double x32 = (x15*x15) + (x28*x28) + (x31*x31);\n",
+      "         double x33 = (float) sqrtf((float) x32);\n",
+      "         double x34 = 0.07262377194289385*_data_mu_src_10_20[ctr_0] + 7.2703712674679055;\n",
+      "         double x35 = x13*x34 + 0.07262377194289385*_data_mu_src_10_20[ctr_0 - 1];\n",
+      "         double x36 = 0.013865117527725106*_data_mu_src_10_20[ctr_0] + 1.3792290810961054;\n",
+      "         double * const _data_phi_dst_10_20_30 = _data_phi_dst + 130*ctr_1 + 16900*ctr_2;\n",
+      "         double x37 = -16.0*_data_phi_src_10_20_30[ctr_0] + 16.0*_data_phi_dst_10_20_30[ctr_0];\n",
+      "         double x38 = fdividef(x13, x32);\n",
+      "         double x39 = (float) sqrtf((float) x5*x8);\n",
+      "         double x40 = 2.0*_data_phi_src_10_20_31[ctr_0];\n",
+      "         double x41 = x40 - 2.0*_data_phi_src_10_20_31[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_1m1_20_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2196870;\n",
+      "         double x42 = -0.5*_data_phi_src_1m1_20_31[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_11_20_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2197130;\n",
+      "         double x43 = -0.5*_data_phi_src_1m1_20_31[ctr_0] + 0.5*_data_phi_src_11_20_31[ctr_0];\n",
+      "         double x44 = x42 + x43 + 0.5*_data_phi_src_11_20_31[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_10_2m1_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2180100;\n",
+      "         double x45 = -0.5*_data_phi_src_10_2m1_31[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_10_21_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2213900;\n",
+      "         double x46 = -0.5*_data_phi_src_10_2m1_31[ctr_0] + 0.5*_data_phi_src_10_21_31[ctr_0];\n",
+      "         double x47 = x45 + x46 + 0.5*_data_phi_src_10_21_31[ctr_0 - 1];\n",
+      "         double x48 = (float) sqrtf((float) (x41*x41) + (x44*x44) + (x47*x47));\n",
+      "         double x49 = 0.0067250649162728321*_data_mu_src_10_20[ctr_0] + 0.97420957122621499;\n",
+      "         double * const _data_phi_dst_10_20_31 = _data_phi_dst + 130*ctr_1 + 16900*ctr_2 + 2197000;\n",
+      "         double x50 = -16.0*_data_phi_src_10_20_31[ctr_0] + 16.0*_data_phi_dst_10_20_31[ctr_0];\n",
+      "         double * _data_mu_stag_10_20_30_40 = _data_mu_stag + 130*ctr_1 + 16900*ctr_2;\n",
+      "         _data_mu_stag_10_20_30_40[ctr_0] = -x15*((x16 < 1.0000000000000001e-9 || x25*x33 < 1.0000000000000001e-9) ? (0): (fdividef(x2*x38*-x11*x36 + 0.013865117527725106*_data_mu_src_10_20[ctr_0 - 1] + x35*x37 - 16.0*_data_phi_src_10_20_30[ctr_0 - 1] + 16.0*_data_phi_dst_10_20_30[ctr_0 - 1]*x15*x18 + x21*x28 + x24*x31, x16*x25)))*3.9269908169872414 + ((x39 < 1.0000000000000001e-9 || x33*x48 < 1.0000000000000001e-9) ? (0): (fdividef(x38*x5*-x12*x49 + 0.0067250649162728321*_data_mu_src_10_20[ctr_0 - 1] + x35*x50 - 16.0*_data_phi_src_10_20_31[ctr_0 - 1] + 16.0*_data_phi_dst_10_20_31[ctr_0 - 1]*x15*x41 + x28*x44 + x31*x47, x39*x48)))*3.9269908169872414 + x0 - 2.0*_data_mu_src_10_20[ctr_0 - 1]*x11*2.7730235055450212e-5 + x12*1.3450129832545665e-5 + x13*0.1452475438857877;\n",
+      "      } \n",
+      "      if (ctr_0 < 129 && ctr_2 < 129)\n",
+      "      {\n",
+      "         double * const _data_mu_src_10_20 = _data_mu_src + 130*ctr_1 + 16900*ctr_2;\n",
+      "         double x0 = 2.0*_data_mu_src_10_20[ctr_0];\n",
+      "         double * const _data_phi_src_10_20_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2;\n",
+      "         double x1 = 0.5*_data_phi_src_10_20_30[ctr_0];\n",
+      "         double * const _data_phi_src_10_20_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2197000;\n",
+      "         double x4 = 0.5*_data_phi_src_10_20_31[ctr_0];\n",
+      "         double * const _data_phi_src_10_20_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4394000;\n",
+      "         double x7 = 0.5*_data_phi_src_10_20_32[ctr_0];\n",
+      "         double x14 = 2.0*_data_phi_src_10_20_32[ctr_0];\n",
+      "         double x17 = 2.0*_data_phi_src_10_20_30[ctr_0];\n",
+      "         double * const _data_phi_src_1m1_20_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 - 130;\n",
+      "         double x19 = -0.5*_data_phi_src_1m1_20_30[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_10_2m1_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 - 16900;\n",
+      "         double * const _data_phi_src_10_21_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 16900;\n",
+      "         double x23 = -0.5*_data_phi_src_10_2m1_30[ctr_0] + 0.5*_data_phi_src_10_21_30[ctr_0];\n",
+      "         double * const _data_phi_src_1m1_20_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4393870;\n",
+      "         double x26 = -0.5*_data_phi_src_1m1_20_32[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_10_2m1_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4377100;\n",
+      "         double * const _data_phi_src_10_21_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4410900;\n",
+      "         double x30 = -0.5*_data_phi_src_10_2m1_32[ctr_0] + 0.5*_data_phi_src_10_21_32[ctr_0];\n",
+      "         double x34 = 0.07262377194289385*_data_mu_src_10_20[ctr_0] + 7.2703712674679055;\n",
+      "         double x36 = 0.013865117527725106*_data_mu_src_10_20[ctr_0] + 1.3792290810961054;\n",
+      "         double * const _data_phi_dst_10_20_30 = _data_phi_dst + 130*ctr_1 + 16900*ctr_2;\n",
+      "         double x37 = -16.0*_data_phi_src_10_20_30[ctr_0] + 16.0*_data_phi_dst_10_20_30[ctr_0];\n",
+      "         double x40 = 2.0*_data_phi_src_10_20_31[ctr_0];\n",
+      "         double * const _data_phi_src_1m1_20_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2196870;\n",
+      "         double x42 = -0.5*_data_phi_src_1m1_20_31[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_10_2m1_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2180100;\n",
+      "         double * const _data_phi_src_10_21_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2213900;\n",
+      "         double x46 = -0.5*_data_phi_src_10_2m1_31[ctr_0] + 0.5*_data_phi_src_10_21_31[ctr_0];\n",
+      "         double x49 = 0.0067250649162728321*_data_mu_src_10_20[ctr_0] + 0.97420957122621499;\n",
+      "         double * const _data_phi_dst_10_20_31 = _data_phi_dst + 130*ctr_1 + 16900*ctr_2 + 2197000;\n",
+      "         double x50 = -16.0*_data_phi_src_10_20_31[ctr_0] + 16.0*_data_phi_dst_10_20_31[ctr_0];\n",
+      "         double x51 = x1 + 0.5*_data_phi_src_1m1_20_30[ctr_0];\n",
+      "         double x52 = (x51*x51);\n",
+      "         double x53 = x4 + 0.5*_data_phi_src_1m1_20_31[ctr_0];\n",
+      "         double x54 = (x53*x53);\n",
+      "         double x55 = x7 + 0.5*_data_phi_src_1m1_20_32[ctr_0];\n",
+      "         double x56 = (x55*x55);\n",
+      "         double x57 = (float) __frcp_rn( (float) x52 + x54 + x56);\n",
+      "         double x58 = x52*x57;\n",
+      "         double x59 = x54*x57;\n",
+      "         double x60 = x56*x57;\n",
+      "         double x61 = x14 - 2.0*_data_phi_src_1m1_20_32[ctr_0];\n",
+      "         double x62 = (float) sqrtf((float) x51*x55);\n",
+      "         double x63 = x17 - 2.0*_data_phi_src_1m1_20_30[ctr_0];\n",
+      "         double x64 = -0.5*_data_phi_src_10_20_30[ctr_0 - 1] + 0.5*_data_phi_src_10_20_30[ctr_0 + 1];\n",
+      "         double x65 = x19 + x64 + 0.5*_data_phi_src_1m1_20_30[ctr_0 + 1];\n",
+      "         double * const _data_phi_src_1m1_2m1_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 - 17030;\n",
+      "         double x66 = -0.5*_data_phi_src_1m1_2m1_30[ctr_0];\n",
+      "         double * const _data_phi_src_1m1_21_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 16770;\n",
+      "         double x67 = x23 + x66 + 0.5*_data_phi_src_1m1_21_30[ctr_0];\n",
+      "         double x68 = (float) sqrtf((float) (x63*x63) + (x65*x65) + (x67*x67));\n",
+      "         double x69 = -0.5*_data_phi_src_10_20_32[ctr_0 - 1] + 0.5*_data_phi_src_10_20_32[ctr_0 + 1];\n",
+      "         double x70 = x26 + x69 + 0.5*_data_phi_src_1m1_20_32[ctr_0 + 1];\n",
+      "         double * const _data_phi_src_1m1_2m1_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4376970;\n",
+      "         double x71 = -0.5*_data_phi_src_1m1_2m1_32[ctr_0];\n",
+      "         double * const _data_phi_src_1m1_21_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4410770;\n",
+      "         double x72 = x30 + x71 + 0.5*_data_phi_src_1m1_21_32[ctr_0];\n",
+      "         double x73 = (x61*x61) + (x70*x70) + (x72*x72);\n",
+      "         double x74 = (float) sqrtf((float) x73);\n",
+      "         double * const _data_mu_src_1m1_20 = _data_mu_src + 130*ctr_1 + 16900*ctr_2 - 130;\n",
+      "         double x75 = x60*x34 + 0.07262377194289385*_data_mu_src_1m1_20[ctr_0];\n",
+      "         double x76 = fdividef(x60, x73);\n",
+      "         double x77 = (float) sqrtf((float) x53*x55);\n",
+      "         double x78 = x40 - 2.0*_data_phi_src_1m1_20_31[ctr_0];\n",
+      "         double x79 = -0.5*_data_phi_src_10_20_31[ctr_0 - 1] + 0.5*_data_phi_src_10_20_31[ctr_0 + 1];\n",
+      "         double x80 = x42 + x79 + 0.5*_data_phi_src_1m1_20_31[ctr_0 + 1];\n",
+      "         double * const _data_phi_src_1m1_2m1_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2179970;\n",
+      "         double x81 = -0.5*_data_phi_src_1m1_2m1_31[ctr_0];\n",
+      "         double * const _data_phi_src_1m1_21_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2213770;\n",
+      "         double x82 = x46 + x81 + 0.5*_data_phi_src_1m1_21_31[ctr_0];\n",
+      "         double x83 = (float) sqrtf((float) (x78*x78) + (x80*x80) + (x82*x82));\n",
+      "         double * _data_mu_stag_10_20_31_40 = _data_mu_stag + 130*ctr_1 + 16900*ctr_2 + 2197000;\n",
+      "         double * const _data_phi_dst_1m1_20_30 = _data_phi_dst + 130*ctr_1 + 16900*ctr_2 - 130;\n",
+      "         double * const _data_phi_dst_1m1_20_31 = _data_phi_dst + 130*ctr_1 + 16900*ctr_2 + 2196870;\n",
+      "         _data_mu_stag_10_20_31_40[ctr_0] = -x61*((x62 < 1.0000000000000001e-9 || x68*x74 < 1.0000000000000001e-9) ? (0): (fdividef(x51*x76*-x58*x36 + 0.013865117527725106*_data_mu_src_1m1_20[ctr_0] + x75*x37 - 16.0*_data_phi_src_1m1_20_30[ctr_0] + 16.0*_data_phi_dst_1m1_20_30[ctr_0]*x61*x63 + x65*x70 + x67*x72, x62*x68)))*3.9269908169872414 + ((x77 < 1.0000000000000001e-9 || x74*x83 < 1.0000000000000001e-9) ? (0): (fdividef(x53*x76*-x59*x49 + 0.0067250649162728321*_data_mu_src_1m1_20[ctr_0] + x75*x50 - 16.0*_data_phi_src_1m1_20_31[ctr_0] + 16.0*_data_phi_dst_1m1_20_31[ctr_0]*x61*x78 + x70*x80 + x72*x82, x77*x83)))*3.9269908169872414 + x0 - 2.0*_data_mu_src_1m1_20[ctr_0]*x58*2.7730235055450212e-5 + x59*1.3450129832545665e-5 + x60*0.1452475438857877;\n",
+      "      } \n",
+      "      if (ctr_0 < 129 && ctr_1 < 129)\n",
+      "      {\n",
+      "         double * const _data_mu_src_10_20 = _data_mu_src + 130*ctr_1 + 16900*ctr_2;\n",
+      "         double x0 = 2.0*_data_mu_src_10_20[ctr_0];\n",
+      "         double * const _data_phi_src_10_20_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2;\n",
+      "         double x1 = 0.5*_data_phi_src_10_20_30[ctr_0];\n",
+      "         double * const _data_phi_src_10_20_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2197000;\n",
+      "         double x4 = 0.5*_data_phi_src_10_20_31[ctr_0];\n",
+      "         double * const _data_phi_src_10_20_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4394000;\n",
+      "         double x7 = 0.5*_data_phi_src_10_20_32[ctr_0];\n",
+      "         double x14 = 2.0*_data_phi_src_10_20_32[ctr_0];\n",
+      "         double x17 = 2.0*_data_phi_src_10_20_30[ctr_0];\n",
+      "         double * const _data_phi_src_11_20_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 130;\n",
+      "         double * const _data_phi_src_1m1_20_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 - 130;\n",
+      "         double x20 = -0.5*_data_phi_src_1m1_20_30[ctr_0] + 0.5*_data_phi_src_11_20_30[ctr_0];\n",
+      "         double * const _data_phi_src_10_2m1_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 - 16900;\n",
+      "         double x22 = -0.5*_data_phi_src_10_2m1_30[ctr_0 - 1];\n",
+      "         double * const _data_phi_src_11_20_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4394130;\n",
+      "         double * const _data_phi_src_1m1_20_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4393870;\n",
+      "         double x27 = -0.5*_data_phi_src_1m1_20_32[ctr_0] + 0.5*_data_phi_src_11_20_32[ctr_0];\n",
+      "         double * const _data_phi_src_10_2m1_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4377100;\n",
+      "         double x29 = -0.5*_data_phi_src_10_2m1_32[ctr_0 - 1];\n",
+      "         double x34 = 0.07262377194289385*_data_mu_src_10_20[ctr_0] + 7.2703712674679055;\n",
+      "         double x36 = 0.013865117527725106*_data_mu_src_10_20[ctr_0] + 1.3792290810961054;\n",
+      "         double * const _data_phi_dst_10_20_30 = _data_phi_dst + 130*ctr_1 + 16900*ctr_2;\n",
+      "         double x37 = -16.0*_data_phi_src_10_20_30[ctr_0] + 16.0*_data_phi_dst_10_20_30[ctr_0];\n",
+      "         double x40 = 2.0*_data_phi_src_10_20_31[ctr_0];\n",
+      "         double * const _data_phi_src_11_20_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2197130;\n",
+      "         double * const _data_phi_src_1m1_20_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2196870;\n",
+      "         double x43 = -0.5*_data_phi_src_1m1_20_31[ctr_0] + 0.5*_data_phi_src_11_20_31[ctr_0];\n",
+      "         double * const _data_phi_src_10_2m1_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2180100;\n",
+      "         double x45 = -0.5*_data_phi_src_10_2m1_31[ctr_0 - 1];\n",
+      "         double x49 = 0.0067250649162728321*_data_mu_src_10_20[ctr_0] + 0.97420957122621499;\n",
+      "         double * const _data_phi_dst_10_20_31 = _data_phi_dst + 130*ctr_1 + 16900*ctr_2 + 2197000;\n",
+      "         double x50 = -16.0*_data_phi_src_10_20_31[ctr_0] + 16.0*_data_phi_dst_10_20_31[ctr_0];\n",
+      "         double x64 = -0.5*_data_phi_src_10_20_30[ctr_0 - 1] + 0.5*_data_phi_src_10_20_30[ctr_0 + 1];\n",
+      "         double * const _data_phi_src_1m1_2m1_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 - 17030;\n",
+      "         double x66 = -0.5*_data_phi_src_1m1_2m1_30[ctr_0];\n",
+      "         double x69 = -0.5*_data_phi_src_10_20_32[ctr_0 - 1] + 0.5*_data_phi_src_10_20_32[ctr_0 + 1];\n",
+      "         double * const _data_phi_src_1m1_2m1_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4376970;\n",
+      "         double x71 = -0.5*_data_phi_src_1m1_2m1_32[ctr_0];\n",
+      "         double x79 = -0.5*_data_phi_src_10_20_31[ctr_0 - 1] + 0.5*_data_phi_src_10_20_31[ctr_0 + 1];\n",
+      "         double * const _data_phi_src_1m1_2m1_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2179970;\n",
+      "         double x81 = -0.5*_data_phi_src_1m1_2m1_31[ctr_0];\n",
+      "         double x84 = x1 + 0.5*_data_phi_src_10_2m1_30[ctr_0];\n",
+      "         double x85 = (x84*x84);\n",
+      "         double x86 = x4 + 0.5*_data_phi_src_10_2m1_31[ctr_0];\n",
+      "         double x87 = (x86*x86);\n",
+      "         double x88 = x7 + 0.5*_data_phi_src_10_2m1_32[ctr_0];\n",
+      "         double x89 = (x88*x88);\n",
+      "         double x90 = (float) __frcp_rn( (float) x85 + x87 + x89);\n",
+      "         double x91 = x85*x90;\n",
+      "         double x92 = x87*x90;\n",
+      "         double x93 = x89*x90;\n",
+      "         double x94 = x14 - 2.0*_data_phi_src_10_2m1_32[ctr_0];\n",
+      "         double x95 = (float) sqrtf((float) x84*x88);\n",
+      "         double x96 = x17 - 2.0*_data_phi_src_10_2m1_30[ctr_0];\n",
+      "         double x97 = x22 + x64 + 0.5*_data_phi_src_10_2m1_30[ctr_0 + 1];\n",
+      "         double * const _data_phi_src_11_2m1_30 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 - 16770;\n",
+      "         double x98 = x20 + x66 + 0.5*_data_phi_src_11_2m1_30[ctr_0];\n",
+      "         double x99 = (float) sqrtf((float) (x96*x96) + (x97*x97) + (x98*x98));\n",
+      "         double x100 = x29 + x69 + 0.5*_data_phi_src_10_2m1_32[ctr_0 + 1];\n",
+      "         double * const _data_phi_src_11_2m1_32 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 4377230;\n",
+      "         double x101 = x27 + x71 + 0.5*_data_phi_src_11_2m1_32[ctr_0];\n",
+      "         double x102 = (x100*x100) + (x101*x101) + (x94*x94);\n",
+      "         double x103 = (float) sqrtf((float) x102);\n",
+      "         double * const _data_mu_src_10_2m1 = _data_mu_src + 130*ctr_1 + 16900*ctr_2 - 16900;\n",
+      "         double x104 = x93*x34 + 0.07262377194289385*_data_mu_src_10_2m1[ctr_0];\n",
+      "         double x105 = fdividef(x93, x102);\n",
+      "         double x106 = (float) sqrtf((float) x86*x88);\n",
+      "         double x107 = x40 - 2.0*_data_phi_src_10_2m1_31[ctr_0];\n",
+      "         double x108 = x45 + x79 + 0.5*_data_phi_src_10_2m1_31[ctr_0 + 1];\n",
+      "         double * const _data_phi_src_11_2m1_31 = _data_phi_src + 130*ctr_1 + 16900*ctr_2 + 2180230;\n",
+      "         double x109 = x43 + x81 + 0.5*_data_phi_src_11_2m1_31[ctr_0];\n",
+      "         double x110 = (float) sqrtf((float) (x107*x107) + (x108*x108) + (x109*x109));\n",
+      "         double * _data_mu_stag_10_20_32_40 = _data_mu_stag + 130*ctr_1 + 16900*ctr_2 + 4394000;\n",
+      "         double * const _data_phi_dst_10_2m1_30 = _data_phi_dst + 130*ctr_1 + 16900*ctr_2 - 16900;\n",
+      "         double * const _data_phi_dst_10_2m1_31 = _data_phi_dst + 130*ctr_1 + 16900*ctr_2 + 2180100;\n",
+      "         _data_mu_stag_10_20_32_40[ctr_0] = -x94*((x106 < 1.0000000000000001e-9 || x103*x110 < 1.0000000000000001e-9) ? (0): (fdividef(x105*x86*x104 - x92*x49 + 0.0067250649162728321*_data_mu_src_10_2m1[ctr_0]*x50 - 16.0*_data_phi_src_10_2m1_31[ctr_0] + 16.0*_data_phi_dst_10_2m1_31[ctr_0]*x100*x108 + x101*x109 + x107*x94, x106*x110)))*3.9269908169872414 + ((x95 < 1.0000000000000001e-9 || x103*x99 < 1.0000000000000001e-9) ? (0): (fdividef(x105*x84*x104 - x91*x36 + 0.013865117527725106*_data_mu_src_10_2m1[ctr_0]*x37 - 16.0*_data_phi_src_10_2m1_30[ctr_0] + 16.0*_data_phi_dst_10_2m1_30[ctr_0]*x100*x97 + x101*x98 + x94*x96, x95*x99)))*3.9269908169872414 + x0 - 2.0*_data_mu_src_10_2m1[ctr_0]*x91*2.7730235055450212e-5 + x92*1.3450129832545665e-5 + x93*0.1452475438857877;\n",
+      "      } \n",
+      "   } \n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(show_code(mu_stag_precomp_kernel))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rescheduled_eqs = schedule_eqs(atomize_eqs(mu_stag_update_eqs))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "63\n",
+      "xi_0 ← phi_src_C^0**2\n",
+      "xi_1 ← phi_src_C^1**2\n",
+      "xi_2 ← phi_src_C^2**2\n",
+      "xi_3 ← xi_0 + xi_1 + xi_2\n",
+      "xi_4 ← phi_dst_C^1**2\n",
+      "xi_5 ← phi_dst_C^0**2\n",
+      "xi_6 ← phi_dst_C^2**2\n",
+      "xi_7 ← 32.0/(xi_4 + xi_5 + xi_6)\n",
+      "xi_8 ← 32.0/xi_3\n",
+      "xi_9 ← 2.0*mu_src_C\n",
+      "xi_10 ← phi_src_C^0/2\n",
+      "xi_11 ← phi_src_W^0/2 + xi_10\n",
+      "xi_12 ← xi_11**2\n",
+      "xi_13 ← phi_src_C^1/2\n",
+      "xi_14 ← phi_src_W^1/2 + xi_13\n",
+      "xi_15 ← xi_14**2\n",
+      "xi_16 ← phi_src_C^2/2\n",
+      "xi_17 ← phi_src_W^2/2 + xi_16\n",
+      "xi_18 ← xi_17**2\n",
+      "xi_19 ← 1/(xi_12 + xi_15 + xi_18)\n",
+      "xi_20 ← xi_12*xi_19\n",
+      "xi_21 ← xi_15*xi_19\n",
+      "xi_22 ← xi_18*xi_19\n",
+      "xi_23 ← 2.0*phi_src_C^2\n",
+      "xi_24 ← -2.0*phi_src_W^2 + xi_23\n",
+      "xi_25 ← sqrt(xi_11*xi_17)\n",
+      "xi_26 ← 2.0*phi_src_C^0\n",
+      "xi_27 ← -2.0*phi_src_W^0 + xi_26\n",
+      "xi_28 ← 0.5*phi_src_NW^0\n",
+      "xi_29 ← -0.5*phi_src_SW^0\n",
+      "xi_30 ← -0.5*phi_src_S^0 + 0.5*phi_src_N^0\n",
+      "xi_31 ← xi_28 + xi_29 + xi_30\n",
+      "xi_32 ← 0.5*phi_src_TW^0\n",
+      "xi_33 ← -0.5*phi_src_BW^0\n",
+      "xi_34 ← -0.5*phi_src_B^0 + 0.5*phi_src_T^0\n",
+      "xi_35 ← xi_32 + xi_33 + xi_34\n",
+      "xi_36 ← sqrt(xi_27**2 + xi_31**2 + xi_35**2)\n",
+      "xi_37 ← 0.5*phi_src_NW^2\n",
+      "xi_38 ← -0.5*phi_src_SW^2\n",
+      "xi_39 ← -0.5*phi_src_S^2 + 0.5*phi_src_N^2\n",
+      "xi_40 ← xi_37 + xi_38 + xi_39\n",
+      "xi_41 ← 0.5*phi_src_TW^2\n",
+      "xi_42 ← -0.5*phi_src_BW^2\n",
+      "xi_43 ← -0.5*phi_src_B^2 + 0.5*phi_src_T^2\n",
+      "xi_44 ← xi_41 + xi_42 + xi_43\n",
+      "xi_45 ← xi_24**2 + xi_40**2 + xi_44**2\n",
+      "xi_46 ← sqrt(xi_45)\n",
+      "xi_47 ← 0.0726237719428938*mu_src_C + 7.27037126746791\n",
+      "xi_48 ← xi_22*(0.0726237719428938*mu_src_W + xi_47)\n",
+      "xi_49 ← 0.0138651175277251*mu_src_C + 1.37922908109611\n",
+      "xi_50 ← 16.0*phi_dst_C^0 - 16.0*phi_src_C^0\n",
+      "xi_51 ← xi_22/xi_45\n",
+      "xi_52 ← sqrt(xi_14*xi_17)\n",
+      "xi_53 ← 2.0*phi_src_C^1\n",
+      "xi_54 ← -2.0*phi_src_W^1 + xi_53\n",
+      "xi_55 ← 0.5*phi_src_NW^1\n",
+      "xi_56 ← -0.5*phi_src_SW^1\n",
+      "xi_57 ← -0.5*phi_src_S^1 + 0.5*phi_src_N^1\n",
+      "xi_58 ← xi_55 + xi_56 + xi_57\n",
+      "xi_59 ← 0.5*phi_src_TW^1\n",
+      "xi_60 ← -0.5*phi_src_BW^1\n",
+      "xi_61 ← -0.5*phi_src_B^1 + 0.5*phi_src_T^1\n",
+      "xi_62 ← xi_59 + xi_60 + xi_61\n",
+      "xi_63 ← sqrt(xi_54**2 + xi_58**2 + xi_62**2)\n",
+      "xi_64 ← 0.00672506491627283*mu_src_C + 0.974209571226215\n",
+      "xi_65 ← 16.0*phi_dst_C^1 - 16.0*phi_src_C^1\n",
+      "xi_66 ← -xi_9\n",
+      "xi_67 ← phi_src_E^0/2 + xi_10\n",
+      "xi_68 ← xi_67**2\n",
+      "xi_69 ← phi_src_E^1/2 + xi_13\n",
+      "xi_70 ← xi_69**2\n",
+      "xi_71 ← phi_src_E^2/2 + xi_16\n",
+      "xi_72 ← xi_71**2\n",
+      "xi_73 ← 1/(xi_68 + xi_70 + xi_72)\n",
+      "xi_74 ← xi_68*xi_73\n",
+      "xi_75 ← xi_70*xi_73\n",
+      "xi_76 ← xi_72*xi_73\n",
+      "xi_77 ← -xi_23\n",
+      "xi_78 ← 2.0*phi_src_E^2 + xi_77\n",
+      "xi_79 ← sqrt(xi_67*xi_71)\n",
+      "xi_80 ← -xi_26\n",
+      "xi_81 ← 2.0*phi_src_E^0 + xi_80\n",
+      "xi_82 ← 0.5*phi_src_NE^0\n",
+      "xi_83 ← 0.5*phi_src_SE^0\n",
+      "xi_84 ← xi_30 + xi_82 - xi_83\n",
+      "xi_85 ← 0.5*phi_src_TE^0\n",
+      "xi_86 ← 0.5*phi_src_BE^0\n",
+      "xi_87 ← xi_34 + xi_85 - xi_86\n",
+      "xi_88 ← sqrt(xi_81**2 + xi_84**2 + xi_87**2)\n",
+      "xi_89 ← 0.5*phi_src_NE^2\n",
+      "xi_90 ← 0.5*phi_src_SE^2\n",
+      "xi_91 ← xi_39 + xi_89 - xi_90\n",
+      "xi_92 ← 0.5*phi_src_TE^2\n",
+      "xi_93 ← 0.5*phi_src_BE^2\n",
+      "xi_94 ← xi_43 + xi_92 - xi_93\n",
+      "xi_95 ← xi_78**2 + xi_91**2 + xi_94**2\n",
+      "xi_96 ← sqrt(xi_95)\n",
+      "xi_97 ← xi_76*(0.0726237719428938*mu_src_E + xi_47)\n",
+      "xi_98 ← xi_76/xi_95\n",
+      "xi_99 ← sqrt(xi_69*xi_71)\n",
+      "xi_100 ← -xi_53\n",
+      "xi_101 ← 2.0*phi_src_E^1 + xi_100\n",
+      "xi_102 ← 0.5*phi_src_NE^1\n",
+      "xi_103 ← 0.5*phi_src_SE^1\n",
+      "xi_104 ← xi_102 - xi_103 + xi_57\n",
+      "xi_105 ← 0.5*phi_src_TE^1\n",
+      "xi_106 ← 0.5*phi_src_BE^1\n",
+      "xi_107 ← xi_105 - xi_106 + xi_61\n",
+      "xi_108 ← sqrt(xi_101**2 + xi_104**2 + xi_107**2)\n",
+      "xi_109 ← phi_src_S^0/2 + xi_10\n",
+      "xi_110 ← xi_109**2\n",
+      "xi_111 ← phi_src_S^1/2 + xi_13\n",
+      "xi_112 ← xi_111**2\n",
+      "xi_113 ← phi_src_S^2/2 + xi_16\n",
+      "xi_114 ← xi_113**2\n",
+      "xi_115 ← 1/(xi_110 + xi_112 + xi_114)\n",
+      "xi_116 ← xi_110*xi_115\n",
+      "xi_117 ← xi_112*xi_115\n",
+      "xi_118 ← xi_114*xi_115\n",
+      "xi_119 ← -2.0*phi_src_S^2 + xi_23\n",
+      "xi_120 ← sqrt(xi_109*xi_113)\n",
+      "xi_121 ← -2.0*phi_src_S^0 + xi_26\n",
+      "xi_122 ← -0.5*phi_src_W^0 + 0.5*phi_src_E^0\n",
+      "xi_123 ← xi_122 + xi_29 + xi_83\n",
+      "xi_124 ← 0.5*phi_src_TS^0\n",
+      "xi_125 ← -0.5*phi_src_BS^0\n",
+      "xi_126 ← xi_124 + xi_125 + xi_34\n",
+      "xi_127 ← sqrt(xi_121**2 + xi_123**2 + xi_126**2)\n",
+      "xi_128 ← -0.5*phi_src_W^2 + 0.5*phi_src_E^2\n",
+      "xi_129 ← xi_128 + xi_38 + xi_90\n",
+      "xi_130 ← 0.5*phi_src_TS^2\n",
+      "xi_131 ← -0.5*phi_src_BS^2\n",
+      "xi_132 ← xi_130 + xi_131 + xi_43\n",
+      "xi_133 ← xi_119**2 + xi_129**2 + xi_132**2\n",
+      "xi_134 ← sqrt(xi_133)\n",
+      "xi_135 ← xi_118*(0.0726237719428938*mu_src_S + xi_47)\n",
+      "xi_136 ← xi_118/xi_133\n",
+      "xi_137 ← sqrt(xi_111*xi_113)\n",
+      "xi_138 ← -2.0*phi_src_S^1 + xi_53\n",
+      "xi_139 ← -0.5*phi_src_W^1 + 0.5*phi_src_E^1\n",
+      "xi_140 ← xi_103 + xi_139 + xi_56\n",
+      "xi_141 ← 0.5*phi_src_TS^1\n",
+      "xi_142 ← -0.5*phi_src_BS^1\n",
+      "xi_143 ← xi_141 + xi_142 + xi_61\n",
+      "xi_144 ← sqrt(xi_138**2 + xi_140**2 + xi_143**2)\n",
+      "xi_145 ← phi_src_N^0/2 + xi_10\n",
+      "xi_146 ← xi_145**2\n",
+      "xi_147 ← phi_src_N^1/2 + xi_13\n",
+      "xi_148 ← xi_147**2\n",
+      "xi_149 ← phi_src_N^2/2 + xi_16\n",
+      "xi_150 ← xi_149**2\n",
+      "xi_151 ← 1/(xi_146 + xi_148 + xi_150)\n",
+      "xi_152 ← xi_146*xi_151\n",
+      "xi_153 ← xi_148*xi_151\n",
+      "xi_154 ← xi_150*xi_151\n",
+      "xi_155 ← 2.0*phi_src_N^2 + xi_77\n",
+      "xi_156 ← sqrt(xi_145*xi_149)\n",
+      "xi_157 ← 2.0*phi_src_N^0 + xi_80\n",
+      "xi_158 ← xi_122 - xi_28 + xi_82\n",
+      "xi_159 ← 0.5*phi_src_TN^0\n",
+      "xi_160 ← 0.5*phi_src_BN^0\n",
+      "xi_161 ← xi_159 - xi_160 + xi_34\n",
+      "xi_162 ← sqrt(xi_157**2 + xi_158**2 + xi_161**2)\n",
+      "xi_163 ← xi_128 - xi_37 + xi_89\n",
+      "xi_164 ← 0.5*phi_src_TN^2\n",
+      "xi_165 ← 0.5*phi_src_BN^2\n",
+      "xi_166 ← xi_164 - xi_165 + xi_43\n",
+      "xi_167 ← xi_155**2 + xi_163**2 + xi_166**2\n",
+      "xi_168 ← sqrt(xi_167)\n",
+      "xi_169 ← xi_154*(0.0726237719428938*mu_src_N + xi_47)\n",
+      "xi_170 ← xi_154/xi_167\n",
+      "xi_171 ← sqrt(xi_147*xi_149)\n",
+      "xi_172 ← 2.0*phi_src_N^1 + xi_100\n",
+      "xi_173 ← xi_102 + xi_139 - xi_55\n",
+      "xi_174 ← 0.5*phi_src_TN^1\n",
+      "xi_175 ← 0.5*phi_src_BN^1\n",
+      "xi_176 ← xi_174 - xi_175 + xi_61\n",
+      "xi_177 ← sqrt(xi_172**2 + xi_173**2 + xi_176**2)\n",
+      "xi_178 ← phi_src_B^0/2 + xi_10\n",
+      "xi_179 ← xi_178**2\n",
+      "xi_180 ← phi_src_B^1/2 + xi_13\n",
+      "xi_181 ← xi_180**2\n",
+      "xi_182 ← phi_src_B^2/2 + xi_16\n",
+      "xi_183 ← xi_182**2\n",
+      "xi_184 ← 1/(xi_179 + xi_181 + xi_183)\n",
+      "xi_185 ← xi_179*xi_184\n",
+      "xi_186 ← xi_181*xi_184\n",
+      "xi_187 ← xi_183*xi_184\n",
+      "xi_188 ← -2.0*phi_src_B^2 + xi_23\n",
+      "xi_189 ← sqrt(xi_178*xi_182)\n",
+      "xi_190 ← -2.0*phi_src_B^0 + xi_26\n",
+      "xi_191 ← xi_122 + xi_33 + xi_86\n",
+      "xi_192 ← xi_125 + xi_160 + xi_30\n",
+      "xi_193 ← sqrt(xi_190**2 + xi_191**2 + xi_192**2)\n",
+      "xi_194 ← xi_128 + xi_42 + xi_93\n",
+      "xi_195 ← xi_131 + xi_165 + xi_39\n",
+      "xi_196 ← xi_188**2 + xi_194**2 + xi_195**2\n",
+      "xi_197 ← sqrt(xi_196)\n",
+      "xi_198 ← xi_187*(0.0726237719428938*mu_src_B + xi_47)\n",
+      "xi_199 ← xi_187/xi_196\n",
+      "xi_200 ← sqrt(xi_180*xi_182)\n",
+      "xi_201 ← -2.0*phi_src_B^1 + xi_53\n",
+      "xi_202 ← xi_106 + xi_139 + xi_60\n",
+      "xi_203 ← xi_142 + xi_175 + xi_57\n",
+      "xi_204 ← sqrt(xi_201**2 + xi_202**2 + xi_203**2)\n",
+      "xi_205 ← phi_src_T^0/2 + xi_10\n",
+      "xi_206 ← xi_205**2\n",
+      "xi_207 ← phi_src_T^1/2 + xi_13\n",
+      "xi_208 ← xi_207**2\n",
+      "xi_209 ← phi_src_T^2/2 + xi_16\n",
+      "xi_210 ← xi_209**2\n",
+      "xi_211 ← 1/(xi_206 + xi_208 + xi_210)\n",
+      "xi_212 ← xi_206*xi_211\n",
+      "xi_213 ← xi_208*xi_211\n",
+      "xi_214 ← xi_210*xi_211\n",
+      "xi_215 ← 2.0*phi_src_T^2 + xi_77\n",
+      "xi_216 ← sqrt(xi_205*xi_209)\n",
+      "xi_217 ← 2.0*phi_src_T^0 + xi_80\n",
+      "xi_218 ← xi_122 - xi_32 + xi_85\n",
+      "xi_219 ← -xi_124 + xi_159 + xi_30\n",
+      "xi_220 ← sqrt(xi_217**2 + xi_218**2 + xi_219**2)\n",
+      "xi_221 ← xi_128 - xi_41 + xi_92\n",
+      "xi_222 ← -xi_130 + xi_164 + xi_39\n",
+      "xi_223 ← xi_215**2 + xi_221**2 + xi_222**2\n",
+      "xi_224 ← sqrt(xi_223)\n",
+      "xi_225 ← xi_214*(0.0726237719428938*mu_src_T + xi_47)\n",
+      "xi_226 ← xi_214/xi_223\n",
+      "xi_227 ← sqrt(xi_207*xi_209)\n",
+      "xi_228 ← 2.0*phi_src_T^1 + xi_100\n",
+      "xi_229 ← xi_105 + xi_139 - xi_59\n",
+      "xi_230 ← -xi_141 + xi_174 + xi_57\n",
+      "xi_231 ← sqrt(xi_228**2 + xi_229**2 + xi_230**2)\n",
+      "dc_dmu_0_0 ← xi_3/(0.0277302350554502*xi_0 + 0.0134501298325457*xi_1 + 0.145247543885788*xi_2)\n",
+      "dc_dphi_dt_0 ← (0.0134501298325457*mu_src_C + 0.974209571226215)*(-xi_1*xi_8 + xi_4*xi_7) + (0.0277302350554502*mu_src_C + 1.37922908109611)*(-xi_0*xi_8 + xi_5*xi_7) + (0.145247543885788*mu_src_C + 7.27037126746791)*(-xi_2*xi_8 + xi_6*xi_7)\n",
+      "dc_dT_dt_0 ← 0\n",
+      "staggered_down_0_0 ← -xi_24*(3.92699081698724*Piecewise((0, (xi_25 < 1.0e-9) | (xi_36*xi_46 < 1.0e-9)), (xi_11*xi_51*(-xi_20*(0.0138651175277251*mu_src_W + xi_49) + xi_48)*(16.0*phi_dst_W^0 - 16.0*phi_src_W^0 + xi_50)*(xi_24*xi_27 + xi_31*xi_40 + xi_35*xi_44)/(xi_25*xi_36), True)) + 3.92699081698724*Piecewise((0, (xi_52 < 1.0e-9) | (xi_46*xi_63 < 1.0e-9)), (xi_14*xi_51*(-xi_21*(0.00672506491627283*mu_src_W + xi_64) + xi_48)*(16.0*phi_dst_W^1 - 16.0*phi_src_W^1 + xi_65)*(xi_24*xi_54 + xi_40*xi_58 + xi_44*xi_62)/(xi_52*xi_63), True))) + (-2.0*mu_src_W + xi_9)*(2.77302350554502e-5*xi_20 + 1.34501298325457e-5*xi_21 + 0.145247543885788*xi_22)\n",
+      "staggered_up_0_0 ← -xi_78*(3.92699081698724*Piecewise((0, (xi_79 < 1.0e-9) | (xi_88*xi_96 < 1.0e-9)), (xi_67*xi_98*(-xi_74*(0.0138651175277251*mu_src_E + xi_49) + xi_97)*(16.0*phi_dst_E^0 - 16.0*phi_src_E^0 + xi_50)*(xi_78*xi_81 + xi_84*xi_91 + xi_87*xi_94)/(xi_79*xi_88), True)) + 3.92699081698724*Piecewise((0, (xi_99 < 1.0e-9) | (xi_108*xi_96 < 1.0e-9)), (xi_69*xi_98*(-xi_75*(0.00672506491627283*mu_src_E + xi_64) + xi_97)*(16.0*phi_dst_E^1 - 16.0*phi_src_E^1 + xi_65)*(xi_101*xi_78 + xi_104*xi_91 + xi_107*xi_94)/(xi_108*xi_99), True))) + (2.0*mu_src_E + xi_66)*(2.77302350554502e-5*xi_74 + 1.34501298325457e-5*xi_75 + 0.145247543885788*xi_76)\n",
+      "staggered_down_1_0 ← -xi_119*(3.92699081698724*Piecewise((0, (xi_120 < 1.0e-9) | (xi_127*xi_134 < 1.0e-9)), (xi_109*xi_136*(-xi_116*(0.0138651175277251*mu_src_S + xi_49) + xi_135)*(16.0*phi_dst_S^0 - 16.0*phi_src_S^0 + xi_50)*(xi_119*xi_121 + xi_123*xi_129 + xi_126*xi_132)/(xi_120*xi_127), True)) + 3.92699081698724*Piecewise((0, (xi_137 < 1.0e-9) | (xi_134*xi_144 < 1.0e-9)), (xi_111*xi_136*(-xi_117*(0.00672506491627283*mu_src_S + xi_64) + xi_135)*(16.0*phi_dst_S^1 - 16.0*phi_src_S^1 + xi_65)*(xi_119*xi_138 + xi_129*xi_140 + xi_132*xi_143)/(xi_137*xi_144), True))) + (-2.0*mu_src_S + xi_9)*(2.77302350554502e-5*xi_116 + 1.34501298325457e-5*xi_117 + 0.145247543885788*xi_118)\n",
+      "staggered_up_1_0 ← -xi_155*(3.92699081698724*Piecewise((0, (xi_156 < 1.0e-9) | (xi_162*xi_168 < 1.0e-9)), (xi_145*xi_170*(-xi_152*(0.0138651175277251*mu_src_N + xi_49) + xi_169)*(16.0*phi_dst_N^0 - 16.0*phi_src_N^0 + xi_50)*(xi_155*xi_157 + xi_158*xi_163 + xi_161*xi_166)/(xi_156*xi_162), True)) + 3.92699081698724*Piecewise((0, (xi_171 < 1.0e-9) | (xi_168*xi_177 < 1.0e-9)), (xi_147*xi_170*(-xi_153*(0.00672506491627283*mu_src_N + xi_64) + xi_169)*(16.0*phi_dst_N^1 - 16.0*phi_src_N^1 + xi_65)*(xi_155*xi_172 + xi_163*xi_173 + xi_166*xi_176)/(xi_171*xi_177), True))) + (2.0*mu_src_N + xi_66)*(2.77302350554502e-5*xi_152 + 1.34501298325457e-5*xi_153 + 0.145247543885788*xi_154)\n",
+      "staggered_down_2_0 ← -xi_188*(3.92699081698724*Piecewise((0, (xi_189 < 1.0e-9) | (xi_193*xi_197 < 1.0e-9)), (xi_178*xi_199*(-xi_185*(0.0138651175277251*mu_src_B + xi_49) + xi_198)*(16.0*phi_dst_B^0 - 16.0*phi_src_B^0 + xi_50)*(xi_188*xi_190 + xi_191*xi_194 + xi_192*xi_195)/(xi_189*xi_193), True)) + 3.92699081698724*Piecewise((0, (xi_200 < 1.0e-9) | (xi_197*xi_204 < 1.0e-9)), (xi_180*xi_199*(-xi_186*(0.00672506491627283*mu_src_B + xi_64) + xi_198)*(16.0*phi_dst_B^1 - 16.0*phi_src_B^1 + xi_65)*(xi_188*xi_201 + xi_194*xi_202 + xi_195*xi_203)/(xi_200*xi_204), True))) + (-2.0*mu_src_B + xi_9)*(2.77302350554502e-5*xi_185 + 1.34501298325457e-5*xi_186 + 0.145247543885788*xi_187)\n",
+      "staggered_up_2_0 ← -xi_215*(3.92699081698724*Piecewise((0, (xi_216 < 1.0e-9) | (xi_220*xi_224 < 1.0e-9)), (xi_205*xi_226*(-xi_212*(0.0138651175277251*mu_src_T + xi_49) + xi_225)*(16.0*phi_dst_T^0 - 16.0*phi_src_T^0 + xi_50)*(xi_215*xi_217 + xi_218*xi_221 + xi_219*xi_222)/(xi_216*xi_220), True)) + 3.92699081698724*Piecewise((0, (xi_227 < 1.0e-9) | (xi_224*xi_231 < 1.0e-9)), (xi_207*xi_226*(-xi_213*(0.00672506491627283*mu_src_T + xi_64) + xi_225)*(16.0*phi_dst_T^1 - 16.0*phi_src_T^1 + xi_65)*(xi_215*xi_228 + xi_221*xi_229 + xi_222*xi_230)/(xi_227*xi_231), True))) + (2.0*mu_src_T + xi_66)*(2.77302350554502e-5*xi_212 + 1.34501298325457e-5*xi_213 + 0.145247543885788*xi_214)\n",
+      "divMgradmu_0 ← -2.0*staggered_down_0_0 - 2.0*staggered_down_1_0 - 2.0*staggered_down_2_0 + 2.0*staggered_up_0_0 + 2.0*staggered_up_1_0 + 2.0*staggered_up_2_0\n",
+      "mu_dst[0,0,0] ← mu_src_C + 0.03125*dc_dmu_0_0*(-dc_dT_dt_0 - dc_dphi_dt_0 + divMgradmu_0)\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(len(rescheduled_eqs))\n",
+    "for eq in mu_update_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "128\n",
+      "1018\n",
+      "\t.headerflags\t@\"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM60 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM60)\"\n",
+      "\t.elftype\t@\"ET_EXEC\"\n",
+      "\n",
+      "\n",
+      "//--------------------- .text.kernel              --------------------------\n",
+      "\t.section\t.text.kernel,\"ax\",@progbits\n",
+      "\t.sectioninfo\t@\"SHI_REGISTERS=72\"\n",
+      "\t.align\t32\n",
+      "        .global         kernel\n",
+      "        .type           kernel,@function\n",
+      "        .size           kernel,(.L_48 - kernel)\n",
+      "        .other          kernel,@\"STO_CUDA_ENTRY STV_DEFAULT\"\n",
+      "kernel:\n",
+      ".text.kernel:\n",
+      "        /*0008*/                   MOV R1, c[0x0][0x20] ;\n",
+      "        /*0010*/                   S2R R0, SR_CTAID.Y ;\n",
+      "        /*0018*/                   S2R R2, SR_TID.Y ;\n",
+      "        /*0028*/                   S2R R50, SR_CTAID.X ;\n",
+      "        /*0030*/                   S2R R3, SR_TID.X ;\n",
+      "        /*0038*/                   S2R R4, SR_CTAID.Z ;\n",
+      "        /*0048*/         {         XMAD R5, R0.reuse, c[0x0] [0xc], R2   SLOT 0;\n",
+      "        /*0050*/                   S2R R2, SR_TID.Z   SLOT 1        }\n",
+      "        /*0058*/                   XMAD.MRG R6, R0, c[0x0] [0xc].H1, RZ ;\n",
+      "        /*0068*/                   XMAD.PSL.CBCC R0, R0.H1, R6.H1, R5 ;\n",
+      "        /*0070*/                   XMAD R3, R50.reuse, c[0x0] [0x8], R3 ;\n",
+      "        /*0078*/                   XMAD.MRG R5, R50.reuse, c[0x0] [0x8].H1, RZ ;\n",
+      "        /*0088*/                   IADD32I R0, R0, 0x1 ;\n",
+      "        /*0090*/                   XMAD.PSL.CBCC R50, R50.H1, R5.H1, R3 ;\n",
+      "        /*0098*/                   ISETP.GE.U32.AND P0, PT, R0, 0x82, PT ;\n",
+      "        /*00a8*/                   IADD32I R50, R50, 0x1 ;\n",
+      "        /*00b0*/                   XMAD R2, R4.reuse, c[0x0] [0x10], R2 ;\n",
+      "        /*00b8*/                   XMAD.MRG R3, R4.reuse, c[0x0] [0x10].H1, RZ ;\n",
+      "        /*00c8*/                   XMAD.PSL.CBCC R2, R4.H1, R3.H1, R2 ;\n",
+      "        /*00d0*/                   ISETP.LT.U32.AND P0, PT, R50, 0x82, !P0 ;\n",
+      "        /*00d8*/                   IADD32I R2, R2, 0x1 ;\n",
+      "        /*00e8*/                   ISETP.LT.U32.AND P0, PT, R2, 0x82, P0 ;\n",
+      "        /*00f0*/              @!P0 EXIT ;\n",
+      "        /*00f8*/         {         MOV32I R6, 0x82 ;\n",
+      "        /*0108*/                   SSY `(.L_1)         }\n",
+      "        /*0110*/                   MOV32I R9, 0x82 ;\n",
+      "        /*0118*/                   MOV32I R11, 0x4204 ;\n",
+      "        /*0128*/                   XMAD R8, R2, 0x4204, RZ ;\n",
+      "        /*0130*/                   XMAD R3, R0.reuse, 0x82, RZ ;\n",
+      "        /*0138*/                   XMAD R5, R0, 0x82, RZ ;\n",
+      "        /*0148*/                   XMAD R7, R2.reuse, 0x4204, RZ ;\n",
+      "        /*0150*/                   MOV32I R12, 0x4204 ;\n",
+      "        /*0158*/                   XMAD R4, R0.reuse, R6.H1, RZ ;\n",
+      "        /*0168*/                   XMAD R6, R0.H1, R9.H1, RZ ;\n",
+      "        /*0170*/                   XMAD R9, R2.reuse, R11.H1, RZ ;\n",
+      "        /*0178*/                   XMAD.CHI R11, R2.H1, 0x4204, R8 ;\n",
+      "        /*0188*/                   XMAD.PSL R3, R0.H1.reuse, 0x82, R3 ;\n",
+      "        /*0190*/                   XMAD.CHI R5, R0.H1, 0x82, R5 ;\n",
+      "        /*0198*/                   XMAD R10, R2.H1.reuse, R12.H1, RZ ;\n",
+      "        /*01a8*/                   XMAD.PSL R8, R2.H1.reuse, 0x4204, R7 ;\n",
+      "        /*01b0*/                   ISETP.GE.U32.AND P1, PT, R2, 0x81, PT ;\n",
+      "        /*01b8*/                   IADD3.RS R4, R5, R4, R6 ;\n",
+      "        /*01c8*/                   IADD3.RS R9, R11, R9, R10 ;\n",
+      "        /*01d0*/                   IADD R3.CC, R8, R3 ;\n",
+      "        /*01d8*/                   ISETP.GE.U32.AND P0, PT, R0.reuse, 0x81, PT ;\n",
+      "        /*01e8*/                   IADD.X R4, R9, R4 ;\n",
+      "        /*01f0*/                   IADD R50.CC, R50, R3 ;\n",
+      "        /*01f8*/                   ISETP.LT.U32.AND P2, PT, R0, 0x81, !P1 ;\n",
+      "        /*0208*/                   IADD.X R4, RZ, R4 ;\n",
+      "        /*0210*/                   SHL R3, R50.reuse, 0x3 ;\n",
+      "        /*0218*/                   SHF.L.U64 R2, R50, 0x3, R4 ;\n",
+      "        /*0228*/                   IADD R62.CC, R3, c[0x0][0x150] ;\n",
+      "        /*0230*/         {         IADD.X R63, R2, c[0x0][0x154] ;\n",
+      "        /*0238*/              @!P2 SYNC                                                (*\"TARGET= .L_1 \"*)        }\n",
+      "        /*0248*/         {         IADD R60.CC, R3, c[0x0][0x158] ;\n",
+      "        /*0250*/                   SSY `(.L_2)         }\n",
+      "        /*0258*/                   IADD.X R61, R2, c[0x0][0x15c] ;\n",
+      "        /*0268*/                   LDG.E.64 R4, [R60+-0x8] ;\n",
+      "        /*0270*/                   LDG.E.64 R24, [R60] ;\n",
+      "        /*0278*/                   IADD32I R16.CC, R60, 0x1000000 ;\n",
+      "        /*0288*/                   IADD.X R17, RZ, R61 ;\n",
+      "        /*0290*/         {         IADD32I R20.CC, R60, 0x2000000 ;\n",
+      "        /*0298*/                   LDG.E.64 R6, [R16+0xc3038]         }\n",
+      "        /*02a8*/         {         IADD.X R21, RZ, R61 ;\n",
+      "        /*02b0*/                   LDG.E.64 R8, [R16+0xc3040]         }\n",
+      "        /*02b8*/         {         IADD R58.CC, R3, c[0x0][0x140] ;\n",
+      "        /*02c8*/                   LDG.E.64 R18, [R20+0x186078]         }\n",
+      "        /*02d0*/         {         IADD.X R59, R2, c[0x0][0x144] ;\n",
+      "        /*02d8*/                   LDG.E.64 R28, [R20+0x186080]         }\n",
+      "        /*02e8*/                   LDG.E.64 R10, [R58] ;\n",
+      "        /*02f0*/                   DEPBAR.LE SB5, 0x4 ;\n",
+      "        /*02f8*/                   DMUL R14, R4, 0.5 ;\n",
+      "        /*0308*/                   DFMA R12, R24, 0.5, R14 ;\n",
+      "        /*0310*/                   DMUL R22, R12, R12 ;\n",
+      "        /*0318*/                   DSETP.GEU.AND P2, PT, |R22|, c[0x2][0x0], PT ;\n",
+      "        /*0328*/                   F2F.F32.F64 R0, R22 ;\n",
+      "        /*0330*/                   DEPBAR.LE SB5, 0x2 ;\n",
+      "        /*0338*/                   DMUL R26, R6, 0.5 ;\n",
+      "        /*0348*/                   DMUL R14, R18, 0.5 ;\n",
+      "        /*0350*/              @!P2 FMUL R0, R0, 1.175494350822287508e-38 ;\n",
+      "        /*0358*/                   DFMA R26, R8, 0.5, R26 ;\n",
+      "        /*0368*/         {         FMUL.FTZ R0, R0, 1 ;\n",
+      "        /*0370*/                   DEPBAR.LE SB5, 0x1         }\n",
+      "        /*0378*/                   DFMA R14, R28, 0.5, R14 ;\n",
+      "        /*0388*/                   DMUL R16, R26, R26 ;\n",
+      "        /*0390*/                   F2F.F64.F32 R30, R0 ;\n",
+      "        /*0398*/                   DMUL R20, R14, R14 ;\n",
+      "        /*03a8*/                   DADD R30, R16, R30 ;\n",
+      "        /*03b0*/                   DADD R30, R20, R30 ;\n",
+      "        /*03b8*/                   DSETP.GEU.AND P2, PT, |R30|, c[0x2][0x0], PT ;\n",
+      "        /*03c8*/                   F2F.F32.F64 R0, R30 ;\n",
+      "        /*03d0*/              @!P2 FMUL R0, R0, 1.175494350822287508e-38 ;\n",
+      "        /*03d8*/                   IADD32I R30, R0, 0x1800000 ;\n",
+      "        /*03e8*/                   LOP32I.AND R30, R30, 0x7f800000 ;\n",
+      "        /*03f0*/                   ISETP.GT.U32.AND P2, PT, R30, c[0x2][0x8], PT ;\n",
+      "        /*03f8*/               @P2 BRA `(.L_3) ;\n",
+      "        /*0408*/                   CAL `($kernel$__cuda_sm20_rcp_rn_ftz_f32_slowpath) ;\n",
+      "        /*0410*/                   SYNC                                                (*\"TARGET= .L_2 \"*);\n",
+      ".L_3:\n",
+      "        /*0418*/                   MUFU.RCP R30, R0 ;\n",
+      "        /*0428*/                   FFMA R31, R0, R30, c[0x2][0xc] ;\n",
+      "        /*0430*/                   FADD.FTZ R31, -R31, -RZ ;\n",
+      "        /*0438*/         {         FFMA R64, R30, R31, R30 ;\n",
+      "        /*0448*/                   SYNC                                                (*\"TARGET= .L_2 \"*)        }\n",
+      ".L_2:\n",
+      "        /*0450*/         {         IADD32I R68.CC, R60.reuse, 0x2000000 ;\n",
+      "        /*0458*/                   LDG.E.64 R38, [R60+0x410]         }\n",
+      "        /*0468*/         {         IADD.X R69, RZ, R61.reuse ;\n",
+      "        /*0470*/                   LDG.E.64 R34, [R60+-0x410]         }\n",
+      "        /*0478*/         {         DADD R56, R28, R28 ;\n",
+      "        /*0488*/                   LDG.E.64 R44, [R60+-0x418]         }\n",
+      "        /*0490*/         {         DADD R18, R18, R18 ;\n",
+      "        /*0498*/                   LDG.E.64 R52, [R68+0x186490]         }\n",
+      "        /*04a8*/         {         IADD32I R42.CC, R60, 0x1000000 ;\n",
+      "        /*04b0*/                   LDG.E.64 R36, [R68+0x1a70a0]         }\n",
+      "        /*04b8*/         {         DADD R18, R56, -R18 ;\n",
+      "        /*04c8*/                   LDG.E.64 R50, [R68+0x185c70]         }\n",
+      "        /*04d0*/         {         IADD.X R43, RZ, R61 ;\n",
+      "        /*04d8*/                   LDG.E.64 R46, [R68+0x165060]         }\n",
+      "        /*04e8*/         {         DSETP.GEU.AND P2, PT, |R12|, c[0x2][0x0], PT ;\n",
+      "        /*04f0*/                   LDG.E.64 R30, [R68+0x185c68]         }\n",
+      "        /*04f8*/         {         DSETP.GEU.AND P3, PT, |R26|, c[0x2][0x0], PT ;\n",
+      "        /*0508*/                   LDG.E.64 R40, [R68+0x165058]         }\n",
+      "        /*0510*/         {         F2F.F32.F64 R0, R12 ;\n",
+      "        /*0518*/                   LDG.E.64 R48, [R68+0x186488]         }\n",
+      "        /*0528*/         {         F2F.F32.F64 R26, R26 ;\n",
+      "        /*0530*/                   LDG.E.64 R28, [R60+0x21020]         }\n",
+      "        /*0538*/         {         FMUL.FTZ R64, R64, 1 ;\n",
+      "        /*0548*/                   LDG.E.64 R56, [R60+-0x21020]         }\n",
+      "        /*0550*/                   LDG.E.64 R32, [R68+0x1a7098] ;\n",
+      "        /*0558*/                   LDG.E.64 R54, [R42+0xc3450] ;\n",
+      "        /*0568*/                   LDG.E.64 R66, [R42+0xc2c30] ;\n",
+      "        /*0570*/                   LDG.E.64 R58, [R58+-0x8] ;\n",
+      "        /*0578*/         {    @!P2 FMUL R0, R0, 1.175494350822287508e-38 ;\n",
+      "        /*0588*/                   SSY `(.L_4)         }\n",
+      "        /*0590*/              @!P3 FMUL R26, R26, 1.175494350822287508e-38 ;\n",
+      "        /*0598*/                   FMUL.FTZ R0, R0, 1 ;\n",
+      "        /*05a8*/                   FMUL.FTZ R26, R26, 1 ;\n",
+      "        /*05b0*/                   F2F.F64.F32 R68, R26 ;\n",
+      "        /*05b8*/                   DEPBAR.LE SB5, 0x8 ;\n",
+      "        /*05c8*/                   DMUL R52, R52, 0.5 ;\n",
+      "        /*05d0*/                   DMUL R36, R36, 0.5 ;\n",
+      "        /*05d8*/         {         DFMA R52, R50, -0.5, R52 ;\n",
+      "        /*05e8*/                   DEPBAR.LE SB5, 0x6         }\n",
+      "        /*05f0*/         {         DFMA R70, R46, -0.5, R36 ;\n",
+      "        /*05f8*/                   LDG.E.64 R50, [R60+-0x21028]         }\n",
+      "        /*0608*/                   LDG.E.64 R46, [R42+0xc2c28] ;\n",
+      "        /*0610*/                   LDG.E.64 R36, [R42+0xe4060] ;\n",
+      "        /*0618*/                   DFMA R30, R30, -0.5, R52 ;\n",
+      "        /*0628*/         {         F2F.F64.F32 R52, R0 ;\n",
+      "        /*0630*/                   DEPBAR.LE SB5, 0x8         }\n",
+      "        /*0638*/                   DFMA R70, R40, -0.5, R70 ;\n",
+      "        /*0648*/                   LDG.E.64 R40, [R42+0xa2020] ;\n",
+      "        /*0650*/                   DMUL R26, R14, R52 ;\n",
+      "        /*0658*/         {         DMUL R52, R38, 0.5 ;\n",
+      "        /*0668*/                   DEPBAR.LE SB5, 0x8         }\n",
+      "        /*0670*/                   DFMA R48, R48, 0.5, R30 ;\n",
+      "        /*0678*/                   LDG.E.64 R30, [R60+0x408] ;\n",
+      "        /*0688*/                   LDG.E.64 R60, [R60+0x21018] ;\n",
+      "        /*0690*/                   DFMA R38, R34, -0.5, R52 ;\n",
+      "        /*0698*/                   LDG.E.64 R34, [R42+0xa2018] ;\n",
+      "        /*06a8*/                   LDG.E.64 R52, [R42+0xe4058] ;\n",
+      "        /*06b0*/                   DFMA R44, R44, -0.5, R38 ;\n",
+      "        /*06b8*/                   LDG.E.64 R38, [R42+0xc3448] ;\n",
+      "        /*06c8*/                   DEPBAR.LE SB5, 0x9 ;\n",
+      "        /*06d0*/                   DMUL R28, R28, 0.5 ;\n",
+      "        /*06d8*/                   DFMA R32, R32, 0.5, R70 ;\n",
+      "        /*06e8*/                   DFMA R28, R56, -0.5, R28 ;\n",
+      "        /*06f0*/         {         IADD32I R42.CC, R62, 0x1000000 ;\n",
+      "        /*06f8*/                   LDG.E.64 R56, [R62]         }\n",
+      "        /*0708*/                   F2F.F64.F32 R70, R64 ;\n",
+      "        /*0710*/                   DEPBAR.LE SB5, 0x8 ;\n",
+      "        /*0718*/                   DMUL R64, R54, 0.5 ;\n",
+      "        /*0728*/                   IADD.X R43, RZ, R63 ;\n",
+      "        /*0730*/                   LDG.E.64 R54, [R42+0xc3040] ;\n",
+      "        /*0738*/                   DFMA R64, R66, -0.5, R64 ;\n",
+      "        /*0748*/                   DSETP.GEU.AND P2, PT, |R26|, c[0x2][0x0], PT ;\n",
+      "        /*0750*/                   F2F.F32.F64 R0, R26 ;\n",
+      "        /*0758*/                   DMUL R20, R20, R70 ;\n",
+      "        /*0768*/              @!P2 FMUL R0, R0, 1.175494350822287508e-38 ;\n",
+      "        /*0770*/                   MUFU.SQRT R0, R0 ;\n",
+      "        /*0778*/                   DMUL R14, R14, R68 ;\n",
+      "        /*0788*/                   DSETP.GEU.AND P5, PT, |R20|, c[0x2][0x0], PT ;\n",
+      "        /*0790*/                   DMUL R22, R22, R70.reuse ;\n",
+      "        /*0798*/                   DMUL R16, R16, R70 ;\n",
+      "        /*07a8*/                   DEPBAR.LE SB5, 0x6 ;\n",
+      "        /*07b0*/                   DFMA R50, R50, -0.5, R28 ;\n",
+      "        /*07b8*/                   DADD R28, R8, R8 ;\n",
+      "        /*07c8*/                   DFMA R64, R46, -0.5, R64 ;\n",
+      "        /*07d0*/                   DADD R46, R6, R6 ;\n",
+      "        /*07d8*/                   DMUL R66, R36, 0.5 ;\n",
+      "        /*07e8*/                   DADD R36, R24, R24 ;\n",
+      "        /*07f0*/                   DADD R28, R28, -R46 ;\n",
+      "        /*07f8*/         {         DADD R46, R4, R4 ;\n",
+      "        /*0808*/                   DEPBAR.LE SB5, 0x3         }\n",
+      "        /*0810*/                   DFMA R40, R40, -0.5, R66 ;\n",
+      "        /*0818*/                   DFMA R30, R30, 0.5, R44 ;\n",
+      "        /*0828*/                   DADD R36, R36, -R46 ;\n",
+      "        /*0830*/                   DMUL R44, R28, R28 ;\n",
+      "        /*0838*/                   DFMA R34, R34, -0.5, R40 ;\n",
+      "        /*0848*/                   DMUL R40, R36, R36 ;\n",
+      "        /*0850*/                   DSETP.GEU.AND P4, PT, |R44|, c[0x2][0x0], PT ;\n",
+      "        /*0858*/                   F2F.F32.F64 R47, R44 ;\n",
+      "        /*0868*/                   DSETP.GEU.AND P3, PT, |R40|, c[0x2][0x0], PT ;\n",
+      "        /*0870*/                   DMUL R44, R48, R48 ;\n",
+      "        /*0878*/                   F2F.F32.F64 R46, R40 ;\n",
+      "        /*0888*/              @!P4 FMUL R47, R47, 1.175494350822287508e-38 ;\n",
+      "        /*0890*/                   DFMA R26, R18, R18, R44 ;\n",
+      "        /*0898*/                   FMUL.FTZ R40, R47, 1 ;\n",
+      "        /*08a8*/         {    @!P3 FMUL R46, R46, 1.175494350822287508e-38 ;\n",
+      "        /*08b0*/                   DEPBAR.LE SB5, 0x2         }\n",
+      "        /*08b8*/                   DFMA R38, R38, 0.5, R64 ;\n",
+      "        /*08c8*/                   F2F.F64.F32 R40, R40 ;\n",
+      "        /*08d0*/                   FMUL.FTZ R44, R46, 1 ;\n",
+      "        /*08d8*/                   DFMA R26, R32, R32, R26 ;\n",
+      "        /*08e8*/                   DFMA R34, R52, 0.5, R34 ;\n",
+      "        /*08f0*/                   F2F.F64.F32 R44, R44 ;\n",
+      "        /*08f8*/                   DFMA R52, R38, R38, R40 ;\n",
+      "        /*0908*/                   DSETP.GEU.AND P2, PT, |R26|, c[0x2][0x0], PT ;\n",
+      "        /*0910*/                   FMUL.FTZ R46, R0, 1 ;\n",
+      "        /*0918*/                   DFMA R50, R60, 0.5, R50 ;\n",
+      "        /*0928*/                   F2F.F32.F64 R40, R26 ;\n",
+      "        /*0930*/                   DFMA R60, R30, R30, R44 ;\n",
+      "        /*0938*/                   DFMA R44, R34, R34, R52 ;\n",
+      "        /*0948*/                   F2F.F64.F32 R26, R46 ;\n",
+      "        /*0950*/                   MOV32I R64, 0x349e35fd ;\n",
+      "        /*0958*/                   F2F.F32.F64 R46, R20 ;\n",
+      "        /*0968*/                   MOV32I R65, 0x401d14dc ;\n",
+      "        /*0970*/                   DSETP.GEU.AND P4, PT, |R14|, c[0x2][0x0], PT ;\n",
+      "        /*0978*/                   DFMA R52, R50, R50, R60 ;\n",
+      "        /*0988*/                   DSETP.GEU.AND P3, PT, |R44|, c[0x2][0x0], PT ;\n",
+      "        /*0990*/              @!P2 FMUL R40, R40, 1.175494350822287508e-38 ;\n",
+      "        /*0998*/         {         DSETP.GEU.AND P2, PT, R26, c[0x2][0x18], PT ;\n",
+      "        /*09a8*/                   MUFU.RCP R47, R40         }\n",
+      "        /*09b0*/         {         DFMA R66, R10, c[0x2][0x10], R64 ;\n",
+      "        /*09b8*/                   MUFU.SQRT R41, R40         }\n",
+      "        /*09c8*/                   F2F.F32.F64 R60, R14 ;\n",
+      "        /*09d0*/                   F2F.F32.F64 R61, R44 ;\n",
+      "        /*09d8*/                   DMUL R26, R56, 16 ;\n",
+      "        /*09e8*/              @!P5 FMUL R46, R46, 1.175494350822287508e-38 ;\n",
+      "        /*09f0*/                   DSETP.GEU.AND P5, PT, |R52|, c[0x2][0x0], PT ;\n",
+      "        /*09f8*/                   F2F.F32.F64 R52, R52 ;\n",
+      "        /*0a08*/                   DMUL R14, R54, 16 ;\n",
+      "        /*0a10*/                   DFMA R26, R24, -16, R26 ;\n",
+      "        /*0a18*/              @!P4 FMUL R60, R60, 1.175494350822287508e-38 ;\n",
+      "        /*0a28*/         {         DMUL R24, R20, R66 ;\n",
+      "        /*0a30*/                   MUFU.SQRT R53, R60         }\n",
+      "        /*0a38*/              @!P3 FMUL R61, R61, 1.175494350822287508e-38 ;\n",
+      "        /*0a48*/         {         FMUL.FTZ R46, R46, R47 ;\n",
+      "        /*0a50*/                   MUFU.SQRT R54, R61         }\n",
+      "        /*0a58*/                   FMUL.FTZ R44, R41, 1 ;\n",
+      "        /*0a68*/                   DFMA R14, R8, -16, R14 ;\n",
+      "        /*0a70*/                   MOV R40, RZ ;\n",
+      "        /*0a78*/                   DFMA R24, R58, c[0x2][0x10], R24 ;\n",
+      "        /*0a88*/                   F2F.F64.F32 R46, R46 ;\n",
+      "        /*0a90*/                   F2F.F64.F32 R44, R44 ;\n",
+      "        /*0a98*/                   MOV R41, RZ ;\n",
+      "        /*0aa8*/                   MOV R56, RZ ;\n",
+      "        /*0ab0*/                   MOV R57, RZ ;\n",
+      "        /*0ab8*/         {    @!P5 FMUL R52, R52, 1.175494350822287508e-38 ;\n",
+      "        /*0ac8*/              @!P2 SYNC                                                (*\"TARGET= .L_4 \"*)        }\n",
+      "        /*0ad0*/                   MUFU.SQRT R55, R52 ;\n",
+      "        /*0ad8*/                   FMUL.FTZ R66, R55, 1 ;\n",
+      "        /*0ae8*/                   F2F.F64.F32 R66, R66 ;\n",
+      "        /*0af0*/                   DMUL R66, R44, R66 ;\n",
+      "        /*0af8*/                   DSETP.GEU.AND P2, PT, R66, c[0x2][0x18], PT ;\n",
+      "        /*0b08*/              @!P2 SYNC                                                (*\"TARGET= .L_4 \"*);\n",
+      "        /*0b10*/         {         DMUL R12, R12, R46 ;\n",
+      "        /*0b18*/                   LDG.E.64 R56, [R62+-0x8]         }\n",
+      "        /*0b28*/                   MOV32I R60, 0x834fff9c ;\n",
+      "        /*0b30*/                   MOV32I R61, 0x3ff61152 ;\n",
+      "        /*0b38*/                   FMUL.FTZ R0, R0, R55 ;\n",
+      "        /*0b48*/                   MUFU.RCP R0, R0 ;\n",
+      "        /*0b50*/                   DMUL R66, R22, R12 ;\n",
+      "        /*0b58*/                   DFMA R12, R10, c[0x2][0x20], R60 ;\n",
+      "        /*0b68*/                   DMUL R12, R12, R66 ;\n",
+      "        /*0b70*/                   DFMA R12, R58, c[0x2][0x20], -R12 ;\n",
+      "        /*0b78*/                   DFMA R26, R24, R26, R12 ;\n",
+      "        /*0b88*/                   DFMA R26, R4, 16.NEG, R26 ;\n",
+      "        /*0b90*/                   DMUL R56, R56, 16 ;\n",
+      "        /*0b98*/                   DMUL R56, R18, R56 ;\n",
+      "        /*0ba8*/                   DFMA R26, R36, R56, R26 ;\n",
+      "        /*0bb0*/                   DFMA R26, R30, R48, R26 ;\n",
+      "        /*0bb8*/                   DFMA R26, R50, R32, R26 ;\n",
+      "        /*0bc8*/                   DSETP.GEU.AND P2, PT, |R26|, c[0x2][0x0], PT ;\n",
+      "        /*0bd0*/                   F2F.F32.F64 R26, R26 ;\n",
+      "        /*0bd8*/              @!P2 FMUL R26, R26, 1.175494350822287508e-38 ;\n",
+      "        /*0be8*/                   FMUL.FTZ R26, R26, R0 ;\n",
+      "        /*0bf0*/         {         F2F.F64.F32 R56, R26 ;\n",
+      "        /*0bf8*/                   SYNC                                                (*\"TARGET= .L_4 \"*)        }\n",
+      ".L_4:\n",
+      "        /*0c08*/         {         FMUL.FTZ R26, R54, 1 ;\n",
+      "        /*0c10*/                   SSY `(.L_5)         }\n",
+      "        /*0c18*/                   FMUL.FTZ R4, R53, 1 ;\n",
+      "        /*0c28*/                   F2F.F64.F32 R26, R26 ;\n",
+      "        /*0c30*/                   F2F.F64.F32 R4, R4 ;\n",
+      "        /*0c38*/                   DMUL R26, R44, R26 ;\n",
+      "        /*0c48*/                   DSETP.GEU.AND P2, PT, R26, c[0x2][0x18], PT ;\n",
+      "        /*0c50*/                   DSETP.LT.OR P2, PT, R4, c[0x2][0x18], !P2 ;\n",
+      "        /*0c58*/               @P2 SYNC                                                (*\"TARGET= .L_5 \"*);\n",
+      "        /*0c68*/         {         DMUL R26, R6, 0.5 ;\n",
+      "        /*0c70*/                   LDG.E.64 R42, [R42+0xc3038]         }\n",
+      "        /*0c78*/                   MOV32I R4, 0x8cfbbca1 ;\n",
+      "        /*0c88*/                   MOV32I R5, 0x3fef2cb9 ;\n",
+      "        /*0c90*/                   FMUL.FTZ R53, R53, R54 ;\n",
+      "        /*0c98*/                   MUFU.RCP R53, R53 ;\n",
+      "        /*0ca8*/                   DFMA R26, R8, 0.5, R26 ;\n",
+      "        /*0cb0*/                   DFMA R4, R10, c[0x2][0x28], R4 ;\n",
+      "        /*0cb8*/                   DMUL R26, R26, R46 ;\n",
+      "        /*0cc8*/                   DMUL R26, R16, R26 ;\n",
+      "        /*0cd0*/                   DMUL R4, R4, R26 ;\n",
+      "        /*0cd8*/                   DFMA R4, R58, c[0x2][0x28], -R4 ;\n",
+      "        /*0ce8*/                   DFMA R4, R24, R14, R4 ;\n",
+      "        /*0cf0*/                   DFMA R6, R6, 16.NEG, R4 ;\n",
+      "        /*0cf8*/                   DMUL R8, R42, 16 ;\n",
+      "        /*0d08*/                   DMUL R8, R18, R8 ;\n",
+      "        /*0d10*/                   DFMA R6, R28, R8, R6 ;\n",
+      "        /*0d18*/                   DFMA R6, R48, R38, R6 ;\n",
+      "        /*0d28*/                   DFMA R6, R32, R34, R6 ;\n",
+      "        /*0d30*/                   DSETP.GEU.AND P2, PT, |R6|, c[0x2][0x0], PT ;\n",
+      "        /*0d38*/                   F2F.F32.F64 R6, R6 ;\n",
+      "        /*0d48*/              @!P2 FMUL R6, R6, 1.175494350822287508e-38 ;\n",
+      "        /*0d50*/                   FMUL.FTZ R6, R6, R53 ;\n",
+      "        /*0d58*/                   F2F.F64.F32 R40, R6 ;\n",
+      "        /*0d68*/         {         DMUL R40, R40, c[0x2][0x30] ;\n",
+      "        /*0d70*/                   SYNC                                                (*\"TARGET= .L_5 \"*)        }\n",
+      ".L_5:\n",
+      "        /*0d78*/                   DMUL R18, R18, R56 ;\n",
+      "        /*0d88*/                   DADD R4, R58, R58 ;\n",
+      "        /*0d90*/                   DFMA R40, R18, c[0x2][0x38], R40 ;\n",
+      "        /*0d98*/                   DMUL R4, R22, R4 ;\n",
+      "        /*0da8*/                   DFMA R10, R10, 2, R40 ;\n",
+      "        /*0db0*/                   DFMA R10, R4, c[0x2][0x40], R10 ;\n",
+      "        /*0db8*/                   IADD R4.CC, R3, c[0x0][0x148] ;\n",
+      "        /*0dc8*/                   DFMA R10, R16, c[0x2][0x48], R10 ;\n",
+      "        /*0dd0*/                   IADD.X R5, R2, c[0x0][0x14c] ;\n",
+      "        /*0dd8*/                   DFMA R10, R20, c[0x2][0x50], R10 ;\n",
+      "        /*0de8*/                   STG.E.64 [R4], R10 ;\n",
+      "        /*0df0*/                   SYNC                                                (*\"TARGET= .L_1 \"*);\n",
+      ".L_1:\n",
+      "        /*0df8*/                   S2R R0, SR_TID.X ;\n",
+      "        /*0e08*/                   SSY `(.L_6) ;\n",
+      "        /*0e10*/                   S2R R5, SR_CTAID.X ;\n",
+      "        /*0e18*/                   XMAD R0, R5.reuse, c[0x0] [0x8], R0 ;\n",
+      "        /*0e28*/                   XMAD.MRG R6, R5.reuse, c[0x0] [0x8].H1, RZ ;\n",
+      "        /*0e30*/                   XMAD.PSL.CBCC R0, R5.H1, R6.H1, R0 ;\n",
+      "        /*0e38*/                   IADD32I R50, R0, 0x1 ;\n",
+      "        /*0e48*/                   ISETP.LT.U32.AND P1, PT, R50, 0x81, !P1 ;\n",
+      "        /*0e50*/              @!P1 SYNC                                                (*\"TARGET= .L_6 \"*);\n",
+      "        /*0e58*/         {         IADD R60.CC, R3.reuse, c[0x0][0x158] ;\n",
+      "        /*0e68*/                   LDG.E.64 R42, [R62]         }\n",
+      "        /*0e70*/         {         IADD.X R61, R2, c[0x0][0x15c] ;\n",
+      "        /*0e78*/                   SSY `(.L_7)         }\n",
+      "        /*0e88*/         {         IADD32I R28.CC, R60.reuse, 0x1000000 ;\n",
+      "        /*0e90*/                   LDG.E.64 R4, [R60+-0x410]         }\n",
+      "        /*0e98*/         {         IADD.X R29, RZ, R61.reuse ;\n",
+      "        /*0ea8*/                   LDG.E.64 R40, [R60]         }\n",
+      "        /*0eb0*/         {         IADD32I R30.CC, R60, 0x2000000 ;\n",
+      "        /*0eb8*/                   LDG.E.64 R52, [R60+0x21020]         }\n",
+      "        /*0ec8*/         {         IADD.X R31, RZ, R61 ;\n",
+      "        /*0ed0*/                   LDG.E.64 R6, [R28+0xc2c30]         }\n",
+      "        /*0ed8*/         {         IADD32I R38.CC, R62, 0x1000000 ;\n",
+      "        /*0ee8*/                   LDG.E.64 R10, [R28+0xc3040]         }\n",
+      "        /*0ef0*/         {         IADD.X R39, RZ, R63 ;\n",
+      "        /*0ef8*/                   LDG.E.64 R8, [R30+0x185c70]         }\n",
+      "        /*0f08*/         {         IADD R58.CC, R3, c[0x0][0x140] ;\n",
+      "        /*0f10*/                   LDG.E.64 R44, [R30+0x186080]         }\n",
+      "        /*0f18*/         {         IADD.X R59, R2, c[0x0][0x144] ;\n",
+      "        /*0f28*/                   LDG.E.64 R48, [R30+0x1a70a0]         }\n",
+      "        /*0f30*/                   LDG.E.64 R46, [R28+0xe4060] ;\n",
+      "        /*0f38*/                   LDG.E.64 R34, [R30+0x165060] ;\n",
+      "        /*0f48*/                   LDG.E.64 R38, [R38+0xc3040] ;\n",
+      "        /*0f50*/                   LDG.E.64 R32, [R60+-0x21020] ;\n",
+      "        /*0f58*/                   LDG.E.64 R36, [R28+0xa2020] ;\n",
+      "        /*0f68*/                   LDG.E.64 R12, [R58] ;\n",
+      "        /*0f70*/                   LDG.E.64 R14, [R60+-0x418] ;\n",
+      "        /*0f78*/                   LDG.E.64 R18, [R28+0xc2c28] ;\n",
+      "        /*0f88*/                   LDG.E.64 R16, [R30+0x185c68] ;\n",
+      "        /*0f90*/                   DEPBAR.LE SB5, 0xc ;\n",
+      "        /*0f98*/                   DMUL R22, R4, 0.5 ;\n",
+      "        /*0fa8*/                   DFMA R20, R40, 0.5, R22 ;\n",
+      "        /*0fb0*/                   DMUL R22, R20, R20 ;\n",
+      "        /*0fb8*/                   DSETP.GEU.AND P1, PT, |R22|, c[0x2][0x0], PT ;\n",
+      "        /*0fc8*/                   F2F.F32.F64 R0, R22 ;\n",
+      "        /*0fd0*/                   DEPBAR.LE SB5, 0xa ;\n",
+      "        /*0fd8*/                   DMUL R24, R6, 0.5 ;\n",
+      "        /*0fe8*/                   DMUL R26, R8, 0.5 ;\n",
+      "        /*0ff0*/              @!P1 FMUL R0, R0, 1.175494350822287508e-38 ;\n",
+      "        /*0ff8*/                   DFMA R24, R10, 0.5, R24 ;\n",
+      "        /*1008*/         {         FMUL.FTZ R0, R0, 1 ;\n",
+      "        /*1010*/                   DEPBAR.LE SB5, 0x9         }\n",
+      "        /*1018*/                   DFMA R26, R44, 0.5, R26 ;\n",
+      "        /*1028*/                   DMUL R28, R24, R24 ;\n",
+      "        /*1030*/                   F2F.F64.F32 R54, R0 ;\n",
+      "        /*1038*/                   DMUL R30, R26, R26 ;\n",
+      "        /*1048*/                   DADD R56, R28, R54 ;\n",
+      "        /*1050*/                   DADD R56, R30, R56 ;\n",
+      "        /*1058*/                   DSETP.GEU.AND P1, PT, |R56|, c[0x2][0x0], PT ;\n",
+      "        /*1068*/                   F2F.F32.F64 R0, R56 ;\n",
+      "        /*1070*/              @!P1 FMUL R0, R0, 1.175494350822287508e-38 ;\n",
+      "        /*1078*/                   IADD32I R51, R0, 0x1800000 ;\n",
+      "        /*1088*/         {         DMUL R54, R52, 0.5 ;\n",
+      "        /*1090*/                   DEPBAR.LE SB5, 0x7         }\n",
+      "        /*1098*/                   DMUL R48, R48, 0.5 ;\n",
+      "        /*10a8*/                   LOP32I.AND R51, R51, 0x7f800000 ;\n",
+      "        /*10b0*/                   DMUL R46, R46, 0.5 ;\n",
+      "        /*10b8*/         {         DMUL R42, R42, 16 ;\n",
+      "        /*10c8*/                   DEPBAR.LE SB5, 0x4         }\n",
+      "        /*10d0*/                   ISETP.GT.U32.AND P1, PT, R51, c[0x2][0x8], PT ;\n",
+      "        /*10d8*/                   DFMA R34, R34, -0.5, R48 ;\n",
+      "        /*10e8*/                   DFMA R32, R32, -0.5, R54 ;\n",
+      "        /*10f0*/                   DMUL R48, R38, 16 ;\n",
+      "        /*10f8*/                   MOV32I R54, 0x349e35fd ;\n",
+      "        /*1108*/                   MOV32I R55, 0x401d14dc ;\n",
+      "        /*1110*/                   DFMA R36, R36, -0.5, R46 ;\n",
+      "        /*1118*/                   DFMA R38, R40.reuse, -16, R42 ;\n",
+      "        /*1128*/                   DADD R44, R44, R44 ;\n",
+      "        /*1130*/                   DADD R40, R40, R40 ;\n",
+      "        /*1138*/                   DADD R46, R10.reuse, R10 ;\n",
+      "        /*1148*/                   DFMA R48, R10, -16, R48 ;\n",
+      "        /*1150*/         {         DFMA R42, R12, c[0x2][0x10], R54 ;\n",
+      "        /*1158*/               @P1 BRA `(.L_8)         }\n",
+      "        /*1168*/                   CAL `($kernel$__cuda_sm20_rcp_rn_ftz_f32_slowpath) ;\n",
+      "        /*1170*/                   SYNC                                                (*\"TARGET= .L_7 \"*);\n",
+      ".L_8:\n",
+      "        /*1178*/                   MUFU.RCP R51, R0 ;\n",
+      "        /*1188*/                   FFMA R52, R0, R51, c[0x2][0xc] ;\n",
+      "        /*1190*/                   FADD.FTZ R52, -R52, -RZ ;\n",
+      "        /*1198*/         {         FFMA R64, R51, R52, R51 ;\n",
+      "        /*11a8*/                   SYNC                                                (*\"TARGET= .L_7 \"*)        }\n",
+      ".L_7:\n",
+      "        /*11b0*/         {         IADD32I R52.CC, R60, 0x2000000 ;\n",
+      "        /*11b8*/                   LDG.E.64 R58, [R58+-0x410]         }\n",
+      "        /*11c8*/         {         IADD.X R53, RZ, R61 ;\n",
+      "        /*11d0*/                   SSY `(.L_9)         }\n",
+      "        /*11d8*/         {         DADD R70, R8, R8 ;\n",
+      "        /*11e8*/                   LDG.E.64 R68, [R52+0x186088]         }\n",
+      "        /*11f0*/         {         DADD R44, R44, -R70 ;\n",
+      "        /*11f8*/                   LDG.E.64 R66, [R52+0x186078]         }\n",
+      "        /*1208*/         {         DSETP.GEU.AND P1, PT, |R20|, c[0x2][0x0], PT ;\n",
+      "        /*1210*/                   LDG.E.64 R56, [R52+0x164c50]         }\n",
+      "        /*1218*/         {         F2F.F32.F64 R0, R20 ;\n",
+      "        /*1228*/                   LDG.E.64 R54, [R52+0x185c78]         }\n",
+      "        /*1230*/         {         DSETP.GEU.AND P3, PT, |R24|, c[0x2][0x0], PT ;\n",
+      "        /*1238*/                   LDG.E.64 R8, [R60+0x8]         }\n",
+      "        /*1248*/         {         FMUL.FTZ R64, R64, 1 ;\n",
+      "        /*1250*/                   LDG.E.64 R52, [R52+0x1a6c90]         }\n",
+      "        /*1258*/                   DEPBAR.LE SB5, 0x2 ;\n",
+      "        /*1268*/                   DMUL R68, R68, 0.5 ;\n",
+      "        /*1270*/                   DFMA R66, R66, -0.5, R68 ;\n",
+      "        /*1278*/                   DFMA R68, R16, -0.5, R66 ;\n",
+      "        /*1288*/                   LDG.E.64 R16, [R60+-0x8] ;\n",
+      "        /*1290*/                   DEPBAR.LE SB5, 0x2 ;\n",
+      "        /*1298*/                   DFMA R66, R56, -0.5, R34 ;\n",
+      "        /*12a8*/                   IADD32I R34.CC, R60, 0x1000000 ;\n",
+      "        /*12b0*/                   IADD.X R35, RZ, R61 ;\n",
+      "        /*12b8*/                   LDG.E.64 R56, [R34+0xc3048] ;\n",
+      "        /*12c8*/                   DEPBAR.LE SB5, 0x2 ;\n",
+      "        /*12d0*/                   DFMA R52, R52, 0.5, R66 ;\n",
+      "        /*12d8*/                   LDG.E.64 R66, [R34+0xc3038] ;\n",
+      "        /*12e8*/                   DFMA R54, R54, 0.5, R68 ;\n",
+      "        /*12f0*/                   LDG.E.64 R68, [R60+-0x408] ;\n",
+      "        /*12f8*/                   DMUL R70, R8, 0.5 ;\n",
+      "        /*1308*/                   LDG.E.64 R8, [R34+0xc2c38] ;\n",
+      "        /*1310*/                   DEPBAR.LE SB5, 0x4 ;\n",
+      "        /*1318*/                   DFMA R16, R16, -0.5, R70 ;\n",
+      "        /*1328*/                   DFMA R70, R14, -0.5, R16 ;\n",
+      "        /*1330*/                   LDG.E.64 R14, [R60+-0x21430] ;\n",
+      "        /*1338*/                   DEPBAR.LE SB5, 0x3 ;\n",
+      "        /*1348*/                   DMUL R16, R56, 0.5 ;\n",
+      "        /*1350*/                   LDG.E.64 R56, [R34+0xe3c50] ;\n",
+      "        /*1358*/                   DFMA R16, R66, -0.5, R16 ;\n",
+      "        /*1368*/                   LDG.E.64 R66, [R34+0xa1c10] ;\n",
+      "        /*1370*/                   DEPBAR.LE SB5, 0x3 ;\n",
+      "        /*1378*/                   DFMA R68, R68, 0.5, R70 ;\n",
+      "        /*1388*/                   LDG.E.64 R70, [R60+0x20c10] ;\n",
+      "        /*1390*/                   DFMA R18, R18, -0.5, R16 ;\n",
+      "        /*1398*/                   DADD R16, R6, R6 ;\n",
+      "        /*13a8*/              @!P1 FMUL R0, R0, 1.175494350822287508e-38 ;\n",
+      "        /*13b0*/                   DADD R46, R46, -R16 ;\n",
+      "        /*13b8*/                   FMUL.FTZ R0, R0, 1 ;\n",
+      "        /*13c8*/         {         DMUL R16, R46, R46 ;\n",
+      "        /*13d0*/                   DEPBAR.LE SB5, 0x3         }\n",
+      "        /*13d8*/                   DFMA R8, R8, 0.5, R18 ;\n",
+      "        /*13e8*/                   DSETP.GEU.AND P1, PT, |R16|, c[0x2][0x0], PT ;\n",
+      "        /*13f0*/                   F2F.F32.F64 R51, R16 ;\n",
+      "        /*13f8*/                   DMUL R34, R54, R54 ;\n",
+      "        /*1408*/                   F2F.F32.F64 R24, R24 ;\n",
+      "        /*1410*/              @!P1 FMUL R51, R51, 1.175494350822287508e-38 ;\n",
+      "        /*1418*/              @!P3 FMUL R24, R24, 1.175494350822287508e-38 ;\n",
+      "        /*1428*/                   FMUL.FTZ R24, R24, 1 ;\n",
+      "        /*1430*/                   DEPBAR.LE SB5, 0x2 ;\n",
+      "        /*1438*/                   DFMA R32, R14, -0.5, R32 ;\n",
+      "        /*1448*/                   DADD R14, R4, R4 ;\n",
+      "        /*1450*/         {         DADD R40, R40, -R14 ;\n",
+      "        /*1458*/                   DEPBAR.LE SB5, 0x1         }\n",
+      "        /*1468*/                   DFMA R14, R66, -0.5, R36 ;\n",
+      "        /*1470*/                   F2F.F64.F32 R36, R0 ;\n",
+      "        /*1478*/                   DMUL R18, R40, R40 ;\n",
+      "        /*1488*/                   DMUL R16, R26, R36 ;\n",
+      "        /*1490*/                   DSETP.GEU.AND P2, PT, |R18|, c[0x2][0x0], PT ;\n",
+      "        /*1498*/                   F2F.F32.F64 R0, R18 ;\n",
+      "        /*14a8*/                   DFMA R18, R44, R44, R34 ;\n",
+      "        /*14b0*/                   DSETP.GEU.AND P1, PT, |R16|, c[0x2][0x0], PT ;\n",
+      "        /*14b8*/                   F2F.F32.F64 R25, R16 ;\n",
+      "        /*14c8*/                   FMUL.FTZ R34, R51, 1 ;\n",
+      "        /*14d0*/                   F2F.F64.F32 R16, R64 ;\n",
+      "        /*14d8*/                   DFMA R18, R52, R52, R18 ;\n",
+      "        /*14e8*/              @!P2 FMUL R0, R0, 1.175494350822287508e-38 ;\n",
+      "        /*14f0*/                   F2F.F64.F32 R34, R34 ;\n",
+      "        /*14f8*/                   DMUL R36, R30, R16 ;\n",
+      "        /*1508*/                   DFMA R14, R56, 0.5, R14 ;\n",
+      "        /*1510*/                   FMUL.FTZ R30, R0, 1 ;\n",
+      "        /*1518*/              @!P1 FMUL R25, R25, 1.175494350822287508e-38 ;\n",
+      "        /*1528*/         {         F2F.F64.F32 R56, R24 ;\n",
+      "        /*1530*/                   MUFU.SQRT R0, R25         }\n",
+      "        /*1538*/                   DSETP.GEU.AND P1, PT, |R18|, c[0x2][0x0], PT ;\n",
+      "        /*1548*/                   F2F.F32.F64 R24, R18 ;\n",
+      "        /*1550*/                   DFMA R60, R8, R8, R34 ;\n",
+      "        /*1558*/                   F2F.F64.F32 R18, R30 ;\n",
+      "        /*1568*/                   DFMA R70, R70, 0.5, R32 ;\n",
+      "        /*1570*/                   DSETP.GEU.AND P2, PT, |R36|, c[0x2][0x0], PT ;\n",
+      "        /*1578*/                   DMUL R26, R26, R56 ;\n",
+      "        /*1588*/                   DFMA R60, R14, R14, R60 ;\n",
+      "        /*1590*/                   DFMA R32, R68, R68, R18 ;\n",
+      "        /*1598*/                   FMUL.FTZ R18, R0, 1 ;\n",
+      "        /*15a8*/                   F2F.F32.F64 R34, R36 ;\n",
+      "        /*15b0*/                   DSETP.GEU.AND P3, PT, |R26|, c[0x2][0x0], PT ;\n",
+      "        /*15b8*/              @!P1 FMUL R24, R24, 1.175494350822287508e-38 ;\n",
+      "        /*15c8*/         {         DSETP.GEU.AND P4, PT, |R60|, c[0x2][0x0], PT ;\n",
+      "        /*15d0*/                   MUFU.RCP R35, R24         }\n",
+      "        /*15d8*/         {         F2F.F64.F32 R18, R18 ;\n",
+      "        /*15e8*/                   MUFU.SQRT R25, R24         }\n",
+      "        /*15f0*/                   DFMA R30, R70, R70, R32 ;\n",
+      "        /*15f8*/                   F2F.F32.F64 R27, R26 ;\n",
+      "        /*1608*/              @!P2 FMUL R34, R34, 1.175494350822287508e-38 ;\n",
+      "        /*1610*/                   F2F.F32.F64 R26, R60 ;\n",
+      "        /*1618*/                   DSETP.GEU.AND P2, PT, R18, c[0x2][0x18], PT ;\n",
+      "        /*1628*/                   DSETP.GEU.AND P1, PT, |R30|, c[0x2][0x0], PT ;\n",
+      "        /*1630*/                   DMUL R18, R58, c[0x2][0x10] ;\n",
+      "        /*1638*/              @!P3 FMUL R27, R27, 1.175494350822287508e-38 ;\n",
+      "        /*1648*/         {         FMUL.FTZ R32, R34, R35 ;\n",
+      "        /*1650*/                   MUFU.SQRT R27, R27         }\n",
+      "        /*1658*/              @!P4 FMUL R26, R26, 1.175494350822287508e-38 ;\n",
+      "        /*1668*/         {         F2F.F32.F64 R33, R30 ;\n",
+      "        /*1670*/                   MUFU.SQRT R26, R26         }\n",
+      "        /*1678*/                   FMUL.FTZ R25, R25, 1 ;\n",
+      "        /*1688*/                   DFMA R42, R42, R36, R18 ;\n",
+      "        /*1690*/                   DMUL R22, R22, R16.reuse ;\n",
+      "        /*1698*/                   F2F.F64.F32 R18, R32 ;\n",
+      "        /*16a8*/                   DMUL R16, R28, R16 ;\n",
+      "        /*16b0*/                   F2F.F64.F32 R24, R25 ;\n",
+      "        /*16b8*/                   MOV R28, RZ ;\n",
+      "        /*16c8*/                   MOV R29, RZ ;\n",
+      "        /*16d0*/                   MOV R30, RZ ;\n",
+      "        /*16d8*/                   MOV R31, RZ ;\n",
+      "        /*16e8*/         {    @!P1 FMUL R33, R33, 1.175494350822287508e-38 ;\n",
+      "        /*16f0*/              @!P2 SYNC                                                (*\"TARGET= .L_9 \"*)        }\n",
+      "        /*16f8*/                   MUFU.SQRT R33, R33 ;\n",
+      "        /*1708*/                   FMUL.FTZ R34, R33, 1 ;\n",
+      "        /*1710*/                   F2F.F64.F32 R34, R34 ;\n",
+      "        /*1718*/                   DMUL R34, R24, R34 ;\n",
+      "        /*1728*/                   DSETP.GEU.AND P1, PT, R34, c[0x2][0x18], PT ;\n",
+      "        /*1730*/              @!P1 SYNC                                                (*\"TARGET= .L_9 \"*);\n",
+      "        /*1738*/         {         DMUL R20, R20, R18 ;\n",
+      "        /*1748*/                   LDG.E.64 R30, [R62+-0x410]         }\n",
+      "        /*1750*/                   MOV32I R66, 0x834fff9c ;\n",
+      "        /*1758*/                   MOV32I R67, 0x3ff61152 ;\n",
+      "        /*1768*/                   FMUL.FTZ R0, R0, R33 ;\n",
+      "        /*1770*/                   MUFU.RCP R0, R0 ;\n",
+      "        /*1778*/                   DMUL R34, R22, R20 ;\n",
+      "        /*1788*/                   DFMA R20, R12, c[0x2][0x20], R66 ;\n",
+      "        /*1790*/                   DMUL R20, R20, R34 ;\n",
+      "        /*1798*/                   DFMA R20, R58, c[0x2][0x20], -R20 ;\n",
+      "        /*17a8*/                   DFMA R38, R38, R42, R20 ;\n",
+      "        /*17b0*/                   DFMA R38, R4, 16.NEG, R38 ;\n",
+      "        /*17b8*/                   DMUL R30, R30, 16 ;\n",
+      "        /*17c8*/                   DMUL R30, R44, R30 ;\n",
+      "        /*17d0*/                   DFMA R30, R40, R30, R38 ;\n",
+      "        /*17d8*/                   DFMA R30, R68, R54, R30 ;\n",
+      "        /*17e8*/                   DFMA R30, R70, R52, R30 ;\n",
+      "        /*17f0*/                   DSETP.GEU.AND P1, PT, |R30|, c[0x2][0x0], PT ;\n",
+      "        /*17f8*/                   F2F.F32.F64 R30, R30 ;\n",
+      "        /*1808*/              @!P1 FMUL R30, R30, 1.175494350822287508e-38 ;\n",
+      "        /*1810*/                   FMUL.FTZ R30, R30, R0 ;\n",
+      "        /*1818*/         {         F2F.F64.F32 R30, R30 ;\n",
+      "        /*1828*/                   SYNC                                                (*\"TARGET= .L_9 \"*)        }\n",
+      ".L_9:\n",
+      "        /*1830*/         {         FMUL.FTZ R34, R26, 1 ;\n",
+      "        /*1838*/                   SSY `(.L_10)         }\n",
+      "        /*1848*/                   FMUL.FTZ R4, R27, 1 ;\n",
+      "        /*1850*/                   F2F.F64.F32 R34, R34 ;\n",
+      "        /*1858*/                   F2F.F64.F32 R4, R4 ;\n",
+      "        /*1868*/                   DMUL R24, R24, R34 ;\n",
+      "        /*1870*/                   DSETP.GEU.AND P1, PT, R24, c[0x2][0x18], PT ;\n",
+      "        /*1878*/                   DSETP.LT.OR P1, PT, R4, c[0x2][0x18], !P1 ;\n",
+      "        /*1888*/               @P1 SYNC                                                (*\"TARGET= .L_10 \"*);\n",
+      "        /*1890*/                   IADD32I R20.CC, R62, 0x1000000 ;\n",
+      "        /*1898*/                   IADD.X R21, RZ, R63 ;\n",
+      "        /*18a8*/                   LDG.E.64 R20, [R20+0xc2c30] ;\n",
+      "        /*18b0*/                   DMUL R24, R6, 0.5 ;\n",
+      "        /*18b8*/                   FMUL.FTZ R26, R27, R26 ;\n",
+      "        /*18c8*/                   MUFU.RCP R26, R26 ;\n",
+      "        /*18d0*/                   DFMA R24, R10, 0.5, R24 ;\n",
+      "        /*18d8*/                   MOV32I R10, 0x8cfbbca1 ;\n",
+      "        /*18e8*/                   MOV32I R11, 0x3fef2cb9 ;\n",
+      "        /*18f0*/                   DMUL R18, R24, R18 ;\n",
+      "        /*18f8*/                   DFMA R4, R12, c[0x2][0x28], R10 ;\n",
+      "        /*1908*/                   DMUL R18, R16, R18 ;\n",
+      "        /*1910*/                   DMUL R4, R4, R18 ;\n",
+      "        /*1918*/                   DFMA R4, R58, c[0x2][0x28], -R4 ;\n",
+      "        /*1928*/                   DFMA R4, R48, R42, R4 ;\n",
+      "        /*1930*/                   DFMA R4, R6, 16.NEG, R4 ;\n",
+      "        /*1938*/                   DMUL R10, R20, 16 ;\n",
+      "        /*1948*/                   DMUL R10, R44, R10 ;\n",
+      "        /*1950*/                   DFMA R4, R46, R10, R4 ;\n",
+      "        /*1958*/                   DFMA R4, R54, R8, R4 ;\n",
+      "        /*1968*/                   DFMA R4, R52, R14, R4 ;\n",
+      "        /*1970*/                   DSETP.GEU.AND P1, PT, |R4|, c[0x2][0x0], PT ;\n",
+      "        /*1978*/                   F2F.F32.F64 R4, R4 ;\n",
+      "        /*1988*/              @!P1 FMUL R4, R4, 1.175494350822287508e-38 ;\n",
+      "        /*1990*/                   FMUL.FTZ R4, R4, R26 ;\n",
+      "        /*1998*/                   F2F.F64.F32 R28, R4 ;\n",
+      "        /*19a8*/         {         DMUL R28, R28, c[0x2][0x30] ;\n",
+      "        /*19b0*/                   SYNC                                                (*\"TARGET= .L_10 \"*)        }\n",
+      ".L_10:\n",
+      "        /*19b8*/                   DMUL R30, R44, R30 ;\n",
+      "        /*19c8*/                   DADD R4, R58, R58 ;\n",
+      "        /*19d0*/                   DFMA R30, R30, c[0x2][0x38], R28 ;\n",
+      "        /*19d8*/                   DMUL R22, R22, R4 ;\n",
+      "        /*19e8*/                   IADD R4.CC, R3, c[0x0][0x148] ;\n",
+      "        /*19f0*/                   DFMA R12, R12, 2, R30 ;\n",
+      "        /*19f8*/                   IADD.X R0, R2, c[0x0][0x14c] ;\n",
+      "        /*1a08*/                   IADD32I R4.CC, R4, 0x1000000 ;\n",
+      "        /*1a10*/                   DFMA R22, R22, c[0x2][0x40], R12 ;\n",
+      "        /*1a18*/                   IADD.X R5, RZ, R0 ;\n",
+      "        /*1a28*/                   DFMA R22, R16, c[0x2][0x48], R22 ;\n",
+      "        /*1a30*/                   DFMA R22, R36, c[0x2][0x50], R22 ;\n",
+      "        /*1a38*/                   STG.E.64 [R4+0xc3040], R22 ;\n",
+      "        /*1a48*/                   SYNC                                                (*\"TARGET= .L_6 \"*);\n",
+      ".L_6:\n",
+      "        /*1a50*/                   ISETP.LT.U32.AND P0, PT, R50, 0x81, !P0 ;\n",
+      "        /*1a58*/              @!P0 EXIT ;\n",
+      "        /*1a68*/         {         IADD R60.CC, R3, c[0x0][0x158] ;\n",
+      "        /*1a70*/                   SSY `(.L_11)         }\n",
+      "        /*1a78*/                   IADD.X R61, R2, c[0x0][0x15c] ;\n",
+      "        /*1a88*/                   LDG.E.64 R56, [R60+-0x21020] ;\n",
+      "        /*1a90*/                   LDG.E.64 R10, [R60] ;\n",
+      "        /*1a98*/         {         IADD32I R68.CC, R60, 0x1000000 ;\n",
+      "        /*1aa8*/                   LDG.E.64 R16, [R60+-0x410]         }\n",
+      "        /*1ab0*/                   IADD.X R69, RZ, R61 ;\n",
+      "        /*1ab8*/         {         IADD32I R66.CC, R60, 0x2000000 ;\n",
+      "        /*1ac8*/                   LDG.E.64 R54, [R68+0xa2020]         }\n",
+      "        /*1ad0*/         {         IADD.X R67, RZ, R61 ;\n",
+      "        /*1ad8*/                   LDG.E.64 R4, [R68+0xc3040]         }\n",
+      "        /*1ae8*/         {         IADD32I R20.CC, R62, 0x1000000 ;\n",
+      "        /*1af0*/                   LDG.E.64 R6, [R68+0xc3450]         }\n",
+      "        /*1af8*/         {         IADD.X R21, RZ, R63 ;\n",
+      "        /*1b08*/                   LDG.E.64 R52, [R66+0x165060]         }\n",
+      "        /*1b10*/         {         IADD R58.CC, R3, c[0x0][0x140] ;\n",
+      "        /*1b18*/                   LDG.E.64 R64, [R66+0x186080]         }\n",
+      "        /*1b28*/         {         IADD.X R59, R2, c[0x0][0x144] ;\n",
+      "        /*1b30*/                   LDG.E.64 R8, [R66+0x186490]         }\n",
+      "        /*1b38*/                   LDG.E.64 R36, [R68+0xc2c30] ;\n",
+      "        /*1b48*/                   LDG.E.64 R40, [R66+0x185c70] ;\n",
+      "        /*1b50*/                   LDG.E.64 R22, [R66+0x186088] ;\n",
+      "        /*1b58*/                   LDG.E.64 R18, [R68+0xc3048] ;\n",
+      "        /*1b68*/                   LDG.E.64 R34, [R66+0x186078] ;\n",
+      "        /*1b70*/                   LDG.E.64 R30, [R68+0xc3038] ;\n",
+      "        /*1b78*/                   LDG.E.64 R2, [R20+0xc3040] ;\n",
+      "        /*1b88*/                   LDG.E.64 R32, [R58] ;\n",
+      "        /*1b90*/                   LDG.E.64 R20, [R60+-0x21028] ;\n",
+      "        /*1b98*/                   DEPBAR.LE SB5, 0xb ;\n",
+      "        /*1ba8*/                   DMUL R12, R56, 0.5 ;\n",
+      "        /*1bb0*/         {         DFMA R50, R10, 0.5, R12 ;\n",
+      "        /*1bb8*/                   DEPBAR.LE SB5, 0xa         }\n",
+      "        /*1bc8*/         {         DMUL R46, R54, 0.5 ;\n",
+      "        /*1bd0*/                   LDG.E.64 R12, [R60+0x410]         }\n",
+      "        /*1bd8*/                   DMUL R48, R50, R50 ;\n",
+      "        /*1be8*/                   DSETP.GEU.AND P0, PT, |R48|, c[0x2][0x0], PT ;\n",
+      "        /*1bf0*/                   F2F.F32.F64 R0, R48 ;\n",
+      "        /*1bf8*/                   DEPBAR.LE SB5, 0xa ;\n",
+      "        /*1c08*/                   DMUL R26, R52, 0.5 ;\n",
+      "        /*1c10*/                   DFMA R46, R4, 0.5, R46 ;\n",
+      "        /*1c18*/              @!P0 FMUL R0, R0, 1.175494350822287508e-38 ;\n",
+      "        /*1c28*/                   DMUL R6, R6, 0.5 ;\n",
+      "        /*1c30*/                   FMUL.FTZ R0, R0, 1 ;\n",
+      "        /*1c38*/         {         DMUL R42, R46, R46 ;\n",
+      "        /*1c48*/                   DEPBAR.LE SB5, 0x8         }\n",
+      "        /*1c50*/                   DFMA R44, R64, 0.5, R26 ;\n",
+      "        /*1c58*/         {         F2F.F64.F32 R24, R0 ;\n",
+      "        /*1c68*/                   LDG.E.64 R26, [R68+0xa1c10]         }\n",
+      "        /*1c70*/         {         DMUL R14, R8, 0.5 ;\n",
+      "        /*1c78*/                   DEPBAR.LE SB5, 0x6         }\n",
+      "        /*1c88*/         {         DFMA R36, R36, -0.5, R6 ;\n",
+      "        /*1c90*/                   LDG.E.64 R8, [R60+0x8]         }\n",
+      "        /*1c98*/         {         DMUL R38, R44, R44 ;\n",
+      "        /*1ca8*/                   LDG.E.64 R6, [R62]         }\n",
+      "        /*1cb0*/                   DADD R24, R42, R24 ;\n",
+      "        /*1cb8*/                   DFMA R40, R40, -0.5, R14 ;\n",
+      "        /*1cc8*/                   LDG.E.64 R14, [R60+-0x8] ;\n",
+      "        /*1cd0*/                   DEPBAR.LE SB5, 0x6 ;\n",
+      "        /*1cd8*/                   DMUL R28, R22, 0.5 ;\n",
+      "        /*1ce8*/         {         DMUL R18, R18, 0.5 ;\n",
+      "        /*1cf0*/                   LDG.E.64 R22, [R66+0x164c50]         }\n",
+      "        /*1cf8*/                   DADD R24, R38, R24 ;\n",
+      "        /*1d08*/         {         DFMA R34, R34, -0.5, R28 ;\n",
+      "        /*1d10*/                   DEPBAR.LE SB5, 0x6         }\n",
+      "        /*1d18*/         {         DFMA R30, R30, -0.5, R18 ;\n",
+      "        /*1d28*/                   LDG.E.64 R28, [R68+0xa2018]         }\n",
+      "        /*1d30*/         {         DSETP.GEU.AND P0, PT, |R24|, c[0x2][0x0], PT ;\n",
+      "        /*1d38*/                   LDG.E.64 R18, [R60+-0x21430]         }\n",
+      "        /*1d48*/                   F2F.F32.F64 R0, R24 ;\n",
+      "        /*1d50*/                   LDG.E.64 R24, [R66+0x165058] ;\n",
+      "        /*1d58*/              @!P0 FMUL R0, R0, 1.175494350822287508e-38 ;\n",
+      "        /*1d68*/                   DEPBAR.LE SB5, 0x8 ;\n",
+      "        /*1d70*/         {         DMUL R2, R2, 16 ;\n",
+      "        /*1d78*/                   DEPBAR.LE SB5, 0x7         }\n",
+      "        /*1d88*/                   DMUL R70, R12, 0.5 ;\n",
+      "        /*1d90*/                   IADD32I R12, R0, 0x1800000 ;\n",
+      "        /*1d98*/                   LOP32I.AND R12, R12, 0x7f800000 ;\n",
+      "        /*1da8*/                   ISETP.GT.U32.AND P0, PT, R12, c[0x2][0x8], PT ;\n",
+      "        /*1db0*/                   DFMA R16, R16, -0.5, R70 ;\n",
+      "        /*1db8*/                   DEPBAR.LE SB5, 0x4 ;\n",
+      "        /*1dc8*/                   DMUL R8, R8, 0.5 ;\n",
+      "        /*1dd0*/                   DMUL R12, R6, 16 ;\n",
+      "        /*1dd8*/                   MOV32I R6, 0x349e35fd ;\n",
+      "        /*1de8*/                   MOV32I R7, 0x401d14dc ;\n",
+      "        /*1df0*/                   DFMA R14, R14, -0.5, R8 ;\n",
+      "        /*1df8*/                   DFMA R12, R10.reuse, -16, R12 ;\n",
+      "        /*1e08*/                   DFMA R8, R32, c[0x2][0x10], R6 ;\n",
+      "        /*1e10*/                   DADD R10, R10, R10 ;\n",
+      "        /*1e18*/                   DFMA R6, R4.reuse, -16, R2 ;\n",
+      "        /*1e28*/                   DADD R4, R4, R4 ;\n",
+      "        /*1e30*/         {         DADD R2, R64, R64 ;\n",
+      "        /*1e38*/               @P0 BRA `(.L_12)         }\n",
+      "        /*1e48*/                   CAL `($kernel$__cuda_sm20_rcp_rn_ftz_f32_slowpath) ;\n",
+      "        /*1e50*/                   SYNC                                                (*\"TARGET= .L_11 \"*);\n",
+      ".L_12:\n",
+      "        /*1e58*/                   MUFU.RCP R65, R0 ;\n",
+      "        /*1e68*/                   FFMA R64, R0, R65, c[0x2][0xc] ;\n",
+      "        /*1e70*/                   FADD.FTZ R64, -R64, -RZ ;\n",
+      "        /*1e78*/         {         FFMA R64, R65, R64, R65 ;\n",
+      "        /*1e88*/                   SYNC                                                (*\"TARGET= .L_11 \"*)        }\n",
+      ".L_11:\n",
+      "        /*1e90*/         {         IADD32I R70.CC, R60, 0x2000000 ;\n",
+      "        /*1e98*/                   LDG.E.64 R58, [R58+-0x21020]         }\n",
+      "        /*1ea8*/         {         DFMA R14, R20, -0.5, R14 ;\n",
+      "        /*1eb0*/                   SSY `(.L_13)         }\n",
+      "        /*1eb8*/                   IADD.X R71, RZ, R61 ;\n",
+      "        /*1ec8*/                   LDG.E.64 R66, [R70+0x165068] ;\n",
+      "        /*1ed0*/                   LDG.E.64 R20, [R70+0x165470] ;\n",
+      "        /*1ed8*/                   DEPBAR.LE SB5, 0x2 ;\n",
+      "        /*1ee8*/                   DFMA R18, R18, -0.5, R16 ;\n",
+      "        /*1ef0*/                   LDG.E.64 R16, [R60+-0x21018] ;\n",
+      "        /*1ef8*/                   DFMA R22, R22, -0.5, R40 ;\n",
+      "        /*1f08*/         {         IADD32I R68.CC, R60, 0x1000000 ;\n",
+      "        /*1f10*/                   LDG.E.64 R40, [R60+-0x20c10]         }\n",
+      "        /*1f18*/                   DFMA R24, R24, -0.5, R34 ;\n",
+      "        /*1f28*/                   IADD.X R69, RZ, R61 ;\n",
+      "        /*1f30*/                   LDG.E.64 R34, [R68+0xa2028] ;\n",
+      "        /*1f38*/                   LDG.E.64 R68, [R68+0xa2430] ;\n",
+      "        /*1f48*/                   DSETP.GEU.AND P0, PT, |R46|, c[0x2][0x0], PT ;\n",
+      "        /*1f50*/                   DFMA R26, R26, -0.5, R36 ;\n",
+      "        /*1f58*/                   F2F.F32.F64 R0, R46 ;\n",
+      "        /*1f68*/                   DADD R36, R56, R56 ;\n",
+      "        /*1f70*/                   DFMA R28, R28, -0.5, R30 ;\n",
+      "        /*1f78*/                   DADD R30, R54, R54 ;\n",
+      "        /*1f88*/                   DADD R10, R10, -R36 ;\n",
+      "        /*1f90*/              @!P0 FMUL R0, R0, 1.175494350822287508e-38 ;\n",
+      "        /*1f98*/                   DADD R4, R4, -R30 ;\n",
+      "        /*1fa8*/                   FMUL.FTZ R0, R0, 1 ;\n",
+      "        /*1fb0*/                   DSETP.GEU.AND P0, PT, |R50|, c[0x2][0x0], PT ;\n",
+      "        /*1fb8*/                   DMUL R30, R4, R4 ;\n",
+      "        /*1fc8*/                   DADD R52, R52, R52 ;\n",
+      "        /*1fd0*/                   DSETP.GEU.AND P2, PT, |R30|, c[0x2][0x0], PT ;\n",
+      "        /*1fd8*/                   DADD R2, R2, -R52 ;\n",
+      "        /*1fe8*/                   DMUL R36, R10, R10 ;\n",
+      "        /*1ff0*/                   DEPBAR.LE SB5, 0x5 ;\n",
+      "        /*1ff8*/                   DFMA R24, R66, 0.5, R24 ;\n",
+      "        /*2008*/         {         F2F.F64.F32 R66, R0 ;\n",
+      "        /*2010*/                   DEPBAR.LE SB5, 0x4         }\n",
+      "        /*2018*/                   DSETP.GEU.AND P1, PT, |R36|, c[0x2][0x0], PT ;\n",
+      "        /*2028*/                   DFMA R20, R20, 0.5, R22 ;\n",
+      "        /*2030*/                   F2F.F32.F64 R22, R36 ;\n",
+      "        /*2038*/                   F2F.F32.F64 R23, R50 ;\n",
+      "        /*2048*/                   DMUL R36, R44, R66 ;\n",
+      "        /*2050*/                   DMUL R66, R20, R20 ;\n",
+      "        /*2058*/                   F2F.F32.F64 R0, R30 ;\n",
+      "        /*2068*/              @!P0 FMUL R23, R23, 1.175494350822287508e-38 ;\n",
+      "        /*2070*/         {    @!P1 FMUL R22, R22, 1.175494350822287508e-38 ;\n",
+      "        /*2078*/                   DEPBAR.LE SB5, 0x2         }\n",
+      "        /*2088*/                   DSETP.GEU.AND P0, PT, |R36|, c[0x2][0x0], PT ;\n",
+      "        /*2090*/                   F2F.F32.F64 R36, R36 ;\n",
+      "        /*2098*/                   DFMA R52, R24, R24, R66 ;\n",
+      "        /*20a8*/                   FMUL.FTZ R37, R22, 1 ;\n",
+      "        /*20b0*/                   DFMA R14, R16, 0.5, R14 ;\n",
+      "        /*20b8*/                   FMUL.FTZ R16, R64, 1 ;\n",
+      "        /*20c8*/              @!P2 FMUL R0, R0, 1.175494350822287508e-38 ;\n",
+      "        /*20d0*/                   FMUL.FTZ R30, R23, 1 ;\n",
+      "        /*20d8*/                   DFMA R18, R40, 0.5, R18 ;\n",
+      "        /*20e8*/                   DFMA R22, R2, R2, R52 ;\n",
+      "        /*20f0*/                   F2F.F64.F32 R40, R37 ;\n",
+      "        /*20f8*/                   F2F.F64.F32 R16, R16 ;\n",
+      "        /*2108*/                   FMUL.FTZ R37, R0, 1 ;\n",
+      "        /*2110*/              @!P0 FMUL R36, R36, 1.175494350822287508e-38 ;\n",
+      "        /*2118*/                   MUFU.SQRT R0, R36 ;\n",
+      "        /*2128*/                   F2F.F64.F32 R30, R30 ;\n",
+      "        /*2130*/                   DSETP.GEU.AND P0, PT, |R22|, c[0x2][0x0], PT ;\n",
+      "        /*2138*/                   DFMA R40, R14, R14, R40 ;\n",
+      "        /*2148*/                   DMUL R38, R38, R16 ;\n",
+      "        /*2150*/                   DFMA R28, R34, 0.5, R28 ;\n",
+      "        /*2158*/                   F2F.F64.F32 R34, R37 ;\n",
+      "        /*2168*/                   F2F.F32.F64 R22, R22 ;\n",
+      "        /*2170*/                   DMUL R30, R44, R30 ;\n",
+      "        /*2178*/                   DSETP.GEU.AND P2, PT, |R38|, c[0x2][0x0], PT ;\n",
+      "        /*2188*/                   DFMA R40, R18, R18, R40 ;\n",
+      "        /*2190*/                   DFMA R26, R68, 0.5, R26 ;\n",
+      "        /*2198*/                   DFMA R44, R28, R28, R34 ;\n",
+      "        /*21a8*/                   FMUL.FTZ R34, R0, 1 ;\n",
+      "        /*21b0*/              @!P0 FMUL R22, R22, 1.175494350822287508e-38 ;\n",
+      "        /*21b8*/         {         F2F.F32.F64 R37, R38 ;\n",
+      "        /*21c8*/                   MUFU.RCP R60, R22         }\n",
+      "        /*21d0*/         {         DSETP.GEU.AND P0, PT, |R30|, c[0x2][0x0], PT ;\n",
+      "        /*21d8*/                   MUFU.SQRT R36, R22         }\n",
+      "        /*21e8*/                   DSETP.GEU.AND P1, PT, |R40|, c[0x2][0x0], PT ;\n",
+      "        /*21f0*/                   F2F.F64.F32 R34, R34 ;\n",
+      "        /*21f8*/                   DFMA R44, R26, R26, R44 ;\n",
+      "        /*2208*/                   F2F.F32.F64 R52, R30 ;\n",
+      "        /*2210*/                   F2F.F32.F64 R23, R40 ;\n",
+      "        /*2218*/              @!P2 FMUL R37, R37, 1.175494350822287508e-38 ;\n",
+      "        /*2228*/                   DSETP.GEU.AND P3, PT, R34, c[0x2][0x18], PT ;\n",
+      "        /*2230*/                   DSETP.GEU.AND P2, PT, |R44|, c[0x2][0x0], PT ;\n",
+      "        /*2238*/                   DMUL R30, R58, c[0x2][0x10] ;\n",
+      "        /*2248*/                   FMUL.FTZ R37, R37, R60 ;\n",
+      "        /*2250*/              @!P0 FMUL R52, R52, 1.175494350822287508e-38 ;\n",
+      "        /*2258*/         {    @!P1 FMUL R23, R23, 1.175494350822287508e-38 ;\n",
+      "        /*2268*/                   MUFU.SQRT R22, R52         }\n",
+      "        /*2270*/         {         F2F.F32.F64 R41, R44 ;\n",
+      "        /*2278*/                   MUFU.SQRT R23, R23         }\n",
+      "        /*2288*/                   FMUL.FTZ R34, R36, 1 ;\n",
+      "        /*2290*/                   DMUL R48, R48, R16 ;\n",
+      "        /*2298*/                   F2F.F64.F32 R36, R37 ;\n",
+      "        /*22a8*/                   DMUL R16, R42, R16 ;\n",
+      "        /*22b0*/                   DFMA R8, R8, R38, R30 ;\n",
+      "        /*22b8*/                   F2F.F64.F32 R34, R34 ;\n",
+      "        /*22c8*/                   MOV R30, RZ ;\n",
+      "        /*22d0*/                   MOV R31, RZ ;\n",
+      "        /*22d8*/                   MOV R44, RZ ;\n",
+      "        /*22e8*/                   MOV R45, RZ ;\n",
+      "        /*22f0*/         {    @!P2 FMUL R41, R41, 1.175494350822287508e-38 ;\n",
+      "        /*22f8*/              @!P3 SYNC                                                (*\"TARGET= .L_13 \"*)        }\n",
+      "        /*2308*/                   MUFU.SQRT R41, R41 ;\n",
+      "        /*2310*/                   FMUL.FTZ R52, R41, 1 ;\n",
+      "        /*2318*/                   F2F.F64.F32 R52, R52 ;\n",
+      "        /*2328*/                   DMUL R52, R34, R52 ;\n",
+      "        /*2330*/                   DSETP.GEU.AND P0, PT, R52, c[0x2][0x18], PT ;\n",
+      "        /*2338*/              @!P0 SYNC                                                (*\"TARGET= .L_13 \"*);\n",
+      "        /*2348*/                   IADD32I R60.CC, R62, 0x1000000 ;\n",
+      "        /*2350*/                   IADD.X R61, RZ, R63 ;\n",
+      "        /*2358*/                   LDG.E.64 R42, [R60+0xa2020] ;\n",
+      "        /*2368*/                   MOV32I R66, 0x8cfbbca1 ;\n",
+      "        /*2370*/                   MOV32I R67, 0x3fef2cb9 ;\n",
+      "        /*2378*/                   DMUL R46, R46, R36 ;\n",
+      "        /*2388*/                   DMUL R52, R58, c[0x2][0x28] ;\n",
+      "        /*2390*/                   FMUL.FTZ R0, R0, R41 ;\n",
+      "        /*2398*/                   DFMA R66, R32, c[0x2][0x28], R66 ;\n",
+      "        /*23a8*/                   DMUL R66, R16, R66 ;\n",
+      "        /*23b0*/                   DFMA R44, R8, R46, -R66 ;\n",
+      "        /*23b8*/                   DFMA R44, R6, R52, R44 ;\n",
+      "        /*23c8*/                   DFMA R54, R54, 16.NEG, R44 ;\n",
+      "        /*23d0*/                   DMUL R42, R42, 16 ;\n",
+      "        /*23d8*/                   DMUL R42, R24, R42 ;\n",
+      "        /*23e8*/                   DFMA R28, R28, R42, R54 ;\n",
+      "        /*23f0*/                   DFMA R26, R20, R26, R28 ;\n",
+      "        /*23f8*/                   DFMA R4, R2, R4, R26 ;\n",
+      "        /*2408*/                   DSETP.GEU.AND P0, PT, |R4|, c[0x2][0x0], PT ;\n",
+      "        /*2410*/                   F2F.F32.F64 R4, R4 ;\n",
+      "        /*2418*/                   MUFU.RCP R5, R0 ;\n",
+      "        /*2428*/              @!P0 FMUL R4, R4, 1.175494350822287508e-38 ;\n",
+      "        /*2430*/                   FMUL.FTZ R4, R4, R5 ;\n",
+      "        /*2438*/         {         F2F.F64.F32 R44, R4 ;\n",
+      "        /*2448*/                   SYNC                                                (*\"TARGET= .L_13 \"*)        }\n",
+      ".L_13:\n",
+      "        /*2450*/         {         FMUL.FTZ R4, R23, 1 ;\n",
+      "        /*2458*/                   SSY `(.L_14)         }\n",
+      "        /*2468*/                   F2F.F64.F32 R4, R4 ;\n",
+      "        /*2470*/                   DMUL R4, R34, R4 ;\n",
+      "        /*2478*/                   DSETP.GEU.AND P0, PT, R4, c[0x2][0x18], PT ;\n",
+      "        /*2488*/                   FMUL.FTZ R4, R22, 1 ;\n",
+      "        /*2490*/                   F2F.F64.F32 R4, R4 ;\n",
+      "        /*2498*/                   DSETP.LT.OR P0, PT, R4, c[0x2][0x18], !P0 ;\n",
+      "        /*24a8*/               @P0 SYNC                                                (*\"TARGET= .L_14 \"*);\n",
+      "        /*24b0*/         {         MOV32I R6, 0x834fff9c ;\n",
+      "        /*24b8*/                   LDG.E.64 R62, [R62+-0x21020]         }\n",
+      "        /*24c8*/                   MOV32I R7, 0x3ff61152 ;\n",
+      "        /*24d0*/                   DMUL R50, R50, R36 ;\n",
+      "        /*24d8*/                   DMUL R4, R58, c[0x2][0x20] ;\n",
+      "        /*24e8*/                   FMUL.FTZ R22, R22, R23 ;\n",
+      "        /*24f0*/                   MUFU.RCP R0, R22 ;\n",
+      "        /*24f8*/                   DFMA R6, R32, c[0x2][0x20], R6 ;\n",
+      "        /*2508*/                   DMUL R6, R48, R6 ;\n",
+      "        /*2510*/                   DFMA R6, R8, R50, -R6 ;\n",
+      "        /*2518*/                   DFMA R6, R12, R4, R6 ;\n",
+      "        /*2528*/                   DFMA R56, R56, 16.NEG, R6 ;\n",
+      "        /*2530*/                   DMUL R26, R62, 16 ;\n",
+      "        /*2538*/                   DMUL R24, R24, R26 ;\n",
+      "        /*2548*/                   DFMA R14, R14, R24, R56 ;\n",
+      "        /*2550*/                   DFMA R20, R18, R20, R14 ;\n",
+      "        /*2558*/                   DFMA R10, R2, R10, R20 ;\n",
+      "        /*2568*/                   DSETP.GEU.AND P0, PT, |R10|, c[0x2][0x0], PT ;\n",
+      "        /*2570*/                   F2F.F32.F64 R10, R10 ;\n",
+      "        /*2578*/              @!P0 FMUL R10, R10, 1.175494350822287508e-38 ;\n",
+      "        /*2588*/                   FMUL.FTZ R10, R10, R0 ;\n",
+      "        /*2590*/                   F2F.F64.F32 R30, R10 ;\n",
+      "        /*2598*/         {         DMUL R30, R30, c[0x2][0x30] ;\n",
+      "        /*25a8*/                   SYNC                                                (*\"TARGET= .L_14 \"*)        }\n",
+      ".L_14:\n",
+      "        /*25b0*/         {         DMUL R44, R2, R44 ;\n",
+      "        /*25b8*/                   S2R R4, SR_TID.Y         }\n",
+      "        /*25c8*/         {         DADD R58, R58, R58 ;\n",
+      "        /*25d0*/                   S2R R7, SR_CTAID.Y         }\n",
+      "        /*25d8*/         {         DFMA R30, R44, c[0x2][0x38], R30 ;\n",
+      "        /*25e8*/                   S2R R0, SR_TID.Z         }\n",
+      "        /*25f0*/         {         DMUL R48, R48, R58 ;\n",
+      "        /*25f8*/                   S2R R5, SR_CTAID.Z         }\n",
+      "        /*2608*/         {         DFMA R30, R32, 2, R30 ;\n",
+      "        /*2610*/                   S2R R8, SR_TID.X         }\n",
+      "        /*2618*/         {         DFMA R30, R48, c[0x2][0x40], R30 ;\n",
+      "        /*2628*/                   S2R R10, SR_CTAID.X         }\n",
+      "        /*2630*/                   XMAD R4, R7, c[0x0] [0xc], R4 ;\n",
+      "        /*2638*/                   XMAD.MRG R9, R7.reuse, c[0x0] [0xc].H1, RZ ;\n",
+      "        /*2648*/                   DFMA R16, R16, c[0x2][0x48], R30 ;\n",
+      "        /*2650*/                   XMAD.PSL.CBCC R4, R7.H1, R9.H1, R4 ;\n",
+      "        /*2658*/                   XMAD R0, R5.reuse, c[0x0] [0x10], R0 ;\n",
+      "        /*2668*/                   XMAD.MRG R6, R5, c[0x0] [0x10].H1, RZ ;\n",
+      "        /*2670*/                   DFMA R16, R38, c[0x2][0x50], R16 ;\n",
+      "        /*2678*/                   IADD32I R4, R4, 0x1 ;\n",
+      "        /*2688*/                   XMAD.PSL.CBCC R0, R5.H1, R6.H1, R0 ;\n",
+      "        /*2690*/                   MOV32I R5, 0x82 ;\n",
+      "        /*2698*/                   MOV32I R6, 0x82 ;\n",
+      "        /*26a8*/                   XMAD R3, R4, 0x82, RZ ;\n",
+      "        /*26b0*/                   XMAD R8, R10, c[0x0] [0x8], R8 ;\n",
+      "        /*26b8*/                   XMAD.MRG R11, R10, c[0x0] [0x8].H1, RZ ;\n",
+      "        /*26c8*/                   IADD32I R0, R0, 0x1 ;\n",
+      "        /*26d0*/                   XMAD R2, R4.reuse, 0x82, RZ ;\n",
+      "        /*26d8*/                   XMAD R5, R4.reuse, R5.H1, RZ ;\n",
+      "        /*26e8*/                   XMAD R6, R4.H1.reuse, R6.H1, RZ ;\n",
+      "        /*26f0*/                   XMAD.CHI R3, R4.H1, 0x82, R3 ;\n",
+      "        /*26f8*/                   XMAD.PSL.CBCC R8, R10.H1, R11.H1, R8 ;\n",
+      "        /*2708*/                   MOV32I R10, 0x4204 ;\n",
+      "        /*2710*/                   MOV32I R11, 0x4204 ;\n",
+      "        /*2718*/                   XMAD R9, R0.reuse, 0x4204, RZ ;\n",
+      "        /*2728*/                   XMAD R7, R0, 0x4204, RZ ;\n",
+      "        /*2730*/                   XMAD.PSL R4, R4.H1, 0x82, R2 ;\n",
+      "        /*2738*/                   IADD32I R8, R8, 0x1 ;\n",
+      "        /*2748*/                   XMAD R10, R0.reuse, R10.H1, RZ ;\n",
+      "        /*2750*/                   XMAD R11, R0.H1.reuse, R11.H1, RZ ;\n",
+      "        /*2758*/                   XMAD.CHI R9, R0.H1.reuse, 0x4204, R9 ;\n",
+      "        /*2768*/                   XMAD.PSL R2, R0.H1, 0x4204, R7 ;\n",
+      "        /*2770*/                   IADD3.RS R0, R3, R5, R6 ;\n",
+      "        /*2778*/                   IADD3.RS R9, R9, R10, R11 ;\n",
+      "        /*2788*/                   IADD R2.CC, R2, R4 ;\n",
+      "        /*2790*/                   IADD.X R0, R9, R0 ;\n",
+      "        /*2798*/                   IADD R8.CC, R8, R2 ;\n",
+      "        /*27a8*/                   IADD.X R3, RZ, R0 ;\n",
+      "        /*27b0*/                   LEA R2.CC, R8.reuse, c[0x0][0x148], 0x3 ;\n",
+      "        /*27b8*/                   LEA.HI.X R0, R8, c[0x0][0x14c], R3, 0x3 ;\n",
+      "        /*27c8*/                   IADD32I R2.CC, R2, 0x2000000 ;\n",
+      "        /*27d0*/                   IADD.X R3, RZ, R0 ;\n",
+      "        /*27d8*/                   STG.E.64 [R2+0x186080], R16 ;\n",
+      "        /*27e8*/                   EXIT ;\n",
+      "        .weak           $kernel$__cuda_sm20_rcp_rn_ftz_f32_slowpath\n",
+      "        .type           $kernel$__cuda_sm20_rcp_rn_ftz_f32_slowpath,@function\n",
+      "        .size           $kernel$__cuda_sm20_rcp_rn_ftz_f32_slowpath,(.L_48 - $kernel$__cuda_sm20_rcp_rn_ftz_f32_slowpath)\n",
+      "$kernel$__cuda_sm20_rcp_rn_ftz_f32_slowpath:\n",
+      "        /*27f0*/         {         IADD32I R65, R0, 0x1800000 ;\n",
+      "        /*27f8*/                   PBK `(.L_15)         }\n",
+      "        /*2808*/                   LOP32I.AND R65, R65, 0x7f800000 ;\n",
+      "        /*2810*/                   ISETP.NE.U32.AND P2, PT, R65, c[0x2][0x58], PT ;\n",
+      "        /*2818*/              @!P2 MUFU.RCP R64, R0 ;\n",
+      "        /*2828*/              @!P2 BRK                                                 (*\"TARGET= .L_15 \"*);\n",
+      "        /*2830*/                   ISET.EQ.U32.AND R64, R65, c[0x2][0x5c], PT ;\n",
+      "        /*2838*/                   ICMP.NE.U32 R65, RZ, 0x1, R65 ;\n",
+      "        /*2848*/                   IADD R64, -R64, RZ ;\n",
+      "        /*2850*/                   LOP.OR.NZ P2, RZ, R65, R64 ;\n",
+      "        /*2858*/              @!P2 BRA `(.L_16) ;\n",
+      "        /*2868*/                   LOP32I.AND R64, R0, 0x7fffff ;\n",
+      "        /*2870*/                   ISET.EQ.U32.AND R64, R64, RZ, PT ;\n",
+      "        /*2878*/                   IADD R64, -R64, RZ ;\n",
+      "        /*2888*/                   LOP.AND.NZ P2, RZ, R65, R64 ;\n",
+      "        /*2890*/               @P2 LOP32I.AND R64, R0.reuse, 0x80000000 ;\n",
+      "        /*2898*/               @P2 LOP32I.OR R64, R64, 0x800000 ;\n",
+      "        /*28a8*/         {    @!P2 LOP32I.AND R64, R0, 0x80000000 ;\n",
+      "        /*28b0*/                   BRK                                                 (*\"TARGET= .L_15 \"*)        }\n",
+      ".L_16:\n",
+      "        /*28b8*/                   MUFU.RCP R64, R0 ;\n",
+      "        /*28c8*/                   BRK                                                 (*\"TARGET= .L_15 \"*);\n",
+      ".L_15:\n",
+      "        /*28d0*/                   RET ;\n",
+      ".L_17:\n",
+      "        /*28d8*/                   BRA `(.L_17) ;\n",
+      ".L_48:\n",
+      "\n",
+      "108\n",
+      "36\n",
+      "116\n",
+      "152\n",
+      "29\n",
+      "3\n"
+     ]
+    }
+   ],
+   "source": [
+    "phi_kernel = create_kernel(\n",
+    "    mu_update_eqs,\n",
+    "    target=\"gpu\",\n",
+    "    gpu_indexing_params={\n",
+    "        \"block_size\": (32, 4, 1)\n",
+    "    }).compile()\n",
+    "\n",
+    "\n",
+    "\n",
+    "code = \"#include <cstdint>\\n\"\n",
+    "code += \"#define FUNC_PREFIX __global__ __launch_bounds__(128)\\n\"\n",
+    "code += \"#define RESTRICT const __restrict__\\n\\n\"\n",
+    "\n",
+    "#code += str(show_code(phi_kernel.ast))\n",
+    "code += str(show_code(mu_stag_precomp_kernel)) #\n",
+    "\n",
+    "cubin = pycuda.compiler.compile(code, options=[\"-w\", \"-std=c++11\", \"-use_fast_math\" ], arch=\"sm_60\")\n",
+    "\n",
+    "run([  \"echo \\\"\" + code + \"\\\" >> temp.cubin\"],\n",
+    "        stdout=PIPE,\n",
+    "        shell=True)\n",
+    "\n",
+    "newFile = open(\"temp.cusbin\", \"wb\")\n",
+    "newFile.write(cubin)\n",
+    "newFile.close()\n",
+    "\n",
+    "result = run([  \"nvdisasm -c   temp.cusbin\"],\n",
+    "        stdout=PIPE,\n",
+    "        shell=True)\n",
+    "\n",
+    "print(len(result.stdout.decode(\"utf-8\").split(\"\\n\") )  )\n",
+    "\n",
+    "print(result.stdout.decode(\"utf-8\"))\n",
+    "\n",
+    "newFile = open(\"temp.disasm\", \"wb\")\n",
+    "newFile.write(result.stdout)\n",
+    "newFile.close()\n",
+    "\n",
+    "print ( result.stdout.decode(\"utf-8\").count(\"LDG\") )\n",
+    "print ( result.stdout.decode(\"utf-8\").count(\"DADD\") )\n",
+    "print ( result.stdout.decode(\"utf-8\").count(\"DMUL\") )\n",
+    "print ( result.stdout.decode(\"utf-8\").count(\"DFMA\") )\n",
+    "print ( result.stdout.decode(\"utf-8\").count(\"MUFU\") )\n",
+    "print ( result.stdout.decode(\"utf-8\").count(\"STG\") )\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(show_code(phi_kernel.ast))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "for eq in rescheduled_eqs:\n",
+    "    print(eq)\n",
+    "    print(eq.rhs.func)\n",
+    "    for arg in eq.rhs.args:\n",
+    "        print(arg)\n",
+    "    print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d = graphviz.Digraph(engine='dot')\n",
+    "for eq in rescheduled_eqs:\n",
+    "    #d.node(eq.lhs.name)\n",
+    "    for arg in eq.rhs.atoms():\n",
+    "        if isinstance(arg, sympy.Symbol) and not isinstance(arg, Field.Access):\n",
+    "            d.edge(arg.name, eq.lhs.name)\n",
+    "d\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pystencils_tests/liveness_opts/cse_reorder_demo.ipynb b/pystencils_tests/liveness_opts/cse_reorder_demo.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..a17cec6b2890fb48791f2b8f5e81c328accf9b6d
--- /dev/null
+++ b/pystencils_tests/liveness_opts/cse_reorder_demo.ipynb
@@ -0,0 +1,269 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys \n",
+    "sys.path.append('..')\n",
+    "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 1\n",
+    "%aimport pystencils.simp.liveness_opts\n",
+    "%aimport pystencils.simp.liveness_opts_exp\n",
+    "%aimport pystencils.shmemvar\n",
+    "%aimport pystencils.backends.cbackend\n",
+    "%aimport pystencils.transformations\n",
+    "\n",
+    "\n",
+    "%load_ext line_profiler\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lbmpy.session import *\n",
+    "from scipy.ndimage.filters import gaussian_filter\n",
+    "from pygrandchem_tests.config2 import get_system\n",
+    "from pygrandchem_tests.config import get_system as get_system_simple\n",
+    "from pystencils.datahandling import SerialDataHandling\n",
+    "from pygrandchem.grandchem_generation import *\n",
+    "from pygrandchem.chemicalpotential import *\n",
+    "from pystencils import show_code, Field\n",
+    "from sympy import Number, Symbol, Expr, preorder_traversal, postorder_traversal, Function, Piecewise, relational\n",
+    "from pystencils.simp import sympy_cse_on_assignment_list\n",
+    "from pystencils.simp.liveness_opts import *\n",
+    "from pystencils.simp.liveness_opts_exp import *\n",
+    "\n",
+    "from pystencils.shmemvar import *\n",
+    "import graphviz\n",
+    "\n",
+    "\n",
+    "import pycuda\n",
+    "\n",
+    "import sys\n",
+    "from subprocess import run, PIPE\n",
+    "\n",
+    "sys.setrecursionlimit(100000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_SSA(eqs):\n",
+    "\n",
+    "    phi_kernel = create_kernel(\n",
+    "        eqs,\n",
+    "        target=\"gpu\",\n",
+    "        gpu_indexing_params={\n",
+    "            \"block_size\": (32, 4, 1)\n",
+    "        }).compile()\n",
+    "\n",
+    "\n",
+    "    code = \"#include <cstdint>\\n\"\n",
+    "    code += \"#define FUNC_PREFIX __global__ __launch_bounds__(128)\\n\"\n",
+    "    code += \"#define RESTRICT __restrict__\\n\\n\"\n",
+    "\n",
+    "    code += str(show_code(phi_kernel.ast))\n",
+    "\n",
+    "    cubin = pycuda.compiler.compile(code, options=[\"-w\", \"-std=c++11\", \"-use_fast_math\" ], arch=\"sm_60\")\n",
+    "\n",
+    "    newFile = open(\"temp.cusbin\", \"wb\")\n",
+    "    newFile.write(cubin)\n",
+    "    newFile.close()\n",
+    "\n",
+    "    result = run([  \"nvdisasm -c   temp.cusbin\"],\n",
+    "            stdout=PIPE,\n",
+    "            shell=True)\n",
+    "\n",
+    "    result_str = result.stdout.decode(\"utf-8\")\n",
+    "    print(len(result_str.split('\\n')))\n",
+    "\n",
+    "    \n",
+    "    print(result_str)\n",
+    "\n",
+    "    newFile = open(\"temp.disasm\", \"wb\")\n",
+    "    newFile.write(result.stdout)\n",
+    "    newFile.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "def make_graph_viz(eqs):\n",
+    "    d = graphviz.Digraph(engine='dot')\n",
+    "    for eq in eqs:\n",
+    "        #d.node(eq.lhs.name)\n",
+    "        for arg in eq.rhs.atoms():\n",
+    "            if isinstance(arg, sympy.Symbol) and not isinstance(arg, Field.Access):\n",
+    "                d.edge(arg.name, eq.lhs.name)\n",
+    "    return d\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = get_system()\n",
+    "free_energy = FreeEnergy(config['FreeEnergy'], config['Temperature'])\n",
+    "\n",
+    "dh = SerialDataHandling((256, 256, 256), periodicity=(True, True, False))\n",
+    "f = dh.fields\n",
+    "dh.add_array('phi_src', values_per_cell=4, layout='fzyx')\n",
+    "dh.add_array('mu_src', values_per_cell=2, layout='fzyx')\n",
+    "dh.add_array_like('phi_dst', 'phi_src')\n",
+    "dh.add_array_like('mu_dst', 'mu_src')\n",
+    "dh.add_array('c', values_per_cell=2, layout='fzyx')\n",
+    "\n",
+    "diffusion_matrices = np.zeros([4, 2, 2])\n",
+    "diffusion_matrices[0] = config['Parameters']['da']\n",
+    "diffusion_matrices[1] = config['Parameters']['db']\n",
+    "diffusion_matrices[2] = config['Parameters']['dg']\n",
+    "diffusion_matrices[3] = config['Parameters']['dl']\n",
+    "\n",
+    "f = dh.fields\n",
+    "\n",
+    "#update_eqs = create_phi_update_equations(\n",
+    "#    f['phi_src'],\n",
+    "#    f['phi_dst'],\n",
+    "#    f['mu_src'],\n",
+    "#    free_energy,\n",
+    "#    config['Parameters'],\n",
+    "#    simplex_projection=True)\n",
+    "\n",
+    "update_eqs = create_mu_update_equations(\n",
+    "    f['phi_src'], f['phi_dst'], f['mu_src'], f['mu_dst'], free_energy,\n",
+    "    diffusion_matrices, config['Parameters'])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for eq in update_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "make_graph_viz(update_eqs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "get_SSA(update_eqs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cse_eqs = sympy_cse_on_assignment_list(update_eqs)\n",
+    "for eq in cse_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "make_graph_viz(cse_eqs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "get_SSA(cse_eqs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rescheduled_eqs = schedule_eqs(cse_eqs)\n",
+    "\n",
+    "for eq in rescheduled_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "make_graph_viz(rescheduled_eqs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "get_SSA(rescheduled_eqs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pystencils_tests/liveness_opts/grandchem_test.py b/pystencils_tests/liveness_opts/grandchem_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..78c2e7838c7a8844a6076e0bfaff6b4b4ba881c2
--- /dev/null
+++ b/pystencils_tests/liveness_opts/grandchem_test.py
@@ -0,0 +1,148 @@
+# coding: utf-8
+
+# In[32]:
+
+from lbmpy.session import *
+from scipy.ndimage.filters import gaussian_filter
+
+from pygrandchem_tests.config_anisotropic import get_system
+from pystencils.datahandling import SerialDataHandling
+from pygrandchem.grandchem_generation import *
+from pystencils import show_code
+import pycuda.driver as drv
+from pystencils.simp.liveness_opts import *
+from pystencils.simp.liveness_opts_exp import *
+from pygrandchem.initialization import *
+from pygrandchem.chemicalpotential import free_energy_from_config_object, FreeEnergy
+from pystencils.boundaries import *
+
+from pystencils import show_code
+import pycuda.driver as drv
+from pystencils.simp.liveness_opts import *
+from pystencils.simp.liveness_opts_exp import *
+
+domain_size = (512, 512, 128)
+periodicity = (True, True, False)
+fast_simplex_projection = True
+optimization = { 'gpu_indexing_params': {"block_size": (32, 4, 2)}}
+config = get_system(dim=len(domain_size))
+
+phases = config['Parameters']['phases']
+components = config['Parameters']['components']
+diffusion_matrices = config['Parameters']['diffusion']
+free_energy = config['FreeEnergy']
+
+#Adding fields
+dh = create_data_handling(domain_size, periodicity=periodicity, default_target=optimization['target'])
+f = dh.fields
+phi_src = dh.add_array('phi_src', values_per_cell=phases, layout='fzyx', latex_name='phi_s')
+mu_src = dh.add_array('mu_src', values_per_cell=components, layout='fzyx', latex_name="mu_s")
+mu_stag = dh.add_array('mu_stag', values_per_cell=(dh.dim, components), layout='f')
+phi_dst = dh.add_array_like('phi_dst', 'phi_src')
+mu_dst = dh.add_array_like('mu_dst', 'mu_src')
+
+c = dh.add_array('c', values_per_cell=components, layout='fzyx', gpu=False)
+
+mu_vanilla_eqs = create_mu_update_equations(phi_src, phi_dst, mu_src, mu_dst, free_energy, diffusion_matrices,
+                                            config['Parameters'])
+
+phi_vanilla_eqs = create_phi_update_equations(
+    phi_src, phi_dst, mu_src, free_energy, config['Parameters'], simplex_projection=fast_simplex_projection)
+
+init_boxes(dh, height=0.2)
+initialize_concentration_field(dh, free_energy, config['Parameters']['initial_concentration'])
+smooth_fields(dh, sigma=0.4, iterations=5, dim=dh.dim)
+dh.synchronization_function(['phi_src', 'phi_dst', 'mu_src', 'mu_dst'])()
+print(dh)
+
+
+def bench_kernels(mu_kernel, phi_kernel):
+
+    start = drv.Event()
+    end = drv.Event()
+
+    dh.run_kernel(mu_kernel)
+    start.record()
+    dh.run_kernel(mu_kernel)
+    dh.run_kernel(mu_kernel)
+    end.record()
+    end.synchronize()
+    msec = start.time_till(end) / 2
+    print("mu_kernel: {:5.3f} ms".format(msec))
+
+    dh.run_kernel(phi_kernel)
+    start.record()
+    dh.run_kernel(phi_kernel)
+    dh.run_kernel(phi_kernel)
+    end.record()
+    end.synchronize()
+    msec = start.time_till(end) / 2
+    print("phi_kernel: {:5.3f} ms".format(msec))
+
+
+sched_options = []
+
+sched_options.append(option_none)
+sched_options.append(option_none)
+sched_options.append(option_none)
+#sched_options.append(option_reschedule)
+sched_options.append(option_liveness_opt_transformation)
+#sched_options.append(option_dupl_reschedule)
+sched_options.append(option_liveness_opt_transformation_shmem)
+sched_options.append(option_liveness_opt_transformation_shmem2)
+#sched_options.append(option_reschedule_shmem)
+#sched_options.append(option_liveness_opt_transformation_shmem)
+#sched_options.append(optionFuseSubs)
+#sched_options.append(optionFuseFMAs)
+#sched_options.append(optionFuseBoth)
+#sched_options.append(optionRescheduleAtomize)
+#sched_options.append(optionRescheduleAtomizeScramble)
+#sched_options.append(optionDuplAtomizeReschedule)
+#sched_options.append(optionDuplAtomizeRefuseReschedule)
+#sched_options.append(optionDuplRescheduleAtomize)
+#sched_options.append(optionDuplRescheduleAtomizeScramble)
+#sched_options.append(optionSchedIteration)
+#sched_options.append(optionAtomizeRescheduleNoSqrt)
+#sched_options.append(optionAtomizeRescheduleNoDiv)
+#sched_options.append(optionAtomizeRescheduleNoSqrtDiv)
+#sched_options.append(optionAtomizeRescheduleNoPiecewise)
+#sched_options.append(optionAtomizeRescheduleNoAll)
+
+mu_rescheduled_eqs = sched_options[0](mu_vanilla_eqs)  # first pass: only the first option in the list
+phi_rescheduled_eqs = sched_options[0](phi_vanilla_eqs)
+# Build and compile both kernels for the GPU target with a (32, 4, 2) block size.
+mu_kernel = create_kernel(mu_rescheduled_eqs, target="gpu", gpu_indexing_params={"block_size": (32, 4, 2)}).compile()
+
+phi_kernel = create_kernel(phi_rescheduled_eqs, target="gpu", gpu_indexing_params={"block_size": (32, 4, 2)}).compile()
+
+print("mu_kernel: " + str(mu_kernel.num_regs) + " regs")
+print("phi_kernel: " + str(phi_kernel.num_regs) + " regs")
+# Run the timing once before the loop over all options.
+bench_kernels(mu_kernel, phi_kernel)
+
+print()
+# Swap the 'src' and 'dst' arrays of mu and phi in the data handling.
+dh.swap('mu_src', 'mu_dst')
+dh.swap('phi_src', 'phi_dst')
+# Repeat equation transformation, compilation and timing for every entry of sched_options.
+for sched_option in sched_options:
+    print(sched_option.__name__)
+    mu_rescheduled_eqs = sched_option(mu_vanilla_eqs)
+    phi_rescheduled_eqs = sched_option(phi_vanilla_eqs)
+
+    mu_kernel = create_kernel(
+        mu_rescheduled_eqs, target="gpu", gpu_indexing_params={
+            "block_size": (32, 4, 2)
+        }).compile()
+
+    phi_kernel = create_kernel(
+        phi_rescheduled_eqs, target="gpu", gpu_indexing_params={
+            "block_size": (32, 4, 2)
+        }).compile()
+
+    print("mu_kernel: " + str(mu_kernel.num_regs) + " regs")
+    print("phi_kernel: " + str(phi_kernel.num_regs) + " regs")
+
+    bench_kernels(mu_kernel, phi_kernel)
+
+    print()
diff --git a/pystencils_tests/liveness_opts/grandchem_test_staggered.py b/pystencils_tests/liveness_opts/grandchem_test_staggered.py
new file mode 100644
index 0000000000000000000000000000000000000000..a85bf8b02614d4457398562e2d4cf43996179206
--- /dev/null
+++ b/pystencils_tests/liveness_opts/grandchem_test_staggered.py
@@ -0,0 +1,175 @@
+# coding: utf-8
+
+# In[32]:
+
+import warnings
+import pystencils as ps
+from pygrandchem.grandchem import GrandChemGenerator
+from pygrandchem.scenarios import system_4_2, system_3_1
+from pygrandchem.initialization import init_boxes, smooth_fields
+from pygrandchem.scenarios import benchmark_configs
+
+from sympy import Number, Symbol, Expr, preorder_traversal, postorder_traversal, Function, Piecewise, relational
+from pystencils.simp import sympy_cse_on_assignment_list
+from pystencils.simp.liveness_opts import *
+from pystencils.simp.liveness_opts_exp import *
+
+import graphviz
+
+import pycuda
+
+import sys
+from subprocess import run, PIPE
+
+from pystencils import show_code
+import pycuda.driver as drv
+
+configs = benchmark_configs()
+
+def get_config(name):  # returns the entry of the module-level `configs` stored under `name`
+    return configs[name]
+
+
+domain_size = (512, 512, 128)
+periodicity = (True, True, False)
+
+optimization = {'gpu_indexing_params': {"block_size": (32, 4, 2)}}
+config = get_config('42_fixT')
+phases, components = config['Parameters']['phases'], config['Parameters']['components']
+format_args = {'p': phases, 'c': components, 's': ','.join(str(e) for e in domain_size)}
+
+# Adding fields
+dh = ps.create_data_handling(domain_size, periodicity=periodicity, default_target='gpu')
+f = dh.fields
+phi_src = dh.add_array(
+    'phi_src', values_per_cell=config['Parameters']['phases'], layout='fzyx', latex_name='phi_s')
+mu_src = dh.add_array(
+    'mu_src', values_per_cell=config['Parameters']['components'], layout='fzyx', latex_name="mu_s")
+mu_stag = dh.add_array(
+    'mu_stag', values_per_cell=(dh.dim, config['Parameters']['components']), layout='f')
+phi_stag = dh.add_array('phi_stag', values_per_cell=(dh.dim, phases), layout='f')
+
+phi_dst = dh.add_array_like('phi_dst', 'phi_src')
+mu_dst = dh.add_array_like('mu_dst', 'mu_src')
+
+gc = GrandChemGenerator(
+    phi_src,
+    phi_dst,
+    mu_src,
+    mu_dst,
+    config['FreeEnergy'],
+    config['Parameters'],
+    #conc=c,
+    mu_staggered=mu_stag,
+    phi_staggered=phi_stag,
+    use_block_offsets=False,
+    compile_kernel=False)
+
+mu_full_eqs = gc.mu_full()
+phi_full_eqs = gc.phi_full()
+
+mu_partial1_eqs = gc.mu_partial1()
+mu_partial2_eqs = gc.mu_partial2()
+
+phi_partial1_eqs = gc.phi_partial1()
+phi_partial2_eqs = gc.phi_partial2()
+
+phi_kernel = ps.create_kernel(phi_full_eqs, target='gpu', **optimization).compile()
+mu_kernel = ps.create_kernel(mu_full_eqs, target='gpu', **optimization).compile()
+
+c = dh.add_array('c', values_per_cell=config['Parameters']['components'], layout='fzyx', gpu=False)
+
+init_boxes(dh)
+#initialize_concentration_field(dh, free_energy, config['Parameters']['initial_concentration'])
+smooth_fields(dh, sigma=0.4, iterations=5, dim=dh.dim)
+dh.synchronization_function(['phi_src', 'phi_dst', 'mu_src', 'mu_dst'])()
+print(dh)
+
+
+def bench_kernels(mu_kernel, phi_kernel):
+
+    start = drv.Event()
+    end = drv.Event()
+
+    dh.run_kernel(mu_kernel)
+    start.record()
+    dh.run_kernel(mu_kernel)
+    dh.run_kernel(mu_kernel)
+    end.record()
+    end.synchronize()
+    msec = start.time_till(end) / 2
+    print("mu_kernel: {}  {:5.3f} ms".format(mu_kernel.num_regs, msec))
+
+    dh.run_kernel(phi_kernel)
+    start.record()
+    dh.run_kernel(phi_kernel)
+    dh.run_kernel(phi_kernel)
+    end.record()
+    end.synchronize()
+    msec = start.time_till(end) / 2
+    print("phi_kernel: {}  {:5.3f} ms".format(phi_kernel.num_regs, msec))
+
+
+sched_options = []
+
+sched_options.append(option_none)
+#sched_options.append(option_reschedule)
+sched_options.append(option_liveness_opt_transformation)
+#sched_options.append(option_dupl_reschedule)
+sched_options.append(option_liveness_opt_transformation_shmem)
+sched_options.append(option_liveness_opt_transformation_shmem2)
+#sched_options.append(option_reschedule_shmem)
+#sched_options.append(option_liveness_opt_transformation_shmem)
+#sched_options.append(optionFuseSubs)
+#sched_options.append(optionFuseFMAs)
+#sched_options.append(optionFuseBoth)
+#sched_options.append(optionRescheduleAtomize)
+#sched_options.append(optionRescheduleAtomizeScramble)
+#sched_options.append(optionDuplAtomizeReschedule)
+#sched_options.append(optionDuplAtomizeRefuseReschedule)
+#sched_options.append(optionDuplRescheduleAtomize)
+#sched_options.append(optionDuplRescheduleAtomizeScramble)
+#sched_options.append(optionSchedIteration)
+#sched_options.append(optionAtomizeRescheduleNoSqrt)
+#sched_options.append(optionAtomizeRescheduleNoDiv)
+#sched_options.append(optionAtomizeRescheduleNoSqrtDiv)
+#sched_options.append(optionAtomizeRescheduleNoPiecewise)
+#sched_options.append(optionAtomizeRescheduleNoAll)
+
+print("warmup")
+bench_kernels(mu_kernel, phi_kernel)
+dh.swap('mu_src', 'mu_dst')
+dh.swap('phi_src', 'phi_dst')
+
+for sched_option in sched_options:
+    mu_full_opt_eqs = sched_option(mu_full_eqs)
+    phi_full_opt_eqs = sched_option(phi_full_eqs)
+
+
+    #mu_partial1_opt_eqs = sched_option(mu_partial1_eqs)
+    #mu_partial2_opt_eqs = sched_option(mu_partial2_eqs)
+    #phi_partial1_opt_eqs = sched_option(phi_partial1_eqs)
+    #phi_partial2_opt_eqs = sched_option(phi_partial2_eqs)
+
+    mu_full_opt_kernel = ps.create_kernel(mu_full_opt_eqs, target='gpu', **optimization).compile()
+    phi_full_opt_kernel = ps.create_kernel(phi_full_opt_eqs, target='gpu', **optimization).compile()
+
+    #mu_partial1_opt_kernel = ps.create_staggered_kernel(mu_stag,
+    #    mu_partial1_opt_eqs, target='gpu', **optimization).compile()
+    #mu_partial2_opt_kernel = ps.create_kernel(
+    #    mu_partial2_opt_eqs, target='gpu', **optimization).compile()
+
+    #phi_partial1_opt_kernel = ps.create_kernel(
+    #    phi_partial1_opt_eqs, target='gpu', **optimization).compile()
+    #phi_partial2_opt_kernel = ps.create_kernel(
+    #    phi_partial2_opt_eqs, target='gpu', **optimization).compile()
+
+    bench_kernels(mu_full_opt_kernel, phi_kernel)
+    #bench_kernels(mu_partial1_opt_kernel, phi_kernel)
+    #bench_kernels(mu_partial2_opt_kernel, phi_kernel)
+
+    bench_kernels(mu_kernel, phi_full_opt_kernel)
+    #bench_kernels(mu_kernel, phi_partial1_opt_kernel)
+    #bench_kernels(mu_kernel, phi_partial2_opt_kernel)
+
+print()
diff --git a/pystencils_tests/liveness_opts/kernel_split.ipynb b/pystencils_tests/liveness_opts/kernel_split.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..2f811e70ce7ec107c8c4c098b9e5576bfdc9153b
--- /dev/null
+++ b/pystencils_tests/liveness_opts/kernel_split.ipynb
@@ -0,0 +1,900 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys \n",
+    "sys.path.append('..')\n",
+    "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 1\n",
+    "%aimport pystencils.simp.liveness_opts\n",
+    "%aimport pystencils.simp.liveness_opts_exp\n",
+    "%aimport pystencils.shmemvar\n",
+    "%aimport pystencils.backends.cbackend\n",
+    "%aimport pystencils.transformations\n",
+    "\n",
+    "\n",
+    "%load_ext line_profiler\n",
+    "\n",
+    "from IPython.core.display import display, HTML\n",
+    "display(HTML(\"<style>.container { width:100% !important; }</style>\"))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "from lbmpy.session import *\n",
+    "from scipy.ndimage.filters import gaussian_filter\n",
+    "from pygrandchem_tests.config2 import get_system\n",
+    "from pygrandchem_tests.config import get_system as get_system_simple\n",
+    "from pystencils.datahandling import SerialDataHandling\n",
+    "from pygrandchem.grandchem_generation import *\n",
+    "from pygrandchem.chemicalpotential import *\n",
+    "from pystencils import show_code, Field\n",
+    "from sympy import Number, Symbol, Expr, preorder_traversal, postorder_traversal, Function, Piecewise, relational\n",
+    "from pystencils.simp import sympy_cse_on_assignment_list\n",
+    "from pystencils.simp.liveness_opts import *\n",
+    "from pystencils.simp.liveness_opts_exp import *\n",
+    "import random\n",
+    "from pystencils.shmemvar import *\n",
+    "import graphviz\n",
+    "\n",
+    "\n",
+    "import pycuda\n",
+    "\n",
+    "import sys\n",
+    "from subprocess import run, PIPE\n",
+    "\n",
+    "sys.setrecursionlimit(100000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = get_system()\n",
+    "free_energy = FreeEnergy(config['FreeEnergy'], config['Temperature'])\n",
+    "\n",
+    "dh = SerialDataHandling((256, 256, 256), periodicity=(True, True, False))\n",
+    "f = dh.fields\n",
+    "dh.add_array('phi_src', values_per_cell=4, layout='fzyx')\n",
+    "dh.add_array('mu_src', values_per_cell=2, layout='fzyx')\n",
+    "dh.add_array_like('phi_dst', 'phi_src')\n",
+    "dh.add_array_like('mu_dst', 'mu_src')\n",
+    "dh.add_array('c', values_per_cell=2, layout='fzyx')\n",
+    "\n",
+    "diffusion_matrices = np.zeros([4, 2, 2])\n",
+    "diffusion_matrices[0] = config['Parameters']['da']\n",
+    "diffusion_matrices[1] = config['Parameters']['db']\n",
+    "diffusion_matrices[2] = config['Parameters']['dg']\n",
+    "diffusion_matrices[3] = config['Parameters']['dl']\n",
+    "\n",
+    "f = dh.fields\n",
+    "\n",
+    "#update_eqs = create_phi_update_equations(\n",
+    "#    f['phi_src'],\n",
+    "#    f['phi_dst'],\n",
+    "#    f['mu_src'],\n",
+    "#    free_energy,\n",
+    "#    config['Parameters'],\n",
+    "#    simplex_projection=True)\n",
+    "\n",
+    "update_eqs = create_mu_update_equations(\n",
+    "    f['phi_src'], f['phi_dst'], f['mu_src'], f['mu_dst'], free_energy,\n",
+    "    diffusion_matrices, config['Parameters'])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for eq in update_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "update_eqs = sympy_cse_on_assignment_list(update_eqs)\n",
+    "for eq in update_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "update_eqs = merge_field_accesses(update_eqs)\n",
+    "\n",
+    "for eq in update_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "update_eqs = schedule_eqs(update_eqs)\n",
+    "for eq in update_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "a = Symbol(\"a\")\n",
+    "b = Symbol(\"b\")\n",
+    "c = Symbol(\"c\")\n",
+    "d = Symbol(\"d\")\n",
+    "\n",
+    "\n",
+    "fake_eqs = [\n",
+    "    Assignment(a, sympy.Add(sympy.Mul(0.1, f['phi_src'][1, 0, 0](0)), sympy.Mul(0.1, f['phi_src'][0, 0, 0](0)))),\n",
+    "    Assignment(b, sympy.Add(sympy.Mul(0.1, f['phi_src'][-1, 0, 0](0)), sympy.Mul(0.1, f['phi_src'][0, 0, 0](0)))),\n",
+    "    Assignment(c, sympy.Add(sympy.Mul(0.1, f['phi_src'][0, 1, 0](0)), sympy.Mul(0.1, f['phi_src'][0, 0, 0](0)))),\n",
+    "    Assignment(d, sympy.Add(sympy.Mul(0.1, f['phi_src'][0, -1, 0](0)), sympy.Mul(0.1, f['phi_src'][0, 0, 0](0)))),\n",
+    "    Assignment(f['phi_dst'][0, 0, 0](0), sympy.Add(a, b, c, d))\n",
+    "]\n",
+    "\n",
+    "fake_eqs = schedule_eqs(atomize_eqs(merge_field_accesses(sympy_cse_on_assignment_list(fake_eqs))))\n",
+    "\n",
+    "for eq in fake_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def split_eqs(eqs):\n",
+    "    \"\"\"Partition the lhs symbols of eqs into (top_sym, bottom_sym) with an iterative force heuristic.\"\"\"\n",
+    "    top_sym = []\n",
+    "    bottom_sym = [eq.lhs for eq in eqs]\n",
+    "    random.shuffle(bottom_sym)\n",
+    "\n",
+    "    # Relies on the module-level mappings used_nodes / used_by; at most 10 symbols are moved.\n",
+    "\n",
+    "\n",
+    "    for _ in range(0, 10):\n",
+    "        forces = { sym : (len(bottom_sym)) / (len(bottom_sym) + len(top_sym))  for sym in bottom_sym }\n",
+    "        forces.update ({ sym : -(len(top_sym)) / (len(bottom_sym) + len(top_sym))  for sym in top_sym })\n",
+    "        for _sweep in range(0, 2):  # two relaxation sweeps over all symbols\n",
+    "            for sym in bottom_sym + top_sym:\n",
+    "                if sym in bottom_sym:\n",
+    "                    new_force = (len(bottom_sym)) / (len(bottom_sym) + len(top_sym))\n",
+    "                if sym in top_sym:\n",
+    "                    new_force = -(len(top_sym)) / (len(bottom_sym) + len(top_sym))\n",
+    "                for dep in used_nodes[sym]:\n",
+    "                    if dep in top_sym:\n",
+    "                        new_force += forces[dep]\n",
+    "                for user in used_by.get(sym, []):\n",
+    "                    new_force += forces[user]\n",
+    "                forces[sym] = new_force / 4\n",
+    "        \n",
+    "        strongest_upforce = 0\n",
+    "        strongest_upforced_node = None\n",
+    "        for f in bottom_sym:  # may move up only if everything it uses is already in the top set\n",
+    "            if forces[f] > strongest_upforce and set(used_nodes[f]) <= set(top_sym):\n",
+    "                strongest_upforce = forces[f]\n",
+    "                strongest_upforced_node = f\n",
+    "                \n",
+    "                \n",
+    "        strongest_downforce = 0\n",
+    "        strongest_downforced_node = None\n",
+    "        for f in top_sym:  # may move down only if all of its users are in the bottom set\n",
+    "            if forces[f] < strongest_downforce and set(used_by.get(f, [])) <= set(bottom_sym):\n",
+    "                strongest_downforce = forces[f]\n",
+    "                strongest_downforced_node = f\n",
+    "        print(strongest_downforced_node)\n",
+    "        print(strongest_upforced_node)\n",
+    "        print()\n",
+    "        if strongest_upforced_node is None and strongest_downforced_node is None: break  # nothing can move\n",
+    "        if (abs(strongest_downforce) < abs(strongest_upforce)):\n",
+    "            top_sym.append(strongest_upforced_node)\n",
+    "            bottom_sym.remove(strongest_upforced_node)\n",
+    "        else:\n",
+    "            bottom_sym.append(strongest_downforced_node)\n",
+    "            top_sym.remove(strongest_downforced_node)\n",
+    "    \n",
+    "    return top_sym, bottom_sym\n",
+    "\n",
+    "top_sym, bottom_sym = split_eqs(update_eqs)\n",
+    "\n",
+    "print(top_sym)\n",
+    "print(bottom_sym)\n",
+    "\n",
+    "graph = graphviz.Digraph(engine='dot')\n",
+    "\n",
+    "with graph.subgraph(name=\"cluster_top\") as c:\n",
+    "    for sym in top_sym:\n",
+    "        c.node(sym.name)\n",
+    "\n",
+    "with graph.subgraph(name=\"cluster_bottom\") as c:\n",
+    "    for sym in bottom_sym:\n",
+    "        c.node(sym.name)\n",
+    "    \n",
+    "\n",
+    "for eq in update_eqs:\n",
+    "    for arg in eq.rhs.atoms():\n",
+    "        if isinstance(arg, sympy.Symbol) and not isinstance(arg, Field.Access):\n",
+    "            graph.edge(arg.name, eq.lhs.name)\n",
+    "                \n",
+    "graph\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_ancestors(eqs, eq, used_nodes):\n",
+    "    ancestors = set()\n",
+    "    definitions = get_definitions(eqs)\n",
+    "    def walk_up(eq):\n",
+    "        for atom in used_nodes[eq.lhs]:\n",
+    "            if isinstance(atom, Symbol) and atom not in ancestors:\n",
+    "                ancestors.add(atom)\n",
+    "            if atom in definitions:\n",
+    "                walk_up(definitions[atom])\n",
+    "    \n",
+    "    walk_up(eq)\n",
+    "    return ancestors\n",
+    "\n",
+    "def get_leaving_edge_count(eqs, eq):\n",
+    "    used_nodes = get_used_nodes(eqs)\n",
+    "    ancestors = get_ancestors(eqs, eq, used_nodes)\n",
+    "    used_by = get_used_by(eqs)\n",
+    "    leaving_edges = 0\n",
+    "    accounted_edges = set()\n",
+    "    for anc in ancestors:\n",
+    "        for u in used_by[anc]:\n",
+    "            if u not in ancestors and u not in accounted_edges:\n",
+    "                leaving_edges += 1\n",
+    "                accounted_edges.add(u)\n",
+    "    return leaving_edges\n",
+    "\n",
+    "for eq in update_eqs:\n",
+    "    leaving_edges_count = get_leaving_edge_count(update_eqs, eq)\n",
+    "    if leaving_edges_count <= 2:\n",
+    "        print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "used_by = get_used_by(update_eqs)\n",
+    "definitions = get_definitions(update_eqs)\n",
+    "\n",
+    "def get_all_ancestors(eqs):\n",
+    "    all_ancestors = {}\n",
+    "    for eq in eqs:\n",
+    "        ancestors = set()\n",
+    "        for atom in eq.rhs.atoms():\n",
+    "            if isinstance(atom, Symbol) and atom not in ancestors:\n",
+    "                ancestors.add(atom)\n",
+    "            if atom in all_ancestors:\n",
+    "                ancestors.update(all_ancestors[atom])\n",
+    "        all_ancestors[eq] = ancestors\n",
+    "    return all_ancestors\n",
+    "        \n",
+    "def get_edge_cut_count(top_set, used_by, definitions):\n",
+    "    out_edge_count = 0\n",
+    "    for eq in top_set:\n",
+    "        for u in used_by[eq.lhs]:\n",
+    "            if definitions[u] not in top_set:\n",
+    "                out_edge_count += 1\n",
+    "                break\n",
+    "                \n",
+    "    return out_edge_count\n",
+    "        \n",
+    "def get_eligible_eqs(top_set, eqs, all_ancestors):\n",
+    "    eligible_eqs = set()\n",
+    "    for eq in eqs:\n",
+    "        if eq in top_set: continue\n",
+    "        ancestors = all_ancestors[eq]\n",
+    "        eligible = True\n",
+    "        for anc in ancestors:\n",
+    "            if not isinstance(anc, Field.Access) and not definitions[anc] in top_set:\n",
+    "                eligible = False\n",
+    "                break\n",
+    "        if eligible: \n",
+    "            eligible_eqs.add(eq)\n",
+    "    return eligible_eqs\n",
+    "            \n",
+    "    \n",
+    "\n",
+    "largest_top_set = dict()\n",
+    "used_by = get_used_by(update_eqs)\n",
+    "used_nodes = get_used_nodes(update_eqs)\n",
+    "\n",
+    "\n",
+    "\n",
+    "def all_top_sets(top_set, eligible_eqs):  # recursive search for the largest top set with at most 3 cut edges\n",
+    "    largest_top_set = copy.copy(top_set)\n",
+    "    eq_list = list(eligible_eqs)\n",
+    "    random.shuffle(eq_list)\n",
+    "    for e in eq_list[:50]:  # use the shuffled list: a random sample of at most 50 eligible equations\n",
+    "        top_set.add(e)\n",
+    "        new_eligible_eqs = copy.copy(eligible_eqs)\n",
+    "        new_eligible_eqs.remove(e)\n",
+    "        for u in used_by[e.lhs]:  # users of e become eligible once all their non-field inputs are in top_set\n",
+    "            eligible = True\n",
+    "            for atom in used_nodes[u]:\n",
+    "                if not isinstance(atom, Field.Access) and definitions[atom] not in top_set:\n",
+    "                    eligible = False\n",
+    "                    break\n",
+    "            if eligible:\n",
+    "                new_eligible_eqs.add(definitions[u])\n",
+    "        edge_cuts = get_edge_cut_count(top_set, used_by, definitions)\n",
+    "        if edge_cuts <= 3:\n",
+    "            new_top_set = all_top_sets(top_set, new_eligible_eqs)\n",
+    "            if len(new_top_set) > len(largest_top_set):\n",
+    "                largest_top_set = copy.copy(new_top_set)\n",
+    "\n",
+    "        top_set.remove(e)  # backtrack before trying the next candidate\n",
+    "    print(len(largest_top_set))\n",
+    "    return largest_top_set\n",
+    "\n",
+    "all_ancestors = get_all_ancestors(update_eqs)\n",
+    "used_by = get_used_by(update_eqs)\n",
+    "definitions = get_definitions(update_eqs)\n",
+    "top_set = set()\n",
+    "eligible_eqs = get_eligible_eqs(top_set, update_eqs, all_ancestors)\n",
+    "\n",
+    "\n",
+    "all_top_sets(top_set, eligible_eqs)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_edge_cut_change(top_set, node):\n",
+    "    edge_cut_change = 1\n",
+    "    for e in used_nodes[node]:\n",
+    "        if isinstance(e, Field.Access): continue\n",
+    "        cut = False\n",
+    "        for e2 in used_by[e]:\n",
+    "            if definitions[e2] not in top_set and e2 != node:\n",
+    "                cut = True\n",
+    "                break\n",
+    "        if not cut: edge_cut_change -= 1\n",
+    "    return edge_cut_change\n",
+    "\n",
+    "def get_new_eligible_eqs(old_top_set, old_eligible_eqs, e):\n",
+    "    new_eligible_eqs = copy.copy(old_eligible_eqs)\n",
+    "    new_eligible_eqs.remove(e)\n",
+    "    for u in used_by[e.lhs]:\n",
+    "        if isinstance(u, Field.Access): continue\n",
+    "        eligible = True\n",
+    "        for u2 in used_nodes[u]:\n",
+    "            if definitions[u2] not in old_top_set and not isinstance(u2, Field.Access) and u2 != e.lhs:\n",
+    "                eligible = False\n",
+    "        if eligible:\n",
+    "            new_eligible_eqs.add(definitions[u])\n",
+    "    return new_eligible_eqs\n",
+    "\n",
+    "            \n",
+    "\n",
+    "def largest_top_set(eqs, breadth):  # beam search keeping the breadth cheapest top sets per step\n",
+    "    used_nodes = get_used_nodes(eqs)\n",
+    "    used_by = get_used_by(eqs)\n",
+    "    all_ancestors = get_all_ancestors(eqs)\n",
+    "    definitions = get_definitions(eqs)\n",
+    "    # NOTE(review): get_edge_cut_change / get_new_eligible_eqs read the module-level used_nodes, used_by, definitions - not these locals.\n",
+    "    top_set_trace = []\n",
+    "    \n",
+    "    top_sets = set([(frozenset(), 0)])\n",
+    "    eligible_eqs_dict = { frozenset(): get_eligible_eqs(frozenset(), eqs, all_ancestors) }\n",
+    "    \n",
+    "    for i in range(0, 1200):\n",
+    "        candidates = []\n",
+    "        for top_set in top_sets:\n",
+    "            for e in eligible_eqs_dict[top_set[0]]:\n",
+    "                candidates.append((top_set[0], e, top_set[1] + get_edge_cut_change(top_set[0], e.lhs)))\n",
+    "        random.shuffle(candidates)  # randomize tie order; the sort below is stable\n",
+    "        candidates.sort(key=lambda c: c[2])\n",
+    "        if not candidates[:breadth]: break  # frontier exhausted: do not re-append a stale (or undefined) c\n",
+    "        top_sets = set()\n",
+    "        new_eligible_eqs_dict = {}\n",
+    "        for c in candidates[0:breadth]:\n",
+    "            new_top_set = frozenset(list(c[0]) + [c[1]] )\n",
+    "            top_sets.add( ( new_top_set, c[2]) )\n",
+    "            new_eligible_eqs_dict[new_top_set] = get_new_eligible_eqs(c[0], eligible_eqs_dict[c[0]], c[1])\n",
+    "       \n",
+    "        eligible_eqs_dict = new_eligible_eqs_dict\n",
+    "        \n",
+    "        top_set_trace.append((frozenset(list(c[0]) + [c[1]]), c[2] ))  # records the last candidate kept in this step\n",
+    "    return top_set_trace\n",
+    "\n",
+    "\n",
+    "def trim_top_set(top_set):\n",
+    "    trimmed_list = list(top_set)\n",
+    "    trimmed_top_set = top_set\n",
+    "    for e in list(top_set):\n",
+    "        on_edge = True\n",
+    "        for u in used_by[e.lhs]:\n",
+    "            if definitions[u] in trimmed_top_set:\n",
+    "                on_edge = False\n",
+    "                break\n",
+    "        if not on_edge: continue\n",
+    "        delta = -1\n",
+    "        for u in used_nodes[e.lhs]:\n",
+    "            already_cut = False\n",
+    "            for u2 in used_by[u]:\n",
+    "                if definitions[u2] not in trimmed_top_set:\n",
+    "                    already_cut = True\n",
+    "            if not already_cut: delta += 1\n",
+    "        if delta <= 0:            \n",
+    "            trimmed_list.remove(e)\n",
+    "            trimmed_top_set = frozenset(trimmed_list)\n",
+    "    return trimmed_top_set\n",
+    "                    \n",
+    "        \n",
+    "            \n",
+    "\n",
+    "\n",
+    "top_sets = largest_top_set(update_eqs, 1)\n",
+    "plt.plot([t[1] for t in top_sets])\n",
+    "top_sets = largest_top_set(update_eqs, 4)\n",
+    "plt.plot([t[1] for t in top_sets])\n",
+    "top_sets = largest_top_set(update_eqs, 16)\n",
+    "plt.plot([t[1] for t in top_sets])\n",
+    "top_sets = largest_top_set(update_eqs, 1024)\n",
+    "plt.plot([t[1] for t in top_sets])\n",
+    "\n",
+    "#trimmed_top_sets = [(trim_top_set(t[0]), 0 ) for t in top_sets]\n",
+    "#retrimmed_top_sets = [(trim_top_set(t[0]), 0 ) for t in trimmed_top_sets]\n",
+    "#reretrimmed_top_sets = [(trim_top_set(t[0]), 0 ) for t in retrimmed_top_sets]\n",
+    "#rereretrimmed_top_sets = [(trim_top_set(t[0]), 0 ) for t in reretrimmed_top_sets]\n",
+    "#rerereretrimmed_top_sets = [(trim_top_set(t[0]), 0 ) for t in rereretrimmed_top_sets]\n",
+    "\n",
+    "\n",
+    "\n",
+    "#plt.plot([get_edge_cut_count(t[0], used_by, definitions) for t in trimmed_top_sets])\n",
+    "#plt.plot([get_edge_cut_count(t[0], used_by, definitions) for t in retrimmed_top_sets])\n",
+    "#plt.plot([get_edge_cut_count(t[0], used_by, definitions) for t in reretrimmed_top_sets])\n",
+    "#plt.plot([get_edge_cut_count(t[0], used_by, definitions) for t in rereretrimmed_top_sets])\n",
+    "\n",
+    "#plt.plot([len(t[0]) for t in trimmed_top_sets],  [get_edge_cut_count(t[0], used_by, definitions) for t in trimmed_top_sets])\n",
+    "#plt.plot([len(t[0]) for t in retrimmed_top_sets],  [get_edge_cut_count(t[0], used_by, definitions) for t in retrimmed_top_sets])\n",
+    "#plt.plot([len(t[0]) for t in reretrimmed_top_sets],  [get_edge_cut_count(t[0], used_by, definitions) for t in reretrimmed_top_sets])\n",
+    "#plt.plot([len(t[0]) for t in rereretrimmed_top_sets],  [get_edge_cut_count(t[0], used_by, definitions) for t in rereretrimmed_top_sets])\n",
+    "#plt.plot([len(t[0]) for t in rerereretrimmed_top_sets],  [get_edge_cut_count(t[0], used_by, definitions) for t in rerereretrimmed_top_sets])\n",
+    "\n",
+    "\n",
+    "            \n",
+    "plt.ylim(bottom=0)\n",
+    "plt.xlim(left=1)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Vertex:  # graph node; an unnamed vertex aggregates several original nodes\n",
+    "    def __init__(self, name, nodes=None, parts=None):\n",
+    "        if not nodes:  # no members given: the vertex represents itself\n",
+    "            self.nodes = [self]\n",
+    "        else:\n",
+    "            self.nodes = nodes\n",
+    "        self.name = name\n",
+    "        self.pred = []  # predecessor vertices\n",
+    "        self.succ = []  # successor vertices\n",
+    "        self.top_level = -1  # -1 until a level is assigned\n",
+    "        self.parts = [] if parts is None else parts  # fresh list per instance, never a shared default\n",
+    "    def __str__(self):\n",
+    "        if self.name != \"\": return self.name\n",
+    "        eqstr = self.nodes[0].name  # unnamed vertex: list the member names instead\n",
+    "        ctr = 0\n",
+    "        for n in self.nodes[1:]:\n",
+    "            eqstr += \" + \" + n.name\n",
+    "            ctr += 1\n",
+    "            if ctr % int(math.sqrt(len(self.nodes)/2)+1) == 0:  # wrap the label periodically\n",
+    "                eqstr += \"\\n\"\n",
+    "        eqstr += \", \" + str(self.top_level)\n",
+    "        return eqstr\n",
+    "    \n",
+    "    def __repr__(self):\n",
+    "        return str(self)\n",
+    "    \n",
+    "def comp_top_level(vertices):  \n",
+    "    def rec_comp_top_level(v, top_level):\n",
+    "        v.top_level = top_level\n",
+    "        if top_level > 15: print(str(v) + \" \" + str(top_level))\n",
+    "        for s in v.succ:\n",
+    "            if top_level > 25: break\n",
+    "            if top_level >= s.top_level or s.top_level == -1:\n",
+    "                rec_comp_top_level(s, top_level + 1)\n",
+    "\n",
+    "    for v in vertices:\n",
+    "        if len(v.pred) == 0:\n",
+    "            rec_comp_top_level(v, 0)\n",
+    "        \n",
+    "def comp_top_level2(vertices):  \n",
+    "    pass\n",
+    "    done_vertices = set()\n",
+    "    rem_vertices = set(vertices)\n",
+    "    \n",
+    "    top_level = 0\n",
+    "    \n",
+    "    while(len(rem_vertices) > 0):\n",
+    "        for v in list(rem_vertices):\n",
+    "            ready = True\n",
+    "            for p in v.pred:\n",
+    "                if p not in done_vertices:\n",
+    "                    ready = False\n",
+    "                    break\n",
+    "            if ready:\n",
+    "                v.top_level = top_level\n",
+    "                top_level += 1\n",
+    "                rem_vertices.remove(v)\n",
+    "                done_vertices.add(v)\n",
+    "                        \n",
+    "def build_vertices(eqs):\n",
+    "    vertices = {}\n",
+    "    for eq in eqs:\n",
+    "        new_vertex = Vertex(str(eq.lhs))\n",
+    "        for atom in eq.rhs.atoms(Symbol):\n",
+    "            if not isinstance(atom, Field.Access): new_vertex.pred.append(vertices[atom])\n",
+    "        vertices[eq.lhs] = new_vertex\n",
+    "    for eq in eqs:\n",
+    "        for atom in eq.rhs.atoms(Symbol):\n",
+    "            if atom in vertices:\n",
+    "                vertices[atom].succ.append(vertices[eq.lhs])\n",
+    "    \n",
+    "    comp_top_level(list(vertices.values()))\n",
+    "    return vertices\n",
+    "\n",
+    "def copy_vertices(vertices):\n",
+    "    translation_dict = {}\n",
+    "    new_vertices = []\n",
+    "    for v in vertices:\n",
+    "        new_vertex = Vertex(v.name, v.nodes, [v])\n",
+    "        translation_dict[v] = new_vertex\n",
+    "        new_vertices.append(new_vertex)\n",
+    "    for v in vertices:\n",
+    "        translation_dict[v].pred = [translation_dict[p] for p in v.pred]\n",
+    "        translation_dict[v].succ = [translation_dict[s] for s in v.succ]\n",
+    "    return new_vertices, translation_dict\n",
+    "        \n",
+    "        \n",
+    "\n",
+    "\n",
+    "def comp_coarse_graph_matching(vertices):  # greedily pair each unmarked vertex with one neighbour\n",
+    "    match = set()\n",
+    "    mark = { v : False for v in vertices }\n",
+    "    for u in vertices:\n",
+    "        if mark[u]: continue\n",
+    "            \n",
+    "        edges = u.succ + u.pred\n",
+    "        edges.sort(key = lambda v : - len(v.nodes))  # neighbours with the most member nodes first\n",
+    "        for v in edges:\n",
+    "            if mark[v]: continue\n",
+    "\n",
+    "            # Matched pairs are stored oriented as (predecessor, successor).\n",
+    "            if v in u.pred:\n",
+    "                if v.top_level != u.top_level-1 and len(v.succ) != 1 and len(u.pred) != 1:\n",
+    "                    continue\n",
+    "                match.add((v,u))\n",
+    "                for w in v.succ:  # also block the predecessor's successors on the next level\n",
+    "                    if v.top_level == w.top_level -1:\n",
+    "                        mark[w] = True\n",
+    "            else:\n",
+    "                if u.top_level != v.top_level-1 and len(v.pred) != 1 and len(u.succ) != 1:\n",
+    "                    continue\n",
+    "                match.add((u,v))\n",
+    "                for w in u.succ:\n",
+    "                    if u.top_level == w.top_level -1:\n",
+    "                        mark[w] = True\n",
+    "            mark[u] = True\n",
+    "            mark[v] = True\n",
+    "            break\n",
+    "    return match\n",
+    "\n",
+    "def comp_coarse_vertices(vertices, match):\n",
+    "    coarse_vertices, translation_dict = copy_vertices(vertices)\n",
+    "    \n",
+    "    for m in match:\n",
+    "\n",
+    "        u = translation_dict[m[0]]\n",
+    "        v = translation_dict[m[1]]\n",
+    "        \n",
+    "\n",
+    "        coarse_vertices.remove(u)\n",
+    "        coarse_vertices.remove(v)\n",
+    "        cv = Vertex(\"\", u.nodes + v.nodes, [m[0],m[1]] )\n",
+    "        coarse_vertices.append(cv)\n",
+    "    \n",
+    "        cv.pred = list(set(u.pred + v.pred))\n",
+    "        cv.succ = list(set(u.succ + v.succ))\n",
+    "        cv.pred.remove(u)\n",
+    "        cv.succ.remove(v)\n",
+    "                \n",
+    "        if u in cv.succ: cv.succ.remove(u)\n",
+    "        if v in cv.pred: cv.pred.remove(v)\n",
+    "        \n",
+    "        preds = u.pred + v.pred\n",
+    "        succs = u.succ + v.succ\n",
+    "    \n",
+    "        for p in cv.pred:\n",
+    "            if u in p.succ: p.succ.remove(u)\n",
+    "            if v in p.succ: p.succ.remove(v)\n",
+    "            if cv not in p.succ: p.succ.append(cv)\n",
+    "            \n",
+    "        for p in cv.succ:\n",
+    "            if u in p.pred: p.pred.remove(u)\n",
+    "            if v in p.pred: p.pred.remove(v)\n",
+    "            if cv not in p.pred: p.pred.append(cv)\n",
+    "                    \n",
+    "    comp_top_level(coarse_vertices)\n",
+    "    return coarse_vertices\n",
+    "        \n",
+    "def compute_edge_cost(vertices):\n",
+    "    edge_costs = {}\n",
+    "    for u in vertices:\n",
+    "        for v in u.succ:\n",
+    "            req_nodes = set()\n",
+    "            for n in v.nodes: req_nodes.update(n.pred)\n",
+    "            av_nodes = req_nodes.intersection(set(u.nodes))\n",
+    "            edge_costs[(u,v)] = len(av_nodes)\n",
+    "    return edge_costs\n",
+    "            \n",
+    "            \n",
+    "            \n",
+    "    \n",
+    "def make_graphviz(vertices):\n",
+    "    graph = graphviz.Digraph(engine='dot')\n",
+    "    for v in vertices:\n",
+    "        for p in v.pred:\n",
+    "            graph.edge(str(p), str(v), spline=\"none\")\n",
+    "    return graph\n",
+    "\n",
+    "def make_dual_graphviz(vertices):\n",
+    "    graph = graphviz.Digraph(engine='dot')\n",
+    "    for v in vertices:\n",
+    "        for p in v.pred:\n",
+    "            graph.edge(str(p), str(v))\n",
+    "        for p in v.succ:\n",
+    "            graph.edge(str(v), str(p), arrowhead=\"odot\")\n",
+    "    return graph\n",
+    "\n",
+    "def make_graphviz_topset(vertices, topset):\n",
+    "    graph = graphviz.Digraph(engine='dot')\n",
+    "            \n",
+    "    with graph.subgraph(name=\"cluster_top\") as c:\n",
+    "        for v in topset:\n",
+    "            c.node(str(v), color=\"azure2\", style=\"filled\")\n",
+    "    with graph.subgraph(name=\"cluster_bottom\") as c:        \n",
+    "        for v in vertices:\n",
+    "            if v not in topset:\n",
+    "                c.node(str(v))\n",
+    "                \n",
+    "    for v in vertices:\n",
+    "        for p in v.pred:\n",
+    "            graph.edge(str(p), str(v), spline=\"none\")\n",
+    "    return graph\n",
+    "\n",
+    "\n",
+    "def make_graphviz_with_edge_cost(vertices, edge_cost):\n",
+    "    graph = graphviz.Digraph(engine='dot')\n",
+    "    for v in vertices:\n",
+    "        for p in v.pred:\n",
+    "            graph.edge(str(p), str(v), taillabel=str(edge_cost[(p,v)]))\n",
+    "    return graph\n",
+    "\n",
+    "def visualize_matching(vertices, match):\n",
+    "    graph = graphviz.Digraph(engine='dot')\n",
+    "    for v in vertices:\n",
+    "        for p in v.pred:\n",
+    "            if (p, v) in match:\n",
+    "                graph.edge(str(p), str(v), color=\"red\", weight=\"200\")\n",
+    "            else:\n",
+    "                graph.edge(str(p), str(v), weight = \"1\")\n",
+    "    return graph\n",
+    "\n",
+    "def visualize_matching2(vertices, match):\n",
+    "    graph = graphviz.Digraph(engine='dot')\n",
+    "    for m in match:\n",
+    "        v = m[1]\n",
+    "        for p in v.pred:\n",
+    "            if (p, v) in match:\n",
+    "                graph.edge(str(p), str(v), color=\"red\", weight=\"200\")\n",
+    "            else:\n",
+    "                graph.edge(str(p), str(v), weight=\"1\")\n",
+    "        for p in v.succ:\n",
+    "            if (v, p) in match:\n",
+    "                graph.edge(str(v), str(p), color=\"red\", weight=\"200\")\n",
+    "            else:\n",
+    "                graph.edge(str(v), str(p), weight=\"1\")\n",
+    "    return graph\n",
+    "\n",
+    "\n",
+    "vertices = list(build_vertices(update_eqs).values())\n",
+    "#random.shuffle(vertices)\n",
+    "\n",
+    "\n",
+    "#vlevels = [vertices]\n",
+    "\n",
+    "vlevels = []\n",
+    "vlevels.append(vertices)\n",
+    "print(len(vlevels[-1]))\n",
+    "for i in range(1,25):\n",
+    "    vlevels[-1].sort(key = lambda v: len(v.nodes))\n",
+    "    match = comp_coarse_graph_matching(vlevels[-1])\n",
+    "    vlevels.append(comp_coarse_vertices(vlevels[-1], match))\n",
+    "    print(len(vlevels[-1]))\n",
+    "\n",
+    "edge_costs = [compute_edge_cost(vlevel) for vlevel in vlevels]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def refine_topset(vertices, topset, target_size):  # greedy local search; mutates and returns topset, shuffles vertices in place\n",
+    "    while(True):\n",
+    "        topset_symbols = sum( [v.nodes for v in topset], [])\n",
+    "        reqs = {}  # top-set node -> number of uses by nodes outside the top set\n",
+    "        for v in vertices:\n",
+    "            if v in topset: continue\n",
+    "            for n in v.nodes:\n",
+    "                for p in n.pred:\n",
+    "                    if p not in topset_symbols: continue\n",
+    "                    if p not in reqs: reqs[p] = 0\n",
+    "                    reqs[p] += 1\n",
+    "            \n",
+    "        edges = 0  # number of top-set nodes that are needed outside the top set\n",
+    "        for t in topset:\n",
+    "            for n in t.nodes:\n",
+    "                if n in reqs:\n",
+    "                    edges += 1\n",
+    "\n",
+    "        print(\"edges: \" + str(edges))\n",
+    "        print(\"size: \" + str(len(topset_symbols)))\n",
+    "        \n",
+    "        best_v = None\n",
+    "        best_gain = -1\n",
+    "        best_direction = 0\n",
+    "        random.shuffle(vertices)\n",
+    "        for v in vertices:\n",
+    "            if v in topset:\n",
+    "                free = True  # removable only if no successor is in the top set\n",
+    "                for s in v.succ:\n",
+    "                    if s in topset: \n",
+    "                        free = False\n",
+    "                        break\n",
+    "                if not free: continue\n",
+    "                edge_inc = len(set().union(*[set(n.pred) for n in v.nodes]) - set(v.nodes))\n",
+    "                edge_dec = sum([n in reqs for n in v.nodes])\n",
+    "                edge_delta = edge_inc - edge_dec\n",
+    "                size_delta = -len(v.nodes)\n",
+    "                direction = -1\n",
+    "            else:\n",
+    "                free = True  # addable only if every predecessor is already in the top set\n",
+    "                for p in v.pred:\n",
+    "                    if p not in topset: \n",
+    "                        free = False\n",
+    "                        break\n",
+    "                if not free: continue\n",
+    "                edge_inc = len(set().union(*[set(n.pred) for s in v.succ for n in s.nodes]).intersection(set(v.nodes)))\n",
+    "                edge_dec = sum([reqs.get(r, 0) == 1 for r in set().union(*[set(n.pred) for n in v.nodes]) ])\n",
+    "                edge_delta = edge_inc - edge_dec\n",
+    "                size_delta = len(v.nodes)\n",
+    "                direction = 1\n",
+    "            \n",
+    "            topset_size = sum([len(t.nodes) for t in topset])\n",
+    "            size_gain = - min(0, (topset_size - target_size) / target_size) + min(0, (topset_size - target_size + size_delta) / target_size)   # ((topset_size - target_size) / target_size)**2  - ((topset_size - target_size + size_delta) / target_size)**2\n",
+    "            if topset_size == 0:\n",
+    "                edge_gain = - edge_delta / size_delta\n",
+    "            elif topset_size + size_delta == 0:\n",
+    "                edge_gain = -10    \n",
+    "            else:\n",
+    "                edge_gain = edges / topset_size - (edges + edge_delta) / (topset_size + size_delta)\n",
+    "            #print(v)\n",
+    "            #print(edge_gain)\n",
+    "            #print(size_gain)\n",
+    "            #print()\n",
+    "            total_gain =  edge_gain + size_gain\n",
+    "            if total_gain > best_gain:\n",
+    "                best_gain = total_gain\n",
+    "                best_v = v\n",
+    "                best_direction = direction\n",
+    "                \n",
+    "        print(best_gain)\n",
+    "        print(best_v)\n",
+    "        print(best_direction)\n",
+    "        print()\n",
+    "        if best_v is None: break  # no movable vertex beat the initial gain: never put None into topset\n",
+    "\n",
+    "        if best_v in topset:\n",
+    "            topset.remove(best_v)\n",
+    "        else:\n",
+    "            topset.append(best_v)\n",
+    "\n",
+    "        if best_gain < 0:\n",
+    "            break\n",
+    "                        \n",
+    "    return topset\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "topset = refine_topset(vlevels[-1], [], 50)\n",
+    "\n",
+    "\n",
+    "for l in range(-2, -26, -1):\n",
+    "    print(\"New level\")\n",
+    "    topset = sum([t.parts for t in topset], [])\n",
+    "    topset = refine_topset(vlevels[l], topset, 50)    \n",
+    "    \n",
+    "make_graphviz_topset(vlevels[l], topset)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "celltoolbar": "Raw Cell Format",
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pystencils_tests/liveness_opts/liveness_evo_opt.ipynb b/pystencils_tests/liveness_opts/liveness_evo_opt.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..afb35f23d6375647352e35da11a8b37b7850f279
--- /dev/null
+++ b/pystencils_tests/liveness_opts/liveness_evo_opt.ipynb
@@ -0,0 +1,184 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys \n",
+    "sys.path.append('..')\n",
+    "\n",
+    "#%load_ext autoreload\n",
+    "#%autoreload 1\n",
+    "#%aimport pystencils.simp.liveness_opts\n",
+    "#%aimport pystencils.simp.liveness_opts_exp\n",
+    "#%aimport pystencils.simp.liveness_permutations\n",
+    "\n",
+    "\n",
+    "#%load_ext line_profiler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lbmpy.session import *\n",
+    "from scipy.ndimage.filters import gaussian_filter\n",
+    "from pygrandchem_tests.config2 import get_system\n",
+    "from pygrandchem_tests.config import get_system as get_system_simple\n",
+    "from pystencils.datahandling import SerialDataHandling\n",
+    "from pygrandchem.grandchem_generation import *\n",
+    "from pygrandchem.chemicalpotential import *\n",
+    "from pystencils import show_code, Field\n",
+    "from sympy import Symbol, Expr, preorder_traversal, postorder_traversal, Function, Piecewise, relational\n",
+    "from pystencils.simp import sympy_cse_on_assignment_list\n",
+    "from pystencils.simp.liveness_opts import *\n",
+    "from pystencils.simp.liveness_opts_exp import *\n",
+    "import matplotlib.pyplot as plt\n",
+    "from pystencils.backends.cbackend import generate_c\n",
+    "import pycuda.driver as drv\n",
+    "\n",
+    "import graphviz\n",
+    "\n",
+    "from pystencils.simp.liveness_permutations import *\n",
+    "\n",
+    "import pycuda\n",
+    "import pycuda.autoinit  # NOQA\n",
+    "from pycuda.compiler import SourceModule\n",
+    "\n",
+    "import sys\n",
+    "from subprocess import run, PIPE\n",
+    "\n",
+    "sys.setrecursionlimit(100000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = get_system()\n",
+    "free_energy = FreeEnergy(config['FreeEnergy'], config['Temperature'])\n",
+    "\n",
+    "dh = SerialDataHandling((512, 512, 128), periodicity=(True, True, False))\n",
+    "f = dh.fields\n",
+    "dh.add_array('phi_src', values_per_cell=4, layout='fzyx', gpu=True)\n",
+    "dh.add_array('mu_src', values_per_cell=2, layout='fzyx', gpu=True)\n",
+    "dh.add_array_like('phi_dst', 'phi_src', gpu=True)\n",
+    "dh.add_array_like('mu_dst', 'mu_src', gpu=True)\n",
+    "\n",
+    "dh.add_array('c', values_per_cell=2, layout='fzyx', gpu=True)\n",
+    "\n",
+    "# In[34]:\n",
+    "\n",
+    "diffusion_matrices = np.zeros([4, 2, 2])\n",
+    "diffusion_matrices[0] = config['Parameters']['da']\n",
+    "diffusion_matrices[1] = config['Parameters']['db']\n",
+    "diffusion_matrices[2] = config['Parameters']['dg']\n",
+    "diffusion_matrices[3] = config['Parameters']['dl']\n",
+    "\n",
+    "f = dh.fields\n",
+    "\n",
+    "mu_eqs = create_mu_update_equations(\n",
+    "    f['phi_src'], f['phi_dst'], f['mu_src'], f['mu_dst'], free_energy,\n",
+    "    diffusion_matrices, config['Parameters'])\n",
+    "\n",
+    "phi_eqs = create_phi_update_equations(\n",
+    "    f['phi_src'],\n",
+    "    f['phi_dst'],\n",
+    "    f['mu_src'],\n",
+    "    free_energy,\n",
+    "    config['Parameters'],\n",
+    "    simplex_projection=True)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sample = livenessOptSequence()\n",
+    "sample.opts = [[scheduleEqs, [6]], [fuseFMAs, [1]], [scrambleEqs, [1000]],\n",
+    "               [moveBackward, []], [refuseEqs, [1,1]],\n",
+    "               [mergeFieldAccesses, []], [refuseEqs, [4,3]]]\n",
+    "\n",
+    "sample2 = livenessOptSequence()\n",
+    "sample2.opts = [[scheduleEqs, [6]], [mergeFieldAccesses, []],\n",
+    "                [moveBackward, []], [refuseEqs, [1, 1]],\n",
+    "                [fuseFMAs, [1]], [scrambleEqs, [968]],\n",
+    "                [refuseEqs, [4, 3]], [moveBackward, []],\n",
+    "                [scrambleEqs, [1114]], [fuseFMAs, [1]], [fuseSubs, []] ]\n",
+    "\n",
+    "sample3 = livenessOptSequence()\n",
+    "sample3.opts = [[scheduleEqs, [1]], [mergeFieldAccesses, []], [refuseEqs, [1, 3]] ]\n",
+    "\n",
+    "sample4 = livenessOptSequence()\n",
+    "sample4.opts = [[scheduleEqs, [1]], [mergeFieldAccesses, []], [scrambleEqs, [1000]], [refuseEqs, [1, 3]] ]\n",
+    "\n",
+    "sample5 = livenessOptSequence()\n",
+    "sample5.opts = [[scheduleEqs, [10]], [mergeFieldAccesses, []], [refuseEqs, [2, 3]] ]\n",
+    "\n",
+    "\n",
+    "pop = [sample, sample2, sample3, sample4, sample5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "while True:\n",
+    "    pop = evolvePopulation(pop, [mu_eqs, phi_eqs], dh)\n",
+    "    print()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pystencils_tests/liveness_opts/liveness_evo_opt.py b/pystencils_tests/liveness_opts/liveness_evo_opt.py
new file mode 100644
index 0000000000000000000000000000000000000000..c13042672ea1e011a52b49c473b69897e9ff3a89
--- /dev/null
+++ b/pystencils_tests/liveness_opts/liveness_evo_opt.py
@@ -0,0 +1,195 @@
+# coding: utf-8
+
+# In[32]:
+
+
+import pickle
+import warnings
+import pystencils as ps
+from pygrandchem.grandchem import GrandChemGenerator
+from pygrandchem.scenarios import system_4_2, system_3_1
+from pygrandchem.initialization import init_boxes, smooth_fields
+from pygrandchem.scenarios import benchmark_configs
+
+from sympy import Number, Symbol, Expr, preorder_traversal, postorder_traversal, Function, Piecewise, relational
+from pystencils.simp import sympy_cse_on_assignment_list
+from pystencils.simp.liveness_opts import *
+from pystencils.simp.liveness_opts_exp import *
+
+from pystencils.simp.liveness_permutations import *
+
+
+import pycuda
+
+import sys
+from subprocess import run, PIPE
+
+from pystencils import show_code
+import pycuda.driver as drv
+
+
+configs = benchmark_configs()
+
+def get_config(name):  # look up one entry of the module-level `configs` (from benchmark_configs()) by key
+    return configs[name]
+
+
+# Domain extent and periodicity tuple handed to ps.create_data_handling() further down.
+domain_size = (512, 512, 128)
+periodicity = (True, True, False)
+# Extra keyword arguments for ps.create_kernel() when building phi_kernel and mu_kernel.
+optimization = {'gpu_indexing_params': {"block_size": (32, 4, 2)}}
+
+
+if len(sys.argv) < 4:
+    print("Kernel, config and div/sqrt arguments are required")
+    exit()
+
+config = get_config(sys.argv[2])
+print ("optimizing for config " + sys.argv[2])
+
+approx = False
+if sys.argv[3] == "true":
+    approx = True
+
+
+phases, components = config['Parameters']['phases'], config['Parameters']['components']
+format_args = {'p': phases, 'c': components, 's': ','.join(str(e) for e in domain_size)}
+
+# Adding fields
+dh = ps.create_data_handling(domain_size, periodicity=periodicity, default_target='gpu')
+f = dh.fields
+phi_src = dh.add_array(
+    'phi_src', values_per_cell=config['Parameters']['phases'], layout='fzyx', latex_name='phi_s')
+mu_src = dh.add_array(
+    'mu_src', values_per_cell=config['Parameters']['components'], layout='fzyx', latex_name="mu_s")
+mu_stag = dh.add_array(
+    'mu_stag', values_per_cell=(dh.dim, config['Parameters']['components']), layout='f')
+phi_stag = dh.add_array('phi_stag', values_per_cell=(dh.dim, phases), layout='f')
+
+phi_dst = dh.add_array_like('phi_dst', 'phi_src')
+mu_dst = dh.add_array_like('mu_dst', 'mu_src')
+
+gc = GrandChemGenerator(
+    phi_src,
+    phi_dst,
+    mu_src,
+    mu_dst,
+    config['FreeEnergy'],
+    config['Parameters'],
+    #conc=c,
+    mu_staggered=mu_stag,
+    phi_staggered=phi_stag,
+    use_block_offsets=False,
+    compile_kernel=False,
+    fast_divisions=approx,
+    fast_sqrts=approx)
+
+mu_full_eqs = gc.mu_full()
+phi_full_eqs = gc.phi_full()
+
+
+staggered_params = None
+
+
+if sys.argv[1] == "phi_full":
+    eqs = phi_full_eqs
+
+elif sys.argv[1] == "mu_full":
+    eqs = mu_full_eqs
+elif sys.argv[1] == "mu_partial1":
+    staggered_params = gc.mu_partial1()
+elif sys.argv[1] == "mu_partial2":
+    eqs = gc.mu_partial2()
+elif sys.argv[1] == "phi_partial1":
+    staggered_params = gc.phi_partial1()
+elif sys.argv[1] == "phi_partial2":
+    eqs = gc.phi_partial2()
+else:
+    print("Specified kernel does not exist")
+    exit()
+
+
+if not staggered_params is None:
+    eqs = unpack_staggered_eqs(*staggered_params)
+
+# Compile the two full kernels with the `optimization` settings; bench_kernels() is called with these.
+phi_kernel = ps.create_kernel(phi_full_eqs, target='gpu', **optimization).compile()
+mu_kernel = ps.create_kernel(mu_full_eqs, target='gpu', **optimization).compile()
+
+c = dh.add_array('c', values_per_cell=config['Parameters']['components'], layout='fzyx', gpu=False)
+# NOTE(review): init_boxes/smooth_fields presumably initialise and smooth the field data - confirm.
+init_boxes(dh)
+#initialize_concentration_field(dh, free_energy, config['Parameters']['initial_concentration'])
+smooth_fields(dh, sigma=0.4, iterations=5, dim=dh.dim)
+dh.synchronization_function(['phi_src', 'phi_dst', 'mu_src', 'mu_dst'])()
+print(dh)
+
+
+def bench_kernels(mu_kernel, phi_kernel):
+
+    start = drv.Event()
+    end = drv.Event()
+
+    dh.run_kernel(mu_kernel, timestep=1)
+    start.record()
+    dh.run_kernel(mu_kernel, timestep=1)
+    dh.run_kernel(mu_kernel, timestep=1)
+    end.record()
+    end.synchronize()
+    msec = start.time_till(end) / 2
+    print("mu_kernel: {}  {:5.3f} ms".format(mu_kernel.num_regs, msec))
+
+    dh.run_kernel(phi_kernel, timestep=1)
+    start.record()
+    dh.run_kernel(phi_kernel, timestep=1)
+    dh.run_kernel(phi_kernel, timestep=1)
+    end.record()
+    end.synchronize()
+    msec = start.time_till(end) / 2
+    print("phi_kernel: {}  {:5.3f} ms".format(phi_kernel.num_regs, msec))
+
+
+# Run the benchmark once as a warm-up, then swap the source and destination arrays of both fields.
+print("warmup")
+bench_kernels(mu_kernel, phi_kernel)
+dh.swap('mu_src', 'mu_dst')
+dh.swap('phi_src', 'phi_dst')
+
+
+
+
+
+
+
+bestSeqs = pickle.load(open('best_seq.pickle', 'rb'))
+
+#bestSeqs= {}
+#bestSeqs[( "mu_full", "42_fixT")] =  livenessOptSequence([ [schedule_eqs, [12]], [duplicate_trivial_ops, [1, 1]], [refuse_eqs, [0, 1]],  [var_to_shmem_lt, [3]]],  (64, 4, 2))
+
+#bestSeqs[("phi_full", "42_fixT")] = livenessOptSequence([[schedule_eqs, [34]]],  (16, 2, 2))
+
+
+
+
+if (sys.argv[1], sys.argv[2], sys.argv[3]) in bestSeqs:
+    pop = [bestSeqs[(sys.argv[1], sys.argv[2], sys.argv[3])]]
+elif (sys.argv[1], sys.argv[2]) in bestSeqs:
+    pop = [bestSeqs[(sys.argv[1], sys.argv[2])]]
+else:
+    pop = [livenessOptSequence()]
+
+print("Loaded seq: " + str(pop[0]) + "\n")
+
+
+
+
+while True:
+    pop = evolvePopulation(pop, [eqs], dh, staggered_params)
+    bench_kernels(mu_kernel, phi_kernel)
+
+    bestSeqs = pickle.load(open('best_seq.pickle', 'rb'))
+    bestSeqs[(sys.argv[1], sys.argv[2], sys.argv[3])] = pop[0]
+    pickle.dump(bestSeqs, open('best_seq.pickle', 'wb'))
+    print()
+
diff --git a/pystencils_tests/liveness_opts/reorder_depth.ipynb b/pystencils_tests/liveness_opts/reorder_depth.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..e77ffaab3d2ac2404a2ef47101beb67d9fbc821f
--- /dev/null
+++ b/pystencils_tests/liveness_opts/reorder_depth.ipynb
@@ -0,0 +1,293 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys \n",
+    "sys.path.append('..')\n",
+    "\n",
+    "#%load_ext autoreload\n",
+    "#%autoreload 1\n",
+    "#%aimport pystencils.simp.liveness_opts\n",
+    "#%aimport pystencils.simp.liveness_opts_exp\n",
+    "\n",
+    "#%load_ext line_profiler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lbmpy.session import *\n",
+    "from scipy.ndimage.filters import gaussian_filter\n",
+    "from pygrandchem_tests.config2 import get_system\n",
+    "from pygrandchem_tests.config import get_system as get_system_simple\n",
+    "from pystencils.datahandling import SerialDataHandling\n",
+    "from pygrandchem.grandchem_generation import *\n",
+    "from pygrandchem.chemicalpotential import *\n",
+    "from pystencils import show_code, Field\n",
+    "from sympy import Symbol, Expr, preorder_traversal, postorder_traversal, Function, Piecewise, relational\n",
+    "from pystencils.simp import sympy_cse_on_assignment_list\n",
+    "from pystencils.simp.liveness_opts import *\n",
+    "from pystencils.simp.liveness_opts_exp import *\n",
+    "import matplotlib.pyplot as plt\n",
+    "from pystencils.backends.cbackend import generate_c\n",
+    "import pycuda.driver as drv\n",
+    "\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "import graphviz\n",
+    "\n",
+    "\n",
+    "import pycuda\n",
+    "import pycuda.autoinit  # NOQA\n",
+    "from pycuda.compiler import SourceModule\n",
+    "\n",
+    "import sys\n",
+    "from subprocess import run, PIPE\n",
+    "\n",
+    "sys.setrecursionlimit(100000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def bench_kernel(kernel):\n",
+    "    \n",
+    "    start = drv.Event()\n",
+    "    end = drv.Event()\n",
+    "\n",
+    "    dh.run_kernel(kernel)\n",
+    "    start.record()\n",
+    "    dh.run_kernel(kernel)\n",
+    "    end.record()\n",
+    "    end.synchronize()\n",
+    "    msec = start.time_till(end)\n",
+    "\n",
+    "    return msec\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = get_system()\n",
+    "free_energy = FreeEnergy(config['FreeEnergy'], config['Temperature'])\n",
+    "\n",
+    "dh = SerialDataHandling((128, 128, 128), periodicity=(True, True, False))\n",
+    "f = dh.fields\n",
+    "dh.add_array('phi_src', values_per_cell=4, layout='fzyx', gpu=True)\n",
+    "dh.add_array('mu_src', values_per_cell=2, layout='fzyx', gpu=True)\n",
+    "dh.add_array_like('phi_dst', 'phi_src', gpu=True)\n",
+    "dh.add_array_like('mu_dst', 'mu_src', gpu=True)\n",
+    "\n",
+    "dh.add_array('c', values_per_cell=2, layout='fzyx', gpu=True)\n",
+    "\n",
+    "# In[34]:\n",
+    "\n",
+    "diffusion_matrices = np.zeros([4, 2, 2])\n",
+    "diffusion_matrices[0] = config['Parameters']['da']\n",
+    "diffusion_matrices[1] = config['Parameters']['db']\n",
+    "diffusion_matrices[2] = config['Parameters']['dg']\n",
+    "diffusion_matrices[3] = config['Parameters']['dl']\n",
+    "\n",
+    "f = dh.fields\n",
+    "\n",
+    "eqs = create_mu_update_equations(\n",
+    "    f['phi_src'], f['phi_dst'], f['mu_src'], f['mu_dst'], free_energy,\n",
+    "    diffusion_matrices, config['Parameters'])\n",
+    "\n",
+    "#eqs = create_phi_update_equations(\n",
+    "#    f['phi_src'],\n",
+    "#    f['phi_dst'],\n",
+    "#    f['mu_src'],\n",
+    "#    free_energy,\n",
+    "#    config['Parameters'],\n",
+    "#    simplex_projection=True)\n",
+    "\n",
+    "update_eqs = sympy_cse_on_assignment_list(eqs)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "n = 0\n",
+    "ana_regs_result = []\n",
+    "nvcc_regs_result = []\n",
+    "time_result = []\n",
+    "while n <= 16:\n",
+    "    ana_regs_result.append([])\n",
+    "    nvcc_regs_result.append([])\n",
+    "    time_result.append([])\n",
+    "    for i in range(0, 5):\n",
+    "        if n != 0:\n",
+    "            rescheduled_eqs = var_to_shmem(duplicate_trivial_ops(schedule_eqs(update_eqs, n)), 1)\n",
+    "        else:\n",
+    "            rescheduled_eqs = var_to_shmem(duplicate_trivial_ops(update_eqs), 1)\n",
+    "        aliveAtPeak, aliveRegs = liveness_analysis(rescheduled_eqs)\n",
+    "        ana_regs_result[-1].append(aliveRegs)\n",
+    "\n",
+    "        kernel = create_kernel(\n",
+    "            rescheduled_eqs,\n",
+    "            target=\"gpu\",\n",
+    "            gpu_indexing_params={\n",
+    "                \"block_size\": (64, 2, 1)\n",
+    "            }).compile()\n",
+    "\n",
+    "        nvcc_regs_result[-1].append(kernel.num_regs)\n",
+    "        time = bench_kernel(kernel)\n",
+    "        time_result[-1].append(time)\n",
+    "        \n",
+    "        print(str(aliveRegs) + \" \" + str(kernel.num_regs) + \" \" + str(time) )\n",
+    "    print()\n",
+    "    n = max(n*2, n+1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, ax1 = plt.subplots()\n",
+    "\n",
+    "medians = []\n",
+    "for idx, r in enumerate(ana_regs_result):\n",
+    "    r.sort()\n",
+    "    medians.append(r[len(r)//2])\n",
+    "    ax1.plot( [idx] * len(r), r, \"o\", color=\"C0\")\n",
+    "ax1.plot(medians, color = \"C0\")\n",
+    "\n",
+    "ax2 = ax1.twinx()\n",
+    "\n",
+    "medians = []\n",
+    "for idx, r in enumerate(nvcc_regs_result):\n",
+    "    r.sort()\n",
+    "    medians.append(r[len(r)//2])\n",
+    "    ax2.plot( [idx] * len(r), r, \"x\", color=\"C1\")\n",
+    "ax2.plot(medians, color = \"C1\")\n",
+    "\n",
+    "medians = []\n",
+    "for idx, r in enumerate(time_result):\n",
+    "    r = [r/15 for r in r]\n",
+    "    r.sort()\n",
+    "    medians.append(r[len(r)//2])\n",
+    "    ax1.plot( [idx] * len(r), r, \"+\", color=\"C2\")\n",
+    "ax1.plot(medians, color = \"C2\")\n",
+    "\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sweep reorder depth and shmem count; record nvcc register count and runtime per combination.\n",
+    "reorder_depths = [0, 1, 4 , 16, 32, 64]\n",
+    "shmem_counts = [0, 1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]\n",
+    "\n",
+    "nvcc_regs = np.zeros((len(reorder_depths), len(shmem_counts)))\n",
+    "times = np.zeros((len(reorder_depths), len(shmem_counts)))\n",
+    "\n",
+    "for didx, reorder_depth in enumerate(reorder_depths):\n",
+    "    for sidx, shmem_count in enumerate(shmem_counts):\n",
+    "        for i in range(0, 1):\n",
+    "            if reorder_depth != 0:  # depth 0 means: do not call schedule_eqs at all\n",
+    "                rescheduled_eqs = var_to_shmem(duplicate_trivial_ops(schedule_eqs(update_eqs, reorder_depth)), shmem_count)\n",
+    "            else:\n",
+    "                rescheduled_eqs = var_to_shmem(duplicate_trivial_ops(update_eqs), shmem_count)\n",
+    "                \n",
+    "            aliveAtPeak, aliveRegs = liveness_analysis(rescheduled_eqs)\n",
+    "\n",
+    "            kernel = create_kernel(\n",
+    "                rescheduled_eqs,\n",
+    "                target=\"gpu\",\n",
+    "                gpu_indexing_params={\n",
+    "                    \"block_size\": (64, 2, 1)\n",
+    "                }).compile()\n",
+    "\n",
+    "        time = bench_kernel(kernel)\n",
+    "        nvcc_regs[didx, sidx] = kernel.num_regs\n",
+    "        times[didx, sidx] = time\n",
+    "        \n",
+    "        print(str(aliveRegs) + \" \" + str(kernel.num_regs) + \" \" + str(time) )\n",
+    "    print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.imshow(nvcc_regs)\n",
+    "plt.colorbar()\n",
+    "plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.imshow(times)\n",
+    "plt.colorbar()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pystencils_tests/liveness_opts/sum_tree.ipynb b/pystencils_tests/liveness_opts/sum_tree.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..2f9a84465d8a4d14bbc5186d3ad32f45064e3ead
--- /dev/null
+++ b/pystencils_tests/liveness_opts/sum_tree.ipynb
@@ -0,0 +1,163 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 1\n",
+    "%aimport liveness_opts\n",
+    "%load_ext line_profiler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lbmpy.session import *\n",
+    "from pygrandchem_tests.config2 import get_system\n",
+    "from pygrandchem_tests.config import get_system as get_system_simple\n",
+    "from pystencils.datahandling import SerialDataHandling\n",
+    "from pygrandchem.grandchemGeneration import *\n",
+    "from pygrandchem.chemicalpotential import *\n",
+    "from pystencils import show_code, Field\n",
+    "from sympy import Symbol, Expr, preorder_traversal, postorder_traversal, Function, Piecewise, sympify\n",
+    "\n",
+    "from liveness_opts import *\n",
+    "from subprocess import run, PIPE\n",
+    "import subprocess\n",
+    "\n",
+    "import pycuda\n",
+    "import string"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = get_system()\n",
+    "free_energy = FreeEnergy(config['FreeEnergy'], config['Temperature'])\n",
+    "\n",
+    "dh = SerialDataHandling((256, ), periodicity=(False))\n",
+    "f = dh.fields\n",
+    "dh.add_array('phi_src', values_per_cell=1, layout='f')\n",
+    "dh.add_array_like('phi_dst', 'phi_src')\n",
+    "\n",
+    "f = dh.fields\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eqs = []\n",
+    "\n",
+    "num = 512\n",
+    "\n",
+    "for i in range(0,num):\n",
+    "    eqs.append(Assignment(Symbol(\"A\" + str(num) + \"_\" + str(i)), f['phi_src'][0] * (0.01 + i / 10.0) + 0.1) )\n",
+    "\n",
+    "num = num // 2    \n",
+    "while num > 0:\n",
+    "    for i in range(0,num):\n",
+    "        eqs.append(Assignment(Symbol(\"A\" + str(num) + \"_\" + str(i)),\n",
+    "                              Symbol(\"A\" + str(num*2) + \"_\" + str(i*2)) + Symbol(\"A\" + str(num*2) + \"_\" + str(i*2+1))))\n",
+    "    num = num // 2\n",
+    "\n",
+    "eqs.append(Assignment( f['phi_dst'][0], Symbol(\"A1_0\")))\n",
+    "\n",
+    "livenessAnalysis(eqs)\n",
+    "eqs = scheduleEqs3(eqs)\n",
+    "livenessAnalysis(eqs)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "kernel = create_kernel(\n",
+    "    eqs,\n",
+    "    target=\"gpu\",\n",
+    "    gpu_indexing_params={\n",
+    "        \"block_size\": (256, 1, 1)\n",
+    "    }).compile()\n",
+    "\n",
+    "code = \"#include <cstdint>\\n\"\n",
+    "code += \"#define FUNC_PREFIX __global__\\n\"\n",
+    "code += \"#define RESTRICT __restrict__\\n\\n\"\n",
+    "\n",
+    "code += str(show_code(kernel.ast))\n",
+    "print(code)\n",
+    "\n",
+    "\n",
+    "cubin = pycuda.compiler.compile(code, options=[\"-w\", \"-std=c++11\"], arch=\"sm_60\")\n",
+    "\n",
+    "run([  \"echo \\\"\" + code + \"\\\" >> temp.cubin\"],\n",
+    "        stdout=PIPE,\n",
+    "        shell=True)\n",
+    "\n",
+    "newFile = open(\"temp.cusbin\", \"wb\")\n",
+    "newFile.write(cubin)\n",
+    "newFile.close()\n",
+    "\n",
+    "result = run([  \"nvdisasm -c -plr -lrm narrow temp.cusbin\"],\n",
+    "        stdout=PIPE,\n",
+    "        shell=True)\n",
+    "\n",
+    "print(result.stdout.decode(\"utf-8\"))\n",
+    "\n",
+    "newFile = open(\"temp.disasm\", \"wb\")\n",
+    "newFile.write(result.stdout)\n",
+    "newFile.close()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pystencils_tests/liveness_opts/test_grandchem_staggered_gpu.ipynb b/pystencils_tests/liveness_opts/test_grandchem_staggered_gpu.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..496fc07288a8a3383268f1ae52aca6a339b234e2
--- /dev/null
+++ b/pystencils_tests/liveness_opts/test_grandchem_staggered_gpu.ipynb
@@ -0,0 +1,355 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys \n",
+    "sys.path.append('..')\n",
+    "\n",
+    "\n",
+    "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 1\n",
+    "%aimport pystencils.simp.liveness_opts\n",
+    "%aimport pystencils.simp.liveness_opts_exp\n",
+    "%aimport pystencils.shmemvar\n",
+    "%aimport pystencils.backends.cbackend\n",
+    "%aimport pystencils.transformations\n",
+    "\n",
+    "\n",
+    "%load_ext line_profiler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "from lbmpy.session import *\n",
+    "from scipy.ndimage.filters import gaussian_filter\n",
+    "from pygrandchem.grandchem_generation import *\n",
+    "from pygrandchem.chemicalpotential import free_energy_from_config_object, FreeEnergy\n",
+    "from pygrandchem.initialization import *\n",
+    "from pygrandchem_tests.config_anisotropic import get_system\n",
+    "from pystencils.boundaries import *\n",
+    "\n",
+    "#import pyximport\n",
+    "#pyximport.install(language_level=3)\n",
+    "#from lbmpy.phasefield.simplex_projection import simplex_projection_2d, simplex_projection_3d"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Configuration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "domain_size = (120, 250)\n",
+    "periodicity=(True, False)\n",
+    "fast_simplex_projection = True\n",
+    "optimization = {'cpu_openmp': 4, 'target': 'gpu'}\n",
+    "config = get_system(dim=len(domain_size))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Creating data and compute kernels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "phases = config['Parameters']['phases']\n",
+    "components = config['Parameters']['components']\n",
+    "diffusion_matrices = config['Parameters']['diffusion']\n",
+    "free_energy = config['FreeEnergy']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "256\n",
+      "256\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Adding fields\n",
+    "dh = create_data_handling(domain_size, periodicity=periodicity, default_target=optimization['target'])\n",
+    "f = dh.fields\n",
+    "phi_src = dh.add_array('phi_src', values_per_cell=phases,    layout='fzyx', latex_name='phi_s')\n",
+    "mu_src  = dh.add_array('mu_src', values_per_cell=components, layout='fzyx', latex_name=\"mu_s\")\n",
+    "mu_stag = dh.add_array('mu_stag', values_per_cell=(dh.dim, components), layout='f')\n",
+    "phi_dst = dh.add_array_like('phi_dst', 'phi_src')\n",
+    "mu_dst  = dh.add_array_like('mu_dst', 'mu_src')\n",
+    "\n",
+    "c = dh.add_array('c', values_per_cell=components, layout='fzyx', gpu=False);\n",
+    "\n",
+    "# Boundary\n",
+    "boundary_phi = BoundaryHandling(dh, phi_dst.name, get_stencil('D2Q9'), target=dh.default_target, name='phi_boundary')\n",
+    "boundary_mu  = BoundaryHandling(dh, mu_dst.name, get_stencil('D2Q9'), target=dh.default_target, name='mu_boundary')\n",
+    "\n",
+    "neumann_boundaries = ('N', 'S')\n",
+    "for direction in neumann_boundaries:\n",
+    "    boundary_phi.set_boundary(Neumann(), slice_from_direction(direction, dh.dim))\n",
+    "    boundary_mu.set_boundary(Neumann(), slice_from_direction(direction, dh.dim))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Compute kernels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Compiling and simplifying φ update equations - this may take a while\n",
+      "Compiling and simplifying μ update equations - this may take a while\n",
+      "Compiling and simplifying μ update equations - this may take a while\n",
+      "256\n",
+      "256\n",
+      "256\n",
+      "256\n"
+     ]
+    }
+   ],
+   "source": [
+    "c_from_mu_eqs = create_concentration_from_mu_kernel(c, phi_src, mu_src, free_energy)\n",
+    "\n",
+    "phi_update_eqs = create_phi_update_equations(phi_src, phi_dst, mu_src, free_energy, config['Parameters'], \n",
+    "                                             simplex_projection=fast_simplex_projection)\n",
+    "mu_update_eqs = create_mu_update_equations(phi_src, phi_dst, mu_src, mu_dst,\n",
+    "                                           free_energy, diffusion_matrices,\n",
+    "                                           config['Parameters'])\n",
+    "\n",
+    "mu_stag_update_eqs = create_mu_update_equations(phi_src, phi_dst, mu_src, mu_dst,\n",
+    "                                                free_energy, diffusion_matrices,\n",
+    "                                                config['Parameters'], mu_staggered_field=mu_stag)\n",
+    "\n",
+    "mu_stag_precomp_eqs = create_mu_update_staggered_equations(phi_src, phi_dst, mu_src, \n",
+    "                                                        mu_stag, free_energy, diffusion_matrices, \n",
+    "                                                        config['Parameters'], target=optimization['target'])\n",
+    "\n",
+    "\n",
+    "\n",
+    "mu_stag_precomp_kernel = create_mu_update_staggered_ast(phi_src, phi_dst, mu_src, \n",
+    "                                                        mu_stag, free_energy, diffusion_matrices, \n",
+    "                                                        config['Parameters'], target=optimization['target']).compile()\n",
+    "\n",
+    "\n",
+    "phi_kernel = create_kernel(phi_update_eqs, **optimization).compile()\n",
+    "mu_kernel = create_kernel(mu_update_eqs, **optimization).compile()\n",
+    "mu_stag_kernel = create_kernel(mu_stag_update_eqs, **optimization).compile()\n",
+    "\n",
+    "conc_from_mu = create_kernel(c_from_mu_eqs, target='cpu').compile()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Geometry setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "256\n",
+      "256\n",
+      "256\n",
+      "256\n",
+      "256\n",
+      "256\n",
+      "256\n",
+      "256\n",
+      "             Name|      Inner (min/max)|     WithGl (min/max)\n",
+      "-------------------------------------------------------------\n",
+      "                c|       (0.0254,0.318)|       (0.0254,0.318)\n",
+      " mu_boundaryFlags|            (  1,  1)|            (  1,  2)\n",
+      "           mu_dst|        (-48.8,-48.8)|        (-48.8,-48.8)\n",
+      "           mu_src|        (-48.8,-48.8)|        (-48.8,-48.8)\n",
+      "          mu_stag|            (  0,  0)|            (  0,  0)\n",
+      "phi_boundaryFlags|            (  1,  1)|            (  1,  2)\n",
+      "          phi_dst|            (  0,  1)|            (  0,  1)\n",
+      "          phi_src|            (  0,  1)|            (  0,  1)\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "init_boxes(dh, height=0.2)\n",
+    "initialize_concentration_field(dh, free_energy, config['Parameters']['initial_concentration'])\n",
+    "smooth_fields(dh, sigma=0.4, iterations=5, dim=dh.dim)\n",
+    "dh.synchronization_function(['phi_src','phi_dst','mu_src', 'mu_dst'])()\n",
+    "print(dh)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "256\n",
+      "256\n",
+      "256\n",
+      "256\n"
+     ]
+    }
+   ],
+   "source": [
+    "import time\n",
+    "phi_dst_comm = dh.synchronization_function(['phi_dst'])\n",
+    "mu_dst_comm = dh.synchronization_function(['mu_dst'])\n",
+    "\n",
+    "def time_loop(iterations):\n",
+    "    dh.all_to_gpu()\n",
+    "    start = time.perf_counter()\n",
+    "    for t in range(iterations):\n",
+    "        dh.run_kernel(phi_kernel)\n",
+    "        if not fast_simplex_projection:\n",
+    "            for b in dh.iterate():\n",
+    "                simplex_projection_3d(b['phi_dst'])\n",
+    "        \n",
+    "        phi_dst_comm()\n",
+    "        boundary_phi()\n",
+    "        \n",
+    "        #dh.run_kernel(mu_kernel)\n",
+    "        dh.run_kernel(mu_stag_precomp_kernel)\n",
+    "        dh.run_kernel(mu_stag_kernel)\n",
+    "        mu_dst_comm()\n",
+    "        boundary_mu()\n",
+    "        \n",
+    "        dh.swap('mu_src', 'mu_dst')\n",
+    "        dh.swap('phi_src', 'phi_dst')\n",
+    "    duration = (time.perf_counter() - start) / iterations\n",
+    "    dh.all_to_cpu()\n",
+    "    return duration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAAANQAAAAPBAMAAABuCfzHAAAAMFBMVEX///8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAv3aB7AAAAD3RSTlMAiXaZIs1UEN1mu6tEMu+iw/3TAAAACXBIWXMAAA7EAAAOxAGVKw4bAAADNElEQVQ4EZVUS2gTURQ9MdOZ5tcm8QeCGKwLQbDBKGLrp+DGXQui+EGdhWs7omJXbUStooLBDyqIVkQUXRgoiFq0sxFd2dGVG7ELacUibdXEttrG8+5LnGBBMIFz571zzz3vzb0J8K9PDDhcxRtXqhb/9ejBagECydWikugDEgkPdYUdp2Al57rMiKVhlkqlmbIgkFhcZhINlSq+2kg0UZNpdih8Rxjddpq4HR1MLUdZCGxxI1nUnj/mIAg8Il+bR2B3JtOtBcY5PNZMjW2dmKU+hFARHanYR1i9+2hyemIhEMoi0MNKEn3AVoR7ECGBa8ARhuV5VdpIa0FNGvc1cxVYNEvdaFsFPAduU9hIq7wqFM7D/FGJshCQPW21SKxil5kI7mhBO/W04CEeAO2OL9T0axcFfrGEWX+s6tOIFrkh0YeaQe5pq4G7WMeW1sjJFutE7FW0MN8YbV9YqWdOWlO08aqsumg1SZlEH+r3JNfT6kKDh0ipwQaWiZXRoxNReNPgCmMpq5wvrNR7lZNbudrqydINQGscsa+0kuhD107U2Qg7xi++hUILjJxYRVydaBVyGBLGGAc+pHxhud7bPk/16mFOW91Fq1YyWyqMqzwNXWMIDnMb92DOe/gLAYhVaznRKgEbHcWIVdwXyhN1ZhGdbqzf1laciSy64oiqW0n0oT6LEC8EbGq5CHPGuaSt5pcTMQHcdBVzlOccSPnCSj2MOtaqBZVeAcHvqss8gIyFWZSFQG0bQpO4QyuPl2vdn9JWZ8qJOEmrnGJs9qpdjUW1Gu/57mx1UA4qJzDchuC0+vWEZNhVlIUGudUnoN8a43hc7+vrH4nD+sk3IYJ7tHqpmNx9PjlVQkVbJUdb7aIdreqyiIwhNIhgGzck+hBlrwYRBz7jI8u3AHV5IMoraMEAe6WZG8Cav9UY4X+Rc8AxprVVgPIeoBudHn/aEqvgFjptbMacYbxwcIGKeuaaaoCUAOG4NaSZWtv6Mkt9DeYkljvhHPN5KyONlQ5wcOkz4Hg5ykIgkOG2kel1EOvNuGzrvqkUDA64FmBus6cZK7nCm6WOZppcBBLrmf509GwKm5vX4jflHmnZdri02wAAAABJRU5ErkJggg==\n",
+      "text/latex": [
+       "$$0.0006584706800003915$$"
+      ],
+      "text/plain": [
+       "0.0006584706800003915"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "time_loop(100)\n",
+    "#assert interface_location() >= 53"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "xi_0 ← phi_src_C^0**2\n",
+      "xi_1 ← phi_src_C^1**2\n",
+      "xi_2 ← phi_src_C^2**2\n",
+      "xi_3 ← xi_0 + xi_1 + xi_2\n",
+      "xi_4 ← phi_dst_C^1**2\n",
+      "xi_5 ← phi_dst_C^0**2\n",
+      "xi_6 ← phi_dst_C^2**2\n",
+      "xi_7 ← 32.0/(xi_4 + xi_5 + xi_6)\n",
+      "xi_8 ← 32.0/xi_3\n",
+      "dc_dmu_0_0 ← xi_3/(0.0277302350554502*xi_0 + 0.0134501298325457*xi_1 + 0.145247543885788*xi_2)\n",
+      "dc_dphi_dt_0 ← (0.0134501298325457*mu_src_C + 0.974209571226215)*(-xi_1*xi_8 + xi_4*xi_7) + (0.0277302350554502*mu_src_C + 1.37922908109611)*(-xi_0*xi_8 + xi_5*xi_7) + (0.145247543885788*mu_src_C + 7.27037126746791)*(-xi_2*xi_8 + xi_6*xi_7)\n",
+      "dc_dT_dt_0 ← 0\n",
+      "divMgradmu_0 ← -2.0*mu_stag_C^0,0 - 2.0*mu_stag_C^1,0 + 2.0*mu_stag_N^1,0 + 2.0*mu_stag_E^0,0\n",
+      "mu_dst[0,0] ← mu_src_C + 0.03125*dc_dmu_0_0*(-dc_dT_dt_0 - dc_dphi_dt_0 + divMgradmu_0)\n"
+     ]
+    }
+   ],
+   "source": [
+    "for eq in mu_stag_update_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pystencils_tests/liveness_opts/test_liveness_opts.ipynb b/pystencils_tests/liveness_opts/test_liveness_opts.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..67c1a8a549a6e03a1707b9ef00612da44adc12d7
--- /dev/null
+++ b/pystencils_tests/liveness_opts/test_liveness_opts.ipynb
@@ -0,0 +1,959 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 1\n",
+    "%aimport pystencils.simp.liveness_opts\n",
+    "%aimport pystencils.simp.liveness_opts_exp\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import warnings\n",
+    "import pystencils as ps\n",
+    "from pygrandchem.grandchem import GrandChemGenerator\n",
+    "from pygrandchem.scenarios import system_4_2, system_3_1\n",
+    "\n",
+    "from sympy import Number, Symbol, Expr, preorder_traversal, postorder_traversal, Function, Piecewise, relational\n",
+    "from pystencils.simp import sympy_cse_on_assignment_list\n",
+    "from pystencils.simp.liveness_opts import *\n",
+    "from pystencils.simp.liveness_opts_exp import *\n",
+    "\n",
+    "import graphviz\n",
+    "\n",
+    "#import pycuda\n",
+    "\n",
+    "import sys\n",
+    "from subprocess import run, PIPE\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rotation = [\n",
+    "    [(None,) * 4, (80, 0.03, 0.5, 0.3), (10, 1, 0, 0)],\n",
+    "    [(None,) * 4, (None,) * 4, (11, 0.7, 0.5, 0.3)],\n",
+    "]\n",
+    "\n",
+    "common = {'noise_amplitude': 0, 'dim': 3}\n",
+    "configs = {\n",
+    "    '42_fixT': lambda: system_4_2(variable_temperature=False),\n",
+    "    '42_varT': lambda: system_4_2(variable_temperature=True),\n",
+    "    '31_fixT_iso': lambda: system_3_1(variable_temperature=False, fab_value=0, **common),\n",
+    "    '31_varT_iso': lambda: system_3_1(variable_temperature=True, fab_value=0, **common),\n",
+    "    '31_fixT_aniso': lambda: system_3_1(variable_temperature=False, **common),\n",
+    "    '31_varT_aniso': lambda: system_3_1(variable_temperature=True, **common),\n",
+    "    '31_fixT_aniso_rot': lambda: system_3_1(variable_temperature=False, anisotropy_rotation=rotation, **common),\n",
+    "    '31_varT_aniso_rot': lambda: system_3_1(variable_temperature=True, anisotropy_rotation=rotation, **common),\n",
+    "}\n",
+    "\n",
+    "\n",
+    "def get_config(name):\n",
+    "    return configs[name]()\n",
+    "\n",
+    "\n",
+    "def get_generator(domain_size, config, **kwargs):\n",
+    "    assert len(domain_size) == 3\n",
+    "    assert config['Parameters']['dim'] == 3\n",
+    "\n",
+    "    phases, components = config['Parameters']['phases'], config['Parameters']['components']\n",
+    "\n",
+    "    format_args = {'p': phases, 'c': components, 's': ','.join(str(e) for e in domain_size)}\n",
+    "    phi_src, phi_dst = ps.fields(\"phi_src({p}), phi_dst({p}) : double[{s}]\".format(**format_args), layout='f')\n",
+    "    mu_src, mu_dst = ps.fields(\"mu_src({c}), mu_dst({c}) : double[{s}]\".format(**format_args), layout='f')\n",
+    "    mu_stag, phi_stag = ps.fields(\"mu_stag(3, {c}), phi_stag(3, {p}) : double[{s}]\".format(**format_args),\n",
+    "                                  layout='f')\n",
+    "    c = ps.fields(\"c({c}) : double[{s}]\".format(**format_args))\n",
+    "    \n",
+    "    gc = GrandChemGenerator(phi_src, phi_dst, mu_src, mu_dst,\n",
+    "                            config['FreeEnergy'], config['Parameters'],\n",
+    "                            conc=c, mu_staggered=mu_stag, phi_staggered=phi_stag,\n",
+    "                            use_block_offsets=False,\n",
+    "                            compile_kernel=False,\n",
+    "                            **kwargs)\n",
+    "    return gc"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gc = get_generator((256, 256, 256), get_config('42_fixT'))\n",
+    "mu_kernel = gc.mu_full()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "xi_0 ← phi_dst_C^0**2\n",
+      "xi_1 ← phi_dst_C^1**2\n",
+      "xi_2 ← phi_dst_C^2**2\n",
+      "xi_3 ← phi_dst_C^3**2\n",
+      "xi_4 ← phi_src_C^0**2\n",
+      "xi_5 ← phi_src_C^1**2\n",
+      "xi_6 ← phi_src_C^2**2\n",
+      "xi_7 ← phi_src_C^3**2\n",
+      "xi_8 ← 200.0/((xi_0 + xi_1 + xi_2 + xi_3)*(xi_4 + xi_5 + xi_6 + xi_7))\n",
+      "xi_9 ← 0.2*xi_0\n",
+      "xi_10 ← 0.2*xi_1\n",
+      "xi_11 ← -xi_10*xi_4 + xi_5*xi_9\n",
+      "xi_12 ← xi_0*xi_7\n",
+      "xi_13 ← xi_1*xi_7\n",
+      "xi_14 ← 0.2*xi_2\n",
+      "xi_15 ← 0.0666666666666667*xi_2*xi_7\n",
+      "xi_16 ← xi_3*xi_4\n",
+      "xi_17 ← xi_3*xi_5\n",
+      "xi_18 ← 0.0666666666666667*xi_3*xi_6\n",
+      "xi_19 ← 1.0*mu_src_C^0\n",
+      "xi_20 ← phi_src_C^3/2\n",
+      "xi_21 ← phi_src_W^3/2 + xi_20\n",
+      "xi_22 ← xi_21**2\n",
+      "xi_23 ← phi_src_C^0/2\n",
+      "xi_24 ← phi_src_W^0/2 + xi_23\n",
+      "xi_25 ← xi_24**2\n",
+      "xi_26 ← phi_src_C^1/2\n",
+      "xi_27 ← phi_src_W^1/2 + xi_26\n",
+      "xi_28 ← xi_27**2\n",
+      "xi_29 ← phi_src_C^2/2\n",
+      "xi_30 ← phi_src_W^2/2 + xi_29\n",
+      "xi_31 ← xi_30**2\n",
+      "xi_32 ← 1/(xi_22 + xi_25 + xi_28 + xi_31)\n",
+      "xi_33 ← xi_22*xi_32*(-1.0*mu_src_W^0 + xi_19)\n",
+      "xi_34 ← 1.0*mu_src_C^1\n",
+      "xi_35 ← xi_22*xi_32*(-1.0*mu_src_W^1 + xi_34)\n",
+      "xi_36 ← 1.0*phi_src_C^3\n",
+      "xi_37 ← -1.0*phi_src_W^3 + xi_36\n",
+      "xi_38 ← sqrt(xi_21*xi_24)\n",
+      "xi_39 ← 1.0*phi_src_C^0\n",
+      "xi_40 ← -1.0*phi_src_W^0 + xi_39\n",
+      "xi_41 ← -0.25*phi_src_SW^0\n",
+      "xi_42 ← 0.25*phi_src_NW^0\n",
+      "xi_43 ← -0.25*phi_src_S^0 + 0.25*phi_src_N^0\n",
+      "xi_44 ← xi_41 + xi_42 + xi_43\n",
+      "xi_45 ← -0.25*phi_src_BW^0\n",
+      "xi_46 ← 0.25*phi_src_TW^0\n",
+      "xi_47 ← -0.25*phi_src_B^0 + 0.25*phi_src_T^0\n",
+      "xi_48 ← xi_45 + xi_46 + xi_47\n",
+      "xi_49 ← sqrt(xi_40**2 + xi_44**2 + xi_48**2)\n",
+      "xi_50 ← -0.25*phi_src_SW^3\n",
+      "xi_51 ← 0.25*phi_src_NW^3\n",
+      "xi_52 ← -0.25*phi_src_S^3 + 0.25*phi_src_N^3\n",
+      "xi_53 ← xi_50 + xi_51 + xi_52\n",
+      "xi_54 ← -0.25*phi_src_BW^3\n",
+      "xi_55 ← 0.25*phi_src_TW^3\n",
+      "xi_56 ← -0.25*phi_src_B^3 + 0.25*phi_src_T^3\n",
+      "xi_57 ← xi_54 + xi_55 + xi_56\n",
+      "xi_58 ← xi_37**2 + xi_53**2 + xi_57**2\n",
+      "xi_59 ← sqrt(xi_58)\n",
+      "xi_60 ← (xi_38 < 1.0e-9) | (xi_49*xi_59 < 1.0e-9)\n",
+      "xi_61 ← 0.166666666666667*mu_src_C^0\n",
+      "xi_62 ← -0.0833333333333333*mu_src_C^1\n",
+      "xi_63 ← 0.166666666666667*mu_src_W^0 - 0.0833333333333333*mu_src_W^1 + xi_61 + xi_62\n",
+      "xi_64 ← xi_22*xi_32\n",
+      "xi_65 ← xi_64*(xi_63 + 0.333333333333333)\n",
+      "xi_66 ← xi_25*xi_32\n",
+      "xi_67 ← 50.0*phi_dst_C^0 - 50.0*phi_src_C^0\n",
+      "xi_68 ← 1/xi_58\n",
+      "xi_69 ← xi_22*xi_24*xi_32*xi_68*(50.0*phi_dst_W^0 - 50.0*phi_src_W^0 + xi_67)*(xi_37*xi_40 + xi_44*xi_53 + xi_48*xi_57)/(xi_38*xi_49)\n",
+      "xi_70 ← sqrt(xi_21*xi_27)\n",
+      "xi_71 ← 1.0*phi_src_C^1\n",
+      "xi_72 ← -1.0*phi_src_W^1 + xi_71\n",
+      "xi_73 ← -0.25*phi_src_SW^1\n",
+      "xi_74 ← 0.25*phi_src_NW^1\n",
+      "xi_75 ← -0.25*phi_src_S^1 + 0.25*phi_src_N^1\n",
+      "xi_76 ← xi_73 + xi_74 + xi_75\n",
+      "xi_77 ← -0.25*phi_src_BW^1\n",
+      "xi_78 ← 0.25*phi_src_TW^1\n",
+      "xi_79 ← -0.25*phi_src_B^1 + 0.25*phi_src_T^1\n",
+      "xi_80 ← xi_77 + xi_78 + xi_79\n",
+      "xi_81 ← sqrt(xi_72**2 + xi_76**2 + xi_80**2)\n",
+      "xi_82 ← (xi_70 < 1.0e-9) | (xi_59*xi_81 < 1.0e-9)\n",
+      "xi_83 ← xi_63 + 0.2\n",
+      "xi_84 ← xi_28*xi_32\n",
+      "xi_85 ← 50.0*phi_dst_C^1 - 50.0*phi_src_C^1\n",
+      "xi_86 ← xi_22*xi_27*xi_32*xi_68*(50.0*phi_dst_W^1 - 50.0*phi_src_W^1 + xi_85)*(xi_37*xi_72 + xi_53*xi_76 + xi_57*xi_80)/(xi_70*xi_81)\n",
+      "xi_87 ← sqrt(xi_21*xi_30)\n",
+      "xi_88 ← 1.0*phi_src_C^2\n",
+      "xi_89 ← -1.0*phi_src_W^2 + xi_88\n",
+      "xi_90 ← -0.25*phi_src_SW^2\n",
+      "xi_91 ← 0.25*phi_src_NW^2\n",
+      "xi_92 ← -0.25*phi_src_S^2 + 0.25*phi_src_N^2\n",
+      "xi_93 ← xi_90 + xi_91 + xi_92\n",
+      "xi_94 ← -0.25*phi_src_BW^2\n",
+      "xi_95 ← 0.25*phi_src_TW^2\n",
+      "xi_96 ← -0.25*phi_src_B^2 + 0.25*phi_src_T^2\n",
+      "xi_97 ← xi_94 + xi_95 + xi_96\n",
+      "xi_98 ← sqrt(xi_89**2 + xi_93**2 + xi_97**2)\n",
+      "xi_99 ← (xi_87 < 1.0e-9) | (xi_59*xi_98 < 1.0e-9)\n",
+      "xi_100 ← xi_31*xi_32\n",
+      "xi_101 ← 50.0*phi_dst_C^2 - 50.0*phi_src_C^2\n",
+      "xi_102 ← xi_22*xi_30*xi_32*xi_68*(50.0*phi_dst_W^2 - 50.0*phi_src_W^2 + xi_101)*(xi_37*xi_89 + xi_53*xi_93 + xi_57*xi_97)/(xi_87*xi_98)\n",
+      "xi_103 ← -0.0833333333333333*mu_src_C^0\n",
+      "xi_104 ← 0.166666666666667*mu_src_C^1\n",
+      "xi_105 ← -0.0833333333333333*mu_src_W^0 + 0.166666666666667*mu_src_W^1 + xi_103 + xi_104\n",
+      "xi_106 ← xi_64*(xi_105 + 0.333333333333333)\n",
+      "xi_107 ← xi_105 + 0.2\n",
+      "xi_108 ← -xi_19\n",
+      "xi_109 ← phi_src_E^3/2 + xi_20\n",
+      "xi_110 ← xi_109**2\n",
+      "xi_111 ← phi_src_E^0/2 + xi_23\n",
+      "xi_112 ← xi_111**2\n",
+      "xi_113 ← phi_src_E^1/2 + xi_26\n",
+      "xi_114 ← xi_113**2\n",
+      "xi_115 ← phi_src_E^2/2 + xi_29\n",
+      "xi_116 ← xi_115**2\n",
+      "xi_117 ← 1/(xi_110 + xi_112 + xi_114 + xi_116)\n",
+      "xi_118 ← xi_110*xi_117*(1.0*mu_src_E^0 + xi_108)\n",
+      "xi_119 ← -xi_34\n",
+      "xi_120 ← xi_110*xi_117*(1.0*mu_src_E^1 + xi_119)\n",
+      "xi_121 ← -xi_36\n",
+      "xi_122 ← 1.0*phi_src_E^3 + xi_121\n",
+      "xi_123 ← sqrt(xi_109*xi_111)\n",
+      "xi_124 ← -xi_39\n",
+      "xi_125 ← 1.0*phi_src_E^0 + xi_124\n",
+      "xi_126 ← 0.25*phi_src_SE^0\n",
+      "xi_127 ← 0.25*phi_src_NE^0\n",
+      "xi_128 ← -xi_126 + xi_127 + xi_43\n",
+      "xi_129 ← 0.25*phi_src_BE^0\n",
+      "xi_130 ← 0.25*phi_src_TE^0\n",
+      "xi_131 ← -xi_129 + xi_130 + xi_47\n",
+      "xi_132 ← sqrt(xi_125**2 + xi_128**2 + xi_131**2)\n",
+      "xi_133 ← 0.25*phi_src_SE^3\n",
+      "xi_134 ← 0.25*phi_src_NE^3\n",
+      "xi_135 ← -xi_133 + xi_134 + xi_52\n",
+      "xi_136 ← 0.25*phi_src_BE^3\n",
+      "xi_137 ← 0.25*phi_src_TE^3\n",
+      "xi_138 ← -xi_136 + xi_137 + xi_56\n",
+      "xi_139 ← xi_122**2 + xi_135**2 + xi_138**2\n",
+      "xi_140 ← sqrt(xi_139)\n",
+      "xi_141 ← (xi_123 < 1.0e-9) | (xi_132*xi_140 < 1.0e-9)\n",
+      "xi_142 ← 0.166666666666667*mu_src_E^0 - 0.0833333333333333*mu_src_E^1 + xi_61 + xi_62\n",
+      "xi_143 ← xi_110*xi_117\n",
+      "xi_144 ← xi_143*(xi_142 + 0.333333333333333)\n",
+      "xi_145 ← xi_112*xi_117\n",
+      "xi_146 ← 1/xi_139\n",
+      "xi_147 ← xi_110*xi_111*xi_117*xi_146*(50.0*phi_dst_E^0 - 50.0*phi_src_E^0 + xi_67)*(xi_122*xi_125 + xi_128*xi_135 + xi_131*xi_138)/(xi_123*xi_132)\n",
+      "xi_148 ← sqrt(xi_109*xi_113)\n",
+      "xi_149 ← -xi_71\n",
+      "xi_150 ← 1.0*phi_src_E^1 + xi_149\n",
+      "xi_151 ← 0.25*phi_src_SE^1\n",
+      "xi_152 ← 0.25*phi_src_NE^1\n",
+      "xi_153 ← -xi_151 + xi_152 + xi_75\n",
+      "xi_154 ← 0.25*phi_src_BE^1\n",
+      "xi_155 ← 0.25*phi_src_TE^1\n",
+      "xi_156 ← -xi_154 + xi_155 + xi_79\n",
+      "xi_157 ← sqrt(xi_150**2 + xi_153**2 + xi_156**2)\n",
+      "xi_158 ← (xi_148 < 1.0e-9) | (xi_140*xi_157 < 1.0e-9)\n",
+      "xi_159 ← xi_142 + 0.2\n",
+      "xi_160 ← xi_114*xi_117\n",
+      "xi_161 ← xi_110*xi_113*xi_117*xi_146*(50.0*phi_dst_E^1 - 50.0*phi_src_E^1 + xi_85)*(xi_122*xi_150 + xi_135*xi_153 + xi_138*xi_156)/(xi_148*xi_157)\n",
+      "xi_162 ← sqrt(xi_109*xi_115)\n",
+      "xi_163 ← -xi_88\n",
+      "xi_164 ← 1.0*phi_src_E^2 + xi_163\n",
+      "xi_165 ← 0.25*phi_src_SE^2\n",
+      "xi_166 ← 0.25*phi_src_NE^2\n",
+      "xi_167 ← -xi_165 + xi_166 + xi_92\n",
+      "xi_168 ← 0.25*phi_src_BE^2\n",
+      "xi_169 ← 0.25*phi_src_TE^2\n",
+      "xi_170 ← -xi_168 + xi_169 + xi_96\n",
+      "xi_171 ← sqrt(xi_164**2 + xi_167**2 + xi_170**2)\n",
+      "xi_172 ← (xi_162 < 1.0e-9) | (xi_140*xi_171 < 1.0e-9)\n",
+      "xi_173 ← xi_116*xi_117\n",
+      "xi_174 ← xi_110*xi_115*xi_117*xi_146*(50.0*phi_dst_E^2 - 50.0*phi_src_E^2 + xi_101)*(xi_122*xi_164 + xi_135*xi_167 + xi_138*xi_170)/(xi_162*xi_171)\n",
+      "xi_175 ← -0.0833333333333333*mu_src_E^0 + 0.166666666666667*mu_src_E^1 + xi_103 + xi_104\n",
+      "xi_176 ← xi_143*(xi_175 + 0.333333333333333)\n",
+      "xi_177 ← xi_175 + 0.2\n",
+      "xi_178 ← phi_src_S^3/2 + xi_20\n",
+      "xi_179 ← xi_178**2\n",
+      "xi_180 ← phi_src_S^0/2 + xi_23\n",
+      "xi_181 ← xi_180**2\n",
+      "xi_182 ← phi_src_S^1/2 + xi_26\n",
+      "xi_183 ← xi_182**2\n",
+      "xi_184 ← phi_src_S^2/2 + xi_29\n",
+      "xi_185 ← xi_184**2\n",
+      "xi_186 ← 1/(xi_179 + xi_181 + xi_183 + xi_185)\n",
+      "xi_187 ← xi_179*xi_186*(-1.0*mu_src_S^0 + xi_19)\n",
+      "xi_188 ← xi_179*xi_186*(-1.0*mu_src_S^1 + xi_34)\n",
+      "xi_189 ← -1.0*phi_src_S^3 + xi_36\n",
+      "xi_190 ← sqrt(xi_178*xi_180)\n",
+      "xi_191 ← -1.0*phi_src_S^0 + xi_39\n",
+      "xi_192 ← -0.25*phi_src_W^0 + 0.25*phi_src_E^0\n",
+      "xi_193 ← xi_126 + xi_192 + xi_41\n",
+      "xi_194 ← -0.25*phi_src_BS^0\n",
+      "xi_195 ← 0.25*phi_src_TS^0\n",
+      "xi_196 ← xi_194 + xi_195 + xi_47\n",
+      "xi_197 ← sqrt(xi_191**2 + xi_193**2 + xi_196**2)\n",
+      "xi_198 ← -0.25*phi_src_W^3 + 0.25*phi_src_E^3\n",
+      "xi_199 ← xi_133 + xi_198 + xi_50\n",
+      "xi_200 ← -0.25*phi_src_BS^3\n",
+      "xi_201 ← 0.25*phi_src_TS^3\n",
+      "xi_202 ← xi_200 + xi_201 + xi_56\n",
+      "xi_203 ← xi_189**2 + xi_199**2 + xi_202**2\n",
+      "xi_204 ← sqrt(xi_203)\n",
+      "xi_205 ← (xi_190 < 1.0e-9) | (xi_197*xi_204 < 1.0e-9)\n",
+      "xi_206 ← 0.166666666666667*mu_src_S^0 - 0.0833333333333333*mu_src_S^1 + xi_61 + xi_62\n",
+      "xi_207 ← xi_179*xi_186\n",
+      "xi_208 ← xi_207*(xi_206 + 0.333333333333333)\n",
+      "xi_209 ← xi_181*xi_186\n",
+      "xi_210 ← 1/xi_203\n",
+      "xi_211 ← xi_179*xi_180*xi_186*xi_210*(50.0*phi_dst_S^0 - 50.0*phi_src_S^0 + xi_67)*(xi_189*xi_191 + xi_193*xi_199 + xi_196*xi_202)/(xi_190*xi_197)\n",
+      "xi_212 ← sqrt(xi_178*xi_182)\n",
+      "xi_213 ← -1.0*phi_src_S^1 + xi_71\n",
+      "xi_214 ← -0.25*phi_src_W^1 + 0.25*phi_src_E^1\n",
+      "xi_215 ← xi_151 + xi_214 + xi_73\n",
+      "xi_216 ← -0.25*phi_src_BS^1\n",
+      "xi_217 ← 0.25*phi_src_TS^1\n",
+      "xi_218 ← xi_216 + xi_217 + xi_79\n",
+      "xi_219 ← sqrt(xi_213**2 + xi_215**2 + xi_218**2)\n",
+      "xi_220 ← (xi_212 < 1.0e-9) | (xi_204*xi_219 < 1.0e-9)\n",
+      "xi_221 ← xi_206 + 0.2\n",
+      "xi_222 ← xi_183*xi_186\n",
+      "xi_223 ← xi_179*xi_182*xi_186*xi_210*(50.0*phi_dst_S^1 - 50.0*phi_src_S^1 + xi_85)*(xi_189*xi_213 + xi_199*xi_215 + xi_202*xi_218)/(xi_212*xi_219)\n",
+      "xi_224 ← sqrt(xi_178*xi_184)\n",
+      "xi_225 ← -1.0*phi_src_S^2 + xi_88\n",
+      "xi_226 ← -0.25*phi_src_W^2 + 0.25*phi_src_E^2\n",
+      "xi_227 ← xi_165 + xi_226 + xi_90\n",
+      "xi_228 ← -0.25*phi_src_BS^2\n",
+      "xi_229 ← 0.25*phi_src_TS^2\n",
+      "xi_230 ← xi_228 + xi_229 + xi_96\n",
+      "xi_231 ← sqrt(xi_225**2 + xi_227**2 + xi_230**2)\n",
+      "xi_232 ← (xi_224 < 1.0e-9) | (xi_204*xi_231 < 1.0e-9)\n",
+      "xi_233 ← xi_185*xi_186\n",
+      "xi_234 ← xi_179*xi_184*xi_186*xi_210*(50.0*phi_dst_S^2 - 50.0*phi_src_S^2 + xi_101)*(xi_189*xi_225 + xi_199*xi_227 + xi_202*xi_230)/(xi_224*xi_231)\n",
+      "xi_235 ← -0.0833333333333333*mu_src_S^0 + 0.166666666666667*mu_src_S^1 + xi_103 + xi_104\n",
+      "xi_236 ← xi_207*(xi_235 + 0.333333333333333)\n",
+      "xi_237 ← xi_235 + 0.2\n",
+      "xi_238 ← phi_src_N^3/2 + xi_20\n",
+      "xi_239 ← xi_238**2\n",
+      "xi_240 ← phi_src_N^0/2 + xi_23\n",
+      "xi_241 ← xi_240**2\n",
+      "xi_242 ← phi_src_N^1/2 + xi_26\n",
+      "xi_243 ← xi_242**2\n",
+      "xi_244 ← phi_src_N^2/2 + xi_29\n",
+      "xi_245 ← xi_244**2\n",
+      "xi_246 ← 1/(xi_239 + xi_241 + xi_243 + xi_245)\n",
+      "xi_247 ← xi_239*xi_246*(1.0*mu_src_N^0 + xi_108)\n",
+      "xi_248 ← xi_239*xi_246*(1.0*mu_src_N^1 + xi_119)\n",
+      "xi_249 ← 1.0*phi_src_N^3 + xi_121\n",
+      "xi_250 ← sqrt(xi_238*xi_240)\n",
+      "xi_251 ← 1.0*phi_src_N^0 + xi_124\n",
+      "xi_252 ← xi_127 + xi_192 - xi_42\n",
+      "xi_253 ← 0.25*phi_src_BN^0\n",
+      "xi_254 ← 0.25*phi_src_TN^0\n",
+      "xi_255 ← -xi_253 + xi_254 + xi_47\n",
+      "xi_256 ← sqrt(xi_251**2 + xi_252**2 + xi_255**2)\n",
+      "xi_257 ← xi_134 + xi_198 - xi_51\n",
+      "xi_258 ← 0.25*phi_src_BN^3\n",
+      "xi_259 ← 0.25*phi_src_TN^3\n",
+      "xi_260 ← -xi_258 + xi_259 + xi_56\n",
+      "xi_261 ← xi_249**2 + xi_257**2 + xi_260**2\n",
+      "xi_262 ← sqrt(xi_261)\n",
+      "xi_263 ← (xi_250 < 1.0e-9) | (xi_256*xi_262 < 1.0e-9)\n",
+      "xi_264 ← 0.166666666666667*mu_src_N^0 - 0.0833333333333333*mu_src_N^1 + xi_61 + xi_62\n",
+      "xi_265 ← xi_239*xi_246\n",
+      "xi_266 ← xi_265*(xi_264 + 0.333333333333333)\n",
+      "xi_267 ← xi_241*xi_246\n",
+      "xi_268 ← 1/xi_261\n",
+      "xi_269 ← xi_239*xi_240*xi_246*xi_268*(50.0*phi_dst_N^0 - 50.0*phi_src_N^0 + xi_67)*(xi_249*xi_251 + xi_252*xi_257 + xi_255*xi_260)/(xi_250*xi_256)\n",
+      "xi_270 ← sqrt(xi_238*xi_242)\n",
+      "xi_271 ← 1.0*phi_src_N^1 + xi_149\n",
+      "xi_272 ← xi_152 + xi_214 - xi_74\n",
+      "xi_273 ← 0.25*phi_src_BN^1\n",
+      "xi_274 ← 0.25*phi_src_TN^1\n",
+      "xi_275 ← -xi_273 + xi_274 + xi_79\n",
+      "xi_276 ← sqrt(xi_271**2 + xi_272**2 + xi_275**2)\n",
+      "xi_277 ← (xi_270 < 1.0e-9) | (xi_262*xi_276 < 1.0e-9)\n",
+      "xi_278 ← xi_264 + 0.2\n",
+      "xi_279 ← xi_243*xi_246\n",
+      "xi_280 ← xi_239*xi_242*xi_246*xi_268*(50.0*phi_dst_N^1 - 50.0*phi_src_N^1 + xi_85)*(xi_249*xi_271 + xi_257*xi_272 + xi_260*xi_275)/(xi_270*xi_276)\n",
+      "xi_281 ← sqrt(xi_238*xi_244)\n",
+      "xi_282 ← 1.0*phi_src_N^2 + xi_163\n",
+      "xi_283 ← xi_166 + xi_226 - xi_91\n",
+      "xi_284 ← 0.25*phi_src_BN^2\n",
+      "xi_285 ← 0.25*phi_src_TN^2\n",
+      "xi_286 ← -xi_284 + xi_285 + xi_96\n",
+      "xi_287 ← sqrt(xi_282**2 + xi_283**2 + xi_286**2)\n",
+      "xi_288 ← (xi_281 < 1.0e-9) | (xi_262*xi_287 < 1.0e-9)\n",
+      "xi_289 ← xi_245*xi_246\n",
+      "xi_290 ← xi_239*xi_244*xi_246*xi_268*(50.0*phi_dst_N^2 - 50.0*phi_src_N^2 + xi_101)*(xi_249*xi_282 + xi_257*xi_283 + xi_260*xi_286)/(xi_281*xi_287)\n",
+      "xi_291 ← -0.0833333333333333*mu_src_N^0 + 0.166666666666667*mu_src_N^1 + xi_103 + xi_104\n",
+      "xi_292 ← xi_265*(xi_291 + 0.333333333333333)\n",
+      "xi_293 ← xi_291 + 0.2\n",
+      "xi_294 ← phi_src_B^3/2 + xi_20\n",
+      "xi_295 ← xi_294**2\n",
+      "xi_296 ← phi_src_B^0/2 + xi_23\n",
+      "xi_297 ← xi_296**2\n",
+      "xi_298 ← phi_src_B^1/2 + xi_26\n",
+      "xi_299 ← xi_298**2\n",
+      "xi_300 ← phi_src_B^2/2 + xi_29\n",
+      "xi_301 ← xi_300**2\n",
+      "xi_302 ← 1/(xi_295 + xi_297 + xi_299 + xi_301)\n",
+      "xi_303 ← xi_295*xi_302*(-1.0*mu_src_B^0 + xi_19)\n",
+      "xi_304 ← xi_295*xi_302*(-1.0*mu_src_B^1 + xi_34)\n",
+      "xi_305 ← -1.0*phi_src_B^3 + xi_36\n",
+      "xi_306 ← sqrt(xi_294*xi_296)\n",
+      "xi_307 ← -1.0*phi_src_B^0 + xi_39\n",
+      "xi_308 ← xi_129 + xi_192 + xi_45\n",
+      "xi_309 ← xi_194 + xi_253 + xi_43\n",
+      "xi_310 ← sqrt(xi_307**2 + xi_308**2 + xi_309**2)\n",
+      "xi_311 ← xi_136 + xi_198 + xi_54\n",
+      "xi_312 ← xi_200 + xi_258 + xi_52\n",
+      "xi_313 ← xi_305**2 + xi_311**2 + xi_312**2\n",
+      "xi_314 ← sqrt(xi_313)\n",
+      "xi_315 ← (xi_306 < 1.0e-9) | (xi_310*xi_314 < 1.0e-9)\n",
+      "xi_316 ← 0.166666666666667*mu_src_B^0 - 0.0833333333333333*mu_src_B^1 + xi_61 + xi_62\n",
+      "xi_317 ← xi_295*xi_302\n",
+      "xi_318 ← xi_317*(xi_316 + 0.333333333333333)\n",
+      "xi_319 ← xi_297*xi_302\n",
+      "xi_320 ← 1/xi_313\n",
+      "xi_321 ← xi_295*xi_296*xi_302*xi_320*(50.0*phi_dst_B^0 - 50.0*phi_src_B^0 + xi_67)*(xi_305*xi_307 + xi_308*xi_311 + xi_309*xi_312)/(xi_306*xi_310)\n",
+      "xi_322 ← sqrt(xi_294*xi_298)\n",
+      "xi_323 ← -1.0*phi_src_B^1 + xi_71\n",
+      "xi_324 ← xi_154 + xi_214 + xi_77\n",
+      "xi_325 ← xi_216 + xi_273 + xi_75\n",
+      "xi_326 ← sqrt(xi_323**2 + xi_324**2 + xi_325**2)\n",
+      "xi_327 ← (xi_322 < 1.0e-9) | (xi_314*xi_326 < 1.0e-9)\n",
+      "xi_328 ← xi_316 + 0.2\n",
+      "xi_329 ← xi_299*xi_302\n",
+      "xi_330 ← xi_295*xi_298*xi_302*xi_320*(50.0*phi_dst_B^1 - 50.0*phi_src_B^1 + xi_85)*(xi_305*xi_323 + xi_311*xi_324 + xi_312*xi_325)/(xi_322*xi_326)\n",
+      "xi_331 ← sqrt(xi_294*xi_300)\n",
+      "xi_332 ← -1.0*phi_src_B^2 + xi_88\n",
+      "xi_333 ← xi_168 + xi_226 + xi_94\n",
+      "xi_334 ← xi_228 + xi_284 + xi_92\n",
+      "xi_335 ← sqrt(xi_332**2 + xi_333**2 + xi_334**2)\n",
+      "xi_336 ← (xi_331 < 1.0e-9) | (xi_314*xi_335 < 1.0e-9)\n",
+      "xi_337 ← xi_301*xi_302\n",
+      "xi_338 ← xi_295*xi_300*xi_302*xi_320*(50.0*phi_dst_B^2 - 50.0*phi_src_B^2 + xi_101)*(xi_305*xi_332 + xi_311*xi_333 + xi_312*xi_334)/(xi_331*xi_335)\n",
+      "xi_339 ← -0.0833333333333333*mu_src_B^0 + 0.166666666666667*mu_src_B^1 + xi_103 + xi_104\n",
+      "xi_340 ← xi_317*(xi_339 + 0.333333333333333)\n",
+      "xi_341 ← xi_339 + 0.2\n",
+      "xi_342 ← phi_src_T^3/2 + xi_20\n",
+      "xi_343 ← xi_342**2\n",
+      "xi_344 ← phi_src_T^0/2 + xi_23\n",
+      "xi_345 ← xi_344**2\n",
+      "xi_346 ← phi_src_T^1/2 + xi_26\n",
+      "xi_347 ← xi_346**2\n",
+      "xi_348 ← phi_src_T^2/2 + xi_29\n",
+      "xi_349 ← xi_348**2\n",
+      "xi_350 ← 1/(xi_343 + xi_345 + xi_347 + xi_349)\n",
+      "xi_351 ← xi_343*xi_350*(1.0*mu_src_T^0 + xi_108)\n",
+      "xi_352 ← xi_343*xi_350*(1.0*mu_src_T^1 + xi_119)\n",
+      "xi_353 ← 1.0*phi_src_T^3 + xi_121\n",
+      "xi_354 ← sqrt(xi_342*xi_344)\n",
+      "xi_355 ← 1.0*phi_src_T^0 + xi_124\n",
+      "xi_356 ← xi_130 + xi_192 - xi_46\n",
+      "xi_357 ← -xi_195 + xi_254 + xi_43\n",
+      "xi_358 ← sqrt(xi_355**2 + xi_356**2 + xi_357**2)\n",
+      "xi_359 ← xi_137 + xi_198 - xi_55\n",
+      "xi_360 ← -xi_201 + xi_259 + xi_52\n",
+      "xi_361 ← xi_353**2 + xi_359**2 + xi_360**2\n",
+      "xi_362 ← sqrt(xi_361)\n",
+      "xi_363 ← (xi_354 < 1.0e-9) | (xi_358*xi_362 < 1.0e-9)\n",
+      "xi_364 ← 0.166666666666667*mu_src_T^0 - 0.0833333333333333*mu_src_T^1 + xi_61 + xi_62\n",
+      "xi_365 ← xi_343*xi_350\n",
+      "xi_366 ← xi_365*(xi_364 + 0.333333333333333)\n",
+      "xi_367 ← xi_345*xi_350\n",
+      "xi_368 ← 1/xi_361\n",
+      "xi_369 ← xi_343*xi_344*xi_350*xi_368*(50.0*phi_dst_T^0 - 50.0*phi_src_T^0 + xi_67)*(xi_353*xi_355 + xi_356*xi_359 + xi_357*xi_360)/(xi_354*xi_358)\n",
+      "xi_370 ← sqrt(xi_342*xi_346)\n",
+      "xi_371 ← 1.0*phi_src_T^1 + xi_149\n",
+      "xi_372 ← xi_155 + xi_214 - xi_78\n",
+      "xi_373 ← -xi_217 + xi_274 + xi_75\n",
+      "xi_374 ← sqrt(xi_371**2 + xi_372**2 + xi_373**2)\n",
+      "xi_375 ← (xi_370 < 1.0e-9) | (xi_362*xi_374 < 1.0e-9)\n",
+      "xi_376 ← xi_364 + 0.2\n",
+      "xi_377 ← xi_347*xi_350\n",
+      "xi_378 ← xi_343*xi_346*xi_350*xi_368*(50.0*phi_dst_T^1 - 50.0*phi_src_T^1 + xi_85)*(xi_353*xi_371 + xi_359*xi_372 + xi_360*xi_373)/(xi_370*xi_374)\n",
+      "xi_379 ← sqrt(xi_342*xi_348)\n",
+      "xi_380 ← 1.0*phi_src_T^2 + xi_163\n",
+      "xi_381 ← xi_169 + xi_226 - xi_95\n",
+      "xi_382 ← -xi_229 + xi_285 + xi_92\n",
+      "xi_383 ← sqrt(xi_380**2 + xi_381**2 + xi_382**2)\n",
+      "xi_384 ← (xi_379 < 1.0e-9) | (xi_362*xi_383 < 1.0e-9)\n",
+      "xi_385 ← xi_349*xi_350\n",
+      "xi_386 ← xi_343*xi_348*xi_350*xi_368*(50.0*phi_dst_T^2 - 50.0*phi_src_T^2 + xi_101)*(xi_353*xi_380 + xi_359*xi_381 + xi_360*xi_382)/(xi_379*xi_383)\n",
+      "xi_387 ← -0.0833333333333333*mu_src_T^0 + 0.166666666666667*mu_src_T^1 + xi_103 + xi_104\n",
+      "xi_388 ← xi_365*(xi_387 + 0.333333333333333)\n",
+      "xi_389 ← xi_387 + 0.2\n",
+      "dc_dmu_0_0 ← 4.00000000000000\n",
+      "dc_dmu_0_1 ← 2.00000000000000\n",
+      "dc_dmu_1_0 ← 2.00000000000000\n",
+      "dc_dmu_1_1 ← 4.00000000000000\n",
+      "dc_dphi_dt_0 ← xi_8*(xi_11 + 0.133333333333333*xi_12 - 0.0666666666666667*xi_13 - xi_14*xi_4 - xi_15 - 0.133333333333333*xi_16 + 0.0666666666666667*xi_17 + xi_18 + xi_6*xi_9)\n",
+      "dc_dphi_dt_1 ← -xi_8*(-xi_10*xi_6 + xi_11 + 0.0666666666666667*xi_12 - 0.133333333333333*xi_13 + xi_14*xi_5 + xi_15 - 0.0666666666666667*xi_16 + 0.133333333333333*xi_17 - xi_18)\n",
+      "dc_dT_dt_0 ← 0\n",
+      "dc_dT_dt_1 ← 0\n",
+      "staggered_down_0_0 ← 0.333333333333333*xi_33 - 0.166666666666667*xi_35 - xi_37*(3.92699081698724*Piecewise((0, xi_60), (xi_69*(xi_65 - xi_66*(xi_63 + 0.6)), True)) + 3.92699081698724*Piecewise((0, xi_82), (xi_86*(xi_65 - xi_83*xi_84), True)) + 3.92699081698724*Piecewise((0, xi_99), (xi_102*(-xi_100*xi_83 + xi_65), True)))\n",
+      "staggered_down_0_1 ← -0.166666666666667*xi_33 + 0.333333333333333*xi_35 - xi_37*(3.92699081698724*Piecewise((0, xi_60), (xi_69*(xi_106 - xi_107*xi_66), True)) + 3.92699081698724*Piecewise((0, xi_82), (xi_86*(xi_106 - xi_84*(xi_105 + 0.6)), True)) + 3.92699081698724*Piecewise((0, xi_99), (xi_102*(-xi_100*xi_107 + xi_106), True)))\n",
+      "staggered_up_0_0 ← 0.333333333333333*xi_118 - 0.166666666666667*xi_120 - xi_122*(3.92699081698724*Piecewise((0, xi_141), (xi_147*(xi_144 - xi_145*(xi_142 + 0.6)), True)) + 3.92699081698724*Piecewise((0, xi_158), (xi_161*(xi_144 - xi_159*xi_160), True)) + 3.92699081698724*Piecewise((0, xi_172), (xi_174*(xi_144 - xi_159*xi_173), True)))\n",
+      "staggered_up_0_1 ← -0.166666666666667*xi_118 + 0.333333333333333*xi_120 - xi_122*(3.92699081698724*Piecewise((0, xi_141), (xi_147*(-xi_145*xi_177 + xi_176), True)) + 3.92699081698724*Piecewise((0, xi_158), (xi_161*(-xi_160*(xi_175 + 0.6) + xi_176), True)) + 3.92699081698724*Piecewise((0, xi_172), (xi_174*(-xi_173*xi_177 + xi_176), True)))\n",
+      "staggered_down_1_0 ← 0.333333333333333*xi_187 - 0.166666666666667*xi_188 - xi_189*(3.92699081698724*Piecewise((0, xi_205), (xi_211*(xi_208 - xi_209*(xi_206 + 0.6)), True)) + 3.92699081698724*Piecewise((0, xi_220), (xi_223*(xi_208 - xi_221*xi_222), True)) + 3.92699081698724*Piecewise((0, xi_232), (xi_234*(xi_208 - xi_221*xi_233), True)))\n",
+      "staggered_down_1_1 ← -0.166666666666667*xi_187 + 0.333333333333333*xi_188 - xi_189*(3.92699081698724*Piecewise((0, xi_205), (xi_211*(-xi_209*xi_237 + xi_236), True)) + 3.92699081698724*Piecewise((0, xi_220), (xi_223*(-xi_222*(xi_235 + 0.6) + xi_236), True)) + 3.92699081698724*Piecewise((0, xi_232), (xi_234*(-xi_233*xi_237 + xi_236), True)))\n",
+      "staggered_up_1_0 ← 0.333333333333333*xi_247 - 0.166666666666667*xi_248 - xi_249*(3.92699081698724*Piecewise((0, xi_263), (xi_269*(xi_266 - xi_267*(xi_264 + 0.6)), True)) + 3.92699081698724*Piecewise((0, xi_277), (xi_280*(xi_266 - xi_278*xi_279), True)) + 3.92699081698724*Piecewise((0, xi_288), (xi_290*(xi_266 - xi_278*xi_289), True)))\n",
+      "staggered_up_1_1 ← -0.166666666666667*xi_247 + 0.333333333333333*xi_248 - xi_249*(3.92699081698724*Piecewise((0, xi_263), (xi_269*(-xi_267*xi_293 + xi_292), True)) + 3.92699081698724*Piecewise((0, xi_277), (xi_280*(-xi_279*(xi_291 + 0.6) + xi_292), True)) + 3.92699081698724*Piecewise((0, xi_288), (xi_290*(-xi_289*xi_293 + xi_292), True)))\n",
+      "staggered_down_2_0 ← 0.333333333333333*xi_303 - 0.166666666666667*xi_304 - xi_305*(3.92699081698724*Piecewise((0, xi_315), (xi_321*(xi_318 - xi_319*(xi_316 + 0.6)), True)) + 3.92699081698724*Piecewise((0, xi_327), (xi_330*(xi_318 - xi_328*xi_329), True)) + 3.92699081698724*Piecewise((0, xi_336), (xi_338*(xi_318 - xi_328*xi_337), True)))\n",
+      "staggered_down_2_1 ← -0.166666666666667*xi_303 + 0.333333333333333*xi_304 - xi_305*(3.92699081698724*Piecewise((0, xi_315), (xi_321*(-xi_319*xi_341 + xi_340), True)) + 3.92699081698724*Piecewise((0, xi_327), (xi_330*(-xi_329*(xi_339 + 0.6) + xi_340), True)) + 3.92699081698724*Piecewise((0, xi_336), (xi_338*(-xi_337*xi_341 + xi_340), True)))\n",
+      "staggered_up_2_0 ← 0.333333333333333*xi_351 - 0.166666666666667*xi_352 - xi_353*(3.92699081698724*Piecewise((0, xi_363), (xi_369*(xi_366 - xi_367*(xi_364 + 0.6)), True)) + 3.92699081698724*Piecewise((0, xi_375), (xi_378*(xi_366 - xi_376*xi_377), True)) + 3.92699081698724*Piecewise((0, xi_384), (xi_386*(xi_366 - xi_376*xi_385), True)))\n",
+      "staggered_up_2_1 ← -0.166666666666667*xi_351 + 0.333333333333333*xi_352 - xi_353*(3.92699081698724*Piecewise((0, xi_363), (xi_369*(-xi_367*xi_389 + xi_388), True)) + 3.92699081698724*Piecewise((0, xi_375), (xi_378*(-xi_377*(xi_387 + 0.6) + xi_388), True)) + 3.92699081698724*Piecewise((0, xi_384), (xi_386*(-xi_385*xi_389 + xi_388), True)))\n",
+      "divMgradmu_0 ← -1.0*staggered_down_0_0 - 1.0*staggered_down_1_0 - 1.0*staggered_down_2_0 + 1.0*staggered_up_0_0 + 1.0*staggered_up_1_0 + 1.0*staggered_up_2_0\n",
+      "xi_390 ← -0.01*dc_dT_dt_0 - 0.01*dc_dphi_dt_0 + 0.01*divMgradmu_0\n",
+      "divMgradmu_1 ← -1.0*staggered_down_0_1 - 1.0*staggered_down_1_1 - 1.0*staggered_down_2_1 + 1.0*staggered_up_0_1 + 1.0*staggered_up_1_1 + 1.0*staggered_up_2_1\n",
+      "xi_391 ← -0.01*dc_dT_dt_1 - 0.01*dc_dphi_dt_1 + 0.01*divMgradmu_1\n",
+      "mu_dst[0,0,0] ← mu_src_C^0 + dc_dmu_0_0*xi_390 + dc_dmu_0_1*xi_391\n",
+      "mu_dst[0,0,0](1) ← mu_src_C^1 + dc_dmu_1_0*xi_390 + dc_dmu_1_1*xi_391\n"
+     ]
+    }
+   ],
+   "source": [
+    "update_eqs = mu_kernel\n",
+    "\n",
+    "for eq in update_eqs:\n",
+    "    print(eq)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rescheduled_eqs = duplicate_trivial_ops(update_eqs, 3, 3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "282\n",
+      "xi_8 ← 200.0/((phi_dst_C^0**2 + phi_dst_C^1**2 + phi_dst_C^2**2 + phi_dst_C^3**2)*(phi_src_C^0**2 + phi_src_C^1**2 + phi_src_C^2**2 + phi_src_C^3**2))\n",
+      "xi_9 ← 0.2*phi_dst_C^0**2\n",
+      "xi_10 ← 0.2*phi_dst_C^1**2\n",
+      "xi_11 ← -phi_src_C^0**2*xi_10 + phi_src_C^1**2*xi_9\n",
+      "xi_12 ← phi_dst_C^0**2*phi_src_C^3**2\n",
+      "xi_13 ← phi_dst_C^1**2*phi_src_C^3**2\n",
+      "xi_14 ← 0.2*phi_dst_C^2**2\n",
+      "xi_15 ← 0.0666666666666667*phi_dst_C^2**2*phi_src_C^3**2\n",
+      "xi_16 ← phi_dst_C^3**2*phi_src_C^0**2\n",
+      "xi_17 ← phi_dst_C^3**2*phi_src_C^1**2\n",
+      "xi_18 ← 0.0666666666666667*phi_dst_C^3**2*phi_src_C^2**2\n",
+      "xi_21 ← phi_src_W^3/2 + phi_src_C^3/2\n",
+      "xi_22 ← xi_21**2\n",
+      "xi_24 ← phi_src_W^0/2 + phi_src_C^0/2\n",
+      "xi_25 ← xi_24**2\n",
+      "xi_27 ← phi_src_W^1/2 + phi_src_C^1/2\n",
+      "xi_28 ← xi_27**2\n",
+      "xi_30 ← phi_src_W^2/2 + phi_src_C^2/2\n",
+      "xi_31 ← xi_30**2\n",
+      "xi_32 ← 1/(xi_22 + xi_25 + xi_28 + xi_31)\n",
+      "xi_33 ← xi_22*xi_32*(-1.0*mu_src_W^0 + 1.0*mu_src_C^0)\n",
+      "xi_35 ← xi_22*xi_32*(-1.0*mu_src_W^1 + 1.0*mu_src_C^1)\n",
+      "xi_37 ← -1.0*phi_src_W^3 + 1.0*phi_src_C^3\n",
+      "xi_38 ← sqrt(xi_21*xi_24)\n",
+      "xi_40 ← -1.0*phi_src_W^0 + 1.0*phi_src_C^0\n",
+      "xi_41 ← -0.25*phi_src_SW^0\n",
+      "xi_43 ← -0.25*phi_src_S^0 + 0.25*phi_src_N^0\n",
+      "xi_45 ← -0.25*phi_src_BW^0\n",
+      "xi_47 ← -0.25*phi_src_B^0 + 0.25*phi_src_T^0\n",
+      "xi_49 ← sqrt(xi_40**2 + (0.25*phi_src_TW^0 + xi_45 + xi_47)**2 + (0.25*phi_src_NW^0 + xi_41 + xi_43)**2)\n",
+      "xi_50 ← -0.25*phi_src_SW^3\n",
+      "xi_52 ← -0.25*phi_src_S^3 + 0.25*phi_src_N^3\n",
+      "xi_54 ← -0.25*phi_src_BW^3\n",
+      "xi_56 ← -0.25*phi_src_B^3 + 0.25*phi_src_T^3\n",
+      "xi_58 ← xi_37**2 + (0.25*phi_src_TW^3 + xi_54 + xi_56)**2 + (0.25*phi_src_NW^3 + xi_50 + xi_52)**2\n",
+      "xi_59 ← sqrt(xi_58)\n",
+      "xi_62 ← -0.0833333333333333*mu_src_C^1\n",
+      "xi_63 ← 0.166666666666667*mu_src_W^0 - 0.0833333333333333*mu_src_W^1 + 0.166666666666667*mu_src_C^0 + xi_62\n",
+      "xi_64 ← xi_22*xi_32\n",
+      "xi_65 ← xi_64*(xi_63 + 0.333333333333333)\n",
+      "xi_66 ← xi_25*xi_32\n",
+      "xi_67 ← 50.0*phi_dst_C^0 - 50.0*phi_src_C^0\n",
+      "xi_68 ← 1/xi_58\n",
+      "xi_69 ← xi_22*xi_24*xi_32*xi_68*(50.0*phi_dst_W^0 - 50.0*phi_src_W^0 + xi_67)*(xi_37*xi_40 + (0.25*phi_src_TW^0 + xi_45 + xi_47)*(0.25*phi_src_TW^3 + xi_54 + xi_56) + (0.25*phi_src_NW^0 + xi_41 + xi_43)*(0.25*phi_src_NW^3 + xi_50 + xi_52))/(xi_38*xi_49)\n",
+      "xi_70 ← sqrt(xi_21*xi_27)\n",
+      "xi_72 ← -1.0*phi_src_W^1 + 1.0*phi_src_C^1\n",
+      "xi_73 ← -0.25*phi_src_SW^1\n",
+      "xi_75 ← -0.25*phi_src_S^1 + 0.25*phi_src_N^1\n",
+      "xi_77 ← -0.25*phi_src_BW^1\n",
+      "xi_79 ← -0.25*phi_src_B^1 + 0.25*phi_src_T^1\n",
+      "xi_81 ← sqrt(xi_72**2 + (0.25*phi_src_TW^1 + xi_77 + xi_79)**2 + (0.25*phi_src_NW^1 + xi_73 + xi_75)**2)\n",
+      "xi_83 ← xi_63 + 0.2\n",
+      "xi_84 ← xi_28*xi_32\n",
+      "xi_85 ← 50.0*phi_dst_C^1 - 50.0*phi_src_C^1\n",
+      "xi_86 ← xi_22*xi_27*xi_32*xi_68*(50.0*phi_dst_W^1 - 50.0*phi_src_W^1 + xi_85)*(xi_37*xi_72 + (0.25*phi_src_TW^1 + xi_77 + xi_79)*(0.25*phi_src_TW^3 + xi_54 + xi_56) + (0.25*phi_src_NW^1 + xi_73 + xi_75)*(0.25*phi_src_NW^3 + xi_50 + xi_52))/(xi_70*xi_81)\n",
+      "xi_87 ← sqrt(xi_21*xi_30)\n",
+      "xi_89 ← -1.0*phi_src_W^2 + 1.0*phi_src_C^2\n",
+      "xi_90 ← -0.25*phi_src_SW^2\n",
+      "xi_92 ← -0.25*phi_src_S^2 + 0.25*phi_src_N^2\n",
+      "xi_94 ← -0.25*phi_src_BW^2\n",
+      "xi_96 ← -0.25*phi_src_B^2 + 0.25*phi_src_T^2\n",
+      "xi_98 ← sqrt(xi_89**2 + (0.25*phi_src_TW^2 + xi_94 + xi_96)**2 + (0.25*phi_src_NW^2 + xi_90 + xi_92)**2)\n",
+      "xi_100 ← xi_31*xi_32\n",
+      "xi_101 ← 50.0*phi_dst_C^2 - 50.0*phi_src_C^2\n",
+      "xi_102 ← xi_22*xi_30*xi_32*xi_68*(50.0*phi_dst_W^2 - 50.0*phi_src_W^2 + xi_101)*(xi_37*xi_89 + (0.25*phi_src_TW^2 + xi_94 + xi_96)*(0.25*phi_src_TW^3 + xi_54 + xi_56) + (0.25*phi_src_NW^2 + xi_90 + xi_92)*(0.25*phi_src_NW^3 + xi_50 + xi_52))/(xi_87*xi_98)\n",
+      "xi_103 ← -0.0833333333333333*mu_src_C^0\n",
+      "xi_105 ← -0.0833333333333333*mu_src_W^0 + 0.166666666666667*mu_src_W^1 + 0.166666666666667*mu_src_C^1 + xi_103\n",
+      "xi_106 ← xi_64*(xi_105 + 0.333333333333333)\n",
+      "xi_107 ← xi_105 + 0.2\n",
+      "xi_109 ← phi_src_C^3/2 + phi_src_E^3/2\n",
+      "xi_110 ← xi_109**2\n",
+      "xi_111 ← phi_src_C^0/2 + phi_src_E^0/2\n",
+      "xi_112 ← xi_111**2\n",
+      "xi_113 ← phi_src_C^1/2 + phi_src_E^1/2\n",
+      "xi_114 ← xi_113**2\n",
+      "xi_115 ← phi_src_C^2/2 + phi_src_E^2/2\n",
+      "xi_116 ← xi_115**2\n",
+      "xi_117 ← 1/(xi_110 + xi_112 + xi_114 + xi_116)\n",
+      "xi_118 ← xi_110*xi_117*(-1.0*mu_src_C^0 + 1.0*mu_src_E^0)\n",
+      "xi_120 ← xi_110*xi_117*(-1.0*mu_src_C^1 + 1.0*mu_src_E^1)\n",
+      "xi_122 ← -1.0*phi_src_C^3 + 1.0*phi_src_E^3\n",
+      "xi_123 ← sqrt(xi_109*xi_111)\n",
+      "xi_125 ← -1.0*phi_src_C^0 + 1.0*phi_src_E^0\n",
+      "xi_132 ← sqrt(xi_125**2 + (-0.25*phi_src_SE^0 + 0.25*phi_src_NE^0 + xi_43)**2 + (-0.25*phi_src_BE^0 + 0.25*phi_src_TE^0 + xi_47)**2)\n",
+      "xi_139 ← xi_122**2 + (-0.25*phi_src_SE^3 + 0.25*phi_src_NE^3 + xi_52)**2 + (-0.25*phi_src_BE^3 + 0.25*phi_src_TE^3 + xi_56)**2\n",
+      "xi_140 ← sqrt(xi_139)\n",
+      "xi_142 ← 0.166666666666667*mu_src_C^0 + 0.166666666666667*mu_src_E^0 - 0.0833333333333333*mu_src_E^1 + xi_62\n",
+      "xi_143 ← xi_110*xi_117\n",
+      "xi_144 ← xi_143*(xi_142 + 0.333333333333333)\n",
+      "xi_145 ← xi_112*xi_117\n",
+      "xi_146 ← 1/xi_139\n",
+      "xi_147 ← xi_110*xi_111*xi_117*xi_146*(50.0*phi_dst_E^0 - 50.0*phi_src_E^0 + xi_67)*(xi_122*xi_125 + (-0.25*phi_src_SE^0 + 0.25*phi_src_NE^0 + xi_43)*(-0.25*phi_src_SE^3 + 0.25*phi_src_NE^3 + xi_52) + (-0.25*phi_src_BE^0 + 0.25*phi_src_TE^0 + xi_47)*(-0.25*phi_src_BE^3 + 0.25*phi_src_TE^3 + xi_56))/(xi_123*xi_132)\n",
+      "xi_148 ← sqrt(xi_109*xi_113)\n",
+      "xi_150 ← -1.0*phi_src_C^1 + 1.0*phi_src_E^1\n",
+      "xi_157 ← sqrt(xi_150**2 + (-0.25*phi_src_SE^1 + 0.25*phi_src_NE^1 + xi_75)**2 + (-0.25*phi_src_BE^1 + 0.25*phi_src_TE^1 + xi_79)**2)\n",
+      "xi_159 ← xi_142 + 0.2\n",
+      "xi_160 ← xi_114*xi_117\n",
+      "xi_161 ← xi_110*xi_113*xi_117*xi_146*(50.0*phi_dst_E^1 - 50.0*phi_src_E^1 + xi_85)*(xi_122*xi_150 + (-0.25*phi_src_SE^1 + 0.25*phi_src_NE^1 + xi_75)*(-0.25*phi_src_SE^3 + 0.25*phi_src_NE^3 + xi_52) + (-0.25*phi_src_BE^1 + 0.25*phi_src_TE^1 + xi_79)*(-0.25*phi_src_BE^3 + 0.25*phi_src_TE^3 + xi_56))/(xi_148*xi_157)\n",
+      "xi_162 ← sqrt(xi_109*xi_115)\n",
+      "xi_164 ← -1.0*phi_src_C^2 + 1.0*phi_src_E^2\n",
+      "xi_171 ← sqrt(xi_164**2 + (-0.25*phi_src_SE^2 + 0.25*phi_src_NE^2 + xi_92)**2 + (-0.25*phi_src_BE^2 + 0.25*phi_src_TE^2 + xi_96)**2)\n",
+      "xi_173 ← xi_116*xi_117\n",
+      "xi_174 ← xi_110*xi_115*xi_117*xi_146*(50.0*phi_dst_E^2 - 50.0*phi_src_E^2 + xi_101)*(xi_122*xi_164 + (-0.25*phi_src_SE^2 + 0.25*phi_src_NE^2 + xi_92)*(-0.25*phi_src_SE^3 + 0.25*phi_src_NE^3 + xi_52) + (-0.25*phi_src_BE^2 + 0.25*phi_src_TE^2 + xi_96)*(-0.25*phi_src_BE^3 + 0.25*phi_src_TE^3 + xi_56))/(xi_162*xi_171)\n",
+      "xi_175 ← 0.166666666666667*mu_src_C^1 - 0.0833333333333333*mu_src_E^0 + 0.166666666666667*mu_src_E^1 + xi_103\n",
+      "xi_176 ← xi_143*(xi_175 + 0.333333333333333)\n",
+      "xi_177 ← xi_175 + 0.2\n",
+      "xi_178 ← phi_src_S^3/2 + phi_src_C^3/2\n",
+      "xi_179 ← xi_178**2\n",
+      "xi_180 ← phi_src_S^0/2 + phi_src_C^0/2\n",
+      "xi_181 ← xi_180**2\n",
+      "xi_182 ← phi_src_S^1/2 + phi_src_C^1/2\n",
+      "xi_183 ← xi_182**2\n",
+      "xi_184 ← phi_src_S^2/2 + phi_src_C^2/2\n",
+      "xi_185 ← xi_184**2\n",
+      "xi_186 ← 1/(xi_179 + xi_181 + xi_183 + xi_185)\n",
+      "xi_187 ← xi_179*xi_186*(-1.0*mu_src_S^0 + 1.0*mu_src_C^0)\n",
+      "xi_188 ← xi_179*xi_186*(-1.0*mu_src_S^1 + 1.0*mu_src_C^1)\n",
+      "xi_189 ← -1.0*phi_src_S^3 + 1.0*phi_src_C^3\n",
+      "xi_190 ← sqrt(xi_178*xi_180)\n",
+      "xi_191 ← -1.0*phi_src_S^0 + 1.0*phi_src_C^0\n",
+      "xi_192 ← -0.25*phi_src_W^0 + 0.25*phi_src_E^0\n",
+      "xi_194 ← -0.25*phi_src_BS^0\n",
+      "xi_197 ← sqrt(xi_191**2 + (0.25*phi_src_TS^0 + xi_194 + xi_47)**2 + (0.25*phi_src_SE^0 + xi_192 + xi_41)**2)\n",
+      "xi_198 ← -0.25*phi_src_W^3 + 0.25*phi_src_E^3\n",
+      "xi_200 ← -0.25*phi_src_BS^3\n",
+      "xi_203 ← xi_189**2 + (0.25*phi_src_TS^3 + xi_200 + xi_56)**2 + (0.25*phi_src_SE^3 + xi_198 + xi_50)**2\n",
+      "xi_204 ← sqrt(xi_203)\n",
+      "xi_206 ← 0.166666666666667*mu_src_S^0 - 0.0833333333333333*mu_src_S^1 + 0.166666666666667*mu_src_C^0 + xi_62\n",
+      "xi_207 ← xi_179*xi_186\n",
+      "xi_208 ← xi_207*(xi_206 + 0.333333333333333)\n",
+      "xi_209 ← xi_181*xi_186\n",
+      "xi_210 ← 1/xi_203\n",
+      "xi_211 ← xi_179*xi_180*xi_186*xi_210*(50.0*phi_dst_S^0 - 50.0*phi_src_S^0 + xi_67)*(xi_189*xi_191 + (0.25*phi_src_TS^0 + xi_194 + xi_47)*(0.25*phi_src_TS^3 + xi_200 + xi_56) + (0.25*phi_src_SE^0 + xi_192 + xi_41)*(0.25*phi_src_SE^3 + xi_198 + xi_50))/(xi_190*xi_197)\n",
+      "xi_212 ← sqrt(xi_178*xi_182)\n",
+      "xi_213 ← -1.0*phi_src_S^1 + 1.0*phi_src_C^1\n",
+      "xi_214 ← -0.25*phi_src_W^1 + 0.25*phi_src_E^1\n",
+      "xi_216 ← -0.25*phi_src_BS^1\n",
+      "xi_219 ← sqrt(xi_213**2 + (0.25*phi_src_TS^1 + xi_216 + xi_79)**2 + (0.25*phi_src_SE^1 + xi_214 + xi_73)**2)\n",
+      "xi_221 ← xi_206 + 0.2\n",
+      "xi_222 ← xi_183*xi_186\n",
+      "xi_223 ← xi_179*xi_182*xi_186*xi_210*(50.0*phi_dst_S^1 - 50.0*phi_src_S^1 + xi_85)*(xi_189*xi_213 + (0.25*phi_src_TS^1 + xi_216 + xi_79)*(0.25*phi_src_TS^3 + xi_200 + xi_56) + (0.25*phi_src_SE^1 + xi_214 + xi_73)*(0.25*phi_src_SE^3 + xi_198 + xi_50))/(xi_212*xi_219)\n",
+      "xi_224 ← sqrt(xi_178*xi_184)\n",
+      "xi_225 ← -1.0*phi_src_S^2 + 1.0*phi_src_C^2\n",
+      "xi_226 ← -0.25*phi_src_W^2 + 0.25*phi_src_E^2\n",
+      "xi_228 ← -0.25*phi_src_BS^2\n",
+      "xi_231 ← sqrt(xi_225**2 + (0.25*phi_src_TS^2 + xi_228 + xi_96)**2 + (0.25*phi_src_SE^2 + xi_226 + xi_90)**2)\n",
+      "xi_233 ← xi_185*xi_186\n",
+      "xi_234 ← xi_179*xi_184*xi_186*xi_210*(50.0*phi_dst_S^2 - 50.0*phi_src_S^2 + xi_101)*(xi_189*xi_225 + (0.25*phi_src_TS^2 + xi_228 + xi_96)*(0.25*phi_src_TS^3 + xi_200 + xi_56) + (0.25*phi_src_SE^2 + xi_226 + xi_90)*(0.25*phi_src_SE^3 + xi_198 + xi_50))/(xi_224*xi_231)\n",
+      "xi_235 ← -0.0833333333333333*mu_src_S^0 + 0.166666666666667*mu_src_S^1 + 0.166666666666667*mu_src_C^1 + xi_103\n",
+      "xi_236 ← xi_207*(xi_235 + 0.333333333333333)\n",
+      "xi_237 ← xi_235 + 0.2\n",
+      "xi_238 ← phi_src_C^3/2 + phi_src_N^3/2\n",
+      "xi_239 ← xi_238**2\n",
+      "xi_240 ← phi_src_C^0/2 + phi_src_N^0/2\n",
+      "xi_241 ← xi_240**2\n",
+      "xi_242 ← phi_src_C^1/2 + phi_src_N^1/2\n",
+      "xi_243 ← xi_242**2\n",
+      "xi_244 ← phi_src_C^2/2 + phi_src_N^2/2\n",
+      "xi_245 ← xi_244**2\n",
+      "xi_246 ← 1/(xi_239 + xi_241 + xi_243 + xi_245)\n",
+      "xi_247 ← xi_239*xi_246*(-1.0*mu_src_C^0 + 1.0*mu_src_N^0)\n",
+      "xi_248 ← xi_239*xi_246*(-1.0*mu_src_C^1 + 1.0*mu_src_N^1)\n",
+      "xi_249 ← -1.0*phi_src_C^3 + 1.0*phi_src_N^3\n",
+      "xi_250 ← sqrt(xi_238*xi_240)\n",
+      "xi_251 ← -1.0*phi_src_C^0 + 1.0*phi_src_N^0\n",
+      "xi_256 ← sqrt(xi_251**2 + (-0.25*phi_src_NW^0 + 0.25*phi_src_NE^0 + xi_192)**2 + (-0.25*phi_src_BN^0 + 0.25*phi_src_TN^0 + xi_47)**2)\n",
+      "xi_261 ← xi_249**2 + (-0.25*phi_src_NW^3 + 0.25*phi_src_NE^3 + xi_198)**2 + (-0.25*phi_src_BN^3 + 0.25*phi_src_TN^3 + xi_56)**2\n",
+      "xi_262 ← sqrt(xi_261)\n",
+      "xi_264 ← 0.166666666666667*mu_src_C^0 + 0.166666666666667*mu_src_N^0 - 0.0833333333333333*mu_src_N^1 + xi_62\n",
+      "xi_265 ← xi_239*xi_246\n",
+      "xi_266 ← xi_265*(xi_264 + 0.333333333333333)\n",
+      "xi_267 ← xi_241*xi_246\n",
+      "xi_268 ← 1/xi_261\n",
+      "xi_269 ← xi_239*xi_240*xi_246*xi_268*(50.0*phi_dst_N^0 - 50.0*phi_src_N^0 + xi_67)*(xi_249*xi_251 + (-0.25*phi_src_NW^0 + 0.25*phi_src_NE^0 + xi_192)*(-0.25*phi_src_NW^3 + 0.25*phi_src_NE^3 + xi_198) + (-0.25*phi_src_BN^0 + 0.25*phi_src_TN^0 + xi_47)*(-0.25*phi_src_BN^3 + 0.25*phi_src_TN^3 + xi_56))/(xi_250*xi_256)\n",
+      "xi_270 ← sqrt(xi_238*xi_242)\n",
+      "xi_271 ← -1.0*phi_src_C^1 + 1.0*phi_src_N^1\n",
+      "xi_276 ← sqrt(xi_271**2 + (-0.25*phi_src_NW^1 + 0.25*phi_src_NE^1 + xi_214)**2 + (-0.25*phi_src_BN^1 + 0.25*phi_src_TN^1 + xi_79)**2)\n",
+      "xi_278 ← xi_264 + 0.2\n",
+      "xi_279 ← xi_243*xi_246\n",
+      "xi_280 ← xi_239*xi_242*xi_246*xi_268*(50.0*phi_dst_N^1 - 50.0*phi_src_N^1 + xi_85)*(xi_249*xi_271 + (-0.25*phi_src_NW^1 + 0.25*phi_src_NE^1 + xi_214)*(-0.25*phi_src_NW^3 + 0.25*phi_src_NE^3 + xi_198) + (-0.25*phi_src_BN^1 + 0.25*phi_src_TN^1 + xi_79)*(-0.25*phi_src_BN^3 + 0.25*phi_src_TN^3 + xi_56))/(xi_270*xi_276)\n",
+      "xi_281 ← sqrt(xi_238*xi_244)\n",
+      "xi_282 ← -1.0*phi_src_C^2 + 1.0*phi_src_N^2\n",
+      "xi_287 ← sqrt(xi_282**2 + (-0.25*phi_src_NW^2 + 0.25*phi_src_NE^2 + xi_226)**2 + (-0.25*phi_src_BN^2 + 0.25*phi_src_TN^2 + xi_96)**2)\n",
+      "xi_289 ← xi_245*xi_246\n",
+      "xi_290 ← xi_239*xi_244*xi_246*xi_268*(50.0*phi_dst_N^2 - 50.0*phi_src_N^2 + xi_101)*(xi_249*xi_282 + (-0.25*phi_src_NW^2 + 0.25*phi_src_NE^2 + xi_226)*(-0.25*phi_src_NW^3 + 0.25*phi_src_NE^3 + xi_198) + (-0.25*phi_src_BN^2 + 0.25*phi_src_TN^2 + xi_96)*(-0.25*phi_src_BN^3 + 0.25*phi_src_TN^3 + xi_56))/(xi_281*xi_287)\n",
+      "xi_291 ← 0.166666666666667*mu_src_C^1 - 0.0833333333333333*mu_src_N^0 + 0.166666666666667*mu_src_N^1 + xi_103\n",
+      "xi_292 ← xi_265*(xi_291 + 0.333333333333333)\n",
+      "xi_293 ← xi_291 + 0.2\n",
+      "xi_294 ← phi_src_B^3/2 + phi_src_C^3/2\n",
+      "xi_295 ← xi_294**2\n",
+      "xi_296 ← phi_src_B^0/2 + phi_src_C^0/2\n",
+      "xi_297 ← xi_296**2\n",
+      "xi_298 ← phi_src_B^1/2 + phi_src_C^1/2\n",
+      "xi_299 ← xi_298**2\n",
+      "xi_300 ← phi_src_B^2/2 + phi_src_C^2/2\n",
+      "xi_301 ← xi_300**2\n",
+      "xi_302 ← 1/(xi_295 + xi_297 + xi_299 + xi_301)\n",
+      "xi_303 ← xi_295*xi_302*(-1.0*mu_src_B^0 + 1.0*mu_src_C^0)\n",
+      "xi_304 ← xi_295*xi_302*(-1.0*mu_src_B^1 + 1.0*mu_src_C^1)\n",
+      "xi_305 ← -1.0*phi_src_B^3 + 1.0*phi_src_C^3\n",
+      "xi_306 ← sqrt(xi_294*xi_296)\n",
+      "xi_307 ← -1.0*phi_src_B^0 + 1.0*phi_src_C^0\n",
+      "xi_310 ← sqrt(xi_307**2 + (0.25*phi_src_BN^0 + xi_194 + xi_43)**2 + (0.25*phi_src_BE^0 + xi_192 + xi_45)**2)\n",
+      "xi_313 ← xi_305**2 + (0.25*phi_src_BN^3 + xi_200 + xi_52)**2 + (0.25*phi_src_BE^3 + xi_198 + xi_54)**2\n",
+      "xi_314 ← sqrt(xi_313)\n",
+      "xi_316 ← 0.166666666666667*mu_src_B^0 - 0.0833333333333333*mu_src_B^1 + 0.166666666666667*mu_src_C^0 + xi_62\n",
+      "xi_317 ← xi_295*xi_302\n",
+      "xi_318 ← xi_317*(xi_316 + 0.333333333333333)\n",
+      "xi_319 ← xi_297*xi_302\n",
+      "xi_320 ← 1/xi_313\n",
+      "xi_321 ← xi_295*xi_296*xi_302*xi_320*(50.0*phi_dst_B^0 - 50.0*phi_src_B^0 + xi_67)*(xi_305*xi_307 + (0.25*phi_src_BN^0 + xi_194 + xi_43)*(0.25*phi_src_BN^3 + xi_200 + xi_52) + (0.25*phi_src_BE^0 + xi_192 + xi_45)*(0.25*phi_src_BE^3 + xi_198 + xi_54))/(xi_306*xi_310)\n",
+      "xi_322 ← sqrt(xi_294*xi_298)\n",
+      "xi_323 ← -1.0*phi_src_B^1 + 1.0*phi_src_C^1\n",
+      "xi_326 ← sqrt(xi_323**2 + (0.25*phi_src_BN^1 + xi_216 + xi_75)**2 + (0.25*phi_src_BE^1 + xi_214 + xi_77)**2)\n",
+      "xi_328 ← xi_316 + 0.2\n",
+      "xi_329 ← xi_299*xi_302\n",
+      "xi_330 ← xi_295*xi_298*xi_302*xi_320*(50.0*phi_dst_B^1 - 50.0*phi_src_B^1 + xi_85)*(xi_305*xi_323 + (0.25*phi_src_BN^1 + xi_216 + xi_75)*(0.25*phi_src_BN^3 + xi_200 + xi_52) + (0.25*phi_src_BE^1 + xi_214 + xi_77)*(0.25*phi_src_BE^3 + xi_198 + xi_54))/(xi_322*xi_326)\n",
+      "xi_331 ← sqrt(xi_294*xi_300)\n",
+      "xi_332 ← -1.0*phi_src_B^2 + 1.0*phi_src_C^2\n",
+      "xi_335 ← sqrt(xi_332**2 + (0.25*phi_src_BN^2 + xi_228 + xi_92)**2 + (0.25*phi_src_BE^2 + xi_226 + xi_94)**2)\n",
+      "xi_337 ← xi_301*xi_302\n",
+      "xi_338 ← xi_295*xi_300*xi_302*xi_320*(50.0*phi_dst_B^2 - 50.0*phi_src_B^2 + xi_101)*(xi_305*xi_332 + (0.25*phi_src_BN^2 + xi_228 + xi_92)*(0.25*phi_src_BN^3 + xi_200 + xi_52) + (0.25*phi_src_BE^2 + xi_226 + xi_94)*(0.25*phi_src_BE^3 + xi_198 + xi_54))/(xi_331*xi_335)\n",
+      "xi_339 ← -0.0833333333333333*mu_src_B^0 + 0.166666666666667*mu_src_B^1 + 0.166666666666667*mu_src_C^1 + xi_103\n",
+      "xi_340 ← xi_317*(xi_339 + 0.333333333333333)\n",
+      "xi_341 ← xi_339 + 0.2\n",
+      "xi_342 ← phi_src_C^3/2 + phi_src_T^3/2\n",
+      "xi_343 ← xi_342**2\n",
+      "xi_344 ← phi_src_C^0/2 + phi_src_T^0/2\n",
+      "xi_345 ← xi_344**2\n",
+      "xi_346 ← phi_src_C^1/2 + phi_src_T^1/2\n",
+      "xi_347 ← xi_346**2\n",
+      "xi_348 ← phi_src_C^2/2 + phi_src_T^2/2\n",
+      "xi_349 ← xi_348**2\n",
+      "xi_350 ← 1/(xi_343 + xi_345 + xi_347 + xi_349)\n",
+      "xi_351 ← xi_343*xi_350*(-1.0*mu_src_C^0 + 1.0*mu_src_T^0)\n",
+      "xi_352 ← xi_343*xi_350*(-1.0*mu_src_C^1 + 1.0*mu_src_T^1)\n",
+      "xi_353 ← -1.0*phi_src_C^3 + 1.0*phi_src_T^3\n",
+      "xi_354 ← sqrt(xi_342*xi_344)\n",
+      "xi_355 ← -1.0*phi_src_C^0 + 1.0*phi_src_T^0\n",
+      "xi_358 ← sqrt(xi_355**2 + (-0.25*phi_src_TW^0 + 0.25*phi_src_TE^0 + xi_192)**2 + (-0.25*phi_src_TS^0 + 0.25*phi_src_TN^0 + xi_43)**2)\n",
+      "xi_361 ← xi_353**2 + (-0.25*phi_src_TW^3 + 0.25*phi_src_TE^3 + xi_198)**2 + (-0.25*phi_src_TS^3 + 0.25*phi_src_TN^3 + xi_52)**2\n",
+      "xi_362 ← sqrt(xi_361)\n",
+      "xi_364 ← 0.166666666666667*mu_src_C^0 + 0.166666666666667*mu_src_T^0 - 0.0833333333333333*mu_src_T^1 + xi_62\n",
+      "xi_365 ← xi_343*xi_350\n",
+      "xi_366 ← xi_365*(xi_364 + 0.333333333333333)\n",
+      "xi_367 ← xi_345*xi_350\n",
+      "xi_368 ← 1/xi_361\n",
+      "xi_369 ← xi_343*xi_344*xi_350*xi_368*(50.0*phi_dst_T^0 - 50.0*phi_src_T^0 + xi_67)*(xi_353*xi_355 + (-0.25*phi_src_TW^0 + 0.25*phi_src_TE^0 + xi_192)*(-0.25*phi_src_TW^3 + 0.25*phi_src_TE^3 + xi_198) + (-0.25*phi_src_TS^0 + 0.25*phi_src_TN^0 + xi_43)*(-0.25*phi_src_TS^3 + 0.25*phi_src_TN^3 + xi_52))/(xi_354*xi_358)\n",
+      "xi_370 ← sqrt(xi_342*xi_346)\n",
+      "xi_371 ← -1.0*phi_src_C^1 + 1.0*phi_src_T^1\n",
+      "xi_374 ← sqrt(xi_371**2 + (-0.25*phi_src_TW^1 + 0.25*phi_src_TE^1 + xi_214)**2 + (-0.25*phi_src_TS^1 + 0.25*phi_src_TN^1 + xi_75)**2)\n",
+      "xi_376 ← xi_364 + 0.2\n",
+      "xi_377 ← xi_347*xi_350\n",
+      "xi_378 ← xi_343*xi_346*xi_350*xi_368*(50.0*phi_dst_T^1 - 50.0*phi_src_T^1 + xi_85)*(xi_353*xi_371 + (-0.25*phi_src_TW^1 + 0.25*phi_src_TE^1 + xi_214)*(-0.25*phi_src_TW^3 + 0.25*phi_src_TE^3 + xi_198) + (-0.25*phi_src_TS^1 + 0.25*phi_src_TN^1 + xi_75)*(-0.25*phi_src_TS^3 + 0.25*phi_src_TN^3 + xi_52))/(xi_370*xi_374)\n",
+      "xi_379 ← sqrt(xi_342*xi_348)\n",
+      "xi_380 ← -1.0*phi_src_C^2 + 1.0*phi_src_T^2\n",
+      "xi_383 ← sqrt(xi_380**2 + (-0.25*phi_src_TW^2 + 0.25*phi_src_TE^2 + xi_226)**2 + (-0.25*phi_src_TS^2 + 0.25*phi_src_TN^2 + xi_92)**2)\n",
+      "xi_385 ← xi_349*xi_350\n",
+      "xi_386 ← xi_343*xi_348*xi_350*xi_368*(50.0*phi_dst_T^2 - 50.0*phi_src_T^2 + xi_101)*(xi_353*xi_380 + (-0.25*phi_src_TW^2 + 0.25*phi_src_TE^2 + xi_226)*(-0.25*phi_src_TW^3 + 0.25*phi_src_TE^3 + xi_198) + (-0.25*phi_src_TS^2 + 0.25*phi_src_TN^2 + xi_92)*(-0.25*phi_src_TS^3 + 0.25*phi_src_TN^3 + xi_52))/(xi_379*xi_383)\n",
+      "xi_387 ← 0.166666666666667*mu_src_C^1 - 0.0833333333333333*mu_src_T^0 + 0.166666666666667*mu_src_T^1 + xi_103\n",
+      "xi_388 ← xi_365*(xi_387 + 0.333333333333333)\n",
+      "xi_389 ← xi_387 + 0.2\n",
+      "dc_dphi_dt_0 ← xi_8*(-phi_src_C^0**2*xi_14 + phi_src_C^2**2*xi_9 + xi_11 + 0.133333333333333*xi_12 - 0.0666666666666667*xi_13 - xi_15 - 0.133333333333333*xi_16 + 0.0666666666666667*xi_17 + xi_18)\n",
+      "dc_dphi_dt_1 ← -xi_8*(phi_src_C^1**2*xi_14 - phi_src_C^2**2*xi_10 + xi_11 + 0.0666666666666667*xi_12 - 0.133333333333333*xi_13 + xi_15 - 0.0666666666666667*xi_16 + 0.133333333333333*xi_17 - xi_18)\n",
+      "staggered_down_0_0 ← 0.333333333333333*xi_33 - 0.166666666666667*xi_35 - xi_37*(3.92699081698724*Piecewise((0, (xi_38 < 1.0e-9) | (xi_49*xi_59 < 1.0e-9)), (xi_69*(xi_65 - xi_66*(xi_63 + 0.6)), True)) + 3.92699081698724*Piecewise((0, (xi_70 < 1.0e-9) | (xi_59*xi_81 < 1.0e-9)), (xi_86*(xi_65 - xi_83*xi_84), True)) + 3.92699081698724*Piecewise((0, (xi_87 < 1.0e-9) | (xi_59*xi_98 < 1.0e-9)), (xi_102*(-xi_100*xi_83 + xi_65), True)))\n",
+      "staggered_down_0_1 ← -0.166666666666667*xi_33 + 0.333333333333333*xi_35 - xi_37*(3.92699081698724*Piecewise((0, (xi_38 < 1.0e-9) | (xi_49*xi_59 < 1.0e-9)), (xi_69*(xi_106 - xi_107*xi_66), True)) + 3.92699081698724*Piecewise((0, (xi_70 < 1.0e-9) | (xi_59*xi_81 < 1.0e-9)), (xi_86*(xi_106 - xi_84*(xi_105 + 0.6)), True)) + 3.92699081698724*Piecewise((0, (xi_87 < 1.0e-9) | (xi_59*xi_98 < 1.0e-9)), (xi_102*(-xi_100*xi_107 + xi_106), True)))\n",
+      "staggered_up_0_0 ← 0.333333333333333*xi_118 - 0.166666666666667*xi_120 - xi_122*(3.92699081698724*Piecewise((0, (xi_123 < 1.0e-9) | (xi_132*xi_140 < 1.0e-9)), (xi_147*(xi_144 - xi_145*(xi_142 + 0.6)), True)) + 3.92699081698724*Piecewise((0, (xi_148 < 1.0e-9) | (xi_140*xi_157 < 1.0e-9)), (xi_161*(xi_144 - xi_159*xi_160), True)) + 3.92699081698724*Piecewise((0, (xi_162 < 1.0e-9) | (xi_140*xi_171 < 1.0e-9)), (xi_174*(xi_144 - xi_159*xi_173), True)))\n",
+      "staggered_up_0_1 ← -0.166666666666667*xi_118 + 0.333333333333333*xi_120 - xi_122*(3.92699081698724*Piecewise((0, (xi_123 < 1.0e-9) | (xi_132*xi_140 < 1.0e-9)), (xi_147*(-xi_145*xi_177 + xi_176), True)) + 3.92699081698724*Piecewise((0, (xi_148 < 1.0e-9) | (xi_140*xi_157 < 1.0e-9)), (xi_161*(-xi_160*(xi_175 + 0.6) + xi_176), True)) + 3.92699081698724*Piecewise((0, (xi_162 < 1.0e-9) | (xi_140*xi_171 < 1.0e-9)), (xi_174*(-xi_173*xi_177 + xi_176), True)))\n",
+      "staggered_down_1_0 ← 0.333333333333333*xi_187 - 0.166666666666667*xi_188 - xi_189*(3.92699081698724*Piecewise((0, (xi_190 < 1.0e-9) | (xi_197*xi_204 < 1.0e-9)), (xi_211*(xi_208 - xi_209*(xi_206 + 0.6)), True)) + 3.92699081698724*Piecewise((0, (xi_212 < 1.0e-9) | (xi_204*xi_219 < 1.0e-9)), (xi_223*(xi_208 - xi_221*xi_222), True)) + 3.92699081698724*Piecewise((0, (xi_224 < 1.0e-9) | (xi_204*xi_231 < 1.0e-9)), (xi_234*(xi_208 - xi_221*xi_233), True)))\n",
+      "staggered_down_1_1 ← -0.166666666666667*xi_187 + 0.333333333333333*xi_188 - xi_189*(3.92699081698724*Piecewise((0, (xi_190 < 1.0e-9) | (xi_197*xi_204 < 1.0e-9)), (xi_211*(-xi_209*xi_237 + xi_236), True)) + 3.92699081698724*Piecewise((0, (xi_212 < 1.0e-9) | (xi_204*xi_219 < 1.0e-9)), (xi_223*(-xi_222*(xi_235 + 0.6) + xi_236), True)) + 3.92699081698724*Piecewise((0, (xi_224 < 1.0e-9) | (xi_204*xi_231 < 1.0e-9)), (xi_234*(-xi_233*xi_237 + xi_236), True)))\n",
+      "staggered_up_1_0 ← 0.333333333333333*xi_247 - 0.166666666666667*xi_248 - xi_249*(3.92699081698724*Piecewise((0, (xi_250 < 1.0e-9) | (xi_256*xi_262 < 1.0e-9)), (xi_269*(xi_266 - xi_267*(xi_264 + 0.6)), True)) + 3.92699081698724*Piecewise((0, (xi_270 < 1.0e-9) | (xi_262*xi_276 < 1.0e-9)), (xi_280*(xi_266 - xi_278*xi_279), True)) + 3.92699081698724*Piecewise((0, (xi_281 < 1.0e-9) | (xi_262*xi_287 < 1.0e-9)), (xi_290*(xi_266 - xi_278*xi_289), True)))\n",
+      "staggered_up_1_1 ← -0.166666666666667*xi_247 + 0.333333333333333*xi_248 - xi_249*(3.92699081698724*Piecewise((0, (xi_250 < 1.0e-9) | (xi_256*xi_262 < 1.0e-9)), (xi_269*(-xi_267*xi_293 + xi_292), True)) + 3.92699081698724*Piecewise((0, (xi_270 < 1.0e-9) | (xi_262*xi_276 < 1.0e-9)), (xi_280*(-xi_279*(xi_291 + 0.6) + xi_292), True)) + 3.92699081698724*Piecewise((0, (xi_281 < 1.0e-9) | (xi_262*xi_287 < 1.0e-9)), (xi_290*(-xi_289*xi_293 + xi_292), True)))\n",
+      "staggered_down_2_0 ← 0.333333333333333*xi_303 - 0.166666666666667*xi_304 - xi_305*(3.92699081698724*Piecewise((0, (xi_306 < 1.0e-9) | (xi_310*xi_314 < 1.0e-9)), (xi_321*(xi_318 - xi_319*(xi_316 + 0.6)), True)) + 3.92699081698724*Piecewise((0, (xi_322 < 1.0e-9) | (xi_314*xi_326 < 1.0e-9)), (xi_330*(xi_318 - xi_328*xi_329), True)) + 3.92699081698724*Piecewise((0, (xi_331 < 1.0e-9) | (xi_314*xi_335 < 1.0e-9)), (xi_338*(xi_318 - xi_328*xi_337), True)))\n",
+      "staggered_down_2_1 ← -0.166666666666667*xi_303 + 0.333333333333333*xi_304 - xi_305*(3.92699081698724*Piecewise((0, (xi_306 < 1.0e-9) | (xi_310*xi_314 < 1.0e-9)), (xi_321*(-xi_319*xi_341 + xi_340), True)) + 3.92699081698724*Piecewise((0, (xi_322 < 1.0e-9) | (xi_314*xi_326 < 1.0e-9)), (xi_330*(-xi_329*(xi_339 + 0.6) + xi_340), True)) + 3.92699081698724*Piecewise((0, (xi_331 < 1.0e-9) | (xi_314*xi_335 < 1.0e-9)), (xi_338*(-xi_337*xi_341 + xi_340), True)))\n",
+      "staggered_up_2_0 ← 0.333333333333333*xi_351 - 0.166666666666667*xi_352 - xi_353*(3.92699081698724*Piecewise((0, (xi_354 < 1.0e-9) | (xi_358*xi_362 < 1.0e-9)), (xi_369*(xi_366 - xi_367*(xi_364 + 0.6)), True)) + 3.92699081698724*Piecewise((0, (xi_370 < 1.0e-9) | (xi_362*xi_374 < 1.0e-9)), (xi_378*(xi_366 - xi_376*xi_377), True)) + 3.92699081698724*Piecewise((0, (xi_379 < 1.0e-9) | (xi_362*xi_383 < 1.0e-9)), (xi_386*(xi_366 - xi_376*xi_385), True)))\n",
+      "staggered_up_2_1 ← -0.166666666666667*xi_351 + 0.333333333333333*xi_352 - xi_353*(3.92699081698724*Piecewise((0, (xi_354 < 1.0e-9) | (xi_358*xi_362 < 1.0e-9)), (xi_369*(-xi_367*xi_389 + xi_388), True)) + 3.92699081698724*Piecewise((0, (xi_370 < 1.0e-9) | (xi_362*xi_374 < 1.0e-9)), (xi_378*(-xi_377*(xi_387 + 0.6) + xi_388), True)) + 3.92699081698724*Piecewise((0, (xi_379 < 1.0e-9) | (xi_362*xi_383 < 1.0e-9)), (xi_386*(-xi_385*xi_389 + xi_388), True)))\n",
+      "divMgradmu_0 ← -1.0*staggered_down_0_0 - 1.0*staggered_down_1_0 - 1.0*staggered_down_2_0 + 1.0*staggered_up_0_0 + 1.0*staggered_up_1_0 + 1.0*staggered_up_2_0\n",
+      "xi_390 ← -0.01*dc_dphi_dt_0 + 0.01*divMgradmu_0\n",
+      "divMgradmu_1 ← -1.0*staggered_down_0_1 - 1.0*staggered_down_1_1 - 1.0*staggered_down_2_1 + 1.0*staggered_up_0_1 + 1.0*staggered_up_1_1 + 1.0*staggered_up_2_1\n",
+      "xi_391 ← -0.01*dc_dphi_dt_1 + 0.01*divMgradmu_1\n",
+      "mu_dst[0,0,0] ← mu_src_C^0 + 4.0*xi_390 + 2.0*xi_391\n",
+      "mu_dst[0,0,0](1) ← mu_src_C^1 + 2.0*xi_390 + 4.0*xi_391\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(len(rescheduled_eqs))\n",
+    "for eq in rescheduled_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "phi_kernel = ps.create_kernel(\n",
+    "    rescheduled_eqs,\n",
+    "    target=\"gpu\",\n",
+    "    gpu_indexing_params={\n",
+    "        \"block_size\": (32, 4, 1)\n",
+    "    }).compile()\n",
+    "\n",
+    "code = \"#include <cstdint>\\n\"\n",
+    "code += \"#define FUNC_PREFIX __global__ __launch_bounds__(128)\\n\"\n",
+    "code += \"#define RESTRICT const __restrict__\\n\\n\"\n",
+    "\n",
+    "code += str(show_code(phi_kernel.ast))\n",
+    "\n",
+    "cubin = pycuda.compiler.compile(code, options=[\"-w\", \"-std=c++11\", \"-use_fast_math\" ], arch=\"sm_60\")\n",
+    "\n",
+    "run([  \"echo \\\"\" + code + \"\\\" >> temp.cubin\"],\n",
+    "        stdout=PIPE,\n",
+    "        shell=True)\n",
+    "\n",
+    "newFile = open(\"temp.cusbin\", \"wb\")\n",
+    "newFile.write(cubin)\n",
+    "newFile.close()\n",
+    "\n",
+    "result = run([  \"nvdisasm -c   temp.cusbin\"],\n",
+    "        stdout=PIPE,\n",
+    "        shell=True)\n",
+    "\n",
+    "print(len(result.stdout.decode(\"utf-8\").split(\"\\n\") )  )\n",
+    "\n",
+    "print(result.stdout.decode(\"utf-8\"))\n",
+    "\n",
+    "\n",
+    "\n",
+    "newFile = open(\"temp.disasm\", \"wb\")\n",
+    "newFile.write(result.stdout)\n",
+    "newFile.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(show_code(phi_kernel.ast))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "for eq in rescheduled_eqs:\n",
+    "    print(eq)\n",
+    "    print(eq.rhs.func)\n",
+    "    for arg in eq.rhs.args:\n",
+    "        print(arg)\n",
+    "    print()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d = graphviz.Digraph(engine='dot')\n",
+    "for eq in rescheduled_eqs:\n",
+    "    #d.node(eq.lhs.name)\n",
+    "    for arg in eq.rhs.atoms():\n",
+    "        if isinstance(arg, sympy.Symbol) and not isinstance(arg, Field.Access):\n",
+    "            d.edge(arg.name, eq.lhs.name)\n",
+    "d\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pystencils_tests/liveness_opts/test_sched_mu.ipynb b/pystencils_tests/liveness_opts/test_sched_mu.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..a1fa706665672d1cd91441197437f4c9f82b4c0f
--- /dev/null
+++ b/pystencils_tests/liveness_opts/test_sched_mu.ipynb
@@ -0,0 +1,78 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys \n",
+    "sys.path.append('..')\n",
+    "sys.path.append('../pygrandchem_tests/')\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
+    "from pygrandchem_tests.config import get_system\n",
+    "from pygrandchem.grandchem_generation import create_mu_update_kernel, create_mu_update_equations_from_config, create_mu_staggered_kernel, create_kernel\n",
+    "import test_mu_equivalence\n",
+    "\n",
+    "from pystencils.simp.liveness_opts import *\n",
+    "from pystencils.simp.liveness_opts_exp import *\n",
+    "from pystencils.simp import sympy_cse_on_assignment_list\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = get_system()\n",
+    "phi_src = np.zeros([3, 3, 3, 4])\n",
+    "mu_src = np.zeros([3, 3, 3, 2])\n",
+    "\n",
+    "update_eqs = sympy_cse_on_assignment_list(create_mu_update_equations_from_config(config, phi_src, mu_src))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for sched_option in all_sched_options:\n",
+    "    print(sched_option.__name__)\n",
+    "    rescheduled_eqs = sched_option(update_eqs)\n",
+    "    test_mu_equivalence.test_no_staggered_pre_computation(eqs=rescheduled_eqs, target='gpu')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/pystencils_tests/liveness_opts/test_steal.ipynb b/pystencils_tests/liveness_opts/test_steal.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..f72b7c549e2d4e2267f45f2387f026d9bb958cc3
--- /dev/null
+++ b/pystencils_tests/liveness_opts/test_steal.ipynb
@@ -0,0 +1,450 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys \n",
+    "sys.path.append('..')\n",
+    "print(sys.path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 1\n",
+    "%aimport pystencils.simp.liveness_opts\n",
+    "%aimport pystencils.simp.liveness_opts_exp\n",
+    "%aimport pystencils.shmemvar\n",
+    "%aimport pystencils.backends.cbackend\n",
+    "%aimport pystencils.transformations\n",
+    "\n",
+    "\n",
+    "%load_ext line_profiler\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from lbmpy.session import *\n",
+    "from scipy.ndimage.filters import gaussian_filter\n",
+    "from pygrandchem_tests.config2 import get_system\n",
+    "from pygrandchem_tests.config import get_system as get_system_simple\n",
+    "from pystencils.datahandling import SerialDataHandling\n",
+    "from pygrandchem.grandchem_generation import *\n",
+    "from pygrandchem.chemicalpotential import *\n",
+    "from pystencils import show_code, Field\n",
+    "from sympy import Number, Symbol, Expr, preorder_traversal, postorder_traversal, Function, Piecewise, relational\n",
+    "from pystencils.simp import sympy_cse_on_assignment_list\n",
+    "from pystencils.simp.liveness_opts import *\n",
+    "from pystencils.simp.liveness_opts_exp import *\n",
+    "\n",
+    "from pystencils.shmemvar import *\n",
+    "import graphviz\n",
+    "\n",
+    "\n",
+    "import pycuda\n",
+    "\n",
+    "import sys\n",
+    "from subprocess import run, PIPE\n",
+    "\n",
+    "sys.setrecursionlimit(100000)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = get_system()\n",
+    "free_energy = FreeEnergy(config['FreeEnergy'], config['Temperature'])\n",
+    "\n",
+    "dh = SerialDataHandling((256, 256, 256), periodicity=(True, True, False))\n",
+    "f = dh.fields\n",
+    "dh.add_array('phi_src', values_per_cell=4, layout='fzyx')\n",
+    "dh.add_array('mu_src', values_per_cell=2, layout='fzyx')\n",
+    "dh.add_array_like('phi_dst', 'phi_src')\n",
+    "dh.add_array_like('mu_dst', 'mu_src')\n",
+    "dh.add_array('c', values_per_cell=2, layout='fzyx')\n",
+    "\n",
+    "diffusion_matrices = np.zeros([4, 2, 2])\n",
+    "diffusion_matrices[0] = config['Parameters']['da']\n",
+    "diffusion_matrices[1] = config['Parameters']['db']\n",
+    "diffusion_matrices[2] = config['Parameters']['dg']\n",
+    "diffusion_matrices[3] = config['Parameters']['dl']\n",
+    "\n",
+    "f = dh.fields\n",
+    "\n",
+    "#update_eqs = create_phi_update_equations(\n",
+    "#    f['phi_src'],\n",
+    "#    f['phi_dst'],\n",
+    "#    f['mu_src'],\n",
+    "#    free_energy,\n",
+    "#    config['Parameters'],\n",
+    "#    simplex_projection=True)\n",
+    "\n",
+    "update_eqs = create_mu_update_equations(\n",
+    "    f['phi_src'], f['phi_dst'], f['mu_src'], f['mu_dst'], free_energy,\n",
+    "    diffusion_matrices, config['Parameters'])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for eq in update_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "update_eqs = sympy_cse_on_assignment_list(update_eqs)\n",
+    "for eq in update_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "update_eqs = merge_field_accesses(update_eqs)\n",
+    "\n",
+    "for eq in update_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "a = Symbol(\"a\")\n",
+    "b = Symbol(\"b\")\n",
+    "c = Symbol(\"c\")\n",
+    "\n",
+    "\n",
+    "fake_eqs = [\n",
+    "    Assignment(a, sympy.Add(sympy.Mul(0.1, f['phi_src'][1, 0, 0](0)), sympy.Mul(0.1, f['phi_src'][0, 0, 0](0)))),\n",
+    "    Assignment(b, sympy.Add(sympy.Mul(0.1, f['phi_src'][-1, 0, 0](0)), sympy.Mul(0.1, f['phi_src'][0, 0, 0](0)))),\n",
+    "    Assignment(f['phi_dst'][0, 0, 0](0), sympy.Add(a, b))\n",
+    "]\n",
+    "\n",
+    "fake_eqs = merge_field_accesses(sympy_cse_on_assignment_list(fake_eqs))\n",
+    "\n",
+    "for eq in fake_eqs:\n",
+    "    print(eq)\n",
+    "    \n",
+    "shifted_fake_eqs = shift_fa_eqs(fake_eqs)\n",
+    "\n",
+    "\n",
+    "print()\n",
+    "for eq in shifted_fake_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stolen_eqs = left_steal(update_eqs, 8)\n",
+    "\n",
+    "for eq in stolen_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "shifted_eqs = shift_fa_eqs(update_eqs)\n",
+    "steal_from_e = get_steal_list(update_eqs, shifted_eqs)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "usage = get_usage(update_eqs)\n",
+    "definitions = get_definitions(update_eqs)\n",
+    "\n",
+    "def count_nodes_up(node):\n",
+    "    if isinstance(node, Field.Access):\n",
+    "        return 1\n",
+    "\n",
+    "    if node in definitions:\n",
+    "        node = definitions[node].rhs\n",
+    "    \n",
+    "    node_count = 0\n",
+    "    for arg in node.args:\n",
+    "        if not (arg in usage and usage[arg] > 1):\n",
+    "            node_count += count_nodes_up(arg)\n",
+    "    return node_count + 1\n",
+    "\n",
+    "scores = [(s, count_nodes_up(s)) for s in steal_from_e if isinstance(s, Symbol)]\n",
+    "scores.sort(key=lambda s: s[1], reverse=True)\n",
+    "\n",
+    "for s in scores:\n",
+    "    print(s)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "definitions = get_definitions(update_eqs)                \n",
+    "usage = get_usage(update_eqs)\n",
+    "\n",
+    "d = graphviz.Digraph(engine='dot', strict=True)\n",
+    "\n",
+    "\n",
+    "for expr in steal_from_e:\n",
+    "    d.node(str(expr), color=\"red\")\n",
+    "\n",
+    "for expr in steal_from_e:\n",
+    "    definition = expr\n",
+    "    if expr in definitions:\n",
+    "        definition = definitions[expr].rhs\n",
+    "        d.edge(str(definition), str(expr), weight=\"200\")\n",
+    "    for atom in definition.args:\n",
+    "        if not isinstance(atom, Number):\n",
+    "            d.edge(str(atom), str(definition), weight=\"200\")\n",
+    "\n",
+    "\n",
+    "for eq in update_eqs:\n",
+    "    for atom in sympy.postorder_traversal(eq.rhs):\n",
+    "        if atom in steal_from_e:\n",
+    "            d.edge(str(atom), str(eq.lhs), weight=\"200\")\n",
+    "            \n",
+    "    #d.edge(str(steal_from_e[expr]), str(expr), style=\"dashed\", weight=\"1\")\n",
+    "    \n",
+    "    #expr = steal_from_e[expr]\n",
+    "    #definition = expr\n",
+    "    #if expr in definitions:\n",
+    "    #    definition = definitions[expr].rhs\n",
+    "    #    d.edge(str(definition), str(expr), weight=\"200\")\n",
+    "    #for atom in definition.args:\n",
+    "    #    if not isinstance(atom, Number):\n",
+    "    #        d.edge(str(atom), str(definition), weight=\"200\")\n",
+    "            \n",
+    "    \n",
+    "    \n",
+    "d"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "d = graphviz.Digraph(engine='dot', strict=True)\n",
+    "\n",
+    "leaking_nodes = []\n",
+    "non_leaking_nodes = list(steal_from_e)\n",
+    "\n",
+    "\n",
+    "for eq in update_eqs:\n",
+    "    if eq.lhs not in steal_from_e:\n",
+    "        for atom in sympy.postorder_traversal(eq.rhs):\n",
+    "            if atom in non_leaking_nodes:\n",
+    "                non_leaking_nodes.remove(atom)\n",
+    "                leaking_nodes.append(atom)\n",
+    "\n",
+    "for e in steal_from_e:\n",
+    "    if e in leaking_nodes:\n",
+    "        d.node(str(e), color=\"blue\")\n",
+    "    else:\n",
+    "        d.node(str(e), color=\"red\")\n",
+    "        \n",
+    "for expr in steal_from_e:\n",
+    "    definition = expr\n",
+    "    if expr in definitions:\n",
+    "        definition = definitions[expr].rhs\n",
+    "        d.edge(str(definition), str(expr), weight=\"200\")\n",
+    "    for atom in definition.args:\n",
+    "        if not isinstance(atom, Number):\n",
+    "            d.edge(str(atom), str(definition), weight=\"200\")\n",
+    "\n",
+    "            \n",
+    "d"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d = graphviz.Digraph(engine='dot', strict=True)\n",
+    "\n",
+    "def walk_up(expr):\n",
+    "    if expr in definitions:\n",
+    "        walk_up(definitions[expr].rhs)\n",
+    "        d.edge(str(definitions[expr].rhs), str(expr))\n",
+    "    for arg in expr.args:\n",
+    "        if isinstance(arg, sympy.Number): continue\n",
+    "        walk_up(arg)\n",
+    "        d.edge(str(arg), str(expr))\n",
+    "\n",
+    "for eq in update_eqs:\n",
+    "    if eq.lhs.name == \"xi_137\":\n",
+    "        s_xi = eq.lhs\n",
+    "        \n",
+    "walk_up(s_xi)\n",
+    "#walk_up(left_steal[s_xi])\n",
+    "\n",
+    "d"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rescheduled_eqs = schedule_eqs(update_eqs)\n",
+    "\n",
+    "for eq in rescheduled_eqs:\n",
+    "    print(eq)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "phi_kernel = create_kernel(\n",
+    "    update_eqs,\n",
+    "    target=\"gpu\",\n",
+    "    gpu_indexing_params={\n",
+    "        \"block_size\": (32, 4, 1)\n",
+    "    }).compile()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(show_code(phi_kernel.ast))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "code = \"#include <cstdint>\\n\"\n",
+    "code += \"#define FUNC_PREFIX __global__ __launch_bounds__(128)\\n\"\n",
+    "code += \"#define RESTRICT __restrict__\\n\\n\"\n",
+    "\n",
+    "code += str(show_code(phi_kernel.ast))\n",
+    "\n",
+    "cubin = pycuda.compiler.compile(code, options=[\"-w\", \"-std=c++11\", \"-use_fast_math\" ], arch=\"sm_60\")\n",
+    "\n",
+    "run([  \"echo \\\"\" + code + \"\\\" >> temp.cubin\"],\n",
+    "        stdout=PIPE,\n",
+    "        shell=True)\n",
+    "\n",
+    "newFile = open(\"temp.cusbin\", \"wb\")\n",
+    "newFile.write(cubin)\n",
+    "newFile.close()\n",
+    "\n",
+    "result = run([  \"nvdisasm -c   temp.cusbin\"],\n",
+    "        stdout=PIPE,\n",
+    "        shell=True)\n",
+    "\n",
+    "print(result.stdout.decode(\"utf-8\"))\n",
+    "\n",
+    "newFile = open(\"temp.disasm\", \"wb\")\n",
+    "newFile.write(result.stdout)\n",
+    "newFile.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d = graphviz.Digraph(engine='dot')\n",
+    "for eq in rescheduled_eqs:\n",
+    "    #d.node(eq.lhs.name)\n",
+    "    for arg in eq.rhs.atoms():\n",
+    "        if isinstance(arg, sympy.Symbol) and not isinstance(arg, Field.Access):\n",
+    "            d.edge(arg.name, eq.lhs.name)\n",
+    "d\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for e in steal_from_e:\n",
+    "    print(e)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}