diff --git a/pystencils/boundaries/createindexlist.py b/pystencils/boundaries/createindexlist.py
index 51bd5464f89b80c1ed272f1b4b0a932e46095863..be8fee7e5aaee82f5515dad0e9eb33005a958b1c 100644
--- a/pystencils/boundaries/createindexlist.py
+++ b/pystencils/boundaries/createindexlist.py
@@ -1,4 +1,3 @@
-import itertools
 import warnings
 
 import numpy as np
@@ -35,45 +34,67 @@ def numpy_data_type_for_boundary_object(boundary_object, dim):
                     + [(i[0], i[1].numpy_dtype) for i in boundary_object.additional_data], align=True)
 
 
-def _create_boundary_neighbor_index_list_python(flag_field_arr, nr_of_ghost_layers, boundary_mask,
-                                                fluid_mask, stencil, single_link):
+def _create_index_list_python(flag_field_arr, boundary_mask,
+                              fluid_mask, stencil, single_link, inner_or_boundary=False, nr_of_ghost_layers=None):
+
+    if inner_or_boundary and nr_of_ghost_layers is None:
+        raise ValueError("If inner_or_boundary is set True the number of ghost layers "
+                         "around the inner domain has to be specified")
+
+    if nr_of_ghost_layers is None:
+        nr_of_ghost_layers = 0
+
     coordinate_names = boundary_index_array_coordinate_names[:len(flag_field_arr.shape)]
     index_arr_dtype = np.dtype([(name, np.int32) for name in coordinate_names] + [(direction_member_name, np.int32)])
 
-    result = []
-    gl = nr_of_ghost_layers
-    for cell in itertools.product(*reversed([range(gl, i - gl) for i in flag_field_arr.shape])):
-        cell = cell[::-1]
-        if not flag_field_arr[cell] & fluid_mask:
-            continue
+    # boundary cells are extracted via np.where. To ensure continous memory access in the compute kernel these cells
+    # have to be sorted.
+    boundary_cells = np.transpose(np.nonzero(flag_field_arr == boundary_mask))
+    for i in range(len(flag_field_arr.shape)):
+        boundary_cells = boundary_cells[boundary_cells[:, i].argsort(kind='mergesort')]
+
+    # First a set is created to save all fluid cells which are near boundary
+    fluid_cells = set()
+    for cell in boundary_cells:
+        cell = tuple(cell)
         for dir_idx, direction in enumerate(stencil):
             neighbor_cell = tuple([cell_i + dir_i for cell_i, dir_i in zip(cell, direction)])
-            if flag_field_arr[neighbor_cell] & boundary_mask:
-                result.append(cell + (dir_idx,))
-                if single_link:
-                    break
-
-    return np.array(result, dtype=index_arr_dtype)
+            # prevent out ouf bounds access. If boundary cell is at the border, some stencil directions would be out.
+            if any(not 0 + nr_of_ghost_layers <= e < upper - nr_of_ghost_layers
+                   for e, upper in zip(neighbor_cell, flag_field_arr.shape)):
+                continue
+            if flag_field_arr[neighbor_cell] & fluid_mask:
+                fluid_cells.add(neighbor_cell)
 
+    # then this is set is transformed to a list to make it sortable. This ensures continoous memory access later.
+    fluid_cells = list(fluid_cells)
+    if len(flag_field_arr.shape) == 3:
+        fluid_cells.sort(key=lambda tup: (tup[-1], tup[-2], tup[0]))
+    else:
+        fluid_cells.sort(key=lambda tup: (tup[-1], tup[0]))
 
-def _create_boundary_cell_index_list_python(flag_field_arr, boundary_mask,
-                                            fluid_mask, stencil, single_link):
-    coordinate_names = boundary_index_array_coordinate_names[:len(flag_field_arr.shape)]
-    index_arr_dtype = np.dtype([(name, np.int32) for name in coordinate_names] + [(direction_member_name, np.int32)])
+    cells_to_iterate = fluid_cells if inner_or_boundary else boundary_cells
+    checkmask = boundary_mask if inner_or_boundary else fluid_mask
 
     result = []
-    for cell in itertools.product(*reversed([range(0, i) for i in flag_field_arr.shape])):
-        cell = cell[::-1]
-        if not flag_field_arr[cell] & boundary_mask:
-            continue
+    for cell in cells_to_iterate:
+        cell = tuple(cell)
+        sum_cells = np.zeros(len(cell))
         for dir_idx, direction in enumerate(stencil):
             neighbor_cell = tuple([cell_i + dir_i for cell_i, dir_i in zip(cell, direction)])
+            # prevent out ouf bounds access. If boundary cell is at the border, some stencil directions would be out.
             if any(not 0 <= e < upper for e, upper in zip(neighbor_cell, flag_field_arr.shape)):
                 continue
-            if flag_field_arr[neighbor_cell] & fluid_mask:
-                result.append(cell + (dir_idx,))
+            if flag_field_arr[neighbor_cell] & checkmask:
                 if single_link:
-                    break
+                    sum_cells += np.array(direction)
+                else:
+                    result.append(tuple(cell) + (dir_idx,))
+
+        # the discrete normal direction is the one which gives the maximum inner product to the stencil direction
+        if single_link and any(sum_cells != 0):
+            idx = np.argmax(np.inner(sum_cells, stencil))
+            result.append(tuple(cell) + (idx,))
 
     return np.array(result, dtype=index_arr_dtype)
 
@@ -91,7 +112,7 @@ def create_boundary_index_list(flag_field, stencil, boundary_mask, fluid_mask,
         nr_of_ghost_layers: only relevant if neighbors is True
         inner_or_boundary: if true, the result contains the cell coordinates of the domain cells -
                     if false the boundary cells are listed
-        single_link: if true only the first link is reported from this cell
+        single_link: if true only the link in normal direction to this cell is reported
 
     """
     dim = len(flag_field.shape)
@@ -119,10 +140,8 @@ def create_boundary_index_list(flag_field, stencil, boundary_mask, fluid_mask,
     else:
         if flag_field.size > 1e6:
             warnings.warn("Boundary setup may take very long! Consider installing cython to speed it up")
-        if inner_or_boundary:
-            return _create_boundary_neighbor_index_list_python(*args)
-        else:
-            return _create_boundary_cell_index_list_python(*args_no_gl)
+        return _create_index_list_python(*args_no_gl, inner_or_boundary=inner_or_boundary,
+                                         nr_of_ghost_layers=nr_of_ghost_layers)
 
 
 def create_boundary_index_array(flag_field, stencil, boundary_mask, fluid_mask, boundary_object,
diff --git a/pystencils/boundaries/createindexlistcython.pyx b/pystencils/boundaries/createindexlistcython.pyx
index 0931fdc7fe1038a7445f4e97a65ea24074e684fe..bd30fc1bafc91d007c5a3a41ff6e6adefafd1eed 100644
--- a/pystencils/boundaries/createindexlistcython.pyx
+++ b/pystencils/boundaries/createindexlistcython.pyx
@@ -22,20 +22,37 @@ def create_boundary_neighbor_index_list_2d(object[IntegerType, ndim=2] flag_fiel
     cdef int xs, ys, x, y
     cdef int dirIdx, num_directions, dx, dy
 
+    cdef int sum_x, sum_y
+    cdef float dot, maxn
+    cdef int calculated_idx
+
     xs, ys = flag_field.shape
     boundary_index_list = []
     num_directions = stencil.shape[0]
 
     for y in range(nr_of_ghost_layers, ys - nr_of_ghost_layers):
         for x in range(nr_of_ghost_layers, xs - nr_of_ghost_layers):
+            sum_x = 0; sum_y = 0;
             if flag_field[x, y] & fluid_mask:
                 for dirIdx in range(num_directions):
-                    dx = stencil[dirIdx,0]
-                    dy = stencil[dirIdx,1]
+                    dx = stencil[dirIdx,0]; dy = stencil[dirIdx,1]
                     if flag_field[x + dx, y + dy] & boundary_mask:
-                        boundary_index_list.append((x,y, dirIdx))
                         if single_link:
-                            break
+                            sum_x += dx; sum_y += dy;
+                        else:
+                            boundary_index_list.append((x, y, dirIdx))
+
+            dot = 0; maxn = 0; calculated_idx = 0
+            if single_link and (sum_x != 0 or sum_y != 0):
+                for dirIdx in range(num_directions):
+                    dx = stencil[dirIdx, 0]; dy = stencil[dirIdx, 1];
+
+                    dot = dx * sum_x + dy * sum_y
+                    if dot > maxn:
+                        maxn = dot
+                        calculated_idx = dirIdx
+
+                boundary_index_list.append((x, y, calculated_idx))
     return boundary_index_list
 
 
@@ -47,6 +64,10 @@ def create_boundary_neighbor_index_list_3d(object[IntegerType, ndim=3] flag_fiel
     cdef int xs, ys, zs, x, y, z
     cdef int dirIdx, num_directions, dx, dy, dz
 
+    cdef int sum_x, sum_y, sum_z
+    cdef float dot, maxn
+    cdef int calculated_idx
+
     xs, ys, zs = flag_field.shape
     boundary_index_list = []
     num_directions = stencil.shape[0]
@@ -54,15 +75,27 @@ def create_boundary_neighbor_index_list_3d(object[IntegerType, ndim=3] flag_fiel
     for z in range(nr_of_ghost_layers, zs - nr_of_ghost_layers):
         for y in range(nr_of_ghost_layers, ys - nr_of_ghost_layers):
             for x in range(nr_of_ghost_layers, xs - nr_of_ghost_layers):
+                sum_x = 0; sum_y = 0; sum_z = 0
                 if flag_field[x, y, z] & fluid_mask:
                     for dirIdx in range(num_directions):
-                        dx = stencil[dirIdx,0]
-                        dy = stencil[dirIdx,1]
-                        dz = stencil[dirIdx,2]
+                        dx = stencil[dirIdx,0]; dy = stencil[dirIdx,1]; dz = stencil[dirIdx,2]
                         if flag_field[x + dx, y + dy, z + dz] & boundary_mask:
-                            boundary_index_list.append((x,y,z, dirIdx))
                             if single_link:
-                                break
+                                sum_x += dx; sum_y += dy; sum_z += dz
+                            else:
+                                boundary_index_list.append((x, y, z, dirIdx))
+
+                dot = 0; maxn = 0; calculated_idx = 0
+                if single_link and (sum_x != 0 or sum_y != 0 or sum_z != 0):
+                    for dirIdx in range(num_directions):
+                        dx = stencil[dirIdx, 0]; dy = stencil[dirIdx, 1]; dz = stencil[dirIdx, 2]
+
+                        dot = dx * sum_x + dy * sum_y + dz * sum_z
+                        if dot > maxn:
+                            maxn = dot
+                            calculated_idx = dirIdx
+
+                    boundary_index_list.append((x, y, z, calculated_idx))
     return boundary_index_list
 
 
@@ -75,21 +108,39 @@ def create_boundary_cell_index_list_2d(object[IntegerType, ndim=2] flag_field,
     cdef int xs, ys, x, y
     cdef int dirIdx, num_directions, dx, dy
 
+    cdef int sum_x, sum_y
+    cdef float dot, maxn
+    cdef int calculated_idx
+
     xs, ys = flag_field.shape
     boundary_index_list = []
     num_directions = stencil.shape[0]
 
     for y in range(0, ys):
         for x in range(0, xs):
+            sum_x = 0; sum_y = 0;
             if flag_field[x, y] & boundary_mask:
                 for dirIdx in range(num_directions):
-                    dx = stencil[dirIdx,0]
-                    dy = stencil[dirIdx,1]
+                    dx = stencil[dirIdx,0]; dy = stencil[dirIdx,1]
                     if 0 <= x + dx < xs and 0 <= y + dy < ys:
                         if flag_field[x + dx, y + dy] & fluid_mask:
-                            boundary_index_list.append((x,y, dirIdx))
                             if single_link:
-                                break
+                                sum_x += dx; sum_y += dy
+                            else:
+                                boundary_index_list.append((x, y, dirIdx))
+
+            dot = 0; maxn = 0; calculated_idx = 0
+            if single_link and (sum_x != 0 or sum_y != 0):
+                for dirIdx in range(num_directions):
+                    dx = stencil[dirIdx, 0]; dy = stencil[dirIdx, 1]
+
+                    dot = dx * sum_x + dy * sum_y
+                    if dot > maxn:
+                        maxn = dot
+                        calculated_idx = dirIdx
+
+                boundary_index_list.append((x, y, calculated_idx))
+
     return boundary_index_list
 
 
@@ -101,6 +152,10 @@ def create_boundary_cell_index_list_3d(object[IntegerType, ndim=3] flag_field,
     cdef int xs, ys, zs, x, y, z
     cdef int dirIdx, num_directions, dx, dy, dz
 
+    cdef int sum_x, sum_y, sum_z
+    cdef float dot, maxn
+    cdef int calculated_idx
+
     xs, ys, zs = flag_field.shape
     boundary_index_list = []
     num_directions = stencil.shape[0]
@@ -108,14 +163,27 @@ def create_boundary_cell_index_list_3d(object[IntegerType, ndim=3] flag_field,
     for z in range(0, zs):
         for y in range(0, ys):
             for x in range(0, xs):
+                sum_x = 0; sum_y = 0; sum_z = 0
                 if flag_field[x, y, z] & boundary_mask:
                     for dirIdx in range(num_directions):
-                        dx = stencil[dirIdx,0]
-                        dy = stencil[dirIdx,1]
-                        dz = stencil[dirIdx,2]
+                        dx = stencil[dirIdx, 0]; dy = stencil[dirIdx, 1]; dz = stencil[dirIdx, 2]
                         if 0 <= x + dx < xs and 0 <= y + dy < ys and 0 <= z + dz < zs:
                             if flag_field[x + dx, y + dy, z + dz] & fluid_mask:
-                                boundary_index_list.append((x,y,z, dirIdx))
                                 if single_link:
-                                    break
+                                    sum_x += dx; sum_y += dy; sum_z += dz
+                                else:
+                                    boundary_index_list.append((x, y, z, dirIdx))
+
+                dot = 0; maxn = 0; calculated_idx=0
+                if single_link and (sum_x != 0 or sum_y !=0 or sum_z !=0):
+                    for dirIdx in range(num_directions):
+                        dx = stencil[dirIdx, 0]; dy = stencil[dirIdx, 1]; dz = stencil[dirIdx, 2]
+
+                        dot = dx*sum_x + dy*sum_y + dz*sum_z
+                        if dot > maxn:
+                            maxn = dot
+                            calculated_idx = dirIdx
+
+                    boundary_index_list.append((x, y, z, calculated_idx))
+
     return boundary_index_list
\ No newline at end of file
diff --git a/pystencils_tests/test_boundary_indexlist_creation.py b/pystencils_tests/test_boundary_indexlist_creation.py
index 58c6a18f6eb26e0b33f30389465a2cd593b6e930..970a2b1de609e0fcd0091163051af4a3e6d66a22 100644
--- a/pystencils_tests/test_boundary_indexlist_creation.py
+++ b/pystencils_tests/test_boundary_indexlist_creation.py
@@ -26,11 +26,11 @@ def test_equivalence_cython_python_version(single_link):
         flag_field_3d[-1, :, :] = mask
         flag_field_3d[7, 7, 7] = mask
 
-        result_python_2d = cil._create_boundary_neighbor_index_list_python(flag_field_2d, 1, mask, fluid_mask,
-                                                                           stencil_2d, single_link)
+        result_python_2d = cil._create_index_list_python(flag_field_2d, mask, fluid_mask,
+                                                         stencil_2d, single_link, True, 1)
 
-        result_python_3d = cil._create_boundary_neighbor_index_list_python(flag_field_3d, 1, mask, fluid_mask,
-                                                                           stencil_3d, single_link)
+        result_python_3d = cil._create_index_list_python(flag_field_3d, mask, fluid_mask,
+                                                         stencil_3d, single_link, True, 1)
 
         result_cython_2d = cil.create_boundary_index_list(flag_field_2d, stencil_2d, mask,
                                                           fluid_mask, 1, True, single_link)
@@ -62,11 +62,11 @@ def test_equivalence_cell_idx_list_cython_python_version(single_link):
         flag_field_3d[-1, :, :] = mask
         flag_field_3d[7, 7, 7] = mask
 
-        result_python_2d = cil._create_boundary_cell_index_list_python(flag_field_2d, mask, fluid_mask,
-                                                                       stencil_2d, single_link)
+        result_python_2d = cil._create_index_list_python(flag_field_2d, mask, fluid_mask,
+                                                         stencil_2d, single_link, False)
 
-        result_python_3d = cil._create_boundary_cell_index_list_python(flag_field_3d, mask, fluid_mask,
-                                                                       stencil_3d, single_link)
+        result_python_3d = cil._create_index_list_python(flag_field_3d, mask, fluid_mask,
+                                                         stencil_3d, single_link, False)
 
         result_cython_2d = cil.create_boundary_index_list(flag_field_2d, stencil_2d, mask, fluid_mask, None,
                                                           False, single_link)
@@ -75,3 +75,42 @@ def test_equivalence_cell_idx_list_cython_python_version(single_link):
 
         np.testing.assert_equal(result_python_2d, result_cython_2d)
         np.testing.assert_equal(result_python_3d, result_cython_3d)
+
+@pytest.mark.parametrize('inner_or_boundary', [False, True])
+def test_normal_calculation(inner_or_boundary):
+    stencil = tuple((x,y) for x,y in product([-1, 0, 1], [-1, 0, 1]))
+    domain_size = (32, 32)
+    dtype = np.uint32
+    fluid_mask = dtype(1)
+    mask = dtype(2)
+    flag_field = np.ones([domain_size[0], domain_size[1]], dtype=dtype) * fluid_mask
+
+    radius_inner = domain_size[0] // 4
+    radius_outer = domain_size[0] // 2
+    y_mid = domain_size[1] / 2
+    x_mid = domain_size[0] / 2
+
+    for x in range(0, domain_size[0]):
+        for y in range(0, domain_size[1]):
+            if (y - y_mid) ** 2 + (x - x_mid) ** 2 < radius_inner ** 2:
+                flag_field[x, y] = mask
+            if (x - x_mid) ** 2 + (y - y_mid) ** 2 > radius_outer ** 2:
+                flag_field[x, y] = mask
+
+    args_no_gl = (flag_field, mask, fluid_mask, np.array(stencil, dtype=np.int32), True)
+    index_list = cil._create_index_list_python(*args_no_gl, inner_or_boundary=inner_or_boundary, nr_of_ghost_layers=1)
+
+    checkmask = mask if inner_or_boundary else fluid_mask
+
+    for cell in index_list:
+        idx = cell[2]
+        cell = tuple((cell[0], cell[1]))
+        sum_cells = np.zeros(len(cell))
+        for dir_idx, direction in enumerate(stencil):
+            neighbor_cell = tuple([cell_i + dir_i for cell_i, dir_i in zip(cell, direction)])
+            if any(not 0 <= e < upper for e, upper in zip(neighbor_cell, flag_field.shape)):
+                continue
+            if flag_field[neighbor_cell] & checkmask:
+                sum_cells += np.array(direction)
+
+        assert np.argmax(np.inner(sum_cells, stencil)) == idx