diff --git a/pystencils/gpucuda/indexing.py b/pystencils/gpucuda/indexing.py
index 9e2df4e0a1183d752f560985ab15acee041f2b76..423b115837bda768ec8313abdc50c9a5ac017085 100644
--- a/pystencils/gpucuda/indexing.py
+++ b/pystencils/gpucuda/indexing.py
@@ -169,7 +169,7 @@ class BlockIndexing(AbstractIndexing):
 
         conditions = [c < e for c, e in zip(self.coordinates, end)]
         for cuda_index, iter_slice in zip(self.cuda_indices, self._iterationSlice):
-            if iter_slice.step > 1:
+            if isinstance(iter_slice, slice) and iter_slice.step > 1:
                 conditions.append(sp.Eq(sp.Mod(cuda_index, iter_slice.step), 0))
 
         condition = conditions[0]
@@ -177,6 +177,9 @@ class BlockIndexing(AbstractIndexing):
             condition = sp.And(condition, c)
         return Block([Conditional(condition, kernel_content)])
 
+    def iteration_space(self, arr_shape):
+        return _iteration_space(self._iterationSlice, arr_shape)
+
     @staticmethod
     def limit_block_size_by_register_restriction(block_size, required_registers_per_thread, device=None):
         """Shrinks the block_size if there are too many registers used per multiprocessor.
@@ -284,6 +287,9 @@ class LineIndexing(AbstractIndexing):
     def symbolic_parameters(self):
         return set()
 
+    def iteration_space(self, arr_shape):
+        return _iteration_space(self._iterationSlice, arr_shape)
+
 
 # -------------------------------------- Helper functions --------------------------------------------------------------
 
@@ -310,6 +316,23 @@ def _get_end_from_slice(iteration_slice, arr_shape):
     return res
 
 
+def _get_steps_from_slice(iteration_slice):
+    res = []
+    for slice_component in iteration_slice:
+        if type(slice_component) is slice:
+            res.append(slice_component.step)
+        else:
+            res.append(1)
+    return res
+
+
+def _iteration_space(iteration_slice, arr_shape):
+    starts = _get_start_from_slice(iteration_slice)
+    ends = _get_end_from_slice(iteration_slice, arr_shape)
+    steps = _get_steps_from_slice(iteration_slice)
+    return [slice(start, end, step) for start, end, step in zip(starts, ends, steps)]
+
+
 def indexing_creator_from_params(gpu_indexing, gpu_indexing_params):
     if isinstance(gpu_indexing, str):
         if gpu_indexing == 'block':
diff --git a/pystencils/gpucuda/kernelcreation.py b/pystencils/gpucuda/kernelcreation.py
index 611c1e56b4975d3cad12440553032f67b6465c82..96a53138316ac97d4518cc870d7a4dd5f3fed001 100644
--- a/pystencils/gpucuda/kernelcreation.py
+++ b/pystencils/gpucuda/kernelcreation.py
@@ -13,7 +13,7 @@ from pystencils.node_collection import NodeCollection
 from pystencils.gpucuda.indexing import indexing_creator_from_params
 from pystencils.simp.assignment_collection import AssignmentCollection
 from pystencils.transformations import (
-    get_base_buffer_index, get_common_shape, parse_base_pointer_info,
+    get_base_buffer_index, get_common_field, parse_base_pointer_info,
     resolve_buffer_accesses, resolve_field_accesses, unify_shape_symbols)
 
 
@@ -44,7 +44,9 @@ def create_cuda_kernel(assignments: Union[AssignmentCollection, NodeCollection],
         field_accesses = {e for e in field_accesses if not e.is_absolute_access}
         num_buffer_accesses += sum(1 for access in eq.atoms(Field.Access) if FieldType.is_buffer(access.field))
 
-    common_shape = get_common_shape(fields_without_buffers)
+    # common shape and field to from the iteration space
+    common_field = get_common_field(fields_without_buffers)
+    common_shape = common_field.spatial_shape
 
     if iteration_slice is None:
         # determine iteration slice from ghost layers
@@ -62,7 +64,7 @@ def create_cuda_kernel(assignments: Union[AssignmentCollection, NodeCollection],
                 iteration_slice.append(slice(ghost_layers[i][0],
                                              -ghost_layers[i][1] if ghost_layers[i][1] > 0 else None))
 
-    indexing = indexing_creator(field=list(fields_without_buffers)[0], iteration_slice=iteration_slice)
+    indexing = indexing_creator(field=common_field, iteration_slice=iteration_slice)
     coord_mapping = indexing.coordinates
 
     cell_idx_assignments = [SympyAssignment(LoopOverCoordinate.get_loop_counter_symbol(i), value)
@@ -92,7 +94,8 @@ def create_cuda_kernel(assignments: Union[AssignmentCollection, NodeCollection],
     coord_mapping = {f.name: cell_idx_symbols for f in all_fields}
 
     if any(FieldType.is_buffer(f) for f in all_fields):
-        resolve_buffer_accesses(ast, get_base_buffer_index(ast, indexing.coordinates, common_shape), read_only_fields)
+        iteration_space = indexing.iteration_space(common_shape)
+        resolve_buffer_accesses(ast, get_base_buffer_index(ast, cell_idx_symbols, iteration_space), read_only_fields)
 
     resolve_field_accesses(ast, read_only_fields, field_to_base_pointer_info=base_pointer_info,
                            field_to_fixed_coordinates=coord_mapping)
@@ -157,7 +160,7 @@ def created_indexed_cuda_kernel(assignments: Union[AssignmentCollection, NodeCol
                                 iteration_slice=[slice(None, None, None)] * len(idx_field.spatial_shape))
 
     function_body = Block(coordinate_symbol_assignments + assignments)
-    function_body = indexing.guard(function_body, get_common_shape(index_fields))
+    function_body = indexing.guard(function_body, get_common_field(index_fields).spatial_shape)
     ast = KernelFunction(function_body, Target.GPU, Backend.CUDA, make_python_function,
                          None, function_name, assignments=assignments)
     ast.global_variables.update(indexing.index_variables)
diff --git a/pystencils/transformations.py b/pystencils/transformations.py
index 9f10a5f0a3bd1a1e23587ff58dcb2f3a7f50f6b6..02806d62223c8443cc2bb91740afd2c0ef6bda55 100644
--- a/pystencils/transformations.py
+++ b/pystencils/transformations.py
@@ -122,9 +122,10 @@ def unify_shape_symbols(body, common_shape, fields):
         body.subs(substitutions)
 
 
-def get_common_shape(field_set):
-    """Takes a set of pystencils Fields and returns their common spatial shape if it exists. Otherwise
-    ValueError is raised"""
+def get_common_field(field_set):
+    """Takes a set of pystencils Fields, checks if a common spatial shape exists and returns one
+        representative field, that can be used for shape information etc. in the kernel creation.
+        If the fields have different shapes ValueError is raised"""
     nr_of_fixed_shaped_fields = 0
     for f in field_set:
         if f.has_fixed_shape:
@@ -142,8 +143,9 @@ def get_common_shape(field_set):
         if len(shape_set) != 1:
             raise ValueError("Differently sized field accesses in loop body: " + str(shape_set))
 
-    shape = list(sorted(shape_set, key=lambda e: str(e[0])))[0]
-    return shape
+    # Sort the fields by their name to ensure that always the same field is returned
+    reference_field = list(sorted(field_set, key=lambda e: str(e)))[0]
+    return reference_field
 
 
 def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_order=None):
@@ -178,13 +180,15 @@ def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_or
 
     if absolut_accesses_only:
         absolut_access_fields = {e.field for e in body.atoms(Field.Access)}
-        shape = get_common_shape(absolut_access_fields)
+        common_field = get_common_field(absolut_access_fields)
+        common_shape = common_field.spatial_shape
     else:
-        shape = get_common_shape(fields)
-    unify_shape_symbols(body, common_shape=shape, fields=fields)
+        common_field = get_common_field(fields)
+        common_shape = common_field.spatial_shape
+    unify_shape_symbols(body, common_shape=common_shape, fields=fields)
 
     if iteration_slice is not None:
-        iteration_slice = normalize_slice(iteration_slice, shape)
+        iteration_slice = normalize_slice(iteration_slice, common_shape)
 
     if ghost_layers is None:
         if absolut_accesses_only:
@@ -199,7 +203,7 @@ def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_or
     for i, loop_coordinate in enumerate(reversed(loop_order)):
         if iteration_slice is None:
             begin = ghost_layers[loop_coordinate][0]
-            end = shape[loop_coordinate] - ghost_layers[loop_coordinate][1]
+            end = common_shape[loop_coordinate] - ghost_layers[loop_coordinate][1]
             new_loop = ast.LoopOverCoordinate(current_body, loop_coordinate, begin, end, 1)
             current_body = ast.Block([new_loop])
         else:
@@ -351,7 +355,7 @@ def get_base_buffer_index(ast_node, loop_counters=None, loop_iterations=None):
         ast_node: ast before any field accesses are resolved
         loop_counters: for CPU kernels: leave to default 'None' (can be determined from loop nodes)
                        for GPU kernels: list of 'loop counters' from inner to outer loop
-        loop_iterations: number of iterations of each loop from inner to outer, for CPU kernels leave to default
+        loop_iterations: iteration slice for each loop from inner to outer, for CPU kernels leave to default
 
     Returns:
         base buffer index - required by 'resolve_buffer_accesses' function
@@ -363,15 +367,14 @@ def get_base_buffer_index(ast_node, loop_counters=None, loop_iterations=None):
         assert len(loops) == len(parents_of_innermost_loop)
         assert all(l1 is l2 for l1, l2 in zip(loops, parents_of_innermost_loop))
 
-        actual_sizes = [int_div((loop.stop - loop.start), loop.step)
-                        if loop.step != 1 else loop.stop - loop.start for loop in loops]
+        loop_counters = [loop.loop_counter_symbol for loop in loops]
+        loop_iterations = [slice(loop.start, loop.stop, loop.step) for loop in loops]
 
-        actual_steps = [int_div((loop.loop_counter_symbol - loop.start), loop.step)
-                        if loop.step != 1 else loop.loop_counter_symbol - loop.start for loop in loops]
+    actual_sizes = [int_div((s.stop - s.start), s.step)
+                    if s.step != 1 else s.stop - s.start for s in loop_iterations]
 
-    else:
-        actual_sizes = loop_iterations
-        actual_steps = loop_counters
+    actual_steps = [int_div((ctr - s.start), s.step)
+                    if s.step != 1 else ctr - s.start for ctr, s in zip(loop_counters, loop_iterations)]
 
     field_accesses = ast_node.atoms(Field.Access)
     buffer_accesses = {fa for fa in field_accesses if FieldType.is_buffer(fa.field)}
diff --git a/pystencils_tests/test_buffer.py b/pystencils_tests/test_buffer.py
index b8af6f53f0b25075d55a182a2a93231fc03b60f7..a21d68cc406c604b3566b46a0167a4ba356c59cc 100644
--- a/pystencils_tests/test_buffer.py
+++ b/pystencils_tests/test_buffer.py
@@ -22,7 +22,7 @@ def _generate_fields(dt=np.uint64, num_directions=1, layout='numpy'):
         field_layout = layout_string_to_tuple(layout, len(size))
         src_arr = create_numpy_array_with_layout(size, field_layout, dtype=dt)
 
-        array_data = np.reshape(np.arange(1, int(np.prod(size)+1)), size)
+        array_data = np.reshape(np.arange(1, int(np.prod(size) + 1)), size)
         # Use flat iterator to input data into the array
         src_arr.flat = add_ghost_layers(array_data, index_dimensions=1 if num_directions > 1 else 0).astype(dt).flat
         dst_arr = np.zeros(src_arr.shape, dtype=dt)
@@ -41,7 +41,8 @@ def test_full_scalar_field():
                                       field_type=FieldType.BUFFER, dtype=src_arr.dtype)
 
         pack_eqs = [Assignment(buffer.center(), src_field.center())]
-        pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        pack_code = create_kernel(pack_eqs, config=config)
         code = ps.get_code_str(pack_code)
         ps.show_code(pack_code)
 
@@ -49,7 +50,9 @@ def test_full_scalar_field():
         pack_kernel(buffer=buffer_arr, src_field=src_arr)
 
         unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
-        unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+
+        config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        unpack_code = create_kernel(unpack_eqs, config=config)
 
         unpack_kernel = unpack_code.compile()
         unpack_kernel(dst_field=dst_arr, buffer=buffer_arr)
@@ -73,14 +76,18 @@ def test_field_slice():
                                           field_type=FieldType.BUFFER, dtype=src_arr.dtype)
 
             pack_eqs = [Assignment(buffer.center(), src_field.center())]
-            pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+
+            config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+            pack_code = create_kernel(pack_eqs, config=config)
 
             pack_kernel = pack_code.compile()
             pack_kernel(buffer=bufferArr, src_field=src_arr[pack_slice])
 
             # Unpack into ghost layer of dst_field in N direction
             unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
-            unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+
+            config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+            unpack_code = create_kernel(unpack_eqs, config=config)
 
             unpack_kernel = unpack_code.compile()
             unpack_kernel(buffer=bufferArr, dst_field=dst_arr[unpack_slice])
@@ -105,7 +112,8 @@ def test_all_cell_values():
             eq = Assignment(buffer(idx), src_field(idx))
             pack_eqs.append(eq)
 
-        pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        pack_code = create_kernel(pack_eqs, config=config)
         pack_kernel = pack_code.compile()
         pack_kernel(buffer=bufferArr, src_field=src_arr)
 
@@ -115,7 +123,8 @@ def test_all_cell_values():
             eq = Assignment(dst_field(idx), buffer(idx))
             unpack_eqs.append(eq)
 
-        unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        unpack_code = create_kernel(unpack_eqs, config=config)
         unpack_kernel = unpack_code.compile()
         unpack_kernel(buffer=bufferArr, dst_field=dst_arr)
 
@@ -141,7 +150,8 @@ def test_subset_cell_values():
             eq = Assignment(buffer(buffer_idx), src_field(cell_idx))
             pack_eqs.append(eq)
 
-        pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        pack_code = create_kernel(pack_eqs, config=config)
         pack_kernel = pack_code.compile()
         pack_kernel(buffer=bufferArr, src_field=src_arr)
 
@@ -151,7 +161,8 @@ def test_subset_cell_values():
             eq = Assignment(dst_field(cell_idx), buffer(buffer_idx))
             unpack_eqs.append(eq)
 
-        unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        unpack_code = create_kernel(unpack_eqs, config=config)
         unpack_kernel = unpack_code.compile()
         unpack_kernel(buffer=bufferArr, dst_field=dst_arr)
 
@@ -176,7 +187,8 @@ def test_field_layouts():
                 eq = Assignment(buffer(idx), src_field(idx))
                 pack_eqs.append(eq)
 
-            pack_code = create_kernel(pack_eqs, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+            config = ps.CreateKernelConfig(data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+            pack_code = create_kernel(pack_eqs, config=config)
             pack_kernel = pack_code.compile()
             pack_kernel(buffer=bufferArr, src_field=src_arr)
 
@@ -186,7 +198,8 @@ def test_field_layouts():
                 eq = Assignment(dst_field(idx), buffer(idx))
                 unpack_eqs.append(eq)
 
-            unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+            config = ps.CreateKernelConfig(data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+            unpack_code = create_kernel(unpack_eqs, config=config)
             unpack_kernel = unpack_code.compile()
             unpack_kernel(buffer=bufferArr, dst_field=dst_arr)
 
@@ -202,7 +215,7 @@ def test_iteration_slices():
         src_field = Field.create_generic("src_field", spatial_dimensions, index_shape=(num_cell_values,), dtype=dt)
         dst_field = Field.create_generic("dst_field", spatial_dimensions, index_shape=(num_cell_values,), dtype=dt)
         buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1,
-                                        field_type=FieldType.BUFFER, dtype=src_arr.dtype)
+                                      field_type=FieldType.BUFFER, dtype=src_arr.dtype)
 
         pack_eqs = []
         # Since we are packing all cell values for all cells, then
@@ -214,13 +227,16 @@ def test_iteration_slices():
         dim = src_field.spatial_dimensions
 
         #   Pack only the leftmost slice, only every second cell
-        pack_slice = (slice(None, None, 2),) * (dim-1) + (0, )
+        pack_slice = (slice(None, None, 2),) * (dim - 1) + (0,)
 
         #   Fill the entire array with data
         src_arr[(slice(None, None, 1),) * dim] = np.arange(num_cell_values)
         dst_arr.fill(0)
 
-        pack_code = create_kernel(pack_eqs, iteration_slice=pack_slice, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(iteration_slice=pack_slice,
+                                       data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+
+        pack_code = create_kernel(pack_eqs, config=config)
         pack_kernel = pack_code.compile()
         pack_kernel(buffer=bufferArr, src_field=src_arr)
 
@@ -230,12 +246,14 @@ def test_iteration_slices():
             eq = Assignment(dst_field(idx), buffer(idx))
             unpack_eqs.append(eq)
 
-        unpack_code = create_kernel(unpack_eqs, iteration_slice=pack_slice, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        config = ps.CreateKernelConfig(iteration_slice=pack_slice,
+                                       data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+
+        unpack_code = create_kernel(unpack_eqs, config=config)
         unpack_kernel = unpack_code.compile()
         unpack_kernel(buffer=bufferArr, dst_field=dst_arr)
 
         #   Check if only every second entry of the leftmost slice has been copied
         np.testing.assert_equal(dst_arr[pack_slice], src_arr[pack_slice])
-        np.testing.assert_equal(dst_arr[(slice(1, None, 2),)  * (dim-1) + (0,)], 0)
-        np.testing.assert_equal(dst_arr[(slice(None, None, 1),)  * (dim-1) + (slice(1,None),)], 0)
-
+        np.testing.assert_equal(dst_arr[(slice(1, None, 2),) * (dim - 1) + (0,)], 0)
+        np.testing.assert_equal(dst_arr[(slice(None, None, 1),) * (dim - 1) + (slice(1, None),)], 0)
diff --git a/pystencils_tests/test_buffer_gpu.py b/pystencils_tests/test_buffer_gpu.py
index dd20acd5d6e4bb080980d7ddab83e38e820f7099..ae0a1548790fda79c50b6c6b6952868755009d44 100644
--- a/pystencils_tests/test_buffer_gpu.py
+++ b/pystencils_tests/test_buffer_gpu.py
@@ -274,3 +274,59 @@ def test_buffer_indexing():
             assert s in src_field_size
 
     assert len(spatial_shape_symbols) <= 3
+
+
+def test_iteration_slices():
+    num_cell_values = 19
+    dt = np.uint64
+    fields = _generate_fields(dt=dt, stencil_directions=num_cell_values)
+    for (src_arr, gpu_src_arr, gpu_dst_arr, gpu_buffer_arr) in fields:
+        src_field = Field.create_from_numpy_array("src_field", gpu_src_arr, index_dimensions=1)
+        dst_field = Field.create_from_numpy_array("dst_field", gpu_src_arr, index_dimensions=1)
+        buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1,
+                                      field_type=FieldType.BUFFER, dtype=src_arr.dtype)
+
+        pack_eqs = []
+        # Since we are packing all cell values for all cells, then
+        # the buffer index is equivalent to the field index
+        for idx in range(num_cell_values):
+            eq = Assignment(buffer(idx), src_field(idx))
+            pack_eqs.append(eq)
+
+        dim = src_field.spatial_dimensions
+
+        #   Pack only the leftmost slice, only every second cell
+        pack_slice = (slice(None, None, 2),) * (dim - 1) + (0,)
+
+        #   Fill the entire array with data
+        src_arr[(slice(None, None, 1),) * dim] = np.arange(num_cell_values)
+        gpu_src_arr[(slice(None, None, 1),) * dim] = src_arr
+        gpu_dst_arr.fill(0)
+
+        config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
+                                    data_type={'src_field': gpu_src_arr.dtype, 'buffer': gpu_buffer_arr.dtype})
+
+        pack_code = create_kernel(pack_eqs, config=config)
+        pack_kernel = pack_code.compile()
+        pack_kernel(buffer=gpu_buffer_arr, src_field=gpu_src_arr)
+
+        unpack_eqs = []
+
+        for idx in range(num_cell_values):
+            eq = Assignment(dst_field(idx), buffer(idx))
+            unpack_eqs.append(eq)
+
+        config = CreateKernelConfig(target=Target.GPU, iteration_slice=pack_slice,
+                                    data_type={'dst_field': gpu_dst_arr.dtype, 'buffer': gpu_buffer_arr.dtype})
+
+        unpack_code = create_kernel(unpack_eqs, config=config)
+        unpack_kernel = unpack_code.compile()
+        unpack_kernel(buffer=gpu_buffer_arr, dst_field=gpu_dst_arr)
+
+        dst_arr = gpu_dst_arr.get()
+        src_arr = gpu_src_arr.get()
+
+        #   Check if only every second entry of the leftmost slice has been copied
+        np.testing.assert_equal(dst_arr[pack_slice], src_arr[pack_slice])
+        np.testing.assert_equal(dst_arr[(slice(1, None, 2),) * (dim - 1) + (0,)], 0)
+        np.testing.assert_equal(dst_arr[(slice(None, None, 1),) * (dim - 1) + (slice(1, None),)], 0)