diff --git a/pystencils/gpucuda/indexing.py b/pystencils/gpucuda/indexing.py
index f74b6e5071cf90b09074b088eeaf6c52c32d0eb1..9e2df4e0a1183d752f560985ab15acee041f2b76 100644
--- a/pystencils/gpucuda/indexing.py
+++ b/pystencils/gpucuda/indexing.py
@@ -124,12 +124,20 @@ class BlockIndexing(AbstractIndexing):
         self._symbolic_shape = [e if isinstance(e, sp.Basic) else None for e in field.spatial_shape]
         self._compile_time_block_size = compile_time_block_size
 
+    @property
+    def cuda_indices(self):
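+        """Global GPU thread indices (block_index * block_size + thread_index) for each
+        spatial dimension, without the iteration-slice offset applied."""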
+        block_size = self._block_size if self._compile_time_block_size else BLOCK_DIM
+        indices = [block_index * bs + thread_idx
+                   for block_index, bs, thread_idx in zip(BLOCK_IDX, block_size, THREAD_IDX)]
+
+        return indices[:self._dim]
+
     @property
     def coordinates(self):
         offsets = _get_start_from_slice(self._iterationSlice)
-        block_size = self._block_size if self._compile_time_block_size else BLOCK_DIM
-        coordinates = [block_index * bs + thread_idx + off
-                       for block_index, bs, thread_idx, off in zip(BLOCK_IDX, block_size, THREAD_IDX, offsets)]
+        coordinates = [c + off for c, off in zip(self.cuda_indices, offsets)]
 
         return coordinates[:self._dim]
 
@@ -159,8 +167,15 @@
 
     def guard(self, kernel_content, arr_shape):
         arr_shape = arr_shape[:self._dim]
-        conditions = [c < end
-                      for c, end in zip(self.coordinates, _get_end_from_slice(self._iterationSlice, arr_shape))]
+        end = _get_end_from_slice(self._iterationSlice, arr_shape)
+
+        conditions = [c < e for c, e in zip(self.coordinates, end)]
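+        # for a strided iteration slice, a thread only executes if its cuda
+        # index is a multiple of the step; the offset is added in `coordinates`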
+        for cuda_index, iter_slice in zip(self.cuda_indices, self._iterationSlice):
+            if isinstance(iter_slice, slice) and iter_slice.step is not None and iter_slice.step > 1:
+                conditions.append(sp.Eq(sp.Mod(cuda_index, iter_slice.step), 0))
+
         condition = conditions[0]
         for c in conditions[1:]:
             condition = sp.And(condition, c)
diff --git a/pystencils/typing/leaf_typing.py b/pystencils/typing/leaf_typing.py
index b0928d0b79ef657f7a3882cbbd39433c5a2b9fe1..ecb82bab8898d882669b21716ade20e33306c128 100644
--- a/pystencils/typing/leaf_typing.py
+++ b/pystencils/typing/leaf_typing.py
@@ -171,12 +171,14 @@ class TypeAdder:
             args_types = [self.figure_out_type(a) for a in expr.args]
             new_args = [a if t.dtype_eq(bool_type) else BooleanCastFunc(a, bool_type) for a, t in args_types]
             return expr.func(*new_args), bool_type
-        elif type(expr, ) in pystencils.integer_functions.__dict__.values():
+        elif type(expr) in pystencils.integer_functions.__dict__.values() or isinstance(expr, sp.Mod):
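+            # sp.Mod is typed like the integer functions: all arguments must have integer types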
             args_types = [self.figure_out_type(a) for a in expr.args]
             collated_type = collate_types([t for _, t in args_types])
             # TODO: should we downcast to integer? If yes then which integer type?
             if not collated_type.is_int():
-                raise ValueError(f"Integer functions need to be used with integer types but {collated_type} was given")
+                raise ValueError(f"Integer functions and Mod must be used with integer types, "
+                                 f"but {collated_type} was given")
 
             return expr, collated_type
         elif isinstance(expr, flag_cond):
diff --git a/pystencils_tests/test_modulo.py b/pystencils_tests/test_modulo.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f81ab6448fe8d326d618561b1e147e60e5faea5
--- /dev/null
+++ b/pystencils_tests/test_modulo.py
@@ -0,0 +1,53 @@
+import pytest
+
+import sympy as sp
+import pystencils as ps
+from pystencils.astnodes import LoopOverCoordinate, Conditional, Block, SympyAssignment
+
+
+@pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
+@pytest.mark.parametrize('iteration_slice', [False, True])
+def test_mod(target, iteration_slice):
+    if target == ps.Target.GPU:
+        pytest.importorskip("pycuda")
+    dh = ps.create_data_handling(domain_size=(5, 5), periodicity=True, default_target=target)
+
+    loop_ctrs = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dh.dim)]
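+    # true exactly at cells where both loop counters are odd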
+    cond = [sp.Eq(sp.Mod(loop_ctrs[i], 2), 1) for i in range(dh.dim)]
+
+    field = dh.add_array("a", values_per_cell=1)
+
+    eq_list = [SympyAssignment(field.center, 1.0)]
+
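+    # the same access pattern is expressed either as a strided iteration slice
+    # or as an explicit Conditional on the loop counters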
+    if iteration_slice:
+        iteration_slice = ps.make_slice[1:-1:2, 1:-1:2]
+        config = ps.CreateKernelConfig(target=dh.default_target, iteration_slice=iteration_slice)
+        assign = eq_list
+    else:
+        assign = [Conditional(sp.And(*cond), Block(eq_list))]
+        config = ps.CreateKernelConfig(target=dh.default_target)
+
+    kernel = ps.create_kernel(assign, config=config).compile()
+
+    dh.fill(field.name, 0, ghost_layers=True)
+
+    if config.target == ps.Target.GPU:
+        dh.to_gpu(field.name)
+
+    dh.run_kernel(kernel)
+
+    if config.target == ps.Target.GPU:
+        dh.to_cpu(field.name)
+
+    result = dh.gather_array(field.name, ghost_layers=True)
+
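+    # only the cells with odd x and odd y may have been written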
+    for x in range(result.shape[0]):
+        for y in range(result.shape[1]):
+            if x % 2 == 1 and y % 2 == 1:
+                assert result[x, y] == 1.0
+            else:
+                assert result[x, y] == 0.0