Philipp Suffa · Markus Holzer · Markus Holzer · Markus Holzer · Christoph Alt · Christoph Alt
--- a/pystencils/gpucuda/indexing.py
+++ b/pystencils/gpucuda/indexing.py
@@ -124,12 +124,18 @@ class BlockIndexing(AbstractIndexing):
        self._symbolic_shape = [e if isinstance(e, sp.Basic) else None for e in field.spatial_shape]
        self._compile_time_block_size = compile_time_block_size

+    @property
+    def cuda_indices(self):
+        """Per-dimension index block_idx * block_size + thread_idx, truncated to ``self._dim`` entries."""
+        sizes = self._block_size if self._compile_time_block_size else BLOCK_DIM
+        flat = [blk * size + thr for blk, size, thr in zip(BLOCK_IDX, sizes, THREAD_IDX)]
+
+        return flat[:self._dim]
+
    @property
    def coordinates(self):
        offsets = _get_start_from_slice(self._iterationSlice)
-        block_size = self._block_size if self._compile_time_block_size else BLOCK_DIM
-        coordinates = [block_index * bs + thread_idx + off
-                       for block_index, bs, thread_idx, off in zip(BLOCK_IDX, block_size, THREAD_IDX, offsets)]
+        coordinates = [c + off for c, off in zip(self.cuda_indices, offsets)]

        return coordinates[:self._dim]

@@ -159,8 +165,13 @@ class BlockIndexing(AbstractIndexing):

    def guard(self, kernel_content, arr_shape):
        arr_shape = arr_shape[:self._dim]
-        conditions = [c < end
-                      for c, end in zip(self.coordinates, _get_end_from_slice(self._iterationSlice, arr_shape))]
+        end = _get_end_from_slice(self._iterationSlice, arr_shape)
+        conditions = [c < e for c, e in zip(self.coordinates, end)]
+        # Strided slices: additionally require the un-offset thread index to be a multiple of the step.
+        # Non-slice components have no ``step`` attribute (NOTE(review): confirm ints can reach here).
+        for cuda_index, iter_slice in zip(self.cuda_indices, self._iterationSlice):
+            if isinstance(iter_slice, slice) and iter_slice.step > 1:
+                conditions.append(sp.Eq(sp.Mod(cuda_index, iter_slice.step), 0))
        condition = conditions[0]
        for c in conditions[1:]:
            condition = sp.And(condition, c)

--- a/pystencils/gpucuda/kernelcreation.py
+++ b/pystencils/gpucuda/kernelcreation.py
@@ -34,7 +34,7 @@ def create_cuda_kernel(assignments: Union[AssignmentCollection, NodeCollection],
    all_fields = fields_read.union(fields_written)
    read_only_fields = set([f.name for f in fields_read - fields_written])

-    buffers = set([f for f in all_fields if FieldType.is_buffer(f) or FieldType.is_custom(f)])
+    buffers = set([f for f in all_fields if FieldType.is_buffer(f)])
    fields_without_buffers = all_fields - buffers

    field_accesses = set()

--- a/pystencils/transformations.py
+++ b/pystencils/transformations.py
@@ -161,9 +161,11 @@ def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_or
        tuple of loop-node, ghost_layer_info
    """
    # find correct ordering by inspecting participating FieldAccesses
+    absolut_accesses_only = False
    field_accesses = body.atoms(Field.Access)
    field_accesses = {e for e in field_accesses if not e.is_absolute_access}
-
+    if len(field_accesses) == 0:  # when kernel contains only absolute accesses
+        absolut_accesses_only = True
    # exclude accesses to buffers from field_list, because buffers are treated separately
    field_list = [e.field for e in field_accesses if not (FieldType.is_buffer(e.field) or FieldType.is_custom(e.field))]
    if len(field_list) == 0:  # when kernel contains only custom fields
@@ -174,14 +176,21 @@ def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_or
    if loop_order is None:
        loop_order = get_optimal_loop_ordering(fields)

-    shape = get_common_shape(fields)
+    if absolut_accesses_only:
+        absolut_access_fields = {e.field for e in body.atoms(Field.Access)}
+        shape = get_common_shape(absolut_access_fields)
+    else:
+        shape = get_common_shape(fields)
    unify_shape_symbols(body, common_shape=shape, fields=fields)

    if iteration_slice is not None:
        iteration_slice = normalize_slice(iteration_slice, shape)

    if ghost_layers is None:
-        required_ghost_layers = max([fa.required_ghost_layers for fa in field_accesses])
+        if absolut_accesses_only:
+            required_ghost_layers = 0
+        else:
+            required_ghost_layers = max([fa.required_ghost_layers for fa in field_accesses])
        ghost_layers = [(required_ghost_layers, required_ghost_layers)] * len(loop_order)
    if isinstance(ghost_layers, int):
        ghost_layers = [(ghost_layers, ghost_layers)] * len(loop_order)

--- a/pystencils/typing/leaf_typing.py
+++ b/pystencils/typing/leaf_typing.py
@@ -171,12 +171,13 @@ class TypeAdder:
            args_types = [self.figure_out_type(a) for a in expr.args]
            new_args = [a if t.dtype_eq(bool_type) else BooleanCastFunc(a, bool_type) for a, t in args_types]
            return expr.func(*new_args), bool_type
-        elif type(expr, ) in pystencils.integer_functions.__dict__.values():
+        elif type(expr, ) in pystencils.integer_functions.__dict__.values() or isinstance(expr, sp.Mod):
            args_types = [self.figure_out_type(a) for a in expr.args]
            collated_type = collate_types([t for _, t in args_types])
            # TODO: should we downcast to integer? If yes then which integer type?
            if not collated_type.is_int():
-                raise ValueError(f"Integer functions need to be used with integer types but {collated_type} was given")
+                raise ValueError(f"Integer functions or Modulo need to be used with integer types "
+                                 f"but {collated_type} was given")

            return expr, collated_type
        elif isinstance(expr, flag_cond):

--- a/pystencils_tests/test_modulo.py
+++ b/pystencils_tests/test_modulo.py
+import pytest
+
+import sympy as sp
+import pystencils as ps
+from pystencils.astnodes import LoopOverCoordinate, Conditional, Block, SympyAssignment
+
+
+@pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
+@pytest.mark.parametrize('iteration_slice', [False, True])
+def test_mod(target, iteration_slice):
+    """Check that 1.0 is written exactly to the cells with odd index in both dimensions.
+
+    The cell selection is expressed either by a strided iteration slice
+    (``iteration_slice=True``) or by an explicit modulo condition on the loop
+    counters (``iteration_slice=False``). The result is compared on the gathered
+    array including ghost layers.
+    """
+    if target == ps.Target.GPU:
+        pytest.importorskip("pycuda")
+    # Use the parametrized target: with a hard-coded CPU target the GPU variant
+    # would only repeat the CPU run and never reach the transfers below.
+    dh = ps.create_data_handling(domain_size=(5, 5), periodicity=True, default_target=target)
+
+    loop_ctrs = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dh.dim)]
+    cond = [sp.Eq(sp.Mod(loop_ctrs[i], 2), 1) for i in range(dh.dim)]
+
+    field = dh.add_array("a", values_per_cell=1)
+
+    eq_list = [SympyAssignment(field.center, 1.0)]
+
+    if iteration_slice:
+        # Keep the boolean test parameter intact; the slice gets its own name.
+        strided_slice = ps.make_slice[1:-1:2, 1:-1:2]
+        config = ps.CreateKernelConfig(target=dh.default_target, iteration_slice=strided_slice)
+        assign = eq_list
+    else:
+        assign = [Conditional(sp.And(*cond), Block(eq_list))]
+        config = ps.CreateKernelConfig(target=dh.default_target)
+
+    kernel = ps.create_kernel(assign, config=config).compile()
+
+    dh.fill(field.name, 0, ghost_layers=True)
+
+    if target == ps.Target.GPU:
+        dh.to_gpu(field.name)
+
+    dh.run_kernel(kernel)
+
+    if target == ps.Target.GPU:
+        dh.to_cpu(field.name)
+
+    result = dh.gather_array(field.name, ghost_layers=True)
+
+    for x in range(result.shape[0]):
+        for y in range(result.shape[1]):
+            if x % 2 == 1 and y % 2 == 1:
+                assert result[x, y] == 1.0
+            else:
+                assert result[x, y] == 0.0
No results found