Commit 0cac3c27 authored by Christoph Alt

Merge branch 'FixIterationGPU' into 'master'

[FIX] Iteration slices with GPU kernels

Closes #58

See merge request !317
parents 262446d1 5e12eabd
Pipeline #51669 passed with stages in 24 minutes and 1 second
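For context, issue #58 concerns iteration slices with a step greater than one: the GPU indexing derived coordinates directly from block and thread indices and did not account for the slice stride. Below is a minimal sketch of the affected scenario, built only from the public API that the new test in this merge request also uses (it requires a CUDA-capable setup with pycuda; the pre-fix misbehaviour is inferred from the diff, not reproduced here):

import pystencils as ps

# Write 1.0 on every second interior cell only, via a strided iteration slice.
dh = ps.create_data_handling(domain_size=(8, 8), default_target=ps.Target.GPU)
f = dh.add_array("f", values_per_cell=1)

config = ps.CreateKernelConfig(target=dh.default_target,
                               iteration_slice=ps.make_slice[1:-1:2, 1:-1:2])
kernel = ps.create_kernel([ps.Assignment(f.center, 1.0)], config=config).compile()

dh.fill(f.name, 0.0, ghost_layers=True)
dh.to_gpu(f.name)
dh.run_kernel(kernel)
dh.to_cpu(f.name)
# After this fix the 2-strided pattern is respected; before, the generated
# guard only bounded the coordinates, so every in-bounds thread wrote.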
@@ -124,12 +124,18 @@ class BlockIndexing(AbstractIndexing):
         self._symbolic_shape = [e if isinstance(e, sp.Basic) else None for e in field.spatial_shape]
         self._compile_time_block_size = compile_time_block_size
 
+    @property
+    def cuda_indices(self):
+        block_size = self._block_size if self._compile_time_block_size else BLOCK_DIM
+        indices = [block_index * bs + thread_idx
+                   for block_index, bs, thread_idx in zip(BLOCK_IDX, block_size, THREAD_IDX)]
+
+        return indices[:self._dim]
+
     @property
     def coordinates(self):
         offsets = _get_start_from_slice(self._iterationSlice)
-        block_size = self._block_size if self._compile_time_block_size else BLOCK_DIM
-        coordinates = [block_index * bs + thread_idx + off
-                       for block_index, bs, thread_idx, off in zip(BLOCK_IDX, block_size, THREAD_IDX, offsets)]
+        coordinates = [c + off for c, off in zip(self.cuda_indices, offsets)]
 
         return coordinates[:self._dim]
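This refactoring factors the raw thread position out of coordinates: cuda_indices linearises block and thread indices per dimension, and coordinates merely adds the iteration-slice start offsets on top. A standalone sympy sketch of the same arithmetic (the symbol names are illustrative, not the ones pystencils generates):

import sympy as sp

BLOCK_IDX = sp.symbols("blockIdx_x blockIdx_y")
THREAD_IDX = sp.symbols("threadIdx_x threadIdx_y")
block_size = (128, 2)                 # compile-time block size per dimension
offsets = sp.symbols("off_x off_y")   # slice start per dimension

# cuda_indices: raw global thread position per dimension
cuda_indices = [bi * bs + ti for bi, bs, ti in zip(BLOCK_IDX, block_size, THREAD_IDX)]
# coordinates: shift each raw index by the iteration-slice start
coordinates = [c + off for c, off in zip(cuda_indices, offsets)]
print(coordinates)  # each entry: block_index * block_size + thread_index + slice_offset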
@@ -159,8 +165,13 @@ class BlockIndexing(AbstractIndexing):
     def guard(self, kernel_content, arr_shape):
         arr_shape = arr_shape[:self._dim]
-        conditions = [c < end
-                      for c, end in zip(self.coordinates, _get_end_from_slice(self._iterationSlice, arr_shape))]
+        end = _get_end_from_slice(self._iterationSlice, arr_shape)
+
+        conditions = [c < e for c, e in zip(self.coordinates, end)]
+
+        for cuda_index, iter_slice in zip(self.cuda_indices, self._iterationSlice):
+            if iter_slice.step > 1:
+                conditions.append(sp.Eq(sp.Mod(cuda_index, iter_slice.step), 0))
 
         condition = conditions[0]
         for c in conditions[1:]:
             condition = sp.And(condition, c)
...
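The behavioural change sits in guard: a dense grid of threads is still launched, and for a slice step greater than one the new Mod condition masks out every thread whose raw index is not a multiple of the step. A small sympy sketch of the resulting guard in one dimension (symbol name and values are illustrative):

import sympy as sp

cuda_index = sp.Symbol("ctr")      # raw thread index in one dimension
step, end = 2, 10                  # from an iteration slice like [1:-1:2]

condition = sp.And(cuda_index + 1 < end,               # offset coordinate stays below the slice end
                   sp.Eq(sp.Mod(cuda_index, step), 0)) # only every `step`-th thread participates

# threads 0, 2, 4, ... pass the guard; 1, 3, 5, ... are masked out
print([i for i in range(10) if condition.subs(cuda_index, i)])  # [0, 2, 4, 6, 8]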
@@ -171,12 +171,13 @@ class TypeAdder:
             args_types = [self.figure_out_type(a) for a in expr.args]
             new_args = [a if t.dtype_eq(bool_type) else BooleanCastFunc(a, bool_type) for a, t in args_types]
             return expr.func(*new_args), bool_type
-        elif type(expr, ) in pystencils.integer_functions.__dict__.values():
+        elif type(expr, ) in pystencils.integer_functions.__dict__.values() or isinstance(expr, sp.Mod):
             args_types = [self.figure_out_type(a) for a in expr.args]
             collated_type = collate_types([t for _, t in args_types])
             # TODO: should we downcast to integer? If yes then which integer type?
             if not collated_type.is_int():
-                raise ValueError(f"Integer functions need to be used with integer types but {collated_type} was given")
+                raise ValueError(f"Integer functions or Modulo need to be used with integer types "
+                                 f"but {collated_type} was given")
             return expr, collated_type
         elif isinstance(expr, flag_cond):
...
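The TypeAdder change routes sp.Mod through the same branch as pystencils' integer functions, so modulo expressions over integer-typed operands now type-check, while float operands hit the reworded ValueError. A sketch of the rule, assuming the public TypedSymbol API (the expressions are only constructed here; the error fires once a kernel containing the second one is type-checked):

import sympy as sp
from pystencils import TypedSymbol

i = TypedSymbol("i", "int64")
x = TypedSymbol("x", "float64")

ok = sp.Mod(i, 2)   # integer operands: collated type stays int64, accepted
bad = sp.Mod(x, 2)  # float operands: type checking raises
                    # "Integer functions or Modulo need to be used with integer types ..."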
import pytest
import sympy as sp

import pystencils as ps
from pystencils.astnodes import LoopOverCoordinate, Conditional, Block, SympyAssignment


@pytest.mark.parametrize('target', [ps.Target.CPU, ps.Target.GPU])
@pytest.mark.parametrize('iteration_slice', [False, True])
def test_mod(target, iteration_slice):
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")
    dh = ps.create_data_handling(domain_size=(5, 5), periodicity=True, default_target=target)

    loop_ctrs = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dh.dim)]
    cond = [sp.Eq(sp.Mod(loop_ctrs[i], 2), 1) for i in range(dh.dim)]

    field = dh.add_array("a", values_per_cell=1)
    eq_list = [SympyAssignment(field.center, 1.0)]

    if iteration_slice:
        iteration_slice = ps.make_slice[1:-1:2, 1:-1:2]
        config = ps.CreateKernelConfig(target=dh.default_target, iteration_slice=iteration_slice)
        assign = eq_list
    else:
        assign = [Conditional(sp.And(*cond), Block(eq_list))]
        config = ps.CreateKernelConfig(target=dh.default_target)

    kernel = ps.create_kernel(assign, config=config).compile()

    dh.fill(field.name, 0, ghost_layers=True)

    if config.target == ps.enums.Target.GPU:
        dh.to_gpu(field.name)
    dh.run_kernel(kernel)
    if config.target == ps.enums.Target.GPU:
        dh.to_cpu(field.name)

    result = dh.gather_array(field.name, ghost_layers=True)

    for x in range(result.shape[0]):
        for y in range(result.shape[1]):
            if x % 2 == 1 and y % 2 == 1:
                assert result[x, y] == 1.0
            else:
                assert result[x, y] == 0.0
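Both parametrisations must produce the same checkerboard: the Conditional variant masks cells inside a full-domain kernel, while the slice variant restricts the index space itself and, on GPU, exercises the new Mod guard. As a usage note, the final nested-loop check is equivalent to a vectorised comparison against the expected pattern; a sketch reusing the result array gathered above:

import numpy as np

expected = np.zeros_like(result)   # result as gathered in the test above
expected[1::2, 1::2] = 1.0         # odd (x, y) cells carry 1.0, everything else 0.0
np.testing.assert_array_equal(result, expected)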