From 67c87a99effb2111bcf6f675adf45063ed11e4a4 Mon Sep 17 00:00:00 2001
From: Frederik Hennig <frederik.hennig@fau.de>
Date: Tue, 8 Jun 2021 06:54:16 +0000
Subject: [PATCH] Fixes for buffers in loops with step size > 1

---
 pystencils/backends/cbackend.py  |  8 ++++++
 pystencils/cpu/kernelcreation.py |  8 ++++--
 pystencils/integer_functions.py  | 18 +++++++++---
 pystencils/transformations.py    | 29 +++++++++++--------
 pystencils_tests/test_buffer.py  | 48 +++++++++++++++++++++++++++++++-
 5 files changed, 92 insertions(+), 19 deletions(-)

diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py
index 94f21c076..1b45294a6 100644
--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -529,6 +529,14 @@ class CustomSympyPrinter(CCodePrinter):
             return res + '.0f' if '.' not in res else res + 'f'
         elif dtype.numpy_dtype == np.float64:
             return res + '.0' if '.' not in res else res
+        elif dtype.is_int():
+            tokens = res.split('.')
+            if len(tokens) == 1: 
+                return res
+            elif int(tokens[1]) != 0:
+                raise ValueError(f"Cannot print non-integer number {res} as an integer.")
+            else:
+                return tokens[0]
         else:
             return res
 
diff --git a/pystencils/cpu/kernelcreation.py b/pystencils/cpu/kernelcreation.py
index 81daf49fc..9e265a08e 100644
--- a/pystencils/cpu/kernelcreation.py
+++ b/pystencils/cpu/kernelcreation.py
@@ -18,7 +18,7 @@ AssignmentOrAstNodeList = List[Union[Assignment, ast.Node]]
 
 def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "kernel", type_info='double',
                   split_groups=(), iteration_slice=None, ghost_layers=None,
-                  skip_independence_check=False) -> KernelFunction:
+                  skip_independence_check=False, allow_double_writes=False) -> KernelFunction:
     """Creates an abstract syntax tree for a kernel function, by taking a list of update rules.
 
     Loops are created according to the field accesses in the equations.
@@ -39,6 +39,9 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
                      all dimensions
         skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                  periodicity kernel, that access the field outside the iteration bounds. Use with care!
+        allow_double_writes: If True, don't check if every field is only written at a single location. This is required
+                             for example for kernels that are compiled with loop step sizes > 1, that handle multiple 
+                             cells at once. Use with care!
 
     Returns:
         AST node representing a function, that can be printed as C or CUDA code
@@ -54,7 +57,8 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
         else:
             raise ValueError("Term has to be field access or symbol")
 
-    fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check)
+    fields_read, fields_written, assignments = add_types(
+        assignments, type_info, not skip_independence_check, check_double_write_condition=not allow_double_writes)
     all_fields = fields_read.union(fields_written)
     read_only_fields = set([f.name for f in fields_read - fields_written])
 
diff --git a/pystencils/integer_functions.py b/pystencils/integer_functions.py
index c6fae4fbf..efdaaaecf 100644
--- a/pystencils/integer_functions.py
+++ b/pystencils/integer_functions.py
@@ -20,13 +20,21 @@ class IntegerFunctionTwoArgsMixIn(sp.Function):
 
         for a in args:
             try:
-                type = get_type_of_expression(a)
-                if not type.is_int():
-                    raise ValueError("Argument to integer function is not an int but " + str(type))
+                dtype = get_type_of_expression(a)
+                if not dtype.is_int():
+                    raise ValueError("Argument to integer function is not an int but " + str(dtype))
             except NotImplementedError:
                 raise ValueError("Integer functions can only be constructed with typed expressions")
         return super().__new__(cls, *args)
 
+    def _eval_evalf(self, *pargs, **kwargs):
+        arg1 = self.args[0].evalf(*pargs, **kwargs) if hasattr(self.args[0], 'evalf') else self.args[0]
+        arg2 = self.args[1].evalf(*pargs, **kwargs) if hasattr(self.args[1], 'evalf') else self.args[1]
+        return self._eval_op(arg1, arg2)
+
+    def _eval_op(self, arg1, arg2):
+        return self
+
 
 # noinspection PyPep8Naming
 class bitwise_xor(IntegerFunctionTwoArgsMixIn):
@@ -55,7 +63,9 @@ class bitwise_or(IntegerFunctionTwoArgsMixIn):
 
 # noinspection PyPep8Naming
 class int_div(IntegerFunctionTwoArgsMixIn):
-    pass
+    
+    def _eval_op(self, arg1, arg2):
+        return int(arg1 // arg2)
 
 
 # noinspection PyPep8Naming
diff --git a/pystencils/transformations.py b/pystencils/transformations.py
index fc09f34e4..1ce3774b6 100644
--- a/pystencils/transformations.py
+++ b/pystencils/transformations.py
@@ -20,6 +20,7 @@ from pystencils.field import AbstractField, Field, FieldType
 from pystencils.kernelparameters import FieldPointerSymbol
 from pystencils.simp.assignment_collection import AssignmentCollection
 from pystencils.slicing import normalize_slice
+from pystencils.integer_functions import int_div
 
 
 class NestedScopes:
@@ -357,20 +358,23 @@ def get_base_buffer_index(ast_node, loop_counters=None, loop_iterations=None):
         assert len(loops) == len(parents_of_innermost_loop)
         assert all(l1 is l2 for l1, l2 in zip(loops, parents_of_innermost_loop))
 
-        loop_iterations = [(l.stop - l.start) / l.step for l in loops]
-        loop_counters = [l.loop_counter_symbol for l in loops]
+        actual_sizes = [int_div((l.stop - l.start), l.step) for l in loops]
+        actual_steps = [int_div((l.loop_counter_symbol - l.start), l.step) for l in loops]
+    else:
+        actual_sizes = loop_iterations
+        actual_steps = loop_counters
 
     field_accesses = ast_node.atoms(AbstractField.AbstractAccess)
     buffer_accesses = {fa for fa in field_accesses if FieldType.is_buffer(fa.field)}
-    loop_counters = [v * len(buffer_accesses) for v in loop_counters]
+    buffer_index_size = len(buffer_accesses)
 
-    base_buffer_index = loop_counters[0]
-    stride = 1
-    for idx, var in enumerate(loop_counters[1:]):
-        cur_stride = loop_iterations[idx]
-        stride *= int(cur_stride) if isinstance(cur_stride, float) else cur_stride
-        base_buffer_index += var * stride
-    return base_buffer_index
+    base_buffer_index = actual_steps[0]
+    actual_stride = 1
+    for idx, actual_step in enumerate(actual_steps[1:]):
+        cur_stride = actual_sizes[idx]
+        actual_stride *= int(cur_stride) if isinstance(cur_stride, float) else cur_stride
+        base_buffer_index += actual_stride * actual_step
+    return base_buffer_index * buffer_index_size
 
 
 def resolve_buffer_accesses(ast_node, base_buffer_index, read_only_field_names=set()):
@@ -933,7 +937,7 @@ class KernelConstraintsCheck:
             self.scopes.access_symbol(rhs)
 
 
-def add_types(eqs, type_for_symbol, check_independence_condition):
+def add_types(eqs, type_for_symbol, check_independence_condition, check_double_write_condition=True):
     """Traverses AST and replaces every :class:`sympy.Symbol` by a :class:`pystencils.typedsymbol.TypedSymbol`.
 
     Additionally returns sets of all fields which are read/written
@@ -951,7 +955,8 @@ def add_types(eqs, type_for_symbol, check_independence_condition):
     if isinstance(type_for_symbol, (str, type)) or not hasattr(type_for_symbol, '__getitem__'):
         type_for_symbol = typing_from_sympy_inspection(eqs, type_for_symbol)
 
-    check = KernelConstraintsCheck(type_for_symbol, check_independence_condition)
+    check = KernelConstraintsCheck(type_for_symbol, check_independence_condition,
+                                   check_double_write_condition=check_double_write_condition)
 
     def visit(obj):
         if isinstance(obj, (list, tuple)):
diff --git a/pystencils_tests/test_buffer.py b/pystencils_tests/test_buffer.py
index 7ea7fd2a1..43b14e76e 100644
--- a/pystencils_tests/test_buffer.py
+++ b/pystencils_tests/test_buffer.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 
-from pystencils import Assignment, Field, FieldType, create_kernel
+from pystencils import Assignment, Field, FieldType, create_kernel, make_slice
 from pystencils.field import create_numpy_array_with_layout, layout_string_to_tuple
 from pystencils.slicing import (
     add_ghost_layers, get_ghost_region_slice, get_slice_before_ghost_layer)
@@ -186,3 +186,49 @@ def test_field_layouts():
             unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
             unpack_kernel = unpack_code.compile()
             unpack_kernel(buffer=bufferArr, dst_field=dst_arr)
+
+
+def test_iteration_slices():
+    num_cell_values = 19
+    fields = _generate_fields(num_directions=num_cell_values)
+    for (src_arr, dst_arr, bufferArr) in fields:
+        src_field = Field.create_from_numpy_array("src_field", src_arr, index_dimensions=1)
+        dst_field = Field.create_from_numpy_array("dst_field", dst_arr, index_dimensions=1)
+        buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1,
+                                        field_type=FieldType.BUFFER, dtype=src_arr.dtype)
+
+        pack_eqs = []
+        # Since we are packing all cell values for all cells, then
+        # the buffer index is equivalent to the field index
+        for idx in range(num_cell_values):
+            eq = Assignment(buffer(idx), src_field(idx))
+            pack_eqs.append(eq)
+
+        dim = src_field.spatial_dimensions
+
+        #   Pack only the leftmost slice, only every second cell
+        pack_slice = (slice(None, None, 2),)  * (dim-1) + (0, )
+
+        #   Fill the entire array with data
+        src_arr[ (slice(None, None, 1),) * dim] = np.arange(num_cell_values)
+        dst_arr.fill(0.0)
+
+        pack_code = create_kernel(pack_eqs, iteration_slice=pack_slice, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
+        pack_kernel = pack_code.compile()
+        pack_kernel(buffer=bufferArr, src_field=src_arr)
+
+        unpack_eqs = []
+
+        for idx in range(num_cell_values):
+            eq = Assignment(dst_field(idx), buffer(idx))
+            unpack_eqs.append(eq)
+
+        unpack_code = create_kernel(unpack_eqs, iteration_slice=pack_slice, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
+        unpack_kernel = unpack_code.compile()
+        unpack_kernel(buffer=bufferArr, dst_field=dst_arr)
+
+        #   Check if only every second entry of the leftmost slice has been copied
+        np.testing.assert_equal(dst_arr[pack_slice], src_arr[pack_slice])
+        np.testing.assert_equal(dst_arr[(slice(1, None, 2),)  * (dim-1) + (0,)], 0.0)
+        np.testing.assert_equal(dst_arr[(slice(None, None, 1),)  * (dim-1) + (slice(1,None),)], 0.0)
+
-- 
GitLab