From 67c87a99effb2111bcf6f675adf45063ed11e4a4 Mon Sep 17 00:00:00 2001 From: Frederik Hennig <frederik.hennig@fau.de> Date: Tue, 8 Jun 2021 06:54:16 +0000 Subject: [PATCH] Fixes for buffers in loops with step size > 1 --- pystencils/backends/cbackend.py | 8 ++++++ pystencils/cpu/kernelcreation.py | 8 ++++-- pystencils/integer_functions.py | 18 +++++++++--- pystencils/transformations.py | 29 +++++++++++-------- pystencils_tests/test_buffer.py | 48 +++++++++++++++++++++++++++++++- 5 files changed, 92 insertions(+), 19 deletions(-) diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py index 94f21c076..1b45294a6 100644 --- a/pystencils/backends/cbackend.py +++ b/pystencils/backends/cbackend.py @@ -529,6 +529,14 @@ class CustomSympyPrinter(CCodePrinter): return res + '.0f' if '.' not in res else res + 'f' elif dtype.numpy_dtype == np.float64: return res + '.0' if '.' not in res else res + elif dtype.is_int(): + tokens = res.split('.') + if len(tokens) == 1: + return res + elif int(tokens[1]) != 0: + raise ValueError(f"Cannot print non-integer number {res} as an integer.") + else: + return tokens[0] else: return res diff --git a/pystencils/cpu/kernelcreation.py b/pystencils/cpu/kernelcreation.py index 81daf49fc..9e265a08e 100644 --- a/pystencils/cpu/kernelcreation.py +++ b/pystencils/cpu/kernelcreation.py @@ -18,7 +18,7 @@ AssignmentOrAstNodeList = List[Union[Assignment, ast.Node]] def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "kernel", type_info='double', split_groups=(), iteration_slice=None, ghost_layers=None, - skip_independence_check=False) -> KernelFunction: + skip_independence_check=False, allow_double_writes=False) -> KernelFunction: """Creates an abstract syntax tree for a kernel function, by taking a list of update rules. Loops are created according to the field accesses in the equations. @@ -39,6 +39,9 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke all dimensions skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for periodicity kernel, that access the field outside the iteration bounds. Use with care! + allow_double_writes: If True, don't check if every field is only written at a single location. This is required + for example for kernels that are compiled with loop step sizes > 1, that handle multiple + cells at once. Use with care! Returns: AST node representing a function, that can be printed as C or CUDA code @@ -54,7 +57,8 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke else: raise ValueError("Term has to be field access or symbol") - fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check) + fields_read, fields_written, assignments = add_types( + assignments, type_info, not skip_independence_check, check_double_write_condition=not allow_double_writes) all_fields = fields_read.union(fields_written) read_only_fields = set([f.name for f in fields_read - fields_written]) diff --git a/pystencils/integer_functions.py b/pystencils/integer_functions.py index c6fae4fbf..efdaaaecf 100644 --- a/pystencils/integer_functions.py +++ b/pystencils/integer_functions.py @@ -20,13 +20,21 @@ class IntegerFunctionTwoArgsMixIn(sp.Function): for a in args: try: - type = get_type_of_expression(a) - if not type.is_int(): - raise ValueError("Argument to integer function is not an int but " + str(type)) + dtype = get_type_of_expression(a) + if not dtype.is_int(): + raise ValueError("Argument to integer function is not an int but " + str(dtype)) except NotImplementedError: raise ValueError("Integer functions can only be constructed with typed expressions") return super().__new__(cls, *args) + def _eval_evalf(self, *pargs, **kwargs): + arg1 = self.args[0].evalf(*pargs, **kwargs) if hasattr(self.args[0], 'evalf') else self.args[0] + arg2 = self.args[1].evalf(*pargs, **kwargs) if hasattr(self.args[1], 'evalf') else self.args[1] + return self._eval_op(arg1, arg2) + + def _eval_op(self, arg1, arg2): + return self + # noinspection PyPep8Naming class bitwise_xor(IntegerFunctionTwoArgsMixIn): @@ -55,7 +63,9 @@ class bitwise_or(IntegerFunctionTwoArgsMixIn): # noinspection PyPep8Naming class int_div(IntegerFunctionTwoArgsMixIn): - pass + + def _eval_op(self, arg1, arg2): + return int(arg1 // arg2) # noinspection PyPep8Naming diff --git a/pystencils/transformations.py b/pystencils/transformations.py index fc09f34e4..1ce3774b6 100644 --- a/pystencils/transformations.py +++ b/pystencils/transformations.py @@ -20,6 +20,7 @@ from pystencils.field import AbstractField, Field, FieldType from pystencils.kernelparameters import FieldPointerSymbol from pystencils.simp.assignment_collection import AssignmentCollection from pystencils.slicing import normalize_slice +from pystencils.integer_functions import int_div class NestedScopes: @@ -357,20 +358,23 @@ def get_base_buffer_index(ast_node, loop_counters=None, loop_iterations=None): assert len(loops) == len(parents_of_innermost_loop) assert all(l1 is l2 for l1, l2 in zip(loops, parents_of_innermost_loop)) - loop_iterations = [(l.stop - l.start) / l.step for l in loops] - loop_counters = [l.loop_counter_symbol for l in loops] + actual_sizes = [int_div((l.stop - l.start), l.step) for l in loops] + actual_steps = [int_div((l.loop_counter_symbol - l.start), l.step) for l in loops] + else: + actual_sizes = loop_iterations + actual_steps = loop_counters field_accesses = ast_node.atoms(AbstractField.AbstractAccess) buffer_accesses = {fa for fa in field_accesses if FieldType.is_buffer(fa.field)} - loop_counters = [v * len(buffer_accesses) for v in loop_counters] + buffer_index_size = len(buffer_accesses) - base_buffer_index = loop_counters[0] - stride = 1 - for idx, var in enumerate(loop_counters[1:]): - cur_stride = loop_iterations[idx] - stride *= int(cur_stride) if isinstance(cur_stride, float) else cur_stride - base_buffer_index += var * stride - return base_buffer_index + base_buffer_index = actual_steps[0] + actual_stride = 1 + for idx, actual_step in enumerate(actual_steps[1:]): + cur_stride = actual_sizes[idx] + actual_stride *= int(cur_stride) if isinstance(cur_stride, float) else cur_stride + base_buffer_index += actual_stride * actual_step + return base_buffer_index * buffer_index_size def resolve_buffer_accesses(ast_node, base_buffer_index, read_only_field_names=set()): @@ -933,7 +937,7 @@ class KernelConstraintsCheck: self.scopes.access_symbol(rhs) -def add_types(eqs, type_for_symbol, check_independence_condition): +def add_types(eqs, type_for_symbol, check_independence_condition, check_double_write_condition=True): """Traverses AST and replaces every :class:`sympy.Symbol` by a :class:`pystencils.typedsymbol.TypedSymbol`. Additionally returns sets of all fields which are read/written @@ -951,7 +955,8 @@ def add_types(eqs, type_for_symbol, check_independence_condition): if isinstance(type_for_symbol, (str, type)) or not hasattr(type_for_symbol, '__getitem__'): type_for_symbol = typing_from_sympy_inspection(eqs, type_for_symbol) - check = KernelConstraintsCheck(type_for_symbol, check_independence_condition) + check = KernelConstraintsCheck(type_for_symbol, check_independence_condition, + check_double_write_condition=check_double_write_condition) def visit(obj): if isinstance(obj, (list, tuple)): diff --git a/pystencils_tests/test_buffer.py b/pystencils_tests/test_buffer.py index 7ea7fd2a1..43b14e76e 100644 --- a/pystencils_tests/test_buffer.py +++ b/pystencils_tests/test_buffer.py @@ -2,7 +2,7 @@ import numpy as np -from pystencils import Assignment, Field, FieldType, create_kernel +from pystencils import Assignment, Field, FieldType, create_kernel, make_slice from pystencils.field import create_numpy_array_with_layout, layout_string_to_tuple from pystencils.slicing import ( add_ghost_layers, get_ghost_region_slice, get_slice_before_ghost_layer) @@ -186,3 +186,49 @@ def test_field_layouts(): unpack_code = create_kernel(unpack_eqs, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype}) unpack_kernel = unpack_code.compile() unpack_kernel(buffer=bufferArr, dst_field=dst_arr) + + +def test_iteration_slices(): + num_cell_values = 19 + fields = _generate_fields(num_directions=num_cell_values) + for (src_arr, dst_arr, bufferArr) in fields: + src_field = Field.create_from_numpy_array("src_field", src_arr, index_dimensions=1) + dst_field = Field.create_from_numpy_array("dst_field", dst_arr, index_dimensions=1) + buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1, + field_type=FieldType.BUFFER, dtype=src_arr.dtype) + + pack_eqs = [] + # Since we are packing all cell values for all cells, then + # the buffer index is equivalent to the field index + for idx in range(num_cell_values): + eq = Assignment(buffer(idx), src_field(idx)) + pack_eqs.append(eq) + + dim = src_field.spatial_dimensions + + # Pack only the leftmost slice, only every second cell + pack_slice = (slice(None, None, 2),) * (dim-1) + (0, ) + + # Fill the entire array with data + src_arr[ (slice(None, None, 1),) * dim] = np.arange(num_cell_values) + dst_arr.fill(0.0) + + pack_code = create_kernel(pack_eqs, iteration_slice=pack_slice, data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype}) + pack_kernel = pack_code.compile() + pack_kernel(buffer=bufferArr, src_field=src_arr) + + unpack_eqs = [] + + for idx in range(num_cell_values): + eq = Assignment(dst_field(idx), buffer(idx)) + unpack_eqs.append(eq) + + unpack_code = create_kernel(unpack_eqs, iteration_slice=pack_slice, data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype}) + unpack_kernel = unpack_code.compile() + unpack_kernel(buffer=bufferArr, dst_field=dst_arr) + + # Check if only every second entry of the leftmost slice has been copied + np.testing.assert_equal(dst_arr[pack_slice], src_arr[pack_slice]) + np.testing.assert_equal(dst_arr[(slice(1, None, 2),) * (dim-1) + (0,)], 0.0) + np.testing.assert_equal(dst_arr[(slice(None, None, 1),) * (dim-1) + (slice(1,None),)], 0.0) + -- GitLab