Fix single-index iteration slices in the GPU kernel creation and extend the sliced-iteration tests.

src/pystencils/gpu/kernelcreation.py: in create_cuda_kernel, a non-slice entry s of the
iteration space is now expanded to slice(s, s + 1, 1) instead of slice(s, s, 1).

tests/test_sliced_iteration.py: test_sliced_iteration is parametrized over Target.CPU and
Target.GPU (the GPU case is skipped when cupy cannot be imported), allocates its arrays through
create_data_handling and iterates the fixed slice make_slice[1:3, 1]. The symbolic upper bound
case moves to the new test_symbols_in_slice, which calls pytest.xfail for Target.GPU.

NOTE(review): this patch was recovered from a copy whose line breaks and leading whitespace
had been collapsed. Indentation, especially of the context lines in the kernelcreation.py
hunk, is reconstructed and should be confirmed against the upstream files before applying.

diff --git a/src/pystencils/gpu/kernelcreation.py b/src/pystencils/gpu/kernelcreation.py
index 2feb8883a8df644ec2f4dca5d6452831bea619f3..e537787073e86f3f232f36bf0211ebd0a17c8275 100644
--- a/src/pystencils/gpu/kernelcreation.py
+++ b/src/pystencils/gpu/kernelcreation.py
@@ -66,7 +66,8 @@ def create_cuda_kernel(assignments: NodeCollection, config: CreateKernelConfig):
         iteration_space = normalize_slice(iteration_slice, common_shape)
     else:
         iteration_space = normalize_slice(iteration_slice, common_shape)
-    iteration_space = tuple([s if isinstance(s, slice) else slice(s, s, 1) for s in iteration_space])
+
+    iteration_space = tuple([s if isinstance(s, slice) else slice(s, s + 1, 1) for s in iteration_space])
 
     loop_counter_symbols = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(len(iteration_space))]
 
diff --git a/tests/test_sliced_iteration.py b/tests/test_sliced_iteration.py
index 5eff0a89d4d23386d00e7408a19ece93453d7d9d..cc7b87a48793b6684346a103ac6b1820d9a0b9df 100644
--- a/tests/test_sliced_iteration.py
+++ b/tests/test_sliced_iteration.py
@@ -1,29 +1,106 @@
 import numpy as np
 import sympy as sp
+import pytest
 
-from pystencils import Assignment, Field, TypedSymbol, create_kernel, make_slice
+from pystencils import (
+    Assignment,
+    Field,
+    TypedSymbol,
+    create_kernel,
+    make_slice,
+    Target,
+    create_data_handling,
+)
 from pystencils.simp import sympy_cse_on_assignment_list
 
 
-def test_sliced_iteration():
+@pytest.mark.parametrize("target", [Target.CPU, Target.GPU])
+def test_sliced_iteration(target):
+    if target == Target.GPU:
+        pytest.importorskip("cupy")
+
     size = (4, 4)
-    src_arr = np.ones(size)
-    dst_arr = np.zeros_like(src_arr)
-    src_field = Field.create_from_numpy_array('src', src_arr)
-    dst_field = Field.create_from_numpy_array('dst', dst_arr)
+
+    dh = create_data_handling(size, default_target=target, default_ghost_layers=0)
+
+    src_field = dh.add_array("src", 1)
+    dst_field = dh.add_array("dst", 1)
+
+    dh.fill(src_field.name, 1.0, ghost_layers=True)
+    dh.fill(dst_field.name, 0.0, ghost_layers=True)
 
     a, b = sp.symbols("a b")
-    update_rule = Assignment(dst_field[0, 0],
-                             (a * src_field[0, 1] + a * src_field[0, -1] +
-                              b * src_field[1, 0] + b * src_field[-1, 0]) / 4)
+    update_rule = Assignment(
+        dst_field[0, 0],
+        (
+            a * src_field[0, 1]
+            + a * src_field[0, -1]
+            + b * src_field[1, 0]
+            + b * src_field[-1, 0]
+        )
+        / 4,
+    )
+
+    s = make_slice[1:3, 1]
+    kernel = create_kernel(
+        sympy_cse_on_assignment_list([update_rule]), iteration_slice=s, target=target
+    ).compile()
+
+    if target == Target.GPU:
+        dh.all_to_gpu()
+
+    dh.run_kernel(kernel, a=1.0, b=1.0)
+
+    if target == Target.GPU:
+        dh.all_to_cpu()
+
+    expected_result = np.zeros(size)
+    expected_result[1:3, 1] = 1
+    np.testing.assert_almost_equal(dh.gather_array(dst_field.name), expected_result)
+
+
+@pytest.mark.parametrize("target", [Target.CPU, Target.GPU])
+def test_symbols_in_slice(target):
+    if target == Target.GPU:
+        pytest.xfail("Iteration slices including arbitrary symbols are currently broken on GPU")
+
+    size = (4, 4)
+
+    dh = create_data_handling(size, default_target=target, default_ghost_layers=0)
+
+    src_field = dh.add_array("src", 1)
+    dst_field = dh.add_array("dst", 1)
+
+    dh.fill(src_field.name, 1.0, ghost_layers=True)
+    dh.fill(dst_field.name, 0.0, ghost_layers=True)
+
+    a, b = sp.symbols("a b")
+    update_rule = Assignment(
+        dst_field[0, 0],
+        (
+            a * src_field[0, 1]
+            + a * src_field[0, -1]
+            + b * src_field[1, 0]
+            + b * src_field[-1, 0]
+        )
+        / 4,
+    )
 
     x_end = TypedSymbol("x_end", "int")
     s = make_slice[1:x_end, 1]
     x_end_value = size[1] - 1
-    kernel = create_kernel(sympy_cse_on_assignment_list([update_rule]), iteration_slice=s).compile()
+    kernel = create_kernel(
+        sympy_cse_on_assignment_list([update_rule]), iteration_slice=s, target=target
+    ).compile()
+
+    if target == Target.GPU:
+        dh.all_to_gpu()
+
+    dh.run_kernel(kernel, a=1.0, b=1.0, x_end=x_end_value)
 
-    kernel(src=src_arr, dst=dst_arr, a=1.0, b=1.0, x_end=x_end_value)
+    if target == Target.GPU:
+        dh.all_to_cpu()
 
     expected_result = np.zeros(size)
     expected_result[1:x_end_value, 1] = 1
-    np.testing.assert_almost_equal(expected_result, dst_arr)
+    np.testing.assert_almost_equal(dh.gather_array(dst_field.name), expected_result)