diff --git a/lbmpy/advanced_streaming/communication.py b/lbmpy/advanced_streaming/communication.py index 519ab9ef7faf0cca5ef8f357345c83b0a9239103..fe22d5cb2f68aa09e90e2c08648ccbaa3a31c3c0 100644 --- a/lbmpy/advanced_streaming/communication.py +++ b/lbmpy/advanced_streaming/communication.py @@ -1,3 +1,4 @@ +import itertools from pystencils import Field, Assignment from pystencils.slicing import shift_slice, get_slice_before_ghost_layer, normalize_slice from lbmpy.advanced_streaming.utility import is_inplace, get_accessor, numeric_index, \ @@ -59,14 +60,17 @@ def get_communication_slices( Return the source and destination slices for periodicity handling or communication between blocks. :param stencil: The stencil used by the LB method. - :param comm_stencil: The stencil defining the communication directions. If None, it will be set to stencil. + :param comm_stencil: The stencil defining the communication directions. If None, it will be set to the + full stencil (D2Q9 in 2D, D3Q27 in 3D, etc.). :param streaming_pattern: The streaming pattern. :param prev_timestep: Timestep after which communication is run. :param ghost_layers: Number of ghost layers in each direction. """ + + dim = len(stencil[0]) if comm_stencil is None: - comm_stencil = stencil + comm_stencil = itertools.product(*([-1, 0, 1] for _ in range(dim))) pdfs = Field.create_generic('pdfs', spatial_dimensions=len(stencil[0]), index_shape=(len(stencil),)) write_accesses = get_accessor(streaming_pattern, prev_timestep).write(pdfs, stencil) @@ -168,8 +172,9 @@ class LBMPeriodicityHandling: stencil = get_stencil(stencil) self.stencil = stencil + self.dim = len(stencil[0]) self.dh = data_handling - + target = data_handling.default_target assert target in ['cpu', 'gpu', 'opencl'] @@ -184,13 +189,16 @@ class LBMPeriodicityHandling: self.pycuda_direct_copy = target == 'gpu' and pycuda_direct_copy def is_copy_direction(direction): + s = 0 for d, p in zip(direction, periodicity): + s += abs(d) if d != 0 and not p: return False - return True + return s != 0 - copy_directions = tuple(filter(is_copy_direction, stencil[1:])) + full_stencil = itertools.product(*([-1, 0, 1] for _ in range(self.dim))) + copy_directions = tuple(filter(is_copy_direction, full_stencil)) self.comm_slices = [] timesteps = get_timesteps(streaming_pattern) for timestep in timesteps: @@ -224,7 +232,7 @@ class LBMPeriodicityHandling: for src, dst in self.comm_slices[timestep.idx]: kernels.append( periodic_pdf_copy_kernel( - pdf_field, src, dst, target=self.target, + pdf_field, src, dst, target=self.target, opencl_queue=self.opencl_queue, opencl_ctx=self.opencl_ctx)) return kernels diff --git a/lbmpy_tests/advanced_streaming/test_communication.py b/lbmpy_tests/advanced_streaming/test_communication.py index bbdbd6ee7e175ded4d4dc72d80875ef76b04ba3a..70e2d275e953bb2fe5020dba5fa7a839ead564e4 100644 --- a/lbmpy_tests/advanced_streaming/test_communication.py +++ b/lbmpy_tests/advanced_streaming/test_communication.py @@ -1,35 +1,43 @@ +import pystencils as ps + import numpy as np + from lbmpy.stencils import get_stencil from pystencils.slicing import get_slice_before_ghost_layer, get_ghost_region_slice -from lbmpy.advanced_streaming.communication import get_communication_slices, _fix_length_one_slices +from lbmpy.creationfunctions import create_lb_update_rule +from lbmpy.advanced_streaming.communication import get_communication_slices, _fix_length_one_slices, \ + LBMPeriodicityHandling from lbmpy.advanced_streaming.utility import streaming_patterns, Timestep import pytest -@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q19', 'D3Q27']) + +@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q15', 'D3Q19', 'D3Q27']) @pytest.mark.parametrize('streaming_pattern', streaming_patterns) @pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD]) def test_slices_not_empty(stencil, streaming_pattern, timestep): stencil = get_stencil(stencil) dim = len(stencil[0]) q = len(stencil) - arr = np.zeros( (4,) * dim + (q,) ) - slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep, ghost_layers=1) + arr = np.zeros((4,) * dim + (q,)) + slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep, + ghost_layers=1) for _, slices_list in slices.items(): for src, dst in slices_list: assert all(s != 0 for s in arr[src].shape) assert all(s != 0 for s in arr[dst].shape) -@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q19', 'D3Q27']) +@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q15', 'D3Q19', 'D3Q27']) @pytest.mark.parametrize('streaming_pattern', streaming_patterns) @pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD]) def test_src_dst_same_shape(stencil, streaming_pattern, timestep): stencil = get_stencil(stencil) dim = len(stencil[0]) q = len(stencil) - arr = np.zeros( (4,) * dim + (q,) ) - slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep, ghost_layers=1) + arr = np.zeros((4,) * dim + (q,)) + slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep, + ghost_layers=1) for _, slices_list in slices.items(): for src, dst in slices_list: src_shape = arr[src].shape @@ -37,7 +45,7 @@ def test_src_dst_same_shape(stencil, streaming_pattern, timestep): assert src_shape == dst_shape -@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q19', 'D3Q27']) +@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q15', 'D3Q19', 'D3Q27']) def test_pull_communication_slices(stencil): stencil = get_stencil(stencil) @@ -52,9 +60,71 @@ def test_pull_communication_slices(stencil): src = s[0][:-1] dst = s[1][:-1] break - + inner_slice = _fix_length_one_slices(get_slice_before_ghost_layer(d, ghost_layers=1)) inv_dir = (-e for e in d) gl_slice = _fix_length_one_slices(get_ghost_region_slice(inv_dir, ghost_layers=1)) assert src == inner_slice - assert dst == gl_slice \ No newline at end of file + assert dst == gl_slice + + +@pytest.mark.parametrize('stencil_name', ['D2Q9', 'D3Q15', 'D3Q19', 'D3Q27']) +def test_optimised_and_full_communication_equivalence(stencil_name): + target = 'cpu' + stencil = get_stencil(stencil_name) + dim = len(stencil[0]) + domain_size = (4, ) * dim + + dh = ps.create_data_handling(domain_size, periodicity=(True, ) * dim, + parallel=False, default_target=target) + + pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64) + dh.fill("pdf", 0, ghost_layers=True) + pdf_tmp = dh.add_array("pdf_tmp", values_per_cell=len(stencil), dtype=np.int64) + dh.fill("pdf_tmp", 0, ghost_layers=True) + + gl = dh.ghost_layers_of_field("pdf") + + num = 0 + for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']): + dh.cpu_arrays['pdf'][idx] = num + dh.cpu_arrays['pdf_tmp'][idx] = num + num += 1 + + ac = create_lb_update_rule(stencil=stencil, + optimization={"symbolic_field": pdf, + "symbolic_temporary_field": pdf_tmp}, + kernel_type='stream_pull_only') + ast = ps.create_kernel(ac, target=dh.default_target, cpu_openmp=True) + stream = ast.compile() + + full_communication = dh.synchronization_function(pdf.name, target=dh.default_target, optimization={"openmp": True}) + full_communication() + + dh.run_kernel(stream) + dh.swap("pdf", "pdf_tmp") + pdf_full_communication = np.copy(dh.cpu_arrays['pdf']) + + num = 0 + for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']): + dh.cpu_arrays['pdf'][idx] = num + dh.cpu_arrays['pdf_tmp'][idx] = num + num += 1 + + optimised_communication = LBMPeriodicityHandling(stencil=stencil, data_handling=dh, pdf_field_name=pdf.name, + streaming_pattern='pull') + optimised_communication() + dh.run_kernel(stream) + dh.swap("pdf", "pdf_tmp") + + if dim == 3: + for i in range(gl, domain_size[0]): + for j in range(gl, domain_size[1]): + for k in range(gl, domain_size[2]): + for f in range(len(stencil)): + assert dh.cpu_arrays['pdf'][i, j, k, f] == pdf_full_communication[i, j, k, f], print(f) + else: + for i in range(gl, domain_size[0]): + for j in range(gl, domain_size[1]): + for f in range(len(stencil)): + assert dh.cpu_arrays['pdf'][i, j, f] == pdf_full_communication[i, j, f]