Commit 54241096 authored by Markus Holzer's avatar Markus Holzer Committed by Michael Kuron
Browse files

Fixed incomplete communication stencil for D3Q15

(and any other stencil omitting certain sub-diagonal directions)
parent 3e293a52
import itertools
from pystencils import Field, Assignment
from pystencils.slicing import shift_slice, get_slice_before_ghost_layer, normalize_slice
from lbmpy.advanced_streaming.utility import is_inplace, get_accessor, numeric_index, \
......@@ -59,14 +60,17 @@ def get_communication_slices(
Return the source and destination slices for periodicity handling or communication between blocks.
:param stencil: The stencil used by the LB method.
:param comm_stencil: The stencil defining the communication directions. If None, it will be set to stencil.
:param comm_stencil: The stencil defining the communication directions. If None, it will be set to the
full stencil (D2Q9 in 2D, D3Q27 in 3D, etc.).
:param streaming_pattern: The streaming pattern.
:param prev_timestep: Timestep after which communication is run.
:param ghost_layers: Number of ghost layers in each direction.
"""
dim = len(stencil[0])
if comm_stencil is None:
comm_stencil = stencil
comm_stencil = itertools.product(*([-1, 0, 1] for _ in range(dim)))
pdfs = Field.create_generic('pdfs', spatial_dimensions=len(stencil[0]), index_shape=(len(stencil),))
write_accesses = get_accessor(streaming_pattern, prev_timestep).write(pdfs, stencil)
......@@ -168,8 +172,9 @@ class LBMPeriodicityHandling:
stencil = get_stencil(stencil)
self.stencil = stencil
self.dim = len(stencil[0])
self.dh = data_handling
target = data_handling.default_target
assert target in ['cpu', 'gpu', 'opencl']
......@@ -184,13 +189,16 @@ class LBMPeriodicityHandling:
self.pycuda_direct_copy = target == 'gpu' and pycuda_direct_copy
def is_copy_direction(direction):
s = 0
for d, p in zip(direction, periodicity):
s += abs(d)
if d != 0 and not p:
return False
return True
return s != 0
copy_directions = tuple(filter(is_copy_direction, stencil[1:]))
full_stencil = itertools.product(*([-1, 0, 1] for _ in range(self.dim)))
copy_directions = tuple(filter(is_copy_direction, full_stencil))
self.comm_slices = []
timesteps = get_timesteps(streaming_pattern)
for timestep in timesteps:
......@@ -224,7 +232,7 @@ class LBMPeriodicityHandling:
for src, dst in self.comm_slices[timestep.idx]:
kernels.append(
periodic_pdf_copy_kernel(
pdf_field, src, dst, target=self.target,
pdf_field, src, dst, target=self.target,
opencl_queue=self.opencl_queue, opencl_ctx=self.opencl_ctx))
return kernels
......
import pystencils as ps
import numpy as np
from lbmpy.stencils import get_stencil
from pystencils.slicing import get_slice_before_ghost_layer, get_ghost_region_slice
from lbmpy.advanced_streaming.communication import get_communication_slices, _fix_length_one_slices
from lbmpy.creationfunctions import create_lb_update_rule
from lbmpy.advanced_streaming.communication import get_communication_slices, _fix_length_one_slices, \
LBMPeriodicityHandling
from lbmpy.advanced_streaming.utility import streaming_patterns, Timestep
import pytest
@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q19', 'D3Q27'])
@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q15', 'D3Q19', 'D3Q27'])
@pytest.mark.parametrize('streaming_pattern', streaming_patterns)
@pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD])
def test_slices_not_empty(stencil, streaming_pattern, timestep):
stencil = get_stencil(stencil)
dim = len(stencil[0])
q = len(stencil)
arr = np.zeros( (4,) * dim + (q,) )
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep, ghost_layers=1)
arr = np.zeros((4,) * dim + (q,))
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep,
ghost_layers=1)
for _, slices_list in slices.items():
for src, dst in slices_list:
assert all(s != 0 for s in arr[src].shape)
assert all(s != 0 for s in arr[dst].shape)
@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q19', 'D3Q27'])
@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q15', 'D3Q19', 'D3Q27'])
@pytest.mark.parametrize('streaming_pattern', streaming_patterns)
@pytest.mark.parametrize('timestep', [Timestep.EVEN, Timestep.ODD])
def test_src_dst_same_shape(stencil, streaming_pattern, timestep):
stencil = get_stencil(stencil)
dim = len(stencil[0])
q = len(stencil)
arr = np.zeros( (4,) * dim + (q,) )
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep, ghost_layers=1)
arr = np.zeros((4,) * dim + (q,))
slices = get_communication_slices(stencil, streaming_pattern=streaming_pattern, prev_timestep=timestep,
ghost_layers=1)
for _, slices_list in slices.items():
for src, dst in slices_list:
src_shape = arr[src].shape
......@@ -37,7 +45,7 @@ def test_src_dst_same_shape(stencil, streaming_pattern, timestep):
assert src_shape == dst_shape
@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q19', 'D3Q27'])
@pytest.mark.parametrize('stencil', ['D2Q9', 'D3Q15', 'D3Q19', 'D3Q27'])
def test_pull_communication_slices(stencil):
stencil = get_stencil(stencil)
......@@ -52,9 +60,71 @@ def test_pull_communication_slices(stencil):
src = s[0][:-1]
dst = s[1][:-1]
break
inner_slice = _fix_length_one_slices(get_slice_before_ghost_layer(d, ghost_layers=1))
inv_dir = (-e for e in d)
gl_slice = _fix_length_one_slices(get_ghost_region_slice(inv_dir, ghost_layers=1))
assert src == inner_slice
assert dst == gl_slice
\ No newline at end of file
assert dst == gl_slice
@pytest.mark.parametrize('stencil_name', ['D2Q9', 'D3Q15', 'D3Q19', 'D3Q27'])
def test_optimised_and_full_communication_equivalence(stencil_name):
target = 'cpu'
stencil = get_stencil(stencil_name)
dim = len(stencil[0])
domain_size = (4, ) * dim
dh = ps.create_data_handling(domain_size, periodicity=(True, ) * dim,
parallel=False, default_target=target)
pdf = dh.add_array("pdf", values_per_cell=len(stencil), dtype=np.int64)
dh.fill("pdf", 0, ghost_layers=True)
pdf_tmp = dh.add_array("pdf_tmp", values_per_cell=len(stencil), dtype=np.int64)
dh.fill("pdf_tmp", 0, ghost_layers=True)
gl = dh.ghost_layers_of_field("pdf")
num = 0
for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']):
dh.cpu_arrays['pdf'][idx] = num
dh.cpu_arrays['pdf_tmp'][idx] = num
num += 1
ac = create_lb_update_rule(stencil=stencil,
optimization={"symbolic_field": pdf,
"symbolic_temporary_field": pdf_tmp},
kernel_type='stream_pull_only')
ast = ps.create_kernel(ac, target=dh.default_target, cpu_openmp=True)
stream = ast.compile()
full_communication = dh.synchronization_function(pdf.name, target=dh.default_target, optimization={"openmp": True})
full_communication()
dh.run_kernel(stream)
dh.swap("pdf", "pdf_tmp")
pdf_full_communication = np.copy(dh.cpu_arrays['pdf'])
num = 0
for idx, x in np.ndenumerate(dh.cpu_arrays['pdf']):
dh.cpu_arrays['pdf'][idx] = num
dh.cpu_arrays['pdf_tmp'][idx] = num
num += 1
optimised_communication = LBMPeriodicityHandling(stencil=stencil, data_handling=dh, pdf_field_name=pdf.name,
streaming_pattern='pull')
optimised_communication()
dh.run_kernel(stream)
dh.swap("pdf", "pdf_tmp")
if dim == 3:
for i in range(gl, domain_size[0]):
for j in range(gl, domain_size[1]):
for k in range(gl, domain_size[2]):
for f in range(len(stencil)):
assert dh.cpu_arrays['pdf'][i, j, k, f] == pdf_full_communication[i, j, k, f], print(f)
else:
for i in range(gl, domain_size[0]):
for j in range(gl, domain_size[1]):
for f in range(len(stencil)):
assert dh.cpu_arrays['pdf'][i, j, f] == pdf_full_communication[i, j, f]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment