Commit 495e92b9 authored by Markus Holzer's avatar Markus Holzer Committed by Martin Bauer
Browse files

Implemented an improved version of the communication for periodic BC

The communication of the ghost layers used to communicate just all
values in between one time step to make sure that everything is correct.
Furthermore the communication was only valid for pull stream steps.
The improved communication distinguishes automatically between pull and
push and communicates only values which are needed. With this improvement
it was possible to implement the EsoTwist streaming scheme.
parent 7511f364
from typing import List, Tuple, Any
from pystencils import Assignment
from pystencils.boundaries.boundaryhandling import BoundaryOffsetInfo
from typing import List, Tuple, Any
from pystencils.data_types import create_type
......@@ -250,7 +250,14 @@ class SerialDataHandling(DataHandling):
def synchronization_function_gpu(self, names, stencil_name=None, **_):
return self.synchronization_function(names, stencil_name, 'gpu')
def synchronization_function(self, names, stencil=None, target=None, **_):
def get_communication_before_sweep(self, names, stencil_name=None, update_rule=None, **_):
return self.synchronization_function(names, stencil_name, self.default_target, update_rule, 'pull')
def get_communication_after_sweep(self, names, stencil_name=None, update_rule=None, **_):
return self.synchronization_function(names, stencil_name, self.default_target, update_rule, 'push')
def synchronization_function(self, names, stencil=None, target=None, update_rule=None,
reads_writes="pull", **_):
if target is None:
target = self.default_target
assert target in ('cpu', 'gpu')
......@@ -265,11 +272,11 @@ class SerialDataHandling(DataHandling):
elif (stencil is None and self.dim == 3) or (stencil is not None and stencil.startswith('D3')):
directions = itertools.product(*[neighbors] * 3)
raise ValueError("Invalid stencil")
directions = itertools.product(*[neighbors] * 1)
for direction in directions:
use_direction = True
if direction == (0, 0) or direction == (0, 0, 0):
if direction == (0,) or direction == (0, 0) or direction == (0, 0, 0):
use_direction = False
for component, periodicity in zip(direction, self._periodicity):
if not periodicity and component != 0:
......@@ -290,6 +297,7 @@ class SerialDataHandling(DataHandling):
if len(filtered_stencil) > 0:
if target == 'cpu':
if update_rule is None:
from pystencils.slicing import get_periodic_boundary_functor
result.append(get_periodic_boundary_functor(filtered_stencil, ghost_layers=gls))
......@@ -298,7 +306,22 @@ class SerialDataHandling(DataHandling):
from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as boundary_func
result.append(boundary_func(filtered_stencil, self._domainSize,
if target == 'cpu':
def result_functor():
import numpy as np
from pystencils import Field, Assignment
from pystencils import Field, Assignment, AssignmentCollection
from pystencils.slicing import normalize_slice, get_periodic_boundary_src_dst_slices
from pystencils.gpucuda import make_python_function
from pystencils.gpucuda.kernelcreation import create_cuda_kernel
from pystencils.gpucuda import make_python_function as make_python_function_gpu
from pystencils.cpu import make_python_function as make_python_function_cpu
from pystencils import create_kernel
from lbmpy.methods.abstractlbmethod import LbmCollisionRule
def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, index_dim_shape=1, dtype=np.float64):
def create_copy_kernel(domain_size, from_slice, to_slice, stencil, index_dimensions=0, index_dim_shape=1,
dtype=np.float64, target=None, update_rule_assignments=None, reads_writes="pull"):
"""Copies a rectangular part of an array to another non-overlapping part"""
if index_dimensions not in (0, 1):
raise NotImplementedError("Works only for one or zero index coordinates")
......@@ -18,25 +22,86 @@ def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, in
assert offset == [s1.stop - s2.stop for s1, s2 in zip(normalized_from_slice, normalized_to_slice)], \
"Slices have to have same size"
update_eqs = []
update_eqs = set()
if update_rule_assignments is None:
for i in range(index_dim_shape):
eq = Assignment(f(i), f[tuple(offset)](i))
for update_rule_assignment in update_rule_assignments:
if reads_writes == "pull":
field_accesses = update_rule_assignment.rhs.atoms(Field.Access)
field_accesses = update_rule_assignment.lhs.atoms(Field.Access)
for field_access in field_accesses:
if 0 in stencil:
for stencil_part, off in zip(stencil, field_access.offsets):
if (stencil_part < 0 and off < 0) or (stencil_part > 0 and off > 0):
if reads_writes == "pull":
eq = Assignment(f(field_access.index[0]), f[tuple(offset)](field_access.index[0]))
if reads_writes == "push":
eq = Assignment(f[tuple(offset)](field_access.index[0]), f(field_access.index[0]))
if stencil == field_access.offsets:
if reads_writes == "pull":
eq = Assignment(f(field_access.index[0]), f[tuple(offset)](field_access.index[0]))
if reads_writes == "push":
eq = Assignment(f[tuple(offset)](field_access.index[0]), f(field_access.index[0]))
kernels = []
if len(update_eqs) > 0:
ast = create_kernel(list(update_eqs), iteration_slice=to_slice,
skip_independence_check=True, target=target)
if target == 'cpu':
ast = create_cuda_kernel(update_eqs, iteration_slice=to_slice, skip_independence_check=True)
return make_python_function(ast)
return kernels
def get_periodic_boundary_functor(stencil, domain_size, index_dimensions=0, index_dim_shape=1, ghost_layers=1,
thickness=None, dtype=float):
thickness=None, dtype=float, target=None, update_rule=None, reads_writes="pull"):
# sort stencil entry's by number of zeros to make sure, that surfaces, edges and corners are
# communicated in that order. Otherwise values (e.g. corner values) will be overwritten
stencil.sort(key=number_of_zeros, reverse=True)
src_dst_slice_tuples = get_periodic_boundary_src_dst_slices(stencil, ghost_layers, thickness)
kernels = []
index_dimensions = index_dimensions
for src_slice, dst_slice in src_dst_slice_tuples:
kernels.append(create_copy_kernel(domain_size, src_slice, dst_slice, index_dimensions, index_dim_shape, dtype))
if isinstance(update_rule, Assignment):
update_rule_assignments = [update_rule]
elif isinstance(update_rule, (AssignmentCollection, LbmCollisionRule)):
update_rule_assignments = update_rule.all_assignments
update_rule_assignments = None
# by default we only get the communication pattern for pull stream
if reads_writes is not "pull" or reads_writes is not "push":
assert "not a valid argument for the definition of read or/and write access"
for i, (src_slice, dst_slice) in enumerate(src_dst_slice_tuples):
for j in create_copy_kernel(domain_size, src_slice, dst_slice, stencil[i], index_dimensions,
index_dim_shape, dtype, target, update_rule_assignments, reads_writes):
def functor(pdfs, **_):
for kernel in kernels:
return functor
def number_of_zeros(stencil_entry):
count_zeros = 0
for tmp in stencil_entry:
if tmp == 0:
count_zeros += 1
return count_zeros
......@@ -175,7 +175,8 @@ def make_loop_over_domain(body, function_name, iteration_slice=None, ghost_layer
if iteration_slice is not None:
iteration_slice = normalize_slice(iteration_slice, shape)
ghost_layers = 0
if ghost_layers is None:
required_ghost_layers = max([fa.required_ghost_layers for fa in field_accesses])
ghost_layers = [(required_ghost_layers, required_ghost_layers)] * len(loop_order)
......@@ -135,7 +135,7 @@ def test_periodicity():
arr_gpu = gpuarray.to_gpu(arr_cpu)
periodicity_stencil = [(1, 0), (-1, 0), (1, 1)]
periodic_gpu_kernel = periodic_gpu(periodicity_stencil, (5, 5), 1, 2)
periodic_gpu_kernel = periodic_gpu(periodicity_stencil, (5, 5), 1, 2, target='gpu')
periodic_cpu_kernel = periodic_cpu(periodicity_stencil)
cpu_result = np.copy(arr_cpu)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment