Commit 495e92b9 authored by Markus Holzer's avatar Markus Holzer Committed by Martin Bauer
Browse files

Implemented an improved version of the communication for periodic BC

The communication of the ghost layers used to communicate just all
values in between one time step to make sure that everything is correct.
Furthermore the communication was only valid for pull stream steps.
The improved communication distinguishes automatically between pull and
push and communicates only values which are needed. With this improvement
it was possible to implement the EsoTwist streaming scheme.
parent 7511f364
from typing import List, Tuple, Any
from pystencils import Assignment from pystencils import Assignment
from pystencils.boundaries.boundaryhandling import BoundaryOffsetInfo from pystencils.boundaries.boundaryhandling import BoundaryOffsetInfo
from typing import List, Tuple, Any
from pystencils.data_types import create_type from pystencils.data_types import create_type
......
...@@ -250,7 +250,14 @@ class SerialDataHandling(DataHandling): ...@@ -250,7 +250,14 @@ class SerialDataHandling(DataHandling):
def synchronization_function_gpu(self, names, stencil_name=None, **_): def synchronization_function_gpu(self, names, stencil_name=None, **_):
return self.synchronization_function(names, stencil_name, 'gpu') return self.synchronization_function(names, stencil_name, 'gpu')
def synchronization_function(self, names, stencil=None, target=None, **_): def get_communication_before_sweep(self, names, stencil_name=None, update_rule=None, **_):
return self.synchronization_function(names, stencil_name, self.default_target, update_rule, 'pull')
def get_communication_after_sweep(self, names, stencil_name=None, update_rule=None, **_):
return self.synchronization_function(names, stencil_name, self.default_target, update_rule, 'push')
def synchronization_function(self, names, stencil=None, target=None, update_rule=None,
reads_writes="pull", **_):
if target is None: if target is None:
target = self.default_target target = self.default_target
assert target in ('cpu', 'gpu') assert target in ('cpu', 'gpu')
...@@ -265,11 +272,11 @@ class SerialDataHandling(DataHandling): ...@@ -265,11 +272,11 @@ class SerialDataHandling(DataHandling):
elif (stencil is None and self.dim == 3) or (stencil is not None and stencil.startswith('D3')): elif (stencil is None and self.dim == 3) or (stencil is not None and stencil.startswith('D3')):
directions = itertools.product(*[neighbors] * 3) directions = itertools.product(*[neighbors] * 3)
else: else:
raise ValueError("Invalid stencil") directions = itertools.product(*[neighbors] * 1)
for direction in directions: for direction in directions:
use_direction = True use_direction = True
if direction == (0, 0) or direction == (0, 0, 0): if direction == (0,) or direction == (0, 0) or direction == (0, 0, 0):
use_direction = False use_direction = False
for component, periodicity in zip(direction, self._periodicity): for component, periodicity in zip(direction, self._periodicity):
if not periodicity and component != 0: if not periodicity and component != 0:
...@@ -290,15 +297,31 @@ class SerialDataHandling(DataHandling): ...@@ -290,15 +297,31 @@ class SerialDataHandling(DataHandling):
if len(filtered_stencil) > 0: if len(filtered_stencil) > 0:
if target == 'cpu': if target == 'cpu':
from pystencils.slicing import get_periodic_boundary_functor if update_rule is None:
result.append(get_periodic_boundary_functor(filtered_stencil, ghost_layers=gls)) from pystencils.slicing import get_periodic_boundary_functor
result.append(get_periodic_boundary_functor(filtered_stencil, ghost_layers=gls))
else:
from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as boundary_func
result.append(boundary_func(filtered_stencil, self._domainSize,
index_dimensions=self.fields[name].index_dimensions,
index_dim_shape=values_per_cell,
dtype=self.fields[name].dtype.numpy_dtype,
ghost_layers=gls,
target='cpu',
update_rule=update_rule,
reads_writes=reads_writes))
else: else:
from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as boundary_func from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as boundary_func
result.append(boundary_func(filtered_stencil, self._domainSize, result.append(boundary_func(filtered_stencil, self._domainSize,
index_dimensions=self.fields[name].index_dimensions, index_dimensions=self.fields[name].index_dimensions,
index_dim_shape=values_per_cell, index_dim_shape=values_per_cell,
dtype=self.fields[name].dtype.numpy_dtype, dtype=self.fields[name].dtype.numpy_dtype,
ghost_layers=gls)) ghost_layers=gls,
target='gpu',
update_rule=update_rule,
reads_writes=reads_writes))
if target == 'cpu': if target == 'cpu':
def result_functor(): def result_functor():
......
import numpy as np import numpy as np
from pystencils import Field, Assignment from pystencils import Field, Assignment, AssignmentCollection
from pystencils.slicing import normalize_slice, get_periodic_boundary_src_dst_slices from pystencils.slicing import normalize_slice, get_periodic_boundary_src_dst_slices
from pystencils.gpucuda import make_python_function
from pystencils.gpucuda.kernelcreation import create_cuda_kernel
from pystencils.gpucuda import make_python_function as make_python_function_gpu
from pystencils.cpu import make_python_function as make_python_function_cpu
from pystencils import create_kernel
from lbmpy.methods.abstractlbmethod import LbmCollisionRule
def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, index_dim_shape=1, dtype=np.float64):
def create_copy_kernel(domain_size, from_slice, to_slice, stencil, index_dimensions=0, index_dim_shape=1,
dtype=np.float64, target=None, update_rule_assignments=None, reads_writes="pull"):
"""Copies a rectangular part of an array to another non-overlapping part""" """Copies a rectangular part of an array to another non-overlapping part"""
if index_dimensions not in (0, 1): if index_dimensions not in (0, 1):
raise NotImplementedError("Works only for one or zero index coordinates") raise NotImplementedError("Works only for one or zero index coordinates")
...@@ -18,25 +22,86 @@ def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, in ...@@ -18,25 +22,86 @@ def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, in
assert offset == [s1.stop - s2.stop for s1, s2 in zip(normalized_from_slice, normalized_to_slice)], \ assert offset == [s1.stop - s2.stop for s1, s2 in zip(normalized_from_slice, normalized_to_slice)], \
"Slices have to have same size" "Slices have to have same size"
update_eqs = [] update_eqs = set()
for i in range(index_dim_shape): if update_rule_assignments is None:
eq = Assignment(f(i), f[tuple(offset)](i)) for i in range(index_dim_shape):
update_eqs.append(eq) eq = Assignment(f(i), f[tuple(offset)](i))
update_eqs.add(eq)
else:
for update_rule_assignment in update_rule_assignments:
if reads_writes == "pull":
field_accesses = update_rule_assignment.rhs.atoms(Field.Access)
else:
field_accesses = update_rule_assignment.lhs.atoms(Field.Access)
for field_access in field_accesses:
if 0 in stencil:
for stencil_part, off in zip(stencil, field_access.offsets):
if (stencil_part < 0 and off < 0) or (stencil_part > 0 and off > 0):
if reads_writes == "pull":
eq = Assignment(f(field_access.index[0]), f[tuple(offset)](field_access.index[0]))
update_eqs.add(eq)
if reads_writes == "push":
eq = Assignment(f[tuple(offset)](field_access.index[0]), f(field_access.index[0]))
update_eqs.add(eq)
else:
if stencil == field_access.offsets:
if reads_writes == "pull":
eq = Assignment(f(field_access.index[0]), f[tuple(offset)](field_access.index[0]))
update_eqs.add(eq)
if reads_writes == "push":
eq = Assignment(f[tuple(offset)](field_access.index[0]), f(field_access.index[0]))
update_eqs.add(eq)
kernels = []
if len(update_eqs) > 0:
ast = create_kernel(list(update_eqs), iteration_slice=to_slice,
skip_independence_check=True, target=target)
if target == 'cpu':
kernels.append(make_python_function_cpu(ast))
else:
kernels.append(make_python_function_gpu(ast))
ast = create_cuda_kernel(update_eqs, iteration_slice=to_slice, skip_independence_check=True) return kernels
return make_python_function(ast)
def get_periodic_boundary_functor(stencil, domain_size, index_dimensions=0, index_dim_shape=1, ghost_layers=1, def get_periodic_boundary_functor(stencil, domain_size, index_dimensions=0, index_dim_shape=1, ghost_layers=1,
thickness=None, dtype=float): thickness=None, dtype=float, target=None, update_rule=None, reads_writes="pull"):
# sort stencil entry's by number of zeros to make sure, that surfaces, edges and corners are
# communicated in that order. Otherwise values (e.g. corner values) will be overwritten
stencil.sort(key=number_of_zeros, reverse=True)
src_dst_slice_tuples = get_periodic_boundary_src_dst_slices(stencil, ghost_layers, thickness) src_dst_slice_tuples = get_periodic_boundary_src_dst_slices(stencil, ghost_layers, thickness)
kernels = [] kernels = []
index_dimensions = index_dimensions
for src_slice, dst_slice in src_dst_slice_tuples: if isinstance(update_rule, Assignment):
kernels.append(create_copy_kernel(domain_size, src_slice, dst_slice, index_dimensions, index_dim_shape, dtype)) update_rule_assignments = [update_rule]
elif isinstance(update_rule, (AssignmentCollection, LbmCollisionRule)):
update_rule_assignments = update_rule.all_assignments
else:
update_rule_assignments = None
# by default we only get the communication pattern for pull stream
if reads_writes is not "pull" or reads_writes is not "push":
assert "not a valid argument for the definition of read or/and write access"
for i, (src_slice, dst_slice) in enumerate(src_dst_slice_tuples):
for j in create_copy_kernel(domain_size, src_slice, dst_slice, stencil[i], index_dimensions,
index_dim_shape, dtype, target, update_rule_assignments, reads_writes):
kernels.append(j)
def functor(pdfs, **_): def functor(pdfs, **_):
for kernel in kernels: for kernel in kernels:
kernel(pdfs=pdfs) kernel(pdfs=pdfs)
return functor return functor
def number_of_zeros(stencil_entry):
count_zeros = 0
for tmp in stencil_entry:
if tmp == 0:
count_zeros += 1
return count_zeros
...@@ -175,12 +175,13 @@ def make_loop_over_domain(body, function_name, iteration_slice=None, ghost_layer ...@@ -175,12 +175,13 @@ def make_loop_over_domain(body, function_name, iteration_slice=None, ghost_layer
if iteration_slice is not None: if iteration_slice is not None:
iteration_slice = normalize_slice(iteration_slice, shape) iteration_slice = normalize_slice(iteration_slice, shape)
ghost_layers = 0
if ghost_layers is None: else:
required_ghost_layers = max([fa.required_ghost_layers for fa in field_accesses]) if ghost_layers is None:
ghost_layers = [(required_ghost_layers, required_ghost_layers)] * len(loop_order) required_ghost_layers = max([fa.required_ghost_layers for fa in field_accesses])
if isinstance(ghost_layers, int): ghost_layers = [(required_ghost_layers, required_ghost_layers)] * len(loop_order)
ghost_layers = [(ghost_layers, ghost_layers)] * len(loop_order) if isinstance(ghost_layers, int):
ghost_layers = [(ghost_layers, ghost_layers)] * len(loop_order)
current_body = body current_body = body
for i, loop_coordinate in enumerate(reversed(loop_order)): for i, loop_coordinate in enumerate(reversed(loop_order)):
......
...@@ -135,7 +135,7 @@ def test_periodicity(): ...@@ -135,7 +135,7 @@ def test_periodicity():
arr_gpu = gpuarray.to_gpu(arr_cpu) arr_gpu = gpuarray.to_gpu(arr_cpu)
periodicity_stencil = [(1, 0), (-1, 0), (1, 1)] periodicity_stencil = [(1, 0), (-1, 0), (1, 1)]
periodic_gpu_kernel = periodic_gpu(periodicity_stencil, (5, 5), 1, 2) periodic_gpu_kernel = periodic_gpu(periodicity_stencil, (5, 5), 1, 2, target='gpu')
periodic_cpu_kernel = periodic_cpu(periodicity_stencil) periodic_cpu_kernel = periodic_cpu(periodicity_stencil)
cpu_result = np.copy(arr_cpu) cpu_result = np.copy(arr_cpu)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment