diff --git a/pystencils/boundaries/boundaryconditions.py b/pystencils/boundaries/boundaryconditions.py
index 10870c4cd5cd999b05fbc3dd72cb9de6710c25a6..6628be82082366e61a4b518d2806f92838057e64 100644
--- a/pystencils/boundaries/boundaryconditions.py
+++ b/pystencils/boundaries/boundaryconditions.py
@@ -1,6 +1,6 @@
-from typing import List, Tuple, Any
 from pystencils import Assignment
 from pystencils.boundaries.boundaryhandling import BoundaryOffsetInfo
+from typing import List, Tuple, Any
 from pystencils.data_types import create_type
 
 
diff --git a/pystencils/datahandling/serial_datahandling.py b/pystencils/datahandling/serial_datahandling.py
index 7e4d19cd8e24aa485a48893d4c992d34078a0f13..dd2b70b89bc8ecf3d8ecb1c187be6442f6aa0efc 100644
--- a/pystencils/datahandling/serial_datahandling.py
+++ b/pystencils/datahandling/serial_datahandling.py
@@ -250,7 +250,14 @@ class SerialDataHandling(DataHandling):
     def synchronization_function_gpu(self, names, stencil_name=None, **_):
         return self.synchronization_function(names, stencil_name, 'gpu')
 
-    def synchronization_function(self, names, stencil=None, target=None, **_):
+    def get_communication_before_sweep(self, names, stencil_name=None, update_rule=None, **_):
+        return self.synchronization_function(names, stencil_name, self.default_target, update_rule, 'pull')
+
+    def get_communication_after_sweep(self, names, stencil_name=None, update_rule=None, **_):
+        return self.synchronization_function(names, stencil_name, self.default_target, update_rule, 'push')
+
+    def synchronization_function(self, names, stencil=None, target=None, update_rule=None,
+                                 reads_writes="pull", **_):
         if target is None:
             target = self.default_target
         assert target in ('cpu', 'gpu')
@@ -265,11 +272,11 @@ class SerialDataHandling(DataHandling):
         elif (stencil is None and self.dim == 3) or (stencil is not None and stencil.startswith('D3')):
             directions = itertools.product(*[neighbors] * 3)
         else:
-            raise ValueError("Invalid stencil")
+            directions = itertools.product(*[neighbors] * 1)
 
         for direction in directions:
             use_direction = True
-            if direction == (0, 0) or direction == (0, 0, 0):
+            if direction == (0,) or direction == (0, 0) or direction == (0, 0, 0):
                 use_direction = False
             for component, periodicity in zip(direction, self._periodicity):
                 if not periodicity and component != 0:
@@ -290,15 +297,31 @@ class SerialDataHandling(DataHandling):
 
             if len(filtered_stencil) > 0:
                 if target == 'cpu':
-                    from pystencils.slicing import get_periodic_boundary_functor
-                    result.append(get_periodic_boundary_functor(filtered_stencil, ghost_layers=gls))
+                    if update_rule is None:
+                        from pystencils.slicing import get_periodic_boundary_functor
+                        result.append(get_periodic_boundary_functor(filtered_stencil, ghost_layers=gls))
+                    else:
+                        from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as boundary_func
+                        result.append(boundary_func(filtered_stencil, self._domainSize,
+                                                    index_dimensions=self.fields[name].index_dimensions,
+                                                    index_dim_shape=values_per_cell,
+                                                    dtype=self.fields[name].dtype.numpy_dtype,
+                                                    ghost_layers=gls,
+                                                    target='cpu',
+                                                    update_rule=update_rule,
+                                                    reads_writes=reads_writes))
+
                 else:
                     from pystencils.gpucuda.periodicity import get_periodic_boundary_functor as boundary_func
+
                     result.append(boundary_func(filtered_stencil, self._domainSize,
                                                 index_dimensions=self.fields[name].index_dimensions,
                                                 index_dim_shape=values_per_cell,
                                                 dtype=self.fields[name].dtype.numpy_dtype,
-                                                ghost_layers=gls))
+                                                ghost_layers=gls,
+                                                target='gpu',
+                                                update_rule=update_rule,
+                                                reads_writes=reads_writes))
 
         if target == 'cpu':
             def result_functor():
diff --git a/pystencils/gpucuda/periodicity.py b/pystencils/gpucuda/periodicity.py
index 5657d4618623b421c0d3ef25a60774085d85bb32..ba6232322b00871d0a21c7a21e935c12f011f728 100644
--- a/pystencils/gpucuda/periodicity.py
+++ b/pystencils/gpucuda/periodicity.py
@@ -1,11 +1,15 @@
 import numpy as np
-from pystencils import Field, Assignment
+from pystencils import Field, Assignment, AssignmentCollection
 from pystencils.slicing import normalize_slice, get_periodic_boundary_src_dst_slices
-from pystencils.gpucuda import make_python_function
-from pystencils.gpucuda.kernelcreation import create_cuda_kernel
 
+from pystencils.gpucuda import make_python_function as make_python_function_gpu
+from pystencils.cpu import make_python_function as make_python_function_cpu
+from pystencils import create_kernel
+from lbmpy.methods.abstractlbmethod import LbmCollisionRule
 
-def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, index_dim_shape=1, dtype=np.float64):
+
+def create_copy_kernel(domain_size, from_slice, to_slice, stencil, index_dimensions=0, index_dim_shape=1,
+                       dtype=np.float64, target=None, update_rule_assignments=None, reads_writes="pull"):
     """Copies a rectangular part of an array to another non-overlapping part"""
     if index_dimensions not in (0, 1):
         raise NotImplementedError("Works only for one or zero index coordinates")
@@ -18,25 +22,86 @@ def create_copy_kernel(domain_size, from_slice, to_slice, index_dimensions=0, in
     assert offset == [s1.stop - s2.stop for s1, s2 in zip(normalized_from_slice, normalized_to_slice)], \
         "Slices have to have same size"
 
-    update_eqs = []
-    for i in range(index_dim_shape):
-        eq = Assignment(f(i), f[tuple(offset)](i))
-        update_eqs.append(eq)
+    update_eqs = set()
+    if update_rule_assignments is None:
+        for i in range(index_dim_shape):
+            eq = Assignment(f(i), f[tuple(offset)](i))
+            update_eqs.add(eq)
+    else:
+        for update_rule_assignment in update_rule_assignments:
+            if reads_writes == "pull":
+                field_accesses = update_rule_assignment.rhs.atoms(Field.Access)
+            else:
+                field_accesses = update_rule_assignment.lhs.atoms(Field.Access)
+
+            for field_access in field_accesses:
+                if 0 in stencil:
+                    for stencil_part, off in zip(stencil, field_access.offsets):
+                        if (stencil_part < 0 and off < 0) or (stencil_part > 0 and off > 0):
+                            if reads_writes == "pull":
+                                eq = Assignment(f(field_access.index[0]), f[tuple(offset)](field_access.index[0]))
+                                update_eqs.add(eq)
+                            if reads_writes == "push":
+                                eq = Assignment(f[tuple(offset)](field_access.index[0]), f(field_access.index[0]))
+                                update_eqs.add(eq)
+                else:
+                    if stencil == field_access.offsets:
+                        if reads_writes == "pull":
+                            eq = Assignment(f(field_access.index[0]), f[tuple(offset)](field_access.index[0]))
+                            update_eqs.add(eq)
+                        if reads_writes == "push":
+                            eq = Assignment(f[tuple(offset)](field_access.index[0]), f(field_access.index[0]))
+                            update_eqs.add(eq)
+
+    kernels = []
+    if len(update_eqs) > 0:
+        ast = create_kernel(list(update_eqs), iteration_slice=to_slice,
+                            skip_independence_check=True, target=target)
+        if target == 'cpu':
+            kernels.append(make_python_function_cpu(ast))
+        else:
+            kernels.append(make_python_function_gpu(ast))
 
-    ast = create_cuda_kernel(update_eqs, iteration_slice=to_slice, skip_independence_check=True)
-    return make_python_function(ast)
+    return kernels
 
 
 def get_periodic_boundary_functor(stencil, domain_size, index_dimensions=0, index_dim_shape=1, ghost_layers=1,
-                                  thickness=None, dtype=float):
+                                  thickness=None, dtype=float, target=None, update_rule=None, reads_writes="pull"):
+
+    # sort stencil entry's by number of zeros to make sure, that surfaces, edges and corners are
+    # communicated in that order. Otherwise values (e.g. corner values) will be overwritten
+    stencil.sort(key=number_of_zeros, reverse=True)
+
     src_dst_slice_tuples = get_periodic_boundary_src_dst_slices(stencil, ghost_layers, thickness)
     kernels = []
-    index_dimensions = index_dimensions
-    for src_slice, dst_slice in src_dst_slice_tuples:
-        kernels.append(create_copy_kernel(domain_size, src_slice, dst_slice, index_dimensions, index_dim_shape, dtype))
+
+    if isinstance(update_rule, Assignment):
+        update_rule_assignments = [update_rule]
+    elif isinstance(update_rule, (AssignmentCollection, LbmCollisionRule)):
+        update_rule_assignments = update_rule.all_assignments
+    else:
+        update_rule_assignments = None
+
+    # by default we only get the communication pattern for pull stream
+    if reads_writes is not "pull" or reads_writes is not "push":
+        assert "not a valid argument for the definition of read or/and write access"
+
+    for i, (src_slice, dst_slice) in enumerate(src_dst_slice_tuples):
+        for j in create_copy_kernel(domain_size, src_slice, dst_slice, stencil[i], index_dimensions,
+                                    index_dim_shape, dtype, target, update_rule_assignments, reads_writes):
+            kernels.append(j)
 
     def functor(pdfs, **_):
         for kernel in kernels:
             kernel(pdfs=pdfs)
 
     return functor
+
+
+def number_of_zeros(stencil_entry):
+    count_zeros = 0
+    for tmp in stencil_entry:
+        if tmp == 0:
+            count_zeros += 1
+
+    return count_zeros
diff --git a/pystencils/transformations.py b/pystencils/transformations.py
index 4490e3e1a8e265549879e30fe7c60da02e510336..0346b7d93744e30f968dcb6567cfd8706b6fdc37 100644
--- a/pystencils/transformations.py
+++ b/pystencils/transformations.py
@@ -175,12 +175,13 @@ def make_loop_over_domain(body, function_name, iteration_slice=None, ghost_layer
 
     if iteration_slice is not None:
         iteration_slice = normalize_slice(iteration_slice, shape)
-
-    if ghost_layers is None:
-        required_ghost_layers = max([fa.required_ghost_layers for fa in field_accesses])
-        ghost_layers = [(required_ghost_layers, required_ghost_layers)] * len(loop_order)
-    if isinstance(ghost_layers, int):
-        ghost_layers = [(ghost_layers, ghost_layers)] * len(loop_order)
+        ghost_layers = 0
+    else:
+        if ghost_layers is None:
+            required_ghost_layers = max([fa.required_ghost_layers for fa in field_accesses])
+            ghost_layers = [(required_ghost_layers, required_ghost_layers)] * len(loop_order)
+        if isinstance(ghost_layers, int):
+            ghost_layers = [(ghost_layers, ghost_layers)] * len(loop_order)
 
     current_body = body
     for i, loop_coordinate in enumerate(reversed(loop_order)):
diff --git a/pystencils_tests/test_cudagpu.py b/pystencils_tests/test_cudagpu.py
index 3afddb128d2bb649f6bc5454d3ddfcd71d2a61a2..4a682526d0fbeaad813ddb308954508468fc80cb 100644
--- a/pystencils_tests/test_cudagpu.py
+++ b/pystencils_tests/test_cudagpu.py
@@ -135,7 +135,7 @@ def test_periodicity():
     arr_gpu = gpuarray.to_gpu(arr_cpu)
 
     periodicity_stencil = [(1, 0), (-1, 0), (1, 1)]
-    periodic_gpu_kernel = periodic_gpu(periodicity_stencil, (5, 5), 1, 2)
+    periodic_gpu_kernel = periodic_gpu(periodicity_stencil, (5, 5), 1, 2, target='gpu')
     periodic_cpu_kernel = periodic_cpu(periodicity_stencil)
 
     cpu_result = np.copy(arr_cpu)