diff --git a/datahandling/__init__.py b/datahandling/__init__.py
index 533598c5856740197a4f8544286108a96f67b702..e30d0a5a3fff24a567f84155b43ac522ba8684b1 100644
--- a/datahandling/__init__.py
+++ b/datahandling/__init__.py
@@ -1,9 +1,9 @@
-from .serial_datahandling import SerialDataHandling
+from pystencils.datahandling.serial_datahandling import SerialDataHandling
 
 try:
     import waLBerla
     if waLBerla.cpp_available:
-        from .parallel_datahandling import ParallelDataHandling
+        from pystencils.datahandling.parallel_datahandling import ParallelDataHandling
     else:
         waLBerla = None
 except ImportError:
@@ -31,7 +31,7 @@ def createDataHandling(parallel, domainSize, periodicity, defaultLayout='SoA', d
         else:
             dim = 3
 
-        blockStorage = waLBerla.createUniformBlockGrid(cells=domainSize, periodicity=periodicity)
+        blockStorage = waLBerla.createUniformBlockGrid(cells=domainSize, periodic=periodicity)
         return ParallelDataHandling(blocks=blockStorage, dim=dim,
                                     defaultLayout=defaultLayout, defaultGhostLayers=defaultGhostLayers)
     else:
diff --git a/datahandling/parallel_datahandling.py b/datahandling/parallel_datahandling.py
index 5e9a1cdc70fd3ba4c2943e6fb0cc4e8ebfdf519e..b92e7963b02f8427b8cd29c638ce6bcbfcba91cd 100644
--- a/datahandling/parallel_datahandling.py
+++ b/datahandling/parallel_datahandling.py
@@ -127,22 +127,23 @@ class ParallelDataHandling(DataHandling):
             ghostLayers = 0
 
         prefix = self.GPU_DATA_PREFIX if gpu else ""
-        if sliceObj is None:
+        if sliceObj is not None:
             yield from slicedBlockIteration(self.blocks, sliceObj, ghostLayers, ghostLayers,
                                             self.dim, prefix)
         else:
             yield from blockIteration(self.blocks, ghostLayers, self.dim, prefix)
 
     def gatherArray(self, name, sliceObj=None, allGather=False):
-        with self.accessWrapper(name):
-            if sliceObj is None:
-                sliceObj = makeSlice[:, :, :]
-            for array in wlb.field.gatherGenerator(self.blocks, name, sliceObj, allGather):
-                if self.fields[name].indexDimensions == 0:
-                    array = array[..., 0]
-                if self.dim == 2:
-                    array = array[:, :, 0]
-                yield array
+        if sliceObj is None:
+            sliceObj = makeSlice[:, :, :]
+        if self.dim == 2:
+            sliceObj += (0.5,)
+        for array in wlb.field.gatherGenerator(self.blocks, name, sliceObj, allGather):
+            if self.fields[name].indexDimensions == 0:
+                array = array[..., 0]
+            if self.dim == 2:
+                array = array[:, :, 0]
+            yield array
 
     def _normalizeArrShape(self, arr, indexDimensions):
         if indexDimensions == 0:
@@ -159,7 +160,7 @@ class ParallelDataHandling(DataHandling):
             nameMap = self._fieldNameToCpuDataName
             toArray = wlb.field.toArray
         dataUsedInKernel = [(nameMap[p.fieldName], self.fields[p.fieldName])
-                            for p in kernelFunc.ast.parameters if p.isFieldPtrArgument]
+                            for p in kernelFunc.parameters if p.isFieldPtrArgument]
         for block in self.blocks:
             fieldArgs = {}
             for dataName, f in dataUsedInKernel:
diff --git a/datahandling/serial_datahandling.py b/datahandling/serial_datahandling.py
index b41b5984fe2d4d6ff7b30b5e1fffb4d4d1a54212..3fd69756a14e788ec594acadba4a2c6e51676641 100644
--- a/datahandling/serial_datahandling.py
+++ b/datahandling/serial_datahandling.py
@@ -9,6 +9,7 @@ from pystencils.datahandling.datahandling_interface import DataHandling
 
 try:
     import pycuda.gpuarray as gpuarray
+    import pycuda.autoinit
 except ImportError:
     gpuarray = None
 
@@ -159,7 +160,8 @@ class SerialDataHandling(DataHandling):
         arr = removeGhostLayers(arr, indexDimensions=indDimensions, ghostLayers=gls)
 
         if sliceObj is not None:
-            sliceObj = normalizeSlice(sliceObj, arr.shape[:-indDimensions])
+            sliceObj = normalizeSlice(sliceObj, arr.shape[:-indDimensions] if indDimensions > 0 else arr.shape)
+            sliceObj = tuple(s if type(s) is slice else slice(s, s + 1, None) for s in sliceObj)
             arr = arr[sliceObj]
         yield arr
 
@@ -208,6 +210,8 @@ class SerialDataHandling(DataHandling):
     def _synchronizationFunctor(self, names, stencil, target):
         if stencil is None:
             stencil = 'D3Q27' if self.dim == 3 else 'D2Q9'
+        if stencil == 'D3Q15' or stencil == 'D3Q19':
+            stencil = 'D3Q27'
 
         assert stencil in ("D2Q9", 'D3Q27'), "Serial scenario support only D2Q9 or D3Q27 for periodicity sync"
 
diff --git a/parallel/blockiteration.py b/parallel/blockiteration.py
index 44a22d95e9ef7ec48ec859f9c3a7160826d84637..6e12791a856966df79909d4fd9a6f370ed8fede8 100644
--- a/parallel/blockiteration.py
+++ b/parallel/blockiteration.py
@@ -24,7 +24,7 @@ def blockIteration(blocks, ghostLayers, dim=3, accessPrefix=''):
         localSlice = [slice(0, w, None) for w in cellInterval.size]
         if dim == 2:
             localSlice[2] = ghostLayers
-        yield ParallelBlock(block, cellInterval.min, localSlice, ghostLayers, accessPrefix)
+        yield ParallelBlock(block, cellInterval.min, tuple(localSlice), ghostLayers, accessPrefix)
 
 
 def slicedBlockIteration(blocks, sliceObj=None, innerGhostLayers=1, outerGhostLayers=1, dim=3, accessPrefix=''):
@@ -48,7 +48,9 @@ def slicedBlockIteration(blocks, sliceObj=None, innerGhostLayers=1, outerGhostLa
     included
     """
     if sliceObj is None:
-        sliceObj = [slice(None, None, None)] * 3
+        sliceObj = tuple([slice(None, None, None)] * dim)
+    if dim == 2:
+        sliceObj += (innerGhostLayers, )
 
     domainCellBB = blocks.getDomainCellBB()
     domainExtent = [s + 2 * outerGhostLayers for s in domainCellBB.size]