diff --git a/cache.py b/cache.py
index 261e76231fd25fc43871c9022072e776f3aeae8f..40a0120226827eddbc56eb882f84159d5ba10cfc 100644
--- a/cache.py
+++ b/cache.py
@@ -21,3 +21,7 @@ except ImportError:
     diskcache = memorycache(maxsize=64)
     diskcacheNoFallback = lambda o: o
 
+
+# Disable memory cache:
+# diskcache = lambda o: o
+# diskcacheNoFallback = lambda o: o
diff --git a/datahandling.py b/datahandling.py
index 38d3d1afd242bb30688ab648b518c531595e1072..9a1411861aada30fadd32c181a476b8d88cbca29 100644
--- a/datahandling.py
+++ b/datahandling.py
@@ -6,7 +6,7 @@ from contextlib import contextmanager
 
 from lbmpy.stencils import getStencil
 from pystencils import Field
-from pystencils.parallel.blockiteration import BlockIterationInfo
+from pystencils.parallel.blockiteration import Block, SerialBlock
 from pystencils.slicing import normalizeSlice, removeGhostLayers
 from pystencils.utils import DotDict
 
@@ -93,21 +93,13 @@ class DataHandling(ABC):
         """Dictionary mapping data name to symbolic pystencils field - use this to create pystencils kernels"""
 
     @abstractmethod
-    def accessArray(self, name, sliceObj=None, innerGhostLayers=None, outerGhostLayers=0):
-        """
-        Generator yielding locally stored sub-arrays together with information about their place in the global domain
-
-        :param name: name of data to access
-        :param sliceObj: optional rectangular sub-region to access
-        :param innerGhostLayers: how many inner (not at domain border) ghost layers to include
-        :param outerGhostLayers: how many ghost layers at the domain border to include
-        Yields a numpy array with local part of data and a BlockIterationInfo object containing geometric information
-        """
+    def ghostLayersOfField(self, name):
+        """Returns the number of ghost layers for a specific field/array"""
 
     @abstractmethod
-    def accessCustomData(self, name):
+    def iterate(self, sliceObj=None, gpu=False, ghostLayers=None):
         """
-        Similar to accessArray, however for custom data no slicing and ghost layer info is necessary/available
+        Iterate over local part of potentially distributed data structure.
         """
 
     @abstractmethod
@@ -122,6 +114,14 @@ class DataHandling(ABC):
         :return: generator expression yielding the gathered field, the gathered field does not include any ghost layers
         """
 
+    @abstractmethod
+    def runKernel(self, kernelFunc, *args, **kwargs):
+        """
+        Runs a compiled pystencils kernel using the arrays stored in the DataHandling class for all array parameters
+        Additional passed arguments are directly passed to the kernel function and override possible parameters from
+        the DataHandling
+        """
+
     def registerPreAccessFunction(self, name, fct):
         self._preAccessFunctions[name].append(fct)
 
@@ -223,6 +223,9 @@ class SerialDataHandling(DataHandling):
     def fields(self):
         return self._fields
 
+    def ghostLayersOfField(self, name):
+        return self._fieldInformation[name]['ghostLayers']
+
     def addArray(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None,
                  cpu=True, gpu=False):
         if ghostLayers is None:
@@ -283,25 +286,25 @@ class SerialDataHandling(DataHandling):
     def addArrayLike(self, name, nameOfTemplateField, latexName=None, cpu=True, gpu=False):
         self.addArray(name, latexName=latexName, cpu=cpu, gpu=gpu, **self._fieldInformation[nameOfTemplateField])
 
-    def accessArray(self, name, sliceObj=None, outerGhostLayers='all', **kwargs):
-        if outerGhostLayers == 'all' or outerGhostLayers is True:
-            outerGhostLayers = self._fieldInformation[name]['ghostLayers']
-        elif outerGhostLayers is False:
-            outerGhostLayers = 0
-
+    def iterate(self, sliceObj=None, gpu=False, ghostLayers=None):
+        if ghostLayers is None:
+            ghostLayers = self.defaultGhostLayers
         if sliceObj is None:
-            sliceObj = [slice(None, None)] * self.dim
-
-        with self.accessWrapper(name):
-            arr = self.cpuArrays[name]
-            glToRemove = self._fieldInformation[name]['ghostLayers'] - outerGhostLayers
-            assert glToRemove >= 0
-            arr = removeGhostLayers(arr, indexDimensions=self.fields[name].indexDimensions, ghostLayers=glToRemove)
-            sliceObj = normalizeSlice(sliceObj, arr.shape[:self.dim])
-            yield arr[sliceObj], BlockIterationInfo(None, tuple(s.start for s in sliceObj), sliceObj)
-
-    def accessCustomData(self, name):
-        yield self.customDataCpu[name], ((0, 0, 0)[:self.dim], self._domainSize)
+            sliceObj = (slice(None, None, None),) * self.dim
+        sliceObj = normalizeSlice(sliceObj, tuple(s + 2 * ghostLayers for s in self._domainSize))
+        sliceObj = tuple(s if type(s) is slice else slice(s, s+1, None) for s in sliceObj)
+
+        arrays = self.gpuArrays if gpu else self.cpuArrays
+        iterDict = {}
+        for name, arr in arrays.items():
+            fieldGls = self._fieldInformation[name]['ghostLayers']
+            if fieldGls < ghostLayers:
+                continue
+            arr = removeGhostLayers(arr, indexDimensions=len(arr.shape) - self.dim, ghostLayers=fieldGls-ghostLayers)
+            iterDict[name] = arr
+
+        offset = tuple(s.start - ghostLayers for s in sliceObj)
+        yield SerialBlock(iterDict, offset, sliceObj)
 
     def gatherArray(self, name, sliceObj=None, **kwargs):
         with self.accessWrapper(name):
diff --git a/parallel/blockiteration.py b/parallel/blockiteration.py
index 0742f6c61621cab0966fb101241f78c495fc6ef0..0253681286b98007304186be77e880514a39c532 100644
--- a/parallel/blockiteration.py
+++ b/parallel/blockiteration.py
@@ -1,59 +1,48 @@
+"""
+This module contains function that simplify the iteration over waLBerlas distributed data structure.
+These function simplify the iteration over rectangular slices, managing the mapping between block local coordinates and
+global coordinates.
+"""
 import numpy as np
 import waLBerla as wlb
 from pystencils.slicing import normalizeSlice
 
 
-class BlockIterationInfo:
-    def __init__(self, block, offset, localSlice):
-        self._block = block
-        self._offset = offset
-        self._localSlice = localSlice
-
-    @property
-    def block(self):
-        return self._block
-
-    @property
-    def offset(self):
-        return self._offset
-
-    @property
-    def shape(self):
-        return tuple(s.stop - s.start for s in self._localSlice)
-
-    @property
-    def localSlice(self):
-        """Slice object of intersection between current block and iteration interval in local coordinates"""
-        return self._localSlice
-
-    @property
-    def midpointArrays(self):
-        """Global coordinate meshgrid of cell midpoints which are shifted by 0.5 compared to cell indices"""
-        meshGridParams = [offset + 0.5 + np.arange(width, dtype=float)
-                          for offset, width in zip(self.offset, self.shape)]
-        return np.meshgrid(*meshGridParams, indexing='ij', copy=False)
-
-    @property
-    def cellIndexArrays(self):
-        """Global coordinate meshgrid of cell coordinates. Cell indices start at 0 at the first inner cell,
-        ghost layers have negative indices"""
-        meshGridParams = [offset + np.arange(width, dtype=np.int32)
-                          for offset, width in zip(self.offset, self.shape)]
-        return np.meshgrid(*meshGridParams, indexing='ij', copy=False)
+def blockIteration(blocks, ghostLayers, dim=3, accessPrefix=''):
+    """
+    Iterator that simplifies the access to field data by automatically converting from waLBerla fields to
+    numpy arrays
+    :param blocks: waLBerla block data structure
+    :param ghostLayers: how many ghost layers to include (outer and inner)
+    :param dim: waLBerlas block data structure is 3D - 2D domains can be done by setting zSize=1
+                if dim=2 is set here, the third coordinate of the returned fields is accessed at z=0 automatically
+    :param accessPrefix: see documentation of slicedBlockIteration
+    """
+    for block in blocks:
+        cellInterval = blocks.getBlockCellBB(block)
+        cellInterval.expand(ghostLayers)
+        localSlice = [slice(0, w, None) for w in cellInterval.size]
+        if dim == 2:
+            localSlice[2] = ghostLayers
+        yield ParallelBlock(block, cellInterval.min, localSlice, ghostLayers, accessPrefix)
 
 
-def slicedBlockIteration(blocks, sliceObj=None, innerGhostLayers=1, sliceNormalizationGhostLayers=1):
+def slicedBlockIteration(blocks, sliceObj=None, innerGhostLayers=1, outerGhostLayers=1, dim=3, accessPrefix=''):
     """
     Iterates of all blocks that have an intersection with the given slice object.
-    For these blocks a BlockIterationInfo object is yielded
+    For these blocks a Block object is yielded
     
     :param blocks: waLBerla block data structure
-    :param sliceObj: a slice (i.e. rectangular subregion), can be created with makeSlice[]
+    :param sliceObj: a slice (i.e. rectangular sub-region), can be created with makeSlice[]
     :param innerGhostLayers: how many ghost layers are included in the local slice and the optional index arrays
-    :param sliceNormalizationGhostLayers: slices can have relative coordinates e.g. makeSlice[0.2, :, :]
-                                          when computing absolute values, the domain size is needed. This parameter 
-                                          specifies how many ghost layers are taken into account for this operation.
-
+    :param outerGhostLayers: slices can have relative coordinates e.g. makeSlice[0.2, :, :]
+                             when computing absolute values, the domain size is needed. This parameter
+                             specifies how many ghost layers are taken into account for this operation.
+    :param dim: set to 2 for pseudo 2D simulation (i.e. where z coordinate of blocks has extent 1)
+                the arrays returned when indexing the block
+    :param accessPrefix: when accessing block data, this prefix is prepended to the access name
+                         mostly used to switch between CPU and GPU field access (gpu fields are added with a
+                         certain prefix 'gpu_')
     Example: assume no slice is given, then sliceNormalizationGhostLayers effectively sets how much ghost layers
     at the border of the domain are included. The innerGhostLayers parameter specifies how many inner ghost layers are
     included
@@ -62,10 +51,10 @@ def slicedBlockIteration(blocks, sliceObj=None, innerGhostLayers=1, sliceNormali
         sliceObj = [slice(None, None, None)] * 3
 
     domainCellBB = blocks.getDomainCellBB()
-    domainExtent = [s + 2 * sliceNormalizationGhostLayers for s in domainCellBB.size]
+    domainExtent = [s + 2 * outerGhostLayers for s in domainCellBB.size]
     sliceObj = normalizeSlice(sliceObj, domainExtent)
     targetCellBB = wlb.CellInterval.fromSlice(sliceObj)
-    targetCellBB.shift(*[a - sliceNormalizationGhostLayers for a in domainCellBB.min])
+    targetCellBB.shift(*[a - outerGhostLayers for a in domainCellBB.min])
 
     for block in blocks:
         intersection = blocks.getBlockCellBB(block).getExpanded(innerGhostLayers)
@@ -76,5 +65,75 @@ def slicedBlockIteration(blocks, sliceObj=None, innerGhostLayers=1, sliceNormali
         localTargetBB = blocks.transformGlobalToLocal(block, intersection)
         localTargetBB.shift(innerGhostLayers, innerGhostLayers, innerGhostLayers)
         localSlice = localTargetBB.toSlice(False)
-        yield BlockIterationInfo(block, intersection.min, localSlice)
+        if dim == 2:
+            localSlice = (localSlice[0], localSlice[1], innerGhostLayers)
+        yield ParallelBlock(block, intersection.min, localSlice, innerGhostLayers, accessPrefix)
+
+
+class Block:
+    def __init__(self, offset, localSlice):
+        self._offset = offset
+        self._localSlice = localSlice
+
+    @property
+    def offset(self):
+        """Offset of the current block in global coordinates (where lower ghost layers have negative indices)"""
+        return self._offset
+
+    @property
+    def cellIndexArrays(self):
+        """Global coordinate meshgrid of cell coordinates. Cell indices start at 0 at the first inner cell,
+        lower ghost layers have negative indices"""
+        meshGridParams = [offset + np.arange(width, dtype=np.int32)
+                          for offset, width in zip(self.offset, self.shape)]
+        return np.meshgrid(*meshGridParams, indexing='ij', copy=False)
+
+    @property
+    def midpointArrays(self):
+        """Global coordinate meshgrid of cell midpoints which are shifted by 0.5 compared to cell indices"""
+        meshGridParams = [offset + 0.5 + np.arange(width, dtype=float)
+                          for offset, width in zip(self.offset, self.shape)]
+        return np.meshgrid(*meshGridParams, indexing='ij', copy=False)
+
+    @property
+    def shape(self):
+        """Shape of the fields (potentially including ghost layers)"""
+        return tuple(s.stop - s.start for s in self._localSlice)
+
+
+# ----------------------------- Implementation details -----------------------------------------------------------------
+
+
+class SerialBlock(Block):
+    """Simple mockup block that is used if SerialDataHandling."""
+    def __init__(self, fieldDict, offset, localSlice):
+        super(SerialBlock, self).__init__(offset, localSlice)
+        self._fieldDict = fieldDict
+
+    def __getitem__(self, dataName):
+        return self._fieldDict[dataName][self._localSlice]
+
+
+class ParallelBlock(Block):
+    def __init__(self, block, offset, localSlice, innerGhostLayers, namePrefix):
+        super(ParallelBlock, self).__init__(offset, localSlice)
+        self._block = block
+        self._gls = innerGhostLayers
+        self._namePrefix = namePrefix
+
+    def __getitem__(self, dataName):
+        result = self._block[self._namePrefix + dataName]
+        typeName = type(result).__name__
+        if typeName == 'GhostLayerField':
+            result = wlb.field.toArray(result, withGhostLayers=self._gls)
+            result = self._normalizeArrayShape(result)
+        elif typeName == 'GpuField':
+            result = wlb.cuda.toGpuArray(result, withGhostLayers=self._gls)
+            result = self._normalizeArrayShape(result)
+        return result
+
+    def _normalizeArrayShape(self, arr):
+        if arr.shape[-1] == 1:
+            arr = arr[..., 0]
+        return arr[self._localSlice]
 
diff --git a/parallel/datahandling.py b/parallel/datahandling.py
index 3052ef9680e66011b6a75f5f42d6d871e051b097..679f37a9b0be9487d9d6895abfb42bf0ecd0167d 100644
--- a/parallel/datahandling.py
+++ b/parallel/datahandling.py
@@ -1,7 +1,7 @@
 import numpy as np
 from pystencils import Field, makeSlice
 from pystencils.datahandling import DataHandling
-from pystencils.parallel.blockiteration import slicedBlockIteration
+from pystencils.parallel.blockiteration import slicedBlockIteration, blockIteration
 from pystencils.utils import DotDict
 import waLBerla as wlb
 
@@ -44,6 +44,9 @@ class ParallelDataHandling(DataHandling):
     def fields(self):
         return self._fields
 
+    def ghostLayersOfField(self, name):
+        return self._fieldInformation[name]['ghostLayers']
+
     def addCustomData(self, name, cpuCreationFunction,
                       gpuCreationFunction=None, cpuToGpuTransferFunc=None, gpuToCpuTransferFunc=None):
         self.blocks.addBlockData(name, cpuCreationFunction)
@@ -53,7 +56,8 @@ class ParallelDataHandling(DataHandling):
                 raise ValueError("For GPU data, both transfer functions have to be specified")
             self._customDataTransferFunctions[name] = (cpuToGpuTransferFunc, gpuToCpuTransferFunc)
 
-    def addArray(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None, cpu=True, gpu=False):
+    def addArray(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None,
+                 layout=None, cpu=True, gpu=False):
         if ghostLayers is None:
             ghostLayers = self.defaultGhostLayers
         if layout is None:
@@ -112,31 +116,15 @@ class ParallelDataHandling(DataHandling):
         for block in self.blocks:
             block[name1].swapDataPointers(block[name2])
 
-    def accessArray(self, name, sliceObj=None, innerGhostLayers='all', outerGhostLayers='all'):
-        fieldInfo = self._fieldInformation[name]
-        with self.accessWrapper(name):
-            if innerGhostLayers is 'all' or innerGhostLayers is True:
-                innerGhostLayers = fieldInfo['ghostLayers']
-            elif innerGhostLayers is False:
-                innerGhostLayers = 0
-            if outerGhostLayers is 'all' or innerGhostLayers is True:
-                outerGhostLayers = fieldInfo['ghostLayers']
-            elif outerGhostLayers is False:
-                outerGhostLayers = 0
-            glAccess = [innerGhostLayers, innerGhostLayers, 0 if self.dim == 2 else innerGhostLayers]
-            for iterInfo in slicedBlockIteration(self.blocks, sliceObj, innerGhostLayers, outerGhostLayers):
-                arr = wlb.field.toArray(iterInfo.block[name], withGhostLayers=glAccess)[iterInfo.localSlice]
-                arr = self._normalizeArrShape(arr, self.fields[name].indexDimensions)
-                yield arr, iterInfo
-
-    def accessCustomData(self, name):
-        with self.accessWrapper(name):
-            for block in self.blocks:
-                data = block[name]
-                cellBB = self.blocks.getBlockCellBB(block)
-                min = cellBB.min[:self.dim]
-                max = tuple(e + 1 for e in cellBB.max[:self.dim])
-                yield data, (min, max)
+    def iterate(self, sliceObj=None, gpu=False, ghostLayers=None):
+        if ghostLayers is None:
+            ghostLayers = self.defaultGhostLayers
+        prefix = self.GPU_DATA_PREFIX if gpu else ""
+        if sliceObj is None:
+            yield from slicedBlockIteration(self.blocks, sliceObj, ghostLayers, ghostLayers,
+                                            self.dim, prefix)
+        else:
+            yield from blockIteration(self.blocks, ghostLayers, self.dim, prefix)
 
     def gatherArray(self, name, sliceObj=None, allGather=False):
         with self.accessWrapper(name):