diff --git a/cpu/cpujit.py b/cpu/cpujit.py
index 7fcfab610c353bcd1c0fc7c55f204447524d0493..7f36c88c37a11a7e6697e4b1949152c3b4dfeb53 100644
--- a/cpu/cpujit.py
+++ b/cpu/cpujit.py
@@ -440,4 +440,5 @@ def makePythonFunctionIncompleteParams(kernelFunctionNode, argumentDict, func):
             cacheValues.append(kwargs)  # keep objects alive such that ids remain unique
             func(*args)
     wrapper.ast = kernelFunctionNode
+    wrapper.parameters = kernelFunctionNode.parameters
     return wrapper
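
Note: the new .parameters attribute mirrors kernelFunctionNode.parameters on the wrapper itself, so callers can inspect a compiled kernel without reaching into wrapper.ast. A minimal usage sketch, where 'kernel' stands for any wrapper returned by this function (the field-name extraction mirrors what runKernel in datahandling.py does later in this patch):

    # Sketch: 'kernel' is a wrapper returned by makePythonFunctionIncompleteParams.
    fieldNames = [p.fieldName for p in kernel.parameters if p.isFieldPtrArgument]
    # Before this patch the same information required kernel.ast.parameters.
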
diff --git a/datahandling.py b/datahandling.py
index 9a1411861aada30fadd32c181a476b8d88cbca29..e43c5f5743a612b4f09c85980882735b520afbcd 100644
--- a/datahandling.py
+++ b/datahandling.py
@@ -1,11 +1,9 @@
 import numpy as np
 from abc import ABC, abstractmethod
 
-from collections import defaultdict
-from contextlib import contextmanager
-
 from lbmpy.stencils import getStencil
 from pystencils import Field
+from pystencils.field import layoutStringToTuple, spatialLayoutStringToTuple, createNumpyArrayWithLayout
 from pystencils.parallel.blockiteration import Block, SerialBlock
 from pystencils.slicing import normalizeSlice, removeGhostLayers
 from pystencils.utils import DotDict
@@ -26,10 +24,6 @@ class DataHandling(ABC):
     'gather' function that collects (parts of) the distributed data on a single process.
     """
 
-    def __init__(self):
-        self._preAccessFunctions = defaultdict(list)
-        self._postAccessFunctions = defaultdict(list)
-
     # ---------------------------- Adding and accessing data -----------------------------------------------------------
 
     @property
@@ -87,6 +81,13 @@ class DataHandling(ABC):
         :return:
         """
 
+    def addCustomClass(self, name, classObj, cpu=True, gpu=False):
+        self.addCustomData(name,
+                           cpuCreationFunction=classObj if cpu else None,
+                           gpuCreationFunction=classObj if gpu else None,
+                           cpuToGpuTransferFunc=classObj.toGpu if cpu and gpu and hasattr(classObj, 'toGpu') else None,
+                           gpuToCpuTransferFunc=classObj.toCpu if cpu and gpu and hasattr(classObj, 'toCpu') else None)
+
     @property
     @abstractmethod
     def fields(self):
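
Note: addCustomClass is a convenience wrapper around addCustomData: the class itself acts as the creation function, and when both cpu and gpu are requested its toGpu/toCpu methods are registered as the transfer-function pair. A sketch under assumed names (FlagStorage and the SerialDataHandling constructor arguments are illustrative, not part of this patch):

    import numpy as np

    class FlagStorage:
        def __init__(self):
            self.flags = np.zeros((16, 16), dtype=np.uint32)
        def toGpu(self, *args):
            pass  # would copy the CPU data to the GPU representation
        def toCpu(self, *args):
            pass  # would copy the GPU data back

    dh = SerialDataHandling(domainSize=(16, 16))   # constructor args assumed
    dh.addCustomClass('flags', FlagStorage)        # CPU-only: no transfer funcs needed
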
@@ -122,19 +123,6 @@ class DataHandling(ABC):
         the DataHandling
         """
 
-    def registerPreAccessFunction(self, name, fct):
-        self._preAccessFunctions[name].append(fct)
-
-    def registerPostAccessFunction(self, name, fct):
-        self._postAccessFunctions[name].append(fct)
-
-    @contextmanager
-    def accessWrapper(self, name):
-        for func in self._preAccessFunctions[name]:
-            func()
-        yield
-        for func in self._postAccessFunctions[name]:
-            func()
     # ------------------------------- CPU/GPU transfer -----------------------------------------------------------------
 
     @abstractmethod
@@ -235,12 +223,9 @@ class SerialDataHandling(DataHandling):
         if latexName is None:
             latexName = name
 
-        assert layout in ('SoA', 'AoS')
-
         kwargs = {
             'shape': tuple(s + 2 * ghostLayers for s in self._domainSize),
             'dtype': dtype,
-            'order': 'C' if layout == 'AoS' else 'F',
         }
         self._fieldInformation[name] = {
             'ghostLayers': ghostLayers,
@@ -252,50 +237,64 @@ class SerialDataHandling(DataHandling):
         if fSize > 1:
             kwargs['shape'] = kwargs['shape'] + (fSize,)
             indexDimensions = 1
+            layoutTuple = layoutStringToTuple(layout, self.dim+1)
         else:
             indexDimensions = 0
+            layoutTuple = spatialLayoutStringToTuple(layout, self.dim)
+
+        # cpuArr is always created, since there is no createPycudaArrayWithLayout()
+        cpuArr = createNumpyArrayWithLayout(layout=layoutTuple, **kwargs)
         if cpu:
             if name in self.cpuArrays:
                 raise ValueError("CPU Field with this name already exists")
-            self.cpuArrays[name] = np.empty(**kwargs)
+            self.cpuArrays[name] = cpuArr
         if gpu:
             if name in self.gpuArrays:
                 raise ValueError("GPU Field with this name already exists")
-
-            self.gpuArrays[name] = gpuarray.empty(**kwargs)
+            self.gpuArrays[name] = gpuarray.to_gpu(cpuArr)
 
         assert all(f.name != latexName for f in self.fields.values()), "Symbolic field with this name already exists"
         self.fields[name] = Field.createFixedSize(latexName, shape=kwargs['shape'], indexDimensions=indexDimensions,
-                                                  dtype=kwargs['dtype'], layout=kwargs['order'])
+                                                  dtype=kwargs['dtype'], layout=layout)
         self._fieldLatexNameToDataName[latexName] = name
 
     def addCustomData(self, name, cpuCreationFunction,
                       gpuCreationFunction=None, cpuToGpuTransferFunc=None, gpuToCpuTransferFunc=None):
-        assert name not in self.cpuArrays
-        assert name not in self.customDataCpu
-        self.customDataCpu[name] = cpuCreationFunction()
-        if gpuCreationFunction:
-            self.customDataGpu[name] = gpuCreationFunction()
+
+        if cpuCreationFunction and gpuCreationFunction:
             if cpuToGpuTransferFunc is None or gpuToCpuTransferFunc is None:
                 raise ValueError("For GPU data, both transfer functions have to be specified")
             self._customDataTransferFunctions[name] = (cpuToGpuTransferFunc, gpuToCpuTransferFunc)
 
+        assert name not in self.customDataCpu
+        if cpuCreationFunction:
+            assert name not in self.cpuArrays
+            self.customDataCpu[name] = cpuCreationFunction()
+
+        if gpuCreationFunction:
+            assert name not in self.gpuArrays
+            self.customDataGpu[name] = gpuCreationFunction()
+
     def hasData(self, name):
         return name in self.fields
 
     def addArrayLike(self, name, nameOfTemplateField, latexName=None, cpu=True, gpu=False):
         self.addArray(name, latexName=latexName, cpu=cpu, gpu=gpu, **self._fieldInformation[nameOfTemplateField])
 
-    def iterate(self, sliceObj=None, gpu=False, ghostLayers=None):
-        if ghostLayers is None:
+    def iterate(self, sliceObj=None, gpu=False, ghostLayers=True):
+        if ghostLayers is True:
             ghostLayers = self.defaultGhostLayers
+        elif ghostLayers is False:
+            ghostLayers = 0
+
         if sliceObj is None:
             sliceObj = (slice(None, None, None),) * self.dim
         sliceObj = normalizeSlice(sliceObj, tuple(s + 2 * ghostLayers for s in self._domainSize))
         sliceObj = tuple(s if type(s) is slice else slice(s, s+1, None) for s in sliceObj)
 
         arrays = self.gpuArrays if gpu else self.cpuArrays
-        iterDict = {}
+        customDataDict = self.customDataGpu if gpu else self.customDataCpu
+        iterDict = customDataDict.copy()
         for name, arr in arrays.items():
             fieldGls = self._fieldInformation[name]['ghostLayers']
             if fieldGls < ghostLayers:
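
Note: iterate's ghostLayers argument now distinguishes True (use defaultGhostLayers), False (no ghost layers), and an explicit integer; custom data objects are handed to the blocks alongside the arrays. A usage sketch with an assumed array name 'src':

    for block in dh.iterate():                   # True: defaultGhostLayers included
        block['src'].fill(0.0)
    for block in dh.iterate(ghostLayers=False):  # interior only
        block['src'].fill(1.0)
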
@@ -307,14 +306,13 @@ class SerialDataHandling(DataHandling):
         yield SerialBlock(iterDict, offset, sliceObj)
 
     def gatherArray(self, name, sliceObj=None, **kwargs):
-        with self.accessWrapper(name):
-            gls = self._fieldInformation[name]['ghostLayers']
-            arr = self.cpuArrays[name]
-            arr = removeGhostLayers(arr, indexDimensions=self.fields[name].indexDimensions, ghostLayers=gls)
+        gls = self._fieldInformation[name]['ghostLayers']
+        arr = self.cpuArrays[name]
+        arr = removeGhostLayers(arr, indexDimensions=self.fields[name].indexDimensions, ghostLayers=gls)
 
-            if sliceObj is not None:
-                arr = arr[sliceObj]
-            yield arr
+        if sliceObj is not None:
+            arr = arr[sliceObj]
+        yield arr
 
     def swap(self, name1, name2, gpu=False):
         if not gpu:
@@ -332,7 +330,7 @@ class SerialDataHandling(DataHandling):
 
     def runKernel(self, kernelFunc, *args, **kwargs):
         dataUsedInKernel = [self._fieldLatexNameToDataName[p.fieldName]
-                            for p in kernelFunc.ast.parameters if p.isFieldPtrArgument]
+                            for p in kernelFunc.parameters if p.isFieldPtrArgument]
         arrays = self.gpuArrays if kernelFunc.ast.backend == 'gpucuda' else self.cpuArrays
         arrayParams = {name: arrays[name] for name in dataUsedInKernel}
         arrayParams.update(kwargs)
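
Note: runKernel now reads the parameter list from the wrapper's .parameters attribute and injects the matching arrays by field name; scalar kernel parameters can still be supplied as keyword arguments. A sketch, with the kernel and the 'omega' parameter assumed:

    # Arrays for all field-pointer parameters are looked up automatically;
    # only non-array parameters need to be passed explicitly.
    dh.runKernel(kernel, omega=1.9)
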
diff --git a/field.py b/field.py
index 410952c492e025f3db17998619ad5bc7fb7815bb..def8e0dd71b57988bd2b0da7e40c6c9213eb4210 100644
--- a/field.py
+++ b/field.py
@@ -444,7 +444,7 @@ def getLayoutOfArray(arr, indexDimensionIds=[]):
     return getLayoutFromStrides(arr.strides, indexDimensionIds)
 
 
-def createNumpyArrayWithLayout(shape, layout):
+def createNumpyArrayWithLayout(shape, layout, **kwargs):
     """
     Creates a numpy array with a given memory layout
     :param shape: shape of the resulting array
@@ -469,7 +469,7 @@ def createNumpyArrayWithLayout(shape, layout):
     for a, b in swaps:
         shape[a], shape[b] = shape[b], shape[a]
 
-    res = np.empty(shape, order='c')
+    res = np.empty(shape, order='c', **kwargs)
     for a, b in reversed(swaps):
         res = res.swapaxes(a, b)
     return res
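
Note: forwarding **kwargs to np.empty lets callers pass a dtype (or other np.empty options) while keeping the layout handling, which is what SerialDataHandling.addArray above relies on. A small round-trip sketch, assuming the layout tuple lists axes from slowest to fastest varying:

    import numpy as np
    from pystencils.field import createNumpyArrayWithLayout, getLayoutOfArray

    arr = createNumpyArrayWithLayout(shape=(16, 16, 9), layout=(2, 0, 1), dtype=np.float32)
    assert arr.dtype == np.float32
    assert getLayoutOfArray(arr) == (2, 0, 1)
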
diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py
index 96087f1a48ce3478d321a00e63afb4895f2fcf38..6f5633d5e99cb7ff751800f94e1cab1d4369b1c6 100644
--- a/gpucuda/cudajit.py
+++ b/gpucuda/cudajit.py
@@ -53,6 +53,7 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}):
             func(*args, **dictWithBlockAndThreadNumbers)
         #cuda.Context.synchronize()  # uncomment for debugging: errors are then reported right after the kernel call
     wrapper.ast = kernelFunctionNode
+    wrapper.parameters = kernelFunctionNode.parameters
     return wrapper
 
 
diff --git a/parallel/blockiteration.py b/parallel/blockiteration.py
index 0253681286b98007304186be77e880514a39c532..e1eef994d78820a77d2386758a175cb5bb12b854 100644
--- a/parallel/blockiteration.py
+++ b/parallel/blockiteration.py
@@ -111,7 +111,10 @@ class SerialBlock(Block):
         self._fieldDict = fieldDict
 
     def __getitem__(self, dataName):
-        return self._fieldDict[dataName][self._localSlice]
+        result = self._fieldDict[dataName]
+        if isinstance(result, np.ndarray):
+            result = result[self._localSlice]
+        return result
 
 
 class ParallelBlock(Block):
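
Note: SerialBlock now slices only numpy arrays; custom data objects registered via addCustomData are returned untouched, since they generally do not support slicing. A sketch with assumed data names:

    for block in dh.iterate():
        vel = block['vel']      # np.ndarray: view restricted to the block's slice
        flags = block['flags']  # custom data object: returned as-is
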
diff --git a/parallel/datahandling.py b/parallel/datahandling.py
index 679f37a9b0be9487d9d6895abfb42bf0ecd0167d..3f6205e8fc9194d03e47dda8ee5cb95c46b8bad8 100644
--- a/parallel/datahandling.py
+++ b/parallel/datahandling.py
@@ -49,13 +49,16 @@ class ParallelDataHandling(DataHandling):
 
     def addCustomData(self, name, cpuCreationFunction,
                       gpuCreationFunction=None, cpuToGpuTransferFunc=None, gpuToCpuTransferFunc=None):
-        self.blocks.addBlockData(name, cpuCreationFunction)
-        if gpuCreationFunction:
-            self.blocks.addBlockData(self.GPU_DATA_PREFIX + name, gpuCreationFunction)
+        if cpuCreationFunction and gpuCreationFunction:
             if cpuToGpuTransferFunc is None or gpuToCpuTransferFunc is None:
                 raise ValueError("For GPU data, both transfer functions have to be specified")
             self._customDataTransferFunctions[name] = (cpuToGpuTransferFunc, gpuToCpuTransferFunc)
 
+        if cpuCreationFunction:
+            self.blocks.addBlockData(name, cpuCreationFunction)
+        if gpuCreationFunction:
+            self.blocks.addBlockData(self.GPU_DATA_PREFIX + name, gpuCreationFunction)
+
     def addArray(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None,
                  layout=None, cpu=True, gpu=False):
         if ghostLayers is None:
@@ -77,6 +80,7 @@ class ParallelDataHandling(DataHandling):
                                         'dtype': dtype}
 
         layoutMap = {'fzyx': wlb.field.Layout.fzyx, 'zyxf': wlb.field.Layout.zyxf,
+                     'f': wlb.field.Layout.fzyx,
                      'SoA': wlb.field.Layout.fzyx,  'AoS': wlb.field.Layout.zyxf}
 
         if cpu:
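
Note: the layout map accepts 'f' as an additional alias for wlb.field.Layout.fzyx, next to the waLBerla names and the pystencils 'SoA'/'AoS' strings. A usage sketch; the blocks object would come from a waLBerla domain setup and is assumed here:

    dh = ParallelDataHandling(blocks)            # 'blocks' assumed
    dh.addArray('pdfs', fSize=9, layout='f')     # equivalent to 'fzyx' / 'SoA'
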
@@ -116,9 +120,12 @@ class ParallelDataHandling(DataHandling):
         for block in self.blocks:
             block[name1].swapDataPointers(block[name2])
 
-    def iterate(self, sliceObj=None, gpu=False, ghostLayers=None):
-        if ghostLayers is None:
+    def iterate(self, sliceObj=None, gpu=False, ghostLayers=True):
+        if ghostLayers is True:
             ghostLayers = self.defaultGhostLayers
+        elif ghostLayers is False:
+            ghostLayers = 0
+
         prefix = self.GPU_DATA_PREFIX if gpu else ""
         if sliceObj is None:
             yield from slicedBlockIteration(self.blocks, sliceObj, ghostLayers, ghostLayers,