diff --git a/datahandling.py b/datahandling.py
new file mode 100644
index 0000000000000000000000000000000000000000..b64300ac87cd5bde97f8916711c3b88f52cba67e
--- /dev/null
+++ b/datahandling.py
@@ -0,0 +1,236 @@
+import numpy as np
+from abc import ABC, abstractmethod, abstractproperty
+from pystencils import Field, makeSlice
+from pystencils.parallel.blockiteration import BlockIterationInfo
+from pystencils.slicing import normalizeSlice, removeGhostLayers
+from pystencils.utils import DotDict
+
+try:
+    import pycuda.gpuarray as gpuarray
+except ImportError:
+    gpuarray = None
+
+
+class DataHandling(ABC):
+    """
+    Manages the storage of arrays and maps them to a symbolic field.
+    Two versions are available: a simple, pure Python implementation for single node
+    simulations :py:class:`SerialDataHandling` and a distributed version using waLBerla in :py:class:`ParallelDataHandling`
+
+    Keep in mind that the data can be distributed, so use the 'access' method whenever possible and avoid the
+    'gather' function that collects (parts of the) distributed data on a single process.
+    """
+
+    # ---------------------------- Adding and accessing data -----------------------------------------------------------
+
+    @property
+    @abstractmethod
+    def dim(self):
+        """Dimension of the domain, either 2 or 3"""
+        pass
+
+    @abstractmethod
+    def add(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None, cpu=True, gpu=False):
+        """
+        Adds a (possibly distributed) array to the handling that can be accessed using the given name.
+        For each array a symbolic field is available via the 'fields' dictionary
+
+        :param name: unique name that is used to access the field later
+        :param fSize: shape of the dim+1 coordinate. DataHandling supports zero or one index dimensions, i.e. scalar
+                      fields and vector fields. This parameter gives the shape of the index dimensions. The default
+                      value of 1 means no index dimension
+        :param dtype: data type of the array as numpy data type
+        :param latexName: optional, name of the symbolic field, if not given 'name' is used
+        :param ghostLayers: number of ghost layers - if not specified a default value specified in the constructor
+                            is used
+        :param layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'.
+                       this is only important if fSize > 1
+        :param cpu: allocate field on the CPU
+        :param gpu: allocate field on the GPU
+        """
+        pass
+
+    @abstractmethod
+    def addLike(self, name, nameOfTemplateField, latexName=None, cpu=True, gpu=False):
+        """
+        Adds an array with the same parameters (number of ghost layers, fSize, dtype) as existing array
+        :param name: name of new array
+        :param nameOfTemplateField: name of array that is used as template
+        :param latexName: see 'add' method
+        :param cpu: see 'add' method
+        :param gpu: see 'add' method
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def fields(self):
+        """Dictionary mapping data name to symbolic pystencils field - use this to create pystencils kernels"""
+        pass
+
+    @abstractmethod
+    def access(self, name, sliceObj=None, innerGhostLayers=None, outerGhostLayers=0):
+        """
+        Generator yielding locally stored sub-arrays together with information about their place in the global domain
+
+        :param name: name of data to access
+        :param sliceObj: optional rectangular sub-region to access
+        :param innerGhostLayers: how many inner (not at domain border) ghost layers to include
+        :param outerGhostLayers: how many ghost layers at the domain border to include
+        Yields a numpy array with local part of data and a BlockIterationInfo object containing geometric information
+        """
+        pass
+
+    @abstractmethod
+    def gather(self, name, sliceObj=None, allGather=False):
+        """
+        Gathers part of the domain on a local process. Whenever possible use 'access' instead, since this method copies
+        the distributed data to a single process which is inefficient and may exhaust the available memory
+
+        :param name: name of the array to gather
+        :param sliceObj: slice expression of the rectangular sub-part that should be gathered
+        :param allGather: if False only the root process receives the result, if True all processes
+        :return: generator expression yielding the gathered field, the gathered field does not include any ghost layers
+        """
+        pass
+
+    # ------------------------------- CPU/GPU transfer -----------------------------------------------------------------
+
+    @abstractmethod
+    def toCpu(self, name):
+        """Copies GPU data of array with specified name to CPU.
+        Works only if 'cpu=True' and 'gpu=True' has been used in 'add' method"""
+        pass
+
+    @abstractmethod
+    def toGpu(self, name):
+        """Copies CPU data of array with specified name to GPU.
+        Works only if 'cpu=True' and 'gpu=True' has been used in 'add' method"""
+        pass
+
+    @abstractmethod
+    def allToCpu(self, name):
+        """Copies data from GPU to CPU for all arrays that have a CPU and a GPU representation"""
+        pass
+
+    @abstractmethod
+    def allToGpu(self, name):
+        """Copies data from CPU to GPU for all arrays that have a CPU and a GPU representation"""
+        pass
+
+
+class SerialDataHandling(DataHandling):
+
+    class _PassThroughContextManager:
+        def __init__(self, arr):
+            self.arr = arr
+
+        def __enter__(self, *args, **kwargs):
+            return self.arr
+
+    def __init__(self, domainSize, defaultGhostLayers=1, defaultLayout='SoA'):
+        """
+        Creates a data handling for single node simulations
+
+        :param domainSize: size of the spatial domain as tuple
+        :param defaultGhostLayers: nr of ghost layers used if not specified in add() method
+        :param defaultLayout: layout used if no layout is given to add() method
+        """
+        self._domainSize = tuple(domainSize)
+        self.defaultGhostLayers = defaultGhostLayers
+        self.defaultLayout = defaultLayout
+        self._fields = DotDict()
+        self.cpuArrays = DotDict()
+        self.gpuArrays = DotDict()
+        self._fieldInformation = {}
+
+    @property
+    def dim(self):
+        return len(self._domainSize)
+
+    @property
+    def fields(self):
+        return self._fields
+
+    def add(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None, cpu=True, gpu=False):
+        if ghostLayers is None:
+            ghostLayers = self.defaultGhostLayers
+        if layout is None:
+            layout = self.defaultLayout
+        if latexName is None:
+            latexName = name
+
+        assert layout in ('SoA', 'AoS')
+
+        kwargs = {
+            'shape': tuple(s + 2 * ghostLayers for s in self._domainSize),
+            'dtype': dtype,
+            'order': 'c' if layout == 'AoS' else 'f',
+        }
+        self._fieldInformation[name] = {
+            'ghostLayers': ghostLayers,
+            'fSize': fSize,
+            'layout': layout,
+            'dtype': dtype,
+        }
+
+        if fSize > 1:
+            kwargs['shape'] = kwargs['shape'] + (fSize,)
+            indexDimensions = 1
+        else:
+            indexDimensions = 0
+        if cpu:
+            if name in self.cpuArrays:
+                raise ValueError("CPU Field with this name already exists")
+            self.cpuArrays[name] = np.empty(**kwargs)
+        if gpu:
+            if name in self.gpuArrays:
+                raise ValueError("GPU Field with this name already exists")
+
+            self.gpuArrays[name] = gpuarray.empty(**kwargs)
+
+        assert all(f.name != latexName for f in self.fields.values()), "Symbolic field with this name already exists"
+        self.fields[name] = Field.createFixedSize(latexName, shape=kwargs['shape'], indexDimensions=indexDimensions,
+                                                  dtype=kwargs['dtype'], layout=kwargs['order'])
+
+    def addLike(self, name, nameOfTemplateField, latexName=None, cpu=True, gpu=False):
+        self.add(name,latexName=latexName, cpu=cpu, gpu=gpu, **self._fieldInformation[nameOfTemplateField])
+
+    def access(self, name, sliceObj=None, outerGhostLayers=0, **kwargs):
+        if sliceObj is None:
+            sliceObj = [slice(None, None)] * self.dim
+        arr = self.cpuArrays[name]
+        glToRemove = self._fieldInformation[name]['ghostLayers'] - outerGhostLayers
+        assert glToRemove >= 0
+        arr = removeGhostLayers(arr, indexDimensions=self.fields[name].indexDimensions, ghostLayers=glToRemove)
+        sliceObj = normalizeSlice(sliceObj, arr.shape[:self.dim])
+        yield arr[sliceObj], BlockIterationInfo(None, tuple(s.start for s in sliceObj), sliceObj)
+
+    def gather(self, name, sliceObj=None, **kwargs):
+        gls = self._fieldInformation[name]['ghostLayers']
+        arr = self.cpuArrays[name]
+        arr = removeGhostLayers(arr, indexDimensions=self.fields[name].indexDimensions, ghostLayers=gls)
+
+        if sliceObj is not None:
+            arr = arr[sliceObj]
+        yield arr
+
+    def swap(self, name1, name2, gpu=False):
+        if not gpu:
+            self.cpuArrays[name1], self.cpuArrays[name2] = self.cpuArrays[name2], self.cpuArrays[name1]
+        else:
+            self.gpuArrays[name1], self.gpuArrays[name2] = self.gpuArrays[name2], self.gpuArrays[name1]
+
+    def allToCpu(self):
+        for name in self.cpuArrays.keys() & self.gpuArrays.keys():
+            self.toCpu(name)
+
+    def allToGpu(self):
+        for name in self.cpuArrays.keys() & self.gpuArrays.keys():
+            self.toGpu(name)
+
+    def toCpu(self, name):
+        self.gpuArrays[name].get(self.cpuArrays[name])
+
+    def toGpu(self, name):
+        self.gpuArrays[name].set(self.cpuArrays[name])
diff --git a/field.py b/field.py
index 5bced7f0e96d8f98a6a6812cc4e9e8ca5036c79a..a3ffc22bf9709a8560e9514af2bc129d4de46c66 100644
--- a/field.py
+++ b/field.py
@@ -446,18 +446,18 @@ def spatialLayoutStringToTuple(layoutStr, dim):
         assert dim <= 3
         return tuple(reversed(range(dim)))
 
-    if layoutStr == "fzyx" or layoutStr == 'f' or layoutStr == 'reverseNumpy':
+    if layoutStr in ('fzyx', 'f', 'reverseNumpy', 'SoA'):
         return tuple(reversed(range(dim)))
-    elif layoutStr == 'c' or layoutStr == 'numpy':
+    elif layoutStr in ('c', 'numpy', 'AoS'):
         return tuple(range(dim))
     raise ValueError("Unknown layout descriptor " + layoutStr)
 
 
 def layoutStringToTuple(layoutStr, dim):
-    if layoutStr == 'fzyx':
+    if layoutStr == 'fzyx' or layoutStr == 'SoA':
         assert dim <= 4
         return tuple(reversed(range(dim)))
-    elif layoutStr == 'zyxf':
+    elif layoutStr == 'zyxf' or layoutStr == 'AoS':
         assert dim <= 4
         return tuple(reversed(range(dim - 1))) + (dim-1,)
     elif layoutStr == 'f' or layoutStr == 'reverseNumpy':
diff --git a/parallel/__init__.py b/parallel/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/parallel/blockiteration.py b/parallel/blockiteration.py
new file mode 100644
index 0000000000000000000000000000000000000000..0742f6c61621cab0966fb101241f78c495fc6ef0
--- /dev/null
+++ b/parallel/blockiteration.py
@@ -0,0 +1,80 @@
+import numpy as np
+import waLBerla as wlb
+from pystencils.slicing import normalizeSlice
+
+
+class BlockIterationInfo:
+    def __init__(self, block, offset, localSlice):
+        self._block = block
+        self._offset = offset
+        self._localSlice = localSlice
+
+    @property
+    def block(self):
+        return self._block
+
+    @property
+    def offset(self):
+        return self._offset
+
+    @property
+    def shape(self):
+        return tuple(s.stop - s.start for s in self._localSlice)
+
+    @property
+    def localSlice(self):
+        """Slice object of intersection between current block and iteration interval in local coordinates"""
+        return self._localSlice
+
+    @property
+    def midpointArrays(self):
+        """Global coordinate meshgrid of cell midpoints which are shifted by 0.5 compared to cell indices"""
+        meshGridParams = [offset + 0.5 + np.arange(width, dtype=float)
+                          for offset, width in zip(self.offset, self.shape)]
+        return np.meshgrid(*meshGridParams, indexing='ij', copy=False)
+
+    @property
+    def cellIndexArrays(self):
+        """Global coordinate meshgrid of cell coordinates. Cell indices start at 0 at the first inner cell,
+        ghost layers have negative indices"""
+        meshGridParams = [offset + np.arange(width, dtype=np.int32)
+                          for offset, width in zip(self.offset, self.shape)]
+        return np.meshgrid(*meshGridParams, indexing='ij', copy=False)
+
+
+def slicedBlockIteration(blocks, sliceObj=None, innerGhostLayers=1, sliceNormalizationGhostLayers=1):
+    """
+    Iterates over all blocks that have an intersection with the given slice object.
+    For these blocks a BlockIterationInfo object is yielded
+    
+    :param blocks: waLBerla block data structure
+    :param sliceObj: a slice (i.e. rectangular subregion), can be created with makeSlice[]
+    :param innerGhostLayers: how many ghost layers are included in the local slice and the optional index arrays
+    :param sliceNormalizationGhostLayers: slices can have relative coordinates e.g. makeSlice[0.2, :, :]
+                                          when computing absolute values, the domain size is needed. This parameter 
+                                          specifies how many ghost layers are taken into account for this operation.
+
+    Example: assume no slice is given, then sliceNormalizationGhostLayers effectively sets how much ghost layers
+    at the border of the domain are included. The innerGhostLayers parameter specifies how many inner ghost layers are
+    included
+    """
+    if sliceObj is None:
+        sliceObj = [slice(None, None, None)] * 3
+
+    domainCellBB = blocks.getDomainCellBB()
+    domainExtent = [s + 2 * sliceNormalizationGhostLayers for s in domainCellBB.size]
+    sliceObj = normalizeSlice(sliceObj, domainExtent)
+    targetCellBB = wlb.CellInterval.fromSlice(sliceObj)
+    targetCellBB.shift(*[a - sliceNormalizationGhostLayers for a in domainCellBB.min])
+
+    for block in blocks:
+        intersection = blocks.getBlockCellBB(block).getExpanded(innerGhostLayers)
+        intersection.intersect(targetCellBB)
+        if intersection.empty():
+            continue
+
+        localTargetBB = blocks.transformGlobalToLocal(block, intersection)
+        localTargetBB.shift(innerGhostLayers, innerGhostLayers, innerGhostLayers)
+        localSlice = localTargetBB.toSlice(False)
+        yield BlockIterationInfo(block, intersection.min, localSlice)
+
diff --git a/parallel/datahandling.py b/parallel/datahandling.py
new file mode 100644
index 0000000000000000000000000000000000000000..7aa029f16404e9494f6a3d7a5f5eccf9c9968e2a
--- /dev/null
+++ b/parallel/datahandling.py
@@ -0,0 +1,130 @@
+import numpy as np
+from pystencils import Field, makeSlice
+from pystencils.datahandling import DataHandling
+from pystencils.parallel.blockiteration import slicedBlockIteration
+from pystencils.utils import DotDict
+import waLBerla as wlb
+
+
+class ParallelDataHandling(DataHandling):
+    GPU_DATA_PREFIX = "gpu_"
+
+    def __init__(self, blocks, defaultGhostLayers=1, defaultLayout='SoA', dim=3):
+        """
+        Creates data handling based on waLBerla block storage
+
+        :param blocks: waLBerla block storage
+        :param defaultGhostLayers: nr of ghost layers used if not specified in add() method
+        :param defaultLayout: layout used if no layout is given to add() method
+        :param dim: dimension of scenario,
+                    waLBerla always uses three dimensions, so if dim=2 the extent of the
+                    z coordinate of blocks has to be 1
+        """
+        assert dim in (2, 3)
+        self.blocks = blocks
+        self.defaultGhostLayers = defaultGhostLayers
+        self.defaultLayout = defaultLayout
+        self._fields = DotDict()  # maps name to symbolic pystencils field
+        self.dataNames = set()
+        self._dim = dim
+        self._fieldInformation = {}
+        self._cpuGpuPairs = []
+        if self._dim == 2:
+            assert self.blocks.getDomainCellBB().size[2] == 1
+
+    @property
+    def dim(self):
+        return self._dim
+
+    @property
+    def fields(self):
+        return self._fields
+
+    def add(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None, cpu=True, gpu=False):
+        if ghostLayers is None:
+            ghostLayers = self.defaultGhostLayers
+        if layout is None:
+            layout = self.defaultLayout
+        if latexName is None:
+            latexName = name
+        if len(self.blocks) == 0:
+            raise ValueError("Data handling expects that each process has at least one block")
+        if hasattr(dtype, 'type'):
+            dtype = dtype.type
+        if name in self.blocks[0] or self.GPU_DATA_PREFIX + name in self.blocks[0]:
+            raise ValueError("Data with this name has already been added")
+
+        self._fieldInformation[name] = {'ghostLayers': ghostLayers,
+                                        'fSize': fSize,
+                                        'layout': layout,
+                                        'dtype': dtype}
+
+        layoutMap = {'fzyx': wlb.field.Layout.fzyx, 'zyxf': wlb.field.Layout.zyxf,
+                     'SoA': wlb.field.Layout.fzyx,  'AoS': wlb.field.Layout.zyxf}
+        if cpu:
+            wlb.field.addToStorage(self.blocks, name, dtype, fSize=fSize, layout=layoutMap[layout],
+                                   ghostLayers=ghostLayers)
+        if gpu:
+            wlb.cuda.addGpuFieldToStorage(self.blocks, self.GPU_DATA_PREFIX+name, dtype, fSize=fSize,
+                                          usePitchedMem=False, ghostLayers=ghostLayers, layout=layoutMap[layout])
+
+        if cpu and gpu:
+            self._cpuGpuPairs.append((name, self.GPU_DATA_PREFIX + name))
+
+        blockBB = self.blocks.getBlockCellBB(self.blocks[0])
+        shape = tuple(s + 2 * ghostLayers for s in blockBB.size)
+        indexDimensions = 1 if fSize > 1 else 0
+        if indexDimensions == 1:
+            shape += (fSize, )
+
+        assert all(f.name != latexName for f in self.fields.values()), "Symbolic field with this name already exists"
+        self.fields[name] = Field.createFixedSize(latexName, shape, indexDimensions, dtype, layout)
+
+    def addLike(self, name, nameOfTemplateField, latexName=None, cpu=True, gpu=False):
+        self.add(name,latexName=latexName, cpu=cpu, gpu=gpu, **self._fieldInformation[nameOfTemplateField])
+
+    def swap(self, name1, name2, gpu=False):
+        if gpu:
+            name1 = self.GPU_DATA_PREFIX + name1
+            name2 = self.GPU_DATA_PREFIX + name2
+        for block in self.blocks:
+            block[name1].swapDataPointers(block[name2])
+
+    def access(self, name, sliceObj=None, innerGhostLayers=None, outerGhostLayers=0):
+        if innerGhostLayers is None:
+            innerGhostLayers = self._fieldInformation[name]['ghostLayers']
+
+        if outerGhostLayers is None:
+            outerGhostLayers = self._fieldInformation[name]['ghostLayers']
+
+        for iterInfo in slicedBlockIteration(self.blocks, sliceObj, innerGhostLayers, outerGhostLayers):
+            arr = wlb.field.toArray(iterInfo.block[name], withGhostLayers=innerGhostLayers)[iterInfo.localSlice]
+            if self.fields[name].indexDimensions == 0:
+                arr = arr[..., 0]
+            if self.dim == 2:
+                arr = arr[:, :, 0]
+            yield arr, iterInfo
+
+    def gather(self, name, sliceObj=None, allGather=False):
+        if sliceObj is None:
+            sliceObj = makeSlice[:, :, :]
+        for array in wlb.field.gatherGenerator(self.blocks, name, sliceObj, allGather):
+            if self.fields[name].indexDimensions == 0:
+                array = array[..., 0]
+            if self.dim == 2:
+                array = array[:, :, 0]
+            yield array
+
+    def toCpu(self, name):
+        wlb.cuda.copyFieldToCpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
+
+    def toGpu(self, name):
+        wlb.cuda.copyFieldToGpu(self.blocks, self.GPU_DATA_PREFIX + name, name)
+
+    def allToCpu(self):
+        for cpuName, gpuName in self._cpuGpuPairs:
+            wlb.cuda.copyFieldToCpu(self.blocks, gpuName, cpuName)
+
+    def allToGpu(self):
+        for cpuName, gpuName in self._cpuGpuPairs:
+            wlb.cuda.copyFieldToGpu(self.blocks, gpuName, cpuName)
diff --git a/slicing.py b/slicing.py
index beedcc1f707e7afed1aeab1847df2036d728416b..f22ad411bc3e6a7ad9e11656d03031f988c29258 100644
--- a/slicing.py
+++ b/slicing.py
@@ -108,6 +108,8 @@ def sliceFromDirection(directionName, dim, normalOffset=0, tangentialOffset=0):
 
 
 def removeGhostLayers(arr, indexDimensions=0, ghostLayers=1):
+    if ghostLayers <= 0:
+        return arr
     dimensions = len(arr.shape)
     spatialDimensions = dimensions - indexDimensions
     indexing = [slice(ghostLayers, -ghostLayers, None), ] * spatialDimensions