From c62868e7f61f8caf60eecf1a777c70fe499fd953 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Thu, 11 Jan 2018 16:53:02 +0100
Subject: [PATCH] Run kernels directly on data handlings

---
 astnodes.py              |  2 +-
 datahandling.py          | 31 ++++++++++++++++-------
 field.py                 | 11 ++++++---
 parallel/datahandling.py | 53 +++++++++++++++++++++++++++++-----------
 4 files changed, 70 insertions(+), 27 deletions(-)

diff --git a/astnodes.py b/astnodes.py
index cd39d26c1..252f4f250 100644
--- a/astnodes.py
+++ b/astnodes.py
@@ -166,7 +166,7 @@ class KernelFunction(Node):
         self.ghostLayers = ghostLayers
         # these variables are assumed to be global, so no automatic parameter is generated for them
         self.globalVariables = set()
-        self.backend = ""
+        self.backend = backend
 
     @property
     def symbolsDefined(self):
diff --git a/datahandling.py b/datahandling.py
index 9472c500f..38d3d1afd 100644
--- a/datahandling.py
+++ b/datahandling.py
@@ -1,11 +1,11 @@
 import numpy as np
-from abc import ABC, abstractmethod, abstractproperty
+from abc import ABC, abstractmethod
 
 from collections import defaultdict
 from contextlib import contextmanager
 
 from lbmpy.stencils import getStencil
-from pystencils import Field, makeSlice
+from pystencils import Field
 from pystencils.parallel.blockiteration import BlockIterationInfo
 from pystencils.slicing import normalizeSlice, removeGhostLayers
 from pystencils.utils import DotDict
@@ -122,11 +122,11 @@ class DataHandling(ABC):
         :return: generator expression yielding the gathered field, the gathered field does not include any ghost layers
         """
 
-    def registerPreAccessFunction(self, name, function):
-        self._preAccessFunctions[name].append(function)
+    def registerPreAccessFunction(self, name, fct):
+        self._preAccessFunctions[name].append(fct)
 
-    def registerPostAccessFunction(self, name, function):
-        self._postAccessFunctions[name].append(function)
+    def registerPostAccessFunction(self, name, fct):
+        self._postAccessFunctions[name].append(fct)
 
     @contextmanager
     def accessWrapper(self, name):
@@ -200,6 +200,7 @@ class SerialDataHandling(DataHandling):
         self.defaultGhostLayers = defaultGhostLayers
         self.defaultLayout = defaultLayout
         self._fields = DotDict()
+        self._fieldLatexNameToDataName = {}
         self.cpuArrays = DotDict()
         self.gpuArrays = DotDict()
         self.customDataCpu = DotDict()
@@ -222,7 +223,8 @@ class SerialDataHandling(DataHandling):
     def fields(self):
         return self._fields
 
-    def addArray(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None, cpu=True, gpu=False):
+    def addArray(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None,
+                 cpu=True, gpu=False):
         if ghostLayers is None:
             ghostLayers = self.defaultGhostLayers
         if layout is None:
@@ -262,6 +264,7 @@ class SerialDataHandling(DataHandling):
         assert all(f.name != latexName for f in self.fields.values()), "Symbolic field with this name already exists"
         self.fields[name] = Field.createFixedSize(latexName, shape=kwargs['shape'], indexDimensions=indexDimensions,
                                                   dtype=kwargs['dtype'], layout=kwargs['order'])
+        self._fieldLatexNameToDataName[latexName] = name
 
     def addCustomData(self, name, cpuCreationFunction,
                       gpuCreationFunction=None, cpuToGpuTransferFunc=None, gpuToCpuTransferFunc=None):
@@ -281,8 +284,10 @@ class SerialDataHandling(DataHandling):
         self.addArray(name, latexName=latexName, cpu=cpu, gpu=gpu, **self._fieldInformation[nameOfTemplateField])
 
     def accessArray(self, name, sliceObj=None, outerGhostLayers='all', **kwargs):
-        if outerGhostLayers == 'all':
+        if outerGhostLayers == 'all' or outerGhostLayers is True:
             outerGhostLayers = self._fieldInformation[name]['ghostLayers']
+        elif outerGhostLayers is False:
+            outerGhostLayers = 0
 
         if sliceObj is None:
             sliceObj = [slice(None, None)] * self.dim
@@ -296,7 +301,7 @@ class SerialDataHandling(DataHandling):
             yield arr[sliceObj], BlockIterationInfo(None, tuple(s.start for s in sliceObj), sliceObj)
 
     def accessCustomData(self, name):
-        yield self.customDataCpu[name], ((0,0,0)[:self.dim], self._domainSize)
+        yield self.customDataCpu[name], ((0, 0, 0)[:self.dim], self._domainSize)
 
     def gatherArray(self, name, sliceObj=None, **kwargs):
         with self.accessWrapper(name):
@@ -322,6 +327,14 @@ class SerialDataHandling(DataHandling):
         for name in (self.cpuArrays.keys() & self.gpuArrays.keys()) | self._customDataTransferFunctions.keys():
             self.toGpu(name)
 
+    def runKernel(self, kernelFunc, *args, **kwargs):
+        dataUsedInKernel = [self._fieldLatexNameToDataName[p.fieldName]
+                            for p in kernelFunc.ast.parameters if p.isFieldPtrArgument]
+        arrays = self.gpuArrays if kernelFunc.ast.backend == 'gpucuda' else self.cpuArrays
+        arrayParams = {name: arrays[name] for name in dataUsedInKernel}
+        arrayParams.update(kwargs)
+        kernelFunc(*args, **arrayParams)
+
     def toCpu(self, name):
         if name in self._customDataTransferFunctions:
             transferFunc = self._customDataTransferFunctions[name][1]
diff --git a/field.py b/field.py
index 51d08df97..410952c49 100644
--- a/field.py
+++ b/field.py
@@ -143,7 +143,7 @@ class Field(object):
         return Field(fieldName, FieldType.GENERIC, npArray.dtype, spatialLayout, shape, strides)
 
     @staticmethod
-    def createFixedSize(fieldName, shape, indexDimensions=0, dtype=np.float64, layout='numpy'):
+    def createFixedSize(fieldName, shape, indexDimensions=0, dtype=np.float64, layout='numpy', strides=None):
         """
         Creates a field with fixed sizes i.e. can be called only with arrays of the same size and layout
 
@@ -152,6 +152,7 @@ class Field(object):
         :param indexDimensions: how many of the trailing dimensions are interpreted as index (as opposed to spatial)
         :param dtype: numpy data type of the array the kernel is called with later
         :param layout: full layout of array, not only spatial dimensions
+        :param strides: strides in bytes or None to automatically compute them from shape (assuming no padding)
         """
         spatialDimensions = len(shape) - indexDimensions
         assert spatialDimensions >= 1
@@ -160,7 +161,11 @@ class Field(object):
             layout = layoutStringToTuple(layout, spatialDimensions + indexDimensions)
 
         shape = tuple(int(s) for s in shape)
-        strides = computeStrides(shape, layout)
+        if strides is None:
+            strides = computeStrides(shape, layout)
+        else:
+            assert len(strides) == len(shape)
+            strides = tuple([s // np.dtype(dtype).itemsize for s in strides])
 
         npDataType = np.dtype(dtype)
         if npDataType.fields is not None:
@@ -244,7 +249,7 @@ class Field(object):
         return Field.Access(self, tuple(offsetList))
 
     def neighbors(self, stencil):
-        return [ self.__getitem__(dir) for s in stencil]
+        return [self.__getitem__(s) for s in stencil]
 
     @property
     def center(self):
diff --git a/parallel/datahandling.py b/parallel/datahandling.py
index f05539816..3052ef968 100644
--- a/parallel/datahandling.py
+++ b/parallel/datahandling.py
@@ -26,6 +26,8 @@ class ParallelDataHandling(DataHandling):
         self.defaultGhostLayers = defaultGhostLayers
         self.defaultLayout = defaultLayout
         self._fields = DotDict()  # maps name to symbolic pystencils field
+        self._fieldNameToCpuDataName = {}
+        self._fieldNameToGpuDataName = {}
         self.dataNames = set()
         self._dim = dim
         self._fieldInformation = {}
@@ -84,13 +86,18 @@ class ParallelDataHandling(DataHandling):
             self._cpuGpuPairs.append((name, self.GPU_DATA_PREFIX + name))
 
         blockBB = self.blocks.getBlockCellBB(self.blocks[0])
-        shape = tuple(s + 2 * ghostLayers for s in blockBB.size)
+        shape = tuple(s + 2 * ghostLayers for s in blockBB.size[:self.dim])
         indexDimensions = 1 if fSize > 1 else 0
         if indexDimensions == 1:
             shape += (fSize, )
 
         assert all(f.name != latexName for f in self.fields.values()), "Symbolic field with this name already exists"
-        self.fields[name] = Field.createFixedSize(latexName, shape, indexDimensions, dtype, layout)
+
+        self.fields[name] = Field.createGeneric(latexName, self.dim, dtype, indexDimensions, layout,
+                                                indexShape=(fSize,) if indexDimensions > 0 else None)
+        self._fieldNameToCpuDataName[latexName] = name
+        if gpu:
+            self._fieldNameToGpuDataName[latexName] = self.GPU_DATA_PREFIX + name
 
     def hasData(self, name):
         return name in self._fields
@@ -108,17 +115,18 @@ class ParallelDataHandling(DataHandling):
     def accessArray(self, name, sliceObj=None, innerGhostLayers='all', outerGhostLayers='all'):
         fieldInfo = self._fieldInformation[name]
         with self.accessWrapper(name):
-            if innerGhostLayers is 'all':
+            if innerGhostLayers == 'all' or innerGhostLayers is True:
                 innerGhostLayers = fieldInfo['ghostLayers']
-            if outerGhostLayers is 'all':
+            elif innerGhostLayers is False:
+                innerGhostLayers = 0
+            if outerGhostLayers == 'all' or outerGhostLayers is True:
                 outerGhostLayers = fieldInfo['ghostLayers']
-
+            elif outerGhostLayers is False:
+                outerGhostLayers = 0
+            glAccess = [innerGhostLayers, innerGhostLayers, 0 if self.dim == 2 else innerGhostLayers]
             for iterInfo in slicedBlockIteration(self.blocks, sliceObj, innerGhostLayers, outerGhostLayers):
-                arr = wlb.field.toArray(iterInfo.block[name], withGhostLayers=innerGhostLayers)[iterInfo.localSlice]
-                if self.fields[name].indexDimensions == 0:
-                    arr = arr[..., 0]
-                if self.dim == 2:
-                    arr = arr[:, :, 0]
+                arr = wlb.field.toArray(iterInfo.block[name], withGhostLayers=glAccess)[iterInfo.localSlice]
+                arr = self._normalizeArrShape(arr, self.fields[name].indexDimensions)
                 yield arr, iterInfo
 
     def accessCustomData(self, name):
@@ -141,13 +149,30 @@ class ParallelDataHandling(DataHandling):
                     array = array[:, :, 0]
                 yield array
 
+    def _normalizeArrShape(self, arr, indexDimensions):
+        if indexDimensions == 0:
+            arr = arr[..., 0]
+        if self.dim == 2:
+            arr = arr[:, :, 0]
+        return arr
+
     def runKernel(self, kernelFunc, *args, **kwargs):
-        fieldArguments = [p.fieldName for p in kernelFunc.ast.parameters if p.isFieldPtrArgument]
+        if kernelFunc.ast.backend == 'gpucuda':
+            nameMap = self._fieldNameToGpuDataName
+            toArray = wlb.cuda.toGpuArray
+        else:
+            nameMap = self._fieldNameToCpuDataName
+            toArray = wlb.field.toArray
+        dataUsedInKernel = [(nameMap[p.fieldName], self.fields[p.fieldName])
+                            for p in kernelFunc.ast.parameters if p.isFieldPtrArgument]
         for block in self.blocks:
-            fieldArgs = {fieldName: wlb.field.toArray(block[fieldName], withGhostLayers=True)
-                         for fieldName in fieldArguments}
+            fieldArgs = {}
+            for dataName, f in dataUsedInKernel:
+                arr = toArray(block[dataName], withGhostLayers=[True, True, self.dim == 3])
+                arr = self._normalizeArrShape(arr, f.indexDimensions)
+                fieldArgs[f.name] = arr
             fieldArgs.update(kwargs)
-            kernelFunc(*args, **kwargs)
+            kernelFunc(*args, **fieldArgs)
 
     def toCpu(self, name):
         if name in self._customDataTransferFunctions:
-- 
GitLab