From c62868e7f61f8caf60eecf1a777c70fe499fd953 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Thu, 11 Jan 2018 16:53:02 +0100
Subject: [PATCH] Run kernels directly on data handlings

---
 astnodes.py              |  2 +-
 datahandling.py          | 31 ++++++++++++++++++++++-------
 field.py                 | 11 ++++++---
 parallel/datahandling.py | 53 +++++++++++++++++++++++++++++-----------
 4 files changed, 70 insertions(+), 27 deletions(-)

diff --git a/astnodes.py b/astnodes.py
index cd39d26c1..252f4f250 100644
--- a/astnodes.py
+++ b/astnodes.py
@@ -166,7 +166,7 @@ class KernelFunction(Node):
         self.ghostLayers = ghostLayers
         # these variables are assumed to be global, so no automatic parameter is generated for them
         self.globalVariables = set()
-        self.backend = ""
+        self.backend = backend
 
     @property
     def symbolsDefined(self):
diff --git a/datahandling.py b/datahandling.py
index 9472c500f..38d3d1afd 100644
--- a/datahandling.py
+++ b/datahandling.py
@@ -1,11 +1,11 @@
 import numpy as np
-from abc import ABC, abstractmethod, abstractproperty
+from abc import ABC, abstractmethod
 from collections import defaultdict
 from contextlib import contextmanager
 from lbmpy.stencils import getStencil
 
-from pystencils import Field, makeSlice
+from pystencils import Field
 from pystencils.parallel.blockiteration import BlockIterationInfo
 from pystencils.slicing import normalizeSlice, removeGhostLayers
 from pystencils.utils import DotDict
 
@@ -122,11 +122,11 @@ class DataHandling(ABC):
         :return: generator expression yielding the gathered field, the gathered field does not include any ghost layers
         """
 
-    def registerPreAccessFunction(self, name, function):
-        self._preAccessFunctions[name].append(function)
+    def registerPreAccessFunction(self, name, fct):
+        self._preAccessFunctions[name].append(fct)
 
-    def registerPostAccessFunction(self, name, function):
-        self._postAccessFunctions[name].append(function)
+    def registerPostAccessFunction(self, name, fct):
+        self._postAccessFunctions[name].append(fct)
 
     @contextmanager
     def accessWrapper(self, name):
@@ -200,6 +200,7 @@ class SerialDataHandling(DataHandling):
         self.defaultGhostLayers = defaultGhostLayers
         self.defaultLayout = defaultLayout
         self._fields = DotDict()
+        self._fieldLatexNameToDataName = {}
         self.cpuArrays = DotDict()
         self.gpuArrays = DotDict()
         self.customDataCpu = DotDict()
@@ -222,7 +223,8 @@ class SerialDataHandling(DataHandling):
     def fields(self):
         return self._fields
 
-    def addArray(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None, cpu=True, gpu=False):
+    def addArray(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None,
+                 cpu=True, gpu=False):
         if ghostLayers is None:
             ghostLayers = self.defaultGhostLayers
         if layout is None:
@@ -262,6 +264,7 @@ class SerialDataHandling(DataHandling):
         assert all(f.name != latexName for f in self.fields.values()), "Symbolic field with this name already exists"
         self.fields[name] = Field.createFixedSize(latexName, shape=kwargs['shape'], indexDimensions=indexDimensions,
                                                   dtype=kwargs['dtype'], layout=kwargs['order'])
+        self._fieldLatexNameToDataName[latexName] = name
 
     def addCustomData(self, name, cpuCreationFunction,
                       gpuCreationFunction=None, cpuToGpuTransferFunc=None, gpuToCpuTransferFunc=None):
@@ -281,8 +284,10 @@ class SerialDataHandling(DataHandling):
         self.addArray(name, latexName=latexName, cpu=cpu, gpu=gpu, **self._fieldInformation[nameOfTemplateField])
 
     def accessArray(self, name, sliceObj=None, outerGhostLayers='all', **kwargs):
-        if outerGhostLayers == 'all':
+        if outerGhostLayers == 'all' or outerGhostLayers is True:
             outerGhostLayers = self._fieldInformation[name]['ghostLayers']
+        elif outerGhostLayers is False:
+            outerGhostLayers = 0
 
         if sliceObj is None:
             sliceObj = [slice(None, None)] * self.dim
@@ -296,7 +301,7 @@ class SerialDataHandling(DataHandling):
             yield arr[sliceObj], BlockIterationInfo(None, tuple(s.start for s in sliceObj), sliceObj)
 
     def accessCustomData(self, name):
-        yield self.customDataCpu[name], ((0,0,0)[:self.dim], self._domainSize)
+        yield self.customDataCpu[name], ((0, 0, 0)[:self.dim], self._domainSize)
 
     def gatherArray(self, name, sliceObj=None, **kwargs):
         with self.accessWrapper(name):
@@ -322,6 +327,14 @@
         for name in (self.cpuArrays.keys() & self.gpuArrays.keys()) | self._customDataTransferFunctions.keys():
             self.toGpu(name)
 
+    def runKernel(self, kernelFunc, *args, **kwargs):
+        dataUsedInKernel = [self._fieldLatexNameToDataName[p.fieldName]
+                            for p in kernelFunc.ast.parameters if p.isFieldPtrArgument]
+        arrays = self.gpuArrays if kernelFunc.ast.backend == 'gpucuda' else self.cpuArrays
+        arrayParams = {name: arrays[name] for name in dataUsedInKernel}
+        arrayParams.update(kwargs)
+        kernelFunc(*args, **arrayParams)
+
     def toCpu(self, name):
         if name in self._customDataTransferFunctions:
             transferFunc = self._customDataTransferFunctions[name][1]
diff --git a/field.py b/field.py
index 51d08df97..410952c49 100644
--- a/field.py
+++ b/field.py
@@ -143,7 +143,7 @@ class Field(object):
         return Field(fieldName, FieldType.GENERIC, npArray.dtype, spatialLayout, shape, strides)
 
     @staticmethod
-    def createFixedSize(fieldName, shape, indexDimensions=0, dtype=np.float64, layout='numpy'):
+    def createFixedSize(fieldName, shape, indexDimensions=0, dtype=np.float64, layout='numpy', strides=None):
         """
         Creates a field with fixed sizes i.e. can be called only with arrays of the same size and layout
 
@@ -152,6 +152,7 @@ class Field(object):
         :param indexDimensions: how many of the trailing dimensions are interpreted as index (as opposed to spatial)
         :param dtype: numpy data type of the array the kernel is called with later
         :param layout: full layout of array, not only spatial dimensions
+        :param strides: strides in bytes or None to automatically compute them from shape (assuming no padding)
         """
         spatialDimensions = len(shape) - indexDimensions
         assert spatialDimensions >= 1
@@ -160,7 +161,11 @@ class Field(object):
             layout = layoutStringToTuple(layout, spatialDimensions + indexDimensions)
 
         shape = tuple(int(s) for s in shape)
-        strides = computeStrides(shape, layout)
+        if strides is None:
+            strides = computeStrides(shape, layout)
+        else:
+            assert len(strides) == len(shape)
+            strides = tuple([s // np.dtype(dtype).itemsize for s in strides])
 
         npDataType = np.dtype(dtype)
         if npDataType.fields is not None:
@@ -244,7 +249,7 @@ class Field(object):
         return Field.Access(self, tuple(offsetList))
 
     def neighbors(self, stencil):
-        return [ self.__getitem__(dir) for s in stencil]
+        return [self.__getitem__(s) for s in stencil]
 
     @property
     def center(self):
diff --git a/parallel/datahandling.py b/parallel/datahandling.py
index f05539816..3052ef968 100644
--- a/parallel/datahandling.py
+++ b/parallel/datahandling.py
@@ -26,6 +26,8 @@ class ParallelDataHandling(DataHandling):
         self.defaultGhostLayers = defaultGhostLayers
         self.defaultLayout = defaultLayout
         self._fields = DotDict()  # maps name to symbolic pystencils field
+        self._fieldNameToCpuDataName = {}
+        self._fieldNameToGpuDataName = {}
         self.dataNames = set()
         self._dim = dim
         self._fieldInformation = {}
@@ -84,13 +86,18 @@ class ParallelDataHandling(DataHandling):
             self._cpuGpuPairs.append((name, self.GPU_DATA_PREFIX + name))
 
         blockBB = self.blocks.getBlockCellBB(self.blocks[0])
-        shape = tuple(s + 2 * ghostLayers for s in blockBB.size)
+        shape = tuple(s + 2 * ghostLayers for s in blockBB.size[:self.dim])
         indexDimensions = 1 if fSize > 1 else 0
         if indexDimensions == 1:
             shape += (fSize, )
 
         assert all(f.name != latexName for f in self.fields.values()), "Symbolic field with this name already exists"
-        self.fields[name] = Field.createFixedSize(latexName, shape, indexDimensions, dtype, layout)
+
+        self.fields[name] = Field.createGeneric(latexName, self.dim, dtype, indexDimensions, layout,
+                                                indexShape=(fSize,) if indexDimensions > 0 else None)
+        self._fieldNameToCpuDataName[latexName] = name
+        if gpu:
+            self._fieldNameToGpuDataName[latexName] = self.GPU_DATA_PREFIX + name
 
     def hasData(self, name):
         return name in self._fields
@@ -108,17 +115,18 @@ class ParallelDataHandling(DataHandling):
     def accessArray(self, name, sliceObj=None, innerGhostLayers='all', outerGhostLayers='all'):
         fieldInfo = self._fieldInformation[name]
         with self.accessWrapper(name):
-            if innerGhostLayers is 'all':
+            if innerGhostLayers is 'all' or innerGhostLayers is True:
                 innerGhostLayers = fieldInfo['ghostLayers']
-            if outerGhostLayers is 'all':
+            elif innerGhostLayers is False:
+                innerGhostLayers = 0
+            if outerGhostLayers is 'all' or outerGhostLayers is True:
                 outerGhostLayers = fieldInfo['ghostLayers']
-
+            elif outerGhostLayers is False:
+                outerGhostLayers = 0
+            glAccess = [innerGhostLayers, innerGhostLayers, 0 if self.dim == 2 else innerGhostLayers]
             for iterInfo in slicedBlockIteration(self.blocks, sliceObj, innerGhostLayers, outerGhostLayers):
-                arr = wlb.field.toArray(iterInfo.block[name], withGhostLayers=innerGhostLayers)[iterInfo.localSlice]
-                if self.fields[name].indexDimensions == 0:
-                    arr = arr[..., 0]
-                if self.dim == 2:
-                    arr = arr[:, :, 0]
+                arr = wlb.field.toArray(iterInfo.block[name], withGhostLayers=glAccess)[iterInfo.localSlice]
+                arr = self._normalizeArrShape(arr, self.fields[name].indexDimensions)
                 yield arr, iterInfo
 
     def accessCustomData(self, name):
@@ -141,13 +149,30 @@ class ParallelDataHandling(DataHandling):
                 array = array[:, :, 0]
             yield array
 
+    def _normalizeArrShape(self, arr, indexDimensions):
+        if indexDimensions == 0:
+            arr = arr[..., 0]
+        if self.dim == 2:
+            arr = arr[:, :, 0]
+        return arr
+
     def runKernel(self, kernelFunc, *args, **kwargs):
-        fieldArguments = [p.fieldName for p in kernelFunc.ast.parameters if p.isFieldPtrArgument]
+        if kernelFunc.ast.backend == 'gpucuda':
+            nameMap = self._fieldNameToGpuDataName
+            toArray = wlb.cuda.toGpuArray
+        else:
+            nameMap = self._fieldNameToCpuDataName
+            toArray = wlb.field.toArray
+        dataUsedInKernel = [(nameMap[p.fieldName], self.fields[p.fieldName])
+                            for p in kernelFunc.ast.parameters if p.isFieldPtrArgument]
         for block in self.blocks:
-            fieldArgs = {fieldName: wlb.field.toArray(block[fieldName], withGhostLayers=True)
-                         for fieldName in fieldArguments}
+            fieldArgs = {}
+            for dataName, f in dataUsedInKernel:
+                arr = toArray(block[dataName], withGhostLayers=[True, True, self.dim == 3])
+                arr = self._normalizeArrShape(arr, f.indexDimensions)
+                fieldArgs[f.name] = arr
             fieldArgs.update(kwargs)
-            kernelFunc(*args, **kwargs)
+            kernelFunc(*args, **fieldArgs)
 
     def toCpu(self, name):
         if name in self._customDataTransferFunctions:
--
GitLab
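
For context, a minimal sketch of how the runKernel entry point introduced by this patch could be used on a SerialDataHandling. The kernel-creation helpers (createKernel, makePythonFunction), the SerialDataHandling constructor arguments, and the field names 'src'/'dst' are assumptions about the surrounding pystencils API of this period and are not part of the patch:

# Hypothetical usage sketch (not part of the patch). Assumes the camelCase
# pystencils API of this period: createKernel / makePythonFunction and a
# SerialDataHandling(domainSize=...) constructor; exact names may differ.
import sympy as sp
from pystencils.cpu import createKernel, makePythonFunction
from pystencils.datahandling import SerialDataHandling

dh = SerialDataHandling(domainSize=(30, 30))   # constructor signature assumed
dh.addArray('src', ghostLayers=1)              # allocates the array and a symbolic field
dh.addArray('dst', ghostLayers=1)

src, dst = dh.fields['src'], dh.fields['dst']
updateRule = [sp.Eq(dst.center,
                    (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4)]

ast = createKernel(updateRule)                 # CPU kernel AST
kernel = makePythonFunction(ast)               # compiled callable; runKernel expects it to expose kernel.ast

# runKernel resolves the kernel's field parameters to the handled arrays
# (via _fieldLatexNameToDataName) and passes them to the kernel automatically:
dh.runKernel(kernel)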