diff --git a/cpu/cpujit.py b/cpu/cpujit.py
index 7fcfab610c353bcd1c0fc7c55f204447524d0493..7f36c88c37a11a7e6697e4b1949152c3b4dfeb53 100644
--- a/cpu/cpujit.py
+++ b/cpu/cpujit.py
@@ -440,4 +440,5 @@ def makePythonFunctionIncompleteParams(kernelFunctionNode, argumentDict, func):
         cacheValues.append(kwargs)  # keep objects alive such that ids remain unique
         func(*args)
     wrapper.ast = kernelFunctionNode
+    wrapper.parameters = kernelFunctionNode.parameters
     return wrapper
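Both jit wrappers now mirror kernelFunctionNode.parameters onto the wrapper they return (the gpucuda/cudajit.py hunk below does the same), so callers can introspect a compiled kernel without reaching into wrapper.ast. A minimal sketch of the consuming side; fieldNamesOf is a hypothetical helper, but the parameter attributes it uses (fieldName, isFieldPtrArgument) are the same ones runKernel relies on in the next file:

def fieldNamesOf(kernelFunc):
    # works for CPU and GPU wrappers alike, since both now expose .parameters
    return [p.fieldName for p in kernelFunc.parameters if p.isFieldPtrArgument]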
raise ValueError("GPU Field with this name already exists") - - self.gpuArrays[name] = gpuarray.empty(**kwargs) + self.gpuArrays[name] = gpuarray.to_gpu(cpuArr) assert all(f.name != latexName for f in self.fields.values()), "Symbolic field with this name already exists" self.fields[name] = Field.createFixedSize(latexName, shape=kwargs['shape'], indexDimensions=indexDimensions, - dtype=kwargs['dtype'], layout=kwargs['order']) + dtype=kwargs['dtype'], layout=layout) self._fieldLatexNameToDataName[latexName] = name def addCustomData(self, name, cpuCreationFunction, gpuCreationFunction=None, cpuToGpuTransferFunc=None, gpuToCpuTransferFunc=None): - assert name not in self.cpuArrays - assert name not in self.customDataCpu - self.customDataCpu[name] = cpuCreationFunction() - if gpuCreationFunction: - self.customDataGpu[name] = gpuCreationFunction() + + if cpuCreationFunction and gpuCreationFunction: if cpuToGpuTransferFunc is None or gpuToCpuTransferFunc is None: raise ValueError("For GPU data, both transfer functions have to be specified") self._customDataTransferFunctions[name] = (cpuToGpuTransferFunc, gpuToCpuTransferFunc) + assert name not in self.customDataCpu + if cpuCreationFunction: + assert name not in self.cpuArrays + self.customDataCpu[name] = cpuCreationFunction() + + if gpuCreationFunction: + assert name not in self.gpuArrays + self.customDataGpu[name] = gpuCreationFunction() + def hasData(self, name): return name in self.fields def addArrayLike(self, name, nameOfTemplateField, latexName=None, cpu=True, gpu=False): self.addArray(name, latexName=latexName, cpu=cpu, gpu=gpu, **self._fieldInformation[nameOfTemplateField]) - def iterate(self, sliceObj=None, gpu=False, ghostLayers=None): - if ghostLayers is None: + def iterate(self, sliceObj=None, gpu=False, ghostLayers=True): + if ghostLayers is True: ghostLayers = self.defaultGhostLayers + elif ghostLayers is False: + ghostLayers = 0 + if sliceObj is None: sliceObj = (slice(None, None, None),) * self.dim sliceObj = normalizeSlice(sliceObj, tuple(s + 2 * ghostLayers for s in self._domainSize)) sliceObj = tuple(s if type(s) is slice else slice(s, s+1, None) for s in sliceObj) arrays = self.gpuArrays if gpu else self.cpuArrays - iterDict = {} + customDataDict = self.customDataGpu if gpu else self.customDataCpu + iterDict = customDataDict.copy() for name, arr in arrays.items(): fieldGls = self._fieldInformation[name]['ghostLayers'] if fieldGls < ghostLayers: @@ -307,14 +306,13 @@ class SerialDataHandling(DataHandling): yield SerialBlock(iterDict, offset, sliceObj) def gatherArray(self, name, sliceObj=None, **kwargs): - with self.accessWrapper(name): - gls = self._fieldInformation[name]['ghostLayers'] - arr = self.cpuArrays[name] - arr = removeGhostLayers(arr, indexDimensions=self.fields[name].indexDimensions, ghostLayers=gls) + gls = self._fieldInformation[name]['ghostLayers'] + arr = self.cpuArrays[name] + arr = removeGhostLayers(arr, indexDimensions=self.fields[name].indexDimensions, ghostLayers=gls) - if sliceObj is not None: - arr = arr[sliceObj] - yield arr + if sliceObj is not None: + arr = arr[sliceObj] + yield arr def swap(self, name1, name2, gpu=False): if not gpu: @@ -332,7 +330,7 @@ class SerialDataHandling(DataHandling): def runKernel(self, kernelFunc, *args, **kwargs): dataUsedInKernel = [self._fieldLatexNameToDataName[p.fieldName] - for p in kernelFunc.ast.parameters if p.isFieldPtrArgument] + for p in kernelFunc.parameters if p.isFieldPtrArgument] arrays = self.gpuArrays if kernelFunc.ast.backend == 'gpucuda' else 
diff --git a/field.py b/field.py
index 410952c492e025f3db17998619ad5bc7fb7815bb..def8e0dd71b57988bd2b0da7e40c6c9213eb4210 100644
--- a/field.py
+++ b/field.py
@@ -444,7 +444,7 @@ def getLayoutOfArray(arr, indexDimensionIds=[]):
     return getLayoutFromStrides(arr.strides, indexDimensionIds)
 
 
-def createNumpyArrayWithLayout(shape, layout):
+def createNumpyArrayWithLayout(shape, layout, **kwargs):
     """
     Creates a numpy array with
     :param shape: shape of the resulting array
@@ -469,7 +469,7 @@
     for a, b in swaps:
         shape[a], shape[b] = shape[b], shape[a]
 
-    res = np.empty(shape, order='c')
+    res = np.empty(shape, order='c', **kwargs)
     for a, b in reversed(swaps):
         res = res.swapaxes(a, b)
     return res
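createNumpyArrayWithLayout now forwards extra keyword arguments to np.empty, which is what lets SerialDataHandling.addArray above route dtype (and anything else np.empty accepts) through a single call. A small sketch using the helpers this diff imports from pystencils.field; the concrete tuple returned for 'fzyx' is an assumption:

import numpy as np
from pystencils.field import layoutStringToTuple, createNumpyArrayWithLayout

layoutTuple = layoutStringToTuple('fzyx', 4)   # e.g. (3, 2, 1, 0): f slowest, x fastest
arr = createNumpyArrayWithLayout(shape=(16, 16, 16, 3), layout=layoutTuple,
                                 dtype=np.float32)   # dtype now reaches np.empty
assert arr.dtype == np.float32 and arr.shape == (16, 16, 16, 3)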
diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py
index 96087f1a48ce3478d321a00e63afb4895f2fcf38..6f5633d5e99cb7ff751800f94e1cab1d4369b1c6 100644
--- a/gpucuda/cudajit.py
+++ b/gpucuda/cudajit.py
@@ -53,6 +53,7 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}):
         func(*args, **dictWithBlockAndThreadNumbers)
         #cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called
     wrapper.ast = kernelFunctionNode
+    wrapper.parameters = kernelFunctionNode.parameters
     return wrapper
 
 
diff --git a/parallel/blockiteration.py b/parallel/blockiteration.py
index 0253681286b98007304186be77e880514a39c532..e1eef994d78820a77d2386758a175cb5bb12b854 100644
--- a/parallel/blockiteration.py
+++ b/parallel/blockiteration.py
@@ -111,7 +111,10 @@ class SerialBlock(Block):
         self._fieldDict = fieldDict
 
     def __getitem__(self, dataName):
-        return self._fieldDict[dataName][self._localSlice]
+        result = self._fieldDict[dataName]
+        if isinstance(result, np.ndarray):
+            result = result[self._localSlice]
+        return result
 
 
 class ParallelBlock(Block):
diff --git a/parallel/datahandling.py b/parallel/datahandling.py
index 679f37a9b0be9487d9d6895abfb42bf0ecd0167d..3f6205e8fc9194d03e47dda8ee5cb95c46b8bad8 100644
--- a/parallel/datahandling.py
+++ b/parallel/datahandling.py
@@ -49,13 +49,16 @@
 
     def addCustomData(self, name, cpuCreationFunction, gpuCreationFunction=None,
                       cpuToGpuTransferFunc=None, gpuToCpuTransferFunc=None):
-        self.blocks.addBlockData(name, cpuCreationFunction)
-        if gpuCreationFunction:
-            self.blocks.addBlockData(self.GPU_DATA_PREFIX + name, gpuCreationFunction)
+        if cpuCreationFunction and gpuCreationFunction:
             if cpuToGpuTransferFunc is None or gpuToCpuTransferFunc is None:
                 raise ValueError("For GPU data, both transfer functions have to be specified")
             self._customDataTransferFunctions[name] = (cpuToGpuTransferFunc, gpuToCpuTransferFunc)
 
+        if cpuCreationFunction:
+            self.blocks.addBlockData(name, cpuCreationFunction)
+        if gpuCreationFunction:
+            self.blocks.addBlockData(self.GPU_DATA_PREFIX + name, gpuCreationFunction)
+
     def addArray(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None,
                  cpu=True, gpu=False):
         if ghostLayers is None:
@@ -77,6 +80,7 @@
                              'dtype': dtype}
 
         layoutMap = {'fzyx': wlb.field.Layout.fzyx, 'zyxf': wlb.field.Layout.zyxf,
+                     'f': wlb.field.Layout.fzyx,
                      'SoA': wlb.field.Layout.fzyx, 'AoS': wlb.field.Layout.zyxf}
 
         if cpu:
@@ -116,9 +120,12 @@
         for block in self.blocks:
             block[name1].swapDataPointers(block[name2])
 
-    def iterate(self, sliceObj=None, gpu=False, ghostLayers=None):
-        if ghostLayers is None:
+    def iterate(self, sliceObj=None, gpu=False, ghostLayers=True):
+        if ghostLayers is True:
             ghostLayers = self.defaultGhostLayers
+        elif ghostLayers is False:
+            ghostLayers = 0
+
         prefix = self.GPU_DATA_PREFIX if gpu else ""
         if sliceObj is None:
             yield from slicedBlockIteration(self.blocks, sliceObj, ghostLayers, ghostLayers,
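Both data handlings now share one ghostLayers convention: True selects the handler's defaultGhostLayers, False means no ghost layers, and an integer is used as given. The dispatch logic, extracted as a standalone sketch (resolveGhostLayers is a hypothetical name; the branches mirror the two iterate() hunks):

def resolveGhostLayers(ghostLayers, default):
    # True -> handler default, False -> 0, int -> taken verbatim
    if ghostLayers is True:
        return default
    elif ghostLayers is False:
        return 0
    return ghostLayers

assert resolveGhostLayers(True, 1) == 1
assert resolveGhostLayers(False, 1) == 0
assert resolveGhostLayers(2, 1) == 2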