From 5bcf24bf3fd174c0d9263190a67d4716beed91f1 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Mon, 22 Jan 2018 16:17:21 +0100 Subject: [PATCH] Started to implement old scenarios as steps - Step working for serial CPU scenarios --- datahandling/__init__.py | 39 +++++ datahandling/datahandling_interface.py | 154 +++++++++++++++++ .../parallel_datahandling.py | 2 +- .../serial_datahandling.py | 159 +----------------- parallel/blockiteration.py | 4 + 5 files changed, 202 insertions(+), 156 deletions(-) create mode 100644 datahandling/__init__.py create mode 100644 datahandling/datahandling_interface.py rename parallel/datahandling.py => datahandling/parallel_datahandling.py (99%) rename datahandling.py => datahandling/serial_datahandling.py (58%) diff --git a/datahandling/__init__.py b/datahandling/__init__.py new file mode 100644 index 000000000..533598c58 --- /dev/null +++ b/datahandling/__init__.py @@ -0,0 +1,39 @@ +from .serial_datahandling import SerialDataHandling + +try: + import waLBerla + if waLBerla.cpp_available: + from .parallel_datahandling import ParallelDataHandling + else: + waLBerla = None +except ImportError: + waLBerla = None + ParallelDataHandling = None + + +def createDataHandling(parallel, domainSize, periodicity, defaultLayout='SoA', defaultGhostLayers=1): + if parallel: + if waLBerla is None: + raise ValueError("Cannot create parallel data handling because waLBerla module is not available") + + if periodicity is False or periodicity is None: + periodicity = (0, 0, 0) + elif periodicity is True: + periodicity = (1, 1, 1) + else: + periodicity = (int(bool(x)) for x in periodicity) + if len(periodicity) == 2: + periodicity += (1,) + + if len(domainSize) == 2: + dim = 2 + domainSize = (domainSize[0], domainSize[1], 1) + else: + dim = 3 + + blockStorage = waLBerla.createUniformBlockGrid(cells=domainSize, periodicity=periodicity) + return ParallelDataHandling(blocks=blockStorage, dim=dim, + defaultLayout=defaultLayout, 
defaultGhostLayers=defaultGhostLayers) + else: + return SerialDataHandling(domainSize, periodicity=periodicity, + defaultLayout=defaultLayout, defaultGhostLayers=defaultGhostLayers) diff --git a/datahandling/datahandling_interface.py b/datahandling/datahandling_interface.py new file mode 100644 index 000000000..ced806dd8 --- /dev/null +++ b/datahandling/datahandling_interface.py @@ -0,0 +1,154 @@ +import numpy as np +from abc import ABC, abstractmethod + + +class DataHandling(ABC): + """ + Manages the storage of arrays and maps them to a symbolic field. + Two versions are available: a simple, pure Python implementation for single node + simulations :py:class:SerialDataHandling and a distributed version using waLBerla in :py:class:ParallelDataHandling + + Keep in mind that the data can be distributed, so use the 'access' method whenever possible and avoid the + 'gather' function that collects (parts of the) distributed data on a single process. + """ + + # ---------------------------- Adding and accessing data ----------------------------------------------------------- + + @property + @abstractmethod + def dim(self): + """Dimension of the domain, either 2 or 3""" + + @abstractmethod + def addArray(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None, cpu=True, gpu=False): + """ + Adds a (possibly distributed) array to the handling that can be accessed using the given name. + For each array a symbolic field is available via the 'fields' dictionary + + :param name: unique name that is used to access the field later + :param fSize: shape of the dim+1 coordinate. DataHandling supports zero or one index dimensions, i.e. scalar + fields and vector fields. This parameter gives the shape of the index dimensions. 
The default + value of 1 means no index dimension + :param dtype: data type of the array as numpy data type + :param latexName: optional, name of the symbolic field, if not given 'name' is used + :param ghostLayers: number of ghost layers - if not specified a default value specified in the constructor + is used + :param layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'. + this is only important if fSize > 1 + :param cpu: allocate field on the CPU + :param gpu: allocate field on the GPU + """ + + @abstractmethod + def hasData(self, name): + """ + Returns true if a field or custom data element with this name was added + """ + + @abstractmethod + def addArrayLike(self, name, nameOfTemplateField, latexName=None, cpu=True, gpu=False): + """ + Adds an array with the same parameters (number of ghost layers, fSize, dtype) as existing array + :param name: name of new array + :param nameOfTemplateField: name of array that is used as template + :param latexName: see 'add' method + :param cpu: see 'add' method + :param gpu: see 'add' method + """ + + @abstractmethod + def addCustomData(self, name, cpuCreationFunction, + gpuCreationFunction=None, cpuToGpuTransferFunc=None, gpuToCpuTransferFunc=None): + """ + Adds custom (non-array) data to domain + :param name: name to access data + :param cpuCreationFunction: function returning a new instance of the data that should be stored + :param gpuCreationFunction: optional, function returning a new instance, stored on GPU + :param cpuToGpuTransferFunc: function that transfers cpu to gpu version, getting two parameters (gpuInstance, cpuInstance) + :param gpuToCpuTransferFunc: function that transfers gpu to cpu version, getting two parameters (gpuInstance, cpuInstance) + :return: + """ + + def addCustomClass(self, name, classObj, cpu=True, gpu=False): + self.addCustomData(name, + cpuCreationFunction=classObj if cpu else None, + gpuCreationFunction=classObj if gpu else None, + 
cpuToGpuTransferFunc=classObj.toGpu if cpu and gpu and hasattr(classObj, 'toGpu') else None, + gpuToCpuTransferFunc=classObj.toCpu if cpu and gpu and hasattr(classObj, 'toCpu') else None) + + @property + @abstractmethod + def fields(self): + """Dictionary mapping data name to symbolic pystencils field - use this to create pystencils kernels""" + + @abstractmethod + def ghostLayersOfField(self, name): + """Returns the number of ghost layers for a specific field/array""" + + @abstractmethod + def iterate(self, sliceObj=None, gpu=False, ghostLayers=None): + """ + Iterate over local part of potentially distributed data structure. + """ + + @abstractmethod + def gatherArray(self, name, sliceObj=None, allGather=False): + """ + Gathers part of the domain on a local process. Whenever possible use 'access' instead, since this method copies + the distributed data to a single process which is inefficient and may exhaust the available memory + + :param name: name of the array to gather + :param sliceObj: slice expression of the rectangular sub-part that should be gathered + :param allGather: if False only the root process receives the result, if True all processes + :return: generator expression yielding the gathered field, the gathered field does not include any ghost layers + """ + + @abstractmethod + def runKernel(self, kernelFunc, *args, **kwargs): + """ + Runs a compiled pystencils kernel using the arrays stored in the DataHandling class for all array parameters + Additional passed arguments are directly passed to the kernel function and override possible parameters from + the DataHandling + """ + + # ------------------------------- CPU/GPU transfer ----------------------------------------------------------------- + + @abstractmethod + def toCpu(self, name): + """Copies GPU data of array with specified name to CPU. 
+ Works only if 'cpu=True' and 'gpu=True' has been used in 'add' method""" + pass + + @abstractmethod + def toGpu(self, name): + """Copies CPU data of array with specified name to GPU. + Works only if 'cpu=True' and 'gpu=True' has been used in 'add' method""" + pass + + @abstractmethod + def allToCpu(self, name): + """Copies data from GPU to CPU for all arrays that have a CPU and a GPU representation""" + pass + + @abstractmethod + def allToGpu(self, name): + """Copies data from CPU to GPU for all arrays that have a CPU and a GPU representation""" + pass + + # ------------------------------- Communication -------------------------------------------------------------------- + + def synchronizationFunctionCPU(self, names, stencil=None, **kwargs): + """ + Synchronizes ghost layers for distributed arrays - for serial scenario this has to be called + for correct periodicity handling + :param names: what data to synchronize: name of array or sequence of names + :param stencil: stencil as string defining which neighbors are synchronized e.g. 'D2Q9', 'D3Q19' + if None, a full synchronization (i.e. 
D2Q9 or D3Q27) is done + :param kwargs: implementation specific, optional optimization parameters for communication + :return: function object to run the communication + """ + + def synchronizationFunctionGPU(self, names, stencil=None, **kwargs): + """ + Synchronization of GPU fields, for documentation see CPU version above + """ diff --git a/parallel/datahandling.py b/datahandling/parallel_datahandling.py similarity index 99% rename from parallel/datahandling.py rename to datahandling/parallel_datahandling.py index 3f6205e8f..5e9a1cdc7 100644 --- a/parallel/datahandling.py +++ b/datahandling/parallel_datahandling.py @@ -1,6 +1,6 @@ import numpy as np from pystencils import Field, makeSlice -from pystencils.datahandling import DataHandling +from pystencils.datahandling.datahandling_interface import DataHandling from pystencils.parallel.blockiteration import slicedBlockIteration, blockIteration from pystencils.utils import DotDict import waLBerla as wlb diff --git a/datahandling.py b/datahandling/serial_datahandling.py similarity index 58% rename from datahandling.py rename to datahandling/serial_datahandling.py index e43c5f574..b41b5984f 100644 --- a/datahandling.py +++ b/datahandling/serial_datahandling.py @@ -1,12 +1,11 @@ import numpy as np -from abc import ABC, abstractmethod - from lbmpy.stencils import getStencil from pystencils import Field from pystencils.field import layoutStringToTuple, spatialLayoutStringToTuple, createNumpyArrayWithLayout from pystencils.parallel.blockiteration import Block, SerialBlock from pystencils.slicing import normalizeSlice, removeGhostLayers from pystencils.utils import DotDict +from pystencils.datahandling.datahandling_interface import DataHandling try: import pycuda.gpuarray as gpuarray @@ -14,158 +13,6 @@ except ImportError: gpuarray = None -class DataHandling(ABC): - """ - Manages the storage of arrays and maps them to a symbolic field. 
- Two versions are available: a simple, pure Python implementation for single node - simulations :py:class:SerialDataHandling and a distributed version using waLBerla in :py:class:ParallelDataHandling - - Keep in mind that the data can be distributed, so use the 'access' method whenever possible and avoid the - 'gather' function that has collects (parts of the) distributed data on a single process. - """ - - # ---------------------------- Adding and accessing data ----------------------------------------------------------- - - @property - @abstractmethod - def dim(self): - """Dimension of the domain, either 2 or 3""" - - @abstractmethod - def addArray(self, name, fSize=1, dtype=np.float64, latexName=None, ghostLayers=None, layout=None, cpu=True, gpu=False): - """ - Adds a (possibly distributed) array to the handling that can be accessed using the given name. - For each array a symbolic field is available via the 'fields' dictionary - - :param name: unique name that is used to access the field later - :param fSize: shape of the dim+1 coordinate. DataHandling supports zero or one index dimensions, i.e. scalar - fields and vector fields. This parameter gives the shape of the index dimensions. The default - value of 1 means no index dimension - :param dtype: data type of the array as numpy data type - :param latexName: optional, name of the symbolic field, if not given 'name' is used - :param ghostLayers: number of ghost layers - if not specified a default value specified in the constructor - is used - :param layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'. 
- this is only important if fSize > 1 - :param cpu: allocate field on the CPU - :param gpu: allocate field on the GPU - """ - - @abstractmethod - def hasData(self, name): - """ - Returns true if a field or custom data element with this name was added - """ - - @abstractmethod - def addArrayLike(self, name, nameOfTemplateField, latexName=None, cpu=True, gpu=False): - """ - Adds an array with the same parameters (number of ghost layers, fSize, dtype) as existing array - :param name: name of new array - :param nameOfTemplateField: name of array that is used as template - :param latexName: see 'add' method - :param cpu: see 'add' method - :param gpu: see 'add' method - """ - - @abstractmethod - def addCustomData(self, name, cpuCreationFunction, - gpuCreationFunction=None, cpuToGpuTransferFunc=None, gpuToCpuTransferFunc=None): - """ - Adds custom (non-array) data to domain - :param name: name to access data - :param cpuCreationFunction: function returning a new instance of the data that should be stored - :param gpuCreationFunction: optional, function returning a new instance, stored on GPU - :param cpuToGpuTransferFunc: function that transfers cpu to gpu version, getting two parameters (gpuInstance, cpuInstance) - :param gpuToCpuTransferFunc: function that transfers gpu to cpu version, getting two parameters (gpuInstance, cpuInstance) - :return: - """ - - def addCustomClass(self, name, classObj, cpu=True, gpu=False): - self.addCustomData(name, - cpuCreationFunction=classObj if cpu else None, - gpuCreationFunction=classObj if gpu else None, - cpuToGpuTransferFunc=classObj.toGpu if cpu and gpu and hasattr(classObj, 'toGpu') else None, - gpuToCpuTransferFunc=classObj.toCpu if cpu and gpu and hasattr(classObj, 'toCpu') else None) - - @property - @abstractmethod - def fields(self): - """Dictionary mapping data name to symbolic pystencils field - use this to create pystencils kernels""" - - @abstractmethod - def ghostLayersOfField(self, name): - """Returns the number of 
ghost layers for a specific field/array""" - - @abstractmethod - def iterate(self, sliceObj=None, gpu=False, ghostLayers=None): - """ - Iterate over local part of potentially distributed data structure. - """ - - @abstractmethod - def gatherArray(self, name, sliceObj=None, allGather=False): - """ - Gathers part of the domain on a local process. Whenever possible use 'access' instead, since this method copies - the distributed data to a single process which is inefficient and may exhaust the available memory - - :param name: name of the array to gather - :param sliceObj: slice expression of the rectangular sub-part that should be gathered - :param allGather: if False only the root process receives the result, if True all processes - :return: generator expression yielding the gathered field, the gathered field does not include any ghost layers - """ - - @abstractmethod - def runKernel(self, kernelFunc, *args, **kwargs): - """ - Runs a compiled pystencils kernel using the arrays stored in the DataHandling class for all array parameters - Additional passed arguments are directly passed to the kernel function and override possible parameters from - the DataHandling - """ - - # ------------------------------- CPU/GPU transfer ----------------------------------------------------------------- - - @abstractmethod - def toCpu(self, name): - """Copies GPU data of array with specified name to CPU. - Works only if 'cpu=True' and 'gpu=True' has been used in 'add' method""" - pass - - @abstractmethod - def toGpu(self, name): - """Copies GPU data of array with specified name to GPU. 
- Works only if 'cpu=True' and 'gpu=True' has been used in 'add' method""" - pass - - @abstractmethod - def allToCpu(self, name): - """Copies data from GPU to CPU for all arrays that have a CPU and a GPU representation""" - pass - - @abstractmethod - def allToGpu(self, name): - """Copies data from CPU to GPU for all arrays that have a CPU and a GPU representation""" - pass - - # ------------------------------- Communication -------------------------------------------------------------------- - - def synchronizationFunctionCPU(self, names, stencil=None, **kwargs): - """ - Synchronizes ghost layers for distributed arrays - for serial scenario this has to be called - for correct periodicity handling - :param names: what data to synchronize: name of array or sequence of names - :param stencil: stencil as string defining which neighbors are synchronized e.g. 'D2Q9', 'D3Q19' - if None, a full synchronization (i.e. D2Q9 or D3Q27) is done - :param kwargs: implementation specific, optional optimization parameters for communication - :return: function object to run the communication - """ - - def synchronizationFunctionGPU(self, names, stencil=None, **kwargs): - """ - Synchronization of GPU fields, for documentation see CPU version above - """ - - class SerialDataHandling(DataHandling): class _PassThroughContextManager: @@ -308,9 +155,11 @@ class SerialDataHandling(DataHandling): def gatherArray(self, name, sliceObj=None, **kwargs): gls = self._fieldInformation[name]['ghostLayers'] arr = self.cpuArrays[name] - arr = removeGhostLayers(arr, indexDimensions=self.fields[name].indexDimensions, ghostLayers=gls) + indDimensions = self.fields[name].indexDimensions + arr = removeGhostLayers(arr, indexDimensions=indDimensions, ghostLayers=gls) if sliceObj is not None: + sliceObj = normalizeSlice(sliceObj, arr.shape[:-indDimensions]) arr = arr[sliceObj] yield arr diff --git a/parallel/blockiteration.py b/parallel/blockiteration.py index e1eef994d..44a22d95e 100644 --- 
a/parallel/blockiteration.py +++ b/parallel/blockiteration.py @@ -100,6 +100,10 @@ class Block: """Shape of the fields (potentially including ghost layers)""" return tuple(s.stop - s.start for s in self._localSlice) + @property + def globalSlice(self): + """Slice in global coordinates""" + return tuple(slice(off, off+size) for off, size in zip(self._offset, self.shape)) # ----------------------------- Implementation details ----------------------------------------------------------------- -- GitLab