From 6860c7d2ef092e014de44646b46c57649984e9c4 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Sat, 9 Dec 2017 19:46:01 +0100 Subject: [PATCH] LB boundary generation for waLBerla --- field.py | 27 ++++++++++++++++++++------- gpucuda/cudajit.py | 8 +++++--- gpucuda/indexing.py | 14 +++++++++++--- sympyextensions.py | 8 ++++++++ transformations/transformations.py | 7 ++++--- 5 files changed, 48 insertions(+), 16 deletions(-) diff --git a/field.py b/field.py index 3fb5b72e3..b0f61e597 100644 --- a/field.py +++ b/field.py @@ -4,6 +4,7 @@ import sympy as sp from sympy.core.cache import cacheit from sympy.tensor import IndexedBase from pystencils.data_types import TypedSymbol, createType +from pystencils.sympyextensions import isIntegerSequence class Field(object): @@ -49,7 +50,8 @@ class Field(object): """ @staticmethod - def createGeneric(fieldName, spatialDimensions, dtype=np.float64, indexDimensions=0, layout='numpy'): + def createGeneric(fieldName, spatialDimensions, dtype=np.float64, indexDimensions=0, layout='numpy', + indexShape=None): """ Creates a generic field where the field size is not fixed i.e. can be called with arrays of different sizes @@ -60,13 +62,20 @@ class Field(object): :param layout: tuple specifying the loop ordering of the spatial dimensions e.g. (2, 1, 0 ) means that the outer loop loops over dimension 2, the second outer over dimension 1, and the inner loop over dimension 0. Also allowed: the strings 'numpy' (0,1,..d) or 'reverseNumpy' (d, ..., 1, 0) + :param indexShape: optional shape of the index dimensions i.e. maximum values allowed for each index dimension, + has to be a list or tuple """ if isinstance(layout, str): layout = spatialLayoutStringToTuple(layout, dim=spatialDimensions) shapeSymbol = IndexedBase(TypedSymbol(Field.SHAPE_PREFIX + fieldName, Field.SHAPE_DTYPE), shape=(1,)) strideSymbol = IndexedBase(TypedSymbol(Field.STRIDE_PREFIX + fieldName, Field.STRIDE_DTYPE), shape=(1,)) totalDimensions = spatialDimensions + indexDimensions - shape = tuple([shapeSymbol[i] for i in range(totalDimensions)]) + if indexShape is None or len(indexShape) == 0: + shape = tuple([shapeSymbol[i] for i in range(totalDimensions)]) + else: + shape = tuple([shapeSymbol[i] for i in range(spatialDimensions)] + list(indexShape)) + assert len(shape) == totalDimensions + strides = tuple([strideSymbol[i] for i in range(totalDimensions)]) npDataType = np.dtype(dtype) @@ -173,18 +182,22 @@ class Field(object): def spatialShape(self): return self.shape[:self.spatialDimensions] + @property + def indexShape(self): + return self.shape[self.spatialDimensions:] + @property def hasFixedShape(self): - try: - [int(i) for i in self.shape] - return True - except TypeError: - return False + return isIntegerSequence(self.shape) @property def indexShape(self): return self.shape[self.spatialDimensions:] + @property + def hasFixedIndexShape(self): + return isIntegerSequence(self.indexShape) + @property def spatialStrides(self): return self.strides[:self.spatialDimensions] diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py index e75603665..b815f5c33 100644 --- a/gpucuda/cudajit.py +++ b/gpucuda/cudajit.py @@ -1,7 +1,4 @@ import numpy as np -import pycuda.driver as cuda -import pycuda.autoinit -from pycuda.compiler import SourceModule from pystencils.backends.cbackend import generateC from pystencils.transformations import symbolNameToVariableName from pystencils.data_types import StructType, getBaseType @@ -18,6 +15,9 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}): returned kernel functor. :return: kernel functor """ + import pycuda.autoinit + from pycuda.compiler import SourceModule + code = "#include <cstdint>\n" code += "#define FUNC_PREFIX __global__\n" code += "#define RESTRICT __restrict__\n\n" @@ -55,6 +55,8 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}): def _buildNumpyArgumentList(parameters, argumentDict): + import pycuda.driver as cuda + argumentDict = {symbolNameToVariableName(k): v for k, v in argumentDict.items()} result = [] for arg in parameters: diff --git a/gpucuda/indexing.py b/gpucuda/indexing.py index 000be750d..d61d5353b 100644 --- a/gpucuda/indexing.py +++ b/gpucuda/indexing.py @@ -1,13 +1,13 @@ import abc import sympy as sp -import pycuda.driver as cuda -import pycuda.autoinit from pystencils.astnodes import Conditional, Block from pystencils.slicing import normalizeSlice from pystencils.data_types import TypedSymbol, createTypeFromString +AUTO_BLOCKSIZE_LIMITING = True + BLOCK_IDX = [TypedSymbol("blockIdx." + coord, createTypeFromString("int")) for coord in ('x', 'y', 'z')] THREAD_IDX = [TypedSymbol("threadIdx." + coord, createTypeFromString("int")) for coord in ('x', 'y', 'z')] @@ -71,7 +71,9 @@ class BlockIndexing(AbstractIndexing): if permuteBlockSizeDependentOnLayout: blockSize = self.permuteBlockSizeAccordingToLayout(blockSize, field.layout) - blockSize = self.limitBlockSizeToDeviceMaximum(blockSize) + if AUTO_BLOCKSIZE_LIMITING: + blockSize = self.limitBlockSizeToDeviceMaximum(blockSize) + self._blockSize = blockSize self._iterationSlice = normalizeSlice(iterationSlice, field.spatialShape) self._dim = field.spatialDimensions @@ -118,6 +120,9 @@ class BlockIndexing(AbstractIndexing): Returns the altered blockSize """ # Get device limits + import pycuda.driver as cuda + import pycuda.autoinit + da = cuda.device_attribute device = cuda.Context.get_device() @@ -169,6 +174,9 @@ class BlockIndexing(AbstractIndexing): They can be obtained by ``func.num_regs`` from a pycuda function. :returns smaller blockSize if too many registers are used. """ + import pycuda.driver as cuda + import pycuda.autoinit + da = cuda.device_attribute if device is None: device = cuda.Context.get_device() diff --git a/sympyextensions.py b/sympyextensions.py index a45ede9f8..f56781e67 100644 --- a/sympyextensions.py +++ b/sympyextensions.py @@ -16,6 +16,14 @@ def allIn(a, b): return all(element in b for element in a) +def isIntegerSequence(sequence): + try: + [int(i) for i in sequence] + return True + except TypeError: + return False + + def scalarProduct(a, b): return sum(a_i * b_i for a_i, b_i in zip(a, b)) diff --git a/transformations/transformations.py b/transformations/transformations.py index a1202dd75..d41cadc01 100644 --- a/transformations/transformations.py +++ b/transformations/transformations.py @@ -306,8 +306,6 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn dtype = PointerType(field.dtype, const=field.name in readOnlyFieldNames, restrict=True) fieldPtr = TypedSymbol("%s%s" % (Field.DATA_PREFIX, symbolNameToVariableName(field.name)), dtype) - lastPointer = fieldPtr - def createCoordinateDict(group): coordDict = {} for e in group: @@ -328,6 +326,8 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn coordDict[e] = fieldAccess.index[e - field.spatialDimensions] return coordDict + lastPointer = fieldPtr + for group in reversed(basePointerInfo[1:]): coordDict = createCoordinateDict(group) newPtr, offset = createIntermediateBasePointer(fieldAccess, coordDict, lastPointer) @@ -338,7 +338,8 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn coordDict = createCoordinateDict(basePointerInfo[0]) _, offset = createIntermediateBasePointer(fieldAccess, coordDict, lastPointer) - result = ast.ResolvedFieldAccess(lastPointer, offset, fieldAccess.field, fieldAccess.offsets, fieldAccess.index) + result = ast.ResolvedFieldAccess(lastPointer, offset, fieldAccess.field, + fieldAccess.offsets, fieldAccess.index) if isinstance(getBaseType(fieldAccess.field.dtype), StructType): newType = fieldAccess.field.dtype.getElementType(fieldAccess.index[0]) -- GitLab