From 117f8b73ded3e7770749d3bbc6d7d56ac0df8b6e Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Thu, 30 Mar 2017 16:39:20 +0200 Subject: [PATCH] more CUDA bugfixes & periodicity kernels for CUDA --- gpucuda/cudajit.py | 7 ++++++- gpucuda/indexing.py | 33 +++++++++++++++++++------------- gpucuda/kernelcreation.py | 9 ++++----- gpucuda/periodicity.py | 40 +++++++++++++++++++++++++++++++++++++++ slicing.py | 36 +++++++++++++++++------------------ transformations.py | 3 ++- 6 files changed, 89 insertions(+), 39 deletions(-) create mode 100644 gpucuda/periodicity.py diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py index 041949357..f57ded792 100644 --- a/gpucuda/cudajit.py +++ b/gpucuda/cudajit.py @@ -57,6 +57,11 @@ def _buildNumpyArgumentList(parameters, argumentDict): if arg.isFieldArgument: field = argumentDict[arg.fieldName] if arg.isFieldPtrArgument: + actualType = field.dtype + expectedType = arg.dtype.baseType.numpyDtype + if expectedType != actualType: + raise ValueError("Data type mismatch for field '%s'. Expected '%s' got '%s'." % + (arg.fieldName, expectedType, actualType)) result.append(field.gpudata) elif arg.isFieldStrideArgument: dtype = getBaseType(arg.dtype).numpyDtype @@ -71,7 +76,7 @@ def _buildNumpyArgumentList(parameters, argumentDict): else: param = argumentDict[arg.name] expectedType = arg.dtype.numpyDtype - result.append(expectedType(param)) + result.append(expectedType.type(param)) assert len(result) == len(parameters) return result diff --git a/gpucuda/indexing.py b/gpucuda/indexing.py index 7fa4e0426..651340d6d 100644 --- a/gpucuda/indexing.py +++ b/gpucuda/indexing.py @@ -72,21 +72,25 @@ class BlockIndexing(AbstractIndexing): blockSize = self.limitBlockSizeToDeviceMaximum(blockSize) self._blockSize = blockSize - - self._iterationSlice = iterationSlice - offsets = _getStartFromSlice(self._iterationSlice) - self._coordinates = [blockIndex * bs + threadIndex + off - for blockIndex, bs, threadIndex, off in zip(BLOCK_IDX, blockSize, THREAD_IDX, offsets)] - - self._coordinates = self._coordinates[:field.spatialDimensions] + self._iterationSlice = normalizeSlice(iterationSlice, field.spatialShape) + self._dim = field.spatialDimensions + self._symbolicShape = [e if isinstance(e, sp.Basic) else None for e in field.spatialShape] @property def coordinates(self): - return self._coordinates + offsets = _getStartFromSlice(self._iterationSlice) + coordinates = [blockIndex * bs + threadIdx + off + for blockIndex, bs, threadIdx, off in zip(BLOCK_IDX, self._blockSize, THREAD_IDX, offsets)] + + return coordinates[:self._dim] def getCallParameters(self, arrShape): + substitutionDict = {sym: value for sym, value in zip(self._symbolicShape, arrShape) if sym is not None} + widths = [end - start for start, end in zip(_getStartFromSlice(self._iterationSlice), _getEndFromSlice(self._iterationSlice, arrShape))] + widths = sp.Matrix(widths).subs(substitutionDict) + grid = tuple(math.ceil(length / blockSize) for length, blockSize in zip(widths, self._blockSize)) extendBs = (1,) * (3 - len(self._blockSize)) extendGr = (1,) * (3 - len(grid)) @@ -94,10 +98,9 @@ class BlockIndexing(AbstractIndexing): 'grid': grid + extendGr} def guard(self, kernelContent, arrShape): - dim = len(self._coordinates) - arrShape = arrShape[:dim] + arrShape = arrShape[:self._dim] conditions = [c < end - for c, end in zip(self._coordinates, _getEndFromSlice(self._iterationSlice, arrShape))] + for c, end in zip(self.coordinates, _getEndFromSlice(self._iterationSlice, arrShape))] condition = conditions[0] for c in conditions[1:]: condition = sp.And(condition, c) @@ -190,22 +193,26 @@ class LineIndexing(AbstractIndexing): coordinates[0], coordinates[fastestCoordinate] = coordinates[fastestCoordinate], coordinates[0] self._coordinates = coordinates - self._iterationSlice = iterationSlice + self._iterationSlice = normalizeSlice(iterationSlice, field.spatialShape) + self._symbolicShape = [e if isinstance(e, sp.Basic) else None for e in field.spatialShape] @property def coordinates(self): return [i + offset for i, offset in zip(self._coordinates, _getStartFromSlice(self._iterationSlice))] def getCallParameters(self, arrShape): + substitutionDict = {sym: value for sym, value in zip(self._symbolicShape, arrShape) if sym is not None} + widths = [end - start for start, end in zip(_getStartFromSlice(self._iterationSlice), _getEndFromSlice(self._iterationSlice, arrShape))] + widths = sp.Matrix(widths).subs(substitutionDict) def getShapeOfCudaIdx(cudaIdx): if cudaIdx not in self._coordinates: return 1 else: idx = self._coordinates.index(cudaIdx) - return widths[idx] + return int(widths[idx]) return {'block': tuple([getShapeOfCudaIdx(idx) for idx in THREAD_IDX]), 'grid': tuple([getShapeOfCudaIdx(idx) for idx in BLOCK_IDX])} diff --git a/gpucuda/kernelcreation.py b/gpucuda/kernelcreation.py index cbe05e971..3d825fa21 100644 --- a/gpucuda/kernelcreation.py +++ b/gpucuda/kernelcreation.py @@ -5,7 +5,7 @@ from pystencils.types import TypedSymbol, BasicType, StructType from pystencils import Field -def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, indexingCreator=BlockIndexing, +def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, indexingCreator=BlockIndexing, iterationSlice=None, ghostLayers=None): fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol) allFields = fieldsRead.union(fieldsWritten) @@ -25,13 +25,12 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None iterationSlice = [] if isinstance(ghostLayers, int): for i in range(len(commonShape)): - iterationSlice.append(slice(ghostLayers[i], -ghostLayers[i])) + iterationSlice.append(slice(ghostLayers[i], -ghostLayers[i] if ghostLayers[i] > 0 else None)) else: for i in range(len(commonShape)): - iterationSlice.append(slice(ghostLayers[i][0], -ghostLayers[i][1])) + iterationSlice.append(slice(ghostLayers[i][0], -ghostLayers[i][1] if ghostLayers[i][1] > 0 else None)) - - indexing = indexingCreator(field=list(fieldsRead)[0], iterationSlice=iterationSlice) + indexing = indexingCreator(field=list(allFields)[0], iterationSlice=iterationSlice) block = Block(assignments) block = indexing.guard(block, commonShape) diff --git a/gpucuda/periodicity.py b/gpucuda/periodicity.py new file mode 100644 index 000000000..06b22b0a0 --- /dev/null +++ b/gpucuda/periodicity.py @@ -0,0 +1,40 @@ +import sympy as sp +from pystencils import Field +from pystencils.slicing import normalizeSlice, getPeriodicBoundarySrcDstSlices +from pystencils.gpucuda import makePythonFunction +from pystencils.gpucuda.kernelcreation import createCUDAKernel + + +def createCopyKernel(domainSize, fromSlice, toSlice, indexDimensions=0, indexDimShape=1): + """Copies a rectangular part of an array to another non-overlapping part""" + if indexDimensions not in (0, 1): + raise NotImplementedError("Works only for one or zero index coordinates") + + f = Field.createGeneric("pdfs", len(domainSize), indexDimensions=indexDimensions) + normalizedFromSlice = normalizeSlice(fromSlice, f.spatialShape) + normalizedToSlice = normalizeSlice(toSlice, f.spatialShape) + + offset = [s1.start - s2.start for s1, s2 in zip(normalizedFromSlice, normalizedToSlice)] + assert offset == [s1.stop - s2.stop for s1, s2 in zip(normalizedFromSlice, normalizedToSlice)], "Slices have to have same size" + + updateEqs = [] + for i in range(indexDimShape): + eq = sp.Eq(f(i), f[tuple(offset)](i)) + updateEqs.append(eq) + + ast = createCUDAKernel(updateEqs, iterationSlice=toSlice) + return makePythonFunction(ast) + + +def getPeriodicBoundaryFunctor(stencil, domainSize, indexDimensions=0, indexDimShape=1, ghostLayers=1, thickness=None): + srcDstSliceTuples = getPeriodicBoundarySrcDstSlices(stencil, ghostLayers, thickness) + kernels = [] + indexDimensions = indexDimensions + for srcSlice, dstSlice in srcDstSliceTuples: + kernels.append(createCopyKernel(domainSize, srcSlice, dstSlice, indexDimensions, indexDimShape)) + + def functor(pdfs): + for kernel in kernels: + kernel(pdfs=pdfs) + + return functor diff --git a/slicing.py b/slicing.py index cf3a53cb3..b81f84498 100644 --- a/slicing.py +++ b/slicing.py @@ -30,6 +30,8 @@ def normalizeSlice(slices, sizes): newStart = 0 elif type(s.start) is float: newStart = int(s.start * size) + elif not isinstance(s.start, sp.Basic) and s.start < 0: + newStart = size + s.start else: newStart = s.start @@ -160,14 +162,7 @@ def getGhostRegionSlice(direction, ghostLayers=1, thickness=None, fullSlice=Fals return tuple(slices) -def getPeriodicBoundaryFunctor(stencil, ghostLayers=1, thickness=None): - """ - Returns a function that applies periodic boundary conditions - :param stencil: sequence of directions e.g. ( [0,1], [0,-1] ) for y periodicity - :param ghostLayers: how many ghost layers the array has - :param thickness: how many of the ghost layers to copy, None means 'all' - :return: function that takes a single array and applies the periodic copy operation - """ +def getPeriodicBoundarySrcDstSlices(stencil, ghostLayers=1, thickness=None): srcDstSliceTuples = [] for d in stencil: @@ -176,21 +171,24 @@ def getPeriodicBoundaryFunctor(stencil, ghostLayers=1, thickness=None): invDir = (-e for e in d) src = getSliceBeforeGhostLayer(invDir, ghostLayers, thickness=thickness, fullSlice=False) dst = getGhostRegionSlice(d, ghostLayers, thickness=thickness, fullSlice=False) - print(d, ", ", src, "->", dst) srcDstSliceTuples.append((src, dst)) + return srcDstSliceTuples + + +def getPeriodicBoundaryFunctor(stencil, ghostLayers=1, thickness=None): + """ + Returns a function that applies periodic boundary conditions + :param stencil: sequence of directions e.g. ( [0,1], [0,-1] ) for y periodicity + :param ghostLayers: how many ghost layers the array has + :param thickness: how many of the ghost layers to copy, None means 'all' + :return: function that takes a single array and applies the periodic copy operation + """ + srcDstSliceTuples = getPeriodicBoundarySrcDstSlices(stencil, ghostLayers, thickness) - def functor(field): + def functor(pdfs): for srcSlice, dstSlice in srcDstSliceTuples: - field[dstSlice] = field[srcSlice] + pdfs[dstSlice] = pdfs[srcSlice] return functor -if __name__ == '__main__': - import numpy as np - f = np.arange(0, 8*8).reshape(8, 8) - from lbmpy.stencils import getStencil - res = getPeriodicBoundaryFunctor(getStencil("D2Q9"), ghostLayers=2, thickness=1) - print(f) - res(f) - print(f) diff --git a/transformations.py b/transformations.py index 19aebd937..ce3a1da25 100644 --- a/transformations.py +++ b/transformations.py @@ -533,7 +533,8 @@ def getOptimalLoopOrdering(fields): refField = next(iter(fields)) for field in fields: if field.spatialDimensions != refField.spatialDimensions: - raise ValueError("All fields have to have the same number of spatial dimensions") + raise ValueError("All fields have to have the same number of spatial dimensions. Spatial field dimensions: " + + str({f.name: f.spatialShape for f in fields})) layouts = set([field.layout for field in fields]) if len(layouts) > 1: -- GitLab