From 882c84e6465e44463cbd70d3435f5e1793e77d2e Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Wed, 22 Mar 2017 17:09:53 +0100 Subject: [PATCH] More flexible ghost layer specification for CUDA kernels --- gpucuda/indexing.py | 15 ++++++++------- gpucuda/kernelcreation.py | 18 +++++++++++++----- transformations.py | 2 ++ 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/gpucuda/indexing.py b/gpucuda/indexing.py index 7cc46c133..6db21f806 100644 --- a/gpucuda/indexing.py +++ b/gpucuda/indexing.py @@ -73,8 +73,8 @@ class BlockIndexing(AbstractIndexing): blockSize = self.limitBlockSizeToDeviceMaximum(blockSize) self._blockSize = blockSize - self._coordinates = [blockIndex * bs + threadIndex + ghostLayers - for blockIndex, bs, threadIndex in zip(BLOCK_IDX, blockSize, THREAD_IDX)] + self._coordinates = [blockIndex * bs + threadIndex + gl[0] + for blockIndex, bs, threadIndex, gl in zip(BLOCK_IDX, blockSize, THREAD_IDX, ghostLayers)] self._coordinates = self._coordinates[:field.spatialDimensions] self._ghostLayers = ghostLayers @@ -85,7 +85,7 @@ class BlockIndexing(AbstractIndexing): def getCallParameters(self, arrShape): dim = len(self._coordinates) - arrShape = arrShape[:dim] + arrShape = [s - (gl[0] + gl[1]) for s, gl in zip(arrShape[:dim], self._ghostLayers)] grid = tuple(math.ceil(length / blockSize) for length, blockSize in zip(arrShape, self._blockSize)) extendBs = (1,) * (3 - len(self._blockSize)) extendGr = (1,) * (3 - len(grid)) @@ -95,8 +95,8 @@ class BlockIndexing(AbstractIndexing): def guard(self, kernelContent, arrShape): dim = len(self._coordinates) arrShape = arrShape[:dim] - conditions = [c < shapeComponent - self._ghostLayers - for c, shapeComponent in zip(self._coordinates, arrShape)] + conditions = [c < shapeComponent - gl[1] + for c, shapeComponent, gl in zip(self._coordinates, arrShape, self._ghostLayers)] condition = conditions[0] for c in conditions[1:]: condition = sp.And(condition, c) @@ -189,7 +189,7 @@ class LineIndexing(AbstractIndexing): coordinates[0], coordinates[fastestCoordinate] = coordinates[fastestCoordinate], coordinates[0] self._coordiantesNoGhostLayer = coordinates - self._coordinates = [i + ghostLayers for i in coordinates] + self._coordinates = [i + gl[0] for i, gl in zip(coordinates, ghostLayers)] self._ghostLayers = ghostLayers @property @@ -201,7 +201,8 @@ class LineIndexing(AbstractIndexing): if cudaIdx not in self._coordiantesNoGhostLayer: return 1 else: - return arrShape[self._coordiantesNoGhostLayer.index(cudaIdx)] - 2 * self._ghostLayers + idx = self._coordiantesNoGhostLayer.index(cudaIdx) + return arrShape[idx] - (self._ghostLayers[idx][0] + self._ghostLayers[idx][1]) return {'block': tuple([getShapeOfCudaIdx(idx) for idx in THREAD_IDX]), 'grid': tuple([getShapeOfCudaIdx(idx) for idx in BLOCK_IDX])} diff --git a/gpucuda/kernelcreation.py b/gpucuda/kernelcreation.py index ea06701ca..03a490aad 100644 --- a/gpucuda/kernelcreation.py +++ b/gpucuda/kernelcreation.py @@ -5,7 +5,8 @@ from pystencils.types import TypedSymbol, BasicType, StructType from pystencils import Field -def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, indexingCreator=BlockIndexing): +def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, indexingCreator=BlockIndexing, + ghostLayers=None): fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol) allFields = fieldsRead.union(fieldsWritten) readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten]) @@ -14,11 +15,17 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, for eq in listOfEquations: fieldAccesses.update(eq.atoms(Field.Access)) - requiredGhostLayers = max([fa.requiredGhostLayers for fa in fieldAccesses]) - indexing = indexingCreator(field=list(fieldsRead)[0], ghostLayers=requiredGhostLayers) + commonShape = getCommonShape(allFields) + if ghostLayers is None: + requiredGhostLayers = max([fa.requiredGhostLayers for fa in fieldAccesses]) + ghostLayers = [(requiredGhostLayers, requiredGhostLayers)] * len(commonShape) + if isinstance(ghostLayers, int): + ghostLayers = [(ghostLayers, ghostLayers)] * len(commonShape) + + indexing = indexingCreator(field=list(fieldsRead)[0], ghostLayers=ghostLayers) block = Block(assignments) - block = indexing.guard(block, getCommonShape(allFields)) + block = indexing.guard(block, commonShape) ast = KernelFunction(block, allFields, functionName) ast.globalVariables.update(indexing.indexVariables) @@ -63,7 +70,8 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel" coordinateSymbolAssignments = [getCoordinateSymbolAssignment(n) for n in coordinateNames[:spatialCoordinates]] coordinateTypedSymbols = [eq.lhs for eq in coordinateSymbolAssignments] - indexing = indexingCreator(field=list(indexFields)[0], ghostLayers=0) + idxField = list(indexFields)[0] + indexing = indexingCreator(field=idxField, ghostLayers=[(0, 0)] * len(idxField.shape)) functionBody = Block(coordinateSymbolAssignments + assignments) functionBody = indexing.guard(functionBody, getCommonShape(indexFields)) diff --git a/transformations.py b/transformations.py index a48125dea..19aebd937 100644 --- a/transformations.py +++ b/transformations.py @@ -77,6 +77,8 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None if ghostLayers is None: requiredGhostLayers = max([fa.requiredGhostLayers for fa in fieldAccesses]) ghostLayers = [(requiredGhostLayers, requiredGhostLayers)] * len(loopOrder) + if isinstance(ghostLayers, int): + ghostLayers = [(ghostLayers, ghostLayers)] * len(loopOrder) currentBody = body lastLoop = None -- GitLab