Skip to content
Snippets Groups Projects
Commit 882c84e6 authored by Martin Bauer
Browse files

More flexible ghost layer specification for CUDA kernels

parent 771a9b22
No related merge requests found
......@@ -73,8 +73,8 @@ class BlockIndexing(AbstractIndexing):
blockSize = self.limitBlockSizeToDeviceMaximum(blockSize)
self._blockSize = blockSize
self._coordinates = [blockIndex * bs + threadIndex + ghostLayers
for blockIndex, bs, threadIndex in zip(BLOCK_IDX, blockSize, THREAD_IDX)]
self._coordinates = [blockIndex * bs + threadIndex + gl[0]
for blockIndex, bs, threadIndex, gl in zip(BLOCK_IDX, blockSize, THREAD_IDX, ghostLayers)]
self._coordinates = self._coordinates[:field.spatialDimensions]
self._ghostLayers = ghostLayers
......@@ -85,7 +85,7 @@ class BlockIndexing(AbstractIndexing):
def getCallParameters(self, arrShape):
dim = len(self._coordinates)
arrShape = arrShape[:dim]
arrShape = [s - (gl[0] + gl[1]) for s, gl in zip(arrShape[:dim], self._ghostLayers)]
grid = tuple(math.ceil(length / blockSize) for length, blockSize in zip(arrShape, self._blockSize))
extendBs = (1,) * (3 - len(self._blockSize))
extendGr = (1,) * (3 - len(grid))
......@@ -95,8 +95,8 @@ class BlockIndexing(AbstractIndexing):
def guard(self, kernelContent, arrShape):
dim = len(self._coordinates)
arrShape = arrShape[:dim]
conditions = [c < shapeComponent - self._ghostLayers
for c, shapeComponent in zip(self._coordinates, arrShape)]
conditions = [c < shapeComponent - gl[1]
for c, shapeComponent, gl in zip(self._coordinates, arrShape, self._ghostLayers)]
condition = conditions[0]
for c in conditions[1:]:
condition = sp.And(condition, c)
......@@ -189,7 +189,7 @@ class LineIndexing(AbstractIndexing):
coordinates[0], coordinates[fastestCoordinate] = coordinates[fastestCoordinate], coordinates[0]
self._coordiantesNoGhostLayer = coordinates
self._coordinates = [i + ghostLayers for i in coordinates]
self._coordinates = [i + gl[0] for i, gl in zip(coordinates, ghostLayers)]
self._ghostLayers = ghostLayers
@property
......@@ -201,7 +201,8 @@ class LineIndexing(AbstractIndexing):
if cudaIdx not in self._coordiantesNoGhostLayer:
return 1
else:
return arrShape[self._coordiantesNoGhostLayer.index(cudaIdx)] - 2 * self._ghostLayers
idx = self._coordiantesNoGhostLayer.index(cudaIdx)
return arrShape[idx] - (self._ghostLayers[idx][0] + self._ghostLayers[idx][1])
return {'block': tuple([getShapeOfCudaIdx(idx) for idx in THREAD_IDX]),
'grid': tuple([getShapeOfCudaIdx(idx) for idx in BLOCK_IDX])}
......
......@@ -5,7 +5,8 @@ from pystencils.types import TypedSymbol, BasicType, StructType
from pystencils import Field
def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, indexingCreator=BlockIndexing):
def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, indexingCreator=BlockIndexing,
ghostLayers=None):
fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol)
allFields = fieldsRead.union(fieldsWritten)
readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten])
......@@ -14,11 +15,17 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None,
for eq in listOfEquations:
fieldAccesses.update(eq.atoms(Field.Access))
requiredGhostLayers = max([fa.requiredGhostLayers for fa in fieldAccesses])
indexing = indexingCreator(field=list(fieldsRead)[0], ghostLayers=requiredGhostLayers)
commonShape = getCommonShape(allFields)
if ghostLayers is None:
requiredGhostLayers = max([fa.requiredGhostLayers for fa in fieldAccesses])
ghostLayers = [(requiredGhostLayers, requiredGhostLayers)] * len(commonShape)
if isinstance(ghostLayers, int):
ghostLayers = [(ghostLayers, ghostLayers)] * len(commonShape)
indexing = indexingCreator(field=list(fieldsRead)[0], ghostLayers=ghostLayers)
block = Block(assignments)
block = indexing.guard(block, getCommonShape(allFields))
block = indexing.guard(block, commonShape)
ast = KernelFunction(block, allFields, functionName)
ast.globalVariables.update(indexing.indexVariables)
......@@ -63,7 +70,8 @@ def createdIndexedCUDAKernel(listOfEquations, indexFields, functionName="kernel"
coordinateSymbolAssignments = [getCoordinateSymbolAssignment(n) for n in coordinateNames[:spatialCoordinates]]
coordinateTypedSymbols = [eq.lhs for eq in coordinateSymbolAssignments]
indexing = indexingCreator(field=list(indexFields)[0], ghostLayers=0)
idxField = list(indexFields)[0]
indexing = indexingCreator(field=idxField, ghostLayers=[(0, 0)] * len(idxField.shape))
functionBody = Block(coordinateSymbolAssignments + assignments)
functionBody = indexing.guard(functionBody, getCommonShape(indexFields))
......
......@@ -77,6 +77,8 @@ def makeLoopOverDomain(body, functionName, iterationSlice=None, ghostLayers=None
if ghostLayers is None:
requiredGhostLayers = max([fa.requiredGhostLayers for fa in fieldAccesses])
ghostLayers = [(requiredGhostLayers, requiredGhostLayers)] * len(loopOrder)
if isinstance(ghostLayers, int):
ghostLayers = [(ghostLayers, ghostLayers)] * len(loopOrder)
currentBody = body
lastLoop = None
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment