Commit 117f8b73 authored by Martin Bauer's avatar Martin Bauer
Browse files

more CUDA bugfixes & periodicity kernels for CUDA

parent 1926ebf1
......@@ -57,6 +57,11 @@ def _buildNumpyArgumentList(parameters, argumentDict):
if arg.isFieldArgument:
field = argumentDict[arg.fieldName]
if arg.isFieldPtrArgument:
actualType = field.dtype
expectedType = arg.dtype.baseType.numpyDtype
if expectedType != actualType:
raise ValueError("Data type mismatch for field '%s'. Expected '%s' got '%s'." %
(arg.fieldName, expectedType, actualType))
result.append(field.gpudata)
elif arg.isFieldStrideArgument:
dtype = getBaseType(arg.dtype).numpyDtype
......@@ -71,7 +76,7 @@ def _buildNumpyArgumentList(parameters, argumentDict):
else:
param = argumentDict[arg.name]
expectedType = arg.dtype.numpyDtype
result.append(expectedType(param))
result.append(expectedType.type(param))
assert len(result) == len(parameters)
return result
......
......@@ -72,21 +72,25 @@ class BlockIndexing(AbstractIndexing):
blockSize = self.limitBlockSizeToDeviceMaximum(blockSize)
self._blockSize = blockSize
self._iterationSlice = iterationSlice
offsets = _getStartFromSlice(self._iterationSlice)
self._coordinates = [blockIndex * bs + threadIndex + off
for blockIndex, bs, threadIndex, off in zip(BLOCK_IDX, blockSize, THREAD_IDX, offsets)]
self._coordinates = self._coordinates[:field.spatialDimensions]
self._iterationSlice = normalizeSlice(iterationSlice, field.spatialShape)
self._dim = field.spatialDimensions
self._symbolicShape = [e if isinstance(e, sp.Basic) else None for e in field.spatialShape]
@property
def coordinates(self):
return self._coordinates
offsets = _getStartFromSlice(self._iterationSlice)
coordinates = [blockIndex * bs + threadIdx + off
for blockIndex, bs, threadIdx, off in zip(BLOCK_IDX, self._blockSize, THREAD_IDX, offsets)]
return coordinates[:self._dim]
def getCallParameters(self, arrShape):
substitutionDict = {sym: value for sym, value in zip(self._symbolicShape, arrShape) if sym is not None}
widths = [end - start for start, end in zip(_getStartFromSlice(self._iterationSlice),
_getEndFromSlice(self._iterationSlice, arrShape))]
widths = sp.Matrix(widths).subs(substitutionDict)
grid = tuple(math.ceil(length / blockSize) for length, blockSize in zip(widths, self._blockSize))
extendBs = (1,) * (3 - len(self._blockSize))
extendGr = (1,) * (3 - len(grid))
......@@ -94,10 +98,9 @@ class BlockIndexing(AbstractIndexing):
'grid': grid + extendGr}
def guard(self, kernelContent, arrShape):
dim = len(self._coordinates)
arrShape = arrShape[:dim]
arrShape = arrShape[:self._dim]
conditions = [c < end
for c, end in zip(self._coordinates, _getEndFromSlice(self._iterationSlice, arrShape))]
for c, end in zip(self.coordinates, _getEndFromSlice(self._iterationSlice, arrShape))]
condition = conditions[0]
for c in conditions[1:]:
condition = sp.And(condition, c)
......@@ -190,22 +193,26 @@ class LineIndexing(AbstractIndexing):
coordinates[0], coordinates[fastestCoordinate] = coordinates[fastestCoordinate], coordinates[0]
self._coordinates = coordinates
self._iterationSlice = iterationSlice
self._iterationSlice = normalizeSlice(iterationSlice, field.spatialShape)
self._symbolicShape = [e if isinstance(e, sp.Basic) else None for e in field.spatialShape]
@property
def coordinates(self):
return [i + offset for i, offset in zip(self._coordinates, _getStartFromSlice(self._iterationSlice))]
def getCallParameters(self, arrShape):
substitutionDict = {sym: value for sym, value in zip(self._symbolicShape, arrShape) if sym is not None}
widths = [end - start for start, end in zip(_getStartFromSlice(self._iterationSlice),
_getEndFromSlice(self._iterationSlice, arrShape))]
widths = sp.Matrix(widths).subs(substitutionDict)
def getShapeOfCudaIdx(cudaIdx):
if cudaIdx not in self._coordinates:
return 1
else:
idx = self._coordinates.index(cudaIdx)
return widths[idx]
return int(widths[idx])
return {'block': tuple([getShapeOfCudaIdx(idx) for idx in THREAD_IDX]),
'grid': tuple([getShapeOfCudaIdx(idx) for idx in BLOCK_IDX])}
......
......@@ -5,7 +5,7 @@ from pystencils.types import TypedSymbol, BasicType, StructType
from pystencils import Field
def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, indexingCreator=BlockIndexing,
def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None, indexingCreator=BlockIndexing,
iterationSlice=None, ghostLayers=None):
fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol)
allFields = fieldsRead.union(fieldsWritten)
......@@ -25,13 +25,12 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None
iterationSlice = []
if isinstance(ghostLayers, int):
for i in range(len(commonShape)):
iterationSlice.append(slice(ghostLayers[i], -ghostLayers[i]))
iterationSlice.append(slice(ghostLayers[i], -ghostLayers[i] if ghostLayers[i] > 0 else None))
else:
for i in range(len(commonShape)):
iterationSlice.append(slice(ghostLayers[i][0], -ghostLayers[i][1]))
iterationSlice.append(slice(ghostLayers[i][0], -ghostLayers[i][1] if ghostLayers[i][1] > 0 else None))
indexing = indexingCreator(field=list(fieldsRead)[0], iterationSlice=iterationSlice)
indexing = indexingCreator(field=list(allFields)[0], iterationSlice=iterationSlice)
block = Block(assignments)
block = indexing.guard(block, commonShape)
......
import sympy as sp
from pystencils import Field
from pystencils.slicing import normalizeSlice, getPeriodicBoundarySrcDstSlices
from pystencils.gpucuda import makePythonFunction
from pystencils.gpucuda.kernelcreation import createCUDAKernel
def createCopyKernel(domainSize, fromSlice, toSlice, indexDimensions=0, indexDimShape=1):
"""Copies a rectangular part of an array to another non-overlapping part"""
if indexDimensions not in (0, 1):
raise NotImplementedError("Works only for one or zero index coordinates")
f = Field.createGeneric("pdfs", len(domainSize), indexDimensions=indexDimensions)
normalizedFromSlice = normalizeSlice(fromSlice, f.spatialShape)
normalizedToSlice = normalizeSlice(toSlice, f.spatialShape)
offset = [s1.start - s2.start for s1, s2 in zip(normalizedFromSlice, normalizedToSlice)]
assert offset == [s1.stop - s2.stop for s1, s2 in zip(normalizedFromSlice, normalizedToSlice)], "Slices have to have same size"
updateEqs = []
for i in range(indexDimShape):
eq = sp.Eq(f(i), f[tuple(offset)](i))
updateEqs.append(eq)
ast = createCUDAKernel(updateEqs, iterationSlice=toSlice)
return makePythonFunction(ast)
def getPeriodicBoundaryFunctor(stencil, domainSize, indexDimensions=0, indexDimShape=1, ghostLayers=1, thickness=None):
srcDstSliceTuples = getPeriodicBoundarySrcDstSlices(stencil, ghostLayers, thickness)
kernels = []
indexDimensions = indexDimensions
for srcSlice, dstSlice in srcDstSliceTuples:
kernels.append(createCopyKernel(domainSize, srcSlice, dstSlice, indexDimensions, indexDimShape))
def functor(pdfs):
for kernel in kernels:
kernel(pdfs=pdfs)
return functor
......@@ -30,6 +30,8 @@ def normalizeSlice(slices, sizes):
newStart = 0
elif type(s.start) is float:
newStart = int(s.start * size)
elif not isinstance(s.start, sp.Basic) and s.start < 0:
newStart = size + s.start
else:
newStart = s.start
......@@ -160,14 +162,7 @@ def getGhostRegionSlice(direction, ghostLayers=1, thickness=None, fullSlice=Fals
return tuple(slices)
def getPeriodicBoundaryFunctor(stencil, ghostLayers=1, thickness=None):
"""
Returns a function that applies periodic boundary conditions
:param stencil: sequence of directions e.g. ( [0,1], [0,-1] ) for y periodicity
:param ghostLayers: how many ghost layers the array has
:param thickness: how many of the ghost layers to copy, None means 'all'
:return: function that takes a single array and applies the periodic copy operation
"""
def getPeriodicBoundarySrcDstSlices(stencil, ghostLayers=1, thickness=None):
srcDstSliceTuples = []
for d in stencil:
......@@ -176,21 +171,24 @@ def getPeriodicBoundaryFunctor(stencil, ghostLayers=1, thickness=None):
invDir = (-e for e in d)
src = getSliceBeforeGhostLayer(invDir, ghostLayers, thickness=thickness, fullSlice=False)
dst = getGhostRegionSlice(d, ghostLayers, thickness=thickness, fullSlice=False)
print(d, ", ", src, "->", dst)
srcDstSliceTuples.append((src, dst))
return srcDstSliceTuples
def getPeriodicBoundaryFunctor(stencil, ghostLayers=1, thickness=None):
"""
Returns a function that applies periodic boundary conditions
:param stencil: sequence of directions e.g. ( [0,1], [0,-1] ) for y periodicity
:param ghostLayers: how many ghost layers the array has
:param thickness: how many of the ghost layers to copy, None means 'all'
:return: function that takes a single array and applies the periodic copy operation
"""
srcDstSliceTuples = getPeriodicBoundarySrcDstSlices(stencil, ghostLayers, thickness)
def functor(field):
def functor(pdfs):
for srcSlice, dstSlice in srcDstSliceTuples:
field[dstSlice] = field[srcSlice]
pdfs[dstSlice] = pdfs[srcSlice]
return functor
if __name__ == '__main__':
import numpy as np
f = np.arange(0, 8*8).reshape(8, 8)
from lbmpy.stencils import getStencil
res = getPeriodicBoundaryFunctor(getStencil("D2Q9"), ghostLayers=2, thickness=1)
print(f)
res(f)
print(f)
......@@ -533,7 +533,8 @@ def getOptimalLoopOrdering(fields):
refField = next(iter(fields))
for field in fields:
if field.spatialDimensions != refField.spatialDimensions:
raise ValueError("All fields have to have the same number of spatial dimensions")
raise ValueError("All fields have to have the same number of spatial dimensions. Spatial field dimensions: "
+ str({f.name: f.spatialShape for f in fields}))
layouts = set([field.layout for field in fields])
if len(layouts) > 1:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment