Skip to content
Snippets Groups Projects
Commit eee767f9 authored by Martin Bauer's avatar Martin Bauer
Browse files

Fixed CUDA resource problems in GPU test (too many registers used)

-> smaller block
parent 0ff9a94e
No related merge requests found
...@@ -41,7 +41,7 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}): ...@@ -41,7 +41,7 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}):
shape = _checkArguments(parameters, fullArguments) shape = _checkArguments(parameters, fullArguments)
indexing = kernelFunctionNode.indexing indexing = kernelFunctionNode.indexing
dictWithBlockAndThreadNumbers = indexing.getCallParameters(shape) dictWithBlockAndThreadNumbers = indexing.getCallParameters(shape, func)
args = _buildNumpyArgumentList(parameters, fullArguments) args = _buildNumpyArgumentList(parameters, fullArguments)
cache[key] = (args, dictWithBlockAndThreadNumbers) cache[key] = (args, dictWithBlockAndThreadNumbers)
......
import abc import abc
import sympy as sp import sympy as sp
import math import math
import pycuda.driver as cuda import pycuda.driver as cuda
...@@ -30,10 +31,12 @@ class AbstractIndexing(abc.ABCMeta('ABC', (object,), {})): ...@@ -30,10 +31,12 @@ class AbstractIndexing(abc.ABCMeta('ABC', (object,), {})):
return BLOCK_IDX + THREAD_IDX return BLOCK_IDX + THREAD_IDX
@abc.abstractmethod @abc.abstractmethod
def getCallParameters(self, arrShape): def getCallParameters(self, arrShape, functionToCall):
""" """
Determine grid and block size for kernel call Determine grid and block size for kernel call
:param arrShape: the numeric (not symbolic) shape of the array :param arrShape: the numeric (not symbolic) shape of the array
:param functionToCall: compiled kernel function that should be called. Use this object to get information
about required resources like the number of registers
:return: dict with keys 'blocks' and 'threads' with tuple values for number of (x,y,z) threads and blocks :return: dict with keys 'blocks' and 'threads' with tuple values for number of (x,y,z) threads and blocks
the kernel should be started with the kernel should be started with
""" """
...@@ -84,7 +87,7 @@ class BlockIndexing(AbstractIndexing): ...@@ -84,7 +87,7 @@ class BlockIndexing(AbstractIndexing):
return coordinates[:self._dim] return coordinates[:self._dim]
def getCallParameters(self, arrShape): def getCallParameters(self, arrShape, functionToCall):
substitutionDict = {sym: value for sym, value in zip(self._symbolicShape, arrShape) if sym is not None} substitutionDict = {sym: value for sym, value in zip(self._symbolicShape, arrShape) if sym is not None}
widths = [end - start for start, end in zip(_getStartFromSlice(self._iterationSlice), widths = [end - start for start, end in zip(_getStartFromSlice(self._iterationSlice),
...@@ -94,6 +97,7 @@ class BlockIndexing(AbstractIndexing): ...@@ -94,6 +97,7 @@ class BlockIndexing(AbstractIndexing):
grid = tuple(math.ceil(length / blockSize) for length, blockSize in zip(widths, self._blockSize)) grid = tuple(math.ceil(length / blockSize) for length, blockSize in zip(widths, self._blockSize))
extendBs = (1,) * (3 - len(self._blockSize)) extendBs = (1,) * (3 - len(self._blockSize))
extendGr = (1,) * (3 - len(grid)) extendGr = (1,) * (3 - len(grid))
return {'block': self._blockSize + extendBs, return {'block': self._blockSize + extendBs,
'grid': grid + extendGr} 'grid': grid + extendGr}
...@@ -160,6 +164,32 @@ class BlockIndexing(AbstractIndexing): ...@@ -160,6 +164,32 @@ class BlockIndexing(AbstractIndexing):
return tuple(blockSize) return tuple(blockSize)
@staticmethod
def limitBlockSizeByRegisterRestriction(blockSize, requiredRegistersPerThread, device=None):
    """Shrinks the blockSize if there are too many registers used per multiprocessor.

    This is not done automatically, since the requiredRegistersPerThread are not known before compilation.
    They can be obtained by ``func.num_regs`` from a pycuda function.

    The passed ``blockSize`` is never modified; a shrunken copy is returned instead, so tuples
    (which do not support item assignment) are accepted as well as lists.

    :param blockSize: sequence with the number of threads per block in each dimension
    :param requiredRegistersPerThread: number of registers a single thread of the kernel needs
    :param device: object providing ``get_attribute``; if None, ``cuda.Context.get_device()`` is used
    :returns: smaller blockSize if too many registers are used - a tuple if ``blockSize`` was a tuple,
              otherwise a new list
    :raises ValueError: if the register limit is still exceeded when no entry can be halved any further
    """
    if device is None:
        device = cuda.Context.get_device()
    availableRegistersPerMP = device.get_attribute(cuda.device_attribute.MAX_REGISTERS_PER_MULTIPROCESSOR)

    # Work on a private copy: halving entries in place would alter the caller's list
    # and fails outright with a TypeError for tuples.
    block = list(blockSize)
    while True:
        numThreads = 1
        for t in block:
            numThreads *= t
        if numThreads * requiredRegistersPerThread <= availableRegistersPerMP:
            break

        # An explicit check instead of an assert: with assertions disabled (python -O) an entry
        # of 1 would be halved to 0 and a block without any threads would be returned.
        if not block or max(block) < 2:
            raise ValueError("Cannot shrink block size %s any further: %d registers per thread exceed the "
                             "%d registers available per multiprocessor"
                             % (tuple(block), requiredRegistersPerThread, availableRegistersPerMP))

        # Halve the first largest entry, which shrinks the block while keeping its shape balanced.
        largestEntryIdx = block.index(max(block))
        block[largestEntryIdx] //= 2

    return tuple(block) if isinstance(blockSize, tuple) else block
@staticmethod @staticmethod
def permuteBlockSizeAccordingToLayout(blockSize, layout): def permuteBlockSizeAccordingToLayout(blockSize, layout):
"""Returns modified blockSize such that the fastest coordinate gets the biggest block dimension""" """Returns modified blockSize such that the fastest coordinate gets the biggest block dimension"""
...@@ -200,7 +230,7 @@ class LineIndexing(AbstractIndexing): ...@@ -200,7 +230,7 @@ class LineIndexing(AbstractIndexing):
def coordinates(self): def coordinates(self):
return [i + offset for i, offset in zip(self._coordinates, _getStartFromSlice(self._iterationSlice))] return [i + offset for i, offset in zip(self._coordinates, _getStartFromSlice(self._iterationSlice))]
def getCallParameters(self, arrShape): def getCallParameters(self, arrShape, functionToCall):
substitutionDict = {sym: value for sym, value in zip(self._symbolicShape, arrShape) if sym is not None} substitutionDict = {sym: value for sym, value in zip(self._symbolicShape, arrShape) if sym is not None}
widths = [end - start for start, end in zip(_getStartFromSlice(self._iterationSlice), widths = [end - start for start, end in zip(_getStartFromSlice(self._iterationSlice),
...@@ -243,4 +273,5 @@ def _getEndFromSlice(iterationSlice, arrShape): ...@@ -243,4 +273,5 @@ def _getEndFromSlice(iterationSlice, arrShape):
else: else:
assert isinstance(sliceComponent, int) assert isinstance(sliceComponent, int)
res.append(sliceComponent + 1) res.append(sliceComponent + 1)
return res return res
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment