Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Jonas Plewinski
pystencils
Commits
cb511afe
Commit
cb511afe
authored
Mar 17, 2017
by
Martin Bauer
Browse files
pystencils: indexed kernels for GPUs
parent
ad7a24c0
Changes
5
Hide whitespace changes
Inline
Side-by-side
cpu/cpujit.py
View file @
cb511afe
...
...
@@ -363,6 +363,8 @@ def buildCTypeArgumentList(parameterSpecification, argumentDict):
argumentDict
=
{
symbolNameToVariableName
(
k
):
v
for
k
,
v
in
argumentDict
.
items
()}
ctArguments
=
[]
arrayShapes
=
set
()
indexArrShapes
=
set
()
for
arg
in
parameterSpecification
:
if
arg
.
isFieldArgument
:
try
:
...
...
@@ -388,8 +390,11 @@ def buildCTypeArgumentList(parameterSpecification, argumentDict):
raise
ValueError
(
"Passed array '%s' has strides %s which does not match expected strides %s"
%
(
arg
.
fieldName
,
str
(
fieldArr
.
strides
),
str
(
symbolicFieldStrides
)))
if
not
symbolicField
.
isIndexField
:
if
symbolicField
.
isIndexField
:
indexArrShapes
.
add
(
fieldArr
.
shape
[:
symbolicField
.
spatialDimensions
])
else
:
arrayShapes
.
add
(
fieldArr
.
shape
[:
symbolicField
.
spatialDimensions
])
elif
arg
.
isFieldShapeArgument
:
dataType
=
toCtypes
(
getBaseType
(
arg
.
dtype
))
ctArguments
.
append
(
fieldArr
.
ctypes
.
shape_as
(
dataType
))
...
...
@@ -412,6 +417,9 @@ def buildCTypeArgumentList(parameterSpecification, argumentDict):
if
len
(
arrayShapes
)
>
1
:
raise
ValueError
(
"All passed arrays have to have the same size "
+
str
(
arrayShapes
))
if
len
(
indexArrShapes
)
>
1
:
raise
ValueError
(
"All passed index arrays have to have the same size "
+
str
(
arrayShapes
))
return
ctArguments
...
...
cpu/kernelcreation.py
View file @
cb511afe
...
...
@@ -60,7 +60,8 @@ def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, spl
return
code
def
createIndexedKernel
(
listOfEquations
,
indexFields
,
typeForSymbol
=
None
,
coordinateNames
=
(
'x'
,
'y'
,
'z'
)):
def
createIndexedKernel
(
listOfEquations
,
indexFields
,
functionName
=
"kernel"
,
typeForSymbol
=
None
,
coordinateNames
=
(
'x'
,
'y'
,
'z'
)):
"""
Similar to :func:`createKernel`, but here not all cells of a field are updated but only cells with
coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.
...
...
@@ -73,6 +74,7 @@ def createIndexedKernel(listOfEquations, indexFields, typeForSymbol=None, coordi
:param listOfEquations: list of update equations or AST nodes
:param indexFields: list of index fields, i.e. 1D fields with struct data type
:param typeForSymbol: see documentation of :func:`createKernel`
:param functionName: see documentation of :func:`createKernel`
:param coordinateNames: name of the coordinate fields in the struct data type
:return: abstract syntax tree
"""
...
...
@@ -110,7 +112,7 @@ def createIndexedKernel(listOfEquations, indexFields, typeForSymbol=None, coordi
loopBody
.
append
(
assignment
)
functionBody
=
Block
([
loopNode
])
ast
=
KernelFunction
(
functionBody
,
allFields
.
union
(
indexFields
)
)
ast
=
KernelFunction
(
functionBody
,
allFields
,
functionName
)
fixedCoordinateMapping
=
{
f
.
name
:
coordinateTypedSymbols
for
f
in
nonIndexFields
}
resolveFieldAccesses
(
ast
,
set
([
'indexField'
]),
fieldToFixedCoordinates
=
fixedCoordinateMapping
)
...
...
gpucuda/__init__.py
View file @
cb511afe
from
pystencils.gpucuda.kernelcreation
import
createCUDAKernel
from
pystencils.gpucuda.kernelcreation
import
createCUDAKernel
,
createdIndexedCUDAKernel
from
pystencils.gpucuda.cudajit
import
makePythonFunction
gpucuda/cudajit.py
View file @
cb511afe
...
...
@@ -2,39 +2,44 @@ import numpy as np
import
pycuda.driver
as
cuda
import
pycuda.autoinit
from
pycuda.compiler
import
SourceModule
from
pycuda.gpuarray
import
GPUArray
from
pystencils.backends.cbackend
import
generateC
from
pystencils.transformations
import
symbolNameToVariableName
from
pystencils.types
import
StructType
def
numpyTypeFromString
(
typename
,
includePointers
=
True
):
import
ctypes
as
ct
def
makePythonFunction
(
kernelFunctionNode
,
argumentDict
=
{}):
"""
Creates a kernel function from an abstract syntax tree which
was created e.g. by :func:`pystencils.gpucuda.createCUDAKernel`
or :func:`pystencils.gpucuda.createdIndexedCUDAKernel`
typename
=
typename
.
replace
(
"*"
,
" * "
)
typeComponents
=
typename
.
split
()
:param kernelFunctionNode: the abstract syntax tree
:param argumentDict: parameters passed here are already fixed. Remaining parameters have to be passed to the
returned kernel functor.
:return: kernel functor
"""
code
=
"#include <cstdint>
\n
"
code
+=
"#define FUNC_PREFIX __global__
\n
"
code
+=
"#define RESTRICT __restrict__
\n\n
"
code
+=
str
(
generateC
(
kernelFunctionNode
))
basicTypeMap
=
{
'double'
:
np
.
float64
,
'float'
:
np
.
float32
,
'int'
:
np
.
int32
,
'long'
:
np
.
int64
,
}
mod
=
SourceModule
(
code
,
options
=
[
"-w"
,
"-std=c++11"
])
func
=
mod
.
get_function
(
kernelFunctionNode
.
functionName
)
def
wrapper
(
**
kwargs
):
from
copy
import
copy
fullArguments
=
copy
(
argumentDict
)
fullArguments
.
update
(
kwargs
)
shape
=
_checkArguments
(
kernelFunctionNode
.
parameters
,
fullArguments
)
resultType
=
None
for
typeComponent
in
typeComponents
:
typeComponent
=
typeComponent
.
strip
()
if
typeComponent
==
"const"
or
typeComponent
==
"restrict"
or
typeComponent
==
"volatile"
:
continue
if
typeComponent
in
basicTypeMap
:
resultType
=
basicTypeMap
[
typeComponent
]
elif
typeComponent
==
"*"
and
includePointers
:
assert
resultType
is
not
None
resultType
=
ct
.
POINTER
(
resultType
)
dictWithBlockAndThreadNumbers
=
kernelFunctionNode
.
getCallParameters
(
shape
)
return
resultType
args
=
_buildNumpyArgumentList
(
kernelFunctionNode
,
fullArguments
)
func
(
*
args
,
**
dictWithBlockAndThreadNumbers
)
return
wrapper
def
buildNumpyArgumentList
(
kernelFunctionNode
,
argumentDict
):
def
_
buildNumpyArgumentList
(
kernelFunctionNode
,
argumentDict
):
argumentDict
=
{
symbolNameToVariableName
(
k
):
v
for
k
,
v
in
argumentDict
.
items
()}
result
=
[]
for
arg
in
kernelFunctionNode
.
parameters
:
...
...
@@ -52,38 +57,57 @@ def buildNumpyArgumentList(kernelFunctionNode, argumentDict):
assert
False
else
:
param
=
argumentDict
[
arg
.
name
]
expectedType
=
numpyTypeFromString
(
arg
.
dtype
)
expectedType
=
arg
.
dtype
.
numpyDtype
result
.
append
(
expectedType
(
param
))
return
result
def
makePythonFunction
(
kernelFunctionNode
,
argumentDict
=
{}):
code
=
"#define FUNC_PREFIX __global__
\n
"
code
+=
"#define RESTRICT __restrict__
\n\n
"
code
+=
str
(
generateC
(
kernelFunctionNode
))
def
_checkArguments
(
parameterSpecification
,
argumentDict
):
"""
Checks if parameters passed to kernel match the description in the AST function node.
If not it raises a ValueError, on success it returns the array shape that determines the CUDA blocks and threads
"""
argumentDict
=
{
symbolNameToVariableName
(
k
):
v
for
k
,
v
in
argumentDict
.
items
()}
arrayShapes
=
set
()
indexArrShapes
=
set
()
for
arg
in
parameterSpecification
:
if
arg
.
isFieldArgument
:
try
:
fieldArr
=
argumentDict
[
arg
.
fieldName
]
except
KeyError
:
raise
KeyError
(
"Missing field parameter for kernel call "
+
arg
.
fieldName
)
mod
=
SourceModule
(
code
,
options
=
[
"-w"
])
func
=
mod
.
get_function
(
kernelFunctionNode
.
functionName
)
symbolicField
=
arg
.
field
if
arg
.
isFieldPtrArgument
:
if
symbolicField
.
hasFixedShape
:
symbolicFieldShape
=
tuple
(
int
(
i
)
for
i
in
symbolicField
.
shape
)
if
isinstance
(
symbolicField
.
dtype
,
StructType
):
symbolicFieldShape
=
symbolicFieldShape
[:
-
1
]
if
symbolicFieldShape
!=
fieldArr
.
shape
:
raise
ValueError
(
"Passed array '%s' has shape %s which does not match expected shape %s"
%
(
arg
.
fieldName
,
str
(
fieldArr
.
shape
),
str
(
symbolicField
.
shape
)))
if
symbolicField
.
hasFixedShape
:
symbolicFieldStrides
=
tuple
(
int
(
i
)
*
fieldArr
.
dtype
.
itemsize
for
i
in
symbolicField
.
strides
)
if
isinstance
(
symbolicField
.
dtype
,
StructType
):
symbolicFieldStrides
=
symbolicFieldStrides
[:
-
1
]
if
symbolicFieldStrides
!=
fieldArr
.
strides
:
raise
ValueError
(
"Passed array '%s' has strides %s which does not match expected strides %s"
%
(
arg
.
fieldName
,
str
(
fieldArr
.
strides
),
str
(
symbolicFieldStrides
)))
def
wrapper
(
**
kwargs
):
from
copy
import
copy
fullArguments
=
copy
(
argumentDict
)
fullArguments
.
update
(
kwargs
)
if
symbolicField
.
isIndexField
:
indexArrShapes
.
add
(
fieldArr
.
shape
[:
symbolicField
.
spatialDimensions
])
else
:
arrayShapes
.
add
(
fieldArr
.
shape
[:
symbolicField
.
spatialDimensions
])
if
len
(
arrayShapes
)
>
1
:
raise
ValueError
(
"All passed arrays have to have the same size "
+
str
(
arrayShapes
))
if
len
(
indexArrShapes
)
>
1
:
raise
ValueError
(
"All passed index arrays have to have the same size "
+
str
(
arrayShapes
))
if
len
(
indexArrShapes
)
>
0
:
return
list
(
indexArrShapes
)[
0
]
else
:
return
list
(
arrayShapes
)[
0
]
shapes
=
set
()
strides
=
set
()
for
argValue
in
fullArguments
.
values
():
if
isinstance
(
argValue
,
GPUArray
):
shapes
.
add
(
argValue
.
shape
)
strides
.
add
(
argValue
.
strides
)
if
len
(
strides
)
==
0
:
raise
ValueError
(
"No GPU arrays passed as argument"
)
assert
len
(
strides
)
<
2
,
"All passed arrays have to have the same strides"
assert
len
(
shapes
)
<
2
,
"All passed arrays have to have the same size"
shape
=
list
(
shapes
)[
0
]
dictWithBlockAndThreadNumbers
=
kernelFunctionNode
.
getCallParameters
(
shape
)
args
=
buildNumpyArgumentList
(
kernelFunctionNode
,
fullArguments
)
func
(
*
args
,
**
dictWithBlockAndThreadNumbers
)
return
wrapper
gpucuda/kernelcreation.py
View file @
cb511afe
...
...
@@ -2,8 +2,9 @@ import sympy as sp
from
pystencils.transformations
import
resolveFieldAccesses
,
typeAllEquations
,
\
parseBasePointerInfo
,
typingFromSympyInspection
from
pystencils.astnodes
import
Block
,
KernelFunction
from
pystencils.astnodes
import
Block
,
KernelFunction
,
LoopOverCoordinate
,
SympyAssignment
from
pystencils
import
Field
from
pystencils.types
import
TypedSymbol
,
BasicType
,
StructType
BLOCK_IDX
=
list
(
sp
.
symbols
(
"blockIdx.x blockIdx.y blockIdx.z"
))
THREAD_IDX
=
list
(
sp
.
symbols
(
"threadIdx.x threadIdx.y threadIdx.z"
))
...
...
@@ -31,19 +32,14 @@ def getLinewiseCoordinates(field, ghostLayers):
def
createCUDAKernel
(
listOfEquations
,
functionName
=
"kernel"
,
typeForSymbol
=
None
):
if
not
typeForSymbol
or
typeForSymbol
==
'double'
:
typeForSymbol
=
typingFromSympyInspection
(
listOfEquations
,
"double"
)
elif
typeForSymbol
==
'float'
:
typeForSymbol
=
typingFromSympyInspection
(
listOfEquations
,
"float"
)
fieldsRead
,
fieldsWritten
,
assignments
=
typeAllEquations
(
listOfEquations
,
typeForSymbol
)
allFields
=
fieldsRead
.
union
(
fieldsWritten
)
readOnlyFields
=
set
([
f
.
name
for
f
in
fieldsRead
-
fieldsWritten
])
code
=
KernelFunction
(
Block
(
assignments
),
allFields
,
functionName
)
code
.
globalVariables
.
update
(
BLOCK_IDX
+
THREAD_IDX
)
ast
=
KernelFunction
(
Block
(
assignments
),
allFields
,
functionName
)
ast
.
globalVariables
.
update
(
BLOCK_IDX
+
THREAD_IDX
)
fieldAccesses
=
code
.
atoms
(
Field
.
Access
)
fieldAccesses
=
ast
.
atoms
(
Field
.
Access
)
requiredGhostLayers
=
max
([
fa
.
requiredGhostLayers
for
fa
in
fieldAccesses
])
coordMapping
,
getCallParameters
=
getLinewiseCoordinates
(
list
(
fieldsRead
)[
0
],
requiredGhostLayers
)
...
...
@@ -51,10 +47,63 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None)
basePointerInfos
=
{
f
.
name
:
parseBasePointerInfo
(
basePointerInfo
,
[
2
,
1
,
0
],
f
)
for
f
in
allFields
}
coordMapping
=
{
f
.
name
:
coordMapping
for
f
in
allFields
}
resolveFieldAccesses
(
code
,
readOnlyFields
,
fieldToFixedCoordinates
=
coordMapping
,
resolveFieldAccesses
(
ast
,
readOnlyFields
,
fieldToFixedCoordinates
=
coordMapping
,
fieldToBasePointerInfo
=
basePointerInfos
)
# add the function which determines #blocks and #threads as additional member to KernelFunction node
# this is used by the jit
code
.
getCallParameters
=
getCallParameters
return
code
ast
.
getCallParameters
=
getCallParameters
return
ast
def
createdIndexedCUDAKernel
(
listOfEquations
,
indexFields
,
functionName
=
"kernel"
,
typeForSymbol
=
None
,
coordinateNames
=
(
'x'
,
'y'
,
'z'
)):
fieldsRead
,
fieldsWritten
,
assignments
=
typeAllEquations
(
listOfEquations
,
typeForSymbol
)
allFields
=
fieldsRead
.
union
(
fieldsWritten
)
readOnlyFields
=
set
([
f
.
name
for
f
in
fieldsRead
-
fieldsWritten
])
for
indexField
in
indexFields
:
indexField
.
isIndexField
=
True
assert
indexField
.
spatialDimensions
==
1
,
"Index fields have to be 1D"
nonIndexFields
=
[
f
for
f
in
allFields
if
f
not
in
indexFields
]
spatialCoordinates
=
{
f
.
spatialDimensions
for
f
in
nonIndexFields
}
assert
len
(
spatialCoordinates
)
==
1
,
"Non-index fields do not have the same number of spatial coordinates"
spatialCoordinates
=
list
(
spatialCoordinates
)[
0
]
def
getCoordinateSymbolAssignment
(
name
):
for
indexField
in
indexFields
:
assert
isinstance
(
indexField
.
dtype
,
StructType
),
"Index fields have to have a struct datatype"
dataType
=
indexField
.
dtype
if
dataType
.
hasElement
(
name
):
rhs
=
indexField
[
0
](
name
)
lhs
=
TypedSymbol
(
name
,
BasicType
(
dataType
.
getElementType
(
name
)))
return
SympyAssignment
(
lhs
,
rhs
)
raise
ValueError
(
"Index %s not found in any of the passed index fields"
%
(
name
,))
coordinateSymbolAssignments
=
[
getCoordinateSymbolAssignment
(
n
)
for
n
in
coordinateNames
[:
spatialCoordinates
]]
coordinateTypedSymbols
=
[
eq
.
lhs
for
eq
in
coordinateSymbolAssignments
]
assignments
=
coordinateSymbolAssignments
+
assignments
# make 1D loop over index fields
loopBody
=
Block
([])
loopNode
=
LoopOverCoordinate
(
loopBody
,
coordinateToLoopOver
=
0
,
start
=
0
,
stop
=
indexFields
[
0
].
shape
[
0
])
for
assignment
in
assignments
:
loopBody
.
append
(
assignment
)
functionBody
=
Block
([
loopNode
])
ast
=
KernelFunction
(
functionBody
,
allFields
,
functionName
)
ast
.
globalVariables
.
update
(
BLOCK_IDX
+
THREAD_IDX
)
coordMapping
,
getCallParameters
=
getLinewiseCoordinates
(
list
(
fieldsRead
)[
0
],
ghostLayers
=
0
)
basePointerInfo
=
[[
'spatialInner0'
]]
basePointerInfos
=
{
f
.
name
:
parseBasePointerInfo
(
basePointerInfo
,
[
2
,
1
,
0
],
f
)
for
f
in
allFields
}
coordMapping
=
{
f
.
name
:
coordMapping
for
f
in
indexFields
}
coordMapping
.
update
({
f
.
name
:
coordinateTypedSymbols
for
f
in
nonIndexFields
})
resolveFieldAccesses
(
ast
,
readOnlyFields
,
fieldToFixedCoordinates
=
coordMapping
,
fieldToBasePointerInfo
=
basePointerInfos
)
# add the function which determines #blocks and #threads as additional member to KernelFunction node
# this is used by the jit
ast
.
getCallParameters
=
getCallParameters
return
ast
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment