Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Jonas Plewinski
pystencils
Commits
1c0665c4
Commit
1c0665c4
authored
Sep 24, 2019
by
Stephan Seitz
Browse files
Implement interpolation (without CubicInterpolationCUDA)
parent
e871e864
Changes
18
Expand all
Hide whitespace changes
Inline
Side-by-side
pystencils/backends/cbackend.py
View file @
1c0665c4
...
...
@@ -103,6 +103,10 @@ def get_headers(ast_node: Node) -> Set[str]:
if
isinstance
(
a
,
Node
):
headers
.
update
(
get_headers
(
a
))
for
g
in
get_global_declarations
(
ast_node
):
if
isinstance
(
g
,
Node
):
headers
.
update
(
get_headers
(
g
))
return
sorted
(
headers
)
...
...
pystencils/backends/cuda_backend.py
View file @
1c0665c4
...
...
@@ -3,6 +3,7 @@ from os.path import dirname, join
from
pystencils.astnodes
import
Node
from
pystencils.backends.cbackend
import
CBackend
,
CustomSympyPrinter
,
generate_c
from
pystencils.fast_approximation
import
fast_division
,
fast_inv_sqrt
,
fast_sqrt
from
pystencils.interpolation_astnodes
import
InterpolationMode
with
open
(
join
(
dirname
(
__file__
),
'cuda_known_functions.txt'
))
as
f
:
lines
=
f
.
readlines
()
...
...
@@ -43,11 +44,19 @@ class CudaBackend(CBackend):
return
code
def
_print_TextureDeclaration
(
self
,
node
):
code
=
"texture<%s, cudaTextureType%iD, cudaReadModeElementType> %s;"
%
(
str
(
node
.
texture
.
field
.
dtype
),
node
.
texture
.
field
.
spatial_dimensions
,
node
.
texture
)
if
node
.
texture
.
field
.
dtype
.
numpy_dtype
.
itemsize
>
4
:
code
=
"texture<fp_tex_%s, cudaTextureType%iD, cudaReadModeElementType> %s;"
%
(
str
(
node
.
texture
.
field
.
dtype
),
node
.
texture
.
field
.
spatial_dimensions
,
node
.
texture
)
else
:
code
=
"texture<%s, cudaTextureType%iD, cudaReadModeElementType> %s;"
%
(
str
(
node
.
texture
.
field
.
dtype
),
node
.
texture
.
field
.
spatial_dimensions
,
node
.
texture
)
return
code
def
_print_SkipIteration
(
self
,
_
):
...
...
@@ -62,17 +71,23 @@ class CudaSympyPrinter(CustomSympyPrinter):
self
.
known_functions
.
update
(
CUDA_KNOWN_FUNCTIONS
)
def
_print_TextureAccess
(
self
,
node
):
dtype
=
node
.
texture
.
field
.
dtype
.
numpy_dtype
if
node
.
texture
.
cubic_bspline_interpolation
:
template
=
"cubicTex%iDSimple
<%s>
(%s, %s)"
if
node
.
texture
.
interpolation_mode
==
InterpolationMode
.
CUBIC_SPLINE
:
template
=
"cubicTex%iDSimple(%s, %s)"
else
:
template
=
"tex%iD<%s>(%s, %s)"
if
dtype
.
itemsize
>
4
:
# Use PyCuda hack!
# https://github.com/inducer/pycuda/blob/master/pycuda/cuda/pycuda-helpers.hpp
template
=
"fp_tex%iD(%s, %s)"
else
:
template
=
"tex%iD(%s, %s)"
code
=
template
%
(
node
.
texture
.
field
.
spatial_dimensions
,
str
(
node
.
texture
.
field
.
dtype
),
str
(
node
.
texture
),
', '
.
join
(
self
.
_print
(
o
)
for
o
in
node
.
offsets
)
# + 0.5 comes from Nvidia's staggered indexing
', '
.
join
(
self
.
_print
(
o
+
0.5
)
for
o
in
reversed
(
node
.
offsets
))
)
return
code
...
...
pystencils/backends/cuda_known_functions.txt
View file @
1c0665c4
...
...
@@ -45,6 +45,7 @@ tex1D
tex2D
tex3D
sqrtf
rsqrtf
cbrtf
rcbrtf
...
...
pystencils/cpu/kernelcreation.py
View file @
1c0665c4
...
...
@@ -10,8 +10,8 @@ from pystencils.data_types import BasicType, StructType, TypedSymbol, create_typ
from
pystencils.field
import
Field
,
FieldType
from
pystencils.transformations
import
(
add_types
,
filtered_tree_iteration
,
get_base_buffer_index
,
get_optimal_loop_ordering
,
make_loop_over_domain
,
move_constants_before_loop
,
parse_base_pointer_info
,
resolve_buffer_accesses
,
resolve_field_accesses
,
split_inner_loop
)
implement_interpolations
,
make_loop_over_domain
,
move_constants_before_loop
,
parse_base_pointer_info
,
resolve_buffer_accesses
,
resolve_field_accesses
,
split_inner_loop
)
AssignmentOrAstNodeList
=
List
[
Union
[
Assignment
,
ast
.
Node
]]
...
...
@@ -67,6 +67,7 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
ghost_layers
=
ghost_layers
,
loop_order
=
loop_order
)
ast_node
=
KernelFunction
(
loop_node
,
'cpu'
,
'c'
,
compile_function
=
make_python_function
,
ghost_layers
=
ghost_layer_info
,
function_name
=
function_name
)
implement_interpolations
(
body
)
if
split_groups
:
typed_split_groups
=
[[
type_symbol
(
s
)
for
s
in
split_group
]
for
split_group
in
split_groups
]
...
...
@@ -139,6 +140,8 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu
loop_body
=
Block
([])
loop_node
=
LoopOverCoordinate
(
loop_body
,
coordinate_to_loop_over
=
0
,
start
=
0
,
stop
=
index_fields
[
0
].
shape
[
0
])
implement_interpolations
(
loop_node
)
for
assignment
in
assignments
:
loop_body
.
append
(
assignment
)
...
...
pystencils/data_types.py
View file @
1c0665c4
import
ctypes
from
collections
import
defaultdict
from
functools
import
partial
from
typing
import
Tuple
import
numpy
as
np
import
sympy
as
sp
import
sympy.codegen.ast
from
sympy.core.cache
import
cacheit
from
sympy.logic.boolalg
import
Boolean
import
pystencils
from
pystencils.cache
import
memorycache
,
memorycache_if_hashable
from
pystencils.utils
import
all_equal
...
...
@@ -17,6 +20,26 @@ except ImportError as e:
_ir_importerror
=
e
def
typed_symbols
(
names
,
dtype
,
*
args
):
symbols
=
sp
.
symbols
(
names
,
*
args
)
if
isinstance
(
symbols
,
Tuple
):
return
tuple
(
TypedSymbol
(
str
(
s
),
dtype
)
for
s
in
symbols
)
else
:
return
TypedSymbol
(
str
(
symbols
),
dtype
)
def
matrix_symbols
(
names
,
dtype
,
rows
,
cols
):
if
isinstance
(
names
,
str
):
names
=
names
.
replace
(
' '
,
''
).
split
(
','
)
matrices
=
[]
for
n
in
names
:
symbols
=
typed_symbols
(
"%s:%i"
%
(
n
,
rows
*
cols
),
dtype
)
matrices
.
append
(
sp
.
Matrix
(
rows
,
cols
,
lambda
i
,
j
:
symbols
[
i
*
cols
+
j
]))
return
tuple
(
matrices
)
# noinspection PyPep8Naming
class
address_of
(
sp
.
Function
):
is_Atom
=
True
...
...
@@ -86,6 +109,11 @@ class cast_func(sp.Function):
@
property
def
is_integer
(
self
):
"""
Uses Numpy type hierarchy to determine :func:`sympy.Expr.is_integer` predicate
For reference: Numpy type hierarchy https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html
"""
if
hasattr
(
self
.
dtype
,
'numpy_dtype'
):
return
np
.
issubdtype
(
self
.
dtype
.
numpy_dtype
,
np
.
integer
)
or
super
().
is_integer
else
:
...
...
@@ -93,6 +121,9 @@ class cast_func(sp.Function):
@
property
def
is_negative
(
self
):
"""
See :func:`.TypedSymbol.is_integer`
"""
if
hasattr
(
self
.
dtype
,
'numpy_dtype'
):
if
np
.
issubdtype
(
self
.
dtype
.
numpy_dtype
,
np
.
unsignedinteger
):
return
False
...
...
@@ -101,6 +132,9 @@ class cast_func(sp.Function):
@
property
def
is_nonnegative
(
self
):
"""
See :func:`.TypedSymbol.is_integer`
"""
if
self
.
is_negative
is
False
:
return
True
else
:
...
...
@@ -108,6 +142,9 @@ class cast_func(sp.Function):
@
property
def
is_real
(
self
):
"""
See :func:`.TypedSymbol.is_integer`
"""
if
hasattr
(
self
.
dtype
,
'numpy_dtype'
):
return
np
.
issubdtype
(
self
.
dtype
.
numpy_dtype
,
np
.
integer
)
or
\
np
.
issubdtype
(
self
.
dtype
.
numpy_dtype
,
np
.
floating
)
or
\
...
...
@@ -171,6 +208,11 @@ class TypedSymbol(sp.Symbol):
# For reference: Numpy type hierarchy https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html
@
property
def
is_integer
(
self
):
"""
Uses Numpy type hierarchy to determine :func:`sympy.Expr.is_integer` predicate
For reference: Numpy type hierarchy https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html
"""
if
hasattr
(
self
.
dtype
,
'numpy_dtype'
):
return
np
.
issubdtype
(
self
.
dtype
.
numpy_dtype
,
np
.
integer
)
or
super
().
is_integer
else
:
...
...
@@ -178,6 +220,9 @@ class TypedSymbol(sp.Symbol):
@
property
def
is_negative
(
self
):
"""
See :func:`.TypedSymbol.is_integer`
"""
if
hasattr
(
self
.
dtype
,
'numpy_dtype'
):
if
np
.
issubdtype
(
self
.
dtype
.
numpy_dtype
,
np
.
unsignedinteger
):
return
False
...
...
@@ -186,6 +231,9 @@ class TypedSymbol(sp.Symbol):
@
property
def
is_nonnegative
(
self
):
"""
See :func:`.TypedSymbol.is_integer`
"""
if
self
.
is_negative
is
False
:
return
True
else
:
...
...
@@ -193,6 +241,9 @@ class TypedSymbol(sp.Symbol):
@
property
def
is_real
(
self
):
"""
See :func:`.TypedSymbol.is_integer`
"""
if
hasattr
(
self
.
dtype
,
'numpy_dtype'
):
return
np
.
issubdtype
(
self
.
dtype
.
numpy_dtype
,
np
.
integer
)
or
\
np
.
issubdtype
(
self
.
dtype
.
numpy_dtype
,
np
.
floating
)
or
\
...
...
@@ -370,12 +421,17 @@ def peel_off_type(dtype, type_to_peel_off):
return
dtype
def
collate_types
(
types
):
def
collate_types
(
types
,
forbid_collation_to_float
=
False
):
"""
Takes a sequence of types and returns their "common type" e.g. (float, double, float) -> double
Uses the collation rules from numpy.
"""
if
forbid_collation_to_float
:
types
=
[
t
for
t
in
types
if
not
(
hasattr
(
t
,
'is_float'
)
and
t
.
is_float
())]
if
not
types
:
return
[
create_type
(
'int32'
)]
# Pointer arithmetic case i.e. pointer + integer is allowed
if
any
(
type
(
t
)
is
PointerType
for
t
in
types
):
pointer_type
=
None
...
...
@@ -433,6 +489,8 @@ def get_type_of_expression(expr,
return
create_type
(
default_float_type
)
elif
isinstance
(
expr
,
ResolvedFieldAccess
):
return
expr
.
field
.
dtype
elif
isinstance
(
expr
,
pystencils
.
field
.
Field
.
AbstractAccess
):
return
expr
.
field
.
dtype
elif
isinstance
(
expr
,
TypedSymbol
):
return
expr
.
dtype
elif
isinstance
(
expr
,
sp
.
Symbol
):
...
...
@@ -525,6 +583,10 @@ class BasicType(Type):
def
numpy_dtype
(
self
):
return
self
.
_dtype
@
property
def
sympy_dtype
(
self
):
return
getattr
(
sympy
.
codegen
.
ast
,
str
(
self
.
numpy_dtype
))
@
property
def
item_size
(
self
):
return
1
...
...
pystencils/field.py
View file @
1c0665c4
import
functools
import
hashlib
import
operator
import
pickle
import
re
from
enum
import
Enum
...
...
@@ -9,6 +11,7 @@ import numpy as np
import
sympy
as
sp
from
sympy.core.cache
import
cacheit
import
pystencils
from
pystencils.alignedarray
import
aligned_empty
from
pystencils.data_types
import
StructType
,
TypedSymbol
,
create_type
from
pystencils.kernelparameters
import
FieldShapeSymbol
,
FieldStrideSymbol
...
...
@@ -38,7 +41,6 @@ def fields(description=None, index_dimensions=0, layout=None, **kwargs):
>>> assert s.index_dimensions == 0 and s.dtype.numpy_dtype == arr_s.dtype
>>> assert v.index_shape == (2,)
Format string can be left out, field names are taken from keyword arguments.
>>> fields(f1=arr_s, f2=arr_s)
[f1, f2]
...
...
@@ -292,6 +294,10 @@ class Field(AbstractField):
self
.
shape
=
shape
self
.
strides
=
strides
self
.
latex_name
=
None
# type: Optional[str]
self
.
coordinate_origin
=
sp
.
Matrix
(
tuple
(
0
for
_
in
range
(
self
.
spatial_dimensions
)
))
# type: tuple[float,sp.Symbol]
self
.
coordinate_transform
=
sp
.
eye
(
self
.
spatial_dimensions
)
def
new_field_with_different_name
(
self
,
new_name
):
if
self
.
has_fixed_shape
:
...
...
@@ -312,6 +318,9 @@ class Field(AbstractField):
def
ndim
(
self
)
->
int
:
return
len
(
self
.
shape
)
def
values_per_cell
(
self
)
->
int
:
return
functools
.
reduce
(
operator
.
mul
,
self
.
index_shape
,
1
)
@
property
def
layout
(
self
):
return
self
.
_layout
...
...
@@ -393,6 +402,27 @@ class Field(AbstractField):
assert
FieldType
.
is_custom
(
self
)
return
Field
.
Access
(
self
,
offset
,
index
,
is_absolute_access
=
True
)
def
interpolated_access
(
self
,
offset
:
Tuple
,
interpolation_mode
=
'linear'
,
address_mode
=
'BORDER'
,
allow_textures
=
True
):
"""Provides access to field values at non-integer positions
``interpolated_access`` is similar to :func:`Field.absolute_access` except that
it allows non-integer offsets and automatic handling of out-of-bound accesses.
:param offset: Tuple of spatial coordinates (can be floats)
:param interpolation_mode: One of :class:`pystencils.interpolation_astnodes.InterpolationMode`
:param address_mode: How boundaries are handled can be 'border', 'wrap', 'mirror', 'clamp'
:param allow_textures: Allow implementation by texture accesses on GPUs
"""
from
pystencils.interpolation_astnodes
import
Interpolator
return
Interpolator
(
self
,
interpolation_mode
,
address_mode
,
allow_textures
=
allow_textures
).
at
(
offset
)
def
__call__
(
self
,
*
args
,
**
kwargs
):
center
=
tuple
([
0
]
*
self
.
spatial_dimensions
)
return
Field
.
Access
(
self
,
center
)(
*
args
,
**
kwargs
)
...
...
@@ -409,6 +439,34 @@ class Field(AbstractField):
return
False
return
self
.
hashable_contents
()
==
other
.
hashable_contents
()
@
property
def
physical_coordinates
(
self
):
return
self
.
coordinate_transform
@
(
self
.
coordinate_origin
+
pystencils
.
x_vector
(
self
.
spatial_dimensions
))
@
property
def
physical_coordinates_staggered
(
self
):
return
self
.
coordinate_transform
@
\
(
self
.
coordinate_origin
+
pystencils
.
x_staggered_vector
(
self
.
spatial_dimensions
))
def
index_to_physical
(
self
,
index_coordinates
,
staggered
=
False
):
if
staggered
:
index_coordinates
=
sp
.
Matrix
([
i
+
0.5
for
i
in
index_coordinates
])
return
self
.
coordinate_transform
@
(
self
.
coordinate_origin
+
index_coordinates
)
def
physical_to_index
(
self
,
physical_coordinates
,
staggered
=
False
):
rtn
=
self
.
coordinate_transform
.
inv
()
@
physical_coordinates
-
self
.
coordinate_origin
if
staggered
:
rtn
=
sp
.
Matrix
([
i
-
0.5
for
i
in
rtn
])
return
rtn
def
index_to_staggered_physical_coordinates
(
self
,
symbol_vector
):
symbol_vector
+=
sp
.
Matrix
([
0.5
]
*
self
.
spatial_dimensions
)
return
self
.
create_physical_coordinates
(
symbol_vector
)
def
set_coordinate_origin_to_field_center
(
self
):
self
.
coordinate_origin
=
-
sp
.
Matrix
([
i
/
2
for
i
in
self
.
spatial_shape
])
# noinspection PyAttributeOutsideInit,PyUnresolvedReferences
class
Access
(
TypedSymbol
,
AbstractField
.
AbstractAccess
):
"""Class representing a relative access into a `Field`.
...
...
@@ -429,11 +487,12 @@ class Field(AbstractField):
>>> central_y_component.at_index(0) # change component
v_C^0
"""
def
__new__
(
cls
,
name
,
*
args
,
**
kwargs
):
obj
=
Field
.
Access
.
__xnew_cached_
(
cls
,
name
,
*
args
,
**
kwargs
)
return
obj
def
__new_stage2__
(
self
,
field
,
offsets
=
(
0
,
0
,
0
),
idx
=
None
,
is_absolute_access
=
False
):
def
__new_stage2__
(
self
,
field
,
offsets
=
(
0
,
0
,
0
),
idx
=
None
,
is_absolute_access
=
False
,
dtype
=
None
):
field_name
=
field
.
name
offsets_and_index
=
(
*
offsets
,
*
idx
)
if
idx
is
not
None
else
offsets
constant_offsets
=
not
any
([
isinstance
(
o
,
sp
.
Basic
)
and
not
o
.
is_Integer
for
o
in
offsets_and_index
])
...
...
@@ -484,7 +543,7 @@ class Field(AbstractField):
return
obj
def
__getnewargs__
(
self
):
return
self
.
field
,
self
.
offsets
,
self
.
index
,
self
.
is_absolute_access
return
self
.
field
,
self
.
offsets
,
self
.
index
,
self
.
is_absolute_access
,
self
.
dtype
# noinspection SpellCheckingInspection
__xnew__
=
staticmethod
(
__new_stage2__
)
...
...
@@ -503,7 +562,7 @@ class Field(AbstractField):
if
len
(
idx
)
!=
self
.
field
.
index_dimensions
:
raise
ValueError
(
"Wrong number of indices: "
"Got %d, expected %d"
%
(
len
(
idx
),
self
.
field
.
index_dimensions
))
return
Field
.
Access
(
self
.
field
,
self
.
_offsets
,
idx
)
return
Field
.
Access
(
self
.
field
,
self
.
_offsets
,
idx
,
dtype
=
self
.
dtype
)
def
__getitem__
(
self
,
*
idx
):
return
self
.
__call__
(
*
idx
)
...
...
@@ -562,7 +621,7 @@ class Field(AbstractField):
"""
offset_list
=
list
(
self
.
offsets
)
offset_list
[
coord_id
]
+=
offset
return
Field
.
Access
(
self
.
field
,
tuple
(
offset_list
),
self
.
index
)
return
Field
.
Access
(
self
.
field
,
tuple
(
offset_list
),
self
.
index
,
dtype
=
self
.
dtype
)
def
get_shifted
(
self
,
*
shift
)
->
'Field.Access'
:
"""Returns a new Access with changed spatial coordinates
...
...
@@ -572,7 +631,10 @@ class Field(AbstractField):
>>> f[0,0].get_shifted(1, 1)
f_NE
"""
return
Field
.
Access
(
self
.
field
,
tuple
(
a
+
b
for
a
,
b
in
zip
(
shift
,
self
.
offsets
)),
self
.
index
)
return
Field
.
Access
(
self
.
field
,
tuple
(
a
+
b
for
a
,
b
in
zip
(
shift
,
self
.
offsets
)),
self
.
index
,
dtype
=
self
.
dtype
)
def
at_index
(
self
,
*
idx_tuple
)
->
'Field.Access'
:
"""Returns new Access with changed index.
...
...
@@ -582,7 +644,7 @@ class Field(AbstractField):
>>> f(0).at_index(8)
f_C^8
"""
return
Field
.
Access
(
self
.
field
,
self
.
offsets
,
idx_tuple
)
return
Field
.
Access
(
self
.
field
,
self
.
offsets
,
idx_tuple
,
dtype
=
self
.
dtype
)
@
property
def
is_absolute_access
(
self
)
->
bool
:
...
...
pystencils/gpucuda/cudajit.py
View file @
1c0665c4
...
...
@@ -3,7 +3,9 @@ import numpy as np
from
pystencils.backends.cbackend
import
generate_c
,
get_headers
from
pystencils.data_types
import
StructType
from
pystencils.field
import
FieldType
from
pystencils.include
import
get_pystencils_include_path
from
pystencils.gpucuda.texture_utils
import
ndarray_to_tex
from
pystencils.include
import
get_pycuda_include_path
,
get_pystencils_include_path
from
pystencils.interpolation_astnodes
import
TextureAccess
from
pystencils.kernelparameters
import
FieldPointerSymbol
USE_FAST_MATH
=
True
...
...
@@ -29,17 +31,33 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
if
argument_dict
is
None
:
argument_dict
=
{}
header_list
=
[
'<stdint
.h
>'
]
+
list
(
get_headers
(
kernel_function_node
))
header_list
=
[
'<
c
stdint>'
]
+
list
(
get_headers
(
kernel_function_node
))
includes
=
"
\n
"
.
join
([
"#include %s"
%
(
include_file
,)
for
include_file
in
header_list
])
code
=
includes
+
"
\n
"
code
+=
"#define FUNC_PREFIX __global__
\n
"
code
+=
"#define RESTRICT __restrict__
\n\n
"
code
+=
str
(
generate_c
(
kernel_function_node
,
dialect
=
'cuda'
,
custom_backend
=
custom_backend
))
options
=
[
"-w"
,
"-std=c++11"
,
"-Wno-deprecated-gpu-targets"
]
textures
=
set
(
d
.
texture
for
d
in
kernel_function_node
.
atoms
(
TextureAccess
))
nvcc_options
=
[
"-w"
,
"-std=c++11"
,
"-Wno-deprecated-gpu-targets"
]
if
USE_FAST_MATH
:
options
.
append
(
"-use_fast_math"
)
mod
=
SourceModule
(
code
,
options
=
options
,
include_dirs
=
[
get_pystencils_include_path
()])
nvcc_options
.
append
(
"-use_fast_math"
)
# Code for
# if any(t.interpolation_mode == InterpolationMode.CUBIC_SPLINE for t in textures):
# assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \
# "Submodule CubicInterpolationCUDA does not exist"
# nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")]
# nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]
# needed_dims = set(t.field.spatial_dimensions for t in textures
# if t.interpolation_mode == InterpolationMode.CUBIC_SPLINE)
# for i in needed_dims:
# code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code
mod
=
SourceModule
(
code
,
options
=
nvcc_options
,
include_dirs
=
[
get_pystencils_include_path
(),
get_pycuda_include_path
()])
func
=
mod
.
get_function
(
kernel_function_node
.
function_name
)
parameters
=
kernel_function_node
.
get_parameters
()
...
...
@@ -63,6 +81,12 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
block_and_thread_numbers
[
'block'
]
=
tuple
(
int
(
i
)
for
i
in
block_and_thread_numbers
[
'block'
])
block_and_thread_numbers
[
'grid'
]
=
tuple
(
int
(
i
)
for
i
in
block_and_thread_numbers
[
'grid'
])
# TODO: use texture objects:
# https://devblogs.nvidia.com/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/
for
tex
in
textures
:
tex_ref
=
mod
.
get_texref
(
str
(
tex
))
ndarray_to_tex
(
tex_ref
,
full_arguments
[
tex
.
field
.
name
],
tex
.
address_mode
,
tex
.
filter_mode
,
tex
.
use_normalized_coordinates
,
tex
.
read_as_integer
)
args
=
_build_numpy_argument_list
(
parameters
,
full_arguments
)
cache
[
key
]
=
(
args
,
block_and_thread_numbers
)
cache_values
.
append
(
kwargs
)
# keep objects alive such that ids remain unique
...
...
pystencils/gpucuda/kernelcreation.py
View file @
1c0665c4
...
...
@@ -4,12 +4,18 @@ from pystencils.field import Field, FieldType
from
pystencils.gpucuda.cudajit
import
make_python_function
from
pystencils.gpucuda.indexing
import
BlockIndexing
from
pystencils.transformations
import
(
add_types
,
get_base_buffer_index
,
get_common_shape
,
parse_base_pointer_info
,
resolve_buffer_accesses
,
resolve_field_accesses
,
unify_shape_symbols
)
def
create_cuda_kernel
(
assignments
,
function_name
=
"kernel"
,
type_info
=
None
,
indexing_creator
=
BlockIndexing
,
iteration_slice
=
None
,
ghost_layers
=
None
,
skip_independence_check
=
False
):
add_types
,
get_base_buffer_index
,
get_common_shape
,
implement_interpolations
,
parse_base_pointer_info
,
resolve_buffer_accesses
,
resolve_field_accesses
,
unify_shape_symbols
)
def
create_cuda_kernel
(
assignments
,
function_name
=
"kernel"
,
type_info
=
None
,
indexing_creator
=
BlockIndexing
,
iteration_slice
=
None
,
ghost_layers
=
None
,
skip_independence_check
=
False
,
use_textures_for_interpolation
=
True
):
fields_read
,
fields_written
,
assignments
=
add_types
(
assignments
,
type_info
,
not
skip_independence_check
)
all_fields
=
fields_read
.
union
(
fields_written
)
read_only_fields
=
set
([
f
.
name
for
f
in
fields_read
-
fields_written
])
...
...
@@ -57,6 +63,8 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde
ast
=
KernelFunction
(
block
,
'gpu'
,
'gpucuda'
,
make_python_function
,
ghost_layers
,
function_name
)
ast
.
global_variables
.
update
(
indexing
.
index_variables
)
implement_interpolations
(
ast
,
implement_by_texture_accesses
=
use_textures_for_interpolation
)
base_pointer_spec
=
[[
'spatialInner0'
]]
base_pointer_info
=
{
f
.
name
:
parse_base_pointer_info
(
base_pointer_spec
,
[
2
,
1
,
0
],
f
.
spatial_dimensions
,
f
.
index_dimensions
)
...
...
@@ -86,8 +94,13 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde
return
ast
def
created_indexed_cuda_kernel
(
assignments
,
index_fields
,
function_name
=
"kernel"
,
type_info
=
None
,
coordinate_names
=
(
'x'
,
'y'
,
'z'
),
indexing_creator
=
BlockIndexing
):
def
created_indexed_cuda_kernel
(
assignments
,
index_fields
,
function_name
=
"kernel"
,
type_info
=
None
,
coordinate_names
=
(
'x'
,
'y'
,
'z'
),
indexing_creator
=
BlockIndexing
,
use_textures_for_interpolation
=
True
):
fields_read
,
fields_written
,
assignments
=
add_types
(
assignments
,
type_info
,
check_independence_condition
=
False
)
all_fields
=
fields_read
.
union
(
fields_written
)
read_only_fields
=
set
([
f
.
name
for
f
in
fields_read
-
fields_written
])
...
...
@@ -125,6 +138,8 @@ def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel
ast
=
KernelFunction
(
function_body
,
'gpu'
,
'gpucuda'
,
make_python_function
,
None
,
function_name
)
ast
.
global_variables
.
update
(
indexing
.
index_variables
)
implement_interpolations
(
ast
,
implement_by_texture_accesses
=
use_textures_for_interpolation
)
coord_mapping
=
indexing
.
coordinates
base_pointer_spec
=
[[
'spatialInner0'
]]
base_pointer_info
=
{
f
.
name
:
parse_base_pointer_info
(
base_pointer_spec
,
[
2
,
1
,
0
],
...
...
pystencils/gpucuda/texture_utils.py
0 → 100644
View file @
1c0665c4
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
from
os.path
import
dirname
,
isdir
,
join
import
numpy
as
np
try
:
import
pycuda.driver
as
cuda
from
pycuda
import
gpuarray
except
Exception
:
pass
def
pow_two_divider
(
n
):
if
n
==
0
:
return
0
divider
=
1
while
(
n
&
divider
)
==
0
:
divider
<<=
1
return
divider
def
ndarray_to_tex
(
tex_ref
,
ndarray
,
address_mode
=
None
,
filter_mode
=
None
,
use_normalized_coordinates
=
False
,
read_as_integer
=
False
):
if
address_mode
is
None
:
address_mode
=
cuda
.
address_mode
.
BORDER
if
filter_mode
is
None
:
filter_mode
=
cuda
.
filter_mode
.
LINEAR
if
isinstance
(
ndarray
,
np
.
ndarray
):
cu_array
=
cuda
.
np_to_array
(
ndarray
,
'C'
)
elif
isinstance
(
ndarray
,
gpuarray
.
GPUArray
):
cu_array
=
cuda
.
gpuarray_to_array
(
ndarray
,
'C'
)
else
:
raise
TypeError
(
'ndarray must be numpy.ndarray or pycuda.gpuarray.GPUArray'
)
cuda
.
TextureReference
.
set_array
(
tex_ref
,
cu_array
)
tex_ref
.
set_address_mode
(
0
,
address_mode
)
if
ndarray
.
ndim
>=
2
:
tex_ref
.
set_address_mode
(
1
,
address_mode
)
if
ndarray
.
ndim
>=
3
: