Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Alexander Reinauer
pystencils
Commits
8cf7aa27
Commit
8cf7aa27
authored
Jan 23, 2020
by
Michael Kuron
Browse files
Merge branch 'master' of i10git.cs.fau.de:pycodegen/pystencils into oldsympy
parents
eb1c6ea6
1cabb407
Changes
14
Hide whitespace changes
Inline
Side-by-side
pystencils/backends/cbackend.py
View file @
8cf7aa27
...
...
@@ -10,11 +10,12 @@ from sympy.printing.ccode import C89CodePrinter
from
pystencils.astnodes
import
KernelFunction
,
Node
from
pystencils.cpu.vectorization
import
vec_all
,
vec_any
from
pystencils.data_types
import
(
PointerType
,
VectorType
,
address_of
,
cast_func
,
create_type
,
get_type_of_expression
,
reinterpret_cast_func
,
vector_memory_access
)
PointerType
,
VectorType
,
address_of
,
cast_func
,
create_type
,
get_type_of_expression
,
reinterpret_cast_func
,
vector_memory_access
)
from
pystencils.fast_approximation
import
fast_division
,
fast_inv_sqrt
,
fast_sqrt
from
pystencils.integer_functions
import
(
bit_shift_left
,
bit_shift_right
,
bitwise_and
,
bitwise_or
,
bitwise_xor
,
int_div
,
int_power_of_2
,
modulo_ceil
)
bit_shift_left
,
bit_shift_right
,
bitwise_and
,
bitwise_or
,
bitwise_xor
,
int_div
,
int_power_of_2
,
modulo_ceil
)
try
:
from
sympy.printing.ccode
import
C99CodePrinter
as
CCodePrinter
...
...
@@ -91,7 +92,7 @@ def get_global_declarations(ast):
visit_node
(
ast
)
return
sorted
(
set
(
global_declarations
),
key
=
lambda
x
:
str
(
x
)
)
return
sorted
(
set
(
global_declarations
),
key
=
str
)
def
get_headers
(
ast_node
:
Node
)
->
Set
[
str
]:
...
...
pystencils/backends/cuda_backend.py
View file @
8cf7aa27
...
...
@@ -3,7 +3,7 @@ from os.path import dirname, join
from
pystencils.astnodes
import
Node
from
pystencils.backends.cbackend
import
CBackend
,
CustomSympyPrinter
,
generate_c
from
pystencils.fast_approximation
import
fast_division
,
fast_inv_sqrt
,
fast_sqrt
from
pystencils.interpolation_astnodes
import
InterpolationMode
from
pystencils.interpolation_astnodes
import
DiffInterpolatorAccess
,
InterpolationMode
with
open
(
join
(
dirname
(
__file__
),
'cuda_known_functions.txt'
))
as
f
:
lines
=
f
.
readlines
()
...
...
@@ -70,10 +70,13 @@ class CudaSympyPrinter(CustomSympyPrinter):
super
(
CudaSympyPrinter
,
self
).
__init__
()
self
.
known_functions
.
update
(
CUDA_KNOWN_FUNCTIONS
)
def
_print_
Texture
Access
(
self
,
node
):
dtype
=
node
.
texture
.
field
.
dtype
.
numpy_dtype
def
_print_
Interpolator
Access
(
self
,
node
):
dtype
=
node
.
interpolator
.
field
.
dtype
.
numpy_dtype
if
node
.
texture
.
interpolation_mode
==
InterpolationMode
.
CUBIC_SPLINE
:
if
type
(
node
)
==
DiffInterpolatorAccess
:
# cubicTex3D_1st_derivative_x(texture tex, float3 coord)
template
=
f
"cubicTex%iD_1st_derivative_
{
list
(
reversed
(
'xyz'
[
:
node
.
ndim
]))[
node
.
diff_coordinate_idx
]
}
(%s, %s)"
# noqa
elif
node
.
interpolator
.
interpolation_mode
==
InterpolationMode
.
CUBIC_SPLINE
:
template
=
"cubicTex%iDSimple(%s, %s)"
else
:
if
dtype
.
itemsize
>
4
:
...
...
@@ -84,8 +87,8 @@ class CudaSympyPrinter(CustomSympyPrinter):
template
=
"tex%iD(%s, %s)"
code
=
template
%
(
node
.
texture
.
field
.
spatial_dimensions
,
str
(
node
.
texture
),
node
.
interpolator
.
field
.
spatial_dimensions
,
str
(
node
.
interpolator
),
# + 0.5 comes from Nvidia's staggered indexing
', '
.
join
(
self
.
_print
(
o
+
0.5
)
for
o
in
reversed
(
node
.
offsets
))
)
...
...
pystencils/fd/derivative.py
View file @
8cf7aa27
...
...
@@ -111,6 +111,16 @@ class Diff(sp.Expr):
def
__str__
(
self
):
return
"D(%s)"
%
self
.
arg
def
interpolated_access
(
self
,
offset
,
**
kwargs
):
"""Represents an interpolated access on a spatially differentiated field
Args:
offset (Tuple[sympy.Expr]): Absolute position to determine the value of the spatial derivative
"""
from
pystencils.interpolation_astnodes
import
DiffInterpolatorAccess
assert
isinstance
(
self
.
arg
.
field
,
Field
),
"Must be field to enable interpolated accesses"
return
DiffInterpolatorAccess
(
self
.
arg
.
field
.
interpolated_access
(
offset
,
**
kwargs
).
symbol
,
self
.
target
,
*
offset
)
class
DiffOperator
(
sp
.
Expr
):
"""Un-applied differential, i.e. differential operator
...
...
pystencils/fd/finitedifferences.py
View file @
8cf7aa27
...
...
@@ -109,7 +109,9 @@ class Discretization2ndOrder:
return
self
.
_discretize_advection
(
e
)
elif
isinstance
(
e
,
Diff
):
arg
,
*
indices
=
diff_args
(
e
)
if
not
isinstance
(
arg
,
Field
.
Access
):
from
pystencils.interpolation_astnodes
import
InterpolatorAccess
if
not
isinstance
(
arg
,
(
Field
.
Access
,
InterpolatorAccess
)):
raise
ValueError
(
"Only derivatives with field or field accesses as arguments can be discretized"
)
return
self
.
spatial_stencil
(
indices
,
self
.
dx
,
arg
)
else
:
...
...
pystencils/field.py
View file @
8cf7aa27
...
...
@@ -332,7 +332,7 @@ class Field(AbstractField):
self
.
latex_name
:
Optional
[
str
]
=
None
self
.
coordinate_origin
:
tuple
[
float
,
sp
.
Symbol
]
=
sp
.
Matrix
(
tuple
(
0
for
_
in
range
(
self
.
spatial_dimensions
)
))
# type
))
self
.
coordinate_transform
=
sp
.
eye
(
self
.
spatial_dimensions
)
if
field_type
==
FieldType
.
STAGGERED
:
assert
self
.
staggered_stencil
...
...
pystencils/gpucuda/cudajit.py
View file @
8cf7aa27
...
...
@@ -5,13 +5,20 @@ from pystencils.data_types import StructType
from
pystencils.field
import
FieldType
from
pystencils.gpucuda.texture_utils
import
ndarray_to_tex
from
pystencils.include
import
get_pycuda_include_path
,
get_pystencils_include_path
from
pystencils.interpolation_astnodes
import
TextureAccess
from
pystencils.interpolation_astnodes
import
InterpolatorAccess
,
TextureCachedField
from
pystencils.kernel_wrapper
import
KernelWrapper
from
pystencils.kernelparameters
import
FieldPointerSymbol
USE_FAST_MATH
=
True
def
get_cubic_interpolation_include_paths
():
from
os.path
import
join
,
dirname
return
[
join
(
dirname
(
__file__
),
"CubicInterpolationCUDA"
,
"code"
),
join
(
dirname
(
__file__
),
"CubicInterpolationCUDA"
,
"code"
,
"internal"
)]
def
make_python_function
(
kernel_function_node
,
argument_dict
=
None
,
custom_backend
=
None
):
"""
Creates a kernel function from an abstract syntax tree which
...
...
@@ -39,23 +46,28 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
code
+=
"#define FUNC_PREFIX __global__
\n
"
code
+=
"#define RESTRICT __restrict__
\n\n
"
code
+=
str
(
generate_c
(
kernel_function_node
,
dialect
=
'cuda'
,
custom_backend
=
custom_backend
))
textures
=
set
(
d
.
texture
for
d
in
kernel_function_node
.
atoms
(
TextureAccess
))
textures
=
set
(
d
.
interpolator
for
d
in
kernel_function_node
.
atoms
(
InterpolatorAccess
)
if
isinstance
(
d
.
interpolator
,
TextureCachedField
))
nvcc_options
=
[
"-w"
,
"-std=c++11"
,
"-Wno-deprecated-gpu-targets"
]
if
USE_FAST_MATH
:
nvcc_options
.
append
(
"-use_fast_math"
)
# Code for
# if any(t.interpolation_mode == InterpolationMode.CUBIC_SPLINE for t in textures):
# assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \
# "Submodule CubicInterpolationCUDA does not exist"
# nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code")]
# nvcc_options += ["-I" + join(dirname(__file__), "CubicInterpolationCUDA", "code", "internal")]
# needed_dims = set(t.field.spatial_dimensions for t in textures
# if t.interpolation_mode == InterpolationMode.CUBIC_SPLINE)
# for i in needed_dims:
# code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code
# Code for CubicInterpolationCUDA
from
pystencils.interpolation_astnodes
import
InterpolationMode
from
os.path
import
join
,
dirname
,
isdir
if
any
(
t
.
interpolation_mode
==
InterpolationMode
.
CUBIC_SPLINE
for
t
in
textures
):
assert
isdir
(
join
(
dirname
(
__file__
),
(
"CubicInterpolationCUDA"
,
"code"
)),
"Submodule CubicInterpolationCUDA does not exist.
\n
"
+
"Clone https://github.com/theHamsta/CubicInterpolationCUDA into pystencils.gpucuda"
)
nvcc_options
+=
[
"-I"
+
join
(
dirname
(
__file__
),
"CubicInterpolationCUDA"
,
"code"
)]
nvcc_options
+=
[
"-I"
+
join
(
dirname
(
__file__
),
"CubicInterpolationCUDA"
,
"code"
,
"internal"
)]
needed_dims
=
set
(
t
.
field
.
spatial_dimensions
for
t
in
textures
if
t
.
interpolation_mode
==
InterpolationMode
.
CUBIC_SPLINE
)
for
i
in
needed_dims
:
code
=
'extern "C++" {
\n
#include "cubicTex%iD.cu"
\n
}
\n
'
%
i
+
code
mod
=
SourceModule
(
code
,
options
=
nvcc_options
,
include_dirs
=
[
get_pystencils_include_path
(),
get_pycuda_include_path
()])
...
...
pystencils/gpucuda/texture_utils.py
View file @
8cf7aa27
...
...
@@ -15,6 +15,7 @@ import numpy as np
try
:
import
pycuda.driver
as
cuda
from
pycuda
import
gpuarray
import
pycuda
except
Exception
:
pass
...
...
@@ -35,6 +36,8 @@ def ndarray_to_tex(tex_ref,
use_normalized_coordinates
=
False
,
read_as_integer
=
False
):
if
isinstance
(
address_mode
,
str
):
address_mode
=
getattr
(
pycuda
.
driver
.
address_mode
,
address_mode
.
upper
())
if
address_mode
is
None
:
address_mode
=
cuda
.
address_mode
.
BORDER
if
filter_mode
is
None
:
...
...
pystencils/interpolation_astnodes.py
View file @
8cf7aa27
...
...
@@ -35,6 +35,27 @@ class InterpolationMode(str, Enum):
CUBIC_SPLINE
=
"cubic_spline"
class
_InterpolationSymbol
(
TypedSymbol
):
def
__new__
(
cls
,
name
,
field
,
interpolator
):
obj
=
cls
.
__xnew_cached_
(
cls
,
name
,
field
,
interpolator
)
return
obj
def
__new_stage2__
(
cls
,
name
,
field
,
interpolator
):
obj
=
super
().
__xnew__
(
cls
,
name
,
'dummy_symbol_carrying_field'
+
field
.
name
)
obj
.
field
=
field
obj
.
interpolator
=
interpolator
return
obj
def
__getnewargs__
(
self
):
return
self
.
name
,
self
.
field
,
self
.
interpolator
# noinspection SpellCheckingInspection
__xnew__
=
staticmethod
(
__new_stage2__
)
# noinspection SpellCheckingInspection
__xnew_cached_
=
staticmethod
(
cacheit
(
__new_stage2__
))
class
Interpolator
(
object
):
"""
Implements non-integer accesses on fields using linear interpolation.
...
...
@@ -77,24 +98,25 @@ class Interpolator(object):
allow_textures
=
True
):
super
().
__init__
()
self
.
field
=
parent_field
.
new_field_with_different_name
(
parent_field
.
name
)
self
.
field
=
parent_field
self
.
field
.
field_type
=
pystencils
.
field
.
FieldType
.
CUSTOM
self
.
address_mode
=
address_mode
self
.
use_normalized_coordinates
=
use_normalized_coordinates
hash_str
=
"%x"
%
abs
(
hash
(
self
.
field
)
+
hash
(
address_mode
))
self
.
symbol
=
TypedSymbol
(
'dummy_symbol_carrying_field'
+
self
.
field
.
name
+
hash_str
,
'dummy_symbol_carrying_field'
+
self
.
field
.
name
+
hash_str
)
self
.
symbol
.
field
=
self
.
field
self
.
symbol
.
interpolator
=
self
self
.
allow_textures
=
allow_textures
self
.
interpolation_mode
=
interpolation_mode
self
.
hash_str
=
hashlib
.
md5
(
f
'
{
self
.
field
}
_
{
address_mode
}
_
{
self
.
field
.
dtype
}
_
{
interpolation_mode
}
'
.
encode
()).
hexdigest
()
self
.
symbol
=
_InterpolationSymbol
(
str
(
self
),
parent_field
,
self
)
self
.
allow_textures
=
allow_textures
@
property
def
ndim
(
self
):
return
self
.
field
.
ndim
@
property
def
_hashable_contents
(
self
):
return
(
str
(
self
.
address_mode
),
str
(
type
(
self
)),
self
.
symbol
,
self
.
address_mode
,
self
.
hash_str
,
self
.
use_normalized_coordinates
)
def
at
(
self
,
offset
):
...
...
@@ -112,6 +134,9 @@ class Interpolator(object):
def
__hash__
(
self
):
return
hash
(
self
.
_hashable_contents
)
def
__eq__
(
self
,
other
):
return
hash
(
self
)
==
hash
(
other
)
@
property
def
reproducible_hash
(
self
):
return
_hash
(
str
(
self
.
_hashable_contents
).
encode
()).
hexdigest
()
...
...
@@ -142,29 +167,43 @@ class NearestNeightborInterpolator(Interpolator):
class
InterpolatorAccess
(
TypedSymbol
):
def
__new__
(
cls
,
field
,
offsets
,
*
args
,
**
kwargs
):
obj
=
Texture
Access
.
__xnew_cached_
(
cls
,
field
,
offsets
,
*
args
,
**
kwargs
)
def
__new__
(
cls
,
field
,
*
offsets
,
**
kwargs
):
obj
=
Interpolator
Access
.
__xnew_cached_
(
cls
,
field
,
*
offsets
,
**
kwargs
)
return
obj
def
__new_stage2__
(
self
,
symbol
,
*
offsets
):
def
__new_stage2__
(
cls
,
symbol
,
*
offsets
):
assert
offsets
is
not
None
obj
=
super
().
__xnew__
(
self
,
'%s_interpolator_%x'
%
(
symbol
.
field
.
name
,
abs
(
hash
(
tuple
(
offsets
)))),
symbol
.
field
.
dtype
)
obj
=
super
().
__xnew__
(
cls
,
'%s_interpolator_%s'
%
(
symbol
.
field
.
name
,
_hash
(
str
(
tuple
(
offsets
)).
encode
()).
hexdigest
()),
symbol
.
field
.
dtype
)
obj
.
offsets
=
offsets
obj
.
symbol
=
symbol
obj
.
field
=
symbol
.
field
obj
.
interpolator
=
symbol
.
interpolator
return
obj
def
_
_hash
__
(
self
):
return
hash
((
self
.
symbol
,
self
.
field
,
tuple
(
self
.
offsets
),
self
.
interpolator
))
def
_hash
able_contents
(
self
):
return
super
().
_hashable_content
()
+
((
self
.
symbol
,
self
.
field
,
tuple
(
self
.
offsets
),
self
.
symbol
.
interpolator
))
def
__str__
(
self
):
return
'%s_interpolator(%s)'
%
(
self
.
field
.
name
,
','
.
join
(
str
(
o
)
for
o
in
self
.
offsets
))
return
'%s_interpolator(%s)'
%
(
self
.
field
.
name
,
',
'
.
join
(
str
(
o
)
for
o
in
self
.
offsets
))
def
__repr__
(
self
):
return
self
.
__str__
()
def
_latex
(
self
,
printer
,
*
_
):
n
=
self
.
field
.
latex_name
if
self
.
field
.
latex_name
else
self
.
field
.
name
foo
=
", "
.
join
(
str
(
printer
.
doprint
(
o
))
for
o
in
self
.
offsets
)
return
f
'
{
n
}
_{{interpolator}}
\\
left(
{
foo
}
\\
right)'
@
property
def
ndim
(
self
):
return
len
(
self
.
offsets
)
@
property
def
is_texture
(
self
):
return
isinstance
(
self
.
interpolator
,
TextureCachedField
)
def
atoms
(
self
,
*
types
):
if
self
.
offsets
:
offsets
=
set
(
o
for
o
in
self
.
offsets
if
isinstance
(
o
,
types
))
...
...
@@ -177,6 +216,11 @@ class InterpolatorAccess(TypedSymbol):
else
:
return
set
()
def
neighbor
(
self
,
coord_id
,
offset
):
offset_list
=
list
(
self
.
offsets
)
offset_list
[
coord_id
]
+=
offset
return
self
.
interpolator
.
at
(
tuple
(
offset_list
))
@
property
def
free_symbols
(
self
):
symbols
=
set
()
...
...
@@ -189,6 +233,13 @@ class InterpolatorAccess(TypedSymbol):
return
symbols
@
property
def
required_global_declarations
(
self
):
required_global_declarations
=
self
.
symbol
.
interpolator
.
required_global_declarations
if
required_global_declarations
:
required_global_declarations
[
0
].
_symbols_defined
.
add
(
self
)
return
required_global_declarations
@
property
def
args
(
self
):
return
[
self
.
symbol
,
*
self
.
offsets
]
...
...
@@ -201,14 +252,27 @@ class InterpolatorAccess(TypedSymbol):
def
interpolation_mode
(
self
):
return
self
.
interpolator
.
interpolation_mode
@
property
def
_diff_interpolation_vec
(
self
):
return
sp
.
Matrix
([
DiffInterpolatorAccess
(
self
.
symbol
,
i
,
*
self
.
offsets
)
for
i
in
range
(
len
(
self
.
offsets
))])
def
diff
(
self
,
*
symbols
,
**
kwargs
):
if
symbols
==
(
self
,):
return
1
rtn
=
self
.
_diff_interpolation_vec
.
T
*
sp
.
Matrix
(
self
.
offsets
).
diff
(
*
symbols
,
**
kwargs
)
if
rtn
.
shape
==
(
1
,
1
):
rtn
=
rtn
[
0
,
0
]
return
rtn
def
implementation_with_stencils
(
self
):
field
=
self
.
field
default_int_type
=
create_type
(
'int64'
)
use_textures
=
isinstance
(
self
,
TextureAccess
)
use_textures
=
isinstance
(
self
.
interpolator
,
TextureCachedField
)
if
use_textures
:
def
absolute_access
(
x
,
_
):
return
self
.
texture
.
at
((
o
for
o
in
x
))
return
self
.
symbol
.
interpolator
.
at
((
o
for
o
in
x
))
else
:
absolute_access
=
field
.
absolute_access
...
...
@@ -255,7 +319,7 @@ class InterpolatorAccess(TypedSymbol):
for
(
dim
,
i
)
in
enumerate
(
index
)]
index
=
[
cast_func
(
sp
.
Piecewise
((
i
,
i
>
0
),
(
sp
.
Abs
(
cast_func
(
field
.
shape
[
dim
]
-
1
+
i
,
default_int_type
)),
True
)),
default_int_type
)
True
)),
default_int_type
)
for
(
dim
,
i
)
in
enumerate
(
index
)]
sum
[
channel_idx
]
+=
weight
*
\
absolute_access
(
index
,
channel_idx
if
field
.
index_dimensions
else
())
...
...
@@ -287,12 +351,61 @@ class InterpolatorAccess(TypedSymbol):
# noinspection SpellCheckingInspection
__xnew_cached_
=
staticmethod
(
cacheit
(
__new_stage2__
))
def
__getnewargs__
(
self
):
return
tuple
(
self
.
symbol
,
*
self
.
offsets
)
class
DiffInterpolatorAccess
(
InterpolatorAccess
):
def
__new__
(
cls
,
symbol
,
diff_coordinate_idx
,
*
offsets
,
**
kwargs
):
if
symbol
.
interpolator
.
interpolation_mode
==
InterpolationMode
.
LINEAR
:
from
pystencils.fd
import
Diff
,
Discretization2ndOrder
return
Discretization2ndOrder
(
1
)(
Diff
(
symbol
.
interpolator
.
at
(
offsets
),
diff_coordinate_idx
))
obj
=
DiffInterpolatorAccess
.
__xnew_cached_
(
cls
,
symbol
,
diff_coordinate_idx
,
*
offsets
,
**
kwargs
)
return
obj
def
__new_stage2__
(
self
,
symbol
:
sp
.
Symbol
,
diff_coordinate_idx
,
*
offsets
):
assert
offsets
is
not
None
obj
=
super
().
__xnew__
(
self
,
symbol
,
*
offsets
)
obj
.
diff_coordinate_idx
=
diff_coordinate_idx
return
obj
def
__hash__
(
self
):
return
hash
((
self
.
symbol
,
self
.
field
,
self
.
diff_coordinate_idx
,
tuple
(
self
.
offsets
),
self
.
interpolator
))
def
__str__
(
self
):
return
'%s_diff%i_interpolator(%s)'
%
(
self
.
field
.
name
,
self
.
diff_coordinate_idx
,
', '
.
join
(
str
(
o
)
for
o
in
self
.
offsets
))
def
__repr__
(
self
):
return
str
(
self
)
@
property
def
args
(
self
):
return
[
self
.
symbol
,
self
.
diff_coordinate_idx
,
*
self
.
offsets
]
@
property
def
symbols_defined
(
self
)
->
Set
[
sp
.
Symbol
]:
return
{
self
}
@
property
def
interpolation_mode
(
self
):
return
self
.
interpolator
.
interpolation_mode
# noinspection SpellCheckingInspection
__xnew__
=
staticmethod
(
__new_stage2__
)
# noinspection SpellCheckingInspection
__xnew_cached_
=
staticmethod
(
cacheit
(
__new_stage2__
))
def
__getnewargs__
(
self
):
return
tuple
(
self
.
symbol
,
self
.
diff_coordinate_idx
,
*
self
.
offsets
)
##########################################################################################
# GPU-specific fast specializations (for precision GPUs can also use above nodes/symbols #
##########################################################################################
class
TextureCachedField
:
class
TextureCachedField
(
Interpolator
)
:
def
__init__
(
self
,
parent_field
,
address_mode
=
None
,
...
...
@@ -301,28 +414,19 @@ class TextureCachedField:
use_normalized_coordinates
=
False
,
read_as_integer
=
False
):
if
isinstance
(
address_mode
,
str
):
address_mode
=
getattr
(
pycuda
.
driver
.
address_mode
,
address_mode
.
upper
())
super
().
__init__
(
parent_field
,
interpolation_mode
,
address_mode
,
use_normalized_coordinates
)
if
address_mode
is
None
:
address_mode
=
pycuda
.
driver
.
address_mode
.
BORDER
address_mode
=
'border'
if
filter_mode
is
None
:
filter_mode
=
pycuda
.
driver
.
filter_mode
.
LINEAR
# self, field_name, field_type, dtype, layout, shape, strides
self
.
field
=
parent_field
self
.
address_mode
=
address_mode
self
.
filter_mode
=
filter_mode
self
.
read_as_integer
=
read_as_integer
self
.
use_normalized_coordinates
=
use_normalized_coordinates
self
.
interpolation_mode
=
interpolation_mode
self
.
symbol
=
TypedSymbol
(
str
(
self
),
self
.
field
.
dtype
.
numpy_dtype
)
self
.
symbol
.
interpolator
=
self
self
.
symbol
.
field
=
self
.
field
self
.
required_global_declarations
=
[
TextureDeclaration
(
self
)]
# assert str(self.field.dtype) != 'double', "CUDA does not support double textures!"
# assert dtype_supports_textures(self.field.dtype), "CUDA only supports texture types with 32 bits or less"
@
property
def
ndim
(
self
):
return
self
.
field
.
ndim
@
classmethod
def
from_interpolator
(
cls
,
interpolator
:
LinearInterpolator
):
...
...
@@ -332,59 +436,17 @@ class TextureCachedField:
obj
=
cls
(
interpolator
.
field
,
interpolator
.
address_mode
,
interpolation_mode
=
interpolator
.
interpolation_mode
)
return
obj
def
at
(
self
,
offset
):
return
TextureAccess
(
self
.
symbol
,
*
offset
)
def
__getitem__
(
self
,
offset
):
return
TextureAccess
(
self
.
symbol
,
*
offset
)
def
__str__
(
self
):
return
'%s_texture_%s'
%
(
self
.
field
.
name
,
self
.
reproducible_hash
)
def
__repr__
(
self
):
return
self
.
__str__
()
@
property
def
_hashable_contents
(
self
):
return
(
type
(
self
),
self
.
address_mode
,
self
.
filter_mode
,
self
.
read_as_integer
,
self
.
interpolation_mode
,
self
.
use_normalized_coordinates
)
def
__hash__
(
self
):
return
hash
(
self
.
_hashable_contents
)
@
property
def
reproducible_hash
(
self
):
return
_hash
(
str
(
self
.
_hashable_contents
).
encode
()).
hexdigest
()
class
TextureAccess
(
InterpolatorAccess
):
def
__new__
(
cls
,
texture_symbol
,
offsets
,
*
args
,
**
kwargs
):
obj
=
TextureAccess
.
__xnew_cached_
(
cls
,
texture_symbol
,
offsets
,
*
args
,
**
kwargs
)
return
obj
def
__new_stage2__
(
self
,
symbol
,
*
offsets
):
obj
=
super
().
__xnew__
(
self
,
symbol
,
*
offsets
)
obj
.
required_global_declarations
=
symbol
.
interpolator
.
required_global_declarations
obj
.
required_global_declarations
[
0
].
_symbols_defined
.
add
(
obj
)
return
obj
def
__str__
(
self
):
return
'%s_texture(%s)'
%
(
self
.
interpolator
.
field
.
name
,
','
.
join
(
str
(
o
)
for
o
in
self
.
offsets
))
@
property
def
texture
(
self
):
return
self
.
interpolator
# noinspection SpellCheckingInspection
__xnew__
=
staticmethod
(
__new_stage2__
)
# noinspection SpellCheckingInspection
__xnew_cached_
=
staticmethod
(
cacheit
(
__new_stage2__
))
class
TextureDeclaration
(
Node
):
"""
A global declaration of a texture. Visible both for device and host code.
...
...
@@ -421,12 +483,18 @@ class TextureDeclaration(Node):
@
property
def
headers
(
self
):
return
[
'"pycuda-helpers.hpp"'
]
headers
=
[
'"pycuda-helpers.hpp"'
]
if
self
.
texture
.
interpolation_mode
==
InterpolationMode
.
CUBIC_SPLINE
:
headers
.
append
(
'"cubicTex%iD.cu"'
%
self
.
texture
.
ndim
)
return
headers
def
__str__
(
self
):
from
pystencils.backends.cuda_backend
import
CudaBackend
return
CudaBackend
()(
self
)
def
__repr__
(
self
):
return
str
(
self
)
class
TextureObject
(
TextureDeclaration
):
"""
...
...
pystencils/math_optimizations.py
View file @
8cf7aa27
...
...
@@ -44,3 +44,13 @@ def optimize_assignments(assignments, optimizations):
a
.
optimize
(
optimizations
)
return
assignments
def
optimize_ast
(
ast
,
optimizations
):
if
HAS_REWRITING
:
assignments_nodes
=
ast
.
atoms
(
SympyAssignment
)
for
a
in
assignments_nodes
:
a
.
optimize
(
optimizations
)
return
ast
pystencils/simp/assignment_collection.py
View file @
8cf7aa27
...
...
@@ -156,6 +156,9 @@ class AssignmentCollection:
"""See :func:`count_operations` """
return
count_operations
(
self
.
all_assignments
,
only_type
=
None
)
def
atoms
(
self
,
*
args
):
return
set
().
union
(
*
[
a
.
atoms
(
*
args
)
for
a
in
self
.
all_assignments
])
def
dependent_symbols
(
self
,
symbols
:
Iterable
[
sp
.
Symbol
])
->
Set
[
sp
.
Symbol
]:
"""Returns all symbols that depend on one of the passed symbols.
...
...
pystencils/transformations.py
View file @
8cf7aa27
...
...
@@ -14,8 +14,8 @@ import pystencils.astnodes as ast
import
pystencils.integer_functions
from
pystencils.assignment
import
Assignment
from
pystencils.data_types
import
(
PointerType
,
StructType
,
TypedImaginaryUnit
,
TypedSymbol
,
cast_func
,
collate_types
,
create_type
,
get_base_type
,
get_type_of_expression
,
pointer_arithmetic_func
,
reinterpret_cast_func
)
PointerType
,
StructType
,
TypedImaginaryUnit
,
TypedSymbol
,
cast_func
,
collate_types
,
create_type
,
get_base_type
,
get_type_of_expression
,
pointer_arithmetic_func
,
reinterpret_cast_func
)
from
pystencils.field
import
AbstractField
,
Field
,
FieldType
from
pystencils.kernelparameters
import
FieldPointerSymbol
from
pystencils.simp.assignment_collection
import
AssignmentCollection
...
...
@@ -1314,7 +1314,7 @@ def implement_interpolations(ast_node: ast.Node,
implement_by_texture_accesses
:
bool
=
False
,
vectorize
:
bool
=
False
,
use_hardware_interpolation_for_f32
=
True
):
from
pystencils.interpolation_astnodes
import
InterpolatorAccess
,
TextureAccess
,
TextureCachedField
from
pystencils.interpolation_astnodes
import
(
InterpolatorAccess
,
TextureCachedField
)
# TODO: perform this function on assignments, when unify_shape_symbols allows differently sized fields
assert
not
(
implement_by_texture_accesses
and
vectorize
),
\
...
...
@@ -1322,37 +1322,42 @@ def implement_interpolations(ast_node: ast.Node,
FLOAT32_T
=
create_type
(
'float32'
)
interpolation_accesses
=
ast_node
.
atoms
(
InterpolatorAccess
)