Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Jan Hönig
pystencils
Commits
62e2ed51
Commit
62e2ed51
authored
Nov 17, 2021
by
Markus Holzer
Committed by
Jan Hönig
Nov 17, 2021
Browse files
Remove interpolator
parent
43393627
Changes
13
Expand all
Hide whitespace changes
Inline
Side-by-side
doc/notebooks/05_tutorial_phasefield_spinodal_decomposition.ipynb
View file @
62e2ed51
This diff is collapsed.
Click to expand it.
doc/notebooks/demo_derivatives.ipynb
View file @
62e2ed51
This diff is collapsed.
Click to expand it.
pystencils/astnodes.py
View file @
62e2ed51
...
...
@@ -228,8 +228,7 @@ class KernelFunction(Node):
@
property
def
fields_accessed
(
self
)
->
Set
[
Field
]:
"""Set of Field instances: fields which are accessed inside this kernel function"""
from
pystencils.interpolation_astnodes
import
InterpolatorAccess
return
set
(
o
.
field
for
o
in
itertools
.
chain
(
self
.
atoms
(
ResolvedFieldAccess
),
self
.
atoms
(
InterpolatorAccess
)))
return
set
(
o
.
field
for
o
in
itertools
.
chain
(
self
.
atoms
(
ResolvedFieldAccess
)))
@
property
def
fields_written
(
self
)
->
Set
[
Field
]:
...
...
pystencils/backends/cuda_backend.py
View file @
62e2ed51
...
...
@@ -4,7 +4,6 @@ from pystencils.astnodes import Node
from
pystencils.backends.cbackend
import
CBackend
,
CustomSympyPrinter
,
generate_c
from
pystencils.enums
import
Backend
from
pystencils.fast_approximation
import
fast_division
,
fast_inv_sqrt
,
fast_sqrt
from
pystencils.interpolation_astnodes
import
DiffInterpolatorAccess
,
InterpolationMode
with
open
(
join
(
dirname
(
__file__
),
'cuda_known_functions.txt'
))
as
f
:
lines
=
f
.
readlines
()
...
...
@@ -76,30 +75,6 @@ class CudaSympyPrinter(CustomSympyPrinter):
super
(
CudaSympyPrinter
,
self
).
__init__
()
self
.
known_functions
.
update
(
CUDA_KNOWN_FUNCTIONS
)
def
_print_InterpolatorAccess
(
self
,
node
):
dtype
=
node
.
interpolator
.
field
.
dtype
.
numpy_dtype
if
type
(
node
)
==
DiffInterpolatorAccess
:
# cubicTex3D_1st_derivative_x(texture tex, float3 coord)
template
=
f
"cubicTex%iD_1st_derivative_
{
list
(
reversed
(
'xyz'
[
:
node
.
ndim
]))[
node
.
diff_coordinate_idx
]
}
(%s, %s)"
# noqa
elif
node
.
interpolator
.
interpolation_mode
==
InterpolationMode
.
CUBIC_SPLINE
:
template
=
"cubicTex%iDSimple(%s, %s)"
else
:
if
dtype
.
itemsize
>
4
:
# Use PyCuda hack!
# https://github.com/inducer/pycuda/blob/master/pycuda/cuda/pycuda-helpers.hpp
template
=
"fp_tex%iD(%s, %s)"
else
:
template
=
"tex%iD(%s, %s)"
code
=
template
%
(
node
.
interpolator
.
field
.
spatial_dimensions
,
str
(
node
.
interpolator
),
# + 0.5 comes from Nvidia's staggered indexing
', '
.
join
(
self
.
_print
(
o
+
0.5
)
for
o
in
reversed
(
node
.
offsets
))
)
return
code
def
_print_Function
(
self
,
expr
):
if
isinstance
(
expr
,
fast_division
):
assert
len
(
expr
.
args
)
==
2
,
f
"__fdividef has two arguments, but
{
len
(
expr
.
args
)
}
where given"
...
...
pystencils/cpu/kernelcreation.py
View file @
62e2ed51
...
...
@@ -11,9 +11,9 @@ from pystencils.cpu.cpujit import make_python_function
from
pystencils.data_types
import
StructType
,
TypedSymbol
,
create_type
from
pystencils.field
import
Field
,
FieldType
from
pystencils.transformations
import
(
add_types
,
filtered_tree_iteration
,
get_base_buffer_index
,
get_optimal_loop_ordering
,
implement_interpolations
,
make_loop_over_domain
,
move_constants_before_loop
,
parse_base_pointer_info
,
resolve_buffer_accesses
,
resolve_field_accesses
,
split_inner_loop
)
add_types
,
filtered_tree_iteration
,
get_base_buffer_index
,
get_optimal_loop_ordering
,
make_loop_over_domain
,
move_constants_before_loop
,
parse_base_pointer_info
,
resolve_buffer_accesses
,
resolve_field_accesses
,
split_inner_loop
)
AssignmentOrAstNodeList
=
List
[
Union
[
Assignment
,
ast
.
Node
]]
...
...
@@ -73,7 +73,6 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke
ghost_layers
=
ghost_layers
,
loop_order
=
loop_order
)
ast_node
=
KernelFunction
(
loop_node
,
Target
.
CPU
,
Backend
.
C
,
compile_function
=
make_python_function
,
ghost_layers
=
ghost_layer_info
,
function_name
=
function_name
,
assignments
=
assignments
)
implement_interpolations
(
body
)
if
split_groups
:
typed_split_groups
=
[[
type_symbol
(
s
)
for
s
in
split_group
]
for
split_group
in
split_groups
]
...
...
@@ -146,8 +145,6 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu
loop_body
=
Block
([])
loop_node
=
LoopOverCoordinate
(
loop_body
,
coordinate_to_loop_over
=
0
,
start
=
0
,
stop
=
index_fields
[
0
].
shape
[
0
])
implement_interpolations
(
loop_node
)
for
assignment
in
assignments
:
loop_body
.
append
(
assignment
)
...
...
pystencils/fd/finitedifferences.py
View file @
62e2ed51
...
...
@@ -105,9 +105,8 @@ class Discretization2ndOrder:
return
self
.
_discretize_advection
(
e
)
elif
isinstance
(
e
,
Diff
):
arg
,
*
indices
=
diff_args
(
e
)
from
pystencils.interpolation_astnodes
import
InterpolatorAccess
if
not
isinstance
(
arg
,
(
Field
.
Access
,
InterpolatorAccess
)
):
if
not
isinstance
(
arg
,
Field
.
Access
):
raise
ValueError
(
"Only derivatives with field or field accesses as arguments can be discretized"
)
return
self
.
spatial_stencil
(
indices
,
self
.
dx
,
arg
)
else
:
...
...
pystencils/gpucuda/cudajit.py
View file @
62e2ed51
...
...
@@ -4,9 +4,7 @@ from pystencils.backends.cbackend import get_headers
from
pystencils.backends.cuda_backend
import
generate_cuda
from
pystencils.data_types
import
StructType
from
pystencils.field
import
FieldType
from
pystencils.gpucuda.texture_utils
import
ndarray_to_tex
from
pystencils.include
import
get_pycuda_include_path
,
get_pystencils_include_path
from
pystencils.interpolation_astnodes
import
InterpolatorAccess
,
TextureCachedField
from
pystencils.kernel_wrapper
import
KernelWrapper
from
pystencils.kernelparameters
import
FieldPointerSymbol
...
...
@@ -47,29 +45,11 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
code
+=
"#define FUNC_PREFIX __global__
\n
"
code
+=
"#define RESTRICT __restrict__
\n\n
"
code
+=
str
(
generate_cuda
(
kernel_function_node
,
custom_backend
=
custom_backend
))
textures
=
set
(
d
.
interpolator
for
d
in
kernel_function_node
.
atoms
(
InterpolatorAccess
)
if
isinstance
(
d
.
interpolator
,
TextureCachedField
))
nvcc_options
=
[
"-w"
,
"-std=c++11"
,
"-Wno-deprecated-gpu-targets"
]
if
USE_FAST_MATH
:
nvcc_options
.
append
(
"-use_fast_math"
)
# Code for CubicInterpolationCUDA
from
pystencils.interpolation_astnodes
import
InterpolationMode
from
os.path
import
join
,
dirname
,
isdir
if
any
(
t
.
interpolation_mode
==
InterpolationMode
.
CUBIC_SPLINE
for
t
in
textures
):
assert
isdir
(
join
(
dirname
(
__file__
),
(
"CubicInterpolationCUDA"
,
"code"
)),
"Submodule CubicInterpolationCUDA does not exist.
\n
"
+
"Clone https://github.com/theHamsta/CubicInterpolationCUDA into pystencils.gpucuda"
)
nvcc_options
+=
[
"-I"
+
join
(
dirname
(
__file__
),
"CubicInterpolationCUDA"
,
"code"
)]
nvcc_options
+=
[
"-I"
+
join
(
dirname
(
__file__
),
"CubicInterpolationCUDA"
,
"code"
,
"internal"
)]
needed_dims
=
set
(
t
.
field
.
spatial_dimensions
for
t
in
textures
if
t
.
interpolation_mode
==
InterpolationMode
.
CUBIC_SPLINE
)
for
i
in
needed_dims
:
code
=
'extern "C++" {
\n
#include "cubicTex%iD.cu"
\n
}
\n
'
%
i
+
code
mod
=
SourceModule
(
code
,
options
=
nvcc_options
,
include_dirs
=
[
get_pystencils_include_path
(),
get_pycuda_include_path
()])
func
=
mod
.
get_function
(
kernel_function_node
.
function_name
)
...
...
@@ -95,12 +75,6 @@ def make_python_function(kernel_function_node, argument_dict=None, custom_backen
block_and_thread_numbers
[
'block'
]
=
tuple
(
int
(
i
)
for
i
in
block_and_thread_numbers
[
'block'
])
block_and_thread_numbers
[
'grid'
]
=
tuple
(
int
(
i
)
for
i
in
block_and_thread_numbers
[
'grid'
])
# TODO: use texture objects:
# https://devblogs.nvidia.com/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/
for
tex
in
textures
:
tex_ref
=
mod
.
get_texref
(
str
(
tex
))
ndarray_to_tex
(
tex_ref
,
full_arguments
[
tex
.
field
.
name
],
tex
.
address_mode
,
tex
.
filter_mode
,
tex
.
use_normalized_coordinates
,
tex
.
read_as_integer
)
args
=
_build_numpy_argument_list
(
parameters
,
full_arguments
)
cache
[
key
]
=
(
args
,
block_and_thread_numbers
)
cache_values
.
append
(
kwargs
)
# keep objects alive such that ids remain unique
...
...
pystencils/gpucuda/kernelcreation.py
View file @
62e2ed51
...
...
@@ -7,8 +7,8 @@ from pystencils.enums import Target, Backend
from
pystencils.gpucuda.cudajit
import
make_python_function
from
pystencils.gpucuda.indexing
import
BlockIndexing
from
pystencils.transformations
import
(
add_types
,
get_base_buffer_index
,
get_common_shape
,
implement_interpolations
,
parse_base_pointer_info
,
resolve_buffer_accesses
,
resolve_field_accesses
,
unify_shape_symbols
)
add_types
,
get_base_buffer_index
,
get_common_shape
,
parse_base_pointer_info
,
resolve_buffer_accesses
,
resolve_field_accesses
,
unify_shape_symbols
)
def
create_cuda_kernel
(
assignments
,
...
...
@@ -17,8 +17,7 @@ def create_cuda_kernel(assignments,
indexing_creator
=
BlockIndexing
,
iteration_slice
=
None
,
ghost_layers
=
None
,
skip_independence_check
=
False
,
use_textures_for_interpolation
=
True
):
skip_independence_check
=
False
):
assert
assignments
,
"Assignments must not be empty!"
fields_read
,
fields_written
,
assignments
=
add_types
(
assignments
,
type_info
,
not
skip_independence_check
)
all_fields
=
fields_read
.
union
(
fields_written
)
...
...
@@ -74,8 +73,6 @@ def create_cuda_kernel(assignments,
assignments
=
assignments
)
ast
.
global_variables
.
update
(
indexing
.
index_variables
)
implement_interpolations
(
ast
,
implement_by_texture_accesses
=
use_textures_for_interpolation
)
base_pointer_spec
=
[[
'spatialInner0'
]]
base_pointer_info
=
{
f
.
name
:
parse_base_pointer_info
(
base_pointer_spec
,
[
2
,
1
,
0
],
f
.
spatial_dimensions
,
f
.
index_dimensions
)
...
...
@@ -110,8 +107,7 @@ def created_indexed_cuda_kernel(assignments,
function_name
=
"kernel"
,
type_info
=
None
,
coordinate_names
=
(
'x'
,
'y'
,
'z'
),
indexing_creator
=
BlockIndexing
,
use_textures_for_interpolation
=
True
):
indexing_creator
=
BlockIndexing
):
fields_read
,
fields_written
,
assignments
=
add_types
(
assignments
,
type_info
,
check_independence_condition
=
False
)
all_fields
=
fields_read
.
union
(
fields_written
)
read_only_fields
=
set
([
f
.
name
for
f
in
fields_read
-
fields_written
])
...
...
@@ -150,8 +146,6 @@ def created_indexed_cuda_kernel(assignments,
None
,
function_name
,
assignments
=
assignments
)
ast
.
global_variables
.
update
(
indexing
.
index_variables
)
implement_interpolations
(
ast
,
implement_by_texture_accesses
=
use_textures_for_interpolation
)
coord_mapping
=
indexing
.
coordinates
base_pointer_spec
=
[[
'spatialInner0'
]]
base_pointer_info
=
{
f
.
name
:
parse_base_pointer_info
(
base_pointer_spec
,
[
2
,
1
,
0
],
...
...
pystencils/interpolation_astnodes.py
deleted
100644 → 0
View file @
43393627
# -*- coding: utf-8 -*-
#
# Copyright © 2019 Stephan Seitz <stephan.seitz@fau.de>
#
# Distributed under terms of the GPLv3 license.
"""
"""
import
hashlib
import
itertools
from
enum
import
Enum
from
typing
import
Set
import
sympy
as
sp
from
sympy.core.cache
import
cacheit
import
pystencils
from
pystencils.astnodes
import
Node
from
pystencils.data_types
import
TypedSymbol
,
cast_func
,
create_type
try
:
import
pycuda.driver
except
Exception
:
pass
# MD5 constructor shared by this module to derive the hex digests that are
# embedded in interpolator / interpolator-access symbol names.
_hash = hashlib.md5
class InterpolationMode(str, Enum):
    """Interpolation schemes an interpolator can be configured with.

    Every member is also a ``str`` whose value is the lower-case scheme name.
    """

    NEAREST_NEIGHBOR = "nearest_neighbour"
    # Short alias: same value, therefore the very same enum member.
    NN = "nearest_neighbour"
    LINEAR = "linear"
    CUBIC_SPLINE = "cubic_spline"
class _InterpolationSymbol(TypedSymbol):
    """Typed symbol that carries a field and the interpolator built on top of it.

    Instances are created through a cached constructor; ``field`` and
    ``interpolator`` are attached as plain attributes on the new symbol.
    """

    def __new__(cls, name, field, interpolator):
        # Always go through the cached second-stage constructor.
        return cls.__xnew_cached_(cls, name, field, interpolator)

    def __new_stage2__(cls, name, field, interpolator):
        """Build the symbol itself and attach the field and interpolator to it."""
        dtype_placeholder = 'dummy_symbol_carrying_field' + field.name
        symbol = super().__xnew__(cls, name, dtype_placeholder)
        symbol.field = field
        symbol.interpolator = interpolator
        return symbol

    # noinspection SpellCheckingInspection
    __xnew__ = staticmethod(__new_stage2__)
    # noinspection SpellCheckingInspection
    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))

    def __getnewargs__(self):
        """Positional arguments needed to re-create this symbol."""
        return self.name, self.field, self.interpolator

    def __getnewargs_ex__(self):
        """Positional and (empty) keyword arguments needed to re-create this symbol."""
        positional = (self.name, self.field, self.interpolator)
        return positional, {}
class Interpolator(object):
    """
    Implements non-integer accesses on fields using linear interpolation.

    On GPU, this interpolator can be implemented by a :class:`.TextureCachedField` for hardware acceleration.

    Address modes are different boundary handlings possible choices are like for CUDA textures

        **CLAMP**

        The signal c[k] is continued outside k=0,...,M-1 so that c[k] = c[0] for k < 0, and c[k] = c[M-1] for k >= M.

        **BORDER**

        The signal c[k] is continued outside k=0,...,M-1 so that c[k] = 0 for k < 0 and for k >= M.

        Now, to describe the last two address modes, we are forced to consider normalized coordinates,
        so that the 1D input signal samples are assumed to be c[k / M], with k=0,...,M-1.

        **WRAP**

        The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to M.
        In other words, c[(k + p * M) / M] = c[k / M] for any (positive, negative or vanishing) integer p.

        **MIRROR**

        The signal c[k / M] is continued outside k=0,...,M-1 so that it is periodic with period equal to 2 * M - 2.
        In other words, c[l / M] = c[k / M] for any l and k such that (l + k)mod(2 * M - 2) = 0.

    Explanations from https://stackoverflow.com/questions/19020963/the-different-addressing-modes-of-cuda-textures
    """

    # Class-level default. NOTE(review): this list is shared by every instance
    # that does not override the attribute.
    required_global_declarations = []

    def __init__(self,
                 parent_field,
                 interpolation_mode: "InterpolationMode",
                 address_mode='BORDER',
                 use_normalized_coordinates=False,
                 allow_textures=True):
        """Create an interpolator on top of ``parent_field``.

        Args:
            parent_field: field that is accessed at non-integer positions
            interpolation_mode: interpolation scheme to use
            address_mode: boundary handling, see the class documentation
            use_normalized_coordinates: stored on the instance and part of its identity
            allow_textures: stored on the instance
        """
        super().__init__()

        self.field = parent_field
        # Side effect: the parent field is re-tagged as a CUSTOM field.
        self.field.field_type = pystencils.field.FieldType.CUSTOM
        self.address_mode = address_mode
        self.use_normalized_coordinates = use_normalized_coordinates
        self.interpolation_mode = interpolation_mode
        self.hash_str = hashlib.md5(
            f'{self.field}_{address_mode}_{self.field.dtype}_{interpolation_mode}'.encode()).hexdigest()
        # str(self) relies on the attributes assigned above, so the symbol is created afterwards.
        self.symbol = _InterpolationSymbol(str(self), parent_field, self)
        self.allow_textures = allow_textures

    @property
    def ndim(self):
        """Number of dimensions of the underlying field."""
        return self.field.ndim

    @property
    def _hashable_contents(self):
        """Tuple that defines the identity of this interpolator for hashing and equality."""
        return (str(self.address_mode),
                str(type(self)),
                self.hash_str,
                self.use_normalized_coordinates)

    def at(self, offset):
        """Return an interpolated access at the given (possibly non-integer) offsets."""
        return InterpolatorAccess(self.symbol, *[sp.S(o) for o in offset])

    def __getitem__(self, offset):
        """Same as :meth:`at`, using subscript syntax."""
        return InterpolatorAccess(self.symbol, *[sp.S(o) for o in offset])

    def __str__(self):
        return f'{self.field.name}_interpolator_{self.reproducible_hash}'

    def __repr__(self):
        return self.__str__()

    def __hash__(self):
        return hash(self._hashable_contents)

    def __eq__(self, other):
        """Two interpolators are equal when their hashable contents are equal.

        The contents themselves are compared (not their hashes), so an unrelated
        object whose hash happens to collide is never considered equal.
        """
        if not isinstance(other, Interpolator):
            return NotImplemented
        return self._hashable_contents == other._hashable_contents

    @property
    def reproducible_hash(self):
        """Hex digest of the hashable contents; unlike ``hash()`` it is stable across interpreter runs."""
        return _hash(str(self._hashable_contents).encode()).hexdigest()
class LinearInterpolator(Interpolator):
    """Interpolator preconfigured with ``InterpolationMode.LINEAR``."""

    def __init__(self,
                 parent_field: pystencils.Field,
                 address_mode='BORDER',
                 use_normalized_coordinates=False):
        # Only the interpolation mode is fixed here; everything else is forwarded.
        super().__init__(parent_field,
                         interpolation_mode=InterpolationMode.LINEAR,
                         address_mode=address_mode,
                         use_normalized_coordinates=use_normalized_coordinates)
class NearestNeightborInterpolator(Interpolator):
    """Interpolator preconfigured with ``InterpolationMode.NN`` (nearest neighbour).

    The spelling of the class name is part of the public interface and is kept as is.
    """

    def __init__(self,
                 parent_field: pystencils.Field,
                 address_mode='BORDER',
                 use_normalized_coordinates=False):
        # Only the interpolation mode is fixed here; everything else is forwarded.
        super().__init__(parent_field,
                         interpolation_mode=InterpolationMode.NN,
                         address_mode=address_mode,
                         use_normalized_coordinates=use_normalized_coordinates)
class InterpolatorAccess(TypedSymbol):
    """Symbol representing the value of an interpolator at given (possibly non-integer) offsets.

    The symbol keeps references to the interpolator symbol it was created from
    (``symbol``), the underlying ``field``, the ``interpolator`` and the
    ``offsets`` at which the field is sampled.
    """

    def __new__(cls, field, *offsets):
        # Always go through the cached second-stage constructor.
        obj = InterpolatorAccess.__xnew_cached_(cls, field, *offsets)
        return obj

    def __new_stage2__(cls, symbol, *offsets):
        """Create the typed symbol and attach interpolation meta data to it.

        The symbol name is built from the field name and a digest of the offsets;
        the symbol's data type is the data type of the field.
        """
        assert offsets is not None
        obj = super().__xnew__(cls, '%s_interpolator_%s' %
                               (symbol.field.name, _hash(str(tuple(offsets)).encode()).hexdigest()),
                               symbol.field.dtype)
        obj.offsets = offsets
        obj.symbol = symbol
        obj.field = symbol.field
        obj.interpolator = symbol.interpolator
        return obj

    # noinspection SpellCheckingInspection
    __xnew__ = staticmethod(__new_stage2__)
    # noinspection SpellCheckingInspection
    __xnew_cached_ = staticmethod(cacheit(__new_stage2__))

    def _hashable_contents(self):
        # NOTE(review): the parent hook called below is named `_hashable_content`
        # (singular); this method has a different name and is therefore presumably
        # not used by sympy's hashing - confirm that this is intended.
        return super()._hashable_content() + ((self.symbol,
                                               self.field,
                                               tuple(self.offsets),
                                               self.symbol.interpolator))

    def __str__(self):
        return f"{self.field.name}_interpolator({', '.join(str(o) for o in self.offsets)})"

    def __repr__(self):
        return self.__str__()

    def _latex(self, printer, *_):
        """LaTeX representation: field name with an ``interpolator`` subscript and the printed offsets."""
        n = self.field.latex_name if self.field.latex_name else self.field.name
        rendered_offsets = ", ".join(str(printer.doprint(o)) for o in self.offsets)
        return f'{n}_{{interpolator}}\\left({rendered_offsets}\\right)'

    @property
    def ndim(self):
        """Number of offsets, i.e. the dimensionality of the access."""
        return len(self.offsets)

    @property
    def is_texture(self):
        """True if the interpolator behind this access is a ``TextureCachedField``."""
        return isinstance(self.interpolator, TextureCachedField)

    def atoms(self, *types):
        """Collect atoms of the given types from this access and from its offsets.

        Any number of types may be passed. The result contains the offsets that
        are instances of ``types``, this access itself if it is an instance of
        ``types``, and whatever ``atoms(*types)`` yields for each offset.
        An access without offsets yields an empty set.
        """
        if not self.offsets:
            return set()

        found = {o for o in self.offsets if isinstance(o, types)}
        # `types` is passed as a tuple: unpacking it into isinstance() would raise
        # a TypeError for anything but exactly one type.
        if isinstance(self, types):
            found.add(self)
        for o in self.offsets:
            if hasattr(o, 'atoms'):
                found.update(o.atoms(*types))
        return found

    def neighbor(self, coord_id, offset):
        """Return the access shifted by ``offset`` along coordinate ``coord_id``."""
        offset_list = list(self.offsets)
        offset_list[coord_id] += offset
        return self.interpolator.at(tuple(offset_list))

    @property
    def free_symbols(self):
        """Union of the free symbols of all offsets (the access itself is not included)."""
        symbols = set()
        if self.offsets is not None:
            for o in self.offsets:
                if hasattr(o, 'free_symbols'):
                    symbols.update(o.free_symbols)
        return symbols

    @property
    def required_global_declarations(self):
        """Global declarations required by the interpolator.

        Side effect: if there are any, this access is registered in the
        ``_symbols_defined`` set of the first declaration.
        """
        required_global_declarations = self.symbol.interpolator.required_global_declarations
        if required_global_declarations:
            required_global_declarations[0]._symbols_defined.add(self)
        return required_global_declarations

    @property
    def args(self):
        return [self.symbol, *self.offsets]

    @property
    def symbols_defined(self) -> Set[sp.Symbol]:
        return {self}

    @property
    def interpolation_mode(self):
        """Interpolation mode of the interpolator behind this access."""
        return self.interpolator.interpolation_mode

    @property
    def _diff_interpolation_vec(self):
        """Column vector holding one ``DiffInterpolatorAccess`` per offset coordinate."""
        return sp.Matrix([DiffInterpolatorAccess(self.symbol, i, *self.offsets)
                          for i in range(len(self.offsets))])

    def diff(self, *symbols, **kwargs):
        """Differentiate the access by applying the chain rule to its offsets.

        Returns 1 when differentiating with respect to the access itself.
        A 1x1 result matrix is unwrapped to its single entry.
        """
        if symbols == (self,):
            return 1
        rtn = self._diff_interpolation_vec.T * sp.Matrix(self.offsets).diff(*symbols, **kwargs)
        if rtn.shape == (1, 1):
            rtn = rtn[0, 0]
        return rtn

    def implementation_with_stencils(self):
        """Express this access through explicit accesses to the field.

        Returns:
            a ``sp.Matrix`` with one entry per channel if the field has index
            dimensions, otherwise a single expression

        Raises:
            NotImplementedError: for cubic spline interpolation and for unknown
                address modes in software boundary handling
        """
        field = self.field

        default_int_type = create_type('int64')
        use_textures = isinstance(self.interpolator, TextureCachedField)
        if use_textures:
            def absolute_access(x, _):
                return self.symbol.interpolator.at((o for o in x))
        else:
            absolute_access = field.absolute_access

        num_channels = field.shape[0] if field.index_dimensions else 1
        channel_sums = [0, ] * num_channels

        offsets = self.offsets
        # Lower and upper integer neighbour of a coordinate.
        rounding_functions = (sp.floor, lambda x: sp.floor(x) + 1)
        address_mode = str(self.interpolator.address_mode).lower()

        for channel_idx in range(num_channels):
            if self.interpolation_mode == InterpolationMode.NN:
                if use_textures:
                    channel_sums[channel_idx] = self
                else:
                    channel_sums[channel_idx] = absolute_access([sp.floor(i + 0.5) for i in offsets], channel_idx)

            elif self.interpolation_mode == InterpolationMode.LINEAR:
                # TODO optimization: implement via lerp: https://devblogs.nvidia.com/lerp-faster-cuda/
                # One term per corner of the cell surrounding the access position.
                for c in itertools.product(rounding_functions, repeat=field.spatial_dimensions):
                    weight = sp.Mul(*[1 - sp.Abs(f(offset) - offset) for (f, offset) in zip(c, offsets)])
                    index = [f(offset) for (f, offset) in zip(c, offsets)]
                    index_arg = channel_idx if field.index_dimensions else ()

                    # Hardware boundary handling on GPU
                    if use_textures:
                        channel_sums[channel_idx] += weight * absolute_access(index, index_arg)
                    # else boundary handling using software
                    elif address_mode == 'border':
                        is_inside_field = sp.And(
                            *itertools.chain([i >= 0 for i in index],
                                             [idx < field.shape[dim] for (dim, idx) in enumerate(index)]))
                        index = [cast_func(i, default_int_type) for i in index]
                        channel_sums[channel_idx] += sp.Piecewise(
                            (weight * absolute_access(index, index_arg), is_inside_field),
                            (sp.simplify(0), True)
                        )
                    elif address_mode == 'clamp':
                        index = [sp.Min(sp.Max(0, cast_func(i, default_int_type)), field.spatial_shape[dim] - 1)
                                 for (dim, i) in enumerate(index)]
                        channel_sums[channel_idx] += weight * absolute_access(index, index_arg)
                    elif address_mode == 'wrap':
                        # NOTE(review): the modulus is shape - 1, not shape - confirm this is intended.
                        index = [sp.Mod(cast_func(i, default_int_type), field.shape[dim] - 1)
                                 for (dim, i) in enumerate(index)]
                        index = [cast_func(sp.Piecewise((i, i > 0),
                                                        (sp.Abs(cast_func(field.shape[dim] - 1 + i,
                                                                          default_int_type)), True)),
                                           default_int_type)
                                 for (dim, i) in enumerate(index)]
                        channel_sums[channel_idx] += weight * absolute_access(index, index_arg)
                    elif address_mode == 'mirror':
                        def triangle_fun(x, half_period):
                            saw_tooth = cast_func(sp.Abs(cast_func(x, 'int32')), 'int32') % (
                                cast_func(2 * half_period, create_type('int32')))
                            return sp.Piecewise((saw_tooth, saw_tooth < half_period),
                                                (2 * half_period - 1 - saw_tooth, True))
                        index = [cast_func(triangle_fun(i, field.shape[dim]), default_int_type)
                                 for (dim, i) in enumerate(index)]
                        channel_sums[channel_idx] += weight * absolute_access(index, index_arg)
                    else:
                        raise NotImplementedError()

            elif self.interpolation_mode == InterpolationMode.CUBIC_SPLINE:
                raise NotImplementedError("only works with HW interpolation for float32")

        # Factor and return only after every channel has been accumulated.
        channel_sums = [sp.factor(s) for s in channel_sums]

        if field.index_dimensions:
            return sp.Matrix(channel_sums)
        else:
            return channel_sums[0]

    def __getnewargs__(self):
        """Positional arguments needed to re-create this access."""
        return (self.symbol, *self.offsets)

    def __getnewargs_ex__(self):
        """Positional and (empty) keyword arguments needed to re-create this access."""
        return (self.symbol, *self.offsets), {}
class
DiffInterpolatorAccess
(
InterpolatorAccess
):
def
__new__
(
cls
,
symbol
,
diff_coordinate_idx
,
*
offsets
):
if
symbol
.
interpolator
.
interpolation_mode
==
InterpolationMode
.
LINEAR
:
from
pystencils.fd
import
Diff
,
Discretization2ndOrder
return
Discretization2ndOrder
(
1
)(
Diff
(
symbol
.
interpolator
.
at
(
offsets
),
diff_coordinate_idx
))
obj
=
DiffInterpolatorAccess
.
__xnew_cached_
(
cls
,
symbol
,
diff_coordinate_idx
,
*
offsets
)
return
obj
def
__new_stage2__
(
self
,
symbol
:
sp
.
Symbol
,
diff_coordinate_idx
,
*
offsets
):
assert
offsets
is
not
None
obj
=
super
().
__xnew__
(
self
,
symbol
,
*
offsets
)
obj
.
diff_coordinate_idx
=
diff_coordinate_idx
return
obj
def
__hash__
(
self
):
return
hash
((
self
.
symbol
,
self
.
field
,
self
.
diff_coordinate_idx
,
tuple
(
self
.
offsets
),
self
.
interpolator
))
def
__str__
(
self
):
return
'%s_diff%i_interpolator(%s)'
%
(
self
.
field
.
name
,
self
.
diff_coordinate_idx
,
', '
.
join
(
str
(
o
)
for
o
in
self
.
offsets
))
def
__repr__
(
self
):
return
str
(
self
)
@
property
def
args
(
self
):
return
[
self
.
symbol
,
self
.
diff_coordinate_idx
,
*
self
.
offsets
]
@
property