Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Stephan Seitz
pystencils
Commits
a3f37cbd
Commit
a3f37cbd
authored
Aug 10, 2020
by
Markus Holzer
Browse files
Merge remote-tracking branch 'remotes/origin/Extend_testsuite' into Extend_testsuite
parents
7aa67902
4465a595
Changes
21
Hide whitespace changes
Inline
Side-by-side
pystencils/backends/cbackend.py
View file @
a3f37cbd
...
...
@@ -298,7 +298,7 @@ class CBackend:
return
node
.
get_code
(
self
.
_dialect
,
self
.
_vector_instruction_set
)
def
_print_SourceCodeComment
(
self
,
node
):
return
"/*
"
+
node
.
text
+
"
*/"
return
f
"/*
{
node
.
text
}
*/"
def
_print_EmptyLine
(
self
,
node
):
return
""
...
...
@@ -316,7 +316,7 @@ class CBackend:
result
=
f
"if (
{
condition_expr
}
)
\n
{
true_block
}
"
if
node
.
false_block
:
false_block
=
self
.
_print_Block
(
node
.
false_block
)
result
+=
"else
"
+
false_block
result
+=
f
"else
{
false_block
}
"
return
result
...
...
@@ -336,7 +336,7 @@ class CustomSympyPrinter(CCodePrinter):
return
self
.
_typed_number
(
expr
.
evalf
(),
get_type_of_expression
(
expr
))
if
expr
.
exp
.
is_integer
and
expr
.
exp
.
is_number
and
0
<
expr
.
exp
<
8
:
return
"(
"
+
self
.
_print
(
sp
.
Mul
(
*
[
expr
.
base
]
*
expr
.
exp
,
evaluate
=
False
))
+
"
)"
return
f
"(
{
self
.
_print
(
sp
.
Mul
(
*
[
expr
.
base
]
*
expr
.
exp
,
evaluate
=
False
))
}
)"
elif
expr
.
exp
.
is_integer
and
expr
.
exp
.
is_number
and
-
8
<
expr
.
exp
<
0
:
return
f
"1 / (
{
self
.
_print
(
sp
.
Mul
(
*
([
expr
.
base
]
*
-
expr
.
exp
),
evaluate
=
False
))
}
)"
else
:
...
...
@@ -589,9 +589,6 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter):
result
=
self
.
instruction_set
[
'&'
].
format
(
result
,
item
)
return
result
def
_print_Max
(
self
,
expr
):
return
"test"
def
_print_Or
(
self
,
expr
):
result
=
self
.
_scalarFallback
(
'_print_Or'
,
expr
)
if
result
:
...
...
pystencils/backends/cuda_backend.py
View file @
a3f37cbd
...
...
@@ -104,7 +104,6 @@ class CudaSympyPrinter(CustomSympyPrinter):
assert
len
(
expr
.
args
)
==
1
,
f
"__fsqrt_rn has one argument, but
{
len
(
expr
.
args
)
}
where given"
return
f
"__fsqrt_rn(
{
self
.
_print
(
expr
.
args
[
0
])
}
)"
elif
isinstance
(
expr
,
fast_inv_sqrt
):
print
(
len
(
expr
.
args
)
==
1
)
assert
len
(
expr
.
args
)
==
1
,
f
"__frsqrt_rn has one argument, but
{
len
(
expr
.
args
)
}
where given"
return
f
"__frsqrt_rn(
{
self
.
_print
(
expr
.
args
[
0
])
}
)"
return
super
().
_print_Function
(
expr
)
pystencils/datahandling/datahandling_interface.py
View file @
a3f37cbd
...
...
@@ -86,6 +86,13 @@ class DataHandling(ABC):
Args:
description (str): String description of the fields to add
dtype: data type of the array as numpy data type
ghost_layers: number of ghost layers - if not specified a default value specified in the constructor
is used
layout: memory layout of array, either structure of arrays 'SoA' or array of structures 'AoS'.
this is only important if values_per_cell > 1
cpu: allocate field on the CPU
gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu'
alignment: either False for no alignment, or the number of bytes to align to
Returns:
Fields representing the just created arrays
"""
...
...
@@ -200,6 +207,10 @@ class DataHandling(ABC):
directly passed to the kernel function and override possible parameters from the DataHandling
"""
@
abstractmethod
def
get_kernel_kwargs
(
self
,
kernel_function
,
**
kwargs
):
"""Returns the input arguments of a kernel"""
@
abstractmethod
def
swap
(
self
,
name1
,
name2
,
gpu
=
False
):
"""Swaps data of two arrays"""
...
...
pystencils/datahandling/serial_datahandling.py
View file @
a3f37cbd
...
...
@@ -266,10 +266,10 @@ class SerialDataHandling(DataHandling):
return
name
in
self
.
gpu_arrays
def
synchronization_function_cpu
(
self
,
names
,
stencil_name
=
None
,
**
_
):
return
self
.
synchronization_function
(
names
,
stencil_name
,
'cpu'
)
return
self
.
synchronization_function
(
names
,
stencil_name
,
target
=
'cpu'
)
def
synchronization_function_gpu
(
self
,
names
,
stencil_name
=
None
,
**
_
):
return
self
.
synchronization_function
(
names
,
stencil_name
,
'gpu'
)
return
self
.
synchronization_function
(
names
,
stencil_name
,
target
=
'gpu'
)
def
synchronization_function
(
self
,
names
,
stencil
=
None
,
target
=
None
,
**
_
):
if
target
is
None
:
...
...
@@ -425,14 +425,15 @@ class SerialDataHandling(DataHandling):
np
.
savez_compressed
(
file
,
**
self
.
cpu_arrays
)
def
load_all
(
self
,
file
):
if
'.npz'
not
in
file
:
file
+=
'.npz'
file_contents
=
np
.
load
(
file
)
for
arr_name
,
arr_contents
in
self
.
cpu_arrays
.
items
():
if
arr_name
not
in
file_contents
:
print
(
f
"Skipping read data
{
arr_name
}
because there is no data with this name in data handling"
)
continue
if
file_contents
[
arr_name
].
shape
!=
arr_contents
.
shape
:
print
(
"Skipping read data {} because shapes don't match. "
"Read array shape {}, existing array shape {}"
.
format
(
arr_name
,
file_contents
[
arr_name
].
shape
,
arr_contents
.
shape
))
print
(
f
"Skipping read data
{
arr_name
}
because shapes don't match. "
f
"Read array shape
{
file_contents
[
arr_name
].
shape
}
, existing array shape
{
arr_contents
.
shape
}
"
)
continue
np
.
copyto
(
arr_contents
,
file_contents
[
arr_name
])
pystencils/stencil.py
View file @
a3f37cbd
...
...
@@ -34,6 +34,8 @@ def is_valid(stencil, max_neighborhood=None):
True
>>> is_valid([(2, 0), (1, 0)], max_neighborhood=1)
False
>>> is_valid([(2, 0), (1, 0)], max_neighborhood=2)
True
"""
expected_dim
=
len
(
stencil
[
0
])
for
d
in
stencil
:
...
...
@@ -67,8 +69,11 @@ def have_same_entries(s1, s2):
Examples:
>>> stencil1 = [(1, 0), (-1, 0), (0, 1), (0, -1)]
>>> stencil2 = [(-1, 0), (0, -1), (1, 0), (0, 1)]
>>> stencil3 = [(-1, 0), (0, -1), (1, 0)]
>>> have_same_entries(stencil1, stencil2)
True
>>> have_same_entries(stencil1, stencil3)
False
"""
if
len
(
s1
)
!=
len
(
s2
):
return
False
...
...
pystencils/sympyextensions.py
View file @
a3f37cbd
...
...
@@ -481,7 +481,7 @@ def count_operations(term: Union[sp.Expr, List[sp.Expr]],
pass
elif
t
.
func
is
sp
.
Mul
:
if
check_type
(
t
):
result
[
'muls'
]
+=
len
(
t
.
args
)
result
[
'muls'
]
+=
len
(
t
.
args
)
-
1
for
a
in
t
.
args
:
if
a
==
1
or
a
==
-
1
:
result
[
'muls'
]
-=
1
...
...
@@ -509,7 +509,8 @@ def count_operations(term: Union[sp.Expr, List[sp.Expr]],
if
t
.
exp
>=
0
:
result
[
'muls'
]
+=
int
(
t
.
exp
)
-
1
else
:
result
[
'muls'
]
-=
1
if
result
[
'muls'
]
>
0
:
result
[
'muls'
]
-=
1
result
[
'divs'
]
+=
1
result
[
'muls'
]
+=
(
-
int
(
t
.
exp
))
-
1
elif
sp
.
nsimplify
(
t
.
exp
)
==
sp
.
Rational
(
1
,
2
):
...
...
pystencils/transformations.py
View file @
a3f37cbd
...
...
@@ -1206,13 +1206,13 @@ def get_loop_hierarchy(ast_node):
return
reversed
(
result
)
def
get_loop_counter_symbol_hierarchy
(
ast
N
ode
):
def
get_loop_counter_symbol_hierarchy
(
ast
_n
ode
):
"""Determines the loop counter symbols around a given AST node.
:param ast
N
ode: the AST node
:param ast
_n
ode: the AST node
:return: list of loop counter symbols, where the first list entry is the symbol of the innermost loop
"""
result
=
[]
node
=
ast
N
ode
node
=
ast
_n
ode
while
node
is
not
None
:
node
=
get_next_parent_of_type
(
node
,
ast
.
LoopOverCoordinate
)
if
node
:
...
...
pystencils_tests/test_astnodes.py
0 → 100644
View file @
a3f37cbd
import
sympy
as
sp
import
pystencils
as
ps
from
pystencils
import
Assignment
from
pystencils.astnodes
import
Block
,
SkipIteration
,
LoopOverCoordinate
,
SympyAssignment
from
sympy.codegen.rewriting
import
optims_c99
# Module-level fixtures shared by every test in this file.
# The field description string is handed to pystencils unchanged.
dst = ps.fields('dst(8): double[2D]')
# sympy range syntax: expands to the eight symbols s_0 ... s_7
s = sp.symbols('s_:8')
x, y = sp.symbols('x y')
def test_kernel_function():
    """Build a kernel from two assignments and check the metadata on its AST node."""
    kernel_ast = ps.create_kernel([
        Assignment(dst[0, 0](0), s[0]),
        Assignment(x, dst[0, 0](2)),
    ])

    # No target/backend was requested, so the defaults are expected here.
    assert kernel_ast.target == 'cpu'
    assert kernel_ast.backend == 'c'

    # symbols_defined and undefined_symbols will always return an empty set
    assert kernel_ast.symbols_defined == set()
    assert kernel_ast.undefined_symbols == set()

    # dst is both written (first assignment) and read (second assignment)
    assert kernel_ast.fields_written == {dst}
    assert kernel_ast.fields_read == {dst}
def test_skip_iteration():
    """A SkipIteration node should report only empty containers."""
    node = SkipIteration()

    assert node.args == []
    assert node.symbols_defined == set()
    assert node.undefined_symbols == set()
def test_block():
    """Exercise Block.symbols_defined, Block.append and Block.insert_front."""
    first_cell = dst[0, 0](0)
    third_cell = dst[0, 0](2)

    block = Block([Assignment(first_cell, s[0]), Assignment(x, third_cell)])
    assert block.symbols_defined == {first_cell, third_cell, s[0], x}

    # Appending a list containing one more assignment grows the block to three nodes.
    block.append([Assignment(y, 10)])
    assert block.symbols_defined == {first_cell, third_cell, s[0], x, y}
    assert len(block.args) == 3

    # insert_front is deliberately handed an iterator rather than a list.
    front_nodes = iter([Assignment(s[1], 11)])
    block.insert_front(front_nodes)

    assert block.args[0] == Assignment(s[1], 11)
def test_loop_over_coordinate():
    """Check body replacement and start/stop/step replacement on LoopOverCoordinate."""
    first = Assignment(dst[0, 0](0), s[0])
    second = Assignment(x, dst[0, 0](2))

    original_body = Block([first, second])
    loop = LoopOverCoordinate(original_body, coordinate_to_loop_over=0, start=0, stop=10, step=1)
    assert loop.body == original_body

    # Swapping the body must give a loop that keeps the original bounds.
    reduced_body = Block([first])
    loop = loop.new_loop_with_different_body(reduced_body)
    assert loop.body == reduced_body

    assert loop.start == 0
    assert loop.stop == 10
    assert loop.step == 1

    # Replace the bounds one at a time, always passing the current value as the
    # node to be replaced (same order as written out by hand: start, stop, step).
    for attribute, replacement in (('start', 2), ('stop', 20), ('step', 2)):
        loop.replace(getattr(loop, attribute), replacement)

    assert loop.start == 2
    assert loop.stop == 20
    assert loop.step == 2
def test_sympy_assignment():
    """Check SympyAssignment.optimize with the C99 rewrites and SympyAssignment.replace."""
    rhs = sp.log(x + 3) / sp.log(2) + sp.log(x ** 2 + 1)
    node = SympyAssignment(dst[0, 0](0), rhs)
    node.optimize(optims_c99)

    kernel_ast = ps.create_kernel([node])
    generated = ps.get_code_str(kernel_ast)

    # The generated code is expected to use the C99 functions log1p and log2.
    assert 'log1p' in generated
    assert 'log2' in generated

    # Replace both sides in place and verify the node reflects the new values.
    new_lhs = dst[0, 0](1)
    node.replace(node.lhs, new_lhs)
    node.replace(node.rhs, sp.log(2))

    assert node.lhs == new_lhs
    assert node.rhs == sp.log(2)
pystencils_tests/test_data/datahandling_load_test.npz
0 → 100644
View file @
a3f37cbd
File added
pystencils_tests/test_data/datahandling_save_test.npz
0 → 100644
View file @
a3f37cbd
File added
pystencils_tests/test_datahandling.py
View file @
a3f37cbd
import
os
from
tempfile
import
TemporaryDirectory
from
pathlib
import
Path
import
numpy
as
np
...
...
@@ -12,6 +13,9 @@ except ImportError:
import
unittest.mock
pytest
=
unittest
.
mock
.
MagicMock
()
# Absolute directory containing this test module.
SCRIPT_FOLDER = Path(__file__).parent.absolute()
# Directory holding the .npz fixtures used by the save/load tests.
INPUT_FOLDER = SCRIPT_FOLDER.joinpath("test_data")
def
basic_iteration
(
dh
):
dh
.
add_array
(
'basic_iter_test_gl_default'
)
...
...
@@ -111,6 +115,11 @@ def kernel_execution_jacobi(dh, target):
test_gpu
=
target
==
'gpu'
or
target
==
'opencl'
dh
.
add_array
(
'f'
,
gpu
=
test_gpu
)
dh
.
add_array
(
'tmp'
,
gpu
=
test_gpu
)
if
test_gpu
:
assert
dh
.
is_on_gpu
(
'f'
)
assert
dh
.
is_on_gpu
(
'tmp'
)
stencil_2d
=
[(
1
,
0
),
(
-
1
,
0
),
(
0
,
1
),
(
0
,
-
1
)]
stencil_3d
=
[(
1
,
0
,
0
),
(
-
1
,
0
,
0
),
(
0
,
1
,
0
),
(
0
,
-
1
,
0
),
(
0
,
0
,
1
),
(
0
,
0
,
-
1
)]
stencil
=
stencil_2d
if
dh
.
dim
==
2
else
stencil_3d
...
...
@@ -197,6 +206,7 @@ def test_access_and_gather():
def
test_kernel
():
for
domain_shape
in
[(
4
,
5
),
(
3
,
4
,
5
)]:
dh
=
create_data_handling
(
domain_size
=
domain_shape
,
periodicity
=
True
)
assert
all
(
dh
.
periodicity
)
kernel_execution_jacobi
(
dh
,
'cpu'
)
reduction
(
dh
)
...
...
@@ -243,3 +253,105 @@ def test_add_arrays():
assert
y_
==
y
assert
x
==
dh
.
fields
[
'x'
]
assert
y
==
dh
.
fields
[
'y'
]
def test_get_kwarg():
    """Check get_kernel_kwargs and that re-adding an existing array name fails."""
    dh = create_data_handling(domain_size=(10, 10), default_ghost_layers=1)
    src_field, dst_field = dh.add_arrays('src, dst')

    dh.fill("src", 1.0, ghost_layers=True)
    dh.fill("dst", 0.0, ghost_layers=True)

    # 'src' already exists, so adding it a second time must be rejected.
    with pytest.raises(ValueError):
        dh.add_array('src')

    copy_rule = ps.Assignment(src_field.center, dst_field.center)
    compiled_kernel = ps.create_kernel(copy_rule).compile()

    # The first entry of the result maps field names to the arrays held by the handling.
    kernel_kwargs = dh.get_kernel_kwargs(compiled_kernel)
    assert np.all(kernel_kwargs[0]['src'] == dh.cpu_arrays['src'])
    assert np.all(kernel_kwargs[0]['dst'] == dh.cpu_arrays['dst'])
def test_add_custom_data():
    """Register custom CPU/GPU data with transfer callbacks (skipped without pycuda)."""
    pytest.importorskip('pycuda')

    import pycuda.gpuarray as gpuarray
    import pycuda.autoinit  # noqa

    array_shape = (2, 2)

    def make_cpu_data():
        return np.ones(array_shape, dtype=np.float64)

    def make_gpu_data():
        return gpuarray.zeros(array_shape, dtype=np.float64)

    def upload(gpuarr, cpuarray):
        gpuarr.set(cpuarray)

    def download(gpuarr, cpuarray):
        gpuarr.get(cpuarray)

    dh = create_data_handling(domain_size=(10, 10))
    dh.add_custom_data('custom_data', make_cpu_data, make_gpu_data, upload, download)

    # Freshly created data: ones on the CPU side, zeros on the GPU side.
    assert np.all(dh.custom_data_cpu['custom_data'] == 1)
    assert np.all(dh.custom_data_gpu['custom_data'].get() == 0)

    # Run both transfer directions through the registered callbacks.
    dh.to_cpu(name='custom_data')
    dh.to_gpu(name='custom_data')

    assert 'custom_data' in dh.custom_data_names
def test_log():
    """Call log_on_root and check the root/rank properties of the created data handling."""
    handling = create_data_handling(domain_size=(10, 10))
    handling.log_on_root()

    assert handling.is_root
    assert handling.world_rank == 0
def test_save_data():
    """Save all arrays of a small data handling and verify the written archive.

    The archive is written to a temporary directory instead of the
    version-controlled ``test_data`` folder, so running the test suite does not
    modify fixtures in the source tree. The written file is read back to make
    sure the save actually produced the expected data.
    """
    dh = create_data_handling(domain_size=(2, 2), default_ghost_layers=1)
    for name in ("src", "dst"):
        dh.add_array(name, values_per_cell=9)
        dh.fill(name, 1.0, ghost_layers=True)

    with TemporaryDirectory() as tmp_dir:
        dh.save_all(str(Path(tmp_dir) / 'datahandling_save_test'))

        # numpy appends the '.npz' suffix when the given file name lacks it
        archive_path = Path(tmp_dir) / 'datahandling_save_test.npz'
        assert archive_path.is_file()

        # Close the archive before the temporary directory is removed.
        with np.load(str(archive_path)) as archive:
            for name in ("src", "dst"):
                assert name in archive.files
                assert np.all(archive[name] == 1)
def test_load_data():
    """Load the stored fixture into a matching and a non-matching data handling.

    Case 1 uses a (2, 2) domain, for which the fixture is expected to be
    loaded into both arrays. Case 2 uses a (3, 3) domain plus an extra array
    'dst2'; the test expects all three arrays to keep their zero fill after
    load_all.
    """
    fixture = str(INPUT_FOLDER) + '/datahandling_load_test'

    # Case 1: arrays start at zero and are expected to be overwritten by the file.
    dh = create_data_handling(domain_size=(2, 2), default_ghost_layers=1)
    for name in ("src", "dst"):
        dh.add_array(name, values_per_cell=9)
        dh.fill(name, 0.0, ghost_layers=True)
    dh.load_all(fixture)

    # The comparison has to happen element-wise *inside* np.all. The previous
    # form `np.all(arr) == 1` only checked that no entry was zero.
    # NOTE(review): assumes every entry of the fixture equals 1 (as produced by
    # test_save_data's fill value) - confirm against the committed .npz file.
    for name in ("src", "dst"):
        assert np.all(dh.cpu_arrays[name] == 1)

    # Case 2: a different domain size and an array name absent from the fixture;
    # nothing may be loaded, so the zero fill must be preserved.
    dh = create_data_handling(domain_size=(3, 3), default_ghost_layers=1)
    for name in ("src", "dst", "dst2"):
        dh.add_array(name, values_per_cell=9)
        dh.fill(name, 0.0, ghost_layers=True)
    dh.load_all(fixture)

    for name in ("src", "dst", "dst2"):
        assert np.all(dh.cpu_arrays[name] == 0)
pystencils_tests/test_datahandling_parallel.py
View file @
a3f37cbd
import
numpy
as
np
import
waLBerla
as
wlb
from
pystencils
import
make_slice
from
pystencils.datahandling.parallel_datahandling
import
ParallelDataHandling
from
pystencils_tests.test_datahandling
import
(
access_and_gather
,
kernel_execution_jacobi
,
reduction
,
synchronization
,
vtk_output
)
try
:
import
pytest
except
ImportError
:
import
unittest.mock
pytest
=
unittest
.
mock
.
MagicMock
()
def
test_access_and_gather
():
block_size
=
(
4
,
7
,
1
)
...
...
@@ -64,3 +71,51 @@ def test_vtk_output():
blocks
=
wlb
.
createUniformBlockGrid
(
blocks
=
(
3
,
2
,
4
),
cellsPerBlock
=
(
3
,
2
,
5
),
oneBlockPerProcess
=
False
)
dh
=
ParallelDataHandling
(
blocks
)
vtk_output
(
dh
)
def test_block_iteration():
    """Fill an array block by block and check the totals seen through dh.iterate()."""
    grid = wlb.createUniformBlockGrid(blocks=(2, 2, 2), cellsPerBlock=(16, 16, 16),
                                      oneBlockPerProcess=False)
    dh = ParallelDataHandling(grid, default_ghost_layers=2)
    dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2, gpu=True)

    # Set every entry of every block to one, then add everything up.
    for block in dh.iterate():
        block['v'].fill(1)

    total = 0
    for block in dh.iterate():
        total += np.sum(block['v'])
    assert total == 40 ** 3

    # Zero the part selected by the slice and add everything up again.
    selection = make_slice[0:18, 0:18, 0:18]
    for block in dh.iterate(slice_obj=selection):
        block['v'].fill(0)

    total = 0
    for block in dh.iterate():
        total += np.sum(block['v'])
    assert total == 40 ** 3 - 20 ** 3
def test_getter_setter():
    """Check the simple query properties and CPU/GPU transfer calls of ParallelDataHandling."""
    grid = wlb.createUniformBlockGrid(blocks=(2, 2, 2), cellsPerBlock=(2, 2, 2),
                                      oneBlockPerProcess=False)
    dh = ParallelDataHandling(grid, default_ghost_layers=2)
    dh.add_array('v', values_per_cell=1, dtype=np.int64, ghost_layers=2, gpu=True)

    # 2 blocks x 2 cells per block along each axis
    assert dh.shape == (4, 4, 4)
    assert dh.periodicity == (False, False, False)
    assert dh.values_per_cell('v') == 1
    # `is True` is intentional: these must be real booleans, not merely truthy values.
    assert dh.has_data('v') is True
    assert 'v' in dh.array_names

    dh.log_on_root()
    assert dh.is_root is True
    assert dh.world_rank == 0

    dh.to_gpu('v')
    assert dh.is_on_gpu('v') is True
    dh.all_to_cpu()
pystencils_tests/test_fast_approximation.py
View file @
a3f37cbd
...
...
@@ -11,9 +11,9 @@ def test_fast_sqrt():
assert
len
(
insert_fast_sqrts
(
expr
).
atoms
(
fast_sqrt
))
==
1
assert
len
(
insert_fast_sqrts
([
expr
])[
0
].
atoms
(
fast_sqrt
))
==
1
ast
=
ps
.
create_kernel
(
ps
.
Assignment
(
g
[
0
,
0
],
insert_fast_sqrts
(
expr
)),
target
=
'gpu'
)
ast
.
compile
()
code_str
=
ps
.
get_code_str
(
ast
)
ast
_gpu
=
ps
.
create_kernel
(
ps
.
Assignment
(
g
[
0
,
0
],
insert_fast_sqrts
(
expr
)),
target
=
'gpu'
)
ast
_gpu
.
compile
()
code_str
=
ps
.
get_code_str
(
ast
_gpu
)
assert
'__fsqrt_rn'
in
code_str
expr
=
ps
.
Assignment
(
sp
.
Symbol
(
"tmp"
),
3
/
sp
.
sqrt
(
f
[
0
,
0
]
+
f
[
1
,
0
]))
...
...
@@ -21,9 +21,9 @@ def test_fast_sqrt():
ac
=
ps
.
AssignmentCollection
([
expr
],
[])
assert
len
(
insert_fast_sqrts
(
ac
).
main_assignments
[
0
].
atoms
(
fast_inv_sqrt
))
==
1
ast
=
ps
.
create_kernel
(
insert_fast_sqrts
(
ac
),
target
=
'gpu'
)
ast
.
compile
()
code_str
=
ps
.
get_code_str
(
ast
)
ast
_gpu
=
ps
.
create_kernel
(
insert_fast_sqrts
(
ac
),
target
=
'gpu'
)
ast
_gpu
.
compile
()
code_str
=
ps
.
get_code_str
(
ast
_gpu
)
assert
'__frsqrt_rn'
in
code_str
...
...
pystencils_tests/test_kerncraft_coupling.py
View file @
a3f37cbd
...
...
@@ -7,7 +7,7 @@ from kerncraft.kernel import KernelCode
from
kerncraft.machinemodel
import
MachineModel
from
kerncraft.models
import
ECM
,
ECMData
,
Benchmark
from
pystencils
import
Assignment
,
Field
from
pystencils
import
Assignment
,
Field
,
fields
from
pystencils.cpu
import
create_kernel
from
pystencils.kerncraft_coupling
import
KerncraftParameters
,
PyStencilsKerncraftKernel
from
pystencils.kerncraft_coupling.generate_benchmark
import
generate_benchmark
,
run_c_benchmark
...
...
@@ -159,3 +159,15 @@ def test_benchmark():
timeloop_time
=
timeloop
.
benchmark
(
number_of_time_steps_for_estimation
=
1
)
np
.
testing
.
assert_almost_equal
(
c_benchmark_run
,
timeloop_time
,
decimal
=
4
)
@pytest.mark.kerncraft
def test_kerncraft_generic_field():
    """Construct a PyStencilsKerncraftKernel from a kernel on fields without a fixed size."""
    a = fields('a: double[3D]')
    b = fields('b: double[3D]')
    scale = sp.Symbol("s")

    # Sum of the six direct neighbours of the centre cell.
    neighbour_sum = (a[0, -1, 0] + a[0, 1, 0]
                     + a[-1, 0, 0] + a[1, 0, 0]
                     + a[0, 0, -1] + a[0, 0, 1])

    kernel_ast = create_kernel([Assignment(b[0, 0, 0], scale * neighbour_sum)])

    # The test passes if construction completes without raising.
    k = PyStencilsKerncraftKernel(kernel_ast, debug_print=True)
pystencils_tests/test_simplification_strategy.py
View file @
a3f37cbd
import
sympy
as
sp
import
pystencils
as
ps
from
pystencils
import
Assignment
,
AssignmentCollection
from
pystencils.simp
import
(
SimplificationStrategy
,
apply_on_all_subexpressions
,
...
...
@@ -43,3 +44,39 @@ def test_simplification_strategy():
assert
'Adds'
in
report
.
_repr_html_
()
assert
'factor'
in
str
(
strategy
)
def test_split_inner_loop():
    """Check that the 'split_groups' simplification hint splits the inner loop."""
    dst = ps.fields('dst(8): double[2D]')
    s = sp.symbols('s_:8')
    x = sp.symbols('x')

    cells = [dst[0, 0](i) for i in range(8)]

    subexpressions = []
    main = [Assignment(cell, symbol) for cell, symbol in zip(cells, s)]
    main.append(Assignment(x, sum(s)))

    # With the hint: four groups, the last one also containing x.
    hinted = AssignmentCollection(main, subexpressions)
    hinted.simplification_hints['split_groups'] = [cells[0:2],
                                                   cells[2:4],
                                                   cells[4:6],
                                                   cells[6:8] + [x]]
    hinted_code = ps.get_code_str(ps.create_kernel(hinted))
    # we have four inner loops as indicated in split groups (4 elements) plus one outer loop
    assert hinted_code.count('for') == 5

    # Without the hint the same assignments stay in a single inner loop.
    plain = AssignmentCollection(main, subexpressions)
    plain_code = ps.get_code_str(ps.create_kernel(plain))
    # one inner loop and one outer loop
    assert plain_code.count('for') == 2
pystencils_tests/test_slicing.py
0 → 100644
View file @
a3f37cbd
import
numpy
as
np
from
pystencils
import
create_data_handling
from
pystencils.slicing
import
SlicedGetter
,
make_slice
,
SlicedGetterDataHandling
,
shift_slice
,
slice_intersection
def
test_sliced_getter
():
def
get_slice
(
slice_obj
=
None
):
arr
=
np
.
ones
((
10
,
10
))
<