Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Sebastian Bindgen
pystencils
Commits
facd3ab4
Commit
facd3ab4
authored
Nov 18, 2020
by
Markus Holzer
Browse files
Merge branch 'CPU_Blocking' into 'master'
Blocking for partial directions. See merge request
pycodegen/pystencils!185
parents
dbb1c77e
1a991fff
Changes
3
Hide whitespace changes
Inline
Side-by-side
pystencils/astnodes.py
View file @
facd3ab4
...
...
@@ -404,7 +404,7 @@ class PragmaBlock(Block):
class
LoopOverCoordinate
(
Node
):
LOOP_COUNTER_NAME_PREFIX
=
"ctr"
BlOCK_LOOP_COUNTER_NAME_PREFIX
=
"_blockctr"
BLOCK_LOOP_COUNTER_NAME_PREFIX
=
"_blockctr"
def
__init__
(
self
,
body
,
coordinate_to_loop_over
,
start
,
stop
,
step
=
1
,
is_block_loop
=
False
):
super
(
LoopOverCoordinate
,
self
).
__init__
(
parent
=
None
)
...
...
@@ -479,7 +479,7 @@ class LoopOverCoordinate(Node):
@
staticmethod
def
get_block_loop_counter_name
(
coordinate_to_loop_over
):
return
f
"
{
LoopOverCoordinate
.
BlOCK_LOOP_COUNTER_NAME_PREFIX
}
_
{
coordinate_to_loop_over
}
"
return
f
"
{
LoopOverCoordinate
.
BLOCK_LOOP_COUNTER_NAME_PREFIX
}
_
{
coordinate_to_loop_over
}
"
@
property
def
loop_counter_name
(
self
):
...
...
pystencils/transformations.py
View file @
facd3ab4
...
...
@@ -1258,7 +1258,8 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
Args:
ast_node: kernel function node before vectorization transformation has been applied
block_size: sequence defining block size in x, y, (z) direction
block_size: sequence defining block size in x, y, (z) direction.
If chosen as zero the direction will not be used for blocking.
Returns:
number of dimensions blocked
...
...
@@ -1270,8 +1271,10 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
body
=
ast_node
.
body
coordinates
=
[]
coordinates_taken_into_account
=
0
loop_starts
=
{}
loop_stops
=
{}
for
loop
in
loops
:
coord
=
loop
.
coordinate_to_loop_over
if
coord
not
in
coordinates
:
...
...
@@ -1285,6 +1288,9 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
# Create the outer loops that iterate over the blocks
outer_loop
=
None
for
coord
in
reversed
(
coordinates
):
if
block_size
[
coord
]
==
0
:
continue
coordinates_taken_into_account
+=
1
body
=
ast
.
Block
([
outer_loop
])
if
outer_loop
else
body
outer_loop
=
ast
.
LoopOverCoordinate
(
body
,
coord
,
...
...
@@ -1298,6 +1304,8 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
# modify the existing loops to only iterate within one block
for
inner_loop
in
loops
:
coord
=
inner_loop
.
coordinate_to_loop_over
if
block_size
[
coord
]
==
0
:
continue
block_ctr
=
ast
.
LoopOverCoordinate
.
get_block_loop_counter_symbol
(
coord
)
loop_range
=
inner_loop
.
stop
-
inner_loop
.
start
if
sp
.
sympify
(
...
...
@@ -1307,7 +1315,7 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
stop
=
sp
.
Min
(
inner_loop
.
stop
,
block_ctr
+
block_size
[
coord
])
inner_loop
.
start
=
block_ctr
inner_loop
.
stop
=
stop
return
len
(
coordinates
)
return
coordinates_taken_into_account
def
implement_interpolations
(
ast_node
:
ast
.
Node
,
...
...
pystencils_tests/test_blocking.py
View file @
facd3ab4
...
...
@@ -18,14 +18,20 @@ def check_equivalence(assignments, src_arr):
for
vectorization
in
[
False
,
{
'assume_inner_stride_one'
:
True
}]:
with_blocking
=
ps
.
create_kernel
(
assignments
,
cpu_blocking
=
(
8
,
16
,
4
),
cpu_openmp
=
openmp
,
cpu_vectorize_info
=
vectorization
).
compile
()
with_blocking_only_over_y
=
ps
.
create_kernel
(
assignments
,
cpu_blocking
=
(
0
,
16
,
0
),
cpu_openmp
=
openmp
,
cpu_vectorize_info
=
vectorization
).
compile
()
without_blocking
=
ps
.
create_kernel
(
assignments
).
compile
()
print
(
f
" openmp
{
openmp
}
, vectorization
{
vectorization
}
"
)
dst_arr
=
np
.
zeros_like
(
src_arr
)
dst2_arr
=
np
.
zeros_like
(
src_arr
)
ref_arr
=
np
.
zeros_like
(
src_arr
)
np
.
copyto
(
src_arr
,
np
.
random
.
rand
(
*
src_arr
.
shape
))
with_blocking
(
src
=
src_arr
,
dst
=
dst_arr
)
with_blocking_only_over_y
(
src
=
src_arr
,
dst
=
dst2_arr
)
without_blocking
(
src
=
src_arr
,
dst
=
ref_arr
)
np
.
testing
.
assert_almost_equal
(
ref_arr
,
dst_arr
)
np
.
testing
.
assert_almost_equal
(
ref_arr
,
dst2_arr
)
def
test_jacobi3d_var_size
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment