Commit 1a991fff authored by Markus Holzer
Browse files

Blocking for partial directions

parent 74bb2c23
......@@ -404,7 +404,7 @@ class PragmaBlock(Block):
class LoopOverCoordinate(Node):
LOOP_COUNTER_NAME_PREFIX = "ctr"
BlOCK_LOOP_COUNTER_NAME_PREFIX = "_blockctr"
BLOCK_LOOP_COUNTER_NAME_PREFIX = "_blockctr"
def __init__(self, body, coordinate_to_loop_over, start, stop, step=1, is_block_loop=False):
super(LoopOverCoordinate, self).__init__(parent=None)
......@@ -479,7 +479,7 @@ class LoopOverCoordinate(Node):
@staticmethod
def get_block_loop_counter_name(coordinate_to_loop_over):
return f"{LoopOverCoordinate.BlOCK_LOOP_COUNTER_NAME_PREFIX}_{coordinate_to_loop_over}"
return f"{LoopOverCoordinate.BLOCK_LOOP_COUNTER_NAME_PREFIX}_{coordinate_to_loop_over}"
@property
def loop_counter_name(self):
......
......@@ -1258,7 +1258,8 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
Args:
ast_node: kernel function node before vectorization transformation has been applied
block_size: sequence defining block size in x, y, (z) direction
block_size: sequence defining block size in x, y, (z) direction.
If chosen as zero the direction will not be used for blocking.
Returns:
number of dimensions blocked
......@@ -1270,8 +1271,10 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
body = ast_node.body
coordinates = []
coordinates_taken_into_account = 0
loop_starts = {}
loop_stops = {}
for loop in loops:
coord = loop.coordinate_to_loop_over
if coord not in coordinates:
......@@ -1285,6 +1288,9 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
# Create the outer loops that iterate over the blocks
outer_loop = None
for coord in reversed(coordinates):
if block_size[coord] == 0:
continue
coordinates_taken_into_account += 1
body = ast.Block([outer_loop]) if outer_loop else body
outer_loop = ast.LoopOverCoordinate(body,
coord,
......@@ -1298,6 +1304,8 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
# modify the existing loops to only iterate within one block
for inner_loop in loops:
coord = inner_loop.coordinate_to_loop_over
if block_size[coord] == 0:
continue
block_ctr = ast.LoopOverCoordinate.get_block_loop_counter_symbol(coord)
loop_range = inner_loop.stop - inner_loop.start
if sp.sympify(
......@@ -1307,7 +1315,7 @@ def loop_blocking(ast_node: ast.KernelFunction, block_size) -> int:
stop = sp.Min(inner_loop.stop, block_ctr + block_size[coord])
inner_loop.start = block_ctr
inner_loop.stop = stop
return len(coordinates)
return coordinates_taken_into_account
def implement_interpolations(ast_node: ast.Node,
......
......@@ -18,14 +18,20 @@ def check_equivalence(assignments, src_arr):
for vectorization in [False, {'assume_inner_stride_one': True}]:
with_blocking = ps.create_kernel(assignments, cpu_blocking=(8, 16, 4), cpu_openmp=openmp,
cpu_vectorize_info=vectorization).compile()
with_blocking_only_over_y = ps.create_kernel(assignments, cpu_blocking=(0, 16, 0), cpu_openmp=openmp,
cpu_vectorize_info=vectorization).compile()
without_blocking = ps.create_kernel(assignments).compile()
print(f" openmp {openmp}, vectorization {vectorization}")
dst_arr = np.zeros_like(src_arr)
dst2_arr = np.zeros_like(src_arr)
ref_arr = np.zeros_like(src_arr)
np.copyto(src_arr, np.random.rand(*src_arr.shape))
with_blocking(src=src_arr, dst=dst_arr)
with_blocking_only_over_y(src=src_arr, dst=dst2_arr)
without_blocking(src=src_arr, dst=ref_arr)
np.testing.assert_almost_equal(ref_arr, dst_arr)
np.testing.assert_almost_equal(ref_arr, dst2_arr)
def test_jacobi3d_var_size():
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment