def add_pragmas(ast_node, pragma_lines, nesting_depth=-1):
    """Prepend the given pragma lines to all loops of the specified nesting depth.

    Args:
        ast_node: pystencils abstract syntax tree
        pragma_lines: Iterable of strings containing the pragma lines.
                      A single string is treated as one pragma line.
        nesting_depth: Nesting depth of the loops the pragmas should be applied to.
                       Outermost loop has depth 0.
                       A depth of -1 indicates the innermost loops.

    Raises:
        ValueError: propagated from `iterate_loops_by_depth` if `nesting_depth` is
                    neither nonnegative nor -1.
    """
    # Materialize the lines exactly once: a one-shot iterable (e.g. a generator)
    # would otherwise be exhausted after the first loop, leaving every further
    # loop of the same depth without pragmas.
    if isinstance(pragma_lines, str):
        # A bare string would be split into single characters by list().
        pragma_lines = [pragma_lines]
    else:
        pragma_lines = list(pragma_lines)

    for loop in iterate_loops_by_depth(ast_node, nesting_depth):
        # In-place extension copies the elements, so loops never share a list object.
        loop.prefix_lines += pragma_lines
Choose a nonnegative number, or -1.") + + def unify_shape_symbols(body, common_shape, fields): """Replaces symbols for array sizes to ensure they are represented by the same unique symbol. diff --git a/pystencils_tests/test_transformations.py b/pystencils_tests/test_transformations.py index d6e6888b5027a4d403b1158d1204be97cc35455f..ba660a115d6438da31b7bf653730c751da9920a7 100644 --- a/pystencils_tests/test_transformations.py +++ b/pystencils_tests/test_transformations.py @@ -1,11 +1,16 @@ import sympy as sp +import numpy as np import pystencils as ps from pystencils import fields, TypedSymbol from pystencils.astnodes import LoopOverCoordinate, SympyAssignment from pystencils.typing import create_type -from pystencils.transformations import filtered_tree_iteration, get_loop_hierarchy, get_loop_counter_symbol_hierarchy +from pystencils.transformations import ( + filtered_tree_iteration, get_loop_hierarchy, get_loop_counter_symbol_hierarchy, + iterate_loops_by_depth, split_inner_loop, loop_blocking +) +from pystencils.cpu import add_pragmas def test_loop_information(): f, g = ps.fields("f, g: double[2D]") @@ -27,6 +32,38 @@ def test_loop_information(): TypedSymbol("ctr_0", create_type("int"), nonnegative=True)] +def test_iterate_loops_by_depth(): + f, g = ps.fields("f, g: double[3D]", layout="fzyx") + x = ps.TypedSymbol('x', np.float64) + subs = [ps.Assignment(x, f[0, 0, 0])] + mains = [ps.Assignment(g[0, 0, 0], x)] + ac = ps.AssignmentCollection(mains, subexpressions=subs) + + config = ps.CreateKernelConfig(cpu_blocking=(0, 16, 0)) + ast = ps.create_kernel(ac, config=config) + split_inner_loop(ast, [[x], [g[0,0,0]]]) + + loops = list(iterate_loops_by_depth(ast, 0)) + assert len(loops) == 1 + assert loops[0].loop_counter_symbol.name == "_blockctr_1" + + loops = list(iterate_loops_by_depth(ast, 1)) + assert len(loops) == 1 + assert loops[0].loop_counter_symbol.name == "ctr_2" + + loops = list(iterate_loops_by_depth(ast, 2)) + assert len(loops) == 1 + assert 
def test_pragmas():
    """Check that `add_pragmas` attaches pragma lines to the outermost and innermost loops.

    The pragmas are injected through `cpu_prepend_optimizations`; the test then verifies
    that they show up in the generated code and on the `prefix_lines` of the loop nodes.
    """
    outer_pragma = "#pragma omp for schedule(dynamic)"
    inner_pragma = "#pragma omp simd simdlen(8)"

    f, g = ps.fields("f, g: double[3D]", layout="fzyx")
    x = ps.TypedSymbol('x', np.float64)
    subexpressions = [ps.Assignment(x, f[0, 0, 0])]
    main_assignments = [ps.Assignment(g[0, 0, 0], x)]
    ac = ps.AssignmentCollection(main_assignments, subexpressions=subexpressions)

    def prepend_omp_pragmas(ast):
        # Depth 0 addresses the outermost loop, depth -1 the innermost loops.
        add_pragmas(ast, [outer_pragma], nesting_depth=0)
        add_pragmas(ast, [inner_pragma], nesting_depth=-1)

    config = ps.CreateKernelConfig(target=ps.Target.CPU,
                                   cpu_prepend_optimizations=[prepend_omp_pragmas])
    ast = ps.create_kernel(ac, config=config)
    code = ps.get_code_str(ast)

    assert outer_pragma in code
    assert inner_pragma in code

    loops = list(filtered_tree_iteration(ast, LoopOverCoordinate, stop_type=SympyAssignment))

    innermost = [loop for loop in loops if loop.is_innermost_loop]
    assert innermost[0].prefix_lines == [inner_pragma]

    outermost = [loop for loop in loops if loop.is_outermost_loop]
    assert outermost[0].prefix_lines == [outer_pragma]