diff --git a/pystencils/astnodes.py b/pystencils/astnodes.py index 2d3174a1a564bfd00633d36150d8119f7dacaec5..d83c488d068d9058e2c0e3c3e9d34454272d32b2 100644 --- a/pystencils/astnodes.py +++ b/pystencils/astnodes.py @@ -162,18 +162,30 @@ class KernelFunction(Node): def field_name(self): return self.fields[0].name - def __init__(self, body, ghost_layers=None, function_name="kernel", backend=""): + def __init__(self, body, target, backend, compile_function, ghost_layers, function_name="kernel"): super(KernelFunction, self).__init__() self._body = body body.parent = self self.function_name = function_name self._body.parent = self - self.compile = None self.ghost_layers = ghost_layers + self._target = target + self._backend = backend # these variables are assumed to be global, so no automatic parameter is generated for them self.global_variables = set() - self.backend = backend self.instruction_set = None # used in `vectorize` function to tell the backend which i.s. (SSE,AVX) to use + # function that compiles the node to a Python callable, is set by the backends + self._compile_function = compile_function + + @property + def target(self): + """Currently either 'cpu' or 'gpu' """ + return self._target + + @property + def backend(self): + """Backend for generating the code e.g. 
'llvm', 'c', 'cuda' """ + return self._backend @property def symbols_defined(self): @@ -194,7 +206,7 @@ class KernelFunction(Node): @property def args(self): - return [self._body] + return self._body, @property def fields_accessed(self) -> Set['ResolvedFieldAccess']: @@ -231,6 +243,11 @@ class KernelFunction(Node): params = [p.symbol for p in self.get_parameters()] return '{0} {1}({2})'.format(type(self).__name__, self.function_name, params) + def compile(self, *args, **kwargs): + if self._compile_function is None: + raise ValueError("No compile-function provided for this KernelFunction node") + return self._compile_function(self, *args, **kwargs) + class SkipIteration(Node): @property diff --git a/pystencils/cpu/kernelcreation.py b/pystencils/cpu/kernelcreation.py index 6b94b6de32eb61d77f77f4e8afd2e873d7c63b41..d3b791901b4b04a0b9bfb94f7308a5dc74ba6c61 100644 --- a/pystencils/cpu/kernelcreation.py +++ b/pystencils/cpu/kernelcreation.py @@ -1,5 +1,4 @@ import sympy as sp -from functools import partial from pystencils.astnodes import SympyAssignment, Block, LoopOverCoordinate, KernelFunction from pystencils.transformations import resolve_buffer_accesses, resolve_field_accesses, make_loop_over_domain, \ add_types, get_optimal_loop_ordering, parse_base_pointer_info, move_constants_before_loop, \ @@ -61,9 +60,10 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke body = ast.Block(assignments) loop_order = get_optimal_loop_ordering(fields_without_buffers) - ast_node = make_loop_over_domain(body, function_name, iteration_slice=iteration_slice, - ghost_layers=ghost_layers, loop_order=loop_order) - ast_node.target = 'cpu' + loop_node, ghost_layer_info = make_loop_over_domain(body, iteration_slice=iteration_slice, + ghost_layers=ghost_layers, loop_order=loop_order) + ast_node = KernelFunction(loop_node, 'cpu', 'c', compile_function=make_python_function, + ghost_layers=ghost_layer_info, function_name=function_name) if split_groups: 
typed_split_groups = [[type_symbol(s) for s in split_group] for split_group in split_groups] @@ -83,7 +83,6 @@ def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "ke resolve_buffer_accesses(ast_node, get_base_buffer_index(ast_node), read_only_fields) resolve_field_accesses(ast_node, read_only_fields, field_to_base_pointer_info=base_pointer_info) move_constants_before_loop(ast_node) - ast_node.compile = partial(make_python_function, ast_node) return ast_node @@ -141,14 +140,14 @@ def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, fu loop_body.append(assignment) function_body = Block([loop_node]) - ast_node = KernelFunction(function_body, backend="cpu", function_name=function_name) + ast_node = KernelFunction(function_body, "cpu", "c", make_python_function, + ghost_layers=None, function_name=function_name) fixed_coordinate_mapping = {f.name: coordinate_typed_symbols for f in non_index_fields} read_only_fields = set([f.name for f in fields_read - fields_written]) resolve_field_accesses(ast_node, read_only_fields, field_to_fixed_coordinates=fixed_coordinate_mapping) move_constants_before_loop(ast_node) - ast_node.compile = partial(make_python_function, ast_node) return ast_node diff --git a/pystencils/gpucuda/kernelcreation.py b/pystencils/gpucuda/kernelcreation.py index 2bc0327d644764402e22a481740979e17861f683..fd387efbd529220686647acf2235ca90cfe9dc56 100644 --- a/pystencils/gpucuda/kernelcreation.py +++ b/pystencils/gpucuda/kernelcreation.py @@ -1,5 +1,3 @@ -from functools import partial - from pystencils.gpucuda.indexing import BlockIndexing from pystencils.transformations import resolve_field_accesses, add_types, parse_base_pointer_info, \ get_common_shape, resolve_buffer_accesses, unify_shape_symbols, get_base_buffer_index @@ -55,7 +53,7 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde block = indexing.guard(block, common_shape) unify_shape_symbols(block, 
common_shape=common_shape, fields=fields_without_buffers) - ast = KernelFunction(block, function_name=function_name, ghost_layers=ghost_layers, backend='gpucuda') + ast = KernelFunction(block, 'gpu', 'gpucuda', make_python_function, ghost_layers, function_name) ast.global_variables.update(indexing.index_variables) base_pointer_spec = [['spatialInner0']] @@ -84,7 +82,6 @@ def create_cuda_kernel(assignments, function_name="kernel", type_info=None, inde ast.body.insert_front(SympyAssignment(loop_counter, indexing.coordinates[i])) ast.indexing = indexing - ast.compile = partial(make_python_function, ast) return ast @@ -124,7 +121,7 @@ def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel function_body = Block(coordinate_symbol_assignments + assignments) function_body = indexing.guard(function_body, get_common_shape(index_fields)) - ast = KernelFunction(function_body, function_name=function_name, backend='gpucuda') + ast = KernelFunction(function_body, 'gpu', 'gpucuda', make_python_function, ghost_layers=None, function_name=function_name) ast.global_variables.update(indexing.index_variables) coord_mapping = indexing.coordinates @@ -141,5 +138,4 @@ def created_indexed_cuda_kernel(assignments, index_fields, function_name="kernel # add the function which determines #blocks and #threads as additional member to KernelFunction node # this is used by the jit ast.indexing = indexing - ast.compile = partial(make_python_function, ast) return ast diff --git a/pystencils/llvm/kernelcreation.py b/pystencils/llvm/kernelcreation.py index bb822b48131dd5ca5cc344d52a28fd2125ebe9d1..fcb403e783dfd4503ec2f8a6b93adea1a8be2a0d 100644 --- a/pystencils/llvm/kernelcreation.py +++ b/pystencils/llvm/kernelcreation.py @@ -1,5 +1,4 @@ from pystencils.transformations import insert_casts -from functools import partial from pystencils.llvm.llvmjit import make_python_function @@ -10,23 +9,25 @@ def create_kernel(assignments, function_name="kernel", type_info=None, split_gro Loops are created according to the 
field accesses in the equations. - :param assignments: list of sympy equations, containing accesses to :class:`pystencils.field.Field`. - Defining the update rules of the kernel - :param function_name: name of the generated function - only important if generated code is written out - :param type_info: a map from symbol name to a C type specifier. If not specified all symbols are assumed to - be of type 'double' except symbols which occur on the left hand side of equations where the - right hand side is a sympy Boolean which are assumed to be 'bool' . - :param split_groups: Specification on how to split up inner loop into multiple loops. For details see - transformation :func:`pystencils.transformation.split_inner_loop` - :param iteration_slice: if not None, iteration is done only over this slice of the field - :param ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers - if None, the number of ghost layers is determined automatically and assumed to be equal for a - all dimensions + Args: + assignments: list of sympy equations, containing accesses to :class:`pystencils.field.Field`. + Defining the update rules of the kernel + function_name: name of the generated function - only important if generated code is written out + type_info: a map from symbol name to a C type specifier. If not specified all symbols are assumed to + be of type 'double' except symbols which occur on the left hand side of equations where the + right hand side is a sympy Boolean which are assumed to be 'bool' . + split_groups: Specification on how to split up inner loop into multiple loops. 
For details see + transformation :func:`pystencils.transformations.split_inner_loop` + iteration_slice: if not None, iteration is done only over this slice of the field + ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers + if None, the number of ghost layers is determined automatically and assumed to be equal for + all dimensions :return: :class:`pystencils.ast.KernelFunction` node """ from pystencils.cpu import create_kernel code = create_kernel(assignments, function_name, type_info, split_groups, iteration_slice, ghost_layers) - code = insert_casts(code) - code.compile = partial(make_python_function, code) + code.body = insert_casts(code.body) + code._compile_function = make_python_function + code._backend = 'llvm' return code diff --git a/pystencils/transformations.py b/pystencils/transformations.py index 8f05b20f416ff9de8c3da94945afb8881be7ba86..30cb69844e67333ba7f44e2cebcd444cb4437fd5 100644 --- a/pystencils/transformations.py +++ b/pystencils/transformations.py @@ -143,12 +143,11 @@ def get_common_shape(field_set): return shape -def make_loop_over_domain(body, function_name, iteration_slice=None, ghost_layers=None, loop_order=None): +def make_loop_over_domain(body, iteration_slice=None, ghost_layers=None, loop_order=None): """Uses :class:`pystencils.field.Field.Access` to create (multiple) loops around given AST. 
Args: body: Block object with inner loop contents - function_name: name of generated C function iteration_slice: if not None, iteration is done only over this slice of the field ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers if None, the number of ghost layers is determined automatically and assumed to be equal for a @@ -156,7 +155,7 @@ def make_loop_over_domain(body, function_name, iteration_slice=None, ghost_layer loop_order: loop ordering from outer to inner loop (optimal ordering is same as layout) Returns: - :class:`LoopOverCoordinate` instance with nested loops, ordered according to field layouts + tuple of loop-node, ghost_layer_info """ # find correct ordering by inspecting participating FieldAccesses field_accesses = body.atoms(AbstractField.AbstractAccess) @@ -199,8 +198,7 @@ def make_loop_over_domain(body, function_name, iteration_slice=None, ghost_layer sp.sympify(slice_component)) current_body.insert_front(assignment) - ast_node = ast.KernelFunction(current_body, ghost_layers=ghost_layers, function_name=function_name, backend='cpu') - return ast_node + return current_body, ghost_layers def create_intermediate_base_pointer(field_access, coordinates, previous_ptr): diff --git a/pystencils_tests/test_jacobi_cbackend.py b/pystencils_tests/test_jacobi_cbackend.py index 6fbf9d5ba1e82b637f1991aa2da2482d4951de84..ad4f82e89dcb04b25a3f5e604e28d33d4d86360e 100644 --- a/pystencils_tests/test_jacobi_cbackend.py +++ b/pystencils_tests/test_jacobi_cbackend.py @@ -2,7 +2,7 @@ import numpy as np from pystencils import show_code from pystencils.transformations import move_constants_before_loop, make_loop_over_domain, resolve_field_accesses from pystencils.field import Field -from pystencils.astnodes import SympyAssignment, Block +from pystencils.astnodes import SympyAssignment, Block, KernelFunction from pystencils.cpu import make_python_function @@ -19,7 +19,8 @@ def test_jacobi_fixed_field_size(): jacobi = 
SympyAssignment(d[0, 0], (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4) body = Block([jacobi]) - ast_node = make_loop_over_domain(body, "kernel") + loop_node, gl_info = make_loop_over_domain(body) + ast_node = KernelFunction(loop_node, 'cpu', 'c', make_python_function, ghost_layers=gl_info) resolve_field_accesses(ast_node) move_constants_before_loop(ast_node) @@ -28,7 +29,7 @@ def test_jacobi_fixed_field_size(): dst_field_py[x, y] = 0.25 * (src_field_py[x - 1, y] + src_field_py[x + 1, y] + src_field_py[x, y - 1] + src_field_py[x, y + 1]) - kernel = make_python_function(ast_node) + kernel = ast_node.compile() kernel(f=src_field_c, d=dst_field_c) error = np.sum(np.abs(dst_field_py - dst_field_c)) np.testing.assert_allclose(error, 0.0, atol=1e-13) @@ -44,7 +45,8 @@ def test_jacobi_variable_field_size(): d = Field.create_generic("d", 3) jacobi = SympyAssignment(d[0, 0, 0], (f[1, 0, 0] + f[-1, 0, 0] + f[0, 1, 0] + f[0, -1, 0]) / 4) body = Block([jacobi]) - ast_node = make_loop_over_domain(body, "kernel") + loop_node, gl_info = make_loop_over_domain(body) + ast_node = KernelFunction(loop_node, 'cpu', 'c', make_python_function, ghost_layers=gl_info) resolve_field_accesses(ast_node) move_constants_before_loop(ast_node) @@ -59,7 +61,7 @@ def test_jacobi_variable_field_size(): dst_field_py[x, y, z] = 0.25 * (src_field_py[x - 1, y, z] + src_field_py[x + 1, y, z] + src_field_py[x, y - 1, z] + src_field_py[x, y + 1, z]) - kernel = make_python_function(ast_node) + kernel = ast_node.compile() kernel(f=src_field_c, d=dst_field_c) error = np.sum(np.abs(dst_field_py-dst_field_c)) np.testing.assert_allclose(error, 0.0, atol=1e-13)