kernelcreation.py 8.01 KB
Newer Older
Martin Bauer's avatar
Martin Bauer committed
1
from types import MappingProxyType
2
import sympy as sp
3
4
5
6
7
from .assignment import Assignment
from .astnodes import LoopOverCoordinate, Conditional, Block, SympyAssignment
from .simp.assignment_collection import AssignmentCollection
from .gpucuda.indexing import indexing_creator_from_params
from .transformations import remove_conditionals_in_staggered_kernel
Martin Bauer's avatar
Martin Bauer committed
8
9


Martin Bauer's avatar
Martin Bauer committed
10
def create_kernel(assignments, target='cpu', data_type="double", iteration_slice=None, ghost_layers=None,
Martin Bauer's avatar
Martin Bauer committed
11
                  cpu_openmp=False, cpu_vectorize_info=None,
Martin Bauer's avatar
Martin Bauer committed
12
                  gpu_indexing='block', gpu_indexing_params=MappingProxyType({})):
Martin Bauer's avatar
Martin Bauer committed
13
14
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36

    Args:
        assignments: either be a plain list of equations or a AssignmentCollection object
        target: 'cpu', 'llvm' or 'gpu'
        data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
                  to type
        iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
                         part of the field is iterated over
        ghost_layers: if left to default, the number of necessary ghost layers is determined automatically
                     a single integer specifies the ghost layer count at all borders, can also be a sequence of
                     pairs [(x_lower_gl, x_upper_gl), .... ]

        cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
        cpu_vectorize_info: pair of instruction set name ('sse, 'avx', 'avx512') and data type ('float', 'double')

        gpu_indexing: either 'block' or 'line' , or custom indexing class (see gpucuda/indexing.py)
        gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
                           e.g. for 'block' one can specify {'block_size': (20, 20, 10) }

    Returns:
        abstract syntax tree object, that can either be printed as source code with `show_code` or can be compiled with
        through its `compile()` member
Martin Bauer's avatar
Martin Bauer committed
37
38
39
    """

    # ----  Normalizing parameters
Martin Bauer's avatar
Martin Bauer committed
40
    split_groups = ()
Martin Bauer's avatar
Martin Bauer committed
41
42
43
44
45
46
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments
    if isinstance(assignments, Assignment):
        assignments = [assignments]
Martin Bauer's avatar
Martin Bauer committed
47
48
49

    # ----  Creating ast
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
50
51
        from pystencils.cpu import create_kernel
        from pystencils.cpu import add_openmp
Martin Bauer's avatar
Martin Bauer committed
52
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
Martin Bauer's avatar
Martin Bauer committed
53
54
55
56
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers)
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp)
        if cpu_vectorize_info:
Martin Bauer's avatar
Martin Bauer committed
57
            import pystencils.backends.simd_instruction_sets as vec
58
            from pystencils.cpu.vectorization import vectorize
Martin Bauer's avatar
Martin Bauer committed
59
            vec_params = cpu_vectorize_info
Martin Bauer's avatar
Martin Bauer committed
60
61
            vec.selected_instruction_set = vec.x86_vector_instruction_set(instruction_set=vec_params[0],
                                                                          data_type=vec_params[1])
Martin Bauer's avatar
Martin Bauer committed
62
63
64
            vectorize(ast)
        return ast
    elif target == 'llvm':
Martin Bauer's avatar
Martin Bauer committed
65
        from pystencils.llvm import create_kernel
Martin Bauer's avatar
Martin Bauer committed
66
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
Martin Bauer's avatar
Martin Bauer committed
67
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers)
Martin Bauer's avatar
Martin Bauer committed
68
69
        return ast
    elif target == 'gpu':
Martin Bauer's avatar
Martin Bauer committed
70
        from pystencils.gpucuda import create_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
71
        ast = create_cuda_kernel(assignments, type_info=data_type,
Martin Bauer's avatar
Martin Bauer committed
72
73
                                 indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
                                 iteration_slice=iteration_slice, ghost_layers=ghost_layers)
Martin Bauer's avatar
Martin Bauer committed
74
75
76
77
78
        return ast
    else:
        raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))


Martin Bauer's avatar
Martin Bauer committed
79
def create_indexed_kernel(assignments, index_fields, target='cpu', data_type="double", coordinate_names=('x', 'y', 'z'),
Martin Bauer's avatar
Martin Bauer committed
80
                          cpu_openmp=True, gpu_indexing='block', gpu_indexing_params=MappingProxyType({})):
Martin Bauer's avatar
Martin Bauer committed
81
    """
Martin Bauer's avatar
Martin Bauer committed
82
    Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
Martin Bauer's avatar
Martin Bauer committed
83
84
    coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.

Martin Bauer's avatar
Martin Bauer committed
85
    The coordinates are stored in a separated index_field, which is a one dimensional array with struct data type.
Martin Bauer's avatar
Martin Bauer committed
86
    This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the
Martin Bauer's avatar
Martin Bauer committed
87
    'coordinate_names' parameter. The struct can have also other fields that can be read and written in the kernel, for
Martin Bauer's avatar
Martin Bauer committed
88
89
    example boundary parameters.

Martin Bauer's avatar
Martin Bauer committed
90
91
    index_fields: list of index fields, i.e. 1D fields with struct data type
    coordinate_names: name of the coordinate fields in the struct data type
Martin Bauer's avatar
Martin Bauer committed
92
93
    """

Martin Bauer's avatar
Martin Bauer committed
94
95
    if isinstance(assignments, AssignmentCollection):
        assignments = assignments.all_assignments
Martin Bauer's avatar
Martin Bauer committed
96
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
97
98
99
100
101
102
        from pystencils.cpu import create_indexed_kernel
        from pystencils.cpu import add_openmp
        ast = create_indexed_kernel(assignments, index_fields=index_fields, type_info=data_type,
                                    coordinate_names=coordinate_names)
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp)
Martin Bauer's avatar
Martin Bauer committed
103
104
105
106
        return ast
    elif target == 'llvm':
        raise NotImplementedError("Indexed kernels are not yet supported in LLVM backend")
    elif target == 'gpu':
Martin Bauer's avatar
Martin Bauer committed
107
        from pystencils.gpucuda import created_indexed_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
108
109
110
        idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
        ast = created_indexed_cuda_kernel(assignments, index_fields, type_info=data_type,
                                          coordinate_names=coordinate_names, indexing_creator=idx_creator)
Martin Bauer's avatar
Martin Bauer committed
111
112
113
        return ast
    else:
        raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
114
115
116
117
118
119
120
121
122
123
124
125
126
127


def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu', **kwargs):
    """Kernel that updates a staggered field.

    Args:
        staggered_field: field that has one index coordinate and
                where e.g. f[0,0](0) is interpreted as value at the left cell boundary, f[1,0](0) the right cell
                boundary and f[0,0](1) the southern cell boundary etc.
        expressions: sequence of expressions of length dim, defining how the east, southern, (bottom) cell boundary
                     should be update
        subexpressions: optional sequence of Assignments, that define subexpressions used in the main expressions
        target: 'cpu' or 'gpu'
        kwargs: passed directly to create_kernel, iteration slice and ghost_layers parameters are not allowed
128

129
    Returns:
130
        AST, see `create_kernel`
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
    """
    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
    assert staggered_field.index_dimensions == 1, 'Staggered field must have exactly one index dimension'
    dim = staggered_field.spatial_dimensions

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]
    conditions = [counters[i] < staggered_field.shape[i] - 1 for i in range(dim)]
    assert len(expressions) == dim
    final_assignments = []
    for d in range(dim):
        cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
        a_coll = AssignmentCollection([Assignment(staggered_field(d), expressions[d])], list(subexpressions))
        a_coll = a_coll.new_filtered([staggered_field(d)])
        sp_assignments = [SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments]
        final_assignments.append(Conditional(cond, Block(sp_assignments)))
    ghost_layers = [(1, 0)] * dim

    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)

    if target == 'cpu':
        remove_conditionals_in_staggered_kernel(ast)

    return ast