kernelcreation.py 16.6 KB
Newer Older
Martin Bauer's avatar
Martin Bauer committed
1
from types import MappingProxyType
2
from itertools import combinations
Martin Bauer's avatar
Martin Bauer committed
3

4
import sympy as sp
Martin Bauer's avatar
Martin Bauer committed
5

Martin Bauer's avatar
Martin Bauer committed
6
from pystencils.assignment import Assignment
Martin Bauer's avatar
Martin Bauer committed
7
from pystencils.astnodes import Block, Conditional, LoopOverCoordinate, SympyAssignment
Martin Bauer's avatar
Martin Bauer committed
8
from pystencils.cpu.vectorization import vectorize
9
from pystencils.field import Field, FieldType
Martin Bauer's avatar
Martin Bauer committed
10
from pystencils.gpucuda.indexing import indexing_creator_from_params
Martin Bauer's avatar
Martin Bauer committed
11
from pystencils.simp.assignment_collection import AssignmentCollection
12
from pystencils.stencil import direction_string_to_offset, inverse_direction_string
Martin Bauer's avatar
Martin Bauer committed
13
14
from pystencils.transformations import (
    loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
Martin Bauer's avatar
Martin Bauer committed
15
16


17
18
19
20
21
def create_kernel(assignments,
                  target='cpu',
                  data_type="double",
                  iteration_slice=None,
                  ghost_layers=None,
22
                  skip_independence_check=False,
23
24
25
26
27
                  cpu_openmp=False,
                  cpu_vectorize_info=None,
                  cpu_blocking=None,
                  gpu_indexing='block',
                  gpu_indexing_params=MappingProxyType({}),
28
29
                  use_textures_for_interpolation=True,
                  cpu_prepend_optimizations=[]):
Martin Bauer's avatar
Martin Bauer committed
30
31
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.
32
33

    Args:
Martin Bauer's avatar
Martin Bauer committed
34
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
35
36
37
38
39
        target: 'cpu', 'llvm' or 'gpu'
        data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
                  to type
        iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
                         part of the field is iterated over
40
41
42
        ghost_layers: a single integer specifies the ghost layer count at all borders, can also be a sequence of
                      pairs ``[(x_lower_gl, x_upper_gl), .... ]``. These layers are excluded from the iteration.
                      If left to default, the number of ghost layers is determined automatically.
43
44
        skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                 periodicity kernel, that access the field outside the iteration bounds. Use with care!
45
        cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
Martin Bauer's avatar
Martin Bauer committed
46
47
        cpu_vectorize_info: a dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal'
                            for documentation of these parameters see vectorize function. Example:
48
                            '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
Martin Bauer's avatar
Martin Bauer committed
49
        cpu_blocking: a tuple of block sizes or None if no blocking should be applied
Martin Bauer's avatar
Martin Bauer committed
50
        gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
51
        gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
Martin Bauer's avatar
Martin Bauer committed
52
                             e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'
53
        cpu_prepend_optimizations: list of extra optimizations to perform first on the AST
54
55

    Returns:
Martin Bauer's avatar
Martin Bauer committed
56
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
57
        can be compiled with through its 'compile()' member
Martin Bauer's avatar
Martin Bauer committed
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> ast = ps.create_kernel(assignment, target='cpu', cpu_openmp=True)
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
Martin Bauer's avatar
Martin Bauer committed
74
75
    """
    # ----  Normalizing parameters
Martin Bauer's avatar
Martin Bauer committed
76
    split_groups = ()
Martin Bauer's avatar
Martin Bauer committed
77
78
79
80
81
82
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments
    if isinstance(assignments, Assignment):
        assignments = [assignments]
Martin Bauer's avatar
Martin Bauer committed
83
84
85

    # ----  Creating ast
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
86
87
        from pystencils.cpu import create_kernel
        from pystencils.cpu import add_openmp
Martin Bauer's avatar
Martin Bauer committed
88
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
89
90
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                            skip_independence_check=skip_independence_check)
91
92
        for optimization in cpu_prepend_optimizations:
            optimization(ast)
Martin Bauer's avatar
Martin Bauer committed
93
94
95
        omp_collapse = None
        if cpu_blocking:
            omp_collapse = loop_blocking(ast, cpu_blocking)
Martin Bauer's avatar
Martin Bauer committed
96
        if cpu_openmp:
Martin Bauer's avatar
Martin Bauer committed
97
            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse)
Martin Bauer's avatar
Martin Bauer committed
98
        if cpu_vectorize_info:
Martin Bauer's avatar
Martin Bauer committed
99
            if cpu_vectorize_info is True:
100
                vectorize(ast)
Martin Bauer's avatar
Martin Bauer committed
101
102
103
104
            elif isinstance(cpu_vectorize_info, dict):
                vectorize(ast, **cpu_vectorize_info)
            else:
                raise ValueError("Invalid value for cpu_vectorize_info")
Martin Bauer's avatar
Martin Bauer committed
105
106
        return ast
    elif target == 'llvm':
Martin Bauer's avatar
Martin Bauer committed
107
        from pystencils.llvm import create_kernel
Martin Bauer's avatar
Martin Bauer committed
108
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
Martin Bauer's avatar
Martin Bauer committed
109
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers)
Martin Bauer's avatar
Martin Bauer committed
110
111
        return ast
    elif target == 'gpu':
Martin Bauer's avatar
Martin Bauer committed
112
        from pystencils.gpucuda import create_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
113
        ast = create_cuda_kernel(assignments, type_info=data_type,
Martin Bauer's avatar
Martin Bauer committed
114
                                 indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
115
                                 iteration_slice=iteration_slice, ghost_layers=ghost_layers,
116
117
                                 skip_independence_check=skip_independence_check,
                                 use_textures_for_interpolation=use_textures_for_interpolation)
Martin Bauer's avatar
Martin Bauer committed
118
119
120
121
122
        return ast
    else:
        raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))


123
124
125
126
127
128
129
130
131
def create_indexed_kernel(assignments,
                          index_fields,
                          target='cpu',
                          data_type="double",
                          coordinate_names=('x', 'y', 'z'),
                          cpu_openmp=True,
                          gpu_indexing='block',
                          gpu_indexing_params=MappingProxyType({}),
                          use_textures_for_interpolation=True):
Martin Bauer's avatar
Martin Bauer committed
132
    """
Martin Bauer's avatar
Martin Bauer committed
133
    Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
Martin Bauer's avatar
Martin Bauer committed
134
135
    coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.

Martin Bauer's avatar
Martin Bauer committed
136
    The coordinates are stored in a separated index_field, which is a one dimensional array with struct data type.
Martin Bauer's avatar
Martin Bauer committed
137
    This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the
Martin Bauer's avatar
Martin Bauer committed
138
    'coordinate_names' parameter. The struct can have also other fields that can be read and written in the kernel, for
Martin Bauer's avatar
Martin Bauer committed
139
140
    example boundary parameters.

Martin Bauer's avatar
Martin Bauer committed
141
142
    index_fields: list of index fields, i.e. 1D fields with struct data type
    coordinate_names: name of the coordinate fields in the struct data type
Martin Bauer's avatar
Martin Bauer committed
143

Martin Bauer's avatar
Martin Bauer committed
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>>
        >>> # Index field stores the indices of the cell to visit together with optional values
        >>> index_arr_dtype = np.dtype([('x', np.int32), ('y', np.int32), ('val', np.double)])
        >>> index_arr = np.array([(1, 1, 0.1), (2, 2, 0.2), (3, 3, 0.3)], dtype=index_arr_dtype)
        >>> idx_field = ps.fields(idx=index_arr)
        >>>
        >>> # Additional values  stored in index field can be accessed in the kernel as well
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], 2 * s[0, 1] + 2 * s[1, 0] + idx_field('val'))
        >>> ast = create_indexed_kernel(assignment, [idx_field], coordinate_names=('x', 'y'))
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(s=np.ones([5, 5]), d=d_arr, idx=index_arr)
        >>> d_arr
        array([[0. , 0. , 0. , 0. , 0. ],
               [0. , 4.1, 0. , 0. , 0. ],
               [0. , 0. , 4.2, 0. , 0. ],
               [0. , 0. , 0. , 4.3, 0. ],
               [0. , 0. , 0. , 0. , 0. ]])
    """
    if isinstance(assignments, Assignment):
        assignments = [assignments]
    elif isinstance(assignments, AssignmentCollection):
Martin Bauer's avatar
Martin Bauer committed
170
        assignments = assignments.all_assignments
Martin Bauer's avatar
Martin Bauer committed
171
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
172
173
174
175
176
177
        from pystencils.cpu import create_indexed_kernel
        from pystencils.cpu import add_openmp
        ast = create_indexed_kernel(assignments, index_fields=index_fields, type_info=data_type,
                                    coordinate_names=coordinate_names)
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp)
Martin Bauer's avatar
Martin Bauer committed
178
179
180
181
        return ast
    elif target == 'llvm':
        raise NotImplementedError("Indexed kernels are not yet supported in LLVM backend")
    elif target == 'gpu':
Martin Bauer's avatar
Martin Bauer committed
182
        from pystencils.gpucuda import created_indexed_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
183
        idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
184
185
186
187
188
189
        ast = created_indexed_cuda_kernel(assignments,
                                          index_fields,
                                          type_info=data_type,
                                          coordinate_names=coordinate_names,
                                          indexing_creator=idx_creator,
                                          use_textures_for_interpolation=use_textures_for_interpolation)
Martin Bauer's avatar
Martin Bauer committed
190
191
192
        return ast
    else:
        raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
193

194

195
def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=False, **kwargs):
196
197
198
199
200
201
202
203
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    For a staggered field, the first index coordinate defines the location of the staggered value.
    Further index coordinates can be used to store vectors/tensors at each point.

    Args:
204
205
206
207
208
        assignments: a sequence of assignments or an AssignmentCollection.
                     Assignments to staggered field are processed specially, while subexpressions and assignments to
                     regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
                     used, but they all need to use the same stencil (i.e. the same number of staggered points) and
                     shape.
209
210
211
        target: 'cpu', 'llvm' or 'gpu'
        gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
                                  handled in an else branch.
212
213
214
215
        kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed

    Returns:
        AST, see `create_kernel`
216
    """
217
    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
218
219

    if isinstance(assignments, AssignmentCollection):
220
        subexpressions = assignments.subexpressions + [a for a in assignments.main_assignments
221
222
                                                       if not hasattr(a, 'lhs')
                                                       or type(a.lhs) is not Field.Access
223
                                                       or not FieldType.is_staggered(a.lhs.field)]
224
225
        assignments = [a for a in assignments.main_assignments if hasattr(a, 'lhs')
                       and type(a.lhs) is Field.Access
226
                       and FieldType.is_staggered(a.lhs.field)]
227
    else:
228
229
        subexpressions = [a for a in assignments if not hasattr(a, 'lhs')
                          or type(a.lhs) is not Field.Access
230
                          or not FieldType.is_staggered(a.lhs.field)]
231
232
        assignments = [a for a in assignments if hasattr(a, 'lhs')
                       and type(a.lhs) is Field.Access
233
                       and FieldType.is_staggered(a.lhs.field)]
234
235
236
237
238
    if len(set([tuple(a.lhs.field.staggered_stencil) for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same stencil")
    if len(set([a.lhs.field.shape for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same shape")

239
    staggered_field = assignments[0].lhs.field
240
    stencil = staggered_field.staggered_stencil
241
    dim = staggered_field.spatial_dimensions
242
    shape = staggered_field.shape
243
244
245
246
247

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]

    final_assignments = []

248
249
    # find out whether any of the ghost layers is not needed
    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
250
    for direction in stencil:
251
252
253
254
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        common_exclusions.intersection_update(exclusions)
255
    ghost_layers = [[0, 0] for d in range(dim)]
256
257
258
259
    for direction in common_exclusions:
        direction = direction_string_to_offset(direction)
        for d, s in enumerate(direction):
            if s == 1:
260
                ghost_layers[d][1] = 1
261
            elif s == -1:
262
                ghost_layers[d][0] = 1
263

264
265
    def condition(direction):
        """exclude those staggered points that correspond to fluxes between ghost cells"""
266
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
267
268
269
270
271

        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        conditions = []
        for e in exclusions:
272
273
            if e in common_exclusions:
                continue
274
275
276
            offset = direction_string_to_offset(e)
            for i, o in enumerate(offset):
                if o == 1:
277
                    conditions.append(counters[i] < shape[i] - 1)
278
279
280
281
                elif o == -1:
                    conditions.append(counters[i] > 0)
        return sp.And(*conditions)

282
    if gpu_exclusive_conditions:
283
        outer_assignment = None
284
285
286
287
288
289
290
291
        conditions = {direction: condition(direction) for direction in stencil}
        for num_conditions in range(len(stencil)):
            for combination in combinations(conditions.values(), num_conditions):
                for assignment in assignments:
                    direction = stencil[assignment.lhs.index[0]]
                    if conditions[direction] in combination:
                        assignment = SympyAssignment(assignment.lhs, assignment.rhs)
                        outer_assignment = Conditional(sp.And(*combination), Block([assignment]), outer_assignment)
292
293
294

        inner_assignment = []
        for assignment in assignments:
295
            direction = stencil[assignment.lhs.index[0]]
296
297
298
299
300
301
302
303
304
305
306
307
308
            inner_assignment.append(SympyAssignment(assignment.lhs, assignment.rhs))
        last_conditional = Conditional(sp.And(*[condition(d) for d in stencil]),
                                       Block(inner_assignment), outer_assignment)
        final_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                            [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                            [last_conditional]

        if target == 'cpu':
            from pystencils.cpu import create_kernel as create_kernel_cpu
            ast = create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, **kwargs)
        else:
            ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
        return ast
309

310
    for assignment in assignments:
311
        direction = stencil[assignment.lhs.index[0]]
312
313
        sp_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
314
                         [SympyAssignment(assignment.lhs, assignment.rhs)]
315
316
317
        last_conditional = Conditional(condition(direction), Block(sp_assignments))
        final_assignments.append(last_conditional)

318
319
320
    remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
    prepend_optimizations = [lambda ast: remove_conditionals_in_staggered_kernel(ast, remove_start_conditional),
                             move_constants_before_loop]
321
322
    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target,
                        cpu_prepend_optimizations=prepend_optimizations, **kwargs)
323
    return ast