kernelcreation.py 16.8 KB
Newer Older
1
from itertools import combinations
2
from types import MappingProxyType
Martin Bauer's avatar
Martin Bauer committed
3

4
import sympy as sp
Martin Bauer's avatar
Martin Bauer committed
5

Martin Bauer's avatar
Martin Bauer committed
6
from pystencils.assignment import Assignment
Martin Bauer's avatar
Martin Bauer committed
7
from pystencils.astnodes import Block, Conditional, LoopOverCoordinate, SympyAssignment
Martin Bauer's avatar
Martin Bauer committed
8
from pystencils.cpu.vectorization import vectorize
9
from pystencils.field import Field, FieldType
Martin Bauer's avatar
Martin Bauer committed
10
from pystencils.gpucuda.indexing import indexing_creator_from_params
Martin Bauer's avatar
Martin Bauer committed
11
from pystencils.simp.assignment_collection import AssignmentCollection
12
from pystencils.stencil import direction_string_to_offset, inverse_direction_string
Martin Bauer's avatar
Martin Bauer committed
13
14
from pystencils.transformations import (
    loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
Martin Bauer's avatar
Martin Bauer committed
15
16


17
18
19
20
21
def create_kernel(assignments,
                  target='cpu',
                  data_type="double",
                  iteration_slice=None,
                  ghost_layers=None,
22
                  skip_independence_check=False,
23
24
25
26
27
                  cpu_openmp=False,
                  cpu_vectorize_info=None,
                  cpu_blocking=None,
                  gpu_indexing='block',
                  gpu_indexing_params=MappingProxyType({}),
28
                  use_textures_for_interpolation=True,
29
30
                  cpu_prepend_optimizations=[],
                  use_auto_for_assignments=False):
Martin Bauer's avatar
Martin Bauer committed
31
32
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.
33
34

    Args:
Martin Bauer's avatar
Martin Bauer committed
35
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
36
37
38
39
40
        target: 'cpu', 'llvm' or 'gpu'
        data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
                  to type
        iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
                         part of the field is iterated over
41
42
43
        ghost_layers: a single integer specifies the ghost layer count at all borders, can also be a sequence of
                      pairs ``[(x_lower_gl, x_upper_gl), .... ]``. These layers are excluded from the iteration.
                      If left to default, the number of ghost layers is determined automatically.
44
45
        skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                 periodicity kernel, that access the field outside the iteration bounds. Use with care!
46
        cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
Martin Bauer's avatar
Martin Bauer committed
47
48
        cpu_vectorize_info: a dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal'
                            for documentation of these parameters see vectorize function. Example:
49
                            '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
Martin Bauer's avatar
Martin Bauer committed
50
        cpu_blocking: a tuple of block sizes or None if no blocking should be applied
Martin Bauer's avatar
Martin Bauer committed
51
        gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
52
        gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
Martin Bauer's avatar
Martin Bauer committed
53
                             e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'
54
        cpu_prepend_optimizations: list of extra optimizations to perform first on the AST
55
56

    Returns:
Martin Bauer's avatar
Martin Bauer committed
57
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
58
        can be compiled with through its 'compile()' member
Martin Bauer's avatar
Martin Bauer committed
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> ast = ps.create_kernel(assignment, target='cpu', cpu_openmp=True)
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
Martin Bauer's avatar
Martin Bauer committed
75
76
    """
    # ----  Normalizing parameters
77
    assert assignments, "Assignments must not be empty!"
Martin Bauer's avatar
Martin Bauer committed
78
    split_groups = ()
Martin Bauer's avatar
Martin Bauer committed
79
80
81
82
83
84
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments
    if isinstance(assignments, Assignment):
        assignments = [assignments]
Martin Bauer's avatar
Martin Bauer committed
85
86
87

    # ----  Creating ast
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
88
89
        from pystencils.cpu import create_kernel
        from pystencils.cpu import add_openmp
Martin Bauer's avatar
Martin Bauer committed
90
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
91
92
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                            skip_independence_check=skip_independence_check)
93
94
        for optimization in cpu_prepend_optimizations:
            optimization(ast)
Martin Bauer's avatar
Martin Bauer committed
95
96
97
        omp_collapse = None
        if cpu_blocking:
            omp_collapse = loop_blocking(ast, cpu_blocking)
Martin Bauer's avatar
Martin Bauer committed
98
        if cpu_openmp:
Martin Bauer's avatar
Martin Bauer committed
99
            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse)
Martin Bauer's avatar
Martin Bauer committed
100
        if cpu_vectorize_info:
Martin Bauer's avatar
Martin Bauer committed
101
            if cpu_vectorize_info is True:
102
                vectorize(ast)
Martin Bauer's avatar
Martin Bauer committed
103
104
105
106
            elif isinstance(cpu_vectorize_info, dict):
                vectorize(ast, **cpu_vectorize_info)
            else:
                raise ValueError("Invalid value for cpu_vectorize_info")
Martin Bauer's avatar
Martin Bauer committed
107
    elif target == 'llvm':
Martin Bauer's avatar
Martin Bauer committed
108
        from pystencils.llvm import create_kernel
Martin Bauer's avatar
Martin Bauer committed
109
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
Martin Bauer's avatar
Martin Bauer committed
110
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers)
Martin Bauer's avatar
Martin Bauer committed
111
    elif target == 'gpu':
Martin Bauer's avatar
Martin Bauer committed
112
        from pystencils.gpucuda import create_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
113
        ast = create_cuda_kernel(assignments, type_info=data_type,
Martin Bauer's avatar
Martin Bauer committed
114
                                 indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
115
                                 iteration_slice=iteration_slice, ghost_layers=ghost_layers,
116
117
                                 skip_independence_check=skip_independence_check,
                                 use_textures_for_interpolation=use_textures_for_interpolation)
Martin Bauer's avatar
Martin Bauer committed
118
119
120
    else:
        raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))

121
122
123
124
125
126
    if use_auto_for_assignments:
        for a in ast.atoms(SympyAssignment):
            a.use_auto = True

    return ast

Martin Bauer's avatar
Martin Bauer committed
127

128
129
130
131
132
133
134
135
136
def create_indexed_kernel(assignments,
                          index_fields,
                          target='cpu',
                          data_type="double",
                          coordinate_names=('x', 'y', 'z'),
                          cpu_openmp=True,
                          gpu_indexing='block',
                          gpu_indexing_params=MappingProxyType({}),
                          use_textures_for_interpolation=True):
Martin Bauer's avatar
Martin Bauer committed
137
    """
Martin Bauer's avatar
Martin Bauer committed
138
    Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
Martin Bauer's avatar
Martin Bauer committed
139
140
    coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.

Martin Bauer's avatar
Martin Bauer committed
141
    The coordinates are stored in a separated index_field, which is a one dimensional array with struct data type.
Martin Bauer's avatar
Martin Bauer committed
142
    This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the
Martin Bauer's avatar
Martin Bauer committed
143
    'coordinate_names' parameter. The struct can have also other fields that can be read and written in the kernel, for
Martin Bauer's avatar
Martin Bauer committed
144
145
    example boundary parameters.

Martin Bauer's avatar
Martin Bauer committed
146
147
    index_fields: list of index fields, i.e. 1D fields with struct data type
    coordinate_names: name of the coordinate fields in the struct data type
Martin Bauer's avatar
Martin Bauer committed
148

Martin Bauer's avatar
Martin Bauer committed
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>>
        >>> # Index field stores the indices of the cell to visit together with optional values
        >>> index_arr_dtype = np.dtype([('x', np.int32), ('y', np.int32), ('val', np.double)])
        >>> index_arr = np.array([(1, 1, 0.1), (2, 2, 0.2), (3, 3, 0.3)], dtype=index_arr_dtype)
        >>> idx_field = ps.fields(idx=index_arr)
        >>>
        >>> # Additional values  stored in index field can be accessed in the kernel as well
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], 2 * s[0, 1] + 2 * s[1, 0] + idx_field('val'))
        >>> ast = create_indexed_kernel(assignment, [idx_field], coordinate_names=('x', 'y'))
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(s=np.ones([5, 5]), d=d_arr, idx=index_arr)
        >>> d_arr
        array([[0. , 0. , 0. , 0. , 0. ],
               [0. , 4.1, 0. , 0. , 0. ],
               [0. , 0. , 4.2, 0. , 0. ],
               [0. , 0. , 0. , 4.3, 0. ],
               [0. , 0. , 0. , 0. , 0. ]])
    """
    if isinstance(assignments, Assignment):
        assignments = [assignments]
    elif isinstance(assignments, AssignmentCollection):
Martin Bauer's avatar
Martin Bauer committed
175
        assignments = assignments.all_assignments
Martin Bauer's avatar
Martin Bauer committed
176
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
177
178
179
180
181
182
        from pystencils.cpu import create_indexed_kernel
        from pystencils.cpu import add_openmp
        ast = create_indexed_kernel(assignments, index_fields=index_fields, type_info=data_type,
                                    coordinate_names=coordinate_names)
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp)
Martin Bauer's avatar
Martin Bauer committed
183
184
185
186
        return ast
    elif target == 'llvm':
        raise NotImplementedError("Indexed kernels are not yet supported in LLVM backend")
    elif target == 'gpu':
Martin Bauer's avatar
Martin Bauer committed
187
        from pystencils.gpucuda import created_indexed_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
188
        idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
189
190
191
192
193
194
        ast = created_indexed_cuda_kernel(assignments,
                                          index_fields,
                                          type_info=data_type,
                                          coordinate_names=coordinate_names,
                                          indexing_creator=idx_creator,
                                          use_textures_for_interpolation=use_textures_for_interpolation)
Martin Bauer's avatar
Martin Bauer committed
195
196
197
        return ast
    else:
        raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
198

199

200
def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=False, **kwargs):
201
202
203
204
205
206
207
208
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    For a staggered field, the first index coordinate defines the location of the staggered value.
    Further index coordinates can be used to store vectors/tensors at each point.

    Args:
209
210
211
212
213
        assignments: a sequence of assignments or an AssignmentCollection.
                     Assignments to staggered field are processed specially, while subexpressions and assignments to
                     regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
                     used, but they all need to use the same stencil (i.e. the same number of staggered points) and
                     shape.
214
215
216
        target: 'cpu', 'llvm' or 'gpu'
        gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
                                  handled in an else branch.
217
218
219
220
        kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed

    Returns:
        AST, see `create_kernel`
221
    """
222
    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
223
224

    if isinstance(assignments, AssignmentCollection):
225
        subexpressions = assignments.subexpressions + [a for a in assignments.main_assignments
226
227
                                                       if not hasattr(a, 'lhs')
                                                       or type(a.lhs) is not Field.Access
228
                                                       or not FieldType.is_staggered(a.lhs.field)]
229
230
        assignments = [a for a in assignments.main_assignments if hasattr(a, 'lhs')
                       and type(a.lhs) is Field.Access
231
                       and FieldType.is_staggered(a.lhs.field)]
232
    else:
233
234
        subexpressions = [a for a in assignments if not hasattr(a, 'lhs')
                          or type(a.lhs) is not Field.Access
235
                          or not FieldType.is_staggered(a.lhs.field)]
236
237
        assignments = [a for a in assignments if hasattr(a, 'lhs')
                       and type(a.lhs) is Field.Access
238
                       and FieldType.is_staggered(a.lhs.field)]
239
240
241
242
243
    if len(set([tuple(a.lhs.field.staggered_stencil) for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same stencil")
    if len(set([a.lhs.field.shape for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same shape")

244
    staggered_field = assignments[0].lhs.field
245
    stencil = staggered_field.staggered_stencil
246
    dim = staggered_field.spatial_dimensions
247
    shape = staggered_field.shape
248
249
250
251
252

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]

    final_assignments = []

253
254
    # find out whether any of the ghost layers is not needed
    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
255
    for direction in stencil:
256
257
258
259
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        common_exclusions.intersection_update(exclusions)
260
    ghost_layers = [[0, 0] for d in range(dim)]
261
262
263
264
    for direction in common_exclusions:
        direction = direction_string_to_offset(direction)
        for d, s in enumerate(direction):
            if s == 1:
265
                ghost_layers[d][1] = 1
266
            elif s == -1:
267
                ghost_layers[d][0] = 1
268

269
270
    def condition(direction):
        """exclude those staggered points that correspond to fluxes between ghost cells"""
271
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
272
273
274
275
276

        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        conditions = []
        for e in exclusions:
277
278
            if e in common_exclusions:
                continue
279
280
281
            offset = direction_string_to_offset(e)
            for i, o in enumerate(offset):
                if o == 1:
282
                    conditions.append(counters[i] < shape[i] - 1)
283
284
285
286
                elif o == -1:
                    conditions.append(counters[i] > 0)
        return sp.And(*conditions)

287
    if gpu_exclusive_conditions:
288
        outer_assignment = None
289
290
291
292
293
294
295
296
        conditions = {direction: condition(direction) for direction in stencil}
        for num_conditions in range(len(stencil)):
            for combination in combinations(conditions.values(), num_conditions):
                for assignment in assignments:
                    direction = stencil[assignment.lhs.index[0]]
                    if conditions[direction] in combination:
                        assignment = SympyAssignment(assignment.lhs, assignment.rhs)
                        outer_assignment = Conditional(sp.And(*combination), Block([assignment]), outer_assignment)
297
298
299

        inner_assignment = []
        for assignment in assignments:
300
            direction = stencil[assignment.lhs.index[0]]
301
302
303
304
305
306
307
308
309
310
311
312
313
            inner_assignment.append(SympyAssignment(assignment.lhs, assignment.rhs))
        last_conditional = Conditional(sp.And(*[condition(d) for d in stencil]),
                                       Block(inner_assignment), outer_assignment)
        final_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                            [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                            [last_conditional]

        if target == 'cpu':
            from pystencils.cpu import create_kernel as create_kernel_cpu
            ast = create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, **kwargs)
        else:
            ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
        return ast
314

315
    for assignment in assignments:
316
        direction = stencil[assignment.lhs.index[0]]
317
318
        sp_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
319
                         [SympyAssignment(assignment.lhs, assignment.rhs)]
320
321
322
        last_conditional = Conditional(condition(direction), Block(sp_assignments))
        final_assignments.append(last_conditional)

323
324
325
    remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
    prepend_optimizations = [lambda ast: remove_conditionals_in_staggered_kernel(ast, remove_start_conditional),
                             move_constants_before_loop]
326
327
    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target,
                        cpu_prepend_optimizations=prepend_optimizations, **kwargs)
328
    return ast