kernelcreation.py 19.2 KB
Newer Older
Martin Bauer's avatar
Martin Bauer committed
1
import itertools
Martin Bauer's avatar
Martin Bauer committed
2
from types import MappingProxyType
Martin Bauer's avatar
Martin Bauer committed
3

4
import sympy as sp
Martin Bauer's avatar
Martin Bauer committed
5

Martin Bauer's avatar
Martin Bauer committed
6
from pystencils.assignment import Assignment
Martin Bauer's avatar
Martin Bauer committed
7
from pystencils.astnodes import Block, Conditional, LoopOverCoordinate, SympyAssignment
Martin Bauer's avatar
Martin Bauer committed
8
from pystencils.cpu.vectorization import vectorize
9
from pystencils.field import Field
Martin Bauer's avatar
Martin Bauer committed
10
from pystencils.gpucuda.indexing import indexing_creator_from_params
Martin Bauer's avatar
Martin Bauer committed
11
from pystencils.simp.assignment_collection import AssignmentCollection
12
from pystencils.stencil import direction_string_to_offset, inverse_direction_string
Martin Bauer's avatar
Martin Bauer committed
13
14
from pystencils.transformations import (
    loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
Martin Bauer's avatar
Martin Bauer committed
15
16


17
18
19
20
21
def create_kernel(assignments,
                  target='cpu',
                  data_type="double",
                  iteration_slice=None,
                  ghost_layers=None,
22
                  skip_independence_check=False,
23
24
25
26
27
                  cpu_openmp=False,
                  cpu_vectorize_info=None,
                  cpu_blocking=None,
                  gpu_indexing='block',
                  gpu_indexing_params=MappingProxyType({}),
28
29
                  use_textures_for_interpolation=True,
                  cpu_prepend_optimizations=[]):
Martin Bauer's avatar
Martin Bauer committed
30
31
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.
32
33

    Args:
Martin Bauer's avatar
Martin Bauer committed
34
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
35
36
37
38
39
40
41
        target: 'cpu', 'llvm' or 'gpu'
        data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
                  to type
        iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
                         part of the field is iterated over
        ghost_layers: if left to default, the number of necessary ghost layers is determined automatically
                     a single integer specifies the ghost layer count at all borders, can also be a sequence of
Martin Bauer's avatar
Martin Bauer committed
42
                     pairs ``[(x_lower_gl, x_upper_gl), .... ]``
43
44
        skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                 periodicity kernel, that access the field outside the iteration bounds. Use with care!
45
        cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
Martin Bauer's avatar
Martin Bauer committed
46
47
        cpu_vectorize_info: a dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal'
                            for documentation of these parameters see vectorize function. Example:
48
                            '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
Martin Bauer's avatar
Martin Bauer committed
49
        cpu_blocking: a tuple of block sizes or None if no blocking should be applied
Martin Bauer's avatar
Martin Bauer committed
50
        gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
51
        gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
Martin Bauer's avatar
Martin Bauer committed
52
                             e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'
53
        cpu_prepend_optimizations: list of extra optimizations to perform first on the AST
54
55

    Returns:
Martin Bauer's avatar
Martin Bauer committed
56
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
57
        can be compiled with through its 'compile()' member
Martin Bauer's avatar
Martin Bauer committed
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> ast = ps.create_kernel(assignment, target='cpu', cpu_openmp=True)
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
Martin Bauer's avatar
Martin Bauer committed
74
75
    """
    # ----  Normalizing parameters
Martin Bauer's avatar
Martin Bauer committed
76
    split_groups = ()
Martin Bauer's avatar
Martin Bauer committed
77
78
79
80
81
82
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments
    if isinstance(assignments, Assignment):
        assignments = [assignments]
Martin Bauer's avatar
Martin Bauer committed
83
84
85

    # ----  Creating ast
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
86
87
        from pystencils.cpu import create_kernel
        from pystencils.cpu import add_openmp
Martin Bauer's avatar
Martin Bauer committed
88
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
89
90
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                            skip_independence_check=skip_independence_check)
91
92
        for optimization in cpu_prepend_optimizations:
            optimization(ast)
Martin Bauer's avatar
Martin Bauer committed
93
94
95
        omp_collapse = None
        if cpu_blocking:
            omp_collapse = loop_blocking(ast, cpu_blocking)
Martin Bauer's avatar
Martin Bauer committed
96
        if cpu_openmp:
Martin Bauer's avatar
Martin Bauer committed
97
            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse)
Martin Bauer's avatar
Martin Bauer committed
98
        if cpu_vectorize_info:
Martin Bauer's avatar
Martin Bauer committed
99
            if cpu_vectorize_info is True:
100
                vectorize(ast)
Martin Bauer's avatar
Martin Bauer committed
101
102
103
104
            elif isinstance(cpu_vectorize_info, dict):
                vectorize(ast, **cpu_vectorize_info)
            else:
                raise ValueError("Invalid value for cpu_vectorize_info")
Martin Bauer's avatar
Martin Bauer committed
105
106
        return ast
    elif target == 'llvm':
Martin Bauer's avatar
Martin Bauer committed
107
        from pystencils.llvm import create_kernel
Martin Bauer's avatar
Martin Bauer committed
108
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
Martin Bauer's avatar
Martin Bauer committed
109
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers)
Martin Bauer's avatar
Martin Bauer committed
110
111
        return ast
    elif target == 'gpu':
Martin Bauer's avatar
Martin Bauer committed
112
        from pystencils.gpucuda import create_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
113
        ast = create_cuda_kernel(assignments, type_info=data_type,
Martin Bauer's avatar
Martin Bauer committed
114
                                 indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
115
                                 iteration_slice=iteration_slice, ghost_layers=ghost_layers,
116
117
                                 skip_independence_check=skip_independence_check,
                                 use_textures_for_interpolation=use_textures_for_interpolation)
Martin Bauer's avatar
Martin Bauer committed
118
119
120
121
122
        return ast
    else:
        raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))


123
124
125
126
127
128
129
130
131
def create_indexed_kernel(assignments,
                          index_fields,
                          target='cpu',
                          data_type="double",
                          coordinate_names=('x', 'y', 'z'),
                          cpu_openmp=True,
                          gpu_indexing='block',
                          gpu_indexing_params=MappingProxyType({}),
                          use_textures_for_interpolation=True):
Martin Bauer's avatar
Martin Bauer committed
132
    """
Martin Bauer's avatar
Martin Bauer committed
133
    Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
Martin Bauer's avatar
Martin Bauer committed
134
135
    coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.

Martin Bauer's avatar
Martin Bauer committed
136
    The coordinates are stored in a separated index_field, which is a one dimensional array with struct data type.
Martin Bauer's avatar
Martin Bauer committed
137
    This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the
Martin Bauer's avatar
Martin Bauer committed
138
    'coordinate_names' parameter. The struct can have also other fields that can be read and written in the kernel, for
Martin Bauer's avatar
Martin Bauer committed
139
140
    example boundary parameters.

Martin Bauer's avatar
Martin Bauer committed
141
142
    index_fields: list of index fields, i.e. 1D fields with struct data type
    coordinate_names: name of the coordinate fields in the struct data type
Martin Bauer's avatar
Martin Bauer committed
143

Martin Bauer's avatar
Martin Bauer committed
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>>
        >>> # Index field stores the indices of the cell to visit together with optional values
        >>> index_arr_dtype = np.dtype([('x', np.int32), ('y', np.int32), ('val', np.double)])
        >>> index_arr = np.array([(1, 1, 0.1), (2, 2, 0.2), (3, 3, 0.3)], dtype=index_arr_dtype)
        >>> idx_field = ps.fields(idx=index_arr)
        >>>
        >>> # Additional values  stored in index field can be accessed in the kernel as well
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], 2 * s[0, 1] + 2 * s[1, 0] + idx_field('val'))
        >>> ast = create_indexed_kernel(assignment, [idx_field], coordinate_names=('x', 'y'))
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(s=np.ones([5, 5]), d=d_arr, idx=index_arr)
        >>> d_arr
        array([[0. , 0. , 0. , 0. , 0. ],
               [0. , 4.1, 0. , 0. , 0. ],
               [0. , 0. , 4.2, 0. , 0. ],
               [0. , 0. , 0. , 4.3, 0. ],
               [0. , 0. , 0. , 0. , 0. ]])
    """
    if isinstance(assignments, Assignment):
        assignments = [assignments]
    elif isinstance(assignments, AssignmentCollection):
Martin Bauer's avatar
Martin Bauer committed
170
        assignments = assignments.all_assignments
Martin Bauer's avatar
Martin Bauer committed
171
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
172
173
174
175
176
177
        from pystencils.cpu import create_indexed_kernel
        from pystencils.cpu import add_openmp
        ast = create_indexed_kernel(assignments, index_fields=index_fields, type_info=data_type,
                                    coordinate_names=coordinate_names)
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp)
Martin Bauer's avatar
Martin Bauer committed
178
179
180
181
        return ast
    elif target == 'llvm':
        raise NotImplementedError("Indexed kernels are not yet supported in LLVM backend")
    elif target == 'gpu':
Martin Bauer's avatar
Martin Bauer committed
182
        from pystencils.gpucuda import created_indexed_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
183
        idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
184
185
186
187
188
189
        ast = created_indexed_cuda_kernel(assignments,
                                          index_fields,
                                          type_info=data_type,
                                          coordinate_names=coordinate_names,
                                          indexing_creator=idx_creator,
                                          use_textures_for_interpolation=use_textures_for_interpolation)
Martin Bauer's avatar
Martin Bauer committed
190
191
192
        return ast
    else:
        raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
193

194

195
196
197
198
199
200
201
202
203
204
205
206
def create_staggered_kernel(*args, **kwargs):
    """Kernel that updates a staggered field. Dispatches to either create_staggered_kernel_1 or
       create_staggered_kernel_2 depending on the argument types.
    """
    if 'staggered_field' in kwargs or type(args[0]) is Field:
        return create_staggered_kernel_1(*args, **kwargs)
    else:
        return create_staggered_kernel_2(*args, **kwargs)


def create_staggered_kernel_1(staggered_field, expressions, subexpressions=(), target='cpu',
                              gpu_exclusive_conditions=False, **kwargs):
207
208
    """Kernel that updates a staggered field.

Martin Bauer's avatar
Martin Bauer committed
209
210
    .. image:: /img/staggered_grid.svg

211
    Args:
212
        staggered_field: field where the first index coordinate defines the location of the staggered value
213
214
                can have 1 or 2 index coordinates, in case of two index coordinates at every staggered location
                a vector is stored, expressions parameter has to be a sequence of sequences then
Martin Bauer's avatar
Martin Bauer committed
215
216
                where e.g. ``f[0,0](0)`` is interpreted as value at the left cell boundary, ``f[1,0](0)`` the right cell
                boundary and ``f[0,0](1)`` the southern cell boundary etc.
217
        expressions: sequence of expressions of length dim, defining how the west, southern, (bottom) cell boundary
218
                     should be updated.
219
220
        subexpressions: optional sequence of Assignments, that define subexpressions used in the main expressions
        target: 'cpu' or 'gpu'
221
        gpu_exclusive_conditions: if/else construct to have only one code block for each of 2**dim code paths
222
        kwargs: passed directly to create_kernel, iteration slice and ghost_layers parameters are not allowed
223

224
    Returns:
225
        AST, see `create_kernel`
226
227
    """
    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
228
    assert staggered_field.index_dimensions in (1, 2), 'Staggered field must have one or two index dimensions'
229
230
231
232
233
    dim = staggered_field.spatial_dimensions

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]
    conditions = [counters[i] < staggered_field.shape[i] - 1 for i in range(dim)]
    assert len(expressions) == dim
234
235
236
237
238
    if staggered_field.index_dimensions == 2:
        assert all(len(sublist) == len(expressions[0]) for sublist in expressions), \
            "If staggered field has two index dimensions expressions has to be a sequence of sequences of all the " \
            "same length."

239
    final_assignments = []
240
241
242
243
    last_conditional = None

    def add(condition, dimensions, as_else_block=False):
        nonlocal last_conditional
244
        if staggered_field.index_dimensions == 1:
245
246
247
            assignments = [Assignment(staggered_field(d), expressions[d]) for d in dimensions]
            a_coll = AssignmentCollection(assignments, list(subexpressions))
            a_coll = a_coll.new_filtered([staggered_field(d) for d in dimensions])
248
249
        elif staggered_field.index_dimensions == 2:
            assert staggered_field.has_fixed_index_shape
250
251
252
            assignments = [Assignment(staggered_field(d, i), expr)
                           for d in dimensions
                           for i, expr in enumerate(expressions[d])]
253
            a_coll = AssignmentCollection(assignments, list(subexpressions))
254
255
            a_coll = a_coll.new_filtered([staggered_field(d, i) for i in range(staggered_field.index_shape[1])
                                          for d in dimensions])
256
        sp_assignments = [SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments]
257
        if as_else_block and last_conditional:
258
259
260
            new_cond = Conditional(condition, Block(sp_assignments))
            last_conditional.false_block = Block([new_cond])
            last_conditional = new_cond
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
        else:
            last_conditional = Conditional(condition, Block(sp_assignments))
            final_assignments.append(last_conditional)

    if target == 'cpu' or not gpu_exclusive_conditions:
        for d in range(dim):
            cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
            add(cond, [d])
    elif target == 'gpu':
        full_conditions = [sp.And(*[conditions[i] for i in range(dim) if d != i]) for d in range(dim)]
        for include in itertools.product(*[[1, 0]] * dim):
            case_conditions = sp.And(*[c if value else sp.Not(c) for c, value in zip(full_conditions, include)])
            dimensions_to_include = [i for i in range(dim) if include[i]]
            if dimensions_to_include:
                add(case_conditions, dimensions_to_include, True)
276

277
278
    ghost_layers = [(1, 0)] * dim

Martin Bauer's avatar
Martin Bauer committed
279
280
281
282
    blocking = kwargs.get('cpu_blocking', None)
    if blocking:
        del kwargs['cpu_blocking']

283
284
285
    cpu_vectorize_info = kwargs.get('cpu_vectorize_info', None)
    if cpu_vectorize_info:
        del kwargs['cpu_vectorize_info']
286
287
288
289
    openmp = kwargs.get('cpu_openmp', None)
    if openmp:
        del kwargs['cpu_openmp']

290
    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
291

292
293
    if target == 'cpu':
        remove_conditionals_in_staggered_kernel(ast)
294
        move_constants_before_loop(ast)
295
        omp_collapse = None
Martin Bauer's avatar
Martin Bauer committed
296
        if blocking:
297
298
299
300
            omp_collapse = loop_blocking(ast, blocking)
        if openmp:
            from pystencils.cpu import add_openmp
            add_openmp(ast, num_threads=openmp, collapse=omp_collapse, assume_single_outer_loop=False)
301
302
303
304
        if cpu_vectorize_info is True:
            vectorize(ast)
        elif isinstance(cpu_vectorize_info, dict):
            vectorize(ast, **cpu_vectorize_info)
305
    return ast
306
307


308
def create_staggered_kernel_2(assignments, gpu_exclusive_conditions=False, **kwargs):
309
310
311
312
313
314
315
316
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    For a staggered field, the first index coordinate defines the location of the staggered value.
    Further index coordinates can be used to store vectors/tensors at each point.

    Args:
317
        assignments: a sequence of assignments or an AssignmentCollection with one item for each staggered grid point.
318
319
                     When storing vectors/tensors, the number of items expected is multiplied with the number of
                     components.
320
321
322
323
324
        gpu_exclusive_conditions: whether to use nested conditionals instead of multiple conditionals
        kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed

    Returns:
        AST, see `create_kernel`
325
    """
326
    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343

    subexpressions = ()
    if isinstance(assignments, AssignmentCollection):
        assignments = assignments.main_assignments
        subexpressions = assignments.subexpressions
    if len(set([a.lhs.field for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to the same staggered field")
    staggered_field = assignments[0].lhs.field
    dim = staggered_field.spatial_dimensions
    points = staggered_field.index_shape[0]
    values_per_point = sp.Mul(*staggered_field.index_shape[1:])
    assert len(assignments) == points * values_per_point

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]

    final_assignments = []

344
345
346
347
348
349
350
351
352
353
354
355
356
    # find out whether any of the ghost layers is not needed
    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
    for direction in staggered_field.staggered_stencil:
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        common_exclusions.intersection_update(exclusions)
    ghost_layers = [[1, 1] for d in range(dim)]
    for direction in common_exclusions:
        direction = direction_string_to_offset(direction)
        for d, s in enumerate(direction):
            if s == 1:
                ghost_layers[d][0] = 0
357
358
            elif s == -1:
                ghost_layers[d][1] = 0
359

360
361
    def condition(direction):
        """exclude those staggered points that correspond to fluxes between ghost cells"""
362
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
363
364
365
366
367

        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        conditions = []
        for e in exclusions:
368
369
            if e in common_exclusions:
                continue
370
371
372
373
374
375
376
377
            offset = direction_string_to_offset(e)
            for i, o in enumerate(offset):
                if o == 1:
                    conditions.append(counters[i] < staggered_field.shape[i] - 1)
                elif o == -1:
                    conditions.append(counters[i] > 0)
        return sp.And(*conditions)

378
379
380
    if gpu_exclusive_conditions:
        raise NotImplementedError('gpu_exclusive_conditions is not implemented yet')

381
382
383
384
385
386
    for d, direction in zip(range(points), staggered_field.staggered_stencil):
        sp_assignments = [SympyAssignment(assignments[d].lhs, assignments[d].rhs)] + \
                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions]
        last_conditional = Conditional(condition(direction), Block(sp_assignments))
        final_assignments.append(last_conditional)

387
388
389
    prepend_optimizations = [remove_conditionals_in_staggered_kernel, move_constants_before_loop]
    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, cpu_prepend_optimizations=prepend_optimizations,
                        **kwargs)
390
    return ast