kernelcreation.py 18.7 KB
Newer Older
1
import itertools
Jan Hönig's avatar
Jan Hönig committed
2
import warnings
Markus Holzer's avatar
Markus Holzer committed
3
from typing import Union, List
Martin Bauer's avatar
Martin Bauer committed
4

5
import sympy as sp
Markus Holzer's avatar
Markus Holzer committed
6
from pystencils.config import CreateKernelConfig
Martin Bauer's avatar
Martin Bauer committed
7

Martin Bauer's avatar
Martin Bauer committed
8
from pystencils.assignment import Assignment
Markus Holzer's avatar
Markus Holzer committed
9
from pystencils.astnodes import Node, Block, Conditional, LoopOverCoordinate, SympyAssignment
Martin Bauer's avatar
Martin Bauer committed
10
from pystencils.cpu.vectorization import vectorize
Jan Hönig's avatar
Jan Hönig committed
11
from pystencils.enums import Target, Backend
12
from pystencils.field import Field, FieldType
Markus Holzer's avatar
Markus Holzer committed
13
from pystencils.node_collection import NodeCollection
Martin Bauer's avatar
Martin Bauer committed
14
from pystencils.simp.assignment_collection import AssignmentCollection
Markus Holzer's avatar
Markus Holzer committed
15
from pystencils.kernel_contrains_check import KernelConstraintsCheck
16
from pystencils.simplificationfactory import create_simplification_strategy
17
from pystencils.stencil import direction_string_to_offset, inverse_direction_string
Martin Bauer's avatar
Martin Bauer committed
18
19
from pystencils.transformations import (
    loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
Martin Bauer's avatar
Martin Bauer committed
20
21


Markus Holzer's avatar
Fix FVM    
Markus Holzer committed
22
def create_kernel(assignments: Union[Assignment, List[Assignment], AssignmentCollection, List[Node], NodeCollection], *,
Jan Hönig's avatar
Jan Hönig committed
23
24
25
26
27
28
29
30
                  config: CreateKernelConfig = None, **kwargs):
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.
    This function forms the general API and delegates the kernel creation to others depending on the CreateKernelConfig.
    Args:
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
        config: CreateKernelConfig which includes the needed configuration
        kwargs: Arguments for updating the config
31
32

    Returns:
Martin Bauer's avatar
Martin Bauer committed
33
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
34
        can be compiled with through its 'compile()' member
Martin Bauer's avatar
Martin Bauer committed
35
36
37
38
39
40

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
Jan Hönig's avatar
Jan Hönig committed
41
42
        >>> kernel_ast = ps.create_kernel(assignment, config=ps.CreateKernelConfig(cpu_openmp=True))
        >>> kernel = kernel_ast.compile()
Martin Bauer's avatar
Martin Bauer committed
43
44
45
46
47
48
49
50
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
Martin Bauer's avatar
Martin Bauer committed
51
    """
Jan Hönig's avatar
Jan Hönig committed
52
53
54
55
56
57
58
59
60
    # ----  Updating configuration from kwargs
    if not config:
        config = CreateKernelConfig(**kwargs)
    else:
        for k, v in kwargs.items():
            if not hasattr(config, k):
                raise KeyError(f'{v} is not a valid kwarg. Please look in CreateKernelConfig for valid settings')
            setattr(config, k, v)

Martin Bauer's avatar
Martin Bauer committed
61
    # ----  Normalizing parameters
62
63
    if isinstance(assignments, Assignment):
        assignments = [assignments]
64
    assert assignments, "Assignments must not be empty!"
Markus Holzer's avatar
Markus Holzer committed
65
    if isinstance(assignments, list):
Markus Holzer's avatar
Markus Holzer committed
66
67
        assignments = NodeCollection(assignments)
    elif isinstance(assignments, AssignmentCollection):
68
        # TODO Markus check and doku
Markus Holzer's avatar
Markus Holzer committed
69
70
71
72
73
74
75
76
        # --- applying first default simplifications
        try:
            if config.default_assignment_simplifications:
                simplification = create_simplification_strategy()
                assignments = simplification(assignments)
        except Exception as e:
            warnings.warn(f"It was not possible to apply the default pystencils optimisations to the "
                          f"AssignmentCollection due to the following problem :{e}")
77
        simplification_hints = assignments.simplification_hints
Markus Holzer's avatar
Markus Holzer committed
78
        assignments = NodeCollection.from_assignment_collection(assignments)
79
        assignments.simplification_hints = simplification_hints
Jan Hönig's avatar
Jan Hönig committed
80
81
82
83
84
85
86

    if config.index_fields:
        return create_indexed_kernel(assignments, config=config)
    else:
        return create_domain_kernel(assignments, config=config)


Markus Holzer's avatar
Markus Holzer committed
87
def create_domain_kernel(assignments: NodeCollection, *, config: CreateKernelConfig):
Jan Hönig's avatar
Jan Hönig committed
88
89
90
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.

91
92
93
    Note that `create_domain_kernel` is a lower level function which shoul be accessed by not providing `index_fields`
    to create_kernel

Jan Hönig's avatar
Jan Hönig committed
94
95
96
97
98
99
100
101
102
103
104
    Args:
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
        config: CreateKernelConfig which includes the needed configuration

    Returns:
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
        can be compiled with through its 'compile()' member

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
Markus Holzer's avatar
Markus Holzer committed
105
        >>> from pystencils.kernelcreation import create_domain_kernel
Markus Holzer's avatar
Markus Holzer committed
106
        >>> from pystencils.node_collection import NodeCollection
Jan Hönig's avatar
Jan Hönig committed
107
108
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
109
        >>> kernel_config = ps.CreateKernelConfig(cpu_openmp=True)
Markus Holzer's avatar
Markus Holzer committed
110
        >>> kernel_ast = create_domain_kernel(NodeCollection([assignment]), config=kernel_config)
Jan Hönig's avatar
Jan Hönig committed
111
112
113
114
115
116
117
118
119
120
        >>> kernel = kernel_ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
    """
Markus Holzer's avatar
Markus Holzer committed
121
    # --- eval
Markus Holzer's avatar
Markus Holzer committed
122
    assignments.evaluate_terms()
Markus Holzer's avatar
Markus Holzer committed
123
124
125

    # FUTURE WORK from here we shouldn't NEED sympy
    # --- check constrains
126
127
    check = KernelConstraintsCheck(check_independence_condition=not config.skip_independence_check,
                                   check_double_write_condition=not config.allow_double_writes)
Markus Holzer's avatar
Markus Holzer committed
128
    check.visit(assignments)
Markus Holzer's avatar
Markus Holzer committed
129

Markus Holzer's avatar
Markus Holzer committed
130
131
    assignments.bound_fields = check.fields_written
    assignments.rhs_fields = check.fields_read
132

Martin Bauer's avatar
Martin Bauer committed
133
    # ----  Creating ast
Jan Hönig's avatar
Jan Hönig committed
134
135
136
137
    ast = None
    if config.target == Target.CPU:
        if config.backend == Backend.C:
            from pystencils.cpu import add_openmp, create_kernel
Markus Holzer's avatar
Markus Holzer committed
138
            ast = create_kernel(assignments, config=config)
Jan Hönig's avatar
Jan Hönig committed
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
            for optimization in config.cpu_prepend_optimizations:
                optimization(ast)
            omp_collapse = None
            if config.cpu_blocking:
                omp_collapse = loop_blocking(ast, config.cpu_blocking)
            if config.cpu_openmp:
                add_openmp(ast, num_threads=config.cpu_openmp, collapse=omp_collapse,
                           assume_single_outer_loop=config.omp_single_loop)
            if config.cpu_vectorize_info:
                if config.cpu_vectorize_info is True:
                    vectorize(ast)
                elif isinstance(config.cpu_vectorize_info, dict):
                    vectorize(ast, **config.cpu_vectorize_info)
                    if config.cpu_openmp and config.cpu_blocking and 'nontemporal' in config.cpu_vectorize_info and \
                            config.cpu_vectorize_info['nontemporal'] and 'cachelineZero' in ast.instruction_set:
                        # This condition is stricter than it needs to be: if blocks along the fastest axis start on a
                        # cache line boundary, it's okay. But we cannot determine that here.
                        # We don't need to disallow OpenMP collapsing because it is never applied to the inner loop.
                        raise ValueError("Blocking cannot be combined with cacheline-zeroing")
                else:
                    raise ValueError("Invalid value for cpu_vectorize_info")
Markus Holzer's avatar
Markus Holzer committed
160
161
    elif config.target == Target.GPU:
        if config.backend == Backend.CUDA:
Jan Hönig's avatar
Jan Hönig committed
162
            from pystencils.gpucuda import create_cuda_kernel
Markus Holzer's avatar
Markus Holzer committed
163
            ast = create_cuda_kernel(assignments, config=config)
Jan Hönig's avatar
Jan Hönig committed
164
165
166
167

    if not ast:
        raise NotImplementedError(
            f'{config.target} together with {config.backend} is not supported by `create_domain_kernel`')
Martin Bauer's avatar
Martin Bauer committed
168

Jan Hönig's avatar
Jan Hönig committed
169
    if config.use_auto_for_assignments:
170
171
172
173
174
        for a in ast.atoms(SympyAssignment):
            a.use_auto = True

    return ast

Martin Bauer's avatar
Martin Bauer committed
175

Markus Holzer's avatar
Markus Holzer committed
176
def create_indexed_kernel(assignments: NodeCollection, *, config: CreateKernelConfig):
Martin Bauer's avatar
Martin Bauer committed
177
    """
Martin Bauer's avatar
Martin Bauer committed
178
    Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
Martin Bauer's avatar
Martin Bauer committed
179
180
    coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.

Martin Bauer's avatar
Martin Bauer committed
181
    The coordinates are stored in a separated index_field, which is a one dimensional array with struct data type.
Martin Bauer's avatar
Martin Bauer committed
182
    This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the
Martin Bauer's avatar
Martin Bauer committed
183
    'coordinate_names' parameter. The struct can have also other fields that can be read and written in the kernel, for
Martin Bauer's avatar
Martin Bauer committed
184
185
    example boundary parameters.

186
187
188
    Note that `create_indexed_kernel` is a lower level function which shoul be accessed by providing `index_fields`
    to create_kernel

Jan Hönig's avatar
Jan Hönig committed
189
190
191
192
193
194
195
    Args:
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
        config: CreateKernelConfig which includes the needed configuration

    Returns:
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
        can be compiled with through its 'compile()' member
Martin Bauer's avatar
Martin Bauer committed
196

Martin Bauer's avatar
Martin Bauer committed
197
    Example:
Markus Holzer's avatar
Markus Holzer committed
198
        >>> import pystencils as ps
199
        >>> from pystencils.node_collection import NodeCollection
Martin Bauer's avatar
Martin Bauer committed
200
        >>> import numpy as np
Markus Holzer's avatar
Markus Holzer committed
201
        >>> from pystencils.kernelcreation import create_indexed_kernel
Martin Bauer's avatar
Martin Bauer committed
202
203
204
205
206
207
208
209
        >>>
        >>> # Index field stores the indices of the cell to visit together with optional values
        >>> index_arr_dtype = np.dtype([('x', np.int32), ('y', np.int32), ('val', np.double)])
        >>> index_arr = np.array([(1, 1, 0.1), (2, 2, 0.2), (3, 3, 0.3)], dtype=index_arr_dtype)
        >>> idx_field = ps.fields(idx=index_arr)
        >>>
        >>> # Additional values  stored in index field can be accessed in the kernel as well
        >>> s, d = ps.fields('s, d: [2D]')
210
        >>> assignment = ps.Assignment(d[0, 0], 2 * s[0, 1] + 2 * s[1, 0] + idx_field('val'))
Markus Holzer's avatar
Markus Holzer committed
211
        >>> kernel_config = ps.CreateKernelConfig(index_fields=[idx_field], coordinate_names=('x', 'y'))
212
        >>> kernel_ast = create_indexed_kernel(NodeCollection([assignment]), config=kernel_config)
Jan Hönig's avatar
Jan Hönig committed
213
        >>> kernel = kernel_ast.compile()
Martin Bauer's avatar
Martin Bauer committed
214
215
216
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(s=np.ones([5, 5]), d=d_arr, idx=index_arr)
        >>> d_arr
217
218
219
220
221
222
        array([[0. , 0. , 0. , 0. , 0. ],
               [0. , 4.1, 0. , 0. , 0. ],
               [0. , 0. , 4.2, 0. , 0. ],
               [0. , 0. , 0. , 4.3, 0. ],
               [0. , 0. , 0. , 0. , 0. ]])

Martin Bauer's avatar
Martin Bauer committed
223
    """
Markus Holzer's avatar
Markus Holzer committed
224
225
226
227
228
229
230
231
232
233
234
235
    # --- eval
    assignments.evaluate_terms()

    # FUTURE WORK from here we shouldn't NEED sympy
    # --- check constrains
    check = KernelConstraintsCheck(check_independence_condition=not config.skip_independence_check,
                                   check_double_write_condition=not config.allow_double_writes)
    check.visit(assignments)

    assignments.bound_fields = check.fields_written
    assignments.rhs_fields = check.fields_read

Jan Hönig's avatar
Jan Hönig committed
236
237
238
    ast = None
    if config.target == Target.CPU and config.backend == Backend.C:
        from pystencils.cpu import add_openmp, create_indexed_kernel
Markus Holzer's avatar
Markus Holzer committed
239
        ast = create_indexed_kernel(assignments, config=config)
Jan Hönig's avatar
Jan Hönig committed
240
241
        if config.cpu_openmp:
            add_openmp(ast, num_threads=config.cpu_openmp)
Markus Holzer's avatar
Markus Holzer committed
242
243
    elif config.target == Target.GPU:
        if config.backend == Backend.CUDA:
Jan Hönig's avatar
Jan Hönig committed
244
            from pystencils.gpucuda import created_indexed_cuda_kernel
Markus Holzer's avatar
Markus Holzer committed
245
            ast = created_indexed_cuda_kernel(assignments, config=config)
Jan Hönig's avatar
Jan Hönig committed
246
247
248
249

    if not ast:
        raise NotImplementedError(f'Indexed kernels are not yet supported for {config.target} with {config.backend}')
    return ast
250

251

Jan Hönig's avatar
Jan Hönig committed
252
def create_staggered_kernel(assignments, target: Target = Target.CPU, gpu_exclusive_conditions=False, **kwargs):
253
254
255
256
257
258
259
260
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    For a staggered field, the first index coordinate defines the location of the staggered value.
    Further index coordinates can be used to store vectors/tensors at each point.

    Args:
261
262
263
264
265
        assignments: a sequence of assignments or an AssignmentCollection.
                     Assignments to staggered field are processed specially, while subexpressions and assignments to
                     regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
                     used, but they all need to use the same stencil (i.e. the same number of staggered points) and
                     shape.
Jan Hönig's avatar
Jan Hönig committed
266
        target: 'CPU' or 'GPU'
267
268
        gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
                                  handled in an else branch.
269
270
271
272
        kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed

    Returns:
        AST, see `create_kernel`
273
    """
Markus Holzer's avatar
Markus Holzer committed
274
    # TODO: Add doku like in the other kernels
Michael Kuron's avatar
Michael Kuron committed
275
276
277
278
279
280
281
282
283
    if 'ghost_layers' in kwargs:
        assert kwargs['ghost_layers'] is None
        del kwargs['ghost_layers']
    if 'iteration_slice' in kwargs:
        assert kwargs['iteration_slice'] is None
        del kwargs['iteration_slice']
    if 'omp_single_loop' in kwargs:
        assert kwargs['omp_single_loop'] is False
        del kwargs['omp_single_loop']
284
285

    if isinstance(assignments, AssignmentCollection):
286
        subexpressions = assignments.subexpressions + [a for a in assignments.main_assignments
287
288
                                                       if not hasattr(a, 'lhs')
                                                       or type(a.lhs) is not Field.Access
289
                                                       or not FieldType.is_staggered(a.lhs.field)]
290
291
        assignments = [a for a in assignments.main_assignments if hasattr(a, 'lhs')
                       and type(a.lhs) is Field.Access
292
                       and FieldType.is_staggered(a.lhs.field)]
293
    else:
294
295
        subexpressions = [a for a in assignments if not hasattr(a, 'lhs')
                          or type(a.lhs) is not Field.Access
296
                          or not FieldType.is_staggered(a.lhs.field)]
297
298
        assignments = [a for a in assignments if hasattr(a, 'lhs')
                       and type(a.lhs) is Field.Access
299
                       and FieldType.is_staggered(a.lhs.field)]
300
301
302
303
304
    if len(set([tuple(a.lhs.field.staggered_stencil) for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same stencil")
    if len(set([a.lhs.field.shape for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same shape")

305
    staggered_field = assignments[0].lhs.field
306
    stencil = staggered_field.staggered_stencil
307
    dim = staggered_field.spatial_dimensions
308
    shape = staggered_field.shape
309
310
311
312
313

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]

    final_assignments = []

314
315
    # find out whether any of the ghost layers is not needed
    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
316
    for direction in stencil:
317
318
319
320
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        common_exclusions.intersection_update(exclusions)
321
    ghost_layers = [[0, 0] for d in range(dim)]
322
323
324
325
    for direction in common_exclusions:
        direction = direction_string_to_offset(direction)
        for d, s in enumerate(direction):
            if s == 1:
326
                ghost_layers[d][1] = 1
327
            elif s == -1:
328
                ghost_layers[d][0] = 1
329

330
331
    def condition(direction):
        """exclude those staggered points that correspond to fluxes between ghost cells"""
332
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
333
334
335
336
337

        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        conditions = []
        for e in exclusions:
338
339
            if e in common_exclusions:
                continue
340
341
342
            offset = direction_string_to_offset(e)
            for i, o in enumerate(offset):
                if o == 1:
343
                    conditions.append(counters[i] < shape[i] - 1)
344
345
346
347
                elif o == -1:
                    conditions.append(counters[i] > 0)
        return sp.And(*conditions)

348
    if gpu_exclusive_conditions:
349
        outer_assignment = None
350
351
        conditions = {direction: condition(direction) for direction in stencil}
        for num_conditions in range(len(stencil)):
352
            for combination in itertools.combinations(conditions.values(), num_conditions):
353
354
355
356
357
                for assignment in assignments:
                    direction = stencil[assignment.lhs.index[0]]
                    if conditions[direction] in combination:
                        assignment = SympyAssignment(assignment.lhs, assignment.rhs)
                        outer_assignment = Conditional(sp.And(*combination), Block([assignment]), outer_assignment)
358
359
360
361
362
363
364
365
366
367

        inner_assignment = []
        for assignment in assignments:
            inner_assignment.append(SympyAssignment(assignment.lhs, assignment.rhs))
        last_conditional = Conditional(sp.And(*[condition(d) for d in stencil]),
                                       Block(inner_assignment), outer_assignment)
        final_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                            [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                            [last_conditional]

Markus Holzer's avatar
Markus Holzer committed
368
369
        config = CreateKernelConfig(target=target, ghost_layers=ghost_layers, omp_single_loop=False, **kwargs)
        ast = create_kernel(final_assignments, config=config)
370
        return ast
371

372
    for assignment in assignments:
373
        direction = stencil[assignment.lhs.index[0]]
374
375
        sp_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
376
                         [SympyAssignment(assignment.lhs, assignment.rhs)]
377
378
379
        last_conditional = Conditional(condition(direction), Block(sp_assignments))
        final_assignments.append(last_conditional)

380
381
382
    remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
    prepend_optimizations = [lambda ast: remove_conditionals_in_staggered_kernel(ast, remove_start_conditional),
                             move_constants_before_loop]
Michael Kuron's avatar
Michael Kuron committed
383
384
385
    if 'cpu_prepend_optimizations' in kwargs:
        prepend_optimizations += kwargs['cpu_prepend_optimizations']
        del kwargs['cpu_prepend_optimizations']
Markus Holzer's avatar
Markus Holzer committed
386
387
388
389

    config = CreateKernelConfig(ghost_layers=ghost_layers, target=target, omp_single_loop=False,
                                cpu_prepend_optimizations=prepend_optimizations, **kwargs)
    ast = create_kernel(final_assignments, config=config)
390
    return ast