kernelcreation.py 14.6 KB
Newer Older
Martin Bauer's avatar
Martin Bauer committed
1
from types import MappingProxyType
Martin Bauer's avatar
Martin Bauer committed
2

3
import sympy as sp
Martin Bauer's avatar
Martin Bauer committed
4

Martin Bauer's avatar
Martin Bauer committed
5
from pystencils.assignment import Assignment
Martin Bauer's avatar
Martin Bauer committed
6
from pystencils.astnodes import Block, Conditional, LoopOverCoordinate, SympyAssignment
Martin Bauer's avatar
Martin Bauer committed
7
from pystencils.cpu.vectorization import vectorize
8
from pystencils.field import Field, FieldType
Martin Bauer's avatar
Martin Bauer committed
9
from pystencils.gpucuda.indexing import indexing_creator_from_params
Martin Bauer's avatar
Martin Bauer committed
10
from pystencils.simp.assignment_collection import AssignmentCollection
11
from pystencils.stencil import direction_string_to_offset, inverse_direction_string
Martin Bauer's avatar
Martin Bauer committed
12
13
from pystencils.transformations import (
    loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
Martin Bauer's avatar
Martin Bauer committed
14
15


16
17
18
19
20
def create_kernel(assignments,
                  target='cpu',
                  data_type="double",
                  iteration_slice=None,
                  ghost_layers=None,
21
                  skip_independence_check=False,
22
23
24
25
26
                  cpu_openmp=False,
                  cpu_vectorize_info=None,
                  cpu_blocking=None,
                  gpu_indexing='block',
                  gpu_indexing_params=MappingProxyType({}),
27
28
                  use_textures_for_interpolation=True,
                  cpu_prepend_optimizations=[]):
Martin Bauer's avatar
Martin Bauer committed
29
30
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.
31
32

    Args:
Martin Bauer's avatar
Martin Bauer committed
33
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
34
35
36
37
38
39
40
        target: 'cpu', 'llvm' or 'gpu'
        data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
                  to type
        iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
                         part of the field is iterated over
        ghost_layers: if left to default, the number of necessary ghost layers is determined automatically
                     a single integer specifies the ghost layer count at all borders, can also be a sequence of
Martin Bauer's avatar
Martin Bauer committed
41
                     pairs ``[(x_lower_gl, x_upper_gl), .... ]``
42
43
        skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                 periodicity kernel, that access the field outside the iteration bounds. Use with care!
44
        cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
Martin Bauer's avatar
Martin Bauer committed
45
46
        cpu_vectorize_info: a dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal'
                            for documentation of these parameters see vectorize function. Example:
47
                            '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
Martin Bauer's avatar
Martin Bauer committed
48
        cpu_blocking: a tuple of block sizes or None if no blocking should be applied
Martin Bauer's avatar
Martin Bauer committed
49
        gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
50
        gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
Martin Bauer's avatar
Martin Bauer committed
51
                             e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'
52
        cpu_prepend_optimizations: list of extra optimizations to perform first on the AST
53
54

    Returns:
Martin Bauer's avatar
Martin Bauer committed
55
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
56
        can be compiled with through its 'compile()' member
Martin Bauer's avatar
Martin Bauer committed
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> ast = ps.create_kernel(assignment, target='cpu', cpu_openmp=True)
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
Martin Bauer's avatar
Martin Bauer committed
73
74
    """
    # ----  Normalizing parameters
Martin Bauer's avatar
Martin Bauer committed
75
    split_groups = ()
Martin Bauer's avatar
Martin Bauer committed
76
77
78
79
80
81
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments
    if isinstance(assignments, Assignment):
        assignments = [assignments]
Martin Bauer's avatar
Martin Bauer committed
82
83
84

    # ----  Creating ast
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
85
86
        from pystencils.cpu import create_kernel
        from pystencils.cpu import add_openmp
Martin Bauer's avatar
Martin Bauer committed
87
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
88
89
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                            skip_independence_check=skip_independence_check)
90
91
        for optimization in cpu_prepend_optimizations:
            optimization(ast)
Martin Bauer's avatar
Martin Bauer committed
92
93
94
        omp_collapse = None
        if cpu_blocking:
            omp_collapse = loop_blocking(ast, cpu_blocking)
Martin Bauer's avatar
Martin Bauer committed
95
        if cpu_openmp:
Martin Bauer's avatar
Martin Bauer committed
96
            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse)
Martin Bauer's avatar
Martin Bauer committed
97
        if cpu_vectorize_info:
Martin Bauer's avatar
Martin Bauer committed
98
            if cpu_vectorize_info is True:
99
                vectorize(ast)
Martin Bauer's avatar
Martin Bauer committed
100
101
102
103
            elif isinstance(cpu_vectorize_info, dict):
                vectorize(ast, **cpu_vectorize_info)
            else:
                raise ValueError("Invalid value for cpu_vectorize_info")
Martin Bauer's avatar
Martin Bauer committed
104
105
        return ast
    elif target == 'llvm':
Martin Bauer's avatar
Martin Bauer committed
106
        from pystencils.llvm import create_kernel
Martin Bauer's avatar
Martin Bauer committed
107
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
Martin Bauer's avatar
Martin Bauer committed
108
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers)
Martin Bauer's avatar
Martin Bauer committed
109
110
        return ast
    elif target == 'gpu':
Martin Bauer's avatar
Martin Bauer committed
111
        from pystencils.gpucuda import create_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
112
        ast = create_cuda_kernel(assignments, type_info=data_type,
Martin Bauer's avatar
Martin Bauer committed
113
                                 indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
114
                                 iteration_slice=iteration_slice, ghost_layers=ghost_layers,
115
116
                                 skip_independence_check=skip_independence_check,
                                 use_textures_for_interpolation=use_textures_for_interpolation)
Martin Bauer's avatar
Martin Bauer committed
117
118
119
120
121
        return ast
    else:
        raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))


122
123
124
125
126
127
128
129
130
def create_indexed_kernel(assignments,
                          index_fields,
                          target='cpu',
                          data_type="double",
                          coordinate_names=('x', 'y', 'z'),
                          cpu_openmp=True,
                          gpu_indexing='block',
                          gpu_indexing_params=MappingProxyType({}),
                          use_textures_for_interpolation=True):
Martin Bauer's avatar
Martin Bauer committed
131
    """
Martin Bauer's avatar
Martin Bauer committed
132
    Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
Martin Bauer's avatar
Martin Bauer committed
133
134
    coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.

Martin Bauer's avatar
Martin Bauer committed
135
    The coordinates are stored in a separated index_field, which is a one dimensional array with struct data type.
Martin Bauer's avatar
Martin Bauer committed
136
    This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the
Martin Bauer's avatar
Martin Bauer committed
137
    'coordinate_names' parameter. The struct can have also other fields that can be read and written in the kernel, for
Martin Bauer's avatar
Martin Bauer committed
138
139
    example boundary parameters.

Martin Bauer's avatar
Martin Bauer committed
140
141
    index_fields: list of index fields, i.e. 1D fields with struct data type
    coordinate_names: name of the coordinate fields in the struct data type
Martin Bauer's avatar
Martin Bauer committed
142

Martin Bauer's avatar
Martin Bauer committed
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>>
        >>> # Index field stores the indices of the cell to visit together with optional values
        >>> index_arr_dtype = np.dtype([('x', np.int32), ('y', np.int32), ('val', np.double)])
        >>> index_arr = np.array([(1, 1, 0.1), (2, 2, 0.2), (3, 3, 0.3)], dtype=index_arr_dtype)
        >>> idx_field = ps.fields(idx=index_arr)
        >>>
        >>> # Additional values  stored in index field can be accessed in the kernel as well
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], 2 * s[0, 1] + 2 * s[1, 0] + idx_field('val'))
        >>> ast = create_indexed_kernel(assignment, [idx_field], coordinate_names=('x', 'y'))
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(s=np.ones([5, 5]), d=d_arr, idx=index_arr)
        >>> d_arr
        array([[0. , 0. , 0. , 0. , 0. ],
               [0. , 4.1, 0. , 0. , 0. ],
               [0. , 0. , 4.2, 0. , 0. ],
               [0. , 0. , 0. , 4.3, 0. ],
               [0. , 0. , 0. , 0. , 0. ]])
    """
    if isinstance(assignments, Assignment):
        assignments = [assignments]
    elif isinstance(assignments, AssignmentCollection):
Martin Bauer's avatar
Martin Bauer committed
169
        assignments = assignments.all_assignments
Martin Bauer's avatar
Martin Bauer committed
170
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
171
172
173
174
175
176
        from pystencils.cpu import create_indexed_kernel
        from pystencils.cpu import add_openmp
        ast = create_indexed_kernel(assignments, index_fields=index_fields, type_info=data_type,
                                    coordinate_names=coordinate_names)
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp)
Martin Bauer's avatar
Martin Bauer committed
177
178
179
180
        return ast
    elif target == 'llvm':
        raise NotImplementedError("Indexed kernels are not yet supported in LLVM backend")
    elif target == 'gpu':
Martin Bauer's avatar
Martin Bauer committed
181
        from pystencils.gpucuda import created_indexed_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
182
        idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
183
184
185
186
187
188
        ast = created_indexed_cuda_kernel(assignments,
                                          index_fields,
                                          type_info=data_type,
                                          coordinate_names=coordinate_names,
                                          indexing_creator=idx_creator,
                                          use_textures_for_interpolation=use_textures_for_interpolation)
Martin Bauer's avatar
Martin Bauer committed
189
190
191
        return ast
    else:
        raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
192

193

194
def create_staggered_kernel(assignments, gpu_exclusive_conditions=False, **kwargs):
195
196
197
198
199
200
201
202
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    For a staggered field, the first index coordinate defines the location of the staggered value.
    Further index coordinates can be used to store vectors/tensors at each point.

    Args:
203
204
205
206
207
        assignments: a sequence of assignments or an AssignmentCollection.
                     Assignments to staggered field are processed specially, while subexpressions and assignments to
                     regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
                     used, but they all need to use the same stencil (i.e. the same number of staggered points) and
                     shape.
208
209
210
211
212
        gpu_exclusive_conditions: whether to use nested conditionals instead of multiple conditionals
        kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed

    Returns:
        AST, see `create_kernel`
213
    """
214
    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
215
216
217

    subexpressions = ()
    if isinstance(assignments, AssignmentCollection):
218
        subexpressions = assignments.subexpressions + [a for a in assignments.main_assignments
219
220
221
222
                                                       if type(a.lhs) is not Field.Access
                                                       and not FieldType.is_staggered(a.lhs.field)]
        assignments = [a for a in assignments.main_assignments if type(a.lhs) is Field.Access
                       and FieldType.is_staggered(a.lhs.field)]
223
    else:
224
225
226
227
        subexpressions = [a for a in assignments if type(a.lhs) is not Field.Access
                          and not FieldType.is_staggered(a.lhs.field)]
        assignments = [a for a in assignments if type(a.lhs) is Field.Access 
                       and FieldType.is_staggered(a.lhs.field)]
228
229
230
231
232
    if len(set([tuple(a.lhs.field.staggered_stencil) for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same stencil")
    if len(set([a.lhs.field.shape for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same shape")

233
    staggered_field = assignments[0].lhs.field
234
    stencil = staggered_field.staggered_stencil
235
236
    dim = staggered_field.spatial_dimensions
    points = staggered_field.index_shape[0]
237
    shape = staggered_field.shape
238
239
240
241
242

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]

    final_assignments = []

243
244
    # find out whether any of the ghost layers is not needed
    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
245
    for direction in stencil:
246
247
248
249
250
251
252
253
254
255
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        common_exclusions.intersection_update(exclusions)
    ghost_layers = [[1, 1] for d in range(dim)]
    for direction in common_exclusions:
        direction = direction_string_to_offset(direction)
        for d, s in enumerate(direction):
            if s == 1:
                ghost_layers[d][0] = 0
256
257
            elif s == -1:
                ghost_layers[d][1] = 0
258

259
260
    def condition(direction):
        """exclude those staggered points that correspond to fluxes between ghost cells"""
261
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
262
263
264
265
266

        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        conditions = []
        for e in exclusions:
267
268
            if e in common_exclusions:
                continue
269
270
271
            offset = direction_string_to_offset(e)
            for i, o in enumerate(offset):
                if o == 1:
272
                    conditions.append(counters[i] < shape[i] - 1)
273
274
275
276
                elif o == -1:
                    conditions.append(counters[i] > 0)
        return sp.And(*conditions)

277
278
279
    if gpu_exclusive_conditions:
        raise NotImplementedError('gpu_exclusive_conditions is not implemented yet')

280
    for d, direction in zip(range(points), stencil):
281
282
283
284
285
        sp_assignments = [SympyAssignment(assignments[d].lhs, assignments[d].rhs)] + \
                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions]
        last_conditional = Conditional(condition(direction), Block(sp_assignments))
        final_assignments.append(last_conditional)

286
287
288
    prepend_optimizations = [remove_conditionals_in_staggered_kernel, move_constants_before_loop]
    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, cpu_prepend_optimizations=prepend_optimizations,
                        **kwargs)
289
    return ast