kernelcreation.py 14.6 KB
Newer Older
Martin Bauer's avatar
Martin Bauer committed
1
import itertools
Martin Bauer's avatar
Martin Bauer committed
2
from types import MappingProxyType
Martin Bauer's avatar
Martin Bauer committed
3

4
import sympy as sp
Martin Bauer's avatar
Martin Bauer committed
5

Martin Bauer's avatar
Martin Bauer committed
6
from pystencils.assignment import Assignment
Martin Bauer's avatar
Martin Bauer committed
7
from pystencils.astnodes import Block, Conditional, LoopOverCoordinate, SympyAssignment
Martin Bauer's avatar
Martin Bauer committed
8
from pystencils.cpu.vectorization import vectorize
Martin Bauer's avatar
Martin Bauer committed
9
from pystencils.gpucuda.indexing import indexing_creator_from_params
Martin Bauer's avatar
Martin Bauer committed
10
11
12
from pystencils.simp.assignment_collection import AssignmentCollection
from pystencils.transformations import (
    loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
Martin Bauer's avatar
Martin Bauer committed
13
14


15
16
17
18
19
def create_kernel(assignments,
                  target='cpu',
                  data_type="double",
                  iteration_slice=None,
                  ghost_layers=None,
20
                  skip_independence_check=False,
21
22
23
24
25
26
                  cpu_openmp=False,
                  cpu_vectorize_info=None,
                  cpu_blocking=None,
                  gpu_indexing='block',
                  gpu_indexing_params=MappingProxyType({}),
                  use_textures_for_interpolation=True):
Martin Bauer's avatar
Martin Bauer committed
27
28
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.
29
30

    Args:
Martin Bauer's avatar
Martin Bauer committed
31
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
32
33
34
35
36
37
38
        target: 'cpu', 'llvm' or 'gpu'
        data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
                  to type
        iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
                         part of the field is iterated over
        ghost_layers: if left to default, the number of necessary ghost layers is determined automatically
                     a single integer specifies the ghost layer count at all borders, can also be a sequence of
Martin Bauer's avatar
Martin Bauer committed
39
                     pairs ``[(x_lower_gl, x_upper_gl), .... ]``
40
41
        skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                 periodicity kernel, that access the field outside the iteration bounds. Use with care!
42
        cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
Martin Bauer's avatar
Martin Bauer committed
43
44
        cpu_vectorize_info: a dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal'
                            for documentation of these parameters see vectorize function. Example:
45
                            '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
Martin Bauer's avatar
Martin Bauer committed
46
        cpu_blocking: a tuple of block sizes or None if no blocking should be applied
Martin Bauer's avatar
Martin Bauer committed
47
        gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
48
        gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
Martin Bauer's avatar
Martin Bauer committed
49
                             e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'
50
51

    Returns:
Martin Bauer's avatar
Martin Bauer committed
52
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
53
        can be compiled with through its 'compile()' member
Martin Bauer's avatar
Martin Bauer committed
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> ast = ps.create_kernel(assignment, target='cpu', cpu_openmp=True)
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
Martin Bauer's avatar
Martin Bauer committed
70
71
    """
    # ----  Normalizing parameters
Martin Bauer's avatar
Martin Bauer committed
72
    split_groups = ()
Martin Bauer's avatar
Martin Bauer committed
73
74
75
76
77
78
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments
    if isinstance(assignments, Assignment):
        assignments = [assignments]
Martin Bauer's avatar
Martin Bauer committed
79
80
81

    # ----  Creating ast
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
82
83
        from pystencils.cpu import create_kernel
        from pystencils.cpu import add_openmp
Martin Bauer's avatar
Martin Bauer committed
84
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
85
86
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                            skip_independence_check=skip_independence_check)
Martin Bauer's avatar
Martin Bauer committed
87
88
89
        omp_collapse = None
        if cpu_blocking:
            omp_collapse = loop_blocking(ast, cpu_blocking)
Martin Bauer's avatar
Martin Bauer committed
90
        if cpu_openmp:
Martin Bauer's avatar
Martin Bauer committed
91
            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse)
Martin Bauer's avatar
Martin Bauer committed
92
        if cpu_vectorize_info:
Martin Bauer's avatar
Martin Bauer committed
93
            if cpu_vectorize_info is True:
94
                vectorize(ast)
Martin Bauer's avatar
Martin Bauer committed
95
96
97
98
            elif isinstance(cpu_vectorize_info, dict):
                vectorize(ast, **cpu_vectorize_info)
            else:
                raise ValueError("Invalid value for cpu_vectorize_info")
Martin Bauer's avatar
Martin Bauer committed
99
100
        return ast
    elif target == 'llvm':
Martin Bauer's avatar
Martin Bauer committed
101
        from pystencils.llvm import create_kernel
Martin Bauer's avatar
Martin Bauer committed
102
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
Martin Bauer's avatar
Martin Bauer committed
103
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers)
Martin Bauer's avatar
Martin Bauer committed
104
105
        return ast
    elif target == 'gpu':
Martin Bauer's avatar
Martin Bauer committed
106
        from pystencils.gpucuda import create_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
107
        ast = create_cuda_kernel(assignments, type_info=data_type,
Martin Bauer's avatar
Martin Bauer committed
108
                                 indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
109
                                 iteration_slice=iteration_slice, ghost_layers=ghost_layers,
110
111
                                 skip_independence_check=skip_independence_check,
                                 use_textures_for_interpolation=use_textures_for_interpolation)
Martin Bauer's avatar
Martin Bauer committed
112
113
114
115
116
        return ast
    else:
        raise ValueError("Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " % (target,))


117
118
119
120
121
122
123
124
125
def create_indexed_kernel(assignments,
                          index_fields,
                          target='cpu',
                          data_type="double",
                          coordinate_names=('x', 'y', 'z'),
                          cpu_openmp=True,
                          gpu_indexing='block',
                          gpu_indexing_params=MappingProxyType({}),
                          use_textures_for_interpolation=True):
Martin Bauer's avatar
Martin Bauer committed
126
    """
Martin Bauer's avatar
Martin Bauer committed
127
    Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
Martin Bauer's avatar
Martin Bauer committed
128
129
    coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.

Martin Bauer's avatar
Martin Bauer committed
130
    The coordinates are stored in a separated index_field, which is a one dimensional array with struct data type.
Martin Bauer's avatar
Martin Bauer committed
131
    This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the
Martin Bauer's avatar
Martin Bauer committed
132
    'coordinate_names' parameter. The struct can have also other fields that can be read and written in the kernel, for
Martin Bauer's avatar
Martin Bauer committed
133
134
    example boundary parameters.

Martin Bauer's avatar
Martin Bauer committed
135
136
    index_fields: list of index fields, i.e. 1D fields with struct data type
    coordinate_names: name of the coordinate fields in the struct data type
Martin Bauer's avatar
Martin Bauer committed
137

Martin Bauer's avatar
Martin Bauer committed
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>>
        >>> # Index field stores the indices of the cell to visit together with optional values
        >>> index_arr_dtype = np.dtype([('x', np.int32), ('y', np.int32), ('val', np.double)])
        >>> index_arr = np.array([(1, 1, 0.1), (2, 2, 0.2), (3, 3, 0.3)], dtype=index_arr_dtype)
        >>> idx_field = ps.fields(idx=index_arr)
        >>>
        >>> # Additional values  stored in index field can be accessed in the kernel as well
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], 2 * s[0, 1] + 2 * s[1, 0] + idx_field('val'))
        >>> ast = create_indexed_kernel(assignment, [idx_field], coordinate_names=('x', 'y'))
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(s=np.ones([5, 5]), d=d_arr, idx=index_arr)
        >>> d_arr
        array([[0. , 0. , 0. , 0. , 0. ],
               [0. , 4.1, 0. , 0. , 0. ],
               [0. , 0. , 4.2, 0. , 0. ],
               [0. , 0. , 0. , 4.3, 0. ],
               [0. , 0. , 0. , 0. , 0. ]])
    """
    if isinstance(assignments, Assignment):
        assignments = [assignments]
    elif isinstance(assignments, AssignmentCollection):
Martin Bauer's avatar
Martin Bauer committed
164
        assignments = assignments.all_assignments
Martin Bauer's avatar
Martin Bauer committed
165
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
166
167
168
169
170
171
        from pystencils.cpu import create_indexed_kernel
        from pystencils.cpu import add_openmp
        ast = create_indexed_kernel(assignments, index_fields=index_fields, type_info=data_type,
                                    coordinate_names=coordinate_names)
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp)
Martin Bauer's avatar
Martin Bauer committed
172
173
174
175
        return ast
    elif target == 'llvm':
        raise NotImplementedError("Indexed kernels are not yet supported in LLVM backend")
    elif target == 'gpu':
Martin Bauer's avatar
Martin Bauer committed
176
        from pystencils.gpucuda import created_indexed_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
177
        idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
178
179
180
181
182
183
        ast = created_indexed_cuda_kernel(assignments,
                                          index_fields,
                                          type_info=data_type,
                                          coordinate_names=coordinate_names,
                                          indexing_creator=idx_creator,
                                          use_textures_for_interpolation=use_textures_for_interpolation)
Martin Bauer's avatar
Martin Bauer committed
184
185
186
        return ast
    else:
        raise ValueError("Unknown target %s. Has to be either 'cpu' or 'gpu'" % (target,))
187

188

189
190
def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu',
                            gpu_exclusive_conditions=False, **kwargs):
191
192
    """Kernel that updates a staggered field.

Martin Bauer's avatar
Martin Bauer committed
193
194
    .. image:: /img/staggered_grid.svg

195
    Args:
196
        staggered_field: field where the first index coordinate defines the location of the staggered value
197
198
                can have 1 or 2 index coordinates, in case of two index coordinates at every staggered location
                a vector is stored, expressions parameter has to be a sequence of sequences then
Martin Bauer's avatar
Martin Bauer committed
199
200
                where e.g. ``f[0,0](0)`` is interpreted as value at the left cell boundary, ``f[1,0](0)`` the right cell
                boundary and ``f[0,0](1)`` the southern cell boundary etc.
201
        expressions: sequence of expressions of length dim, defining how the west, southern, (bottom) cell boundary
202
                     should be updated.
203
204
        subexpressions: optional sequence of Assignments, that define subexpressions used in the main expressions
        target: 'cpu' or 'gpu'
205
        gpu_exclusive_conditions: if/else construct to have only one code block for each of 2**dim code paths
206
        kwargs: passed directly to create_kernel, iteration slice and ghost_layers parameters are not allowed
207

208
    Returns:
209
        AST, see `create_kernel`
210
211
    """
    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
212
    assert staggered_field.index_dimensions in (1, 2), 'Staggered field must have one or two index dimensions'
213
214
215
216
217
    dim = staggered_field.spatial_dimensions

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]
    conditions = [counters[i] < staggered_field.shape[i] - 1 for i in range(dim)]
    assert len(expressions) == dim
218
219
220
221
222
    if staggered_field.index_dimensions == 2:
        assert all(len(sublist) == len(expressions[0]) for sublist in expressions), \
            "If staggered field has two index dimensions expressions has to be a sequence of sequences of all the " \
            "same length."

223
    final_assignments = []
224
225
226
227
    last_conditional = None

    def add(condition, dimensions, as_else_block=False):
        nonlocal last_conditional
228
        if staggered_field.index_dimensions == 1:
229
230
231
            assignments = [Assignment(staggered_field(d), expressions[d]) for d in dimensions]
            a_coll = AssignmentCollection(assignments, list(subexpressions))
            a_coll = a_coll.new_filtered([staggered_field(d) for d in dimensions])
232
233
        elif staggered_field.index_dimensions == 2:
            assert staggered_field.has_fixed_index_shape
234
235
236
            assignments = [Assignment(staggered_field(d, i), expr)
                           for d in dimensions
                           for i, expr in enumerate(expressions[d])]
237
            a_coll = AssignmentCollection(assignments, list(subexpressions))
238
239
            a_coll = a_coll.new_filtered([staggered_field(d, i) for i in range(staggered_field.index_shape[1])
                                          for d in dimensions])
240
        sp_assignments = [SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments]
241
        if as_else_block and last_conditional:
242
243
244
            new_cond = Conditional(condition, Block(sp_assignments))
            last_conditional.false_block = Block([new_cond])
            last_conditional = new_cond
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
        else:
            last_conditional = Conditional(condition, Block(sp_assignments))
            final_assignments.append(last_conditional)

    if target == 'cpu' or not gpu_exclusive_conditions:
        for d in range(dim):
            cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
            add(cond, [d])
    elif target == 'gpu':
        full_conditions = [sp.And(*[conditions[i] for i in range(dim) if d != i]) for d in range(dim)]
        for include in itertools.product(*[[1, 0]] * dim):
            case_conditions = sp.And(*[c if value else sp.Not(c) for c, value in zip(full_conditions, include)])
            dimensions_to_include = [i for i in range(dim) if include[i]]
            if dimensions_to_include:
                add(case_conditions, dimensions_to_include, True)
260

261
262
    ghost_layers = [(1, 0)] * dim

Martin Bauer's avatar
Martin Bauer committed
263
264
265
266
    blocking = kwargs.get('cpu_blocking', None)
    if blocking:
        del kwargs['cpu_blocking']

267
268
269
    cpu_vectorize_info = kwargs.get('cpu_vectorize_info', None)
    if cpu_vectorize_info:
        del kwargs['cpu_vectorize_info']
270
271
272
273
    openmp = kwargs.get('cpu_openmp', None)
    if openmp:
        del kwargs['cpu_openmp']

274
    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
275

276
277
    if target == 'cpu':
        remove_conditionals_in_staggered_kernel(ast)
278
        move_constants_before_loop(ast)
279
        omp_collapse = None
Martin Bauer's avatar
Martin Bauer committed
280
        if blocking:
281
282
283
284
            omp_collapse = loop_blocking(ast, blocking)
        if openmp:
            from pystencils.cpu import add_openmp
            add_openmp(ast, num_threads=openmp, collapse=omp_collapse, assume_single_outer_loop=False)
285
286
287
288
        if cpu_vectorize_info is True:
            vectorize(ast)
        elif isinstance(cpu_vectorize_info, dict):
            vectorize(ast, **cpu_vectorize_info)
289
    return ast