kernelcreation.py 18.5 KB
Newer Older
1 2
import functools
import itertools
3
from types import MappingProxyType
Martin Bauer's avatar
Martin Bauer committed
4

5
import sympy as sp
Martin Bauer's avatar
Martin Bauer committed
6

Martin Bauer's avatar
Martin Bauer committed
7
from pystencils.assignment import Assignment
Martin Bauer's avatar
Martin Bauer committed
8
from pystencils.astnodes import Block, Conditional, LoopOverCoordinate, SympyAssignment
Martin Bauer's avatar
Martin Bauer committed
9
from pystencils.cpu.vectorization import vectorize
10
from pystencils.field import Field, FieldType
Martin Bauer's avatar
Martin Bauer committed
11
from pystencils.gpucuda.indexing import indexing_creator_from_params
Martin Bauer's avatar
Martin Bauer committed
12
from pystencils.simp.assignment_collection import AssignmentCollection
13
from pystencils.stencil import direction_string_to_offset, inverse_direction_string
Martin Bauer's avatar
Martin Bauer committed
14 15
from pystencils.transformations import (
    loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)
Martin Bauer's avatar
Martin Bauer committed
16 17


18 19 20 21 22
def create_kernel(assignments,
                  target='cpu',
                  data_type="double",
                  iteration_slice=None,
                  ghost_layers=None,
23
                  skip_independence_check=False,
24 25 26
                  cpu_openmp=False,
                  cpu_vectorize_info=None,
                  cpu_blocking=None,
27
                  omp_single_loop=True,
28 29
                  gpu_indexing='block',
                  gpu_indexing_params=MappingProxyType({}),
30
                  use_textures_for_interpolation=True,
31
                  cpu_prepend_optimizations=[],
32 33 34
                  use_auto_for_assignments=False,
                  opencl_queue=None,
                  opencl_ctx=None):
Martin Bauer's avatar
Martin Bauer committed
35 36
    """
    Creates abstract syntax tree (AST) of kernel, using a list of update equations.
37 38

    Args:
Martin Bauer's avatar
Martin Bauer committed
39
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
40
        target: 'cpu', 'llvm', 'gpu' or 'opencl'
41 42 43 44
        data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
                  to type
        iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
                         part of the field is iterated over
45 46 47
        ghost_layers: a single integer specifies the ghost layer count at all borders, can also be a sequence of
                      pairs ``[(x_lower_gl, x_upper_gl), .... ]``. These layers are excluded from the iteration.
                      If left to default, the number of ghost layers is determined automatically.
48 49
        skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                 periodicity kernel, that access the field outside the iteration bounds. Use with care!
50
        cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
51
        omp_single_loop: if OpenMP is active: whether multiple outer loops are permitted
Martin Bauer's avatar
Martin Bauer committed
52 53
        cpu_vectorize_info: a dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal'
                            for documentation of these parameters see vectorize function. Example:
54
                            '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
Martin Bauer's avatar
Martin Bauer committed
55
        cpu_blocking: a tuple of block sizes or None if no blocking should be applied
Martin Bauer's avatar
Martin Bauer committed
56
        gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
57
        gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
Martin Bauer's avatar
Martin Bauer committed
58
                             e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'
59
        cpu_prepend_optimizations: list of extra optimizations to perform first on the AST
60 61

    Returns:
Martin Bauer's avatar
Martin Bauer committed
62
        abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or
63
        can be compiled with through its 'compile()' member
Martin Bauer's avatar
Martin Bauer committed
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> ast = ps.create_kernel(assignment, target='cpu', cpu_openmp=True)
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
Martin Bauer's avatar
Martin Bauer committed
80 81
    """
    # ----  Normalizing parameters
82 83
    if isinstance(assignments, Assignment):
        assignments = [assignments]
84
    assert assignments, "Assignments must not be empty!"
Martin Bauer's avatar
Martin Bauer committed
85
    split_groups = ()
Martin Bauer's avatar
Martin Bauer committed
86 87 88 89
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments
Martin Bauer's avatar
Martin Bauer committed
90 91 92

    # ----  Creating ast
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
93 94
        from pystencils.cpu import create_kernel
        from pystencils.cpu import add_openmp
Martin Bauer's avatar
Martin Bauer committed
95
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
96 97
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                            skip_independence_check=skip_independence_check)
98 99
        for optimization in cpu_prepend_optimizations:
            optimization(ast)
Martin Bauer's avatar
Martin Bauer committed
100 101 102
        omp_collapse = None
        if cpu_blocking:
            omp_collapse = loop_blocking(ast, cpu_blocking)
Martin Bauer's avatar
Martin Bauer committed
103
        if cpu_openmp:
104
            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse, assume_single_outer_loop=omp_single_loop)
Martin Bauer's avatar
Martin Bauer committed
105
        if cpu_vectorize_info:
Martin Bauer's avatar
Martin Bauer committed
106
            if cpu_vectorize_info is True:
107
                vectorize(ast)
Martin Bauer's avatar
Martin Bauer committed
108 109
            elif isinstance(cpu_vectorize_info, dict):
                vectorize(ast, **cpu_vectorize_info)
110 111 112 113 114 115
                if cpu_openmp and cpu_blocking and 'nontemporal' in cpu_vectorize_info and \
                        cpu_vectorize_info['nontemporal'] and 'cachelineZero' in ast.instruction_set:
                    # This condition is stricter than it needs to be: if blocks along the fastest axis start on a
                    # cache line boundary, it's okay. But we cannot determine that here.
                    # We don't need to disallow OpenMP collapsing because it is never applied to the inner loop.
                    raise ValueError("Blocking cannot be combined with cacheline-zeroing")
Martin Bauer's avatar
Martin Bauer committed
116 117
            else:
                raise ValueError("Invalid value for cpu_vectorize_info")
Martin Bauer's avatar
Martin Bauer committed
118
    elif target == 'llvm':
Martin Bauer's avatar
Martin Bauer committed
119
        from pystencils.llvm import create_kernel
Martin Bauer's avatar
Martin Bauer committed
120
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
Martin Bauer's avatar
Martin Bauer committed
121
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers)
122
    elif target == 'gpu' or target == 'opencl':
Martin Bauer's avatar
Martin Bauer committed
123
        from pystencils.gpucuda import create_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
124
        ast = create_cuda_kernel(assignments, type_info=data_type,
Martin Bauer's avatar
Martin Bauer committed
125
                                 indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
126
                                 iteration_slice=iteration_slice, ghost_layers=ghost_layers,
127 128
                                 skip_independence_check=skip_independence_check,
                                 use_textures_for_interpolation=use_textures_for_interpolation)
129 130 131 132 133 134 135
        if target == 'opencl':
            from pystencils.opencl.opencljit import make_python_function
            ast._backend = 'opencl'
            ast.compile = functools.partial(make_python_function, ast, opencl_queue, opencl_ctx)
            ast._target = 'opencl'
            ast._backend = 'opencl'
        return ast
Martin Bauer's avatar
Martin Bauer committed
136
    else:
137
        raise ValueError(f"Unknown target {target}. Has to be one of 'cpu', 'gpu' or 'llvm' ")
Martin Bauer's avatar
Martin Bauer committed
138

139 140 141 142 143 144
    if use_auto_for_assignments:
        for a in ast.atoms(SympyAssignment):
            a.use_auto = True

    return ast

Martin Bauer's avatar
Martin Bauer committed
145

146 147 148 149 150 151 152 153
def create_indexed_kernel(assignments,
                          index_fields,
                          target='cpu',
                          data_type="double",
                          coordinate_names=('x', 'y', 'z'),
                          cpu_openmp=True,
                          gpu_indexing='block',
                          gpu_indexing_params=MappingProxyType({}),
154 155 156
                          use_textures_for_interpolation=True,
                          opencl_queue=None,
                          opencl_ctx=None):
Martin Bauer's avatar
Martin Bauer committed
157
    """
Martin Bauer's avatar
Martin Bauer committed
158
    Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with
Martin Bauer's avatar
Martin Bauer committed
159 160
    coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling.

Martin Bauer's avatar
Martin Bauer committed
161
    The coordinates are stored in a separated index_field, which is a one dimensional array with struct data type.
Martin Bauer's avatar
Martin Bauer committed
162
    This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the
Martin Bauer's avatar
Martin Bauer committed
163
    'coordinate_names' parameter. The struct can have also other fields that can be read and written in the kernel, for
Martin Bauer's avatar
Martin Bauer committed
164 165
    example boundary parameters.

Martin Bauer's avatar
Martin Bauer committed
166 167
    index_fields: list of index fields, i.e. 1D fields with struct data type
    coordinate_names: name of the coordinate fields in the struct data type
Martin Bauer's avatar
Martin Bauer committed
168

Martin Bauer's avatar
Martin Bauer committed
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>>
        >>> # Index field stores the indices of the cell to visit together with optional values
        >>> index_arr_dtype = np.dtype([('x', np.int32), ('y', np.int32), ('val', np.double)])
        >>> index_arr = np.array([(1, 1, 0.1), (2, 2, 0.2), (3, 3, 0.3)], dtype=index_arr_dtype)
        >>> idx_field = ps.fields(idx=index_arr)
        >>>
        >>> # Additional values  stored in index field can be accessed in the kernel as well
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], 2 * s[0, 1] + 2 * s[1, 0] + idx_field('val'))
        >>> ast = create_indexed_kernel(assignment, [idx_field], coordinate_names=('x', 'y'))
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(s=np.ones([5, 5]), d=d_arr, idx=index_arr)
        >>> d_arr
        array([[0. , 0. , 0. , 0. , 0. ],
               [0. , 4.1, 0. , 0. , 0. ],
               [0. , 0. , 4.2, 0. , 0. ],
               [0. , 0. , 0. , 4.3, 0. ],
               [0. , 0. , 0. , 0. , 0. ]])
    """
    if isinstance(assignments, Assignment):
        assignments = [assignments]
    elif isinstance(assignments, AssignmentCollection):
Martin Bauer's avatar
Martin Bauer committed
195
        assignments = assignments.all_assignments
Martin Bauer's avatar
Martin Bauer committed
196
    if target == 'cpu':
Martin Bauer's avatar
Martin Bauer committed
197 198 199 200 201 202
        from pystencils.cpu import create_indexed_kernel
        from pystencils.cpu import add_openmp
        ast = create_indexed_kernel(assignments, index_fields=index_fields, type_info=data_type,
                                    coordinate_names=coordinate_names)
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp)
Martin Bauer's avatar
Martin Bauer committed
203 204 205
        return ast
    elif target == 'llvm':
        raise NotImplementedError("Indexed kernels are not yet supported in LLVM backend")
206
    elif target == 'gpu' or target == 'opencl':
Martin Bauer's avatar
Martin Bauer committed
207
        from pystencils.gpucuda import created_indexed_cuda_kernel
Martin Bauer's avatar
Martin Bauer committed
208
        idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
209 210 211 212 213 214
        ast = created_indexed_cuda_kernel(assignments,
                                          index_fields,
                                          type_info=data_type,
                                          coordinate_names=coordinate_names,
                                          indexing_creator=idx_creator,
                                          use_textures_for_interpolation=use_textures_for_interpolation)
215 216 217 218 219 220
        if target == 'opencl':
            from pystencils.opencl.opencljit import make_python_function
            ast._backend = 'opencl'
            ast.compile = functools.partial(make_python_function, ast, opencl_queue, opencl_ctx)
            ast._target = 'opencl'
            ast._backend = 'opencl'
Martin Bauer's avatar
Martin Bauer committed
221 222
        return ast
    else:
223
        raise ValueError(f"Unknown target {target}. Has to be either 'cpu' or 'gpu'")
224

225

226
def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=False, **kwargs):
227 228 229 230 231 232 233 234
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    For a staggered field, the first index coordinate defines the location of the staggered value.
    Further index coordinates can be used to store vectors/tensors at each point.

    Args:
235 236 237 238 239
        assignments: a sequence of assignments or an AssignmentCollection.
                     Assignments to staggered field are processed specially, while subexpressions and assignments to
                     regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
                     used, but they all need to use the same stencil (i.e. the same number of staggered points) and
                     shape.
240 241 242
        target: 'cpu', 'llvm' or 'gpu'
        gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
                                  handled in an else branch.
243 244 245 246
        kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed

    Returns:
        AST, see `create_kernel`
247
    """
248
    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs and 'omp_single_loop' not in kwargs
249 250

    if isinstance(assignments, AssignmentCollection):
251
        subexpressions = assignments.subexpressions + [a for a in assignments.main_assignments
252 253
                                                       if not hasattr(a, 'lhs')
                                                       or type(a.lhs) is not Field.Access
254
                                                       or not FieldType.is_staggered(a.lhs.field)]
255 256
        assignments = [a for a in assignments.main_assignments if hasattr(a, 'lhs')
                       and type(a.lhs) is Field.Access
257
                       and FieldType.is_staggered(a.lhs.field)]
258
    else:
259 260
        subexpressions = [a for a in assignments if not hasattr(a, 'lhs')
                          or type(a.lhs) is not Field.Access
261
                          or not FieldType.is_staggered(a.lhs.field)]
262 263
        assignments = [a for a in assignments if hasattr(a, 'lhs')
                       and type(a.lhs) is Field.Access
264
                       and FieldType.is_staggered(a.lhs.field)]
265 266 267 268 269
    if len(set([tuple(a.lhs.field.staggered_stencil) for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same stencil")
    if len(set([a.lhs.field.shape for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same shape")

270
    staggered_field = assignments[0].lhs.field
271
    stencil = staggered_field.staggered_stencil
272
    dim = staggered_field.spatial_dimensions
273
    shape = staggered_field.shape
274 275 276 277 278

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]

    final_assignments = []

279 280
    # find out whether any of the ghost layers is not needed
    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
281
    for direction in stencil:
282 283 284 285
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        common_exclusions.intersection_update(exclusions)
286
    ghost_layers = [[0, 0] for d in range(dim)]
287 288 289 290
    for direction in common_exclusions:
        direction = direction_string_to_offset(direction)
        for d, s in enumerate(direction):
            if s == 1:
291
                ghost_layers[d][1] = 1
292
            elif s == -1:
293
                ghost_layers[d][0] = 1
294

295 296
    def condition(direction):
        """exclude those staggered points that correspond to fluxes between ghost cells"""
297
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
298 299 300 301 302

        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        conditions = []
        for e in exclusions:
303 304
            if e in common_exclusions:
                continue
305 306 307
            offset = direction_string_to_offset(e)
            for i, o in enumerate(offset):
                if o == 1:
308
                    conditions.append(counters[i] < shape[i] - 1)
309 310 311 312
                elif o == -1:
                    conditions.append(counters[i] > 0)
        return sp.And(*conditions)

313
    if gpu_exclusive_conditions:
314
        outer_assignment = None
315 316
        conditions = {direction: condition(direction) for direction in stencil}
        for num_conditions in range(len(stencil)):
317
            for combination in itertools.combinations(conditions.values(), num_conditions):
318 319 320 321 322
                for assignment in assignments:
                    direction = stencil[assignment.lhs.index[0]]
                    if conditions[direction] in combination:
                        assignment = SympyAssignment(assignment.lhs, assignment.rhs)
                        outer_assignment = Conditional(sp.And(*combination), Block([assignment]), outer_assignment)
323 324 325

        inner_assignment = []
        for assignment in assignments:
326
            direction = stencil[assignment.lhs.index[0]]
327 328 329 330 331 332 333 334 335
            inner_assignment.append(SympyAssignment(assignment.lhs, assignment.rhs))
        last_conditional = Conditional(sp.And(*[condition(d) for d in stencil]),
                                       Block(inner_assignment), outer_assignment)
        final_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                            [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                            [last_conditional]

        if target == 'cpu':
            from pystencils.cpu import create_kernel as create_kernel_cpu
336
            ast = create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, omp_single_loop=False, **kwargs)
337 338 339
        else:
            ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
        return ast
340

341
    for assignment in assignments:
342
        direction = stencil[assignment.lhs.index[0]]
343 344
        sp_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
345
                         [SympyAssignment(assignment.lhs, assignment.rhs)]
346 347 348
        last_conditional = Conditional(condition(direction), Block(sp_assignments))
        final_assignments.append(last_conditional)

349 350 351
    remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
    prepend_optimizations = [lambda ast: remove_conditionals_in_staggered_kernel(ast, remove_start_conditional),
                             move_constants_before_loop]
352
    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, omp_single_loop=False,
353
                        cpu_prepend_optimizations=prepend_optimizations, **kwargs)
354
    return ast