kerncraft_interface.py 14.5 KB
Newer Older
Martin Bauer's avatar
Martin Bauer committed
1
import warnings
2
import fcntl
Martin Bauer's avatar
Martin Bauer committed
3
from collections import defaultdict
Martin Bauer's avatar
Martin Bauer committed
4
from tempfile import TemporaryDirectory
Julian Hammer's avatar
Julian Hammer committed
5
import textwrap
6
7
import itertools
import string
8

Julian Hammer's avatar
Julian Hammer committed
9
from jinja2 import Environment, PackageLoader, StrictUndefined, Template
Martin Bauer's avatar
Martin Bauer committed
10
import sympy as sp
11
from kerncraft.kerncraft import KernelCode
12
from kerncraft.kernel import symbol_pos_int
Julian Hammer's avatar
Julian Hammer committed
13
14
from kerncraft.machinemodel import MachineModel

Julian Hammer's avatar
Julian Hammer committed
15
16
17
from pystencils.astnodes import \
    KernelFunction, LoopOverCoordinate, ResolvedFieldAccess, SympyAssignment
from pystencils.backends.cbackend import generate_c, get_headers
Martin Bauer's avatar
Martin Bauer committed
18
from pystencils.field import get_layout_from_strides
Martin Bauer's avatar
Martin Bauer committed
19
from pystencils.sympyextensions import count_operations_in_ast
20
from pystencils.transformations import filtered_tree_iteration
Martin Bauer's avatar
Martin Bauer committed
21
from pystencils.utils import DotDict
22
from pystencils.cpu.kernelcreation import add_openmp
Julian Hammer's avatar
Julian Hammer committed
23
24
from pystencils.data_types import get_base_type
from pystencils.sympyextensions import prod
Martin Bauer's avatar
Martin Bauer committed
25
26


27
class PyStencilsKerncraftKernel(KernelCode):
Martin Bauer's avatar
Martin Bauer committed
28
29
30
31
    """
    Implementation of kerncraft's kernel interface for pystencils CPU kernels.
    Analyses a list of equations assuming they will be executed on a CPU
    """
Martin Bauer's avatar
Martin Bauer committed
32
33
    LIKWID_BASE = '/usr/local/likwid'

Julian Hammer's avatar
Julian Hammer committed
34
    def __init__(self, ast: KernelFunction, machine: MachineModel,
35
                 assumed_layout='SoA', debug_print=False, filename=None):
Julian Hammer's avatar
Julian Hammer committed
36
37
38
39
40
        """Create a kerncraft kernel using a pystencils AST

        Args:
            ast: pystencils ast
            machine: kerncraft machine model - specify this if kernel needs to be compiled
41
42
43
            assumed_layout: either 'SoA' or 'AoS' - if fields have symbolic sizes the layout of the index
                    coordinates is not known. In this case either a structures of array (SoA) or
                    array of structures (AoS) layout is assumed
44
45
            debug_print: print debug information
            filename: used for caching
Julian Hammer's avatar
Julian Hammer committed
46
        """
47
        super(KernelCode, self).__init__(machine=machine)
Martin Bauer's avatar
Martin Bauer committed
48

Julian Hammer's avatar
Julian Hammer committed
49
50
        # Initialize state
        self.asm_block = None
51
        self._filename = filename
Julian Hammer's avatar
Julian Hammer committed
52
        self._keep_intermediates = False
Julian Hammer's avatar
Julian Hammer committed
53
54

        self.kernel_ast = ast
Martin Bauer's avatar
Martin Bauer committed
55
        self.temporary_dir = TemporaryDirectory()
56
        self._keep_intermediates = debug_print
Martin Bauer's avatar
Martin Bauer committed
57
58

        # Loops
Markus Holzer's avatar
Markus Holzer committed
59
60
        inner_loops = [l for l in filtered_tree_iteration(ast, LoopOverCoordinate, stop_type=SympyAssignment)
                       if l.is_innermost_loop]
Martin Bauer's avatar
Martin Bauer committed
61
        if len(inner_loops) == 0:
Martin Bauer's avatar
Martin Bauer committed
62
63
            raise ValueError("No loop found in pystencils AST")
        else:
64
65
66
            if len(inner_loops) > 1:
                warnings.warn("pystencils AST contains multiple inner loops. "
                              "Only one can be analyzed - choosing first one")
Martin Bauer's avatar
Martin Bauer committed
67
            inner_loop = inner_loops[0]
Martin Bauer's avatar
Martin Bauer committed
68
69

        self._loop_stack = []
Martin Bauer's avatar
Martin Bauer committed
70
71
72
        cur_node = inner_loop
        while cur_node is not None:
            if isinstance(cur_node, LoopOverCoordinate):
Martin Bauer's avatar
Martin Bauer committed
73
                loop_counter_sym = cur_node.loop_counter_symbol
Markus Holzer's avatar
Markus Holzer committed
74
75
76
77
                loop_info = (loop_counter_sym.name,
                             sp.Integer(cur_node.start),
                             sp.Integer(cur_node.stop),
                             sp.Integer(1))
Julian Hammer's avatar
Julian Hammer committed
78
79
                # If the correct step were to be provided, all access within that step length will
                # also need to be passed to kerncraft: cur_node.step)
Martin Bauer's avatar
Martin Bauer committed
80
                self._loop_stack.append(loop_info)
Martin Bauer's avatar
Martin Bauer committed
81
            cur_node = cur_node.parent
Martin Bauer's avatar
Martin Bauer committed
82
83
        self._loop_stack = list(reversed(self._loop_stack))

Julian Hammer's avatar
Julian Hammer committed
84
85
86
87
88
89
90
91
92
        def get_layout_tuple(f):
            if f.has_fixed_shape:
                return get_layout_from_strides(f.strides)
            else:
                layout_list = list(f.layout)
                for _ in range(f.index_dimensions):
                    layout_list.insert(0 if assumed_layout == 'SoA' else -1, max(layout_list) + 1)
                return layout_list

93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
        # Variables (arrays) and Constants (scalar sizes)
        const_names_iter = itertools.product(string.ascii_uppercase, repeat=1)
        constants_reversed = {}
        fields_accessed = self.kernel_ast.fields_accessed
        for field in fields_accessed:
            layout = get_layout_tuple(field)
            permuted_shape = list(field.shape[i] for i in layout)
            # Replace shape dimensions with constant variables (necessary for layer condition
            # analysis)
            for i, d in enumerate(permuted_shape):
                if d not in self.constants.values():
                    const_symbol = symbol_pos_int(''.join(next(const_names_iter)))
                    self.set_constant(const_symbol, d)
                    constants_reversed[d] = const_symbol
                permuted_shape[i] = constants_reversed[d]
            self.set_variable(field.name, (str(field.dtype),), tuple(permuted_shape))

        # Data sources & destinations
        self.sources = defaultdict(list)
        self.destinations = defaultdict(list)

Martin Bauer's avatar
Martin Bauer committed
114
        reads, writes = search_resolved_field_accesses_in_ast(inner_loop)
Martin Bauer's avatar
Martin Bauer committed
115
        for accesses, target_dict in [(reads, self.sources), (writes, self.destinations)]:
Martin Bauer's avatar
Martin Bauer committed
116
            for fa in accesses:
117
                coord = [symbol_pos_int(LoopOverCoordinate.get_loop_counter_name(i)) + off
Martin Bauer's avatar
Martin Bauer committed
118
                         for i, off in enumerate(fa.offsets)]
Martin Bauer's avatar
Martin Bauer committed
119
                coord += list(fa.idx_coordinate_values)
Julian Hammer's avatar
Julian Hammer committed
120
121
                layout = get_layout_tuple(fa.field)
                permuted_coord = [sp.sympify(coord[i]) for i in layout]
Martin Bauer's avatar
Martin Bauer committed
122
                target_dict[fa.field.name].append(permuted_coord)
Martin Bauer's avatar
Martin Bauer committed
123
124
125
126
127

        # data type
        self.datatype = list(self.variables.values())[0][0]

        # flops
Martin Bauer's avatar
Martin Bauer committed
128
        operation_count = count_operations_in_ast(inner_loop)
Martin Bauer's avatar
Martin Bauer committed
129
        self._flops = {
Martin Bauer's avatar
Martin Bauer committed
130
131
132
            '+': operation_count['adds'],
            '*': operation_count['muls'],
            '/': operation_count['divs'],
Martin Bauer's avatar
Martin Bauer committed
133
        }
Jan Hönig's avatar
Jan Hönig committed
134
135
        for k in [k for k, v in self._flops.items() if v == 0]:
            del self._flops[k]
Martin Bauer's avatar
Martin Bauer committed
136
137
        self.check()

138
139
140
141
142
143
144
145
146
147
148
        if debug_print:
            from pprint import pprint
            print("-----------------------------  Loop Stack --------------------------")
            pprint(self._loop_stack)
            print("-----------------------------  Sources -----------------------------")
            pprint(self.sources)
            print("-----------------------------  Destinations ------------------------")
            pprint(self.destinations)
            print("-----------------------------  FLOPS -------------------------------")
            pprint(self._flops)

149
150
151
152
153
    def get_kernel_header(self, name='pystencils_kernel'):
        file_name = "pystencils_kernel.h"
        file_path = self.get_intermediate_location(file_name, machine_and_compiler_dependent=False)
        lock_mode, lock_fp = self.lock_intermediate(file_path)

Julian Hammer's avatar
Julian Hammer committed
154
155
156
157
        if lock_mode == fcntl.LOCK_SH:
            # use cache
            pass
        else:  # lock_mode == fcntl.LOCK_EX:
158
159
160
161
162
163
164
165
166
167
168
            function_signature = generate_c(self.kernel_ast, dialect='c', signature_only=True)

            jinja_context = {
                'function_signature': function_signature,
            }

            env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined)
            file_header = env.get_template('kernel.h').render(**jinja_context)
            with open(file_path, 'w') as f:
                f.write(file_header)

Julian Hammer's avatar
Julian Hammer committed
169
            self.release_exclusive_lock(lock_fp)  # degrade to shared lock
170
171
172
        return file_path, lock_fp

    def get_kernel_code(self, openmp=False, name='pystencils_kernl'):
Julian Hammer's avatar
Julian Hammer committed
173
        """
Julian Hammer's avatar
Julian Hammer committed
174
        Generate and return compilable source code from AST.
Julian Hammer's avatar
Julian Hammer committed
175

176
177
        Args:
            openmp: if true, openmp code will be generated
178
            name: kernel name
Julian Hammer's avatar
Julian Hammer committed
179
        """
180
181
182
183
184
185
186
        filename = 'pystencils_kernl'
        if openmp:
            filename += '-omp'
        filename += '.c'
        file_path = self.get_intermediate_location(filename, machine_and_compiler_dependent=False)
        lock_mode, lock_fp = self.lock_intermediate(file_path)

Julian Hammer's avatar
Julian Hammer committed
187
188
189
190
191
        if lock_mode == fcntl.LOCK_SH:
            # use cache
            with open(file_path) as f:
                code = f.read()
        else:  # lock_mode == fcntl.LOCK_EX:
192
193
194
            header_list = get_headers(self.kernel_ast)
            includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])

195
196
197
            if openmp:
                add_openmp(self.kernel_ast)

198
199
200
201
202
203
204
205
            kernel_code = generate_c(self.kernel_ast, dialect='c')

            jinja_context = {
                'includes': includes,
                'kernel_code': kernel_code,
            }

            env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined)
Julian Hammer's avatar
Julian Hammer committed
206
            code = env.get_template('kernel.c').render(**jinja_context)
207
            with open(file_path, 'w') as f:
Julian Hammer's avatar
Julian Hammer committed
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
                f.write(code)

            self.release_exclusive_lock(lock_fp)  # degrade to shared lock
        return file_path, lock_fp

    CODE_TEMPLATE = Template(textwrap.dedent("""
        #include <likwid.h>
        #include <stdlib.h>
        #include <stdint.h>
        #include <stdbool.h>
        #include <math.h>
        #include "kerncraft.h"
        #include "kernel.h"

        #define RESTRICT __restrict__
        #define FUNC_PREFIX
        void dummy(void *);
        extern int var_false;

        int main(int argc, char **argv) {
          {%- for constantName, dataType in constants %}
          // Constant {{constantName}}
          {{dataType}} {{constantName}};
          {{constantName}} = 0.23;
          {%- endfor %}

          // Declaring arrays
          {%- for field_name, dataType, size in fields %}

          // Initialization {{field_name}}
          double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
          // TODO initialize in parallel context in same order as they are touched
          for (unsigned long long i = 0; i < {{size}}; ++i)
            {{field_name}}[i] = 0.23;
          {%- endfor %}

          likwid_markerInit();
          #pragma omp parallel
          {
            likwid_markerRegisterRegion("loop");
            #pragma omp barrier

            // Initializing arrays in same order as touched in kernel loop nest
            //INIT_ARRAYS;

            // Dummy call
            {%- for field_name, dataType, size in fields %}
            if(var_false) dummy({{field_name}});
            {%- endfor %}
            {%- for constantName, dataType in constants %}
            if(var_false) dummy(&{{constantName}});
            {%- endfor %}

            for(int warmup = 1; warmup >= 0; --warmup) {
              int repeat = 2;
              if(warmup == 0) {
                repeat = atoi(argv[1]);
                likwid_markerStartRegion("loop");
              }

              for(; repeat > 0; --repeat) {
                {{kernelName}}({{call_argument_list}});

                {%- for field_name, dataType, size in fields %}
                if(var_false) dummy({{field_name}});
                {%- endfor %}
                {%- for constantName, dataType in constants %}
                if(var_false) dummy(&{{constantName}});
                {%- endfor %}
              }

            }
            likwid_markerStopRegion("loop");
          }
          likwid_markerClose();
          return 0;
        }
        """))
286

Julian Hammer's avatar
Julian Hammer committed
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
    def get_main_code(self, kernel_function_name='kernel'):
        """
        Generate and return compilable source code from AST.

        :return: tuple of filename and shared lock file pointer
        """
        # TODO produce nicer code, including help text and other "comfort features".
        assert self.kernel_ast is not None, "AST does not exist, this could be due to running " \
                                            "based on a kernel description rather than code."

        file_path = self.get_intermediate_location('main.c', machine_and_compiler_dependent=False)
        lock_mode, lock_fp = self.lock_intermediate(file_path)

        if lock_mode == fcntl.LOCK_SH:
            # use cache
            with open(file_path) as f:
                code = f.read()
        else:  # lock_mode == fcntl.LOCK_EX
            # needs update
            accessed_fields = {f.name: f for f in self.kernel_ast.fields_accessed}
            constants = []
            fields = []
            call_parameters = []
            for p in self.kernel_ast.get_parameters():
                if not p.is_field_parameter:
                    constants.append((p.symbol.name, str(p.symbol.dtype)))
                    call_parameters.append(p.symbol.name)
                else:
                    assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
                    field = accessed_fields[p.field_name]
                    dtype = str(get_base_type(p.symbol.dtype))
                    fields.append((p.field_name, dtype, prod(field.shape)))
                    call_parameters.append(p.field_name)

            header_list = get_headers(self.kernel_ast)
            includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])

            # Generate code
            code = self.CODE_TEMPLATE.render(
                kernelName=self.kernel_ast.function_name,
                fields=fields,
                constants=constants,
                call_agument_list=','.join(call_parameters),
                includes=includes)

            # Store to file
            with open(file_path, 'w') as f:
                f.write(code)
            self.release_exclusive_lock(lock_fp)  # degrade to shared lock
336
337

        return file_path, lock_fp
338

Martin Bauer's avatar
Martin Bauer committed
339
340

class KerncraftParameters(DotDict):
Martin Bauer's avatar
Martin Bauer committed
341
    def __init__(self, **kwargs):
342
        super(KerncraftParameters, self).__init__()
Martin Bauer's avatar
Martin Bauer committed
343
344
345
346
347
        self['asm_block'] = 'auto'
        self['asm_increment'] = 0
        self['cores'] = 1
        self['cache_predictor'] = 'SIM'
        self['verbose'] = 0
Jan Hönig's avatar
Jan Hönig committed
348
        self['pointer_increment'] = 'auto'
Jan Hönig's avatar
Jan Hönig committed
349
        self['iterations'] = 10
Julian Hammer's avatar
Julian Hammer committed
350
351
        self['unit'] = 'cy/CL'
        self['ignore_warnings'] = True
352
        self['incore_model'] = 'OSACA'
353
        self.update(**kwargs)
Jan Hönig's avatar
Jan Hönig committed
354

Martin Bauer's avatar
Martin Bauer committed
355
356
357
358

# ------------------------------------------- Helper functions ---------------------------------------------------------


Martin Bauer's avatar
Martin Bauer committed
359
def search_resolved_field_accesses_in_ast(ast):
Martin Bauer's avatar
Martin Bauer committed
360
361
362
363
364
365
366
367
368
    def visit(node, reads, writes):
        if not isinstance(node, SympyAssignment):
            for a in node.args:
                visit(a, reads, writes)
            return

        for expr, accesses in [(node.lhs, writes), (node.rhs, reads)]:
            accesses.update(expr.atoms(ResolvedFieldAccess))

Martin Bauer's avatar
Martin Bauer committed
369
370
371
    read_accesses = set()
    write_accesses = set()
    visit(ast, read_accesses, write_accesses)
Martin Bauer's avatar
Martin Bauer committed
372
    return read_accesses, write_accesses