generate_benchmark.py 5.78 KB
Newer Older
1
from jinja2 import Template
2
3
4
5
import os
import subprocess
from pystencils.include import get_pystencils_include_path
from pystencils.cpu.cpujit import get_compiler_config, run_compile_step
6
from pystencils.backends.cbackend import generate_c, get_headers
7
from pystencils.sympyextensions import prod
Martin Bauer's avatar
Martin Bauer committed
8
from pystencils.data_types import get_base_type
9
from pystencils.astnodes import PragmaBlock
10

Martin Bauer's avatar
Martin Bauer committed
11
# Jinja2 template of a stand-alone C benchmark program around one generated kernel.
#
# Template variables (all supplied by generate_benchmark):
#   includes           - "#include ..." lines required by the kernel
#   likwid             - if true, likwid marker calls are emitted around the measured loop
#   openmp             - together with likwid: main() opens the "#pragma omp parallel" region itself
#   timing             - if true, the wall-clock time per kernel call of the measured run is printed
#   kernel_code        - C source of the kernel function
#   kernelName         - name of the kernel function to call
#   call_argument_list - comma separated argument list of the kernel call
#   fields             - iterable of (field_name, dataType, size) tuples; size is the element count
#   constants          - iterable of (constantName, dataType) tuples
#
# The generated program expects the number of kernel repetitions of the measured run in argv[1].
# Before the measured run a warm-up pass calls the kernel twice.
# Field buffers are declared with the field's own element type, so that the allocation size
# (sizeof(dataType) * size), the initialisation loop and the kernel's pointer parameter agree.
benchmark_template = Template("""
#include "kerncraft.h"
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include <stdio.h>

{{ includes }}

{%- if likwid %}
#include <likwid.h>
{%- endif %}

#define RESTRICT __restrict__
#define FUNC_PREFIX
void dummy(void *);
void timing(double* wcTime, double* cpuTime);
extern int var_false;


{{kernel_code}}


int main(int argc, char **argv)
{
  {%- if likwid %}
  likwid_markerInit();
  {%- endif %}

  {%- for field_name, dataType, size in fields %}

  // Initialization {{field_name}}
  {{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
  for (unsigned long long i = 0; i < {{size}}; ++i)
    {{field_name}}[i] = 0.23;

  if(var_false)
    dummy({{field_name}});

  {%- endfor %}



  {%- for constantName, dataType in constants %}

  // Constant {{constantName}}
  {{dataType}} {{constantName}};
  {{constantName}} = 0.23;
  if(var_false)
      dummy(& {{constantName}});

  {%- endfor %}

  {%- if likwid and openmp %}
  #pragma omp parallel
  {
  likwid_markerRegisterRegion("loop");
  #pragma omp barrier
  {%- elif likwid %}
  likwid_markerRegisterRegion("loop");
  {%- endif %}

  for(int warmup = 1; warmup >= 0; --warmup) {
    int repeat = 2;
    if(warmup == 0) {
      repeat = atoi(argv[1]);
      {%- if likwid %}
      likwid_markerStartRegion("loop");
      {%- endif %}
    }

    {%- if timing %}
    double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime;
    timing(&wcStartTime, &cpuStartTime);
    {%- endif %}

    for (; repeat > 0; --repeat)
    {
      {{kernelName}}({{call_argument_list}});

      // Dummy calls
      {%- for field_name, dataType, size in fields %}
      if(var_false) dummy((void*){{field_name}});
      {%- endfor %}
      {%- for constantName, dataType in constants %}
      if(var_false) dummy((void*)&{{constantName}});
      {%- endfor %}
    }
    {%- if timing %}
    timing(&wcEndTime, &cpuEndTime);
    if( warmup == 0)
        printf("%e\\n", (wcEndTime - wcStartTime) / atoi(argv[1]) );
    {%- endif %}

  }

  {%- if likwid %}
  likwid_markerStopRegion("loop");
  {%- if openmp %}
  }
  {%- endif %}
  {%- endif %}

  {%- if likwid %}
  likwid_markerClose();
  {%- endif %}
}
""")


122
123
124
125
126
127
128
129
130
131
132
133
def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
    """Return C code of a benchmark program for the given kernel.

    The passed AST is left unmodified: when the kernel's leading OpenMP pragma has to be
    suppressed for code generation, it is restored before this function returns.

    Args:
        ast: the pystencils AST object as returned by create_kernel
        likwid: if True likwid markers are added to the code
        openmp: relevant only if likwid=True, to generate correct likwid initialization code
        timing: add timing output to the code, prints time per iteration to stdout

    Returns:
        C code as string

    Raises:
        AssertionError: if a field parameter of the kernel is not a field pointer
    """
    accessed_fields = {f.name: f for f in ast.fields_accessed}
    constants = []
    fields = []
    call_parameters = []
    for p in ast.get_parameters():
        if not p.is_field_parameter:
            # Plain (non-field) kernel argument: declared as a scalar in main()
            constants.append((p.symbol.name, str(p.symbol.dtype)))
            call_parameters.append(p.symbol.name)
        else:
            assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
            field = accessed_fields[p.field_name]
            dtype = str(get_base_type(p.symbol.dtype))
            # The buffer size handed to the template is the total number of elements
            fields.append((p.field_name, dtype, prod(field.shape)))
            call_parameters.append(p.field_name)

    header_list = get_headers(ast)
    includes = "\n".join("#include %s" % (include_file,) for include_file in header_list)

    # Strip "#pragma omp parallel" from within kernel, because main function takes care of that
    # when likwid and openmp are enabled
    pragma_block = None
    if likwid and openmp:
        if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
            pragma_block = ast.body.args[0]

    if pragma_block is None:
        kernel_code = generate_c(ast, dialect='c')
    else:
        # Blank the pragma only while printing the kernel, so the caller's AST is not
        # permanently altered by generating a benchmark from it.
        saved_pragma_line = pragma_block.pragma_line
        pragma_block.pragma_line = ''
        try:
            kernel_code = generate_c(ast, dialect='c')
        finally:
            pragma_block.pragma_line = saved_pragma_line

    args = {
        'likwid': likwid,
        'openmp': openmp,
        'kernel_code': kernel_code,
        'kernelName': ast.function_name,
        'fields': fields,
        'constants': constants,
        'call_argument_list': ",".join(call_parameters),
        'includes': includes,
        'timing': timing,
    }
    return benchmark_template.render(**args)
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208


def run_c_benchmark(ast, inner_iterations, outer_iterations=3, path=None):
    """Runs the given kernel with outer loop in C

    The benchmark program is generated with timing output, compiled together with
    kerncraft's ``timing.c`` and ``dummy.c`` and executed ``outer_iterations`` times.

    Args:
        ast: kernel AST, passed on to generate_benchmark
        inner_iterations: timings are recorded around this many iterations
        outer_iterations: number of timings recorded
        path: directory in which ``bench.c`` and the ``bench`` executable are created and kept.
              If None (default) a temporary directory is used and removed afterwards, so the
              current working directory is neither required to be writable nor polluted.

    Returns:
        list of times per iterations for each outer iteration
    """
    import tempfile

    import kerncraft

    benchmark_code = generate_benchmark(ast, timing=True)
    kerncraft_headers = os.path.join(os.path.dirname(kerncraft.__file__), 'headers')

    if path is None:
        with tempfile.TemporaryDirectory() as build_dir:
            return _build_and_run_benchmark(benchmark_code, kerncraft_headers, build_dir,
                                            inner_iterations, outer_iterations)

    # Absolute path, so that the executable is never looked up via PATH
    build_dir = os.path.abspath(path)
    os.makedirs(build_dir, exist_ok=True)
    return _build_and_run_benchmark(benchmark_code, kerncraft_headers, build_dir,
                                    inner_iterations, outer_iterations)


def _build_and_run_benchmark(benchmark_code, kerncraft_headers, build_dir,
                             inner_iterations, outer_iterations):
    """Write, compile and repeatedly execute the benchmark inside build_dir; return the timings."""
    source_file = os.path.join(build_dir, 'bench.c')
    executable = os.path.join(build_dir, 'bench')

    with open(source_file, 'w') as f:
        f.write(benchmark_code)

    extra_flags = ['-I' + get_pystencils_include_path(),
                   '-I' + kerncraft_headers]

    compiler_config = get_compiler_config()
    compile_cmd = [compiler_config['command']] + compiler_config['flags'].split()
    compile_cmd += [*extra_flags,
                    os.path.join(kerncraft_headers, 'timing.c'),
                    os.path.join(kerncraft_headers, 'dummy.c'),
                    source_file,
                    '-o', executable,
                    ]
    run_compile_step(compile_cmd)

    # The program prints a single number (time per iteration) to stdout on each run
    run_cmd = [executable, str(inner_iterations)]
    return [float(subprocess.check_output(run_cmd)) for _ in range(outer_iterations)]