generate_benchmark.py
import os
import subprocess
import warnings
import tempfile

from jinja2 import Environment, PackageLoader, StrictUndefined

from pystencils.astnodes import PragmaBlock
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.cpu.cpujit import get_compiler_config, run_compile_step
from pystencils.data_types import get_base_type
from pystencils.include import get_pystencils_include_path
from pystencils.sympyextensions import prod


def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
    """Return C code of a benchmark program for the given kernel.

    Args:
        ast: the pystencils AST object as returned by create_kernel
        likwid: if True, likwid markers are added to the code
        openmp: relevant only if likwid=True, to generate correct likwid initialization code
        timing: add timing output to the code, printing the time per iteration to stdout

    Returns:
        C code as string
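
    Example:
        A minimal sketch, assuming a simple copy kernel on fixed-size fields
        (fixed shapes are required, see the assertion below):

        >>> import pystencils as ps
        >>> src, dst = ps.fields("src, dst: double[256, 256]")
        >>> kernel_ast = ps.create_kernel(ps.Assignment(dst.center, src.center))
        >>> c_code = generate_benchmark(kernel_ast, timing=True)  # doctest: +SKIP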
    """
    accessed_fields = {f.name: f for f in ast.fields_accessed}
    constants = []
    fields = []
    call_parameters = []
    for p in ast.get_parameters():
        if not p.is_field_parameter:
            constants.append((p.symbol.name, str(p.symbol.dtype)))
            call_parameters.append(p.symbol.name)
        else:
            assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
            field = accessed_fields[p.field_name]
            dtype = str(get_base_type(p.symbol.dtype))
            fields.append((p.field_name, dtype, prod(field.shape)))
            call_parameters.append(p.field_name)

    header_list = get_headers(ast)
    includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])

    # Strip "#pragma omp parallel" from within the kernel, because the main function
    # takes care of the parallel region when likwid and openmp are enabled
    if likwid and openmp:
        if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
            ast.body.args[0].pragma_line = ''

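    # Context for the benchmark.c Jinja template; the keys below must match the
    # placeholders used in that template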
    jinja_context = {
        'likwid': likwid,
        'openmp': openmp,
        'kernel_code': generate_c(ast, dialect='c'),
        'kernelName': ast.function_name,
        'fields': fields,
        'constants': constants,
        'call_argument_list': ",".join(call_parameters),
        'includes': includes,
        'timing': timing,
    }

    env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined)

    return env.get_template('benchmark.c').render(**jinja_context)


def run_c_benchmark(ast, inner_iterations, outer_iterations=3, path=None):
    """Runs the given kernel with outer loop in C

    Args:
73
        ast: pystencils ast which is used to compile the benchmark file
74
75
        inner_iterations: timings are recorded around this many iterations
        outer_iterations: number of timings recorded
76
        path: path where the benchmark file is stored. If None a tmp folder is created
77
78
79
80
81
82
83

    Returns:
        list of times per iterations for each outer iteration
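
    Example:
        A minimal sketch, assuming a simple copy kernel on fixed-size fields;
        requires a working compiler configuration and the kerncraft package:

        >>> import pystencils as ps
        >>> src, dst = ps.fields("src, dst: double[256, 256]")
        >>> kernel_ast = ps.create_kernel(ps.Assignment(dst.center, src.center))
        >>> times = run_c_benchmark(kernel_ast, inner_iterations=100)  # doctest: +SKIP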
    """
    import kerncraft

    benchmark_code = generate_benchmark(ast, timing=True)

    if path is None:
        path = tempfile.mkdtemp()

    with open(os.path.join(path, 'bench.c'), 'w') as f:
        f.write(benchmark_code)

    kerncraft_path = os.path.dirname(kerncraft.__file__)

    extra_flags = ['-I' + get_pystencils_include_path(),
                   '-I' + os.path.join(kerncraft_path, 'headers')]

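    # Assemble the compile command from the configured compiler and flags; kerncraft's
    # timing and dummy helpers are compiled together with the generated benchmark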
    compiler_config = get_compiler_config()
    compile_cmd = [compiler_config['command']] + compiler_config['flags'].split()
    compile_cmd += [*extra_flags,
                    os.path.join(kerncraft_path, 'headers', 'timing.c'),
                    os.path.join(kerncraft_path, 'headers', 'dummy.c'),
                    os.path.join(path, 'bench.c'),
                    '-o', os.path.join(path, 'bench'),
                    ]
    run_compile_step(compile_cmd)

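    # Short pre-run with 10 inner iterations to estimate the time per iteration,
    # so we can warn if the requested run would likely exceed the time limit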
    time_pre_estimation_per_iteration = float(subprocess.check_output([os.path.join('./', path, 'bench'), str(10)]))
    benchmark_time_limit = 20
    if benchmark_time_limit / time_pre_estimation_per_iteration < inner_iterations:
        warn = (f"A benchmark run with {inner_iterations} inner_iterations will probably take longer than "
                f"{benchmark_time_limit} seconds for this kernel")
        warnings.warn(warn)

    results = []
    for _ in range(outer_iterations):
        benchmark_time = float(subprocess.check_output([os.path.join('./', path, 'bench'), str(inner_iterations)]))
        results.append(benchmark_time)
    return results