From 85ed4a03541b527b079e16545ed2949eef98476b Mon Sep 17 00:00:00 2001 From: markus <markus.holzer@fau.de> Date: Sun, 12 Jul 2020 15:10:35 +0200 Subject: [PATCH] Minor fixes and benchmark test case --- .../kerncraft_coupling/generate_benchmark.py | 26 +++++++--- .../kerncraft_coupling/kerncraft_interface.py | 28 ++++------ .../kerncraft_coupling/templates/benchmark.c | 2 +- pystencils_tests/test_kerncraft_coupling.py | 51 ++++++++++++++----- 4 files changed, 71 insertions(+), 36 deletions(-) diff --git a/pystencils/kerncraft_coupling/generate_benchmark.py b/pystencils/kerncraft_coupling/generate_benchmark.py index 7abf2578..2c618219 100644 --- a/pystencils/kerncraft_coupling/generate_benchmark.py +++ b/pystencils/kerncraft_coupling/generate_benchmark.py @@ -1,5 +1,7 @@ import os import subprocess +import warnings +import tempfile from jinja2 import Environment, PackageLoader, StrictUndefined @@ -64,13 +66,14 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False): return env.get_template('benchmark.c').render(**jinja_context) -def run_c_benchmark(ast, inner_iterations, outer_iterations=3): +def run_c_benchmark(ast, inner_iterations, outer_iterations=3, path=None): """Runs the given kernel with outer loop in C Args: - ast: + ast: pystencils ast which is used to compile the benchmark file inner_iterations: timings are recorded around this many iterations outer_iterations: number of timings recorded + path: path where the benchmark file is stored. If None a tmp folder is created Returns: list of times per iterations for each outer iteration @@ -78,7 +81,11 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3): import kerncraft benchmark_code = generate_benchmark(ast, timing=True) - with open('bench.c', 'w') as f: + + if path is None: + path = tempfile.mkdtemp() + + with open(os.path.join(path, 'bench.c'), 'w') as f: f.write(benchmark_code) kerncraft_path = os.path.dirname(kerncraft.__file__) @@ -91,13 +98,20 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3): compile_cmd += [*extra_flags, os.path.join(kerncraft_path, 'headers', 'timing.c'), os.path.join(kerncraft_path, 'headers', 'dummy.c'), - 'bench.c', - '-o', 'bench', + os.path.join(path, 'bench.c'), + '-o', os.path.join(path, 'bench'), ] run_compile_step(compile_cmd) + time_pre_estimation_per_iteration = float(subprocess.check_output([os.path.join('./', path, 'bench'), str(10)])) + benchmark_time_limit = 20 + if benchmark_time_limit / time_pre_estimation_per_iteration < inner_iterations: + warn = (f"A benchmark run with {inner_iterations} inner_iterations will probably take longer than " + f"{benchmark_time_limit} seconds for this kernel") + warnings.warn(warn) + results = [] for _ in range(outer_iterations): - benchmark_time = float(subprocess.check_output(['./bench', str(inner_iterations)])) + benchmark_time = float(subprocess.check_output([os.path.join('./', path, 'bench'), str(inner_iterations)])) results.append(benchmark_time) return results diff --git a/pystencils/kerncraft_coupling/kerncraft_interface.py b/pystencils/kerncraft_coupling/kerncraft_interface.py index d92bd67d..7564245c 100644 --- a/pystencils/kerncraft_coupling/kerncraft_interface.py +++ b/pystencils/kerncraft_coupling/kerncraft_interface.py @@ -6,19 +6,17 @@ from typing import Optional from jinja2 import Environment, PackageLoader, StrictUndefined -import kerncraft import sympy as sp from kerncraft.kerncraft import KernelCode from kerncraft.machinemodel import MachineModel -from pystencils.astnodes import ( - KernelFunction, LoopOverCoordinate, ResolvedFieldAccess, SympyAssignment) +from pystencils.astnodes import (KernelFunction, LoopOverCoordinate, ResolvedFieldAccess, SympyAssignment) from pystencils.field import get_layout_from_strides -from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark from pystencils.sympyextensions import count_operations_in_ast from pystencils.transformations import filtered_tree_iteration from pystencils.utils import DotDict from pystencils.backends.cbackend import generate_c, get_headers +from pystencils.cpu.kernelcreation import add_openmp class PyStencilsKerncraftKernel(KernelCode): @@ -38,8 +36,10 @@ class PyStencilsKerncraftKernel(KernelCode): assumed_layout: either 'SoA' or 'AoS' - if fields have symbolic sizes the layout of the index coordinates is not known. In this case either a structures of array (SoA) or array of structures (AoS) layout is assumed + debug_print: print debug information + filename: used for caching """ - kerncraft.kernel.Kernel.__init__(self, machine) + super(KernelCode, self).__init__(machine=machine) # Initialize state self.asm_block = None @@ -138,11 +138,7 @@ class PyStencilsKerncraftKernel(KernelCode): file_path = self.get_intermediate_location(file_name, machine_and_compiler_dependent=False) lock_mode, lock_fp = self.lock_intermediate(file_path) - if lock_mode == fcntl.LOCK_SH: - # use cache - with open(file_path) as f: - code = f.read() - else: # lock_mode == fcntl.LOCK_EX + if lock_mode == fcntl.LOCK_EX: function_signature = generate_c(self.kernel_ast, dialect='c', signature_only=True) jinja_context = { @@ -163,9 +159,8 @@ class PyStencilsKerncraftKernel(KernelCode): Generate and return compilable source code. Args: - type_: can be iaca or likwid. openmp: if true, openmp code will be generated - as_filename: writes a file with the name as_filename + name: kernel name """ filename = 'pystencils_kernl' if openmp: @@ -174,14 +169,13 @@ class PyStencilsKerncraftKernel(KernelCode): file_path = self.get_intermediate_location(filename, machine_and_compiler_dependent=False) lock_mode, lock_fp = self.lock_intermediate(file_path) - if lock_mode == fcntl.LOCK_SH: - # use cache - with open(file_path) as f: - code = f.read() - else: # lock_mode == fcntl.LOCK_EX + if lock_mode == fcntl.LOCK_EX: header_list = get_headers(self.kernel_ast) includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list]) + if openmp: + add_openmp(self.kernel_ast) + kernel_code = generate_c(self.kernel_ast, dialect='c') jinja_context = { diff --git a/pystencils/kerncraft_coupling/templates/benchmark.c b/pystencils/kerncraft_coupling/templates/benchmark.c index 2cda6787..ae70ddd6 100644 --- a/pystencils/kerncraft_coupling/templates/benchmark.c +++ b/pystencils/kerncraft_coupling/templates/benchmark.c @@ -90,7 +90,7 @@ int main(int argc, char **argv) {%- if timing %} timing(&wcEndTime, &cpuEndTime); if( warmup == 0) - printf("%e\\n", (wcEndTime - wcStartTime) / atoi(argv[1]) ); + printf("%e\n", (wcEndTime - wcStartTime) / atoi(argv[1]) ); {%- endif %} } diff --git a/pystencils_tests/test_kerncraft_coupling.py b/pystencils_tests/test_kerncraft_coupling.py index ac7ed5de..25efc421 100644 --- a/pystencils_tests/test_kerncraft_coupling.py +++ b/pystencils_tests/test_kerncraft_coupling.py @@ -1,16 +1,17 @@ import os - import numpy as np import pytest import sympy as sp -import kerncraft + from kerncraft.kernel import KernelCode from kerncraft.machinemodel import MachineModel +from kerncraft.models import ECM, ECMData, Benchmark from pystencils import Assignment, Field from pystencils.cpu import create_kernel from pystencils.kerncraft_coupling import KerncraftParameters, PyStencilsKerncraftKernel -from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark +from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark, run_c_benchmark +from pystencils.timeloop import TimeLoop SCRIPT_FOLDER = os.path.dirname(os.path.realpath(__file__)) INPUT_FOLDER = os.path.join(SCRIPT_FOLDER, "kerncraft_inputs") @@ -45,28 +46,28 @@ def analysis(kernel, model='ecmdata'): machine_file_path = os.path.join(INPUT_FOLDER, "Example_SandyBridgeEP_E5-2680.yml") machine = MachineModel(path_to_yaml=machine_file_path) if model == 'ecmdata': - model = kerncraft.models.ECMData(kernel, machine, KerncraftParameters()) + model = ECMData(kernel, machine, KerncraftParameters()) elif model == 'ecm': - model = kerncraft.models.ECM(kernel, machine, KerncraftParameters()) + model = ECM(kernel, machine, KerncraftParameters()) # model.analyze() # model.plot() elif model == 'benchmark': - model = kerncraft.models.Benchmark(kernel, machine, KerncraftParameters()) + model = Benchmark(kernel, machine, KerncraftParameters()) else: - model = kerncraft.models.ECM(kernel, machine, KerncraftParameters()) + model = ECM(kernel, machine, KerncraftParameters()) model.analyze() return model @pytest.mark.kerncraft -def test_3d_7pt_OSACA(): - # Make sure you use the intel compiler +def test_3d_7pt_osaca(): + size = [20, 200, 200] kernel_file_path = os.path.join(INPUT_FOLDER, "3d-7pt.c") machine_file_path = os.path.join(INPUT_FOLDER, "Example_SandyBridgeEP_E5-2680.yml") - machine = MachineModel(path_to_yaml=machine_file_path) + machine_model = MachineModel(path_to_yaml=machine_file_path) with open(kernel_file_path) as kernel_file: - reference_kernel = KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path) + reference_kernel = KernelCode(kernel_file.read(), machine=machine_model, filename=kernel_file_path) reference_kernel.set_constant('M', size[0]) reference_kernel.set_constant('N', size[1]) assert size[1] == size[2] @@ -80,7 +81,7 @@ def test_3d_7pt_OSACA(): update_rule = Assignment(b[0, 0, 0], s * rhs) ast = create_kernel([update_rule]) - k = PyStencilsKerncraftKernel(ast, machine) + k = PyStencilsKerncraftKernel(ast, machine=machine_model) analysis(k, model='ecm') assert reference_kernel._flops == k._flops # assert reference.results['cl throughput'] == analysis.results['cl throughput'] @@ -132,3 +133,29 @@ def test_3d_7pt(): for e1, e2 in zip(reference.results['cycles'], result.results['cycles']): assert e1 == e2 + + +@pytest.mark.kerncraft +def test_benchmark(): + size = [30, 50, 50] + arr = np.zeros(size) + a = Field.create_from_numpy_array('a', arr, index_dimensions=0) + b = Field.create_from_numpy_array('b', arr, index_dimensions=0) + s = sp.Symbol("s") + rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1] + + update_rule = Assignment(b[0, 0, 0], s * rhs) + ast = create_kernel([update_rule]) + + c_benchmark_run = run_c_benchmark(ast, inner_iterations=1000, outer_iterations=1) + + kernel = ast.compile() + a = np.full(size, fill_value=0.23) + b = np.full(size, fill_value=0.23) + + timeloop = TimeLoop(steps=1) + timeloop.add_call(kernel, {'a': a, 'b': b, 's': 0.23}) + + timeloop_time = timeloop.benchmark(number_of_time_steps_for_estimation=1) + + np.testing.assert_almost_equal(c_benchmark_run, timeloop_time, decimal=5) -- GitLab