From 85ed4a03541b527b079e16545ed2949eef98476b Mon Sep 17 00:00:00 2001
From: markus <markus.holzer@fau.de>
Date: Sun, 12 Jul 2020 15:10:35 +0200
Subject: [PATCH] Minor fixes and benchmark test case

---
 .../kerncraft_coupling/generate_benchmark.py  | 26 +++++++---
 .../kerncraft_coupling/kerncraft_interface.py | 28 ++++------
 .../kerncraft_coupling/templates/benchmark.c  |  2 +-
 pystencils_tests/test_kerncraft_coupling.py   | 51 ++++++++++++++-----
 4 files changed, 71 insertions(+), 36 deletions(-)

diff --git a/pystencils/kerncraft_coupling/generate_benchmark.py b/pystencils/kerncraft_coupling/generate_benchmark.py
index 7abf2578..2c618219 100644
--- a/pystencils/kerncraft_coupling/generate_benchmark.py
+++ b/pystencils/kerncraft_coupling/generate_benchmark.py
@@ -1,5 +1,7 @@
 import os
 import subprocess
+import warnings
+import tempfile
 
 from jinja2 import Environment, PackageLoader, StrictUndefined
 
@@ -64,13 +66,14 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
     return env.get_template('benchmark.c').render(**jinja_context)
 
 
-def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
+def run_c_benchmark(ast, inner_iterations, outer_iterations=3, path=None):
     """Runs the given kernel with outer loop in C
 
     Args:
-        ast:
+        ast: pystencils ast which is used to compile the benchmark file
         inner_iterations: timings are recorded around this many iterations
         outer_iterations: number of timings recorded
+        path: directory where bench.c and the compiled binary are stored. If None, a temporary directory is created
 
     Returns:
         list of times per iterations for each outer iteration
@@ -78,7 +81,11 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
     import kerncraft
 
     benchmark_code = generate_benchmark(ast, timing=True)
-    with open('bench.c', 'w') as f:
+
+    if path is None:
+        path = tempfile.mkdtemp()
+
+    with open(os.path.join(path, 'bench.c'), 'w') as f:
         f.write(benchmark_code)
 
     kerncraft_path = os.path.dirname(kerncraft.__file__)
@@ -91,13 +98,20 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
     compile_cmd += [*extra_flags,
                     os.path.join(kerncraft_path, 'headers', 'timing.c'),
                     os.path.join(kerncraft_path, 'headers', 'dummy.c'),
-                    'bench.c',
-                    '-o', 'bench',
+                    os.path.join(path, 'bench.c'),
+                    '-o', os.path.join(path, 'bench'),
                     ]
     run_compile_step(compile_cmd)
 
+    time_pre_estimation_per_iteration = float(subprocess.check_output([os.path.join('./', path, 'bench'), str(10)]))
+    benchmark_time_limit = 20
+    if benchmark_time_limit / time_pre_estimation_per_iteration < inner_iterations:
+        warn = (f"A benchmark run with {inner_iterations} inner_iterations will probably take longer than "
+                f"{benchmark_time_limit} seconds for this kernel")
+        warnings.warn(warn)
+
     results = []
     for _ in range(outer_iterations):
-        benchmark_time = float(subprocess.check_output(['./bench', str(inner_iterations)]))
+        benchmark_time = float(subprocess.check_output([os.path.join('./', path, 'bench'), str(inner_iterations)]))
         results.append(benchmark_time)
     return results
diff --git a/pystencils/kerncraft_coupling/kerncraft_interface.py b/pystencils/kerncraft_coupling/kerncraft_interface.py
index d92bd67d..7564245c 100644
--- a/pystencils/kerncraft_coupling/kerncraft_interface.py
+++ b/pystencils/kerncraft_coupling/kerncraft_interface.py
@@ -6,19 +6,17 @@ from typing import Optional
 
 from jinja2 import Environment, PackageLoader, StrictUndefined
 
-import kerncraft
 import sympy as sp
 from kerncraft.kerncraft import KernelCode
 from kerncraft.machinemodel import MachineModel
 
-from pystencils.astnodes import (
-    KernelFunction, LoopOverCoordinate, ResolvedFieldAccess, SympyAssignment)
+from pystencils.astnodes import (KernelFunction, LoopOverCoordinate, ResolvedFieldAccess, SympyAssignment)
 from pystencils.field import get_layout_from_strides
-from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark
 from pystencils.sympyextensions import count_operations_in_ast
 from pystencils.transformations import filtered_tree_iteration
 from pystencils.utils import DotDict
 from pystencils.backends.cbackend import generate_c, get_headers
+from pystencils.cpu.kernelcreation import add_openmp
 
 
 class PyStencilsKerncraftKernel(KernelCode):
@@ -38,8 +36,10 @@ class PyStencilsKerncraftKernel(KernelCode):
             assumed_layout: either 'SoA' or 'AoS' - if fields have symbolic sizes the layout of the index
                     coordinates is not known. In this case either a structures of array (SoA) or
                     array of structures (AoS) layout is assumed
+            debug_print: print debug information
+            filename: used for caching
         """
-        kerncraft.kernel.Kernel.__init__(self, machine)
+        super(KernelCode, self).__init__(machine=machine)
 
         # Initialize state
         self.asm_block = None
@@ -138,11 +138,7 @@ class PyStencilsKerncraftKernel(KernelCode):
         file_path = self.get_intermediate_location(file_name, machine_and_compiler_dependent=False)
         lock_mode, lock_fp = self.lock_intermediate(file_path)
 
-        if lock_mode == fcntl.LOCK_SH:
-            # use cache
-            with open(file_path) as f:
-                code = f.read()
-        else:  # lock_mode == fcntl.LOCK_EX
+        if lock_mode == fcntl.LOCK_EX:
             function_signature = generate_c(self.kernel_ast, dialect='c', signature_only=True)
 
             jinja_context = {
@@ -163,9 +159,8 @@ class PyStencilsKerncraftKernel(KernelCode):
         Generate and return compilable source code.
 
         Args:
-            type_: can be iaca or likwid.
             openmp: if true, openmp code will be generated
-            as_filename: writes a file with the name as_filename
+            name: kernel name
         """
         filename = 'pystencils_kernl'
         if openmp:
@@ -174,14 +169,13 @@ class PyStencilsKerncraftKernel(KernelCode):
         file_path = self.get_intermediate_location(filename, machine_and_compiler_dependent=False)
         lock_mode, lock_fp = self.lock_intermediate(file_path)
 
-        if lock_mode == fcntl.LOCK_SH:
-            # use cache
-            with open(file_path) as f:
-                code = f.read()
-        else:  # lock_mode == fcntl.LOCK_EX
+        if lock_mode == fcntl.LOCK_EX:
             header_list = get_headers(self.kernel_ast)
             includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])
 
+            if openmp:
+                add_openmp(self.kernel_ast)
+
             kernel_code = generate_c(self.kernel_ast, dialect='c')
 
             jinja_context = {
diff --git a/pystencils/kerncraft_coupling/templates/benchmark.c b/pystencils/kerncraft_coupling/templates/benchmark.c
index 2cda6787..ae70ddd6 100644
--- a/pystencils/kerncraft_coupling/templates/benchmark.c
+++ b/pystencils/kerncraft_coupling/templates/benchmark.c
@@ -90,7 +90,7 @@ int main(int argc, char **argv)
     {%- if timing %}
     timing(&wcEndTime, &cpuEndTime);
     if( warmup == 0)
-        printf("%e\\n", (wcEndTime - wcStartTime) / atoi(argv[1]) );
+        printf("%e\n", (wcEndTime - wcStartTime) / atoi(argv[1]) );
     {%- endif %}
 
   }
diff --git a/pystencils_tests/test_kerncraft_coupling.py b/pystencils_tests/test_kerncraft_coupling.py
index ac7ed5de..25efc421 100644
--- a/pystencils_tests/test_kerncraft_coupling.py
+++ b/pystencils_tests/test_kerncraft_coupling.py
@@ -1,16 +1,17 @@
 import os
-
 import numpy as np
 import pytest
 import sympy as sp
-import kerncraft
+
 from kerncraft.kernel import KernelCode
 from kerncraft.machinemodel import MachineModel
+from kerncraft.models import ECM, ECMData, Benchmark
 
 from pystencils import Assignment, Field
 from pystencils.cpu import create_kernel
 from pystencils.kerncraft_coupling import KerncraftParameters, PyStencilsKerncraftKernel
-from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark
+from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark, run_c_benchmark
+from pystencils.timeloop import TimeLoop
 
 SCRIPT_FOLDER = os.path.dirname(os.path.realpath(__file__))
 INPUT_FOLDER = os.path.join(SCRIPT_FOLDER, "kerncraft_inputs")
@@ -45,28 +46,28 @@ def analysis(kernel, model='ecmdata'):
     machine_file_path = os.path.join(INPUT_FOLDER, "Example_SandyBridgeEP_E5-2680.yml")
     machine = MachineModel(path_to_yaml=machine_file_path)
     if model == 'ecmdata':
-        model = kerncraft.models.ECMData(kernel, machine, KerncraftParameters())
+        model = ECMData(kernel, machine, KerncraftParameters())
     elif model == 'ecm':
-        model = kerncraft.models.ECM(kernel, machine, KerncraftParameters())
+        model = ECM(kernel, machine, KerncraftParameters())
         # model.analyze()
         # model.plot()
     elif model == 'benchmark':
-        model = kerncraft.models.Benchmark(kernel, machine, KerncraftParameters())
+        model = Benchmark(kernel, machine, KerncraftParameters())
     else:
-        model = kerncraft.models.ECM(kernel, machine, KerncraftParameters())
+        model = ECM(kernel, machine, KerncraftParameters())
     model.analyze()
     return model
 
 
 @pytest.mark.kerncraft
-def test_3d_7pt_OSACA():
-    # Make sure you use the intel compiler
+def test_3d_7pt_osaca():
+
     size = [20, 200, 200]
     kernel_file_path = os.path.join(INPUT_FOLDER, "3d-7pt.c")
     machine_file_path = os.path.join(INPUT_FOLDER, "Example_SandyBridgeEP_E5-2680.yml")
-    machine = MachineModel(path_to_yaml=machine_file_path)
+    machine_model = MachineModel(path_to_yaml=machine_file_path)
     with open(kernel_file_path) as kernel_file:
-        reference_kernel = KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
+        reference_kernel = KernelCode(kernel_file.read(), machine=machine_model, filename=kernel_file_path)
     reference_kernel.set_constant('M', size[0])
     reference_kernel.set_constant('N', size[1])
     assert size[1] == size[2]
@@ -80,7 +81,7 @@ def test_3d_7pt_OSACA():
 
     update_rule = Assignment(b[0, 0, 0], s * rhs)
     ast = create_kernel([update_rule])
-    k = PyStencilsKerncraftKernel(ast, machine)
+    k = PyStencilsKerncraftKernel(ast, machine=machine_model)
     analysis(k, model='ecm')
     assert reference_kernel._flops == k._flops
     # assert reference.results['cl throughput'] == analysis.results['cl throughput']
@@ -132,3 +133,29 @@ def test_3d_7pt():
 
     for e1, e2 in zip(reference.results['cycles'], result.results['cycles']):
         assert e1 == e2
+
+
+@pytest.mark.kerncraft
+def test_benchmark():  # cross-check the C benchmark harness against a TimeLoop measurement
+    size = [30, 50, 50]
+    arr = np.zeros(size)  # template array from which both symbolic fields are created
+    a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
+    b = Field.create_from_numpy_array('b', arr, index_dimensions=0)
+    s = sp.Symbol("s")
+    rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]
+    # b = s * (sum of the six direct neighbours of a); the centre value of a is not read
+    update_rule = Assignment(b[0, 0, 0], s * rhs)
+    ast = create_kernel([update_rule])
+    # run_c_benchmark returns a list holding one per-iteration time per outer iteration (one entry here)
+    c_benchmark_run = run_c_benchmark(ast, inner_iterations=1000, outer_iterations=1)
+    # from here on a and b are rebound from symbolic fields to the concrete arrays passed to the kernel
+    kernel = ast.compile()
+    a = np.full(size, fill_value=0.23)
+    b = np.full(size, fill_value=0.23)
+
+    timeloop = TimeLoop(steps=1)
+    timeloop.add_call(kernel, {'a': a, 'b': b, 's': 0.23})
+    # NOTE(review): presumably returns the runtime of one time step in seconds - confirm in TimeLoop
+    timeloop_time = timeloop.benchmark(number_of_time_steps_for_estimation=1)
+    # decimal=5 means an absolute tolerance of 1.5e-5, checked for every entry of the list
+    np.testing.assert_almost_equal(c_benchmark_run, timeloop_time, decimal=5)
-- 
GitLab