diff --git a/pystencils/kerncraft_coupling/generate_benchmark.py b/pystencils/kerncraft_coupling/generate_benchmark.py
index c95c420d6c62d4428354f934a4e36fb7e7398b9d..9a012d6c2a75c98faae05e6815dd3883c7d4d2e4 100644
--- a/pystencils/kerncraft_coupling/generate_benchmark.py
+++ b/pystencils/kerncraft_coupling/generate_benchmark.py
@@ -1,7 +1,9 @@
-import os
 import subprocess
+import warnings
+import tempfile
+from pathlib import Path
 
-from jinja2 import Template
+from jinja2 import Environment, PackageLoader, StrictUndefined
 
 from pystencils.astnodes import PragmaBlock
 from pystencils.backends.cbackend import generate_c, get_headers
@@ -10,116 +12,6 @@ from pystencils.data_types import get_base_type
 from pystencils.include import get_pystencils_include_path
 from pystencils.sympyextensions import prod
 
-benchmark_template = Template("""
-#include "kerncraft.h"
-#include <stdlib.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <math.h>
-#include <stdio.h>
-
-{{ includes }}
-
-{%- if likwid %}
-#include <likwid.h>
-{%- endif %}
-
-#define RESTRICT __restrict__
-#define FUNC_PREFIX
-void dummy(void *);
-void timing(double* wcTime, double* cpuTime);
-extern int var_false;
-
-
-{{kernel_code}}
-
-
-int main(int argc, char **argv)
-{
-  {%- if likwid %}
-  likwid_markerInit();
-  {%- endif %}
-
-  {%- for field_name, dataType, size in fields %}
-
-  // Initialization {{field_name}}
-  double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
-  for (unsigned long long i = 0; i < {{size}}; ++i)
-    {{field_name}}[i] = 0.23;
-
-  if(var_false)
-    dummy({{field_name}});
-
-  {%- endfor %}
-
-
-
-  {%- for constantName, dataType in constants %}
-
-  // Constant {{constantName}}
-  {{dataType}} {{constantName}};
-  {{constantName}} = 0.23;
-  if(var_false)
-      dummy(& {{constantName}});
-
-  {%- endfor %}
-
-  {%- if likwid and openmp %}
-  #pragma omp parallel
-  {
-  likwid_markerRegisterRegion("loop");
-  #pragma omp barrier
-  {%- elif likwid %}
-  likwid_markerRegisterRegion("loop");
-  {%- endif %}
-
-  for(int warmup = 1; warmup >= 0; --warmup) {
-    int repeat = 2;
-    if(warmup == 0) {
-      repeat = atoi(argv[1]);
-      {%- if likwid %}
-      likwid_markerStartRegion("loop");
-      {%- endif %}
-    }
-    
-    {%- if timing %}
-    double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime;
-    timing(&wcStartTime, &cpuStartTime);
-    {%- endif %}
-    
-    for (; repeat > 0; --repeat)
-    {
-      {{kernelName}}({{call_argument_list}});
-
-      // Dummy calls
-      {%- for field_name, dataType, size in fields %}
-      if(var_false) dummy((void*){{field_name}});
-      {%- endfor %}
-      {%- for constantName, dataType in constants %}
-      if(var_false) dummy((void*)&{{constantName}});
-      {%- endfor %}
-    }
-    {%- if timing %}
-    timing(&wcEndTime, &cpuEndTime);
-    if( warmup == 0)
-        printf("%e\\n", (wcEndTime - wcStartTime) / atoi(argv[1]) );
-    {%- endif %}
-
-  }
-
-  {%- if likwid %}
-  likwid_markerStopRegion("loop");
-  {%- if openmp %}
-  }
-  {%- endif %}
-  {%- endif %}
-
-  {%- if likwid %}
-  likwid_markerClose();
-  {%- endif %}
-}
-""")
-
 
 def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
     """Return C code of a benchmark program for the given kernel.
@@ -157,7 +49,7 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
         if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
             ast.body.args[0].pragma_line = ''
 
-    args = {
+    jinja_context = {
         'likwid': likwid,
         'openmp': openmp,
         'kernel_code': generate_c(ast, dialect='c'),
@@ -168,16 +60,20 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
         'includes': includes,
         'timing': timing,
     }
-    return benchmark_template.render(**args)
 
+    env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined)
+
+    return env.get_template('benchmark.c').render(**jinja_context)
 
-def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
+
+def run_c_benchmark(ast, inner_iterations, outer_iterations=3, path=None):
     """Runs the given kernel with outer loop in C
 
     Args:
-        ast:
+        ast: pystencils ast which is used to compile the benchmark file
         inner_iterations: timings are recorded around this many iterations
         outer_iterations: number of timings recorded
+        path: directory in which bench.c and the compiled binary are stored. If None, a temporary directory is created
 
     Returns:
         list of times per iterations for each outer iteration
@@ -185,26 +81,45 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
     import kerncraft
 
     benchmark_code = generate_benchmark(ast, timing=True)
-    with open('bench.c', 'w') as f:
+
+    if path is None:
+        path = tempfile.mkdtemp()
+
+    # Accept str as well as any os.PathLike object
+    path = Path(path)
+    source_file = path / 'bench.c'
+    # Absolute path: a bare 'bench' would be searched on PATH instead of being run from ``path``
+    executable = str((path / 'bench').resolve())
+
+    with open(source_file, 'w') as f:
         f.write(benchmark_code)
 
-    kerncraft_path = os.path.dirname(kerncraft.__file__)
+    kerncraft_path = Path(kerncraft.__file__).parent
 
     extra_flags = ['-I' + get_pystencils_include_path(),
-                   '-I' + os.path.join(kerncraft_path, 'headers')]
+                   '-I' + str(kerncraft_path / 'headers')]
 
     compiler_config = get_compiler_config()
     compile_cmd = [compiler_config['command']] + compiler_config['flags'].split()
     compile_cmd += [*extra_flags,
-                    os.path.join(kerncraft_path, 'headers', 'timing.c'),
-                    os.path.join(kerncraft_path, 'headers', 'dummy.c'),
-                    'bench.c',
-                    '-o', 'bench',
+                    str(kerncraft_path / 'headers' / 'timing.c'),
+                    str(kerncraft_path / 'headers' / 'dummy.c'),
+                    str(source_file),
+                    '-o', executable,
                     ]
     run_compile_step(compile_cmd)
 
+    # Short trial run to estimate the time per iteration before the real measurement
+    time_pre_estimation_per_iteration = float(subprocess.check_output([executable, str(10)]))
+    benchmark_time_limit = 20
+    # Multiplication instead of division: the trial run may report 0.0 for very cheap kernels
+    if time_pre_estimation_per_iteration * inner_iterations > benchmark_time_limit:
+        warn = (f"A benchmark run with {inner_iterations} inner_iterations will probably take longer than "
+                f"{benchmark_time_limit} seconds for this kernel")
+        warnings.warn(warn)
+
     results = []
     for _ in range(outer_iterations):
-        benchmark_time = float(subprocess.check_output(['./bench', str(inner_iterations)]))
+        benchmark_time = float(subprocess.check_output([executable, str(inner_iterations)]))
         results.append(benchmark_time)
     return results
diff --git a/pystencils/kerncraft_coupling/kerncraft_interface.py b/pystencils/kerncraft_coupling/kerncraft_interface.py
index bd1771493434d1166bf899cc8a8188994bbd2101..7564245c049c58288387a4a23918622d974aaaf3 100644
--- a/pystencils/kerncraft_coupling/kerncraft_interface.py
+++ b/pystencils/kerncraft_coupling/kerncraft_interface.py
@@ -1,20 +1,22 @@
 import warnings
+import fcntl
 from collections import defaultdict
 from tempfile import TemporaryDirectory
 from typing import Optional
 
-import kerncraft
+from jinja2 import Environment, PackageLoader, StrictUndefined
+
 import sympy as sp
 from kerncraft.kerncraft import KernelCode
 from kerncraft.machinemodel import MachineModel
 
-from pystencils.astnodes import (
-    KernelFunction, LoopOverCoordinate, ResolvedFieldAccess, SympyAssignment)
+from pystencils.astnodes import (KernelFunction, LoopOverCoordinate, ResolvedFieldAccess, SympyAssignment)
 from pystencils.field import get_layout_from_strides
-from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark
 from pystencils.sympyextensions import count_operations_in_ast
 from pystencils.transformations import filtered_tree_iteration
 from pystencils.utils import DotDict
+from pystencils.backends.cbackend import generate_c, get_headers
+from pystencils.cpu.kernelcreation import add_openmp
 
 
 class PyStencilsKerncraftKernel(KernelCode):
@@ -34,8 +36,10 @@ class PyStencilsKerncraftKernel(KernelCode):
             assumed_layout: either 'SoA' or 'AoS' - if fields have symbolic sizes the layout of the index
                     coordinates is not known. In this case either a structures of array (SoA) or
                     array of structures (AoS) layout is assumed
+            debug_print: print debug information
+            filename: used for caching
         """
-        kerncraft.kernel.Kernel.__init__(self, machine)
+        super(KernelCode, self).__init__(machine=machine)
 
         # Initialize state
         self.asm_block = None
@@ -96,7 +100,7 @@ class PyStencilsKerncraftKernel(KernelCode):
         for field in fields_accessed:
             layout = get_layout_tuple(field)
             permuted_shape = list(field.shape[i] for i in layout)
-            self.set_variable(field.name, str(field.dtype), tuple(permuted_shape))
+            self.set_variable(field.name, tuple([str(field.dtype)]), tuple(permuted_shape))
 
         # Scalars may be safely ignored
         # for param in ast.get_parameters():
@@ -129,24 +133,64 @@ class PyStencilsKerncraftKernel(KernelCode):
             print("-----------------------------  FLOPS -------------------------------")
             pprint(self._flops)
 
-    def as_code(self, type_='iaca', openmp=False, as_filename=False):
+    def get_kernel_header(self, name='pystencils_kernel'):  # NOTE(review): ``name`` is never read below
+        file_name = "pystencils_kernel.h"
+        file_path = self.get_intermediate_location(file_name, machine_and_compiler_dependent=False)
+        lock_mode, lock_fp = self.lock_intermediate(file_path)
+
+        if lock_mode == fcntl.LOCK_EX:  # only with an exclusive lock is the header (re)written
+            function_signature = generate_c(self.kernel_ast, dialect='c', signature_only=True)
+
+            jinja_context = {
+                'function_signature': function_signature,
+            }
+
+            env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined)
+            file_header = env.get_template('kernel.h').render(**jinja_context)
+            with open(file_path, 'w') as f:
+                f.write(file_header)
+
+            fcntl.flock(lock_fp, fcntl.LOCK_SH)  # degrade to shared lock
+
+        return file_path, lock_fp  # lock_fp is handed to the caller still open
+
+    def get_kernel_code(self, openmp=False, name='pystencils_kernl'):
         """
         Generate and return compilable source code.
 
         Args:
-            type_: can be iaca or likwid.
             openmp: if true, openmp code will be generated
-            as_filename:
+            name: kernel name (NOTE(review): not read by this method; the file name is fixed)
         """
-        code = generate_benchmark(self.kernel_ast, likwid=type_ == 'likwid', openmp=openmp)
-        if as_filename:
-            fp, already_available = self._get_intermediate_file(f'kernel_{type_}.c',
-                                                                machine_and_compiler_dependent=False)
-            if not already_available:
-                fp.write(code)
-            return fp.name
-        else:
-            return code
+        filename = 'pystencils_kernl'
+        if openmp:
+            filename += '-omp'  # separate intermediate file for the OpenMP variant
+        filename += '.c'
+        file_path = self.get_intermediate_location(filename, machine_and_compiler_dependent=False)
+        lock_mode, lock_fp = self.lock_intermediate(file_path)
+
+        if lock_mode == fcntl.LOCK_EX:  # only with an exclusive lock is the source (re)written
+            header_list = get_headers(self.kernel_ast)
+            includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])
+
+            if openmp:
+                add_openmp(self.kernel_ast)  # result discarded; presumably mutates the AST in place - verify
+
+            kernel_code = generate_c(self.kernel_ast, dialect='c')
+
+            jinja_context = {
+                'includes': includes,
+                'kernel_code': kernel_code,
+            }
+
+            env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined)
+            file_header = env.get_template('kernel.c').render(**jinja_context)  # rendered C source
+            with open(file_path, 'w') as f:
+                f.write(file_header)
+
+            fcntl.flock(lock_fp, fcntl.LOCK_SH)  # degrade to shared lock
+
+        return file_path, lock_fp  # lock_fp is handed to the caller still open
 
 
 class KerncraftParameters(DotDict):
@@ -161,6 +205,7 @@ class KerncraftParameters(DotDict):
         self['iterations'] = 10
         self['unit'] = 'cy/CL'
         self['ignore_warnings'] = True
+        self['incore_model'] = 'OSACA'
 
 
 # ------------------------------------------- Helper functions ---------------------------------------------------------
diff --git a/pystencils/kerncraft_coupling/templates/benchmark.c b/pystencils/kerncraft_coupling/templates/benchmark.c
new file mode 100644
index 0000000000000000000000000000000000000000..ae70ddd6775a45c0709e95d57cef061da2a4b6b0
--- /dev/null
+++ b/pystencils/kerncraft_coupling/templates/benchmark.c
@@ -0,0 +1,108 @@
+{# Benchmark driver template: allocates and fills the fields, then runs and optionally times the kernel. #}
+#include "kerncraft.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <math.h>
+#include <stdio.h>
+
+{{ includes }}
+
+{%- if likwid %}
+#include <likwid.h>
+{%- endif %}
+
+#define RESTRICT __restrict__
+#define FUNC_PREFIX
+void dummy(void *);
+void timing(double* wcTime, double* cpuTime);
+extern int var_false;
+
+
+{{kernel_code}}
+
+
+int main(int argc, char **argv)
+{
+  {%- if likwid %}
+  likwid_markerInit();
+  {%- endif %}
+
+  {%- for field_name, dataType, size in fields %}
+
+  // Initialization {{field_name}}
+  {{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
+  for (unsigned long long i = 0; i < {{size}}; ++i)
+    {{field_name}}[i] = 0.23;
+
+  if(var_false)
+    dummy({{field_name}});
+
+  {%- endfor %}
+
+
+
+  {%- for constantName, dataType in constants %}
+
+  // Constant {{constantName}}
+  {{dataType}} {{constantName}};
+  {{constantName}} = 0.23;
+  if(var_false)
+      dummy(& {{constantName}});
+
+  {%- endfor %}
+
+  {%- if likwid and openmp %}
+  #pragma omp parallel
+  {
+  likwid_markerRegisterRegion("loop");
+  #pragma omp barrier
+  {%- elif likwid %}
+  likwid_markerRegisterRegion("loop");
+  {%- endif %}
+
+  for(int warmup = 1; warmup >= 0; --warmup) {
+    int repeat = 2;
+    if(warmup == 0) {
+      repeat = atoi(argv[1]);
+      {%- if likwid %}
+      likwid_markerStartRegion("loop");
+      {%- endif %}
+    }
+    
+    {%- if timing %}
+    double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime;
+    timing(&wcStartTime, &cpuStartTime);
+    {%- endif %}
+    
+    for (; repeat > 0; --repeat)
+    {
+      {{kernelName}}({{call_argument_list}});
+
+      // Dummy calls
+      {%- for field_name, dataType, size in fields %}
+      if(var_false) dummy((void*){{field_name}});
+      {%- endfor %}
+      {%- for constantName, dataType in constants %}
+      if(var_false) dummy((void*)&{{constantName}});
+      {%- endfor %}
+    }
+    {%- if timing %}
+    timing(&wcEndTime, &cpuEndTime);
+    if( warmup == 0)
+        printf("%e\n", (wcEndTime - wcStartTime) / atoi(argv[1]) );
+    {%- endif %}
+
+  }
+
+  {%- if likwid %}
+  likwid_markerStopRegion("loop");
+  {%- if openmp %}
+  }
+  {%- endif %}
+  {%- endif %}
+
+  {%- if likwid %}
+  likwid_markerClose();
+  {%- endif %}
+}
diff --git a/pystencils/kerncraft_coupling/templates/kernel.c b/pystencils/kerncraft_coupling/templates/kernel.c
new file mode 100644
index 0000000000000000000000000000000000000000..47fbf7cf25eda318a8fcecffa1477f5738eb1abc
--- /dev/null
+++ b/pystencils/kerncraft_coupling/templates/kernel.c
@@ -0,0 +1,18 @@
+{# Kernel-only translation unit: common headers and declarations followed by the generated kernel code. #}
+#include "kerncraft.h"
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <math.h>
+#include <stdio.h>
+
+{{ includes }}
+
+#define RESTRICT __restrict__
+#define FUNC_PREFIX
+void dummy(void *);
+void timing(double* wcTime, double* cpuTime);
+extern int var_false;
+
+
+{{kernel_code}}
\ No newline at end of file
diff --git a/pystencils/kerncraft_coupling/templates/kernel.h b/pystencils/kerncraft_coupling/templates/kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..539d51f928ca5d702dc2dad8de7396a505e5c5ee
--- /dev/null
+++ b/pystencils/kerncraft_coupling/templates/kernel.h
@@ -0,0 +1,3 @@
+#define FUNC_PREFIX
+
+{{function_signature}}
\ No newline at end of file
diff --git a/pystencils_tests/kerncraft_inputs/Example_SandyBridgeEP_E5-2680.yml b/pystencils_tests/kerncraft_inputs/Example_SandyBridgeEP_E5-2680.yml
new file mode 100644
index 0000000000000000000000000000000000000000..37889b8fee94242855e51d5d31a3118dc367bfd6
--- /dev/null
+++ b/pystencils_tests/kerncraft_inputs/Example_SandyBridgeEP_E5-2680.yml
@@ -0,0 +1,974 @@
+kerncraft version: 0.8.3.dev0
+model name: Intel(R) Xeon(R) CPU E5-2680 0 @ 2.70GHz
+model type: Intel Xeon SandyBridge EN/EP processor
+clock: 2.7 GHz
+
+sockets: 2
+cores per socket: 8
+threads per core: 2
+NUMA domains per socket: 1
+cores per NUMA domain: 8
+
+in-core model: !!omap
+  - IACA: SNB
+  - OSACA: SNB
+  - LLVM-MCA: -mcpu=sandybridge
+isa: x86
+
+FLOPs per cycle:
+  SP: {total: 16, ADD: 8, MUL: 8}
+  DP: {total: 8, ADD: 4, MUL: 4}
+
+compiler: !!omap
+- icc: -O3 -xAVX -fno-alias -qopenmp
+- clang: -O3 -march=corei7-avx -mtune=corei7-avx -D_POSIX_C_SOURCE=200809L -fopenmp
+- gcc: -O3 -march=corei7-avx -D_POSIX_C_SOURCE=200809L -fopenmp -lm
+
+non-overlapping model:
+  ports: [2D, 3D]
+  performance counter metric: T_nOL + T_L1L2 + T_L2L3 + T_L3MEM
+overlapping model:
+  ports: ['0', 0DV, '1', '2', '3', '4', '5']
+  performance counter metric: Max(UOPS_DISPATCHED_PORT_PORT_0:PMC[0-3], UOPS_DISPATCHED_PORT_PORT_1:PMC[0-3],
+    UOPS_DISPATCHED_PORT_PORT_4:PMC[0-3], UOPS_DISPATCHED_PORT_PORT_5:PMC[0-3])
+
+cacheline size: 64 B
+memory hierarchy:
+- level: L1
+  cache per group: {sets: 64, ways: 8, cl_size: 64, replacement_policy: LRU, write_allocate: true,
+    write_back: true, load_from: L2, store_to: L2}
+  cores per group: 1
+  threads per group: 2
+  groups: 16
+  performance counter metrics:
+    accesses: MEM_UOPS_RETIRED_LOADS:PMC[0-3] + MEM_UOPS_RETIRED_STORES:PMC[0-3]
+    misses: L1D_REPLACEMENT:PMC[0-3]
+    evicts: L1D_M_EVICT:PMC[0-3]
+  upstream throughput: [architecture code analyzer, [2D, 3D]]
+  transfers overlap: false
+- level: L2
+  cache per group: {sets: 512, ways: 8, cl_size: 64, replacement_policy: LRU, write_allocate: true,
+    write_back: true, load_from: L3, store_to: L3}
+  cores per group: 1
+  threads per group: 2
+  groups: 16
+  upstream throughput: [32 B/cy, half-duplex]
+  transfers overlap: false
+  performance counter metrics:
+    accesses: L1D_REPLACEMENT:PMC[0-3] + L1D_M_EVICT:PMC[0-3]
+    misses: L2_LINES_IN_ALL:PMC[0-3]
+    evicts: L2_TRANS_L2_WB:PMC[0-3]
+- level: L3
+  cache per group: {sets: 20480, ways: 16, cl_size: 64, replacement_policy: LRU, write_allocate: true,
+    write_back: true}
+  cores per group: 8
+  threads per group: 16
+  groups: 2
+  upstream throughput: [32 B/cy, half-duplex]
+  transfers overlap: false
+  performance counter metrics:
+    accesses: L2_LINES_IN_ALL:PMC[0-3] + L2_TRANS_L2_WB:PMC[0-3]
+    misses: (CAS_COUNT_RD:MBOX0C[01] + CAS_COUNT_RD:MBOX1C[01] + CAS_COUNT_RD:MBOX2C[01]
+      + CAS_COUNT_RD:MBOX3C[01])
+    evicts: (CAS_COUNT_WR:MBOX0C[01] + CAS_COUNT_WR:MBOX1C[01] + CAS_COUNT_WR:MBOX2C[01]
+      + CAS_COUNT_WR:MBOX3C[01])
+- level: MEM
+  cores per group: 8
+  upstream throughput: [full socket memory bandwidth, half-duplex]
+  transfers overlap: false
+  size per group:
+  threads per group: 16
+
+benchmarks:
+  kernels:
+    copy:
+      FLOPs per iteration: 0
+      fastest bench kernel: copy_avx
+      read streams: {bytes: 8.00 B, streams: 1}
+      read+write streams: {bytes: 0.00 B, streams: 0}
+      write streams: {bytes: 8.00 B, streams: 1}
+    daxpy:
+      FLOPs per iteration: 2
+      fastest bench kernel: daxpy_avx
+      read streams: {bytes: 16.00 B, streams: 2}
+      read+write streams: {bytes: 8.00 B, streams: 1}
+      write streams: {bytes: 8.00 B, streams: 1}
+    load:
+      FLOPs per iteration: 0
+      fastest bench kernel: load_avx
+      read streams: {bytes: 8.00 B, streams: 1}
+      read+write streams: {bytes: 0.00 B, streams: 0}
+      write streams: {bytes: 0.00 B, streams: 0}
+    triad:
+      FLOPs per iteration: 2
+      fastest bench kernel: triad_avx
+      read streams: {bytes: 24.00 B, streams: 3}
+      read+write streams: {bytes: 0.00 B, streams: 0}
+      write streams: {bytes: 8.00 B, streams: 1}
+    update:
+      FLOPs per iteration: 0
+      fastest bench kernel: update_avx
+      read streams: {bytes: 8.00 B, streams: 1}
+      read+write streams: {bytes: 8.00 B, streams: 1}
+      write streams: {bytes: 8.00 B, streams: 1}
+  measurements:
+    L1:
+      1:
+        cores: [1, 2, 3, 4, 5, 6, 7, 8]
+        results:
+          copy: [83.27 GB/s, 166.52 GB/s, 249.78 GB/s, 333.02 GB/s, 416.34 GB/s, 495.96
+              GB/s, 578.56 GB/s, 660.60 GB/s]
+          daxpy: [116.88 GB/s, 233.68 GB/s, 311.60 GB/s, 409.72 GB/s, 509.79 GB/s,
+            559.65 GB/s, 612.77 GB/s, 719.71 GB/s]
+          load: [84.07 GB/s, 168.13 GB/s, 252.21 GB/s, 336.04 GB/s, 420.34 GB/s, 504.02
+              GB/s, 588.04 GB/s, 668.37 GB/s]
+          triad: [100.24 GB/s, 211.57 GB/s, 314.53 GB/s, 392.73 GB/s, 506.87 GB/s,
+            589.51 GB/s, 687.28 GB/s, 782.17 GB/s]
+          update: [84.77 GB/s, 160.10 GB/s, 237.12 GB/s, 312.74 GB/s, 392.54 GB/s,
+            465.53 GB/s, 516.02 GB/s, 567.27 GB/s]
+        size per core: [21.12 kB, 21.12 kB, 21.12 kB, 21.12 kB, 21.12 kB, 21.12 kB,
+          21.12 kB, 21.12 kB]
+        size per thread: [21.12 kB, 21.12 kB, 21.12 kB, 21.12 kB, 21.12 kB, 21.12
+            kB, 21.12 kB, 21.12 kB]
+        stats:
+          copy:
+          - [83.24 GB/s, 83.25 GB/s, 83.26 GB/s, 83.26 GB/s, 83.27 GB/s, 83.26 GB/s,
+            83.25 GB/s, 83.23 GB/s, 83.24 GB/s, 83.25 GB/s]
+          - [166.49 GB/s, 166.47 GB/s, 166.51 GB/s, 166.49 GB/s, 166.48 GB/s, 166.52
+              GB/s, 166.51 GB/s, 166.51 GB/s, 166.51 GB/s, 166.50 GB/s]
+          - [249.78 GB/s, 249.75 GB/s, 249.73 GB/s, 249.72 GB/s, 249.74 GB/s, 249.76
+              GB/s, 249.76 GB/s, 249.74 GB/s, 249.73 GB/s, 249.75 GB/s]
+          - [332.98 GB/s, 327.92 GB/s, 332.30 GB/s, 332.95 GB/s, 333.00 GB/s, 333.01
+              GB/s, 332.95 GB/s, 333.00 GB/s, 332.99 GB/s, 333.02 GB/s]
+          - [416.26 GB/s, 416.23 GB/s, 416.28 GB/s, 416.27 GB/s, 416.23 GB/s, 416.27
+              GB/s, 416.34 GB/s, 416.26 GB/s, 416.16 GB/s, 416.23 GB/s]
+          - [495.84 GB/s, 495.93 GB/s, 495.88 GB/s, 495.91 GB/s, 495.96 GB/s, 495.92
+              GB/s, 495.89 GB/s, 495.87 GB/s, 495.96 GB/s, 495.92 GB/s]
+          - [578.51 GB/s, 578.52 GB/s, 578.39 GB/s, 578.56 GB/s, 578.48 GB/s, 578.44
+              GB/s, 578.51 GB/s, 578.48 GB/s, 578.51 GB/s, 578.53 GB/s]
+          - [422.14 GB/s, 660.55 GB/s, 660.60 GB/s, 660.49 GB/s, 660.52 GB/s, 660.48
+              GB/s, 660.56 GB/s, 660.56 GB/s, 660.52 GB/s, 651.64 GB/s]
+          daxpy:
+          - [116.87 GB/s, 116.82 GB/s, 116.85 GB/s, 116.84 GB/s, 116.83 GB/s, 116.85
+              GB/s, 116.88 GB/s, 116.87 GB/s, 116.86 GB/s, 116.82 GB/s]
+          - [214.69 GB/s, 229.83 GB/s, 221.16 GB/s, 233.60 GB/s, 232.90 GB/s, 233.68
+              GB/s, 207.83 GB/s, 233.65 GB/s, 212.71 GB/s, 214.07 GB/s]
+          - [282.77 GB/s, 307.63 GB/s, 307.09 GB/s, 310.67 GB/s, 307.50 GB/s, 311.40
+              GB/s, 307.06 GB/s, 305.89 GB/s, 311.60 GB/s, 308.47 GB/s]
+          - [404.96 GB/s, 408.54 GB/s, 395.76 GB/s, 409.72 GB/s, 316.70 GB/s, 408.07
+              GB/s, 347.34 GB/s, 406.03 GB/s, 391.75 GB/s, 385.10 GB/s]
+          - [479.84 GB/s, 509.24 GB/s, 502.60 GB/s, 449.79 GB/s, 402.46 GB/s, 489.18
+              GB/s, 491.15 GB/s, 491.20 GB/s, 384.36 GB/s, 509.79 GB/s]
+          - [515.12 GB/s, 496.21 GB/s, 517.52 GB/s, 540.00 GB/s, 501.82 GB/s, 507.84
+              GB/s, 496.71 GB/s, 479.42 GB/s, 559.65 GB/s, 519.55 GB/s]
+          - [584.86 GB/s, 580.10 GB/s, 583.34 GB/s, 612.77 GB/s, 607.15 GB/s, 607.89
+              GB/s, 589.85 GB/s, 609.59 GB/s, 592.86 GB/s, 568.07 GB/s]
+          - [719.71 GB/s, 660.98 GB/s, 675.88 GB/s, 679.51 GB/s, 696.97 GB/s, 635.23
+              GB/s, 644.06 GB/s, 694.74 GB/s, 654.01 GB/s, 656.57 GB/s]
+          load:
+          - [84.04 GB/s, 84.06 GB/s, 84.06 GB/s, 84.04 GB/s, 84.05 GB/s, 84.05 GB/s,
+            84.07 GB/s, 84.04 GB/s, 84.05 GB/s, 84.06 GB/s]
+          - [168.09 GB/s, 168.12 GB/s, 168.06 GB/s, 168.11 GB/s, 168.12 GB/s, 168.13
+              GB/s, 168.13 GB/s, 168.12 GB/s, 168.10 GB/s, 168.13 GB/s]
+          - [252.16 GB/s, 252.21 GB/s, 252.07 GB/s, 252.07 GB/s, 252.18 GB/s, 252.16
+              GB/s, 252.21 GB/s, 252.20 GB/s, 252.20 GB/s, 252.17 GB/s]
+          - [335.94 GB/s, 336.03 GB/s, 335.99 GB/s, 336.04 GB/s, 336.00 GB/s, 335.98
+              GB/s, 335.97 GB/s, 335.89 GB/s, 335.99 GB/s, 336.03 GB/s]
+          - [420.30 GB/s, 420.18 GB/s, 420.30 GB/s, 420.33 GB/s, 420.25 GB/s, 420.28
+              GB/s, 420.31 GB/s, 420.31 GB/s, 420.34 GB/s, 420.33 GB/s]
+          - [503.98 GB/s, 503.99 GB/s, 503.97 GB/s, 503.98 GB/s, 504.02 GB/s, 503.99
+              GB/s, 503.92 GB/s, 503.98 GB/s, 503.94 GB/s, 503.97 GB/s]
+          - [587.93 GB/s, 588.01 GB/s, 588.04 GB/s, 587.94 GB/s, 587.97 GB/s, 588.01
+              GB/s, 588.00 GB/s, 587.92 GB/s, 588.04 GB/s, 588.02 GB/s]
+          - [668.21 GB/s, 668.22 GB/s, 668.29 GB/s, 668.24 GB/s, 668.27 GB/s, 668.37
+              GB/s, 668.28 GB/s, 668.14 GB/s, 668.19 GB/s, 668.19 GB/s]
+          triad:
+          - [100.00 GB/s, 99.71 GB/s, 99.74 GB/s, 100.24 GB/s, 99.72 GB/s, 99.62 GB/s,
+            99.54 GB/s, 99.61 GB/s, 99.72 GB/s, 99.71 GB/s]
+          - [208.08 GB/s, 210.33 GB/s, 211.57 GB/s, 208.34 GB/s, 210.03 GB/s, 209.16
+              GB/s, 210.21 GB/s, 209.48 GB/s, 210.03 GB/s, 208.80 GB/s]
+          - [311.43 GB/s, 311.08 GB/s, 311.41 GB/s, 311.10 GB/s, 313.13 GB/s, 314.53
+              GB/s, 311.59 GB/s, 311.80 GB/s, 311.57 GB/s, 311.89 GB/s]
+          - [391.65 GB/s, 392.34 GB/s, 391.84 GB/s, 392.07 GB/s, 391.96 GB/s, 392.73
+              GB/s, 391.66 GB/s, 391.83 GB/s, 392.09 GB/s, 391.88 GB/s]
+          - [504.20 GB/s, 506.77 GB/s, 503.22 GB/s, 506.74 GB/s, 502.78 GB/s, 506.15
+              GB/s, 506.87 GB/s, 502.85 GB/s, 505.82 GB/s, 506.57 GB/s]
+          - [587.75 GB/s, 589.51 GB/s, 588.01 GB/s, 587.29 GB/s, 588.04 GB/s, 587.92
+              GB/s, 588.08 GB/s, 587.94 GB/s, 587.82 GB/s, 587.55 GB/s]
+          - [686.03 GB/s, 685.97 GB/s, 685.01 GB/s, 685.88 GB/s, 685.61 GB/s, 687.12
+              GB/s, 684.97 GB/s, 686.09 GB/s, 685.81 GB/s, 687.28 GB/s]
+          - [782.05 GB/s, 781.73 GB/s, 781.13 GB/s, 781.87 GB/s, 782.17 GB/s, 781.24
+              GB/s, 781.82 GB/s, 781.92 GB/s, 781.90 GB/s, 781.66 GB/s]
+          update:
+          - [84.76 GB/s, 84.76 GB/s, 84.77 GB/s, 84.75 GB/s, 84.75 GB/s, 84.75 GB/s,
+            84.75 GB/s, 84.75 GB/s, 84.74 GB/s, 57.21 GB/s]
+          - [157.73 GB/s, 155.29 GB/s, 147.91 GB/s, 160.10 GB/s, 156.33 GB/s, 158.06
+              GB/s, 159.23 GB/s, 156.16 GB/s, 155.30 GB/s, 159.15 GB/s]
+          - [232.07 GB/s, 230.40 GB/s, 234.05 GB/s, 232.69 GB/s, 215.80 GB/s, 232.76
+              GB/s, 236.01 GB/s, 237.12 GB/s, 234.66 GB/s, 234.86 GB/s]
+          - [303.60 GB/s, 304.21 GB/s, 306.83 GB/s, 309.43 GB/s, 312.69 GB/s, 311.75
+              GB/s, 301.74 GB/s, 307.54 GB/s, 312.74 GB/s, 312.19 GB/s]
+          - [386.45 GB/s, 382.41 GB/s, 387.87 GB/s, 392.54 GB/s, 369.42 GB/s, 341.87
+              GB/s, 352.85 GB/s, 390.87 GB/s, 382.44 GB/s, 383.50 GB/s]
+          - [459.60 GB/s, 384.27 GB/s, 437.39 GB/s, 459.42 GB/s, 465.53 GB/s, 447.31
+              GB/s, 440.00 GB/s, 409.94 GB/s, 412.94 GB/s, 446.74 GB/s]
+          - [489.85 GB/s, 489.35 GB/s, 435.92 GB/s, 492.39 GB/s, 446.44 GB/s, 501.71
+              GB/s, 516.02 GB/s, 478.87 GB/s, 494.52 GB/s, 493.04 GB/s]
+          - [521.08 GB/s, 553.73 GB/s, 541.34 GB/s, 527.75 GB/s, 554.87 GB/s, 536.30
+              GB/s, 540.66 GB/s, 551.02 GB/s, 567.27 GB/s, 565.31 GB/s]
+        threads: [1, 2, 3, 4, 5, 6, 7, 8]
+        threads per core: 1
+        total size: [21.12 kB, 42.24 kB, 63.36 kB, 84.48 kB, 105.60 kB, 126.72 kB,
+          147.84 kB, 168.96 kB]
+      2:
+        cores: [1, 2, 3, 4, 5, 6, 7, 8]
+        results:
+          copy: [80.41 GB/s, 160.83 GB/s, 240.43 GB/s, 320.63 GB/s, 401.66 GB/s, 454.32
+              GB/s, 539.77 GB/s, 628.51 GB/s]
+          daxpy: [95.87 GB/s, 187.75 GB/s, 270.68 GB/s, 371.80 GB/s, 454.05 GB/s,
+            503.46 GB/s, 606.85 GB/s, 689.34 GB/s]
+          load: [82.30 GB/s, 164.06 GB/s, 244.78 GB/s, 326.21 GB/s, 408.56 GB/s, 490.13
+              GB/s, 569.95 GB/s, 651.79 GB/s]
+          triad: [93.22 GB/s, 186.75 GB/s, 288.55 GB/s, 340.91 GB/s, 442.20 GB/s,
+            534.62 GB/s, 597.98 GB/s, 707.54 GB/s]
+          update: [83.25 GB/s, 166.04 GB/s, 248.21 GB/s, 330.58 GB/s, 414.71 GB/s,
+            496.97 GB/s, 578.67 GB/s, 656.56 GB/s]
+        size per core: [21.12 kB, 21.12 kB, 21.12 kB, 21.12 kB, 21.12 kB, 21.12 kB,
+          21.12 kB, 21.12 kB]
+        size per thread: [10.56 kB, 10.56 kB, 10.56 kB, 10.56 kB, 10.56 kB, 10.56
+            kB, 10.56 kB, 10.56 kB]
+        stats:
+          copy:
+          - [80.37 GB/s, 79.07 GB/s, 80.39 GB/s, 80.39 GB/s, 80.41 GB/s, 80.29 GB/s,
+            80.36 GB/s, 79.05 GB/s, 77.87 GB/s, 80.37 GB/s]
+          - [160.76 GB/s, 160.63 GB/s, 160.76 GB/s, 160.71 GB/s, 160.80 GB/s, 160.74
+              GB/s, 160.83 GB/s, 160.69 GB/s, 160.79 GB/s, 160.78 GB/s]
+          - [240.43 GB/s, 240.20 GB/s, 240.36 GB/s, 240.37 GB/s, 237.17 GB/s, 240.39
+              GB/s, 240.14 GB/s, 240.24 GB/s, 240.26 GB/s, 240.10 GB/s]
+          - [320.46 GB/s, 320.47 GB/s, 320.63 GB/s, 320.52 GB/s, 320.40 GB/s, 320.40
+              GB/s, 320.51 GB/s, 320.46 GB/s, 319.72 GB/s, 320.44 GB/s]
+          - [401.40 GB/s, 399.28 GB/s, 401.66 GB/s, 401.53 GB/s, 401.52 GB/s, 401.55
+              GB/s, 401.60 GB/s, 401.47 GB/s, 401.47 GB/s, 401.35 GB/s]
+          - [447.24 GB/s, 453.65 GB/s, 453.54 GB/s, 453.86 GB/s, 453.82 GB/s, 453.62
+              GB/s, 453.48 GB/s, 454.32 GB/s, 453.86 GB/s, 446.79 GB/s]
+          - [538.79 GB/s, 538.47 GB/s, 539.02 GB/s, 538.25 GB/s, 538.72 GB/s, 538.89
+              GB/s, 539.37 GB/s, 539.41 GB/s, 539.77 GB/s, 538.49 GB/s]
+          - [628.14 GB/s, 618.54 GB/s, 628.12 GB/s, 623.90 GB/s, 628.27 GB/s, 623.78
+              GB/s, 618.17 GB/s, 623.43 GB/s, 628.51 GB/s, 628.43 GB/s]
+          daxpy:
+          - [95.77 GB/s, 93.25 GB/s, 92.87 GB/s, 95.87 GB/s, 95.84 GB/s, 95.81 GB/s,
+            95.80 GB/s, 94.99 GB/s, 95.81 GB/s, 95.86 GB/s]
+          - [184.53 GB/s, 186.60 GB/s, 183.99 GB/s, 187.48 GB/s, 187.75 GB/s, 181.53
+              GB/s, 183.82 GB/s, 187.75 GB/s, 184.13 GB/s, 180.61 GB/s]
+          - [258.46 GB/s, 270.13 GB/s, 264.76 GB/s, 262.23 GB/s, 265.05 GB/s, 267.25
+              GB/s, 270.68 GB/s, 268.08 GB/s, 266.20 GB/s, 265.66 GB/s]
+          - [367.99 GB/s, 367.15 GB/s, 361.68 GB/s, 364.86 GB/s, 368.76 GB/s, 363.27
+              GB/s, 364.95 GB/s, 366.97 GB/s, 371.80 GB/s, 366.55 GB/s]
+          - [441.95 GB/s, 442.77 GB/s, 444.97 GB/s, 454.05 GB/s, 441.02 GB/s, 445.96
+              GB/s, 442.49 GB/s, 440.23 GB/s, 449.29 GB/s, 452.66 GB/s]
+          - [501.31 GB/s, 489.91 GB/s, 495.43 GB/s, 503.39 GB/s, 488.03 GB/s, 497.71
+              GB/s, 503.46 GB/s, 496.85 GB/s, 497.38 GB/s, 468.90 GB/s]
+          - [604.57 GB/s, 580.51 GB/s, 587.67 GB/s, 594.32 GB/s, 561.32 GB/s, 588.09
+              GB/s, 606.85 GB/s, 600.91 GB/s, 599.40 GB/s, 598.24 GB/s]
+          - [646.48 GB/s, 655.06 GB/s, 684.70 GB/s, 653.61 GB/s, 671.61 GB/s, 689.34
+              GB/s, 673.74 GB/s, 685.49 GB/s, 681.48 GB/s, 683.23 GB/s]
+          load:
+          - [82.19 GB/s, 82.08 GB/s, 82.22 GB/s, 82.10 GB/s, 82.14 GB/s, 82.17 GB/s,
+            82.22 GB/s, 82.28 GB/s, 82.30 GB/s, 81.98 GB/s]
+          - [163.22 GB/s, 163.43 GB/s, 164.06 GB/s, 164.03 GB/s, 163.19 GB/s, 163.83
+              GB/s, 163.29 GB/s, 163.88 GB/s, 163.83 GB/s, 163.11 GB/s]
+          - [244.32 GB/s, 244.47 GB/s, 244.65 GB/s, 244.29 GB/s, 243.96 GB/s, 244.50
+              GB/s, 244.78 GB/s, 244.52 GB/s, 244.48 GB/s, 244.72 GB/s]
+          - [325.18 GB/s, 326.21 GB/s, 325.49 GB/s, 325.86 GB/s, 325.73 GB/s, 325.72
+              GB/s, 326.00 GB/s, 325.41 GB/s, 325.63 GB/s, 325.82 GB/s]
+          - [407.81 GB/s, 407.96 GB/s, 407.59 GB/s, 408.56 GB/s, 407.64 GB/s, 407.61
+              GB/s, 408.09 GB/s, 407.95 GB/s, 408.30 GB/s, 408.32 GB/s]
+          - [488.65 GB/s, 489.73 GB/s, 489.38 GB/s, 489.81 GB/s, 490.13 GB/s, 489.31
+              GB/s, 488.74 GB/s, 489.38 GB/s, 488.17 GB/s, 489.51 GB/s]
+          - [569.95 GB/s, 567.21 GB/s, 566.08 GB/s, 567.88 GB/s, 567.69 GB/s, 569.58
+              GB/s, 568.61 GB/s, 568.35 GB/s, 569.70 GB/s, 568.87 GB/s]
+          - [650.43 GB/s, 651.58 GB/s, 650.86 GB/s, 651.34 GB/s, 651.04 GB/s, 651.79
+              GB/s, 650.28 GB/s, 650.31 GB/s, 650.81 GB/s, 651.09 GB/s]
+          triad:
+          - [93.22 GB/s, 90.73 GB/s, 92.48 GB/s, 92.53 GB/s, 92.37 GB/s, 92.50 GB/s,
+            92.48 GB/s, 90.28 GB/s, 92.35 GB/s, 92.51 GB/s]
+          - [186.75 GB/s, 184.51 GB/s, 184.17 GB/s, 186.66 GB/s, 186.43 GB/s, 184.59
+              GB/s, 186.71 GB/s, 186.30 GB/s, 186.64 GB/s, 186.12 GB/s]
+          - [287.77 GB/s, 288.55 GB/s, 287.76 GB/s, 287.76 GB/s, 288.19 GB/s, 287.70
+              GB/s, 287.42 GB/s, 288.12 GB/s, 287.66 GB/s, 288.01 GB/s]
+          - [339.82 GB/s, 338.95 GB/s, 340.11 GB/s, 340.11 GB/s, 340.25 GB/s, 340.20
+              GB/s, 339.90 GB/s, 340.22 GB/s, 340.91 GB/s, 340.01 GB/s]
+          - [440.41 GB/s, 440.65 GB/s, 441.59 GB/s, 442.20 GB/s, 441.67 GB/s, 432.59
+              GB/s, 440.20 GB/s, 440.81 GB/s, 440.24 GB/s, 441.38 GB/s]
+          - [534.30 GB/s, 527.60 GB/s, 528.52 GB/s, 509.55 GB/s, 527.68 GB/s, 527.63
+              GB/s, 533.66 GB/s, 534.62 GB/s, 534.60 GB/s, 534.19 GB/s]
+          - [595.90 GB/s, 595.94 GB/s, 597.91 GB/s, 580.22 GB/s, 597.98 GB/s, 597.66
+              GB/s, 596.16 GB/s, 567.03 GB/s, 580.88 GB/s, 578.29 GB/s]
+          - [703.80 GB/s, 705.57 GB/s, 694.84 GB/s, 682.59 GB/s, 694.37 GB/s, 696.56
+              GB/s, 704.50 GB/s, 704.95 GB/s, 694.52 GB/s, 707.54 GB/s]
+          update:
+          - [83.18 GB/s, 83.24 GB/s, 83.25 GB/s, 83.16 GB/s, 83.22 GB/s, 83.23 GB/s,
+            83.22 GB/s, 83.21 GB/s, 83.20 GB/s, 83.17 GB/s]
+          - [165.65 GB/s, 165.76 GB/s, 165.99 GB/s, 166.04 GB/s, 165.49 GB/s, 165.87
+              GB/s, 165.58 GB/s, 165.96 GB/s, 165.67 GB/s, 165.66 GB/s]
+          - [247.30 GB/s, 248.14 GB/s, 247.84 GB/s, 247.90 GB/s, 247.77 GB/s, 247.60
+              GB/s, 248.21 GB/s, 247.95 GB/s, 248.05 GB/s, 247.83 GB/s]
+          - [330.49 GB/s, 330.07 GB/s, 329.91 GB/s, 329.90 GB/s, 330.58 GB/s, 329.30
+              GB/s, 329.92 GB/s, 330.03 GB/s, 330.04 GB/s, 330.12 GB/s]
+          - [413.89 GB/s, 414.04 GB/s, 413.56 GB/s, 414.06 GB/s, 414.15 GB/s, 413.94
+              GB/s, 414.04 GB/s, 414.71 GB/s, 414.32 GB/s, 413.93 GB/s]
+          - [496.97 GB/s, 496.80 GB/s, 496.17 GB/s, 495.42 GB/s, 496.17 GB/s, 496.66
+              GB/s, 495.55 GB/s, 496.27 GB/s, 495.52 GB/s, 496.80 GB/s]
+          - [564.44 GB/s, 577.86 GB/s, 574.38 GB/s, 571.96 GB/s, 564.76 GB/s, 578.67
+              GB/s, 565.89 GB/s, 572.49 GB/s, 571.80 GB/s, 572.01 GB/s]
+          - [647.68 GB/s, 656.56 GB/s, 655.56 GB/s, 644.04 GB/s, 655.30 GB/s, 648.80
+              GB/s, 654.77 GB/s, 653.58 GB/s, 656.27 GB/s, 653.79 GB/s]
+        threads: [2, 4, 6, 8, 10, 12, 14, 16]
+        threads per core: 2
+        total size: [21.12 kB, 42.24 kB, 63.36 kB, 84.48 kB, 105.60 kB, 126.72 kB,
+          147.84 kB, 168.96 kB]
+    L2:
+      1:
+        cores: [1, 2, 3, 4, 5, 6, 7, 8]
+        results:
+          copy: [36.74 GB/s, 73.65 GB/s, 107.11 GB/s, 141.43 GB/s, 179.70 GB/s, 215.63
+              GB/s, 247.20 GB/s, 282.42 GB/s]
+          daxpy: [44.59 GB/s, 88.24 GB/s, 132.21 GB/s, 175.78 GB/s, 219.11 GB/s, 259.95
+              GB/s, 305.84 GB/s, 346.83 GB/s]
+          load: [31.46 GB/s, 62.97 GB/s, 93.73 GB/s, 125.46 GB/s, 157.32 GB/s, 183.63
+              GB/s, 214.02 GB/s, 245.17 GB/s]
+          triad: [37.79 GB/s, 75.08 GB/s, 111.43 GB/s, 148.90 GB/s, 185.54 GB/s, 223.72
+              GB/s, 258.53 GB/s, 299.32 GB/s]
+          update: [48.46 GB/s, 96.10 GB/s, 141.97 GB/s, 189.18 GB/s, 234.73 GB/s,
+            280.47 GB/s, 330.94 GB/s, 365.43 GB/s]
+        size per core: [168.96 kB, 168.96 kB, 168.96 kB, 168.96 kB, 168.96 kB, 168.96
+            kB, 168.96 kB, 168.96 kB]
+        size per thread: [168.96 kB, 168.96 kB, 168.96 kB, 168.96 kB, 168.96 kB, 168.96
+            kB, 168.96 kB, 168.96 kB]
+        stats:
+          copy:
+          - [36.38 GB/s, 36.59 GB/s, 36.18 GB/s, 36.57 GB/s, 36.26 GB/s, 34.61 GB/s,
+            35.96 GB/s, 35.84 GB/s, 36.74 GB/s, 36.53 GB/s]
+          - [68.97 GB/s, 70.42 GB/s, 69.88 GB/s, 71.40 GB/s, 69.05 GB/s, 72.46 GB/s,
+            70.32 GB/s, 73.65 GB/s, 72.14 GB/s, 69.81 GB/s]
+          - [107.08 GB/s, 103.53 GB/s, 107.11 GB/s, 103.66 GB/s, 103.88 GB/s, 106.48
+              GB/s, 97.32 GB/s, 105.92 GB/s, 104.16 GB/s, 104.84 GB/s]
+          - [138.97 GB/s, 136.86 GB/s, 140.88 GB/s, 138.96 GB/s, 140.58 GB/s, 138.51
+              GB/s, 141.43 GB/s, 139.53 GB/s, 141.20 GB/s, 139.43 GB/s]
+          - [158.20 GB/s, 171.06 GB/s, 179.70 GB/s, 171.43 GB/s, 174.27 GB/s, 175.01
+              GB/s, 165.20 GB/s, 170.89 GB/s, 173.01 GB/s, 175.17 GB/s]
+          - [209.74 GB/s, 204.59 GB/s, 215.27 GB/s, 215.63 GB/s, 210.59 GB/s, 206.94
+              GB/s, 211.03 GB/s, 201.61 GB/s, 214.45 GB/s, 208.15 GB/s]
+          - [241.38 GB/s, 246.88 GB/s, 246.90 GB/s, 247.20 GB/s, 235.27 GB/s, 227.39
+              GB/s, 239.48 GB/s, 244.45 GB/s, 246.68 GB/s, 235.87 GB/s]
+          - [271.07 GB/s, 282.42 GB/s, 282.38 GB/s, 276.20 GB/s, 269.85 GB/s, 276.96
+              GB/s, 268.64 GB/s, 269.61 GB/s, 279.68 GB/s, 280.63 GB/s]
+          daxpy:
+          - [44.54 GB/s, 44.59 GB/s, 44.50 GB/s, 44.42 GB/s, 44.41 GB/s, 44.06 GB/s,
+            43.39 GB/s, 44.02 GB/s, 44.34 GB/s, 44.28 GB/s]
+          - [85.35 GB/s, 87.05 GB/s, 86.47 GB/s, 86.90 GB/s, 86.92 GB/s, 88.24 GB/s,
+            87.39 GB/s, 87.60 GB/s, 87.55 GB/s, 84.19 GB/s]
+          - [129.21 GB/s, 130.47 GB/s, 123.29 GB/s, 127.92 GB/s, 132.21 GB/s, 128.37
+              GB/s, 127.09 GB/s, 128.72 GB/s, 129.34 GB/s, 128.69 GB/s]
+          - [171.53 GB/s, 169.64 GB/s, 173.92 GB/s, 173.74 GB/s, 168.53 GB/s, 171.54
+              GB/s, 173.96 GB/s, 175.78 GB/s, 171.29 GB/s, 171.33 GB/s]
+          - [219.11 GB/s, 208.86 GB/s, 211.66 GB/s, 216.47 GB/s, 212.73 GB/s, 204.90
+              GB/s, 208.87 GB/s, 215.75 GB/s, 213.61 GB/s, 214.56 GB/s]
+          - [250.69 GB/s, 241.36 GB/s, 255.22 GB/s, 250.29 GB/s, 253.80 GB/s, 256.34
+              GB/s, 254.38 GB/s, 259.95 GB/s, 245.69 GB/s, 259.12 GB/s]
+          - [296.08 GB/s, 301.77 GB/s, 297.40 GB/s, 305.84 GB/s, 288.62 GB/s, 283.76
+              GB/s, 293.61 GB/s, 291.93 GB/s, 299.74 GB/s, 289.76 GB/s]
+          - [344.46 GB/s, 334.36 GB/s, 339.31 GB/s, 330.88 GB/s, 343.26 GB/s, 327.28
+              GB/s, 344.53 GB/s, 346.83 GB/s, 344.29 GB/s, 346.28 GB/s]
+          load:
+          - [31.40 GB/s, 31.23 GB/s, 31.29 GB/s, 31.24 GB/s, 31.46 GB/s, 31.20 GB/s,
+            31.33 GB/s, 30.01 GB/s, 30.08 GB/s, 31.40 GB/s]
+          - [61.20 GB/s, 60.74 GB/s, 61.93 GB/s, 61.22 GB/s, 61.20 GB/s, 60.03 GB/s,
+            59.33 GB/s, 59.94 GB/s, 58.54 GB/s, 62.97 GB/s]
+          - [91.53 GB/s, 93.73 GB/s, 93.05 GB/s, 90.07 GB/s, 91.60 GB/s, 90.11 GB/s,
+            90.21 GB/s, 90.43 GB/s, 89.15 GB/s, 93.10 GB/s]
+          - [122.80 GB/s, 116.57 GB/s, 120.68 GB/s, 122.54 GB/s, 122.75 GB/s, 121.79
+              GB/s, 125.30 GB/s, 125.46 GB/s, 122.28 GB/s, 124.51 GB/s]
+          - [151.01 GB/s, 151.10 GB/s, 148.68 GB/s, 151.17 GB/s, 147.24 GB/s, 153.65
+              GB/s, 146.48 GB/s, 150.48 GB/s, 150.74 GB/s, 157.32 GB/s]
+          - [181.52 GB/s, 173.89 GB/s, 181.58 GB/s, 174.01 GB/s, 176.40 GB/s, 179.73
+              GB/s, 174.06 GB/s, 181.26 GB/s, 180.57 GB/s, 183.63 GB/s]
+          - [214.02 GB/s, 205.69 GB/s, 207.64 GB/s, 204.18 GB/s, 208.42 GB/s, 211.39
+              GB/s, 206.58 GB/s, 204.90 GB/s, 204.75 GB/s, 208.91 GB/s]
+          - [232.16 GB/s, 233.90 GB/s, 241.32 GB/s, 237.45 GB/s, 235.41 GB/s, 241.17
+              GB/s, 237.52 GB/s, 245.17 GB/s, 241.17 GB/s, 234.08 GB/s]
+          triad:
+          - [37.62 GB/s, 37.54 GB/s, 37.79 GB/s, 37.67 GB/s, 37.76 GB/s, 37.77 GB/s,
+            37.68 GB/s, 35.83 GB/s, 37.06 GB/s, 37.50 GB/s]
+          - [72.79 GB/s, 74.76 GB/s, 73.15 GB/s, 74.68 GB/s, 73.88 GB/s, 73.27 GB/s,
+            75.08 GB/s, 73.48 GB/s, 71.27 GB/s, 72.05 GB/s]
+          - [106.26 GB/s, 105.22 GB/s, 109.70 GB/s, 109.07 GB/s, 110.84 GB/s, 111.43
+              GB/s, 106.32 GB/s, 109.73 GB/s, 106.22 GB/s, 107.20 GB/s]
+          - [142.10 GB/s, 148.90 GB/s, 148.11 GB/s, 144.38 GB/s, 144.77 GB/s, 145.42
+              GB/s, 147.36 GB/s, 142.94 GB/s, 145.39 GB/s, 139.42 GB/s]
+          - [182.07 GB/s, 176.75 GB/s, 181.39 GB/s, 183.31 GB/s, 181.87 GB/s, 183.71
+              GB/s, 180.48 GB/s, 178.11 GB/s, 181.36 GB/s, 185.54 GB/s]
+          - [219.85 GB/s, 217.02 GB/s, 218.86 GB/s, 217.09 GB/s, 212.24 GB/s, 212.22
+              GB/s, 219.33 GB/s, 208.81 GB/s, 215.84 GB/s, 223.72 GB/s]
+          - [258.06 GB/s, 232.27 GB/s, 247.04 GB/s, 240.55 GB/s, 236.11 GB/s, 251.88
+              GB/s, 258.53 GB/s, 247.32 GB/s, 251.53 GB/s, 245.10 GB/s]
+          - [273.67 GB/s, 292.81 GB/s, 288.67 GB/s, 289.75 GB/s, 293.98 GB/s, 283.56
+              GB/s, 295.33 GB/s, 280.11 GB/s, 299.32 GB/s, 285.18 GB/s]
+          update:
+          - [47.30 GB/s, 48.33 GB/s, 48.17 GB/s, 47.38 GB/s, 48.16 GB/s, 46.99 GB/s,
+            48.46 GB/s, 47.51 GB/s, 46.20 GB/s, 48.26 GB/s]
+          - [92.10 GB/s, 92.30 GB/s, 95.73 GB/s, 95.53 GB/s, 86.95 GB/s, 96.10 GB/s,
+            94.16 GB/s, 89.72 GB/s, 92.00 GB/s, 93.10 GB/s]
+          - [137.06 GB/s, 140.40 GB/s, 136.20 GB/s, 139.57 GB/s, 140.69 GB/s, 136.20
+              GB/s, 141.53 GB/s, 129.76 GB/s, 136.47 GB/s, 141.97 GB/s]
+          - [184.84 GB/s, 177.96 GB/s, 178.61 GB/s, 179.03 GB/s, 176.59 GB/s, 180.62
+              GB/s, 182.26 GB/s, 182.27 GB/s, 189.18 GB/s, 185.49 GB/s]
+          - [232.17 GB/s, 217.86 GB/s, 232.40 GB/s, 223.10 GB/s, 228.52 GB/s, 234.73
+              GB/s, 232.00 GB/s, 233.14 GB/s, 231.69 GB/s, 225.01 GB/s]
+          - [276.16 GB/s, 274.80 GB/s, 272.58 GB/s, 272.43 GB/s, 280.47 GB/s, 276.90
+              GB/s, 264.76 GB/s, 272.47 GB/s, 277.77 GB/s, 271.42 GB/s]
+          - [330.94 GB/s, 312.06 GB/s, 312.83 GB/s, 312.62 GB/s, 292.44 GB/s, 315.68
+              GB/s, 316.67 GB/s, 321.25 GB/s, 321.71 GB/s, 315.05 GB/s]
+          - [362.85 GB/s, 356.49 GB/s, 365.43 GB/s, 332.52 GB/s, 354.30 GB/s, 354.68
+              GB/s, 335.54 GB/s, 358.54 GB/s, 363.22 GB/s, 360.01 GB/s]
+        threads: [1, 2, 3, 4, 5, 6, 7, 8]
+        threads per core: 1
+        total size: [168.96 kB, 337.92 kB, 506.88 kB, 675.84 kB, 844.80 kB, 1.01 MB,
+          1.18 MB, 1.35 MB]
+      2:
+        cores: [1, 2, 3, 4, 5, 6, 7, 8]
+        results:
+          copy: [36.83 GB/s, 72.70 GB/s, 108.11 GB/s, 142.21 GB/s, 178.07 GB/s, 213.30
+              GB/s, 251.98 GB/s, 283.06 GB/s]
+          daxpy: [45.34 GB/s, 90.11 GB/s, 134.85 GB/s, 180.06 GB/s, 224.22 GB/s, 268.27
+              GB/s, 312.15 GB/s, 358.38 GB/s]
+          load: [33.99 GB/s, 67.65 GB/s, 100.93 GB/s, 134.81 GB/s, 165.89 GB/s, 196.09
+              GB/s, 233.31 GB/s, 262.05 GB/s]
+          triad: [38.60 GB/s, 76.58 GB/s, 114.50 GB/s, 150.54 GB/s, 189.60 GB/s, 227.05
+              GB/s, 263.75 GB/s, 301.02 GB/s]
+          update: [49.25 GB/s, 97.34 GB/s, 146.81 GB/s, 194.71 GB/s, 239.97 GB/s,
+            287.14 GB/s, 330.84 GB/s, 384.71 GB/s]
+        size per core: [168.96 kB, 168.96 kB, 168.96 kB, 168.96 kB, 168.96 kB, 168.96
+            kB, 168.96 kB, 168.96 kB]
+        size per thread: [84.48 kB, 84.48 kB, 84.48 kB, 84.48 kB, 84.48 kB, 84.48
+            kB, 84.48 kB, 84.48 kB]
+        stats:
+          copy:
+          - [36.83 GB/s, 36.67 GB/s, 34.90 GB/s, 36.44 GB/s, 35.13 GB/s, 35.07 GB/s,
+            35.53 GB/s, 36.15 GB/s, 35.85 GB/s, 36.23 GB/s]
+          - [71.52 GB/s, 70.16 GB/s, 70.67 GB/s, 71.20 GB/s, 72.70 GB/s, 70.14 GB/s,
+            70.53 GB/s, 69.17 GB/s, 71.57 GB/s, 70.22 GB/s]
+          - [104.39 GB/s, 104.74 GB/s, 103.12 GB/s, 108.11 GB/s, 105.30 GB/s, 102.80
+              GB/s, 102.90 GB/s, 107.06 GB/s, 103.45 GB/s, 105.45 GB/s]
+          - [139.02 GB/s, 134.63 GB/s, 140.72 GB/s, 141.32 GB/s, 140.35 GB/s, 141.19
+              GB/s, 135.44 GB/s, 142.21 GB/s, 140.96 GB/s, 142.05 GB/s]
+          - [177.86 GB/s, 177.74 GB/s, 177.42 GB/s, 175.35 GB/s, 176.42 GB/s, 173.13
+              GB/s, 174.32 GB/s, 170.24 GB/s, 178.07 GB/s, 177.88 GB/s]
+          - [206.27 GB/s, 211.63 GB/s, 209.06 GB/s, 210.54 GB/s, 208.80 GB/s, 209.99
+              GB/s, 208.77 GB/s, 206.41 GB/s, 213.30 GB/s, 206.39 GB/s]
+          - [240.18 GB/s, 238.36 GB/s, 244.16 GB/s, 236.26 GB/s, 244.12 GB/s, 238.49
+              GB/s, 242.23 GB/s, 244.46 GB/s, 251.98 GB/s, 242.55 GB/s]
+          - [279.77 GB/s, 282.91 GB/s, 278.73 GB/s, 276.91 GB/s, 283.06 GB/s, 273.23
+              GB/s, 278.33 GB/s, 280.88 GB/s, 277.54 GB/s, 281.83 GB/s]
+          daxpy:
+          - [45.32 GB/s, 44.62 GB/s, 45.29 GB/s, 45.18 GB/s, 45.17 GB/s, 45.07 GB/s,
+            44.69 GB/s, 45.17 GB/s, 45.11 GB/s, 45.34 GB/s]
+          - [89.94 GB/s, 89.97 GB/s, 89.37 GB/s, 89.90 GB/s, 88.37 GB/s, 89.13 GB/s,
+            90.11 GB/s, 89.67 GB/s, 89.90 GB/s, 89.93 GB/s]
+          - [134.83 GB/s, 134.85 GB/s, 132.02 GB/s, 134.33 GB/s, 133.82 GB/s, 132.39
+              GB/s, 131.67 GB/s, 134.62 GB/s, 132.71 GB/s, 131.67 GB/s]
+          - [175.52 GB/s, 173.36 GB/s, 176.83 GB/s, 177.98 GB/s, 175.73 GB/s, 173.42
+              GB/s, 180.06 GB/s, 179.55 GB/s, 176.71 GB/s, 175.85 GB/s]
+          - [222.00 GB/s, 216.86 GB/s, 220.17 GB/s, 218.14 GB/s, 220.60 GB/s, 219.43
+              GB/s, 220.58 GB/s, 224.22 GB/s, 220.89 GB/s, 222.28 GB/s]
+          - [258.75 GB/s, 262.88 GB/s, 261.77 GB/s, 268.27 GB/s, 263.66 GB/s, 262.59
+              GB/s, 266.54 GB/s, 261.67 GB/s, 262.80 GB/s, 263.72 GB/s]
+          - [298.65 GB/s, 312.15 GB/s, 308.52 GB/s, 304.22 GB/s, 301.87 GB/s, 305.53
+              GB/s, 309.84 GB/s, 310.67 GB/s, 310.49 GB/s, 311.99 GB/s]
+          - [347.55 GB/s, 350.67 GB/s, 348.93 GB/s, 358.38 GB/s, 352.35 GB/s, 352.05
+              GB/s, 353.82 GB/s, 356.00 GB/s, 348.07 GB/s, 349.87 GB/s]
+          load:
+          - [33.99 GB/s, 32.54 GB/s, 32.94 GB/s, 33.17 GB/s, 33.83 GB/s, 31.55 GB/s,
+            31.91 GB/s, 33.86 GB/s, 33.93 GB/s, 33.75 GB/s]
+          - [66.22 GB/s, 64.94 GB/s, 67.64 GB/s, 67.52 GB/s, 65.01 GB/s, 67.21 GB/s,
+            66.07 GB/s, 66.43 GB/s, 67.65 GB/s, 64.84 GB/s]
+          - [98.58 GB/s, 97.97 GB/s, 98.39 GB/s, 98.50 GB/s, 98.77 GB/s, 97.84 GB/s,
+            99.58 GB/s, 100.93 GB/s, 100.50 GB/s, 99.94 GB/s]
+          - [130.23 GB/s, 131.10 GB/s, 131.04 GB/s, 127.83 GB/s, 134.81 GB/s, 132.68
+              GB/s, 131.80 GB/s, 129.42 GB/s, 130.76 GB/s, 126.96 GB/s]
+          - [164.90 GB/s, 165.18 GB/s, 161.19 GB/s, 164.33 GB/s, 162.76 GB/s, 165.04
+              GB/s, 162.20 GB/s, 165.89 GB/s, 164.34 GB/s, 159.66 GB/s]
+          - [192.69 GB/s, 193.33 GB/s, 188.88 GB/s, 190.70 GB/s, 194.60 GB/s, 190.92
+              GB/s, 191.36 GB/s, 192.89 GB/s, 191.85 GB/s, 196.09 GB/s]
+          - [227.70 GB/s, 223.95 GB/s, 222.79 GB/s, 227.09 GB/s, 227.04 GB/s, 229.45
+              GB/s, 228.09 GB/s, 227.83 GB/s, 233.31 GB/s, 227.49 GB/s]
+          - [257.94 GB/s, 261.47 GB/s, 262.05 GB/s, 257.70 GB/s, 259.70 GB/s, 259.23
+              GB/s, 261.09 GB/s, 253.81 GB/s, 254.21 GB/s, 259.34 GB/s]
+          triad:
+          - [38.60 GB/s, 36.68 GB/s, 38.07 GB/s, 38.10 GB/s, 37.89 GB/s, 36.48 GB/s,
+            38.33 GB/s, 38.12 GB/s, 37.43 GB/s, 37.87 GB/s]
+          - [76.58 GB/s, 74.97 GB/s, 75.74 GB/s, 76.02 GB/s, 72.66 GB/s, 74.73 GB/s,
+            76.37 GB/s, 76.18 GB/s, 74.59 GB/s, 75.75 GB/s]
+          - [111.71 GB/s, 114.50 GB/s, 108.96 GB/s, 111.49 GB/s, 111.56 GB/s, 111.66
+              GB/s, 113.43 GB/s, 114.37 GB/s, 111.67 GB/s, 108.14 GB/s]
+          - [146.29 GB/s, 147.84 GB/s, 149.09 GB/s, 149.93 GB/s, 150.54 GB/s, 145.50
+              GB/s, 145.16 GB/s, 149.47 GB/s, 146.30 GB/s, 149.32 GB/s]
+          - [186.73 GB/s, 186.46 GB/s, 180.47 GB/s, 187.32 GB/s, 184.34 GB/s, 187.34
+              GB/s, 186.55 GB/s, 183.81 GB/s, 189.60 GB/s, 188.70 GB/s]
+          - [224.81 GB/s, 219.69 GB/s, 227.05 GB/s, 224.25 GB/s, 223.36 GB/s, 225.86
+              GB/s, 216.09 GB/s, 221.98 GB/s, 218.47 GB/s, 226.37 GB/s]
+          - [263.29 GB/s, 259.28 GB/s, 258.81 GB/s, 258.77 GB/s, 256.56 GB/s, 256.49
+              GB/s, 256.39 GB/s, 263.75 GB/s, 262.00 GB/s, 261.48 GB/s]
+          - [299.28 GB/s, 292.80 GB/s, 293.63 GB/s, 297.93 GB/s, 293.02 GB/s, 295.95
+              GB/s, 287.92 GB/s, 301.02 GB/s, 300.76 GB/s, 297.01 GB/s]
+          update:
+          - [49.07 GB/s, 47.17 GB/s, 47.56 GB/s, 49.25 GB/s, 46.44 GB/s, 49.04 GB/s,
+            48.91 GB/s, 49.20 GB/s, 48.30 GB/s, 48.85 GB/s]
+          - [96.45 GB/s, 97.11 GB/s, 94.03 GB/s, 92.56 GB/s, 95.39 GB/s, 97.34 GB/s,
+            96.06 GB/s, 92.25 GB/s, 95.53 GB/s, 97.08 GB/s]
+          - [137.54 GB/s, 135.13 GB/s, 145.80 GB/s, 141.29 GB/s, 138.99 GB/s, 143.44
+              GB/s, 146.81 GB/s, 142.94 GB/s, 133.84 GB/s, 146.33 GB/s]
+          - [190.64 GB/s, 185.02 GB/s, 194.24 GB/s, 187.48 GB/s, 194.52 GB/s, 188.51
+              GB/s, 189.17 GB/s, 194.71 GB/s, 194.37 GB/s, 190.83 GB/s]
+          - [239.97 GB/s, 219.74 GB/s, 233.72 GB/s, 234.38 GB/s, 235.78 GB/s, 235.11
+              GB/s, 235.62 GB/s, 226.09 GB/s, 235.93 GB/s, 230.51 GB/s]
+          - [280.16 GB/s, 275.22 GB/s, 260.15 GB/s, 286.01 GB/s, 280.61 GB/s, 287.14
+              GB/s, 283.75 GB/s, 275.23 GB/s, 283.71 GB/s, 285.38 GB/s]
+          - [311.15 GB/s, 318.00 GB/s, 325.21 GB/s, 328.34 GB/s, 318.09 GB/s, 328.66
+              GB/s, 329.69 GB/s, 316.97 GB/s, 328.51 GB/s, 330.84 GB/s]
+          - [374.41 GB/s, 369.73 GB/s, 358.15 GB/s, 375.54 GB/s, 384.71 GB/s, 357.66
+              GB/s, 369.71 GB/s, 375.35 GB/s, 370.25 GB/s, 364.01 GB/s]
+        threads: [2, 4, 6, 8, 10, 12, 14, 16]
+        threads per core: 2
+        total size: [168.96 kB, 337.92 kB, 506.88 kB, 675.84 kB, 844.80 kB, 1.01 MB,
+          1.18 MB, 1.35 MB]
+    L3:
+      1:
+        cores: [1, 2, 3, 4, 5, 6, 7, 8]
+        results:
+          copy: [21.93 GB/s, 43.10 GB/s, 65.38 GB/s, 85.69 GB/s, 105.64 GB/s, 127.34
+              GB/s, 148.22 GB/s, 171.52 GB/s]
+          daxpy: [30.98 GB/s, 62.27 GB/s, 93.13 GB/s, 123.27 GB/s, 153.64 GB/s, 185.97
+              GB/s, 216.67 GB/s, 247.41 GB/s]
+          load: [23.47 GB/s, 46.84 GB/s, 69.74 GB/s, 92.76 GB/s, 115.37 GB/s, 139.23
+              GB/s, 163.12 GB/s, 186.65 GB/s]
+          triad: [24.72 GB/s, 49.11 GB/s, 72.42 GB/s, 95.36 GB/s, 119.46 GB/s, 144.60
+              GB/s, 168.66 GB/s, 189.45 GB/s]
+          update: [31.39 GB/s, 62.11 GB/s, 91.95 GB/s, 122.24 GB/s, 151.40 GB/s, 182.28
+              GB/s, 216.07 GB/s, 239.92 GB/s]
+        size per core: [13.20 MB, 6.60 MB, 4.40 MB, 3.30 MB, 2.64 MB, 2.20 MB, 1.89
+            MB, 1.65 MB]
+        size per thread: [13.20 MB, 6.60 MB, 4.40 MB, 3.30 MB, 2.64 MB, 2.20 MB, 1.89
+            MB, 1.65 MB]
+        stats:
+          copy:
+          - [21.64 GB/s, 20.85 GB/s, 20.56 GB/s, 21.69 GB/s, 21.06 GB/s, 21.46 GB/s,
+            21.93 GB/s, 21.73 GB/s, 21.83 GB/s, 21.69 GB/s]
+          - [42.86 GB/s, 42.70 GB/s, 42.72 GB/s, 38.47 GB/s, 42.82 GB/s, 43.10 GB/s,
+            42.66 GB/s, 42.44 GB/s, 42.61 GB/s, 42.48 GB/s]
+          - [64.95 GB/s, 64.34 GB/s, 63.93 GB/s, 65.38 GB/s, 64.36 GB/s, 63.60 GB/s,
+            62.65 GB/s, 63.66 GB/s, 63.51 GB/s, 63.75 GB/s]
+          - [84.07 GB/s, 83.97 GB/s, 83.34 GB/s, 83.91 GB/s, 81.16 GB/s, 85.69 GB/s,
+            85.40 GB/s, 85.37 GB/s, 85.42 GB/s, 84.48 GB/s]
+          - [102.83 GB/s, 104.24 GB/s, 105.42 GB/s, 103.68 GB/s, 105.22 GB/s, 105.64
+              GB/s, 103.15 GB/s, 102.02 GB/s, 100.60 GB/s, 105.09 GB/s]
+          - [125.46 GB/s, 122.23 GB/s, 123.56 GB/s, 124.59 GB/s, 127.03 GB/s, 125.39
+              GB/s, 124.50 GB/s, 127.02 GB/s, 126.95 GB/s, 127.34 GB/s]
+          - [147.99 GB/s, 146.65 GB/s, 139.23 GB/s, 147.69 GB/s, 146.42 GB/s, 145.65
+              GB/s, 148.22 GB/s, 143.77 GB/s, 147.96 GB/s, 147.70 GB/s]
+          - [168.36 GB/s, 168.24 GB/s, 164.99 GB/s, 165.32 GB/s, 167.08 GB/s, 165.98
+              GB/s, 165.39 GB/s, 165.84 GB/s, 166.15 GB/s, 171.52 GB/s]
+          daxpy:
+          - [30.92 GB/s, 30.74 GB/s, 30.87 GB/s, 30.98 GB/s, 30.45 GB/s, 29.62 GB/s,
+            29.54 GB/s, 30.04 GB/s, 30.94 GB/s, 30.93 GB/s]
+          - [61.96 GB/s, 61.38 GB/s, 61.27 GB/s, 62.27 GB/s, 61.36 GB/s, 61.27 GB/s,
+            62.06 GB/s, 60.01 GB/s, 61.49 GB/s, 62.16 GB/s]
+          - [92.26 GB/s, 93.06 GB/s, 88.45 GB/s, 92.18 GB/s, 93.13 GB/s, 92.11 GB/s,
+            92.28 GB/s, 92.28 GB/s, 93.03 GB/s, 92.78 GB/s]
+          - [123.22 GB/s, 123.06 GB/s, 123.27 GB/s, 119.42 GB/s, 122.94 GB/s, 122.54
+              GB/s, 123.24 GB/s, 115.90 GB/s, 121.65 GB/s, 122.47 GB/s]
+          - [151.70 GB/s, 145.65 GB/s, 149.53 GB/s, 152.52 GB/s, 153.64 GB/s, 152.93
+              GB/s, 152.81 GB/s, 153.01 GB/s, 153.04 GB/s, 152.06 GB/s]
+          - [184.04 GB/s, 171.51 GB/s, 184.83 GB/s, 184.09 GB/s, 185.97 GB/s, 183.75
+              GB/s, 184.66 GB/s, 182.54 GB/s, 184.39 GB/s, 184.40 GB/s]
+          - [198.70 GB/s, 216.51 GB/s, 216.17 GB/s, 203.10 GB/s, 211.40 GB/s, 215.04
+              GB/s, 215.48 GB/s, 216.03 GB/s, 216.24 GB/s, 216.67 GB/s]
+          - [246.02 GB/s, 247.35 GB/s, 245.00 GB/s, 244.65 GB/s, 229.12 GB/s, 243.37
+              GB/s, 247.22 GB/s, 247.41 GB/s, 246.03 GB/s, 244.83 GB/s]
+          load:
+          - [23.08 GB/s, 23.38 GB/s, 22.88 GB/s, 23.43 GB/s, 23.05 GB/s, 23.23 GB/s,
+            22.97 GB/s, 22.39 GB/s, 23.47 GB/s, 23.33 GB/s]
+          - [46.39 GB/s, 46.40 GB/s, 46.45 GB/s, 46.36 GB/s, 46.69 GB/s, 46.62 GB/s,
+            46.84 GB/s, 45.98 GB/s, 46.73 GB/s, 46.80 GB/s]
+          - [69.18 GB/s, 68.61 GB/s, 69.74 GB/s, 69.34 GB/s, 68.39 GB/s, 69.73 GB/s,
+            67.76 GB/s, 69.65 GB/s, 69.70 GB/s, 69.16 GB/s]
+          - [92.29 GB/s, 91.67 GB/s, 92.76 GB/s, 90.78 GB/s, 92.76 GB/s, 90.76 GB/s,
+            91.58 GB/s, 91.60 GB/s, 91.03 GB/s, 92.72 GB/s]
+          - [114.04 GB/s, 113.82 GB/s, 112.26 GB/s, 112.65 GB/s, 114.09 GB/s, 113.81
+              GB/s, 113.72 GB/s, 114.70 GB/s, 115.37 GB/s, 112.57 GB/s]
+          - [136.42 GB/s, 135.83 GB/s, 134.93 GB/s, 135.43 GB/s, 135.94 GB/s, 139.23
+              GB/s, 137.52 GB/s, 137.59 GB/s, 135.97 GB/s, 136.96 GB/s]
+          - [157.88 GB/s, 163.12 GB/s, 159.53 GB/s, 160.16 GB/s, 162.18 GB/s, 159.58
+              GB/s, 161.55 GB/s, 159.81 GB/s, 162.97 GB/s, 163.10 GB/s]
+          - [183.41 GB/s, 181.86 GB/s, 183.55 GB/s, 183.38 GB/s, 181.66 GB/s, 186.65
+              GB/s, 179.62 GB/s, 174.70 GB/s, 180.10 GB/s, 181.49 GB/s]
+          triad:
+          - [24.72 GB/s, 23.66 GB/s, 23.58 GB/s, 23.75 GB/s, 23.62 GB/s, 24.37 GB/s,
+            24.44 GB/s, 23.57 GB/s, 23.30 GB/s, 23.57 GB/s]
+          - [49.11 GB/s, 46.87 GB/s, 47.13 GB/s, 46.83 GB/s, 46.58 GB/s, 46.73 GB/s,
+            46.32 GB/s, 47.22 GB/s, 46.79 GB/s, 48.73 GB/s]
+          - [72.29 GB/s, 69.87 GB/s, 70.57 GB/s, 68.89 GB/s, 68.56 GB/s, 69.02 GB/s,
+            72.42 GB/s, 69.37 GB/s, 72.34 GB/s, 69.44 GB/s]
+          - [94.95 GB/s, 94.67 GB/s, 91.05 GB/s, 90.46 GB/s, 95.36 GB/s, 91.63 GB/s,
+            94.06 GB/s, 95.30 GB/s, 93.99 GB/s, 94.71 GB/s]
+          - [119.32 GB/s, 117.99 GB/s, 119.46 GB/s, 117.28 GB/s, 118.97 GB/s, 115.67
+              GB/s, 116.64 GB/s, 117.99 GB/s, 119.02 GB/s, 117.75 GB/s]
+          - [138.63 GB/s, 144.53 GB/s, 144.60 GB/s, 135.72 GB/s, 141.86 GB/s, 139.64
+              GB/s, 142.95 GB/s, 140.89 GB/s, 142.10 GB/s, 143.97 GB/s]
+          - [168.66 GB/s, 166.77 GB/s, 157.10 GB/s, 164.75 GB/s, 164.00 GB/s, 164.38
+              GB/s, 163.94 GB/s, 158.58 GB/s, 165.60 GB/s, 164.39 GB/s]
+          - [184.53 GB/s, 187.00 GB/s, 186.87 GB/s, 179.43 GB/s, 185.70 GB/s, 187.49
+              GB/s, 189.45 GB/s, 186.82 GB/s, 188.50 GB/s, 185.96 GB/s]
+          update:
+          - [30.60 GB/s, 31.20 GB/s, 30.65 GB/s, 31.39 GB/s, 30.89 GB/s, 30.75 GB/s,
+            30.58 GB/s, 30.99 GB/s, 30.69 GB/s, 31.34 GB/s]
+          - [60.99 GB/s, 62.11 GB/s, 61.42 GB/s, 61.55 GB/s, 61.79 GB/s, 61.24 GB/s,
+            61.37 GB/s, 61.74 GB/s, 61.45 GB/s, 61.58 GB/s]
+          - [91.11 GB/s, 91.21 GB/s, 91.95 GB/s, 91.19 GB/s, 91.14 GB/s, 91.36 GB/s,
+            91.30 GB/s, 91.70 GB/s, 90.84 GB/s, 91.09 GB/s]
+          - [120.90 GB/s, 120.49 GB/s, 121.35 GB/s, 122.24 GB/s, 120.37 GB/s, 119.83
+              GB/s, 119.32 GB/s, 119.48 GB/s, 119.11 GB/s, 119.76 GB/s]
+          - [146.72 GB/s, 147.18 GB/s, 147.81 GB/s, 151.40 GB/s, 147.81 GB/s, 146.84
+              GB/s, 147.51 GB/s, 148.15 GB/s, 146.89 GB/s, 148.41 GB/s]
+          - [179.93 GB/s, 179.68 GB/s, 182.28 GB/s, 179.65 GB/s, 179.06 GB/s, 182.25
+              GB/s, 182.03 GB/s, 179.10 GB/s, 178.82 GB/s, 177.84 GB/s]
+          - [208.84 GB/s, 210.17 GB/s, 210.20 GB/s, 210.81 GB/s, 209.88 GB/s, 211.16
+              GB/s, 216.07 GB/s, 211.77 GB/s, 208.89 GB/s, 210.47 GB/s]
+          - [236.56 GB/s, 239.05 GB/s, 237.81 GB/s, 237.20 GB/s, 238.68 GB/s, 237.69
+              GB/s, 239.05 GB/s, 239.38 GB/s, 239.92 GB/s, 238.63 GB/s]
+        threads: [1, 2, 3, 4, 5, 6, 7, 8]
+        threads per core: 1
+        total size: [13.20 MB, 13.20 MB, 13.20 MB, 13.20 MB, 13.20 MB, 13.20 MB, 13.20
+            MB, 13.20 MB]
+      2:
+        cores: [1, 2, 3, 4, 5, 6, 7, 8]
+        results:
+          copy: [23.35 GB/s, 45.63 GB/s, 68.10 GB/s, 89.46 GB/s, 111.10 GB/s, 134.20
+              GB/s, 154.44 GB/s, 174.89 GB/s]
+          daxpy: [32.32 GB/s, 64.16 GB/s, 96.12 GB/s, 126.75 GB/s, 156.91 GB/s, 188.57
+              GB/s, 221.57 GB/s, 251.65 GB/s]
+          load: [25.14 GB/s, 50.38 GB/s, 75.49 GB/s, 101.06 GB/s, 126.04 GB/s, 151.12
+              GB/s, 172.57 GB/s, 196.91 GB/s]
+          triad: [25.15 GB/s, 50.37 GB/s, 75.31 GB/s, 99.12 GB/s, 123.25 GB/s, 150.29
+              GB/s, 171.60 GB/s, 197.81 GB/s]
+          update: [32.98 GB/s, 65.60 GB/s, 97.60 GB/s, 130.34 GB/s, 162.76 GB/s, 194.12
+              GB/s, 229.02 GB/s, 260.35 GB/s]
+        size per core: [13.20 MB, 6.60 MB, 4.40 MB, 3.30 MB, 2.64 MB, 2.20 MB, 1.89
+            MB, 1.65 MB]
+        size per thread: [6.60 MB, 3.30 MB, 2.20 MB, 1.65 MB, 1.32 MB, 1.10 MB, 0.94
+            MB, 825.00 kB]
+        stats:
+          copy:
+          - [22.79 GB/s, 22.55 GB/s, 22.86 GB/s, 22.74 GB/s, 23.09 GB/s, 22.51 GB/s,
+            23.35 GB/s, 23.32 GB/s, 23.02 GB/s, 22.75 GB/s]
+          - [45.32 GB/s, 45.15 GB/s, 45.63 GB/s, 44.84 GB/s, 44.54 GB/s, 44.33 GB/s,
+            44.68 GB/s, 44.98 GB/s, 44.64 GB/s, 44.75 GB/s]
+          - [68.10 GB/s, 67.88 GB/s, 67.98 GB/s, 67.32 GB/s, 67.02 GB/s, 67.14 GB/s,
+            67.71 GB/s, 67.19 GB/s, 63.08 GB/s, 68.04 GB/s]
+          - [89.46 GB/s, 88.53 GB/s, 88.51 GB/s, 89.13 GB/s, 89.32 GB/s, 84.53 GB/s,
+            87.51 GB/s, 88.95 GB/s, 88.91 GB/s, 87.62 GB/s]
+          - [108.72 GB/s, 110.42 GB/s, 106.02 GB/s, 111.08 GB/s, 110.70 GB/s, 111.10
+              GB/s, 110.24 GB/s, 109.68 GB/s, 109.55 GB/s, 108.86 GB/s]
+          - [133.21 GB/s, 127.37 GB/s, 132.83 GB/s, 132.67 GB/s, 133.02 GB/s, 132.65
+              GB/s, 134.20 GB/s, 132.96 GB/s, 118.86 GB/s, 131.20 GB/s]
+          - [152.95 GB/s, 153.90 GB/s, 153.80 GB/s, 153.22 GB/s, 153.32 GB/s, 142.75
+              GB/s, 152.99 GB/s, 154.44 GB/s, 154.43 GB/s, 152.24 GB/s]
+          - [174.89 GB/s, 171.49 GB/s, 157.46 GB/s, 172.90 GB/s, 173.42 GB/s, 171.07
+              GB/s, 171.82 GB/s, 170.68 GB/s, 172.19 GB/s, 161.38 GB/s]
+          daxpy:
+          - [31.88 GB/s, 32.27 GB/s, 31.11 GB/s, 32.20 GB/s, 32.17 GB/s, 32.32 GB/s,
+            32.20 GB/s, 32.32 GB/s, 30.76 GB/s, 32.03 GB/s]
+          - [64.16 GB/s, 63.70 GB/s, 64.04 GB/s, 63.55 GB/s, 60.64 GB/s, 64.05 GB/s,
+            63.56 GB/s, 63.36 GB/s, 63.94 GB/s, 63.86 GB/s]
+          - [96.12 GB/s, 95.66 GB/s, 95.93 GB/s, 95.93 GB/s, 96.10 GB/s, 95.94 GB/s,
+            95.78 GB/s, 95.79 GB/s, 95.17 GB/s, 89.44 GB/s]
+          - [126.04 GB/s, 126.43 GB/s, 126.09 GB/s, 124.90 GB/s, 125.07 GB/s, 125.74
+              GB/s, 118.86 GB/s, 125.80 GB/s, 125.10 GB/s, 126.75 GB/s]
+          - [155.92 GB/s, 155.99 GB/s, 156.32 GB/s, 151.54 GB/s, 156.49 GB/s, 156.91
+              GB/s, 154.92 GB/s, 155.92 GB/s, 156.20 GB/s, 154.49 GB/s]
+          - [185.57 GB/s, 180.38 GB/s, 187.51 GB/s, 187.10 GB/s, 186.44 GB/s, 187.13
+              GB/s, 187.31 GB/s, 188.10 GB/s, 187.91 GB/s, 188.57 GB/s]
+          - [207.55 GB/s, 219.63 GB/s, 219.38 GB/s, 219.81 GB/s, 220.29 GB/s, 219.72
+              GB/s, 221.05 GB/s, 216.76 GB/s, 221.57 GB/s, 220.75 GB/s]
+          - [250.81 GB/s, 250.78 GB/s, 251.19 GB/s, 251.28 GB/s, 249.10 GB/s, 250.42
+              GB/s, 251.65 GB/s, 244.31 GB/s, 250.40 GB/s, 250.19 GB/s]
+          load:
+          - [24.84 GB/s, 24.86 GB/s, 25.09 GB/s, 25.04 GB/s, 24.74 GB/s, 24.87 GB/s,
+            25.01 GB/s, 25.08 GB/s, 25.14 GB/s, 25.00 GB/s]
+          - [50.03 GB/s, 49.40 GB/s, 50.28 GB/s, 50.08 GB/s, 50.37 GB/s, 49.75 GB/s,
+            50.01 GB/s, 50.38 GB/s, 49.89 GB/s, 50.24 GB/s]
+          - [74.37 GB/s, 74.65 GB/s, 74.40 GB/s, 73.45 GB/s, 73.31 GB/s, 73.00 GB/s,
+            75.49 GB/s, 73.94 GB/s, 74.42 GB/s, 74.80 GB/s]
+          - [99.51 GB/s, 99.43 GB/s, 98.90 GB/s, 99.83 GB/s, 98.74 GB/s, 100.75 GB/s,
+            99.33 GB/s, 99.81 GB/s, 100.00 GB/s, 101.06 GB/s]
+          - [126.04 GB/s, 126.03 GB/s, 124.70 GB/s, 124.86 GB/s, 125.31 GB/s, 124.78
+              GB/s, 125.99 GB/s, 123.52 GB/s, 124.45 GB/s, 123.01 GB/s]
+          - [146.95 GB/s, 150.27 GB/s, 151.12 GB/s, 150.93 GB/s, 150.68 GB/s, 149.75
+              GB/s, 150.67 GB/s, 146.01 GB/s, 148.34 GB/s, 149.15 GB/s]
+          - [169.40 GB/s, 172.12 GB/s, 172.40 GB/s, 171.99 GB/s, 172.57 GB/s, 171.95
+              GB/s, 167.06 GB/s, 169.66 GB/s, 168.34 GB/s, 169.45 GB/s]
+          - [192.68 GB/s, 191.98 GB/s, 192.82 GB/s, 191.84 GB/s, 191.97 GB/s, 196.91
+              GB/s, 193.36 GB/s, 190.12 GB/s, 192.04 GB/s, 193.93 GB/s]
+          triad:
+          - [24.78 GB/s, 25.03 GB/s, 25.07 GB/s, 24.81 GB/s, 24.65 GB/s, 24.80 GB/s,
+            24.71 GB/s, 25.15 GB/s, 24.70 GB/s, 24.25 GB/s]
+          - [49.63 GB/s, 48.68 GB/s, 49.73 GB/s, 49.97 GB/s, 50.37 GB/s, 49.89 GB/s,
+            49.59 GB/s, 49.00 GB/s, 49.96 GB/s, 49.61 GB/s]
+          - [74.88 GB/s, 74.99 GB/s, 75.31 GB/s, 73.20 GB/s, 74.50 GB/s, 72.88 GB/s,
+            73.43 GB/s, 73.74 GB/s, 74.59 GB/s, 74.60 GB/s]
+          - [95.80 GB/s, 97.67 GB/s, 98.93 GB/s, 97.79 GB/s, 98.74 GB/s, 97.74 GB/s,
+            98.87 GB/s, 99.12 GB/s, 97.90 GB/s, 97.96 GB/s]
+          - [121.15 GB/s, 120.28 GB/s, 120.66 GB/s, 121.19 GB/s, 121.09 GB/s, 121.68
+              GB/s, 121.30 GB/s, 123.22 GB/s, 122.51 GB/s, 123.25 GB/s]
+          - [146.72 GB/s, 146.38 GB/s, 146.25 GB/s, 146.49 GB/s, 146.29 GB/s, 144.30
+              GB/s, 142.89 GB/s, 150.29 GB/s, 146.37 GB/s, 146.30 GB/s]
+          - [166.36 GB/s, 168.18 GB/s, 168.79 GB/s, 170.27 GB/s, 169.26 GB/s, 170.98
+              GB/s, 170.77 GB/s, 171.43 GB/s, 169.53 GB/s, 171.60 GB/s]
+          - [190.83 GB/s, 197.81 GB/s, 196.29 GB/s, 197.12 GB/s, 196.21 GB/s, 188.40
+              GB/s, 191.07 GB/s, 195.14 GB/s, 192.48 GB/s, 194.23 GB/s]
+          update:
+          - [32.74 GB/s, 32.98 GB/s, 32.73 GB/s, 32.57 GB/s, 32.63 GB/s, 32.41 GB/s,
+            32.61 GB/s, 32.24 GB/s, 32.52 GB/s, 32.49 GB/s]
+          - [65.22 GB/s, 65.07 GB/s, 64.65 GB/s, 65.26 GB/s, 63.70 GB/s, 64.19 GB/s,
+            64.35 GB/s, 64.83 GB/s, 65.60 GB/s, 63.99 GB/s]
+          - [97.60 GB/s, 96.65 GB/s, 97.50 GB/s, 96.07 GB/s, 97.12 GB/s, 96.41 GB/s,
+            96.85 GB/s, 96.80 GB/s, 97.10 GB/s, 97.10 GB/s]
+          - [129.18 GB/s, 127.79 GB/s, 129.50 GB/s, 129.46 GB/s, 128.85 GB/s, 128.69
+              GB/s, 129.02 GB/s, 130.34 GB/s, 129.92 GB/s, 129.11 GB/s]
+          - [160.00 GB/s, 161.81 GB/s, 160.37 GB/s, 159.56 GB/s, 160.38 GB/s, 161.91
+              GB/s, 160.54 GB/s, 161.43 GB/s, 160.59 GB/s, 162.76 GB/s]
+          - [192.24 GB/s, 193.69 GB/s, 191.11 GB/s, 190.65 GB/s, 193.10 GB/s, 191.30
+              GB/s, 192.50 GB/s, 193.37 GB/s, 191.98 GB/s, 194.12 GB/s]
+          - [221.45 GB/s, 229.02 GB/s, 226.33 GB/s, 224.81 GB/s, 225.62 GB/s, 224.79
+              GB/s, 226.03 GB/s, 227.09 GB/s, 226.46 GB/s, 225.88 GB/s]
+          - [255.45 GB/s, 256.52 GB/s, 254.06 GB/s, 257.76 GB/s, 256.85 GB/s, 256.27
+              GB/s, 260.35 GB/s, 259.96 GB/s, 258.40 GB/s, 255.79 GB/s]
+        threads: [2, 4, 6, 8, 10, 12, 14, 16]
+        threads per core: 2
+        total size: [13.20 MB, 13.20 MB, 13.20 MB, 13.20 MB, 13.20 MB, 13.20 MB, 13.20
+            MB, 13.20 MB]
+    MEM:
+      1:
+        cores: [1, 2, 3, 4, 5, 6, 7, 8]
+        results:
+          copy: [11.12 GB/s, 20.53 GB/s, 24.86 GB/s, 26.20 GB/s, 26.47 GB/s, 26.35
+              GB/s, 26.24 GB/s, 26.17 GB/s]
+          daxpy: [16.10 GB/s, 30.00 GB/s, 36.88 GB/s, 38.86 GB/s, 39.36 GB/s, 39.19
+              GB/s, 39.02 GB/s, 38.88 GB/s]
+          load: [12.30 GB/s, 23.50 GB/s, 33.04 GB/s, 40.59 GB/s, 44.03 GB/s, 44.56
+              GB/s, 44.26 GB/s, 43.77 GB/s]
+          triad: [12.41 GB/s, 24.13 GB/s, 29.24 GB/s, 30.73 GB/s, 30.68 GB/s, 30.58
+              GB/s, 30.54 GB/s, 30.63 GB/s]
+          update: [17.40 GB/s, 31.16 GB/s, 36.80 GB/s, 39.06 GB/s, 39.80 GB/s, 39.77
+              GB/s, 39.50 GB/s, 39.24 GB/s]
+        size per core: [300.00 MB, 150.00 MB, 100.00 MB, 75.00 MB, 60.00 MB, 50.00
+            MB, 42.86 MB, 37.50 MB]
+        size per thread: [300.00 MB, 150.00 MB, 100.00 MB, 75.00 MB, 60.00 MB, 50.00
+            MB, 42.86 MB, 37.50 MB]
+        stats:
+          copy:
+          - [10.83 GB/s, 10.83 GB/s, 10.81 GB/s, 10.82 GB/s, 10.82 GB/s, 10.82 GB/s,
+            10.83 GB/s, 10.81 GB/s, 10.82 GB/s, 11.12 GB/s]
+          - [20.34 GB/s, 20.38 GB/s, 20.37 GB/s, 20.34 GB/s, 20.41 GB/s, 20.39 GB/s,
+            20.39 GB/s, 20.39 GB/s, 20.53 GB/s, 20.35 GB/s]
+          - [24.70 GB/s, 24.76 GB/s, 24.80 GB/s, 24.86 GB/s, 24.75 GB/s, 24.80 GB/s,
+            24.77 GB/s, 24.82 GB/s, 24.81 GB/s, 24.73 GB/s]
+          - [26.10 GB/s, 26.16 GB/s, 26.14 GB/s, 26.16 GB/s, 26.10 GB/s, 26.15 GB/s,
+            26.10 GB/s, 26.15 GB/s, 26.11 GB/s, 26.20 GB/s]
+          - [26.45 GB/s, 26.44 GB/s, 26.41 GB/s, 26.43 GB/s, 26.45 GB/s, 26.44 GB/s,
+            26.46 GB/s, 26.47 GB/s, 26.45 GB/s, 26.44 GB/s]
+          - [26.34 GB/s, 26.30 GB/s, 26.31 GB/s, 26.33 GB/s, 26.26 GB/s, 26.35 GB/s,
+            26.30 GB/s, 26.30 GB/s, 26.30 GB/s, 26.34 GB/s]
+          - [26.20 GB/s, 26.24 GB/s, 26.21 GB/s, 26.22 GB/s, 26.22 GB/s, 26.20 GB/s,
+            26.20 GB/s, 26.23 GB/s, 26.22 GB/s, 26.23 GB/s]
+          - [26.15 GB/s, 26.17 GB/s, 26.12 GB/s, 26.15 GB/s, 26.15 GB/s, 26.15 GB/s,
+            26.12 GB/s, 26.14 GB/s, 26.14 GB/s, 26.17 GB/s]
+          daxpy:
+          - [15.77 GB/s, 15.77 GB/s, 16.04 GB/s, 15.68 GB/s, 15.72 GB/s, 15.76 GB/s,
+            15.91 GB/s, 15.77 GB/s, 16.10 GB/s, 16.04 GB/s]
+          - [29.88 GB/s, 29.80 GB/s, 30.00 GB/s, 29.87 GB/s, 29.87 GB/s, 30.00 GB/s,
+            29.79 GB/s, 29.80 GB/s, 29.80 GB/s, 29.82 GB/s]
+          - [36.63 GB/s, 36.73 GB/s, 36.64 GB/s, 36.64 GB/s, 36.81 GB/s, 36.88 GB/s,
+            36.62 GB/s, 36.65 GB/s, 36.74 GB/s, 36.71 GB/s]
+          - [38.82 GB/s, 38.83 GB/s, 38.86 GB/s, 38.81 GB/s, 38.81 GB/s, 38.82 GB/s,
+            38.85 GB/s, 38.80 GB/s, 38.84 GB/s, 38.73 GB/s]
+          - [39.32 GB/s, 39.30 GB/s, 39.34 GB/s, 39.36 GB/s, 39.28 GB/s, 39.33 GB/s,
+            39.31 GB/s, 39.25 GB/s, 39.32 GB/s, 39.33 GB/s]
+          - [39.10 GB/s, 39.12 GB/s, 39.14 GB/s, 39.16 GB/s, 39.17 GB/s, 39.17 GB/s,
+            39.13 GB/s, 39.15 GB/s, 39.14 GB/s, 39.19 GB/s]
+          - [39.01 GB/s, 39.01 GB/s, 39.02 GB/s, 39.02 GB/s, 39.00 GB/s, 39.00 GB/s,
+            38.97 GB/s, 39.02 GB/s, 38.98 GB/s, 39.01 GB/s]
+          - [38.76 GB/s, 38.86 GB/s, 38.83 GB/s, 38.82 GB/s, 38.87 GB/s, 38.88 GB/s,
+            38.81 GB/s, 38.83 GB/s, 38.88 GB/s, 38.88 GB/s]
+          load:
+          - [11.97 GB/s, 11.96 GB/s, 11.98 GB/s, 11.97 GB/s, 11.96 GB/s, 12.05 GB/s,
+            12.30 GB/s, 12.18 GB/s, 11.97 GB/s, 11.96 GB/s]
+          - [22.85 GB/s, 22.85 GB/s, 22.87 GB/s, 22.94 GB/s, 23.50 GB/s, 22.86 GB/s,
+            22.86 GB/s, 23.25 GB/s, 22.85 GB/s, 22.86 GB/s]
+          - [33.04 GB/s, 32.43 GB/s, 32.51 GB/s, 32.52 GB/s, 32.52 GB/s, 32.81 GB/s,
+            32.77 GB/s, 32.54 GB/s, 32.53 GB/s, 32.53 GB/s]
+          - [39.95 GB/s, 39.94 GB/s, 39.93 GB/s, 40.15 GB/s, 40.59 GB/s, 40.36 GB/s,
+            40.28 GB/s, 39.93 GB/s, 39.94 GB/s, 39.98 GB/s]
+          - [43.98 GB/s, 43.86 GB/s, 43.90 GB/s, 43.80 GB/s, 43.83 GB/s, 43.86 GB/s,
+            44.03 GB/s, 43.94 GB/s, 43.83 GB/s, 43.92 GB/s]
+          - [44.46 GB/s, 44.34 GB/s, 44.56 GB/s, 44.51 GB/s, 44.32 GB/s, 44.32 GB/s,
+            44.51 GB/s, 44.48 GB/s, 44.32 GB/s, 44.34 GB/s]
+          - [44.03 GB/s, 44.26 GB/s, 44.08 GB/s, 44.18 GB/s, 44.10 GB/s, 43.99 GB/s,
+            44.07 GB/s, 44.06 GB/s, 43.94 GB/s, 43.97 GB/s]
+          - [43.48 GB/s, 43.77 GB/s, 43.51 GB/s, 43.49 GB/s, 43.47 GB/s, 43.73 GB/s,
+            43.55 GB/s, 43.68 GB/s, 43.49 GB/s, 43.50 GB/s]
+          triad:
+          - [12.11 GB/s, 12.02 GB/s, 12.03 GB/s, 12.10 GB/s, 12.03 GB/s, 12.04 GB/s,
+            12.05 GB/s, 12.17 GB/s, 12.02 GB/s, 12.41 GB/s]
+          - [23.43 GB/s, 23.25 GB/s, 23.25 GB/s, 23.36 GB/s, 23.28 GB/s, 23.24 GB/s,
+            23.61 GB/s, 23.29 GB/s, 23.31 GB/s, 24.13 GB/s]
+          - [28.92 GB/s, 29.10 GB/s, 29.17 GB/s, 29.04 GB/s, 28.91 GB/s, 29.16 GB/s,
+            28.82 GB/s, 29.01 GB/s, 29.24 GB/s, 28.88 GB/s]
+          - [30.65 GB/s, 30.62 GB/s, 30.73 GB/s, 30.59 GB/s, 30.69 GB/s, 30.68 GB/s,
+            30.59 GB/s, 30.59 GB/s, 30.57 GB/s, 30.67 GB/s]
+          - [30.53 GB/s, 30.67 GB/s, 30.65 GB/s, 30.53 GB/s, 30.63 GB/s, 30.68 GB/s,
+            30.50 GB/s, 30.67 GB/s, 30.64 GB/s, 30.67 GB/s]
+          - [30.45 GB/s, 30.58 GB/s, 30.51 GB/s, 30.49 GB/s, 30.52 GB/s, 30.49 GB/s,
+            30.56 GB/s, 30.55 GB/s, 30.47 GB/s, 30.47 GB/s]
+          - [30.51 GB/s, 30.47 GB/s, 30.50 GB/s, 30.47 GB/s, 30.52 GB/s, 30.54 GB/s,
+            30.54 GB/s, 30.50 GB/s, 30.49 GB/s, 30.50 GB/s]
+          - [30.58 GB/s, 30.34 GB/s, 30.56 GB/s, 30.54 GB/s, 30.63 GB/s, 30.53 GB/s,
+            30.59 GB/s, 30.50 GB/s, 30.54 GB/s, 30.47 GB/s]
+          update:
+          - [17.33 GB/s, 17.32 GB/s, 17.34 GB/s, 17.35 GB/s, 17.40 GB/s, 17.35 GB/s,
+            17.36 GB/s, 17.39 GB/s, 17.35 GB/s, 17.35 GB/s]
+          - [31.12 GB/s, 31.15 GB/s, 31.10 GB/s, 31.16 GB/s, 31.07 GB/s, 31.08 GB/s,
+            31.09 GB/s, 31.12 GB/s, 31.12 GB/s, 31.08 GB/s]
+          - [36.80 GB/s, 36.42 GB/s, 35.92 GB/s, 36.39 GB/s, 35.99 GB/s, 35.98 GB/s,
+            36.37 GB/s, 36.39 GB/s, 36.38 GB/s, 36.44 GB/s]
+          - [39.03 GB/s, 39.05 GB/s, 39.02 GB/s, 39.06 GB/s, 39.01 GB/s, 39.02 GB/s,
+            39.02 GB/s, 39.00 GB/s, 39.00 GB/s, 39.00 GB/s]
+          - [39.76 GB/s, 39.80 GB/s, 39.80 GB/s, 39.78 GB/s, 39.76 GB/s, 39.79 GB/s,
+            39.79 GB/s, 39.77 GB/s, 39.77 GB/s, 39.71 GB/s]
+          - [39.71 GB/s, 39.72 GB/s, 39.72 GB/s, 39.66 GB/s, 39.74 GB/s, 39.70 GB/s,
+            39.76 GB/s, 39.74 GB/s, 39.77 GB/s, 39.74 GB/s]
+          - [39.50 GB/s, 39.47 GB/s, 39.45 GB/s, 39.43 GB/s, 39.46 GB/s, 39.45 GB/s,
+            39.45 GB/s, 39.40 GB/s, 39.43 GB/s, 39.47 GB/s]
+          - [39.21 GB/s, 39.18 GB/s, 39.19 GB/s, 39.19 GB/s, 39.21 GB/s, 39.19 GB/s,
+            39.18 GB/s, 39.21 GB/s, 39.20 GB/s, 39.24 GB/s]
+        threads: [1, 2, 3, 4, 5, 6, 7, 8]
+        threads per core: 1
+        total size: [300.00 MB, 300.00 MB, 300.00 MB, 300.00 MB, 300.00 MB, 300.00
+            MB, 300.00 MB, 300.00 MB]
+      2:
+        cores: [1, 2, 3, 4, 5, 6, 7, 8]
+        results:
+          copy: [10.79 GB/s, 20.46 GB/s, 24.69 GB/s, 25.42 GB/s, 25.63 GB/s, 25.45
+              GB/s, 25.32 GB/s, 25.06 GB/s]
+          daxpy: [15.97 GB/s, 29.70 GB/s, 35.95 GB/s, 37.55 GB/s, 37.81 GB/s, 37.78
+              GB/s, 37.64 GB/s, 37.33 GB/s]
+          load: [13.46 GB/s, 25.84 GB/s, 35.75 GB/s, 40.54 GB/s, 42.38 GB/s, 42.30
+              GB/s, 41.85 GB/s, 41.19 GB/s]
+          triad: [12.05 GB/s, 22.53 GB/s, 27.53 GB/s, 29.10 GB/s, 29.68 GB/s, 29.79
+              GB/s, 29.85 GB/s, 29.64 GB/s]
+          update: [19.12 GB/s, 33.86 GB/s, 38.51 GB/s, 39.38 GB/s, 39.20 GB/s, 38.80
+              GB/s, 38.39 GB/s, 38.02 GB/s]
+        size per core: [300.00 MB, 150.00 MB, 100.00 MB, 75.00 MB, 60.00 MB, 50.00
+            MB, 42.86 MB, 37.50 MB]
+        size per thread: [150.00 MB, 75.00 MB, 50.00 MB, 37.50 MB, 30.00 MB, 25.00
+            MB, 21.43 MB, 18.75 MB]
+        stats:
+          copy:
+          - [10.71 GB/s, 10.69 GB/s, 10.71 GB/s, 10.70 GB/s, 10.79 GB/s, 10.58 GB/s,
+            10.70 GB/s, 10.69 GB/s, 10.69 GB/s, 10.70 GB/s]
+          - [20.27 GB/s, 20.31 GB/s, 20.27 GB/s, 20.26 GB/s, 20.31 GB/s, 20.26 GB/s,
+            20.24 GB/s, 20.26 GB/s, 20.26 GB/s, 20.46 GB/s]
+          - [24.69 GB/s, 24.66 GB/s, 24.64 GB/s, 24.63 GB/s, 24.67 GB/s, 24.64 GB/s,
+            24.64 GB/s, 24.68 GB/s, 24.61 GB/s, 24.63 GB/s]
+          - [25.42 GB/s, 25.41 GB/s, 25.40 GB/s, 25.36 GB/s, 25.40 GB/s, 25.39 GB/s,
+            25.40 GB/s, 25.38 GB/s, 25.41 GB/s, 25.39 GB/s]
+          - [25.55 GB/s, 25.57 GB/s, 25.58 GB/s, 25.63 GB/s, 25.57 GB/s, 25.57 GB/s,
+            25.58 GB/s, 25.55 GB/s, 25.57 GB/s, 25.49 GB/s]
+          - [25.42 GB/s, 25.42 GB/s, 25.41 GB/s, 25.39 GB/s, 25.40 GB/s, 25.43 GB/s,
+            25.45 GB/s, 25.44 GB/s, 25.43 GB/s, 25.43 GB/s]
+          - [25.27 GB/s, 25.31 GB/s, 25.28 GB/s, 25.31 GB/s, 25.32 GB/s, 25.31 GB/s,
+            25.29 GB/s, 25.30 GB/s, 25.25 GB/s, 25.28 GB/s]
+          - [25.03 GB/s, 25.01 GB/s, 25.01 GB/s, 25.04 GB/s, 25.00 GB/s, 25.03 GB/s,
+            25.06 GB/s, 25.04 GB/s, 25.04 GB/s, 25.04 GB/s]
+          daxpy:
+          - [15.81 GB/s, 15.81 GB/s, 15.97 GB/s, 15.62 GB/s, 15.64 GB/s, 15.83 GB/s,
+            15.63 GB/s, 15.82 GB/s, 15.81 GB/s, 15.63 GB/s]
+          - [29.62 GB/s, 29.56 GB/s, 29.61 GB/s, 29.59 GB/s, 29.70 GB/s, 29.61 GB/s,
+            29.65 GB/s, 29.65 GB/s, 29.58 GB/s, 29.59 GB/s]
+          - [35.95 GB/s, 35.89 GB/s, 35.92 GB/s, 35.92 GB/s, 35.95 GB/s, 35.90 GB/s,
+            35.87 GB/s, 35.90 GB/s, 35.92 GB/s, 35.82 GB/s]
+          - [37.55 GB/s, 37.46 GB/s, 37.52 GB/s, 37.51 GB/s, 37.55 GB/s, 37.51 GB/s,
+            37.44 GB/s, 37.41 GB/s, 37.50 GB/s, 37.40 GB/s]
+          - [37.79 GB/s, 37.76 GB/s, 37.80 GB/s, 37.77 GB/s, 37.76 GB/s, 37.81 GB/s,
+            37.78 GB/s, 37.81 GB/s, 37.79 GB/s, 37.78 GB/s]
+          - [37.71 GB/s, 37.68 GB/s, 37.68 GB/s, 37.73 GB/s, 37.74 GB/s, 37.66 GB/s,
+            37.78 GB/s, 37.74 GB/s, 37.71 GB/s, 37.70 GB/s]
+          - [37.61 GB/s, 37.60 GB/s, 37.61 GB/s, 37.62 GB/s, 37.64 GB/s, 37.61 GB/s,
+            37.60 GB/s, 37.59 GB/s, 37.63 GB/s, 37.60 GB/s]
+          - [37.23 GB/s, 37.21 GB/s, 37.26 GB/s, 37.27 GB/s, 37.28 GB/s, 37.33 GB/s,
+            37.29 GB/s, 37.31 GB/s, 37.26 GB/s, 37.29 GB/s]
+          load:
+          - [13.34 GB/s, 13.36 GB/s, 13.35 GB/s, 13.34 GB/s, 13.35 GB/s, 13.38 GB/s,
+            13.46 GB/s, 13.35 GB/s, 13.35 GB/s, 13.35 GB/s]
+          - [25.63 GB/s, 25.64 GB/s, 25.84 GB/s, 25.64 GB/s, 25.74 GB/s, 25.63 GB/s,
+            25.64 GB/s, 25.63 GB/s, 25.64 GB/s, 25.68 GB/s]
+          - [35.38 GB/s, 35.56 GB/s, 35.50 GB/s, 35.75 GB/s, 35.50 GB/s, 35.39 GB/s,
+            35.46 GB/s, 35.39 GB/s, 35.75 GB/s, 35.40 GB/s]
+          - [40.37 GB/s, 40.37 GB/s, 40.49 GB/s, 40.49 GB/s, 40.42 GB/s, 40.37 GB/s,
+            40.54 GB/s, 40.39 GB/s, 40.37 GB/s, 40.51 GB/s]
+          - [42.34 GB/s, 42.14 GB/s, 42.26 GB/s, 42.17 GB/s, 42.10 GB/s, 42.13 GB/s,
+            42.38 GB/s, 42.13 GB/s, 42.21 GB/s, 42.15 GB/s]
+          - [42.30 GB/s, 42.13 GB/s, 42.20 GB/s, 42.11 GB/s, 42.12 GB/s, 42.12 GB/s,
+            42.18 GB/s, 42.25 GB/s, 42.19 GB/s, 42.21 GB/s]
+          - [41.70 GB/s, 41.76 GB/s, 41.85 GB/s, 41.80 GB/s, 41.71 GB/s, 41.71 GB/s,
+            41.80 GB/s, 41.70 GB/s, 41.76 GB/s, 41.75 GB/s]
+          - [41.02 GB/s, 41.01 GB/s, 41.17 GB/s, 41.12 GB/s, 41.13 GB/s, 41.15 GB/s,
+            41.19 GB/s, 41.01 GB/s, 41.10 GB/s, 41.06 GB/s]
+          triad:
+          - [11.87 GB/s, 11.89 GB/s, 11.91 GB/s, 11.81 GB/s, 11.83 GB/s, 11.85 GB/s,
+            11.90 GB/s, 11.80 GB/s, 11.85 GB/s, 12.05 GB/s]
+          - [22.53 GB/s, 22.47 GB/s, 22.44 GB/s, 22.46 GB/s, 22.43 GB/s, 22.52 GB/s,
+            22.41 GB/s, 22.52 GB/s, 22.48 GB/s, 22.41 GB/s]
+          - [27.43 GB/s, 27.42 GB/s, 27.47 GB/s, 27.47 GB/s, 27.52 GB/s, 27.49 GB/s,
+            27.41 GB/s, 27.42 GB/s, 27.51 GB/s, 27.53 GB/s]
+          - [29.02 GB/s, 29.03 GB/s, 29.03 GB/s, 29.04 GB/s, 28.89 GB/s, 29.10 GB/s,
+            29.02 GB/s, 29.05 GB/s, 28.93 GB/s, 29.01 GB/s]
+          - [29.66 GB/s, 29.68 GB/s, 29.60 GB/s, 29.62 GB/s, 29.60 GB/s, 29.67 GB/s,
+            29.66 GB/s, 29.62 GB/s, 29.62 GB/s, 29.62 GB/s]
+          - [29.78 GB/s, 29.76 GB/s, 29.77 GB/s, 29.77 GB/s, 29.75 GB/s, 29.79 GB/s,
+            29.75 GB/s, 29.77 GB/s, 29.76 GB/s, 29.78 GB/s]
+          - [29.82 GB/s, 29.85 GB/s, 29.85 GB/s, 29.83 GB/s, 29.82 GB/s, 29.83 GB/s,
+            29.83 GB/s, 29.81 GB/s, 29.81 GB/s, 29.80 GB/s]
+          - [29.54 GB/s, 29.63 GB/s, 29.57 GB/s, 29.56 GB/s, 29.55 GB/s, 29.64 GB/s,
+            29.60 GB/s, 29.53 GB/s, 29.54 GB/s, 29.57 GB/s]
+          update:
+          - [18.66 GB/s, 18.67 GB/s, 18.66 GB/s, 19.12 GB/s, 18.67 GB/s, 18.67 GB/s,
+            18.67 GB/s, 18.67 GB/s, 18.70 GB/s, 18.67 GB/s]
+          - [33.61 GB/s, 33.34 GB/s, 33.71 GB/s, 33.31 GB/s, 33.34 GB/s, 33.86 GB/s,
+            33.62 GB/s, 33.35 GB/s, 33.54 GB/s, 33.34 GB/s]
+          - [38.51 GB/s, 38.46 GB/s, 38.42 GB/s, 38.43 GB/s, 38.41 GB/s, 38.46 GB/s,
+            38.41 GB/s, 38.42 GB/s, 38.43 GB/s, 38.41 GB/s]
+          - [39.37 GB/s, 39.34 GB/s, 39.36 GB/s, 39.35 GB/s, 39.37 GB/s, 39.38 GB/s,
+            39.36 GB/s, 39.35 GB/s, 39.31 GB/s, 39.32 GB/s]
+          - [39.17 GB/s, 39.17 GB/s, 39.16 GB/s, 39.20 GB/s, 39.18 GB/s, 39.17 GB/s,
+            39.18 GB/s, 39.15 GB/s, 39.20 GB/s, 39.17 GB/s]
+          - [38.79 GB/s, 38.79 GB/s, 38.80 GB/s, 38.78 GB/s, 38.78 GB/s, 38.75 GB/s,
+            38.80 GB/s, 38.77 GB/s, 38.78 GB/s, 38.78 GB/s]
+          - [38.36 GB/s, 38.37 GB/s, 38.37 GB/s, 38.39 GB/s, 38.36 GB/s, 38.37 GB/s,
+            38.38 GB/s, 38.37 GB/s, 38.35 GB/s, 38.39 GB/s]
+          - [37.98 GB/s, 37.99 GB/s, 38.02 GB/s, 38.01 GB/s, 38.01 GB/s, 38.00 GB/s,
+            38.02 GB/s, 38.00 GB/s, 38.02 GB/s, 38.02 GB/s]
+        threads: [2, 4, 6, 8, 10, 12, 14, 16]
+        threads per core: 2
+        total size: [300.00 MB, 300.00 MB, 300.00 MB, 300.00 MB, 300.00 MB, 300.00
+            MB, 300.00 MB, 300.00 MB]
diff --git a/pystencils_tests/kerncraft_inputs/default_machine_file.yaml b/pystencils_tests/kerncraft_inputs/default_machine_file.yaml
deleted file mode 100644
index edec1eef99dfcc153c7c6e933b60d1b6edca74be..0000000000000000000000000000000000000000
--- a/pystencils_tests/kerncraft_inputs/default_machine_file.yaml
+++ /dev/null
@@ -1,277 +0,0 @@
-kerncraft version: 0.7.3
-clock: 2.7 GHz
-cores per socket: 8
-cores per NUMA domain: 8
-NUMA domains per socket: 1
-model type: Intel Core SandyBridge EP processor
-model name: Intel(R) Xeon(R) CPU E5-2680 0 @ 2.70GHz
-sockets: 2
-threads per core: 2
-cacheline size: 64 B
-compiler:
-    !!omap
-    - icc: -O3 -xAVX -fno-alias -qopenmp
-    - clang: -O3 -march=corei7-avx -mtune=corei7-avx -D_POSIX_C_SOURCE=200112L -fopenmp
-    - gcc: -O3 -march=corei7-avx -D_POSIX_C_SOURCE=200112L -fopenmp
-micro-architecture: SNB
-FLOPs per cycle:
-    SP: {total: 16, ADD: 8, MUL: 8}
-    DP: {total: 8, ADD: 4, MUL: 4}
-overlapping model:
-    ports: ["0", "0DV", "1", "2", "3", "4", "5"]
-    performance counter metric:
-        Max(UOPS_DISPATCHED_PORT_PORT_0:PMC[0-3],
-            UOPS_DISPATCHED_PORT_PORT_1:PMC[0-3],
-            UOPS_DISPATCHED_PORT_PORT_4:PMC[0-3],
-            UOPS_DISPATCHED_PORT_PORT_5:PMC[0-3])
-non-overlapping model:
-    ports: ["2D", "3D"]
-    performance counter metric: T_OL + T_L1L2 + T_L2L3 + T_L3MEM
-write-allocate: True
-memory hierarchy:
-    - level: L1
-      cache per group: {
-         'sets': 64, 'ways': 8, 'cl_size': 64, # 32 kB
-         'replacement_policy': 'LRU',
-         'write_allocate': True, 'write_back': True,
-         'load_from': 'L2', 'store_to': 'L2'}
-      cores per group: 1
-      threads per group: 2
-      groups: 16
-      performance counter metrics:
-          accesses: MEM_UOPS_RETIRED_LOADS:PMC[0-3]
-          misses: L1D_REPLACEMENT:PMC[0-3]
-          evicts: L1D_M_EVICT:PMC[0-3]
-    - level: L2
-      cache per group: {
-         'sets': 512, 'ways': 8, 'cl_size': 64, # 256 kB
-         'replacement_policy': 'LRU',
-         'write_allocate': True, 'write_back': True,
-         'load_from': 'L3', 'store_to': 'L3'}
-      cores per group: 1
-      threads per group: 2
-      groups: 16
-      non-overlap upstream throughput: [32 B/cy, 'half-duplex']
-      performance counter metrics:
-          accesses: L1D_REPLACEMENT:PMC[0-3]
-          misses: L2_LINES_IN_ALL:PMC[0-3]
-          evicts: L2_TRANS_L2_WB:PMC[0-3]
-    - level: L3
-      cache per group: {
-         'sets': 20480, 'ways': 16, 'cl_size': 64, # 20 MB
-         'replacement_policy': 'LRU',
-         'write_allocate': True, 'write_back': True}
-      cores per group: 8
-      threads per group: 16
-      groups: 2
-      non-overlap upstream throughput: [32 B/cy, 'half-duplex']
-      performance counter metrics:
-          accesses: L2_LINES_IN_ALL:PMC[0-3]
-          misses: (CAS_COUNT_RD:MBOX0C[01] + CAS_COUNT_RD:MBOX1C[01] +
-                   CAS_COUNT_RD:MBOX2C[01] + CAS_COUNT_RD:MBOX3C[01])
-          evicts: (CAS_COUNT_WR:MBOX0C[01] + CAS_COUNT_WR:MBOX1C[01] +
-                   CAS_COUNT_WR:MBOX2C[01] + CAS_COUNT_WR:MBOX3C[01])
-    - level: MEM
-      cores per group: 8
-      non-overlap upstream throughput: ['full socket memory bandwidth', 'half-duplex']
-      size per group: null
-      threads per group: 16
-benchmarks:
-  kernels:
-    copy:
-      FLOPs per iteration: 0
-      read streams: {bytes: 8.00 B, streams: 1}
-      read+write streams: {bytes: 0.00 B, streams: 0}
-      write streams: {bytes: 8.00 B, streams: 1}
-    daxpy:
-      FLOPs per iteration: 2
-      read streams: {bytes: 16.00 B, streams: 2}
-      read+write streams: {bytes: 8.00 B, streams: 1}
-      write streams: {bytes: 8.00 B, streams: 1}
-    load:
-      FLOPs per iteration: 0
-      read streams: {bytes: 8.00 B, streams: 1}
-      read+write streams: {bytes: 0.00 B, streams: 0}
-      write streams: {bytes: 0.00 B, streams: 0}
-    triad:
-      FLOPs per iteration: 2
-      read streams: {bytes: 24.00 B, streams: 3}
-      read+write streams: {bytes: 0.00 B, streams: 0}
-      write streams: {bytes: 8.00 B, streams: 1}
-    update:
-      FLOPs per iteration: 0
-      read streams: {bytes: 8.00 B, streams: 1}
-      read+write streams: {bytes: 8.00 B, streams: 1}
-      write streams: {bytes: 8.00 B, streams: 1}
-  measurements:
-    L1:
-      1:
-        cores: [1, 2, 3, 4, 5, 6, 7, 8]
-        results:
-          copy: [81.98 GB/s, 163.75 GB/s, 245.62 GB/s, 327.69 GB/s, 409.41 GB/s, 489.83
-              GB/s, 571.67 GB/s, 653.50 GB/s]
-          daxpy: [71.55 GB/s, 143.01 GB/s, 214.86 GB/s, 286.26 GB/s, 355.60 GB/s,
-            426.71 GB/s, 497.45 GB/s, 568.97 GB/s]
-          load: [61.92 GB/s, 122.79 GB/s, 183.01 GB/s, 244.30 GB/s, 306.76 GB/s, 368.46
-              GB/s, 427.41 GB/s, 490.88 GB/s]
-          triad: [81.61 GB/s, 163.25 GB/s, 244.92 GB/s, 326.65 GB/s, 406.69 GB/s,
-            487.76 GB/s, 569.10 GB/s, 650.39 GB/s]
-          update: [84.03 GB/s, 168.02 GB/s, 252.10 GB/s, 335.94 GB/s, 419.90 GB/s,
-            503.88 GB/s, 587.86 GB/s, 671.88 GB/s]
-        size per core: [16.00 kB, 16.00 kB, 16.00 kB, 16.00 kB, 16.00 kB, 16.00 kB,
-          16.00 kB, 16.00 kB]
-        size per thread: [16.00 kB, 16.00 kB, 16.00 kB, 16.00 kB, 16.00 kB, 16.00
-            kB, 16.00 kB, 16.00 kB]
-        threads: [1, 2, 3, 4, 5, 6, 7, 8]
-        threads per core: 1
-        total size: [16.00 kB, 32.00 kB, 48.00 kB, 64.00 kB, 80.00 kB, 96.00 kB, 112.00
-            kB, 128.00 kB]
-      2:
-        cores: [1, 2, 3, 4, 5, 6, 7, 8]
-        results:
-          copy: [79.53 GB/s, 158.70 GB/s, 238.20 GB/s, 317.62 GB/s, 397.09 GB/s, 476.33
-              GB/s, 555.69 GB/s, 634.96 GB/s]
-          daxpy: [70.94 GB/s, 141.90 GB/s, 212.97 GB/s, 283.91 GB/s, 354.93 GB/s,
-            425.85 GB/s, 496.74 GB/s, 567.40 GB/s]
-          load: [57.01 GB/s, 114.11 GB/s, 171.11 GB/s, 228.13 GB/s, 285.15 GB/s, 342.11
-              GB/s, 399.11 GB/s, 456.11 GB/s]
-          triad: [79.48 GB/s, 159.03 GB/s, 238.53 GB/s, 318.04 GB/s, 392.11 GB/s,
-            477.10 GB/s, 538.36 GB/s, 636.02 GB/s]
-          update: [82.75 GB/s, 165.55 GB/s, 248.50 GB/s, 331.32 GB/s, 414.06 GB/s,
-            496.82 GB/s, 579.83 GB/s, 662.36 GB/s]
-        size per core: [16.00 kB, 16.00 kB, 16.00 kB, 16.00 kB, 16.00 kB, 16.00 kB,
-          16.00 kB, 16.00 kB]
-        size per thread: [8.00 kB, 8.00 kB, 8.00 kB, 8.00 kB, 8.00 kB, 8.00 kB, 8.00
-            kB, 8.00 kB]
-        threads: [2, 4, 6, 8, 10, 12, 14, 16]
-        threads per core: 2
-        total size: [16.00 kB, 32.00 kB, 48.00 kB, 64.00 kB, 80.00 kB, 96.00 kB, 112.00
-            kB, 128.00 kB]
-    L2:
-      1:
-        cores: [1, 2, 3, 4, 5, 6, 7, 8]
-        results:
-          copy: [41.28 GB/s, 81.96 GB/s, 120.28 GB/s, 160.70 GB/s, 203.22 GB/s, 239.97
-              GB/s, 271.13 GB/s, 307.01 GB/s]
-          daxpy: [48.85 GB/s, 98.62 GB/s, 143.29 GB/s, 197.76 GB/s, 230.58 GB/s, 284.98
-              GB/s, 334.22 GB/s, 385.72 GB/s]
-          load: [38.51 GB/s, 76.67 GB/s, 114.73 GB/s, 152.90 GB/s, 188.69 GB/s, 223.64
-              GB/s, 265.21 GB/s, 289.41 GB/s]
-          triad: [40.92 GB/s, 83.49 GB/s, 124.48 GB/s, 165.24 GB/s, 206.74 GB/s, 237.90
-              GB/s, 274.96 GB/s, 329.09 GB/s]
-          update: [50.37 GB/s, 100.05 GB/s, 145.43 GB/s, 196.82 GB/s, 244.07 GB/s,
-            301.62 GB/s, 336.88 GB/s, 403.78 GB/s]
-        size per core: [128.00 kB, 128.00 kB, 128.00 kB, 128.00 kB, 128.00 kB, 128.00
-            kB, 128.00 kB, 128.00 kB]
-        size per thread: [128.00 kB, 128.00 kB, 128.00 kB, 128.00 kB, 128.00 kB, 128.00
-            kB, 128.00 kB, 128.00 kB]
-        threads: [1, 2, 3, 4, 5, 6, 7, 8]
-        threads per core: 1
-        total size: [128.00 kB, 256.00 kB, 384.00 kB, 512.00 kB, 640.00 kB, 768.00
-            kB, 0.90 MB, 1.02 MB]
-      2:
-        cores: [1, 2, 3, 4, 5, 6, 7, 8]
-        results:
-          copy: [42.17 GB/s, 83.47 GB/s, 124.57 GB/s, 163.78 GB/s, 202.56 GB/s, 242.80
-              GB/s, 276.95 GB/s, 311.36 GB/s]
-          daxpy: [50.87 GB/s, 98.72 GB/s, 152.12 GB/s, 193.48 GB/s, 251.36 GB/s, 301.72
-              GB/s, 352.55 GB/s, 365.28 GB/s]
-          load: [39.62 GB/s, 79.03 GB/s, 118.03 GB/s, 157.85 GB/s, 196.48 GB/s, 237.44
-              GB/s, 276.81 GB/s, 309.71 GB/s]
-          triad: [44.80 GB/s, 88.35 GB/s, 125.13 GB/s, 169.94 GB/s, 209.60 GB/s, 260.15
-              GB/s, 300.75 GB/s, 333.08 GB/s]
-          update: [49.80 GB/s, 100.70 GB/s, 150.56 GB/s, 196.44 GB/s, 251.90 GB/s,
-            280.93 GB/s, 352.74 GB/s, 399.27 GB/s]
-        size per core: [128.00 kB, 128.00 kB, 128.00 kB, 128.00 kB, 128.00 kB, 128.00
-            kB, 128.00 kB, 128.00 kB]
-        size per thread: [64.00 kB, 64.00 kB, 64.00 kB, 64.00 kB, 64.00 kB, 64.00
-            kB, 64.00 kB, 64.00 kB]
-        threads: [2, 4, 6, 8, 10, 12, 14, 16]
-        threads per core: 2
-        total size: [128.00 kB, 256.00 kB, 384.00 kB, 512.00 kB, 640.00 kB, 768.00
-            kB, 0.90 MB, 1.02 MB]
-    L3:
-      1:
-        cores: [1, 2, 3, 4, 5, 6, 7, 8]
-        results:
-          copy: [23.21 GB/s, 46.01 GB/s, 67.96 GB/s, 90.17 GB/s, 111.47 GB/s, 133.14
-              GB/s, 153.84 GB/s, 174.92 GB/s]
-          daxpy: [30.35 GB/s, 60.32 GB/s, 90.00 GB/s, 119.71 GB/s, 148.87 GB/s, 178.39
-              GB/s, 207.10 GB/s, 236.25 GB/s]
-          load: [23.35 GB/s, 46.52 GB/s, 69.57 GB/s, 92.60 GB/s, 115.77 GB/s, 138.89
-              GB/s, 161.82 GB/s, 184.11 GB/s]
-          triad: [25.18 GB/s, 50.08 GB/s, 74.33 GB/s, 98.78 GB/s, 122.66 GB/s, 146.78
-              GB/s, 170.52 GB/s, 194.47 GB/s]
-          update: [32.67 GB/s, 64.65 GB/s, 95.98 GB/s, 127.29 GB/s, 157.67 GB/s, 188.22
-              GB/s, 217.41 GB/s, 246.99 GB/s]
-        size per core: [1.25 MB, 1.25 MB, 1.25 MB, 1.25 MB, 1.25 MB, 1.25 MB, 1.25
-            MB, 1.25 MB]
-        size per thread: [1.25 MB, 1.25 MB, 1.25 MB, 1.25 MB, 1.25 MB, 1.25 MB, 1.25
-            MB, 1.25 MB]
-        threads: [1, 2, 3, 4, 5, 6, 7, 8]
-        threads per core: 1
-        total size: [1.25 MB, 2.50 MB, 3.75 MB, 5.00 MB, 6.25 MB, 7.50 MB, 8.75 MB,
-          10.00 MB]
-      2:
-        cores: [1, 2, 3, 4, 5, 6, 7, 8]
-        results:
-          copy: [23.83 GB/s, 47.25 GB/s, 69.84 GB/s, 92.61 GB/s, 114.31 GB/s, 136.48
-              GB/s, 157.55 GB/s, 178.99 GB/s]
-          daxpy: [31.52 GB/s, 62.72 GB/s, 93.43 GB/s, 124.29 GB/s, 154.55 GB/s, 185.18
-              GB/s, 215.10 GB/s, 245.24 GB/s]
-          load: [27.63 GB/s, 54.93 GB/s, 81.57 GB/s, 108.63 GB/s, 134.91 GB/s, 161.72
-              GB/s, 188.15 GB/s, 214.94 GB/s]
-          triad: [25.90 GB/s, 51.76 GB/s, 76.73 GB/s, 102.29 GB/s, 126.17 GB/s, 152.10
-              GB/s, 176.71 GB/s, 200.64 GB/s]
-          update: [34.10 GB/s, 67.67 GB/s, 100.62 GB/s, 133.50 GB/s, 165.61 GB/s,
-            197.74 GB/s, 228.73 GB/s, 259.05 GB/s]
-        size per core: [1.25 MB, 1.25 MB, 1.25 MB, 1.25 MB, 1.25 MB, 1.25 MB, 1.25
-            MB, 1.25 MB]
-        size per thread: [625.00 kB, 625.00 kB, 625.00 kB, 625.00 kB, 625.00 kB, 625.00
-            kB, 625.00 kB, 625.00 kB]
-        threads: [2, 4, 6, 8, 10, 12, 14, 16]
-        threads per core: 2
-        total size: [1.25 MB, 2.50 MB, 3.75 MB, 5.00 MB, 6.25 MB, 7.50 MB, 8.75 MB,
-          10.00 MB]
-    MEM:
-      1:
-        cores: [1, 2, 3, 4, 5, 6, 7, 8]
-        results:
-          copy: [11.60 GB/s, 21.29 GB/s, 25.94 GB/s, 27.28 GB/s, 27.47 GB/s, 27.36
-              GB/s, 27.21 GB/s, 27.12 GB/s]
-          daxpy: [17.33 GB/s, 31.89 GB/s, 38.65 GB/s, 40.50 GB/s, 40.81 GB/s, 40.62
-              GB/s, 40.59 GB/s, 40.26 GB/s]
-          load: [12.01 GB/s, 23.04 GB/s, 32.79 GB/s, 40.21 GB/s, 43.39 GB/s, 44.14
-              GB/s, 44.42 GB/s, 44.40 GB/s]
-          triad: [12.73 GB/s, 24.27 GB/s, 30.43 GB/s, 31.46 GB/s, 31.77 GB/s, 31.74
-              GB/s, 31.65 GB/s, 31.52 GB/s]
-          update: [18.91 GB/s, 32.43 GB/s, 37.28 GB/s, 39.98 GB/s, 40.99 GB/s, 40.92
-              GB/s, 40.61 GB/s, 40.34 GB/s]
-        size per core: [40.00 MB, 20.00 MB, 13.33 MB, 10.00 MB, 8.00 MB, 6.67 MB,
-          5.71 MB, 5.00 MB]
-        size per thread: [40.00 MB, 20.00 MB, 13.33 MB, 10.00 MB, 8.00 MB, 6.67 MB,
-          5.71 MB, 5.00 MB]
-        threads: [1, 2, 3, 4, 5, 6, 7, 8]
-        threads per core: 1
-        total size: [40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB]
-      2:
-        cores: [1, 2, 3, 4, 5, 6, 7, 8]
-        results:
-          copy: [10.92 GB/s, 20.62 GB/s, 25.34 GB/s, 26.22 GB/s, 26.32 GB/s, 26.31
-              GB/s, 26.22 GB/s, 26.16 GB/s]
-          daxpy: [17.15 GB/s, 31.96 GB/s, 38.12 GB/s, 39.19 GB/s, 39.38 GB/s, 39.16
-              GB/s, 39.06 GB/s, 38.87 GB/s]
-          load: [13.49 GB/s, 25.92 GB/s, 36.16 GB/s, 41.56 GB/s, 43.34 GB/s, 43.40
-              GB/s, 43.01 GB/s, 42.66 GB/s]
-          triad: [12.38 GB/s, 23.17 GB/s, 28.69 GB/s, 29.98 GB/s, 30.50 GB/s, 30.59
-              GB/s, 30.75 GB/s, 30.70 GB/s]
-          update: [19.67 GB/s, 34.93 GB/s, 39.93 GB/s, 40.79 GB/s, 40.43 GB/s, 40.03
-              GB/s, 39.62 GB/s, 39.33 GB/s]
-        size per core: [40.00 MB, 20.00 MB, 13.33 MB, 10.00 MB, 8.00 MB, 6.67 MB,
-          5.71 MB, 5.00 MB]
-        size per thread: [20.00 MB, 10.00 MB, 6.67 MB, 5.00 MB, 4.00 MB, 3.33 MB,
-          2.86 MB, 2.50 MB]
-        threads: [2, 4, 6, 8, 10, 12, 14, 16]
-        threads per core: 2
-        total size: [40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB, 40.00 MB]
-
diff --git a/pystencils_tests/test_kerncraft_coupling.py b/pystencils_tests/test_kerncraft_coupling.py
index 0040006097bc5f48461105cb1d0462313c18bd1a..653ed34d90e6ecd45a7a5785fb71d522cc3734f5 100644
--- a/pystencils_tests/test_kerncraft_coupling.py
+++ b/pystencils_tests/test_kerncraft_coupling.py
@@ -1,28 +1,33 @@
-import os
-
 import numpy as np
 import pytest
 import sympy as sp
-import kerncraft
+from pathlib import Path
+
+from kerncraft.kernel import KernelCode
+from kerncraft.machinemodel import MachineModel
+from kerncraft.models import ECM, ECMData, Benchmark
 
 from pystencils import Assignment, Field
 from pystencils.cpu import create_kernel
 from pystencils.kerncraft_coupling import KerncraftParameters, PyStencilsKerncraftKernel
-from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark
+from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark, run_c_benchmark
+from pystencils.timeloop import TimeLoop
 
-SCRIPT_FOLDER = os.path.dirname(os.path.realpath(__file__))
-INPUT_FOLDER = os.path.join(SCRIPT_FOLDER, "kerncraft_inputs")
+SCRIPT_FOLDER = Path(__file__).parent
+INPUT_FOLDER = SCRIPT_FOLDER / "kerncraft_inputs"
 
 
 @pytest.mark.kerncraft
 def test_compilation():
-    machine_file_path = os.path.join(INPUT_FOLDER, "default_machine_file.yaml")
-    machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path)
+    machine_file_path = INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml"
+    machine = MachineModel(path_to_yaml=machine_file_path)
 
-    kernel_file_path = os.path.join(INPUT_FOLDER, "2d-5pt.c")
+    kernel_file_path = INPUT_FOLDER / "2d-5pt.c"
     with open(kernel_file_path) as kernel_file:
-        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
-        reference_kernel.as_code('likwid')
+        reference_kernel = KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
+        reference_kernel.get_kernel_header(name='test_kernel')
+        reference_kernel.get_kernel_code(name='test_kernel')
+        reference_kernel.get_main_code(kernel_function_name='test_kernel')
 
     size = [30, 50, 3]
     arr = np.zeros(size)
@@ -38,31 +43,31 @@ def test_compilation():
 
 @pytest.mark.kerncraft
 def analysis(kernel, model='ecmdata'):
-    machine_file_path = os.path.join(INPUT_FOLDER, "default_machine_file.yaml")
-    machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path)
+    machine_file_path = INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml"  # same machine file as the tests here
+    machine = MachineModel(path_to_yaml=machine_file_path)
     if model == 'ecmdata':
-        model = kerncraft.models.ECMData(kernel, machine, KerncraftParameters())
+        model = ECMData(kernel, machine, KerncraftParameters())  # default choice (model='ecmdata')
     elif model == 'ecm':
-        model = kerncraft.models.ECM(kernel, machine, KerncraftParameters())
+        model = ECM(kernel, machine, KerncraftParameters())  # the name 'model' now holds the model object
         # model.analyze()
         # model.plot()
     elif model == 'benchmark':
-        model = kerncraft.models.Benchmark(kernel, machine, KerncraftParameters())
+        model = Benchmark(kernel, machine, KerncraftParameters())
     else:
-        model = kerncraft.models.ECM(kernel, machine, KerncraftParameters())
+        model = ECM(kernel, machine, KerncraftParameters())  # any unrecognised model name falls back to ECM
     model.analyze()
     return model
 
 
 @pytest.mark.kerncraft
-def test_3d_7pt_iaca():
-    # Make sure you use the intel compiler
+def test_3d_7pt_osaca():
+
     size = [20, 200, 200]
-    kernel_file_path = os.path.join(INPUT_FOLDER, "3d-7pt.c")
-    machine_file_path = os.path.join(INPUT_FOLDER, "default_machine_file.yaml")
-    machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path)
+    kernel_file_path = INPUT_FOLDER / "3d-7pt.c"
+    machine_file_path = INPUT_FOLDER / "Example_SandyBridgeEP_E5-2680.yml"
+    machine_model = MachineModel(path_to_yaml=machine_file_path)
     with open(kernel_file_path) as kernel_file:
-        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
+        reference_kernel = KernelCode(kernel_file.read(), machine=machine_model, filename=kernel_file_path)
     reference_kernel.set_constant('M', size[0])
     reference_kernel.set_constant('N', size[1])
     assert size[1] == size[2]
@@ -76,7 +81,7 @@ def test_3d_7pt_iaca():
 
     update_rule = Assignment(b[0, 0, 0], s * rhs)
     ast = create_kernel([update_rule])
-    k = PyStencilsKerncraftKernel(ast, machine)
+    k = PyStencilsKerncraftKernel(ast, machine=machine_model)
     analysis(k, model='ecm')
     assert reference_kernel._flops == k._flops
     # assert reference.results['cl throughput'] == analysis.results['cl throughput']
@@ -85,9 +90,9 @@ def test_3d_7pt_iaca():
 @pytest.mark.kerncraft
 def test_2d_5pt():
     size = [30, 50, 3]
-    kernel_file_path = os.path.join(INPUT_FOLDER, "2d-5pt.c")
+    kernel_file_path = INPUT_FOLDER / "2d-5pt.c"
     with open(kernel_file_path) as kernel_file:
-        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path)
+        reference_kernel = KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path)
     reference = analysis(reference_kernel)
 
     arr = np.zeros(size)
@@ -107,9 +112,9 @@ def test_2d_5pt():
 @pytest.mark.kerncraft
 def test_3d_7pt():
     size = [30, 50, 50]
-    kernel_file_path = os.path.join(INPUT_FOLDER, "3d-7pt.c")
+    kernel_file_path = INPUT_FOLDER / "3d-7pt.c"
     with open(kernel_file_path) as kernel_file:
-        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path)
+        reference_kernel = KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path)
     reference_kernel.set_constant('M', size[0])
     reference_kernel.set_constant('N', size[1])
     assert size[1] == size[2]
@@ -128,3 +133,29 @@ def test_3d_7pt():
 
     for e1, e2 in zip(reference.results['cycles'], result.results['cycles']):
         assert e1 == e2
+
+
+@pytest.mark.kerncraft
+def test_benchmark():  # compares run_c_benchmark's result with TimeLoop.benchmark for the same kernel
+    size = [30, 50, 50]
+    arr = np.zeros(size)
+    a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
+    b = Field.create_from_numpy_array('b', arr, index_dimensions=0)
+    s = sp.Symbol("s")
+    rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]  # +-1 along each axis
+
+    update_rule = Assignment(b[0, 0, 0], s * rhs)
+    ast = create_kernel([update_rule])
+
+    c_benchmark_run = run_c_benchmark(ast, inner_iterations=1000, outer_iterations=1)
+
+    kernel = ast.compile()
+    a = np.full(size, fill_value=0.23)  # rebinds a: Field -> ndarray handed to the compiled kernel
+    b = np.full(size, fill_value=0.23)  # 0.23 presumably mirrors the C benchmark's init value - verify
+
+    timeloop = TimeLoop(steps=1)
+    timeloop.add_call(kernel, {'a': a, 'b': b, 's': 0.23})  # keys are the field/symbol names used above
+
+    timeloop_time = timeloop.benchmark(number_of_time_steps_for_estimation=1)
+
+    np.testing.assert_almost_equal(c_benchmark_run, timeloop_time, decimal=4)  # abs tol 1.5e-4; same units assumed