diff --git a/pystencils/kerncraft_coupling/generate_benchmark.py b/pystencils/kerncraft_coupling/generate_benchmark.py index c95c420d6c62d4428354f934a4e36fb7e7398b9d..7abf2578ac3802ce77add553b9dca88e39ac034d 100644 --- a/pystencils/kerncraft_coupling/generate_benchmark.py +++ b/pystencils/kerncraft_coupling/generate_benchmark.py @@ -1,7 +1,7 @@ import os import subprocess -from jinja2 import Template +from jinja2 import Environment, PackageLoader, StrictUndefined from pystencils.astnodes import PragmaBlock from pystencils.backends.cbackend import generate_c, get_headers @@ -10,116 +10,6 @@ from pystencils.data_types import get_base_type from pystencils.include import get_pystencils_include_path from pystencils.sympyextensions import prod -benchmark_template = Template(""" -#include "kerncraft.h" -#include <stdlib.h> -#include <stdint.h> -#include <stdbool.h> -#include <math.h> -#include <stdio.h> - -{{ includes }} - -{%- if likwid %} -#include <likwid.h> -{%- endif %} - -#define RESTRICT __restrict__ -#define FUNC_PREFIX -void dummy(void *); -void timing(double* wcTime, double* cpuTime); -extern int var_false; - - -{{kernel_code}} - - -int main(int argc, char **argv) -{ - {%- if likwid %} - likwid_markerInit(); - {%- endif %} - - {%- for field_name, dataType, size in fields %} - - // Initialization {{field_name}} - double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64); - for (unsigned long long i = 0; i < {{size}}; ++i) - {{field_name}}[i] = 0.23; - - if(var_false) - dummy({{field_name}}); - - {%- endfor %} - - - - {%- for constantName, dataType in constants %} - - // Constant {{constantName}} - {{dataType}} {{constantName}}; - {{constantName}} = 0.23; - if(var_false) - dummy(& {{constantName}}); - - {%- endfor %} - - {%- if likwid and openmp %} - #pragma omp parallel - { - likwid_markerRegisterRegion("loop"); - #pragma omp barrier - {%- elif likwid %} - likwid_markerRegisterRegion("loop"); - {%- endif %} - - for(int warmup = 1; warmup >= 0; --warmup) { - int repeat = 2; - if(warmup == 0) { - repeat = atoi(argv[1]); - {%- if likwid %} - likwid_markerStartRegion("loop"); - {%- endif %} - } - - {%- if timing %} - double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime; - timing(&wcStartTime, &cpuStartTime); - {%- endif %} - - for (; repeat > 0; --repeat) - { - {{kernelName}}({{call_argument_list}}); - - // Dummy calls - {%- for field_name, dataType, size in fields %} - if(var_false) dummy((void*){{field_name}}); - {%- endfor %} - {%- for constantName, dataType in constants %} - if(var_false) dummy((void*)&{{constantName}}); - {%- endfor %} - } - {%- if timing %} - timing(&wcEndTime, &cpuEndTime); - if( warmup == 0) - printf("%e\\n", (wcEndTime - wcStartTime) / atoi(argv[1]) ); - {%- endif %} - - } - - {%- if likwid %} - likwid_markerStopRegion("loop"); - {%- if openmp %} - } - {%- endif %} - {%- endif %} - - {%- if likwid %} - likwid_markerClose(); - {%- endif %} -} -""") - def generate_benchmark(ast, likwid=False, openmp=False, timing=False): """Return C code of a benchmark program for the given kernel. @@ -157,7 +47,7 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False): if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock): ast.body.args[0].pragma_line = '' - args = { + jinja_context = { 'likwid': likwid, 'openmp': openmp, 'kernel_code': generate_c(ast, dialect='c'), @@ -168,7 +58,10 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False): 'includes': includes, 'timing': timing, } - return benchmark_template.render(**args) + + env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined) + + return env.get_template('benchmark.c').render(**jinja_context) def run_c_benchmark(ast, inner_iterations, outer_iterations=3): diff --git a/pystencils/kerncraft_coupling/kerncraft_interface.py b/pystencils/kerncraft_coupling/kerncraft_interface.py index 26cd281f09aaa907750aee5dc92b46282a142710..d92bd67dae601dd43906aa43ab695bc185f3b42f 100644 --- a/pystencils/kerncraft_coupling/kerncraft_interface.py +++ b/pystencils/kerncraft_coupling/kerncraft_interface.py @@ -1,8 +1,11 @@ import warnings +import fcntl from collections import defaultdict from tempfile import TemporaryDirectory from typing import Optional +from jinja2 import Environment, PackageLoader, StrictUndefined + import kerncraft import sympy as sp from kerncraft.kerncraft import KernelCode @@ -15,6 +18,7 @@ from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark from pystencils.sympyextensions import count_operations_in_ast from pystencils.transformations import filtered_tree_iteration from pystencils.utils import DotDict +from pystencils.backends.cbackend import generate_c, get_headers class PyStencilsKerncraftKernel(KernelCode): @@ -129,24 +133,70 @@ class PyStencilsKerncraftKernel(KernelCode): print("----------------------------- FLOPS -------------------------------") pprint(self._flops) - def as_code(self, type_='iaca', openmp=False, as_filename=False): + def get_kernel_header(self, name='pystencils_kernel'): + file_name = "pystencils_kernel.h" + file_path = self.get_intermediate_location(file_name, machine_and_compiler_dependent=False) + lock_mode, lock_fp = self.lock_intermediate(file_path) + + if lock_mode == fcntl.LOCK_SH: + # use cache + with open(file_path) as f: + code = f.read() + else: # lock_mode == fcntl.LOCK_EX + function_signature = generate_c(self.kernel_ast, dialect='c', signature_only=True) + + jinja_context = { + 'function_signature': function_signature, + } + + env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined) + file_header = env.get_template('kernel.h').render(**jinja_context) + with open(file_path, 'w') as f: + f.write(file_header) + + fcntl.flock(lock_fp, fcntl.LOCK_SH) # degrade to shared lock + + return file_path, lock_fp + + def get_kernel_code(self, openmp=False, name='pystencils_kernl'): """ Generate and return compilable source code. Args: type_: can be iaca or likwid. openmp: if true, openmp code will be generated - as_filename: + as_filename: writes a file with the name as_filename """ - code = generate_benchmark(self.kernel_ast, likwid=type_ == 'likwid', openmp=openmp) - if as_filename: - fp, already_available = self._get_intermediate_file(f'kernel_{type_}.c', - machine_and_compiler_dependent=False) - if not already_available: - fp.write(code) - return fp.name - else: - return code + filename = 'pystencils_kernl' + if openmp: + filename += '-omp' + filename += '.c' + file_path = self.get_intermediate_location(filename, machine_and_compiler_dependent=False) + lock_mode, lock_fp = self.lock_intermediate(file_path) + + if lock_mode == fcntl.LOCK_SH: + # use cache + with open(file_path) as f: + code = f.read() + else: # lock_mode == fcntl.LOCK_EX + header_list = get_headers(self.kernel_ast) + includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list]) + + kernel_code = generate_c(self.kernel_ast, dialect='c') + + jinja_context = { + 'includes': includes, + 'kernel_code': kernel_code, + } + + env = Environment(loader=PackageLoader('pystencils.kerncraft_coupling'), undefined=StrictUndefined) + file_header = env.get_template('kernel.c').render(**jinja_context) + with open(file_path, 'w') as f: + f.write(file_header) + + fcntl.flock(lock_fp, fcntl.LOCK_SH) # degrade to shared lock + + return file_path, lock_fp class KerncraftParameters(DotDict): diff --git a/pystencils/kerncraft_coupling/templates/benchmark.c b/pystencils/kerncraft_coupling/templates/benchmark.c new file mode 100644 index 0000000000000000000000000000000000000000..2cda6787bc423e5b6505ef9abc74b77825474541 --- /dev/null +++ b/pystencils/kerncraft_coupling/templates/benchmark.c @@ -0,0 +1,108 @@ + +#include "kerncraft.h" +#include <stdlib.h> +#include <stdint.h> +#include <stdbool.h> +#include <math.h> +#include <stdio.h> + +{{ includes }} + +{%- if likwid %} +#include <likwid.h> +{%- endif %} + +#define RESTRICT __restrict__ +#define FUNC_PREFIX +void dummy(void *); +void timing(double* wcTime, double* cpuTime); +extern int var_false; + + +{{kernel_code}} + + +int main(int argc, char **argv) +{ + {%- if likwid %} + likwid_markerInit(); + {%- endif %} + + {%- for field_name, dataType, size in fields %} + + // Initialization {{field_name}} + double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64); + for (unsigned long long i = 0; i < {{size}}; ++i) + {{field_name}}[i] = 0.23; + + if(var_false) + dummy({{field_name}}); + + {%- endfor %} + + + + {%- for constantName, dataType in constants %} + + // Constant {{constantName}} + {{dataType}} {{constantName}}; + {{constantName}} = 0.23; + if(var_false) + dummy(& {{constantName}}); + + {%- endfor %} + + {%- if likwid and openmp %} + #pragma omp parallel + { + likwid_markerRegisterRegion("loop"); + #pragma omp barrier + {%- elif likwid %} + likwid_markerRegisterRegion("loop"); + {%- endif %} + + for(int warmup = 1; warmup >= 0; --warmup) { + int repeat = 2; + if(warmup == 0) { + repeat = atoi(argv[1]); + {%- if likwid %} + likwid_markerStartRegion("loop"); + {%- endif %} + } + + {%- if timing %} + double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime; + timing(&wcStartTime, &cpuStartTime); + {%- endif %} + + for (; repeat > 0; --repeat) + { + {{kernelName}}({{call_argument_list}}); + + // Dummy calls + {%- for field_name, dataType, size in fields %} + if(var_false) dummy((void*){{field_name}}); + {%- endfor %} + {%- for constantName, dataType in constants %} + if(var_false) dummy((void*)&{{constantName}}); + {%- endfor %} + } + {%- if timing %} + timing(&wcEndTime, &cpuEndTime); + if( warmup == 0) + printf("%e\\n", (wcEndTime - wcStartTime) / atoi(argv[1]) ); + {%- endif %} + + } + + {%- if likwid %} + likwid_markerStopRegion("loop"); + {%- if openmp %} + } + {%- endif %} + {%- endif %} + + {%- if likwid %} + likwid_markerClose(); + {%- endif %} +} diff --git a/pystencils/kerncraft_coupling/templates/kernel.c b/pystencils/kerncraft_coupling/templates/kernel.c new file mode 100644 index 0000000000000000000000000000000000000000..47fbf7cf25eda318a8fcecffa1477f5738eb1abc --- /dev/null +++ b/pystencils/kerncraft_coupling/templates/kernel.c @@ -0,0 +1,18 @@ + +#include "kerncraft.h" +#include <stdlib.h> +#include <stdint.h> +#include <stdbool.h> +#include <math.h> +#include <stdio.h> + +{{ includes }} + +#define RESTRICT __restrict__ +#define FUNC_PREFIX +void dummy(void *); +void timing(double* wcTime, double* cpuTime); +extern int var_false; + + +{{kernel_code}} \ No newline at end of file diff --git a/pystencils/kerncraft_coupling/templates/kernel.h b/pystencils/kerncraft_coupling/templates/kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..539d51f928ca5d702dc2dad8de7396a505e5c5ee --- /dev/null +++ b/pystencils/kerncraft_coupling/templates/kernel.h @@ -0,0 +1,3 @@ +#define FUNC_PREFIX + +{{function_signature}} \ No newline at end of file diff --git a/pystencils_tests/test_kerncraft_coupling.py b/pystencils_tests/test_kerncraft_coupling.py index 2b177ea3807bd7490fa3d5a29c35ccd628b0ab2c..ac7ed5de65ef65298ac715382c5b780ee9185178 100644 --- a/pystencils_tests/test_kerncraft_coupling.py +++ b/pystencils_tests/test_kerncraft_coupling.py @@ -4,6 +4,8 @@ import numpy as np import pytest import sympy as sp import kerncraft +from kerncraft.kernel import KernelCode +from kerncraft.machinemodel import MachineModel from pystencils import Assignment, Field from pystencils.cpu import create_kernel @@ -17,11 +19,11 @@ INPUT_FOLDER = os.path.join(SCRIPT_FOLDER, "kerncraft_inputs") @pytest.mark.kerncraft def test_compilation(): machine_file_path = os.path.join(INPUT_FOLDER, "Example_SandyBridgeEP_E5-2680.yml") - machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path) + machine = MachineModel(path_to_yaml=machine_file_path) kernel_file_path = os.path.join(INPUT_FOLDER, "2d-5pt.c") with open(kernel_file_path) as kernel_file: - reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path) + reference_kernel = KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path) reference_kernel.get_kernel_header(name='test_kernel') reference_kernel.get_kernel_code(name='test_kernel') reference_kernel.get_main_code(kernel_function_name='test_kernel') @@ -41,7 +43,7 @@ def test_compilation(): @pytest.mark.kerncraft def analysis(kernel, model='ecmdata'): machine_file_path = os.path.join(INPUT_FOLDER, "Example_SandyBridgeEP_E5-2680.yml") - machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path) + machine = MachineModel(path_to_yaml=machine_file_path) if model == 'ecmdata': model = kerncraft.models.ECMData(kernel, machine, KerncraftParameters()) elif model == 'ecm': @@ -62,9 +64,9 @@ def test_3d_7pt_OSACA(): size = [20, 200, 200] kernel_file_path = os.path.join(INPUT_FOLDER, "3d-7pt.c") machine_file_path = os.path.join(INPUT_FOLDER, "Example_SandyBridgeEP_E5-2680.yml") - machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path) + machine = MachineModel(path_to_yaml=machine_file_path) with open(kernel_file_path) as kernel_file: - reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path) + reference_kernel = KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path) reference_kernel.set_constant('M', size[0]) reference_kernel.set_constant('N', size[1]) assert size[1] == size[2] @@ -89,7 +91,7 @@ def test_2d_5pt(): size = [30, 50, 3] kernel_file_path = os.path.join(INPUT_FOLDER, "2d-5pt.c") with open(kernel_file_path) as kernel_file: - reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path) + reference_kernel = KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path) reference = analysis(reference_kernel) arr = np.zeros(size) @@ -111,7 +113,7 @@ def test_3d_7pt(): size = [30, 50, 50] kernel_file_path = os.path.join(INPUT_FOLDER, "3d-7pt.c") with open(kernel_file_path) as kernel_file: - reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path) + reference_kernel = KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path) reference_kernel.set_constant('M', size[0]) reference_kernel.set_constant('N', size[1]) assert size[1] == size[2]