Commit 9da89cd1 authored by Julian Hammer
Browse files

Updated Kerncraft Coupling

parent 622aaa6c
Pipeline #27789 failed with stage
in 12 minutes and 50 seconds
from tempfile import TemporaryDirectory
import fcntl
import textwrap
from copy import deepcopy
import warnings
import sympy as sp
from collections import defaultdict
......@@ -6,14 +10,18 @@ import kerncraft
import kerncraft.kernel
from typing import Optional
from kerncraft.machinemodel import MachineModel
from jinja2 import Template
from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark
from pystencils.astnodes import LoopOverCoordinate, SympyAssignment, ResolvedFieldAccess, KernelFunction
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.astnodes import \
LoopOverCoordinate, SympyAssignment, ResolvedFieldAccess, KernelFunction
from pystencils.field import get_layout_from_strides
from pystencils.sympyextensions import count_operations_in_ast
from pystencils.transformations import filtered_tree_iteration
from pystencils.utils import DotDict
import warnings
from pystencils.astnodes import PragmaBlock
class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
......@@ -39,6 +47,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
# Initialize state
self.asm_block = None
self._filename = filename
self._keep_intermediates = False
self.kernel_ast = ast
self.temporary_dir = TemporaryDirectory()
......@@ -94,7 +103,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
for field in fields_accessed:
layout = get_layout_tuple(field)
permuted_shape = list(field.shape[i] for i in layout)
self.set_variable(field.name, str(field.dtype), tuple(permuted_shape))
self.set_variable(field.name, (str(field.dtype),), tuple(permuted_shape))
# Scalars may be safely ignored
# for param in ast.get_parameters():
......@@ -127,14 +136,211 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
print("----------------------------- FLOPS -------------------------------")
pprint(self._flops)
def as_code(self, type_='iaca', openmp=False):
def get_kernel_header(self, name='kernel'):
    """
    Store the kernel's C declaration in ``kernel.h`` or reuse the cached file.

    The header text is produced by ``generate_c(self.kernel_ast, signature_only=True)``
    and is only (re)written when the intermediate file could not be locked in
    shared mode.

    :param name: not used by this method
    :return: tuple of the header's file name (``'kernel.h'``, not its full path)
             and the file pointer of the lock file
    """
    header_name = 'kernel.h'
    header_path = self.get_intermediate_location(
        header_name, machine_and_compiler_dependent=False)
    mode, lock_fp = self.lock_intermediate(header_path)

    if mode != fcntl.LOCK_SH:
        # Anything but a shared lock is handled as an exclusive lock:
        # the header has to be (re)generated.
        declaration = generate_c(self.kernel_ast, signature_only=True)
        with open(header_path, 'w') as header_file:
            header_file.write(declaration)
        self.release_exclusive_lock(lock_fp)  # degrade to shared lock
    else:
        # Shared lock: the cached file is read; its content is not used further here.
        with open(header_path) as header_file:
            header_file.read()

    return header_name, lock_fp
def get_kernel_code(self, openmp=False, name='kernel'):
    """
    Generate compilable C source with the kernel function and store it on disk.

    The source is only (re)generated when the intermediate file could not be
    locked in shared mode; otherwise the cached file is reused as-is.

    :param openmp: include openmp pragmas (or strip them); must currently be
                   falsy, anything else trips the assertion below
    :param name: name of kernel function; not used by this method
    :return: tuple of the path of the source file and the file pointer of the lock file
    """
    assert not openmp, "openmp is currently not supported by pystencils"
    filename = 'kernel'
    if openmp:
        filename += '-omp'
    filename += '.c'
    file_path = self.get_intermediate_location(
        filename, machine_and_compiler_dependent=False)
    lock_mode, lock_fp = self.lock_intermediate(file_path)

    if lock_mode != fcntl.LOCK_SH:
        # Not a shared lock, i.e. exclusive: the file needs an update.
        kernel_template = Template(textwrap.dedent("""
            #include <stdlib.h>
            #include <stdint.h>
            #include <stdbool.h>
            #include <math.h>
            #define RESTRICT __restrict__
            #define FUNC_PREFIX
            {{ includes }}
            {{kernel_code}}
            """))
        header_list = get_headers(self.kernel_ast)
        includes = "\n".join("#include %s" % (include_file,) for include_file in header_list)

        # Work on a copy so that self.kernel_ast itself is left untouched.
        kernel_ast = deepcopy(self.kernel_ast)
        # Strip "#pragma omp parallel" from within kernel, because main function
        # takes care of that
        if kernel_ast.body.args and isinstance(kernel_ast.body.args[0], PragmaBlock):
            kernel_ast.body.args[0].pragma_line = ''

        # Render from the stripped copy; rendering self.kernel_ast would
        # silently keep the pragma that was just removed.
        code = kernel_template.render(
            kernel_code=generate_c(kernel_ast, dialect='c'),
            includes=includes)

        # Store to file
        with open(file_path, 'w') as f:
            f.write(code)
        self.release_exclusive_lock(lock_fp)  # degrade to shared lock
    # else: shared lock, the cached file at file_path is reused unchanged

    return file_path, lock_fp
# Jinja2-style template text for the benchmark driver (a C ``main``).
# Placeholders used below:
#   constants          -- iterable of (constantName, dataType) pairs
#   fields             -- iterable of (field_name, dataType, size) triples
#   kernelName         -- name of the function called in the measurement loop
#   call_argument_list -- argument text placed inside the kernel call
# Arrays are declared and cast with the field's own dataType so that the
# pointer type agrees with the sizeof() used for the allocation.
CODE_TEMPLATE = textwrap.dedent("""
    #include <likwid.h>
    #include <stdlib.h>
    #include <stdint.h>
    #include <stdbool.h>
    #include <math.h>
    #include "kerncraft.h"
    #include "kernel.h"

    #define RESTRICT __restrict__
    #define FUNC_PREFIX
    void dummy(void *);
    extern int var_false;

    int main(int argc, char **argv) {
        {%- for constantName, dataType in constants %}
        // Constant {{constantName}}
        {{dataType}} {{constantName}};
        {{constantName}} = 0.23;
        {%- endfor %}

        // Declaring arrays
        {%- for field_name, dataType, size in fields %}

        // Initialization {{field_name}}
        {{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
        // TODO initialize in parallel context in same order as they are touched
        for (unsigned long long i = 0; i < {{size}}; ++i)
            {{field_name}}[i] = 0.23;
        {%- endfor %}

        likwid_markerInit();
        #pragma omp parallel
        {
            likwid_markerRegisterRegion("loop");
            #pragma omp barrier

            // Initializing arrays in same order as touched in kernel loop nest
            //INIT_ARRAYS;

            // Dummy call
            {%- for field_name, dataType, size in fields %}
            if(var_false) dummy({{field_name}});
            {%- endfor %}
            {%- for constantName, dataType in constants %}
            if(var_false) dummy(&{{constantName}});
            {%- endfor %}

            for(int warmup = 1; warmup >= 0; --warmup) {
                int repeat = 2;
                if(warmup == 0) {
                    repeat = atoi(argv[1]);
                    likwid_markerStartRegion("loop");
                }

                for(; repeat > 0; --repeat) {
                    {{kernelName}}({{call_argument_list}});

                    {%- for field_name, dataType, size in fields %}
                    if(var_false) dummy({{field_name}});
                    {%- endfor %}
                    {%- for constantName, dataType in constants %}
                    if(var_false) dummy(&{{constantName}});
                    {%- endfor %}
                }
            }
            likwid_markerStopRegion("loop");
        }
        likwid_markerClose();
        return 0;
    }
    """)
def get_main_code(self, kernel_function_name='kernel'):
    """
    Generate the benchmark driver ``main.c`` from the kernel AST and store it on disk.

    The file is only (re)generated when the intermediate file could not be
    locked in shared mode; otherwise the cached file is reused as-is.

    :param kernel_function_name: not used by this method; the name of the
                                 called function is taken from the AST
    :return: tuple of file path and shared lock file pointer
    """
    # TODO produce nicer code, including help text and other "comfort features".
    assert self.kernel_ast is not None, "AST does not exist, this could be due to running " \
                                        "based on a kernel description rather than code."

    file_path = self.get_intermediate_location('main.c', machine_and_compiler_dependent=False)
    lock_mode, lock_fp = self.lock_intermediate(file_path)

    if lock_mode != fcntl.LOCK_SH:
        # Not a shared lock, i.e. exclusive: the file needs an update.

        # NOTE(review): neither helper is imported in the visible file header;
        # the module paths below are assumed -- confirm against the pystencils
        # version in use.
        from pystencils.data_types import get_base_type
        from pystencils.sympyextensions import prod

        kernel_ast = self.kernel_ast
        accessed_fields = {f.name: f for f in kernel_ast.fields_accessed}
        constants = []
        fields = []
        call_parameters = []
        for p in kernel_ast.get_parameters():
            if not p.is_field_parameter:
                constants.append((p.symbol.name, str(p.symbol.dtype)))
                call_parameters.append(p.symbol.name)
            else:
                assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
                field = accessed_fields[p.field_name]
                dtype = str(get_base_type(p.symbol.dtype))
                fields.append((p.field_name, dtype, prod(field.shape)))
                call_parameters.append(p.field_name)

        header_list = get_headers(kernel_ast)
        includes = "\n".join("#include %s" % (include_file,) for include_file in header_list)

        # Generate code. The keyword must be spelled call_argument_list to
        # match the placeholder in CODE_TEMPLATE; a misspelt keyword renders
        # the kernel call with an empty argument list.
        benchmark_template = Template(self.CODE_TEMPLATE)
        code = benchmark_template.render(
            kernelName=kernel_ast.function_name,
            fields=fields,
            constants=constants,
            call_argument_list=','.join(call_parameters),
            includes=includes)

        # Store to file
        with open(file_path, 'w') as f:
            f.write(code)
        self.release_exclusive_lock(lock_fp)  # degrade to shared lock
    # else: shared lock, the cached file at file_path is reused unchanged

    return file_path, lock_fp
class KerncraftParameters(DotDict):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment