Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 0 additions and 2724 deletions
import os
import subprocess
from jinja2 import Template
from pystencils.astnodes import PragmaBlock
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.cpu.cpujit import get_compiler_config, run_compile_step
from pystencils.data_types import get_base_type
from pystencils.include import get_pystencils_include_path
from pystencils.sympyextensions import prod
benchmark_template = Template("""
#include "kerncraft.h"
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include <stdio.h>
{{ includes }}
{%- if likwid %}
#include <likwid.h>
{%- endif %}
#define RESTRICT __restrict__
#define FUNC_PREFIX
void dummy(void *);
void timing(double* wcTime, double* cpuTime);
extern int var_false;
{{kernel_code}}
int main(int argc, char **argv)
{
{%- if likwid %}
likwid_markerInit();
{%- endif %}
{%- for field_name, dataType, size in fields %}
// Initialization {{field_name}}
double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
for (unsigned long long i = 0; i < {{size}}; ++i)
{{field_name}}[i] = 0.23;
if(var_false)
dummy({{field_name}});
{%- endfor %}
{%- for constantName, dataType in constants %}
// Constant {{constantName}}
{{dataType}} {{constantName}};
{{constantName}} = 0.23;
if(var_false)
dummy(& {{constantName}});
{%- endfor %}
{%- if likwid and openmp %}
#pragma omp parallel
{
likwid_markerRegisterRegion("loop");
#pragma omp barrier
{%- elif likwid %}
likwid_markerRegisterRegion("loop");
{%- endif %}
for(int warmup = 1; warmup >= 0; --warmup) {
int repeat = 2;
if(warmup == 0) {
repeat = atoi(argv[1]);
{%- if likwid %}
likwid_markerStartRegion("loop");
{%- endif %}
}
{%- if timing %}
double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime;
timing(&wcStartTime, &cpuStartTime);
{%- endif %}
for (; repeat > 0; --repeat)
{
{{kernelName}}({{call_argument_list}});
// Dummy calls
{%- for field_name, dataType, size in fields %}
if(var_false) dummy((void*){{field_name}});
{%- endfor %}
{%- for constantName, dataType in constants %}
if(var_false) dummy((void*)&{{constantName}});
{%- endfor %}
}
{%- if timing %}
timing(&wcEndTime, &cpuEndTime);
if( warmup == 0)
printf("%e\\n", (wcEndTime - wcStartTime) / atoi(argv[1]) );
{%- endif %}
}
{%- if likwid %}
likwid_markerStopRegion("loop");
{%- if openmp %}
}
{%- endif %}
{%- endif %}
{%- if likwid %}
likwid_markerClose();
{%- endif %}
}
""")
def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
"""Return C code of a benchmark program for the given kernel.
Args:
ast: the pystencils AST object as returned by create_kernel
likwid: if True likwid markers are added to the code
openmp: relevant only if likwid=True, to generated correct likwid initialization code
timing: add timing output to the code, prints time per iteration to stdout
Returns:
C code as string
"""
accessed_fields = {f.name: f for f in ast.fields_accessed}
constants = []
fields = []
call_parameters = []
for p in ast.get_parameters():
if not p.is_field_parameter:
constants.append((p.symbol.name, str(p.symbol.dtype)))
call_parameters.append(p.symbol.name)
else:
assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
field = accessed_fields[p.field_name]
dtype = str(get_base_type(p.symbol.dtype))
fields.append((p.field_name, dtype, prod(field.shape)))
call_parameters.append(p.field_name)
header_list = get_headers(ast)
includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])
# Strip "#pragma omp parallel" from within kernel, because main function takes care of that
# when likwid and openmp are enabled
if likwid and openmp:
if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
ast.body.args[0].pragma_line = ''
args = {
'likwid': likwid,
'openmp': openmp,
'kernel_code': generate_c(ast, dialect='c'),
'kernelName': ast.function_name,
'fields': fields,
'constants': constants,
'call_argument_list': ",".join(call_parameters),
'includes': includes,
'timing': timing,
}
return benchmark_template.render(**args)
def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
"""Runs the given kernel with outer loop in C
Args:
ast:
inner_iterations: timings are recorded around this many iterations
outer_iterations: number of timings recorded
Returns:
list of times per iterations for each outer iteration
"""
import kerncraft
benchmark_code = generate_benchmark(ast, timing=True)
with open('bench.c', 'w') as f:
f.write(benchmark_code)
kerncraft_path = os.path.dirname(kerncraft.__file__)
extra_flags = ['-I' + get_pystencils_include_path(),
'-I' + os.path.join(kerncraft_path, 'headers')]
compiler_config = get_compiler_config()
compile_cmd = [compiler_config['command']] + compiler_config['flags'].split()
compile_cmd += [*extra_flags,
os.path.join(kerncraft_path, 'headers', 'timing.c'),
os.path.join(kerncraft_path, 'headers', 'dummy.c'),
'bench.c',
'-o', 'bench',
]
run_compile_step(compile_cmd)
results = []
for _ in range(outer_iterations):
benchmark_time = float(subprocess.check_output(['./bench', str(inner_iterations)]))
results.append(benchmark_time)
return results
This diff is collapsed.
This diff is collapsed.
from .kernelcreation import create_kernel
from .llvmjit import make_python_function
__all__ = ['create_kernel', 'make_python_function']
import llvmlite.ir as ir
class Loop(object):
def __init__(self, builder, start_val, stop_val, step_val=1, loop_name='loop', phi_name="_phi"):
self.builder = builder
self.start_val = start_val
self.stop_val = stop_val
self.step_val = step_val
self.loop_name = loop_name
self.phi_name = phi_name
def __enter__(self):
self.loop_end, self.after, phi = self._for_loop(self.start_val, self.stop_val, self.step_val, self.loop_name,
self.phi_name)
return phi
def _for_loop(self, start_val, stop_val, step_val, loop_name, phi_name):
# TODO size of int??? unisgned???
integer = ir.IntType(64)
# Loop block
pre_loop_bb = self.builder.block
loop_bb = self.builder.append_basic_block(name='loop_' + loop_name)
self.builder.branch(loop_bb)
# Insert an explicit fall through from the current block to loop_bb
self.builder.position_at_start(loop_bb)
# Add phi
phi = self.builder.phi(integer, name=phi_name)
phi.add_incoming(start_val, pre_loop_bb)
loop_end_bb = self.builder.append_basic_block(name=loop_name + "_end_bb")
self.builder.position_at_start(loop_end_bb)
next_var = self.builder.add(phi, step_val, name=loop_name + '_next_it')
cond = self.builder.icmp_unsigned('<', next_var, stop_val, name=loop_name + "_cond")
after_bb = self.builder.append_basic_block(name=loop_name + "_after_bb")
self.builder.cbranch(cond, loop_bb, after_bb)
phi.add_incoming(next_var, loop_end_bb)
self.builder.position_at_end(loop_bb)
return loop_end_bb, after_bb, phi
def __exit__(self, exc_type, exc, exc_tb):
self.builder.branch(self.loop_end)
self.builder.position_at_end(self.after)
from pystencils.llvm.llvmjit import make_python_function
from pystencils.transformations import insert_casts
def create_kernel(assignments, function_name="kernel", type_info=None, split_groups=(),
iteration_slice=None, ghost_layers=None):
"""
Creates an abstract syntax tree for a kernel function, by taking a list of update rules.
Loops are created according to the field accesses in the equations.
Args:
assignments: list of sympy equations, containing accesses to :class:`pystencils.field.Field`.
Defining the update rules of the kernel
function_name: name of the generated function - only important if generated code is written out
type_info: a map from symbol name to a C type specifier. If not specified all symbols are assumed to
be of type 'double' except symbols which occur on the left hand side of equations where the
right hand side is a sympy Boolean which are assumed to be 'bool' .
split_groups: Specification on how to split up inner loop into multiple loops. For details see
transformation :func:`pystencils.transformation.split_inner_loop`
iteration_slice: if not None, iteration is done only over this slice of the field
ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
if None, the number of ghost layers is determined automatically and assumed to be equal for a
all dimensions
:return: :class:`pystencils.ast.KernelFunction` node
"""
from pystencils.cpu import create_kernel
code = create_kernel(assignments, function_name, type_info, split_groups, iteration_slice, ghost_layers)
code.body = insert_casts(code.body)
code._compile_function = make_python_function
code._backend = 'llvm'
return code
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# Disable gmpy backend until this bug is resolved if joblib serialize
# See https://github.com/sympy/sympy/pull/13530
import os
import warnings
os.environ['MPMATH_NOGMPY'] = '1'
try:
import mpmath.libmp
# In case the user has imported sympy first, then pystencils
if mpmath.libmp.BACKEND == 'gmpy':
warnings.warn("You are using the gmpy backend. You might encounter an error 'argument is not an mpz sympy'. "
"This is due to a known bug in sympy/gmpy library. "
"To prevent this, import pystencils first then sympy or set the environment variable "
"MPMATH_NOGMPY=1")
except ImportError:
pass
__all__ = []
This diff is collapsed.
import numpy as np
import sympy as sp
from pystencils import Assignment, Field, create_kernel
def meassure():
size = [30, 50, 3]
arr = np.zeros(size)
a = Field.create_from_numpy_array('a', arr, index_dimensions=1)
b = Field.create_from_numpy_array('b', arr, index_dimensions=1)
s = sp.Symbol("s")
rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
updateRule = Assignment(b[0, 0], s * rhs)
print(updateRule)
ast = create_kernel([updateRule])
# benchmark = generate_benchmark(ast)
# main = benchmark[0]
# kernel = benchmark[1]
# with open('src/main.cpp', 'w') as file:
# file.write(main)
# with open('src/kernel.cpp', 'w') as file:
# file.write(kernel)
func = ast.compile({'omega': 2/3})
from pystencils.kerncraft_coupling.generate_benchmark import generate_benchmark
from pystencils.kerncraft_coupling import BenchmarkAnalysis
from pystencils.kerncraft_coupling.kerncraft_interface import PyStencilsKerncraftKernel, KerncraftParameters
from kerncraft.machinemodel import MachineModel
from kerncraft.models import ECMData
machineFilePath = "../pystencils_tests/kerncraft_inputs/default_machine_file.yaml"
machine = MachineModel(path_to_yaml=machineFilePath)
benchmark = BenchmarkAnalysis(ast, machine)
#TODO what do i want to do with benchmark?
kernel = PyStencilsKerncraftKernel(ast)
model = ECMData(kernel, machine, KerncraftParameters())
model.analyze()
model.report()
if __name__ == "__main__":
meassure()
This diff is collapsed.
#include "iacaMarks.h"
int main(int argc, char * argv[]){
int a = 0;
for(int i = 0; i < argc+100000; i++){
IACA_START
a += i;
}
IACA_END
return a;
}
double a[30][50][3];
double b[30][50][3];
double s;
for(int j=1; j<30-1; ++j)
for(int i=1; i<50-1; ++i)
b[j][i] = ( a[j][i-1] + a[j][i+1]
+ a[j-1][i] + a[j+1][i]) * s;
double a[M][N][N];
double b[M][N][N];
double s;
for(int k=1; k<M-1; ++k)
for(int j=1; j<N-1; ++j)
for(int i=1; i<N-1; ++i)
b[k][j][i] = ( a[k][j][i-1] + a[k][j][i+1]
+ a[k][j-1][i] + a[k][j+1][i]
+ a[k-1][j][i] + a[k+1][j][i]) * s;
This diff is collapsed.
This diff is collapsed.