Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Julian Hammer
pystencils
Commits
9da89cd1
Commit
9da89cd1
authored
Nov 06, 2020
by
Julian Hammer
Browse files
Updated Kerncraft Coupling
parent
622aaa6c
Pipeline
#27789
failed with stage
in 12 minutes and 50 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
pystencils/pystencils/kerncraft_coupling/kerncraft_interface.py
View file @
9da89cd1
from
tempfile
import
TemporaryDirectory
import
fcntl
import
textwrap
from
copy
import
deepcopy
import
warnings
import
sympy
as
sp
from
collections
import
defaultdict
...
...
@@ -6,14 +10,18 @@ import kerncraft
import
kerncraft.kernel
from
typing
import
Optional
from
kerncraft.machinemodel
import
MachineModel
from
jinja2
import
Template
from
pystencils.kerncraft_coupling.generate_benchmark
import
generate_benchmark
from
pystencils.astnodes
import
LoopOverCoordinate
,
SympyAssignment
,
ResolvedFieldAccess
,
KernelFunction
from
pystencils.backends.cbackend
import
generate_c
,
get_headers
from
pystencils.astnodes
import
\
LoopOverCoordinate
,
SympyAssignment
,
ResolvedFieldAccess
,
KernelFunction
from
pystencils.field
import
get_layout_from_strides
from
pystencils.sympyextensions
import
count_operations_in_ast
from
pystencils.transformations
import
filtered_tree_iteration
from
pystencils.utils
import
DotDict
import
warnings
from
pystencils.astnodes
import
PragmaBlock
class
PyStencilsKerncraftKernel
(
kerncraft
.
kernel
.
KernelCode
):
...
...
@@ -39,6 +47,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
# Initialize state
self
.
asm_block
=
None
self
.
_filename
=
filename
self
.
_keep_intermediates
=
False
self
.
kernel_ast
=
ast
self
.
temporary_dir
=
TemporaryDirectory
()
...
...
@@ -94,7 +103,7 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
for
field
in
fields_accessed
:
layout
=
get_layout_tuple
(
field
)
permuted_shape
=
list
(
field
.
shape
[
i
]
for
i
in
layout
)
self
.
set_variable
(
field
.
name
,
str
(
field
.
dtype
),
tuple
(
permuted_shape
))
self
.
set_variable
(
field
.
name
,
(
str
(
field
.
dtype
),
),
tuple
(
permuted_shape
))
# Scalars may be safely ignored
# for param in ast.get_parameters():
...
...
@@ -127,14 +136,211 @@ class PyStencilsKerncraftKernel(kerncraft.kernel.KernelCode):
print
(
"----------------------------- FLOPS -------------------------------"
)
pprint
(
self
.
_flops
)
def
as_code
(
self
,
type_
=
'iaca'
,
openmp
=
False
):
def get_kernel_header(self, name='kernel'):
    """
    Generate and store kernel.h

    The header contains only the signature of the kernel function, produced
    by ``generate_c(..., signature_only=True)`` from ``self.kernel_ast``.

    :param name: accepted for interface compatibility; not used by this implementation
    :return: tuple of filename of header and file pointer of lockfile
    """
    file_name = 'kernel.h'
    file_path = self.get_intermediate_location(
        file_name, machine_and_compiler_dependent=False)
    lock_mode, lock_fp = self.lock_intermediate(file_path)

    # A shared lock (fcntl.LOCK_SH) means the cached header can be used as is.
    # Its content is not needed here, so it is not read back.
    if lock_mode != fcntl.LOCK_SH:
        # lock_mode == fcntl.LOCK_EX: the intermediate needs to be (re)written
        code = generate_c(self.kernel_ast, signature_only=True)
        with open(file_path, 'w') as f:
            f.write(code)
        self.release_exclusive_lock(lock_fp)  # degrade to shared lock

    # Note: the bare file name is returned here, not the full path.
    return file_name, lock_fp
def get_kernel_code(self, openmp=False, name='kernel'):
    """
    Generate and return compilable source code with kernel function from AST.

    :param openmp: include openmp pragmas (or strip them); currently has to be
                   False, otherwise an AssertionError is raised
    :param name: name of kernel function (not used by this implementation)
    :return: tuple of path to the kernel source file and file pointer of lockfile
    """
    assert not openmp, "openmp is currently not support by pystencils"
    filename = 'kernel'
    if openmp:
        filename += '-omp'
    filename += '.c'
    file_path = self.get_intermediate_location(
        filename, machine_and_compiler_dependent=False)
    lock_mode, lock_fp = self.lock_intermediate(file_path)

    # A shared lock (fcntl.LOCK_SH) means the cached source file is up to date.
    # Only its path is returned, so the content is not read back.
    if lock_mode != fcntl.LOCK_SH:
        # lock_mode == fcntl.LOCK_EX: the intermediate needs to be (re)written
        kernel_template = Template(textwrap.dedent("""
            #include <stdlib.h>
            #include <stdint.h>
            #include <stdbool.h>
            #include <math.h>
            #define RESTRICT __restrict__
            #define FUNC_PREFIX
            {{ includes }}
            {{kernel_code}}
            """))

        includes = "\n".join(
            "#include %s" % (include_file,)
            for include_file in get_headers(self.kernel_ast))

        # Work on a copy so that self.kernel_ast itself is left unmodified.
        ast = deepcopy(self.kernel_ast)

        # Strip "#pragma omp parallel" from within kernel, because main function takes care of
        # that
        if len(ast.body.args) > 0 and isinstance(ast.body.args[0], PragmaBlock):
            ast.body.args[0].pragma_line = ''

        # Render from the stripped copy; rendering self.kernel_ast would
        # silently discard the pragma removal above.
        code = kernel_template.render(
            kernel_code=generate_c(ast, dialect='c'),
            includes=includes)

        # Store to file
        with open(file_path, 'w') as f:
            f.write(code)
        self.release_exclusive_lock(lock_fp)  # degrade to shared lock

    return file_path, lock_fp
# Jinja2-syntax template text for the C benchmark main program.
# Placeholders used: ``constants`` (iterable of (name, dtype)), ``fields``
# (iterable of (name, dtype, size)), ``kernelName`` and ``call_argument_list``.
# Arrays are declared, allocated and cast with the field's own ``dataType`` so
# that the pointer type agrees with the ``sizeof({{dataType}})`` allocation.
CODE_TEMPLATE = textwrap.dedent("""
    #include <likwid.h>
    #include <stdlib.h>
    #include <stdint.h>
    #include <stdbool.h>
    #include <math.h>
    #include "kerncraft.h"
    #include "kernel.h"

    #define RESTRICT __restrict__
    #define FUNC_PREFIX
    void dummy(void *);
    extern int var_false;

    int main(int argc, char **argv) {
      {%- for constantName, dataType in constants %}
      // Constant {{constantName}}
      {{dataType}} {{constantName}};
      {{constantName}} = 0.23;
      {%- endfor %}

      // Declaring arrays
      {%- for field_name, dataType, size in fields %}
      // Initialization {{field_name}}
      {{dataType}} * {{field_name}} = ({{dataType}} *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
      // TODO initialize in parallel context in same order as they are touched
      for (unsigned long long i = 0; i < {{size}}; ++i)
        {{field_name}}[i] = 0.23;
      {%- endfor %}

      likwid_markerInit();
      #pragma omp parallel
      {
        likwid_markerRegisterRegion("loop");
        #pragma omp barrier

        // Initializing arrays in same order as touched in kernel loop nest
        //INIT_ARRAYS;

        // Dummy call
        {%- for field_name, dataType, size in fields %}
        if(var_false) dummy({{field_name}});
        {%- endfor %}
        {%- for constantName, dataType in constants %}
        if(var_false) dummy(&{{constantName}});
        {%- endfor %}

        for(int warmup = 1; warmup >= 0; --warmup) {
          int repeat = 2;
          if(warmup == 0) {
            repeat = atoi(argv[1]);
            likwid_markerStartRegion("loop");
          }

          for(; repeat > 0; --repeat) {
            {{kernelName}}({{call_argument_list}});
            {%- for field_name, dataType, size in fields %}
            if(var_false) dummy({{field_name}});
            {%- endfor %}
            {%- for constantName, dataType in constants %}
            if(var_false) dummy(&{{constantName}});
            {%- endfor %}
          }
        }
        likwid_markerStopRegion("loop");
      }
      likwid_markerClose();
      return 0;
    }
    """)
def get_main_code(self, kernel_function_name='kernel'):
    """
    Generate and return compilable source code from AST.

    :param kernel_function_name: accepted for interface compatibility; the
                                 generated call uses the function name stored
                                 in the kernel AST
    :return: tuple of path to main.c and shared lock file pointer
    """
    # TODO produce nicer code, including help text and other "comfort features".
    assert self.kernel_ast is not None, "AST does not exist, this could be due to running " \
                                        "based on a kernel description rather than code."

    file_path = self.get_intermediate_location('main.c', machine_and_compiler_dependent=False)
    lock_mode, lock_fp = self.lock_intermediate(file_path)

    # A shared lock (fcntl.LOCK_SH) means the cached main.c is up to date.
    # Only its path is returned, so the content is not read back.
    if lock_mode != fcntl.LOCK_SH:
        # lock_mode == fcntl.LOCK_EX: the intermediate needs to be (re)written
        ast = self.kernel_ast
        accessed_fields = {f.name: f for f in ast.fields_accessed}
        constants = []
        fields = []
        call_parameters = []
        for p in ast.get_parameters():
            if not p.is_field_parameter:
                constants.append((p.symbol.name, str(p.symbol.dtype)))
                call_parameters.append(p.symbol.name)
            else:
                assert p.is_field_pointer, "Benchmark implemented only for kernels with fixed loop size"
                field = accessed_fields[p.field_name]
                # NOTE(review): get_base_type and prod are not among the imports
                # visible in this file excerpt - confirm they are in scope.
                dtype = str(get_base_type(p.symbol.dtype))
                fields.append((p.field_name, dtype, prod(field.shape)))
                call_parameters.append(p.field_name)

        includes = "\n".join(
            "#include %s" % (include_file,) for include_file in get_headers(ast))

        # Generate code
        # NOTE(review): assumes CODE_TEMPLATE is reachable as an attribute of
        # self (class-level definition) - confirm against the full file.
        # The keyword must be spelled call_argument_list to match the
        # placeholder used in the template.
        code = Template(self.CODE_TEMPLATE).render(
            kernelName=ast.function_name,
            fields=fields,
            constants=constants,
            call_argument_list=','.join(call_parameters),
            includes=includes)

        # Store to file
        with open(file_path, 'w') as f:
            f.write(code)
        self.release_exclusive_lock(lock_fp)  # degrade to shared lock

    return file_path, lock_fp
class
KerncraftParameters
(
DotDict
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment