Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Sebastian Bindgen
pystencils
Commits
85ed4a03
Commit
85ed4a03
authored
Jul 12, 2020
by
Markus Holzer
Browse files
Minor fixes and benchmark test case
parent
4759ffe3
Changes
4
Hide whitespace changes
Inline
Side-by-side
pystencils/kerncraft_coupling/generate_benchmark.py
View file @
85ed4a03
import
os
import
subprocess
import
warnings
import
tempfile
from
jinja2
import
Environment
,
PackageLoader
,
StrictUndefined
...
...
@@ -64,13 +66,14 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
return
env
.
get_template
(
'benchmark.c'
).
render
(
**
jinja_context
)
def
run_c_benchmark
(
ast
,
inner_iterations
,
outer_iterations
=
3
):
def
run_c_benchmark
(
ast
,
inner_iterations
,
outer_iterations
=
3
,
path
=
None
):
"""Runs the given kernel with outer loop in C
Args:
ast:
ast:
pystencils ast which is used to compile the benchmark file
inner_iterations: timings are recorded around this many iterations
outer_iterations: number of timings recorded
path: path where the benchmark file is stored. If None a tmp folder is created
Returns:
list of times per iteration, one entry for each outer iteration
...
...
@@ -78,7 +81,11 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
import
kerncraft
benchmark_code
=
generate_benchmark
(
ast
,
timing
=
True
)
with
open
(
'bench.c'
,
'w'
)
as
f
:
if
path
is
None
:
path
=
tempfile
.
mkdtemp
()
with
open
(
os
.
path
.
join
(
path
,
'bench.c'
),
'w'
)
as
f
:
f
.
write
(
benchmark_code
)
kerncraft_path
=
os
.
path
.
dirname
(
kerncraft
.
__file__
)
...
...
@@ -91,13 +98,20 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
compile_cmd
+=
[
*
extra_flags
,
os
.
path
.
join
(
kerncraft_path
,
'headers'
,
'timing.c'
),
os
.
path
.
join
(
kerncraft_path
,
'headers'
,
'dummy.c'
),
'bench.c'
,
'-o'
,
'bench'
,
os
.
path
.
join
(
path
,
'bench.c'
)
,
'-o'
,
os
.
path
.
join
(
path
,
'bench'
)
,
]
run_compile_step
(
compile_cmd
)
time_pre_estimation_per_iteration
=
float
(
subprocess
.
check_output
([
os
.
path
.
join
(
'./'
,
path
,
'bench'
),
str
(
10
)]))
benchmark_time_limit
=
20
if
benchmark_time_limit
/
time_pre_estimation_per_iteration
<
inner_iterations
:
warn
=
(
f
"A benchmark run with
{
inner_iterations
}
inner_iterations will probably take longer than "
f
"
{
benchmark_time_limit
}
seconds for this kernel"
)
warnings
.
warn
(
warn
)
results
=
[]
for
_
in
range
(
outer_iterations
):
benchmark_time
=
float
(
subprocess
.
check_output
([
'./
bench'
,
str
(
inner_iterations
)]))
benchmark_time
=
float
(
subprocess
.
check_output
([
os
.
path
.
join
(
'./'
,
path
,
'
bench'
)
,
str
(
inner_iterations
)]))
results
.
append
(
benchmark_time
)
return
results
pystencils/kerncraft_coupling/kerncraft_interface.py
View file @
85ed4a03
...
...
@@ -6,19 +6,17 @@ from typing import Optional
from
jinja2
import
Environment
,
PackageLoader
,
StrictUndefined
import
kerncraft
import
sympy
as
sp
from
kerncraft.kerncraft
import
KernelCode
from
kerncraft.machinemodel
import
MachineModel
from
pystencils.astnodes
import
(
KernelFunction
,
LoopOverCoordinate
,
ResolvedFieldAccess
,
SympyAssignment
)
from
pystencils.astnodes
import
(
KernelFunction
,
LoopOverCoordinate
,
ResolvedFieldAccess
,
SympyAssignment
)
from
pystencils.field
import
get_layout_from_strides
from
pystencils.kerncraft_coupling.generate_benchmark
import
generate_benchmark
from
pystencils.sympyextensions
import
count_operations_in_ast
from
pystencils.transformations
import
filtered_tree_iteration
from
pystencils.utils
import
DotDict
from
pystencils.backends.cbackend
import
generate_c
,
get_headers
from
pystencils.cpu.kernelcreation
import
add_openmp
class
PyStencilsKerncraftKernel
(
KernelCode
):
...
...
@@ -38,8 +36,10 @@ class PyStencilsKerncraftKernel(KernelCode):
assumed_layout: either 'SoA' or 'AoS' - if fields have symbolic sizes the layout of the index
coordinates is not known. In this case either a structure of arrays (SoA) or
array of structures (AoS) layout is assumed
debug_print: print debug information
filename: used for caching
"""
kerncraft
.
kernel
.
Kernel
.
__init__
(
self
,
machine
)
super
(
KernelCode
,
self
)
.
__init__
(
machine
=
machine
)
# Initialize state
self
.
asm_block
=
None
...
...
@@ -138,11 +138,7 @@ class PyStencilsKerncraftKernel(KernelCode):
file_path
=
self
.
get_intermediate_location
(
file_name
,
machine_and_compiler_dependent
=
False
)
lock_mode
,
lock_fp
=
self
.
lock_intermediate
(
file_path
)
if
lock_mode
==
fcntl
.
LOCK_SH
:
# use cache
with
open
(
file_path
)
as
f
:
code
=
f
.
read
()
else
:
# lock_mode == fcntl.LOCK_EX
if
lock_mode
==
fcntl
.
LOCK_EX
:
function_signature
=
generate_c
(
self
.
kernel_ast
,
dialect
=
'c'
,
signature_only
=
True
)
jinja_context
=
{
...
...
@@ -163,9 +159,8 @@ class PyStencilsKerncraftKernel(KernelCode):
Generate and return compilable source code.
Args:
type_: can be iaca or likwid.
openmp: if true, openmp code will be generated
as_filename: writes a file with the name as_file
name
name: kernel
name
"""
filename
=
'pystencils_kernl'
if
openmp
:
...
...
@@ -174,14 +169,13 @@ class PyStencilsKerncraftKernel(KernelCode):
file_path
=
self
.
get_intermediate_location
(
filename
,
machine_and_compiler_dependent
=
False
)
lock_mode
,
lock_fp
=
self
.
lock_intermediate
(
file_path
)
if
lock_mode
==
fcntl
.
LOCK_SH
:
# use cache
with
open
(
file_path
)
as
f
:
code
=
f
.
read
()
else
:
# lock_mode == fcntl.LOCK_EX
if
lock_mode
==
fcntl
.
LOCK_EX
:
header_list
=
get_headers
(
self
.
kernel_ast
)
includes
=
"
\n
"
.
join
([
"#include %s"
%
(
include_file
,)
for
include_file
in
header_list
])
if
openmp
:
add_openmp
(
self
.
kernel_ast
)
kernel_code
=
generate_c
(
self
.
kernel_ast
,
dialect
=
'c'
)
jinja_context
=
{
...
...
pystencils/kerncraft_coupling/templates/benchmark.c
View file @
85ed4a03
...
...
@@ -90,7 +90,7 @@ int main(int argc, char **argv)
{
%-
if
timing
%
}
timing
(
&
wcEndTime
,
&
cpuEndTime
);
if
(
warmup
==
0
)
printf
(
"%e
\
\
n"
,
(
wcEndTime
-
wcStartTime
)
/
atoi
(
argv
[
1
])
);
printf
(
"%e
\n
"
,
(
wcEndTime
-
wcStartTime
)
/
atoi
(
argv
[
1
])
);
{
%-
endif
%
}
}
...
...
pystencils_tests/test_kerncraft_coupling.py
View file @
85ed4a03
import
os
import
numpy
as
np
import
pytest
import
sympy
as
sp
import
kerncraft
from
kerncraft.kernel
import
KernelCode
from
kerncraft.machinemodel
import
MachineModel
from
kerncraft.models
import
ECM
,
ECMData
,
Benchmark
from
pystencils
import
Assignment
,
Field
from
pystencils.cpu
import
create_kernel
from
pystencils.kerncraft_coupling
import
KerncraftParameters
,
PyStencilsKerncraftKernel
from
pystencils.kerncraft_coupling.generate_benchmark
import
generate_benchmark
from
pystencils.kerncraft_coupling.generate_benchmark
import
generate_benchmark
,
run_c_benchmark
from
pystencils.timeloop
import
TimeLoop
SCRIPT_FOLDER
=
os
.
path
.
dirname
(
os
.
path
.
realpath
(
__file__
))
INPUT_FOLDER
=
os
.
path
.
join
(
SCRIPT_FOLDER
,
"kerncraft_inputs"
)
...
...
@@ -45,28 +46,28 @@ def analysis(kernel, model='ecmdata'):
machine_file_path
=
os
.
path
.
join
(
INPUT_FOLDER
,
"Example_SandyBridgeEP_E5-2680.yml"
)
machine
=
MachineModel
(
path_to_yaml
=
machine_file_path
)
if
model
==
'ecmdata'
:
model
=
kerncraft
.
models
.
ECMData
(
kernel
,
machine
,
KerncraftParameters
())
model
=
ECMData
(
kernel
,
machine
,
KerncraftParameters
())
elif
model
==
'ecm'
:
model
=
kerncraft
.
models
.
ECM
(
kernel
,
machine
,
KerncraftParameters
())
model
=
ECM
(
kernel
,
machine
,
KerncraftParameters
())
# model.analyze()
# model.plot()
elif
model
==
'benchmark'
:
model
=
kerncraft
.
models
.
Benchmark
(
kernel
,
machine
,
KerncraftParameters
())
model
=
Benchmark
(
kernel
,
machine
,
KerncraftParameters
())
else
:
model
=
kerncraft
.
models
.
ECM
(
kernel
,
machine
,
KerncraftParameters
())
model
=
ECM
(
kernel
,
machine
,
KerncraftParameters
())
model
.
analyze
()
return
model
@
pytest
.
mark
.
kerncraft
def
test_3d_7pt_
OSACA
():
# Make sure you use the intel compiler
def
test_3d_7pt_
osaca
():
size
=
[
20
,
200
,
200
]
kernel_file_path
=
os
.
path
.
join
(
INPUT_FOLDER
,
"3d-7pt.c"
)
machine_file_path
=
os
.
path
.
join
(
INPUT_FOLDER
,
"Example_SandyBridgeEP_E5-2680.yml"
)
machine
=
MachineModel
(
path_to_yaml
=
machine_file_path
)
machine
_model
=
MachineModel
(
path_to_yaml
=
machine_file_path
)
with
open
(
kernel_file_path
)
as
kernel_file
:
reference_kernel
=
KernelCode
(
kernel_file
.
read
(),
machine
=
machine
,
filename
=
kernel_file_path
)
reference_kernel
=
KernelCode
(
kernel_file
.
read
(),
machine
=
machine
_model
,
filename
=
kernel_file_path
)
reference_kernel
.
set_constant
(
'M'
,
size
[
0
])
reference_kernel
.
set_constant
(
'N'
,
size
[
1
])
assert
size
[
1
]
==
size
[
2
]
...
...
@@ -80,7 +81,7 @@ def test_3d_7pt_OSACA():
update_rule
=
Assignment
(
b
[
0
,
0
,
0
],
s
*
rhs
)
ast
=
create_kernel
([
update_rule
])
k
=
PyStencilsKerncraftKernel
(
ast
,
machine
)
k
=
PyStencilsKerncraftKernel
(
ast
,
machine
=
machine_model
)
analysis
(
k
,
model
=
'ecm'
)
assert
reference_kernel
.
_flops
==
k
.
_flops
# assert reference.results['cl throughput'] == analysis.results['cl throughput']
...
...
@@ -132,3 +133,29 @@ def test_3d_7pt():
for
e1
,
e2
in
zip
(
reference
.
results
[
'cycles'
],
result
.
results
[
'cycles'
]):
assert
e1
==
e2
@pytest.mark.kerncraft
def test_benchmark():
    """Compare the generated C benchmark timing against a Python ``TimeLoop`` timing.

    A 3D kernel that writes ``s`` times the sum of the six direct neighbours
    of field ``a`` into field ``b`` is created once. It is then timed twice:
    through ``run_c_benchmark`` (1000 inner iterations, 1 outer iteration) and
    through a single-step ``TimeLoop`` around the compiled kernel. Both
    timings are required to agree to 5 decimal places.
    """
    shape = [30, 50, 50]
    template = np.zeros(shape)

    # Symbolic fields are derived from a zero array of the target shape.
    src = Field.create_from_numpy_array('a', template, index_dimensions=0)
    dst = Field.create_from_numpy_array('b', template, index_dimensions=0)

    factor = sp.Symbol("s")
    neighbour_sum = (src[0, -1, 0] + src[0, 1, 0]
                     + src[-1, 0, 0] + src[1, 0, 0]
                     + src[0, 0, -1] + src[0, 0, 1])
    ast = create_kernel([Assignment(dst[0, 0, 0], factor * neighbour_sum)])

    # Timing obtained from the generated C benchmark.
    c_benchmark_run = run_c_benchmark(ast, inner_iterations=1000, outer_iterations=1)

    # Timing obtained by running the compiled kernel inside a TimeLoop.
    compiled_kernel = ast.compile()
    call_args = {
        'a': np.full(shape, fill_value=0.23),
        'b': np.full(shape, fill_value=0.23),
        's': 0.23,
    }

    timeloop = TimeLoop(steps=1)
    timeloop.add_call(compiled_kernel, call_args)
    timeloop_time = timeloop.benchmark(number_of_time_steps_for_estimation=1)

    np.testing.assert_almost_equal(c_benchmark_run, timeloop_time, decimal=5)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment