Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
pycodegen
pystencils
Commits
2d758462
Commit
2d758462
authored
Jul 14, 2020
by
Jan Hönig
Browse files
Merge branch 'Enable_OSACA_usage' into 'master'
Enable osaca usage See merge request
pycodegen/pystencils!165
parents
cfd770cf
0391c91d
Pipeline
#25747
passed with stages
in 8 minutes and 13 seconds
Changes
8
Pipelines
2
Expand all
Hide whitespace changes
Inline
Side-by-side
pystencils/kerncraft_coupling/generate_benchmark.py
View file @
2d758462
import
os
import
subprocess
import
warnings
import
tempfile
from
pathlib
import
Path
from
jinja2
import
Template
from
jinja2
import
Environment
,
PackageLoader
,
StrictUndefined
from
pystencils.astnodes
import
PragmaBlock
from
pystencils.backends.cbackend
import
generate_c
,
get_headers
...
...
@@ -10,116 +12,6 @@ from pystencils.data_types import get_base_type
from
pystencils.include
import
get_pystencils_include_path
from
pystencils.sympyextensions
import
prod
benchmark_template
=
Template
(
"""
#include "kerncraft.h"
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include <stdio.h>
{{ includes }}
{%- if likwid %}
#include <likwid.h>
{%- endif %}
#define RESTRICT __restrict__
#define FUNC_PREFIX
void dummy(void *);
void timing(double* wcTime, double* cpuTime);
extern int var_false;
{{kernel_code}}
int main(int argc, char **argv)
{
{%- if likwid %}
likwid_markerInit();
{%- endif %}
{%- for field_name, dataType, size in fields %}
// Initialization {{field_name}}
double * {{field_name}} = (double *) aligned_malloc(sizeof({{dataType}}) * {{size}}, 64);
for (unsigned long long i = 0; i < {{size}}; ++i)
{{field_name}}[i] = 0.23;
if(var_false)
dummy({{field_name}});
{%- endfor %}
{%- for constantName, dataType in constants %}
// Constant {{constantName}}
{{dataType}} {{constantName}};
{{constantName}} = 0.23;
if(var_false)
dummy(& {{constantName}});
{%- endfor %}
{%- if likwid and openmp %}
#pragma omp parallel
{
likwid_markerRegisterRegion("loop");
#pragma omp barrier
{%- elif likwid %}
likwid_markerRegisterRegion("loop");
{%- endif %}
for(int warmup = 1; warmup >= 0; --warmup) {
int repeat = 2;
if(warmup == 0) {
repeat = atoi(argv[1]);
{%- if likwid %}
likwid_markerStartRegion("loop");
{%- endif %}
}
{%- if timing %}
double wcStartTime, cpuStartTime, wcEndTime, cpuEndTime;
timing(&wcStartTime, &cpuStartTime);
{%- endif %}
for (; repeat > 0; --repeat)
{
{{kernelName}}({{call_argument_list}});
// Dummy calls
{%- for field_name, dataType, size in fields %}
if(var_false) dummy((void*){{field_name}});
{%- endfor %}
{%- for constantName, dataType in constants %}
if(var_false) dummy((void*)&{{constantName}});
{%- endfor %}
}
{%- if timing %}
timing(&wcEndTime, &cpuEndTime);
if( warmup == 0)
printf("%e
\\
n", (wcEndTime - wcStartTime) / atoi(argv[1]) );
{%- endif %}
}
{%- if likwid %}
likwid_markerStopRegion("loop");
{%- if openmp %}
}
{%- endif %}
{%- endif %}
{%- if likwid %}
likwid_markerClose();
{%- endif %}
}
"""
)
def
generate_benchmark
(
ast
,
likwid
=
False
,
openmp
=
False
,
timing
=
False
):
"""Return C code of a benchmark program for the given kernel.
...
...
@@ -157,7 +49,7 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
if
len
(
ast
.
body
.
args
)
>
0
and
isinstance
(
ast
.
body
.
args
[
0
],
PragmaBlock
):
ast
.
body
.
args
[
0
].
pragma_line
=
''
args
=
{
jinja_context
=
{
'likwid'
:
likwid
,
'openmp'
:
openmp
,
'kernel_code'
:
generate_c
(
ast
,
dialect
=
'c'
),
...
...
@@ -168,16 +60,20 @@ def generate_benchmark(ast, likwid=False, openmp=False, timing=False):
'includes'
:
includes
,
'timing'
:
timing
,
}
return
benchmark_template
.
render
(
**
args
)
env
=
Environment
(
loader
=
PackageLoader
(
'pystencils.kerncraft_coupling'
),
undefined
=
StrictUndefined
)
return
env
.
get_template
(
'benchmark.c'
).
render
(
**
jinja_context
)
def
run_c_benchmark
(
ast
,
inner_iterations
,
outer_iterations
=
3
):
def
run_c_benchmark
(
ast
,
inner_iterations
,
outer_iterations
=
3
,
path
=
None
):
"""Runs the given kernel with outer loop in C
Args:
ast:
ast:
pystencils ast which is used to compile the benchmark file
inner_iterations: timings are recorded around this many iterations
outer_iterations: number of timings recorded
path: path where the benchmark file is stored. If None a tmp folder is created
Returns:
list of times per iterations for each outer iteration
...
...
@@ -185,26 +81,40 @@ def run_c_benchmark(ast, inner_iterations, outer_iterations=3):
import
kerncraft
benchmark_code
=
generate_benchmark
(
ast
,
timing
=
True
)
with
open
(
'bench.c'
,
'w'
)
as
f
:
if
path
is
None
:
path
=
tempfile
.
mkdtemp
()
if
isinstance
(
path
,
str
):
path
=
Path
(
path
)
with
open
(
path
/
'bench.c'
,
'w'
)
as
f
:
f
.
write
(
benchmark_code
)
kerncraft_path
=
os
.
path
.
dirname
(
kerncraft
.
__file__
)
kerncraft_path
=
Path
(
kerncraft
.
__file__
)
.
parent
extra_flags
=
[
'-I'
+
get_pystencils_include_path
(),
'-I'
+
os
.
path
.
join
(
kerncraft_path
,
'headers'
)]
'-I'
+
str
(
kerncraft_path
/
'headers'
)]
compiler_config
=
get_compiler_config
()
compile_cmd
=
[
compiler_config
[
'command'
]]
+
compiler_config
[
'flags'
].
split
()
compile_cmd
+=
[
*
extra_flags
,
os
.
path
.
join
(
kerncraft_path
,
'headers'
,
'timing.c'
)
,
os
.
path
.
join
(
kerncraft_path
,
'headers'
,
'dummy.c'
)
,
'bench.c'
,
'-o'
,
'bench'
,
kerncraft_path
/
'headers'
/
'timing.c'
,
kerncraft_path
/
'headers'
/
'dummy.c'
,
path
/
'bench.c'
,
'-o'
,
path
/
'bench'
,
]
run_compile_step
(
compile_cmd
)
time_pre_estimation_per_iteration
=
float
(
subprocess
.
check_output
([
'./'
/
path
/
'bench'
,
str
(
10
)]))
benchmark_time_limit
=
20
if
benchmark_time_limit
/
time_pre_estimation_per_iteration
<
inner_iterations
:
warn
=
(
f
"A benchmark run with
{
inner_iterations
}
inner_iterations will probably take longer than "
f
"
{
benchmark_time_limit
}
seconds for this kernel"
)
warnings
.
warn
(
warn
)
results
=
[]
for
_
in
range
(
outer_iterations
):
benchmark_time
=
float
(
subprocess
.
check_output
([
'./bench'
,
str
(
inner_iterations
)]))
benchmark_time
=
float
(
subprocess
.
check_output
([
'./
'
/
path
/
'
bench'
,
str
(
inner_iterations
)]))
results
.
append
(
benchmark_time
)
return
results
pystencils/kerncraft_coupling/kerncraft_interface.py
View file @
2d758462
import
warnings
import
fcntl
from
collections
import
defaultdict
from
tempfile
import
TemporaryDirectory
from
typing
import
Optional
import
kerncraft
from
jinja2
import
Environment
,
PackageLoader
,
StrictUndefined
import
sympy
as
sp
from
kerncraft.kerncraft
import
KernelCode
from
kerncraft.machinemodel
import
MachineModel
from
pystencils.astnodes
import
(
KernelFunction
,
LoopOverCoordinate
,
ResolvedFieldAccess
,
SympyAssignment
)
from
pystencils.astnodes
import
(
KernelFunction
,
LoopOverCoordinate
,
ResolvedFieldAccess
,
SympyAssignment
)
from
pystencils.field
import
get_layout_from_strides
from
pystencils.kerncraft_coupling.generate_benchmark
import
generate_benchmark
from
pystencils.sympyextensions
import
count_operations_in_ast
from
pystencils.transformations
import
filtered_tree_iteration
from
pystencils.utils
import
DotDict
from
pystencils.backends.cbackend
import
generate_c
,
get_headers
from
pystencils.cpu.kernelcreation
import
add_openmp
class
PyStencilsKerncraftKernel
(
KernelCode
):
...
...
@@ -34,8 +36,10 @@ class PyStencilsKerncraftKernel(KernelCode):
assumed_layout: either 'SoA' or 'AoS' - if fields have symbolic sizes the layout of the index
coordinates is not known. In this case either a structures of array (SoA) or
array of structures (AoS) layout is assumed
debug_print: print debug information
filename: used for caching
"""
kerncraft
.
kernel
.
Kernel
.
__init__
(
self
,
machine
)
super
(
KernelCode
,
self
)
.
__init__
(
machine
=
machine
)
# Initialize state
self
.
asm_block
=
None
...
...
@@ -96,7 +100,7 @@ class PyStencilsKerncraftKernel(KernelCode):
for
field
in
fields_accessed
:
layout
=
get_layout_tuple
(
field
)
permuted_shape
=
list
(
field
.
shape
[
i
]
for
i
in
layout
)
self
.
set_variable
(
field
.
name
,
str
(
field
.
dtype
),
tuple
(
permuted_shape
))
self
.
set_variable
(
field
.
name
,
tuple
([
str
(
field
.
dtype
)
])
,
tuple
(
permuted_shape
))
# Scalars may be safely ignored
# for param in ast.get_parameters():
...
...
@@ -129,24 +133,64 @@ class PyStencilsKerncraftKernel(KernelCode):
print
(
"----------------------------- FLOPS -------------------------------"
)
pprint
(
self
.
_flops
)
def
as_code
(
self
,
type_
=
'iaca'
,
openmp
=
False
,
as_filename
=
False
):
def
get_kernel_header
(
self
,
name
=
'pystencils_kernel'
):
file_name
=
"pystencils_kernel.h"
file_path
=
self
.
get_intermediate_location
(
file_name
,
machine_and_compiler_dependent
=
False
)
lock_mode
,
lock_fp
=
self
.
lock_intermediate
(
file_path
)
if
lock_mode
==
fcntl
.
LOCK_EX
:
function_signature
=
generate_c
(
self
.
kernel_ast
,
dialect
=
'c'
,
signature_only
=
True
)
jinja_context
=
{
'function_signature'
:
function_signature
,
}
env
=
Environment
(
loader
=
PackageLoader
(
'pystencils.kerncraft_coupling'
),
undefined
=
StrictUndefined
)
file_header
=
env
.
get_template
(
'kernel.h'
).
render
(
**
jinja_context
)
with
open
(
file_path
,
'w'
)
as
f
:
f
.
write
(
file_header
)
fcntl
.
flock
(
lock_fp
,
fcntl
.
LOCK_SH
)
# degrade to shared lock
return
file_path
,
lock_fp
def
get_kernel_code
(
self
,
openmp
=
False
,
name
=
'pystencils_kernl'
):
"""
Generate and return compilable source code.
Args:
type_: can be iaca or likwid.
openmp: if true, openmp code will be generated
as_file
name
:
name: kernel
name
"""
code
=
generate_benchmark
(
self
.
kernel_ast
,
likwid
=
type_
==
'likwid'
,
openmp
=
openmp
)
if
as_filename
:
fp
,
already_available
=
self
.
_get_intermediate_file
(
f
'kernel_
{
type_
}
.c'
,
machine_and_compiler_dependent
=
False
)
if
not
already_available
:
fp
.
write
(
code
)
return
fp
.
name
else
:
return
code
filename
=
'pystencils_kernl'
if
openmp
:
filename
+=
'-omp'
filename
+=
'.c'
file_path
=
self
.
get_intermediate_location
(
filename
,
machine_and_compiler_dependent
=
False
)
lock_mode
,
lock_fp
=
self
.
lock_intermediate
(
file_path
)
if
lock_mode
==
fcntl
.
LOCK_EX
:
header_list
=
get_headers
(
self
.
kernel_ast
)
includes
=
"
\n
"
.
join
([
"#include %s"
%
(
include_file
,)
for
include_file
in
header_list
])
if
openmp
:
add_openmp
(
self
.
kernel_ast
)
kernel_code
=
generate_c
(
self
.
kernel_ast
,
dialect
=
'c'
)
jinja_context
=
{
'includes'
:
includes
,
'kernel_code'
:
kernel_code
,
}
env
=
Environment
(
loader
=
PackageLoader
(
'pystencils.kerncraft_coupling'
),
undefined
=
StrictUndefined
)
file_header
=
env
.
get_template
(
'kernel.c'
).
render
(
**
jinja_context
)
with
open
(
file_path
,
'w'
)
as
f
:
f
.
write
(
file_header
)
fcntl
.
flock
(
lock_fp
,
fcntl
.
LOCK_SH
)
# degrade to shared lock
return
file_path
,
lock_fp
class
KerncraftParameters
(
DotDict
):
...
...
@@ -161,6 +205,7 @@ class KerncraftParameters(DotDict):
self
[
'iterations'
]
=
10
self
[
'unit'
]
=
'cy/CL'
self
[
'ignore_warnings'
]
=
True
self
[
'incore_model'
]
=
'OSACA'
# ------------------------------------------- Helper functions ---------------------------------------------------------
...
...
pystencils/kerncraft_coupling/templates/benchmark.c
0 → 100644
View file @
2d758462
#include "kerncraft.h"
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include <stdio.h>
{{
includes
}}
{
%-
if
likwid
%
}
#include <likwid.h>
{
%-
endif
%
}
#define RESTRICT __restrict__
#define FUNC_PREFIX
void
dummy
(
void
*
);
void
timing
(
double
*
wcTime
,
double
*
cpuTime
);
extern
int
var_false
;
{{
kernel_code
}}
int
main
(
int
argc
,
char
**
argv
)
{
{
%-
if
likwid
%
}
likwid_markerInit
();
{
%-
endif
%
}
{
%-
for
field_name
,
dataType
,
size
in
fields
%
}
// Initialization {{field_name}}
double
*
{{
field_name
}}
=
(
double
*
)
aligned_malloc
(
sizeof
({{
dataType
}})
*
{{
size
}},
64
);
for
(
unsigned
long
long
i
=
0
;
i
<
{{
size
}};
++
i
)
{{
field_name
}}[
i
]
=
0
.
23
;
if
(
var_false
)
dummy
({{
field_name
}});
{
%-
endfor
%
}
{
%-
for
constantName
,
dataType
in
constants
%
}
// Constant {{constantName}}
{{
dataType
}}
{{
constantName
}};
{{
constantName
}}
=
0
.
23
;
if
(
var_false
)
dummy
(
&
{{
constantName
}});
{
%-
endfor
%
}
{
%-
if
likwid
and
openmp
%
}
#pragma omp parallel
{
likwid_markerRegisterRegion
(
"loop"
);
#pragma omp barrier
{
%-
elif
likwid
%
}
likwid_markerRegisterRegion
(
"loop"
);
{
%-
endif
%
}
for
(
int
warmup
=
1
;
warmup
>=
0
;
--
warmup
)
{
int
repeat
=
2
;
if
(
warmup
==
0
)
{
repeat
=
atoi
(
argv
[
1
]);
{
%-
if
likwid
%
}
likwid_markerStartRegion
(
"loop"
);
{
%-
endif
%
}
}
{
%-
if
timing
%
}
double
wcStartTime
,
cpuStartTime
,
wcEndTime
,
cpuEndTime
;
timing
(
&
wcStartTime
,
&
cpuStartTime
);
{
%-
endif
%
}
for
(;
repeat
>
0
;
--
repeat
)
{
{{
kernelName
}}({{
call_argument_list
}});
// Dummy calls
{
%-
for
field_name
,
dataType
,
size
in
fields
%
}
if
(
var_false
)
dummy
((
void
*
){{
field_name
}});
{
%-
endfor
%
}
{
%-
for
constantName
,
dataType
in
constants
%
}
if
(
var_false
)
dummy
((
void
*
)
&
{{
constantName
}});
{
%-
endfor
%
}
}
{
%-
if
timing
%
}
timing
(
&
wcEndTime
,
&
cpuEndTime
);
if
(
warmup
==
0
)
printf
(
"%e
\n
"
,
(
wcEndTime
-
wcStartTime
)
/
atoi
(
argv
[
1
])
);
{
%-
endif
%
}
}
{
%-
if
likwid
%
}
likwid_markerStopRegion
(
"loop"
);
{
%-
if
openmp
%
}
}
{
%-
endif
%
}
{
%-
endif
%
}
{
%-
if
likwid
%
}
likwid_markerClose
();
{
%-
endif
%
}
}
pystencils/kerncraft_coupling/templates/kernel.c
0 → 100644
View file @
2d758462
#include "kerncraft.h"
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include <stdio.h>
{{
includes
}}
#define RESTRICT __restrict__
#define FUNC_PREFIX
void
dummy
(
void
*
);
void
timing
(
double
*
wcTime
,
double
*
cpuTime
);
extern
int
var_false
;
{{
kernel_code
}}
\ No newline at end of file
pystencils/kerncraft_coupling/templates/kernel.h
0 → 100644
View file @
2d758462
#define FUNC_PREFIX
{{
function_signature
}}
\ No newline at end of file
pystencils_tests/kerncraft_inputs/Example_SandyBridgeEP_E5-2680.yml
0 → 100644
View file @
2d758462
This diff is collapsed.
Click to expand it.
pystencils_tests/kerncraft_inputs/default_machine_file.yaml
deleted
100644 → 0
View file @
cfd770cf
kerncraft version
:
0.7.3
clock
:
2.7 GHz
cores per socket
:
8
cores per NUMA domain
:
8
NUMA domains per socket
:
1
model type
:
Intel Core SandyBridge EP processor
model name
:
Intel(R) Xeon(R) CPU E5-2680 0 @ 2.70GHz
sockets
:
2
threads per core
:
2
cacheline size
:
64 B
compiler
:
!!omap
-
icc
:
-O3 -xAVX -fno-alias -qopenmp
-
clang
:
-O3 -march=corei7-avx -mtune=corei7-avx -D_POSIX_C_SOURCE=200112L -fopenmp
-
gcc
:
-O3 -march=corei7-avx -D_POSIX_C_SOURCE=200112L -fopenmp
micro-architecture
:
SNB
FLOPs per cycle
:
SP
:
{
total
:
16
,
ADD
:
8
,
MUL
:
8
}
DP
:
{
total
:
8
,
ADD
:
4
,
MUL
:
4
}
overlapping model
:
ports
:
[
"
0"
,
"
0DV"
,
"
1"
,
"
2"
,
"
3"
,
"
4"
,
"
5"
]
performance counter metric
:
Max(UOPS_DISPATCHED_PORT_PORT_0:PMC[0-3],
UOPS_DISPATCHED_PORT_PORT_1:PMC[0-3],
UOPS_DISPATCHED_PORT_PORT_4:PMC[0-3],
UOPS_DISPATCHED_PORT_PORT_5:PMC[0-3])
non-overlapping model
:
ports
:
[
"
2D"
,
"
3D"
]
performance counter metric
:
T_OL + T_L1L2 + T_L2L3 + T_L3MEM
write-allocate
:
True
memory hierarchy
:
-
level
:
L1
cache per group
:
{
'
sets'
:
64
,
'
ways'
:
8
,
'
cl_size'
:
64
,
# 32 kB
'
replacement_policy'
:
'
LRU'
,
'
write_allocate'
:
True
,
'
write_back'
:
True
,
'
load_from'
:
'
L2'
,
'
store_to'
:
'
L2'
}
cores per group
:
1
threads per group
:
2
groups
:
16
performance counter metrics
:
accesses
:
MEM_UOPS_RETIRED_LOADS:PMC[0-3]
misses
:
L1D_REPLACEMENT:PMC[0-3]
evicts
:
L1D_M_EVICT:PMC[0-3]
-
level
:
L2
cache per group
:
{
'
sets'
:
512
,
'
ways'
:
8
,
'
cl_size'
:
64
,
# 256 kB
'
replacement_policy'
:
'
LRU'
,
'
write_allocate'
:
True
,
'
write_back'
:
True
,
'
load_from'
:
'
L3'
,
'
store_to'
:
'
L3'
}
cores per group
:
1
threads per group
:
2
groups
:
16
non-overlap upstream throughput
:
[
32 B/cy
,
'
half-duplex'
]
performance counter metrics
:
accesses
:
L1D_REPLACEMENT:PMC[0-3]
misses
:
L2_LINES_IN_ALL:PMC[0-3]
evicts
:
L2_TRANS_L2_WB:PMC[0-3]
-
level
:
L3
cache per group
:
{
'
sets'
:
20480
,
'
ways'
:
16
,
'
cl_size'
:
64
,
# 20 MB
'
replacement_policy'
:
'
LRU'
,
'
write_allocate'
:
True
,
'
write_back'
:
True
}
cores per group
:
8
threads per group
:
16
groups
:
2
non-overlap upstream throughput
:
[
32 B/cy
,
'
half-duplex'
]
performance counter metrics
:
accesses
:
L2_LINES_IN_ALL:PMC[0-3]
misses
:
(CAS_COUNT_RD:MBOX0C[01] + CAS_COUNT_RD:MBOX1C[01] +
CAS_COUNT_RD:MBOX2C[01] + CAS_COUNT_RD:MBOX3C[01])
evicts
:
(CAS_COUNT_WR:MBOX0C[01] + CAS_COUNT_WR:MBOX1C[01] +
CAS_COUNT_WR:MBOX2C[01] + CAS_COUNT_WR:MBOX3C[01])
-
level
:
MEM
cores per group
:
8
non-overlap upstream throughput
:
[
'
full
socket
memory
bandwidth'
,
'
half-duplex'
]
size per group
:
null
threads per group
:
16
benchmarks
:
kernels
:
copy
:
FLOPs per iteration
:
0
read streams
:
{
bytes
:
8.00 B
,
streams
:
1
}
read+write streams
:
{
bytes
:
0.00 B
,
streams
:
0
}
write streams
:
{
bytes
:
8.00 B
,
streams
:
1
}
daxpy
:
FLOPs per iteration
:
2
read streams
:
{
bytes
:
16.00 B
,
streams
:
2
}
read+write streams
:
{
bytes
:
8.00 B
,
streams
:
1
}
write streams
:
{
bytes
:
8.00 B
,
streams
:
1
}
load
:
FLOPs per iteration
:
0
read streams
:
{
bytes
:
8.00 B
,
streams
:
1
}
read+write streams
:
{
bytes
:
0.00 B
,
streams
:
0
}
write streams
:
{
bytes
:
0.00 B
,
streams
:
0
}
triad
:
FLOPs per iteration
:
2
read streams
:
{
bytes
:
24.00 B
,
streams
:
3
}
read+write streams
:
{
bytes
:
0.00 B
,
streams
:
0
}
write streams
:
{
bytes
:
8.00 B
,
streams
:
1
}
update
:
FLOPs per iteration
:
0
read streams
:
{
bytes
:
8.00 B
,
streams
:
1
}
read+write streams
:
{
bytes
:
8.00 B
,
streams
:
1
}
write streams
:
{
bytes
:
8.00 B
,
streams
:
1
}
measurements
:
L1
:
1
:
cores
:
[
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
]
results
:
copy
:
[
81.98 GB/s
,
163.75 GB/s
,
245.62 GB/s
,
327.69 GB/s
,
409.41 GB/s
,
489.83
GB/s
,
571.67 GB/s
,
653.50 GB/s
]
daxpy
:
[
71.55 GB/s
,
143.01 GB/s
,
214.86 GB/s
,
286.26 GB/s
,
355.60 GB/s
,
426.71 GB/s
,
497.45 GB/s
,
568.97 GB/s
]
load
:
[
61.92 GB/s
,
122.79 GB/s
,
183.01 GB/s
,
244.30 GB/s
,
306.76 GB/s
,
368.46
GB/s
,
427.41 GB/s
,
490.88 GB/s
]
triad
:
[
81.61 GB/s
,
163.25 GB/s
,
244.92 GB/s
,
326.65 GB/s
,
406.69 GB/s
,