Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
pycodegen
pystencils
Commits
6562a56c
Commit
6562a56c
authored
Apr 19, 2021
by
Michael Kuron
Browse files
Support shorter SVE vectors via predicates
parent
30341d80
Pipeline
#31622
passed with stage
in 21 minutes and 13 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
pystencils/backends/arm_instruction_sets.py
View file @
6562a56c
...
...
@@ -65,12 +65,14 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
result
=
dict
()
result
[
'bytes'
]
=
bitwidth
//
8
predicate
=
f
'
{
prefix
}
whilelt_b
{
bits
[
data_type
]
}
(0,
{
width
}
)'
int_predicate
=
f
'
{
prefix
}
whilelt_b
{
bits
[
"int"
]
}
(0,
{
intwidth
}
)'
for
intrinsic_id
,
function_shortcut
in
base_names
.
items
():
function_shortcut
=
function_shortcut
.
strip
()
name
=
function_shortcut
[:
function_shortcut
.
index
(
'['
)]
arg_string
=
get_argument_string
(
function_shortcut
,
first
=
f
'
{
prefix
}
ptrue_b
{
bits
[
data_type
]
}
()'
if
prefix
==
'sv'
else
''
)
arg_string
=
get_argument_string
(
function_shortcut
,
first
=
predicate
if
prefix
==
'sv'
else
''
)
if
prefix
==
'sv'
and
not
name
.
startswith
(
'ld'
)
and
not
name
.
startswith
(
'st'
)
and
not
name
.
startswith
(
cmp
):
undef
=
'_x'
else
:
...
...
@@ -86,20 +88,19 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
result
[
'makeVecConstInt'
]
=
f
'svdup_s
{
bits
[
"int"
]
}
'
+
'({0})'
result
[
'makeVecIndex'
]
=
f
'svindex_s
{
bits
[
"int"
]
}
'
+
'({0}, {1})'
result
[
'+int'
]
=
f
"svadd_s
{
bits
[
'int'
]
}
_x(
svptrue_b
{
bits
[
'int'
]
}
()
, "
+
"{0}, {1})"
result
[
'+int'
]
=
f
"svadd_s
{
bits
[
'int'
]
}
_x(
{
int_predicate
}
, "
+
"{0}, {1})"
attr
=
f
' __attribute__((arm_sve_vector_bits(
{
bitwidth
}
)))'
result
[
data_type
]
=
f
'svfloat
{
bits
[
data_type
]
}
_t
{
attr
}
'
result
[
'int'
]
=
f
'svint
{
bits
[
"int"
]
}
_t
{
attr
}
'
result
[
'bool'
]
=
f
'svbool_t
{
attr
}
'
result
[
data_type
]
=
f
'svfloat
{
bits
[
data_type
]
}
_st'
result
[
'int'
]
=
f
'svint
{
bits
[
"int"
]
}
_st'
result
[
'bool'
]
=
'svbool_st'
result
[
'headers'
]
=
[
'<arm_sve.h>'
,
'"arm_neon_helpers.h"'
]
result
[
'&'
]
=
f
'svand_b_z(
svptrue_b
{
bits
[
data_type
]
}
()
,'
+
' {0}, {1})'
result
[
'|'
]
=
f
'svorr_b_z(
svptrue_b
{
bits
[
data_type
]
}
()
,'
+
' {0}, {1})'
result
[
'&'
]
=
f
'svand_b_z(
{
predicate
}
,'
+
' {0}, {1})'
result
[
'|'
]
=
f
'svorr_b_z(
{
predicate
}
,'
+
' {0}, {1})'
result
[
'blendv'
]
=
f
'svsel_f
{
bits
[
data_type
]
}
'
+
'({2}, {1}, {0})'
result
[
'any'
]
=
f
'svptest_any(
svptrue_b
{
bits
[
data_type
]
}
()
, {{0}})
> 0
'
result
[
'all'
]
=
f
'svcntp_b
{
bits
[
data_type
]
}
(
svptrue_b
{
bits
[
data_type
]
}
()
, {{0}}) ==
{
width
}
'
result
[
'any'
]
=
f
'svptest_any(
{
predicate
}
, {{0}})'
result
[
'all'
]
=
f
'svcntp_b
{
bits
[
data_type
]
}
(
{
predicate
}
, {{0}}) ==
{
width
}
'
result
[
'compile_flags'
]
=
[
f
'-msve-vector-bits=
{
bitwidth
}
'
]
else
:
...
...
pystencils/backends/simd_instruction_sets.py
View file @
6562a56c
import
os
import
math
import
platform
from
ctypes
import
CDLL
from
pystencils.backends.x86_instruction_sets
import
get_vector_instruction_set_x86
from
pystencils.backends.arm_instruction_sets
import
get_vector_instruction_set_arm
...
...
@@ -59,10 +60,17 @@ def get_supported_instruction_sets():
if
flags
.
issuperset
(
required_neon_flags
):
result
.
append
(
"neon"
)
if
flags
.
issuperset
(
required_sve_flags
):
length_file
=
'/proc/sys/abi/sve_default_vector_length'
if
os
.
path
.
exists
(
length_file
):
length
=
8
*
int
(
open
(
length_file
,
'r'
).
read
())
result
.
append
(
f
"sve
{
length
}
"
)
if
platform
.
system
()
==
'Linux'
:
libc
=
CDLL
(
'libc.so.6'
)
native_length
=
8
*
libc
.
prctl
(
51
,
0
,
0
,
0
,
0
)
# PR_SVE_GET_VL
if
native_length
<
0
:
raise
OSError
(
"SVE length query failed"
)
pwr2_length
=
int
(
2
**
math
.
floor
(
math
.
log2
(
native_length
)))
if
pwr2_length
%
256
==
0
:
result
.
append
(
f
"sve
{
pwr2_length
//
2
}
"
)
if
native_length
!=
pwr2_length
:
result
.
append
(
f
"sve
{
pwr2_length
}
"
)
result
.
append
(
f
"sve
{
native_length
}
"
)
else
:
result
.
append
(
"sve"
)
return
result
...
...
pystencils/include/arm_neon_helpers.h
View file @
6562a56c
#ifdef __ARM_NEON
#include <arm_neon.h>
#endif
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0
#include <arm_sve.h>
typedef
svbool_t
svbool_st
__attribute__
((
arm_sve_vector_bits
(
__ARM_FEATURE_SVE_BITS
)));
typedef
svfloat32_t
svfloat32_st
__attribute__
((
arm_sve_vector_bits
(
__ARM_FEATURE_SVE_BITS
)));
typedef
svfloat64_t
svfloat64_st
__attribute__
((
arm_sve_vector_bits
(
__ARM_FEATURE_SVE_BITS
)));
typedef
svint32_t
svint32_st
__attribute__
((
arm_sve_vector_bits
(
__ARM_FEATURE_SVE_BITS
)));
#endif
#ifdef __ARM_NEON
inline
float32x4_t
makeVec_f32
(
float
a
,
float
b
,
float
c
,
float
d
)
{
alignas
(
16
)
float
data
[
4
]
=
{
a
,
b
,
c
,
d
};
...
...
@@ -17,6 +29,7 @@ inline int32x4_t makeVec_s32(int a, int b, int c, int d)
alignas
(
16
)
int
data
[
4
]
=
{
a
,
b
,
c
,
d
};
return
vld1q_s32
(
data
);
}
#endif
inline
void
cachelineZero
(
void
*
p
)
{
__asm__
volatile
(
"dc zva, %0"
::
"r"
(
p
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment