Commit 6562a56c authored by Michael Kuron's avatar Michael Kuron
Browse files

Support shorter SVE vectors via predicates

parent 30341d80
Pipeline #31622 passed with stage
in 21 minutes and 13 seconds
......@@ -65,12 +65,14 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
result = dict()
result['bytes'] = bitwidth // 8
predicate = f'{prefix}whilelt_b{bits[data_type]}(0, {width})'
int_predicate = f'{prefix}whilelt_b{bits["int"]}(0, {intwidth})'
for intrinsic_id, function_shortcut in base_names.items():
function_shortcut = function_shortcut.strip()
name = function_shortcut[:function_shortcut.index('[')]
arg_string = get_argument_string(function_shortcut, first=f'{prefix}ptrue_b{bits[data_type]}()'
if prefix == 'sv' else '')
arg_string = get_argument_string(function_shortcut, first=predicate if prefix == 'sv' else '')
if prefix == 'sv' and not name.startswith('ld') and not name.startswith('st') and not name.startswith(cmp):
undef = '_x'
else:
......@@ -86,20 +88,19 @@ def get_vector_instruction_set_arm(data_type='double', instruction_set='neon'):
result['makeVecConstInt'] = f'svdup_s{bits["int"]}' + '({0})'
result['makeVecIndex'] = f'svindex_s{bits["int"]}' + '({0}, {1})'
result['+int'] = f"svadd_s{bits['int']}_x(svptrue_b{bits['int']}(), " + "{0}, {1})"
result['+int'] = f"svadd_s{bits['int']}_x({int_predicate}, " + "{0}, {1})"
attr = f' __attribute__((arm_sve_vector_bits({bitwidth})))'
result[data_type] = f'svfloat{bits[data_type]}_t{attr}'
result['int'] = f'svint{bits["int"]}_t{attr}'
result['bool'] = f'svbool_t{attr}'
result[data_type] = f'svfloat{bits[data_type]}_st'
result['int'] = f'svint{bits["int"]}_st'
result['bool'] = 'svbool_st'
result['headers'] = ['<arm_sve.h>', '"arm_neon_helpers.h"']
result['&'] = f'svand_b_z(svptrue_b{bits[data_type]}(),' + ' {0}, {1})'
result['|'] = f'svorr_b_z(svptrue_b{bits[data_type]}(),' + ' {0}, {1})'
result['&'] = f'svand_b_z({predicate},' + ' {0}, {1})'
result['|'] = f'svorr_b_z({predicate},' + ' {0}, {1})'
result['blendv'] = f'svsel_f{bits[data_type]}' + '({2}, {1}, {0})'
result['any'] = f'svptest_any(svptrue_b{bits[data_type]}(), {{0}}) > 0'
result['all'] = f'svcntp_b{bits[data_type]}(svptrue_b{bits[data_type]}(), {{0}}) == {width}'
result['any'] = f'svptest_any({predicate}, {{0}})'
result['all'] = f'svcntp_b{bits[data_type]}({predicate}, {{0}}) == {width}'
result['compile_flags'] = [f'-msve-vector-bits={bitwidth}']
else:
......
import os
import math
import platform
from ctypes import CDLL
from pystencils.backends.x86_instruction_sets import get_vector_instruction_set_x86
from pystencils.backends.arm_instruction_sets import get_vector_instruction_set_arm
......@@ -59,10 +60,17 @@ def get_supported_instruction_sets():
if flags.issuperset(required_neon_flags):
result.append("neon")
if flags.issuperset(required_sve_flags):
length_file = '/proc/sys/abi/sve_default_vector_length'
if os.path.exists(length_file):
length = 8 * int(open(length_file, 'r').read())
result.append(f"sve{length}")
if platform.system() == 'Linux':
libc = CDLL('libc.so.6')
native_length = 8 * libc.prctl(51, 0, 0, 0, 0) # PR_SVE_GET_VL
if native_length < 0:
raise OSError("SVE length query failed")
pwr2_length = int(2**math.floor(math.log2(native_length)))
if pwr2_length % 256 == 0:
result.append(f"sve{pwr2_length//2}")
if native_length != pwr2_length:
result.append(f"sve{pwr2_length}")
result.append(f"sve{native_length}")
else:
result.append("sve")
return result
......
#ifdef __ARM_NEON
#include <arm_neon.h>
#endif
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0
#include <arm_sve.h>
typedef svbool_t svbool_st __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));
typedef svfloat32_t svfloat32_st __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));
typedef svfloat64_t svfloat64_st __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));
typedef svint32_t svint32_st __attribute__((arm_sve_vector_bits(__ARM_FEATURE_SVE_BITS)));
#endif
#ifdef __ARM_NEON
inline float32x4_t makeVec_f32(float a, float b, float c, float d)
{
alignas(16) float data[4] = {a, b, c, d};
......@@ -17,6 +29,7 @@ inline int32x4_t makeVec_s32(int a, int b, int c, int d)
alignas(16) int data[4] = {a, b, c, d};
return vld1q_s32(data);
}
#endif
inline void cachelineZero(void * p) {
__asm__ volatile("dc zva, %0"::"r"(p));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment