Skip to content
Snippets Groups Projects
Commit 2f0d53c0 authored by Markus Holzer's avatar Markus Holzer
Browse files

Merge branch 'rvv' into 'master'

RISC-V cacheline zero

See merge request pycodegen/pystencils!326
parents 4a2568c6 8597d398
Branches
Tags
No related merge requests found
......@@ -186,18 +186,16 @@ arm64v9:
riscv64:
# The RISC-V vector extension is currently not supported by GCC.
# Also, the image is built without the libomp package which is not yet available on Ubuntu.
extends: .multiarch_template
image: i10git.cs.fau.de:5005/pycodegen/pycodegen/riscv64
variables:
# explicitly set SIMD as detection does not appear to work on QEMU
# explicitly set SIMD as detection requires QEMU >= 8.1
PYSTENCILS_SIMD: "rvv"
QEMU_CPU: "rv64,v=true"
QEMU_CPU: "rv64,v=true,zicboz=true"
before_script:
- *multiarch_before_script
- sed -i 's/march=native/march=rv64imfdv/g' ~/.config/pystencils/config.json
- sed -i s/g\+\+/clang++/g ~/.config/pystencils/config.json
- sed -i 's/fopenmp/fopenmp=libgomp -I\/usr\/include\/riscv64-linux-gnu/g' ~/.config/pystencils/config.json
- sed -i 's/march=native/march=rv64imfdvzicboz/g' ~/.config/pystencils/config.json
- sed -i s/g\+\+/clang++-15/g ~/.config/pystencils/config.json
minimal-conda:
stage: pretest
......
......@@ -98,9 +98,12 @@ def get_vector_instruction_set_riscv(data_type='double', instruction_set='rvv'):
result['int'] = f'vint{bits["int"]}m1_t'
result['bool'] = f'vbool{bits[data_type]}_t'
result['headers'] = ['<riscv_vector.h>']
result['headers'] = ['<riscv_vector.h>', '"riscv_v_helpers.h"']
result['any'] += ' > 0x0'
result['all'] += f' == vsetvl_e{bits[data_type]}m1({vl})'
result['cachelineSize'] = 'cachelineSize()'
result['cachelineZero'] = 'cachelineZero((void*) {0})'
return result
/**
 * Zero the cache line addressed by p.
 *
 * When the compiler targets the Zicboz extension (__riscv_zicboz is defined),
 * a single `cbo.zero` instruction is issued on p. Without that extension the
 * function body is empty and memory is left untouched.
 */
inline void cachelineZero(void * p) {
#ifdef __riscv_zicboz
    // The "memory" clobber tells the compiler that this instruction writes
    // memory it cannot see, so values held in registers must be reloaded.
    __asm__ volatile(
        "cbo.zero (%0)"
        : /* no outputs */
        : "r"(p)
        : "memory");
#endif
}
/**
 * Measure how many bytes a single cachelineZero() call clears.
 *
 * A scratch buffer is filled with 0xff, cachelineZero() is applied to an
 * address inside it that is a multiple of 0x100000, and the run of zero bytes
 * starting at that address is counted.
 *
 * Returns the length of that run, or SIZE_MAX when the measurement is not
 * usable: the first byte was not zeroed (e.g. cachelineZero() is a no-op),
 * the byte in front of the address was zeroed as well, or the run reaches
 * the 0x100000 limit.
 */
inline size_t _cachelineSize() {
    // Detection limit; the scratch area is twice as large so that an address
    // aligned to `limit`, followed by `limit` readable bytes, always fits.
    const size_t limit = 0x100000;
    uint8_t scratch[2*limit];

    // Pre-fill with a non-zero pattern so that zeroed bytes stand out.
    for (uint8_t * cursor = scratch; cursor != scratch + 2*limit; ++cursor) {
        *cursor = 0xff;
    }

    // Distance from the start of scratch to the next multiple of `limit`
    // (a full `limit` if scratch itself is already aligned, so start >= 1).
    const size_t misalignment = ((uintptr_t) scratch) % limit;
    const size_t start = limit - misalignment;

    cachelineZero((void*) (scratch + start));

    // Reject the measurement if nothing was cleared at the target address or
    // if the clearing also reached the byte just before it.
    if (scratch[start] != 0 || scratch[start - 1] == 0) {
        return SIZE_MAX;
    }

    // Count the zero bytes that follow the target address.
    size_t cleared = 1;
    while (cleared < limit && scratch[start + cleared] == 0) {
        ++cleared;
    }

    // Reaching the limit without finding a non-zero byte means too much
    // was zeroed to report a size.
    return cleared < limit ? cleared : SIZE_MAX;
}
/**
 * Size in bytes of the region cleared by cachelineZero().
 *
 * Built without __riscv_zicboz this always returns SIZE_MAX. Otherwise the
 * value comes from _cachelineSize(), evaluated on the first call only and
 * kept in a function-local static for all later calls.
 */
inline size_t cachelineSize() {
#ifndef __riscv_zicboz
    return SIZE_MAX;
#else
    static size_t measured = _cachelineSize();
    return measured;
#endif
}
......@@ -146,7 +146,7 @@ def test_aligned_and_nt_stores(openmp, instruction_set=instruction_set):
if instruction_set in ['sse'] or instruction_set.startswith('avx'):
assert 'stream' in ast.instruction_set
assert 'streamFence' in ast.instruction_set
if instruction_set in ['neon', 'vsx'] or instruction_set.startswith('sve'):
if instruction_set in ['neon', 'vsx', 'rvv'] or instruction_set.startswith('sve'):
assert 'cachelineZero' in ast.instruction_set
if instruction_set in ['vsx']:
assert 'storeAAndFlushCacheline' in ast.instruction_set
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment