Skip to content
Snippets Groups Projects
Commit 7e6d2a21 authored by Michael Kuron
Browse files

nontemporal stores: flush cacheline if available

parent 5a88c472
No related merge requests found
......@@ -859,14 +859,16 @@ class NontemporalFence(Node):
class CachelineSize(Node):
symbol = sp.Symbol("_clsize")
mask_symbol = sp.Symbol("_clsize_mask")
last_symbol = sp.Symbol("_cl_lastvec")
def __init__(self):
super(CachelineSize, self).__init__(parent=None)
@property
def symbols_defined(self):
return set([self.mask_symbol])
return set([self.symbol, self.mask_symbol, self.last_symbol])
@property
def undefined_symbols(self):
......
......@@ -274,11 +274,20 @@ class CBackend:
ptr = "&" + self.sympy_printer.doprint(node.lhs.args[0])
pre_code = ''
if instr == 'stream' and 'cachelineZero' in self._vector_instruction_set:
pre_code = f"if (((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == 0) " + "\n\t" + \
self._vector_instruction_set['cachelineZero'].format(ptr) + ';\n'
pre_code = f"if (((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == 0) " + "{\n\t" + \
self._vector_instruction_set['cachelineZero'].format(ptr) + ';\n}\n'
code = self._vector_instruction_set[instr].format(ptr, self.sympy_printer.doprint(rhs),
printed_mask) + ';'
flushcond = f"((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) != {CachelineSize.last_symbol}"
if instr == 'stream' and 'flushCacheline' in self._vector_instruction_set:
code2 = self._vector_instruction_set['flushCacheline'].format(
ptr, self.sympy_printer.doprint(rhs)) + ';'
code = f"{code}\nif ({flushcond}) {{\n\t{code2}\n}}"
elif instr == 'stream' and 'streamAndFlushCacheline' in self._vector_instruction_set:
code2 = self._vector_instruction_set['streamAndFlushCacheline'].format(
ptr, self.sympy_printer.doprint(rhs), printed_mask) + ';'
code = f"if ({flushcond}) {{\n\t{code}\n}} else {{\n\t{code2}\n}}"
return pre_code + code
else:
return f"{self.sympy_printer.doprint(node.lhs)} = {self.sympy_printer.doprint(node.rhs)};"
......@@ -291,7 +300,10 @@ class CBackend:
def _print_CachelineSize(self, node):
if 'cachelineSize' in self._vector_instruction_set:
return f'const size_t {node.mask_symbol} = {self._vector_instruction_set["cachelineSize"]} - 1;'
code = f'const size_t {node.symbol} = {self._vector_instruction_set["cachelineSize"]};\n'
code += f'const size_t {node.mask_symbol} = {node.symbol} - 1;\n'
code += f'const size_t {node.last_symbol} = {node.symbol} - 16;\n' # TODO: determine size from instruction set
return code
else:
return ''
......
......@@ -29,7 +29,8 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'):
'loadA': 'ld[0x0, 0]',
'storeU': 'xst[1, 0x0, 0]',
'storeA': 'st[1, 0x0, 0]',
'stream': 'st[1, 0x0, 0]', # stl would flush the cacheline, which only makes sense for the last item
'stream': 'st[1, 0x0, 0]',
'streamAndFlushCacheline': 'stl[1, 0x0, 0]',
'abs': 'abs[0]',
'==': 'cmpeq[0, 1]',
......
......@@ -56,6 +56,10 @@ def test_aligned_and_nt_stores(openmp=False):
assert ast.instruction_set['streamFence'] in ps.get_code_str(ast)
if 'cachelineZero' in ast.instruction_set:
assert ast.instruction_set['cachelineZero'].split('{0}')[0] in ps.get_code_str(ast)
if 'streamAndFlushCacheline' in ast.instruction_set:
assert ast.instruction_set['streamAndFlushCacheline'].split('{0}')[0] in ps.get_code_str(ast)
if 'flushCacheline' in ast.instruction_set:
assert ast.instruction_set['flushCacheline'].split('{0}')[0] in ps.get_code_str(ast)
kernel = ast.compile()
dh.run_kernel(kernel)
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment