Skip to content
Snippets Groups Projects
Commit 1d39f5f5 authored by Michael Kuron
Browse files

logic fix

parent 8f92d147
Branches
Tags
No related merge requests found
...@@ -280,19 +280,19 @@ class CBackend: ...@@ -280,19 +280,19 @@ class CBackend:
code = self._vector_instruction_set[instr].format(ptr, self.sympy_printer.doprint(rhs), code = self._vector_instruction_set[instr].format(ptr, self.sympy_printer.doprint(rhs),
printed_mask) + ';' printed_mask) + ';'
flushcond = f"((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) != {CachelineSize.last_symbol}" flushcond = f"((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == {CachelineSize.last_symbol}"
if nontemporal and 'flushCacheline' in self._vector_instruction_set: if nontemporal and 'flushCacheline' in self._vector_instruction_set:
code2 = self._vector_instruction_set['flushCacheline'].format( code2 = self._vector_instruction_set['flushCacheline'].format(
ptr, self.sympy_printer.doprint(rhs)) + ';' ptr, self.sympy_printer.doprint(rhs)) + ';'
code = f"{code}\nif ({flushcond}) {{\n\t{code2}\n}}" code = f"{code}\nif ({flushcond}) {{\n\t{code2}\n}}"
elif nontemporal and 'streamAndFlushCacheline' in self._vector_instruction_set: elif nontemporal and 'storeAAndFlushCacheline' in self._vector_instruction_set:
tmpvar = '_tmp_' + hashlib.sha1(self.sympy_printer.doprint(rhs).encode('ascii')).hexdigest()[:8] tmpvar = '_tmp_' + hashlib.sha1(self.sympy_printer.doprint(rhs).encode('ascii')).hexdigest()[:8]
code = 'const ' + self._print(node.lhs.dtype).replace(' const', '') + ' ' + tmpvar + ' = ' \ code = 'const ' + self._print(node.lhs.dtype).replace(' const', '') + ' ' + tmpvar + ' = ' \
+ self.sympy_printer.doprint(rhs) + ';' + self.sympy_printer.doprint(rhs) + ';'
code1 = self._vector_instruction_set['stream'].format(ptr, tmpvar, printed_mask) + ';' code1 = self._vector_instruction_set[instr].format(ptr, tmpvar, printed_mask) + ';'
code2 = self._vector_instruction_set['streamAndFlushCacheline'].format(ptr, tmpvar, printed_mask) \ code2 = self._vector_instruction_set['storeAAndFlushCacheline'].format(ptr, tmpvar, printed_mask) \
+ ';' + ';'
code += f"\nif ({flushcond}) {{\n\t{code1}\n}} else {{\n\t{code2}\n}}" code += f"\nif ({flushcond}) {{\n\t{code2}\n}} else {{\n\t{code1}\n}}"
return pre_code + code return pre_code + code
else: else:
return f"{self.sympy_printer.doprint(node.lhs)} = {self.sympy_printer.doprint(node.rhs)};" return f"{self.sympy_printer.doprint(node.lhs)} = {self.sympy_printer.doprint(node.rhs)};"
......
...@@ -29,7 +29,7 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'): ...@@ -29,7 +29,7 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'):
'loadA': 'ld[0x0, 0]', 'loadA': 'ld[0x0, 0]',
'storeU': 'xst[1, 0x0, 0]', 'storeU': 'xst[1, 0x0, 0]',
'storeA': 'st[1, 0x0, 0]', 'storeA': 'st[1, 0x0, 0]',
'streamAndFlushCacheline': 'stl[1, 0x0, 0]', 'storeAAndFlushCacheline': 'stl[1, 0x0, 0]',
'abs': 'abs[0]', 'abs': 'abs[0]',
'==': 'cmpeq[0, 1]', '==': 'cmpeq[0, 1]',
...@@ -79,7 +79,7 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'): ...@@ -79,7 +79,7 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'):
result['loadA'] = '(__vector double)' + result['loadA'].format('(float*) {0}') result['loadA'] = '(__vector double)' + result['loadA'].format('(float*) {0}')
result['storeA'] = result['storeA'].format('(float*) {0}', '(__vector float) {1}') result['storeA'] = result['storeA'].format('(float*) {0}', '(__vector float) {1}')
result['stream'] = result['stream'].format('(float*) {0}', '(__vector float) {1}') result['stream'] = result['stream'].format('(float*) {0}', '(__vector float) {1}')
result['streamAndFlushCacheline'] = result['streamAndFlushCacheline'].format('(float*) {0}', result['storeAAndFlushCacheline'] = result['storeAAndFlushCacheline'].format('(float*) {0}',
'(__vector float) {1}') '(__vector float) {1}')
result['+int'] = "vec_add({0}, {1})" result['+int'] = "vec_add({0}, {1})"
......
...@@ -48,7 +48,14 @@ def test_aligned_and_nt_stores(openmp=False): ...@@ -48,7 +48,14 @@ def test_aligned_and_nt_stores(openmp=False):
'assume_inner_stride_one': True} 'assume_inner_stride_one': True}
update_rule = [ps.Assignment(f.center(), 0.25 * (g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1]))] update_rule = [ps.Assignment(f.center(), 0.25 * (g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1]))]
ast = ps.create_kernel(update_rule, target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp) ast = ps.create_kernel(update_rule, target=dh.default_target, cpu_vectorize_info=opt, cpu_openmp=openmp)
for instruction in ['stream', 'streamFence', 'cachelineZero', 'streamAndFlushCacheline', 'flushCacheline']: if instruction_set in ['sse'] or instruction_set.startswith('avx'):
assert 'stream' in ast.instruction_set
assert 'streamFence' in ast.instruction_set
if instruction_set in ['neon', 'vsx'] or instruction_set.startswith('sve'):
assert 'cachelineZero' in ast.instruction_set
if instruction_set in ['vsx']:
assert 'storeAAndFlushCacheline' in ast.instruction_set
for instruction in ['stream', 'streamFence', 'cachelineZero', 'storeAAndFlushCacheline', 'flushCacheline']:
if instruction in ast.instruction_set: if instruction in ast.instruction_set:
assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast) assert ast.instruction_set[instruction].split('{')[0] in ps.get_code_str(ast)
kernel = ast.compile() kernel = ast.compile()
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment