Skip to content
Snippets Groups Projects
Commit a100b5b0 authored by Michael Kuron
Browse files

cache line zero: require alignment to cache line size when OpenMP enabled

Otherwise, zeroing a cache line can wipe out data that belongs to the next iteration of the enclosing outer loop, which in 2D is the loop that OpenMP parallelizes.
parent 31be359b
Branches
Tags
No related merge requests found
......@@ -80,6 +80,8 @@ def get_vector_instruction_set_ppc(data_type='double', instruction_set='vsx'):
result['loadA'] = '(__vector double)' + result['loadA'].format('(float*) {0}')
result['storeA'] = result['storeA'].format('(float*) {0}', '(__vector float) {1}')
result['stream'] = result['stream'].format('(float*) {0}', '(__vector float) {1}')
result['streamAndFlushCacheline'] = result['streamAndFlushCacheline'].format('(float*) {0}',
'(__vector float) {1}')
result['+int'] = "vec_add({0}, {1})"
......
......@@ -58,8 +58,9 @@ import numpy as np
from appdirs import user_cache_dir, user_config_dir
from pystencils import FieldType
from pystencils.astnodes import LoopOverCoordinate
from pystencils.backends.cbackend import generate_c, get_headers
from pystencils.data_types import cast_func, VectorType
from pystencils.data_types import cast_func, VectorType, vector_memory_access
from pystencils.include import get_pystencils_include_path
from pystencils.kernel_wrapper import KernelWrapper
from pystencils.utils import atomic_file_write, file_handle_for_atomic_write, recursive_dict_update
......@@ -386,6 +387,14 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec
if ast_node.instruction_set and aligned:
byte_width = ast_node.instruction_set['width'] * item_size
if 'cachelineZero' in ast_node.instruction_set:
has_openmp, has_nontemporal = False, False
for loop in ast_node.atoms(LoopOverCoordinate):
has_openmp = has_openmp or any(['#pragma omp' in p for p in loop.prefix_lines])
has_nontemporal = has_nontemporal or any([a.args[0].field == field and a.args[3] for a in
loop.atoms(vector_memory_access)])
if has_openmp and has_nontemporal:
byte_width = ast_node.instruction_set['cachelineSize']
offset = max(max(ast_node.ghost_layers)) * item_size
offset_cond = f"(((uintptr_t) buffer_{field.name}.buf) + {offset}) % {byte_width} == 0"
......@@ -394,6 +403,9 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec
"the kernel creation is not specified it will choose a suitable value " \
"automatically. This value might not " \
"be compatible with the allocated arrays."
if type(byte_width) is not int:
message += " Note that when both OpenMP and non-temporal stores are enabled, alignment to the "\
"cacheline size is required."
pre_call_code += template_check_array.format(cond=offset_cond, what="offset", name=field.name,
expected=message)
......
......@@ -38,14 +38,14 @@ def test_aligned_and_nt_stores(openmp=False):
# create a datahandling object
dh = ps.create_data_handling(domain_size, periodicity=(True, True), parallel=False, default_target='cpu')
# fields
g = dh.add_array("g", values_per_cell=1, alignment=True)
dh.fill("g", 1.0, ghost_layers=True)
if openmp:
# TODO: throw error when not cacheline-aligned
alignment = 128 if instruction_set == 'vsx' else 64 if instruction_set == 'neon' else True
else:
alignment = True
# fields
g = dh.add_array("g", values_per_cell=1, alignment=alignment)
dh.fill("g", 1.0, ghost_layers=True)
f = dh.add_array("f", values_per_cell=1, alignment=alignment)
dh.fill("f", 0.0, ghost_layers=True)
opt = {'instruction_set': instruction_set, 'assume_aligned': True, 'nontemporal': True,
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment