Commit d884044c authored by Markus Holzer
Browse files

Merge branch 'arm64' into 'master'

Field alignment on ARM

See merge request walberla/walberla!448
parents ab07f020 1661e5e4
......@@ -357,12 +357,23 @@ endif()
# architecture optimization
# Tune generated code for the CPU of the build machine (opt-in via
# WALBERLA_OPTIMIZE_FOR_LOCALHOST); only applied for GNU, Intel and Clang.
if( WALBERLA_OPTIMIZE_FOR_LOCALHOST )
if( WALBERLA_CXX_COMPILER_IS_GNU OR WALBERLA_CXX_COMPILER_IS_INTEL OR WALBERLA_CXX_COMPILER_IS_CLANG )
# Apple Clang on Apple Silicon (arm64) rejects -march=native, so skip the
# flag there; everywhere else, target the host architecture.
if( CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64" )
# no -march=native available on this compiler, but there is currently only one such processor
else()
add_flag ( CMAKE_CXX_FLAGS "-march=native" )
add_flag ( CMAKE_C_FLAGS "-march=native" )
endif()
# The Intel compiler additionally understands -xhost for host-ISA tuning.
if( WALBERLA_CXX_COMPILER_IS_INTEL )
add_flag ( CMAKE_CXX_FLAGS "-xhost" )
add_flag ( CMAKE_C_FLAGS "-xhost" )
endif()
# On Linux/AArch64 with SVE, procfs exposes the configured default vector
# length; forward it so the compiler can emit fixed-length SVE code.
# NOTE(review): the kernel reports this value in BYTES while GCC/Clang's
# -msve-vector-bits expects BITS, and file(READ) keeps a trailing newline —
# confirm whether a conversion/strip happens elsewhere before this is relied on.
if( EXISTS "/proc/sys/abi/sve_default_vector_length" )
file( READ "/proc/sys/abi/sve_default_vector_length" SVE_LENGTH )
add_flag ( CMAKE_CXX_FLAGS "-msve-vector-bits=${SVE_LENGTH}" )
add_flag ( CMAKE_C_FLAGS "-msve-vector-bits=${SVE_LENGTH}" )
endif()
endif()
endif()
......
......@@ -61,6 +61,8 @@ def generate_sweep(generation_context, class_name, assignments,
else:
ast = create_staggered_kernel(assignments, **create_kernel_params)
ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one']
ast.nontemporal = create_kernel_params['cpu_vectorize_info']['nontemporal']
ast.openmp = create_kernel_params['cpu_openmp']
def to_name(f):
return f.name if isinstance(f, Field) else f
......
......@@ -238,37 +238,50 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non
if param.is_field_pointer:
field = param.fields[0]
if field.field_type == FieldType.BUFFER:
kernel_call_lines.append("%s %s = %s;" % (param.symbol.dtype, param.symbol.name, param.field_name))
kernel_call_lines.append(f"{param.symbol.dtype} {param.symbol.name} = {param.field_name};")
else:
coordinates = get_start_coordinates(field)
actual_gls = "int_c(%s->nrOfGhostLayers())" % (param.field_name, )
actual_gls = f"int_c({param.field_name}->nrOfGhostLayers())"
coord_set = set(coordinates)
coord_set = sorted(coord_set, key=lambda e: str(e))
for c in coord_set:
kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, -%s);" %
(c, actual_gls))
kernel_call_lines.append(f"WALBERLA_ASSERT_GREATER_EQUAL({c}, -{actual_gls});")
while len(coordinates) < 4:
coordinates.append(0)
coordinates = tuple(coordinates)
kernel_call_lines.append("%s %s = %s->dataAt(%s, %s, %s, %s);" %
((param.symbol.dtype, param.symbol.name, param.field_name) + coordinates))
kernel_call_lines.append(f"{param.symbol.dtype} {param.symbol.name} = {param.field_name}->dataAt"
f"({coordinates[0]}, {coordinates[1]}, {coordinates[2]}, {coordinates[3]});")
if ast.assumed_inner_stride_one and field.index_dimensions > 0:
kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (param.field_name,))
kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL({param.field_name}->layout(), field::fzyx);")
if ast.instruction_set and ast.assumed_inner_stride_one:
if ast.nontemporal and ast.openmp and 'cachelineZero' in ast.instruction_set:
kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL((uintptr_t) {field.name}->dataAt(0, 0, 0, 0) %"
f"{ast.instruction_set['cachelineSize']}, 0);")
else:
kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL((uintptr_t) {field.name}->dataAt(0, 0, 0, 0) %"
f"{ast.instruction_set['bytes']}, 0);")
elif param.is_field_stride:
casted_stride = get_field_stride(param)
type_str = param.symbol.dtype.base_name
kernel_call_lines.append("const %s %s = %s;" % (type_str, param.symbol.name, casted_stride))
kernel_call_lines.append(f"const {type_str} {param.symbol.name} = {casted_stride};")
elif param.is_field_shape:
coord = param.symbol.coordinate
field = param.fields[0]
type_str = param.symbol.dtype.base_name
shape = "%s(%s)" % (type_str, get_end_coordinates(field)[coord])
shape = f"{type_str}({get_end_coordinates(field)[coord]})"
assert coord < 3
max_value = "%s->%sSizeWithGhostLayer()" % (field.name, ('x', 'y', 'z')[coord])
kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, %s);" % (max_value, shape))
kernel_call_lines.append("const %s %s = %s;" % (type_str, param.symbol.name, shape))
max_value = f"{field.name}->{('x', 'y', 'z')[coord]}SizeWithGhostLayer()"
kernel_call_lines.append(f"WALBERLA_ASSERT_GREATER_EQUAL({max_value}, {shape});")
kernel_call_lines.append(f"const {type_str} {param.symbol.name} = {shape};")
if ast.assumed_inner_stride_one and field.index_dimensions > 0:
kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (field.name,))
kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL({field.name}->layout(), field::fzyx);")
if ast.instruction_set and ast.assumed_inner_stride_one:
if ast.nontemporal and ast.openmp and 'cachelineZero' in ast.instruction_set:
kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL((uintptr_t) {field.name}->dataAt(0, 0, 0, 0) %"
f"{ast.instruction_set['cachelineSize']}, 0);")
else:
kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL((uintptr_t) {field.name}->dataAt(0, 0, 0, 0) %"
f"{ast.instruction_set['bytes']}, 0);")
call_parameters = ", ".join([p.symbol.name for p in ast_params])
......
......@@ -316,7 +316,13 @@ namespace field {
// Automatically select allocator if none was given
if ( alloc == nullptr )
{
#ifdef __BIGGEST_ALIGNMENT__
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0
const uint_t alignment = __ARM_FEATURE_SVE_BITS/8;
#elif defined(__ARM_FEATURE_SVE)
const uint_t alignment = 64;
#elif defined(__ARM_NEON)
const uint_t alignment = 16;
#elif defined(__BIGGEST_ALIGNMENT__)
const uint_t alignment = __BIGGEST_ALIGNMENT__;
#elif defined(__AVX512F__)
const uint_t alignment = 64;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment