Commit d884044c authored by Markus Holzer
Browse files

Merge branch 'arm64' into 'master'

Field alignment on ARM

See merge request walberla/walberla!448
parents ab07f020 1661e5e4
# architecture optimization
if( WALBERLA_OPTIMIZE_FOR_LOCALHOST )
   if( WALBERLA_CXX_COMPILER_IS_GNU OR WALBERLA_CXX_COMPILER_IS_INTEL OR WALBERLA_CXX_COMPILER_IS_CLANG )
      if( CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64" )
         # no -march=native available on this compiler, but there is currently only one such processor
      else()
         add_flag ( CMAKE_CXX_FLAGS "-march=native" )
         add_flag ( CMAKE_C_FLAGS   "-march=native" )
      endif()

      if( WALBERLA_CXX_COMPILER_IS_INTEL )
         add_flag ( CMAKE_CXX_FLAGS "-xhost" )
         add_flag ( CMAKE_C_FLAGS   "-xhost" )
      endif()

      # On Linux/ARM with SVE, pin the compile-time vector length to the
      # kernel's configured default so fixed-length SVE types (and the
      # matching field alignment) can be used.
      if( EXISTS "/proc/sys/abi/sve_default_vector_length" )
         file( READ "/proc/sys/abi/sve_default_vector_length" SVE_LENGTH )
         # file(READ) keeps the trailing newline of the procfs file; strip it,
         # otherwise the flag is emitted as "-msve-vector-bits=512\n" and the
         # compiler invocation breaks.
         string( STRIP "${SVE_LENGTH}" SVE_LENGTH )
         add_flag ( CMAKE_CXX_FLAGS "-msve-vector-bits=${SVE_LENGTH}" )
         add_flag ( CMAKE_C_FLAGS   "-msve-vector-bits=${SVE_LENGTH}" )
      endif()
   endif()
endif()
...@@ -61,6 +61,8 @@ def generate_sweep(generation_context, class_name, assignments, ...@@ -61,6 +61,8 @@ def generate_sweep(generation_context, class_name, assignments,
else: else:
ast = create_staggered_kernel(assignments, **create_kernel_params) ast = create_staggered_kernel(assignments, **create_kernel_params)
ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one']
ast.nontemporal = create_kernel_params['cpu_vectorize_info']['nontemporal']
ast.openmp = create_kernel_params['cpu_openmp']
def to_name(f): def to_name(f):
return f.name if isinstance(f, Field) else f return f.name if isinstance(f, Field) else f
......
...@@ -238,37 +238,50 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non ...@@ -238,37 +238,50 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non
if param.is_field_pointer: if param.is_field_pointer:
field = param.fields[0] field = param.fields[0]
if field.field_type == FieldType.BUFFER: if field.field_type == FieldType.BUFFER:
kernel_call_lines.append("%s %s = %s;" % (param.symbol.dtype, param.symbol.name, param.field_name)) kernel_call_lines.append(f"{param.symbol.dtype} {param.symbol.name} = {param.field_name};")
else: else:
coordinates = get_start_coordinates(field) coordinates = get_start_coordinates(field)
actual_gls = "int_c(%s->nrOfGhostLayers())" % (param.field_name, ) actual_gls = f"int_c({param.field_name}->nrOfGhostLayers())"
coord_set = set(coordinates) coord_set = set(coordinates)
coord_set = sorted(coord_set, key=lambda e: str(e)) coord_set = sorted(coord_set, key=lambda e: str(e))
for c in coord_set: for c in coord_set:
kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, -%s);" % kernel_call_lines.append(f"WALBERLA_ASSERT_GREATER_EQUAL({c}, -{actual_gls});")
(c, actual_gls))
while len(coordinates) < 4: while len(coordinates) < 4:
coordinates.append(0) coordinates.append(0)
coordinates = tuple(coordinates) coordinates = tuple(coordinates)
kernel_call_lines.append("%s %s = %s->dataAt(%s, %s, %s, %s);" % kernel_call_lines.append(f"{param.symbol.dtype} {param.symbol.name} = {param.field_name}->dataAt"
((param.symbol.dtype, param.symbol.name, param.field_name) + coordinates)) f"({coordinates[0]}, {coordinates[1]}, {coordinates[2]}, {coordinates[3]});")
if ast.assumed_inner_stride_one and field.index_dimensions > 0: if ast.assumed_inner_stride_one and field.index_dimensions > 0:
kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (param.field_name,)) kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL({param.field_name}->layout(), field::fzyx);")
if ast.instruction_set and ast.assumed_inner_stride_one:
if ast.nontemporal and ast.openmp and 'cachelineZero' in ast.instruction_set:
kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL((uintptr_t) {field.name}->dataAt(0, 0, 0, 0) %"
f"{ast.instruction_set['cachelineSize']}, 0);")
else:
kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL((uintptr_t) {field.name}->dataAt(0, 0, 0, 0) %"
f"{ast.instruction_set['bytes']}, 0);")
elif param.is_field_stride: elif param.is_field_stride:
casted_stride = get_field_stride(param) casted_stride = get_field_stride(param)
type_str = param.symbol.dtype.base_name type_str = param.symbol.dtype.base_name
kernel_call_lines.append("const %s %s = %s;" % (type_str, param.symbol.name, casted_stride)) kernel_call_lines.append(f"const {type_str} {param.symbol.name} = {casted_stride};")
elif param.is_field_shape: elif param.is_field_shape:
coord = param.symbol.coordinate coord = param.symbol.coordinate
field = param.fields[0] field = param.fields[0]
type_str = param.symbol.dtype.base_name type_str = param.symbol.dtype.base_name
shape = "%s(%s)" % (type_str, get_end_coordinates(field)[coord]) shape = f"{type_str}({get_end_coordinates(field)[coord]})"
assert coord < 3 assert coord < 3
max_value = "%s->%sSizeWithGhostLayer()" % (field.name, ('x', 'y', 'z')[coord]) max_value = f"{field.name}->{('x', 'y', 'z')[coord]}SizeWithGhostLayer()"
kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, %s);" % (max_value, shape)) kernel_call_lines.append(f"WALBERLA_ASSERT_GREATER_EQUAL({max_value}, {shape});")
kernel_call_lines.append("const %s %s = %s;" % (type_str, param.symbol.name, shape)) kernel_call_lines.append(f"const {type_str} {param.symbol.name} = {shape};")
if ast.assumed_inner_stride_one and field.index_dimensions > 0: if ast.assumed_inner_stride_one and field.index_dimensions > 0:
kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (field.name,)) kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL({field.name}->layout(), field::fzyx);")
if ast.instruction_set and ast.assumed_inner_stride_one:
if ast.nontemporal and ast.openmp and 'cachelineZero' in ast.instruction_set:
kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL((uintptr_t) {field.name}->dataAt(0, 0, 0, 0) %"
f"{ast.instruction_set['cachelineSize']}, 0);")
else:
kernel_call_lines.append(f"WALBERLA_ASSERT_EQUAL((uintptr_t) {field.name}->dataAt(0, 0, 0, 0) %"
f"{ast.instruction_set['bytes']}, 0);")
call_parameters = ", ".join([p.symbol.name for p in ast_params]) call_parameters = ", ".join([p.symbol.name for p in ast_params])
......
...@@ -316,7 +316,13 @@ namespace field { ...@@ -316,7 +316,13 @@ namespace field {
// Automatically select allocator if none was given // Automatically select allocator if none was given
if ( alloc == nullptr ) if ( alloc == nullptr )
{ {
#ifdef __BIGGEST_ALIGNMENT__ #if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0
const uint_t alignment = __ARM_FEATURE_SVE_BITS/8;
#elif defined(__ARM_FEATURE_SVE)
const uint_t alignment = 64;
#elif defined(__ARM_NEON)
const uint_t alignment = 16;
#elif defined(__BIGGEST_ALIGNMENT__)
const uint_t alignment = __BIGGEST_ALIGNMENT__; const uint_t alignment = __BIGGEST_ALIGNMENT__;
#elif defined(__AVX512F__) #elif defined(__AVX512F__)
const uint_t alignment = 64; const uint_t alignment = 64;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment