Commit a533fafb authored by Michael Kuron

Field alignment on ARM

parent ab07f020
@@ -357,12 +357,23 @@ endif()
 # architecture optimization
 if( WALBERLA_OPTIMIZE_FOR_LOCALHOST )
     if( WALBERLA_CXX_COMPILER_IS_GNU OR WALBERLA_CXX_COMPILER_IS_INTEL OR WALBERLA_CXX_COMPILER_IS_CLANG )
-        add_flag ( CMAKE_CXX_FLAGS "-march=native" )
-        add_flag ( CMAKE_C_FLAGS   "-march=native" )
+        if( CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64" )
+            # no -march=native available on this compiler, but there is currently only one such processor
+        else()
+            add_flag ( CMAKE_CXX_FLAGS "-march=native" )
+            add_flag ( CMAKE_C_FLAGS   "-march=native" )
+        endif()
 
         if( WALBERLA_CXX_COMPILER_IS_INTEL )
             add_flag ( CMAKE_CXX_FLAGS "-xhost" )
             add_flag ( CMAKE_C_FLAGS   "-xhost" )
         endif()
+
+        if( EXISTS "/proc/sys/abi/sve_default_vector_length" )
+            file( READ "/proc/sys/abi/sve_default_vector_length" SVE_LENGTH )
+            add_flag ( CMAKE_CXX_FLAGS "-msve-vector-bits=${SVE_LENGTH}" )
+            add_flag ( CMAKE_C_FLAGS   "-msve-vector-bits=${SVE_LENGTH}" )
+        endif()
     endif()
 endif()
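The new `-msve-vector-bits` flag fixes the SVE vector length at compile time, which also makes the compiler define `__ARM_FEATURE_SVE_BITS` and thereby feeds the allocator change further down. As an illustration (not part of this commit), a minimal program that reports the SVE register width the hardware actually provides, assuming an SVE-capable compiler and target:

```cpp
// svcntb() returns the number of bytes in one SVE register; with
// -msve-vector-bits=N, __ARM_FEATURE_SVE_BITS is defined to N and
// this prints N on matching hardware.
#include <arm_sve.h>
#include <cstdio>

int main()
{
   printf("SVE register width: %llu bits\n",
          (unsigned long long) (8 * svcntb()));
   return 0;
}
```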
@@ -61,6 +61,8 @@ def generate_sweep(generation_context, class_name, assignments,
     else:
         ast = create_staggered_kernel(assignments, **create_kernel_params)
     ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one']
+    ast.nontemporal = create_kernel_params['cpu_vectorize_info']['nontemporal']
+    ast.openmp = create_kernel_params['cpu_openmp']
 
     def to_name(f):
         return f.name if isinstance(f, Field) else f
@@ -254,6 +254,13 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=None
                                      ((param.symbol.dtype, param.symbol.name, param.field_name) + coordinates))
             if ast.assumed_inner_stride_one and field.index_dimensions > 0:
                 kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (param.field_name,))
+            if ast.instruction_set and ast.assumed_inner_stride_one:
+                if ast.nontemporal and ast.openmp and 'cachelineZero' in ast.instruction_set:
+                    kernel_call_lines.append("WALBERLA_ASSERT_EQUAL((uintptr_t) %s->dataAt(0, 0, 0, 0) %% %s, 0);"
+                                             % (field.name, ast.instruction_set['cachelineSize']))
+                else:
+                    kernel_call_lines.append("WALBERLA_ASSERT_EQUAL((uintptr_t) %s->dataAt(0, 0, 0, 0) %% %s, 0);"
+                                             % (field.name, ast.instruction_set['bytes']))
         elif param.is_field_stride:
             casted_stride = get_field_stride(param)
             type_str = param.symbol.dtype.base_name
@@ -269,6 +276,13 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=None
             kernel_call_lines.append("const %s %s = %s;" % (type_str, param.symbol.name, shape))
             if ast.assumed_inner_stride_one and field.index_dimensions > 0:
                 kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (field.name,))
+            if ast.instruction_set and ast.assumed_inner_stride_one:
+                if ast.nontemporal and ast.openmp and 'cachelineZero' in ast.instruction_set:
+                    kernel_call_lines.append("WALBERLA_ASSERT_EQUAL((uintptr_t) %s->dataAt(0, 0, 0, 0) %% %s, 0);"
+                                             % (field.name, ast.instruction_set['cachelineSize']))
+                else:
+                    kernel_call_lines.append("WALBERLA_ASSERT_EQUAL((uintptr_t) %s->dataAt(0, 0, 0, 0) %% %s, 0);"
+                                             % (field.name, ast.instruction_set['bytes']))
 
     call_parameters = ", ".join([p.symbol.name for p in ast_params])
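Both generate_call hunks emit the same alignment assertion into the generated sweep: if nontemporal stores run under OpenMP and the instruction set can zero whole cache lines, the field data must start on a cache-line boundary; otherwise alignment to the vector width (`instruction_set['bytes']`) is enough. A standalone sketch of the check the generated C++ performs, with `assert` standing in for `WALBERLA_ASSERT_EQUAL`, an aligned buffer standing in for the field, and 64 as an assumed cache-line size:

```cpp
// Approximates the generated line
//   WALBERLA_ASSERT_EQUAL((uintptr_t) field->dataAt(0, 0, 0, 0) % bytes, 0);
#include <cassert>
#include <cstdint>
#include <cstdlib>

int main()
{
   const std::size_t bytes = 64;                       // assumed cachelineSize
   void *data = std::aligned_alloc(bytes, 8 * bytes);  // size must be a multiple of the alignment
   assert(reinterpret_cast<std::uintptr_t>(data) % bytes == 0);
   std::free(data);
   return 0;
}
```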
@@ -316,7 +316,13 @@ namespace field {
       // Automatically select allocator if none was given
       if ( alloc == nullptr )
       {
-#ifdef __BIGGEST_ALIGNMENT__
+#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0
+         const uint_t alignment = __ARM_FEATURE_SVE_BITS/8;
+#elif defined(__ARM_FEATURE_SVE)
+         const uint_t alignment = 64;
+#elif defined(__ARM_NEON)
+         const uint_t alignment = 16;
+#elif defined(__BIGGEST_ALIGNMENT__)
          const uint_t alignment = __BIGGEST_ALIGNMENT__;
 #elif defined(__AVX512F__)
          const uint_t alignment = 64;
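The cascade now selects the field alignment for ARM targets first: one full SVE register when the vector length is fixed at compile time (`__ARM_FEATURE_SVE_BITS/8` bytes), 64 bytes for length-agnostic SVE, and 16 bytes (one 128-bit register) for NEON, before falling through to the pre-existing `__BIGGEST_ALIGNMENT__` and AVX-512 branches. A small sketch, not from the commit, that prints what this chain picks for the current target (the branches past the diff cut-off are replaced by an assumed conservative default):

```cpp
// Mirrors the #if cascade above; values are taken from the diff,
// except the final fallback, which is an assumption.
#include <cstdio>

int main()
{
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0
   const unsigned alignment = __ARM_FEATURE_SVE_BITS / 8;  // one fixed-length SVE register
#elif defined(__ARM_FEATURE_SVE)
   const unsigned alignment = 64;  // vector length unknown at compile time
#elif defined(__ARM_NEON)
   const unsigned alignment = 16;  // NEON registers are 128 bits
#elif defined(__BIGGEST_ALIGNMENT__)
   const unsigned alignment = __BIGGEST_ALIGNMENT__;
#elif defined(__AVX512F__)
   const unsigned alignment = 64;  // one 512-bit register
#else
   const unsigned alignment = 16;  // assumed default; the original file continues with more branches
#endif
   printf("selected field alignment: %u bytes\n", alignment);
   return 0;
}
```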