Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
itischler
waLBerla
Commits
1661e5e4
Commit
1661e5e4
authored
Apr 29, 2021
by
Michael Kuron
Committed by
Markus Holzer
Apr 29, 2021
Browse files
Field alignment on ARM
parent
ab07f020
Changes
4
Hide whitespace changes
Inline
Side-by-side
CMakeLists.txt
View file @
1661e5e4
...
...
@@ -357,12 +357,23 @@ endif()
# architecture optimization
if
(
WALBERLA_OPTIMIZE_FOR_LOCALHOST
)
if
(
WALBERLA_CXX_COMPILER_IS_GNU OR WALBERLA_CXX_COMPILER_IS_INTEL OR WALBERLA_CXX_COMPILER_IS_CLANG
)
add_flag
(
CMAKE_CXX_FLAGS
"-march=native"
)
add_flag
(
CMAKE_C_FLAGS
"-march=native"
)
if
(
CMAKE_CXX_COMPILER_ID STREQUAL
"AppleClang"
AND CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL
"arm64"
)
# no -march=native available on this compiler, but there is currently only one such processor
else
()
add_flag
(
CMAKE_CXX_FLAGS
"-march=native"
)
add_flag
(
CMAKE_C_FLAGS
"-march=native"
)
endif
()
if
(
WALBERLA_CXX_COMPILER_IS_INTEL
)
add_flag
(
CMAKE_CXX_FLAGS
"-xhost"
)
add_flag
(
CMAKE_C_FLAGS
"-xhost"
)
endif
()
if
(
EXISTS
"/proc/sys/abi/sve_default_vector_length"
)
file
(
READ
"/proc/sys/abi/sve_default_vector_length"
SVE_LENGTH
)
add_flag
(
CMAKE_CXX_FLAGS
"-msve-vector-bits=
${
SVE_LENGTH
}
"
)
add_flag
(
CMAKE_C_FLAGS
"-msve-vector-bits=
${
SVE_LENGTH
}
"
)
endif
()
endif
()
endif
()
...
...
python/pystencils_walberla/codegen.py
View file @
1661e5e4
...
...
@@ -61,6 +61,8 @@ def generate_sweep(generation_context, class_name, assignments,
else
:
ast
=
create_staggered_kernel
(
assignments
,
**
create_kernel_params
)
ast
.
assumed_inner_stride_one
=
create_kernel_params
[
'cpu_vectorize_info'
][
'assume_inner_stride_one'
]
ast
.
nontemporal
=
create_kernel_params
[
'cpu_vectorize_info'
][
'nontemporal'
]
ast
.
openmp
=
create_kernel_params
[
'cpu_openmp'
]
def
to_name
(
f
):
return
f
.
name
if
isinstance
(
f
,
Field
)
else
f
...
...
python/pystencils_walberla/jinja_filters.py
View file @
1661e5e4
...
...
@@ -238,37 +238,50 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non
if
param
.
is_field_pointer
:
field
=
param
.
fields
[
0
]
if
field
.
field_type
==
FieldType
.
BUFFER
:
kernel_call_lines
.
append
(
"%s %s = %s;"
%
(
param
.
symbol
.
dtype
,
param
.
symbol
.
name
,
param
.
field_name
)
)
kernel_call_lines
.
append
(
f
"
{
param
.
symbol
.
dtype
}
{
param
.
symbol
.
name
}
=
{
param
.
field_name
}
;"
)
else
:
coordinates
=
get_start_coordinates
(
field
)
actual_gls
=
"int_c(
%s
->nrOfGhostLayers())"
%
(
param
.
field_name
,
)
actual_gls
=
f
"int_c(
{
param
.
field_name
}
->nrOfGhostLayers())"
coord_set
=
set
(
coordinates
)
coord_set
=
sorted
(
coord_set
,
key
=
lambda
e
:
str
(
e
))
for
c
in
coord_set
:
kernel_call_lines
.
append
(
"WALBERLA_ASSERT_GREATER_EQUAL(%s, -%s);"
%
(
c
,
actual_gls
))
kernel_call_lines
.
append
(
f
"WALBERLA_ASSERT_GREATER_EQUAL(
{
c
}
, -
{
actual_gls
}
);"
)
while
len
(
coordinates
)
<
4
:
coordinates
.
append
(
0
)
coordinates
=
tuple
(
coordinates
)
kernel_call_lines
.
append
(
"%s %s = %s->dataAt(%s, %s, %s, %s);"
%
((
param
.
symbol
.
dtype
,
param
.
symbol
.
name
,
param
.
field_name
)
+
coordinates
)
)
kernel_call_lines
.
append
(
f
"
{
param
.
symbol
.
dtype
}
{
param
.
symbol
.
name
}
=
{
param
.
field_name
}
->dataAt"
f
"(
{
coordinates
[
0
]
}
,
{
coordinates
[
1
]
}
,
{
coordinates
[
2
]
}
,
{
coordinates
[
3
]
}
);"
)
if
ast
.
assumed_inner_stride_one
and
field
.
index_dimensions
>
0
:
kernel_call_lines
.
append
(
"WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);"
%
(
param
.
field_name
,))
kernel_call_lines
.
append
(
f
"WALBERLA_ASSERT_EQUAL(
{
param
.
field_name
}
->layout(), field::fzyx);"
)
if
ast
.
instruction_set
and
ast
.
assumed_inner_stride_one
:
if
ast
.
nontemporal
and
ast
.
openmp
and
'cachelineZero'
in
ast
.
instruction_set
:
kernel_call_lines
.
append
(
f
"WALBERLA_ASSERT_EQUAL((uintptr_t)
{
field
.
name
}
->dataAt(0, 0, 0, 0) %"
f
"
{
ast
.
instruction_set
[
'cachelineSize'
]
}
, 0);"
)
else
:
kernel_call_lines
.
append
(
f
"WALBERLA_ASSERT_EQUAL((uintptr_t)
{
field
.
name
}
->dataAt(0, 0, 0, 0) %"
f
"
{
ast
.
instruction_set
[
'bytes'
]
}
, 0);"
)
elif
param
.
is_field_stride
:
casted_stride
=
get_field_stride
(
param
)
type_str
=
param
.
symbol
.
dtype
.
base_name
kernel_call_lines
.
append
(
"const
%s %s = %s;"
%
(
type_str
,
param
.
symbol
.
name
,
casted_stride
)
)
kernel_call_lines
.
append
(
f
"const
{
type_str
}
{
param
.
symbol
.
name
}
=
{
casted_stride
}
;"
)
elif
param
.
is_field_shape
:
coord
=
param
.
symbol
.
coordinate
field
=
param
.
fields
[
0
]
type_str
=
param
.
symbol
.
dtype
.
base_name
shape
=
"%s(%s)"
%
(
type_str
,
get_end_coordinates
(
field
)[
coord
]
)
shape
=
f
"
{
type_str
}
(
{
get_end_coordinates
(
field
)[
coord
]
}
)"
assert
coord
<
3
max_value
=
"%s->%sSizeWithGhostLayer()"
%
(
field
.
name
,
(
'x'
,
'y'
,
'z'
)[
coord
]
)
kernel_call_lines
.
append
(
"WALBERLA_ASSERT_GREATER_EQUAL(
%s, %s);"
%
(
max_value
,
shape
)
)
kernel_call_lines
.
append
(
"const
%s %s = %s;"
%
(
type_str
,
param
.
symbol
.
name
,
shape
)
)
max_value
=
f
"
{
field
.
name
}
->
{
(
'x'
,
'y'
,
'z'
)[
coord
]
}
SizeWithGhostLayer()"
kernel_call_lines
.
append
(
f
"WALBERLA_ASSERT_GREATER_EQUAL(
{
max_value
}
,
{
shape
}
);"
)
kernel_call_lines
.
append
(
f
"const
{
type_str
}
{
param
.
symbol
.
name
}
=
{
shape
}
;"
)
if
ast
.
assumed_inner_stride_one
and
field
.
index_dimensions
>
0
:
kernel_call_lines
.
append
(
"WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);"
%
(
field
.
name
,))
kernel_call_lines
.
append
(
f
"WALBERLA_ASSERT_EQUAL(
{
field
.
name
}
->layout(), field::fzyx);"
)
if
ast
.
instruction_set
and
ast
.
assumed_inner_stride_one
:
if
ast
.
nontemporal
and
ast
.
openmp
and
'cachelineZero'
in
ast
.
instruction_set
:
kernel_call_lines
.
append
(
f
"WALBERLA_ASSERT_EQUAL((uintptr_t)
{
field
.
name
}
->dataAt(0, 0, 0, 0) %"
f
"
{
ast
.
instruction_set
[
'cachelineSize'
]
}
, 0);"
)
else
:
kernel_call_lines
.
append
(
f
"WALBERLA_ASSERT_EQUAL((uintptr_t)
{
field
.
name
}
->dataAt(0, 0, 0, 0) %"
f
"
{
ast
.
instruction_set
[
'bytes'
]
}
, 0);"
)
call_parameters
=
", "
.
join
([
p
.
symbol
.
name
for
p
in
ast_params
])
...
...
src/field/Field.impl.h
View file @
1661e5e4
...
...
@@ -316,7 +316,13 @@ namespace field {
// Automatically select allocator if none was given
if
(
alloc
==
nullptr
)
{
#ifdef __BIGGEST_ALIGNMENT__
#if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0
const
uint_t
alignment
=
__ARM_FEATURE_SVE_BITS
/
8
;
#elif defined(__ARM_FEATURE_SVE)
const
uint_t
alignment
=
64
;
#elif defined(__ARM_NEON)
const
uint_t
alignment
=
16
;
#elif defined(__BIGGEST_ALIGNMENT__)
const
uint_t
alignment
=
__BIGGEST_ALIGNMENT__
;
#elif defined(__AVX512F__)
const
uint_t
alignment
=
64
;
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment