pystencils · Commit 8e9bded0
Authored Nov 02, 2021 by Markus Holzer
Committed by Helen Schottenhamml, Nov 02, 2021

Vectorisation bug

Parent: c8ea55af
Changes: 6 files
pystencils/backends/cbackend.py

@@ -659,9 +659,9 @@ class VectorizedCustomSympyPrinter(CustomSympyPrinter):
         if isinstance(expr, vector_memory_access):
             arg, data_type, aligned, _, mask, stride = expr.args
             if stride != 1:
-                return self.instruction_set['loadS'].format("&" + self._print(arg), stride, **self._kwargs)
+                return self.instruction_set['loadS'].format(f"&{self._print(arg)}", stride, **self._kwargs)
             instruction = self.instruction_set['loadA'] if aligned else self.instruction_set['loadU']
-            return instruction.format("&" + self._print(arg), **self._kwargs)
+            return instruction.format(f"&{self._print(arg)}", **self._kwargs)
         elif isinstance(expr, cast_func):
             arg, data_type = expr.args
             if type(data_type) is VectorType:
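Note: the printer change above is purely cosmetic; the load template is still filled with the same pointer string and stride. A minimal sketch of the expansion, assuming a hypothetical gather-style template (the real templates live in pystencils' instruction-set modules):

# Sketch of how the printer expands a strided-load template; `vec_gather` is a
# hypothetical template, not taken from pystencils' instruction-set definitions.
load_s_template = "vec_gather({0}, {1})"

def print_strided_load(printed_arg: str, stride: int) -> str:
    # The old spelling "&" + printed_arg and the new f"&{printed_arg}" are identical:
    assert "&" + printed_arg == f"&{printed_arg}"
    return load_s_template.format(f"&{printed_arg}", stride)

print(print_strided_load("_data_src[ctr_0]", 2))  # -> vec_gather(&_data_src[ctr_0], 2)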
pystencils/cpu/cpujit.py

@@ -373,8 +373,7 @@ def equal_size_check(fields):
         return ""
     ref_field = fields[0]
-    cond = ["(buffer_{field.name}.shape[{i}] == buffer_{ref_field.name}.shape[{i}])".format(ref_field=ref_field,
-                                                                                            field=field_to_test, i=i)
+    cond = [f"(buffer_{field_to_test.name}.shape[{i}] == buffer_{ref_field.name}.shape[{i}])"
             for field_to_test in fields[1:]
             for i in range(fields[0].spatial_dimensions)]
     cond = " && ".join(cond)

@@ -431,8 +430,7 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec
             if (np_dtype.isbuiltin and FieldType.is_generic(field)
                     and not np.issubdtype(field.dtype.numpy_dtype, np.complexfloating)):
-                dtype_cond = "buffer_{name}.format[0] == '{format}'".format(name=field.name,
-                                                                            format=field.dtype.numpy_dtype.char)
+                dtype_cond = f"buffer_{field.name}.format[0] == '{field.dtype.numpy_dtype.char}'"
                 pre_call_code += template_check_array.format(cond=dtype_cond, what="data type", name=field.name,
                                                              expected=str(field.dtype.numpy_dtype))

@@ -461,8 +459,7 @@ def create_function_boilerplate_code(parameter_info, name, ast_node, insert_chec
             elif param.is_field_stride:
                 field = param.fields[0]
                 item_size = field.dtype.numpy_dtype.itemsize
-                parameters.append("buffer_{name}.strides[{i}] / {bytes}".format(bytes=item_size,
-                                                                                i=param.symbol.coordinate, name=field.name))
+                parameters.append(f"buffer_{field.name}.strides[{param.symbol.coordinate}] / {item_size}")
             elif param.is_field_shape:
                 parameters.append(f"buffer_{param.field_name}.shape[{param.symbol.coordinate}]")
             elif type(param.symbol) is CFunction:

@@ -507,8 +504,8 @@ def load_kernel_from_file(module_name, function_name, path):
     except ImportError:
         import time
         import warnings
-        warnings.warn("Could not load " + path + ", trying on more time...")
-        time.sleep(1)
+        warnings.warn(f"Could not load {path}, trying one more time in 5 seconds...")
+        time.sleep(5)
     spec = spec_from_file_location(name=module_name, location=path)
     mod = module_from_spec(spec)
     spec.loader.exec_module(mod)
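The boilerplate rewrites above swap str.format for f-strings without changing the generated C condition strings. A quick equivalence check, using a hypothetical stand-in for a pystencils Field (only the `.name` attribute matters here):

# Equivalence sketch for the rewritten size-check condition; `FieldStub` is a
# hypothetical stand-in, real pystencils Field objects expose `.name` the same way.
class FieldStub:
    def __init__(self, name):
        self.name = name

ref_field, field_to_test, i = FieldStub("dst"), FieldStub("src"), 0
old = "(buffer_{field.name}.shape[{i}] == buffer_{ref_field.name}.shape[{i}])".format(
    ref_field=ref_field, field=field_to_test, i=i)
new = f"(buffer_{field_to_test.name}.shape[{i}] == buffer_{ref_field.name}.shape[{i}])"
assert old == new  # both render "(buffer_src.shape[0] == buffer_dst.shape[0])"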
pystencils/cpu/vectorization.py

@@ -121,8 +121,8 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
                                   "to differently typed floating point fields")
     float_size = field_float_dtypes.pop().numpy_dtype.itemsize
     assert float_size in (8, 4)
-    vector_is = get_vector_instruction_set('double' if float_size == 8 else 'float',
-                                           instruction_set=instruction_set)
+    default_float_type = 'double' if float_size == 8 else 'float'
+    vector_is = get_vector_instruction_set(default_float_type, instruction_set=instruction_set)
     vector_width = vector_is['width']
     kernel_ast.instruction_set = vector_is

@@ -130,7 +130,7 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'best',
     keep_loop_stop = '{loop_stop}' in vector_is['storeA' if assume_aligned else 'storeU']
     vectorize_inner_loops_and_adapt_load_stores(kernel_ast, vector_width, assume_aligned, nontemporal,
                                                 strided, keep_loop_stop, assume_sufficient_line_padding)
-    insert_vector_casts(kernel_ast)
+    insert_vector_casts(kernel_ast, default_float_type)


 def vectorize_inner_loops_and_adapt_load_stores(ast_node, vector_width, assume_aligned, nontemporal_fields,

@@ -242,25 +242,24 @@ def mask_conditionals(loop_body):
     visit_node(loop_body, mask=True)


-def insert_vector_casts(ast_node):
+def insert_vector_casts(ast_node, default_float_type='double'):
     """Inserts necessary casts from scalar values to vector values."""
     handled_functions = (sp.Add, sp.Mul, fast_division, fast_sqrt, fast_inv_sqrt, vec_any, vec_all)

-    def visit_expr(expr):
+    def visit_expr(expr, default_type='double'):
         if isinstance(expr, vector_memory_access):
-            return vector_memory_access(*expr.args[0:4], visit_expr(expr.args[4]), *expr.args[5:])
+            return vector_memory_access(*expr.args[0:4], visit_expr(expr.args[4], default_type), *expr.args[5:])
         elif isinstance(expr, cast_func):
             return expr
         elif expr.func is sp.Abs and 'abs' not in ast_node.instruction_set:
-            new_arg = visit_expr(expr.args[0])
+            new_arg = visit_expr(expr.args[0], default_type)
             base_type = get_type_of_expression(expr.args[0]).base_type if type(expr.args[0]) is vector_memory_access \
                 else get_type_of_expression(expr.args[0])
             pw = sp.Piecewise((-new_arg, new_arg < base_type.numpy_dtype.type(0)), (new_arg, True))
-            return visit_expr(pw)
+            return visit_expr(pw, default_type)
         elif expr.func in handled_functions or isinstance(expr, sp.Rel) or isinstance(expr, BooleanFunction):
-            default_type = 'double'
             if expr.func is sp.Mul and expr.args[0] == -1:
                 # special treatment for the unary minus: make sure that the -1 has the same type as the argument
                 dtype = int

@@ -274,7 +273,7 @@ def insert_vector_casts(ast_node):
                 if dtype is np.float32:
                     default_type = 'float'
                 expr = sp.Mul(dtype(expr.args[0]), *expr.args[1:])
-            new_args = [visit_expr(a) for a in expr.args]
+            new_args = [visit_expr(a, default_type) for a in expr.args]
             arg_types = [get_type_of_expression(a, default_float_type=default_type) for a in new_args]
             if not any(type(t) is VectorType for t in arg_types):
                 return expr

@@ -285,11 +284,11 @@ def insert_vector_casts(ast_node):
                            for a, t in zip(new_args, arg_types)]
             return expr.func(*casted_args)
         elif expr.func is sp.Pow:
-            new_arg = visit_expr(expr.args[0])
+            new_arg = visit_expr(expr.args[0], default_type)
             return expr.func(new_arg, expr.args[1])
         elif expr.func == sp.Piecewise:
-            new_results = [visit_expr(a[0]) for a in expr.args]
-            new_conditions = [visit_expr(a[1]) for a in expr.args]
+            new_results = [visit_expr(a[0], default_type) for a in expr.args]
+            new_conditions = [visit_expr(a[1], default_type) for a in expr.args]
             types_of_results = [get_type_of_expression(a) for a in new_results]
             types_of_conditions = [get_type_of_expression(a) for a in new_conditions]

@@ -311,14 +310,14 @@ def insert_vector_casts(ast_node):
         else:
             return expr

-    def visit_node(node, substitution_dict):
+    def visit_node(node, substitution_dict, default_type='double'):
         substitution_dict = substitution_dict.copy()
         for arg in node.args:
             if isinstance(arg, ast.SympyAssignment):
                 assignment = arg
                 subs_expr = fast_subs(assignment.rhs, substitution_dict,
                                       skip=lambda e: isinstance(e, ast.ResolvedFieldAccess))
-                assignment.rhs = visit_expr(subs_expr)
+                assignment.rhs = visit_expr(subs_expr, default_type)
                 rhs_type = get_type_of_expression(assignment.rhs)
                 if isinstance(assignment.lhs, TypedSymbol):
                     lhs_type = assignment.lhs.dtype

@@ -328,13 +327,13 @@ def insert_vector_casts(ast_node):
                     substitution_dict[assignment.lhs] = new_lhs
                     assignment.lhs = new_lhs
                 elif isinstance(assignment.lhs, vector_memory_access):
-                    assignment.lhs = visit_expr(assignment.lhs)
+                    assignment.lhs = visit_expr(assignment.lhs, default_type)
             elif isinstance(arg, ast.Conditional):
                 arg.condition_expr = fast_subs(arg.condition_expr, substitution_dict,
                                                skip=lambda e: isinstance(e, ast.ResolvedFieldAccess))
-                arg.condition_expr = visit_expr(arg.condition_expr)
-                visit_node(arg, substitution_dict)
+                arg.condition_expr = visit_expr(arg.condition_expr, default_type)
+                visit_node(arg, substitution_dict, default_type)
             else:
-                visit_node(arg, substitution_dict)
+                visit_node(arg, substitution_dict, default_type)

-    visit_node(ast_node, {})
+    visit_node(ast_node, {}, default_float_type)
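This file carries the actual bug fix: insert_vector_casts used to hard-code default_type = 'double' inside its recursion, so constants in single-precision kernels could be typed as doubles. Now the kernel-wide float type is determined once in vectorize() and threaded through every visitor call. A self-contained sketch of that pattern (a toy tuple-based expression tree, not the pystencils AST):

# Sketch of threading a recursion-wide default type through a visitor, the pattern
# this commit introduces; the tuple-based expression tree here is a toy stand-in.
def insert_casts(expr, default_type='double'):
    if isinstance(expr, (int, float)):
        # Leaf constants now pick up the kernel-wide default instead of a
        # hard-coded 'double'.
        return ('cast', default_type, expr)
    op, *args = expr
    return (op, *[insert_casts(a, default_type) for a in args])

# A float kernel now casts its constants to float:
print(insert_casts(('mul', -1, ('add', 1.5, 2.5)), default_type='float'))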
pystencils/data_types.py

@@ -634,10 +634,10 @@ class BasicType(Type):
             return 'ComplexDouble'
         elif name.startswith('int'):
             width = int(name[len("int"):])
-            return "int%d_t" % (width,)
+            return f"int{width}_t"
         elif name.startswith('uint'):
             width = int(name[len("uint"):])
-            return "uint%d_t" % (width,)
+            return f"uint{width}_t"
         elif name == 'bool':
             return 'bool'
         else:

@@ -736,7 +736,7 @@ class VectorType(Type):
     def __str__(self):
         if self.instruction_set is None:
-            return "%s[%s]" % (self.base_type, self.width)
+            return f"{self.base_type}[{self.width}]"
         else:
             if self.base_type == create_type("int64") or self.base_type == create_type("int32"):
                 return self.instruction_set['int']
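The type-name changes are again formatting-only; the %-style and f-string spellings produce identical C type names:

# Quick check that the modernised formatting is behaviour-preserving:
width = 64
assert "int%d_t" % (width,) == f"int{width}_t" == "int64_t"
assert "uint%d_t" % (width,) == f"uint{width}_t" == "uint64_t"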
pystencils_tests/test_vectorization.py

@@ -295,3 +295,4 @@ def test_issue40(*_):
     code = ps.get_code_str(ast)
+    assert 'epi32' not in code
pystencils_tests/test_vectorization_specific.py

@@ -6,7 +6,7 @@ import sympy as sp
 import pystencils as ps
 from pystencils.backends.simd_instruction_sets import (get_cacheline_size, get_supported_instruction_sets,
                                                        get_vector_instruction_set)
 from pystencils.data_types import cast_func, VectorType
 from . import test_vectorization
 from pystencils.enums import Target

 supported_instruction_sets = get_supported_instruction_sets() if get_supported_instruction_sets() else []

@@ -116,15 +116,33 @@ def test_cacheline_size(instruction_set):
         pytest.skip()
     instruction_set = get_vector_instruction_set('double', instruction_set)
     vector_size = instruction_set['bytes']
-    assert cacheline_size > 8 and cacheline_size < 0x100000, "Cache line size is implausible"
+    assert 8 < cacheline_size < 0x100000, "Cache line size is implausible"
     if type(vector_size) is int:
         assert cacheline_size % vector_size == 0, "Cache line size should be multiple of vector size"
     assert cacheline_size & (cacheline_size - 1) == 0, "Cache line size is not a power of 2"


 # test_vectorization is not parametrized because it is supposed to run without pytest, so we parametrize it here
 from pystencils_tests import test_vectorization


-@pytest.mark.parametrize('instruction_set', sorted(set(supported_instruction_sets) - set([test_vectorization.instruction_set])))
-@pytest.mark.parametrize('function', [f for f in test_vectorization.__dict__ if f.startswith('test_') and f != 'test_hardware_query'])
+@pytest.mark.parametrize('instruction_set', sorted(set(supported_instruction_sets) - {test_vectorization.instruction_set}))
+@pytest.mark.parametrize('function', [f for f in test_vectorization.__dict__ if f.startswith('test_') and f != 'test_hardware_query'])
 def test_vectorization_other(instruction_set, function):
     test_vectorization.__dict__[function](instruction_set)


+@pytest.mark.parametrize('dtype', ('float', 'double'))
+@pytest.mark.parametrize('instruction_set', supported_instruction_sets)
+@pytest.mark.parametrize('field_layout', ('fzyx', 'zyxf'))
+def test_square_root(dtype, instruction_set, field_layout):
+    config = ps.CreateKernelConfig(data_type=dtype,
+                                   cpu_vectorize_info={'instruction_set': instruction_set,
+                                                       'assume_inner_stride_one': True,
+                                                       'assume_aligned': False, 'nontemporal': False})
+
+    src_field = ps.Field.create_generic('pdfs', 2, dtype, index_dimensions=1, layout=field_layout, index_shape=(9,))
+
+    eq = [ps.Assignment(sp.Symbol("xi"), sum(src_field.center_vector)),
+          ps.Assignment(sp.Symbol("xi_2"), sp.Symbol("xi") * sp.sqrt(src_field.center))]
+
+    ps.create_kernel(eq, config=config).compile()
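The new test_square_root compiles a vectorized kernel containing sqrt for both precisions and both field layouts, which exercises the default-float-type propagation fixed above. A hypothetical standalone invocation for one parameter combination (pytest normally drives this via the parametrize decorators, and 'avx' assumes an AVX-capable host):

# Hypothetical direct call of the new test outside pytest:
from pystencils_tests import test_vectorization_specific as tvs

tvs.test_square_root('float', 'avx', 'fzyx')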