Skip to content
Snippets Groups Projects
Commit fb99632c authored by Martin Bauer's avatar Martin Bauer
Browse files

Added non-constant field-sizes vectorization support to outer interface

parent b27e94c9
No related merge requests found
import sympy as sp import sympy as sp
import warnings import warnings
from typing import Union, Container from typing import Union, Container
from pystencils.backends.simd_instruction_sets import get_vector_instruction_set from pystencils.backends.simd_instruction_sets import get_vector_instruction_set
from pystencils.integer_functions import modulo_floor from pystencils.integer_functions import modulo_floor
from pystencils.sympyextensions import fast_subs from pystencils.sympyextensions import fast_subs
from pystencils.data_types import TypedSymbol, VectorType, get_type_of_expression, vector_memory_access, cast_func, \ from pystencils.data_types import TypedSymbol, VectorType, get_type_of_expression, vector_memory_access, cast_func, \
collate_types, PointerType collate_types, PointerType
import pystencils.astnodes as ast import pystencils.astnodes as ast
from pystencils.transformations import cut_loop, filtered_tree_iteration from pystencils.transformations import cut_loop, filtered_tree_iteration, replace_inner_stride_with_one
from pystencils.field import Field from pystencils.field import Field
def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx', def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx',
assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False): assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False,
assume_inner_stride_one: bool = False):
"""Explicit vectorization using SIMD vectorization via intrinsics. """Explicit vectorization using SIMD vectorization via intrinsics.
Args: Args:
...@@ -27,7 +26,10 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx', ...@@ -27,7 +26,10 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx',
instructions have to be used. instructions have to be used.
nontemporal: a container of fields or field names for which nontemporal (streaming) stores are used. nontemporal: a container of fields or field names for which nontemporal (streaming) stores are used.
If true, nontemporal access instructions are used for all fields. If true, nontemporal access instructions are used for all fields.
assume_inner_stride_one: kernels with non-constant inner loop bounds and strides cannot be vectorized, since
the inner loop stride is a runtime variable and thus might not always be 1.
If this parameter is set to true, the inner stride is assumed to always be one.
This has to be ensured at runtime!
""" """
all_fields = kernel_ast.fields_accessed all_fields = kernel_ast.fields_accessed
if nontemporal is None or nontemporal is False: if nontemporal is None or nontemporal is False:
...@@ -35,6 +37,9 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx', ...@@ -35,6 +37,9 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx',
elif nontemporal is True: elif nontemporal is True:
nontemporal = all_fields nontemporal = all_fields
if assume_inner_stride_one:
replace_inner_stride_with_one(kernel_ast)
field_float_dtypes = set(f.dtype for f in all_fields if f.dtype.is_float()) field_float_dtypes = set(f.dtype for f in all_fields if f.dtype.is_float())
if len(field_float_dtypes) != 1: if len(field_float_dtypes) != 1:
raise NotImplementedError("Cannot vectorize kernels that contain accesses " raise NotImplementedError("Cannot vectorize kernels that contain accesses "
......
...@@ -34,7 +34,7 @@ class DataHandling(ABC): ...@@ -34,7 +34,7 @@ class DataHandling(ABC):
@abstractmethod @abstractmethod
def add_array(self, name: str, values_per_cell: int = 1, dtype=np.float64, def add_array(self, name: str, values_per_cell: int = 1, dtype=np.float64,
latex_name: Optional[str]=None, ghost_layers: Optional[int] = None, layout: Optional[str] = None, latex_name: Optional[str]=None, ghost_layers: Optional[int] = None, layout: Optional[str] = None,
cpu: bool = True, gpu: Optional[bool] = None) -> Field: cpu: bool = True, gpu: Optional[bool] = None, alignment=False) -> Field:
"""Adds a (possibly distributed) array to the handling that can be accessed using the given name. """Adds a (possibly distributed) array to the handling that can be accessed using the given name.
For each array a symbolic field is available via the 'fields' dictionary For each array a symbolic field is available via the 'fields' dictionary
...@@ -52,7 +52,7 @@ class DataHandling(ABC): ...@@ -52,7 +52,7 @@ class DataHandling(ABC):
this is only important if values_per_cell > 1 this is only important if values_per_cell > 1
cpu: allocate field on the CPU cpu: allocate field on the CPU
gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu' gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu'
alignment: either False for no alignment, or the number of bytes to align to
Returns: Returns:
pystencils field, that can be used to formulate symbolic kernels pystencils field, that can be used to formulate symbolic kernels
""" """
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment