Commit fb99632c authored by Martin Bauer
Browse files

Added non-constant field-sizes vectorization support to outer interface

parent b27e94c9
import sympy as sp
import warnings
from typing import Union, Container
from pystencils.backends.simd_instruction_sets import get_vector_instruction_set
from pystencils.integer_functions import modulo_floor
from pystencils.sympyextensions import fast_subs
from pystencils.data_types import TypedSymbol, VectorType, get_type_of_expression, vector_memory_access, cast_func, \
collate_types, PointerType
import pystencils.astnodes as ast
from pystencils.transformations import cut_loop, filtered_tree_iteration
from pystencils.transformations import cut_loop, filtered_tree_iteration, replace_inner_stride_with_one
from pystencils.field import Field
def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx',
assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False):
assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False,
assume_inner_stride_one: bool = False):
"""Explicit vectorization using SIMD vectorization via intrinsics.
......@@ -27,7 +26,10 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx',
instructions have to be used.
nontemporal: a container of fields or field names for which nontemporal (streaming) stores are used.
If true, nontemporal access instructions are used for all fields.
assume_inner_stride_one: kernels with non-constant inner loop bound and strides cannot be vectorized, since
the inner loop stride is a runtime variable and thus might not always be 1.
If this parameter is set to true, the inner stride is assumed to always be one.
This has to be ensured at runtime!
all_fields = kernel_ast.fields_accessed
if nontemporal is None or nontemporal is False:
......@@ -35,6 +37,9 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx',
elif nontemporal is True:
nontemporal = all_fields
if assume_inner_stride_one:
field_float_dtypes = set(f.dtype for f in all_fields if f.dtype.is_float())
if len(field_float_dtypes) != 1:
raise NotImplementedError("Cannot vectorize kernels that contain accesses "
......@@ -34,7 +34,7 @@ class DataHandling(ABC):
def add_array(self, name: str, values_per_cell: int = 1, dtype=np.float64,
latex_name: Optional[str]=None, ghost_layers: Optional[int] = None, layout: Optional[str] = None,
cpu: bool = True, gpu: Optional[bool] = None) -> Field:
cpu: bool = True, gpu: Optional[bool] = None, alignment=False) -> Field:
"""Adds a (possibly distributed) array to the handling that can be accessed using the given name.
For each array a symbolic field is available via the 'fields' dictionary
......@@ -52,7 +52,7 @@ class DataHandling(ABC):
this is only important if values_per_cell > 1
cpu: allocate field on the CPU
gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu'
alignment: either False for no alignment, or the number of bytes to align to
pystencils field, that can be used to formulate symbolic kernels
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment