From fb99632c00d8c395cac06ecce97ca36a03fbecf9 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Thu, 17 May 2018 12:16:09 +0200 Subject: [PATCH] Added non-constant field-sizes vectorization support to outer interface --- cpu/vectorization.py | 15 ++++++++++----- datahandling/datahandling_interface.py | 4 ++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/cpu/vectorization.py b/cpu/vectorization.py index 96486b3ea..ecc4ebc1a 100644 --- a/cpu/vectorization.py +++ b/cpu/vectorization.py @@ -1,20 +1,19 @@ import sympy as sp import warnings - from typing import Union, Container - from pystencils.backends.simd_instruction_sets import get_vector_instruction_set from pystencils.integer_functions import modulo_floor from pystencils.sympyextensions import fast_subs from pystencils.data_types import TypedSymbol, VectorType, get_type_of_expression, vector_memory_access, cast_func, \ collate_types, PointerType import pystencils.astnodes as ast -from pystencils.transformations import cut_loop, filtered_tree_iteration +from pystencils.transformations import cut_loop, filtered_tree_iteration, replace_inner_stride_with_one from pystencils.field import Field def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx', - assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False): + assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False, + assume_inner_stride_one: bool = False): """Explicit vectorization using SIMD vectorization via intrinsics. Args: @@ -27,7 +26,10 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx', instructions have to be used. nontemporal: a container of fields or field names for which nontemporal (streaming) stores are used. If true, nontemporal access instructions are used for all fields. 
- + assume_inner_stride_one: kernels with non-constant inner loop bound and strides can not be vectorized since + the inner loop stride is a runtime variable and thus might not be always 1. + If this parameter is set to true, the inner stride is assumed to be always one. + This has to be ensured at runtime! """ all_fields = kernel_ast.fields_accessed if nontemporal is None or nontemporal is False: @@ -35,6 +37,9 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx', elif nontemporal is True: nontemporal = all_fields + if assume_inner_stride_one: + replace_inner_stride_with_one(kernel_ast) + field_float_dtypes = set(f.dtype for f in all_fields if f.dtype.is_float()) if len(field_float_dtypes) != 1: raise NotImplementedError("Cannot vectorize kernels that contain accesses " diff --git a/datahandling/datahandling_interface.py b/datahandling/datahandling_interface.py index f8c21ed01..f8c67c9b0 100644 --- a/datahandling/datahandling_interface.py +++ b/datahandling/datahandling_interface.py @@ -34,7 +34,7 @@ class DataHandling(ABC): @abstractmethod def add_array(self, name: str, values_per_cell: int = 1, dtype=np.float64, latex_name: Optional[str]=None, ghost_layers: Optional[int] = None, layout: Optional[str] = None, - cpu: bool = True, gpu: Optional[bool] = None) -> Field: + cpu: bool = True, gpu: Optional[bool] = None, alignment=False) -> Field: """Adds a (possibly distributed) array to the handling that can be accessed using the given name. For each array a symbolic field is available via the 'fields' dictionary @@ -52,7 +52,7 @@ class DataHandling(ABC): this is only important if values_per_cell > 1 cpu: allocate field on the CPU gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu' - + alignment: either False for no alignment, or the number of bytes to align to Returns: pystencils field, that can be used to formulate symbolic kernels """ -- GitLab