From fb99632c00d8c395cac06ecce97ca36a03fbecf9 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Thu, 17 May 2018 12:16:09 +0200
Subject: [PATCH] Added non-constant field-sizes vectorization support to outer
 interface

---
 cpu/vectorization.py                   | 15 ++++++++++-----
 datahandling/datahandling_interface.py |  4 ++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/cpu/vectorization.py b/cpu/vectorization.py
index 96486b3ea..ecc4ebc1a 100644
--- a/cpu/vectorization.py
+++ b/cpu/vectorization.py
@@ -1,20 +1,19 @@
 import sympy as sp
 import warnings
-
 from typing import Union, Container
-
 from pystencils.backends.simd_instruction_sets import get_vector_instruction_set
 from pystencils.integer_functions import modulo_floor
 from pystencils.sympyextensions import fast_subs
 from pystencils.data_types import TypedSymbol, VectorType, get_type_of_expression, vector_memory_access, cast_func, \
     collate_types, PointerType
 import pystencils.astnodes as ast
-from pystencils.transformations import cut_loop, filtered_tree_iteration
+from pystencils.transformations import cut_loop, filtered_tree_iteration, replace_inner_stride_with_one
 from pystencils.field import Field
 
 
 def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx',
-              assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False):
+              assume_aligned: bool = False, nontemporal: Union[bool, Container[Union[str, Field]]] = False,
+              assume_inner_stride_one: bool = False):
     """Explicit vectorization using SIMD vectorization via intrinsics.
 
     Args:
@@ -27,7 +26,10 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx',
                         instructions have to be used.
         nontemporal: a container of fields or field names for which nontemporal (streaming) stores are used.
                      If true, nontemporal access instructions are used for all fields.
-
+        assume_inner_stride_one: kernels with non-constant inner loop bounds and strides cannot be vectorized, since
+                                 the inner loop stride is a runtime variable and thus might not always be 1.
+                                 If this parameter is set to true, the inner stride is assumed to always be one.
+                                 This has to be ensured at runtime!
     """
     all_fields = kernel_ast.fields_accessed
     if nontemporal is None or nontemporal is False:
@@ -35,6 +37,9 @@ def vectorize(kernel_ast: ast.KernelFunction, instruction_set: str = 'avx',
     elif nontemporal is True:
         nontemporal = all_fields
 
+    if assume_inner_stride_one:
+        replace_inner_stride_with_one(kernel_ast)
+
     field_float_dtypes = set(f.dtype for f in all_fields if f.dtype.is_float())
     if len(field_float_dtypes) != 1:
         raise NotImplementedError("Cannot vectorize kernels that contain accesses "
diff --git a/datahandling/datahandling_interface.py b/datahandling/datahandling_interface.py
index f8c21ed01..f8c67c9b0 100644
--- a/datahandling/datahandling_interface.py
+++ b/datahandling/datahandling_interface.py
@@ -34,7 +34,7 @@ class DataHandling(ABC):
     @abstractmethod
     def add_array(self, name: str, values_per_cell: int = 1, dtype=np.float64,
                   latex_name: Optional[str]=None, ghost_layers: Optional[int] = None, layout: Optional[str] = None,
-                  cpu: bool = True, gpu: Optional[bool] = None) -> Field:
+                  cpu: bool = True, gpu: Optional[bool] = None, alignment=False) -> Field:
         """Adds a (possibly distributed) array to the handling that can be accessed using the given name.
 
         For each array a symbolic field is available via the 'fields' dictionary
@@ -52,7 +52,7 @@ class DataHandling(ABC):
                     this is only important if values_per_cell > 1
             cpu: allocate field on the CPU
             gpu: allocate field on the GPU, if None, a GPU field is allocated if default_target is 'gpu'
-
+            alignment: either False for no alignment, or the number of bytes to align to
         Returns:
             pystencils field, that can be used to formulate symbolic kernels
         """
-- 
GitLab