From bb3febb103a863a46376de7ee8ddfa4024bbc854 Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Tue, 2 Nov 2021 20:51:25 +0000
Subject: [PATCH] Small clean up

---
 .gitignore                             |  1 +
 README.md                              | 11 ++-
 doc/sphinx/api.rst                     |  1 +
 doc/sphinx/enums.rst                   |  6 ++
 doc/sphinx/kernel_compile_and_call.rst |  3 +-
 pystencils/backends/cbackend.py        |  4 +-
 pystencils/data_types.py               |  2 +-
 pystencils/enums.py                    | 29 ++++++++
 pystencils/fast_approximation.py       |  5 +-
 pystencils/field.py                    | 35 ++++------
 pystencils/kernelcreation.py           | 97 ++++++++++++++++++--------
 pystencils_tests/test_field.py         |  2 +-
 pystencils_tests/test_fvm.py           |  2 +-
 pystencils_tests/test_vectorization.py |  2 +-
 pytest.ini                             |  2 +
 15 files changed, 138 insertions(+), 64 deletions(-)
 create mode 100644 doc/sphinx/enums.rst

diff --git a/.gitignore b/.gitignore
index b890ec828..d50d80ff9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@ __pycache__
 /*.egg-info
 .cache
 _build
+/html_doc
 /.idea
 .vscode
 .cache
diff --git a/README.md b/README.md
index bd171e2b2..e3e54f55a 100644
--- a/README.md
+++ b/README.md
@@ -33,6 +33,9 @@ kernel(f=f_arr, g=g_arr)
 It comes with automatic finite difference discretization for PDEs:
 
 ```python
+import pystencils as ps
+import sympy as sp
+
 c, v = ps.fields("c, v(2): [2D]")
 adv_diff_pde = ps.fd.transient(c) - ps.fd.diffusion(c, sp.symbols("D")) + ps.fd.advection(c, v)
 discretize = ps.fd.Discretization2ndOrder(dx=1, dt=0.01)
@@ -49,20 +52,22 @@ pip install pystencils[interactive]
 Without `[interactive]` you get a minimal version with very little dependencies.
 
 All options:
--  `gpu`: use this if an NVIDIA GPU is available and CUDA is installed
--  `opencl`: basic OpenCL support (experimental)
+- `gpu`: use this if an NVIDIA GPU is available and CUDA is installed
+- `opencl`: basic OpenCL support (experimental)
 - `alltrafos`: pulls in additional dependencies for loop simplification e.g. libisl
 - `bench_db`: functionality to store benchmark result in object databases
 - `interactive`: installs dependencies to work in Jupyter including image I/O, plotting etc.
 - `autodiff`: enable derivation of adjoint kernels and generation of Torch/Tensorflow operations
 - `doc`: packages to build documentation
+- `kerncraft`: use kerncraft for automatic performance analysis
+- `llvm_jit`: llvmlite as additional CPU backend
 
 Options can be combined e.g.
 ```bash
 pip install pystencils[interactive, gpu, doc]
 ```
 
-pystencils is also fully compatible with Windows machines. If working with visual studio, a pycuda makes sure to run example files first to ensure that pycuda can find the compiler's executable.
+pystencils is also fully compatible with Windows machines. If working with visual studio and pycuda makes sure to run example files first to ensure that pycuda can find the compiler's executable.
 
 Documentation
 -------------
diff --git a/doc/sphinx/api.rst b/doc/sphinx/api.rst
index 325d83b54..ce8b1da0c 100644
--- a/doc/sphinx/api.rst
+++ b/doc/sphinx/api.rst
@@ -5,6 +5,7 @@ API Reference
    :maxdepth: 3
 
    kernel_compile_and_call.rst
+   enums.rst
    simplifications.rst
    datahandling.rst
    configuration.rst
diff --git a/doc/sphinx/enums.rst b/doc/sphinx/enums.rst
new file mode 100644
index 000000000..d882f892f
--- /dev/null
+++ b/doc/sphinx/enums.rst
@@ -0,0 +1,6 @@
+************
+Enumerations
+************
+
+.. automodule:: pystencils.enums
+   :members:
diff --git a/doc/sphinx/kernel_compile_and_call.rst b/doc/sphinx/kernel_compile_and_call.rst
index fbe3e2511..5293dc5a7 100644
--- a/doc/sphinx/kernel_compile_and_call.rst
+++ b/doc/sphinx/kernel_compile_and_call.rst
@@ -8,7 +8,8 @@ Creating kernels
 
 .. autofunction:: pystencils.create_kernel
 
-.. autofunction:: pystencils.CreateKernelConfig
+.. autoclass:: pystencils.CreateKernelConfig
+    :members:
 
 .. autofunction:: pystencils.create_domain_kernel
 
diff --git a/pystencils/backends/cbackend.py b/pystencils/backends/cbackend.py
index 9500fba89..695d5fb21 100644
--- a/pystencils/backends/cbackend.py
+++ b/pystencils/backends/cbackend.py
@@ -217,10 +217,10 @@ class CBackend:
         if isinstance(node, str):
             return node
         for cls in type(node).__mro__:
-            method_name = "_print_" + cls.__name__
+            method_name = f"_print_{cls.__name__}"
             if hasattr(self, method_name):
                 return getattr(self, method_name)(node)
-        raise NotImplementedError(self.__class__.__name__ + " does not support node of type " + node.__class__.__name__)
+        raise NotImplementedError(f"{self.__class__.__name__} does not support node of type {node.__class__.__name__}")
 
     def _print_Type(self, node):
         return str(node)
diff --git a/pystencils/data_types.py b/pystencils/data_types.py
index 295afe089..4816b7e41 100644
--- a/pystencils/data_types.py
+++ b/pystencils/data_types.py
@@ -39,7 +39,7 @@ def matrix_symbols(names, dtype, rows, cols):
 
     matrices = []
     for n in names:
-        symbols = typed_symbols("%s:%i" % (n, rows * cols), dtype)
+        symbols = typed_symbols(f"{n}:{rows * cols}", dtype)
         matrices.append(sp.Matrix(rows, cols, lambda i, j: symbols[i * cols + j]))
 
     return tuple(matrices)
diff --git a/pystencils/enums.py b/pystencils/enums.py
index afe20bb26..dff190217 100644
--- a/pystencils/enums.py
+++ b/pystencils/enums.py
@@ -2,13 +2,42 @@ from enum import Enum, auto
 
 
 class Target(Enum):
+    """
+    The Target enumeration represents all possible targets that can be used for the code generation.
+    """
     CPU = auto()
+    """
+    Target CPU architecture.
+    """
     GPU = auto()
+    """
+    Target GPU architecture.
+    """
     OPENCL = auto()
+    """
+    Target all architectures OpenCL covers (Thus both, Target and Backend)
+    """
 
 
 class Backend(Enum):
+    """
+    The Backend enumeration represents all possible backends that can be used for the code generation.
+    Backends and targets must be combined with care. For example CPU as a target and CUDA as a backend makes no sense.
+    """
     C = auto()
+    """
+    Use the C Backend of pystencils.
+    """
     LLVM = auto()
+    r"""
+    Use the ``llvmlite`` package to transform the pystensilc AST to the LLVM ir. 
+    From this point all of LLVMs optimisations can be used.
+    """
     CUDA = auto()
+    """
+    Use the CUDA backend to generate code for NVIDIA GPUs.
+    """
     OPENCL = auto()
+    """
+    Use the OpenCL backend to generate code for OpenCL.
+    """
diff --git a/pystencils/fast_approximation.py b/pystencils/fast_approximation.py
index f33009600..9eee41a96 100644
--- a/pystencils/fast_approximation.py
+++ b/pystencils/fast_approximation.py
@@ -4,6 +4,7 @@ import sympy as sp
 
 from pystencils.astnodes import Node
 from pystencils.simp import AssignmentCollection
+from pystencils.assignment import Assignment
 
 
 # noinspection PyPep8Naming
@@ -32,7 +33,7 @@ def _run(term, visitor):
         return visitor(term)
 
 
-def insert_fast_sqrts(term: Union[sp.Expr, List[sp.Expr], AssignmentCollection]):
+def insert_fast_sqrts(term: Union[sp.Expr, List[sp.Expr], AssignmentCollection, Assignment]):
     def visit(expr):
         if isinstance(expr, Node):
             return expr
@@ -48,7 +49,7 @@ def insert_fast_sqrts(term: Union[sp.Expr, List[sp.Expr], AssignmentCollection])
     return _run(term, visit)
 
 
-def insert_fast_divisions(term: Union[sp.Expr, List[sp.Expr], AssignmentCollection]):
+def insert_fast_divisions(term: Union[sp.Expr, List[sp.Expr], AssignmentCollection, Assignment]):
 
     def visit(expr):
         if isinstance(expr, Node):
diff --git a/pystencils/field.py b/pystencils/field.py
index 82882c548..632d6c5d5 100644
--- a/pystencils/field.py
+++ b/pystencils/field.py
@@ -138,7 +138,6 @@ def fields(description=None, index_dimensions=0, layout=None, field_type=FieldTy
 
 
 class AbstractField:
-
     class AbstractAccess:
         pass
 
@@ -432,8 +431,8 @@ class Field(AbstractField):
         elif len(index_shape) == 2:
             return sp.Matrix([[self(i, j) for j in range(index_shape[1])] for i in range(index_shape[0])])
         elif len(index_shape) == 3:
-            return sp.Matrix([[[self(i, j, k) for k in range(index_shape[2])]
-                               for j in range(index_shape[1])] for i in range(index_shape[0])])
+            return sp.Array([[[self(i, j, k) for k in range(index_shape[2])]
+                              for j in range(index_shape[1])] for i in range(index_shape[0])])
         else:
             raise NotImplementedError("center_vector is not implemented for more than 3 index dimensions")
 
@@ -466,8 +465,7 @@ class Field(AbstractField):
         if type(offset) is not tuple:
             offset = (offset,)
         if len(offset) != self.spatial_dimensions:
-            raise ValueError("Wrong number of spatial indices: "
-                             "Got %d, expected %d" % (len(offset), self.spatial_dimensions))
+            raise ValueError(f"Wrong number of spatial indices: Got {len(offset)}, expected {self.spatial_dimensions}")
         return Field.Access(self, offset)
 
     def absolute_access(self, offset, index):
@@ -511,8 +509,7 @@ class Field(AbstractField):
             offset = tuple(direction_string_to_offset(offset, self.spatial_dimensions))
             offset = tuple([o * sp.Rational(1, 2) for o in offset])
         if len(offset) != self.spatial_dimensions:
-            raise ValueError("Wrong number of spatial indices: "
-                             "Got %d, expected %d" % (len(offset), self.spatial_dimensions))
+            raise ValueError(f"Wrong number of spatial indices: Got {len(offset)}, expected {self.spatial_dimensions}")
 
         prefactor = 1
         neighbor_vec = [0] * len(offset)
@@ -526,8 +523,7 @@ class Field(AbstractField):
             if FieldType.is_staggered_flux(self):
                 prefactor = -1
         if neighbor not in self.staggered_stencil:
-            raise ValueError("{} is not a valid neighbor for the {} stencil".format(offset_orig,
-                                                                                    self.staggered_stencil_name))
+            raise ValueError(f"{offset_orig} is not a valid neighbor for the {self.staggered_stencil_name} stencil")
 
         offset = tuple(sp.Matrix(offset) - sp.Rational(1, 2) * sp.Matrix(neighbor_vec))
 
@@ -539,15 +535,13 @@ class Field(AbstractField):
             return prefactor * Field.Access(self, offset, (idx,))
         else:  # this field stores a vector or tensor at each staggered position
             if index is None:
-                raise ValueError("Wrong number of indices: "
-                                 "Got %d, expected %d" % (0, self.index_dimensions - 1))
+                raise ValueError(f"Wrong number of indices: Got 0, expected {self.index_dimensions - 1}")
             if type(index) is np.ndarray:
                 index = tuple(index)
             if type(index) is not tuple:
                 index = (index,)
             if self.index_dimensions != len(index) + 1:
-                raise ValueError("Wrong number of indices: "
-                                 "Got %d, expected %d" % (len(index), self.index_dimensions - 1))
+                raise ValueError(f"Wrong number of indices: Got {len(index)}, expected {self.index_dimensions - 1}")
 
             return prefactor * Field.Access(self, offset, (idx, *index))
 
@@ -587,7 +581,7 @@ class Field(AbstractField):
     @property
     def staggered_stencil_name(self):
         assert FieldType.is_staggered(self)
-        return "D%dQ%d" % (self.spatial_dimensions, self.index_shape[0] * 2 + 1)
+        return f"D{self.spatial_dimensions}Q{self.index_shape[0] * 2 + 1}"
 
     def __call__(self, *args, **kwargs):
         center = tuple([0] * self.spatial_dimensions)
@@ -747,8 +741,7 @@ class Field(AbstractField):
                 idx = ()
 
             if len(idx) != self.field.index_dimensions:
-                raise ValueError("Wrong number of indices: "
-                                 "Got %d, expected %d" % (len(idx), self.field.index_dimensions))
+                raise ValueError(f"Wrong number of indices: Got {len(idx)}, expected {self.field.index_dimensions}")
             return Field.Access(self.field, self._offsets, idx, dtype=self.dtype)
 
         def __getitem__(self, *idx):
@@ -870,15 +863,14 @@ class Field(AbstractField):
 
             if FieldType.is_staggered(self._field):
                 if self.index and self.field.index_dimensions > 1:
-                    return "{{%s}_{%s}^{%s}}" % (n, offset_str, self.index[1:]
-                                                 if len(self.index) > 2 else self.index[1])
+                    return f"{{{n}_{offset_str}^{self.index[1:] if len(self.index) > 2 else self.index[1]}}}"
                 else:
-                    return "{{%s}_{%s}}" % (n, offset_str)
+                    return f"{{{n}_{offset_str}}}"
             else:
                 if self.index and self.field.index_dimensions > 0:
-                    return "{{%s}_{%s}^{%s}}" % (n, offset_str, self.index if len(self.index) > 1 else self.index[0])
+                    return f"{{{n}_{offset_str}^{self.index if len(self.index) > 1 else self.index[0]}}}"
                 else:
-                    return "{{%s}_{%s}}" % (n, offset_str)
+                    return f"{{{n}_{offset_str}}}"
 
         def __str__(self):
             n = self._field.latex_name if self._field.latex_name else self._field.name
@@ -1060,7 +1052,6 @@ def _parse_part1(d):
 
 
 def _parse_description(description):
-
     def parse_part2(d):
         result = type_description_regex.match(d)
         if result:
diff --git a/pystencils/kernelcreation.py b/pystencils/kernelcreation.py
index 19e0180ee..7a7c5cf1d 100644
--- a/pystencils/kernelcreation.py
+++ b/pystencils/kernelcreation.py
@@ -22,56 +22,93 @@ from pystencils.transformations import (
 @dataclass
 class CreateKernelConfig:
     """
-        target: One of Target's enums
-        backend: One of Backend's enums
-        function_name: name of the generated function - only important if generated code is written out
-        data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
-                  to type
-        iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
-                         part of the field is iterated over
-        ghost_layers: a single integer specifies the ghost layer count at all borders, can also be a sequence of
-                      pairs ``[(x_lower_gl, x_upper_gl), .... ]``. These layers are excluded from the iteration.
-                      If left to default, the number of ghost layers is determined automatically.
-        skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
-                                 periodicity kernel, that access the field outside the iteration bounds. Use with care!
-        cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
-        cpu_vectorize_info: a dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal'
-                            for documentation of these parameters see vectorize function. Example:
-                            '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
-        cpu_blocking: a tuple of block sizes or None if no blocking should be applied
-        omp_single_loop: if OpenMP is active: whether multiple outer loops are permitted
-        gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
-        gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class)
-                             e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'
-        use_textures_for_interpolation:
-        cpu_prepend_optimizations: list of extra optimizations to perform first on the AST
-        use_auto_for_assignments:
-        opencl_queue:
-        opencl_ctx:
-        index_fields: list of index fields, i.e. 1D fields with struct data type. If not None, `create_index_kernel`
-                      instead of `create_domain_kernel` is used.
-        coordinate_names: name of the coordinate fields in the struct data type
+    **Below all parameters for the CreateKernelConfig are explained**
     """
     target: Target = Target.CPU
+    """
+    All targets are defined in :class:`pystencils.enums.Target`
+    """
     backend: Backend = None
+    """
+    All backends are defined in :class:`pystencils.enums.Backend`
+    """
     function_name: str = 'kernel'
+    """
+    Name of the generated function - only important if generated code is written out
+    """
     data_type: Union[str, dict] = 'double'
+    """
+    Data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name to type
+    """
     iteration_slice: Tuple = None
+    """
+    Rectangular subset to iterate over, if not specified the complete non-ghost layer part of the field is iterated over
+    """
     ghost_layers: Union[bool, int, List[Tuple[int]]] = None
+    """
+    A single integer specifies the ghost layer count at all borders, can also be a sequence of
+    pairs ``[(x_lower_gl, x_upper_gl), .... ]``. These layers are excluded from the iteration.
+    If left to default, the number of ghost layers is determined automatically from the assignments.
+    """
     skip_independence_check: bool = False
-    cpu_openmp: bool = False
+    """
+    Don't check that loop iterations are independent. This is needed e.g. for 
+    periodicity kernel, that access the field outside the iteration bounds. Use with care!
+    """
+    cpu_openmp: Union[bool, int] = False
+    """
+    `True` or number of threads for OpenMP parallelization, `False` for no OpenMP. If set to `True`, the maximum number
+    of available threads will be chosen.
+    """
     cpu_vectorize_info: Dict = None
+    """
+    A dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal'
+    for documentation of these parameters see vectorize function. Example:
+    '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}'
+    """
     cpu_blocking: Tuple[int] = None
+    """
+    A tuple of block sizes or `None` if no blocking should be applied
+    """
     omp_single_loop: bool = True
+    """
+    If OpenMP is active: whether multiple outer loops are permitted
+    """
     gpu_indexing: str = 'block'
+    """
+    Either 'block' or 'line' , or custom indexing class, see `AbstractIndexing`
+    """
     gpu_indexing_params: MappingProxyType = field(default=MappingProxyType({}))
+    """
+    Dict with indexing parameters (constructor parameters of indexing class)
+    e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }'.
+    """
     use_textures_for_interpolation: bool = True
     cpu_prepend_optimizations: List[Callable] = field(default_factory=list)
+    """
+    List of extra optimizations to perform first on the AST.
+    """
     use_auto_for_assignments: bool = False
+    """
+    If set to `True`, auto can be used in the generated code for data types. This makes the type system more robust.
+    """
     opencl_queue: Any = None
+    """
+    OpenCL queue if OpenCL target is used.
+    """
     opencl_ctx: Any = None
+    """
+    OpenCL context if OpenCL target is used.
+    """
     index_fields: List[Field] = None
+    """
+    List of index fields, i.e. 1D fields with struct data type. If not `None`, `create_index_kernel`
+    instead of `create_domain_kernel` is used.
+    """
     coordinate_names: Tuple[str, Any] = ('x', 'y', 'z')
+    """
+    Name of the coordinate fields in the struct data type.
+    """
 
     def __post_init__(self):
         # ----  Legacy parameters
diff --git a/pystencils_tests/test_field.py b/pystencils_tests/test_field.py
index 1c57e6ecd..596f9f4da 100644
--- a/pystencils_tests/test_field.py
+++ b/pystencils_tests/test_field.py
@@ -48,7 +48,7 @@ def test_field_basic():
     assert '_' in neighbor._latex('dummy')
 
     f = Field.create_fixed_size('f', (8, 8, 2, 2, 2), index_dimensions=3)
-    assert f.center_vector == sp.Matrix([[[f(i, j, k) for k in range(2)] for j in range(2)] for i in range(2)])
+    assert f.center_vector == sp.Array([[[f(i, j, k) for k in range(2)] for j in range(2)] for i in range(2)])
 
     f = Field.create_generic('f', spatial_dimensions=5, index_dimensions=2)
     field_access = f[1, -1, 2, -3, 0](1, 0)
diff --git a/pystencils_tests/test_fvm.py b/pystencils_tests/test_fvm.py
index f1c41631f..478a95308 100644
--- a/pystencils_tests/test_fvm.py
+++ b/pystencils_tests/test_fvm.py
@@ -117,8 +117,8 @@ def test_advection_diffusion_2d(velocity):
     advection_diffusion.runners[2](velocity)
 
 
-@pytest.mark.longrun
 @pytest.mark.parametrize("velocity", list(product([0, -0.047, 0.041], [0, -0.031, 0.023], [0, -0.017, 0.011])))
+@pytest.mark.longrun
 def test_advection_diffusion_3d(velocity):
     if 3 not in advection_diffusion.runners:
         advection_diffusion.runners[3] = advection_diffusion(3)
diff --git a/pystencils_tests/test_vectorization.py b/pystencils_tests/test_vectorization.py
index 1bbc6a4a4..2eebd65c7 100644
--- a/pystencils_tests/test_vectorization.py
+++ b/pystencils_tests/test_vectorization.py
@@ -220,7 +220,7 @@ def test_logical_operators(instruction_set=instruction_set):
 
 
 def test_hardware_query():
-    assert set(['sse', 'neon', 'sve', 'vsx', 'rvv']).intersection(supported_instruction_sets)
+    assert {'sse', 'neon', 'sve', 'vsx', 'rvv'}.intersection(supported_instruction_sets)
 
 
 def test_vectorised_pow(instruction_set=instruction_set):
diff --git a/pytest.ini b/pytest.ini
index 9d296405e..f77dd5ea5 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,6 +3,7 @@ python_files = test_*.py *_test.py scenario_*.py
 norecursedirs = *.egg-info .git .cache .ipynb_checkpoints htmlcov
 addopts = --doctest-modules --durations=20  --cov-config pytest.ini
 markers =
+       longrun: tests only run at night since they have large execution time
        kerncraft: tests depending on kerncraft
        notebook: mark for notebooks
 # these warnings all come from third party libraries.
@@ -13,6 +14,7 @@ filterwarnings =
        ignore:.*is a deprecated alias for the builtin `bool`:DeprecationWarning
        ignore:'contextfilter' is renamed to 'pass_context':DeprecationWarning
        ignore:Using or importing the ABCs from 'collections' instead of from 'collections.abc':DeprecationWarning
+       ignore:Animation was deleted without rendering anything:UserWarning
 
 [run]
 branch = True
-- 
GitLab