From 1a37be50055e8093fa1ad10e19cb5524630280e3 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Sun, 29 Apr 2018 13:40:14 +0200 Subject: [PATCH] Extended scaling tutorial & more documentation cleanup --- boundaries/boundaryhandling.py | 2 +- cpu/cpujit.py | 2 +- fd/derivative.py | 15 +++++++------ field.py | 2 +- gpucuda/indexing.py | 40 ++++++++++++++++++++-------------- kernelcreation.py | 2 +- simp/simplifications.py | 4 ++-- 7 files changed, 38 insertions(+), 29 deletions(-) diff --git a/boundaries/boundaryhandling.py b/boundaries/boundaryhandling.py index f36cad5ff..264051a2a 100644 --- a/boundaries/boundaryhandling.py +++ b/boundaries/boundaryhandling.py @@ -310,7 +310,7 @@ class BoundaryHandling: gpu_version = gpu_version.boundary_object_to_index_list cpu_version = cpu_version.boundary_object_to_index_list for obj, cpu_arr in cpu_version.items(): - if obj not in gpu_version: + if obj not in gpu_version or gpu_version[obj].shape != cpu_arr.shape: gpu_version[obj] = gpuarray.to_gpu(cpu_arr) else: gpu_version[obj].set(cpu_arr) diff --git a/cpu/cpujit.py b/cpu/cpujit.py index ca915d38b..d20febad9 100644 --- a/cpu/cpujit.py +++ b/cpu/cpujit.py @@ -63,7 +63,7 @@ compiled into the shared library. 
Then, the same script can be run from the comp - **'read_from_shared_library'**: if true kernels are not compiled but assumed to be in the shared library - **'object_cache'**: path to a folder where intermediate files are stored - **'clear_cache_on_start'**: when true the cache is cleared on each start of a *pystencils* script -- **'shared_library'**: path to a shared library file, which is created if `read_from_shared_library=false` +- **'shared_library'**: path to a shared library file, which is created if 'read_from_shared_library=false' """ from __future__ import print_function import os diff --git a/fd/derivative.py b/fd/derivative.py index c2b497a3d..902bda803 100644 --- a/fd/derivative.py +++ b/fd/derivative.py @@ -108,14 +108,15 @@ class Diff(sp.Expr): class DiffOperator(sp.Expr): - """ - Un-applied differential, i.e. differential operator - Its args are: - - target: the differential is w.r.t to this variable. + """Un-applied differential, i.e. differential operator + + Args: + target: the differential is w.r.t to this variable. This target is mainly for display purposes (its the subscript) and to distinguish DiffOperators If the target is '-1' no subscript is displayed - - superscript: optional marker displayed as superscript - is not displayed if set to '-1' + superscript: optional marker displayed as superscript + is not displayed if set to '-1' + The DiffOperator behaves much like a variable with special name. Its main use is to be applied later, using the DiffOperator.apply(expr, arg) which transforms 'DiffOperator's to applied 'Diff's """ @@ -178,7 +179,7 @@ class DiffOperator(sp.Expr): def diff_terms(expr): """Returns set of all derivatives in an expression. 
- This function yields different results than `expr.atoms(Diff)` when nested derivatives are in the expression, + This function yields different results than 'expr.atoms(Diff)' when nested derivatives are in the expression, since this function only returns the outer derivatives """ result = set() diff --git a/field.py b/field.py index 69513a3ab..38bdcdc16 100644 --- a/field.py +++ b/field.py @@ -112,7 +112,7 @@ class Field: Creating Fields: The preferred method to create fields is the `fields` function. Alternatively one can use one of the static functions `Field.create_generic`, `Field.create_from_numpy_array` - and `Field.create_fixed_size`. Don't instantiate the Field directly! + and `Field.create_fixed_size`. Don't instantiate the Field directly! Fields can be created with known or unknown shapes: 1. If you want to create a kernel with fixed loop sizes i.e. the shape of the array is already known. diff --git a/gpucuda/indexing.py b/gpucuda/indexing.py index d39a54688..c85c411e4 100644 --- a/gpucuda/indexing.py +++ b/gpucuda/indexing.py @@ -35,21 +35,27 @@ class AbstractIndexing(abc.ABC): @abc.abstractmethod def call_parameters(self, arr_shape): - """ - Determine grid and block size for kernel call - :param arr_shape: the numeric (not symbolic) shape of the array - :return: dict with keys 'blocks' and 'threads' with tuple values for number of (x,y,z) threads and blocks - the kernel should be started with + """Determine grid and block size for kernel call. + + Args: + arr_shape: the numeric (not symbolic) shape of the array + Returns: + dict with keys 'blocks' and 'threads' with tuple values for number of (x,y,z) threads and blocks + the kernel should be started with """ @abc.abstractmethod def guard(self, kernel_content, arr_shape): - """ - In some indexing schemes not all threads of a block execute the kernel content. + """In some indexing schemes not all threads of a block execute the kernel content. 
+ This function can return a Conditional ast node, defining this execution guard. - :param kernel_content: the actual kernel contents which can e.g. be put into the Conditional node as true block - :param arr_shape: the numeric or symbolic shape of the field - :return: ast node, which is put inside the kernel function + + Args: + kernel_content: the actual kernel contents which can e.g. be put into the Conditional node as true block + arr_shape: the numeric or symbolic shape of the field + + Returns: + ast node, which is put inside the kernel function """ @@ -116,12 +122,14 @@ class BlockIndexing(AbstractIndexing): @staticmethod def limit_block_size_to_device_maximum(block_size): - """ - Changes block size according to match device limits according to the following rules: - 1) if the total amount of threads is too big for the current device, the biggest coordinate is divided by 2. - 2) next, if one component is still too big, the component which is too big is divided by 2 and the smallest - component is multiplied by 2, such that the total amount of threads stays the same - Returns the altered block_size + """Changes block size to match device limits. + + * if the total amount of threads is too big for the current device, the biggest coordinate is divided by 2. 
+ * next, if one component is still too big, the component which is too big is divided by 2 and the smallest + component is multiplied by 2, such that the total amount of threads stays the same + + Returns: + the altered block_size """ # Get device limits import pycuda.driver as cuda diff --git a/kernelcreation.py b/kernelcreation.py index 87bcf5d49..3d4b68ac8 100644 --- a/kernelcreation.py +++ b/kernelcreation.py @@ -33,7 +33,7 @@ def create_kernel(assignments, target='cpu', data_type="double", iteration_slice Returns: abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or - can be compiled with through its `compile()` member + can be compiled through its 'compile()' member Example: >>> import pystencils as ps diff --git a/simp/simplifications.py b/simp/simplifications.py index 2cb4dd303..076adf967 100644 --- a/simp/simplifications.py +++ b/simp/simplifications.py @@ -61,9 +61,9 @@ def subexpression_substitution_in_main_assignments(ac: AC) -> AC: def add_subexpressions_for_divisions(ac: AC) -> AC: - """Introduces subexpressions for all divisions which have no constant in the denominator. + r"""Introduces subexpressions for all divisions which have no constant in the denominator. - For example :math:`\frac{1}{x}` is replaced, :math:`\frac{1}{3}` is not replaced. + For example :math:`\frac{1}{x}` is replaced while :math:`\frac{1}{3}` is not replaced. """ divisors = set() -- GitLab