From aea202a78df6b713748b1cf5587e11e2867f85e6 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Thu, 3 Nov 2016 11:55:37 +0100
Subject: [PATCH] Documentation & Restructuring

- added sphinx files for documentation generation
- collected kernel creation functions in new "cpu" and "cudagpu" modules
---
 __init__.py                          |   2 +
 ast.py                               |   8 +
 backends/cbackend.py                 |   3 +
 cpu/__init__.py                      |   3 +
 jit.py => cpu/cpujit.py              |  19 +-
 cpu/kernelcreation.py                |  77 ++++++++
 field.py                             | 271 ++++++++++++++-------------
 finitedifferences.py                 |  60 +++---
 gpucuda/__init__.py                  |   0
 cudajit.py => gpucuda/cudajit.py     |   0
 cuda.py => gpucuda/kernelcreation.py |   0
 transformations.py                   | 178 +++++++++---------
 12 files changed, 377 insertions(+), 244 deletions(-)
 create mode 100644 __init__.py
 create mode 100644 cpu/__init__.py
 rename jit.py => cpu/cpujit.py (89%)
 create mode 100644 cpu/kernelcreation.py
 create mode 100644 gpucuda/__init__.py
 rename cudajit.py => gpucuda/cudajit.py (100%)
 rename cuda.py => gpucuda/kernelcreation.py (100%)

diff --git a/__init__.py b/__init__.py
new file mode 100644
index 000000000..8e21541ca
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,2 @@
+from pystencils.field import Field, extractCommonSubexpressions
+
diff --git a/ast.py b/ast.py
index 5eed391d0..301bf81cf 100644
--- a/ast.py
+++ b/ast.py
@@ -5,21 +5,29 @@ from pystencils.typedsymbol import TypedSymbol
 
 
 class Node:
+    """Base class for all AST nodes"""
+
     def __init__(self, parent=None):
         self.parent = parent
 
     def args(self):
+        """Returns all arguments/children of this node"""
         return []
 
     @property
     def symbolsDefined(self):
+        """Set of symbols which are defined in this node or its children"""
         return set()
 
     @property
     def symbolsRead(self):
+        """Set of symbols which are accessed/read in this node or its children"""
         return set()
 
     def atoms(self, argType):
+        """
+        Returns a set of all children which are an instance of the given argType
+        """
         result = set()
         for arg in self.args:
             if isinstance(arg, argType):
diff --git a/backends/cbackend.py b/backends/cbackend.py
index 70d1eb2c0..9f1620da9 100644
--- a/backends/cbackend.py
+++ b/backends/cbackend.py
@@ -4,6 +4,9 @@ from pystencils.ast import Node
 
 
 def printCCode(astNode):
+    """
+    Prints the abstract syntax tree as C function
+    """
     printer = CBackend(cuda=False)
     return printer(astNode)
 
diff --git a/cpu/__init__.py b/cpu/__init__.py
new file mode 100644
index 000000000..71656ee6c
--- /dev/null
+++ b/cpu/__init__.py
@@ -0,0 +1,3 @@
+from pystencils.cpu.kernelcreation import createKernel, addOpenMP
+from pystencils.cpu.cpujit import makePythonFunction
+from pystencils.backends.cbackend import printCCode
diff --git a/jit.py b/cpu/cpujit.py
similarity index 89%
rename from jit.py
rename to cpu/cpujit.py
index 5debe1a68..591a8f26c 100644
--- a/jit.py
+++ b/cpu/cpujit.py
@@ -131,6 +131,18 @@ def makePythonFunctionIncompleteParams(kernelFunctionNode, argumentDict):
 
 
 def makePythonFunction(kernelFunctionNode, argumentDict={}):
+    """
+    Creates C code from the abstract syntax tree, compiles it and makes it accessible as Python function
+
+    The parameters of the kernel are:
+        - numpy arrays for each field used in the kernel. The keyword argument name is the name of the field
+        - all symbols which are not defined in the kernel itself are expected as parameters
+
+    :param kernelFunctionNode: the abstract syntax tree
+    :param argumentDict: parameters passed here are already fixed. Remaining parameters have to be passed to the
+                        returned kernel functor.
+    :return: kernel functor
+    """
     # build up list of CType arguments
     try:
         args = buildCTypeArgumentList(kernelFunctionNode, argumentDict)
@@ -140,10 +152,3 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}):
     func = compileAndLoad(kernelFunctionNode)[kernelFunctionNode.functionName]
     func.restype = None
     return lambda: func(*args)
-
-
-
-
-
-
-
diff --git a/cpu/kernelcreation.py b/cpu/kernelcreation.py
new file mode 100644
index 000000000..185ebaa3c
--- /dev/null
+++ b/cpu/kernelcreation.py
@@ -0,0 +1,77 @@
+import sympy as sp
+from pystencils.transformations import resolveFieldAccesses, makeLoopOverDomain, typingFromSympyInspection, \
+    typeAllEquations, getOptimalLoopOrdering, parseBasePointerInfo, moveConstantsBeforeLoop, splitInnerLoop
+from pystencils.typedsymbol import TypedSymbol
+from pystencils.field import Field
+import pystencils.ast as ast
+
+
+def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, splitGroups=()):
+    """
+    Creates an abstract syntax tree for a kernel function, by taking a list of update rules.
+
+    Loops are created according to the field accesses in the equations.
+
+    :param listOfEquations: list of sympy equations, containing accesses to :class:`pystencils.field.Field`.
+           Defining the update rules of the kernel
+    :param functionName: name of the generated function - only important if generated code is written out
+    :param typeForSymbol: a map from symbol name to a C type specifier. If not specified all symbols are assumed to
+           be of type 'double' except symbols which occur on the left hand side of equations where the
+           right hand side is a sympy Boolean which are assumed to be 'bool' .
+    :param splitGroups: Specification on how to split up inner loop into multiple loops. For details see
+           transformation :func:`pystencils.transformation.splitInnerLoop`
+    :return: :class:`pystencils.ast.KernelFunction` node
+    """
+    if not typeForSymbol:
+        typeForSymbol = typingFromSympyInspection(listOfEquations, "double")
+
+    def typeSymbol(term):
+        if isinstance(term, Field.Access) or isinstance(term, TypedSymbol):
+            return term
+        elif isinstance(term, sp.Symbol):
+            return TypedSymbol(term.name, typeForSymbol[term.name])
+        else:
+            raise ValueError("Term has to be field access or symbol")
+
+    fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol)
+    allFields = fieldsRead.union(fieldsWritten)
+
+    for field in allFields:
+        field.setReadOnly(False)
+    for field in fieldsRead - fieldsWritten:
+        field.setReadOnly()
+
+    body = ast.Block(assignments)
+    code = makeLoopOverDomain(body, functionName)
+
+    if splitGroups:
+        typedSplitGroups = [[typeSymbol(s) for s in splitGroup] for splitGroup in splitGroups]
+        splitInnerLoop(code, typedSplitGroups)
+
+    loopOrder = getOptimalLoopOrdering(allFields)
+
+    basePointerInfo = [['spatialInner0'], ['spatialInner1']]
+    basePointerInfos = {field.name: parseBasePointerInfo(basePointerInfo, loopOrder, field) for field in allFields}
+
+    resolveFieldAccesses(code, fieldToBasePointerInfo=basePointerInfos)
+    moveConstantsBeforeLoop(code)
+
+    return code
+
+
+def addOpenMP(astNode, schedule="static"):
+    """
+    Parallelizes the outer loop with OpenMP
+
+    :param astNode: abstract syntax tree created e.g. by :func:`createKernel`
+    :param schedule: OpenMP scheduling policy e.g. 'static' or 'dynamic'
+    """
+    assert type(astNode) is ast.KernelFunction
+    body = astNode.body
+    wrapperBlock = ast.PragmaBlock('#pragma omp parallel', body.takeChildNodes())
+    body.append(wrapperBlock)
+
+    outerLoops = [l for l in body.atoms(ast.LoopOverCoordinate) if l.isOutermostLoop]
+    assert outerLoops, "No outer loop found"
+    assert len(outerLoops) <= 1, "More than one outer loop found. Which one should be parallelized?"
+    outerLoops[0].prefixLines.append("#pragma omp for schedule(%s)" % (schedule,))
diff --git a/field.py b/field.py
index 2fe288e58..3507cd02e 100644
--- a/field.py
+++ b/field.py
@@ -6,147 +6,31 @@ from sympy.tensor import IndexedBase
 from pystencils.typedsymbol import TypedSymbol
 
 
-def getLayoutFromNumpyArray(arr):
-    """
-    Returns a list indicating the memory layout (linearization order) of the numpy array.
-    Example:
-        >>> getLayoutFromNumpyArray(np.zeros([3,3,3]))
-        [0, 1, 2]
-    In this example the loop over the zeroth coordinate should be the outermost loop,
-    followed by the first and second. Elements arr[x,y,0] and arr[x,y,1] are adjacent in memory.
-    Normally constructed numpy arrays have this order, however by stride tricks or other frameworks, arrays
-    with different memory layout can be created.
-    """
-    coordinates = list(range(len(arr.shape)))
-    return [x for (y, x) in sorted(zip(arr.strides, coordinates), key=lambda pair: pair[0], reverse=True)]
-
-
-def numpyDataTypeToC(dtype):
-    """Mapping numpy data types to C data types"""
-    if dtype == np.float64:
-        return "double"
-    elif dtype == np.float32:
-        return "float"
-    elif dtype == np.int32:
-        return "int"
-    raise NotImplementedError()
-
-
-def offsetComponentToDirectionString(coordinateId, value):
-    """
-    Translates numerical offset to string notation.
-    x offsets are labeled with east 'E' and 'W',
-    y offsets with north 'N' and 'S' and
-    z offsets with top 'T' and bottom 'B'
-    If the absolute value of the offset is bigger than 1, this number is prefixed.
-    :param coordinateId: integer 0, 1 or 2 standing for x,y and z
-    :param value: integer offset
-
-    Example:
-    >>> offsetComponentToDirectionString(0, 1)
-    'E'
-    >>> offsetComponentToDirectionString(1, 2)
-    '2N'
-    """
-    nameComponents = (('W', 'E'),  # west, east
-                      ('S', 'N'),  # south, north
-                      ('B', 'T'),  # bottom, top
-                      )
-    if value == 0:
-        result = ""
-    elif value < 0:
-        result = nameComponents[coordinateId][0]
-    else:
-        result = nameComponents[coordinateId][1]
-    if abs(value) > 1:
-        result = "%d%s" % (abs(value), result)
-    return result
-
-
-def offsetToDirectionString(offsetTuple):
-    """
-    Translates numerical offset to string notation.
-    For details see :func:`offsetComponentToDirectionString`
-    :param offsetTuple: 3-tuple with x,y,z offset
-
-    Example:
-    >>> offsetToDirectionString([1, -1, 0])
-    'SE'
-    >>> offsetToDirectionString(([-3, 0, -2]))
-    '2B3W'
-    """
-    names = ["", "", ""]
-    for i in range(len(offsetTuple)):
-        names[i] = offsetComponentToDirectionString(i, offsetTuple[i])
-    name = "".join(reversed(names))
-    if name == "":
-        name = "C"
-    return name
-
-
-def directionStringToOffset(directionStr, dim=3):
-    """
-    Reverse mapping of :func:`offsetToDirectionString`
-    :param directionStr: string representation of offset
-    :param dim: dimension of offset, i.e the length of the returned list
-
-    >>> directionStringToOffset('NW', dim=3)
-    array([-1,  1,  0])
-    >>> directionStringToOffset('NW', dim=2)
-    array([-1,  1])
-    >>> directionStringToOffset(offsetToDirectionString([3,-2,1]))
-    array([ 3, -2,  1])
-    """
-    offsetMap = {
-        'C': np.array([0, 0, 0]),
-
-        'W': np.array([-1, 0, 0]),
-        'E': np.array([1, 0, 0]),
-
-        'S': np.array([0, -1, 0]),
-        'N': np.array([0, 1, 0]),
-
-        'B': np.array([0, 0, -1]),
-        'T': np.array([0, 0, 1]),
-    }
-    offset = np.array([0, 0, 0])
-
-    while len(directionStr) > 0:
-        factor = 1
-        firstNonDigit = 0
-        while directionStr[firstNonDigit].isdigit():
-            firstNonDigit += 1
-        if firstNonDigit > 0:
-            factor = int(directionStr[:firstNonDigit])
-            directionStr = directionStr[firstNonDigit:]
-        curOffset = offsetMap[directionStr[0]]
-        offset += factor * curOffset
-        directionStr = directionStr[1:]
-    return offset[:dim]
-
-
 class Field:
     """
     With fields one can formulate stencil-like update rules on structured grids.
     This Field class knows about the dimension, memory layout (strides) and optionally about the size of an array.
 
-    To create a field use one of the static create* members. There are two options:
+    Creating Fields:
+
+        To create a field use one of the static create* members. There are two options:
+
         1. create a kernel with fixed loop sizes i.e. the shape of the array is already known. This is usually the
-           case if just-in-time compilation directly from Python is done. (see Field.createFromNumpyArray)
+           case if just-in-time compilation directly from Python is done. (see :func:`Field.createFromNumpyArray`)
         2. create a more general kernel that works for variable array sizes. This can be used to create kernels
-           beforehand for a library. (see Field.createGeneric)
+           beforehand for a library. (see :func:`Field.createGeneric`)
 
     Dimensions:
         A field has spatial and index dimensions, where the spatial dimensions come first.
         The interpretation is that the field has multiple cells in (usually) two or three dimensional space which are
         looped over. Additionally  N values are stored per cell. In this case spatialDimensions is two or three,
         and indexDimensions equals N. If you want to store a matrix on each point in a two dimensional grid, there
-        are four dimensions, two spatial and two index dimensions. len(arr.shape) == spatialDims + indexDims
+        are four dimensions, two spatial and two index dimensions: ``len(arr.shape) == spatialDims + indexDims``
 
     Indexing:
         When accessing (indexing) a field the result is a FieldAccess which is derived from sympy Symbol.
         First specify the spatial offsets in [], then in case indexDimension>0 the indices in ()
-        e.g. f[-1,0,0](7)
+        e.g. ``f[-1,0,0](7)``
 
     Example without index dimensions:
         >>> a = np.zeros([10, 10])
@@ -162,6 +46,7 @@ class Field:
         Eq(dst_C^0, src_C^0)
         Eq(dst_C^1, src_S^1)
         Eq(dst_C^2, src_N^2)
+
     """
     @staticmethod
     def createFromNumpyArray(fieldName, npArray, indexDimensions=0):
@@ -194,8 +79,8 @@ class Field:
         :param spatialDimensions: see documentation of Field
         :param indexDimensions: see documentation of Field
         :param layout: tuple specifying the loop ordering of the spatial dimensions e.g. (2, 1, 0 ) means that
-                       the outer loop loops over dimension 2, the second outer over dimension 1, and the inner loop
-                       over dimension 0
+        the outer loop loops over dimension 2, the second outer over dimension 1, and the inner loop
+        over dimension 0
         """
         if not layout:
             layout = tuple(reversed(range(spatialDimensions)))
@@ -384,3 +269,137 @@ class Field:
             superClassContents = list(super(Field.Access, self)._hashable_content())
             t = tuple([*superClassContents, hash(self._field), self._index] + self._offsets)
             return t
+
+
+def extractCommonSubexpressions(equations):
+    """
+    Uses sympy to find common subexpressions in equations and returns
+    them in a topologically sorted order, ready for evaluation.
+    Usually called before list of equations is passed to :func:`createKernel`
+    """
+    replacements, newEq = sp.cse(equations)
+    replacementEqs = [sp.Eq(*r) for r in replacements]
+    equations = replacementEqs + newEq
+    topologicallySortedPairs = sp.cse_main.reps_toposort([[e.lhs, e.rhs] for e in equations])
+    equations = [sp.Eq(*a) for a in topologicallySortedPairs]
+    return equations
+
+
+def getLayoutFromNumpyArray(arr):
+    """
+    Returns a list indicating the memory layout (linearization order) of the numpy array.
+    Example:
+    >>> getLayoutFromNumpyArray(np.zeros([3,3,3]))
+    [0, 1, 2]
+
+    In this example the loop over the zeroth coordinate should be the outermost loop,
+    followed by the first and second. Elements arr[x,y,0] and arr[x,y,1] are adjacent in memory.
+    Normally constructed numpy arrays have this order, however by stride tricks or other frameworks, arrays
+    with different memory layout can be created.
+    """
+    coordinates = list(range(len(arr.shape)))
+    return [x for (y, x) in sorted(zip(arr.strides, coordinates), key=lambda pair: pair[0], reverse=True)]
+
+
+def numpyDataTypeToC(dtype):
+    """Mapping numpy data types to C data types"""
+    if dtype == np.float64:
+        return "double"
+    elif dtype == np.float32:
+        return "float"
+    elif dtype == np.int32:
+        return "int"
+    raise NotImplementedError()
+
+
+def offsetComponentToDirectionString(coordinateId, value):
+    """
+    Translates numerical offset to string notation.
+    x offsets are labeled with east 'E' and 'W',
+    y offsets with north 'N' and 'S' and
+    z offsets with top 'T' and bottom 'B'
+    If the absolute value of the offset is bigger than 1, this number is prefixed.
+    :param coordinateId: integer 0, 1 or 2 standing for x,y and z
+    :param value: integer offset
+
+    Example:
+    >>> offsetComponentToDirectionString(0, 1)
+    'E'
+    >>> offsetComponentToDirectionString(1, 2)
+    '2N'
+    """
+    nameComponents = (('W', 'E'),  # west, east
+                      ('S', 'N'),  # south, north
+                      ('B', 'T'),  # bottom, top
+                      )
+    if value == 0:
+        result = ""
+    elif value < 0:
+        result = nameComponents[coordinateId][0]
+    else:
+        result = nameComponents[coordinateId][1]
+    if abs(value) > 1:
+        result = "%d%s" % (abs(value), result)
+    return result
+
+
+def offsetToDirectionString(offsetTuple):
+    """
+    Translates numerical offset to string notation.
+    For details see :func:`offsetComponentToDirectionString`
+    :param offsetTuple: 3-tuple with x,y,z offset
+
+    Example:
+    >>> offsetToDirectionString([1, -1, 0])
+    'SE'
+    >>> offsetToDirectionString(([-3, 0, -2]))
+    '2B3W'
+    """
+    names = ["", "", ""]
+    for i in range(len(offsetTuple)):
+        names[i] = offsetComponentToDirectionString(i, offsetTuple[i])
+    name = "".join(reversed(names))
+    if name == "":
+        name = "C"
+    return name
+
+
+def directionStringToOffset(directionStr, dim=3):
+    """
+    Reverse mapping of :func:`offsetToDirectionString`
+    :param directionStr: string representation of offset
+    :param dim: dimension of offset, i.e the length of the returned list
+
+    >>> directionStringToOffset('NW', dim=3)
+    array([-1,  1,  0])
+    >>> directionStringToOffset('NW', dim=2)
+    array([-1,  1])
+    >>> directionStringToOffset(offsetToDirectionString([3,-2,1]))
+    array([ 3, -2,  1])
+    """
+    offsetMap = {
+        'C': np.array([0, 0, 0]),
+
+        'W': np.array([-1, 0, 0]),
+        'E': np.array([1, 0, 0]),
+
+        'S': np.array([0, -1, 0]),
+        'N': np.array([0, 1, 0]),
+
+        'B': np.array([0, 0, -1]),
+        'T': np.array([0, 0, 1]),
+    }
+    offset = np.array([0, 0, 0])
+
+    while len(directionStr) > 0:
+        factor = 1
+        firstNonDigit = 0
+        while directionStr[firstNonDigit].isdigit():
+            firstNonDigit += 1
+        if firstNonDigit > 0:
+            factor = int(directionStr[:firstNonDigit])
+            directionStr = directionStr[firstNonDigit:]
+        curOffset = offsetMap[directionStr[0]]
+        offset += factor * curOffset
+        directionStr = directionStr[1:]
+    return offset[:dim]
diff --git a/finitedifferences.py b/finitedifferences.py
index b31bc5646..442010f26 100644
--- a/finitedifferences.py
+++ b/finitedifferences.py
@@ -1,22 +1,15 @@
 import numpy as np
 import sympy as sp
-
 from pystencils.field import Field
 
 
-def __upDownOffsets(d, dim):
-    coord = [0] * dim
-    coord[d] = 1
-    up = np.array(coord, dtype=np.int)
-    coord[d] = -1
-    down = np.array(coord, dtype=np.int)
-    return up, down
-
-
 def grad(var, dim=3):
-    """Gradients are represented as a special symbol:
-    e.g. :math:`\nabla x = (x^\Delta^0, x^\Delta^1, x^\Delta^2)`
+    r"""
+    Gradients are represented as a special symbol:
+    e.g. :math:`\nabla x = (x^{\Delta 0}, x^{\Delta 1}, x^{\Delta 2})`
+
     This function takes a symbol and creates the gradient symbols according to convention above
+
     :param var: symbol to take the gradient of
     :param dim: dimension (length) of the gradient vector
     """
@@ -29,7 +22,8 @@ def grad(var, dim=3):
 def discretizeCenter(term, symbolsToFieldDict, dx, dim=3):
     """
     Expects term that contains given symbols and gradient components of these symbols and replaces them
-    by field accesses. Gradients are replaced centralized approximations: (upper neighbor - lower neighbor ) / ( 2*dx).
+    by field accesses. Gradients are replaced centralized approximations:
+    ``(upper neighbor - lower neighbor ) / ( 2*dx)``
     :param term: term where symbols and gradient(symbol) should be replaced
     :param symbolsToFieldDict: mapping of symbols to Field
     :param dx: width and height of one cell
@@ -57,15 +51,6 @@ def discretizeCenter(term, symbolsToFieldDict, dx, dim=3):
     return term.subs(substitutions)
 
 
-def fastSubs(term, subsDict):
-    def visit(expr):
-        if expr in subsDict:
-            return subsDict[expr]
-        paramList = [visit(a) for a in expr.args]
-        return expr if not paramList else expr.func(*paramList)
-    return visit(term)
-
-
 def discretizeStaggered(term, symbolsToFieldDict, coordinate, coordinateOffset, dx, dim=3):
     """
     Expects term that contains given symbols and gradient components of these symbols and replaces them
@@ -122,13 +107,14 @@ def discretizeDivergence(vectorTerm, symbolsToFieldDict, dx):
     Computes discrete divergence of symbolic vector
     :param vectorTerm: sequence of terms, interpreted as vector
     :param symbolsToFieldDict: mapping of symbols to Field
+    :param dx: length of a cell
 
     Example: Laplace stencil
-      >>> x, dx = sp.symbols("x dx")
-      >>> gradx = grad(x, dim=3)
-      >>> f = Field.createGeneric('f', spatialDimensions=3)
-      >>> sp.simplify(discretizeDivergence(gradx, x, f, dx))
-      (f_B - 6*f_C + f_E + f_N + f_S + f_T + f_W)/dx
+        >>> x, dx = sp.symbols("x dx")
+        >>> gradX = grad(x, dim=3)
+        >>> f = Field.createGeneric('f', spatialDimensions=3)
+        >>> sp.simplify(discretizeDivergence(gradX, x, f, dx))
+        (f_B - 6*f_C + f_E + f_N + f_S + f_T + f_W)/dx
     """
     dim = len(vectorTerm)
     result = 0
@@ -136,3 +122,23 @@ def discretizeDivergence(vectorTerm, symbolsToFieldDict, dx):
         for offset in [-1, 1]:
             result += offset * discretizeStaggered(vectorTerm[d], symbolsToFieldDict, d, offset, dx, dim)
     return result
+
+
+def __upDownOffsets(d, dim):
+    coord = [0] * dim
+    coord[d] = 1
+    up = np.array(coord, dtype=np.int)
+    coord[d] = -1
+    down = np.array(coord, dtype=np.int)
+    return up, down
+
+
+def fastSubs(term, subsDict):
+    """Similar to sympy subs function.
+    This version is much faster for big substitution dictionaries than sympy version"""
+    def visit(expr):
+        if expr in subsDict:
+            return subsDict[expr]
+        paramList = [visit(a) for a in expr.args]
+        return expr if not paramList else expr.func(*paramList)
+    return visit(term)
diff --git a/gpucuda/__init__.py b/gpucuda/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/cudajit.py b/gpucuda/cudajit.py
similarity index 100%
rename from cudajit.py
rename to gpucuda/cudajit.py
diff --git a/cuda.py b/gpucuda/kernelcreation.py
similarity index 100%
rename from cuda.py
rename to gpucuda/kernelcreation.py
diff --git a/transformations.py b/transformations.py
index 0ef3b0a3f..6ea7014fb 100644
--- a/transformations.py
+++ b/transformations.py
@@ -7,14 +7,12 @@ from pystencils.typedsymbol import TypedSymbol
 import pystencils.ast as ast
 
 
-# --------------------------------------- Factory Functions ------------------------------------------------------------
-
-
 def makeLoopOverDomain(body, functionName):
     """
+    Uses :class:`pystencils.field.Field.Access` to create (multiple) loops around given AST.
     :param body: list of nodes
     :param functionName: name of generated C function
-    :return: LoopOverCoordinate instance with nested loops, ordered according to field layouts
+    :return: :class:`LoopOverCoordinate` instance with nested loops, ordered according to field layouts
     """
     # find correct ordering by inspecting participating FieldAccesses
     fieldAccesses = body.atoms(Field.Access)
@@ -45,9 +43,27 @@ def makeLoopOverDomain(body, functionName):
     return ast.KernelFunction(currentBody, functionName)
 
 
-# --------------------------------------- Transformations --------------------------------------------------------------
-
 def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr):
+    r"""
+    Addressing elements in structured arrays are done with :math:`ptr\left[ \sum_i c_i \cdot s_i \right]`
+    where :math:`c_i` is the coordinate value and :math:`s_i` the stride of a coordinate.
+    The sum can be split up into multiple parts, such that parts of it can be pulled before loops.
+    This function creates such an access for coordinates :math:`i \in \mbox{coordinates}`.
+    Returns a new typed symbol, where the name encodes which coordinates have been resolved.
+    :param fieldAccess: instance of :class:`pystencils.field.Field.Access` which provides strides and offsets
+    :param coordinates: mapping of coordinate ids to its value, where stride*value is calculated
+    :param previousPtr: the pointer which is dereferenced
+    :return: tuple with the new pointer symbol and the calculated offset
+
+    Example:
+        >>> field = Field.createGeneric('myfield', spatialDimensions=2, indexDimensions=1)
+        >>> x, y = sp.symbols("x y")
+        >>> prevPointer = TypedSymbol("ptr", "double")
+        >>> createIntermediateBasePointer(field[1,-2](5), {0: x}, prevPointer)
+        (ptr_E, x*fstride_myfield[0] + fstride_myfield[0])
+        >>> createIntermediateBasePointer(field[1,-2](5), {0: x, 1 : y }, prevPointer)
+        (ptr_E_2S, x*fstride_myfield[0] + y*fstride_myfield[1] + fstride_myfield[0] - 2*fstride_myfield[1])
+    """
     field = fieldAccess.field
 
     offset = 0
@@ -79,15 +95,24 @@ def createIntermediateBasePointer(fieldAccess, coordinates, previousPtr):
 
 def parseBasePointerInfo(basePointerSpecification, loopOrder, field):
     """
+    Creates base pointer specification for :func:`resolveFieldAccesses` function.
+
+    Specification of how many and which intermediate pointers are created for a field access.
+    For example [ (0), (2,3,)]  creates on base pointer for coordinates 2 and 3 and writes the offset for coordinate
+    zero directly in the field access. These specifications are more sensible defined dependent on the loop ordering.
+    This function translates more readable version into the specification above.
+
     Allowed specifications:
-    "spatialInner<int>" spatialInner0 is the innermost loop coordinate, spatialInner1 the loop enclosing the innermost
-    "spatialOuter<int>" spatialOuter0 is the outermost loop
-    "index<int>": index coordinate
-    "<int>": specifying directly the coordinate
+        - "spatialInner<int>" spatialInner0 is the innermost loop coordinate,
+          spatialInner1 the loop enclosing the innermost
+        - "spatialOuter<int>" spatialOuter0 is the outermost loop
+        - "index<int>": index coordinate
+        - "<int>": specifying directly the coordinate
+
     :param basePointerSpecification: nested list with above specifications
     :param loopOrder: list with ordering of loops from inner to outer
     :param field:
-    :return:
+    :return: list of tuples that can be passed to :func:`resolveFieldAccesses`
     """
     result = []
     specifiedCoordinates = set()
@@ -134,8 +159,16 @@ def parseBasePointerInfo(basePointerSpecification, loopOrder, field):
 
 
 def resolveFieldAccesses(astNode, fieldToBasePointerInfo={}, fieldToFixedCoordinates={}):
-    """Substitutes FieldAccess nodes by array indexing"""
-
+    """
+    Substitutes :class:`pystencils.field.Field.Access` nodes by array indexing
+
+    :param astNode: the AST root
+    :param fieldToBasePointerInfo: a list of tuples indicating which intermediate base pointers should be created
+                                   for details see :func:`parseBasePointerInfo`
+    :param fieldToFixedCoordinates: map of field name to a tuple of coordinate symbols. Instead of using the loop
+                                    counters to index the field these symbols are used as coordinates
+    :return: transformed AST
+    """
     def visitSympyExpr(expr, enclosingBlock):
         if isinstance(expr, Field.Access):
             fieldAccess = expr
@@ -196,7 +229,12 @@ def resolveFieldAccesses(astNode, fieldToBasePointerInfo={}, fieldToFixedCoordin
 
 
 def moveConstantsBeforeLoop(astNode):
-
+    """
+    Moves :class:`pystencils.ast.SympyAssignment` nodes out of loop body if they are iteration independent.
+    Call this after creating the loop structure with :func:`makeLoopOverDomain`
+    :param astNode:
+    :return:
+    """
     def findBlockToMoveTo(node):
         """Traverses parents of node as long as the symbols are independent and returns a (parent) block
         the assignment can be safely moved to
@@ -240,6 +278,14 @@ def moveConstantsBeforeLoop(astNode):
 
 
 def splitInnerLoop(astNode, symbolGroups):
+    """
+    Splits inner loop into multiple loops to minimize the amount of simultaneous load/store streams
+    :param astNode: AST root
+    :param symbolGroups: sequence of symbol sequences: for each symbol sequence a new inner loop is created which
+    updates these symbols and their dependent symbols. Symbols which are in none of the symbolGroups and which
+    no symbol in a symbol group depends on, are not updated!
+    :return: transformed AST
+    """
     allLoops = astNode.atoms(ast.LoopOverCoordinate)
     innerLoop = [l for l in allLoops if l.isInnermostLoop]
     assert len(innerLoop) == 1, "Error in AST: multiple innermost loops. Was split transformation already called?"
@@ -293,33 +339,16 @@ def splitInnerLoop(astNode, symbolGroups):
         outerLoop.parent.append(ast.TemporaryMemoryFree(tmpArray))
 
 
-# ------------------------------------- Main ---------------------------------------------------------------------------
-
-
-def extractCommonSubexpressions(equations):
-    """Uses sympy to find common subexpressions in equations and returns
-    them in a topologically sorted order, ready for evaluation"""
-    replacements, newEq = sp.cse(equations)
-    replacementEqs = [sp.Eq(*r) for r in replacements]
-    equations = replacementEqs + newEq
-    topologicallySortedPairs = sp.cse_main.reps_toposort([[e.lhs, e.rhs] for e in equations])
-    equations = [sp.Eq(*a) for a in topologicallySortedPairs]
-    return equations
-
-
-def addOpenMP(astNode):
-    assert type(astNode) is ast.KernelFunction
-    body = astNode.body
-    wrapperBlock = ast.PragmaBlock('#pragma omp parallel', body.takeChildNodes())
-    body.append(wrapperBlock)
-
-    outerLoops = [l for l in body.atoms(ast.LoopOverCoordinate) if l.isOutermostLoop]
-    assert outerLoops, "No outer loop found"
-    assert len(outerLoops) <= 1, "More than one outer loop found. Which one should be parallelized?"
-    outerLoops[0].prefixLines.append("#pragma omp for schedule(static)")
-
-
 def typeAllEquations(eqs, typeForSymbol):
+    """
+    Traverses AST and replaces every :class:`sympy.Symbol` by a :class:`pystencils.typedsymbol.TypedSymbol`.
+    Additionally returns sets of all fields which are read/written
+
+    :param eqs: list of equations
+    :param typeForSymbol: dict mapping symbol names to types. Types are strings of C types like 'int' or 'double'
+    :return: ``fieldsRead, fieldsWritten, typedEquations`` set of read fields, set of written fields, list of equations
+               where symbols have been replaced by typed symbols
+    """
     fieldsWritten = set()
     fieldsRead = set()
 
@@ -361,7 +390,17 @@ def typeAllEquations(eqs, typeForSymbol):
     return fieldsRead, fieldsWritten, typedEquations
 
 
+# --------------------------------------- Helper Functions -------------------------------------------------------------
+
+
 def typingFromSympyInspection(eqs, defaultType="double"):
+    """
+    Creates a default symbol name to type mapping.
+    If a sympy Boolean is assigned to a symbol it is assumed to be 'bool' otherwise the default type, usually ('double')
+    :param eqs: list of equations
+    :param defaultType: the type for non-boolean symbols
+    :return: dictionary, mapping symbol name to type
+    """
     result = defaultdict(lambda: defaultType)
     for eq in eqs:
         if isinstance(eq.rhs, Boolean):
@@ -369,49 +408,10 @@ def typingFromSympyInspection(eqs, defaultType="double"):
     return result
 
 
-def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, splitGroups=[]):
-    if not typeForSymbol:
-        typeForSymbol = typingFromSympyInspection(listOfEquations, "double")
-
-    def typeSymbol(term):
-        if isinstance(term, Field.Access) or isinstance(term, TypedSymbol):
-            return term
-        elif isinstance(term, sp.Symbol):
-            return TypedSymbol(term.name, typeForSymbol[term.name])
-        else:
-            raise ValueError("Term has to be field access or symbol")
-
-    fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol)
-    allFields = fieldsRead.union(fieldsWritten)
-
-    for field in allFields:
-        field.setReadOnly(False)
-    for field in fieldsRead - fieldsWritten:
-        field.setReadOnly()
-
-    body = ast.Block(assignments)
-    code = makeLoopOverDomain(body, functionName)
-
-    if splitGroups:
-        typedSplitGroups = [[typeSymbol(s) for s in splitGroup] for splitGroup in splitGroups]
-        splitInnerLoop(code, typedSplitGroups)
-
-    loopOrder = getOptimalLoopOrdering(allFields)
-
-    basePointerInfo = [['spatialInner0'], ['spatialInner1']]
-    basePointerInfos = {field.name: parseBasePointerInfo(basePointerInfo, loopOrder, field) for field in allFields}
-
-    resolveFieldAccesses(code, fieldToBasePointerInfo=basePointerInfos)
-    moveConstantsBeforeLoop(code)
-    addOpenMP(code)
-
-    return code
-
-
-# --------------------------------------- Helper Functions -------------------------------------------------------------
-
-
 def getNextParentOfType(node, parentType):
+    """
+    Traverses the AST nodes parents until a parent of given type was found. If no such parent is found, None is returned
+    """
     parent = node.parent
     while parent is not None:
         if isinstance(parent, parentType):
@@ -421,6 +421,12 @@ def getNextParentOfType(node, parentType):
 
 
 def getOptimalLoopOrdering(fields):
+    """
+    Determines the optimal loop order for a given set of fields.
+    If the fields have different memory layout or different sizes an exception is thrown.
+    :param fields: sequence of fields
+    :return: list of coordinate ids, where the first list entry should be the innermost loop
+    """
     assert len(fields) > 0
     refField = next(iter(fields))
     for field in fields:
@@ -434,9 +440,13 @@ def getOptimalLoopOrdering(fields):
     return list(reversed(layout))
 
 
-def getLoopHierarchy(block):
+def getLoopHierarchy(astNode):
+    """Determines the loop structure around a given AST node.
+    :param astNode: the AST node
+    :return: list of coordinate ids, where the first list entry is the innermost loop
+    """
     result = []
-    node = block
+    node = astNode
     while node is not None:
         node = getNextParentOfType(node, ast.LoopOverCoordinate)
         if node:
-- 
GitLab