From 6860c7d2ef092e014de44646b46c57649984e9c4 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Sat, 9 Dec 2017 19:46:01 +0100
Subject: [PATCH] LB boundary generation for waLBerla

---
 field.py                           | 27 ++++++++++++++++++++-------
 gpucuda/cudajit.py                 |  8 +++++---
 gpucuda/indexing.py                | 14 +++++++++++---
 sympyextensions.py                 |  8 ++++++++
 transformations/transformations.py |  7 ++++---
 5 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/field.py b/field.py
index 3fb5b72e3..b0f61e597 100644
--- a/field.py
+++ b/field.py
@@ -4,6 +4,7 @@ import sympy as sp
 from sympy.core.cache import cacheit
 from sympy.tensor import IndexedBase
 from pystencils.data_types import TypedSymbol, createType
+from pystencils.sympyextensions import isIntegerSequence
 
 
 class Field(object):
@@ -49,7 +50,8 @@ class Field(object):
     """
 
     @staticmethod
-    def createGeneric(fieldName, spatialDimensions, dtype=np.float64, indexDimensions=0, layout='numpy'):
+    def createGeneric(fieldName, spatialDimensions, dtype=np.float64, indexDimensions=0, layout='numpy',
+                      indexShape=None):
         """
         Creates a generic field where the field size is not fixed i.e. can be called with arrays of different sizes
 
@@ -60,13 +62,20 @@ class Field(object):
         :param layout: tuple specifying the loop ordering of the spatial dimensions e.g. (2, 1, 0 ) means that
                        the outer loop loops over dimension 2, the second outer over dimension 1, and the inner loop
                        over dimension 0. Also allowed: the strings 'numpy' (0,1,..d) or 'reverseNumpy' (d, ..., 1, 0)
+        :param indexShape: optional shape of the index dimensions i.e. maximum values allowed for each index dimension,
+                           has to be a list or tuple
         """
         if isinstance(layout, str):
             layout = spatialLayoutStringToTuple(layout, dim=spatialDimensions)
         shapeSymbol = IndexedBase(TypedSymbol(Field.SHAPE_PREFIX + fieldName, Field.SHAPE_DTYPE), shape=(1,))
         strideSymbol = IndexedBase(TypedSymbol(Field.STRIDE_PREFIX + fieldName, Field.STRIDE_DTYPE), shape=(1,))
         totalDimensions = spatialDimensions + indexDimensions
-        shape = tuple([shapeSymbol[i] for i in range(totalDimensions)])
+        if indexShape is None or len(indexShape) == 0:
+            shape = tuple([shapeSymbol[i] for i in range(totalDimensions)])
+        else:
+            shape = tuple([shapeSymbol[i] for i in range(spatialDimensions)] + list(indexShape))
+            assert len(shape) == totalDimensions
+
         strides = tuple([strideSymbol[i] for i in range(totalDimensions)])
 
         npDataType = np.dtype(dtype)
@@ -173,18 +182,22 @@ class Field(object):
     def spatialShape(self):
         return self.shape[:self.spatialDimensions]
 
+    @property
+    def indexShape(self):
+        return self.shape[self.spatialDimensions:]
+
     @property
     def hasFixedShape(self):
-        try:
-            [int(i) for i in self.shape]
-            return True
-        except TypeError:
-            return False
+        return isIntegerSequence(self.shape)
 
     @property
     def indexShape(self):
         return self.shape[self.spatialDimensions:]
 
+    @property
+    def hasFixedIndexShape(self):
+        return isIntegerSequence(self.indexShape)
+
     @property
     def spatialStrides(self):
         return self.strides[:self.spatialDimensions]
diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py
index e75603665..b815f5c33 100644
--- a/gpucuda/cudajit.py
+++ b/gpucuda/cudajit.py
@@ -1,7 +1,4 @@
 import numpy as np
-import pycuda.driver as cuda
-import pycuda.autoinit
-from pycuda.compiler import SourceModule
 from pystencils.backends.cbackend import generateC
 from pystencils.transformations import symbolNameToVariableName
 from pystencils.data_types import StructType, getBaseType
@@ -18,6 +15,9 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}):
                         returned kernel functor.
     :return: kernel functor
     """
+    import pycuda.autoinit
+    from pycuda.compiler import SourceModule
+
     code = "#include <cstdint>\n"
     code += "#define FUNC_PREFIX __global__\n"
     code += "#define RESTRICT __restrict__\n\n"
@@ -55,6 +55,8 @@ def makePythonFunction(kernelFunctionNode, argumentDict={}):
 
 
 def _buildNumpyArgumentList(parameters, argumentDict):
+    import pycuda.driver as cuda
+
     argumentDict = {symbolNameToVariableName(k): v for k, v in argumentDict.items()}
     result = []
     for arg in parameters:
diff --git a/gpucuda/indexing.py b/gpucuda/indexing.py
index 000be750d..d61d5353b 100644
--- a/gpucuda/indexing.py
+++ b/gpucuda/indexing.py
@@ -1,13 +1,13 @@
 import abc
 
 import sympy as sp
-import pycuda.driver as cuda
-import pycuda.autoinit
 
 from pystencils.astnodes import Conditional, Block
 from pystencils.slicing import normalizeSlice
 from pystencils.data_types import TypedSymbol, createTypeFromString
 
+AUTO_BLOCKSIZE_LIMITING = True
+
 BLOCK_IDX = [TypedSymbol("blockIdx." + coord, createTypeFromString("int")) for coord in ('x', 'y', 'z')]
 THREAD_IDX = [TypedSymbol("threadIdx." + coord, createTypeFromString("int")) for coord in ('x', 'y', 'z')]
 
@@ -71,7 +71,9 @@ class BlockIndexing(AbstractIndexing):
         if permuteBlockSizeDependentOnLayout:
             blockSize = self.permuteBlockSizeAccordingToLayout(blockSize, field.layout)
 
-        blockSize = self.limitBlockSizeToDeviceMaximum(blockSize)
+        if AUTO_BLOCKSIZE_LIMITING:
+            blockSize = self.limitBlockSizeToDeviceMaximum(blockSize)
+            
         self._blockSize = blockSize
         self._iterationSlice = normalizeSlice(iterationSlice, field.spatialShape)
         self._dim = field.spatialDimensions
@@ -118,6 +120,9 @@ class BlockIndexing(AbstractIndexing):
         Returns the altered blockSize
         """
         # Get device limits
+        import pycuda.driver as cuda
+        import pycuda.autoinit
+
         da = cuda.device_attribute
         device = cuda.Context.get_device()
 
@@ -169,6 +174,9 @@ class BlockIndexing(AbstractIndexing):
         They can be obtained by ``func.num_regs`` from a pycuda function.
         :returns smaller blockSize if too many registers are used.
         """
+        import pycuda.driver as cuda
+        import pycuda.autoinit
+
         da = cuda.device_attribute
         if device is None:
             device = cuda.Context.get_device()
diff --git a/sympyextensions.py b/sympyextensions.py
index a45ede9f8..f56781e67 100644
--- a/sympyextensions.py
+++ b/sympyextensions.py
@@ -16,6 +16,14 @@ def allIn(a, b):
     return all(element in b for element in a)
 
 
+def isIntegerSequence(sequence):
+    try:
+        [int(i) for i in sequence]
+        return True
+    except TypeError:
+        return False
+
+
 def scalarProduct(a, b):
     return sum(a_i * b_i for a_i, b_i in zip(a, b))
 
diff --git a/transformations/transformations.py b/transformations/transformations.py
index a1202dd75..d41cadc01 100644
--- a/transformations/transformations.py
+++ b/transformations/transformations.py
@@ -306,8 +306,6 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn
             dtype = PointerType(field.dtype, const=field.name in readOnlyFieldNames, restrict=True)
             fieldPtr = TypedSymbol("%s%s" % (Field.DATA_PREFIX, symbolNameToVariableName(field.name)), dtype)
 
-            lastPointer = fieldPtr
-
             def createCoordinateDict(group):
                 coordDict = {}
                 for e in group:
@@ -328,6 +326,8 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn
                             coordDict[e] = fieldAccess.index[e - field.spatialDimensions]
                 return coordDict
 
+            lastPointer = fieldPtr
+
             for group in reversed(basePointerInfo[1:]):
                 coordDict = createCoordinateDict(group)
                 newPtr, offset = createIntermediateBasePointer(fieldAccess, coordDict, lastPointer)
@@ -338,7 +338,8 @@ def resolveFieldAccesses(astNode, readOnlyFieldNames=set(), fieldToBasePointerIn
 
             coordDict = createCoordinateDict(basePointerInfo[0])
             _, offset = createIntermediateBasePointer(fieldAccess, coordDict, lastPointer)
-            result = ast.ResolvedFieldAccess(lastPointer, offset, fieldAccess.field, fieldAccess.offsets, fieldAccess.index)
+            result = ast.ResolvedFieldAccess(lastPointer, offset, fieldAccess.field,
+                                             fieldAccess.offsets, fieldAccess.index)
 
             if isinstance(getBaseType(fieldAccess.field.dtype), StructType):
                 newType = fieldAccess.field.dtype.getElementType(fieldAccess.index[0])
-- 
GitLab