From 235b906220255ffa47cf2fc7b7703f2d8984beec Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Thu, 16 Feb 2017 16:48:53 +0100
Subject: [PATCH] lbmpy: cuda & square channel scenario

---
 cpu/cpujit.py             |  2 +-
 cpu/kernelcreation.py     |  1 -
 gpucuda/cudajit.py        |  2 +-
 gpucuda/kernelcreation.py |  8 +++-----
 slicing.py                | 20 ++++++++++++++++++++
 5 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/cpu/cpujit.py b/cpu/cpujit.py
index 789d08a64..a0bccbc21 100644
--- a/cpu/cpujit.py
+++ b/cpu/cpujit.py
@@ -12,7 +12,7 @@ import hashlib
 from pystencils.transformations import symbolNameToVariableName
 
 CONFIG_GCC = {
-    'compiler': 'g++',
+    'compiler': 'g++-4.8',
     'flags': '-Ofast -DNDEBUG -fPIC -shared -march=native -fopenmp',
 }
 CONFIG_INTEL = {
diff --git a/cpu/kernelcreation.py b/cpu/kernelcreation.py
index b3340a39b..f5fb46569 100644
--- a/cpu/kernelcreation.py
+++ b/cpu/kernelcreation.py
@@ -43,7 +43,6 @@ def createKernel(listOfEquations, functionName="kernel", typeForSymbol=None, spl
 
     fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol)
     allFields = fieldsRead.union(fieldsWritten)
-
     readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten])
 
     body = ast.Block(assignments)
diff --git a/gpucuda/cudajit.py b/gpucuda/cudajit.py
index 71c00832e..d7385eb13 100644
--- a/gpucuda/cudajit.py
+++ b/gpucuda/cudajit.py
@@ -58,7 +58,7 @@ def buildNumpyArgumentList(kernelFunctionNode, argumentDict):
 
 
 def makePythonFunction(kernelFunctionNode, argumentDict={}):
-    mod = SourceModule(str(generateCUDA(kernelFunctionNode)))
+    mod = SourceModule(str(generateCUDA(kernelFunctionNode)), options=["-w"])
     func = mod.get_function(kernelFunctionNode.functionName)
 
     def wrapper(**kwargs):
diff --git a/gpucuda/kernelcreation.py b/gpucuda/kernelcreation.py
index 43118652e..09d1efdf1 100644
--- a/gpucuda/kernelcreation.py
+++ b/gpucuda/kernelcreation.py
@@ -25,7 +25,7 @@ def getLinewiseCoordinates(field, ghostLayers):
                 return arrShape[result.index(cudaIdx)] - 2 * ghostLayers
 
         return {'block': tuple([getShapeOfCudaIdx(idx) for idx in THREAD_IDX]),
-                'grid': tuple([getShapeOfCudaIdx(idx) for idx in BLOCK_IDX]) }
+                'grid': tuple([getShapeOfCudaIdx(idx) for idx in BLOCK_IDX])}
 
     return [i + ghostLayers for i in result], getCallParameters
 
@@ -37,18 +37,16 @@ def createCUDAKernel(listOfEquations, functionName="kernel", typeForSymbol=None)
         typeForSymbol = typingFromSympyInspection(listOfEquations, "float")
 
     fieldsRead, fieldsWritten, assignments = typeAllEquations(listOfEquations, typeForSymbol)
-    readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten])
-
     allFields = fieldsRead.union(fieldsWritten)
+    readOnlyFields = set([f.name for f in fieldsRead - fieldsWritten])
 
-    code = KernelFunction(Block(assignments), fieldsRead.union(fieldsWritten), functionName)
+    code = KernelFunction(Block(assignments), allFields, functionName)
     code.globalVariables.update(BLOCK_IDX + THREAD_IDX)
 
     fieldAccesses = code.atoms(Field.Access)
     requiredGhostLayers = max([fa.requiredGhostLayers for fa in fieldAccesses])
 
     coordMapping, getCallParameters = getLinewiseCoordinates(list(fieldsRead)[0], requiredGhostLayers)
-    allFields = fieldsRead.union(fieldsWritten)
     basePointerInfo = [['spatialInner0']]
     basePointerInfos = {f.name: parseBasePointerInfo(basePointerInfo, [2, 1, 0], f) for f in allFields}
 
diff --git a/slicing.py b/slicing.py
index ad792f6f4..7eebb38ea 100644
--- a/slicing.py
+++ b/slicing.py
@@ -1,4 +1,5 @@
 import sympy as sp
+import numpy as np
 
 
 class SliceMaker(object):
@@ -78,3 +79,22 @@ def sliceFromDirection(directionName, dim, normalOffset=0, tangentialOffset=0):
             assert lowName not in directionName, "Invalid direction name"
             result[dimIdx] = normalSliceHigh
     return tuple(result)
+
+
+def removeGhostLayers(arr, indexDimensions=0, ghostLayers=1):
+    """Return arr with `ghostLayers` cells cut off both ends of each spatial axis; trailing index axes are kept."""
+    spatialDimensions = len(arr.shape) - indexDimensions  # the last `indexDimensions` axes are not spatial
+    indexing = [slice(ghostLayers, -ghostLayers if ghostLayers else None)] * spatialDimensions  # -0 selects nothing
+    indexing += [slice(None)] * indexDimensions
+    return arr[tuple(indexing)]  # tuple, not list: NumPy >= 1.23 rejects a list of slices as an index
+
+
+def addGhostLayers(arr, indexDimensions=0, ghostLayers=1):
+    """Return a new np.zeros array holding arr, padded by `ghostLayers` zero cells on both ends of each spatial axis."""
+    spatialDimensions = len(arr.shape) - indexDimensions  # the last `indexDimensions` axes are not padded
+    newShape = [e + 2 * ghostLayers for e in arr.shape[:spatialDimensions]] + list(arr.shape[spatialDimensions:])
+    result = np.zeros(newShape)
+    indexing = [slice(ghostLayers, -ghostLayers if ghostLayers else None)] * spatialDimensions  # -0 selects nothing
+    indexing += [slice(None)] * indexDimensions
+    result[tuple(indexing)] = arr  # tuple, not list: NumPy >= 1.23 rejects a list of slices as an index
+    return result
-- 
GitLab