Commit 4305fc5e authored by Richard Angersbach
Browse files

Add handling for vectorized reduction variable array accesses.

parent 4d2b5b3c
...@@ -79,6 +79,7 @@ private object VectorizeInnermost extends PartialFunction[Node, Transformation.O ...@@ -79,6 +79,7 @@ private object VectorizeInnermost extends PartialFunction[Node, Transformation.O
case ex : VectorizationException => case ex : VectorizationException =>
if (DEBUG) { if (DEBUG) {
val msg : String = "[vect] unable to vectorize loop: " + ex.msg + " (line " + ex.getStackTrace()(0).getLineNumber + ')' val msg : String = "[vect] unable to vectorize loop: " + ex.msg + " (line " + ex.getStackTrace()(0).getLineNumber + ')'
Logger.warn(msg)
println(msg) // print directly, logger may be silenced by any surrounding strategy println(msg) // print directly, logger may be silenced by any surrounding strategy
return List(IR_Comment(msg), node) return List(IR_Comment(msg), node)
} }
...@@ -144,6 +145,8 @@ private object VectorizeInnermost extends PartialFunction[Node, Transformation.O ...@@ -144,6 +145,8 @@ private object VectorizeInnermost extends PartialFunction[Node, Transformation.O
private var alignedResidue : Long = -1 private var alignedResidue : Long = -1
private val nameTempl : String = "_vec%02d" private val nameTempl : String = "_vec%02d"
private var reductionVarArrayAccesses : Option[IR_ArrayAccess] = None
// init // init
pushScope() pushScope()
...@@ -241,6 +244,14 @@ private object VectorizeInnermost extends PartialFunction[Node, Transformation.O ...@@ -241,6 +244,14 @@ private object VectorizeInnermost extends PartialFunction[Node, Transformation.O
def getAlignedResidue() : Long = { def getAlignedResidue() : Long = {
alignedResidue alignedResidue
} }
/**
  * Records the array access that serves as the reduction target of the loop
  * handled by this context.
  *
  * The access is wrapped in `Some` and stored in `reductionVarArrayAccesses`;
  * any previously stored access is overwritten.
  *
  * @param arrAcc the array access to remember as reduction target
  */
def setReductionArrayAccess(arrAcc : IR_ArrayAccess) : Unit = {
  reductionVarArrayAccesses = Some(arrAcc)
}
/**
  * Returns the array access previously stored via `setReductionArrayAccess`.
  *
  * @return `Some(access)` if a reduction array access was stored in this context,
  *         `None` otherwise (the backing field is initialised to `None`)
  */
def getReductionArrayAccess() : Option[IR_ArrayAccess] = {
  reductionVarArrayAccesses
}
} }
private def containsVarAcc(node : IR_Node, varName : String) : Boolean = { private def containsVarAcc(node : IR_Node, varName : String) : Boolean = {
...@@ -264,6 +275,10 @@ private object VectorizeInnermost extends PartialFunction[Node, Transformation.O ...@@ -264,6 +275,10 @@ private object VectorizeInnermost extends PartialFunction[Node, Transformation.O
if (reduction.isDefined) { if (reduction.isDefined) {
val target = Duplicate(reduction.get.target) val target = Duplicate(reduction.get.target)
val operator = reduction.get.op val operator = reduction.get.op
target match {
case arrAcc : IR_ArrayAccess => ctx.setReductionArrayAccess(arrAcc)
case _ =>
}
val (vecTmp : String, true) = ctx.getName(target) val (vecTmp : String, true) = ctx.getName(target)
val identityElem : IR_Expression = val identityElem : IR_Expression =
...@@ -602,6 +617,11 @@ private object VectorizeInnermost extends PartialFunction[Node, Transformation.O ...@@ -602,6 +617,11 @@ private object VectorizeInnermost extends PartialFunction[Node, Transformation.O
private def vectorizeExpr(expr : IR_Expression, ctx : LoopCtx) : IR_Expression = { private def vectorizeExpr(expr : IR_Expression, ctx : LoopCtx) : IR_Expression = {
expr match { expr match {
case arrAcc : IR_ArrayAccess if ctx.getReductionArrayAccess().contains(arrAcc) =>
// vec was already added to ctx and declared
val (vecTmp : String, false) = ctx.getName(expr)
IR_VariableAccess(vecTmp, SIMD_RealDatatype)
// TODO: do not vectorize if base is not aligned? // TODO: do not vectorize if base is not aligned?
case IR_ArrayAccess(base, index, alignedBase) => case IR_ArrayAccess(base, index, alignedBase) =>
val (vecTmp : String, njuTmp : Boolean) = ctx.getName(expr) val (vecTmp : String, njuTmp : Boolean) = ctx.getName(expr)
......
...@@ -87,8 +87,6 @@ case class CUDA_HandleFragmentLoopsWithReduction( ...@@ -87,8 +87,6 @@ case class CUDA_HandleFragmentLoopsWithReduction(
copyReductionTarget()).expandSpecial().inner copyReductionTarget()).expandSpecial().inner
val resetRedTarget = resetReductionTarget() // reset initial value as it is already in the copies val resetRedTarget = resetReductionTarget() // reset initial value as it is already in the copies
initCopies.parallelization.noVect = true
ListBuffer( ListBuffer(
declCopies, declCopies,
initCopies, initCopies,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment