Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
ExaStencils
exastencils-release
Commits
7a759c1f
Commit
7a759c1f
authored
Feb 23, 2022
by
Richard Angersbach
Browse files
Use deep copies of reduction targets for safety.
parent
46a3df8e
Changes
5
Hide whitespace changes
Inline
Side-by-side
Compiler/src/exastencils/optimization/ir/IR_Vectorization.scala
View file @
7a759c1f
...
...
@@ -262,7 +262,7 @@ private object VectorizeInnermost extends PartialFunction[Node, Transformation.O
val
ctx
=
new
LoopCtx
(
itVar
,
incr
)
var
postLoopStmt
:
IR_Statement
=
null
if
(
reduction
.
isDefined
)
{
val
target
=
reduction
.
get
.
target
val
target
=
Duplicate
(
reduction
.
get
.
target
)
val
operator
=
reduction
.
get
.
op
val
(
vecTmp
:
String
,
true
)
=
ctx
.
getName
(
target
)
...
...
Compiler/src/exastencils/parallelization/api/cuda/CUDA_ExtractDeviceCode.scala
View file @
7a759c1f
...
...
@@ -140,11 +140,15 @@ object CUDA_ExtractHostAndDeviceCode extends DefaultStrategy("Transform annotate
val
kernelCount
=
kernelFunctions
.
counterMap
.
getOrElse
(
fctNameCollector
.
getCurrentName
,
-
1
)
+
1
val
reduction
=
loop
.
parallelization
.
reduction
val
reduction
=
Duplicate
(
loop
.
parallelization
.
reduction
)
val
redTarget
=
if
(
reduction
.
isDefined
)
Some
(
Duplicate
(
reduction
.
get
.
target
))
else
None
// local variable for kernels with reductions
val
localTarget
=
if
(
reduction
.
isDefined
)
Some
(
IR_VariableAccess
(
reduction
.
get
.
targetName
+
"_local_"
+
kernelCount
,
CUDA_Util
.
getReductionDatatype
(
red
uction
.
get
.
tar
get
)))
Some
(
IR_VariableAccess
(
reduction
.
get
.
targetName
+
"_local_"
+
kernelCount
,
CUDA_Util
.
getReductionDatatype
(
red
Tar
get
.
get
)))
else
None
...
...
@@ -152,17 +156,17 @@ object CUDA_ExtractHostAndDeviceCode extends DefaultStrategy("Transform annotate
CUDA_GatherVariableAccesses
.
clear
()
CUDA_GatherVariableAccesses
.
kernelCount
=
kernelCount
if
(
reduction
.
isDefined
)
CUDA_GatherVariableAccesses
.
reductionTarget
=
Some
(
reduction
.
get
.
t
arget
)
CUDA_GatherVariableAccesses
.
reductionTarget
=
redT
arget
CUDA_GatherVariableAccesses
.
applyStandalone
(
IR_Scope
(
loop
))
// declare and init local reduction target
if
(
localTarget
.
isDefined
)
{
var
decl
=
IR_VariableDeclaration
(
localTarget
.
get
)
var
initLocalTarget
=
CUDA_Util
.
getReductionDatatype
(
red
uction
.
get
.
tar
get
)
match
{
var
initLocalTarget
=
CUDA_Util
.
getReductionDatatype
(
red
Tar
get
.
get
)
match
{
case
_
:
IR_ScalarDatatype
=>
ListBuffer
[
IR_Statement
](
IR_Assignment
(
localTarget
.
get
,
red
uction
.
get
.
tar
get
))
ListBuffer
[
IR_Statement
](
IR_Assignment
(
localTarget
.
get
,
red
Tar
get
.
get
))
case
mat
:
IR_MatrixDatatype
=>
red
uction
.
get
.
tar
get
match
{
red
Tar
get
.
get
match
{
case
vAcc
:
IR_VariableAccess
=>
IR_GenerateBasicMatrixOperations
.
loopSetSubmatrixMatPointer
(
vAcc
,
localTarget
.
get
,
mat
.
sizeN
,
mat
.
sizeM
,
mat
.
sizeN
,
0
,
0
).
body
...
...
@@ -218,7 +222,7 @@ object CUDA_ExtractHostAndDeviceCode extends DefaultStrategy("Transform annotate
// replace array accesses with accesses to function arguments
// reduction var is not replaced, but later in IR_HandleReductions
if
(
reduction
.
isDefined
)
CUDA_ReplaceNonReductionVarArrayAccesses
.
reductionTarget
=
Some
(
reduction
.
get
.
t
arget
)
CUDA_ReplaceNonReductionVarArrayAccesses
.
reductionTarget
=
redT
arget
else
CUDA_ReplaceNonReductionVarArrayAccesses
.
reductionTarget
=
None
CUDA_ReplaceNonReductionVarArrayAccesses
.
applyStandalone
(
IR_Scope
(
kernelBody
))
...
...
Compiler/src/exastencils/parallelization/api/cuda/CUDA_HandleFragmentLoopsWithReduction.scala
View file @
7a759c1f
...
...
@@ -38,17 +38,18 @@ case class CUDA_HandleFragmentLoopsWithReduction(
val
iter
=
IR_LoopOverFragments
.
defIt
val
redTarget
=
Duplicate
(
reduction
.
target
)
val
red
=
Duplicate
(
reduction
)
val
redTarget
=
Duplicate
(
red
.
target
)
val
reductionDt
=
CUDA_Util
.
getReductionDatatype
(
redTarget
)
val
counter
=
CUDA_HandleFragmentLoopsWithReduction
.
getReductionCounter
(
red
uction
.
targetName
)
val
counter
=
CUDA_HandleFragmentLoopsWithReduction
.
getReductionCounter
(
red
.
targetName
)
val
copies
=
{
val
innerDt
=
reductionDt
match
{
case
scalar
:
IR_ScalarDatatype
=>
scalar
case
hodt
:
IR_HigherDimensionalDatatype
=>
IR_ArrayDatatype
(
hodt
.
resolveBaseDatatype
,
hodt
.
getSizeArray
.
product
)
}
IR_VariableAccess
(
red
uction
.
targetName
+
"_"
+
counter
,
IR_ArrayDatatype
(
innerDt
,
Knowledge
.
domain_numFragmentsPerBlock
))
IR_VariableAccess
(
red
.
targetName
+
"_
fragCpy
"
+
counter
,
IR_ArrayDatatype
(
innerDt
,
Knowledge
.
domain_numFragmentsPerBlock
))
}
val
currCopy
=
IR_ArrayAccess
(
copies
,
iter
)
...
...
@@ -107,10 +108,10 @@ case class CUDA_HandleFragmentLoopsWithReduction(
val
src
=
IR_ArrayAccess
(
currCopy
,
idx
)
IR_ForLoop
(
IR_VariableDeclaration
(
i
,
IR_IntegerConstant
(
0
)),
IR_Lower
(
i
,
mat
.
sizeM
),
IR_PreIncrement
(
i
),
ListBuffer
[
IR_Statement
](
IR_ForLoop
(
IR_VariableDeclaration
(
j
,
0
),
IR_Lower
(
j
,
mat
.
sizeN
),
IR_PreIncrement
(
j
),
ListBuffer
[
IR_Statement
](
IR_Assignment
(
dst
,
IR_BinaryOperators
.
createExpression
(
red
uction
.
op
,
dst
,
src
))))))
IR_Assignment
(
dst
,
IR_BinaryOperators
.
createExpression
(
red
.
op
,
dst
,
src
))))))
case
_
:
IR_ScalarDatatype
=>
IR_Assignment
(
redTarget
,
IR_BinaryOperators
.
createExpression
(
red
uction
.
op
,
redTarget
,
currCopy
))
IR_Assignment
(
redTarget
,
IR_BinaryOperators
.
createExpression
(
red
.
op
,
redTarget
,
currCopy
))
}
body
:+
assign
...
...
@@ -118,21 +119,21 @@ case class CUDA_HandleFragmentLoopsWithReduction(
def
replaceAccesses
(
body
:
ListBuffer
[
IR_Statement
])
=
{
// replace occurrences
CUDA_ReplaceReductionAccesses
.
redTarget
=
redTarget
CUDA_ReplaceReductionAccesses
.
replacement
=
currCopy
CUDA_ReplaceReductionAccesses
.
redTarget
=
Duplicate
(
redTarget
)
CUDA_ReplaceReductionAccesses
.
replacement
=
Duplicate
(
currCopy
)
CUDA_ReplaceReductionAccesses
.
applyStandalone
(
IR_Scope
(
body
))
}
def
addHandling
(
loop
:
IR_ForLoop
)
=
{
replaceAccesses
(
loop
.
body
)
loop
.
body
=
finalizeReduction
(
loop
.
body
)
initCopies
()
:+
loop
initCopies
()
:+
Duplicate
(
loop
)
}
def
addHandling
(
loop
:
IR_LoopOverFragments
)
=
{
replaceAccesses
(
loop
.
body
)
loop
.
body
=
finalizeReduction
(
loop
.
body
)
initCopies
()
:+
loop
initCopies
()
:+
Duplicate
(
loop
)
}
override
def
expand
()
:
OutputType
=
{
...
...
Compiler/src/exastencils/parallelization/api/cuda/CUDA_Kernel.scala
View file @
7a759c1f
...
...
@@ -593,12 +593,12 @@ case class CUDA_Kernel(
var
body
=
ListBuffer
[
IR_Statement
]()
if
(
reduction
.
isDefined
)
{
def
target
=
reduction
.
get
.
target
def
resultDt
=
CUDA_Util
.
getReductionDatatype
(
target
)
def
baseDt
=
resultDt
.
resolveBaseDatatype
val
target
=
Duplicate
(
reduction
.
get
.
target
)
val
resultDt
=
CUDA_Util
.
getReductionDatatype
(
target
)
val
baseDt
=
resultDt
.
resolveBaseDatatype
def
bufSize
=
requiredThreadsPerDim
.
product
def
bufAccess
=
CUDA_ReductionDeviceData
(
bufSize
,
resultDt
)
val
bufSize
=
requiredThreadsPerDim
.
product
val
bufAccess
=
CUDA_ReductionDeviceData
(
bufSize
,
resultDt
)
var
callArgsReduction
=
ListBuffer
[
IR_Expression
](
bufAccess
,
bufSize
)
body
+=
CUDA_Memset
(
bufAccess
,
0
,
bufSize
,
resultDt
)
...
...
@@ -621,7 +621,7 @@ case class CUDA_Kernel(
IR_Return
(
Some
(
callDefaultReductionKernel
))
})
CUDA_KernelFunctions
.
get
.
requiredRedKernels
+=
Tuple2
(
reduction
.
get
.
op
,
target
)
// request reduction kernel and wrapper
CUDA_KernelFunctions
.
get
.
requiredRedKernels
+=
Tuple2
(
reduction
.
get
.
op
,
Duplicate
(
target
)
)
// request reduction kernel and wrapper
}
else
{
body
+=
CUDA_FunctionCall
(
getKernelFctName
,
callArgs
,
numBlocksPerDim
,
numThreadsPerBlock
)
}
...
...
Compiler/src/exastencils/parallelization/api/cuda/CUDA_Reduction.scala
View file @
7a759c1f
...
...
@@ -88,8 +88,8 @@ object CUDA_HandleReductions extends DefaultStrategy("Handle reductions in devic
}
// update local target
CUDA_ReplaceReductionAssignments
.
redTarget
=
target
CUDA_ReplaceReductionAssignments
.
replacement
=
kernel
.
localReductionTarget
.
get
CUDA_ReplaceReductionAssignments
.
redTarget
=
Duplicate
(
target
)
CUDA_ReplaceReductionAssignments
.
replacement
=
Duplicate
(
kernel
.
localReductionTarget
.
get
)
CUDA_ReplaceReductionAssignments
.
applyStandalone
(
IR_Scope
(
kernel
.
body
))
// set element in global reduction buffer to local result
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment