Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
ExaStencils
exastencils-release
Commits
0b331211
Commit
0b331211
authored
Feb 21, 2022
by
Sebastian Kuckuk
Browse files
fix for cuda zero copy: adapt device allocations only when corresponding host field is allocated
parent
88264d30
Changes
1
Hide whitespace changes
Inline
Side-by-side
Compiler/src/exastencils/parallelization/api/cuda/CUDA_Memory.scala
View file @
0b331211
...
...
@@ -162,17 +162,34 @@ object CUDA_AdaptDeviceAccessesForMM extends DefaultStrategy("Adapt allocations
/// CUDA_AdaptAllocations
object
CUDA_AdaptAllocations
extends
DefaultStrategy
(
"Adapt allocations and de-allocations on host and device"
)
{
// Fields for which an IR_ArrayAllocation of their IR_IV_FieldData was found by the
// "Scanning host allocations" transformation; consulted when adapting
// CUDA_FieldDeviceData allocations and frees.
var fieldHostAllocations : ListBuffer[IR_Field] = ListBuffer.empty[IR_Field]

// Fields consulted when adapting CUDA_BufferDeviceData allocations and frees.
var bufferHostAllocations : ListBuffer[IR_Field] = ListBuffer.empty[IR_Field]
// Empty both bookkeeping lists in the onBefore hook (presumably invoked before each
// application of this strategy - confirm against DefaultStrategy) so that entries
// collected earlier do not leak into the next scan.
this.onBefore = () => {
  for (recorded <- List(fieldHostAllocations, bufferHostAllocations))
    recorded.clear()
}
// First pass: remember for which fields an array allocation is present.
// The matched allocation nodes themselves are returned unchanged.
this += new Transformation("Scanning host allocations", {
  // allocation of a field's data array
  case alloc @ IR_ArrayAllocation(pointer : IR_IV_FieldData, _, _) =>
    fieldHostAllocations += pointer.field
    alloc

  // allocation of a field's communication buffer; these must be recorded in
  // bufferHostAllocations (not fieldHostAllocations), since that is the list
  // checked for CUDA_BufferDeviceData allocations and frees
  case alloc @ IR_ArrayAllocation(pointer : IR_IV_CommBuffer, _, _) =>
    bufferHostAllocations += pointer.field
    alloc
})
// Second pass: with Knowledge.cuda_useZeroCopy enabled, rewrite device allocations and
// frees, but only for fields that were recorded in the corresponding host allocation
// list. Nodes that do not satisfy a guard are not matched here and stay untouched.
this += new Transformation("Adapting", {
  // field data: replace the device allocation by a CUDA_GetDevPointer node built from
  // the allocation's pointer and the matching IR_IV_FieldData
  case alloc @ CUDA_Allocate(fieldData : CUDA_FieldDeviceData, _, _)
    if Knowledge.cuda_useZeroCopy && fieldHostAllocations.contains(fieldData.field) =>
    CUDA_GetDevPointer(
      alloc.pointer,
      IR_IV_FieldData(fieldData.field, fieldData.slot, fieldData.fragmentIdx))

  // communication buffers: same replacement, using the matching IR_IV_CommBuffer
  case alloc @ CUDA_Allocate(bufferData : CUDA_BufferDeviceData, _, _)
    if Knowledge.cuda_useZeroCopy && bufferHostAllocations.contains(bufferData.field) =>
    CUDA_GetDevPointer(
      alloc.pointer,
      IR_IV_CommBuffer(bufferData.field, bufferData.direction, bufferData.size, bufferData.neighIdx, bufferData.fragmentIdx))

  // frees of adapted field data are replaced by IR_NullStatement
  case CUDA_Free(fieldData : CUDA_FieldDeviceData)
    if Knowledge.cuda_useZeroCopy && fieldHostAllocations.contains(fieldData.field) =>
    IR_NullStatement

  // frees of adapted communication buffers are replaced by IR_NullStatement
  case CUDA_Free(bufferData : CUDA_BufferDeviceData)
    if Knowledge.cuda_useZeroCopy && bufferHostAllocations.contains(bufferData.field) =>
    IR_NullStatement
})
...
...
@@ -193,4 +210,4 @@ object CUDA_AdaptAllocations extends DefaultStrategy("Adapt allocations and de-a
else
free
})
}
\ No newline at end of file
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment