CUDA_Memory.scala 9.36 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
//=============================================================================
//
//  This file is part of the ExaStencils code generation framework. ExaStencils
//  is free software: you can redistribute it and/or modify it under the terms
//  of the GNU General Public License as published by the Free Software
//  Foundation, either version 3 of the License, or (at your option) any later
//  version.
//
//  ExaStencils is distributed in the hope that it will be useful, but WITHOUT
//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
//  more details.
//
//  You should have received a copy of the GNU General Public License along
//  with ExaStencils. If not, see <http://www.gnu.org/licenses/>.
//
//=============================================================================

19
package exastencils.parallelization.api.cuda
Sebastian Kuckuk's avatar
Sebastian Kuckuk committed
20

21
22
import scala.collection.mutable.ListBuffer

Sebastian Kuckuk's avatar
Sebastian Kuckuk committed
23
import exastencils.base.ir.IR_ImplicitConversion._
24
import exastencils.base.ir._
25
import exastencils.baseExt.ir._
26
import exastencils.communication.ir._
27
import exastencils.config.Knowledge
Sebastian Kuckuk's avatar
Sebastian Kuckuk committed
28
29
import exastencils.datastructures.DefaultStrategy
import exastencils.datastructures.Transformation
Sebastian Kuckuk's avatar
Sebastian Kuckuk committed
30
import exastencils.datastructures.Transformation.Output
31
import exastencils.field.ir._
Sebastian Kuckuk's avatar
Sebastian Kuckuk committed
32
33
34
35
36
37

/// CUDA_Allocate

/**
  * Host statement that expands to an error-checked `cudaMalloc` call.
  *
  * @param pointer     expression whose address is passed as the first argument of `cudaMalloc`
  * @param numElements number of elements to allocate
  * @param datatype    element type; `IR_SizeOf(datatype)` scales `numElements` to the requested size
  */
case class CUDA_Allocate(var pointer : IR_Expression, var numElements : IR_Expression, var datatype : IR_Datatype) extends CUDA_HostStatement with IR_Expandable {
  /** Builds the `cudaMalloc` call and wraps it in `CUDA_CheckError`. */
  override def expand() : Output[IR_Statement] = {
    // address of the pointer, cast to a pointer-to-pointer of IR_UnitDatatype
    val target : IR_Expression = IR_Cast(IR_PointerDatatype(IR_PointerDatatype(IR_UnitDatatype)), IR_AddressOf(pointer))
    val sizeInBytes : IR_Expression = numElements * IR_SizeOf(datatype)

    CUDA_CheckError(IR_FunctionCall(IR_ExternalFunctionReference("cudaMalloc"), target, sizeInBytes))
  }
}

/// CUDA_AllocateHost

/**
  * Host statement that expands to an error-checked host-side CUDA allocation.
  *
  * With `Knowledge.cuda_useZeroCopy` set, `cudaHostAlloc` is called with the additional
  * `cudaHostAllocMapped` argument; otherwise `cudaMallocHost` is called.
  *
  * @param pointer     expression whose address is passed as the first argument of the allocation call
  * @param numElements number of elements to allocate
  * @param datatype    element type; `IR_SizeOf(datatype)` scales `numElements` to the requested size
  */
case class CUDA_AllocateHost(var pointer : IR_Expression, var numElements : IR_Expression, var datatype : IR_Datatype) extends CUDA_HostStatement with IR_Expandable {
  /** Selects the allocation function based on the zero-copy flag and wraps the call in `CUDA_CheckError`. */
  override def expand() : Output[IR_Statement] = {
    // both variants share the target pointer and the size argument
    val target : IR_Expression = IR_Cast(IR_PointerDatatype(IR_PointerDatatype(IR_UnitDatatype)), IR_AddressOf(pointer))
    val sizeInBytes : IR_Expression = numElements * IR_SizeOf(datatype)

    val allocCall =
      if (Knowledge.cuda_useZeroCopy)
        IR_FunctionCall(IR_ExternalFunctionReference("cudaHostAlloc"), target, sizeInBytes, "cudaHostAllocMapped")
      else
        IR_FunctionCall(IR_ExternalFunctionReference("cudaMallocHost"), target, sizeInBytes)

    CUDA_CheckError(allocCall)
  }
}

/// CUDA_AllocateManaged

/**
  * Host statement that expands to an error-checked `cudaMallocManaged` call.
  *
  * @param pointer     expression whose address is passed as the first argument of `cudaMallocManaged`
  * @param numElements number of elements to allocate
  * @param datatype    element type; `IR_SizeOf(datatype)` scales `numElements` to the requested size
  */
case class CUDA_AllocateManaged(var pointer : IR_Expression, var numElements : IR_Expression, var datatype : IR_Datatype) extends CUDA_HostStatement with IR_Expandable {
  /** Builds the `cudaMallocManaged` call and wraps it in `CUDA_CheckError`. */
  override def expand() : Output[IR_Statement] = {
    // address of the pointer, cast to a pointer-to-pointer of IR_UnitDatatype
    val target : IR_Expression = IR_Cast(IR_PointerDatatype(IR_PointerDatatype(IR_UnitDatatype)), IR_AddressOf(pointer))
    val sizeInBytes : IR_Expression = numElements * IR_SizeOf(datatype)

    CUDA_CheckError(IR_FunctionCall(IR_ExternalFunctionReference("cudaMallocManaged"), target, sizeInBytes))
  }
}

/// CUDA_Free

/**
  * Host statement that expands to a plain `cudaFree` call.
  *
  * @param pointer expression passed as the only argument of `cudaFree`
  */
case class CUDA_Free(var pointer : IR_Expression) extends CUDA_HostStatement with IR_Expandable {
  /** Emits the call as an expression statement; the result is not wrapped in `CUDA_CheckError`. */
  override def expand() = {
    val freeCall = IR_FunctionCall(IR_ExternalFunctionReference("cudaFree"), pointer)
    IR_ExpressionStatement(freeCall)
  }
}

/// CUDA_FreeHost

/**
  * Host statement that expands to a plain `cudaFreeHost` call.
  *
  * @param pointer expression passed as the only argument of `cudaFreeHost`
  */
case class CUDA_FreeHost(var pointer : IR_Expression) extends CUDA_HostStatement with IR_Expandable {
  /** Emits the call as an expression statement; the result is not wrapped in `CUDA_CheckError`. */
  override def expand() = {
    val freeCall = IR_FunctionCall(IR_ExternalFunctionReference("cudaFreeHost"), pointer)
    IR_ExpressionStatement(freeCall)
  }
}

/// CUDA_Memcpy

/**
  * Host statement that expands to an error-checked `cudaMemcpy` call.
  *
  * @param dest        first argument of the call
  * @param src         second argument of the call
  * @param sizeInBytes third argument of the call
  * @param direction   fourth argument of the call, given as a string that is converted implicitly to an expression
  */
case class CUDA_Memcpy(var dest : IR_Expression, var src : IR_Expression, var sizeInBytes : IR_Expression, var direction : String) extends CUDA_HostStatement with IR_Expandable {
  /** Builds the `cudaMemcpy` call and wraps it in `CUDA_CheckError`. */
  override def expand() = {
    val copyCall = IR_FunctionCall(IR_ExternalFunctionReference("cudaMemcpy"), dest, src, sizeInBytes, direction)
    CUDA_CheckError(copyCall)
  }
}

/// CUDA_MemPrefetch

/**
  * Host statement that expands to an error-checked `cudaMemPrefetchAsync` call.
  *
  * @param pointer     first argument of the call
  * @param sizeInBytes second argument of the call
  * @param target      third argument of the call, given as a string that is converted implicitly to an expression
  */
case class CUDA_MemPrefetch(var pointer : IR_Expression, var sizeInBytes : IR_Expression, var target : String) extends CUDA_HostStatement with IR_Expandable {
  /** Builds the `cudaMemPrefetchAsync` call and wraps it in `CUDA_CheckError`. */
  // the function name carries no trailing whitespace so the reference matches the CUDA API name exactly
  override def expand() = CUDA_CheckError(IR_FunctionCall(IR_ExternalFunctionReference("cudaMemPrefetchAsync"), pointer, sizeInBytes, target))
}

/// CUDA_Memset

/**
  * Host statement that expands to an error-checked `cudaMemset` call.
  *
  * @param data        first argument of the call
  * @param value       second argument of the call
  * @param numElements number of elements to set
  * @param datatype    element type; `IR_SizeOf(datatype)` scales `numElements` to the size argument
  */
case class CUDA_Memset(var data : IR_Expression, var value : IR_Expression, var numElements : IR_Expression, var datatype : IR_Datatype) extends CUDA_HostStatement with IR_Expandable {
  /** Builds the `cudaMemset` call and wraps it in `CUDA_CheckError`. */
  override def expand() = {
    val sizeInBytes : IR_Expression = numElements * IR_SizeOf(datatype)
    CUDA_CheckError(IR_FunctionCall(IR_ExternalFunctionReference("cudaMemset"), data, value, sizeInBytes))
  }
}

/// CUDA_GetDevPointer

/**
  * Host statement that expands to an error-checked `cudaHostGetDevicePointer` call.
  *
  * @param devicePtr expression whose address is passed as the first argument of the call
  * @param hostPtr   second argument of the call
  */
case class CUDA_GetDevPointer(var devicePtr : IR_Expression, var hostPtr : IR_Expression) extends CUDA_HostStatement with IR_Expandable {
  /** Builds the call with a constant `0` as third argument and wraps it in `CUDA_CheckError`. */
  override def expand() = {
    // address of the device pointer, cast to a pointer-to-pointer of IR_UnitDatatype
    val deviceTarget : IR_Expression = IR_Cast(IR_PointerDatatype(IR_PointerDatatype(IR_UnitDatatype)), IR_AddressOf(devicePtr))
    CUDA_CheckError(IR_FunctionCall(IR_ExternalFunctionReference("cudaHostGetDevicePointer"), deviceTarget, hostPtr, 0))
  }
}
110
111
112

/// CUDA_FieldDeviceData

113
114
115
/**
  * Internal variable for the device-side data of a field.
  *
  * @param field       field this variable belongs to
  * @param slot        slot expression used when accessing the variable
  * @param fragmentIdx fragment index; defaults to the iterator of `IR_LoopOverFragments`
  */
case class CUDA_FieldDeviceData(override var field : IR_Field, override var slot : IR_Expression, override var fragmentIdx : IR_Expression = IR_LoopOverFragments.defIt) extends IR_IV_AbstractFieldData {
  override var level : IR_Expression = field.level

  /** Name prefix depends on whether the field has exactly one slot; the postfix is produced by `resolvePostfix`. */
  override def resolveName() = (if (1 == field.numSlots) "fieldDeviceData" else "slottedFieldDeviceData") +
    resolvePostfix(fragmentIdx.prettyprint, "", if (Knowledge.data_useFieldNamesAsIdx) field.name else field.index.toString, level.prettyprint, "")

  /**
    * Generates the destructor statement: wrapped in loops, frees the data via `CUDA_Free` and
    * resets the access to `0`, guarded by a condition on the access itself.
    */
  override def getDtor() : Option[IR_Statement] = {
    // the access is generated with the symbolic slot "slot"; the original slot is restored afterwards
    val origSlot = slot
    slot = "slot"
    try {
      val access = resolveAccess(resolveName(), IR_LoopOverFragments.defIt, IR_LoopOverDomains.defIt, IR_LoopOverFields.defIt, IR_LoopOverLevels.defIt, IR_LoopOverNeighbors.defIt)

      Some(wrapInLoops(
        IR_IfCondition(access,
          ListBuffer[IR_Statement](
            CUDA_Free(access),
            IR_Assignment(access, 0)))))
    } finally {
      // restore even if generating the access or the loops throws, so the node is never left with the placeholder slot
      slot = origSlot
    }
  }
}
133
134
135
136
137
138
139
140

/// CUDA_BufferDeviceData

/**
  * Internal variable for the device-side communication buffer of a field.
  *
  * @param field       field the buffer belongs to
  * @param direction   direction tag that becomes part of the resolved name
  * @param size        buffer size expression
  * @param neighIdx    neighbor index
  * @param fragmentIdx fragment index; defaults to the iterator of `IR_LoopOverFragments`
  */
case class CUDA_BufferDeviceData(override var field : IR_Field, override var direction : String, override var size : IR_Expression, override var neighIdx : IR_Expression, override var fragmentIdx : IR_Expression = IR_LoopOverFragments.defIt) extends IR_IV_AbstractCommBuffer {
  /** Name is `bufferDevice_<direction>` followed by the postfix produced by `resolvePostfix`. */
  override def resolveName() = {
    val prefix = s"bufferDevice_$direction"
    prefix + resolvePostfix(fragmentIdx.prettyprint, "", field.index.toString, field.level.toString, neighIdx.prettyprint)
  }

  /**
    * Generates the destructor statement: wrapped in loops, frees the buffer via `CUDA_Free` and
    * resets the access to `0`, guarded by a condition on the access itself.
    */
  override def getDtor() : Option[IR_Statement] = {
    // a def, so every use below receives a freshly resolved access expression
    def bufferAccess = resolveAccess(resolveName(), fragmentIdx, IR_NullExpression, field.index, field.level, neighIdx)

    val guard = bufferAccess
    val releaseAndReset = ListBuffer[IR_Statement](
      CUDA_Free(bufferAccess),
      IR_Assignment(bufferAccess, 0))

    Some(wrapInLoops(IR_IfCondition(guard, releaseAndReset)))
  }
}
Sebastian Kuckuk's avatar
Sebastian Kuckuk committed
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164

/// CUDA_AdaptDeviceAccessesForMM

/**
  * Strategy that, when `Knowledge.cuda_useManagedMemory` is set, replaces device-side data
  * variables with their `IR_IV_FieldData` / `IR_IV_CommBuffer` counterparts built from the same arguments.
  */
object CUDA_AdaptDeviceAccessesForMM extends DefaultStrategy("Adapt allocations and de-allocations on host and device") {
  this += new Transformation("Adapting", {
    // device field data -> IR_IV_FieldData
    case CUDA_FieldDeviceData(field, slot, fragmentIdx) if Knowledge.cuda_useManagedMemory =>
      IR_IV_FieldData(field, slot, fragmentIdx)

    // device buffer data -> IR_IV_CommBuffer
    case CUDA_BufferDeviceData(field, direction, size, neighIdx, fragmentIdx) if Knowledge.cuda_useManagedMemory =>
      IR_IV_CommBuffer(field, direction, size, neighIdx, fragmentIdx)
  })
}

/// CUDA_AdaptAllocations

/**
  * Strategy adapting allocations and de-allocations to the configured CUDA memory model.
  *
  * It runs three transformations:
  *  1. record the fields whose field data / communication buffers are allocated via `IR_ArrayAllocation`,
  *  2. with `Knowledge.cuda_useZeroCopy`, replace device allocations of recorded fields by
  *     `CUDA_GetDevPointer` and the matching `CUDA_Free` nodes by `IR_NullStatement`,
  *  3. replace `IR_ArrayAllocation` / `IR_ArrayFree` by their pinned or managed CUDA variants, depending on
  *     `Knowledge.cuda_usePinnedHostMemory` and `Knowledge.cuda_useManagedMemory`.
  */
object CUDA_AdaptAllocations extends DefaultStrategy("Adapt allocations and de-allocations on host and device") {
  /** Fields for which an allocation of `IR_IV_FieldData` was found. */
  var fieldHostAllocations = ListBuffer[IR_Field]()
  /** Fields for which an allocation of `IR_IV_CommBuffer` was found. */
  var bufferHostAllocations = ListBuffer[IR_Field]()

  // start every application of the strategy with empty records
  this.onBefore = () => {
    fieldHostAllocations.clear()
    bufferHostAllocations.clear()
  }

  this += new Transformation("Scanning host allocations", {
    case alloc @ IR_ArrayAllocation(pointer : IR_IV_FieldData, _, _)  =>
      fieldHostAllocations += pointer.field
      alloc
    case alloc @ IR_ArrayAllocation(pointer : IR_IV_CommBuffer, _, _) =>
      // buffers go into their own list; the buffer cases of the next transformation query it
      bufferHostAllocations += pointer.field
      alloc
  })

  this += new Transformation("Adapting", {
    // device allocation -> query of the device pointer belonging to the host allocation
    case alloc @ CUDA_Allocate(fieldData : CUDA_FieldDeviceData, _, _) if Knowledge.cuda_useZeroCopy && fieldHostAllocations.contains(fieldData.field) =>
      CUDA_GetDevPointer(alloc.pointer, IR_IV_FieldData(fieldData.field, fieldData.slot, fieldData.fragmentIdx))

    case alloc @ CUDA_Allocate(bufferData : CUDA_BufferDeviceData, _, _) if Knowledge.cuda_useZeroCopy && bufferHostAllocations.contains(bufferData.field) =>
      CUDA_GetDevPointer(alloc.pointer, IR_IV_CommBuffer(bufferData.field, bufferData.direction, bufferData.size, bufferData.neighIdx, bufferData.fragmentIdx))

    // nothing was allocated separately on the device in these cases, so the free is dropped
    case CUDA_Free(fieldData : CUDA_FieldDeviceData) if Knowledge.cuda_useZeroCopy && fieldHostAllocations.contains(fieldData.field) =>
      IR_NullStatement

    case CUDA_Free(bufferData : CUDA_BufferDeviceData) if Knowledge.cuda_useZeroCopy && bufferHostAllocations.contains(bufferData.field) =>
      IR_NullStatement
  })

  this += new Transformation("Adapting", {
    // pinned host memory takes precedence over managed memory
    case alloc @ IR_ArrayAllocation(pointer, datatype, size) =>
      if (Knowledge.cuda_usePinnedHostMemory)
        CUDA_AllocateHost(pointer, size, datatype)
      else if (Knowledge.cuda_useManagedMemory)
        CUDA_AllocateManaged(pointer, size, datatype)
      else
        alloc

    case free @ IR_ArrayFree(pointer) =>
      if (Knowledge.cuda_usePinnedHostMemory)
        CUDA_FreeHost(pointer)
      else if (Knowledge.cuda_useManagedMemory)
        CUDA_Free(pointer)
      else
        free
  })
}