CUDA_HandleFieldAccessLike.scala 6.18 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
//=============================================================================
//
//  This file is part of the ExaStencils code generation framework. ExaStencils
//  is free software: you can redistribute it and/or modify it under the terms
//  of the GNU General Public License as published by the Free Software
//  Foundation, either version 3 of the License, or (at your option) any later
//  version.
//
//  ExaStencils is distributed in the hope that it will be useful, but WITHOUT
//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
//  more details.
//
//  You should have received a copy of the GNU General Public License along
//  with ExaStencils. If not, see <http://www.gnu.org/licenses/>.
//
//=============================================================================

19
package exastencils.parallelization.api.cuda
Sebastian Kuckuk's avatar
Sebastian Kuckuk committed
20
21
22
23
24
25
26
27

import scala.collection._
import scala.collection.mutable._

import exastencils.base.ir._
import exastencils.config._
import exastencils.datastructures.Transformation._
import exastencils.datastructures._
28
import exastencils.domain.ir.IR_IV_NeighborFragmentIdx
Sebastian Kuckuk's avatar
Sebastian Kuckuk committed
29
30
31
32
33
34
35
36
37
38
39
40
41
import exastencils.field.ir._
import exastencils.optimization.ir.IR_SimplifyExpression

/// CUDA_GatherFieldAccessLike

/**
  * Strategy that walks a piece of IR and collects multi-dimensional field accesses whose index
  * components all have the form "loop variable", "loop variable + integer constant" or
  * "integer constant". The results are stored in the public buffers of this object, keyed by the
  * identifier produced by [[extractFieldIdentifier]].
  *
  * Callers are expected to fill `loopVariables` (and, if required, `maximalFieldDim`) before applying
  * the strategy and to read `fieldAccesses`, `fieldIndicesConstantPart` and `writtenFields` afterwards.
  */
object CUDA_GatherFieldAccessLike extends QuietDefaultStrategy("Gather local FieldAccessLike nodes for shared memory") {
  // names of the variables that are accepted as variable part of an index component
  var loopVariables = ListBuffer[String]()
  // accepted accesses per identifier; the most recently found access is at the head of the list
  var fieldAccesses = new mutable.HashMap[String, List[IR_MultiDimFieldAccess]].withDefaultValue(Nil)
  // constant index parts of the accepted accesses, stored in the same order as fieldAccesses
  var fieldIndicesConstantPart = new mutable.HashMap[String, List[Array[Long]]].withDefaultValue(Nil)
  // accesses to fields with more data dimensions than this are not inspected at all
  var maximalFieldDim = Platform.hw_cuda_maxNumDimsBlock
  // identifiers of field accesses found as first component of an assignment
  var writtenFields = ListBuffer[String]()

  /**
    * Builds the key under which an access is registered.
    *
    * The key is the code name of the field, followed by a slot suffix if the field has more than one
    * slot ("_o<offset>" for slot accesses, "_s<slot>" otherwise) and by "_n<neighIdx>" if the access
    * targets a neighbor fragment.
    *
    * @param access the field access to derive the key from
    * @return the identifier string
    */
  def extractFieldIdentifier(access : IR_MultiDimFieldAccess) = {
    val field = access.field

    val slotSuffix =
      if (field.numSlots > 1) {
        access.slot match {
          case IR_SlotAccess(_, offset) => s"_o$offset"
          case IR_IntegerConstant(slot) => s"_s$slot"
          case other                    => s"_s${ other.prettyprint }"
        }
      } else {
        ""
      }

    // also consider neighbor fragment accesses
    val neighborSuffix = access.fragIdx match {
      case neigh : IR_IV_NeighborFragmentIdx => s"_n${ neigh.neighIdx }"
      case _                                 => ""
    }

    field.codeName + slotSuffix + neighborSuffix
  }

  this += new Transformation("Searching", {
    case assignment @ IR_Assignment(target : IR_MultiDimFieldAccess, _, _) =>
      // only remember the identifier; the assignment itself is handed back unchanged
      writtenFields += extractFieldIdentifier(target)
      assignment

    case access : IR_MultiDimFieldAccess if access.field.layout.numDimsData <= maximalFieldDim =>
      val key = extractFieldIdentifier(access)

      // Classify every index component as (admissible, constant part).
      // Admissible components are "variable + offset", "variable" or "constant"; the variable has to
      // be one of the registered loop variables. Everything else disqualifies the access.
      val classified = access.index.indices.map {
        case IR_Addition(ListBuffer(IR_VariableAccess(name : String, _), IR_IntegerConstant(offset : Long))) =>
          (loopVariables.contains(name), offset)
        case IR_VariableAccess(name : String, _)                                                             =>
          (loopVariables.contains(name), 0L)
        case IR_IntegerConstant(value : Long)                                                                =>
          (true, value)
        case _                                                                                               =>
          (false, 0L)
      }

      val dimensionalityOk = access.field.layout.numDimsData <= Platform.hw_cuda_maxNumDimsBlock
      val allIndicesOk = classified.forall(_._1)

      if (dimensionalityOk && allIndicesOk) {
        val constantPart : Array[Long] = classified.map(_._2).toArray

        access.allowLinearization = true
        fieldAccesses.update(key, access :: fieldAccesses(key))
        fieldIndicesConstantPart.update(key, constantPart :: fieldIndicesConstantPart(key))
      }

      access
  }, true)
}

/// CUDA_ReplaceFieldAccessLike

/**
  * Strategy that replaces every multi-dimensional field access whose identifier equals
  * `fieldToOffset` by either a plain variable access ("current", "infront<k>", "behind<k>") or a
  * [[CUDA_SharedArrayAccess]].
  *
  * All public vars are configuration that has to be set by the caller before the strategy is applied.
  */
object CUDA_ReplaceFieldAccessLike extends QuietDefaultStrategy("Replace local FieldAccessLike nodes for shared memory") {
  // identifier (as produced by extractIdentifier) of the accesses that are to be replaced
  var fieldToOffset = ""
  // index that is subtracted from the access index / constant index part before it is used
  var fieldOffset = IR_ExpressionIndex()
  // largest absolute deviation in the execution dimension that is mapped to an "infront"/"behind" variable
  var offsetForSharedMemoryAccess = 0L
  // strides handed to the generated CUDA_SharedArrayAccess
  var sharedArrayStrides = Array[Long]()
  // number of leading index components used for the shared array access;
  // also the position of the component inspected when spatial blocking is applied
  var executionDim = 0
  // not read by this strategy itself
  var baseIndex = IR_ExpressionIndex()
  // enables the replacement by "current"/"infront"/"behind" variables
  var applySpatialBlocking = false

  /**
    * Builds the identifier of an access that is compared against `fieldToOffset`.
    *
    * The naming scheme is kept identical to CUDA_GatherFieldAccessLike.extractFieldIdentifier: code
    * name of the field, a slot suffix if the field has more than one slot, and "_n<neighIdx>" for
    * accesses that target a neighbor fragment. Without the neighbor suffix, an access to a neighbor
    * fragment would yield the same identifier as the access to the local data of the same field and
    * would be replaced together with it.
    *
    * @param access the field access to derive the identifier from
    * @return the identifier string
    */
  def extractIdentifier(access : IR_MultiDimFieldAccess) = {
    val field = access.field
    var identifier = field.codeName

    // TODO: array fields
    if (field.numSlots > 1) {
      access.slot match {
        case IR_SlotAccess(_, offset) => identifier += s"_o$offset"
        case IR_IntegerConstant(slot) => identifier += s"_s$slot"
        case _                        => identifier += s"_s${ access.slot.prettyprint }"
      }
    }

    // also consider neighbor fragment accesses (same scheme as in CUDA_GatherFieldAccessLike)
    access.fragIdx match {
      case neigh : IR_IV_NeighborFragmentIdx => identifier += s"_n${ neigh.neighIdx }"
      case _                                 =>
    }

    identifier
  }

  this += new Transformation("Searching", {
    case access : IR_MultiDimFieldAccess if fieldToOffset == extractIdentifier(access) =>
      val identifier = extractIdentifier(access)
      // deviation of the annotated constant index part from the configured offset
      // NOTE(review): throws if the access does not carry the CUDA_Kernel.ConstantIndexPart annotation
      val deviation = (IR_ExpressionIndex(access.getAnnotation(CUDA_Kernel.ConstantIndexPart).get.asInstanceOf[Array[Long]]) - fieldOffset).indices

      // with spatial blocking, accesses that do not deviate in any of the leading dimensions are
      // mapped to plain variables selected by the deviation in the execution dimension
      if (applySpatialBlocking && deviation.take(executionDim).forall(x => IR_SimplifyExpression.evalIntegral(x) == 0)) {
        // NOTE(review): a deviation outside of [-offsetForSharedMemoryAccess, offsetForSharedMemoryAccess]
        // is not covered by any case and results in a MatchError
        IR_SimplifyExpression.evalIntegral(deviation(executionDim)) match {
          // TODO: check if the datatypes in the next three lines are correct
          case 0                                                        => IR_VariableAccess("current", access.datatype)
          case x if 0L to offsetForSharedMemoryAccess contains x        => IR_VariableAccess("infront" + x, access.datatype)
          case y if 0L to -offsetForSharedMemoryAccess by -1 contains y => IR_VariableAccess("behind" + math.abs(y), access.datatype)
        }
      } else {
        // everything else is redirected to the array named after the identifier; only the leading
        // executionDim index components are used, in reversed order
        new CUDA_SharedArrayAccess(IR_VariableAccess(CUDA_Kernel.KernelVariablePrefix + identifier, IR_PointerDatatype(access.field.resolveDeclType)), (access.index - fieldOffset).indices.take(executionDim).reverse, IR_ExpressionIndex(sharedArrayStrides))
      }
  })
}