Commit 88264d30 authored by Sebastian Kuckuk

add cuda_genAsyncPrefetch flag (async prefetch is not allowed on all architectures)

parent 33847e58
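
Background for the new flag: asynchronous prefetching of managed memory is only useful on devices that report concurrent managed access (typically Pascal-class GPUs or newer on Linux; the attribute is 0 on other platforms). As a hedged illustration that is not part of this commit, host code can query this capability at runtime before issuing a prefetch; the helper name below is hypothetical.

```cuda
// Sketch (assumption, not code generated by this commit): query whether the active
// device supports concurrent managed access before prefetching managed memory.
#include <cuda_runtime.h>

// hypothetical helper; 'managedPtr' must point to memory allocated with cudaMallocManaged
void prefetchIfSupported(void* managedPtr, size_t bytes, int dstDevice, cudaStream_t stream) {
  int device = 0;
  int concurrentManagedAccess = 0;
  cudaGetDevice(&device);
  cudaDeviceGetAttribute(&concurrentManagedAccess,
                         cudaDevAttrConcurrentManagedAccess, device);
  if (concurrentManagedAccess)
    cudaMemPrefetchAsync(managedPtr, bytes, dstDevice, stream);
  // otherwise skip the prefetch; pages are migrated on demand at first access
}
```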
@@ -522,6 +522,9 @@ object Knowledge {
   // replace device variants of field data and buffers with device pointers derived from host counter-parts
   var cuda_useZeroCopy : Boolean = false

+  // only relevant if cuda_useManagedMemory == true; replace cuda memcpy with asynchronous prefetches
+  var cuda_genAsyncPrefetch : Boolean = true

   // if true, the first dimension of the block size is enlarged if the kernel dimensionality is lower than the global dimensionality
   var cuda_foldBlockSizeForRedDimensionality : Boolean = true
@@ -676,7 +679,6 @@
   var visit_enable : Boolean = false
   var experimental_visit_addCurveMesh : Boolean = false

   /// === constraints and resolutions ===

   def update() : Unit = {
     // NOTE: it is required to call update at least once
......
@@ -31,12 +31,15 @@ import exastencils.field.ir._
 /// CUDA_TransferUtil

 object CUDA_TransferUtil {
-  def genTransfer(hostData : IR_InternalVariable, deviceData : IR_InternalVariable, sizeInBytes : IR_Expression, direction : String) : CUDA_HostStatement = {
+  def genTransfer(hostData : IR_InternalVariable, deviceData : IR_InternalVariable, sizeInBytes : IR_Expression, direction : String) : IR_Statement = {
     if (Knowledge.cuda_useManagedMemory) {
-      CUDA_MemPrefetch(hostData, sizeInBytes, direction match {
-        case "H2D" => Knowledge.cuda_deviceId
-        case "D2H" => "cudaCpuDeviceId"
-      })
+      if (Knowledge.cuda_genAsyncPrefetch)
+        CUDA_MemPrefetch(hostData, sizeInBytes, direction match {
+          case "H2D" => Knowledge.cuda_deviceId
+          case "D2H" => "cudaCpuDeviceId"
+        })
+      else
+        IR_NullStatement
     } else {
       direction match {
         case "H2D" => CUDA_Memcpy(deviceData, hostData, sizeInBytes, "cudaMemcpyHostToDevice")
......
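
For orientation, here is a rough sketch of the host-side CUDA calls the three branches of genTransfer correspond to for the host-to-device direction. This is an approximation under stated assumptions (cuda_deviceId resolves to an integer device id, default stream, error handling omitted), not the literal generator output; the function and parameter names are hypothetical.

```cuda
// Approximate host code for the three cases handled in genTransfer (sketch, not literal output).
#include <cuda_runtime.h>

void transferH2D(void* hostData, void* deviceData, size_t sizeInBytes,
                 bool useManagedMemory, bool genAsyncPrefetch, int deviceId) {
  if (useManagedMemory) {
    if (genAsyncPrefetch)
      // CUDA_MemPrefetch: hostData is managed memory, so a prefetch replaces the copy
      // ("H2D" targets deviceId; the "D2H" case would target cudaCpuDeviceId instead)
      cudaMemPrefetchAsync(hostData, sizeInBytes, deviceId);
    // else: IR_NullStatement, i.e. no transfer code is emitted at all
  } else {
    // CUDA_Memcpy: explicit copy between separate host and device buffers
    cudaMemcpy(deviceData, hostData, sizeInBytes, cudaMemcpyHostToDevice);
  }
}
```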