Commit 6d6abeec authored by Martin Bauer's avatar Martin Bauer

UniformGridGPU: select comm type by string instead of number

parent 755972f9
......@@ -89,15 +89,27 @@ int main( int argc, char **argv )
lbm::UniformGridGPU_UBB ubb(blocks, pdfFieldGpuID);
lbm::UniformGridGPU_NoSlip noSlip(blocks, pdfFieldGpuID);
//lbm::GeneratedFixedDensity pressure(blocks, pdfFieldGpuID);
ubb.fillFromFlagField<FlagField_T>( blocks, flagFieldID, FlagUID("UBB"), fluidFlagUID );
noSlip.fillFromFlagField<FlagField_T>( blocks, flagFieldID, FlagUID("NoSlip"), fluidFlagUID );
//pressure.fillFromFlagField<FlagField_T>( blocks, flagFieldID, FlagUID("pressure"), fluidFlagUID );
// Communication setup
bool cudaEnabledMPI = parameters.getParameter<bool>( "cudaEnabledMPI", false );
int communicationScheme = parameters.getParameter<int>( "communicationScheme", (int) CommunicationSchemeType::UniformGPUScheme_Baseline );
const std::string communicationSchemeStr = parameters.getParameter<std::string>("communicationScheme", "UniformGPUScheme_Baseline");
CommunicationSchemeType communicationScheme;
if( communicationSchemeStr == "GPUPackInfo_Baseline")
communicationScheme = GPUPackInfo_Baseline;
else if (communicationSchemeStr == "GPUPackInfo_Streams")
communicationScheme = GPUPackInfo_Streams;
else if (communicationSchemeStr == "UniformGPUScheme_Baseline")
communicationScheme = UniformGPUScheme_Baseline;
else if (communicationSchemeStr == "UniformGPUScheme_Memcpy")
communicationScheme = UniformGPUScheme_Memcpy;
else {
WALBERLA_ABORT_NO_DEBUG_INFO("Invalid choice for communicationScheme")
}
Vector3<int> innerOuterSplit = parameters.getParameter<Vector3<int> >("innerOuterSplit", Vector3<int>(1, 1, 1));
......
......@@ -7,18 +7,21 @@ DomainSetup
Parameters
{
omega 1.8;
timesteps 10000;
warmupSteps 0;
outerIterations 1;
remainingTimeLoggerFrequency 30;
vtkWriteFrequency 500;
timesteps 10000; // time steps of one performance measurement
warmupSteps 0; // number of steps to run before measurement starts
outerIterations 1; // how many measurements to conduct
cudaEnabledMPI false;
// Can be one of: GPUPackInfo_Baseline, GPUPackInfo_Streams, UniformGPUScheme_Baseline, UniformGPUScheme_Memcpy
communicationScheme UniformGPUScheme_Baseline;
timeStepStrategy noOverlap;
innerOuterSplit < 64, 1, 1>;
vtkWriteFrequency 0; // write a VTK file every n'th step, if zero VTK output is disabled
cudaEnabledMPI false; // switch on if you have a CUDA-enabled MPI implementation
timeStepStrategy noOverlap; // can be: noOverlap, simpleOverlap, complexOverlap, kernelOnly
innerOuterSplit < 32, 1, 1>; // slice-thickness that 'outer'-kernels process when overlapping
remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time
}
/*
......
......@@ -60,15 +60,21 @@ def overlap_benchmark():
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)]
scenarios.add(Scenario(timeStepStrategy='noOverlap'))
for strategy in ['simpleOverlap', 'complexOverlap']:
for inner_outer_split in inner_outer_splits:
scenario = Scenario(timeStepStrategy=strategy, innerOuterSplit=inner_outer_split)
scenarios.add(scenario)
for comm_strategy in ['UniformGPUScheme_Baseline', 'UniformGPUScheme_Memcpy']: # 'GPUPackInfo_Baseline', 'GPUPackInfo_Streams'
# no overlap
scenarios.add(Scenario(timeStepStrategy='noOverlap', communicationScheme=comm_strategy, innerOuterSplit=(1, 1, 1)))
# overlap
for overlap_strategy in ['simpleOverlap', 'complexOverlap']:
for inner_outer_split in inner_outer_splits:
scenario = Scenario(timeStepStrategy=overlap_strategy,
communicationScheme=comm_strategy,
innerOuterSplit=inner_outer_split)
scenarios.add(scenario)
if __name__ == '__main__':
for node_count in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2400]:
def generate_jobscripts(machine='pizdaint_hybrid'):
for node_count in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 2400]:
with open("job_overlap_benchmark_{:04d}.sh".format(node_count), 'w') as f:
js = createJobscript(nodes=node_count,
output_file='overlap_bench_{:04d}_%j.txt'.format(node_count),
......@@ -77,9 +83,14 @@ if __name__ == '__main__':
exe_name='UniformGridBenchmarkGPU',
parameter_files=['overlap_benchmark.py'],
wall_time=timedelta(minutes=25),
machine='pizdaint_hybrid',
machine=machine,
account='d105',
)
f.write(js)
if __name__ == '__main__':
print("Called without waLBerla - generating job scripts for PizDaint")
generate_jobscripts()
else:
overlap_benchmark()
......@@ -58,8 +58,8 @@ void selectDeviceBasedOnMpiRank()
}
else if ( deviceCount > processesOnNode )
{
WALBERLA_LOG_WARNING( "Not using all available GPUs on node. Processes on node "
<< processesOnNode << " available GPUs on node " << deviceCount );
WALBERLA_LOG_WARNING( "Not using all available GPUs on node. Processes on node: "
<< processesOnNode << ", available GPUs on node: " << deviceCount );
WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode ));
}
else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment