Skip to content
Snippets Groups Projects
Commit 6d6abeec authored by Martin Bauer's avatar Martin Bauer
Browse files

UniformGridGPU: select comm type by string instead of number

parent 755972f9
No related merge requests found
...@@ -89,15 +89,27 @@ int main( int argc, char **argv ) ...@@ -89,15 +89,27 @@ int main( int argc, char **argv )
lbm::UniformGridGPU_UBB ubb(blocks, pdfFieldGpuID); lbm::UniformGridGPU_UBB ubb(blocks, pdfFieldGpuID);
lbm::UniformGridGPU_NoSlip noSlip(blocks, pdfFieldGpuID); lbm::UniformGridGPU_NoSlip noSlip(blocks, pdfFieldGpuID);
//lbm::GeneratedFixedDensity pressure(blocks, pdfFieldGpuID);
ubb.fillFromFlagField<FlagField_T>( blocks, flagFieldID, FlagUID("UBB"), fluidFlagUID ); ubb.fillFromFlagField<FlagField_T>( blocks, flagFieldID, FlagUID("UBB"), fluidFlagUID );
noSlip.fillFromFlagField<FlagField_T>( blocks, flagFieldID, FlagUID("NoSlip"), fluidFlagUID ); noSlip.fillFromFlagField<FlagField_T>( blocks, flagFieldID, FlagUID("NoSlip"), fluidFlagUID );
//pressure.fillFromFlagField<FlagField_T>( blocks, flagFieldID, FlagUID("pressure"), fluidFlagUID );
// Communication setup // Communication setup
bool cudaEnabledMPI = parameters.getParameter<bool>( "cudaEnabledMPI", false ); bool cudaEnabledMPI = parameters.getParameter<bool>( "cudaEnabledMPI", false );
int communicationScheme = parameters.getParameter<int>( "communicationScheme", (int) CommunicationSchemeType::UniformGPUScheme_Baseline );
const std::string communicationSchemeStr = parameters.getParameter<std::string>("communicationScheme", "UniformGPUScheme_Baseline");
CommunicationSchemeType communicationScheme;
if( communicationSchemeStr == "GPUPackInfo_Baseline")
communicationScheme = GPUPackInfo_Baseline;
else if (communicationSchemeStr == "GPUPackInfo_Streams")
communicationScheme = GPUPackInfo_Streams;
else if (communicationSchemeStr == "UniformGPUScheme_Baseline")
communicationScheme = UniformGPUScheme_Baseline;
else if (communicationSchemeStr == "UniformGPUScheme_Memcpy")
communicationScheme = UniformGPUScheme_Memcpy;
else {
WALBERLA_ABORT_NO_DEBUG_INFO("Invalid choice for communicationScheme")
}
Vector3<int> innerOuterSplit = parameters.getParameter<Vector3<int> >("innerOuterSplit", Vector3<int>(1, 1, 1)); Vector3<int> innerOuterSplit = parameters.getParameter<Vector3<int> >("innerOuterSplit", Vector3<int>(1, 1, 1));
......
...@@ -7,18 +7,21 @@ DomainSetup ...@@ -7,18 +7,21 @@ DomainSetup
Parameters Parameters
{ {
omega 1.8;
timesteps 10000;
warmupSteps 0;
outerIterations 1;
remainingTimeLoggerFrequency 30; timesteps 10000; // time steps of one performance measurement
vtkWriteFrequency 500; warmupSteps 0; // number of steps to run before measurement starts
outerIterations 1; // how many measurements to conduct
cudaEnabledMPI false; // Can be one of: GPUPackInfo_Baseline, GPUPackInfo_Streams, UniformGPUScheme_Baseline, UniformGPUScheme_Memcpy
communicationScheme UniformGPUScheme_Baseline;
timeStepStrategy noOverlap; vtkWriteFrequency 0; // write a VTK file every n'th step, if zero VTK output is disabled
innerOuterSplit < 64, 1, 1>; cudaEnabledMPI false; // switch on if you have a CUDA-enabled MPI implementation
timeStepStrategy noOverlap; // can be: noOverlap, simpleOverlap, complexOverlap, kernelOnly
innerOuterSplit < 32, 1, 1>; // slice-thickness that 'outer'-kernels process when overlapping
remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time
} }
/* /*
......
...@@ -60,15 +60,21 @@ def overlap_benchmark(): ...@@ -60,15 +60,21 @@ def overlap_benchmark():
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1), (4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)] (4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)]
scenarios.add(Scenario(timeStepStrategy='noOverlap')) for comm_strategy in ['UniformGPUScheme_Baseline', 'UniformGPUScheme_Memcpy']: # 'GPUPackInfo_Baseline', 'GPUPackInfo_Streams'
for strategy in ['simpleOverlap', 'complexOverlap']: # no overlap
for inner_outer_split in inner_outer_splits: scenarios.add(Scenario(timeStepStrategy='noOverlap', communicationScheme=comm_strategy, innerOuterSplit=(1, 1, 1)))
scenario = Scenario(timeStepStrategy=strategy, innerOuterSplit=inner_outer_split)
scenarios.add(scenario)
# overlap
for overlap_strategy in ['simpleOverlap', 'complexOverlap']:
for inner_outer_split in inner_outer_splits:
scenario = Scenario(timeStepStrategy=overlap_strategy,
communicationScheme=comm_strategy,
innerOuterSplit=inner_outer_split)
scenarios.add(scenario)
if __name__ == '__main__':
for node_count in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2400]: def generate_jobscripts(machine='pizdaint_hybrid'):
for node_count in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 2400]:
with open("job_overlap_benchmark_{:04d}.sh".format(node_count), 'w') as f: with open("job_overlap_benchmark_{:04d}.sh".format(node_count), 'w') as f:
js = createJobscript(nodes=node_count, js = createJobscript(nodes=node_count,
output_file='overlap_bench_{:04d}_%j.txt'.format(node_count), output_file='overlap_bench_{:04d}_%j.txt'.format(node_count),
...@@ -77,9 +83,14 @@ if __name__ == '__main__': ...@@ -77,9 +83,14 @@ if __name__ == '__main__':
exe_name='UniformGridBenchmarkGPU', exe_name='UniformGridBenchmarkGPU',
parameter_files=['overlap_benchmark.py'], parameter_files=['overlap_benchmark.py'],
wall_time=timedelta(minutes=25), wall_time=timedelta(minutes=25),
machine='pizdaint_hybrid', machine=machine,
account='d105', account='d105',
) )
f.write(js) f.write(js)
if __name__ == '__main__':
print("Called without waLBerla - generating job scripts for PizDaint")
generate_jobscripts()
else: else:
overlap_benchmark() overlap_benchmark()
...@@ -58,8 +58,8 @@ void selectDeviceBasedOnMpiRank() ...@@ -58,8 +58,8 @@ void selectDeviceBasedOnMpiRank()
} }
else if ( deviceCount > processesOnNode ) else if ( deviceCount > processesOnNode )
{ {
WALBERLA_LOG_WARNING( "Not using all available GPUs on node. Processes on node " WALBERLA_LOG_WARNING( "Not using all available GPUs on node. Processes on node: "
<< processesOnNode << " available GPUs on node " << deviceCount ); << processesOnNode << ", available GPUs on node: " << deviceCount );
WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode )); WALBERLA_CUDA_CHECK( cudaSetDevice( rankOnNode ));
} }
else else
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment