Commit 628f2060 authored by Martin Bauer's avatar Martin Bauer
Browse files

UniformGridGPU: Fix in timing for shorter runs

parent 79668534
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "python_coupling/CreateConfig.h"
#include "python_coupling/PythonCallback.h"
#include "python_coupling/DictWrapper.h"
......@@ -55,8 +56,11 @@ int main( int argc, char **argv )
for( auto cfg = python_coupling::configBegin( argc, argv ); cfg != python_coupling::configEnd(); ++cfg )
{
auto config = *cfg;
logging::configureLogging( config );
auto blocks = blockforest::createUniformBlockGridFromConfig( config );
Vector3<uint_t> cellsPerBlock = config->getBlock( "DomainSetup" ).getParameter<Vector3<uint_t> >( "cellsPerBlock" );
// Reading parameters
auto parameters = config->getOneBlock( "Parameters" );
const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 ));
......@@ -177,28 +181,25 @@ int main( int argc, char **argv )
timeLoop.addFuncAfterTimeStep( timing::RemainingTimeLogger( timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency ), "remaining time logger" );
auto performanceReportFrequency = parameters.getParameter< uint_t >( "performanceReportFrequency", 500 ); // in timesteps
lbm::PerformanceLogger<FlagField_T> performanceLogger(blocks, flagFieldID, fluidFlagUID, performanceReportFrequency);
timeLoop.addFuncAfterTimeStep([&performanceLogger] { performanceLogger(); }, "performance logger" );
WcTimer simTimer;
cudaDeviceSynchronize();
WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps");
simTimer.start();
cudaDeviceSynchronize();
timeLoop.run();
simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT("Simulation finished");
std::map< std::string, int > integerProperties;
std::map< std::string, double > realProperties;
std::map< std::string, std::string > stringProperties;
performanceLogger.logOverallResultsOnRoot();
performanceLogger.getBestResultsForSQLOnRoot(integerProperties, realProperties, stringProperties);
auto time = simTimer.last();
auto nrOfCells = real_c( cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2] );
auto mlupsPerProcess = nrOfCells * real_c( timesteps ) / time * 1e-6;
WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process " << mlupsPerProcess);
WALBERLA_LOG_RESULT_ON_ROOT("Time per time step " << time / real_c( timesteps ) );
WALBERLA_ROOT_SECTION()
{
python_coupling::PythonCallback pythonCallbackResults ( "results_callback" );
if ( pythonCallbackResults.isCallable() )
{
pythonCallbackResults.data().exposeValue( "mlups_total", realProperties["MLUPS"] );
pythonCallbackResults.data().exposeValue( "mlups_process", realProperties["MLUPS_process"] );
pythonCallbackResults.data().exposeValue( "mlups_per_process", mlupsPerProcess );
// Call Python function to report results
pythonCallbackResults();
......
......@@ -26,7 +26,7 @@ communication_schemes = ['GPUPackInfo_Streams', 'UniformGPUScheme_Baseline', 'Un
def calculate_time_steps(runtime, expected_mlups, domain_size):
    """Compute how many LBM time steps fit into a given runtime budget.

    :param runtime:        desired benchmark duration in seconds
    :param expected_mlups: expected performance in MLUPS
                           (million lattice updates per second)
    :param domain_size:    iterable with the number of cells per dimension
    :return: number of time steps, truncated to an int
    """
    cells = reduce(operator.mul, domain_size, 1)
    time_steps_per_second = expected_mlups * 1e6 / cells
    # Truncate to a whole number of steps — the time loop needs an integer count.
    return int(time_steps_per_second * runtime)
def side_length_to_fill_memory(memory_fill_percentage, memory_in_gb):
......
#!/usr/bin/env python3
from os import getcwd
from waLBerla.tools.jobscripts import createJobscript
from datetime import timedelta
......@@ -6,12 +7,13 @@ from datetime import timedelta
# Generate one SLURM jobscript per node count for the weak-scaling study.
for node_count in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2400]:
    # Zero-padded file name keeps the generated scripts lexicographically sorted.
    with open("job_weak_scaling_{:04d}.sh".format(node_count), 'w') as f:
        # Embed the node count in the log file names so that runs with
        # different node counts do not overwrite each other's output
        # (%j is expanded to the SLURM job id by the scheduler).
        js = createJobscript(nodes=node_count,
                             output_file='out_lbm_bench_{:04d}_%j.txt'.format(node_count),
                             error_file='err_lbm_bench_{:04d}_%j.txt'.format(node_count),
                             initial_dir=getcwd(),
                             exe_name='UniformGridBenchmarkGPU',
                             parameter_files=['weak_scaling.py'],
                             wall_time=timedelta(minutes=25),
                             machine='pizdaint_hybrid',
                             account='d105',
                             )
        f.write(js)
......@@ -19,7 +19,7 @@ gpu_memory_gb = 16
# Three domain sizes filling ~80%, 50% and 5% of the available GPU memory.
cells_per_block = [side_length_to_fill_memory(pc, gpu_memory_gb) for pc in (0.8, 0.5, 0.05)]
expected_mlups = 200  # used to estimate how many time steps have to be done
time_per_scenarios = 5  # benchmark time in seconds per scenario
fully_periodic = [False, True]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment