From 1a43844657a5fe44ee8abef14b4b2e34377dae80 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Tue, 1 Oct 2019 08:25:52 +0200
Subject: [PATCH] New benchmark for generated LBM on uniform grid on CPU

---
Review note: the UniformGridGenerated.cpp and UniformGridGenerated.py hunks were revised during review;
the blob ids in their "index" lines were not regenerated.

 apps/benchmarks/CMakeLists.txt                |   1 +
 .../UniformGridGenerated/CMakeLists.txt       |  12 ++
 .../UniformGridGenerated.cpp                  | 180 ++++++++++++++++++
 .../UniformGridGenerated.py                   | 126 ++++++++++++
 4 files changed, 319 insertions(+)
 create mode 100644 apps/benchmarks/UniformGridGenerated/CMakeLists.txt
 create mode 100644 apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp
 create mode 100644 apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py

diff --git a/apps/benchmarks/CMakeLists.txt b/apps/benchmarks/CMakeLists.txt
index 8e83319ae..a59d3bc1a 100644
--- a/apps/benchmarks/CMakeLists.txt
+++ b/apps/benchmarks/CMakeLists.txt
@@ -14,3 +14,4 @@ add_subdirectory( ProbeVsExtraMessage )
 add_subdirectory( SchaeferTurek )
 add_subdirectory( UniformGrid )
 add_subdirectory( UniformGridGPU )
+add_subdirectory( UniformGridGenerated )
diff --git a/apps/benchmarks/UniformGridGenerated/CMakeLists.txt b/apps/benchmarks/UniformGridGenerated/CMakeLists.txt
new file mode 100644
index 000000000..def7d93f6
--- /dev/null
+++ b/apps/benchmarks/UniformGridGenerated/CMakeLists.txt
@@ -0,0 +1,12 @@
+waLBerla_python_file_generates(UniformGridGenerated.py
+        UniformGridGenerated_LatticeModel.cpp
+        UniformGridGenerated_Defines.h)
+
+
+foreach(config trt )
+    waLBerla_add_executable ( NAME UniformGridBenchmarkGenerated_${config}
+                              FILES UniformGridGenerated.cpp UniformGridGenerated.py
+                              DEPENDS blockforest boundary core domain_decomposition field geometry timeloop vtk gui
+                              CODEGEN_CFG ${config})
+endforeach()
+
diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp
new file mode 100644
index 000000000..f48b0f755
--- /dev/null
+++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp
@@ -0,0 +1,180 @@
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Random.h"
+#include "python_coupling/CreateConfig.h"
+#include "python_coupling/PythonCallback.h"
+#include "python_coupling/DictWrapper.h"
+#include "blockforest/Initialization.h"
+#include "field/vtk/VTKWriter.h"
+#include "field/communication/PackInfo.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+#include "timeloop/all.h"
+#include "core/timing/TimingPool.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "domain_decomposition/SharedSweep.h"
+#include "lbm/communication/PdfFieldPackInfo.h"
+#include "lbm/field/AddToStorage.h"
+#include "lbm/vtk/VTKOutput.h"
+#include "lbm/gui/Connection.h"
+#include "lbm/vtk/Velocity.h"
+#include "gui/Gui.h"
+
+// The lattice model and the info* constants below come from the UniformGridGenerated.py code generation step.
+#include "UniformGridGenerated_LatticeModel.h"
+#include "UniformGridGenerated_Defines.h"
+
+
+using namespace walberla;
+
+typedef lbm::UniformGridGenerated_LatticeModel LatticeModel_T;
+typedef LatticeModel_T::Stencil Stencil_T;
+typedef LatticeModel_T::CommunicationStencil CommunicationStencil_T;
+typedef lbm::PdfField< LatticeModel_T > PdfField_T;
+
+
+/**
+ * Initializes the PDF field with a shear profile: cells whose global z index is >= zMax/2 get the
+ * x-velocity +xMagnitude, all other cells -xMagnitude. Every cell additionally gets a random z-velocity
+ * of xMagnitude * r with r drawn from [-fluctuationMagnitude, fluctuationMagnitude]; the density is 1.
+ *
+ * \param blocks               block storage whose blocks are initialized (ghost layers included)
+ * \param pdfFieldId           block data id of the PDF field
+ * \param xMagnitude           absolute value of the x-velocity in both halves of the domain
+ * \param fluctuationMagnitude bound of the random factor that scales the z-velocity perturbation
+ */
+void initShearVelocity(const shared_ptr<StructuredBlockStorage> & blocks, BlockDataID pdfFieldId,
+                       const real_t xMagnitude=real_t(0.1), const real_t fluctuationMagnitude=real_t(0.05) )
+{
+    // The random generator is seeded with a constant.
+    math::seedRandomGenerator(0);
+    auto halfZ = blocks->getDomainCellBB().zMax() / 2;
+    for( auto & block: *blocks)
+    {
+        auto pdfField = block.getData<PdfField_T>( pdfFieldId );
+        // NOTE(review): if this cell macro is OpenMP-parallelized, math::realRandom is called concurrently -
+        //               confirm that the generator tolerates that and that the call order is irrelevant here.
+        WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(pdfField,
+            Cell globalCell;
+            blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+            real_t randomReal = xMagnitude * math::realRandom<real_t>(-fluctuationMagnitude, fluctuationMagnitude);
+
+            if( globalCell[2] >= halfZ ) {
+                pdfField->setDensityAndVelocity(x, y, z, Vector3<real_t>(xMagnitude, 0, randomReal), real_t(1.0));
+            } else {
+                pdfField->setDensityAndVelocity(x, y, z, Vector3<real_t>(-xMagnitude, 0, randomReal), real_t(1.0));
+            }
+        );
+    }
+}
+
+
+/**
+ * Benchmark driver: for every configuration delivered by the Python coupling, creates a uniform block
+ * grid, runs the generated LBM sweep and reports MLUPS per process (log output and optional Python callback).
+ */
+int main( int argc, char **argv )
+{
+    mpi::Environment env( argc, argv );
+
+    for( auto cfg = python_coupling::configBegin( argc, argv ); cfg != python_coupling::configEnd(); ++cfg )
+    {
+        WALBERLA_MPI_WORLD_BARRIER();
+
+        auto config = *cfg;
+        logging::configureLogging( config );
+        auto blocks = blockforest::createUniformBlockGridFromConfig( config );
+
+        Vector3<uint_t> cellsPerBlock = config->getBlock( "DomainSetup" ).getParameter<Vector3<uint_t> >( "cellsPerBlock" );
+        // Reading parameters
+        auto parameters = config->getOneBlock( "Parameters" );
+        // NOTE: timeStepStrategy is read here but not used anywhere else in this file.
+        const std::string timeStepStrategy = parameters.getParameter<std::string>( "timeStepStrategy", "normal");
+        const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 ));
+        const uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 50 ));
+        const bool initShearFlow = parameters.getParameter<bool>("initShearFlow", false);
+
+        // Creating fields
+        LatticeModel_T latticeModel = LatticeModel_T( omega );
+        BlockDataID pdfFieldId = lbm::addPdfFieldToStorage( blocks, "pdf field", latticeModel);
+
+        if( initShearFlow ) {
+            initShearVelocity(blocks, pdfFieldId);
+        }
+
+        SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps );
+        blockforest::communication::UniformBufferedScheme< CommunicationStencil_T > communication( blocks );
+        communication.addPackInfo( make_shared< lbm::PdfFieldPackInfo< LatticeModel_T > >( pdfFieldId ) );
+
+        timeLoop.add() << BeforeFunction( communication, "communication" )
+                       << Sweep( LatticeModel_T::Sweep( pdfFieldId ), "LB stream & collide" );
+
+        // Warm-up steps run before the logger/VTK hooks are registered and outside of the timed section.
+        int warmupSteps = parameters.getParameter<int>( "warmupSteps", 2 );
+        int outerIterations = parameters.getParameter<int>( "outerIterations", 1 );
+        for(int i=0; i < warmupSteps; ++i )
+            timeLoop.singleStep();
+
+        auto remainingTimeLoggerFrequency = parameters.getParameter< double >( "remainingTimeLoggerFrequency", -1.0 ); // in seconds
+        if (remainingTimeLoggerFrequency > 0) {
+            // outerIterations is signed: convert explicitly, a negative value counts as 0 instead of wrapping around.
+            const uint_t totalTimeSteps = timeLoop.getNrOfTimeSteps() * ( outerIterations > 0 ? uint_c( outerIterations ) : uint_c( 0 ) );
+            auto logger = timing::RemainingTimeLogger( totalTimeSteps, remainingTimeLoggerFrequency );
+            timeLoop.addFuncAfterTimeStep( logger, "remaining time logger" );
+        }
+
+        // VTK
+        uint_t vtkWriteFrequency = parameters.getParameter<uint_t>( "vtkWriteFrequency", 0 );
+        if( vtkWriteFrequency > 0 )
+        {
+            auto vtkOutput = vtk::createVTKOutput_BlockData( *blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
+                                                             "simulation_step", false, true, true, false, 0 );
+            auto velWriter = make_shared< lbm::VelocityVTKWriter<LatticeModel_T> >(pdfFieldId, "vel");
+            vtkOutput->addCellDataWriter(velWriter);
+            timeLoop.addFuncAfterTimeStep( vtk::writeFiles( vtkOutput ), "VTK Output" );
+        }
+
+
+        bool useGui = parameters.getParameter<bool>( "useGui", false );
+        if( useGui )
+        {
+            GUI gui( timeLoop, blocks, argc, argv);
+            lbm::connectToGui<LatticeModel_T>(gui);
+            gui.run();
+        }
+        else
+        {
+            for ( int outerIteration = 0; outerIteration < outerIterations; ++outerIteration )
+            {
+                timeLoop.setCurrentTimeStepToZero();
+                WcTimer simTimer;
+                WALBERLA_LOG_INFO_ON_ROOT( "Starting simulation with " << timesteps << " time steps" );
+                simTimer.start();
+                timeLoop.run();
+                simTimer.end();
+                WALBERLA_LOG_INFO_ON_ROOT( "Simulation finished" );
+                auto time = simTimer.last();
+                // Cell count of one block. NOTE(review): equals the cells per process only for one block per process.
+                auto nrOfCells = real_c( cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2] );
+                auto mlupsPerProcess = nrOfCells * real_c( timesteps ) / time * 1e-6;
+                WALBERLA_LOG_RESULT_ON_ROOT( "MLUPS per process " << mlupsPerProcess );
+                WALBERLA_LOG_RESULT_ON_ROOT( "Time per time step " << time / real_c( timesteps ));
+                WALBERLA_ROOT_SECTION()
+                {
+                    python_coupling::PythonCallback pythonCallbackResults( "results_callback" );
+                    if ( pythonCallbackResults.isCallable())
+                    {
+                        pythonCallbackResults.data().exposeValue( "mlupsPerProcess", mlupsPerProcess );
+                        pythonCallbackResults.data().exposeValue( "stencil", infoStencil );
+                        pythonCallbackResults.data().exposeValue( "configName", infoConfigName );
+                        pythonCallbackResults.data().exposeValue( "cse_global", infoCseGlobal );
+                        pythonCallbackResults.data().exposeValue( "cse_pdfs", infoCsePdfs );
+                        // Call Python function to report results
+                        pythonCallbackResults();
+                    }
+                }
+            }
+        }
+    }
+
+    return 0;
+}
diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py
new file mode 100644
index 000000000..8f7bf791c
--- /dev/null
+++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py
@@ -0,0 +1,126 @@
+import sympy as sp
+import pystencils as ps
+from lbmpy.creationfunctions import create_lb_collision_rule
+from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
+from lbmpy_walberla import generate_lattice_model
+from pystencils_walberla import CodeGeneration
+
+omega = sp.symbols("omega")
+omega_fill = sp.symbols("omega_:10")
+
+# Collision-model options per configuration name; the name is taken from ctx.config (CODEGEN_CFG in CMake).
+options_dict = {
+    'srt': {
+        'method': 'srt',
+        'stencil': 'D3Q19',
+        'relaxation_rate': omega,
+        'compressible': False,
+    },
+    'trt': {
+        'method': 'trt',
+        'stencil': 'D3Q19',
+        'relaxation_rate': omega,
+    },
+    'mrt': {
+        'method': 'mrt',
+        'stencil': 'D3Q19',
+        'relaxation_rates': [0, omega, 1.3, 1.4, omega, 1.2, 1.1, 1.15, 1.234, 1.4235, 1.242, 1.2567, 0.9, 0.7],
+    },
+    'mrt_full': {
+        'method': 'mrt',
+        'stencil': 'D3Q19',
+        'relaxation_rates': [omega_fill[0], omega, omega_fill[1], omega_fill[2], omega_fill[3], omega_fill[4], omega_fill[5]],
+    },
+    'mrt3': {
+        'method': 'mrt3',
+        'stencil': 'D3Q19',
+        'relaxation_rates': [omega, 1.1, 1.2],
+    },
+    'entropic': {
+        'method': 'mrt3',
+        'stencil': 'D3Q19',
+        'compressible': True,
+        'relaxation_rates': [omega, omega, sp.Symbol("omega_free")],
+        'entropic': True,
+    },
+    'entropic_kbc_n4': {
+        'method': 'trt-kbc-n4',
+        'stencil': 'D3Q27',
+        'compressible': True,
+        'relaxation_rates': [omega, sp.Symbol("omega_free")],
+        'entropic': True,
+    },
+    'smagorinsky': {
+        'method': 'srt',
+        'stencil': 'D3Q19',
+        'smagorinsky': True,
+        'relaxation_rate': omega,
+    },
+    'cumulant': {
+        'stencil': 'D3Q19',
+        'compressible': True,
+        'method': 'mrt',
+        'cumulant': True,
+        'relaxation_rates': [0, omega, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+    },
+}
+
+# Template of UniformGridGenerated_Defines.h; the info* constants are read by UniformGridGenerated.cpp.
+info_header = """
+#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
+const char * infoStencil = "{stencil}";
+const char * infoConfigName = "{configName}";
+const bool infoCseGlobal = {cse_global};
+const bool infoCsePdfs = {cse_pdfs};
+"""
+
+
+with CodeGeneration() as ctx:
+    accessor = StreamPullTwoFieldsAccessor()
+    assert not accessor.is_inplace, "This app does not work for inplace accessors"
+
+    common_options = {
+        'field_name': 'pdfs',
+        'temporary_field_name': 'pdfs_tmp',
+        'kernel_type': accessor,
+        'optimization': {'cse_global': True,
+                         'cse_pdfs': False}
+    }
+    # Optional name suffixes, stripped in this order: "_noopt" disables CSE, "_d3q27" forces the D3Q27 stencil.
+    config_name = ctx.config
+    noopt = False
+    d3q27 = False
+    if config_name.endswith("_noopt"):
+        noopt = True
+        config_name = config_name[:-len("_noopt")]
+    if config_name.endswith("_d3q27"):
+        d3q27 = True
+        config_name = config_name[:-len("_d3q27")]
+
+    # Work on copies so that neither the options_dict entry nor common_options is modified below.
+    options = dict(options_dict[config_name])
+    options.update(common_options)
+    options['optimization'] = dict(common_options['optimization'])
+
+    if noopt:
+        options['optimization']['cse_global'] = False
+        options['optimization']['cse_pdfs'] = False
+    if d3q27:
+        options['stencil'] = 'D3Q27'
+
+    stencil_str = options['stencil']
+    q = int(stencil_str[stencil_str.find('Q')+1:])
+    pdfs, velocity_field = ps.fields("pdfs({q}), velocity(3) : double[3D]".format(q=q), layout='fzyx')
+    options['optimization']['symbolic_field'] = pdfs
+
+    update_rule = create_lb_collision_rule(**options)
+    generate_lattice_model(ctx, 'UniformGridGenerated_LatticeModel', update_rule)
+
+    infoHeaderParams = {
+        'stencil': stencil_str,
+        'q': q,
+        'configName': ctx.config,
+        'cse_global': int(options['optimization']['cse_global']),
+        'cse_pdfs': int(options['optimization']['cse_pdfs']),
+    }
+    ctx.write_file("UniformGridGenerated_Defines.h", info_header.format(**infoHeaderParams))
-- 
GitLab