//====================================================================================================================== // // This file is part of waLBerla. waLBerla is free software: you can // redistribute it and/or modify it under the terms of the GNU General Public // License as published by the Free Software Foundation, either version 3 of // the License, or (at your option) any later version. // // waLBerla is distributed in the hope that it will be useful, but WITHOUT // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. // // You should have received a copy of the GNU General Public License along // with waLBerla (see COPYING.txt). If not, see . // //! \file benchmark_multiphase.cpp //! \author Markus Holzer // //====================================================================================================================== #include "blockforest/Initialization.h" #include "blockforest/communication/UniformDirectScheme.h" #include "core/Environment.h" #include "core/logging/Initialization.h" #include "core/math/Constants.h" #include "core/timing/RemainingTimeLogger.h" #include "field/AddToStorage.h" #include "field/FlagField.h" #include "field/vtk/VTKWriter.h" #include "geometry/InitBoundaryHandling.h" #include "python_coupling/CreateConfig.h" #include "python_coupling/DictWrapper.h" #include "python_coupling/PythonCallback.h" #include "timeloop/SweepTimeloop.h" #include "InitializerFunctions.h" ////////////////////////////// // INCLUDE GENERATED FILES // //////////////////////////// #if defined(WALBERLA_BUILD_WITH_CUDA) # include "cuda/AddGPUFieldToStorage.h" # include "cuda/DeviceSelectMPI.h" # include "cuda/ParallelStreams.h" # include "cuda/communication/MemcpyPackInfo.h" # include "cuda/communication/UniformGPUScheme.h" #else # include #endif #include "GenDefines.h" using namespace walberla; using flag_t = walberla::uint8_t; using FlagField_T = FlagField< flag_t >; #if defined(WALBERLA_BUILD_WITH_CUDA) typedef cuda::GPUField< real_t > GPUField; #endif int main(int argc, char** argv) { mpi::Environment env(argc, argv); #if defined(WALBERLA_BUILD_WITH_CUDA) cuda::selectDeviceBasedOnMpiRank(); #endif for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg) { WALBERLA_MPI_WORLD_BARRIER() auto config = *cfg; logging::configureLogging(config); shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGridFromConfig(config); Vector3< uint_t > cellsPerBlock = config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock"); // Reading parameters auto parameters = config->getOneBlock("Parameters"); const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "normal"); const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50)); const real_t remainingTimeLoggerFrequency = parameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0); const uint_t scenario = parameters.getParameter< uint_t >("scenario", uint_c(1)); const uint_t warmupSteps = parameters.getParameter< uint_t >("warmupSteps", uint_t(2)); #if defined(WALBERLA_BUILD_WITH_CUDA) // CPU fields BlockDataID vel_field = field::addToStorage< VelocityField_T >(blocks, "vel", real_t(0), field::fzyx); BlockDataID phase_field = field::addToStorage< PhaseField_T >(blocks, "phase", real_t(0), field::fzyx); // GPU fields BlockDataID lb_phase_field_gpu = cuda::addGPUFieldToStorage< cuda::GPUField< real_t > >( blocks, "lb phase field on GPU", Stencil_phase_T::Size, field::fzyx, 1); BlockDataID lb_velocity_field_gpu = cuda::addGPUFieldToStorage< cuda::GPUField< real_t > >( blocks, "lb velocity field on GPU", Stencil_hydro_T::Size, field::fzyx, 1); BlockDataID vel_field_gpu = cuda::addGPUFieldToStorage< VelocityField_T >(blocks, vel_field, "velocity field on GPU", true); BlockDataID phase_field_gpu = cuda::addGPUFieldToStorage< PhaseField_T >(blocks, phase_field, "phase field on GPU", true); #else BlockDataID lb_phase_field = field::addToStorage< PdfField_phase_T >(blocks, "lb phase field", real_t(0), field::fzyx); BlockDataID lb_velocity_field = field::addToStorage< PdfField_hydro_T >(blocks, "lb velocity field", real_t(0), field::fzyx); BlockDataID vel_field = field::addToStorage< VelocityField_T >(blocks, "vel", real_t(0), field::fzyx); BlockDataID phase_field = field::addToStorage< PhaseField_T >(blocks, "phase", real_t(0), field::fzyx); #endif if (timeStepStrategy != "phase_only" && timeStepStrategy != "hydro_only" && timeStepStrategy != "kernel_only") { WALBERLA_LOG_INFO_ON_ROOT("initialization of the phase field") if (scenario == 1) { auto bubbleParameters = config->getOneBlock("Bubble"); const Vector3< real_t > bubbleMidPoint = bubbleParameters.getParameter< Vector3< real_t > >("bubbleMidPoint"); const real_t bubbleRadius = bubbleParameters.getParameter< real_t >("bubbleRadius", 20.0); initPhaseField_bubble(blocks, phase_field, bubbleRadius, bubbleMidPoint); } else if (scenario == 2) { initPhaseField_RTI(blocks, phase_field); } #if defined(WALBERLA_BUILD_WITH_CUDA) cuda::fieldCpy< GPUField, PhaseField_T >(blocks, phase_field_gpu, phase_field); #endif WALBERLA_LOG_INFO_ON_ROOT("initialization of the phase field done") } #if defined(WALBERLA_BUILD_WITH_CUDA) Vector3< int32_t > gpuBlockSize = parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1)); pystencils::initialize_phase_field_distributions init_h(lb_phase_field_gpu, phase_field_gpu, vel_field_gpu); pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field_gpu, vel_field_gpu); pystencils::phase_field_LB_step phase_field_LB_step( lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]); pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]); #else pystencils::initialize_phase_field_distributions init_h(lb_phase_field, phase_field, vel_field); pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field, vel_field); pystencils::phase_field_LB_step phase_field_LB_step(lb_phase_field, phase_field, vel_field); pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field, phase_field, vel_field); #endif // add communication #if defined(WALBERLA_BUILD_WITH_CUDA) const bool cudaEnabledMpi = parameters.getParameter< bool >("cudaEnabledMpi", false); auto Comm_velocity_based_distributions = make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi); auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu); Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions); auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_gpu); Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field); auto Comm_phase_field_distributions = make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi); auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field_gpu); Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions); #else blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_velocity_based_distributions(blocks); auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field); auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field); Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_phase_field); Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_velocity_based_distributions); blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_phase_field_distributions(blocks); auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field); Comm_phase_field_distributions.addPackInfo(generatedPackInfo_phase_field_distributions); #endif BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field"); // Boundaries const FlagUID fluidFlagUID("Fluid"); auto boundariesConfig = config->getBlock("Boundaries_GPU"); if (boundariesConfig) { geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldID, boundariesConfig); geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID); } // initialize the two lattice Boltzmann fields if (timeStepStrategy != "phase_only" && timeStepStrategy != "hydro_only" && timeStepStrategy != "kernel_only") { WALBERLA_LOG_INFO_ON_ROOT("initialization of the distributions") for (auto& block : *blocks) { init_h(&block); init_g(&block); } WALBERLA_LOG_INFO_ON_ROOT("initialization of the distributions done") } #if defined(WALBERLA_BUILD_WITH_CUDA) int streamLowPriority = 0; int streamHighPriority = 0; auto defaultStream = cuda::StreamRAII::newPriorityStream(streamLowPriority); auto innerOuterStreams = cuda::ParallelStreams(streamHighPriority); #endif auto timeLoop = make_shared< SweepTimeloop >(blocks->getBlockStorage(), timesteps); #if defined(WALBERLA_BUILD_WITH_CUDA) auto normalTimeStep = [&]() { Comm_velocity_based_distributions->startCommunication(defaultStream); for (auto& block : *blocks) phase_field_LB_step(&block, defaultStream); Comm_velocity_based_distributions->wait(defaultStream); Comm_phase_field_distributions->startCommunication(defaultStream); for (auto& block : *blocks) hydro_LB_step(&block, defaultStream); Comm_phase_field_distributions->wait(defaultStream); }; auto phase_only = [&]() { for (auto& block : *blocks) phase_field_LB_step(&block); }; auto hydro_only = [&]() { for (auto& block : *blocks) hydro_LB_step(&block); }; auto without_comm = [&]() { for (auto& block : *blocks) phase_field_LB_step(&block); for (auto& block : *blocks) hydro_LB_step(&block); }; #else auto normalTimeStep = [&]() { Comm_velocity_based_distributions.startCommunication(); for (auto& block : *blocks) phase_field_LB_step(&block); Comm_velocity_based_distributions.wait(); Comm_phase_field_distributions.startCommunication(); for (auto& block : *blocks) hydro_LB_step(&block); Comm_phase_field_distributions.wait(); }; auto phase_only = [&]() { for (auto& block : *blocks) phase_field_LB_step(&block); }; auto hydro_only = [&]() { for (auto& block : *blocks) hydro_LB_step(&block); }; auto without_comm = [&]() { for (auto& block : *blocks) phase_field_LB_step(&block); for (auto& block : *blocks) hydro_LB_step(&block); }; #endif std::function< void() > timeStep; if (timeStepStrategy == "phase_only") { timeStep = std::function< void() >(phase_only); WALBERLA_LOG_INFO_ON_ROOT("started only phasefield step without communication for benchmarking") } else if (timeStepStrategy == "hydro_only") { timeStep = std::function< void() >(hydro_only); WALBERLA_LOG_INFO_ON_ROOT("started only hydro step without communication for benchmarking") } else if (timeStepStrategy == "kernel_only") { timeStep = std::function< void() >(without_comm); WALBERLA_LOG_INFO_ON_ROOT("started complete phasefield model without communication for benchmarking") } else { timeStep = std::function< void() >(normalTimeStep); WALBERLA_LOG_INFO_ON_ROOT("normal timestep with overlapping") } timeLoop->add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step"); // remaining time logger if (remainingTimeLoggerFrequency > 0) timeLoop->addFuncAfterTimeStep( timing::RemainingTimeLogger(timeLoop->getNrOfTimeSteps(), remainingTimeLoggerFrequency), "remaining time logger"); uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0); if (vtkWriteFrequency > 1) { auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", "simulation_step", false, true, true, false, 0); #if defined(WALBERLA_BUILD_WITH_CUDA) vtkOutput->addBeforeFunction( [&]() { cuda::fieldCpy< PhaseField_T, GPUField >(blocks, phase_field, phase_field_gpu); }); #endif auto phaseWriter = make_shared< field::VTKWriter< PhaseField_T > >(phase_field, "phase"); vtkOutput->addCellDataWriter(phaseWriter); timeLoop->addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); } for (uint_t i = 0; i < warmupSteps; ++i) timeLoop->singleStep(); timeLoop->setCurrentTimeStepToZero(); WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps") WcTimer simTimer; #if defined(WALBERLA_BUILD_WITH_CUDA) cudaDeviceSynchronize(); #endif simTimer.start(); timeLoop->run(); #if defined(WALBERLA_BUILD_WITH_CUDA) cudaDeviceSynchronize(); #endif simTimer.end(); WALBERLA_LOG_INFO_ON_ROOT("Simulation finished") auto time = simTimer.last(); auto nrOfCells = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]); auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6; WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process: " << mlupsPerProcess) WALBERLA_LOG_RESULT_ON_ROOT("Time per time step: " << time / real_c(timesteps) << " s") WALBERLA_ROOT_SECTION() { python_coupling::PythonCallback pythonCallbackResults("results_callback"); if (pythonCallbackResults.isCallable()) { pythonCallbackResults.data().exposeValue("mlupsPerProcess", mlupsPerProcess); pythonCallbackResults.data().exposeValue("stencil_phase", StencilNamePhase); pythonCallbackResults.data().exposeValue("stencil_hydro", StencilNameHydro); #if defined(WALBERLA_BUILD_WITH_CUDA) pythonCallbackResults.data().exposeValue("cuda_enabled_mpi", cudaEnabledMpi); #endif // Call Python function to report results pythonCallbackResults(); } } } return EXIT_SUCCESS; }