//====================================================================================================================== // // This file is part of waLBerla. waLBerla is free software: you can // redistribute it and/or modify it under the terms of the GNU General Public // License as published by the Free Software Foundation, either version 3 of // the License, or (at your option) any later version. // // waLBerla is distributed in the hope that it will be useful, but WITHOUT // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. // // You should have received a copy of the GNU General Public License along // with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. // //! \file FlowAroundSphereCodeGen.cpp //! \author Frederik Hennig <frederik.hennig@fau.de> //! \author Markus Holzer <markus.holzer@fau.de> // //====================================================================================================================== #include "blockforest/all.h" #include "core/all.h" #include "domain_decomposition/all.h" #include "field/all.h" #include "geometry/all.h" #include "lbm/inplace_streaming/TimestepTracker.h" #include "lbm/vtk/QCriterion.h" #include "python_coupling/CreateConfig.h" #include "python_coupling/PythonCallback.h" #include "timeloop/all.h" #if defined(WALBERLA_BUILD_WITH_CUDA) # include "cuda/AddGPUFieldToStorage.h" # include "cuda/DeviceSelectMPI.h" # include "cuda/HostFieldAllocator.h" # include "cuda/NVTX.h" # include "cuda/ParallelStreams.h" # include "cuda/communication/GPUPackInfo.h" # include "cuda/communication/UniformGPUScheme.h" #endif // CodeGen includes #include "FlowAroundSphereCodeGen_InfoHeader.h" namespace walberla { typedef lbm::FlowAroundSphereCodeGen_PackInfoEven PackInfoEven_T; typedef lbm::FlowAroundSphereCodeGen_PackInfoOdd PackInfoOdd_T; typedef walberla::uint8_t flag_t; typedef FlagField< flag_t > FlagField_T; #if defined(WALBERLA_BUILD_WITH_CUDA) typedef cuda::GPUField< real_t > GPUField; #endif using namespace std::placeholders; auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage* const storage) { return new PdfField_T(storage->getNumberOfXCells(*block), storage->getNumberOfYCells(*block), storage->getNumberOfZCells(*block), uint_t(1), field::fzyx, make_shared< field::AllocateAligned< real_t, 64 > >()); }; auto VelocityCallback = [](const Cell& pos, const shared_ptr< StructuredBlockForest >& SbF, IBlock& block, real_t inflow_velocity) { Cell globalCell; CellInterval domain = SbF->getDomainCellBB(); real_t h_y = domain.yMax() - domain.yMin(); real_t h_z = domain.zMax() - domain.zMin(); SbF->transformBlockLocalToGlobalCell(globalCell, block, pos); real_t y1 = globalCell[1] - (h_y / 2.0 + 0.5); real_t z1 = globalCell[2] - (h_z / 2.0 + 0.5); real_t u = (inflow_velocity * 16) / (h_y * h_y * h_z * h_z) * (h_y / 2.0 - y1) * (h_y / 2 + y1) * (h_z / 2 - z1) * (h_z / 2 + z1); Vector3< real_t > result(u, 0.0, 0.0); return result; }; class AlternatingBeforeFunction { public: typedef std::function< void() > BeforeFunction; AlternatingBeforeFunction(BeforeFunction evenFunc, BeforeFunction oddFunc, std::shared_ptr< lbm::TimestepTracker >& tracker) : tracker_(tracker), funcs_{ evenFunc, oddFunc } {}; void operator()() { funcs_[tracker_->getCounter()](); } private: std::shared_ptr< lbm::TimestepTracker > tracker_; std::vector< BeforeFunction > funcs_; }; class Filter { public: explicit Filter(Vector3< uint_t > numberOfCells) : numberOfCells_(numberOfCells) {} void operator()(const IBlock& /*block*/) {} bool operator()(const cell_idx_t x, const cell_idx_t y, const cell_idx_t z) const { return x >= -1 && x <= cell_idx_t(numberOfCells_[0]) && y >= -1 && y <= cell_idx_t(numberOfCells_[1]) && z >= -1 && z <= cell_idx_t(numberOfCells_[2]); } private: Vector3< uint_t > numberOfCells_; }; using FluidFilter_T = Filter; int main(int argc, char** argv) { walberla::Environment walberlaEnv(argc, argv); #if defined(WALBERLA_BUILD_WITH_CUDA) cuda::selectDeviceBasedOnMpiRank(); #endif for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg) { WALBERLA_MPI_WORLD_BARRIER() auto config = *cfg; logging::configureLogging(config); auto blocks = blockforest::createUniformBlockGridFromConfig(config); // read parameters Vector3< uint_t > cellsPerBlock = config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock"); auto parameters = config->getOneBlock("Parameters"); const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(10)); const real_t omega = parameters.getParameter< real_t >("omega", real_t(1.9)); const real_t u_max = parameters.getParameter< real_t >("u_max", real_t(0.05)); const real_t reynolds_number = parameters.getParameter< real_t >("reynolds_number", real_t(1000)); const uint_t diameter_sphere = parameters.getParameter< uint_t >("diameter_sphere", uint_t(5)); const double remainingTimeLoggerFrequency = parameters.getParameter< double >("remainingTimeLoggerFrequency", 3.0); // in seconds // create fields BlockDataID pdfFieldID = blocks->addStructuredBlockData< PdfField_T >(pdfFieldAdder, "PDFs"); BlockDataID velFieldID = field::addToStorage< VelocityField_T >(blocks, "velocity", real_t(0), field::fzyx); BlockDataID densityFieldID = field::addToStorage< ScalarField_T >(blocks, "density", real_t(0), field::fzyx); #if defined(WALBERLA_BUILD_WITH_CUDA) BlockDataID pdfFieldIDGPU = cuda::addGPUFieldToStorage< PdfField_T >(blocks, pdfFieldID, "PDFs on GPU", true); BlockDataID velFieldIDGPU = cuda::addGPUFieldToStorage< VelocityField_T >(blocks, velFieldID, "velocity on GPU", true); BlockDataID densityFieldIDGPU = cuda::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldID, "density on GPU", true); #endif BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field"); // initialise all PDFs #if defined(WALBERLA_BUILD_WITH_CUDA) pystencils::FlowAroundSphereCodeGen_MacroSetter setterSweep(pdfFieldIDGPU, velFieldIDGPU); for (auto& block : *blocks) setterSweep(&block); cuda::fieldCpy< PdfField_T, GPUField >(blocks, pdfFieldID, pdfFieldIDGPU); #else pystencils::FlowAroundSphereCodeGen_MacroSetter setterSweep(pdfFieldID, velFieldID); for (auto& block : *blocks) setterSweep(&block); #endif // Create communication #if defined(WALBERLA_BUILD_WITH_CUDA) // This way of using alternating pack infos is temporary and will soon be replaced // by something more straight-forward cuda::communication::UniformGPUScheme< Stencil_T > comEven(blocks, false); comEven.addPackInfo(make_shared< PackInfoEven_T >(pdfFieldIDGPU)); auto evenComm = std::function< void() >([&]() { comEven.communicate(nullptr); }); cuda::communication::UniformGPUScheme< Stencil_T > comODD(blocks, false); comODD.addPackInfo(make_shared< PackInfoOdd_T >(pdfFieldIDGPU)); auto oddComm = std::function< void() >([&]() { comODD.communicate(nullptr); }); #else blockforest::communication::UniformBufferedScheme< Stencil_T > evenComm(blocks); evenComm.addPackInfo(make_shared< PackInfoEven_T >(pdfFieldID)); blockforest::communication::UniformBufferedScheme< Stencil_T > oddComm(blocks); oddComm.addPackInfo(make_shared< PackInfoOdd_T >(pdfFieldID)); #endif // create and initialize boundary handling const FlagUID fluidFlagUID("Fluid"); auto boundariesConfig = config->getOneBlock("Boundaries"); std::function< Vector3< real_t >(const Cell&, const shared_ptr< StructuredBlockForest >&, IBlock&) > velocity_initialisation = std::bind(VelocityCallback, _1, _2, _3, u_max); #if defined(WALBERLA_BUILD_WITH_CUDA) lbm::FlowAroundSphereCodeGen_UBB ubb(blocks, pdfFieldIDGPU, velocity_initialisation); lbm::FlowAroundSphereCodeGen_NoSlip noSlip(blocks, pdfFieldIDGPU); lbm::FlowAroundSphereCodeGen_Outflow outflow(blocks, pdfFieldIDGPU, pdfFieldID); lbm::FlowAroundSphereCodeGen_LbSweep lbSweep(densityFieldIDGPU, pdfFieldIDGPU, velFieldIDGPU, omega); #else lbm::FlowAroundSphereCodeGen_UBB ubb(blocks, pdfFieldID, velocity_initialisation); lbm::FlowAroundSphereCodeGen_NoSlip noSlip(blocks, pdfFieldID); lbm::FlowAroundSphereCodeGen_Outflow outflow(blocks, pdfFieldID); lbm::FlowAroundSphereCodeGen_LbSweep lbSweep(densityFieldID, pdfFieldID, velFieldID, omega); #endif geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldId, boundariesConfig); geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldId, fluidFlagUID); ubb.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("UBB"), fluidFlagUID); noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("NoSlip"), fluidFlagUID); outflow.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("Outflow"), fluidFlagUID); // create time loop SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps); // Timestep Tracking and Sweeps auto tracker = make_shared< lbm::TimestepTracker >(0); AlternatingBeforeFunction communication(evenComm, oddComm, tracker); // add LBM sweep and communication to time loop timeloop.add() << BeforeFunction(communication, "communication") << Sweep(noSlip.getSweep(tracker), "noSlip boundary"); timeloop.add() << Sweep(outflow.getSweep(tracker), "outflow boundary"); timeloop.add() << Sweep(ubb.getSweep(tracker), "ubb boundary"); timeloop.add() << BeforeFunction(tracker->getAdvancementFunction(), "Timestep Advancement") << Sweep(lbSweep.getSweep(tracker), "LB update rule"); // LBM stability check timeloop.addFuncAfterTimeStep(makeSharedFunctor(field::makeStabilityChecker< PdfField_T, FlagField_T >( config, blocks, pdfFieldID, flagFieldId, fluidFlagUID)), "LBM stability check"); // log remaining time timeloop.addFuncAfterTimeStep( timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency), "remaining time logger"); // add VTK output to time loop uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0); if (vtkWriteFrequency > 0) { auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", "simulation_step", false, true, true, false, 0); #if defined(WALBERLA_BUILD_WITH_CUDA) vtkOutput->addBeforeFunction([&]() { cuda::fieldCpy< VelocityField_T, GPUField >(blocks, velFieldID, velFieldIDGPU); cuda::fieldCpy< ScalarField_T, GPUField >(blocks, densityFieldID, densityFieldIDGPU); }); #endif auto velWriter = make_shared< field::VTKWriter< VelocityField_T > >(velFieldID, "velocity"); auto densityWriter = make_shared< field::VTKWriter< ScalarField_T > >(densityFieldID, "density"); FluidFilter_T filter(cellsPerBlock); auto QCriterionWriter = make_shared< lbm::QCriterionVTKWriter< VelocityField_T, FluidFilter_T > >( blocks, filter, velFieldID, "Q-Criterion"); vtkOutput->addCellDataWriter(velWriter); vtkOutput->addCellDataWriter(densityWriter); vtkOutput->addCellDataWriter(QCriterionWriter); timeloop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); } WcTimer simTimer; WALBERLA_LOG_INFO_ON_ROOT("Simulating flow around sphere:" "\n timesteps: " << timesteps << "\n reynolds number: " << reynolds_number << "\n relaxation rate: " << omega << "\n maximum inflow velocity: " << u_max << "\n diameter_sphere: " << diameter_sphere) simTimer.start(); timeloop.run(); simTimer.end(); WALBERLA_LOG_INFO_ON_ROOT("Simulation finished") auto time = simTimer.last(); auto nrOfCells = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]); auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6; WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process " << mlupsPerProcess) WALBERLA_LOG_RESULT_ON_ROOT("Time per time step " << time / real_c(timesteps)) } return EXIT_SUCCESS; } } // namespace walberla int main(int argc, char** argv) { walberla::main(argc, argv); }