Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 2306 additions and 53 deletions
waLBerla_add_executable ( NAME MotionSingleHeavySphere
DEPENDS blockforest boundary core domain_decomposition field lbm pe pe_coupling postprocessing timeloop vtk )
\ No newline at end of file
waLBerla_add_executable ( NAME MotionSingleHeavySphere
DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::domain_decomposition walberla::field walberla::lbm walberla::pe walberla::pe_coupling walberla::postprocessing walberla::timeloop walberla::vtk )
\ No newline at end of file
......@@ -2,5 +2,5 @@
waLBerla_link_files_to_builddir( "*.dat" )
waLBerla_add_executable ( NAME NonUniformGridBenchmark
DEPENDS blockforest boundary core domain_decomposition field lbm postprocessing timeloop vtk sqlite)
\ No newline at end of file
waLBerla_add_executable ( NAME NonUniformGridBenchmark
DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::domain_decomposition walberla::field walberla::lbm walberla::postprocessing walberla::timeloop walberla::vtk walberla::sqlite )
\ No newline at end of file
......@@ -243,23 +243,23 @@ static void refinementSelection( SetupBlockForest& forest )
AABB rightCorner( domain.xMax() - xSize, domain.yMin(), domain.zMax() - zSize,
domain.xMax(), domain.yMax(), domain.zMax() );
for( auto block = forest.begin(); block != forest.end(); ++block )
for(auto & block : forest)
{
auto & aabb = block->getAABB();
auto & aabb = block.getAABB();
if( leftCorner.intersects( aabb ) || rightCorner.intersects( aabb ) )
{
if( block->getLevel() < ( BlockForestLevels - uint_t(1) ) )
block->setMarker( true );
if( block.getLevel() < ( BlockForestLevels - uint_t(1) ) )
block.setMarker( true );
}
}
}
static void workloadAndMemoryAssignment( SetupBlockForest & forest, const memory_t memoryPerBlock ) {
for( auto block = forest.begin(); block != forest.end(); ++block )
for(auto & block : forest)
{
block->setWorkload( numeric_cast< workload_t >( uint_t(1) << block->getLevel() ) );
block->setMemory( memoryPerBlock );
block.setWorkload( numeric_cast< workload_t >( uint_t(1) << block.getLevel() ) );
block.setMemory( memoryPerBlock );
}
}
......@@ -295,7 +295,7 @@ void createSetupBlockForest( blockforest::SetupBlockForest & sforest, const Conf
#ifndef NDEBUG
WALBERLA_ASSERT_EQUAL( sforest.getNumberOfBlocks(), numberOfBlocks( workerProcesses, fineBlocksPerProcess ) );
for( uint_t i = uint_t(0); i != BlockForestLevels; ++i )
for( auto i = uint_t(0); i != BlockForestLevels; ++i )
{
std::vector< blockforest::SetupBlock * > blocks;
sforest.getBlocks( blocks, i );
......@@ -415,13 +415,13 @@ void ReGrid::operator()( std::vector< std::pair< const Block *, uint_t > > & min
domain.xMax(), domain.yMax(), domain.zMin() + real_c(0.99) * zSize );
}
for( auto it = minTargetLevels.begin(); it != minTargetLevels.end(); ++it )
for(auto & minTargetLevel : minTargetLevels)
{
auto & aabb = it->first->getAABB();
auto & aabb = minTargetLevel.first->getAABB();
if( left.intersects( aabb ) || right.intersects( aabb ) )
it->second = BlockForestLevels - uint_t(1);
minTargetLevel.second = BlockForestLevels - uint_t(1);
else
it->second = uint_t(0);
minTargetLevel.second = uint_t(0);
}
}
......
waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_link_files_to_builddir( "*.py" )
waLBerla_link_files_to_builddir( "simulation_setup" )
waLBerla_generate_target_from_python(NAME NonUniformGridCPUGenerated
FILE NonUniformGridCPU.py
OUT_FILES NonUniformGridCPUStorageSpecification.h NonUniformGridCPUStorageSpecification.cpp
NonUniformGridCPUSweepCollection.h NonUniformGridCPUSweepCollection.cpp
NoSlip.h NoSlip.cpp
UBB.h UBB.cpp
NonUniformGridCPUBoundaryCollection.h
NonUniformGridCPUInfoHeader.h)
waLBerla_add_executable( NAME NonUniformGridCPU
FILES NonUniformGridCPU.cpp LdcSetup.h GridGeneration.h
DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::domain_decomposition walberla::field walberla::geometry walberla::lbm_generated walberla::python_coupling walberla::timeloop walberla::vtk NonUniformGridCPUGenerated )
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file GridGeneration.h
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#pragma once
#include "blockforest/Initialization.h"
#include "blockforest/SetupBlock.h"
#include "blockforest/SetupBlockForest.h"
#include "blockforest/loadbalancing/StaticCurve.h"
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/timing/TimingPool.h"
#include <string>
#include "LdcSetup.h"
#include "NonUniformGridCPUInfoHeader.h"
using StorageSpecification_T = lbm::NonUniformGridCPUStorageSpecification;
using Stencil_T = StorageSpecification_T::Stencil;
using namespace walberla;
void createSetupBlockForest(SetupBlockForest& setupBfs,
const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup,
const bool useMPIManager=false)
{
WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
Vector3<real_t> domainSize = domainSetup.getParameter<Vector3<real_t> >("domainSize");
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
Vector3<uint_t> rootBlocks = domainSetup.getParameter<Vector3<uint_t> >("rootBlocks");
Vector3<bool> periodic = domainSetup.getParameter<Vector3<bool> >("periodic");
const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses");
const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest");
const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false);
const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false);
if(useMPIManager)
numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());
const LDC ldc(refinementDepth);
auto refSelection = ldc.refinementSelector();
setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(refSelection));
const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
if(mpi::MPIManager::instance()->numProcesses() > 1)
return;
{
std::ostringstream oss;
oss << blockForestFilestem << ".bfs";
setupBfs.saveToFile(oss.str().c_str());
}
if(writeVtk){
setupBfs.writeVTKOutput(blockForestFilestem);
}
if(outputStatistics){
WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================");
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
for (uint_t level = 0; level <= refinementDepth; level++)
{
const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
}
const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses());
WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
const real_t averageCellsPerGPU = avgBlocksPerProc * real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type);
const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
const double expectedMemoryPerGPU = double_c(averageCellsPerGPU * valuesPerCell * sizePerValue) * 1e-9;
WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per GPU will be " << expectedMemoryPerGPU << " GB")
WALBERLA_LOG_INFO_ON_ROOT("=================================================================================");
}
}
void createBlockForest(shared_ptr< BlockForest >& bfs,
const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup)
{
if (mpi::MPIManager::instance()->numProcesses() > 1)
{
const std::string blockForestFilestem =
blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
// Load structured block forest from file
std::ostringstream oss;
oss << blockForestFilestem << ".bfs";
const std::string setupBlockForestFilepath = oss.str();
std::ifstream infile(setupBlockForestFilepath.c_str());
if(!infile.good())
{
WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!")
SetupBlockForest setupBfs;
createSetupBlockForest(setupBfs, domainSetup, blockForestSetup, true);
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
}
else
{
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()),
setupBlockForestFilepath.c_str(), false);
}
}
else
{
SetupBlockForest setupBfs;
createSetupBlockForest(setupBfs, domainSetup, blockForestSetup);
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
}
}
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file LdcSetup.h
//! \author Markus Holzer <markus.holzer@fau.de>
//! \author Frederik Hennig <frederik.hennig@fau.de>
//
//======================================================================================================================
#pragma once
#include "blockforest/SetupBlock.h"
#include "blockforest/SetupBlockForest.h"
#include "blockforest/StructuredBlockForest.h"
#include "core/all.h"
#include "field/FlagField.h"
#include "field/FlagUID.h"
using namespace walberla;
using RefinementSelectionFunctor = SetupBlockForest::RefinementSelectionFunction;
using FlagField_T = FlagField< uint8_t >;
class LDCRefinement
{
private:
const uint_t refinementDepth_;
public:
explicit LDCRefinement(const uint_t depth) : refinementDepth_(depth){};
void operator()(SetupBlockForest& forest) const
{
const AABB & domain = forest.getDomain();
const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() );
const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() );
for(auto & block : forest)
{
auto & aabb = block.getAABB();
if( leftCorner.intersects( aabb ) || rightCorner.intersects( aabb ) )
{
if( block.getLevel() < refinementDepth_)
block.setMarker( true );
}
}
}
};
class LDC
{
private:
const std::string refinementProfile_;
const uint_t refinementDepth_;
const FlagUID noSlipFlagUID_;
const FlagUID ubbFlagUID_;
public:
explicit LDC(const uint_t depth) : refinementDepth_(depth), noSlipFlagUID_("NoSlip"), ubbFlagUID_("UBB"){};
RefinementSelectionFunctor refinementSelector() const
{
return LDCRefinement(refinementDepth_);
}
void setupBoundaryFlagField(StructuredBlockForest& sbfs, const BlockDataID flagFieldID)
{
for (auto bIt = sbfs.begin(); bIt != sbfs.end(); ++bIt)
{
auto& b = dynamic_cast< Block& >(*bIt);
const uint_t level = b.getLevel();
auto flagField = b.getData< FlagField_T >(flagFieldID);
const uint8_t noslipFlag = flagField->registerFlag(noSlipFlagUID_);
const uint8_t ubbFlag = flagField->registerFlag(ubbFlagUID_);
for (auto cIt = flagField->beginWithGhostLayerXYZ(2); cIt != flagField->end(); ++cIt)
{
const Cell localCell = cIt.cell();
Cell globalCell(localCell);
sbfs.transformBlockLocalToGlobalCell(globalCell, b);
if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, ubbFlag); }
else if (globalCell.z() < 0 || globalCell.y() < 0 || globalCell.x() < 0 ||
globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)) || globalCell.z() >= cell_idx_c(sbfs.getNumberOfZCells(level)))
{
flagField->addFlag(localCell, noslipFlag);
}
}
}
}
};
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file NonUniformGridCPU.cpp
//! \author Markus Holzer <markus.holzer@fau.de>
//! \author Frederik Hennig <frederik.hennig@fau.de>
//
//======================================================================================================================
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/timing/TimingPool.h"
#include "field/AddToStorage.h"
#include "field/FlagField.h"
#include "field/vtk/VTKWriter.h"
#include "geometry/InitBoundaryHandling.h"
#include "python_coupling/CreateConfig.h"
#include "python_coupling/DictWrapper.h"
#include "python_coupling/PythonCallback.h"
#include "timeloop/SweepTimeloop.h"
#include <cmath>
#include "GridGeneration.h"
#include "LdcSetup.h"
#include "NonUniformGridCPUInfoHeader.h"
#include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h"
#include "lbm_generated/evaluation/PerformanceEvaluation.h"
#include "lbm_generated/field/AddToStorage.h"
#include "lbm_generated/field/PdfField.h"
#include "lbm_generated/refinement/BasicRecursiveTimeStep.h"
using namespace walberla;
using StorageSpecification_T = lbm::NonUniformGridCPUStorageSpecification;
using Stencil_T = StorageSpecification_T::Stencil;
using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil;
using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
using BoundaryCollection_T = lbm::NonUniformGridCPUBoundaryCollection< FlagField_T >;
using SweepCollection_T = lbm::NonUniformGridCPUSweepCollection;
using blockforest::communication::NonUniformBufferedScheme;
int main(int argc, char** argv)
{
const mpi::Environment env(argc, argv);
mpi::MPIManager::instance()->useWorldComm();
const std::string input_filename(argv[1]);
const bool inputIsPython = string_ends_with(input_filename, ".py");
for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
{
WALBERLA_MPI_BARRIER()
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// BLOCK FOREST SETUP ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto config = *cfg;
logging::configureLogging(config);
auto domainSetup = config->getOneBlock("DomainSetup");
auto blockForestSetup = config->getOneBlock("SetupBlockForest");
const bool writeSetupForestAndReturn = blockForestSetup.getParameter< bool >("writeSetupForestAndReturn", true);
const std::string blockForestFilestem =
blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
shared_ptr< BlockForest > bfs;
createBlockForest(bfs, domainSetup, blockForestSetup);
if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1)
{
WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program")
return EXIT_SUCCESS;
}
auto blocks =
std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
blocks->createCellBoundingBoxes();
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks())
for (uint_t level = 0; level <= refinementDepth; level++)
{
WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << blocks->getNumberOfBlocks(level))
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// DISTRIBUTED DATA STRUCTURES ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto ldc = std::make_shared< LDC >(refinementDepth);
// Reading parameters
auto parameters = config->getOneBlock("Parameters");
const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.4));
const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50));
const bool benchmarkKernelOnly = parameters.getParameter< bool >("benchmarkKernelOnly", false);
// Creating fields
const StorageSpecification_T StorageSpec = StorageSpecification_T();
const BlockDataID pdfFieldID =
lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx);
const BlockDataID velFieldID =
field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2));
const BlockDataID densityFieldID =
field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2));
const BlockDataID flagFieldID =
field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
const Cell innerOuterSplit =
Cell(parameters.getParameter< Vector3< cell_idx_t > >("innerOuterSplit", Vector3< cell_idx_t >(1, 1, 1)));
SweepCollection_T sweepCollection(blocks, pdfFieldID, densityFieldID, velFieldID, omega, innerOuterSplit);
for (auto& block : *blocks)
{
sweepCollection.initialise(&block, cell_idx_c(1));
}
WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Initialisation done")
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// LB SWEEPS AND BOUNDARY HANDLING ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
const FlagUID fluidFlagUID("Fluid");
ldc->setupBoundaryFlagField(*blocks, flagFieldID);
geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID, 2);
BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldID, fluidFlagUID);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// COMMUNICATION SCHEME ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...")
auto communication = std::make_shared< NonUniformBufferedScheme< CommunicationStencil_T > >(blocks);
auto packInfo = lbm_generated::setupNonuniformPdfCommunication< PdfField_T >(blocks, pdfFieldID);
communication->addPackInfo(packInfo);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// TIME STEP DEFINITIONS ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
lbm_generated::BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T > LBMMeshRefinement(
blocks, pdfFieldID, sweepCollection, boundaryCollection, communication, packInfo);
SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
if (benchmarkKernelOnly)
{
timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL), "LBM StreamCollide");
}
else { LBMMeshRefinement.addRefinementToTimeLoop(timeLoop); }
// VTK
const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
const bool useVTKAMRWriter = parameters.getParameter< bool >("useVTKAMRWriter", false);
const bool oneFilePerProcess = parameters.getParameter< bool >("oneFilePerProcess", false);
auto finalDomain = blocks->getDomain();
if (vtkWriteFrequency > 0)
{
auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
"simulation_step", false, true, true, false, 0, useVTKAMRWriter, oneFilePerProcess);
auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(velFieldID, "vel");
vtkOutput->addCellDataWriter(velWriter);
if (parameters.getParameter< bool >("writeOnlySlice", true)){
const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - blocks->dz(refinementDepth),
finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + blocks->dz(refinementDepth));
vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY));
}
vtkOutput->addBeforeFunction([&]() {
for (auto& block : *blocks)
sweepCollection.calculateMacroscopicParameters(&block);
});
timeLoop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// BENCHMARK ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto remainingTimeLoggerFrequency =
parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(-1.0)); // in seconds
if (remainingTimeLoggerFrequency > 0)
{
auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency);
timeLoop.addFuncAfterTimeStep(logger, "remaining time logger");
}
lbm_generated::PerformanceEvaluation< FlagField_T > const performance(blocks, flagFieldID, fluidFlagUID);
field::CellCounter< FlagField_T > fluidCells(blocks, flagFieldID, fluidFlagUID);
fluidCells();
WALBERLA_LOG_INFO_ON_ROOT("Non uniform Grid benchmark with " << fluidCells.numberOfCells()
<< " fluid cells (in total on all levels)")
WcTimingPool timeloopTiming;
WcTimer simTimer;
WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Starting benchmark with " << timesteps << " time steps")
WALBERLA_MPI_BARRIER()
simTimer.start();
timeLoop.run(timeloopTiming);
WALBERLA_MPI_BARRIER()
simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT("Benchmark finished")
double time = simTimer.max();
WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
performance.logResultOnRoot(timesteps, time);
const auto reducedTimeloopTiming = timeloopTiming.getReduced();
WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
WALBERLA_ROOT_SECTION()
{
if (inputIsPython)
{
python_coupling::PythonCallback pythonCallbackResults("results_callback");
if (pythonCallbackResults.isCallable())
{
pythonCallbackResults.data().exposeValue("numProcesses", performance.processes());
pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
pythonCallbackResults.data().exposeValue("numCores", performance.cores());
pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerProcess",
performance.mlupsPerProcess(timesteps, time));
pythonCallbackResults.data().exposeValue("stencil", infoStencil);
pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup);
pythonCallbackResults.data().exposeValue("cse_global", infoCseGlobal);
pythonCallbackResults.data().exposeValue("cse_pdfs", infoCsePdfs);
// Call Python function to report results
pythonCallbackResults();
}
}
}
}
return EXIT_SUCCESS;
}
\ No newline at end of file
import sympy as sp
import pystencils as ps
from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule
from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil
from pystencils_walberla import CodeGeneration, generate_info_header
from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
omega = sp.symbols("omega")
omega_free = sp.Symbol("omega_free")
info_header = """
const char * infoStencil = "{stencil}";
const char * infoStreamingPattern = "{streaming_pattern}";
const char * infoCollisionSetup = "{collision_setup}";
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""
with CodeGeneration() as ctx:
field_type = "float64" if ctx.double_accuracy else "float32"
cpu_vec = {"instruction_set": None}
streaming_pattern = 'esopull'
timesteps = get_timesteps(streaming_pattern)
stencil = LBStencil(Stencil.D3Q19)
method_enum = Method.CUMULANT
fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False
collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower()
assert stencil.D == 3, "This application supports only three-dimensional stencils"
pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True,
fourth_order_correction=fourth_order_correction,
streaming_pattern=streaming_pattern)
lbm_opt = LBMOptimisation(cse_global=False, field_layout="fzyx")
method = create_lb_method(lbm_config=lbm_config)
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
boundary_object=NoSlip())
ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
boundary_object=UBB([0.05, 0, 0], data_type=field_type))
generate_lbm_package(ctx, name="NonUniformGridCPU",
collision_rule=collision_rule,
lbm_config=lbm_config, lbm_optimisation=lbm_opt,
nonuniform=True, boundaries=[no_slip, ubb],
macroscopic_fields=macroscopic_fields,
target=ps.Target.CPU, cpu_vectorize_info=cpu_vec,)
infoHeaderParams = {
'stencil': stencil.name.lower(),
'streaming_pattern': streaming_pattern,
'collision_setup': collision_setup,
'cse_global': int(lbm_opt.cse_global),
'cse_pdfs': int(lbm_opt.cse_pdfs),
}
field_typedefs = {'VelocityField_T': velocity_field,
'ScalarField_T': density_field}
generate_info_header(ctx, 'NonUniformGridCPUInfoHeader',
field_typedefs=field_typedefs,
additional_code=info_header.format(**infoHeaderParams))
import waLBerla as wlb
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
import sqlite3
import os
import sys
try:
import machinestate as ms
except ImportError:
ms = None
DB_FILE = os.environ.get('DB_FILE', "cpu_benchmark.sqlite3")
BENCHMARK = int(os.environ.get('BENCHMARK', 0))
WeakX = int(os.environ.get('WeakX', 128))
WeakY = int(os.environ.get('WeakY', 128))
WeakZ = int(os.environ.get('WeakZ', 128))
StrongX = int(os.environ.get('StrongX', 128))
StrongY = int(os.environ.get('StrongY', 128))
StrongZ = int(os.environ.get('StrongZ', 128))
class Scenario:
def __init__(self,
domain_size=(64, 64, 64),
root_blocks=(2, 2, 2),
num_processes=1,
refinement_depth=0,
cells_per_block=(32, 32, 32),
timesteps=101,
vtk_write_frequency=0,
logger_frequency=0,
blockforest_filestem="blockforest",
write_setup_vtk=True,
db_file_name=None):
self.domain_size = domain_size
self.root_blocks = root_blocks
self.cells_per_block = cells_per_block
self.periodic = (0, 0, 0)
self.refinement_depth = refinement_depth
self.num_processes = num_processes
self.bfs_filestem = blockforest_filestem
self.write_setup_vtk = write_setup_vtk
self.timesteps = timesteps
self.vtk_write_frequency = vtk_write_frequency
self.logger_frequency = logger_frequency
self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False)
@wlb.member_callback
def config(self, print_dict=True):
from pprint import pformat
config_dict = {
'DomainSetup': {
'domainSize': self.domain_size,
'rootBlocks': self.root_blocks,
'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic
},
'SetupBlockForest': {
'refinementDepth': self.refinement_depth,
'numProcesses': self.num_processes,
'blockForestFilestem': self.bfs_filestem,
'writeVtk': self.write_setup_vtk,
'outputStatistics': True,
'writeSetupForestAndReturn': True,
},
'Parameters': {
'omega': 1.95,
'timesteps': self.timesteps,
'remainingTimeLoggerFrequency': self.logger_frequency,
'vtkWriteFrequency': self.vtk_write_frequency,
'useVTKAMRWriter': True,
'oneFilePerProcess': False,
'writeOnlySlice': False
},
'Logging': {
'logLevel': "info",
}
}
if print_dict:
wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
return config_dict
@wlb.member_callback
def results_callback(self, **kwargs):
data = {}
data.update(self.config_dict['Parameters'])
data.update(self.config_dict['DomainSetup'])
data.update(kwargs)
data['executable'] = sys.argv[0]
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
if ms:
state = ms.MachineState(extended=False, anonymous=True)
state.generate() # generate subclasses
state.update() # read information
data["MachineState"] = str(state.get())
else:
print("MachineState module is not available. MachineState was not saved")
sequenceValuesToScalars(data)
result = data
sequenceValuesToScalars(result)
num_tries = 4
# check multiple times e.g. may fail when multiple benchmark processes are running
table_name = f"runs"
table_name = table_name.replace("-", "_")
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, table_name, self.db_file_name)
storeSingle(result, table_name, self.db_file_name)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
def weak_scaling_ldc(num_proc, uniform=False):
wlb.log_info_on_root("Running weak scaling benchmark...")
# This benchmark must run from 16 processes onwards
if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if uniform:
factor = 3 * num_proc
name = "uniform"
else:
if num_proc % 16 != 0:
raise RuntimeError("Number of processes must be dividable by 16")
factor = int(num_proc // 16)
name = "nonuniform"
cells_per_block = (WeakX, WeakY, WeakZ)
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks,
num_processes=num_proc,
cells_per_block=cells_per_block,
refinement_depth=0 if uniform else 3,
timesteps=10,
db_file_name=f"weakScalingCPU{name}LDC.sqlite3")
scenarios.add(scenario)
def strong_scaling_ldc(num_proc, uniform=False):
wlb.log_info_on_root("Running strong scaling benchmark...")
# This benchmark must run from 64 GPUs onwards
if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if num_proc % 64 != 0:
raise RuntimeError("Number of processes must be dividable by 64")
cells_per_block = (StrongX, StrongY, StrongZ)
if uniform:
domain_size = (cells_per_block[0] * 2, cells_per_block[1] * 2, cells_per_block[2] * 16)
name = "uniform"
else:
factor = int(num_proc / 64)
blocks64 = block_decomposition(factor)
cells_per_block = tuple([int(c / b) for c, b in zip(cells_per_block, reversed(blocks64))])
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
name = "nonuniform"
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks,
num_processes=num_proc,
cells_per_block=cells_per_block,
refinement_depth=0 if uniform else 3,
timesteps=10,
db_file_name=f"strongScalingCPU{name}LDC.sqlite3")
scenarios.add(scenario)
def validation_run():
"""Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
wlb.log_info_on_root("Validation run")
domain_size = (96, 96, 32)
cells_per_block = (32, 32, 32)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(domain_size=domain_size,
root_blocks=root_blocks,
num_processes=1,
refinement_depth=3,
cells_per_block=cells_per_block,
timesteps=1001,
vtk_write_frequency=100,
logger_frequency=5,
write_setup_vtk=True)
scenarios.add(scenario)
if BENCHMARK == 0:
validation_run()
elif BENCHMARK == 1:
weak_scaling_ldc(1, False)
elif BENCHMARK == 2:
strong_scaling_ldc(1, False)
else:
print(f"Invalid benchmark case {BENCHMARK}")
waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_link_files_to_builddir( "*.py" )
waLBerla_link_files_to_builddir( "simulation_setup" )
waLBerla_generate_target_from_python(NAME NonUniformGridGPUGenerated
FILE NonUniformGridGPU.py
OUT_FILES NonUniformGridGPUStorageSpecification.h NonUniformGridGPUStorageSpecification.${CODEGEN_FILE_SUFFIX}
NonUniformGridGPUSweepCollection.h NonUniformGridGPUSweepCollection.${CODEGEN_FILE_SUFFIX}
NoSlip.h NoSlip.${CODEGEN_FILE_SUFFIX}
UBB.h UBB.${CODEGEN_FILE_SUFFIX}
NonUniformGridGPUBoundaryCollection.h
NonUniformGridGPUInfoHeader.h)
waLBerla_add_executable( NAME NonUniformGridGPU
FILES NonUniformGridGPU.cpp LdcSetup.h GridGeneration.h
DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::gpu walberla::domain_decomposition walberla::field walberla::geometry walberla::lbm_generated walberla::python_coupling walberla::timeloop walberla::vtk NonUniformGridGPUGenerated )
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file GridGeneration.h
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#pragma once
#include "blockforest/Initialization.h"
#include "blockforest/SetupBlock.h"
#include "blockforest/SetupBlockForest.h"
#include "blockforest/loadbalancing/StaticCurve.h"
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/timing/TimingPool.h"
#include <string>
#include "LdcSetup.h"
#include "NonUniformGridGPUInfoHeader.h"
using StorageSpecification_T = lbm::NonUniformGridGPUStorageSpecification;
using Stencil_T = StorageSpecification_T::Stencil;
using namespace walberla;
void createSetupBlockForest(SetupBlockForest& setupBfs,
const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup,
const bool useMPIManager=false)
{
WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
Vector3<real_t> domainSize = domainSetup.getParameter<Vector3<real_t> >("domainSize");
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
Vector3<uint_t> rootBlocks = domainSetup.getParameter<Vector3<uint_t> >("rootBlocks");
Vector3<bool> periodic = domainSetup.getParameter<Vector3<bool> >("periodic");
const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses");
const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest");
const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false);
const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false);
if(useMPIManager)
numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());
const LDC ldc(refinementDepth);
auto refSelection = ldc.refinementSelector();
setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(refSelection));
const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
if(mpi::MPIManager::instance()->numProcesses() > 1)
return;
{
std::ostringstream oss;
oss << blockForestFilestem << ".bfs";
setupBfs.saveToFile(oss.str().c_str());
}
if(writeVtk){
setupBfs.writeVTKOutput(blockForestFilestem);
}
if(outputStatistics){
WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================");
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
for (uint_t level = 0; level <= refinementDepth; level++){
const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
}
const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses());
WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
const real_t averageCellsPerGPU = avgBlocksPerProc * real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type);
const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
const double expectedMemoryPerGPU = double_c(averageCellsPerGPU * valuesPerCell * sizePerValue) * 1e-9;
WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per GPU will be " << expectedMemoryPerGPU << " GB")
WALBERLA_LOG_INFO_ON_ROOT("=================================================================================");
}
}
void createBlockForest(shared_ptr< BlockForest >& bfs,
const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup)
{
if (mpi::MPIManager::instance()->numProcesses() > 1){
const std::string blockForestFilestem =
blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
// Load structured block forest from file
std::ostringstream oss;
oss << blockForestFilestem << ".bfs";
const std::string setupBlockForestFilepath = oss.str();
std::ifstream infile(setupBlockForestFilepath.c_str());
if(!infile.good()){
WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!")
SetupBlockForest setupBfs;
createSetupBlockForest(setupBfs, domainSetup, blockForestSetup, true);
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
}
else{
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()),
setupBlockForestFilepath.c_str(), false);
}
}
else{
SetupBlockForest setupBfs;
createSetupBlockForest(setupBfs, domainSetup, blockForestSetup);
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
}
}
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file LdcSetup.h
//! \author Markus Holzer <markus.holzer@fau.de>
//! \author Frederik Hennig <frederik.hennig@fau.de>
//
//======================================================================================================================
#pragma once
#include "blockforest/SetupBlock.h"
#include "blockforest/SetupBlockForest.h"
#include "blockforest/StructuredBlockForest.h"
#include "core/all.h"
#include "field/FlagField.h"
#include "field/FlagUID.h"
using namespace walberla;
using RefinementSelectionFunctor = SetupBlockForest::RefinementSelectionFunction;
using FlagField_T = FlagField< uint8_t >;
class LDCRefinement
{
private:
const uint_t refinementDepth_;
public:
explicit LDCRefinement(const uint_t depth) : refinementDepth_(depth){};
void operator()(SetupBlockForest& forest) const
{
const AABB & domain = forest.getDomain();
const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() );
const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() );
for(auto & block : forest)
{
auto & aabb = block.getAABB();
if( leftCorner.intersects( aabb ) || rightCorner.intersects( aabb ) )
{
if( block.getLevel() < refinementDepth_)
block.setMarker( true );
}
}
}
};
class LDC
{
private:
const std::string refinementProfile_;
const uint_t refinementDepth_;
const FlagUID noSlipFlagUID_;
const FlagUID ubbFlagUID_;
public:
explicit LDC(const uint_t depth) : refinementDepth_(depth), noSlipFlagUID_("NoSlip"), ubbFlagUID_("UBB"){};
RefinementSelectionFunctor refinementSelector() const
{
return LDCRefinement(refinementDepth_);
}
void setupBoundaryFlagField(StructuredBlockForest& sbfs, const BlockDataID flagFieldID)
{
for (auto bIt = sbfs.begin(); bIt != sbfs.end(); ++bIt)
{
auto& b = dynamic_cast< Block& >(*bIt);
const uint_t level = b.getLevel();
auto flagField = b.getData< FlagField_T >(flagFieldID);
const uint8_t noslipFlag = flagField->registerFlag(noSlipFlagUID_);
const uint8_t ubbFlag = flagField->registerFlag(ubbFlagUID_);
for (auto cIt = flagField->beginWithGhostLayerXYZ(2); cIt != flagField->end(); ++cIt)
{
const Cell localCell = cIt.cell();
Cell globalCell(localCell);
sbfs.transformBlockLocalToGlobalCell(globalCell, b);
if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, ubbFlag); }
else if (globalCell.y() < 0 || globalCell.x() < 0 || globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)))
{
flagField->addFlag(localCell, noslipFlag);
}
}
}
}
};
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file NonUniformGridGPU.cpp
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/timing/TimingPool.h"
#include "field/AddToStorage.h"
#include "field/FlagField.h"
#include "field/vtk/VTKWriter.h"
#include "geometry/InitBoundaryHandling.h"
#include "gpu/AddGPUFieldToStorage.h"
#include "gpu/DeviceSelectMPI.h"
#include "gpu/ErrorChecking.h"
#include "gpu/FieldCopy.h"
#include "gpu/HostFieldAllocator.h"
#include "gpu/ParallelStreams.h"
#include "gpu/communication/NonUniformGPUScheme.h"
#include "python_coupling/CreateConfig.h"
#include "python_coupling/DictWrapper.h"
#include "python_coupling/PythonCallback.h"
#include <cmath>
#include "GridGeneration.h"
#include "LdcSetup.h"
#include "NonUniformGridGPUInfoHeader.h"
#include "lbm_generated/evaluation/PerformanceEvaluation.h"
#include "lbm_generated/field/AddToStorage.h"
#include "lbm_generated/field/PdfField.h"
#include "lbm_generated/gpu/AddToStorage.h"
#include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.h"
#include "lbm_generated/gpu/GPUPdfField.h"
#include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h"
using namespace walberla;
using StorageSpecification_T = lbm::NonUniformGridGPUStorageSpecification;
using Stencil_T = StorageSpecification_T::Stencil;
using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil;
using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
using GPUPdfField_T = lbm_generated::GPUPdfField< StorageSpecification_T >;
using FlagField_T = FlagField< uint8_t >;
using BoundaryCollection_T = lbm::NonUniformGridGPUBoundaryCollection< FlagField_T >;
using SweepCollection_T = lbm::NonUniformGridGPUSweepCollection;
using gpu::communication::NonUniformGPUScheme;
int main(int argc, char** argv)
{
const mpi::Environment env(argc, argv);
mpi::MPIManager::instance()->useWorldComm();
gpu::selectDeviceBasedOnMpiRank();
const std::string input_filename(argv[1]);
const bool inputIsPython = string_ends_with(input_filename, ".py");
for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
{
WALBERLA_MPI_WORLD_BARRIER()
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// SETUP AND CONFIGURATION ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto config = *cfg;
logging::configureLogging(config);
auto domainSetup = config->getOneBlock("DomainSetup");
auto blockForestSetup = config->getOneBlock("SetupBlockForest");
const bool writeSetupForestAndReturn = blockForestSetup.getParameter< bool >("writeSetupForestAndReturn", true);
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
// Reading parameters
auto parameters = config->getOneBlock("Parameters");
const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.95));
const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50));
const bool gpuEnabledMPI = parameters.getParameter< bool >("gpuEnabledMPI", false);
shared_ptr< BlockForest > bfs;
createBlockForest(bfs, domainSetup, blockForestSetup);
if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1)
{
WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program")
return EXIT_SUCCESS;
}
auto blocks =
std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
blocks->createCellBoundingBoxes();
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks() << " on " << blocks->getNumberOfLevels() << " refinement levels")
for (uint_t level = 0; level < blocks->getNumberOfLevels(); level++)
{
WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << blocks->getNumberOfBlocks(level))
}
WALBERLA_LOG_INFO_ON_ROOT("Start field allocation")
// Creating fields
const StorageSpecification_T StorageSpec = StorageSpecification_T();
auto allocator = make_shared< gpu::HostFieldAllocator< real_t > >();
const BlockDataID pdfFieldCpuID =
lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx, allocator);
const BlockDataID velFieldCpuID =
field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2), allocator);
const BlockDataID densityFieldCpuID =
field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2), allocator);
const BlockDataID flagFieldID =
field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
const BlockDataID pdfFieldGpuID =
lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, pdfFieldCpuID, StorageSpec, "pdfs on GPU", true);
const BlockDataID velFieldGpuID =
gpu::addGPUFieldToStorage< VelocityField_T >(blocks, velFieldCpuID, "velocity on GPU", true);
const BlockDataID densityFieldGpuID =
gpu::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldCpuID, "velocity on GPU", true);
WALBERLA_LOG_INFO_ON_ROOT("Finished field allocation")
const Cell innerOuterSplit =
Cell(parameters.getParameter< Vector3< cell_idx_t > >("innerOuterSplit", Vector3< cell_idx_t >(1, 1, 1)));
Vector3< int32_t > gpuBlockSize =
parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1));
SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0],
gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit);
for (auto& iBlock : *blocks){
sweepCollection.initialise(&iBlock, cell_idx_c(1));
}
sweepCollection.initialiseBlockPointer();
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Initialisation done")
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// LB SWEEPS AND BOUNDARY HANDLING ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto ldc = std::make_shared< LDC >(blocks->getDepth());
const FlagUID fluidFlagUID("Fluid");
ldc->setupBoundaryFlagField(*blocks, flagFieldID);
geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID, 0);
BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldGpuID, fluidFlagUID);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// COMMUNICATION SCHEME ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...")
auto communication = std::make_shared< NonUniformGPUScheme< CommunicationStencil_T > >(blocks, gpuEnabledMPI);
auto packInfo = lbm_generated::setupNonuniformGPUPdfCommunication< GPUPdfField_T >(blocks, pdfFieldGpuID);
communication->addPackInfo(packInfo);
WALBERLA_MPI_BARRIER()
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// TIME STEP DEFINITIONS ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
int streamHighPriority = 0;
int streamLowPriority = 0;
WALBERLA_GPU_CHECK(gpuDeviceGetStreamPriorityRange(&streamLowPriority, &streamHighPriority))
sweepCollection.setOuterPriority(streamHighPriority);
auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority);
lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T >
LBMMeshRefinement(blocks, pdfFieldGpuID, sweepCollection, boundaryCollection, communication, packInfo);
SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
LBMMeshRefinement.addRefinementToTimeLoop(timeLoop, uint_c(0));
// VTK
const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
const bool useVTKAMRWriter = parameters.getParameter< bool >("useVTKAMRWriter", false);
const bool oneFilePerProcess = parameters.getParameter< bool >("oneFilePerProcess", false);
auto finalDomain = blocks->getDomain();
if (vtkWriteFrequency > 0){
auto vtkOutput =
vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", "simulation_step",
false, true, true, false, 0, useVTKAMRWriter, oneFilePerProcess);
auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(velFieldCpuID, "vel");
vtkOutput->addCellDataWriter(velWriter);
if (parameters.getParameter< bool >("writeOnlySlice", true)){
const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - blocks->dz(blocks->getDepth()),
finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + blocks->dz(blocks->getDepth()));
vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY));
}
vtkOutput->addBeforeFunction([&]() {
for (auto& block : *blocks)
sweepCollection.calculateMacroscopicParameters(&block);
gpu::fieldCpy< VelocityField_T, gpu::GPUField< real_t > >(blocks, velFieldCpuID, velFieldGpuID);
});
timeLoop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// BENCHMARK ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto remainingTimeLoggerFrequency =
parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(-1.0)); // in seconds
if (remainingTimeLoggerFrequency > 0){
auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency);
timeLoop.addFuncAfterTimeStep(logger, "remaining time logger");
}
lbm_generated::PerformanceEvaluation< FlagField_T > const performance(blocks, flagFieldID, fluidFlagUID);
field::CellCounter< FlagField_T > fluidCells(blocks, flagFieldID, fluidFlagUID);
fluidCells();
WALBERLA_LOG_INFO_ON_ROOT("Non uniform Grid benchmark with " << fluidCells.numberOfCells()
<< " fluid cells (in total on all levels)")
WcTimingPool timeloopTiming;
WcTimer simTimer;
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
WALBERLA_MPI_BARRIER()
simTimer.start();
timeLoop.run(timeloopTiming);
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
WALBERLA_MPI_BARRIER()
simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
double time = simTimer.max();
WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
performance.logResultOnRoot(timesteps, time);
const auto reducedTimeloopTiming = timeloopTiming.getReduced();
WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
WALBERLA_ROOT_SECTION()
{
if (inputIsPython)
{
python_coupling::PythonCallback pythonCallbackResults("results_callback");
if (pythonCallbackResults.isCallable())
{
pythonCallbackResults.data().exposeValue("numProcesses", lbm_generated::PerformanceEvaluation< FlagField_T >::processes());
pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
pythonCallbackResults.data().exposeValue("numCores", performance.cores());
pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
pythonCallbackResults.data().exposeValue("stencil", infoStencil);
pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup);
pythonCallbackResults.data().exposeValue("cse_global", infoCseGlobal);
pythonCallbackResults.data().exposeValue("cse_pdfs", infoCsePdfs);
// Call Python function to report results
pythonCallbackResults();
}
}
}
}
return EXIT_SUCCESS;
}
\ No newline at end of file
import sympy as sp
import numpy as np
import pystencils as ps
from pystencils.typing import TypedSymbol
from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule
from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil, SubgridScaleModel
from pystencils_walberla import CodeGeneration, generate_info_header
from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
omega = sp.symbols("omega")
omega_free = sp.Symbol("omega_free")
compile_time_block_size = False
max_threads = 256
sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
TypedSymbol("cudaBlockSize1", np.int32),
TypedSymbol("cudaBlockSize2", np.int32))
gpu_indexing_params = {'block_size': sweep_block_size}
info_header = """
const char * infoStencil = "{stencil}";
const char * infoStreamingPattern = "{streaming_pattern}";
const char * infoCollisionSetup = "{collision_setup}";
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""
with CodeGeneration() as ctx:
field_type = "float64" if ctx.double_accuracy else "float32"
streaming_pattern = 'esopull'
timesteps = get_timesteps(streaming_pattern)
stencil = LBStencil(Stencil.D3Q19)
method_enum = Method.CUMULANT
fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False
collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower()
assert stencil.D == 3, "This application supports only three-dimensional stencils"
pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True,
fourth_order_correction=fourth_order_correction,
streaming_pattern=streaming_pattern)
lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx')
method = create_lb_method(lbm_config=lbm_config)
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
boundary_object=NoSlip())
ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
boundary_object=UBB([0.05, 0, 0], data_type=field_type))
generate_lbm_package(ctx, name="NonUniformGridGPU",
collision_rule=collision_rule,
lbm_config=lbm_config, lbm_optimisation=lbm_opt,
nonuniform=True, boundaries=[no_slip, ubb],
macroscopic_fields=macroscopic_fields,
target=ps.Target.GPU, gpu_indexing_params=gpu_indexing_params,
max_threads=max_threads)
infoHeaderParams = {
'stencil': stencil.name.lower(),
'streaming_pattern': streaming_pattern,
'collision_setup': collision_setup,
'cse_global': int(lbm_opt.cse_global),
'cse_pdfs': int(lbm_opt.cse_pdfs),
}
field_typedefs = {'VelocityField_T': velocity_field,
'ScalarField_T': density_field}
generate_info_header(ctx, 'NonUniformGridGPUInfoHeader',
field_typedefs=field_typedefs,
additional_code=info_header.format(**infoHeaderParams))
import waLBerla as wlb
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
import sqlite3
import os
import sys
try:
import machinestate as ms
except ImportError:
ms = None
DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3")
BENCHMARK = int(os.environ.get('BENCHMARK', 0))
WeakX = int(os.environ.get('WeakX', 128))
WeakY = int(os.environ.get('WeakY', 128))
WeakZ = int(os.environ.get('WeakZ', 128))
StrongX = int(os.environ.get('StrongX', 128))
StrongY = int(os.environ.get('StrongY', 128))
StrongZ = int(os.environ.get('StrongZ', 128))
class Scenario:
def __init__(self,
domain_size=(64, 64, 64),
root_blocks=(2, 2, 2),
num_processes=1,
refinement_depth=0,
cells_per_block=(32, 32, 32),
timesteps=101,
gpu_enabled_mpi=False,
vtk_write_frequency=0,
logger_frequency=30,
blockforest_filestem="blockforest",
write_setup_vtk=True,
db_file_name=None):
self.domain_size = domain_size
self.root_blocks = root_blocks
self.cells_per_block = cells_per_block
self.periodic = (0, 0, 1)
self.refinement_depth = refinement_depth
self.num_processes = num_processes
self.bfs_filestem = blockforest_filestem
self.write_setup_vtk = write_setup_vtk
self.timesteps = timesteps
self.gpu_enabled_mpi = gpu_enabled_mpi
self.vtk_write_frequency = vtk_write_frequency
self.logger_frequency = logger_frequency
self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False)
@wlb.member_callback
def config(self, print_dict=True):
from pprint import pformat
config_dict = {
'DomainSetup': {
'domainSize': self.domain_size,
'rootBlocks': self.root_blocks,
'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic,
},
'SetupBlockForest': {
'refinementDepth': self.refinement_depth,
'numProcesses': self.num_processes,
'blockForestFilestem': self.bfs_filestem,
'writeVtk': self.write_setup_vtk,
'outputStatistics': True,
'writeSetupForestAndReturn': True,
},
'Parameters': {
'omega': 1.95,
'timesteps': self.timesteps,
'remainingTimeLoggerFrequency': self.logger_frequency,
'vtkWriteFrequency': self.vtk_write_frequency,
'useVTKAMRWriter': True,
'oneFilePerProcess': False,
'writeOnlySlice': False,
'gpuEnabledMPI': self.gpu_enabled_mpi,
'gpuBlockSize': (128, 1, 1),
},
'Logging': {
'logLevel': "info",
}
}
if print_dict:
wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
return config_dict
@wlb.member_callback
def results_callback(self, **kwargs):
data = {}
data.update(self.config_dict['Parameters'])
data.update(self.config_dict['DomainSetup'])
data.update(kwargs)
data['executable'] = sys.argv[0]
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
if ms:
state = ms.MachineState(extended=False, anonymous=True)
state.generate() # generate subclasses
state.update() # read information
data["MachineState"] = str(state.get())
else:
print("MachineState module is not available. MachineState was not saved")
sequenceValuesToScalars(data)
result = data
sequenceValuesToScalars(result)
num_tries = 4
# check multiple times e.g. may fail when multiple benchmark processes are running
table_name = f"runs"
table_name = table_name.replace("-", "_")
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, table_name, self.db_file_name)
storeSingle(result, table_name, self.db_file_name)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
def validation_run():
"""Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
wlb.log_info_on_root("Validation run")
domain_size = (192, 192, 64)
cells_per_block = (64, 64, 64)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(domain_size=domain_size,
root_blocks=root_blocks,
cells_per_block=cells_per_block,
timesteps=0,
vtk_write_frequency=0,
refinement_depth=3,
gpu_enabled_mpi=False)
scenarios.add(scenario)
def weak_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True):
wlb.log_info_on_root("Running weak scaling benchmark...")
# This benchmark must run from 16 GPUs onwards
if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if uniform:
factor = 3 * num_proc
name = "uniform"
else:
if num_proc % 16 != 0:
raise RuntimeError("Number of processes must be dividable by 16")
factor = int(num_proc // 16)
name = "nonuniform"
cells_per_block = (WeakX, WeakY, WeakZ)
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks,
num_processes=num_proc,
cells_per_block=cells_per_block,
refinement_depth=0 if uniform else 3,
timesteps=10,
gpu_enabled_mpi=gpu_enabled_mpi,
db_file_name=f"weakScalingGPU{name}LDC.sqlite3")
scenarios.add(scenario)
def strong_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True):
wlb.log_info_on_root("Running strong scaling benchmark...")
# This benchmark must run from 64 GPUs onwards
if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if num_proc % 64 != 0:
raise RuntimeError("Number of processes must be dividable by 64")
cells_per_block = (StrongX, StrongY, StrongZ)
if uniform:
domain_size = (cells_per_block[0] * 2, cells_per_block[1] * 2, cells_per_block[2] * 16)
name = "uniform"
else:
factor = int(num_proc / 64)
blocks64 = block_decomposition(factor)
cells_per_block = tuple([int(c / b) for c, b in zip(cells_per_block, reversed(blocks64))])
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
name = "nonuniform"
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks,
num_processes=num_proc,
cells_per_block=cells_per_block,
refinement_depth=0 if uniform else 3,
timesteps=10,
gpu_enabled_mpi=gpu_enabled_mpi,
db_file_name=f"strongScalingGPU{name}LDC.sqlite3")
scenarios.add(scenario)
if BENCHMARK == 0:
validation_run()
elif BENCHMARK == 1:
weak_scaling_ldc(1, True, False)
elif BENCHMARK == 2:
strong_scaling_ldc(1, True, False)
else:
print(f"Invalid benchmark case {BENCHMARK}")
waLBerla_link_files_to_builddir("*.prm")
if (WALBERLA_BUILD_WITH_CODEGEN)
if (NOT WALBERLA_BUILD_WITH_GPU_SUPPORT OR (WALBERLA_BUILD_WITH_GPU_SUPPORT AND (CMAKE_CUDA_ARCHITECTURES GREATER_EQUAL 60 OR WALBERLA_BUILD_WITH_HIP)))
waLBerla_add_executable(NAME Percolation FILES Percolation.cpp
DEPENDS walberla::blockforest walberla::core walberla::field walberla::geometry walberla::gpu walberla::lbm walberla::lbm_mesapd_coupling walberla::mesa_pd walberla::sqlite walberla::vtk PSMCodegenPython_trt-smagorinsky_sc1 )
target_compile_definitions(Percolation PRIVATE Weighting=2)
endif ()
endif ()
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file Percolation.cpp
//! \ingroup lbm_mesapd_coupling
//! \author Samuel Kemmler <samuel.kemmler@fau.de>
//
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "core/DataTypes.h"
#include "core/Environment.h"
#include "core/grid_generator/SCIterator.h"
#include "core/logging/all.h"
#include "core/timing/RemainingTimeLogger.h"
#include "field/AddToStorage.h"
#include "field/vtk/all.h"
#include "geometry/InitBoundaryHandling.h"
#include "gpu/AddGPUFieldToStorage.h"
#include "gpu/DeviceSelectMPI.h"
#include "gpu/communication/UniformGPUScheme.h"
#include "lbm/PerformanceLogger.h"
#include "lbm/vtk/all.h"
#include "lbm_mesapd_coupling/DataTypesCodegen.h"
#include "lbm_mesapd_coupling/partially_saturated_cells_method/codegen/PSMSweepCollection.h"
#include "lbm_mesapd_coupling/utility/ParticleSelector.h"
#include "mesa_pd/data/DataTypes.h"
#include "mesa_pd/data/ParticleAccessorWithShape.h"
#include "mesa_pd/data/ParticleStorage.h"
#include "mesa_pd/data/ShapeStorage.h"
#include "mesa_pd/data/shape/Sphere.h"
#include "mesa_pd/domain/BlockForestDomain.h"
#include "mesa_pd/mpi/SyncNextNeighbors.h"
#include "mesa_pd/vtk/ParticleVtkOutput.h"
#include "sqlite/SQLite.h"
#include "vtk/all.h"
#include "LBMSweep.h"
#include "PSMPackInfo.h"
#include "PSMSweep.h"
#include "PSM_Density.h"
#include "PSM_InfoHeader.h"
#include "PSM_MacroGetter.h"
namespace percolation
{
///////////
// USING //
///////////
using namespace walberla;
using namespace lbm_mesapd_coupling::psm::gpu;
using PackInfo_T = pystencils::PSMPackInfo;
using flag_t = walberla::uint8_t;
using FlagField_T = FlagField< flag_t >;
///////////
// FLAGS //
///////////
const FlagUID Fluid_Flag("Fluid");
const FlagUID Density_Flag("Density");
const FlagUID NoSlip_Flag("NoSlip");
const FlagUID Inflow_Flag("Inflow");
//////////
// MAIN //
//////////
//*******************************************************************************************************************
/*!\brief Benchmark of a percolation setup
*
* This code can be used as a percolation (useParticles=true) or as a channel flow (useParticles=false) benchmark.
* A constant inflow from west is applied and a pressure boundary condition is set at the east.
* For the percolation, mono-sized fixed spherical particles are generated on a structured grid with an offset for
* every second particle layer in flow direction to avoid channels in flow direction. The flow is described by Darcy's
* law. For the channel flow, the flow is described by the Hagen–Poiseuille equation.
*
* The domain is either periodic or bounded by (no slip) walls in the vertical directions (y and z).
*
* For the percolation, the PSM is used in combination with a two-way coupling, but no particle dynamics.
* For the channel flow, only the LBM is used.
*
* The parameters can be changed via the input file.
*
*/
//*******************************************************************************************************************
int main(int argc, char** argv)
{
Environment env(argc, argv);
auto cfgFile = env.config();
if (!cfgFile) { WALBERLA_ABORT("Usage: " << argv[0] << " path-to-configuration-file \n"); }
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
gpu::selectDeviceBasedOnMpiRank();
#endif
WALBERLA_LOG_INFO_ON_ROOT("waLBerla revision: " << std::string(WALBERLA_GIT_SHA1).substr(0, 8));
WALBERLA_LOG_INFO_ON_ROOT("compiler flags: " << std::string(WALBERLA_COMPILER_FLAGS));
WALBERLA_LOG_INFO_ON_ROOT("build machine: " << std::string(WALBERLA_BUILD_MACHINE));
WALBERLA_LOG_INFO_ON_ROOT(*cfgFile);
// Read config file
Config::BlockHandle numericalSetup = cfgFile->getBlock("NumericalSetup");
const uint_t numXBlocks = numericalSetup.getParameter< uint_t >("numXBlocks");
const uint_t numYBlocks = numericalSetup.getParameter< uint_t >("numYBlocks");
const uint_t numZBlocks = numericalSetup.getParameter< uint_t >("numZBlocks");
WALBERLA_CHECK_EQUAL(numXBlocks * numYBlocks * numZBlocks, uint_t(MPIManager::instance()->numProcesses()),
"When using GPUs, the number of blocks ("
<< numXBlocks * numYBlocks * numZBlocks << ") has to match the number of MPI processes ("
<< uint_t(MPIManager::instance()->numProcesses()) << ")");
const bool periodicInY = numericalSetup.getParameter< bool >("periodicInY");
const bool periodicInZ = numericalSetup.getParameter< bool >("periodicInZ");
const uint_t numXCellsPerBlock = numericalSetup.getParameter< uint_t >("numXCellsPerBlock");
const uint_t numYCellsPerBlock = numericalSetup.getParameter< uint_t >("numYCellsPerBlock");
const uint_t numZCellsPerBlock = numericalSetup.getParameter< uint_t >("numZCellsPerBlock");
const bool sendDirectlyFromGPU = numericalSetup.getParameter< bool >("sendDirectlyFromGPU");
const bool useCommunicationHiding = numericalSetup.getParameter< bool >("useCommunicationHiding");
const Vector3< uint_t > frameWidth = numericalSetup.getParameter< Vector3< uint_t > >("frameWidth");
const uint_t timeSteps = numericalSetup.getParameter< uint_t >("timeSteps");
const bool useParticles = numericalSetup.getParameter< bool >("useParticles");
const real_t particleDiameter = numericalSetup.getParameter< real_t >("particleDiameter");
const real_t particleGenerationSpacing = numericalSetup.getParameter< real_t >("particleGenerationSpacing");
const Vector3< real_t > generationDomainFraction =
numericalSetup.getParameter< Vector3< real_t > >("generationDomainFraction");
const Vector3< uint_t > generationPointOfReferenceOffset =
numericalSetup.getParameter< Vector3< uint_t > >("generationPointOfReferenceOffset");
const bool useParticleOffset = numericalSetup.getParameter< bool >("useParticleOffset");
const Vector3< uint_t > particleSubBlockSize =
numericalSetup.getParameter< Vector3< uint_t > >("particleSubBlockSize");
const real_t uInflow = numericalSetup.getParameter< real_t >("uInflow");
const real_t relaxationRate = numericalSetup.getParameter< real_t >("relaxationRate");
if ((periodicInY && numYBlocks == 1) || (periodicInZ && numZBlocks == 1))
{
WALBERLA_LOG_WARNING_ON_ROOT("Using only 1 block in periodic dimensions can lead to unexpected behavior.")
}
const real_t viscosity = lbm::collision_model::viscosityFromOmega(relaxationRate);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(viscosity)
Config::BlockHandle outputSetup = cfgFile->getBlock("Output");
const uint_t vtkSpacing = outputSetup.getParameter< uint_t >("vtkSpacing");
const std::string vtkFolder = outputSetup.getParameter< std::string >("vtkFolder");
const uint_t performanceLogFrequency = outputSetup.getParameter< uint_t >("performanceLogFrequency");
///////////////////////////
// BLOCK STRUCTURE SETUP //
///////////////////////////
const bool periodicInX = false;
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid(
numXBlocks, numYBlocks, numZBlocks, numXCellsPerBlock, numYCellsPerBlock, numZCellsPerBlock, real_t(1), uint_t(0),
false, false, periodicInX, periodicInY, periodicInZ, // periodicity
false);
auto simulationDomain = blocks->getDomain();
////////////
// MesaPD //
////////////
auto rpdDomain = std::make_shared< mesa_pd::domain::BlockForestDomain >(blocks->getBlockForestPointer());
// Init data structures
auto ps = walberla::make_shared< mesa_pd::data::ParticleStorage >(1);
auto ss = walberla::make_shared< mesa_pd::data::ShapeStorage >();
using ParticleAccessor_T = mesa_pd::data::ParticleAccessorWithShape;
auto accessor = walberla::make_shared< ParticleAccessor_T >(ps, ss);
auto sphereShape = ss->create< mesa_pd::data::Sphere >(particleDiameter * real_t(0.5));
// Create spheres
if (useParticles)
{
// Ensure that generation domain is computed correctly
WALBERLA_CHECK_FLOAT_EQUAL(simulationDomain.xMin(), real_t(0));
WALBERLA_CHECK_FLOAT_EQUAL(simulationDomain.yMin(), real_t(0));
WALBERLA_CHECK_FLOAT_EQUAL(simulationDomain.zMin(), real_t(0));
auto generationDomain = math::AABB::createFromMinMaxCorner(
math::Vector3< real_t >(simulationDomain.xMax() * (real_t(1) - generationDomainFraction[0]) / real_t(2),
simulationDomain.yMax() * (real_t(1) - generationDomainFraction[1]) / real_t(2),
simulationDomain.zMax() * (real_t(1) - generationDomainFraction[2]) / real_t(2)),
math::Vector3< real_t >(simulationDomain.xMax() * (real_t(1) + generationDomainFraction[0]) / real_t(2),
simulationDomain.yMax() * (real_t(1) + generationDomainFraction[1]) / real_t(2),
simulationDomain.zMax() * (real_t(1) + generationDomainFraction[2]) / real_t(2)));
real_t particleOffset = particleGenerationSpacing / real_t(2);
for (auto pt :
grid_generator::SCGrid(generationDomain, generationDomain.center() + generationPointOfReferenceOffset,
particleGenerationSpacing))
{
// Offset every second particle layer in flow direction to avoid channels in flow direction
if (useParticleOffset &&
uint_t(round(math::abs(generationDomain.center()[0] - pt[0]) / (particleGenerationSpacing))) % uint_t(2) !=
uint_t(0))
{
pt = pt + Vector3(real_t(0), particleOffset, particleOffset);
}
if (rpdDomain->isContainedInProcessSubdomain(uint_c(mpi::MPIManager::instance()->rank()), pt))
{
mesa_pd::data::Particle&& p = *ps->create();
p.setPosition(pt);
p.setInteractionRadius(particleDiameter * real_t(0.5));
p.setOwner(mpi::MPIManager::instance()->rank());
p.setShapeID(sphereShape);
p.setType(0);
}
}
}
///////////////////////
// ADD DATA TO BLOCKS //
////////////////////////
// Setting initial PDFs to nan helps to detect bugs in the initialization/BC handling
// Depending on WALBERLA_BUILD_WITH_GPU_SUPPORT, pdfFieldCPUGPUID is either a CPU or a CPU field
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
BlockDataID pdfFieldID =
field::addToStorage< PdfField_T >(blocks, "pdf field (fzyx)", real_c(std::nan("")), field::fzyx);
BlockDataID BFieldID = field::addToStorage< BField_T >(blocks, "B field", 0, field::fzyx, 1);
BlockDataID pdfFieldCPUGPUID = gpu::addGPUFieldToStorage< PdfField_T >(blocks, pdfFieldID, "pdf field GPU");
#else
BlockDataID pdfFieldCPUGPUID =
field::addToStorage< PdfField_T >(blocks, "pdf field CPU", real_c(std::nan("")), field::fzyx);
#endif
BlockDataID densityFieldID = field::addToStorage< DensityField_T >(blocks, "density field", real_t(0), field::fzyx);
BlockDataID velFieldID = field::addToStorage< VelocityField_T >(blocks, "velocity field", real_t(0), field::fzyx);
BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
// Synchronize particles between the blocks for the correct mapping of ghost particles
mesa_pd::mpi::SyncNextNeighbors syncNextNeighborFunc;
syncNextNeighborFunc(*ps, *rpdDomain);
// Assemble boundary block string
std::string boundariesBlockString = " Boundaries"
"{"
"Border { direction W; walldistance -1; flag Inflow; }"
"Border { direction E; walldistance -1; flag Density; }";
if (!periodicInY)
{
boundariesBlockString += "Border { direction S; walldistance -1; flag NoSlip; }"
"Border { direction N; walldistance -1; flag NoSlip; }";
}
if (!periodicInZ)
{
boundariesBlockString += "Border { direction T; walldistance -1; flag NoSlip; }"
"Border { direction B; walldistance -1; flag NoSlip; }";
}
boundariesBlockString += "}";
WALBERLA_ROOT_SECTION()
{
std::ofstream boundariesFile("boundaries.prm");
boundariesFile << boundariesBlockString;
boundariesFile.close();
}
WALBERLA_MPI_BARRIER()
auto boundariesCfgFile = Config();
boundariesCfgFile.readParameterFile("boundaries.prm");
auto boundariesConfig = boundariesCfgFile.getBlock("Boundaries");
// map boundaries into the LBM simulation
geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldID, boundariesConfig);
geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, Fluid_Flag);
lbm::PSM_Density density_bc(blocks, pdfFieldCPUGPUID, real_t(1.0));
density_bc.fillFromFlagField< FlagField_T >(blocks, flagFieldID, Density_Flag, Fluid_Flag);
lbm::PSM_NoSlip noSlip(blocks, pdfFieldCPUGPUID);
noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldID, NoSlip_Flag, Fluid_Flag);
lbm::PSM_UBB ubb(blocks, pdfFieldCPUGPUID, uInflow, real_t(0), real_t(0));
ubb.fillFromFlagField< FlagField_T >(blocks, flagFieldID, Inflow_Flag, Fluid_Flag);
///////////////
// TIME LOOP //
///////////////
// Map particles into the fluid domain
ParticleAndVolumeFractionSoA_T< Weighting > particleAndVolumeFractionSoA(blocks, relaxationRate);
PSMSweepCollection psmSweepCollection(blocks, accessor, lbm_mesapd_coupling::RegularParticlesSelector(),
particleAndVolumeFractionSoA, particleSubBlockSize);
if (useParticles)
{
for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
psmSweepCollection.particleMappingSweep(&(*blockIt));
}
}
// Initialize PDFs
pystencils::InitializeDomainForPSM pdfSetter(
particleAndVolumeFractionSoA.BsFieldID, particleAndVolumeFractionSoA.BFieldID,
particleAndVolumeFractionSoA.particleVelocitiesFieldID, pdfFieldCPUGPUID, real_t(0), real_t(0), real_t(0),
real_t(1.0), real_t(0), real_t(0), real_t(0));
for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
// pdfSetter requires particle velocities at cell centers
if (useParticles) { psmSweepCollection.setParticleVelocitiesSweep(&(*blockIt)); }
pdfSetter(&(*blockIt));
}
// Setup of the LBM communication for synchronizing the pdf field between neighboring blocks
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
gpu::communication::UniformGPUScheme< Stencil_T > com(blocks, sendDirectlyFromGPU, false);
#else
walberla::blockforest::communication::UniformBufferedScheme< Stencil_T > com(blocks);
#endif
com.addPackInfo(make_shared< PackInfo_T >(pdfFieldCPUGPUID));
auto communication = std::function< void() >([&]() { com.communicate(); });
SweepTimeloop commTimeloop(blocks->getBlockStorage(), timeSteps);
SweepTimeloop timeloop(blocks->getBlockStorage(), timeSteps);
timeloop.addFuncBeforeTimeStep(RemainingTimeLogger(timeloop.getNrOfTimeSteps()), "Remaining Time Logger");
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
pystencils::PSM_MacroGetter getterSweep(BFieldID, densityFieldID, pdfFieldID, velFieldID, real_t(0.0), real_t(0.0),
real_t(0.0));
#else
pystencils::PSM_MacroGetter getterSweep(particleAndVolumeFractionSoA.BFieldID, densityFieldID, pdfFieldCPUGPUID,
velFieldID, real_t(0.0), real_t(0.0), real_t(0.0));
#endif
// VTK output
if (vtkSpacing != uint_t(0))
{
// Spheres
auto particleVtkOutput = make_shared< mesa_pd::vtk::ParticleVtkOutput >(ps);
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleUid >("uid");
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleLinearVelocity >("velocity");
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleInteractionRadius >("radius");
// Limit output to process-local spheres
particleVtkOutput->setParticleSelector([sphereShape](const mesa_pd::data::ParticleStorage::iterator& pIt) {
return pIt->getShapeID() == sphereShape &&
!(mesa_pd::data::particle_flags::isSet(pIt->getFlags(), mesa_pd::data::particle_flags::GHOST));
});
auto particleVtkWriter = vtk::createVTKOutput_PointData(particleVtkOutput, "particles", vtkSpacing, vtkFolder);
timeloop.addFuncBeforeTimeStep(vtk::writeFiles(particleVtkWriter), "VTK (sphere data)");
// Fields
auto pdfFieldVTK = vtk::createVTKOutput_BlockData(blocks, "fluid", vtkSpacing, 0, false, vtkFolder);
pdfFieldVTK->addBeforeFunction(communication);
pdfFieldVTK->addBeforeFunction([&]() {
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
gpu::fieldCpy< PdfField_T, gpu::GPUField< real_t > >(blocks, pdfFieldID, pdfFieldCPUGPUID);
gpu::fieldCpy< GhostLayerField< real_t, 1 >, BFieldGPU_T >(blocks, BFieldID,
particleAndVolumeFractionSoA.BFieldID);
#endif
for (auto& block : *blocks)
getterSweep(&block);
});
pdfFieldVTK->addCellDataWriter(make_shared< field::VTKWriter< VelocityField_T > >(velFieldID, "Velocity"));
pdfFieldVTK->addCellDataWriter(make_shared< field::VTKWriter< DensityField_T > >(densityFieldID, "Density"));
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
pdfFieldVTK->addCellDataWriter(make_shared< field::VTKWriter< BField_T > >(BFieldID, "OverlapFraction"));
#else
pdfFieldVTK->addCellDataWriter(
make_shared< field::VTKWriter< BField_T > >(particleAndVolumeFractionSoA.BFieldID, "OverlapFraction"));
#endif
pdfFieldVTK->addCellDataWriter(make_shared< field::VTKWriter< FlagField_T > >(flagFieldID, "FlagField"));
timeloop.addFuncBeforeTimeStep(vtk::writeFiles(pdfFieldVTK), "VTK (fluid field data)");
}
if (vtkSpacing != uint_t(0)) { vtk::writeDomainDecomposition(blocks, "domain_decomposition", vtkFolder); }
// Add performance logging
lbm::PerformanceLogger< FlagField_T > performanceLogger(blocks, flagFieldID, Fluid_Flag, performanceLogFrequency);
if (performanceLogFrequency > 0)
{
timeloop.addFuncAfterTimeStep(performanceLogger, "Evaluate performance logging");
}
// Add LBM communication function and boundary handling sweep
if (useCommunicationHiding)
{
timeloop.add() << Sweep(deviceSyncWrapper(density_bc.getSweep()), "Boundary Handling (Density)");
}
else
{
timeloop.add() << BeforeFunction(communication, "LBM Communication")
<< Sweep(deviceSyncWrapper(density_bc.getSweep()), "Boundary Handling (Density)");
}
timeloop.add() << Sweep(deviceSyncWrapper(ubb.getSweep()), "Boundary Handling (UBB)");
if (!periodicInY || !periodicInZ)
{
timeloop.add() << Sweep(deviceSyncWrapper(noSlip.getSweep()), "Boundary Handling (NoSlip)");
}
// PSM kernel
pystencils::PSMSweep PSMSweep(particleAndVolumeFractionSoA.BsFieldID, particleAndVolumeFractionSoA.BFieldID,
particleAndVolumeFractionSoA.particleForcesFieldID,
particleAndVolumeFractionSoA.particleVelocitiesFieldID, pdfFieldCPUGPUID, real_t(0.0),
real_t(0.0), real_t(0.0), relaxationRate);
pystencils::PSMSweepSplit PSMSplitSweep(
particleAndVolumeFractionSoA.BsFieldID, particleAndVolumeFractionSoA.BFieldID,
particleAndVolumeFractionSoA.particleForcesFieldID, particleAndVolumeFractionSoA.particleVelocitiesFieldID,
pdfFieldCPUGPUID, real_t(0.0), real_t(0.0), real_t(0.0), relaxationRate, frameWidth);
pystencils::LBMSweep LBMSweep(pdfFieldCPUGPUID, real_t(0.0), real_t(0.0), real_t(0.0), relaxationRate);
pystencils::LBMSplitSweep LBMSplitSweep(pdfFieldCPUGPUID, real_t(0.0), real_t(0.0), real_t(0.0), relaxationRate,
frameWidth);
if (useParticles)
{
if (useCommunicationHiding)
{
addPSMSweepsToTimeloops(commTimeloop, timeloop, com, psmSweepCollection, PSMSplitSweep);
}
else { addPSMSweepsToTimeloop(timeloop, psmSweepCollection, PSMSweep); }
}
else
{
if (useCommunicationHiding)
{
commTimeloop.add() << BeforeFunction([&]() { com.startCommunication(); }, "LBM Communication (start)")
<< Sweep(deviceSyncWrapper(LBMSplitSweep.getInnerSweep()), "LBM inner sweep")
<< AfterFunction([&]() { com.wait(); }, "LBM Communication (wait)");
timeloop.add() << Sweep(deviceSyncWrapper(LBMSplitSweep.getOuterSweep()), "LBM outer sweep");
}
else { timeloop.add() << Sweep(deviceSyncWrapper(LBMSweep), "LBM sweep"); }
}
WcTimingPool timeloopTiming;
// TODO: maybe add warmup phase
for (uint_t timeStep = 0; timeStep < timeSteps; ++timeStep)
{
if (useCommunicationHiding) { commTimeloop.singleStep(timeloopTiming); }
timeloop.singleStep(timeloopTiming);
}
timeloopTiming.logResultOnRoot();
auto timeloopTimingReduced = timeloopTiming.getReduced();
// Write parameters and performance results in sqlite database
WALBERLA_ROOT_SECTION()
{
// Use DB_FILE environment variable if set
std::string dbFile;
if (std::getenv("DB_FILE") != nullptr) { dbFile = std::getenv("DB_FILE"); }
else
{
if (useParticles) { dbFile = "percolation_benchmark.sqlite3"; }
else { dbFile = "channel_flow_benchmark.sqlite3"; }
}
std::map< std::string, int > integerProperties;
std::map< std::string, double > realProperties;
std::map< std::string, std::string > stringProperties;
integerProperties["numXBlocks"] = int(numXBlocks);
integerProperties["numYBlocks"] = int(numYBlocks);
integerProperties["numZBlocks"] = int(numZBlocks);
integerProperties["numXCellsPerBlock"] = int(numXCellsPerBlock);
integerProperties["numYCellsPerBlock"] = int(numYCellsPerBlock);
integerProperties["numZCellsPerBlock"] = int(numZCellsPerBlock);
integerProperties["timeSteps"] = int(timeSteps);
integerProperties["sendDirectlyFromGPU"] = int(sendDirectlyFromGPU);
integerProperties["useCommunicationHiding"] = int(useCommunicationHiding);
integerProperties["communicationHidingXWidth"] = int(frameWidth[0]);
integerProperties["communicationHidingYWidth"] = int(frameWidth[1]);
integerProperties["communicationHidingZWidth"] = int(frameWidth[2]);
integerProperties["useParticles"] = int(useParticles);
integerProperties["numParticles"] = int(ps->size());
integerProperties["particleSubBlockXSize"] = int(particleSubBlockSize[0]);
integerProperties["particleSubBlockYSize"] = int(particleSubBlockSize[1]);
integerProperties["particleSubBlockZSize"] = int(particleSubBlockSize[2]);
realProperties["particleDiameter"] = double(particleDiameter);
realProperties["particleGenerationSpacing"] = double(particleGenerationSpacing);
performanceLogger.getBestResultsForSQLOnRoot(integerProperties, realProperties, stringProperties);
auto runId = sqlite::storeRunInSqliteDB(dbFile, integerProperties, stringProperties, realProperties);
sqlite::storeTimingPoolInSqliteDB(dbFile, runId, *timeloopTimingReduced, "Timeloop");
}
return EXIT_SUCCESS;
}
} // namespace percolation
int main(int argc, char** argv) { percolation::main(argc, argv); }
NumericalSetup
{
// product of number of blocks should be equal to number of used processes
numXBlocks 1;
numYBlocks 1;
numZBlocks 1;
periodicInY false;
periodicInZ false;
numXCellsPerBlock 256;
numYCellsPerBlock 128;
numZCellsPerBlock 128;
timeSteps 100;
sendDirectlyFromGPU false; // use GPU-GPU communication
useCommunicationHiding false;
frameWidth <1, 1, 1>; // width of the outer region if splitting the LBM/PSM into inner and outer (only used if useCommunicationHiding is true)
// particle distribution in LBM units
useParticles true; // if true, PSM/particle mapping/velocity computation/hydrodynamic force reduction is used, else LBM is used
particleDiameter 20.0;
particleGenerationSpacing 21.0;
generationDomainFraction <0.8, 1.0, 1.0>; // fraction of the domain where particles are generated
generationPointOfReferenceOffset <0, 0, 0>; // offset of point of reference from domain center, see SCIterator.h
useParticleOffset true; // offset every second particle layer in flow direction
particleSubBlockSize <8, 8, 8>;
// fluid quantities in LBM units
uInflow 0.00008;
relaxationRate 0.9;
}
Output
{
vtkSpacing 0;
vtkFolder vtk_out;
performanceLogFrequency 100;
}
......@@ -12,13 +12,13 @@ waLBerla_generate_target_from_python(NAME BenchmarkPhaseFieldCodeGen
PackInfo_velocity_based_distributions.${CODEGEN_FILE_SUFFIX} PackInfo_velocity_based_distributions.h
GenDefines.h)
if (WALBERLA_BUILD_WITH_CUDA)
if (WALBERLA_BUILD_WITH_GPU_SUPPORT )
waLBerla_add_executable(NAME benchmark_multiphase
FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py
DEPENDS blockforest core cuda field postprocessing python_coupling lbm geometry timeloop gui BenchmarkPhaseFieldCodeGen)
DEPENDS walberla::blockforest walberla::core walberla::gpu walberla::field walberla::postprocessing walberla::python_coupling walberla::lbm_generated walberla::geometry walberla::timeloop BenchmarkPhaseFieldCodeGen )
else ()
waLBerla_add_executable(NAME benchmark_multiphase
FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py
DEPENDS blockforest core field postprocessing python_coupling lbm geometry timeloop gui BenchmarkPhaseFieldCodeGen)
endif (WALBERLA_BUILD_WITH_CUDA)
DEPENDS walberla::blockforest walberla::core walberla::field walberla::postprocessing walberla::python_coupling walberla::lbm_generated walberla::geometry walberla::timeloop BenchmarkPhaseFieldCodeGen )
endif (WALBERLA_BUILD_WITH_GPU_SUPPORT )
......@@ -8,6 +8,11 @@ from waLBerla.tools.config import block_decomposition
import sys
from math import prod
try:
import machinestate as ms
except ImportError:
ms = None
def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, size_per_value=8):
"""Checks if a single block of given size fits into GPU memory"""
......@@ -20,7 +25,9 @@ def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, s
class Scenario:
def __init__(self, time_step_strategy, cuda_block_size, cells_per_block=(256, 256, 256),
def __init__(self, time_step_strategy,
cuda_block_size,
cells_per_block=(256, 256, 256),
cuda_enabled_mpi=False):
# output frequencies
self.vtkWriteFrequency = 0
......@@ -89,6 +96,14 @@ class Scenario:
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
if ms:
state = ms.MachineState(extended=False, anonymous=True)
state.generate() # generate subclasses
state.update() # read information
data["MachineState"] = str(state.get())
else:
print("MachineState module is not available. MachineState was not saved")
sequenceValuesToScalars(data)
df = pd.DataFrame.from_records([data])
......@@ -101,43 +116,19 @@ class Scenario:
def benchmark():
scenarios = wlb.ScenarioManager()
gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 40))
gpu_mem = gpu_mem_gb * (2 ** 30)
block_size = (256, 256, 256)
block_size = (320, 320, 320)
cuda_enabled_mpi = True
if not domain_block_size_ok(block_size, gpu_mem):
wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
else:
scenarios.add(Scenario(time_step_strategy='normal', cuda_block_size=(256, 1, 1), cells_per_block=block_size))
scenarios.add(Scenario(time_step_strategy='normal',
cuda_block_size=(128, 1, 1),
cells_per_block=block_size,
cuda_enabled_mpi=cuda_enabled_mpi))
def kernel_benchmark():
scenarios = wlb.ScenarioManager()
gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
gpu_mem = gpu_mem_gb * (2 ** 30)
block_sizes = [(i, i, i) for i in (32, 64, 128, 256, 320, 384, 448, 512)]
cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1),
(32, 2, 1), (64, 2, 1), (128, 2, 1),
(32, 4, 1), (64, 4, 1),
(32, 4, 2),
(32, 8, 1),
(16, 16, 1)]
for time_step_strategy in ['phase_only', 'hydro_only', 'kernel_only', 'normal']:
for cuda_block in cuda_blocks:
for block_size in block_sizes:
if not domain_block_size_ok(block_size, gpu_mem):
wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
continue
scenario = Scenario(time_step_strategy=time_step_strategy,
cuda_block_size=cuda_block,
cells_per_block=block_size)
scenarios.add(scenario)
# benchmark()
kernel_benchmark()
benchmark()