Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 2335 additions and 50 deletions
waLBerla_add_executable ( NAME MotionSingleHeavySphere
DEPENDS blockforest boundary core domain_decomposition field lbm pe pe_coupling postprocessing timeloop vtk )
\ No newline at end of file
waLBerla_add_executable ( NAME MotionSingleHeavySphere
DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::domain_decomposition walberla::field walberla::lbm walberla::pe walberla::pe_coupling walberla::postprocessing walberla::timeloop walberla::vtk )
\ No newline at end of file
......@@ -76,29 +76,29 @@ using walberla::uint_t;
//////////////
// PDF field, flag field & body field
typedef lbm::D3Q19< lbm::collision_model::TRT, false > LatticeModel_T;
using LatticeModel_T = lbm::D3Q19<lbm::collision_model::TRT, false>;
using Stencil_T = LatticeModel_T::Stencil;
using PdfField_T = lbm::PdfField<LatticeModel_T>;
using flag_t = walberla::uint8_t;
using FlagField_T = FlagField<flag_t>;
typedef GhostLayerField< pe::BodyID, 1 > BodyField_T;
using BodyField_T = GhostLayerField<pe::BodyID, 1>;
typedef std::pair< pe::BodyID, real_t > BodyAndVolumeFraction_T;
typedef GhostLayerField< std::vector< BodyAndVolumeFraction_T >, 1 > BodyAndVolumeFractionField_T;
using BodyAndVolumeFraction_T = std::pair<pe::BodyID, real_t>;
using BodyAndVolumeFractionField_T = GhostLayerField<std::vector<BodyAndVolumeFraction_T>, 1>;
const uint_t FieldGhostLayers = 1;
// boundary handling
typedef lbm::SimpleUBB< LatticeModel_T, flag_t > UBB_T;
typedef lbm::SimplePressure< LatticeModel_T, flag_t > Outlet_T;
using UBB_T = lbm::SimpleUBB<LatticeModel_T, flag_t>;
using Outlet_T = lbm::SimplePressure<LatticeModel_T, flag_t>;
typedef pe_coupling::SimpleBB< LatticeModel_T, FlagField_T > MEM_BB_T;
typedef pe_coupling::CurvedLinear< LatticeModel_T, FlagField_T > MEM_CLI_T;
typedef pe_coupling::CurvedQuadratic< LatticeModel_T, FlagField_T > MEM_MR_T;
using MEM_BB_T = pe_coupling::SimpleBB<LatticeModel_T, FlagField_T>;
using MEM_CLI_T = pe_coupling::CurvedLinear<LatticeModel_T, FlagField_T>;
using MEM_MR_T = pe_coupling::CurvedQuadratic<LatticeModel_T, FlagField_T>;
typedef BoundaryHandling< FlagField_T, Stencil_T, UBB_T, Outlet_T, MEM_BB_T, MEM_CLI_T, MEM_MR_T > BoundaryHandling_T;
using BoundaryHandling_T = BoundaryHandling<FlagField_T, Stencil_T, UBB_T, Outlet_T, MEM_BB_T, MEM_CLI_T, MEM_MR_T>;
using BodyTypeTuple = std::tuple<pe::Sphere>;
......@@ -538,9 +538,7 @@ public:
WALBERLA_MPI_SECTION()
{
mpi::allReduceInplace( u_p[0], mpi::SUM );
mpi::allReduceInplace( u_p[1], mpi::SUM );
mpi::allReduceInplace( u_p[2], mpi::SUM );
mpi::allReduceInplace( u_p, mpi::SUM );
}
......@@ -615,7 +613,7 @@ private:
/*
* This extensive, physical test case simulates a single, heavy sphere in ambient fluid flow.
* It is based on the benchmark proposed in
* Uhlman, Dusek - "The motion of a single heavy sphere in ambient fluid: A benchmark for interface-resolved
* Uhlmann, Dusek - "The motion of a single heavy sphere in ambient fluid: A benchmark for interface-resolved
* particulate flow simulations with significant relative velocities" IJMF (2014),
* doi: 10.1016/j.ijmultiphaseflow.2013.10.010
* Results for LBM done with waLBerla are published in
......@@ -634,9 +632,9 @@ private:
* - solid collision operator variant: 1, 2, or 3
* - weighting factor variant: 1, or 2
*
* The results obtained by this benchmark should be compared to the reference spectral method results from Uhlman & Dusek.
* The results obtained by this benchmark should be compared to the reference spectral method results from Uhlmann & Dusek.
* Furthermore, comparisons to the CFD-IBM (classical Navier-Stokes solver with immersed boundary method)
* results from Uhlman & Dusek are available in their paper.
* results from Uhlmann & Dusek are available in their paper.
* New coupling algorithms or algorithmic changes to the exiting approaches should also be cross-checked with the
* values in Rettinger & Ruede.
*
......@@ -786,26 +784,27 @@ int main( int argc, char **argv )
///////////////////////////
const int numProcs = MPIManager::instance()->numProcesses();
WALBERLA_CHECK(numProcs % 16 != 0, "An integer multiple of 16 MPI ranks has to be used due to horizontal periodicity and domain decomposition requirements!");
uint_t XBlocks;
uint_t YBlocks;
uint_t ZBlocks;
if( numProcs >= 192 )
if( numProcs > 192 )
{
XBlocks = uint_t(6);
YBlocks = uint_t(6);
ZBlocks = uint_c(numProcs) / ( XBlocks * YBlocks );
WALBERLA_CHECK(numProcs % 36 == 0, "An integer multiple of 36 MPI ranks has to be used due to horizontal periodicity and domain partitioning requirements!");
} else {
XBlocks = uint_t(4);
YBlocks = uint_t(4);
ZBlocks = uint_c(MPIManager::instance()->numProcesses()) / ( XBlocks * YBlocks );
WALBERLA_CHECK(numProcs % 16 == 0, "An integer multiple of 16 MPI ranks has to be used due to horizontal periodicity and domain partitioning requirements!");
}
const uint_t XCells = xlength / XBlocks;
const uint_t YCells = ylength / YBlocks;
const uint_t ZCells = zlength / ZBlocks;
if( (xlength != XCells * XBlocks) && (ylength != YCells * YBlocks) && (zlength != ZCells * ZBlocks) )
if( (xlength != XCells * XBlocks) || (ylength != YCells * YBlocks) || (zlength != ZCells * ZBlocks) )
{
WALBERLA_ABORT("Domain decomposition does not fit to total domain size!");
}
......@@ -903,20 +902,20 @@ int main( int argc, char **argv )
// add PDF field
// initial velocity in domain = inflow velocity
BlockDataID pdfFieldID = lbm::addPdfFieldToStorage< LatticeModel_T >( blocks, "pdf field (zyxf)", latticeModel, uInfty, real_t(1), uint_t(1), field::zyxf );
BlockDataID pdfFieldID = lbm::addPdfFieldToStorage< LatticeModel_T >( blocks, "pdf field (fzyx)", latticeModel, uInfty, real_t(1), uint_t(1), field::fzyx );
// add PDF field (needed to store pre collision values for MEM_MR scheme)
BlockDataID pdfFieldPreColID = lbm::addPdfFieldToStorage< LatticeModel_T >( blocks, "nqOdd field (zyxf)", latticeModel, uInfty, real_t(1), uint_t(1), field::zyxf );
BlockDataID pdfFieldPreColID = lbm::addPdfFieldToStorage< LatticeModel_T >( blocks, "nqOdd field (fzyx)", latticeModel, uInfty, real_t(1), uint_t(1), field::fzyx );
// add flag field
BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >( blocks, "flag field" );
// add body field
BlockDataID bodyFieldID = field::addToStorage<BodyField_T>( blocks, "body field", nullptr, field::zyxf );
BlockDataID bodyFieldID = field::addToStorage<BodyField_T>( blocks, "body field", nullptr, field::fzyx );
// add body and volume fraction field
BlockDataID bodyAndVolumeFractionFieldID = field::addToStorage< BodyAndVolumeFractionField_T >( blocks, "body and volume fraction field",
std::vector<BodyAndVolumeFraction_T>(), field::zyxf, 0 );
std::vector<BodyAndVolumeFraction_T>(), field::fzyx, 0 );
// add boundary handling & initialize outer domain boundaries
BlockDataID boundaryHandlingID = blocks->addStructuredBlockData< BoundaryHandling_T >(
......@@ -1235,7 +1234,7 @@ int main( int argc, char **argv )
// reconstruct missing PDFs
using ExtrapolationFinder_T = pe_coupling::SphereNormalExtrapolationDirectionFinder;
ExtrapolationFinder_T extrapolationFinder( blocks, bodyFieldID );
typedef pe_coupling::ExtrapolationReconstructor< LatticeModel_T, BoundaryHandling_T, ExtrapolationFinder_T > Reconstructor_T;
using Reconstructor_T = pe_coupling::ExtrapolationReconstructor<LatticeModel_T, BoundaryHandling_T, ExtrapolationFinder_T>;
Reconstructor_T reconstructor( blocks, boundaryHandlingID, bodyFieldID, extrapolationFinder, true );
timeloop.add() << Sweep( pe_coupling::PDFReconstruction< LatticeModel_T, BoundaryHandling_T, Reconstructor_T >
( blocks, pdfFieldID, boundaryHandlingID, bodyStorageID, globalBodyStorage, bodyFieldID, reconstructor, FormerMEM_Flag, Fluid_Flag ), "PDF Restore" );
......
......@@ -2,5 +2,5 @@
waLBerla_link_files_to_builddir( "*.dat" )
waLBerla_add_executable ( NAME NonUniformGridBenchmark
DEPENDS blockforest boundary core domain_decomposition field lbm postprocessing timeloop vtk sqlite)
\ No newline at end of file
waLBerla_add_executable ( NAME NonUniformGridBenchmark
DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::domain_decomposition walberla::field walberla::lbm walberla::postprocessing walberla::timeloop walberla::vtk walberla::sqlite )
\ No newline at end of file
......@@ -97,11 +97,11 @@ using walberla::real_t;
// TYPEDEFS //
//////////////
typedef lbm::D3Q19< lbm::collision_model::SRT, false > D3Q19_SRT_INCOMP;
typedef lbm::D3Q19< lbm::collision_model::SRT, true > D3Q19_SRT_COMP;
typedef lbm::D3Q19< lbm::collision_model::TRT, false > D3Q19_TRT_INCOMP;
typedef lbm::D3Q19< lbm::collision_model::TRT, true > D3Q19_TRT_COMP;
typedef lbm::D3Q19< lbm::collision_model::D3Q19MRT, false > D3Q19_MRT_INCOMP;
using D3Q19_SRT_INCOMP = lbm::D3Q19<lbm::collision_model::SRT, false>;
using D3Q19_SRT_COMP = lbm::D3Q19<lbm::collision_model::SRT, true>;
using D3Q19_TRT_INCOMP = lbm::D3Q19<lbm::collision_model::TRT, false>;
using D3Q19_TRT_COMP = lbm::D3Q19<lbm::collision_model::TRT, true>;
using D3Q19_MRT_INCOMP = lbm::D3Q19<lbm::collision_model::D3Q19MRT, false>;
template< typename LatticeModel_T >
struct Types
......@@ -243,23 +243,23 @@ static void refinementSelection( SetupBlockForest& forest )
AABB rightCorner( domain.xMax() - xSize, domain.yMin(), domain.zMax() - zSize,
domain.xMax(), domain.yMax(), domain.zMax() );
for( auto block = forest.begin(); block != forest.end(); ++block )
for(auto & block : forest)
{
auto & aabb = block->getAABB();
auto & aabb = block.getAABB();
if( leftCorner.intersects( aabb ) || rightCorner.intersects( aabb ) )
{
if( block->getLevel() < ( BlockForestLevels - uint_t(1) ) )
block->setMarker( true );
if( block.getLevel() < ( BlockForestLevels - uint_t(1) ) )
block.setMarker( true );
}
}
}
static void workloadAndMemoryAssignment( SetupBlockForest & forest, const memory_t memoryPerBlock ) {
for( auto block = forest.begin(); block != forest.end(); ++block )
for(auto & block : forest)
{
block->setWorkload( numeric_cast< workload_t >( uint_t(1) << block->getLevel() ) );
block->setMemory( memoryPerBlock );
block.setWorkload( numeric_cast< workload_t >( uint_t(1) << block.getLevel() ) );
block.setMemory( memoryPerBlock );
}
}
......@@ -295,7 +295,7 @@ void createSetupBlockForest( blockforest::SetupBlockForest & sforest, const Conf
#ifndef NDEBUG
WALBERLA_ASSERT_EQUAL( sforest.getNumberOfBlocks(), numberOfBlocks( workerProcesses, fineBlocksPerProcess ) );
for( uint_t i = uint_t(0); i != BlockForestLevels; ++i )
for( auto i = uint_t(0); i != BlockForestLevels; ++i )
{
std::vector< blockforest::SetupBlock * > blocks;
sforest.getBlocks( blocks, i );
......@@ -308,7 +308,7 @@ void createSetupBlockForest( blockforest::SetupBlockForest & sforest, const Conf
const memory_t memoryLimit = configBlock.getParameter< memory_t >( "memoryLimit", numeric_cast< memory_t >(256) );
sforest.balanceLoad( blockforest::StaticLevelwiseCurveBalance(true), numberOfProcesses, bufferProcesses, memoryLimit, true,
(bufferProcesses != uint_t(0)) ? true : false );
bufferProcesses != uint_t(0) );
WALBERLA_LOG_INFO_ON_ROOT( "SetupBlockForest created successfully:\n" << sforest );
}
......@@ -415,13 +415,13 @@ void ReGrid::operator()( std::vector< std::pair< const Block *, uint_t > > & min
domain.xMax(), domain.yMax(), domain.zMin() + real_c(0.99) * zSize );
}
for( auto it = minTargetLevels.begin(); it != minTargetLevels.end(); ++it )
for(auto & minTargetLevel : minTargetLevels)
{
auto & aabb = it->first->getAABB();
auto & aabb = minTargetLevel.first->getAABB();
if( left.intersects( aabb ) || right.intersects( aabb ) )
it->second = BlockForestLevels - uint_t(1);
minTargetLevel.second = BlockForestLevels - uint_t(1);
else
it->second = uint_t(0);
minTargetLevel.second = uint_t(0);
}
}
......@@ -437,10 +437,10 @@ void ReGrid::operator()( std::vector< std::pair< const Block *, uint_t > > & min
template< typename LatticeModel_T >
struct MyBoundaryTypes
{
typedef lbm::NoSlip< LatticeModel_T, flag_t > NoSlip_T;
typedef lbm::SimpleUBB< LatticeModel_T, flag_t > UBB_T;
using NoSlip_T = lbm::NoSlip<LatticeModel_T, flag_t>;
using UBB_T = lbm::SimpleUBB<LatticeModel_T, flag_t>;
typedef BoundaryHandling< FlagField_T, typename Types<LatticeModel_T>::Stencil_T, NoSlip_T, UBB_T > BoundaryHandling_T;
using BoundaryHandling_T = BoundaryHandling<FlagField_T, typename Types<LatticeModel_T>::Stencil_T, NoSlip_T, UBB_T>;
};
template< typename LatticeModel_T >
......@@ -632,7 +632,7 @@ struct AddRefinementTimeStep
}
else
{
typedef lbm::SplitSweep< LatticeModel_T, FlagField_T > Sweep_T;
using Sweep_T = lbm::SplitSweep<LatticeModel_T, FlagField_T>;
auto mySweep = make_shared< Sweep_T >( pdfFieldId, flagFieldId, Fluid_Flag );
addRefinementTimeStep< LatticeModel_T, Sweep_T >( timeloop, blocks, pdfFieldId, boundaryHandlingId, timingPool, levelwiseTimingPool,
......
waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_link_files_to_builddir( "*.py" )
waLBerla_link_files_to_builddir( "simulation_setup" )
waLBerla_generate_target_from_python(NAME NonUniformGridCPUGenerated
FILE NonUniformGridCPU.py
OUT_FILES NonUniformGridCPUStorageSpecification.h NonUniformGridCPUStorageSpecification.cpp
NonUniformGridCPUSweepCollection.h NonUniformGridCPUSweepCollection.cpp
NoSlip.h NoSlip.cpp
UBB.h UBB.cpp
NonUniformGridCPUBoundaryCollection.h
NonUniformGridCPUInfoHeader.h)
waLBerla_add_executable( NAME NonUniformGridCPU
FILES NonUniformGridCPU.cpp LdcSetup.h GridGeneration.h
DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::domain_decomposition walberla::field walberla::geometry walberla::lbm_generated walberla::python_coupling walberla::timeloop walberla::vtk NonUniformGridCPUGenerated )
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file GridGeneration.h
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#pragma once
#include "blockforest/Initialization.h"
#include "blockforest/SetupBlock.h"
#include "blockforest/SetupBlockForest.h"
#include "blockforest/loadbalancing/StaticCurve.h"
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/timing/TimingPool.h"
#include <string>
#include "LdcSetup.h"
#include "NonUniformGridCPUInfoHeader.h"
using StorageSpecification_T = lbm::NonUniformGridCPUStorageSpecification;
using Stencil_T = StorageSpecification_T::Stencil;
using namespace walberla;
void createSetupBlockForest(SetupBlockForest& setupBfs,
const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup,
const bool useMPIManager=false)
{
WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
Vector3<real_t> domainSize = domainSetup.getParameter<Vector3<real_t> >("domainSize");
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
Vector3<uint_t> rootBlocks = domainSetup.getParameter<Vector3<uint_t> >("rootBlocks");
Vector3<bool> periodic = domainSetup.getParameter<Vector3<bool> >("periodic");
const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses");
const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest");
const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false);
const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false);
if(useMPIManager)
numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());
const LDC ldc(refinementDepth);
auto refSelection = ldc.refinementSelector();
setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(refSelection));
const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
if(mpi::MPIManager::instance()->numProcesses() > 1)
return;
{
std::ostringstream oss;
oss << blockForestFilestem << ".bfs";
setupBfs.saveToFile(oss.str().c_str());
}
if(writeVtk){
setupBfs.writeVTKOutput(blockForestFilestem);
}
if(outputStatistics){
WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================");
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
for (uint_t level = 0; level <= refinementDepth; level++)
{
const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
}
const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses());
WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
const real_t averageCellsPerGPU = avgBlocksPerProc * real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type);
const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
const double expectedMemoryPerGPU = double_c(averageCellsPerGPU * valuesPerCell * sizePerValue) * 1e-9;
WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per GPU will be " << expectedMemoryPerGPU << " GB")
WALBERLA_LOG_INFO_ON_ROOT("=================================================================================");
}
}
void createBlockForest(shared_ptr< BlockForest >& bfs,
const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup)
{
if (mpi::MPIManager::instance()->numProcesses() > 1)
{
const std::string blockForestFilestem =
blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
// Load structured block forest from file
std::ostringstream oss;
oss << blockForestFilestem << ".bfs";
const std::string setupBlockForestFilepath = oss.str();
std::ifstream infile(setupBlockForestFilepath.c_str());
if(!infile.good())
{
WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!")
SetupBlockForest setupBfs;
createSetupBlockForest(setupBfs, domainSetup, blockForestSetup, true);
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
}
else
{
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()),
setupBlockForestFilepath.c_str(), false);
}
}
else
{
SetupBlockForest setupBfs;
createSetupBlockForest(setupBfs, domainSetup, blockForestSetup);
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
}
}
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file LdcSetup.h
//! \author Markus Holzer <markus.holzer@fau.de>
//! \author Frederik Hennig <frederik.hennig@fau.de>
//
//======================================================================================================================
#pragma once
#include "blockforest/SetupBlock.h"
#include "blockforest/SetupBlockForest.h"
#include "blockforest/StructuredBlockForest.h"
#include "core/all.h"
#include "field/FlagField.h"
#include "field/FlagUID.h"
using namespace walberla;
using RefinementSelectionFunctor = SetupBlockForest::RefinementSelectionFunction;
using FlagField_T = FlagField< uint8_t >;
class LDCRefinement
{
private:
const uint_t refinementDepth_;
public:
explicit LDCRefinement(const uint_t depth) : refinementDepth_(depth){};
void operator()(SetupBlockForest& forest) const
{
const AABB & domain = forest.getDomain();
const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() );
const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() );
for(auto & block : forest)
{
auto & aabb = block.getAABB();
if( leftCorner.intersects( aabb ) || rightCorner.intersects( aabb ) )
{
if( block.getLevel() < refinementDepth_)
block.setMarker( true );
}
}
}
};
class LDC
{
private:
const std::string refinementProfile_;
const uint_t refinementDepth_;
const FlagUID noSlipFlagUID_;
const FlagUID ubbFlagUID_;
public:
explicit LDC(const uint_t depth) : refinementDepth_(depth), noSlipFlagUID_("NoSlip"), ubbFlagUID_("UBB"){};
RefinementSelectionFunctor refinementSelector() const
{
return LDCRefinement(refinementDepth_);
}
void setupBoundaryFlagField(StructuredBlockForest& sbfs, const BlockDataID flagFieldID)
{
for (auto bIt = sbfs.begin(); bIt != sbfs.end(); ++bIt)
{
auto& b = dynamic_cast< Block& >(*bIt);
const uint_t level = b.getLevel();
auto flagField = b.getData< FlagField_T >(flagFieldID);
const uint8_t noslipFlag = flagField->registerFlag(noSlipFlagUID_);
const uint8_t ubbFlag = flagField->registerFlag(ubbFlagUID_);
for (auto cIt = flagField->beginWithGhostLayerXYZ(2); cIt != flagField->end(); ++cIt)
{
const Cell localCell = cIt.cell();
Cell globalCell(localCell);
sbfs.transformBlockLocalToGlobalCell(globalCell, b);
if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, ubbFlag); }
else if (globalCell.z() < 0 || globalCell.y() < 0 || globalCell.x() < 0 ||
globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)) || globalCell.z() >= cell_idx_c(sbfs.getNumberOfZCells(level)))
{
flagField->addFlag(localCell, noslipFlag);
}
}
}
}
};
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file NonUniformGridCPU.cpp
//! \author Markus Holzer <markus.holzer@fau.de>
//! \author Frederik Hennig <frederik.hennig@fau.de>
//
//======================================================================================================================
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/timing/TimingPool.h"
#include "field/AddToStorage.h"
#include "field/FlagField.h"
#include "field/vtk/VTKWriter.h"
#include "geometry/InitBoundaryHandling.h"
#include "python_coupling/CreateConfig.h"
#include "python_coupling/DictWrapper.h"
#include "python_coupling/PythonCallback.h"
#include "timeloop/SweepTimeloop.h"
#include <cmath>
#include "GridGeneration.h"
#include "LdcSetup.h"
#include "NonUniformGridCPUInfoHeader.h"
#include "lbm_generated/communication/NonuniformGeneratedPdfPackInfo.h"
#include "lbm_generated/evaluation/PerformanceEvaluation.h"
#include "lbm_generated/field/AddToStorage.h"
#include "lbm_generated/field/PdfField.h"
#include "lbm_generated/refinement/BasicRecursiveTimeStep.h"
using namespace walberla;
using StorageSpecification_T = lbm::NonUniformGridCPUStorageSpecification;
using Stencil_T = StorageSpecification_T::Stencil;
using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil;
using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
using BoundaryCollection_T = lbm::NonUniformGridCPUBoundaryCollection< FlagField_T >;
using SweepCollection_T = lbm::NonUniformGridCPUSweepCollection;
using blockforest::communication::NonUniformBufferedScheme;
int main(int argc, char** argv)
{
const mpi::Environment env(argc, argv);
mpi::MPIManager::instance()->useWorldComm();
const std::string input_filename(argv[1]);
const bool inputIsPython = string_ends_with(input_filename, ".py");
for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
{
WALBERLA_MPI_BARRIER()
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// BLOCK FOREST SETUP ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto config = *cfg;
logging::configureLogging(config);
auto domainSetup = config->getOneBlock("DomainSetup");
auto blockForestSetup = config->getOneBlock("SetupBlockForest");
const bool writeSetupForestAndReturn = blockForestSetup.getParameter< bool >("writeSetupForestAndReturn", true);
const std::string blockForestFilestem =
blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
shared_ptr< BlockForest > bfs;
createBlockForest(bfs, domainSetup, blockForestSetup);
if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1)
{
WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program")
return EXIT_SUCCESS;
}
auto blocks =
std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
blocks->createCellBoundingBoxes();
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks())
for (uint_t level = 0; level <= refinementDepth; level++)
{
WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << blocks->getNumberOfBlocks(level))
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// DISTRIBUTED DATA STRUCTURES ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto ldc = std::make_shared< LDC >(refinementDepth);
// Reading parameters
auto parameters = config->getOneBlock("Parameters");
const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.4));
const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50));
const bool benchmarkKernelOnly = parameters.getParameter< bool >("benchmarkKernelOnly", false);
// Creating fields
const StorageSpecification_T StorageSpec = StorageSpecification_T();
const BlockDataID pdfFieldID =
lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx);
const BlockDataID velFieldID =
field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2));
const BlockDataID densityFieldID =
field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2));
const BlockDataID flagFieldID =
field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
const Cell innerOuterSplit =
Cell(parameters.getParameter< Vector3< cell_idx_t > >("innerOuterSplit", Vector3< cell_idx_t >(1, 1, 1)));
SweepCollection_T sweepCollection(blocks, pdfFieldID, densityFieldID, velFieldID, omega, innerOuterSplit);
for (auto& block : *blocks)
{
sweepCollection.initialise(&block, cell_idx_c(1));
}
WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Initialisation done")
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// LB SWEEPS AND BOUNDARY HANDLING ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
const FlagUID fluidFlagUID("Fluid");
ldc->setupBoundaryFlagField(*blocks, flagFieldID);
geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID, 2);
BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldID, fluidFlagUID);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// COMMUNICATION SCHEME ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...")
auto communication = std::make_shared< NonUniformBufferedScheme< CommunicationStencil_T > >(blocks);
auto packInfo = lbm_generated::setupNonuniformPdfCommunication< PdfField_T >(blocks, pdfFieldID);
communication->addPackInfo(packInfo);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// TIME STEP DEFINITIONS ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
lbm_generated::BasicRecursiveTimeStep< PdfField_T, SweepCollection_T, BoundaryCollection_T > LBMMeshRefinement(
blocks, pdfFieldID, sweepCollection, boundaryCollection, communication, packInfo);
SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
if (benchmarkKernelOnly)
{
timeLoop.add() << Sweep(sweepCollection.streamCollide(SweepCollection_T::ALL), "LBM StreamCollide");
}
else { LBMMeshRefinement.addRefinementToTimeLoop(timeLoop); }
// VTK
const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
const bool useVTKAMRWriter = parameters.getParameter< bool >("useVTKAMRWriter", false);
const bool oneFilePerProcess = parameters.getParameter< bool >("oneFilePerProcess", false);
auto finalDomain = blocks->getDomain();
if (vtkWriteFrequency > 0)
{
auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
"simulation_step", false, true, true, false, 0, useVTKAMRWriter, oneFilePerProcess);
auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(velFieldID, "vel");
vtkOutput->addCellDataWriter(velWriter);
if (parameters.getParameter< bool >("writeOnlySlice", true)){
const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - blocks->dz(refinementDepth),
finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + blocks->dz(refinementDepth));
vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY));
}
vtkOutput->addBeforeFunction([&]() {
for (auto& block : *blocks)
sweepCollection.calculateMacroscopicParameters(&block);
});
timeLoop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// BENCHMARK ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto remainingTimeLoggerFrequency =
parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(-1.0)); // in seconds
if (remainingTimeLoggerFrequency > 0)
{
auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency);
timeLoop.addFuncAfterTimeStep(logger, "remaining time logger");
}
lbm_generated::PerformanceEvaluation< FlagField_T > const performance(blocks, flagFieldID, fluidFlagUID);
field::CellCounter< FlagField_T > fluidCells(blocks, flagFieldID, fluidFlagUID);
fluidCells();
WALBERLA_LOG_INFO_ON_ROOT("Non uniform Grid benchmark with " << fluidCells.numberOfCells()
<< " fluid cells (in total on all levels)")
WcTimingPool timeloopTiming;
WcTimer simTimer;
WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Starting benchmark with " << timesteps << " time steps")
WALBERLA_MPI_BARRIER()
simTimer.start();
timeLoop.run(timeloopTiming);
WALBERLA_MPI_BARRIER()
simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT("Benchmark finished")
double time = simTimer.max();
WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
performance.logResultOnRoot(timesteps, time);
const auto reducedTimeloopTiming = timeloopTiming.getReduced();
WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
WALBERLA_ROOT_SECTION()
{
if (inputIsPython)
{
python_coupling::PythonCallback pythonCallbackResults("results_callback");
if (pythonCallbackResults.isCallable())
{
pythonCallbackResults.data().exposeValue("numProcesses", performance.processes());
pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
pythonCallbackResults.data().exposeValue("numCores", performance.cores());
pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerProcess",
performance.mlupsPerProcess(timesteps, time));
pythonCallbackResults.data().exposeValue("stencil", infoStencil);
pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup);
pythonCallbackResults.data().exposeValue("cse_global", infoCseGlobal);
pythonCallbackResults.data().exposeValue("cse_pdfs", infoCsePdfs);
// Call Python function to report results
pythonCallbackResults();
}
}
}
}
return EXIT_SUCCESS;
}
\ No newline at end of file
import sympy as sp
import pystencils as ps
from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule
from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil
from pystencils_walberla import CodeGeneration, generate_info_header
from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
omega = sp.symbols("omega")
omega_free = sp.Symbol("omega_free")
info_header = """
const char * infoStencil = "{stencil}";
const char * infoStreamingPattern = "{streaming_pattern}";
const char * infoCollisionSetup = "{collision_setup}";
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""
with CodeGeneration() as ctx:
field_type = "float64" if ctx.double_accuracy else "float32"
cpu_vec = {"instruction_set": None}
streaming_pattern = 'esopull'
timesteps = get_timesteps(streaming_pattern)
stencil = LBStencil(Stencil.D3Q19)
method_enum = Method.CUMULANT
fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False
collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower()
assert stencil.D == 3, "This application supports only three-dimensional stencils"
pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True,
fourth_order_correction=fourth_order_correction,
streaming_pattern=streaming_pattern)
lbm_opt = LBMOptimisation(cse_global=False, field_layout="fzyx")
method = create_lb_method(lbm_config=lbm_config)
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
boundary_object=NoSlip())
ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
boundary_object=UBB([0.05, 0, 0], data_type=field_type))
generate_lbm_package(ctx, name="NonUniformGridCPU",
collision_rule=collision_rule,
lbm_config=lbm_config, lbm_optimisation=lbm_opt,
nonuniform=True, boundaries=[no_slip, ubb],
macroscopic_fields=macroscopic_fields,
target=ps.Target.CPU, cpu_vectorize_info=cpu_vec,)
infoHeaderParams = {
'stencil': stencil.name.lower(),
'streaming_pattern': streaming_pattern,
'collision_setup': collision_setup,
'cse_global': int(lbm_opt.cse_global),
'cse_pdfs': int(lbm_opt.cse_pdfs),
}
field_typedefs = {'VelocityField_T': velocity_field,
'ScalarField_T': density_field}
generate_info_header(ctx, 'NonUniformGridCPUInfoHeader',
field_typedefs=field_typedefs,
additional_code=info_header.format(**infoHeaderParams))
import waLBerla as wlb
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
import sqlite3
import os
import sys
try:
import machinestate as ms
except ImportError:
ms = None
DB_FILE = os.environ.get('DB_FILE', "cpu_benchmark.sqlite3")
BENCHMARK = int(os.environ.get('BENCHMARK', 0))
WeakX = int(os.environ.get('WeakX', 128))
WeakY = int(os.environ.get('WeakY', 128))
WeakZ = int(os.environ.get('WeakZ', 128))
StrongX = int(os.environ.get('StrongX', 128))
StrongY = int(os.environ.get('StrongY', 128))
StrongZ = int(os.environ.get('StrongZ', 128))
class Scenario:
def __init__(self,
domain_size=(64, 64, 64),
root_blocks=(2, 2, 2),
num_processes=1,
refinement_depth=0,
cells_per_block=(32, 32, 32),
timesteps=101,
vtk_write_frequency=0,
logger_frequency=0,
blockforest_filestem="blockforest",
write_setup_vtk=True,
db_file_name=None):
self.domain_size = domain_size
self.root_blocks = root_blocks
self.cells_per_block = cells_per_block
self.periodic = (0, 0, 0)
self.refinement_depth = refinement_depth
self.num_processes = num_processes
self.bfs_filestem = blockforest_filestem
self.write_setup_vtk = write_setup_vtk
self.timesteps = timesteps
self.vtk_write_frequency = vtk_write_frequency
self.logger_frequency = logger_frequency
self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False)
@wlb.member_callback
def config(self, print_dict=True):
from pprint import pformat
config_dict = {
'DomainSetup': {
'domainSize': self.domain_size,
'rootBlocks': self.root_blocks,
'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic
},
'SetupBlockForest': {
'refinementDepth': self.refinement_depth,
'numProcesses': self.num_processes,
'blockForestFilestem': self.bfs_filestem,
'writeVtk': self.write_setup_vtk,
'outputStatistics': True,
'writeSetupForestAndReturn': True,
},
'Parameters': {
'omega': 1.95,
'timesteps': self.timesteps,
'remainingTimeLoggerFrequency': self.logger_frequency,
'vtkWriteFrequency': self.vtk_write_frequency,
'useVTKAMRWriter': True,
'oneFilePerProcess': False,
'writeOnlySlice': False
},
'Logging': {
'logLevel': "info",
}
}
if print_dict:
wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
return config_dict
@wlb.member_callback
def results_callback(self, **kwargs):
data = {}
data.update(self.config_dict['Parameters'])
data.update(self.config_dict['DomainSetup'])
data.update(kwargs)
data['executable'] = sys.argv[0]
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
if ms:
state = ms.MachineState(extended=False, anonymous=True)
state.generate() # generate subclasses
state.update() # read information
data["MachineState"] = str(state.get())
else:
print("MachineState module is not available. MachineState was not saved")
sequenceValuesToScalars(data)
result = data
sequenceValuesToScalars(result)
num_tries = 4
# check multiple times e.g. may fail when multiple benchmark processes are running
table_name = f"runs"
table_name = table_name.replace("-", "_")
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, table_name, self.db_file_name)
storeSingle(result, table_name, self.db_file_name)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
def weak_scaling_ldc(num_proc, uniform=False):
wlb.log_info_on_root("Running weak scaling benchmark...")
# This benchmark must run from 16 processes onwards
if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if uniform:
factor = 3 * num_proc
name = "uniform"
else:
if num_proc % 16 != 0:
raise RuntimeError("Number of processes must be dividable by 16")
factor = int(num_proc // 16)
name = "nonuniform"
cells_per_block = (WeakX, WeakY, WeakZ)
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks,
num_processes=num_proc,
cells_per_block=cells_per_block,
refinement_depth=0 if uniform else 3,
timesteps=10,
db_file_name=f"weakScalingCPU{name}LDC.sqlite3")
scenarios.add(scenario)
def strong_scaling_ldc(num_proc, uniform=False):
wlb.log_info_on_root("Running strong scaling benchmark...")
# This benchmark must run from 64 GPUs onwards
if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if num_proc % 64 != 0:
raise RuntimeError("Number of processes must be dividable by 64")
cells_per_block = (StrongX, StrongY, StrongZ)
if uniform:
domain_size = (cells_per_block[0] * 2, cells_per_block[1] * 2, cells_per_block[2] * 16)
name = "uniform"
else:
factor = int(num_proc / 64)
blocks64 = block_decomposition(factor)
cells_per_block = tuple([int(c / b) for c, b in zip(cells_per_block, reversed(blocks64))])
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
name = "nonuniform"
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks,
num_processes=num_proc,
cells_per_block=cells_per_block,
refinement_depth=0 if uniform else 3,
timesteps=10,
db_file_name=f"strongScalingCPU{name}LDC.sqlite3")
scenarios.add(scenario)
def validation_run():
"""Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
wlb.log_info_on_root("Validation run")
domain_size = (96, 96, 32)
cells_per_block = (32, 32, 32)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(domain_size=domain_size,
root_blocks=root_blocks,
num_processes=1,
refinement_depth=3,
cells_per_block=cells_per_block,
timesteps=1001,
vtk_write_frequency=100,
logger_frequency=5,
write_setup_vtk=True)
scenarios.add(scenario)
if BENCHMARK == 0:
validation_run()
elif BENCHMARK == 1:
weak_scaling_ldc(1, False)
elif BENCHMARK == 2:
strong_scaling_ldc(1, False)
else:
print(f"Invalid benchmark case {BENCHMARK}")
waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_link_files_to_builddir( "*.py" )
waLBerla_link_files_to_builddir( "simulation_setup" )
waLBerla_generate_target_from_python(NAME NonUniformGridGPUGenerated
FILE NonUniformGridGPU.py
OUT_FILES NonUniformGridGPUStorageSpecification.h NonUniformGridGPUStorageSpecification.${CODEGEN_FILE_SUFFIX}
NonUniformGridGPUSweepCollection.h NonUniformGridGPUSweepCollection.${CODEGEN_FILE_SUFFIX}
NoSlip.h NoSlip.${CODEGEN_FILE_SUFFIX}
UBB.h UBB.${CODEGEN_FILE_SUFFIX}
NonUniformGridGPUBoundaryCollection.h
NonUniformGridGPUInfoHeader.h)
waLBerla_add_executable( NAME NonUniformGridGPU
FILES NonUniformGridGPU.cpp LdcSetup.h GridGeneration.h
DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::gpu walberla::domain_decomposition walberla::field walberla::geometry walberla::lbm_generated walberla::python_coupling walberla::timeloop walberla::vtk NonUniformGridGPUGenerated )
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file GridGeneration.h
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#pragma once
#include "blockforest/Initialization.h"
#include "blockforest/SetupBlock.h"
#include "blockforest/SetupBlockForest.h"
#include "blockforest/loadbalancing/StaticCurve.h"
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/timing/TimingPool.h"
#include <string>
#include "LdcSetup.h"
#include "NonUniformGridGPUInfoHeader.h"
using StorageSpecification_T = lbm::NonUniformGridGPUStorageSpecification;
using Stencil_T = StorageSpecification_T::Stencil;
using namespace walberla;
void createSetupBlockForest(SetupBlockForest& setupBfs,
const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup,
const bool useMPIManager=false)
{
WALBERLA_LOG_INFO_ON_ROOT("Generating SetupBlockForest...")
Vector3<real_t> domainSize = domainSetup.getParameter<Vector3<real_t> >("domainSize");
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
Vector3<uint_t> rootBlocks = domainSetup.getParameter<Vector3<uint_t> >("rootBlocks");
Vector3<bool> periodic = domainSetup.getParameter<Vector3<bool> >("periodic");
const uint_t refinementDepth = blockForestSetup.getParameter< uint_t >("refinementDepth", uint_c(1));
uint_t numProcesses = blockForestSetup.getParameter< uint_t >( "numProcesses");
const std::string blockForestFilestem = blockForestSetup.getParameter< std::string > ("blockForestFilestem", "blockforest");
const bool writeVtk = blockForestSetup.getParameter< bool >("writeVtk", false);
const bool outputStatistics = blockForestSetup.getParameter< bool >("outputStatistics", false);
if(useMPIManager)
numProcesses = uint_c(mpi::MPIManager::instance()->numProcesses());
const LDC ldc(refinementDepth);
auto refSelection = ldc.refinementSelector();
setupBfs.addRefinementSelectionFunction(std::function<void(SetupBlockForest &)>(refSelection));
const AABB domain(real_t(0.0), real_t(0.0), real_t(0.0), domainSize[0], domainSize[1], domainSize[2]);
setupBfs.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment);
setupBfs.init(domain, rootBlocks[0], rootBlocks[1], rootBlocks[2], periodic[0], periodic[1], periodic[2]);
setupBfs.balanceLoad(blockforest::StaticLevelwiseCurveBalanceWeighted(), numProcesses);
if(mpi::MPIManager::instance()->numProcesses() > 1)
return;
{
std::ostringstream oss;
oss << blockForestFilestem << ".bfs";
setupBfs.saveToFile(oss.str().c_str());
}
if(writeVtk){
setupBfs.writeVTKOutput(blockForestFilestem);
}
if(outputStatistics){
WALBERLA_LOG_INFO_ON_ROOT("=========================== BLOCK FOREST STATISTICS ============================");
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks())
for (uint_t level = 0; level <= refinementDepth; level++){
const uint_t numberOfBlocks = setupBfs.getNumberOfBlocks(level);
WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << numberOfBlocks)
}
const real_t avgBlocksPerProc = real_c(setupBfs.getNumberOfBlocks()) / real_c(setupBfs.getNumberOfProcesses());
WALBERLA_LOG_INFO_ON_ROOT("Average blocks per process: " << avgBlocksPerProc);
const uint_t totalNumberCells = setupBfs.getNumberOfBlocks() * cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
const real_t averageCellsPerGPU = avgBlocksPerProc * real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
const uint_t PDFsPerCell = StorageSpecification_T::inplace ? Stencil_T::Q : 2 * Stencil_T::Q;
const uint_t valuesPerCell = (PDFsPerCell + VelocityField_T::F_SIZE + ScalarField_T::F_SIZE);
const uint_t sizePerValue = sizeof(StorageSpecification_T::value_type);
const double expectedMemory = double_c(totalNumberCells * valuesPerCell * sizePerValue) * 1e-9;
const double expectedMemoryPerGPU = double_c(averageCellsPerGPU * valuesPerCell * sizePerValue) * 1e-9;
WALBERLA_LOG_INFO_ON_ROOT( "Total number of cells will be " << totalNumberCells << " fluid cells (in total on all levels)")
WALBERLA_LOG_INFO_ON_ROOT( "Expected total memory demand will be " << expectedMemory << " GB")
WALBERLA_LOG_INFO_ON_ROOT( "Average memory demand per GPU will be " << expectedMemoryPerGPU << " GB")
WALBERLA_LOG_INFO_ON_ROOT("=================================================================================");
}
}
void createBlockForest(shared_ptr< BlockForest >& bfs,
const Config::BlockHandle& domainSetup, const Config::BlockHandle& blockForestSetup)
{
if (mpi::MPIManager::instance()->numProcesses() > 1){
const std::string blockForestFilestem =
blockForestSetup.getParameter< std::string >("blockForestFilestem", "blockforest");
// Load structured block forest from file
std::ostringstream oss;
oss << blockForestFilestem << ".bfs";
const std::string setupBlockForestFilepath = oss.str();
std::ifstream infile(setupBlockForestFilepath.c_str());
if(!infile.good()){
WALBERLA_LOG_WARNING_ON_ROOT("Blockforest was not created beforehand and thus needs to be created on the fly. For large simulation runs this can be a severe problem!")
SetupBlockForest setupBfs;
createSetupBlockForest(setupBfs, domainSetup, blockForestSetup, true);
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
}
else{
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()),
setupBlockForestFilepath.c_str(), false);
}
}
else{
SetupBlockForest setupBfs;
createSetupBlockForest(setupBfs, domainSetup, blockForestSetup);
bfs = std::make_shared< BlockForest >(uint_c(MPIManager::instance()->worldRank()), setupBfs);
}
}
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file LdcSetup.h
//! \author Markus Holzer <markus.holzer@fau.de>
//! \author Frederik Hennig <frederik.hennig@fau.de>
//
//======================================================================================================================
#pragma once
#include "blockforest/SetupBlock.h"
#include "blockforest/SetupBlockForest.h"
#include "blockforest/StructuredBlockForest.h"
#include "core/all.h"
#include "field/FlagField.h"
#include "field/FlagUID.h"
using namespace walberla;
using RefinementSelectionFunctor = SetupBlockForest::RefinementSelectionFunction;
using FlagField_T = FlagField< uint8_t >;
class LDCRefinement
{
private:
const uint_t refinementDepth_;
public:
explicit LDCRefinement(const uint_t depth) : refinementDepth_(depth){};
void operator()(SetupBlockForest& forest) const
{
const AABB & domain = forest.getDomain();
const AABB leftCorner( 0, domain.yMax() -1, 0, 1, domain.yMax() , domain.zMax() );
const AABB rightCorner( domain.xMax() - 1, domain.yMax() -1, 0, domain.xMax(), domain.yMax() , domain.zMax() );
for(auto & block : forest)
{
auto & aabb = block.getAABB();
if( leftCorner.intersects( aabb ) || rightCorner.intersects( aabb ) )
{
if( block.getLevel() < refinementDepth_)
block.setMarker( true );
}
}
}
};
class LDC
{
private:
const std::string refinementProfile_;
const uint_t refinementDepth_;
const FlagUID noSlipFlagUID_;
const FlagUID ubbFlagUID_;
public:
explicit LDC(const uint_t depth) : refinementDepth_(depth), noSlipFlagUID_("NoSlip"), ubbFlagUID_("UBB"){};
RefinementSelectionFunctor refinementSelector() const
{
return LDCRefinement(refinementDepth_);
}
void setupBoundaryFlagField(StructuredBlockForest& sbfs, const BlockDataID flagFieldID)
{
for (auto bIt = sbfs.begin(); bIt != sbfs.end(); ++bIt)
{
auto& b = dynamic_cast< Block& >(*bIt);
const uint_t level = b.getLevel();
auto flagField = b.getData< FlagField_T >(flagFieldID);
const uint8_t noslipFlag = flagField->registerFlag(noSlipFlagUID_);
const uint8_t ubbFlag = flagField->registerFlag(ubbFlagUID_);
for (auto cIt = flagField->beginWithGhostLayerXYZ(2); cIt != flagField->end(); ++cIt)
{
const Cell localCell = cIt.cell();
Cell globalCell(localCell);
sbfs.transformBlockLocalToGlobalCell(globalCell, b);
if (globalCell.y() >= cell_idx_c(sbfs.getNumberOfYCells(level))) { flagField->addFlag(localCell, ubbFlag); }
else if (globalCell.y() < 0 || globalCell.x() < 0 || globalCell.x() >= cell_idx_c(sbfs.getNumberOfXCells(level)))
{
flagField->addFlag(localCell, noslipFlag);
}
}
}
}
};
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file NonUniformGridGPU.cpp
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/timing/TimingPool.h"
#include "field/AddToStorage.h"
#include "field/FlagField.h"
#include "field/vtk/VTKWriter.h"
#include "geometry/InitBoundaryHandling.h"
#include "gpu/AddGPUFieldToStorage.h"
#include "gpu/DeviceSelectMPI.h"
#include "gpu/ErrorChecking.h"
#include "gpu/FieldCopy.h"
#include "gpu/HostFieldAllocator.h"
#include "gpu/ParallelStreams.h"
#include "gpu/communication/NonUniformGPUScheme.h"
#include "python_coupling/CreateConfig.h"
#include "python_coupling/DictWrapper.h"
#include "python_coupling/PythonCallback.h"
#include <cmath>
#include "GridGeneration.h"
#include "LdcSetup.h"
#include "NonUniformGridGPUInfoHeader.h"
#include "lbm_generated/evaluation/PerformanceEvaluation.h"
#include "lbm_generated/field/AddToStorage.h"
#include "lbm_generated/field/PdfField.h"
#include "lbm_generated/gpu/AddToStorage.h"
#include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.h"
#include "lbm_generated/gpu/GPUPdfField.h"
#include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h"
using namespace walberla;
using StorageSpecification_T = lbm::NonUniformGridGPUStorageSpecification;
using Stencil_T = StorageSpecification_T::Stencil;
using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil;
using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
using GPUPdfField_T = lbm_generated::GPUPdfField< StorageSpecification_T >;
using FlagField_T = FlagField< uint8_t >;
using BoundaryCollection_T = lbm::NonUniformGridGPUBoundaryCollection< FlagField_T >;
using SweepCollection_T = lbm::NonUniformGridGPUSweepCollection;
using gpu::communication::NonUniformGPUScheme;
int main(int argc, char** argv)
{
const mpi::Environment env(argc, argv);
mpi::MPIManager::instance()->useWorldComm();
gpu::selectDeviceBasedOnMpiRank();
const std::string input_filename(argv[1]);
const bool inputIsPython = string_ends_with(input_filename, ".py");
for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
{
WALBERLA_MPI_WORLD_BARRIER()
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// SETUP AND CONFIGURATION ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto config = *cfg;
logging::configureLogging(config);
auto domainSetup = config->getOneBlock("DomainSetup");
auto blockForestSetup = config->getOneBlock("SetupBlockForest");
const bool writeSetupForestAndReturn = blockForestSetup.getParameter< bool >("writeSetupForestAndReturn", true);
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
// Reading parameters
auto parameters = config->getOneBlock("Parameters");
const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.95));
const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50));
const bool gpuEnabledMPI = parameters.getParameter< bool >("gpuEnabledMPI", false);
shared_ptr< BlockForest > bfs;
createBlockForest(bfs, domainSetup, blockForestSetup);
if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1)
{
WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program")
return EXIT_SUCCESS;
}
auto blocks =
std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
blocks->createCellBoundingBoxes();
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks() << " on " << blocks->getNumberOfLevels() << " refinement levels")
for (uint_t level = 0; level < blocks->getNumberOfLevels(); level++)
{
WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << blocks->getNumberOfBlocks(level))
}
WALBERLA_LOG_INFO_ON_ROOT("Start field allocation")
// Creating fields
const StorageSpecification_T StorageSpec = StorageSpecification_T();
auto allocator = make_shared< gpu::HostFieldAllocator< real_t > >();
const BlockDataID pdfFieldCpuID =
lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx, allocator);
const BlockDataID velFieldCpuID =
field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2), allocator);
const BlockDataID densityFieldCpuID =
field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2), allocator);
const BlockDataID flagFieldID =
field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
const BlockDataID pdfFieldGpuID =
lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, pdfFieldCpuID, StorageSpec, "pdfs on GPU", true);
const BlockDataID velFieldGpuID =
gpu::addGPUFieldToStorage< VelocityField_T >(blocks, velFieldCpuID, "velocity on GPU", true);
const BlockDataID densityFieldGpuID =
gpu::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldCpuID, "velocity on GPU", true);
WALBERLA_LOG_INFO_ON_ROOT("Finished field allocation")
const Cell innerOuterSplit =
Cell(parameters.getParameter< Vector3< cell_idx_t > >("innerOuterSplit", Vector3< cell_idx_t >(1, 1, 1)));
Vector3< int32_t > gpuBlockSize =
parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1));
SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0],
gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit);
for (auto& iBlock : *blocks){
sweepCollection.initialise(&iBlock, cell_idx_c(1));
}
sweepCollection.initialiseBlockPointer();
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Initialisation done")
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// LB SWEEPS AND BOUNDARY HANDLING ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto ldc = std::make_shared< LDC >(blocks->getDepth());
const FlagUID fluidFlagUID("Fluid");
ldc->setupBoundaryFlagField(*blocks, flagFieldID);
geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID, 0);
BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldGpuID, fluidFlagUID);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// COMMUNICATION SCHEME ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...")
auto communication = std::make_shared< NonUniformGPUScheme< CommunicationStencil_T > >(blocks, gpuEnabledMPI);
auto packInfo = lbm_generated::setupNonuniformGPUPdfCommunication< GPUPdfField_T >(blocks, pdfFieldGpuID);
communication->addPackInfo(packInfo);
WALBERLA_MPI_BARRIER()
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// TIME STEP DEFINITIONS ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
int streamHighPriority = 0;
int streamLowPriority = 0;
WALBERLA_GPU_CHECK(gpuDeviceGetStreamPriorityRange(&streamLowPriority, &streamHighPriority))
sweepCollection.setOuterPriority(streamHighPriority);
auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority);
lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T >
LBMMeshRefinement(blocks, pdfFieldGpuID, sweepCollection, boundaryCollection, communication, packInfo);
SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
LBMMeshRefinement.addRefinementToTimeLoop(timeLoop, uint_c(0));
// VTK
const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
const bool useVTKAMRWriter = parameters.getParameter< bool >("useVTKAMRWriter", false);
const bool oneFilePerProcess = parameters.getParameter< bool >("oneFilePerProcess", false);
auto finalDomain = blocks->getDomain();
if (vtkWriteFrequency > 0){
auto vtkOutput =
vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", "simulation_step",
false, true, true, false, 0, useVTKAMRWriter, oneFilePerProcess);
auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(velFieldCpuID, "vel");
vtkOutput->addCellDataWriter(velWriter);
if (parameters.getParameter< bool >("writeOnlySlice", true)){
const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - blocks->dz(blocks->getDepth()),
finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + blocks->dz(blocks->getDepth()));
vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY));
}
vtkOutput->addBeforeFunction([&]() {
for (auto& block : *blocks)
sweepCollection.calculateMacroscopicParameters(&block);
gpu::fieldCpy< VelocityField_T, gpu::GPUField< real_t > >(blocks, velFieldCpuID, velFieldGpuID);
});
timeLoop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// BENCHMARK ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto remainingTimeLoggerFrequency =
parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(-1.0)); // in seconds
if (remainingTimeLoggerFrequency > 0){
auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency);
timeLoop.addFuncAfterTimeStep(logger, "remaining time logger");
}
lbm_generated::PerformanceEvaluation< FlagField_T > const performance(blocks, flagFieldID, fluidFlagUID);
field::CellCounter< FlagField_T > fluidCells(blocks, flagFieldID, fluidFlagUID);
fluidCells();
WALBERLA_LOG_INFO_ON_ROOT("Non uniform Grid benchmark with " << fluidCells.numberOfCells()
<< " fluid cells (in total on all levels)")
WcTimingPool timeloopTiming;
WcTimer simTimer;
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
WALBERLA_MPI_BARRIER()
simTimer.start();
timeLoop.run(timeloopTiming);
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
WALBERLA_MPI_BARRIER()
simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
double time = simTimer.max();
WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
performance.logResultOnRoot(timesteps, time);
const auto reducedTimeloopTiming = timeloopTiming.getReduced();
WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
WALBERLA_ROOT_SECTION()
{
if (inputIsPython)
{
python_coupling::PythonCallback pythonCallbackResults("results_callback");
if (pythonCallbackResults.isCallable())
{
pythonCallbackResults.data().exposeValue("numProcesses", lbm_generated::PerformanceEvaluation< FlagField_T >::processes());
pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
pythonCallbackResults.data().exposeValue("numCores", performance.cores());
pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
pythonCallbackResults.data().exposeValue("stencil", infoStencil);
pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup);
pythonCallbackResults.data().exposeValue("cse_global", infoCseGlobal);
pythonCallbackResults.data().exposeValue("cse_pdfs", infoCsePdfs);
// Call Python function to report results
pythonCallbackResults();
}
}
}
}
return EXIT_SUCCESS;
}
\ No newline at end of file
import sympy as sp
import numpy as np
import pystencils as ps
from pystencils.typing import TypedSymbol
from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule
from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil, SubgridScaleModel
from pystencils_walberla import CodeGeneration, generate_info_header
from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
omega = sp.symbols("omega")
omega_free = sp.Symbol("omega_free")
compile_time_block_size = False
max_threads = 256
sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
TypedSymbol("cudaBlockSize1", np.int32),
TypedSymbol("cudaBlockSize2", np.int32))
gpu_indexing_params = {'block_size': sweep_block_size}
info_header = """
const char * infoStencil = "{stencil}";
const char * infoStreamingPattern = "{streaming_pattern}";
const char * infoCollisionSetup = "{collision_setup}";
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""
with CodeGeneration() as ctx:
field_type = "float64" if ctx.double_accuracy else "float32"
streaming_pattern = 'esopull'
timesteps = get_timesteps(streaming_pattern)
stencil = LBStencil(Stencil.D3Q19)
method_enum = Method.CUMULANT
fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False
collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower()
assert stencil.D == 3, "This application supports only three-dimensional stencils"
pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True,
fourth_order_correction=fourth_order_correction,
streaming_pattern=streaming_pattern)
lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx')
method = create_lb_method(lbm_config=lbm_config)
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
boundary_object=NoSlip())
ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
boundary_object=UBB([0.05, 0, 0], data_type=field_type))
generate_lbm_package(ctx, name="NonUniformGridGPU",
collision_rule=collision_rule,
lbm_config=lbm_config, lbm_optimisation=lbm_opt,
nonuniform=True, boundaries=[no_slip, ubb],
macroscopic_fields=macroscopic_fields,
target=ps.Target.GPU, gpu_indexing_params=gpu_indexing_params,
max_threads=max_threads)
infoHeaderParams = {
'stencil': stencil.name.lower(),
'streaming_pattern': streaming_pattern,
'collision_setup': collision_setup,
'cse_global': int(lbm_opt.cse_global),
'cse_pdfs': int(lbm_opt.cse_pdfs),
}
field_typedefs = {'VelocityField_T': velocity_field,
'ScalarField_T': density_field}
generate_info_header(ctx, 'NonUniformGridGPUInfoHeader',
field_typedefs=field_typedefs,
additional_code=info_header.format(**infoHeaderParams))
import waLBerla as wlb
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
import sqlite3
import os
import sys
try:
import machinestate as ms
except ImportError:
ms = None
DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3")
BENCHMARK = int(os.environ.get('BENCHMARK', 0))
WeakX = int(os.environ.get('WeakX', 128))
WeakY = int(os.environ.get('WeakY', 128))
WeakZ = int(os.environ.get('WeakZ', 128))
StrongX = int(os.environ.get('StrongX', 128))
StrongY = int(os.environ.get('StrongY', 128))
StrongZ = int(os.environ.get('StrongZ', 128))
class Scenario:
def __init__(self,
domain_size=(64, 64, 64),
root_blocks=(2, 2, 2),
num_processes=1,
refinement_depth=0,
cells_per_block=(32, 32, 32),
timesteps=101,
gpu_enabled_mpi=False,
vtk_write_frequency=0,
logger_frequency=30,
blockforest_filestem="blockforest",
write_setup_vtk=True,
db_file_name=None):
self.domain_size = domain_size
self.root_blocks = root_blocks
self.cells_per_block = cells_per_block
self.periodic = (0, 0, 1)
self.refinement_depth = refinement_depth
self.num_processes = num_processes
self.bfs_filestem = blockforest_filestem
self.write_setup_vtk = write_setup_vtk
self.timesteps = timesteps
self.gpu_enabled_mpi = gpu_enabled_mpi
self.vtk_write_frequency = vtk_write_frequency
self.logger_frequency = logger_frequency
self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False)
@wlb.member_callback
def config(self, print_dict=True):
from pprint import pformat
config_dict = {
'DomainSetup': {
'domainSize': self.domain_size,
'rootBlocks': self.root_blocks,
'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic,
},
'SetupBlockForest': {
'refinementDepth': self.refinement_depth,
'numProcesses': self.num_processes,
'blockForestFilestem': self.bfs_filestem,
'writeVtk': self.write_setup_vtk,
'outputStatistics': True,
'writeSetupForestAndReturn': True,
},
'Parameters': {
'omega': 1.95,
'timesteps': self.timesteps,
'remainingTimeLoggerFrequency': self.logger_frequency,
'vtkWriteFrequency': self.vtk_write_frequency,
'useVTKAMRWriter': True,
'oneFilePerProcess': False,
'writeOnlySlice': False,
'gpuEnabledMPI': self.gpu_enabled_mpi,
'gpuBlockSize': (128, 1, 1),
},
'Logging': {
'logLevel': "info",
}
}
if print_dict:
wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
return config_dict
@wlb.member_callback
def results_callback(self, **kwargs):
data = {}
data.update(self.config_dict['Parameters'])
data.update(self.config_dict['DomainSetup'])
data.update(kwargs)
data['executable'] = sys.argv[0]
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
if ms:
state = ms.MachineState(extended=False, anonymous=True)
state.generate() # generate subclasses
state.update() # read information
data["MachineState"] = str(state.get())
else:
print("MachineState module is not available. MachineState was not saved")
sequenceValuesToScalars(data)
result = data
sequenceValuesToScalars(result)
num_tries = 4
# check multiple times e.g. may fail when multiple benchmark processes are running
table_name = f"runs"
table_name = table_name.replace("-", "_")
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, table_name, self.db_file_name)
storeSingle(result, table_name, self.db_file_name)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
def validation_run():
"""Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
wlb.log_info_on_root("Validation run")
domain_size = (192, 192, 64)
cells_per_block = (64, 64, 64)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(domain_size=domain_size,
root_blocks=root_blocks,
cells_per_block=cells_per_block,
timesteps=0,
vtk_write_frequency=0,
refinement_depth=3,
gpu_enabled_mpi=False)
scenarios.add(scenario)
def weak_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True):
wlb.log_info_on_root("Running weak scaling benchmark...")
# This benchmark must run from 16 GPUs onwards
if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if uniform:
factor = 3 * num_proc
name = "uniform"
else:
if num_proc % 16 != 0:
raise RuntimeError("Number of processes must be dividable by 16")
factor = int(num_proc // 16)
name = "nonuniform"
cells_per_block = (WeakX, WeakY, WeakZ)
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks,
num_processes=num_proc,
cells_per_block=cells_per_block,
refinement_depth=0 if uniform else 3,
timesteps=10,
gpu_enabled_mpi=gpu_enabled_mpi,
db_file_name=f"weakScalingGPU{name}LDC.sqlite3")
scenarios.add(scenario)
def strong_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True):
wlb.log_info_on_root("Running strong scaling benchmark...")
# This benchmark must run from 64 GPUs onwards
if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if num_proc % 64 != 0:
raise RuntimeError("Number of processes must be dividable by 64")
cells_per_block = (StrongX, StrongY, StrongZ)
if uniform:
domain_size = (cells_per_block[0] * 2, cells_per_block[1] * 2, cells_per_block[2] * 16)
name = "uniform"
else:
factor = int(num_proc / 64)
blocks64 = block_decomposition(factor)
cells_per_block = tuple([int(c / b) for c, b in zip(cells_per_block, reversed(blocks64))])
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
name = "nonuniform"
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
scenarios = wlb.ScenarioManager()
scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks,
num_processes=num_proc,
cells_per_block=cells_per_block,
refinement_depth=0 if uniform else 3,
timesteps=10,
gpu_enabled_mpi=gpu_enabled_mpi,
db_file_name=f"strongScalingGPU{name}LDC.sqlite3")
scenarios.add(scenario)
if BENCHMARK == 0:
validation_run()
elif BENCHMARK == 1:
weak_scaling_ldc(1, True, False)
elif BENCHMARK == 2:
strong_scaling_ldc(1, True, False)
else:
print(f"Invalid benchmark case {BENCHMARK}")
waLBerla_link_files_to_builddir("*.prm")
if (WALBERLA_BUILD_WITH_CODEGEN)
if (NOT WALBERLA_BUILD_WITH_GPU_SUPPORT OR (WALBERLA_BUILD_WITH_GPU_SUPPORT AND (CMAKE_CUDA_ARCHITECTURES GREATER_EQUAL 60 OR WALBERLA_BUILD_WITH_HIP)))
waLBerla_add_executable(NAME Percolation FILES Percolation.cpp
DEPENDS walberla::blockforest walberla::core walberla::field walberla::geometry walberla::gpu walberla::lbm walberla::lbm_mesapd_coupling walberla::mesa_pd walberla::sqlite walberla::vtk PSMCodegenPython_trt-smagorinsky_sc1 )
target_compile_definitions(Percolation PRIVATE Weighting=2)
endif ()
endif ()
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file Percolation.cpp
//! \ingroup lbm_mesapd_coupling
//! \author Samuel Kemmler <samuel.kemmler@fau.de>
//
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "core/DataTypes.h"
#include "core/Environment.h"
#include "core/grid_generator/SCIterator.h"
#include "core/logging/all.h"
#include "core/timing/RemainingTimeLogger.h"
#include "field/AddToStorage.h"
#include "field/vtk/all.h"
#include "geometry/InitBoundaryHandling.h"
#include "gpu/AddGPUFieldToStorage.h"
#include "gpu/DeviceSelectMPI.h"
#include "gpu/communication/UniformGPUScheme.h"
#include "lbm/PerformanceLogger.h"
#include "lbm/vtk/all.h"
#include "lbm_mesapd_coupling/DataTypesCodegen.h"
#include "lbm_mesapd_coupling/partially_saturated_cells_method/codegen/PSMSweepCollection.h"
#include "lbm_mesapd_coupling/utility/ParticleSelector.h"
#include "mesa_pd/data/DataTypes.h"
#include "mesa_pd/data/ParticleAccessorWithShape.h"
#include "mesa_pd/data/ParticleStorage.h"
#include "mesa_pd/data/ShapeStorage.h"
#include "mesa_pd/data/shape/Sphere.h"
#include "mesa_pd/domain/BlockForestDomain.h"
#include "mesa_pd/mpi/SyncNextNeighbors.h"
#include "mesa_pd/vtk/ParticleVtkOutput.h"
#include "sqlite/SQLite.h"
#include "vtk/all.h"
#include "LBMSweep.h"
#include "PSMPackInfo.h"
#include "PSMSweep.h"
#include "PSM_Density.h"
#include "PSM_InfoHeader.h"
#include "PSM_MacroGetter.h"
namespace percolation
{
///////////
// USING //
///////////
using namespace walberla;
using namespace lbm_mesapd_coupling::psm::gpu;
using PackInfo_T = pystencils::PSMPackInfo;
using flag_t = walberla::uint8_t;
using FlagField_T = FlagField< flag_t >;
///////////
// FLAGS //
///////////
const FlagUID Fluid_Flag("Fluid");
const FlagUID Density_Flag("Density");
const FlagUID NoSlip_Flag("NoSlip");
const FlagUID Inflow_Flag("Inflow");
//////////
// MAIN //
//////////
//*******************************************************************************************************************
/*!\brief Benchmark of a percolation setup
*
* This code can be used as a percolation (useParticles=true) or as a channel flow (useParticles=false) benchmark.
* A constant inflow from west is applied and a pressure boundary condition is set at the east.
* For the percolation, mono-sized fixed spherical particles are generated on a structured grid with an offset for
* every second particle layer in flow direction to avoid channels in flow direction. The flow is described by Darcy's
* law. For the channel flow, the flow is described by the Hagen–Poiseuille equation.
*
* The domain is either periodic or bounded by (no slip) walls in the vertical directions (y and z).
*
* For the percolation, the PSM is used in combination with a two-way coupling, but no particle dynamics.
* For the channel flow, only the LBM is used.
*
* The parameters can be changed via the input file.
*
*/
//*******************************************************************************************************************
int main(int argc, char** argv)
{
Environment env(argc, argv);
auto cfgFile = env.config();
if (!cfgFile) { WALBERLA_ABORT("Usage: " << argv[0] << " path-to-configuration-file \n"); }
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
gpu::selectDeviceBasedOnMpiRank();
#endif
WALBERLA_LOG_INFO_ON_ROOT("waLBerla revision: " << std::string(WALBERLA_GIT_SHA1).substr(0, 8));
WALBERLA_LOG_INFO_ON_ROOT("compiler flags: " << std::string(WALBERLA_COMPILER_FLAGS));
WALBERLA_LOG_INFO_ON_ROOT("build machine: " << std::string(WALBERLA_BUILD_MACHINE));
WALBERLA_LOG_INFO_ON_ROOT(*cfgFile);
// Read config file
Config::BlockHandle numericalSetup = cfgFile->getBlock("NumericalSetup");
const uint_t numXBlocks = numericalSetup.getParameter< uint_t >("numXBlocks");
const uint_t numYBlocks = numericalSetup.getParameter< uint_t >("numYBlocks");
const uint_t numZBlocks = numericalSetup.getParameter< uint_t >("numZBlocks");
WALBERLA_CHECK_EQUAL(numXBlocks * numYBlocks * numZBlocks, uint_t(MPIManager::instance()->numProcesses()),
"When using GPUs, the number of blocks ("
<< numXBlocks * numYBlocks * numZBlocks << ") has to match the number of MPI processes ("
<< uint_t(MPIManager::instance()->numProcesses()) << ")");
const bool periodicInY = numericalSetup.getParameter< bool >("periodicInY");
const bool periodicInZ = numericalSetup.getParameter< bool >("periodicInZ");
const uint_t numXCellsPerBlock = numericalSetup.getParameter< uint_t >("numXCellsPerBlock");
const uint_t numYCellsPerBlock = numericalSetup.getParameter< uint_t >("numYCellsPerBlock");
const uint_t numZCellsPerBlock = numericalSetup.getParameter< uint_t >("numZCellsPerBlock");
const bool sendDirectlyFromGPU = numericalSetup.getParameter< bool >("sendDirectlyFromGPU");
const bool useCommunicationHiding = numericalSetup.getParameter< bool >("useCommunicationHiding");
const Vector3< uint_t > frameWidth = numericalSetup.getParameter< Vector3< uint_t > >("frameWidth");
const uint_t timeSteps = numericalSetup.getParameter< uint_t >("timeSteps");
const bool useParticles = numericalSetup.getParameter< bool >("useParticles");
const real_t particleDiameter = numericalSetup.getParameter< real_t >("particleDiameter");
const real_t particleGenerationSpacing = numericalSetup.getParameter< real_t >("particleGenerationSpacing");
const Vector3< real_t > generationDomainFraction =
numericalSetup.getParameter< Vector3< real_t > >("generationDomainFraction");
const Vector3< uint_t > generationPointOfReferenceOffset =
numericalSetup.getParameter< Vector3< uint_t > >("generationPointOfReferenceOffset");
const bool useParticleOffset = numericalSetup.getParameter< bool >("useParticleOffset");
const Vector3< uint_t > particleSubBlockSize =
numericalSetup.getParameter< Vector3< uint_t > >("particleSubBlockSize");
const real_t uInflow = numericalSetup.getParameter< real_t >("uInflow");
const real_t relaxationRate = numericalSetup.getParameter< real_t >("relaxationRate");
if ((periodicInY && numYBlocks == 1) || (periodicInZ && numZBlocks == 1))
{
WALBERLA_LOG_WARNING_ON_ROOT("Using only 1 block in periodic dimensions can lead to unexpected behavior.")
}
const real_t viscosity = lbm::collision_model::viscosityFromOmega(relaxationRate);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(viscosity)
Config::BlockHandle outputSetup = cfgFile->getBlock("Output");
const uint_t vtkSpacing = outputSetup.getParameter< uint_t >("vtkSpacing");
const std::string vtkFolder = outputSetup.getParameter< std::string >("vtkFolder");
const uint_t performanceLogFrequency = outputSetup.getParameter< uint_t >("performanceLogFrequency");
///////////////////////////
// BLOCK STRUCTURE SETUP //
///////////////////////////
const bool periodicInX = false;
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid(
numXBlocks, numYBlocks, numZBlocks, numXCellsPerBlock, numYCellsPerBlock, numZCellsPerBlock, real_t(1), uint_t(0),
false, false, periodicInX, periodicInY, periodicInZ, // periodicity
false);
auto simulationDomain = blocks->getDomain();
////////////
// MesaPD //
////////////
auto rpdDomain = std::make_shared< mesa_pd::domain::BlockForestDomain >(blocks->getBlockForestPointer());
// Init data structures
auto ps = walberla::make_shared< mesa_pd::data::ParticleStorage >(1);
auto ss = walberla::make_shared< mesa_pd::data::ShapeStorage >();
using ParticleAccessor_T = mesa_pd::data::ParticleAccessorWithShape;
auto accessor = walberla::make_shared< ParticleAccessor_T >(ps, ss);
auto sphereShape = ss->create< mesa_pd::data::Sphere >(particleDiameter * real_t(0.5));
// Create spheres
if (useParticles)
{
// Ensure that generation domain is computed correctly
WALBERLA_CHECK_FLOAT_EQUAL(simulationDomain.xMin(), real_t(0));
WALBERLA_CHECK_FLOAT_EQUAL(simulationDomain.yMin(), real_t(0));
WALBERLA_CHECK_FLOAT_EQUAL(simulationDomain.zMin(), real_t(0));
auto generationDomain = math::AABB::createFromMinMaxCorner(
math::Vector3< real_t >(simulationDomain.xMax() * (real_t(1) - generationDomainFraction[0]) / real_t(2),
simulationDomain.yMax() * (real_t(1) - generationDomainFraction[1]) / real_t(2),
simulationDomain.zMax() * (real_t(1) - generationDomainFraction[2]) / real_t(2)),
math::Vector3< real_t >(simulationDomain.xMax() * (real_t(1) + generationDomainFraction[0]) / real_t(2),
simulationDomain.yMax() * (real_t(1) + generationDomainFraction[1]) / real_t(2),
simulationDomain.zMax() * (real_t(1) + generationDomainFraction[2]) / real_t(2)));
real_t particleOffset = particleGenerationSpacing / real_t(2);
for (auto pt :
grid_generator::SCGrid(generationDomain, generationDomain.center() + generationPointOfReferenceOffset,
particleGenerationSpacing))
{
// Offset every second particle layer in flow direction to avoid channels in flow direction
if (useParticleOffset &&
uint_t(round(math::abs(generationDomain.center()[0] - pt[0]) / (particleGenerationSpacing))) % uint_t(2) !=
uint_t(0))
{
pt = pt + Vector3(real_t(0), particleOffset, particleOffset);
}
if (rpdDomain->isContainedInProcessSubdomain(uint_c(mpi::MPIManager::instance()->rank()), pt))
{
mesa_pd::data::Particle&& p = *ps->create();
p.setPosition(pt);
p.setInteractionRadius(particleDiameter * real_t(0.5));
p.setOwner(mpi::MPIManager::instance()->rank());
p.setShapeID(sphereShape);
p.setType(0);
}
}
}
///////////////////////
// ADD DATA TO BLOCKS //
////////////////////////
// Setting initial PDFs to nan helps to detect bugs in the initialization/BC handling
// Depending on WALBERLA_BUILD_WITH_GPU_SUPPORT, pdfFieldCPUGPUID is either a CPU or a CPU field
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
BlockDataID pdfFieldID =
field::addToStorage< PdfField_T >(blocks, "pdf field (fzyx)", real_c(std::nan("")), field::fzyx);
BlockDataID BFieldID = field::addToStorage< BField_T >(blocks, "B field", 0, field::fzyx, 1);
BlockDataID pdfFieldCPUGPUID = gpu::addGPUFieldToStorage< PdfField_T >(blocks, pdfFieldID, "pdf field GPU");
#else
BlockDataID pdfFieldCPUGPUID =
field::addToStorage< PdfField_T >(blocks, "pdf field CPU", real_c(std::nan("")), field::fzyx);
#endif
BlockDataID densityFieldID = field::addToStorage< DensityField_T >(blocks, "density field", real_t(0), field::fzyx);
BlockDataID velFieldID = field::addToStorage< VelocityField_T >(blocks, "velocity field", real_t(0), field::fzyx);
BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
// Synchronize particles between the blocks for the correct mapping of ghost particles
mesa_pd::mpi::SyncNextNeighbors syncNextNeighborFunc;
syncNextNeighborFunc(*ps, *rpdDomain);
// Assemble boundary block string
std::string boundariesBlockString = " Boundaries"
"{"
"Border { direction W; walldistance -1; flag Inflow; }"
"Border { direction E; walldistance -1; flag Density; }";
if (!periodicInY)
{
boundariesBlockString += "Border { direction S; walldistance -1; flag NoSlip; }"
"Border { direction N; walldistance -1; flag NoSlip; }";
}
if (!periodicInZ)
{
boundariesBlockString += "Border { direction T; walldistance -1; flag NoSlip; }"
"Border { direction B; walldistance -1; flag NoSlip; }";
}
boundariesBlockString += "}";
WALBERLA_ROOT_SECTION()
{
std::ofstream boundariesFile("boundaries.prm");
boundariesFile << boundariesBlockString;
boundariesFile.close();
}
WALBERLA_MPI_BARRIER()
auto boundariesCfgFile = Config();
boundariesCfgFile.readParameterFile("boundaries.prm");
auto boundariesConfig = boundariesCfgFile.getBlock("Boundaries");
// map boundaries into the LBM simulation
geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldID, boundariesConfig);
geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, Fluid_Flag);
lbm::PSM_Density density_bc(blocks, pdfFieldCPUGPUID, real_t(1.0));
density_bc.fillFromFlagField< FlagField_T >(blocks, flagFieldID, Density_Flag, Fluid_Flag);
lbm::PSM_NoSlip noSlip(blocks, pdfFieldCPUGPUID);
noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldID, NoSlip_Flag, Fluid_Flag);
lbm::PSM_UBB ubb(blocks, pdfFieldCPUGPUID, uInflow, real_t(0), real_t(0));
ubb.fillFromFlagField< FlagField_T >(blocks, flagFieldID, Inflow_Flag, Fluid_Flag);
///////////////
// TIME LOOP //
///////////////
// Map particles into the fluid domain
ParticleAndVolumeFractionSoA_T< Weighting > particleAndVolumeFractionSoA(blocks, relaxationRate);
PSMSweepCollection psmSweepCollection(blocks, accessor, lbm_mesapd_coupling::RegularParticlesSelector(),
particleAndVolumeFractionSoA, particleSubBlockSize);
if (useParticles)
{
for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
psmSweepCollection.particleMappingSweep(&(*blockIt));
}
}
// Initialize PDFs
pystencils::InitializeDomainForPSM pdfSetter(
particleAndVolumeFractionSoA.BsFieldID, particleAndVolumeFractionSoA.BFieldID,
particleAndVolumeFractionSoA.particleVelocitiesFieldID, pdfFieldCPUGPUID, real_t(0), real_t(0), real_t(0),
real_t(1.0), real_t(0), real_t(0), real_t(0));
for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
// pdfSetter requires particle velocities at cell centers
if (useParticles) { psmSweepCollection.setParticleVelocitiesSweep(&(*blockIt)); }
pdfSetter(&(*blockIt));
}
// Setup of the LBM communication for synchronizing the pdf field between neighboring blocks
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
gpu::communication::UniformGPUScheme< Stencil_T > com(blocks, sendDirectlyFromGPU, false);
#else
walberla::blockforest::communication::UniformBufferedScheme< Stencil_T > com(blocks);
#endif
com.addPackInfo(make_shared< PackInfo_T >(pdfFieldCPUGPUID));
auto communication = std::function< void() >([&]() { com.communicate(); });
SweepTimeloop commTimeloop(blocks->getBlockStorage(), timeSteps);
SweepTimeloop timeloop(blocks->getBlockStorage(), timeSteps);
timeloop.addFuncBeforeTimeStep(RemainingTimeLogger(timeloop.getNrOfTimeSteps()), "Remaining Time Logger");
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
pystencils::PSM_MacroGetter getterSweep(BFieldID, densityFieldID, pdfFieldID, velFieldID, real_t(0.0), real_t(0.0),
real_t(0.0));
#else
pystencils::PSM_MacroGetter getterSweep(particleAndVolumeFractionSoA.BFieldID, densityFieldID, pdfFieldCPUGPUID,
velFieldID, real_t(0.0), real_t(0.0), real_t(0.0));
#endif
// VTK output
if (vtkSpacing != uint_t(0))
{
// Spheres
auto particleVtkOutput = make_shared< mesa_pd::vtk::ParticleVtkOutput >(ps);
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleUid >("uid");
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleLinearVelocity >("velocity");
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleInteractionRadius >("radius");
// Limit output to process-local spheres
particleVtkOutput->setParticleSelector([sphereShape](const mesa_pd::data::ParticleStorage::iterator& pIt) {
return pIt->getShapeID() == sphereShape &&
!(mesa_pd::data::particle_flags::isSet(pIt->getFlags(), mesa_pd::data::particle_flags::GHOST));
});
auto particleVtkWriter = vtk::createVTKOutput_PointData(particleVtkOutput, "particles", vtkSpacing, vtkFolder);
timeloop.addFuncBeforeTimeStep(vtk::writeFiles(particleVtkWriter), "VTK (sphere data)");
// Fields
auto pdfFieldVTK = vtk::createVTKOutput_BlockData(blocks, "fluid", vtkSpacing, 0, false, vtkFolder);
pdfFieldVTK->addBeforeFunction(communication);
pdfFieldVTK->addBeforeFunction([&]() {
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
gpu::fieldCpy< PdfField_T, gpu::GPUField< real_t > >(blocks, pdfFieldID, pdfFieldCPUGPUID);
gpu::fieldCpy< GhostLayerField< real_t, 1 >, BFieldGPU_T >(blocks, BFieldID,
particleAndVolumeFractionSoA.BFieldID);
#endif
for (auto& block : *blocks)
getterSweep(&block);
});
pdfFieldVTK->addCellDataWriter(make_shared< field::VTKWriter< VelocityField_T > >(velFieldID, "Velocity"));
pdfFieldVTK->addCellDataWriter(make_shared< field::VTKWriter< DensityField_T > >(densityFieldID, "Density"));
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
pdfFieldVTK->addCellDataWriter(make_shared< field::VTKWriter< BField_T > >(BFieldID, "OverlapFraction"));
#else
pdfFieldVTK->addCellDataWriter(
make_shared< field::VTKWriter< BField_T > >(particleAndVolumeFractionSoA.BFieldID, "OverlapFraction"));
#endif
pdfFieldVTK->addCellDataWriter(make_shared< field::VTKWriter< FlagField_T > >(flagFieldID, "FlagField"));
timeloop.addFuncBeforeTimeStep(vtk::writeFiles(pdfFieldVTK), "VTK (fluid field data)");
}
if (vtkSpacing != uint_t(0)) { vtk::writeDomainDecomposition(blocks, "domain_decomposition", vtkFolder); }
// Add performance logging
lbm::PerformanceLogger< FlagField_T > performanceLogger(blocks, flagFieldID, Fluid_Flag, performanceLogFrequency);
if (performanceLogFrequency > 0)
{
timeloop.addFuncAfterTimeStep(performanceLogger, "Evaluate performance logging");
}
// Add LBM communication function and boundary handling sweep
if (useCommunicationHiding)
{
timeloop.add() << Sweep(deviceSyncWrapper(density_bc.getSweep()), "Boundary Handling (Density)");
}
else
{
timeloop.add() << BeforeFunction(communication, "LBM Communication")
<< Sweep(deviceSyncWrapper(density_bc.getSweep()), "Boundary Handling (Density)");
}
timeloop.add() << Sweep(deviceSyncWrapper(ubb.getSweep()), "Boundary Handling (UBB)");
if (!periodicInY || !periodicInZ)
{
timeloop.add() << Sweep(deviceSyncWrapper(noSlip.getSweep()), "Boundary Handling (NoSlip)");
}
// PSM kernel
pystencils::PSMSweep PSMSweep(particleAndVolumeFractionSoA.BsFieldID, particleAndVolumeFractionSoA.BFieldID,
particleAndVolumeFractionSoA.particleForcesFieldID,
particleAndVolumeFractionSoA.particleVelocitiesFieldID, pdfFieldCPUGPUID, real_t(0.0),
real_t(0.0), real_t(0.0), relaxationRate);
pystencils::PSMSweepSplit PSMSplitSweep(
particleAndVolumeFractionSoA.BsFieldID, particleAndVolumeFractionSoA.BFieldID,
particleAndVolumeFractionSoA.particleForcesFieldID, particleAndVolumeFractionSoA.particleVelocitiesFieldID,
pdfFieldCPUGPUID, real_t(0.0), real_t(0.0), real_t(0.0), relaxationRate, frameWidth);
pystencils::LBMSweep LBMSweep(pdfFieldCPUGPUID, real_t(0.0), real_t(0.0), real_t(0.0), relaxationRate);
pystencils::LBMSplitSweep LBMSplitSweep(pdfFieldCPUGPUID, real_t(0.0), real_t(0.0), real_t(0.0), relaxationRate,
frameWidth);
if (useParticles)
{
if (useCommunicationHiding)
{
addPSMSweepsToTimeloops(commTimeloop, timeloop, com, psmSweepCollection, PSMSplitSweep);
}
else { addPSMSweepsToTimeloop(timeloop, psmSweepCollection, PSMSweep); }
}
else
{
if (useCommunicationHiding)
{
commTimeloop.add() << BeforeFunction([&]() { com.startCommunication(); }, "LBM Communication (start)")
<< Sweep(deviceSyncWrapper(LBMSplitSweep.getInnerSweep()), "LBM inner sweep")
<< AfterFunction([&]() { com.wait(); }, "LBM Communication (wait)");
timeloop.add() << Sweep(deviceSyncWrapper(LBMSplitSweep.getOuterSweep()), "LBM outer sweep");
}
else { timeloop.add() << Sweep(deviceSyncWrapper(LBMSweep), "LBM sweep"); }
}
WcTimingPool timeloopTiming;
// TODO: maybe add warmup phase
for (uint_t timeStep = 0; timeStep < timeSteps; ++timeStep)
{
if (useCommunicationHiding) { commTimeloop.singleStep(timeloopTiming); }
timeloop.singleStep(timeloopTiming);
}
timeloopTiming.logResultOnRoot();
auto timeloopTimingReduced = timeloopTiming.getReduced();
// Write parameters and performance results in sqlite database
WALBERLA_ROOT_SECTION()
{
// Use DB_FILE environment variable if set
std::string dbFile;
if (std::getenv("DB_FILE") != nullptr) { dbFile = std::getenv("DB_FILE"); }
else
{
if (useParticles) { dbFile = "percolation_benchmark.sqlite3"; }
else { dbFile = "channel_flow_benchmark.sqlite3"; }
}
std::map< std::string, int > integerProperties;
std::map< std::string, double > realProperties;
std::map< std::string, std::string > stringProperties;
integerProperties["numXBlocks"] = int(numXBlocks);
integerProperties["numYBlocks"] = int(numYBlocks);
integerProperties["numZBlocks"] = int(numZBlocks);
integerProperties["numXCellsPerBlock"] = int(numXCellsPerBlock);
integerProperties["numYCellsPerBlock"] = int(numYCellsPerBlock);
integerProperties["numZCellsPerBlock"] = int(numZCellsPerBlock);
integerProperties["timeSteps"] = int(timeSteps);
integerProperties["sendDirectlyFromGPU"] = int(sendDirectlyFromGPU);
integerProperties["useCommunicationHiding"] = int(useCommunicationHiding);
integerProperties["communicationHidingXWidth"] = int(frameWidth[0]);
integerProperties["communicationHidingYWidth"] = int(frameWidth[1]);
integerProperties["communicationHidingZWidth"] = int(frameWidth[2]);
integerProperties["useParticles"] = int(useParticles);
integerProperties["numParticles"] = int(ps->size());
integerProperties["particleSubBlockXSize"] = int(particleSubBlockSize[0]);
integerProperties["particleSubBlockYSize"] = int(particleSubBlockSize[1]);
integerProperties["particleSubBlockZSize"] = int(particleSubBlockSize[2]);
realProperties["particleDiameter"] = double(particleDiameter);
realProperties["particleGenerationSpacing"] = double(particleGenerationSpacing);
performanceLogger.getBestResultsForSQLOnRoot(integerProperties, realProperties, stringProperties);
auto runId = sqlite::storeRunInSqliteDB(dbFile, integerProperties, stringProperties, realProperties);
sqlite::storeTimingPoolInSqliteDB(dbFile, runId, *timeloopTimingReduced, "Timeloop");
}
return EXIT_SUCCESS;
}
} // namespace percolation
int main(int argc, char** argv) { percolation::main(argc, argv); }
NumericalSetup
{
// product of number of blocks should be equal to number of used processes
numXBlocks 1;
numYBlocks 1;
numZBlocks 1;
periodicInY false;
periodicInZ false;
numXCellsPerBlock 256;
numYCellsPerBlock 128;
numZCellsPerBlock 128;
timeSteps 100;
sendDirectlyFromGPU false; // use GPU-GPU communication
useCommunicationHiding false;
frameWidth <1, 1, 1>; // width of the outer region if splitting the LBM/PSM into inner and outer (only used if useCommunicationHiding is true)
// particle distribution in LBM units
useParticles true; // if true, PSM/particle mapping/velocity computation/hydrodynamic force reduction is used, else LBM is used
particleDiameter 20.0;
particleGenerationSpacing 21.0;
generationDomainFraction <0.8, 1.0, 1.0>; // fraction of the domain where particles are generated
generationPointOfReferenceOffset <0, 0, 0>; // offset of point of reference from domain center, see SCIterator.h
useParticleOffset true; // offset every second particle layer in flow direction
particleSubBlockSize <8, 8, 8>;
// fluid quantities in LBM units
uInflow 0.00008;
relaxationRate 0.9;
}
Output
{
vtkSpacing 0;
vtkFolder vtk_out;
performanceLogFrequency 100;
}
waLBerla_link_files_to_builddir(*.prm)
waLBerla_link_files_to_builddir(*.py)
waLBerla_generate_target_from_python(NAME BenchmarkPhaseFieldCodeGen
FILE multiphase_codegen.py
OUT_FILES initialize_phase_field_distributions.${CODEGEN_FILE_SUFFIX} initialize_phase_field_distributions.h
initialize_velocity_based_distributions.${CODEGEN_FILE_SUFFIX} initialize_velocity_based_distributions.h
phase_field_LB_step.${CODEGEN_FILE_SUFFIX} phase_field_LB_step.h
hydro_LB_step.${CODEGEN_FILE_SUFFIX} hydro_LB_step.h
PackInfo_phase_field_distributions.${CODEGEN_FILE_SUFFIX} PackInfo_phase_field_distributions.h
PackInfo_phase_field.${CODEGEN_FILE_SUFFIX} PackInfo_phase_field.h
PackInfo_velocity_based_distributions.${CODEGEN_FILE_SUFFIX} PackInfo_velocity_based_distributions.h
GenDefines.h)
if (WALBERLA_BUILD_WITH_GPU_SUPPORT )
waLBerla_add_executable(NAME benchmark_multiphase
FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py
DEPENDS walberla::blockforest walberla::core walberla::gpu walberla::field walberla::postprocessing walberla::python_coupling walberla::lbm_generated walberla::geometry walberla::timeloop BenchmarkPhaseFieldCodeGen )
else ()
waLBerla_add_executable(NAME benchmark_multiphase
FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py
DEPENDS walberla::blockforest walberla::core walberla::field walberla::postprocessing walberla::python_coupling walberla::lbm_generated walberla::geometry walberla::timeloop BenchmarkPhaseFieldCodeGen )
endif (WALBERLA_BUILD_WITH_GPU_SUPPORT )