From 29e89b3ac443a44549e1ff1f1daba389ca5a84fd Mon Sep 17 00:00:00 2001 From: Christoph Rettinger <christoph.rettinger@fau.de> Date: Tue, 19 Jun 2018 13:20:49 +0200 Subject: [PATCH] Added benchmarks for AMR and LB functionalities of coupling --- .../AMRSedimentSettling.cpp | 2249 +++++++++++++++++ .../CMakeLists.txt | 5 + .../WorkloadEvaluation.cpp | 985 ++++++++ apps/benchmarks/CMakeLists.txt | 1 + 4 files changed, 3240 insertions(+) create mode 100644 apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/AMRSedimentSettling.cpp create mode 100644 apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/CMakeLists.txt create mode 100644 apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/WorkloadEvaluation.cpp diff --git a/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/AMRSedimentSettling.cpp b/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/AMRSedimentSettling.cpp new file mode 100644 index 000000000..988079c48 --- /dev/null +++ b/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/AMRSedimentSettling.cpp @@ -0,0 +1,2249 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file AMRSedimentSettling.cpp +//! \ingroup pe_coupling +//! \author Christoph Rettinger <christoph.rettinger@fau.de> +// +//====================================================================================================================== + +#include "blockforest/Initialization.h" +#include "blockforest/communication/UniformBufferedScheme.h" +#include "blockforest/loadbalancing/all.h" +#include "blockforest/AABBRefinementSelection.h" + +#include "boundary/all.h" + +#include "core/DataTypes.h" +#include "core/Environment.h" +#include "core/SharedFunctor.h" +#include "core/debug/Debug.h" +#include "core/debug/TestSubsystem.h" +#include "core/math/all.h" +#include "core/timing/RemainingTimeLogger.h" +#include "core/mpi/Broadcast.h" + +#include "domain_decomposition/SharedSweep.h" +#include "domain_decomposition/BlockSweepWrapper.h" + +#include "field/AddToStorage.h" +#include "field/StabilityChecker.h" +#include "field/communication/PackInfo.h" + +#include "lbm/boundary/all.h" +#include "lbm/communication/PdfFieldPackInfo.h" +#include "lbm/field/AddToStorage.h" +#include "lbm/field/PdfField.h" +#include "lbm/field/VelocityFieldWriter.h" +#include "lbm/lattice_model/D3Q19.h" +#include "lbm/refinement/all.h" +#include "lbm/sweeps/CellwiseSweep.h" +#include "lbm/sweeps/SweepWrappers.h" + +#include "pe/basic.h" +#include "pe/Types.h" +#include "pe/fcd/GJKEPACollideFunctor.h" +#include "pe/vtk/BodyVtkOutput.h" +#include "pe/vtk/SphereVtkOutput.h" +#include "pe/vtk/EllipsoidVtkOutput.h" +#include "pe/cr/ICR.h" +#include "pe/synchronization/ClearSynchronization.h" +#include "pe/amr/InfoCollection.h" +#include "pe/amr/weight_assignment/WeightAssignmentFunctor.h" +#include "pe/amr/weight_assignment/MetisAssignmentFunctor.h" + +#include "pe_coupling/amr/all.h" +#include "pe_coupling/mapping/all.h" +#include "pe_coupling/momentum_exchange_method/all.h" +#include "pe_coupling/utility/all.h" + +#include "timeloop/SweepTimeloop.h" + +#include "vtk/all.h" +#include "field/vtk/all.h" +#include "lbm/vtk/all.h" + +namespace amr_sediment_settling +{ + +/////////// +// USING // +/////////// + +using namespace walberla; +using walberla::uint_t; + +////////////// +// TYPEDEFS // +////////////// + +// PDF field, flag field & body field +typedef lbm::D3Q19< lbm::collision_model::TRT, false> LatticeModel_T; +typedef LatticeModel_T::Stencil Stencil_T; +typedef lbm::PdfField< LatticeModel_T > PdfField_T; + +typedef walberla::uint8_t flag_t; +typedef FlagField< flag_t > FlagField_T; +typedef GhostLayerField< pe::BodyID, 1 > BodyField_T; +typedef GhostLayerField< Vector3<real_t>, 1 > VelocityField_T; + +const uint_t FieldGhostLayers = 4; + +// boundary handling +typedef lbm::NoSlip< LatticeModel_T, flag_t > NoSlip_T; + +typedef pe_coupling::CurvedLinear< LatticeModel_T, FlagField_T > MO_T; + +typedef boost::tuples::tuple< NoSlip_T, MO_T > BoundaryConditions_T; +typedef BoundaryHandling< FlagField_T, Stencil_T, BoundaryConditions_T > BoundaryHandling_T; + +typedef boost::tuple<pe::Sphere, pe::Ellipsoid, pe::Plane> BodyTypeTuple; + +/////////// +// FLAGS // +/////////// + +const FlagUID Fluid_Flag( "fluid" ); +const FlagUID NoSlip_Flag( "no slip" ); +const FlagUID MO_Flag( "moving obstacle" ); +const FlagUID FormerMO_Flag( "former moving obstacle" ); + + +////////////////////////////////////// +// DYNAMIC REFINEMENT FUNCTIONALITY // +////////////////////////////////////// + + +/* + * Refinement check based on gradient magnitude + * If gradient magnitude is below lowerLimit in all cells of a block, that block could be coarsened. + * If the gradient value is above the upperLimit for at least one cell, that block gets marked for refinement. + * Else, the block remains on the current level. + */ +template< typename LatticeModel_T, typename Filter_T > +class VectorGradientRefinement +{ +public: + typedef GhostLayerField< Vector3<real_t>, 1 > VectorField_T; + typedef typename LatticeModel_T::Stencil Stencil_T; + + VectorGradientRefinement( const ConstBlockDataID & fieldID, const Filter_T & filter, + const real_t upperLimit, const real_t lowerLimit, const uint_t maxLevel ) : + fieldID_( fieldID ), filter_( filter ), + upperLimit_( upperLimit ), lowerLimit_( lowerLimit ), maxLevel_( maxLevel ) + {} + + void operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels, + std::vector< const Block * > & blocksAlreadyMarkedForRefinement, + const BlockForest & forest ); + +private: + + ConstBlockDataID fieldID_; + + Filter_T filter_; + + real_t upperLimit_; + real_t lowerLimit_; + + uint_t maxLevel_; + +}; // class GradientRefinement + +template< typename LatticeModel_T, typename Filter_T > +void VectorGradientRefinement< LatticeModel_T, Filter_T >::operator()( std::vector< std::pair< const Block *, uint_t > > & minTargetLevels, + std::vector< const Block * > &, const BlockForest & ) +{ + for( auto it = minTargetLevels.begin(); it != minTargetLevels.end(); ++it ) + { + const Block * const block = it->first; + + const uint_t currentLevelOfBlock = block->getLevel(); + + const VectorField_T * uField = block->template getData< VectorField_T >( fieldID_ ); + + if( uField == NULL ) + { + it->second = uint_t(0); + continue; + } + + Matrix3<real_t> uGradient( real_t(0) ); + + bool refine( false ); + bool coarsen( true ); + + filter_( *block ); + + WALBERLA_FOR_ALL_CELLS_XYZ( uField, + + std::vector< Vector3<real_t> > uValues( Stencil_T::Size, Vector3<real_t>(real_t(0)) ); + + Vector3<real_t> uInCenterCell = uField->get( x,y,z ); + + for( auto dir = Stencil_T::beginNoCenter(); dir != Stencil_T::end(); ++dir) + { + // check if boundary treatment is necessary + if( filter_( x+dir.cx(),y+dir.cy(),z+dir.cz() ) ) + { + // copy from center cell + uValues[ *dir ] = uInCenterCell; + } else { + uValues[ *dir ] = uField->get( x+dir.cx(),y+dir.cy(),z+dir.cz() ); + } + } + + // obtain the matrix grad(u) with the help of the gradient formula from + // See: Ramadugu et al - Lattice differential operators for computational physics (2013) + // with T = c_s**2 + const real_t inv_c_s_sqr = real_t(3); + uGradient = real_t(0); + for( auto dir = Stencil_T::beginNoCenter(); dir != Stencil_T::end(); ++dir) + { + real_t cx = real_c(dir.cx()); + real_t cy = real_c(dir.cy()); + real_t cz = real_c(dir.cz()); + + // grad(ux) + real_t ux = uValues[ *dir ][0]; + uGradient[ 0 ] += LatticeModel_T::w[ dir.toIdx() ] * cx * ux; + uGradient[ 3 ] += LatticeModel_T::w[ dir.toIdx() ] * cy * ux; + uGradient[ 6 ] += LatticeModel_T::w[ dir.toIdx() ] * cz * ux; + + // grad(uy) + real_t uy = uValues[ *dir ][1]; + uGradient[ 1 ] += LatticeModel_T::w[ dir.toIdx() ] * cx * uy; + uGradient[ 4 ] += LatticeModel_T::w[ dir.toIdx() ] * cy * uy; + uGradient[ 7 ] += LatticeModel_T::w[ dir.toIdx() ] * cz * uy; + + // grad(uz) + real_t uz = uValues[ *dir ][2]; + uGradient[ 2 ] += LatticeModel_T::w[ dir.toIdx() ] * cx * uz; + uGradient[ 5 ] += LatticeModel_T::w[ dir.toIdx() ] * cy * uz; + uGradient[ 8 ] += LatticeModel_T::w[ dir.toIdx() ] * cz * uz; + + } + uGradient *= inv_c_s_sqr; + + real_t norm( real_t(0) ); + //compute maximums norm of 3x3 matrix + for( uint_t i = uint_t(0); i < uint_t(3*3); ++i ) + norm = std::max(norm, std::fabs(uGradient[i])); + + if( norm > lowerLimit_ ) + { + coarsen = false; + if( norm > upperLimit_ ) + refine = true; + } + + ) + + if( refine && currentLevelOfBlock < maxLevel_ ) + { + WALBERLA_ASSERT( !coarsen ); + it->second = currentLevelOfBlock + uint_t(1); + } + if( coarsen && currentLevelOfBlock > uint_t(0) ) + { + WALBERLA_ASSERT( !refine ); + it->second = currentLevelOfBlock - uint_t(1); + } + } +} + + +// Load estimators for spheres and ellipsoids, obtained at SuperMUC Phase 2 +// See Sec. 3 in the paper for more infos + +///////////// +// Spheres // +///////////// +real_t fittedLBMWeightEvaluationFunctionSpheres(const pe_coupling::BlockInfo& blockInfo) +{ + uint_t Ce = blockInfo.numberOfCells; + uint_t F = blockInfo.numberOfFluidCells; + + // from fits with optimized D3Q19 LBM kernel (no forces) + real_t weight = real_t(9.990957667738165e-06) * real_c(Ce) + real_t(0.00015749920523711047) * real_c(F) + real_t(-0.08232498905584973); + return weight; +} + +real_t fittedBHWeightEvaluationFunctionSpheres(const pe_coupling::BlockInfo& blockInfo) +{ + uint_t Ce = blockInfo.numberOfCells; + uint_t NB = blockInfo.numberOfNearBoundaryCells; + + // from fits with optimized D3Q19 LBM kernel (no forces) + real_t weight = real_t(6.654810986939097e-06) * real_c(Ce) + real_t(0.0007061414693533274) * real_c(NB) + real_t(-0.1094292992294259); + return weight; +} + +real_t fittedCoupling1WeightEvaluationFunctionSpheres(const pe_coupling::BlockInfo& blockInfo) +{ + uint_t Ce = blockInfo.numberOfCells; + uint_t F = blockInfo.numberOfFluidCells; + uint_t Pl = blockInfo.numberOfLocalBodies; + uint_t Ps = blockInfo.numberOfShadowBodies; + + // from fits with optimized D3Q19 LBM kernel (no forces) + real_t weight = real_t(3.07542641675429e-06) * real_c(Ce) + real_t(2.419364600880769e-07) * real_c(F) + real_t(0.01413718259604757) * real_c(Pl) + real_t(0.027761707343462727) * real_c(Ps) + real_t(-0.13991481483939272); + return weight; +} + +real_t fittedCoupling2WeightEvaluationFunctionSpheres(const pe_coupling::BlockInfo& blockInfo) +{ + uint_t Ce = blockInfo.numberOfCells; + uint_t F = blockInfo.numberOfFluidCells; + uint_t Pl = blockInfo.numberOfLocalBodies; + uint_t Ps = blockInfo.numberOfShadowBodies; + + // from fits with optimized D3Q19 LBM kernel (no forces) + real_t weight = real_t(5.988401232749505e-06) * real_c(Ce) + real_t(3.903532223977357e-06) * real_c(F) + real_t(-0.008802674250816316) * real_c(Pl) + real_t(0.02505020738346139) * real_c(Ps) + real_t(-0.12970723676003335); + return weight; +} + +real_t fittedPEWeightEvaluationFunctionSpheres(const pe_coupling::BlockInfo& blockInfo) +{ + uint_t Pl = blockInfo.numberOfLocalBodies; + uint_t Ps = blockInfo.numberOfShadowBodies; + uint_t Ct = blockInfo.numberOfContacts; + uint_t Sc = blockInfo.numberOfPeSubCycles; + + // from fits with optimized D3Q19 LBM kernel (no forces) + real_t cPlPs2 = real_t(1.1562854544700417e-06); + real_t cPl = real_t(0.0009620525068318354); + real_t cPs = real_t(0.00027549401081063894); + real_t cCt = real_t(0.0014801932788115464); + real_t c = real_t(0.01883682418448259); + real_t weight = real_c(Sc) * ( cPlPs2 * real_c(Pl+Ps) * real_c(Pl+Ps) + cPl * real_c(Pl) + cPs * real_c(Ps) + cCt * real_c(Ct) + c ); + return weight; +} + +real_t fittedTotalWeightEvaluationFunctionSpheres(const pe_coupling::BlockInfo& blockInfo) +{ + return fittedLBMWeightEvaluationFunctionSpheres(blockInfo) + fittedBHWeightEvaluationFunctionSpheres(blockInfo) + + fittedCoupling1WeightEvaluationFunctionSpheres(blockInfo) + fittedCoupling2WeightEvaluationFunctionSpheres(blockInfo) + + fittedPEWeightEvaluationFunctionSpheres(blockInfo); +} + +//////////////// +// Ellipsoids // +//////////////// +real_t fittedLBMWeightEvaluationFunctionEllipsoids(const pe_coupling::BlockInfo& blockInfo) +{ + uint_t Ce = blockInfo.numberOfCells; + uint_t F = blockInfo.numberOfFluidCells; + uint_t NB = blockInfo.numberOfNearBoundaryCells; + + // from fits with optimized D3Q19 LBM kernel (no forces) + real_t cCe = real_t(4.69973868717e-05); + real_t cF = real_t(0.000110568537442); + real_t weight = cCe * real_c(Ce) + cF * real_c(F); + if( NB > uint_t(0) ) weight += real_t(5.96551488486e-05) * real_c(Ce) + real_t(-5.75351782026e-05) * real_c(F) + real_t(0.000695800745231) * real_c(NB); + return weight; +} + +real_t fittedCouplingWeightEvaluationFunctionEllipsoids(const pe_coupling::BlockInfo& blockInfo) +{ + uint_t Ce = blockInfo.numberOfCells; + uint_t F = blockInfo.numberOfFluidCells; + uint_t Pl = blockInfo.numberOfLocalBodies; + uint_t Ps = blockInfo.numberOfShadowBodies; + + // from fits with optimized D3Q19 LBM kernel (no forces) + real_t cCe = real_t(0.000176674935526); + real_t cF = real_t(-0.000170513513027); + real_t cPl = real_t(0.0252031634776); + real_t cPs = real_t(0.0356835220918); + real_t weight = real_t(0); + if( (Pl + Ps ) > uint_t(0) ) weight = cCe * real_c(Ce) + cF * real_c(F) + cPl * real_c(Pl) + cPs * real_c(Ps); + return weight; +} + +real_t fittedPEWeightEvaluationFunctionEllipsoids(const pe_coupling::BlockInfo& blockInfo) +{ + uint_t Pl = blockInfo.numberOfLocalBodies; + uint_t Ps = blockInfo.numberOfShadowBodies; + uint_t Ct = blockInfo.numberOfContacts; + uint_t Sc = blockInfo.numberOfPeSubCycles; + + // from fits with optimized D3Q19 LBM kernel (no forces) + real_t cPlPs2 = real_t(8.24153555785e-06); + real_t cPl = real_t(0.00135966650494); + real_t cPs = real_t(0.00440464092538); + real_t cCt = real_t(0.0216278259881); + real_t weight = real_c(Sc) * ( cPlPs2 * real_c(Pl+Ps) * real_c(Pl+Ps) + cPl * real_c(Pl) + cPs * real_c(Ps) + cCt * real_c(Ct) ); + return weight; +} + +real_t fittedTotalWeightEvaluationFunctionEllipsoids(const pe_coupling::BlockInfo& blockInfo) +{ + return fittedLBMWeightEvaluationFunctionEllipsoids(blockInfo) + + fittedCouplingWeightEvaluationFunctionEllipsoids(blockInfo) + + fittedPEWeightEvaluationFunctionEllipsoids(blockInfo); +} + + +struct TimingPoolLogger +{ + TimingPoolLogger( WcTimingPool & timingPool, const SweepTimeloop & timeloop, const uint_t interval ) + : timingPool_( timingPool ), timeloop_( timeloop ), interval_( interval ) + { + } + + void operator()() + { + if( interval_ > uint_t(0) && timeloop_.getCurrentTimeStep() % interval_ == uint_t(0) ) + { + timingPool_.logResultOnRoot(); + } + } + +private: + WcTimingPool & timingPool_; + const SweepTimeloop & timeloop_; + uint_t interval_; +}; + +struct TimingTreeLogger +{ + TimingTreeLogger( WcTimingTree & timingTree, const SweepTimeloop & timeloop, const uint_t interval ) + : timingTree_( timingTree ), timeloop_( timeloop ), interval_( interval ) + { + } + + void operator()() + { + if( interval_ > uint_t(0) && timeloop_.getCurrentTimeStep() % interval_ == uint_t(0) ) + { + timingTree_.synchronize(); + auto reducedTimingTree = timingTree_.getReduced(); + WALBERLA_LOG_INFO_ON_ROOT( reducedTimingTree ); + } + } + +private: + WcTimingTree & timingTree_; + const SweepTimeloop & timeloop_; + uint_t interval_; +}; + +///////////////////// +// BLOCK STRUCTURE // +///////////////////// + +static void refinementSelection( SetupBlockForest& forest, uint_t levels, const AABB & refinementBox ) +{ + real_t dx = real_t(1); // dx on finest level + for( auto block = forest.begin(); block != forest.end(); ++block ) + { + uint_t blockLevel = block->getLevel(); + uint_t levelScalingFactor = ( uint_t(1) << (levels - uint_t(1) - blockLevel) ); + real_t dxOnLevel = dx * real_c(levelScalingFactor); + AABB blockAABB = block->getAABB(); + + // extend block AABB by ghostlayers + AABB extendedBlockAABB = blockAABB.getExtended( dxOnLevel * real_c(FieldGhostLayers) ); + + if( extendedBlockAABB.intersects( refinementBox ) ) + if( blockLevel < ( levels - uint_t(1) ) ) + block->setMarker( true ); + } +} + +static void workloadAndMemoryAssignment( SetupBlockForest& forest ) +{ + for( auto block = forest.begin(); block != forest.end(); ++block ) + { + block->setWorkload( numeric_cast< workload_t >( uint_t(1) << block->getLevel() ) ); + block->setMemory( numeric_cast< memory_t >(1) ); + } +} + +static shared_ptr< StructuredBlockForest > createBlockStructure( const AABB & domainAABB, Vector3<uint_t> blockSizeInCells, + uint_t numberOfLevels, const AABB & refinementBox, + bool useBox, std::string loadDistributionStrategy, + bool keepGlobalBlockInformation = false ) +{ + SetupBlockForest sforest; + + Vector3<uint_t> numberOfFineBlocksPerDirection( uint_c(domainAABB.size(0)) / blockSizeInCells[0], + uint_c(domainAABB.size(1)) / blockSizeInCells[1], + uint_c(domainAABB.size(2)) / blockSizeInCells[2] ); + + for(uint_t i = 0; i < 3; ++i ) + { + WALBERLA_CHECK_EQUAL( numberOfFineBlocksPerDirection[i] * blockSizeInCells[i], uint_c(domainAABB.size(i)), + "Domain can not be decomposed in direction " << i << " into fine blocks of size " << blockSizeInCells[i] ); + } + + uint_t levelScalingFactor = ( uint_t(1) << ( numberOfLevels - uint_t(1) ) ); + Vector3<uint_t> numberOfCoarseBlocksPerDirection( numberOfFineBlocksPerDirection / levelScalingFactor ); + + for(uint_t i = 0; i < 3; ++i ) + { + WALBERLA_CHECK_EQUAL(numberOfCoarseBlocksPerDirection[i] * levelScalingFactor, numberOfFineBlocksPerDirection[i], + "Domain can not be refined in direction " << i << " according to the specified number of levels!" ); + } + + WALBERLA_LOG_INFO_ON_ROOT(" - refinement box: " << refinementBox); + + MPIManager::instance()->useWorldComm(); + + sforest.addRefinementSelectionFunction( std::bind( refinementSelection, std::placeholders::_1, numberOfLevels, refinementBox ) ); + sforest.addWorkloadMemorySUIDAssignmentFunction( workloadAndMemoryAssignment ); + + Vector3<bool> periodicity( true, true, false); + if( useBox ) + { + periodicity[0] = false; + periodicity[1] = false; + } + sforest.init( domainAABB, + numberOfCoarseBlocksPerDirection[0], numberOfCoarseBlocksPerDirection[1], numberOfCoarseBlocksPerDirection[2], + periodicity[0], periodicity[1], periodicity[2]); + + // calculate process distribution + const memory_t memoryLimit = math::Limits< memory_t >::inf(); + + if( loadDistributionStrategy == "Hilbert" ) + { + bool useHilbert = true; + sforest.balanceLoad( blockforest::StaticLevelwiseCurveBalance(useHilbert), uint_c( MPIManager::instance()->numProcesses() ), real_t(0), memoryLimit, true ); + } else if ( loadDistributionStrategy == "Morton" ) + { + bool useHilbert = false; + sforest.balanceLoad( blockforest::StaticLevelwiseCurveBalance(useHilbert), uint_c( MPIManager::instance()->numProcesses() ), real_t(0), memoryLimit, true ); + } else if ( loadDistributionStrategy == "ParMetis" ) + { + blockforest::StaticLevelwiseParMetis::Algorithm algorithm = blockforest::StaticLevelwiseParMetis::Algorithm::PARMETIS_PART_GEOM_KWAY; + blockforest::StaticLevelwiseParMetis staticParMetis(algorithm); + sforest.balanceLoad( staticParMetis, uint_c( MPIManager::instance()->numProcesses() ), real_t(0), memoryLimit, true ); + } else if (loadDistributionStrategy == "Diffusive" ) + { + // also use Hilbert curve here + bool useHilbert = true; + sforest.balanceLoad( blockforest::StaticLevelwiseCurveBalance(useHilbert), uint_c( MPIManager::instance()->numProcesses() ), real_t(0), memoryLimit, true ); + } else + { + WALBERLA_ABORT("Load distribution strategy \"" << loadDistributionStrategy << "\t not implemented! - Aborting" ); + } + + WALBERLA_LOG_INFO_ON_ROOT( sforest ); + + + // create StructuredBlockForest (encapsulates a newly created BlockForest) + shared_ptr< StructuredBlockForest > sbf = + make_shared< StructuredBlockForest >( make_shared< BlockForest >( uint_c( MPIManager::instance()->rank() ), sforest, keepGlobalBlockInformation ), + blockSizeInCells[0], blockSizeInCells[1], blockSizeInCells[2]); + sbf->createCellBoundingBoxes(); + + return sbf; +} + +///////////////////////////////////// +// BOUNDARY HANDLING CUSTOMIZATION // +///////////////////////////////////// +class MyBoundaryHandling : public blockforest::AlwaysInitializeBlockDataHandling< BoundaryHandling_T > +{ +public: + MyBoundaryHandling( const weak_ptr< StructuredBlockStorage > & blocks, + const BlockDataID & flagFieldID, const BlockDataID & pdfFieldID, const BlockDataID & bodyFieldID ) : + blocks_( blocks ), flagFieldID_( flagFieldID ), pdfFieldID_( pdfFieldID ), bodyFieldID_ ( bodyFieldID ) + {} + + BoundaryHandling_T * initialize( IBlock * const block ); + +private: + + weak_ptr< StructuredBlockStorage > blocks_; + + const BlockDataID flagFieldID_; + const BlockDataID pdfFieldID_; + const BlockDataID bodyFieldID_; + + +}; // class MyBoundaryHandling + +BoundaryHandling_T * MyBoundaryHandling::initialize( IBlock * const block ) +{ + WALBERLA_ASSERT_NOT_NULLPTR( block ); + + FlagField_T * flagField = block->getData< FlagField_T >( flagFieldID_ ); + PdfField_T * pdfField = block->getData< PdfField_T > ( pdfFieldID_ ); + BodyField_T * bodyField = block->getData< BodyField_T >( bodyFieldID_ ); + + const auto fluid = flagField->flagExists( Fluid_Flag ) ? flagField->getFlag( Fluid_Flag ) : flagField->registerFlag( Fluid_Flag ); + + auto blocksPtr = blocks_.lock(); + WALBERLA_CHECK_NOT_NULLPTR( blocksPtr ); + + BoundaryHandling_T * handling = new BoundaryHandling_T( "moving obstacle boundary handling", flagField, fluid, + boost::tuples::make_tuple( NoSlip_T( "NoSlip", NoSlip_Flag, pdfField ), + MO_T( "MO", MO_Flag, pdfField, flagField, bodyField, fluid, *blocksPtr, *block ) ), + BoundaryHandling_T::Mode::ENTIRE_FIELD_TRAVERSAL); + + handling->fillWithDomain( FieldGhostLayers ); + + return handling; +} + + +//******************************************************************************************************************* + + +//******************************************************************************************************************* +/*!\brief Evaluating the position and velocity of the sediments + * + */ +//******************************************************************************************************************* +class PropertyLogger +{ +public: + PropertyLogger( SweepTimeloop* timeloop, const shared_ptr< StructuredBlockStorage > & blocks, + const BlockDataID & bodyStorageID, const std::string & fileName, bool fileIO) : + timeloop_( timeloop ), blocks_( blocks ), bodyStorageID_( bodyStorageID ), fileName_( fileName ), fileIO_(fileIO), + meanPos_( real_t(0) ), meanVel_( real_t(0) ), maxVel_( real_t(0) ) + { + if ( fileIO_ ) + { + WALBERLA_ROOT_SECTION() + { + std::ofstream file; + file.open( fileName_.c_str() ); + file << "#\t pos\t vel\t maxVel\n"; + file.close(); + } + } + } + + void operator()() + { + const uint_t timestep (timeloop_->getCurrentTimeStep() ); + + uint_t numSediments( uint_t(0)); + real_t meanPos(real_t(0)); + real_t meanVel(real_t(0)); + real_t maxVel(real_t(0)); + + for( auto blockIt = blocks_->begin(); blockIt != blocks_->end(); ++blockIt ) + { + for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID_); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt ) + { + meanPos += bodyIt->getPosition()[2]; + meanVel += bodyIt->getLinearVel()[2]; + maxVel = std::max(maxVel, std::fabs(bodyIt->getLinearVel()[2])); + ++numSediments; + } + } + + WALBERLA_MPI_SECTION() + { + mpi::allReduceInplace( numSediments, mpi::SUM ); + mpi::allReduceInplace( meanPos, mpi::SUM ); + mpi::allReduceInplace( meanVel, mpi::SUM ); + mpi::allReduceInplace( maxVel, mpi::MAX ); + } + + meanPos /= real_c(numSediments); + meanVel /= real_c(numSediments); + + meanPos_ = meanPos; + meanVel_ = meanVel; + maxVel_ = maxVel; + + if( fileIO_ ) + writeToFile( timestep ); + } + + real_t getMeanPosition() const + { + return meanPos_; + } + + real_t getMaxVelocity() const + { + return maxVel_; + } + + real_t getMeanVelocity() const + { + return meanVel_; + } + + +private: + void writeToFile( uint_t timestep ) + { + WALBERLA_ROOT_SECTION() + { + std::ofstream file; + file.open( fileName_.c_str(), std::ofstream::app ); + + file << timestep << "\t" << meanPos_ << "\t" << meanVel_ << "\t" << maxVel_ << "\n"; + file.close(); + } + } + + SweepTimeloop* timeloop_; + shared_ptr< StructuredBlockStorage > blocks_; + const BlockDataID bodyStorageID_; + std::string fileName_; + bool fileIO_; + + real_t meanPos_; + real_t meanVel_; + real_t maxVel_; +}; + +void clearBoundaryHandling( BlockForest & forest, const BlockDataID & boundaryHandlingID ) +{ + for( auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt ) + { + BoundaryHandling_T * boundaryHandling = blockIt->getData<BoundaryHandling_T>(boundaryHandlingID); + boundaryHandling->clear( FieldGhostLayers ); + } +} + +void clearBodyField( BlockForest & forest, const BlockDataID & bodyFieldID ) +{ + for( auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt ) + { + BodyField_T * bodyField = blockIt->getData<BodyField_T>(bodyFieldID); + bodyField->setWithGhostLayer( NULL ); + } +} + +void recreateBoundaryHandling( BlockForest & forest, const BlockDataID & boundaryHandlingID ) +{ + for( auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt ) + { + BoundaryHandling_T * boundaryHandling = blockIt->getData<BoundaryHandling_T>(boundaryHandlingID); + boundaryHandling->fillWithDomain( FieldGhostLayers ); + } +} + + +class TimingEvaluator +{ +public: + TimingEvaluator( WcTimingPool & levelwiseTimingPool, WcTimingTree & peTimingTree, uint_t numberOfLevels) + : levelwiseTimingPool_( levelwiseTimingPool ), peTimingTree_( peTimingTree ), numberOfLevels_( numberOfLevels ) + {} + + real_t getTimings(const std::vector<std::string> & timerNames, uint_t level ) + { + + real_t timing = real_t(0); + for( auto timerIt = timerNames.begin(); timerIt != timerNames.end(); ++timerIt ) + { + std::string timerNameLvlWise = *timerIt;// + + // put level between timer string and possible suffix + auto suffixBegin = timerNameLvlWise.find_first_of("["); + if( suffixBegin != std::string::npos) + { + // suffix detected + auto suffixEnd = timerNameLvlWise.find_last_of("]"); + if( suffixEnd != std::string::npos) + { + auto timerString = timerNameLvlWise.substr(0,suffixBegin); + auto suffixString = timerNameLvlWise.substr(suffixBegin,suffixEnd-suffixBegin+1); + + timerNameLvlWise = timerString + "(" + std::to_string(level) + ") " + suffixString; + + } + else + { + WALBERLA_ABORT("Invalid timer string"); + } + } + else + { + timerNameLvlWise += " (" + std::to_string(level) + ")";; + } + + if( levelwiseTimingPool_.timerExists(timerNameLvlWise)) + timing += levelwiseTimingPool_[timerNameLvlWise].total(); + + if( level == numberOfLevels_- 1) + { + std::string timerNamePE = *timerIt; + if( peTimingTree_.timerExists(timerNamePE)) + timing += peTimingTree_[timerNamePE].total(); + } + } + + return timing; + } + + +private: + + WcTimingPool & levelwiseTimingPool_; + WcTimingTree & peTimingTree_; + uint_t numberOfLevels_; +}; + + +real_t weightEvaluation(BlockForest & forest, + const shared_ptr<pe_coupling::InfoCollection>& couplingInfoCollection, + const shared_ptr<pe::InfoCollection> & peInfoCollection, + real_t peBlockBaseWeight, + std::string loadEvaluationStrategy, + uint_t level, + bool useEllipsoids ) +{ + real_t weight = real_t(0); + for( auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt ) + { + if( forest.getLevel(*blockIt) != level) continue; + + + blockforest::Block* block = static_cast<blockforest::Block*> (&(*blockIt)); + auto blockID = block->getId(); + + if(loadEvaluationStrategy == "LBM") + { + auto infoIt = couplingInfoCollection->find( blockID ); + weight += pe_coupling::amr::defaultWeightEvaluationFunction(infoIt->second); + + }else if(loadEvaluationStrategy == "PE") + { + auto infoIt = peInfoCollection->find( blockID ); + weight += real_c(infoIt->second.numberOfLocalBodies) + peBlockBaseWeight; + }else if(loadEvaluationStrategy == "Fit" || loadEvaluationStrategy == "FitMulti") + { + auto infoIt = couplingInfoCollection->find( blockID ); + if( useEllipsoids ) + { + weight += fittedTotalWeightEvaluationFunctionEllipsoids(infoIt->second); + } + else + { + weight += fittedTotalWeightEvaluationFunctionSpheres(infoIt->second); + } + }else + { + WALBERLA_ABORT("Load balancing strategy not defined"); + } + } + return weight; +} + + +uint_t evaluateEdgeCut(BlockForest & forest) +{ + + //note: only works for edges in uniform grids + + uint_t edgecut = uint_t(0); // = edge weights between processes + + for( auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt ) + { + blockforest::Block* block = static_cast<blockforest::Block*> (&(*blockIt)); + + real_t blockVolume = block->getAABB().volume(); + real_t approximateEdgeLength = std::cbrt( blockVolume ); + + uint_t faceNeighborWeight = uint_c(approximateEdgeLength * approximateEdgeLength ); //common face + uint_t edgeNeighborWeight = uint_c(approximateEdgeLength); //common edge + uint_t cornerNeighborWeight = uint_c( 1 ); //common corner + + + for( const uint_t idx : blockforest::getFaceNeighborhoodSectionIndices() ) + { + for( uint_t nb = uint_t(0); nb < block->getNeighborhoodSectionSize(idx); ++nb ) + { + if( block->neighborExistsRemotely(idx,nb) ) edgecut += faceNeighborWeight; + } + } + + for( const uint_t idx : blockforest::getEdgeNeighborhoodSectionIndices() ) + { + for( uint_t nb = uint_t(0); nb < block->getNeighborhoodSectionSize(idx); ++nb ) + { + if( block->neighborExistsRemotely(idx,nb) ) edgecut += edgeNeighborWeight; + } + } + + for( const uint_t idx : blockforest::getCornerNeighborhoodSectionIndices() ) + { + for( uint_t nb = uint_t(0); nb < block->getNeighborhoodSectionSize(idx); ++nb ) + { + if( block->neighborExistsRemotely(idx,nb) ) edgecut += cornerNeighborWeight; + } + } + } + return edgecut; +} + + +void evaluateTotalSimulationTimePassed(WcTimingPool & timeloopTimingPool, real_t & totalSimTime, real_t & totalLBTime) +{ + shared_ptr< WcTimingPool> reduced = timeloopTimingPool.getReduced(WcTimingPool::REDUCE_TOTAL, 0); + + std::string simulationString("LBM refinement time step"); + real_t totalTime = real_t(0); + WALBERLA_ROOT_SECTION(){ + totalTime = (*reduced)[simulationString].total(); + } + totalSimTime = totalTime; + + std::string lbString("refinement checking"); + real_t lbTime = real_t(0); + WALBERLA_ROOT_SECTION(){ + lbTime = (*reduced)[lbString].total(); + } + totalLBTime = lbTime; + +} + +void createSedimentLayer(uint_t numberOfSediments, const AABB & generationDomain, real_t diameter, real_t heightBorder, + pe::MaterialID peMaterial, + pe::cr::HCSITS & cr, const std::function<void(void)> & syncCall, + const shared_ptr< StructuredBlockForest > & blocks, + const shared_ptr<pe::BodyStorage> & globalBodyStorage, BlockDataID bodyStorageID, + real_t gravitationalAcceleration, bool useEllipsoids, bool shortRun) +{ + WALBERLA_LOG_INFO_ON_ROOT("Starting creation of sediments"); + + real_t xParticle = real_t(0); + real_t yParticle = real_t(0); + real_t zParticle = real_t(0); + + for( uint_t nSed = 0; nSed < numberOfSediments; ++nSed ) + { + + WALBERLA_ROOT_SECTION() + { + xParticle = math::realRandom<real_t>(generationDomain.xMin(), generationDomain.xMax()); + yParticle = math::realRandom<real_t>(generationDomain.yMin(), generationDomain.yMax()); + zParticle = math::realRandom<real_t>(generationDomain.zMin(), generationDomain.zMax()); + } + + WALBERLA_MPI_SECTION() + { + mpi::broadcastObject( xParticle ); + mpi::broadcastObject( yParticle ); + mpi::broadcastObject( zParticle ); + } + + if( useEllipsoids ) + { + // prolate ellipsoids + real_t axisFactor = real_t(1.5); + real_t axisFactor2 = std::sqrt(real_t(1)/axisFactor); + real_t radius = diameter * real_t(0.5); + pe::createEllipsoid( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>( xParticle, yParticle, zParticle ), Vector3<real_t>(axisFactor*radius, axisFactor2*radius, axisFactor2*radius), peMaterial ); + } + else + { + pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>( xParticle, yParticle, zParticle ), diameter * real_t(0.5), peMaterial ); + } + + } + + syncCall(); + + // carry out 100 simulations to resolve all overlaps + for( uint_t pet = uint_t(1); pet <= uint_t(100); ++pet ) + { + cr.timestep( real_t(1) ); + syncCall(); + + // reset all velocities to zero + for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) + { + for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt ) + { + bodyIt->setLinearVel(Vector3<real_t>(real_t(0))); + bodyIt->setAngularVel(Vector3<real_t>(real_t(0))); + } + } + } + + + const uint_t maxInitialPeSteps = (shortRun) ? uint_t(10) : uint_t(200000); + const real_t dt_PE_init = real_t(1); + + real_t gravityGeneration = real_t(0.1) * gravitationalAcceleration; + cr.setGlobalLinearAcceleration(Vector3<real_t>(real_t(0), real_t(0), gravityGeneration)); + + real_t oldMinBodyPosition = real_t(0); + real_t convergenceLimit = std::fabs(gravityGeneration); + for( uint_t pet = uint_t(1); pet <= maxInitialPeSteps; ++pet ) + { + cr.timestep( dt_PE_init ); + syncCall(); + + real_t minBodyPosition = generationDomain.zMax(); + for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) + { + for( auto bodyIt = pe::LocalBodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt ) + { + minBodyPosition = std::min(bodyIt->getPosition()[2], minBodyPosition); + } + } + + WALBERLA_MPI_SECTION() + { + mpi::allReduceInplace(minBodyPosition, mpi::MIN); + } + + if( minBodyPosition > heightBorder ) break; + + if( pet % 500 == 0) + { + if( std::fabs(minBodyPosition - oldMinBodyPosition) / minBodyPosition < convergenceLimit ) break; + oldMinBodyPosition = minBodyPosition; + } + + WALBERLA_ROOT_SECTION() + { + if( pet % 100 == 0) + { + WALBERLA_LOG_INFO("[" << pet << "] Min position of all bodies = " << minBodyPosition << " with goal height " << heightBorder); + } + } + + } + + // revert gravitational acceleration to 'real' direction + cr.setGlobalLinearAcceleration(Vector3<real_t>(real_t(0), real_t(0), -gravityGeneration)); + + // carry out a few time steps to relax the system towards the real condition + const uint_t relaxationTimeSteps = uint_t(std::sqrt(real_t(2)/std::fabs(gravitationalAcceleration))); + WALBERLA_LOG_INFO_ON_ROOT("Carrying out " << relaxationTimeSteps << " more time steps with correct gravity"); + for( uint_t pet = uint_t(1); pet <= relaxationTimeSteps; ++pet ) + { + cr.timestep(dt_PE_init); + syncCall(); + } + + WALBERLA_LOG_INFO_ON_ROOT("Sediment layer creation done!"); + + // reset all velocities to zero + Vector3<real_t> initialBodyVelocity(real_t(0)); + WALBERLA_LOG_INFO_ON_ROOT("Setting initial velocity " << initialBodyVelocity << " of all bodies"); + for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) + { + for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt ) + { + bodyIt->setLinearVel(initialBodyVelocity); + bodyIt->setAngularVel(Vector3<real_t>(real_t(0))); + } + } + + cr.setGlobalLinearAcceleration(Vector3<real_t>(real_t(0))); +} + + +//******************************************************************************************************************* +/*!\brief Simulation of settling particles inside a rectangular column filled with viscous fluid + * + * This application is used in the paper + * Rettinger, Ruede - "Dynamic Load Balancing Techniques for Particulate Flow Simulations", submitted to Computation + * in Section 4 to apply the load estimator and to evaluate different load distribution strategies. + * + * It, however, features several different command line arguments that can be used to tweak the simulation. + * The setup can be horizontally period, a box or a hopper geometry (configurable, as in the paper). + * The size, resolution and used blocks for the domain partitioning can be changed. + * It even features adaptive mesh refinement, with different refinement criteria: + * - particle based (always on, also for global bodies like bounding planes) + * - optionally: vorticity- or gradient-based (with lower and upper limits) + * Since the paper, however, uses a uniform grid, many evaluation functionalities might not work properly for this case. + * Initially, all particles are pushed upwards to obtain a dense packing at the top plane. + * + * Most importantly, the load balancing can be modified: + * - load estimation strategies: + * - pure LBM = number of cells per block = constant workload per block + * - pure PE = number of local particles + baseweight + * - coupling based load estimator = use fitted function from Sec. 3 of paper + * - load distribution strategies: + * - space-filling curves: Hilbert and Morton + * - ParMETIS (and several algorithms and parameters, also multiple constraints possible) + * - diffusive (and options) + * - load balancing (/refinement check ) frequency + */ +//******************************************************************************************************************* +int main( int argc, char **argv ) +{ + debug::enterTestMode(); + + mpi::Environment env( argc, argv ); + + /////////////////// + // Customization // + /////////////////// + + // simulation control + bool shortRun = false; + bool funcTest = false; + bool fileIO = true; + bool logging = false; // logging of physical components + uint_t vtkWriteFreqDD = 0; //domain decomposition + uint_t vtkWriteFreqBo = 0; //bodies + uint_t vtkWriteFreqFl = 0; //fluid + uint_t vtkWriteFreq = 0; //general + std::string baseFolder = "vtk_out_AMRSedimentSettling"; // folder for vtk and file output + + // physical setup + real_t GalileoNumber = real_t(50); + real_t densityRatio = real_t(1.5); + real_t diameter = real_t(15); + real_t solidVolumeFraction = real_t(0.1); + uint_t blockSize = uint_t(32); + uint_t XBlocks = uint_t(12); + uint_t YBlocks = uint_t(12); + uint_t ZBlocks = uint_t(16); + bool useBox = false; + bool useHopper = false; + bool useEllipsoids = false; + real_t hopperRelHeight = real_t(0.5); // for hopper setup + real_t hopperRelOpening = real_t(0.3); // for hopper setup + + uint_t timestepsOnFinestLevel = uint_t(80000); + + //numerical parameters + bool averageForceTorqueOverTwoTimSteps = true; + uint_t numberOfLevels = uint_t(1); + uint_t refinementCheckFrequency = uint_t(100); + uint_t numPeSubCycles = uint_t(10); + + // refinement criteria + real_t lowerFluidRefinementLimit = real_t(0); + real_t upperFluidRefinementLimit = std::numeric_limits<real_t>::infinity(); + bool useVorticityCriterion = false; + bool useGradientCriterion = false; + + // load balancing + std::string loadEvaluationStrategy = "LBM"; //LBM, PE, Fit + std::string loadDistributionStrategy = "Hilbert"; //Morton, Hilbert, ParMetis, Diffusive + + real_t parMetis_ipc2redist = real_t(1000); + real_t parMetisTolerance = real_t(-1); + std::string parMetisAlgorithmString = "ADAPTIVE_REPART"; + + uint_t diffusionFlowIterations = uint_t(15); + uint_t diffusionMaxIterations = uint_t(20); + + + for( int i = 1; i < argc; ++i ) + { + if( std::strcmp( argv[i], "--shortRun" ) == 0 ) { shortRun = true; continue; } + if( std::strcmp( argv[i], "--funcTest" ) == 0 ) { funcTest = true; continue; } + if( std::strcmp( argv[i], "--fileIO" ) == 0 ) { fileIO = true; continue; } + if( std::strcmp( argv[i], "--logging" ) == 0 ) { logging = true; continue; } + if( std::strcmp( argv[i], "--vtkWriteFreqDD" ) == 0 ) { vtkWriteFreqDD = uint_c( std::atof( argv[++i] ) ); continue; } + if( std::strcmp( argv[i], "--vtkWriteFreqBo" ) == 0 ) { vtkWriteFreqBo = uint_c( std::atof( argv[++i] ) ); continue; } + if( std::strcmp( argv[i], "--vtkWriteFreqFl" ) == 0 ) { vtkWriteFreqFl = uint_c( std::atof( argv[++i] ) ); continue; } + if( std::strcmp( argv[i], "--vtkWriteFreq" ) == 0 ) { vtkWriteFreq = uint_c( std::atof( argv[++i] ) ); continue; } + if( std::strcmp( argv[i], "--baseFolder" ) == 0 ) { baseFolder = argv[++i]; continue; } + if( std::strcmp( argv[i], "--densityRatio" ) == 0 ) { densityRatio = std::atof( argv[++i] ); continue; } + if( std::strcmp( argv[i], "--Ga" ) == 0 ) { GalileoNumber = std::atof( argv[++i] ); continue; } + if( std::strcmp( argv[i], "--diameter" ) == 0 ) { diameter = std::atof( argv[++i] ); continue; } + if( std::strcmp( argv[i], "--blockSize" ) == 0 ) { blockSize = uint_c(std::atof( argv[++i] ) ); continue; } + if( std::strcmp( argv[i], "--XBlocks" ) == 0 ) { XBlocks = uint_c(std::atof( argv[++i] ) ); continue; } + if( std::strcmp( argv[i], "--YBlocks" ) == 0 ) { YBlocks = uint_c(std::atof( argv[++i] ) ); continue; } + if( std::strcmp( argv[i], "--ZBlocks" ) == 0 ) { ZBlocks = uint_c(std::atof( argv[++i] ) ); continue; } + if( std::strcmp( argv[i], "--useBox" ) == 0 ) { useBox = true; continue; } + if( std::strcmp( argv[i], "--useHopper" ) == 0 ) { useHopper = true; continue; } + if( std::strcmp( argv[i], "--hopperHeight" ) == 0 ) { hopperRelHeight = std::atof( argv[++i] ); continue; } + if( std::strcmp( argv[i], "--hopperOpening" ) == 0 ) { hopperRelOpening = std::atof( argv[++i] ); continue; } + if( std::strcmp( argv[i], "--timesteps" ) == 0 ) { timestepsOnFinestLevel = uint_c(std::atof( argv[++i] ) ); continue; } + if( std::strcmp( argv[i], "--noForceAveraging" ) == 0 ) { averageForceTorqueOverTwoTimSteps = false; continue; } + if( std::strcmp( argv[i], "--numPeSubCycles" ) == 0 ) { numPeSubCycles = uint_c(std::atof( argv[++i] )); continue; } + if( std::strcmp( argv[i], "--numLevels" ) == 0 ) { numberOfLevels = uint_c( std::atof( argv[++i] ) ); continue; } + if( std::strcmp( argv[i], "--refinementCheckFrequency" ) == 0 ) { refinementCheckFrequency = uint_c( std::atof( argv[++i] ) ); continue; } + if( std::strcmp( argv[i], "--lowerLimit" ) == 0 ) { lowerFluidRefinementLimit = std::atof( argv[++i] ); continue; } + if( std::strcmp( argv[i], "--upperLimit" ) == 0 ) { upperFluidRefinementLimit = std::atof( argv[++i] ); continue; } + if( std::strcmp( argv[i], "--useVorticityCriterion" ) == 0 ) { useVorticityCriterion = true; continue; } + if( std::strcmp( argv[i], "--useGradientCriterion" ) == 0 ) { useGradientCriterion = true; continue; } + if( std::strcmp( argv[i], "--loadEvaluationStrategy" ) == 0 ) { loadEvaluationStrategy = argv[++i]; continue; } + if( std::strcmp( argv[i], "--loadDistributionStrategy" ) == 0 ) { loadDistributionStrategy = argv[++i]; continue; } + if( std::strcmp( argv[i], "--ipc2redist" ) == 0 ) { parMetis_ipc2redist = std::atof( argv[++i] ); continue; } + if( std::strcmp( argv[i], "--parMetisTolerance" ) == 0 ) { parMetisTolerance = std::atof( argv[++i] ); continue; } + if( std::strcmp( argv[i], "--parMetisAlgorithm" ) == 0 ) { parMetisAlgorithmString = argv[++i]; continue; } + if( std::strcmp( argv[i], "--diffusionFlowIterations" ) == 0 ) { diffusionFlowIterations = uint_c(std::atof(argv[++i])); continue; } + if( std::strcmp( argv[i], "--diffusionMaxIterations" ) == 0 ) { diffusionMaxIterations = uint_c(std::atof(argv[++i])); continue; } + if( std::strcmp( argv[i], "--useEllipsoids" ) == 0 ) { useEllipsoids = true; continue; } + WALBERLA_ABORT("Unrecognized command line argument found: " << argv[i]); + } + + if( funcTest ) + { + walberla::logging::Logging::instance()->setLogLevel(logging::Logging::LogLevel::WARNING); + } + + if( fileIO || logging ) + { + WALBERLA_ROOT_SECTION(){ + // create base directory if it does not yet exist + filesystem::path tpath( baseFolder ); + if( !filesystem::exists( tpath ) ) + filesystem::create_directory( tpath ); + } + } + + if( useVorticityCriterion && useGradientCriterion ) + { + WALBERLA_ABORT("Use either vorticity or gradient criterion for refinement!"); + } + + if( loadEvaluationStrategy != "LBM" && loadEvaluationStrategy != "PE" && loadEvaluationStrategy != "Fit" && loadEvaluationStrategy != "FitMulti") + { + WALBERLA_ABORT("Invalid load evaluation strategy: " << loadEvaluationStrategy); + } + + if( vtkWriteFreq != 0 ) + { + vtkWriteFreqDD = vtkWriteFreq; + vtkWriteFreqBo = vtkWriteFreq; + vtkWriteFreqFl = vtkWriteFreq; + } + + if( diameter > real_c(blockSize) ) + { + WALBERLA_LOG_WARNING("PE Body Synchronization might not work since bodies are large compared to block size!"); + } + + if( useHopper ) + { + WALBERLA_CHECK(hopperRelHeight >= real_t(0) && hopperRelHeight <= real_t(1), "Invalid relative hopper height of " << hopperRelHeight); + WALBERLA_CHECK(hopperRelOpening >= real_t(0) && hopperRelOpening <= real_t(1), "Invalid relative hopper opening of " << hopperRelOpening); + } + + + ////////////////////////// + // NUMERICAL PARAMETERS // + ////////////////////////// + + const Vector3<uint_t> domainSize( XBlocks * blockSize, YBlocks * blockSize, ZBlocks * blockSize ); + const real_t domainVolume = real_t(domainSize[0] * domainSize[1] * domainSize[2]); + const real_t sphereVolume = math::M_PI / real_t(6) * diameter * diameter * diameter; + const uint_t numberOfSediments = uint_c(std::ceil(solidVolumeFraction * domainVolume / sphereVolume)); + + real_t expectedSedimentVolumeFraction = (useBox||useHopper) ? real_t(0.45) : real_t(0.52); + const real_t expectedSedimentedVolume = real_t(1)/expectedSedimentVolumeFraction * real_c(numberOfSediments) * sphereVolume; + const real_t expectedSedimentedHeight = std::max(diameter, expectedSedimentedVolume / real_c(domainSize[0] * domainSize[1])); + + const real_t uRef = real_t(0.02); + const real_t xRef = diameter; + const real_t tRef = xRef / uRef; + + const real_t gravitationalAcceleration = uRef * uRef / ( (densityRatio-real_t(1)) * diameter ); + const real_t viscosity = uRef * diameter / GalileoNumber; + const real_t omega = lbm::collision_model::omegaFromViscosity(viscosity); + const real_t tau = real_t(1) / omega; + + const uint_t loggingDisplayFrequency = uint_t(100); + + const real_t dx = real_t(1); + const real_t overlap = real_t( 1.5 ) * dx; + + if( useVorticityCriterion && floatIsEqual(lowerFluidRefinementLimit, real_t(0)) && std::isinf(upperFluidRefinementLimit) ) + { + // use computed criterion instead of user input + lowerFluidRefinementLimit = real_t(0.05) * uRef; + upperFluidRefinementLimit = real_t(0.1) * uRef; + } + + const uint_t finestLevel = numberOfLevels - uint_t(1); + std::stringstream omega_msg; + for( uint_t i = 0; i < numberOfLevels; ++i ) + { + real_t omegaLvl = lbm::collision_model::levelDependentRelaxationParameter( i, omega, finestLevel ); + omega_msg << omegaLvl << " ( on level " << i << ", tau = " << real_t(1)/omega << " ), "; + } + + const uint_t levelScalingFactor = ( uint_t(1) << finestLevel ); + const uint_t lbmTimeStepsPerTimeLoopIteration = levelScalingFactor; + + const uint_t timesteps = funcTest ? 1 : ( shortRun ? uint_t(100) : uint_t( timestepsOnFinestLevel / lbmTimeStepsPerTimeLoopIteration ) ); + + WALBERLA_LOG_INFO_ON_ROOT("Setup (in simulation, i.e. lattice, units):"); + WALBERLA_LOG_INFO_ON_ROOT(" - domain size = " << domainSize); + WALBERLA_LOG_INFO_ON_ROOT(" - sediment diameter = " << diameter ); + WALBERLA_LOG_INFO_ON_ROOT(" - Galileo number = " << GalileoNumber ); + WALBERLA_LOG_INFO_ON_ROOT(" - number of sediments: " << numberOfSediments); + WALBERLA_LOG_INFO_ON_ROOT(" - densityRatio = " << densityRatio ); + WALBERLA_LOG_INFO_ON_ROOT(" - fluid: relaxation time (tau) = " << tau << ", kin. visc = " << viscosity ); + WALBERLA_LOG_INFO_ON_ROOT(" - gravitational acceleration = " << gravitationalAcceleration ); + WALBERLA_LOG_INFO_ON_ROOT(" - reference values: x = " << xRef << ", t = " << tRef << ", vel = " << uRef); + WALBERLA_LOG_INFO_ON_ROOT(" - omega: " << omega_msg.str()); + WALBERLA_LOG_INFO_ON_ROOT(" - number of levels: " << numberOfLevels); + WALBERLA_LOG_INFO_ON_ROOT(" - number of pe sub cycles: " << numPeSubCycles); + if( useVorticityCriterion ) + { + WALBERLA_LOG_INFO_ON_ROOT(" - using vorticity criterion with lower limit = " << lowerFluidRefinementLimit << " and upper limit = " << upperFluidRefinementLimit ); + } + if( useGradientCriterion ) + { + WALBERLA_LOG_INFO_ON_ROOT(" - using gradient criterion with lower limit = " << lowerFluidRefinementLimit << " and upper limit = " << upperFluidRefinementLimit ); + } + if( vtkWriteFreqDD > 0 ) + { + WALBERLA_LOG_INFO_ON_ROOT(" - writing vtk files of domain decomposition to folder \"" << baseFolder << "\" with frequency " << vtkWriteFreqDD); + } + if( vtkWriteFreqBo > 0 ) + { + WALBERLA_LOG_INFO_ON_ROOT(" - writing vtk files of bodies data to folder \"" << baseFolder << "\" with frequency " << vtkWriteFreqBo); + } + if( vtkWriteFreqFl > 0 ) + { + WALBERLA_LOG_INFO_ON_ROOT(" - writing vtk files of fluid data to folder \"" << baseFolder << "\" with frequency " << vtkWriteFreqFl); + } + if( useEllipsoids ) + { + WALBERLA_LOG_INFO_ON_ROOT(" - using (prolate) ellipsoids as sediments"); + } + if( useBox ) + { + WALBERLA_LOG_INFO_ON_ROOT(" - using box setup"); + } + else if ( useHopper ) + { + WALBERLA_LOG_INFO_ON_ROOT(" - using hopper setup"); + } + else + { + WALBERLA_LOG_INFO_ON_ROOT(" - using horizontally periodic domain"); + } + + + + if( refinementCheckFrequency == 0 && numberOfLevels != 1 ) + { + // determine check frequency automatically based on maximum admissable velocity and block sizes + real_t uMax = real_t(0.1); + refinementCheckFrequency = uint_c(( overlap + real_c(blockSize) - real_t(2) * real_t(FieldGhostLayers) * dx) / uMax) / lbmTimeStepsPerTimeLoopIteration; + } + WALBERLA_LOG_INFO_ON_ROOT(" - refinement / load balancing check frequency (coarse time steps): " << refinementCheckFrequency); + WALBERLA_LOG_INFO_ON_ROOT(" - load evaluation strategy: " << loadEvaluationStrategy); + WALBERLA_LOG_INFO_ON_ROOT(" - load distribution strategy: " << loadDistributionStrategy); + + /////////////////////////// + // BLOCK STRUCTURE SETUP // + /////////////////////////// + + Vector3<uint_t> blockSizeInCells( blockSize ); + + AABB simulationDomain( real_t(0), real_t(0), real_t(0), real_c(domainSize[0]), real_c(domainSize[1]), real_c(domainSize[2]) ); + AABB sedimentDomain( real_t(0), real_t(0), real_c(domainSize[2]) - expectedSedimentedHeight, real_c(domainSize[0]), real_c(domainSize[1]), real_c(domainSize[2]) ); + + AABB initialRefinementDomain = sedimentDomain; + if( useBox || useHopper ) + { + // require finest levels also along bounding planes -> initially refine everywhere + initialRefinementDomain = simulationDomain; + } + + auto blocks = createBlockStructure( simulationDomain, blockSizeInCells, numberOfLevels, initialRefinementDomain, (useBox||useHopper), loadDistributionStrategy ); + + //write initial domain decomposition to file + if( vtkWriteFreqDD > 0 ) + { + vtk::writeDomainDecomposition( blocks, "initial_domain_decomposition", baseFolder ); + } + + + ///////////////// + // PE COUPLING // + ///////////////// + + // set up pe functionality + shared_ptr<pe::BodyStorage> globalBodyStorage = make_shared<pe::BodyStorage>(); + pe::SetBodyTypeIDs<BodyTypeTuple>::execute(); + + auto bodyStorageID = blocks->addBlockData(pe::createStorageDataHandling<BodyTypeTuple>(), "pe Body Storage"); + auto ccdID = blocks->addBlockData(pe::ccd::createHashGridsDataHandling( globalBodyStorage, bodyStorageID ), "CCD"); + BlockDataID fcdID = (useEllipsoids) ? blocks->addBlockData( pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, pe::fcd::GJKEPACollideFunctor>(), "FCD" ) + : blocks->addBlockData(pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, pe::fcd::AnalyticCollideFunctor>(), "FCD"); + + WcTimingTree timingTreePE; + + // set up collision response + pe::cr::HCSITS cr(globalBodyStorage, blocks->getBlockStoragePointer(), bodyStorageID, ccdID, fcdID, &timingTreePE ); + cr.setMaxIterations(10); + cr.setRelaxationModel( pe::cr::HardContactSemiImplicitTimesteppingSolvers::ApproximateInelasticCoulombContactByDecoupling ); + + // set up synchronization procedure + std::function<void(void)> syncCall = std::bind( pe::syncNextNeighbors<BodyTypeTuple>, std::ref(blocks->getBlockForest()), bodyStorageID, &timingTreePE, overlap, false ); + + // create pe bodies + + // add the sediments + auto peMaterial = pe::createMaterial( "mat", densityRatio, real_t(1), real_t(0.25), real_t(0.25), real_t(0), real_t(200), real_t(100), real_t(100), real_t(100) ); + + // create two planes at bottom and top of domain for a horizontally periodic box + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,1), Vector3<real_t>(0,0,0), peMaterial ); + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,-1), Vector3<real_t>(0,0,simulationDomain.zMax()), peMaterial ); + if( useBox ) + { + // add four more planes to obtain a closed box + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(1,0,0), Vector3<real_t>(0,0,0), peMaterial ); + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(-1,0,0), Vector3<real_t>(simulationDomain.xMax(),0,0), peMaterial ); + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,1,0), Vector3<real_t>(0,0,0), peMaterial ); + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,-1,0), Vector3<real_t>(0,simulationDomain.yMax(),0), peMaterial ); + } + else if ( useHopper ) + { + // box bounding planes + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(1,0,0), Vector3<real_t>(0,0,0), peMaterial ); + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(-1,0,0), Vector3<real_t>(simulationDomain.xMax(),0,0), peMaterial ); + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,1,0), Vector3<real_t>(0,0,0), peMaterial ); + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,-1,0), Vector3<real_t>(0,simulationDomain.yMax(),0), peMaterial ); + + //hopper planes + real_t xMax = simulationDomain.xMax(); + real_t yMax = simulationDomain.yMax(); + real_t zMax = simulationDomain.zMax(); + Vector3<real_t> p1(0,0,hopperRelHeight*zMax); + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(p1[2],0,hopperRelOpening*xMax-p1[0]), p1, peMaterial ); + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,p1[2],hopperRelOpening*yMax-p1[0]), p1, peMaterial ); + + Vector3<real_t> p2(xMax,yMax,hopperRelHeight*zMax); + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(-p2[2],0,-((real_t(1)-hopperRelOpening)*xMax-p2[0])), p2, peMaterial ); + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,-p2[2],-((real_t(1)-hopperRelOpening)*yMax-p2[1])), p2, peMaterial ); + } + + AABB sedimentGenerationDomain( real_t(0), real_t(0), real_t(0.5)*real_c(domainSize[2]), real_c(domainSize[0]), real_c(domainSize[1]), real_c(domainSize[2]) ); + createSedimentLayer(numberOfSediments, sedimentGenerationDomain, diameter, sedimentDomain.zMin(), peMaterial, cr, syncCall, blocks, globalBodyStorage, bodyStorageID, gravitationalAcceleration, useEllipsoids, shortRun ); + + // reset timer to not cover init stats + timingTreePE.reset(); + + // now we can use the information about the body positions to adapt the refinement + + /////////////////////////// + // DYNAMIC REFINEMENT, 1 // + /////////////////////////// + + auto & blockforest = blocks->getBlockForest(); + blockforest.recalculateBlockLevelsInRefresh( true ); + blockforest.alwaysRebalanceInRefresh( true ); //load balancing every time refresh is triggered + blockforest.reevaluateMinTargetLevelsAfterForcedRefinement( false ); + blockforest.allowRefreshChangingDepth( false ); + blockforest.allowMultipleRefreshCycles( false ); // otherwise info collections are invalid + + { + blockforest::CombinedMinTargetLevelDeterminationFunctions initialMinTargetLevelDeterminationFunctions; + + blockforest::AABBRefinementSelection aabbRefinementSelection; + aabbRefinementSelection.addAABB(sedimentDomain,finestLevel ); + initialMinTargetLevelDeterminationFunctions.add( aabbRefinementSelection ); + + // refinement along global bodies (bounding planes) to have consistent mapping (required for CLI always, or SimpleBB with non-AABB planes) + real_t blockExtension = real_c(FieldGhostLayers); + pe_coupling::amr::GlobalBodyPresenceLevelDetermination globalBodyPresenceRefinement( globalBodyStorage, finestLevel, blockExtension, pe_coupling::selectGlobalBodies ); + initialMinTargetLevelDeterminationFunctions.add(globalBodyPresenceRefinement); + + blockforest.setRefreshMinTargetLevelDeterminationFunction( initialMinTargetLevelDeterminationFunctions ); + + for( uint_t refreshCycle = uint_t(0); refreshCycle < finestLevel; ++refreshCycle) + { + + WALBERLA_LOG_INFO_ON_ROOT("Refreshing blockforest...") + + // check refinement criterions and refine/coarsen if necessary + uint_t stampBefore = blocks->getBlockForest().getModificationStamp(); + blocks->refresh(); + uint_t stampAfter = blocks->getBlockForest().getModificationStamp(); + + if( stampBefore == stampAfter ) + { + break; + } + + WALBERLA_LOG_INFO_ON_ROOT("Recreating data structures.."); + + // rebuild PE data structures + pe::clearSynchronization( blockforest, bodyStorageID); + + syncCall(); + + for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) + { + pe::ccd::ICCD* ccd = blockIt->getData< pe::ccd::ICCD >( ccdID ); + ccd->reloadBodies(); + } + } + } + + uint_t numberOfInitialFineBlocks = blockforest.getNumberOfBlocks(finestLevel); + mpi::allReduceInplace(numberOfInitialFineBlocks, mpi::SUM); + WALBERLA_LOG_INFO_ON_ROOT("Total number of initial fine blocks in simulation: " << numberOfInitialFineBlocks); + + uint_t numberOfProcesses = uint_c(MPIManager::instance()->numProcesses()); + + + /////////////////////// + // ADD DATA TO BLOCKS // + //////////////////////// + + // create the lattice model + LatticeModel_T latticeModel = LatticeModel_T( lbm::collision_model::TRT::constructWithMagicNumber( omega, lbm::collision_model::TRT::threeSixteenth, finestLevel ) ); + + // add PDF field + BlockDataID pdfFieldID = lbm::addPdfFieldToStorage< LatticeModel_T >( blocks, "pdf field (zyxf)", latticeModel, + Vector3< real_t >( real_t(0) ), real_t(1), + FieldGhostLayers, field::zyxf ); + // add flag field + BlockDataID flagFieldID = field::addFlagFieldToStorage<FlagField_T>( blocks, "flag field", FieldGhostLayers ); + + // add body field + BlockDataID bodyFieldID = field::addToStorage<BodyField_T>( blocks, "body field", NULL, field::zyxf, FieldGhostLayers ); + + // add velocity field and utility + BlockDataID velocityFieldID = field::addToStorage<VelocityField_T>( blocks, "velocity field", Vector3<real_t>(real_t(0)), field::zyxf, uint_t(2) ); + + typedef lbm::VelocityFieldWriter< PdfField_T, VelocityField_T > VelocityFieldWriter_T; + BlockSweepWrapper< VelocityFieldWriter_T > velocityFieldWriter( blocks, VelocityFieldWriter_T( pdfFieldID, velocityFieldID ) ); + + + shared_ptr<blockforest::communication::NonUniformBufferedScheme<stencil::D3Q27> > velocityCommunicationScheme = make_shared<blockforest::communication::NonUniformBufferedScheme<stencil::D3Q27> >( blocks ); + velocityCommunicationScheme->addPackInfo( make_shared< field::refinement::PackInfo<VelocityField_T, stencil::D3Q27> >( velocityFieldID ) ); + + // add boundary handling & initialize outer domain boundaries + BlockDataID boundaryHandlingID = blocks->addBlockData( make_shared< MyBoundaryHandling >( blocks, flagFieldID, pdfFieldID, bodyFieldID ), + "boundary handling" ); + + // map planes into the LBM simulation -> act as no-slip boundaries + //pe_coupling::mapBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, *globalBodyStorage, NoSlip_Flag, pe_coupling::selectGlobalBodies ); + pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, *globalBodyStorage, bodyFieldID, MO_Flag, pe_coupling::selectGlobalBodies ); + + // map pe bodies into the LBM simulation + pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, *globalBodyStorage, bodyFieldID, MO_Flag, pe_coupling::selectRegularBodies ); + + + // force averaging functionality + shared_ptr<pe_coupling::BodiesForceTorqueContainer> bodiesFTContainer1 = make_shared<pe_coupling::BodiesForceTorqueContainer>(blocks, bodyStorageID); + std::function<void(void)> storeForceTorqueInCont1 = std::bind(&pe_coupling::BodiesForceTorqueContainer::store, bodiesFTContainer1); + + shared_ptr<pe_coupling::BodiesForceTorqueContainer> bodiesFTContainer2 = make_shared<pe_coupling::BodiesForceTorqueContainer>(blocks, bodyStorageID); + std::function<void(void)> setForceTorqueOnBodiesFromCont2 = std::bind(&pe_coupling::BodiesForceTorqueContainer::setOnBodies, bodiesFTContainer2); + + shared_ptr<pe_coupling::ForceTorqueOnBodiesScaler> forceScaler = make_shared<pe_coupling::ForceTorqueOnBodiesScaler>(blocks, bodyStorageID, real_t(0.5)); + std::function<void(void)> setForceScalingFactorToOne = std::bind(&pe_coupling::ForceTorqueOnBodiesScaler::resetScalingFactor,forceScaler,real_t(1)); + std::function<void(void)> setForceScalingFactorToHalf = std::bind(&pe_coupling::ForceTorqueOnBodiesScaler::resetScalingFactor,forceScaler,real_t(0.5)); + + if( averageForceTorqueOverTwoTimSteps ) { + bodiesFTContainer2->store(); + + setForceScalingFactorToOne(); + } + + /////////////////////////// + // DYNAMIC REFINEMENT, 2 // + /////////////////////////// + + blockforest::CombinedMinTargetLevelDeterminationFunctions minTargetLevelDeterminationFunctions; + + // add refinement criterion based on particle presence + shared_ptr<pe_coupling::InfoCollection> couplingInfoCollection = walberla::make_shared<pe_coupling::InfoCollection>(); + pe_coupling::amr::BodyPresenceLevelDetermination particlePresenceRefinement( couplingInfoCollection, finestLevel ); + + minTargetLevelDeterminationFunctions.add( particlePresenceRefinement ); + + // also add (possible) refinement criteria based on fluid quantities + + if( useVorticityCriterion ) + { + // add refinement criterion based on vorticity magnitude + field::FlagFieldEvaluationFilter<FlagField_T> flagFieldFilter( flagFieldID, Fluid_Flag ); + lbm::refinement::VorticityBasedLevelDetermination< field::FlagFieldEvaluationFilter<FlagField_T> > vorticityRefinement( + velocityFieldID, flagFieldFilter, upperFluidRefinementLimit, lowerFluidRefinementLimit, finestLevel ); + + minTargetLevelDeterminationFunctions.add( vorticityRefinement ); + } + + if( useGradientCriterion ) + { + // add refinement criterion based on velocity gradient magnitude + field::FlagFieldEvaluationFilter<FlagField_T> flagFieldFilter( flagFieldID, Fluid_Flag ); + VectorGradientRefinement< LatticeModel_T, field::FlagFieldEvaluationFilter<FlagField_T> > gradientRefinement( + velocityFieldID, flagFieldFilter, upperFluidRefinementLimit, lowerFluidRefinementLimit, finestLevel ); + + minTargetLevelDeterminationFunctions.add( gradientRefinement ); + } + + // refinement along global bodies (bounding planes) to have consistent mapping (required for CLI always, or SimpleBB with non-AABB planes) + real_t blockExtension = real_c(FieldGhostLayers); + pe_coupling::amr::GlobalBodyPresenceLevelDetermination globalBodyPresenceRefinement( globalBodyStorage, finestLevel, blockExtension, pe_coupling::selectGlobalBodies ); + minTargetLevelDeterminationFunctions.add(globalBodyPresenceRefinement); + + blockforest.setRefreshMinTargetLevelDeterminationFunction( minTargetLevelDeterminationFunctions ); + + bool curveAllGather = true; + bool balanceLevelwise = true; + + real_t peBlockBaseWeight = real_t(1); //default value, might not be the best + shared_ptr<pe::InfoCollection> peInfoCollection = walberla::make_shared<pe::InfoCollection>(); + + if( loadDistributionStrategy == "Hilbert" || loadDistributionStrategy == "Morton") + { + if( loadDistributionStrategy == "Hilbert") + { + bool useHilbert = true; + blockforest.setRefreshPhantomBlockMigrationPreparationFunction( blockforest::DynamicCurveBalance< blockforest::PODPhantomWeight<real_t> >( useHilbert, curveAllGather, balanceLevelwise ) ); + } + else if (loadDistributionStrategy == "Morton" ) + { + bool useHilbert = false; + blockforest.setRefreshPhantomBlockMigrationPreparationFunction( blockforest::DynamicCurveBalance< blockforest::PODPhantomWeight<real_t> >( useHilbert, curveAllGather, balanceLevelwise ) ); + } + + blockforest.setRefreshPhantomBlockDataPackFunction(blockforest::PODPhantomWeightPackUnpack<real_t>()); + blockforest.setRefreshPhantomBlockDataUnpackFunction(blockforest::PODPhantomWeightPackUnpack<real_t>()); + + if( loadEvaluationStrategy == "Fit" ) + { + if( useEllipsoids ) + { + pe_coupling::amr::WeightAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, fittedTotalWeightEvaluationFunctionEllipsoids); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } else{ + pe_coupling::amr::WeightAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, fittedTotalWeightEvaluationFunctionSpheres); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } + } + else if( loadEvaluationStrategy == "PE" ) + { + pe::amr::WeightAssignmentFunctor weightAssignmentFunctor(peInfoCollection, peBlockBaseWeight ); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } + else if( loadEvaluationStrategy == "LBM" ) + { + pe_coupling::amr::WeightAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, pe_coupling::amr::defaultWeightEvaluationFunction); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } + else + { + WALBERLA_ABORT("Invalid load evaluation strategy: " << loadEvaluationStrategy); + } + + } + else if( loadDistributionStrategy == "ParMetis") + { + uint_t ncon = 1; + if( loadEvaluationStrategy == "FitMulti") + { + ncon = 2; + } + + blockforest::DynamicParMetis::Algorithm parMetisAlgorithm = blockforest::DynamicParMetis::stringToAlgorithm(parMetisAlgorithmString); + blockforest::DynamicParMetis::WeightsToUse parMetisWeightsToUse = blockforest::DynamicParMetis::WeightsToUse::PARMETIS_BOTH_WEIGHTS; + blockforest::DynamicParMetis::EdgeSource parMetisEdgeSource = blockforest::DynamicParMetis::EdgeSource::PARMETIS_EDGES_FROM_EDGE_WEIGHTS; + + blockforest::DynamicParMetis dynamicParMetis(parMetisAlgorithm, parMetisWeightsToUse, parMetisEdgeSource, ncon); + dynamicParMetis.setipc2redist(parMetis_ipc2redist); + + real_t loadImbalanceTolerance = (parMetisTolerance < real_t(1)) ? std::max(real_t(1.05), real_t(1) + real_t(1) / ( real_c(numberOfInitialFineBlocks) / real_c(numberOfProcesses) ) ) : parMetisTolerance; + std::vector<double> parMetisLoadImbalanceTolerance(ncon, double(loadImbalanceTolerance)); + dynamicParMetis.setImbalanceTolerance(parMetisLoadImbalanceTolerance[0], 0); + + WALBERLA_LOG_INFO_ON_ROOT(" - ParMetis configuration: "); + WALBERLA_LOG_INFO_ON_ROOT(" - algorithm = " << dynamicParMetis.algorithmToString() ); + WALBERLA_LOG_INFO_ON_ROOT(" - weights to use = " << dynamicParMetis.weightsToUseToString() ); + WALBERLA_LOG_INFO_ON_ROOT(" - edge source = " << dynamicParMetis.edgeSourceToString() ); + WALBERLA_LOG_INFO_ON_ROOT(" - ncon = " << ncon ); + WALBERLA_LOG_INFO_ON_ROOT(" - ipc2redist parameter = " << dynamicParMetis.getipc2redist() ); + + blockforest.setRefreshPhantomBlockDataPackFunction(blockforest::DynamicParMetisBlockInfoPackUnpack()); + blockforest.setRefreshPhantomBlockDataUnpackFunction(blockforest::DynamicParMetisBlockInfoPackUnpack()); + + if( loadEvaluationStrategy == "Fit" ) + { + WALBERLA_LOG_INFO_ON_ROOT(" - load imbalance tolerance = <" << parMetisLoadImbalanceTolerance[0] << ">" ); + if( useEllipsoids ) + { + pe_coupling::amr::MetisAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, fittedTotalWeightEvaluationFunctionEllipsoids); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } else{ + pe_coupling::amr::MetisAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, fittedTotalWeightEvaluationFunctionSpheres); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } + } + else if( loadEvaluationStrategy == "FitMulti" ) + { + double imbalanceTolerancePE = 10.; + parMetisLoadImbalanceTolerance[1] = std::min(imbalanceTolerancePE, real_c(MPIManager::instance()->numProcesses())); + WALBERLA_LOG_INFO_ON_ROOT(" - load imbalance tolerances = <" << parMetisLoadImbalanceTolerance[0] << ", " << parMetisLoadImbalanceTolerance[1] << ">" ); + dynamicParMetis.setImbalanceTolerance(parMetisLoadImbalanceTolerance[1], 1); + + if( useEllipsoids ) + { + std::vector< std::function<real_t(const pe_coupling::BlockInfo&)> > weightEvaluationFunctions(ncon); + weightEvaluationFunctions[0] = fittedLBMWeightEvaluationFunctionEllipsoids; + weightEvaluationFunctions[1] = fittedPEWeightEvaluationFunctionEllipsoids; + pe_coupling::amr::MetisAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, weightEvaluationFunctions); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } else{ + std::vector< std::function<real_t(const pe_coupling::BlockInfo&)> > weightEvaluationFunctions(ncon); + weightEvaluationFunctions[0] = fittedLBMWeightEvaluationFunctionSpheres; + weightEvaluationFunctions[1] = fittedPEWeightEvaluationFunctionSpheres; + pe_coupling::amr::MetisAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, weightEvaluationFunctions); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } + } + else if( loadEvaluationStrategy == "PE" ) + { + pe::amr::MetisAssignmentFunctor weightAssignmentFunctor(peInfoCollection, peBlockBaseWeight ); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } + else if( loadEvaluationStrategy == "LBM" ) + { + pe_coupling::amr::MetisAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, pe_coupling::amr::defaultWeightEvaluationFunction); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } + else + { + WALBERLA_ABORT("Invalid load evaluation strategy: " << loadEvaluationStrategy); + } + + blockforest.setRefreshPhantomBlockMigrationPreparationFunction( dynamicParMetis ); + + } + else if( loadDistributionStrategy == "Diffusive") + { + using DB_T = blockforest::DynamicDiffusionBalance< blockforest::PODPhantomWeight<real_t> >; + DB_T dynamicDiffusion(diffusionMaxIterations, diffusionFlowIterations ); + dynamicDiffusion.setMode(DB_T::Mode::DIFFUSION_PUSH); + + WALBERLA_LOG_INFO_ON_ROOT(" - Dynamic diffusion configuration: "); + WALBERLA_LOG_INFO_ON_ROOT(" - max iterations = " << dynamicDiffusion.getMaxIterations() ); + WALBERLA_LOG_INFO_ON_ROOT(" - flow iterations = " << dynamicDiffusion.getFlowIterations()); + + blockforest.setRefreshPhantomBlockDataPackFunction(blockforest::PODPhantomWeightPackUnpack<real_t>()); + blockforest.setRefreshPhantomBlockDataUnpackFunction(blockforest::PODPhantomWeightPackUnpack<real_t>()); + blockforest.setRefreshPhantomBlockMigrationPreparationFunction( dynamicDiffusion ); + + if( loadEvaluationStrategy == "Fit" ) + { + if( useEllipsoids ) + { + pe_coupling::amr::WeightAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, fittedTotalWeightEvaluationFunctionEllipsoids); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } else{ + pe_coupling::amr::WeightAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, fittedTotalWeightEvaluationFunctionSpheres); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } + } + else if( loadEvaluationStrategy == "PE" ) + { + pe::amr::WeightAssignmentFunctor weightAssignmentFunctor(peInfoCollection, peBlockBaseWeight ); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } + else if( loadEvaluationStrategy == "LBM" ) + { + pe_coupling::amr::WeightAssignmentFunctor weightAssignmentFunctor(couplingInfoCollection, pe_coupling::amr::defaultWeightEvaluationFunction); + blockforest.setRefreshPhantomBlockDataAssignmentFunction(weightAssignmentFunctor); + } + else + { + WALBERLA_ABORT("Invalid load evaluation strategy: " << loadEvaluationStrategy); + } + + } else + { + WALBERLA_ABORT("Load distribution strategy \"" << loadDistributionStrategy << "\t not implemented! - Aborting" ); + } + + + /////////////// + // TIME LOOP // + /////////////// + + // create the timeloop + SweepTimeloop timeloop( blocks->getBlockStorage(), timesteps ); + + if( vtkWriteFreqBo != uint_t(0) ) { + + // pe bodies + if (useEllipsoids) { + auto bodyVtkOutput = make_shared<pe::EllipsoidVtkOutput>(bodyStorageID, blocks->getBlockStorage()); + auto bodyVTK = vtk::createVTKOutput_PointData(bodyVtkOutput, "bodies", vtkWriteFreqBo, baseFolder); + timeloop.addFuncBeforeTimeStep(vtk::writeFiles(bodyVTK), "VTK (sediment data)"); + + } else { + auto bodyVtkOutput = make_shared<pe::SphereVtkOutput>(bodyStorageID, blocks->getBlockStorage()); + auto bodyVTK = vtk::createVTKOutput_PointData(bodyVtkOutput, "bodies", vtkWriteFreqBo, baseFolder); + timeloop.addFuncBeforeTimeStep(vtk::writeFiles(bodyVTK), "VTK (sediment data)"); + } + } + + if( vtkWriteFreqFl != uint_t(0) ) { + + // pdf field + auto pdfFieldVTK = vtk::createVTKOutput_BlockData(blocks, "fluid_field", vtkWriteFreqFl, 0, false, baseFolder); + + field::FlagFieldCellFilter<FlagField_T> fluidFilter(flagFieldID); + fluidFilter.addFlag(Fluid_Flag); + pdfFieldVTK->addCellInclusionFilter(fluidFilter); + + pdfFieldVTK->addCellDataWriter( + make_shared<lbm::VelocityVTKWriter<LatticeModel_T, float> >(pdfFieldID, "VelocityFromPDF")); + pdfFieldVTK->addCellDataWriter( + make_shared<lbm::DensityVTKWriter<LatticeModel_T, float> >(pdfFieldID, "DensityFromPDF")); + + timeloop.addFuncBeforeTimeStep(vtk::writeFiles(pdfFieldVTK), "VTK (fluid field data)"); + } + + if( vtkWriteFreqDD != uint_t(0) ) { + auto domainDecompVTK = vtk::createVTKOutput_DomainDecomposition(blocks, "domain_decomposition", vtkWriteFreqDD, baseFolder ); + timeloop.addFuncBeforeTimeStep( vtk::writeFiles(domainDecompVTK), "VTK (domain decomposition)"); + } + + WcTimingPool timeloopTiming; + shared_ptr<WcTimingPool> timeloopRefinementTiming = make_shared<WcTimingPool>(); + shared_ptr<WcTimingPool> timeloopRefinementTimingLevelwise = make_shared<WcTimingPool>(); + + + auto sweep = lbm::makeCellwiseSweep< LatticeModel_T, FlagField_T >( pdfFieldID, flagFieldID, Fluid_Flag ); + auto refinementTimestep = lbm::refinement::makeTimeStep< LatticeModel_T, BoundaryHandling_T >( blocks, sweep, pdfFieldID, boundaryHandlingID ); + + refinementTimestep->enableTiming( timeloopRefinementTiming, timeloopRefinementTimingLevelwise ); + + // Averaging the force/torque over two time steps is said to damp oscillations of the interaction force/torque. + // See Ladd - " Numerical simulations of particulate suspensions via a discretized Boltzmann equation. Part 1. Theoretical foundation", 1994, p. 302 + if( averageForceTorqueOverTwoTimSteps ) { + + // store force/torque from hydrodynamic interactions in container1 + refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(storeForceTorqueInCont1), "Force Storing", finestLevel); + + // set force/torque from previous time step (in container2) + refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(setForceTorqueOnBodiesFromCont2), "Force setting", finestLevel); + + // average the force/torque by scaling it with factor 1/2 (except in first timestep and directly after refinement, there it is 1) + refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(SharedFunctor<pe_coupling::ForceTorqueOnBodiesScaler>(forceScaler)), "Force averaging", finestLevel); + refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(setForceScalingFactorToHalf), "Force scaling adjustment", finestLevel); + + // swap containers + refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(pe_coupling::BodyContainerSwapper(bodiesFTContainer1, bodiesFTContainer2)), "Swap FT container", finestLevel); + + } + + Vector3<real_t> gravitationalForce( real_t(0), real_t(0), -(densityRatio - real_t(1)) * gravitationalAcceleration * sphereVolume ); + refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(pe_coupling::ForceOnBodiesAdder( blocks, bodyStorageID, gravitationalForce )), "Gravitational force", finestLevel ); + + // add pe timesteps + refinementTimestep->addPostStreamVoidFunction(lbm::refinement::FunctorWrapper(pe_coupling::TimeStep( blocks, bodyStorageID, cr, syncCall, real_t(1), numPeSubCycles)), + "pe Time Step", finestLevel ); + + // add sweep for updating the pe body mapping into the LBM simulation + refinementTimestep->addPostStreamVoidFunction(lbm::refinement::SweepAsFunctorWrapper( pe_coupling::BodyMapping< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, globalBodyStorage, bodyFieldID, MO_Flag, FormerMO_Flag, pe_coupling::selectRegularBodies ), blocks ), + "Body Mapping", finestLevel ); + + // add sweep for restoring PDFs in cells previously occupied by pe bodies + typedef pe_coupling::EquilibriumReconstructor< LatticeModel_T, BoundaryHandling_T > Reconstructor_T; + Reconstructor_T reconstructor( blocks, boundaryHandlingID, pdfFieldID, bodyFieldID ); + refinementTimestep->addPostStreamVoidFunction(lbm::refinement::SweepAsFunctorWrapper( pe_coupling::PDFReconstruction< LatticeModel_T, BoundaryHandling_T, Reconstructor_T > ( blocks, + boundaryHandlingID, bodyStorageID, globalBodyStorage, bodyFieldID, reconstructor, FormerMO_Flag, Fluid_Flag ), blocks ), + "PDF Restore", finestLevel ); + + + // add LBM sweep with refinement + timeloop.addFuncBeforeTimeStep( makeSharedFunctor( refinementTimestep ), "LBM refinement time step" ); + + std::string loggingFileName( baseFolder + "/Logging_Ga"); + loggingFileName += std::to_string(uint_c(GalileoNumber)); + loggingFileName += "_lvl"; + loggingFileName += std::to_string(numberOfLevels); + loggingFileName += ".txt"; + if( logging ) + { + WALBERLA_LOG_INFO_ON_ROOT(" - writing logging output to file \"" << loggingFileName << "\""); + } + shared_ptr< PropertyLogger > logger = walberla::make_shared< PropertyLogger >( &timeloop, blocks, bodyStorageID, + loggingFileName, fileIO ); + if(logging) + { + timeloop.addFuncAfterTimeStep( SharedFunctor< PropertyLogger >( logger ), "Property logger" ); + } + + + timeloop.addFuncAfterTimeStep( RemainingTimeLogger( timeloop.getNrOfTimeSteps() ), "Remaining Time Logger" ); + + + // add top level timing pool output + timeloop.addFuncAfterTimeStep( TimingPoolLogger( timeloopTiming, timeloop, loggingDisplayFrequency ), "Regular Timing Logger" ); + + // add regular refinement timing pool output + timeloop.addFuncAfterTimeStep( TimingPoolLogger( *timeloopRefinementTiming, timeloop, loggingDisplayFrequency ), "Refinement Timing Logger" ); + + // add level wise timing pool output + //if( numberOfLevels != uint_t(1)) + timeloop.addFuncAfterTimeStep( TimingPoolLogger( *timeloopRefinementTimingLevelwise, timeloop, loggingDisplayFrequency ), "Refinement Levelwise Timing Logger" ); + + // add PE timing tree output + timeloop.addFuncAfterTimeStep( TimingTreeLogger( timingTreePE, timeloop, loggingDisplayFrequency ), "PE Timing Tree Timing Logger" ); + + + //////////////////////// + // EXECUTE SIMULATION // + //////////////////////// + + uint_t loadEvaluationFrequency = refinementCheckFrequency; + TimingEvaluator timingEvaluator( *timeloopRefinementTimingLevelwise, timingTreePE, numberOfLevels ); + + // file for simulation infos + std::string infoFileName( baseFolder + "/simulation_info.txt"); + WALBERLA_ROOT_SECTION() + { + std::ofstream file; + file.open( infoFileName.c_str(), std::fstream::out | std::fstream::trunc ); + file << "#i\t t\t tSim\t tLB\t numProcs\t levelwise blocks (min/max/sum)\n"; + file.close(); + } + + // process local timing measurements and predicted loads + std::string processLocalFiles(baseFolder + "/processLocalFiles"); + WALBERLA_ROOT_SECTION() + { + filesystem::path tpath( processLocalFiles ); + if( !filesystem::exists( tpath ) ) + filesystem::create_directory( tpath ); + } + std::string measurementFileProcessName(processLocalFiles + "/measurements_" + std::to_string(MPIManager::instance()->rank()) + ".txt"); + { + std::ofstream file; + file.open( measurementFileProcessName.c_str(), std::fstream::out | std::fstream::trunc ); + file << "#i\t t\t mTotSim\t mLB\t mLBM\t mBH\t mCoup1\t mCoup2\t mRB\t cLBM\t cRB\t numBlocks\n"; + file.close(); + } + + std::string predictionFileProcessName(processLocalFiles + "/predictions_" + std::to_string(MPIManager::instance()->rank()) + ".txt"); + { + std::ofstream file; + file.open( predictionFileProcessName.c_str(), std::fstream::out | std::fstream::trunc ); + file << "#i\t t\t wlLBM\t wlBH\t wlCoup1\t wlCoup2\t wlRB\t edgecut\t numBlocks\n"; + file.close(); + } + + std::vector<std::string> LBMTimer; + LBMTimer.push_back("collide"); + LBMTimer.push_back("stream"); + LBMTimer.push_back("stream & collide"); + + std::vector<std::string> bhTimer; + bhTimer.push_back("boundary handling"); + + std::vector<std::string> couplingTimer1; + couplingTimer1.push_back("Body Mapping"); + std::vector<std::string> couplingTimer2; + couplingTimer2.push_back("PDF Restore"); + + std::vector<std::string> peTimer; + peTimer.push_back("Simulation Step.Collision Detection"); + peTimer.push_back("Simulation Step.Collision Response Integration"); + peTimer.push_back("Simulation Step.Collision Response Resolution.Collision Response Solving"); + + std::vector<std::string> LBMCommTimer; + LBMCommTimer.push_back("communication equal level [pack & send]"); + LBMCommTimer.push_back("communication equal level [wait & unpack]"); + + std::vector<std::string> peCommTimer; + //Adapt if using different collision response (like DEM!) + peCommTimer.push_back("Simulation Step.Collision Response Resolution.Velocity Sync"); + peCommTimer.push_back("Sync"); + + + real_t terminationPosition = expectedSedimentedHeight; + real_t terminationVelocity = real_t(0.05) * uRef; + + real_t oldmTotSim = real_t(0); + real_t oldmLB = real_t(0); + + uint_t measurementFileCounter = uint_t(0); + uint_t predictionFileCounter = uint_t(0); + + std::string loadEvaluationStep("load evaluation"); + + // time loop + for (uint_t i = 0; i < timesteps; ++i ) + { + + // evaluate measurements (note: reflect simulation behavior BEFORE the evaluation) + if( loadEvaluationFrequency > 0 && i % loadEvaluationFrequency == 0 && i > 0 && fileIO) + { + + timeloopTiming[loadEvaluationStep].start(); + + // write process local timing measurements to files (per process, per load balancing step) + { + + // evaluate all required timers + uint_t evalLevel = finestLevel; + + real_t mTotSim = ( timeloopTiming.timerExists("LBM refinement time step") ) ? timeloopTiming["LBM refinement time step"].total() : real_t(0); + + real_t mLB = ( timeloopTiming.timerExists("refinement checking") ) ? timeloopTiming["refinement checking"].total() : real_t(0); + + real_t mLBM = timingEvaluator.getTimings(LBMTimer, evalLevel); + real_t mBH = timingEvaluator.getTimings(bhTimer, evalLevel); + real_t mCoup1 = timingEvaluator.getTimings(couplingTimer1, evalLevel); + real_t mCoup2 = timingEvaluator.getTimings(couplingTimer2, evalLevel); + real_t mPE = timingEvaluator.getTimings(peTimer, evalLevel); + + real_t cLBM = timingEvaluator.getTimings(LBMCommTimer, evalLevel); + real_t cRB = timingEvaluator.getTimings(peCommTimer, evalLevel); + + auto & forest = blocks->getBlockForest(); + uint_t numBlocks = forest.getNumberOfBlocks(finestLevel); + + // write to process local file + std::ofstream file; + file.open( measurementFileProcessName.c_str(), std::ofstream::app ); + file << measurementFileCounter << "\t " << real_c(i) / tRef << "\t" + << mTotSim - oldmTotSim << "\t" << mLB - oldmLB << "\t" << mLBM << "\t" << mBH << "\t" << mCoup1 << "\t" + << mCoup2 << "\t" << mPE << "\t" << cLBM << "\t" << cRB << "\t" << numBlocks << "\n"; + file.close(); + + oldmTotSim = mTotSim; + oldmLB = mLB; + measurementFileCounter++; + + // reset timers to have measurement from evaluation to evaluation point + timeloopRefinementTimingLevelwise->clear(); + timingTreePE.reset(); + + } + + // evaluate general simulation infos (on root) + { + real_t totalTimeToCurrentTimestep, totalLBTimeToCurrentTimestep; + evaluateTotalSimulationTimePassed(timeloopTiming, totalTimeToCurrentTimestep, totalLBTimeToCurrentTimestep); + std::vector<math::DistributedSample> numberOfBlocksPerLevel(numberOfLevels); + + auto & forest = blocks->getBlockForest(); + for( uint_t lvl = 0; lvl < numberOfLevels; ++lvl) + { + uint_t numBlocks = forest.getNumberOfBlocks(lvl); + numberOfBlocksPerLevel[lvl].castToRealAndInsert(numBlocks); + } + + for( uint_t lvl = 0; lvl < numberOfLevels; ++lvl) + { + numberOfBlocksPerLevel[lvl].mpiGatherRoot(); + } + + WALBERLA_ROOT_SECTION() + { + std::ofstream file; + file.open( infoFileName.c_str(), std::ofstream::app ); + file << i << "\t " << real_c(i) / tRef << "\t" + << totalTimeToCurrentTimestep << "\t " << totalLBTimeToCurrentTimestep << "\t " << numberOfProcesses << "\t "; + + for( uint_t lvl = 0; lvl < numberOfLevels; ++lvl) + { + file << uint_c(numberOfBlocksPerLevel[numberOfLevels-1-lvl].min()) << "\t "; + file << uint_c(numberOfBlocksPerLevel[numberOfLevels-1-lvl].max()) << "\t "; + file << uint_c(numberOfBlocksPerLevel[numberOfLevels-1-lvl].sum()) << "\t "; + } + file << "\n"; + + file.close(); + } + } + + timeloopTiming[loadEvaluationStep].end(); + + } + + + if( refinementCheckFrequency != 0 && i % refinementCheckFrequency == 0) + { + + WALBERLA_LOG_INFO_ON_ROOT("Checking for refinement and load balancing...") + + std::string refinementCheckStep("refinement checking"); + timeloopTiming[refinementCheckStep].start(); + + if( loadEvaluationStrategy != "LBM" ) { + + // first evaluate all data that is required for the refinement checks + + // update info collections for the particle presence based check and the load balancing: + auto &forest = blocks->getBlockForest(); + pe_coupling::createWithNeighborhood<BoundaryHandling_T>(forest, boundaryHandlingID, bodyStorageID, ccdID, + fcdID, numPeSubCycles, *couplingInfoCollection); + pe::createWithNeighborhood(forest, bodyStorageID, *peInfoCollection); + + // for the fluid property based check: + if (useVorticityCriterion || useGradientCriterion) { + velocityFieldWriter(); + (*velocityCommunicationScheme)(); + } + + WALBERLA_LOG_INFO_ON_ROOT("Refreshing blockforest...") + + // check refinement criterions and refine/coarsen if necessary + uint_t stampBefore = blocks->getBlockForest().getModificationStamp(); + blocks->refresh(); + uint_t stampAfter = blocks->getBlockForest().getModificationStamp(); + + bool recreatingNecessary = false; + + if (stampBefore != stampAfter) { + recreatingNecessary = true; + } + + bool reducedRecreationFlag = mpi::allReduce(recreatingNecessary, mpi::LOGICAL_OR); + + if (reducedRecreationFlag != recreatingNecessary) { + WALBERLA_LOG_INFO("Reduced recreation flag different from individual one"); + } + + recreatingNecessary = reducedRecreationFlag; + + if (recreatingNecessary) { + + WALBERLA_LOG_INFO_ON_ROOT("Recreating data structures.."); + + // rebuild PE data structures + pe::clearSynchronization(blockforest, bodyStorageID); + + syncCall(); + + for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) { + pe::ccd::ICCD *ccd = blockIt->getData<pe::ccd::ICCD>(ccdID); + ccd->reloadBodies(); + } + + clearBoundaryHandling(forest, boundaryHandlingID); + clearBodyField(forest, bodyFieldID); + + if (averageForceTorqueOverTwoTimSteps) { + + // clear containers from old values + bodiesFTContainer1->clear(); + bodiesFTContainer2->clear(); + + // initialize FT container on all blocks anew, i.e. with the currently acting force/torque, which is zero after the refinement step + bodiesFTContainer2->store(); + + // set force scaling factor to one after refinement since force history is not present on blocks after refinement + // thus the usual averaging of 1/2 (over two time steps) can not be carried out, i.e. it would lead to 1/2 of the acting force + // the scaling factor is thus adapted for the next timestep to 1, and then changed back to 1/2 (in the timeloop) + setForceScalingFactorToOne(); + } + + recreateBoundaryHandling(forest, boundaryHandlingID); + + // re-set the no-slip flags along the walls + pe_coupling::mapMovingBodies<BoundaryHandling_T>(*blocks, boundaryHandlingID, bodyStorageID, + *globalBodyStorage, bodyFieldID, MO_Flag, + pe_coupling::selectGlobalBodies); + + // re-map the body into the domain (initializing the bodyField as well) + pe_coupling::mapMovingBodies<BoundaryHandling_T>(*blocks, boundaryHandlingID, bodyStorageID, + *globalBodyStorage, bodyFieldID, MO_Flag, + pe_coupling::selectRegularBodies); + } + + } + + timeloopTiming[refinementCheckStep].end(); + } + + // evaluate predictions (note: reflect the predictions for all upcoming simulations, thus the corresponding measurements have to be taken afterwards) + if( loadEvaluationFrequency > 0 && i % loadEvaluationFrequency == 0 && fileIO) + { + + timeloopTiming[loadEvaluationStep].start(); + + // write process local load predictions to files (per process, per load balancing step) + { + + real_t wlLBM = real_t(0); + real_t wlBH = real_t(0); + real_t wlCoup1 = real_t(0); + real_t wlCoup2 = real_t(0); + real_t wlRB = real_t(0); + + auto & forest = blocks->getBlockForest(); + pe_coupling::createWithNeighborhood<BoundaryHandling_T>(forest, boundaryHandlingID, bodyStorageID, ccdID, fcdID, numPeSubCycles, *couplingInfoCollection); + + for( auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt ) { + blockforest::Block *block = static_cast<blockforest::Block *> (&(*blockIt)); + auto blockID = block->getId(); + auto infoIt = couplingInfoCollection->find(blockID); + auto blockInfo = infoIt->second; + + if( useEllipsoids ) + { + + WALBERLA_ABORT("Not yet implemented!"); + + } + else + { + wlLBM += fittedLBMWeightEvaluationFunctionSpheres(blockInfo); + wlBH += fittedBHWeightEvaluationFunctionSpheres(blockInfo); + wlCoup1 += fittedCoupling1WeightEvaluationFunctionSpheres(blockInfo); + wlCoup2 += fittedCoupling2WeightEvaluationFunctionSpheres(blockInfo); + wlRB += fittedPEWeightEvaluationFunctionSpheres(blockInfo); + } + + } + + // note: we count the edge weight doubled here in total (to and from the other process). ParMetis only counts one direction. + uint_t edgecut = evaluateEdgeCut(forest); + + uint_t numBlocks = forest.getNumberOfBlocks(finestLevel); + + std::ofstream file; + file.open( predictionFileProcessName.c_str(), std::ofstream::app ); + file << predictionFileCounter << "\t " << real_c(i) / tRef << "\t" + << wlLBM << "\t" << wlBH << "\t" << wlCoup1 << "\t" << wlCoup2 << "\t" << wlRB << "\t" + << edgecut << "\t" << numBlocks << "\n"; + file.close(); + + predictionFileCounter++;; + } + + timeloopTiming[loadEvaluationStep].end(); + + } + + // perform a single simulation step + timeloop.singleStep( timeloopTiming ); + + + if( logging ) + { + real_t curMeanPos = logger->getMeanPosition(); + real_t curMeanVel = logger->getMeanVelocity(); + if( curMeanPos <= terminationPosition && std::fabs(curMeanVel) < terminationVelocity ) + { + WALBERLA_LOG_INFO_ON_ROOT("Sediments passed terminal mean position " << terminationPosition << " and reached velocity " << curMeanVel << " - terminating simulation!"); + break; + } + } + + } + + timeloopTiming.logResultOnRoot(); + + + return EXIT_SUCCESS; +} + +} // namespace amr_sediment_settling + +int main( int argc, char **argv ){ + amr_sediment_settling::main(argc, argv); +} diff --git a/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/CMakeLists.txt b/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/CMakeLists.txt new file mode 100644 index 000000000..5d3db5ef3 --- /dev/null +++ b/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/CMakeLists.txt @@ -0,0 +1,5 @@ +waLBerla_add_executable( NAME WorkloadEvaluation FILES WorkloadEvaluation.cpp DEPENDS blockforest boundary core field lbm pe pe_coupling postprocessing stencil timeloop vtk ) + +if( WALBERLA_BUILD_WITH_PARMETIS ) + waLBerla_add_executable( NAME AMRSedimentSettling FILES AMRSedimentSettling.cpp DEPENDS blockforest boundary core field lbm pe pe_coupling postprocessing stencil timeloop vtk ) +endif() \ No newline at end of file diff --git a/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/WorkloadEvaluation.cpp b/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/WorkloadEvaluation.cpp new file mode 100644 index 000000000..038b054a5 --- /dev/null +++ b/apps/benchmarks/AdaptiveMeshRefinementFluidParticleCoupling/WorkloadEvaluation.cpp @@ -0,0 +1,985 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file WorkLoadEvaluation.cpp +//! \ingroup pe_coupling +//! \author Christoph Rettinger <christoph.rettinger@fau.de> +// +//====================================================================================================================== + +#include "blockforest/Initialization.h" +#include "blockforest/communication/UniformBufferedScheme.h" + +#include "boundary/all.h" + +#include "core/DataTypes.h" +#include "core/Environment.h" +#include "core/debug/Debug.h" +#include "core/debug/TestSubsystem.h" +#include "core/logging/Logging.h" +#include "core/math/all.h" +#include "core/SharedFunctor.h" +#include "core/timing/RemainingTimeLogger.h" +#include "core/mpi/MPIManager.h" +#include "core/mpi/Reduce.h" +#include "core/mpi/Broadcast.h" + +#include "domain_decomposition/SharedSweep.h" + +#include "field/AddToStorage.h" +#include "field/communication/PackInfo.h" + +#include "lbm/boundary/all.h" +#include "lbm/communication/PdfFieldPackInfo.h" +#include "lbm/field/AddToStorage.h" +#include "lbm/field/MacroscopicValueCalculation.h" +#include "lbm/field/PdfField.h" +#include "lbm/lattice_model/D3Q19.h" +#include "lbm/lattice_model/ForceModel.h" +#include "lbm/sweeps/CellwiseSweep.h" +#include "lbm/sweeps/SweepWrappers.h" +#include "lbm/BlockForestEvaluation.h" + +#include "stencil/D3Q27.h" + +#include "timeloop/SweepTimeloop.h" + +#include "pe/basic.h" +#include "pe/fcd/GJKEPACollideFunctor.h" +#include "pe/vtk/BodyVtkOutput.h" +#include "pe/vtk/EllipsoidVtkOutput.h" +#include "pe/vtk/SphereVtkOutput.h" + +#include "pe_coupling/mapping/all.h" +#include "pe_coupling/momentum_exchange_method/all.h" +#include "pe_coupling/utility/all.h" + +#include "vtk/all.h" +#include "field/vtk/all.h" +#include "lbm/vtk/all.h" + +#include <vector> +#include <iomanip> +#include <iostream> + +namespace workload_evaluation +{ + +/////////// +// USING // +/////////// + +using namespace walberla; +using walberla::uint_t; + +// PDF field, flag field & body field +typedef lbm::D3Q19< lbm::collision_model::TRT, false> LatticeModel_T; + +typedef LatticeModel_T::Stencil Stencil_T; +typedef lbm::PdfField< LatticeModel_T > PdfField_T; + +typedef walberla::uint8_t flag_t; +typedef FlagField< flag_t > FlagField_T; +typedef GhostLayerField< pe::BodyID, 1 > BodyField_T; + +const uint_t FieldGhostLayers = 1; + +// boundary handling +typedef pe_coupling::CurvedLinear< LatticeModel_T, FlagField_T > MO_CLI_T; + +typedef boost::tuples::tuple<MO_CLI_T > BoundaryConditions_T; +typedef BoundaryHandling< FlagField_T, Stencil_T, BoundaryConditions_T > BoundaryHandling_T; + +typedef boost::tuple<pe::Sphere, pe::Ellipsoid, pe::Plane> BodyTypeTuple; + +/////////// +// FLAGS // +/////////// + +const FlagUID Fluid_Flag ( "fluid" ); +const FlagUID MO_CLI_Flag ( "moving obstacle CLI" ); +const FlagUID FormerMO_Flag( "former moving obstacle" ); + + +///////////////////////////////////// +// BOUNDARY HANDLING CUSTOMIZATION // +///////////////////////////////////// + +class MyBoundaryHandling +{ +public: + + MyBoundaryHandling( const BlockDataID & flagFieldID, const BlockDataID & pdfFieldID, const BlockDataID & bodyFieldID, bool useEntireFieldTraversal ) : + flagFieldID_( flagFieldID ), pdfFieldID_( pdfFieldID ), bodyFieldID_ ( bodyFieldID ), useEntireFieldTraversal_( useEntireFieldTraversal ) {} + + BoundaryHandling_T * operator()( IBlock* const block, const StructuredBlockStorage* const storage ) const; + +private: + + const BlockDataID flagFieldID_; + const BlockDataID pdfFieldID_; + const BlockDataID bodyFieldID_; + bool useEntireFieldTraversal_; + +}; // class MyBoundaryHandling + +BoundaryHandling_T * MyBoundaryHandling::operator()( IBlock * const block, const StructuredBlockStorage * const storage ) const +{ + WALBERLA_ASSERT_NOT_NULLPTR( block ); + WALBERLA_ASSERT_NOT_NULLPTR( storage ); + + FlagField_T * flagField = block->getData< FlagField_T >( flagFieldID_ ); + PdfField_T * pdfField = block->getData< PdfField_T > ( pdfFieldID_ ); + BodyField_T * bodyField = block->getData< BodyField_T >( bodyFieldID_ ); + + const auto fluid = flagField->flagExists( Fluid_Flag ) ? flagField->getFlag( Fluid_Flag ) : flagField->registerFlag( Fluid_Flag ); + + BoundaryHandling_T::Mode mode = (useEntireFieldTraversal_) ? BoundaryHandling_T::Mode::ENTIRE_FIELD_TRAVERSAL : BoundaryHandling_T::Mode::OPTIMIZED_SPARSE_TRAVERSAL ; + + BoundaryHandling_T * handling = new BoundaryHandling_T( "fixed obstacle boundary handling", flagField, fluid, + boost::tuples::make_tuple( MO_CLI_T ( "MO_CLI", MO_CLI_Flag, pdfField, flagField, bodyField, fluid, *storage, *block ) ), + mode); + + // boundaries are set by mapping the planes into the domain + + handling->fillWithDomain( FieldGhostLayers ); + + return handling; +} + + +class CollisionPropertiesEvaluator +{ +public: + CollisionPropertiesEvaluator( pe::cr::ICR & collisionResponse ) : collisionResponse_( collisionResponse ) + {} + + real_t get() + { + real_t maxPen = std::fabs(collisionResponse_.getMaximumPenetration()); + WALBERLA_MPI_SECTION() + { + mpi::allReduceInplace( maxPen, mpi::MAX ); + } + return maxPen; + } + +private: + pe::cr::ICR & collisionResponse_; +}; + +class ContactDistanceEvaluator +{ +public: + ContactDistanceEvaluator( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID ccdID, const BlockDataID fcdID ) : + blocks_( blocks ), ccdID_(ccdID), fcdID_(fcdID) + {} + + real_t get() + { + real_t maximumPenetration = real_t(0); + for (auto it = blocks_->begin(); it != blocks_->end(); ++it) { + IBlock ¤tBlock = *it; + + pe::ccd::ICCD *ccd = currentBlock.getData<pe::ccd::ICCD>(ccdID_); + pe::fcd::IFCD *fcd = currentBlock.getData<pe::fcd::IFCD>(fcdID_); + ccd->generatePossibleContacts(); + pe::Contacts& contacts = fcd->generateContacts( ccd->getPossibleContacts() ); + size_t numContacts( contacts.size() ); + + for( size_t i = 0; i < numContacts; ++i ) + { + const pe::ContactID c( &contacts[i] ); + maximumPenetration = std::max( maximumPenetration, std::fabs(c->getDistance())); + } + } + WALBERLA_MPI_SECTION() + { + mpi::allReduceInplace( maximumPenetration, mpi::MAX ); + } + return maximumPenetration; + } +private: + shared_ptr< StructuredBlockStorage > blocks_; + const BlockDataID ccdID_; + const BlockDataID fcdID_; +}; + +class MaxVelocityEvaluator +{ +public: + MaxVelocityEvaluator( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID bodyStorageID ) : + blocks_( blocks ), bodyStorageID_(bodyStorageID) + {} + + Vector3<real_t> get() + { + real_t maxVelX = real_t(0); + real_t maxVelY = real_t(0); + real_t maxVelZ = real_t(0); + + for (auto it = blocks_->begin(); it != blocks_->end(); ++it) { + + for( auto bodyIt = pe::LocalBodyIterator::begin(*it, bodyStorageID_ ); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt ) { + auto vel = bodyIt->getLinearVel(); + maxVelX = std::max(maxVelX, std::fabs(vel[0])); + maxVelY = std::max(maxVelY, std::fabs(vel[1])); + maxVelZ = std::max(maxVelZ, std::fabs(vel[2])); + } + } + WALBERLA_MPI_SECTION() + { + mpi::allReduceInplace( maxVelX, mpi::MAX ); + mpi::allReduceInplace( maxVelY, mpi::MAX ); + mpi::allReduceInplace( maxVelZ, mpi::MAX ); + } + return Vector3<real_t>(maxVelX, maxVelY, maxVelZ); + } + + real_t getMagnitude() + { + real_t magnitude = real_t(0); + + for (auto it = blocks_->begin(); it != blocks_->end(); ++it) { + + for( auto bodyIt = pe::LocalBodyIterator::begin(*it, bodyStorageID_ ); bodyIt != pe::LocalBodyIterator::end(); ++bodyIt ) { + magnitude = std::max(magnitude, bodyIt->getLinearVel().length()); + } + } + WALBERLA_MPI_SECTION() + { + mpi::allReduceInplace( magnitude, mpi::MAX ); + } + return magnitude; + + } + +private: + shared_ptr< StructuredBlockStorage > blocks_; + const BlockDataID bodyStorageID_; +}; + +void evaluateFluidQuantities(const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID boundaryHandlingID, + uint_t & numCells, uint_t & numFluidCells, uint_t & numNBCells ) +{ + + for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) + { + BoundaryHandling_T * boundaryHandling = blockIt->getData< BoundaryHandling_T >( boundaryHandlingID ); + auto xyzSize = boundaryHandling->getFlagField()->xyzSize(); + numCells += xyzSize.numCells(); + + for( cell_idx_t z = cell_idx_t(xyzSize.zMin()); z <= cell_idx_t(xyzSize.zMax()); ++z ){ + for( cell_idx_t y = cell_idx_t(xyzSize.yMin()); y <= cell_idx_t(xyzSize.yMax()); ++y ){ + for( cell_idx_t x = cell_idx_t(xyzSize.xMin()); x <= cell_idx_t(xyzSize.xMax()); ++x ) { + if (boundaryHandling->isDomain(x, y, z)) { + ++numFluidCells; + } + if (boundaryHandling->isNearBoundary(x, y, z)) { + ++numNBCells; + } + } + } + } + } +} + +void evaluatePEQuantities( const shared_ptr< StructuredBlockStorage > & blocks, const BlockDataID bodyStorageID, + const BlockDataID ccdID, const BlockDataID fcdID, + uint_t & numLocalParticles, uint_t & numShadowParticles, uint_t & numContacts) +{ + + for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) { + pe::Storage * bodyStorage = blockIt->getData<pe::Storage>(bodyStorageID); + pe::BodyStorage const & localStorage = (*bodyStorage)[pe::StorageType::LOCAL]; + pe::BodyStorage const & shadowStorage = (*bodyStorage)[pe::StorageType::SHADOW]; + numLocalParticles += localStorage.size(); + numShadowParticles += shadowStorage.size(); + + pe::ccd::ICCD * ccd = blockIt->getData<pe::ccd::ICCD>(ccdID); + pe::fcd::IFCD * fcd = blockIt->getData<pe::fcd::IFCD>(fcdID); + ccd->generatePossibleContacts(); + pe::Contacts& contacts = fcd->generateContacts( ccd->getPossibleContacts() ); + numContacts += contacts.size(); + } +} + +void evaluateTimers(WcTimingPool & timingPool, WcTimingTree & peTimingTree, + const std::vector<std::vector<std::string> > & timerKeys, + std::vector<real_t> & timings ) +{ + + for( auto timingsIt = timings.begin(); timingsIt != timings.end(); ++timingsIt ) + { + *timingsIt = real_t(0); + } + + timingPool.unifyRegisteredTimersAcrossProcesses(); + peTimingTree.synchronize(); + + real_t scalingFactor = real_t(1000); // milliseconds + + for(uint_t i = uint_t(0); i < timerKeys.size(); ++i ) + { + auto keys = timerKeys[i]; + for(auto keyIt = keys.begin(); keyIt != keys.end(); ++keyIt) + { + std::string timerName = *keyIt; + if(timingPool.timerExists(timerName)) + { + timings[i] += real_c(timingPool[timerName].total()) * scalingFactor; + } + if(peTimingTree.timerExists(timerName)) + { + timings[i] += real_c(peTimingTree[timerName].total()) * scalingFactor; + } + } + + } +} + +void resetTimers(WcTimingPool & timingPool, WcTimingTree & peTimingTree) +{ + timingPool.clear(); + peTimingTree.reset(); +} + +//******************************************************************************************************************* +/*! Application to evaluate the workload (time measurements) for a fluid-particle simulation + * + * This application is used in the paper + * Rettinger, Ruede - "Dynamic Load Balancing Techniques for Particulate Flow Simulations", submitted to Computation + * in Section 3 to develop and calibrate the workload estimator. + * The setup features settling particle inside a horizontally periodic box. + * A comprehensive description is given in Sec. 3.3 of the paper. + * It uses 4 x 4 x 5 blocks for domain partitioning. + * For each block ( = each process), the block local quantities are evaluated as well as the timing infos of + * the fluid-particle interaction algorithm. Those infos are then written to files that can be used later on + * for function fitting to obtain a workload estimator. + * + * NOTE: Since this estimator relies on timing measurements, this evaluation procedure should be carried out everytime + * a different implementation, hardware or algorithm is used. + * + */ +//******************************************************************************************************************* +int main( int argc, char **argv ) +{ + debug::enterTestMode(); + + mpi::Environment env( argc, argv ); + + + real_t solidVolumeFraction = real_t(0.2); + + // LBM / numerical parameters + uint_t blockSize = uint_t(32); + real_t uSettling = real_t(0.1); // characteristic settling velocity + real_t diameter = real_t(10); + + real_t Ga = real_t(30); //Galileo number + uint_t numPeSubCycles = uint_t(10); + + uint_t vtkIOFreq = 0; + real_t timestepsNonDim = real_t(2.5); + uint_t numSamples = uint_t(2000); + std::string baseFolder = "workload_files"; // folder for vtk and file output + + bool useEllipsoids = false; + bool optimizeForSmallObstacleFraction = false; + bool noFileOutput = false; + bool fixBodies = false; + bool useEntireFieldTraversal = true; + bool averageForceTorqueOverTwoTimSteps = true; + bool useFusedStreamCollide = false; + + + for( int i = 1; i < argc; ++i ) + { + if( std::strcmp( argv[i], "--vtkIOFreq" ) == 0 ) { vtkIOFreq = uint_c( std::atof( argv[++i] ) ); continue; } + if( std::strcmp( argv[i], "--noFileOutput" ) == 0 ) { noFileOutput = true; continue; } + if( std::strcmp( argv[i], "--basefolder" ) == 0 ) { baseFolder = argv[++i]; continue; } + if( std::strcmp( argv[i], "--solidVolumeFraction" ) == 0 ) { solidVolumeFraction = real_c(std::atof( argv[++i] )); continue; } + if( std::strcmp( argv[i], "--diameter" ) == 0 ) { diameter = real_c(std::atof( argv[++i] )); continue; } + if( std::strcmp( argv[i], "--blockSize" ) == 0 ) { blockSize = uint_c(std::atof( argv[++i]) ); continue; } + if( std::strcmp( argv[i], "--uSettling" ) == 0 ) { uSettling = real_c(std::atof( argv[++i] )); continue; } + if( std::strcmp( argv[i], "--Ga" ) == 0 ) { Ga = real_c(std::atof( argv[++i] )); continue; } + if( std::strcmp( argv[i], "--timestepsNonDim" ) == 0 ) { timestepsNonDim = real_c(std::atof( argv[++i] )); continue; } + if( std::strcmp( argv[i], "--numPeSubCycles" ) == 0 ) { numPeSubCycles = uint_c(std::atof( argv[++i] )); continue; } + if( std::strcmp( argv[i], "--useEllipsoids" ) == 0 ) { useEllipsoids = true; continue; } + if( std::strcmp( argv[i], "--optSmallSVF" ) == 0 ) { optimizeForSmallObstacleFraction = true; continue; } + if( std::strcmp( argv[i], "--fixBodies" ) == 0 ) { fixBodies = true; continue; } + if( std::strcmp( argv[i], "--useEntireFieldTraversal" ) == 0 ) { useEntireFieldTraversal = true; continue; } + if( std::strcmp( argv[i], "--numSamples" ) == 0 ) { numSamples = uint_c(std::atof( argv[++i] )); continue; } + if( std::strcmp( argv[i], "--noForceAveraging" ) == 0 ) { averageForceTorqueOverTwoTimSteps = false; continue; } + if( std::strcmp( argv[i], "--useFusedStreamCollide" ) == 0 ) { useFusedStreamCollide = true; continue; } + WALBERLA_ABORT("Unrecognized command line argument found: " << argv[i]); + } + + WALBERLA_CHECK(diameter > real_t(1)); + WALBERLA_CHECK(uSettling > real_t(0)); + WALBERLA_CHECK(Ga > real_t(0)); + WALBERLA_CHECK(solidVolumeFraction > real_t(0)); + WALBERLA_CHECK(solidVolumeFraction < real_t(0.65)); + + /////////////////////////// + // SIMULATION PROPERTIES // + /////////////////////////// + + const uint_t XBlocks = uint_t(4); + const uint_t YBlocks = uint_t(4); + const uint_t ZBlocks = uint_t(5); + + if( MPIManager::instance()->numProcesses() != XBlocks * YBlocks * ZBlocks ) + { + WALBERLA_LOG_WARNING_ON_ROOT("WARNING! You have specified less or more processes than number of blocks -> the time measurements are no longer blockwise!") + } + + if( diameter > real_c(blockSize) ) + { + WALBERLA_LOG_WARNING_ON_ROOT("The bodies might be too large to work with the currently used synchronization!"); + } + + + WALBERLA_LOG_INFO_ON_ROOT("Using setup with sedimenting particles -> creating two planes and applying gravitational force") + if( useEllipsoids ){ WALBERLA_LOG_INFO_ON_ROOT("using ELLIPSOIDS"); } + else{ WALBERLA_LOG_INFO_ON_ROOT("using SPHERES"); } + + + const uint_t XCells = blockSize * XBlocks; + const uint_t YCells = blockSize * YBlocks; + const uint_t ZCells = blockSize * ZBlocks; + + const real_t topWallOffset = real_t(1.05) * real_t(blockSize); // move the top wall downwards to take away a certain portion of the overall domain + + + // determine number of spheres to generate, if necessary scale diameter a bit to reach desired solid volume fraction + real_t domainHeight = real_c(ZCells) - topWallOffset; + real_t fluidVolume = real_c( XCells * YCells ) * domainHeight; + real_t solidVolume = solidVolumeFraction * fluidVolume; + uint_t numberOfParticles = uint_c(std::ceil(solidVolume / ( math::M_PI / real_t(6) * diameter * diameter * diameter ))); + diameter = std::cbrt( solidVolume / ( real_c(numberOfParticles) * math::M_PI / real_t(6) ) ); + + real_t densityRatio = real_t(2.5); + + real_t viscosity = uSettling * diameter / Ga; + const real_t omega = lbm::collision_model::omegaFromViscosity(viscosity); + + const real_t gravitationalAcceleration = uSettling * uSettling / ( (densityRatio-real_t(1)) * diameter ); + + real_t tref = diameter / uSettling; + real_t Tref = domainHeight / uSettling; + + uint_t timesteps = uint_c(timestepsNonDim * Tref); + + const real_t dx = real_c(1); + WALBERLA_LOG_INFO_ON_ROOT("viscosity = " << viscosity); + WALBERLA_LOG_INFO_ON_ROOT("tau = " << real_t(1)/omega); + WALBERLA_LOG_INFO_ON_ROOT("diameter = " << diameter); + WALBERLA_LOG_INFO_ON_ROOT("solid volume fraction = " << solidVolumeFraction); + WALBERLA_LOG_INFO_ON_ROOT("domain size (in cells) = " << XCells << " x " << ZCells << " x " << ZCells); + WALBERLA_LOG_INFO_ON_ROOT("number of bodies = " << numberOfParticles); + WALBERLA_LOG_INFO_ON_ROOT("gravitational acceleration = " << gravitationalAcceleration); + WALBERLA_LOG_INFO_ON_ROOT("Ga = " << Ga); + WALBERLA_LOG_INFO_ON_ROOT("uSettling = " << uSettling); + WALBERLA_LOG_INFO_ON_ROOT("tref = " << tref); + WALBERLA_LOG_INFO_ON_ROOT("Tref = " << Tref); + WALBERLA_LOG_INFO_ON_ROOT("timesteps = " << timesteps); + WALBERLA_LOG_INFO_ON_ROOT("number of workload samples = " << numSamples); + + // create folder to store logging files + WALBERLA_ROOT_SECTION() + { + walberla::filesystem::path path1( baseFolder ); + if( !walberla::filesystem::exists( path1 ) ) + walberla::filesystem::create_directory( path1 ); + } + + + /////////////////////////// + // BLOCK STRUCTURE SETUP // + /////////////////////////// + + Vector3<bool> periodicity( true ); + periodicity[2] = false; + + // create domain + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid( XBlocks, YBlocks, ZBlocks, blockSize, blockSize, blockSize, dx, + 0, false, false, //one block per process! + periodicity[0], periodicity[1], periodicity[2], //periodicity + false ); + + //////// + // PE // + //////// + + shared_ptr<pe::BodyStorage> globalBodyStorage = make_shared<pe::BodyStorage>(); + pe::SetBodyTypeIDs<BodyTypeTuple>::execute(); + auto bodyStorageID = blocks->addBlockData(pe::createStorageDataHandling<BodyTypeTuple>(), "pe Body Storage"); + auto ccdID = blocks->addBlockData(pe::ccd::createHashGridsDataHandling( globalBodyStorage, bodyStorageID ), "CCD"); + BlockDataID fcdID = (useEllipsoids) ? blocks->addBlockData( pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, pe::fcd::GJKEPACollideFunctor>(), "FCD" ) + : blocks->addBlockData(pe::fcd::createGenericFCDDataHandling<BodyTypeTuple, pe::fcd::AnalyticCollideFunctor>(), "FCD"); + + WcTimingTree timingTreePE; + + pe::cr::HCSITS cr(globalBodyStorage, blocks->getBlockStoragePointer(), bodyStorageID, ccdID, fcdID, &timingTreePE ); + cr.setMaxIterations(10); + cr.setRelaxationModel( pe::cr::HardContactSemiImplicitTimesteppingSolvers::ApproximateInelasticCoulombContactByDecoupling ); + cr.setErrorReductionParameter(real_t(0.8)); + + ///////////////// + // PE COUPLING // + ///////////////// + + // connect to pe + const real_t overlap = real_c( 1.5 ) * dx; + + std::function<void(void)> syncCall = std::bind( pe::syncNextNeighbors<BodyTypeTuple>, std::ref(blocks->getBlockForest()), bodyStorageID, &timingTreePE, overlap, false ); + + auto generationDomain = AABB( real_t(0), real_t(0), real_t(0), real_c(XCells), real_c(YCells), real_c(ZCells) - topWallOffset); + auto peMaterial = pe::createMaterial( "mat", densityRatio, real_t(1), real_t(0.25), real_t(0.25), real_t(0), real_t(200), real_t(100), real_t(100), real_t(100) ); + + // create two planes at bottom and top of domain + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,1), Vector3<real_t>(0,0,0), peMaterial ); + pe::createPlane( *globalBodyStorage, 0, Vector3<real_t>(0,0,-1), Vector3<real_t>(0,0,real_c(ZCells)-topWallOffset), peMaterial ); + + real_t xParticle = real_t(0); + real_t yParticle = real_t(0); + real_t zParticle = real_t(0); + + for( uint_t nPart = 0; nPart < numberOfParticles; ++nPart ) + { + + WALBERLA_ROOT_SECTION() + { + xParticle = math::realRandom<real_t>(generationDomain.xMin(), generationDomain.xMax()); + yParticle = math::realRandom<real_t>(generationDomain.yMin(), generationDomain.yMax()); + zParticle = math::realRandom<real_t>(generationDomain.zMin(), generationDomain.zMax()); + } + + WALBERLA_MPI_SECTION() + { + mpi::broadcastObject( xParticle ); + mpi::broadcastObject( yParticle ); + mpi::broadcastObject( zParticle ); + } + + if( useEllipsoids ) + { + // prolate ellipsoids + real_t axisFactor = real_t(1.5); + real_t axisFactor2 = std::sqrt(real_t(1)/axisFactor); + real_t radius = diameter * real_t(0.5); + pe::createEllipsoid( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>( xParticle, yParticle, zParticle ), Vector3<real_t>(axisFactor*radius, axisFactor2*radius, axisFactor2*radius), peMaterial ); + + } else{ + pe::createSphere( *globalBodyStorage, blocks->getBlockStorage(), bodyStorageID, 0, Vector3<real_t>( xParticle, yParticle, zParticle ), diameter * real_t(0.5), peMaterial ); + } + + } + + syncCall(); + + // resolve possible overlaps of the particles due to the random initialization + + // 100 iterations of solver to resolve all major overlaps + { + for( uint_t pet = uint_t(1); pet <= uint_t(100); ++pet ) + { + cr.timestep( real_t(1) ); + syncCall(); + + // reset all velocities to zero + for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) + { + for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt ) + { + bodyIt->setLinearVel(Vector3<real_t>(real_t(0))); + bodyIt->setAngularVel(Vector3<real_t>(real_t(0))); + } + } + } + } + + + // resolve remaining overlaps via particle simulation + { + const uint_t initialPeSteps = uint_t(2000); + const real_t dt_PE_init = real_t(1); + const real_t overlapLimit = real_t(0.001) * diameter; + + WALBERLA_LOG_INFO_ON_ROOT("Particle creation done --- resolving overlaps with goal all < " << overlapLimit / diameter * real_t(100) << "%"); + + CollisionPropertiesEvaluator collisionPropertiesEvaluator( cr ); + + ContactDistanceEvaluator contactDistanceEvaluator(blocks, ccdID, fcdID); + MaxVelocityEvaluator maxVelEvaluator(blocks, bodyStorageID); + + for( uint_t pet = uint_t(1); pet <= initialPeSteps; ++pet ) + { + cr.timestep( dt_PE_init ); + syncCall(); + real_t maxPen = collisionPropertiesEvaluator.get(); + + if( maxPen < overlapLimit ) + { + WALBERLA_LOG_INFO_ON_ROOT("Carried out " << pet << " PE-only time steps to resolve initial overlaps"); + WALBERLA_LOG_INFO_ON_ROOT("Final max penetration from cr is " << maxPen << " = " << maxPen / diameter * real_t(100) << "%"); + + break; + } + + real_t maxMagnitude = maxVelEvaluator.getMagnitude(); + + if( maxMagnitude * dt_PE_init > overlapLimit) + { + // avoid too large response velocities by setting them to zero + for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) + { + for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt ) + { + bodyIt->setLinearVel(Vector3<real_t>(real_t(0))); + bodyIt->setAngularVel(Vector3<real_t>(real_t(0))); + } + } + } + else + { + cr.setErrorReductionParameter(real_t(0.8)); + } + + if( pet % uint_t(20) == uint_t(0) ) + { + WALBERLA_LOG_INFO_ON_ROOT(pet << " - current max overlap = " << maxPen / diameter * real_t(100) << "%, max vel magnitude = " << maxMagnitude ); + } + } + } + + // reset all velocities to zero + Vector3<real_t> initialBodyVelocity(real_t(0)); + + WALBERLA_LOG_INFO_ON_ROOT("Setting initial velocity " << initialBodyVelocity << " of all bodies"); + for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) + { + for( auto bodyIt = pe::BodyIterator::begin( *blockIt, bodyStorageID); bodyIt != pe::BodyIterator::end(); ++bodyIt ) + { + bodyIt->setLinearVel(initialBodyVelocity); + bodyIt->setAngularVel(Vector3<real_t>(real_t(0))); + } + } + + /////////////////////// + // ADD DATA TO BLOCKS // + //////////////////////// + + // create the lattice model + LatticeModel_T latticeModel = LatticeModel_T( lbm::collision_model::TRT::constructWithMagicNumber( omega ) ); + + // add PDF field + BlockDataID pdfFieldID = lbm::addPdfFieldToStorage< LatticeModel_T >( blocks, "pdf field (zyxf)", latticeModel, + Vector3< real_t >( real_t(0) ), real_t(1), + uint_t(1), field::zyxf ); + + // add flag field + BlockDataID flagFieldID = field::addFlagFieldToStorage<FlagField_T>( blocks, "flag field" ); + + // add body field + BlockDataID bodyFieldID = field::addToStorage<BodyField_T>( blocks, "body field", NULL, field::zyxf ); + + // add boundary handling & initialize outer domain boundaries + BlockDataID boundaryHandlingID = blocks->addStructuredBlockData< BoundaryHandling_T >( + MyBoundaryHandling( flagFieldID, pdfFieldID, bodyFieldID, useEntireFieldTraversal ), "boundary handling" ); + + + // initially map pe bodies into the LBM simulation + pe_coupling::mapMovingBodies< BoundaryHandling_T >( *blocks, boundaryHandlingID, bodyStorageID, *globalBodyStorage, bodyFieldID, MO_CLI_Flag ); + + lbm::BlockForestEvaluation<FlagField_T> bfEval(blocks, flagFieldID, Fluid_Flag); + + WALBERLA_LOG_INFO_ON_ROOT(bfEval.loggingString()); + + /////////////// + // TIME LOOP // + /////////////// + + // create the timeloop + SweepTimeloop timeloop( blocks->getBlockStorage(), timesteps ); + + if( vtkIOFreq != uint_t(0) ) + { + // pe bodies + if(useEllipsoids) + { + auto bodyVtkOutput = make_shared<pe::EllipsoidVtkOutput>( bodyStorageID, blocks->getBlockStorage() ); + auto bodyVTK = vtk::createVTKOutput_PointData( bodyVtkOutput, "bodies", vtkIOFreq, baseFolder ); + timeloop.addFuncBeforeTimeStep( vtk::writeFiles( bodyVTK ), "VTK (body data)" ); + + }else + { + auto bodyVtkOutput = make_shared<pe::SphereVtkOutput>( bodyStorageID, blocks->getBlockStorage() ); + auto bodyVTK = vtk::createVTKOutput_PointData( bodyVtkOutput, "bodies", vtkIOFreq, baseFolder ); + timeloop.addFuncBeforeTimeStep( vtk::writeFiles( bodyVTK ), "VTK (body data)" ); + } + + + // flag field + auto flagFieldVTK = vtk::createVTKOutput_BlockData( blocks, "flag_field", vtkIOFreq, 1, false, baseFolder ); + flagFieldVTK->addCellDataWriter( make_shared< field::VTKWriter< FlagField_T > >( flagFieldID, "FlagField" ) ); + timeloop.addFuncAfterTimeStep( vtk::writeFiles( flagFieldVTK ), "VTK (flag field data)" ); + + // pdf field + auto pdfFieldVTK = vtk::createVTKOutput_BlockData( blocks, "fluid_field", vtkIOFreq, 0, false, baseFolder ); + + field::FlagFieldCellFilter< FlagField_T > fluidFilter( flagFieldID ); + fluidFilter.addFlag( Fluid_Flag ); + pdfFieldVTK->addCellInclusionFilter( fluidFilter ); + + pdfFieldVTK->addCellDataWriter( make_shared< lbm::VelocityVTKWriter< LatticeModel_T, float > >( pdfFieldID, "VelocityFromPDF" ) ); + pdfFieldVTK->addCellDataWriter( make_shared< lbm::DensityVTKWriter < LatticeModel_T, float > >( pdfFieldID, "DensityFromPDF" ) ); + + timeloop.addFuncBeforeTimeStep( vtk::writeFiles( pdfFieldVTK ), "VTK (fluid field data)" ); + + auto domainDecompVTK = vtk::createVTKOutput_DomainDecomposition(blocks, "domain_decomposition", vtkIOFreq, baseFolder ); + timeloop.addFuncBeforeTimeStep( vtk::writeFiles(domainDecompVTK), "VTK (domain decomposition)"); + } + + // sweep for updating the pe body mapping into the LBM simulation + timeloop.add() + << Sweep( pe_coupling::BodyMapping< BoundaryHandling_T >( blocks, boundaryHandlingID, bodyStorageID, globalBodyStorage, bodyFieldID, MO_CLI_Flag, FormerMO_Flag, pe_coupling::selectRegularBodies ), "Body Mapping" ); + + // sweep for restoring PDFs in cells previously occupied by pe bodies + typedef pe_coupling::EquilibriumReconstructor< LatticeModel_T, BoundaryHandling_T > Reconstructor_T; + Reconstructor_T reconstructor( blocks, boundaryHandlingID, pdfFieldID, bodyFieldID); + timeloop.add() + << Sweep( pe_coupling::PDFReconstruction< LatticeModel_T, BoundaryHandling_T, Reconstructor_T > + ( blocks, boundaryHandlingID, bodyStorageID, globalBodyStorage, bodyFieldID, reconstructor, FormerMO_Flag, Fluid_Flag, pe_coupling::selectRegularBodies, optimizeForSmallObstacleFraction ), "PDF Restore" ); + + shared_ptr<pe_coupling::BodiesForceTorqueContainer> bodiesFTContainer1 = make_shared<pe_coupling::BodiesForceTorqueContainer>(blocks, bodyStorageID); + std::function<void(void)> storeForceTorqueInCont1 = std::bind(&pe_coupling::BodiesForceTorqueContainer::store, bodiesFTContainer1); + shared_ptr<pe_coupling::BodiesForceTorqueContainer> bodiesFTContainer2 = make_shared<pe_coupling::BodiesForceTorqueContainer>(blocks, bodyStorageID); + std::function<void(void)> setForceTorqueOnBodiesFromCont2 = std::bind(&pe_coupling::BodiesForceTorqueContainer::setOnBodies, bodiesFTContainer2); + shared_ptr<pe_coupling::ForceTorqueOnBodiesScaler> forceScaler = make_shared<pe_coupling::ForceTorqueOnBodiesScaler>(blocks, bodyStorageID, real_t(1)); + std::function<void(void)> setForceScalingFactorToHalf = std::bind(&pe_coupling::ForceTorqueOnBodiesScaler::resetScalingFactor,forceScaler,real_t(0.5)); + + if( averageForceTorqueOverTwoTimSteps ) { + bodiesFTContainer2->store(); + } + + + // setup of the LBM communication for synchronizing the pdf field between neighboring blocks + std::function< void () > commFunction; + + blockforest::communication::UniformBufferedScheme< stencil::D3Q27 > scheme( blocks ); + scheme.addPackInfo( make_shared< field::communication::PackInfo< PdfField_T > >( pdfFieldID ) ); + commFunction = scheme; + + auto sweep = lbm::makeCellwiseSweep< LatticeModel_T, FlagField_T >( pdfFieldID, flagFieldID, Fluid_Flag ); + + if( !useFusedStreamCollide ) + { + // streaming & collide + timeloop.add() << Sweep( makeCollideSweep(sweep), "Collide" ); + } + + // add LBM communication function and boundary handling sweep (does the hydro force calculations and the no-slip treatment) + timeloop.add() << BeforeFunction( commFunction, "LBM Communication" ) + << Sweep( BoundaryHandling_T::getBlockSweep( boundaryHandlingID ), "Boundary Handling" ); + + if( useFusedStreamCollide ) + { + // streaming & collide + timeloop.add() << Sweep( makeSharedSweep(sweep), "Stream&Collide" ); + } else + { + // streaming & collide + timeloop.add() << Sweep( makeStreamSweep(sweep), "Stream" ); + + } + + // Averaging the force/torque over two time steps is said to damp oscillations of the interaction force/torque. + // See Ladd - " Numerical simulations of particulate suspensions via a discretized Boltzmann equation. Part 1. Theoretical foundation", 1994, p. 302 + if( averageForceTorqueOverTwoTimSteps ) { + + // store force/torque from hydrodynamic interactions in container1 + timeloop.addFuncAfterTimeStep(storeForceTorqueInCont1, "Force Storing"); + + // set force/torque from previous time step (in container2) + timeloop.addFuncAfterTimeStep(setForceTorqueOnBodiesFromCont2, "Force setting"); + + // average the force/torque by scaling it with factor 1/2 (except in first timestep, there it is 1, which it is initially) + timeloop.addFuncAfterTimeStep( pe_coupling::ForceTorqueOnBodiesScaler(blocks, bodyStorageID, real_t(0.5)), "Force averaging"); + timeloop.addFuncAfterTimeStep( setForceScalingFactorToHalf, "Force scaling adjustment" ); + + // swap containers + timeloop.addFuncAfterTimeStep( pe_coupling::BodyContainerSwapper( bodiesFTContainer1, bodiesFTContainer2 ), "Swap FT container" ); + + } + + real_t sphereVolume = diameter * diameter * diameter * math::M_PI / real_t(6); + Vector3<real_t> gravitationalForce( real_t(0), real_t(0), -(densityRatio - real_t(1)) * gravitationalAcceleration * sphereVolume ); + timeloop.addFuncAfterTimeStep(pe_coupling::ForceOnBodiesAdder( blocks, bodyStorageID, gravitationalForce ), "Gravitational force" ); + + if( fixBodies ) { + // reset all forces + timeloop.addFuncAfterTimeStep( pe_coupling::ForceTorqueOnBodiesResetter(blocks, bodyStorageID), "Force Resetting"); + } else{ + // add pe timesteps + timeloop.addFuncAfterTimeStep( pe_coupling::TimeStep( blocks, bodyStorageID, cr, syncCall, real_t(1), numPeSubCycles ), "pe Time Step" ); + } + + timeloop.addFuncAfterTimeStep( RemainingTimeLogger( timeloop.getNrOfTimeSteps() ), "Remaining Time Logger" ); + + + + //////////////////////// + // EXECUTE SIMULATION // + //////////////////////// + + WcTimingPool timeloopTiming; + + std::vector< std::vector<std::string> > timerKeys; + std::vector<std::string> LBMTimer; + LBMTimer.push_back("Stream&Collide"); + LBMTimer.push_back("Stream"); + LBMTimer.push_back("Collide"); + timerKeys.push_back(LBMTimer); + + std::vector<std::string> bhTimer; + bhTimer.push_back("Boundary Handling"); + timerKeys.push_back(bhTimer); + + std::vector<std::string> couplingTimer1; + couplingTimer1.push_back("Body Mapping"); + std::vector<std::string> couplingTimer2; + couplingTimer2.push_back("PDF Restore"); + timerKeys.push_back(couplingTimer1); + timerKeys.push_back(couplingTimer2); + + std::vector<std::string> peTimer; + peTimer.push_back("Simulation Step.Collision Detection"); + peTimer.push_back("Simulation Step.Collision Response Integration"); + peTimer.push_back("Simulation Step.Collision Response Resolution.Collision Response Solving"); + timerKeys.push_back(peTimer); + + uint_t numCells, numFluidCells, numNBCells, numLocalParticles, numShadowParticles, numContacts; + numCells = uint_t(0); + numFluidCells = uint_t(0); + numNBCells = uint_t(0); + numLocalParticles = uint_t(0); + numShadowParticles = uint_t(0); + numContacts = uint_t(0); + + std::vector<real_t> timings(timerKeys.size()); + + resetTimers(timeloopTiming, timingTreePE); + + // every rank writes its own file -> numProcesses number of samples! + int myRank = MPIManager::instance()->rank(); + + std::string logFileName = baseFolder + "/load"; + logFileName += "_settling"; + if( useEllipsoids) + { + logFileName += "_ellipsoids"; + } + else + { + logFileName += "_spheres"; + } + logFileName += "_d" + std::to_string(int_c(std::ceil(diameter))); + logFileName += "_bs" + std::to_string(blockSize); + logFileName += "_" + std::to_string(myRank) + ".txt"; + + + std::ofstream file; + + if(!noFileOutput) + { + WALBERLA_LOG_INFO_ON_ROOT("Writing load info to file " << logFileName); + file.open( logFileName.c_str(), std::ofstream::app ); + file << "# svf = " << solidVolumeFraction << ", d = " << diameter << ", domain = " << XCells << "x" << YCells << "x" << ZCells << "\n"; + } + + + uint_t timeStepOfFirstTiming = uint_t(50); + + if( timesteps - timeStepOfFirstTiming < numSamples ) + { + WALBERLA_LOG_WARNING_ON_ROOT("Less actual time steps than number of required samples!"); + } + + uint_t nSample( 0 ); // number of current sample + real_t samplingFrequency = real_c(timesteps - timeStepOfFirstTiming) / real_c(numSamples); + + // time loop + for (uint_t i = 1; i <= timesteps; ++i ) + { + // perform a single simulation step + timeloop.singleStep( timeloopTiming ); + + // check if current time step should be included in sample + if( i >= uint_c( samplingFrequency * real_c(nSample) ) + timeStepOfFirstTiming ) + { + // include -> evaluate all timers and quantities + + evaluateFluidQuantities(blocks, boundaryHandlingID, numCells, numFluidCells, numNBCells); + evaluatePEQuantities(blocks, bodyStorageID, ccdID, fcdID, numLocalParticles, numShadowParticles, numContacts); + + evaluateTimers(timeloopTiming, timingTreePE, timerKeys, timings); + + if(!noFileOutput) + { + real_t totalTime = std::accumulate(timings.begin(), timings.end(), real_t(0) ); + + file << timeloop.getCurrentTimeStep() << " " << real_c(timeloop.getCurrentTimeStep()) / Tref << " " + << numCells << " " << numFluidCells << " " << numNBCells << " " + << numLocalParticles << " " << numShadowParticles << " " << numContacts << " " << numPeSubCycles; + for( auto timeIt = timings.begin(); timeIt != timings.end(); ++timeIt ) + { + file << " " << *timeIt; + } + file << " " << totalTime << "\n"; + } + + numCells = uint_t(0); + numFluidCells = uint_t(0); + numNBCells = uint_t(0); + numLocalParticles = uint_t(0); + numShadowParticles = uint_t(0); + numContacts = uint_t(0); + + ++nSample; + } + + // reset timers to always include only a single time step in them + resetTimers(timeloopTiming, timingTreePE); + } + + if(!noFileOutput) { + file.close(); + } + + //timeloopTiming.logResultOnRoot(); + + WALBERLA_LOG_INFO_ON_ROOT("Simulation finished!"); + + return 0; + +} + +} //namespace workload_evaluation + +int main( int argc, char **argv ){ + workload_evaluation::main(argc, argv); +} diff --git a/apps/benchmarks/CMakeLists.txt b/apps/benchmarks/CMakeLists.txt index 31e6417ae..4fa0c4cd0 100644 --- a/apps/benchmarks/CMakeLists.txt +++ b/apps/benchmarks/CMakeLists.txt @@ -1,3 +1,4 @@ +add_subdirectory( AdaptiveMeshRefinementFluidParticleCoupling ) add_subdirectory( ComplexGeometry ) add_subdirectory( DEM ) add_subdirectory( MeshDistance ) -- GitLab