Commit b95ff457 authored by Martin Bauer's avatar Martin Bauer
Browse files

Benchmark application for CPU ghost layer communication

- benchmarks ghost layer field communication
- direct vs. buffered, various field & block sizes
- multiple blocks per process
- extensive database output
parent 13ec7ae2
add_subdirectory( AdaptiveMeshRefinementFluidParticleCoupling )
add_subdirectory( ComplexGeometry )
add_subdirectory( DEM )
add_subdirectory( FieldCommunication )
add_subdirectory( MeshDistance )
add_subdirectory( CouetteFlow )
add_subdirectory( ForcesOnSphereNearPlaneInShearFlow )
......
# Make the benchmark inputs (*.dat config files, *.py scenario generator) available in the build directory
waLBerla_link_files_to_builddir( "*.dat" )
waLBerla_link_files_to_builddir( "*.py" )
# postprocessing is required for the sqlite database output of the benchmark
waLBerla_add_executable ( NAME FieldCommunication
DEPENDS blockforest core domain_decomposition field postprocessing )
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "core/mpi/MPIManager.h"
#include "core/Environment.h"
#include "core/OpenMP.h"
#include "core/mpi/Broadcast.h"
#include "core/math/IntegerFactorization.h"
#include "core/timing/TimingPool.h"
#include "core/waLBerlaBuildInfo.h"
#include "field/communication/StencilRestrictedMPIDatatypeInfo.h"
#include "field/AddToStorage.h"
#include "field/GhostLayerField.h"
#include "field/communication/PackInfo.h"
#include "field/communication/StencilRestrictedPackInfo.h"
#include "field/communication/UniformMPIDatatypeInfo.h"
#include "postprocessing/sqlite/SQLite.h"
#include "python_coupling/CreateConfig.h"
#include "stencil/D3Q7.h"
#include "stencil/D3Q19.h"
#include "stencil/D3Q27.h"
#include <functional>
using namespace walberla;
using blockforest::communication::UniformDirectScheme;
using blockforest::communication::UniformBufferedScheme;
using field::communication::UniformMPIDatatypeInfo;
using field::communication::PackInfo;
using field::communication::StencilRestrictedPackInfo;
using field::communication::StencilRestrictedMPIDatatypeInfo;
/// Buffered communication scheme that sends every registered field in its own
/// MPI message: each added pack info is wrapped in a dedicated
/// UniformBufferedScheme with a unique tag, instead of fusing all fields into
/// one message as UniformBufferedScheme does.
template<typename Stencil_T>
class SingleMessageBufferedScheme
{
public:
   typedef Stencil_T Stencil;

   SingleMessageBufferedScheme( weak_ptr_wrapper< StructuredBlockForest > bf, const int tag = 17953 )
           : blockForest_( bf ), tag_( tag ) {}

   /// Registers a pack info; it gets its own scheme (and thus its own message/tag)
   inline void addDataToCommunicate( const shared_ptr< communication::UniformPackInfo > &packInfo )
   {
      tag_ += 1;
      auto newScheme = make_shared< UniformBufferedScheme< Stencil > >( blockForest_, tag_++ );
      newScheme->addDataToCommunicate( packInfo );
      schemes_.push_back( newScheme );
   }

   /// Forwards the local communication mode to all schemes created so far.
   /// NOTE(review): affects only pack infos added *before* this call.
   inline void setLocalMode( const blockforest::LocalCommunicationMode &mode )
   {
      // Bug fix: the original called s->setLocalMOde( mode ) (typo), which would
      // fail to compile as soon as this member function is instantiated.
      for ( auto &s : schemes_ )
         s->setLocalMode( mode );
   }

   inline void startCommunication()
   {
      for ( auto &s : schemes_ )
         s->startCommunication();
   }

   inline void wait()
   {
      for ( auto &s : schemes_ )
         s->wait();
   }

private:
   std::vector< shared_ptr< UniformBufferedScheme< Stencil > > > schemes_;
   weak_ptr_wrapper< StructuredBlockForest > blockForest_;
   int tag_;
};
/// Registers a field with a direct scheme: direct schemes exchange ghost layers
/// via custom MPI datatypes instead of pack/unpack buffers.
template<typename FieldType, typename Stencil>
void addDataToCommunicate( const shared_ptr< UniformDirectScheme< Stencil > > &scheme, BlockDataID id, uint_t ghostLayers )
{
   auto datatypeInfo = make_shared< UniformMPIDatatypeInfo< FieldType > >( id, ghostLayers );
   scheme->addDataToCommunicate( datatypeInfo );
}
template<typename FieldType, typename Scheme_T>
void addDataToCommunicate( const shared_ptr< Scheme_T > &scheme, BlockDataID id, uint_t ghostLayers )
{
scheme->addDataToCommunicate( make_shared< PackInfo< FieldType > >( id, ghostLayers ));
}
/// "Optimized" buffered registration (bool tag selects this overload): with
/// exactly one ghost layer the stencil-restricted pack info is used, which
/// communicates only the field components the stencil actually needs.
template<typename FieldType, typename Scheme_T>
void addDataToCommunicate( const shared_ptr< Scheme_T > &scheme, BlockDataID id, uint_t ghostLayers, bool )
{
   if ( ghostLayers == 1 )
      scheme->addDataToCommunicate( make_shared< StencilRestrictedPackInfo< FieldType, typename Scheme_T::Stencil > >( id ));
   else
      scheme->addDataToCommunicate( make_shared< PackInfo< FieldType > >( id, ghostLayers ));
}
/// "Optimized" direct registration (bool tag selects this overload): with
/// exactly one ghost layer the stencil-restricted MPI datatype is used instead
/// of the generic one.
template<typename FieldType, typename Stencil_T>
void addDataToCommunicate( const shared_ptr< UniformDirectScheme< Stencil_T > > &scheme, BlockDataID id, uint_t ghostLayers, bool )
{
   if ( ghostLayers == 1 )
      scheme->addDataToCommunicate( make_shared< StencilRestrictedMPIDatatypeInfo< FieldType, Stencil_T > >( id ));
   else
      scheme->addDataToCommunicate( make_shared< UniformMPIDatatypeInfo< FieldType > >( id, ghostLayers ));
}
template<typename Scheme1, typename Scheme2>
void addData( const shared_ptr< StructuredBlockForest > &blocks, const config::Config::BlockHandle &configBlock,
const shared_ptr< Scheme1 > &scheme1, const shared_ptr< Scheme2 > &scheme2,
uint_t ghostLayers, field::Layout layout )
{
auto numPdfFields = configBlock.getParameter< uint_t >( "pdf" );
for ( uint_t i = 0; i < numPdfFields; ++i )
{
typedef field::GhostLayerField< real_t, Scheme1::Stencil::Q > Field_T;
BlockDataID bdId = field::addToStorage< Field_T >( blocks, "pdf", 0.0, layout, ghostLayers );
addDataToCommunicate< Field_T >( scheme1, bdId, ghostLayers );
addDataToCommunicate< Field_T >( scheme2, bdId, ghostLayers );
}
auto numPdfOptFields = configBlock.getParameter< uint_t >( "pdfOpt" );
for ( uint_t i = 0; i < numPdfOptFields; ++i )
{
typedef field::GhostLayerField< real_t, Scheme1::Stencil::Q > Field_T;
BlockDataID bdId = field::addToStorage< Field_T >( blocks, "pdfopt", 0.0, layout, ghostLayers );
addDataToCommunicate< Field_T >( scheme1, bdId, ghostLayers, true );
addDataToCommunicate< Field_T >( scheme2, bdId, ghostLayers, true );
}
auto numVectorFields = configBlock.getParameter< uint_t >( "vector" );
for ( uint_t i = 0; i < numVectorFields; ++i )
{
typedef field::GhostLayerField< real_t, 3 > Field_T;
BlockDataID bdId = field::addToStorage< Field_T >( blocks, "vector", 0.0, layout, ghostLayers );
addDataToCommunicate< Field_T >( scheme1, bdId, ghostLayers );
addDataToCommunicate< Field_T >( scheme2, bdId, ghostLayers );
}
auto numScalarFields = configBlock.getParameter< uint_t >( "scalar" );
for ( uint_t i = 0; i < numScalarFields; ++i )
{
typedef field::GhostLayerField< real_t, 1 > Field_T;
BlockDataID bdId = field::addToStorage< Field_T >( blocks, "scalar", 0.0, layout, ghostLayers );
addDataToCommunicate< Field_T >( scheme1, bdId, ghostLayers );
addDataToCommunicate< Field_T >( scheme2, bdId, ghostLayers );
}
}
template<typename Stencil>
void createCommunication( const shared_ptr< StructuredBlockForest > &blocks,
bool buffered, const config::Config::BlockHandle &fieldCfg, uint_t ghostLayers, field::Layout layout,
blockforest::LocalCommunicationMode localCommunicationMode, bool singleMessage,
std::function< void() > &commStart, std::function< void() > &commWait )
{
auto directScheme = make_shared< UniformDirectScheme< Stencil > >( blocks, shared_ptr< communication::UniformMPIDatatypeInfo >(), 42 );
auto bufferedScheme = make_shared< UniformBufferedScheme< Stencil > >( blocks, 4242 );
auto bufferedSchemeSingle = make_shared< SingleMessageBufferedScheme< Stencil > >( blocks , 24242);
bufferedScheme->setLocalMode( localCommunicationMode );
if ( buffered )
{
if ( !singleMessage )
{
addData( blocks, fieldCfg, directScheme, bufferedScheme, ghostLayers, layout );
commStart = [=]() { bufferedScheme->startCommunication(); };
commWait = [=]() { bufferedScheme->wait(); };
}
else
{
addData( blocks, fieldCfg, directScheme, bufferedSchemeSingle, ghostLayers, layout );
commStart = [=]() { bufferedSchemeSingle->startCommunication(); };
commWait = [=]() { bufferedSchemeSingle->wait(); };
}
}
else
{
addData( blocks, fieldCfg, directScheme, bufferedScheme, ghostLayers, layout );
commStart = [=]() { directScheme->startCommunication(); };
commWait = [=]() { directScheme->wait(); };
}
}
/// Reads an environment variable; returns an empty string when it is not set.
std::string fromEnv( const char *envVar )
{
   const char *value = std::getenv( envVar );
   if ( value == nullptr )
      return std::string();
   return std::string( value );
}
/// Benchmark driver: iterates over all scenarios generated by the Python config
/// callback, runs the ghost-layer communication for each, and stores the
/// timings plus all scenario parameters in a sqlite database.
int main( int argc, char **argv )
{
   mpi::Environment env( argc, argv );

   int scenarioNr = 0;
   auto mpiManager = mpi::MPIManager::instance();
   for ( auto cfg = python_coupling::configBegin( argc, argv ); cfg != python_coupling::configEnd(); ++cfg )
   {
      // Scenarios may differ in communicator setup -> reset MPI topology between runs
      if ( mpiManager->isMPIInitialized())
         mpiManager->resetMPI();

      auto config = *cfg;
      auto commCfg = config->getOneBlock( "Communication" );
      auto domainCfg = config->getOneBlock( "Domain" );

      bool cartesianCommunicator = commCfg.getParameter< bool >( "cartesianCommunicator", true );
      if ( !cartesianCommunicator )
         mpiManager->useWorldComm();

      scenarioNr += 1;
      WALBERLA_LOG_INFO_ON_ROOT( "Simulating scenario " << scenarioNr );
      WALBERLA_LOG_INFO_ON_ROOT( *config );

      // ---- Domain Setup ----
      const Vector3< uint_t > cellsPerBlock = domainCfg.getParameter< Vector3< uint_t > >( "cellsPerBlock" );
      const Vector3< real_t > domainWeights = domainCfg.getParameter< Vector3< real_t > >( "domainWeights", Vector3< real_t >( 1.0, 1.0, 1.0 ));
      uint_t blocksPerProcess = domainCfg.getParameter< uint_t >( "blocksPerProcess", 1 );

      auto numProcesses = mpiManager->numProcesses();
      // Process grid factors numProcesses; block grid factors numProcesses * blocksPerProcess
      auto processes = math::getFactors3D( uint_c( numProcesses ), domainWeights );
      auto blockDecomposition = math::getFactors3D( uint_c( numProcesses ) * blocksPerProcess, domainWeights );
      // Physical domain size is irrelevant for a pure communication benchmark
      auto aabb = AABB( real_t( 0 ), real_t( 0 ), real_t( 0 ),
                        real_c( cellsPerBlock[0] * processes[0] * blocksPerProcess ),
                        real_c( cellsPerBlock[1] * processes[1] * blocksPerProcess ),
                        real_c( cellsPerBlock[2] * processes[2] * blocksPerProcess ));

      auto blocks = blockforest::createUniformBlockGrid( aabb,
                                                         blockDecomposition[0], blockDecomposition[1], blockDecomposition[2],
                                                         cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2],
                                                         processes[0], processes[1], processes[2],
                                                         true, true, true, //periodicity
                                                         false // keepGlobalBlockInformation
      );

      // ---- Communication Setup ----
      auto fieldCfg = commCfg.getOneBlock( "Fields" );
      const bool buffered = commCfg.getParameter< bool >( "buffered", true );
      const std::string stencil = commCfg.getParameter< std::string >( "stencil", "D3Q19" );
      const uint_t ghostLayers = commCfg.getParameter< uint_t >( "ghostLayers", 1 );
      const std::string layoutStr = commCfg.getParameter< std::string >( "layout", "fzyx" );
      const std::string localCommModeStr = commCfg.getParameter< std::string >( "localCommunicationMode", "start" );
      const bool singleMessage = commCfg.getParameter< bool >( "singleMessage", false );

      blockforest::LocalCommunicationMode localCommunicationMode;
      if ( localCommModeStr == "start" )
         localCommunicationMode = blockforest::START;
      else if ( localCommModeStr == "wait" )
         localCommunicationMode = blockforest::WAIT;
      else if ( localCommModeStr == "buffer" )
         localCommunicationMode = blockforest::BUFFER;
      else if ( localCommModeStr == "noOptimization" )
         localCommunicationMode = blockforest::NO_OPTIMIZATION;
      else
      {
         // Bug fix: the original message printed layoutStr instead of the
         // offending localCommModeStr value
         WALBERLA_ABORT_NO_DEBUG_INFO( "Unknown localCommunicationMode " << localCommModeStr << ". Valid values are start, wait, buffer and noOptimization" )
      }

      field::Layout layout;
      if ( layoutStr == "fzyx" )
         layout = field::fzyx;
      else if ( layoutStr == "zyxf" )
         layout = field::zyxf;
      else
      {
         WALBERLA_ABORT_NO_DEBUG_INFO( "Unknown layout string " << layoutStr << ". Valid values are fzyx and zyxf." )
      }

      std::function< void() > commStart;
      std::function< void() > commWait;
      if ( stencil == "D3Q19" )
         createCommunication< stencil::D3Q19 >( blocks, buffered, fieldCfg, ghostLayers, layout, localCommunicationMode, singleMessage, commStart,
                                                commWait );
      else if ( stencil == "D3Q27" )
         createCommunication< stencil::D3Q27 >( blocks, buffered, fieldCfg, ghostLayers, layout, localCommunicationMode, singleMessage, commStart,
                                                commWait );
      else if ( stencil == "D3Q7" )
         createCommunication< stencil::D3Q7 >( blocks, buffered, fieldCfg, ghostLayers, layout, localCommunicationMode, singleMessage, commStart, commWait );
      else
      {
         WALBERLA_ABORT_NO_DEBUG_INFO( "Unknown stencil " << stencil << ". Has to be one of D3Q7, D3Q19, D3Q27." )
      }

      // ---- Timing ----
      auto runCfg = config->getOneBlock( "Run" );
      const uint_t warmupIterations = runCfg.getParameter< uint_t >( "warmupIterations", 2 );
      uint_t iterations = runCfg.getParameter< uint_t >( "iterations", 10 );
      const uint_t minIterations = runCfg.getParameter< uint_t >( "minIterations", 2 );
      const uint_t maxIterations = runCfg.getParameter< uint_t >( "maxIterations", 100 );
      const real_t timeForBenchmark = runCfg.getParameter< real_t >( "timeForBenchmark", real_t(-1.0) );
      const uint_t outerIterations = runCfg.getParameter< uint_t >( "outerIterations", 2 );
      const std::string databaseFile = runCfg.getParameter< std::string >( "databaseFile", "FieldCommunication.sqlite" );

      // One untimed communication to set up buffers / MPI datatypes
      commStart();
      commWait();

      WcTimer warmupTimer;
      warmupTimer.start();
      for ( uint_t warmupCounter = 0; warmupCounter < warmupIterations; ++warmupCounter )
      {
         commStart();
         commWait();
      }
      warmupTimer.end();

      auto estimatedTimePerIteration = warmupTimer.last() / real_c( warmupIterations );
      // If a time budget is given, derive the iteration count from the warmup
      // estimate, clamped to [minIterations, maxIterations]
      if ( timeForBenchmark > 0 )
      {
         iterations = uint_c( timeForBenchmark / estimatedTimePerIteration );
         if ( iterations < minIterations )
            iterations = minIterations;
         if ( iterations > maxIterations )
            iterations = maxIterations;
      }
      // All processes must agree on the iteration count (local estimates differ)
      mpi::broadcastObject( iterations );

      WcTimingPool timingPool;
      WALBERLA_MPI_BARRIER();
      WALBERLA_LOG_INFO_ON_ROOT( "Running " << outerIterations << " outer iterations of size " << iterations );
      for ( uint_t outerCtr = 0; outerCtr < outerIterations; ++outerCtr )
      {
         timingPool["totalTime"].start();
         for ( uint_t ctr = 0; ctr < iterations; ++ctr )
         {
            timingPool["commStart"].start();
            commStart();
            timingPool["commStart"].end();

            timingPool["commWait"].start();
            commWait();
            timingPool["commWait"].end();
         }
         timingPool["totalTime"].end();
      }

      auto numThreads = omp_get_max_threads();
      auto reducedTimingPool = timingPool.getReduced( timing::REDUCE_TOTAL, 0 );

      // ---- Database output (root process only) ----
      WALBERLA_ROOT_SECTION()
      {
         WALBERLA_LOG_RESULT( *reducedTimingPool );

         std::map< std::string, walberla::int64_t > integerProperties;
         std::map< std::string, double > realProperties;
         std::map< std::string, std::string > stringProperties;

         // Optional free-form "Database" block is copied verbatim into the run row
         auto databaseBlock = config->getBlock( "Database" );
         if ( databaseBlock )
         {
            for ( auto it = databaseBlock.begin(); it != databaseBlock.end(); ++it )
               stringProperties[it->first] = it->second;
         }

         // Per-iteration times of the root process
         realProperties["total_min"] = real_c( timingPool["totalTime"].min()) / real_c( iterations );
         realProperties["total_avg"] = real_c( timingPool["totalTime"].average() / real_c( iterations ));
         realProperties["total_max"] = real_c( timingPool["totalTime"].max() / real_c( iterations ));

         integerProperties["cellsPerBlock0"] = int64_c( cellsPerBlock[0] );
         integerProperties["cellsPerBlock1"] = int64_c( cellsPerBlock[1] );
         integerProperties["cellsPerBlock2"] = int64_c( cellsPerBlock[2] );
         integerProperties["processes0"] = int64_c( processes[0] );
         integerProperties["processes1"] = int64_c( processes[1] );
         integerProperties["processes2"] = int64_c( processes[2] );
         integerProperties["blocks0"] = int64_c( blockDecomposition[0] );
         integerProperties["blocks1"] = int64_c( blockDecomposition[1] );
         integerProperties["blocks2"] = int64_c( blockDecomposition[2] );
         integerProperties["blocksPerProcess"] = int64_c( blocksPerProcess );
         integerProperties["ghostLayers"] = int64_c( ghostLayers );
         integerProperties["fieldsPdf"] = fieldCfg.getParameter< int64_t >( "pdf" );
         integerProperties["fieldsPdfOpt"] = fieldCfg.getParameter< int64_t >( "pdfOpt" );
         integerProperties["fieldsVector"] = fieldCfg.getParameter< int64_t >( "vector" );
         integerProperties["fieldsScalar"] = fieldCfg.getParameter< int64_t >( "scalar" );
         integerProperties["numThreads"] = int64_c( numThreads );
         integerProperties["cartesianCommunicator"] = mpiManager->hasCartesianSetup();
         integerProperties["warmupIterations"] = int64_c( warmupIterations );
         integerProperties["iterations"] = int64_c( iterations );
         integerProperties["outerIterations"] = int64_c( outerIterations );
         integerProperties["buffered"] = int64_c( buffered );
         integerProperties["singleMessage"] = int64_c( singleMessage );

         stringProperties["stencil"] = stencil;
         stringProperties["layout"] = layoutStr;
         stringProperties["localCommunicationMode"] = localCommModeStr;

         // SLURM environment (empty strings when not running under SLURM)
         stringProperties["SLURM_CLUSTER_NAME"] = fromEnv( "SLURM_CLUSTER_NAME" );
         stringProperties["SLURM_CPUS_ON_NODE"] = fromEnv( "SLURM_CPUS_ON_NODE" );
         stringProperties["SLURM_CPUS_PER_TASK"] = fromEnv( "SLURM_CPUS_PER_TASK" );
         stringProperties["SLURM_JOB_ACCOUNT"] = fromEnv( "SLURM_JOB_ACCOUNT" );
         stringProperties["SLURM_JOB_ID"] = fromEnv( "SLURM_JOB_ID" );
         stringProperties["SLURM_JOB_CPUS_PER_NODE"] = fromEnv( "SLURM_JOB_CPUS_PER_NODE" );
         stringProperties["SLURM_JOB_NAME"] = fromEnv( "SLURM_JOB_NAME" );
         stringProperties["SLURM_JOB_NUM_NODES"] = fromEnv( "SLURM_JOB_NUM_NODES" );
         stringProperties["SLURM_NTASKS"] = fromEnv( "SLURM_NTASKS" );
         stringProperties["SLURM_NTASKS_PER_CORE"] = fromEnv( "SLURM_NTASKS_PER_CORE" );
         stringProperties["SLURM_NTASKS_PER_NODE"] = fromEnv( "SLURM_NTASKS_PER_NODE" );
         stringProperties["SLURM_NTASKS_PER_SOCKET"] = fromEnv( "SLURM_NTASKS_PER_SOCKET" );
         stringProperties["SLURM_TASKS_PER_NODE"] = fromEnv( "SLURM_TASKS_PER_NODE" );

         // Build info for reproducibility
         stringProperties["buildMachine"] = std::string( WALBERLA_BUILD_MACHINE );
         stringProperties["gitVersion"] = std::string( WALBERLA_GIT_SHA1 );
         stringProperties["buildType"] = std::string( WALBERLA_BUILD_TYPE );
         stringProperties["compilerFlags"] = std::string( WALBERLA_COMPILER_FLAGS );

         auto runId = postprocessing::storeRunInSqliteDB( databaseFile, integerProperties, stringProperties, realProperties );
         postprocessing::storeTimingPoolInSqliteDB( databaseFile, runId, timingPool, "TimingRoot" );
         postprocessing::storeTimingPoolInSqliteDB( databaseFile, runId, *reducedTimingPool, "TimingReduced" );
      }
   }
   return 0;
}
\ No newline at end of file
Domain
{
cellsPerBlock < 60, 60, 60 >;
domainWeights < 1, 1, 1 >;
blocksPerProcess 1;
}
Communication
{
stencil D3Q19;
ghostLayers 1;
buffered 1;
singleMessage 1;
Fields {
pdf 1;
pdfOpt 1;
vector 0;
scalar 0;
}
localCommunicationMode start;
layout fzyx;
}
Run
{
warmupIterations 2;
iterations 50;
databaseFile FieldCommunication.sqlite;
}
\ No newline at end of file
import waLBerla
import subprocess
import re
from collections import defaultdict
# Edge lengths used to generate the benchmarked block sizes below
base = (32, 16, 2, 64)
# NOTE(review): 'BlOCK' is a typo of 'BLOCK'; the name is kept because config() references it verbatim
BlOCK_SIZES_SQ = [(i, i, i) for i in base]  # cubic blocks
BLOCK_SIZES_RECT = [(i, i, i // 2) for i in base] + [(i, i // 2, i // 2) for i in base]  # non-cubic blocks
def supermuc_network_spread():
    """Determine how many distinct network levels the current SLURM job spans.

    Parses host names of the form ``i01r02c03s04`` (island/rack/chassis/slot)
    from ``scontrol show hostname $SLURM_JOB_NODELIST``.

    Returns:
        dict mapping 'i', 'r', 'c', 's' to the number of distinct values seen;
        when scontrol fails (e.g. not running under SLURM), a defaultdict that
        yields 0 for every key.
    """
    try:
        node_list = subprocess.check_output("scontrol show hostname $SLURM_JOB_NODELIST", shell=True, encoding='utf8')
    except subprocess.CalledProcessError:
        return defaultdict(lambda: 0)
    spread = defaultdict(set)
    for s in node_list.split("\n"):
        # Bug fix: raw string literal -- '\d' in a normal string is a
        # deprecated/invalid escape sequence in Python 3
        m = re.search(r"i(\d\d)r(\d\d)c(\d\d)s(\d\d)", s)
        if m:
            for name, idx in zip(['i', 'r', 'c', 's'], range(1, 5)):
                spread[name].add(m.group(idx))
    return {k: len(v) for k, v in spread.items()}
sng_network = supermuc_network_spread()
class AlreadySimulated:
    """Set of parameter combinations already present in the benchmark database.

    Loads the given property columns from the 'runs' table of a sqlite file so
    that scenarios which were already simulated can be skipped.
    """

    def __init__(self, db_file, properties=('layout', 'ghostLayers', 'cartesianCommunicator', 'stencil',
                                            'cellsPerBlock0', 'cellsPerBlock1', 'cellsPerBlock2',
                                            'blocksPerProcess', 'localCommunicationMode', 'singleMessage',
                                            'fieldsPdf', 'fieldsPdfOpt', 'fieldsVector', 'fieldsScalar',
                                            'buffered')):
        self.properties = properties
        import sqlite3
        self.data = set()
        conn = sqlite3.connect(db_file)
        try:
            for row in conn.execute("SELECT {} FROM runs;".format(",".join(self.properties))):
                self.data.add(row)
        except sqlite3.OperationalError:
            # database or 'runs' table does not exist yet -> nothing simulated so far
            pass
        finally:
            # Bug fix: the connection was never closed before (resource leak)
            conn.close()
        waLBerla.log_info_on_root("Loaded {} scenarios".format(len(self.data)))

    def in_db(self, args):
        """Return True iff this parameter tuple was already simulated."""
        return args in self.data
@waLBerla.callback("config")
def config():
simulated_db = AlreadySimulated('FieldCommunication.sqlite')
skipped = 0
for layout in ('fzyx', 'zyxf'):
for ghost_layers in (1, 2):
for cartesian_comm in (False, True):
for stencil in ('D3Q19', 'D3Q27', 'D3Q7'):
for cells in BlOCK_SIZES_SQ + BLOCK_SIZES_RECT:
for blocksPerProcess in (1, 2, 4, 8, 16):
for local_comm in ('start', 'noOptimization', 'buffer'):
for single_message in (True, False):
for pdf, pdf_opt, vector, scalar in ([1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1],
[0, 0, 0, 3], [0, 0, 0, 19], [2, 0, 0, 0], [0, 2, 0, 0]):
for buffered in (0, 1):
if blocksPerProcess >= 8 and cells[0] >= 64 and cells[1] >= 64 and cells[2] >= 64:
continue
data = (layout, ghost_layers, int(cartesian_comm), stencil, *cells, blocksPerProcess, local_comm,
int(single_message), pdf, pdf_opt, vector, scalar, buffered)
if simulated_db.in_db(data):
skipped += 1
if skipped % 100 == 0:
waLBerla.log_info_on_root("Skipped {} scenarios".format(skipped))
continue
cfg = {
'Domain': {
'cellsPerBlock': cells,
'domainWeights': (1, 1, 1),
'blocksPerProcess': blocksPerProcess,
},
'Communication': {
'buffered': buffered,
'stencil': stencil,
'ghostLayers': ghost_layers,
'cartesianCommunicator': cartesian_comm,
'singleMessage': single_message,
'Fields': {
'pdf': pdf,
'pdfOpt': pdf_opt,
'vector': vector,
'scalar': scalar,
},
</