Commit 82bfb70f authored by Frederik Hennig
Browse files

Fixed benchmark setup

parent cbda51ec
Pipeline #34247 failed with stages
in 14 minutes and 17 seconds
......@@ -35,6 +35,8 @@
#include "vtk/all.h"
#include "pybind11/stl.h"
#include "python_coupling/CreateConfig.h"
#include "python_coupling/PythonCallback.h"
#include "python_coupling/DictWrapper.h"
......@@ -720,21 +722,30 @@ void run(std::shared_ptr< Config > & config) {
// Aaaand... Go!
uint_t totalCellsPerBlock(1);
for(uint_t i = 0; i < 3; ++i){
totalCellsPerBlock *= sbfs->getNumberOfCellsPerBlock(i);
}
const uint_t numProcesses = uint_c( MPIManager::instance()->numProcesses() );
const uint_t ompNumThreads = uint_c(omp_get_max_threads());
const uint_t numNodes = params.getParameter< uint_t >("numNodes");
const uint_t numCoresPerNode = params.getParameter< uint_t >("physicalCoresPerNode");
const uint_t numProcessesPerNode = params.getParameter< uint_t >("processesPerNode");
const uint_t numProcesses = uint_c( MPIManager::instance()->numProcesses() );
const uint_t numThreadsPerProcess = uint_c(omp_get_max_threads());
const uint_t processLocalNumberOfBlocks = sbfs->getNumberOfBlocks();
const uint_t globalNumberOfBlocks = setupBfs.getNumberOfBlocks();
const real_t avgNumberOfBlocksPerProcess = real_c(globalNumberOfBlocks) / real_c(numProcesses);
const uint_t totalCellsPerBlock = cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2];
std::vector< uint_t > blocksPerLevel(setupBfs.getNumberOfLevels());
std::vector< uint_t > cellsPerLevel(setupBfs.getNumberOfLevels());
std::vector< uint_t > updatesPerRunAndLevel(setupBfs.getNumberOfLevels());
uint_t totalUpdatesPerRun = 0;
for(uint_t k = 0; k <= setupBfs.getDepth(); ++k){
blocksPerLevel[k] = setupBfs.getNumberOfBlocks(k);
cellsPerLevel[k] = blocksPerLevel[k] * totalCellsPerBlock;
updatesPerRunAndLevel[k] = cellsPerLevel[k] * stepsPerRun * (uint_t(1) << k);
totalUpdatesPerRun += updatesPerRunAndLevel[k];
}
const uint_t processLocalNumberOfCells = processLocalNumberOfBlocks * totalCellsPerBlock;
const uint_t globalNumberOfCells = globalNumberOfBlocks * totalCellsPerBlock;
const real_t avgNumberOfCellsPerProcess = avgNumberOfBlocksPerProcess * real_c(totalCellsPerBlock);
pybind11::list blocksPerLevelPy = pybind11::cast(blocksPerLevel);
pybind11::list cellsPerLevelPy = pybind11::cast(cellsPerLevel);
pybind11::list updatesPerRunAndLevelPy = pybind11::cast(updatesPerRunAndLevel);
#define MLUPS (real_c(1e-6))
......@@ -756,16 +767,14 @@ void run(std::shared_ptr< Config > & config) {
double localTime = simTimer.last();
double globalMaxTime = mpi::reduce(localTime, mpi::MAX);
double localMlups = ( real_c( processLocalNumberOfCells * stepsPerRun ) / localTime ) * MLUPS;
double localMlupsMin = mpi::reduce(localMlups, mpi::MIN);
double localMlupsMax = mpi::reduce(localMlups, mpi::MAX);
double localMlupsAvg = mpi::reduce(localMlups, mpi::SUM) / double( numProcesses );
WALBERLA_ROOT_SECTION()
{
// Total MLUPs
double globalMlups = ( real_c( globalNumberOfCells * stepsPerRun ) / globalMaxTime) * MLUPS;
double globalMlups = (double(totalUpdatesPerRun) / globalMaxTime) * MLUPS;
double mlupsPerProcess = globalMlups / double(numProcesses);
double mlupsPerThread = mlupsPerProcess / double(numThreadsPerProcess);
double mlupsPerCore = globalMlups / double(numNodes * numCoresPerNode);
double mlupsPerNode = globalMlups / double(numNodes);
// Callback
python_coupling::PythonCallback resultsCallback("results_callback");
......@@ -780,32 +789,30 @@ void run(std::shared_ptr< Config > & config) {
* - Total MLUPs ( by localTime of slowest process )
*/
resultsCallback.data().exposeValue("mpiNumProcesses", numProcesses);
resultsCallback.data().exposeValue("ompNumThreads", ompNumThreads);
resultsCallback.data().exposeValue("numNodes", numNodes);
resultsCallback.data().exposeValue("numCoresPerNode", numCoresPerNode);
resultsCallback.data().exposeValue("numProcessesPerNode", numProcessesPerNode);
resultsCallback.data().exposeValue("numProcesses", numProcesses);
resultsCallback.data().exposeValue("numThreadsPerProcess", numThreadsPerProcess);
resultsCallback.data().exposeValue("stencil", stencilName);
resultsCallback.data().exposeValue("streamingPattern", streamingPattern);
resultsCallback.data().exposeValue("collisionSetup", collisionSetup);
resultsCallback.data().exposeValue("globalNumberOfBlocks", globalNumberOfBlocks);
resultsCallback.data().exposeValue("avgNumberOfBlocksPerProcess", avgNumberOfBlocksPerProcess);
resultsCallback.data().exposeValue("globalNumberOfCells", globalNumberOfCells);
resultsCallback.data().exposeValue("avgNumberOfCellsPerProcess", avgNumberOfCellsPerProcess);
for(uint_t l = 0; l < setupBfs.getNumberOfLevels(); ++l){
uint_t numBlocks = setupBfs.getNumberOfBlocks(l);
std::string keyTotal("globalNumberOfLevel" + std::to_string(l) + "Blocks");
resultsCallback.data().exposeValue(&keyTotal[0], numBlocks);
real_t avgNumBlocks = real_c(numBlocks) / real_c(numProcesses);
std::string keyAvg("avgNumberOfLevel" + std::to_string(l) + "BlocksPerProcess");
resultsCallback.data().exposeValue(&keyAvg[0], avgNumBlocks);
}
resultsCallback.data().exposeValue("totalCellsPerBlock", totalCellsPerBlock);
resultsCallback.data().exposeValue("blocksPerLevel", blocksPerLevelPy);
resultsCallback.data().exposeValue("cellsPerLevel", cellsPerLevelPy);
resultsCallback.data().exposeValue("updatesPerRunAndLevel", updatesPerRunAndLevelPy);
resultsCallback.data().exposeValue("totalUpdatesPerRun", totalUpdatesPerRun);
resultsCallback.data().exposeValue("globalRuntime", globalMaxTime);
resultsCallback.data().exposeValue("globalMLUPs", globalMlups);
resultsCallback.data().exposeValue("localMLUPsMin", localMlupsMin);
resultsCallback.data().exposeValue("localMLUPsMax", localMlupsMax);
resultsCallback.data().exposeValue("localMLUPsAvg", localMlupsAvg);
resultsCallback.data().exposeValue("mlupsPerProcess", mlupsPerProcess);
resultsCallback.data().exposeValue("mlupsPerThread", mlupsPerThread);
resultsCallback.data().exposeValue("mlupsPerCore", mlupsPerCore);
resultsCallback.data().exposeValue("mlupsPerNode", mlupsPerNode);
resultsCallback();
}
......
......@@ -2,15 +2,18 @@ import waLBerla as wlb
import os
import sys
import sqlite3
import numpy as np
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
from lbmpy.relaxationrates import relaxation_rate_from_lattice_viscosity
def relaxation_rate_from_lattice_viscosity(nu):
    """Evaluate ``2 / (6 * nu + 1)`` for the given ``nu``.

    :param nu: numeric input; presumably the kinematic viscosity in lattice
               units, judging by the function name (confirm against callers).
    :return: the value of ``2 / (6 * nu + 1)``.
    """
    denominator = 6 * nu + 1
    return 2 / denominator
def safe_int(some_float):
    """Convert ``some_float`` to ``int`` while checking that nothing is truncated.

    :param some_float: numeric value expected to hold a whole number.
    :return: ``int(some_float)``.
    :raises AssertionError: if the converted value does not compare equal to
                            the input (the check is an ``assert`` statement).
    """
    as_int = int(some_float)
    # Guard against silently dropping a fractional part.
    assert as_int == some_float, f"{some_float} is not an integer. :("
    return as_int
class FlowAroundCylinderScenario:
def __init__(self, domainSize, cylinderCenter, cylinderRadius, cylinderRadius_coarsest_LU,
cellsPerBlock,
......@@ -33,6 +36,10 @@ class FlowAroundCylinderScenario:
self.db_file = db_file
self.db_table = db_table
self.num_nodes = int(os.environ['NUM_NODES'])
self.physical_cores_per_node = int(os.environ['PHYSICAL_CORES_PER_NODE'])
self.ppn = int(os.environ['PROCESSES_PER_NODE'])
self.params = {
'domainSize': domainSize,
'rootBlocks': rootBlocks,
......@@ -60,7 +67,11 @@ class FlowAroundCylinderScenario:
'refinementDepth': refinementDepth,
'refinementRadii': refinementRadii,
'refinementTailLengths': refinementTailLengths,
'useUniformPackInfo': useUniformPackInfo
'useUniformPackInfo': useUniformPackInfo,
'numNodes': self.num_nodes,
'physicalCoresPerNode': self.physical_cores_per_node,
'processesPerNode': self.ppn
}
@wlb.member_callback
......@@ -87,14 +98,8 @@ class FlowAroundCylinderScenario:
# Run Information
result['executable'] = sys.argv[0]
# MPI processes and OMP threads come by kwargs
# Environment Information
result['num_nodes'] = int(os.environ.get('NUM_NODES', -1))
result['virtual_cores_per_node'] = int(os.environ.get('VIRTUAL_CORES_PER_NODE', -1))
result['processes_per_node'] = int(os.environ.get('PROCESSES_PER_NODE', -1))
# Codegen Setup and Results
# Processes, Nodes, Threads, etc. come from C++
# Environment Setup, Codegen Setup and Results
result.update(kwargs)
wlb.log_info_on_root(f"Storing Results:\n{result}")
......@@ -110,26 +115,30 @@ class FlowAroundCylinderScenario:
except sqlite3.OperationalError as e:
wlb.log_warning("Sqlite DB writing failed: try {}/{} {}".format(num_try + 1, num_tries, str(e)))
def run_flow_around_cylinder(db_file):
def run_flow_around_cylinder_local():
manager = wlb.ScenarioManager()
os.environ['NUM_NODES'] = '1'
os.environ['PHYSICAL_CORES_PER_NODE'] = '6'
os.environ['PROCESSES_PER_NODE'] = str(wlb.mpi.numProcesses())
cells_per_block = 8
manager.add(FlowAroundCylinderScenario(
domainSize=(18, 8, 1),
cylinderCenter=(4, 4, 0),
cylinderRadius=1,
cylinderRadius_coarsest_LU=16,
cylinderRadius_coarsest_LU=8,
cellsPerBlock=(cells_per_block, cells_per_block, cells_per_block),
peakInflowVelocity_LU=0.05,
reynoldsNumber=100,
walls=False,
refinementProfile='refineEverywhere',
refinementDepth=0,
refinementDepth=1,
refinementRadii=(2, 1.5, 1.125), refinementTailLengths=(10, 8, 6),
useUniformPackInfo=True,
runs=1, steps_per_run=101, outputFrequency=100,
db_file=db_file
runs=1, steps_per_run=101, outputFrequency=20,
db_file='localFlowAroundCylinderBenchmark.sqlite'
))
......@@ -142,10 +151,10 @@ def run_flow_around_cylinder_emmy():
refinementSetups = [
{'refinementProfile': 'refineEverywhere',
'refinementDepth': 0,
'refinementDepth': 2,
'useUniformPackInfo': True},
{'refinementProfile': 'refineEverywhere',
'refinementDepth': 0,
'refinementDepth': 2,
'useUniformPackInfo': False},
{'refinementProfile': 'ConcentricCylinderWake',
'refinementDepth': 3,
......@@ -166,7 +175,7 @@ def run_flow_around_cylinder_emmy():
peakInflowVelocity_LU=0.05,
reynoldsNumber=100,
walls=False,
runs=10, steps_per_run=10, outputFrequency=0,
runs=10, steps_per_run=5, outputFrequency=0,
db_file=f"CylinderBenchmark.sqlite",
db_table=f"setup{i}",
vtkOutputFolder=output,
......@@ -183,6 +192,7 @@ if __name__ == '__main__':
for num_nodes in [64]:
filename = f'{scripts_dir}/flowAroundCylinderBM_{num_nodes}nodes.sh'
with open(filename, 'w') as shfile:
shfile.write(get_script_d3q19_esotwist_True(num_nodes, [1, 5, 10], scenario_script, walltime="03:00:00"))
shfile.write(get_script_d3q19_esotwist_True(num_nodes, [1, 5, 10], scenario_script, walltime="09:00:00"))
else:
#run_flow_around_cylinder_local()
run_flow_around_cylinder_emmy()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment