Commit bccf0a06 authored by Frederik Hennig

Prepare Emmy Benchmark

parent 2fb5a778
Pipeline #34219 failed with stages in 2 minutes and 19 seconds
......@@ -2,7 +2,7 @@
waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_link_files_to_builddir( "Setups" )
foreach( config d3q19_pull d3q19_esotwist )
foreach( config d3q19_pull d3q19_esotwist d3q19_esotwist_True )
waLBerla_generate_target_from_python( NAME RefinementCodegen_${config}
FILE RefinementCodegen.py
CODEGEN_CFG ${config}
......
......@@ -58,7 +58,7 @@ using VelocityCallback = std::function< Vector3<real_t> (const Cell &, con
using stencil::Direction;
using blockforest::communication::NonUniformBufferedScheme;
using blockforest::communication::UniformBufferedScheme;
//////////////////////////////////////////////////////////////////
......@@ -579,8 +579,11 @@ void run(std::shared_ptr< Config > & config) {
const std::string vtkBaseFolder = params.getParameter< std::string >("vtkBaseFolder", "vtk_out");
const bool useUniformPackInfo = params.getParameter< bool >("useUniformPackInfo", false);
const uint_t runs = params.getParameter< uint_t >("runs", uint_c(500));
const uint_t stepsPerRun = params.getParameter< uint_t >("stepsPerRun", uint_c(500));
const uint_t warmup = params.getParameter< uint_t >("warmupSteps", uint_c(2));
const uint_t vtkWriteFrequency = params.getParameter< uint_t >("vtkWriteFrequency", uint_c(0));
const double remainingTimeLoggerFrequency =
......@@ -612,12 +615,19 @@ void run(std::shared_ptr< Config > & config) {
flowSetup->setupBoundaryFlagField(*sbfs, flagFieldID);
geometry::setNonBoundaryCellsToDomain< FlagField_T >(*sbfs, flagFieldID, fluidFlagUID, 2);
auto flagFieldWriter = field::createVTKOutput< FlagField_T, uint8_t >(flagFieldID, *sbfs, "boundaryFlagFieldOutput", 1, 0, false, vtkBaseFolder);
flagFieldWriter();
if(vtkWriteFrequency > 0){
auto flagFieldWriter = field::createVTKOutput< FlagField_T, uint8_t >(flagFieldID, *sbfs, "boundaryFlagFieldOutput", 1, 0, false, vtkBaseFolder);
flagFieldWriter();
}
// Maybe exit early
if(stepsPerRun == 0) {
WALBERLA_LOG_INFO_ON_ROOT("Domain setup complete and written to VTK. Terminating, since stepsPerRun == 0.");
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << setupBfs.getNumberOfBlocks());
WALBERLA_LOG_INFO_ON_ROOT("Level 0 Blocks: " << setupBfs.getNumberOfBlocks(0));
WALBERLA_LOG_INFO_ON_ROOT("Level 1 Blocks: " << setupBfs.getNumberOfBlocks(1));
WALBERLA_LOG_INFO_ON_ROOT("Level 2 Blocks: " << setupBfs.getNumberOfBlocks(2));
WALBERLA_LOG_INFO_ON_ROOT("Level 3 Blocks: " << setupBfs.getNumberOfBlocks(3));
return;
}
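These run-control parameters are read from the benchmark's .prm configuration. A hypothetical excerpt of such a block is sketched below; the block name "Parameters" and the concrete values are illustrative assumptions, not part of this commit:

    Parameters
    {
        runs               10;
        stepsPerRun        500;
        warmupSteps        2;
        vtkWriteFrequency  0;
        vtkBaseFolder      vtk_out;
        useUniformPackInfo 0;   // 1 selects the uniform pack info path
    }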
......@@ -645,20 +655,10 @@ void run(std::shared_ptr< Config > & config) {
}
};
// Setup Communication
WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...");
auto comm = std::make_shared< NonUniformBufferedScheme< CommunicationStencil_T > >(sbfs);
auto packInfo = lbm::setupNonuniformPdfCommunication< LatticeModel_T, lbm::GeneratedPackingKernels >(sbfs, pdfFieldID);
comm->addPackInfo(packInfo);
// Velocity Field Setup
WALBERLA_LOG_INFO_ON_ROOT("Setting up velocity field...");
setupUniformVelocityField(sbfs, pdfFieldID, densityFieldID, velocityFieldID, Vector3< real_t > (0.0));
// Recursive Time Step
lbm::GeneratedLatticeModel::Sweep sweep(pdfFieldID);
lbm::BasicRecursiveTimeStep< LatticeModel_T, lbm::GeneratedPackingKernels, LatticeModel_T::Sweep > timestep(
sbfs, pdfFieldID, sweep, boundaryFunctor, comm, packInfo);
Vector3< real_t > initVelocity = params.getParameter< Vector3< real_t > >("initVelocity", Vector3< real_t > (0.0));
setupUniformVelocityField(sbfs, pdfFieldID, densityFieldID, velocityFieldID, initVelocity);
// Timeloop
WALBERLA_LOG_INFO_ON_ROOT("Setting up Time Loop...");
......@@ -689,7 +689,34 @@ void run(std::shared_ptr< Config > & config) {
timeloop.addFuncBeforeTimeStep(flowFieldWriter);
}
timeloop.addFuncAfterTimeStep(timestep);
lbm::GeneratedLatticeModel::Sweep sweep(pdfFieldID);
if(useUniformPackInfo){
auto comm = std::make_shared< UniformBufferedScheme< CommunicationStencil_T > >(sbfs);
auto packInfo = std::make_shared< lbm::UniformGeneratedPdfPackInfo< LatticeModel_T, lbm::GeneratedPackingKernels > >(pdfFieldID);
comm->addPackInfo(packInfo);
std::function< void() > bh = [&](){
for(auto it = sbfs->begin(); it != sbfs->end(); ++it){
Block * b = dynamic_cast< Block * >(it.get());
boundaryFunctor(b);
}
};
timeloop.add() << Sweep(sweep) << AfterFunction(makeSharedFunctor(comm)) << AfterFunction(bh);
} else {
// Setup Communication
WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...");
auto comm = std::make_shared< NonUniformBufferedScheme< CommunicationStencil_T > >(sbfs);
auto packInfo = lbm::setupNonuniformPdfCommunication< LatticeModel_T, lbm::GeneratedPackingKernels >(sbfs, pdfFieldID);
comm->addPackInfo(packInfo);
// Recursive Time Step
lbm::BasicRecursiveTimeStep< LatticeModel_T, lbm::GeneratedPackingKernels, LatticeModel_T::Sweep > timestep(
sbfs, pdfFieldID, sweep, boundaryFunctor, comm, packInfo);
timeloop.addFuncAfterTimeStep(timestep);
}
// Aaaand... Go!
......@@ -711,6 +738,12 @@ void run(std::shared_ptr< Config > & config) {
#define MLUPS (real_c(1e-6))
WALBERLA_LOG_INFO_ON_ROOT("Warming Up...")
for(uint_t t = 0; t < warmup; ++t){
timeloop.singleStep();
}
WALBERLA_LOG_INFO_ON_ROOT("Starting Benchmark...")
for(uint_t run = 0; run < runs; ++run){
timeloop.setCurrentTimeStepToZero();
......
......@@ -26,6 +26,11 @@ with CodeGeneration() as ctx:
q = len(stencil)
streaming_pattern = cfg_tokes[1]
if len(cfg_tokes) >= 3:
serial_packing = bool(cfg_tokes[2])
else:
serial_packing = False
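Note that bool() on any non-empty string is True, so bool(cfg_tokes[2]) only does the right thing because the sole third token generated here is "True". A minimal, more explicit parse, assuming the same underscore-separated token layout, would be:

    serial_packing = len(cfg_tokes) >= 3 and cfg_tokes[2] == 'True'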
# Parameters
omega_shear = sp.Symbol('omega_s')
magic_number = sp.Rational(3, 16)
......@@ -78,7 +83,7 @@ with CodeGeneration() as ctx:
generate_lattice_model(ctx, "GeneratedLatticeModel", collision_rule, ghost_layers=1,
streaming_pattern=streaming_pattern, refinement_scaling=scaling)
generate_packing_kernels(ctx, 'GeneratedPackingKernels', stencil, streaming_pattern, nonuniform=True)
generate_packing_kernels(ctx, 'GeneratedPackingKernels', stencil, streaming_pattern, nonuniform=True, serial=serial_packing)
generate_alternating_lbm_boundary(ctx, "GeneratedNoSlip", noslip, lb_method, field_name=pdfs.name,
streaming_pattern=streaming_pattern)
generate_alternating_lbm_boundary(ctx, "GeneratedUBB", ubb, lb_method, field_name=pdfs.name,
......
......@@ -12,16 +12,16 @@ def safe_int(some_float):
return int(some_float)
class FlowAroundCylinderScenario:
def __init__(self, domainSize, cylinderCenter, cylinderRadius, cylinderRadius_finest_LU,
def __init__(self, domainSize, cylinderCenter, cylinderRadius, cylinderRadius_coarsest_LU,
cellsPerBlock,
peakInflowVelocity_LU, reynoldsNumber, walls,
refinementProfile, refinementDepth,
refinementRadii=(0,0,0), refinementTailLengths=(0,0,0),
useUniformPackInfo=False,
runs=10, steps_per_run=1000, outputFrequency=0,
db_file='benchmark.sqlite'):
db_file='benchmark.sqlite', vtkOutputFolder='vtk_out'):
dx_finest = cylinderRadius / cylinderRadius_finest_LU
dx_coarsest = dx_finest * (2 ** refinementDepth)
dx_coarsest = cylinderRadius / cylinderRadius_coarsest_LU
totalCells_coarsest = tuple(safe_int(ds / dx_coarsest) for ds in domainSize)
rootBlocks = tuple(safe_int(c / cpb) for c, cpb in zip(totalCells_coarsest, cellsPerBlock))
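With the revised formula, dx_coarsest no longer depends on refinementDepth. As a quick sanity check, the Emmy scenario values used further below (cylinderRadius = 1, cylinderRadius_coarsest_LU = 64, domainSize = (18, 8, 1), 8^3 cells per block) give:

    dx_coarsest = 1 / 64                                 # 0.015625
    totalCells_coarsest = (18 * 64, 8 * 64, 1 * 64)      # (1152, 512, 64)
    rootBlocks = (1152 // 8, 512 // 8, 64 // 8)          # (144, 64, 8)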
......@@ -39,7 +39,7 @@ class FlowAroundCylinderScenario:
'periodic': (0, 0 if walls else 1, 1),
'vtkWriteFrequency': outputFrequency,
"vtkBaseFolder": "FlowAroundCylinderOutput",
"vtkBaseFolder": vtkOutputFolder,
'runs': runs,
'stepsPerRun': steps_per_run,
......@@ -58,7 +58,8 @@ class FlowAroundCylinderScenario:
'refinementProfile': refinementProfile,
'refinementDepth': refinementDepth,
'refinementRadii': refinementRadii,
'refinementTailLengths': refinementTailLengths
'refinementTailLengths': refinementTailLengths,
'useUniformPackInfo': useUniformPackInfo
}
@wlb.member_callback
......@@ -111,22 +112,73 @@ class FlowAroundCylinderScenario:
def run_flow_around_cylinder(db_file):
manager = wlb.ScenarioManager()
cells_per_block = 8
manager.add(FlowAroundCylinderScenario(
(16, 8, 2), (4, 4, 0), 1, 128, (64, 64, 64),
0.05, 100, True, 'ConcentricCylinderWake', 2,
refinementRadii=(2, 2, 2), refinementTailLengths=(8, 6, 6),
runs=1, steps_per_run=0, outputFrequency=100, db_file=db_file
domainSize=(18, 8, 1),
cylinderCenter=(4, 4, 0),
cylinderRadius=1,
cylinderRadius_coarsest_LU=16,
cellsPerBlock=(cells_per_block, cells_per_block, cells_per_block),
peakInflowVelocity_LU=0.05,
reynoldsNumber=100,
walls=False,
refinementProfile='refineEverywhere',
refinementDepth=0,
refinementRadii=(2, 1.5, 1.125), refinementTailLengths=(10, 8, 6),
useUniformPackInfo=True,
runs=1, steps_per_run=101, outputFrequency=100,
db_file=db_file
))
def run_flow_around_cylinder_emmy(db_file):
manager = wlb.ScenarioManager()
cylinderRadiusCoarseLU = 64
refinementSetups = [
{'refinementProfile': 'refineEverywhere',
'refinementDepth': 0,
'useUniformPackInfo': True},
{'refinementProfile': 'refineEverywhere',
'refinementDepth': 0,
'useUniformPackInfo': False},
{'refinementProfile': 'ConcentricCylinderWake',
'refinementDepth': 3,
'refinementRadii': (2, 1.5, 1.125),
'refinementTailLengths': (10, 8, 6)}
]
for cells_per_block in [64, 32, 16, 8]:
for i, setup in enumerate(refinementSetups):
output = f"CylinderOutput_{cells_per_block}_{i}"
manager.add(FlowAroundCylinderScenario(
domainSize=(18, 8, 1),
cylinderCenter=(4, 4, 0),
cylinderRadius=1,
cylinderRadius_coarsest_LU=cylinderRadiusCoarseLU,
cellsPerBlock=(cells_per_block, cells_per_block, cells_per_block),
peakInflowVelocity_LU=0.05,
reynoldsNumber=100,
walls=False,
runs=10, steps_per_run=10, outputFrequency=0,
db_file=db_file,
vtkOutputFolder=output,
**setup
))
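Altogether this loop registers 4 * 3 = 12 scenarios (four block sizes times three refinement setups), each timed over 10 runs of 10 steps. A one-line check using the names from this file:

    num_scenarios = len([64, 32, 16, 8]) * len(refinementSetups)   # 4 * 3 = 12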
if __name__ == '__main__':
# make runscript
from makeEmmyRunscript import get_script_d3q19_esotwist
from makeEmmyRunscript import get_script_d3q19_esotwist_True
scenario_script = "FlowAroundCylinderPerformance.py"
scripts_dir = 'cylinderrun'
os.makedirs(scripts_dir, exist_ok=True)
for num_nodes in [2, 4, 8, 16, 32, 64]:
for num_nodes in [64]:
filename = f'{scripts_dir}/flowAroundCylinderBM_{num_nodes}nodes.sh'
with open(filename, 'w') as shfile:
shfile.write(get_script_d3q19_esotwist(num_nodes, [1, 5, 10], scenario_script, walltime="02:00:00"))
shfile.write(get_script_d3q19_esotwist_True(num_nodes, [1, 5, 10], scenario_script, walltime="08:00:00"))
else:
run_flow_around_cylinder('FlowAroundCylinderScalingBenchmarkEmmy.sqlite')
run_flow_around_cylinder_emmy('FlowAroundCylinderScalingBenchmarkEmmy.sqlite')
......@@ -4,7 +4,7 @@ EMMY_PHYSICAL_CORES_PER_NODE = 20
SCRIPT_HEADER_TEMPLATE = """#!/bin/bash -l
#
#PBS -l nodes={num_nodes}:ppn=40,walltime={walltime}
#PBS -N waLBerlaRefinedLidDrivenCavityBenchmark
#PBS -N {name}
#PBS -M frederik.hennig@fau.de
#PBS -m abe
......@@ -27,13 +27,14 @@ def safe_int(some_float):
assert int(some_float) == some_float
return int(some_float)
def get_script_d3q19_esotwist(num_nodes, omp_threads, scenario_script, walltime="01:00:00"):
def get_script_d3q19_esotwist_True(num_nodes, omp_threads, scenario_script, name="walberlaRefinementBenchmark", walltime="01:00:00"):
script = SCRIPT_HEADER_TEMPLATE.format(
num_nodes=num_nodes,
walltime=walltime
walltime=walltime,
name=name
)
config = 'd3q19_esotwist'
config = 'd3q19_esotwist_True'
for t in omp_threads:
procs_per_node = safe_int(EMMY_PHYSICAL_CORES_PER_NODE / t)
......
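For illustration, rendering SCRIPT_HEADER_TEMPLATE with the values used in FlowAroundCylinderPerformance.py (64 nodes, a walltime of 08:00:00, and the default job name) yields a PBS header along these lines (a sketch of the formatted output only):

    #!/bin/bash -l
    #
    #PBS -l nodes=64:ppn=40,walltime=08:00:00
    #PBS -N walberlaRefinementBenchmark
    #PBS -M frederik.hennig@fau.de
    #PBS -m abe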
......@@ -23,7 +23,7 @@ from lbmpy_walberla.alternating_sweeps import EvenIntegerCondition
def generate_packing_kernels(generation_context, class_name: str, stencil, streaming_pattern,
namespace='lbm', nonuniform=False, **create_kernel_params):
namespace='lbm', nonuniform=False, serial=False, **create_kernel_params):
if 'cpu_vectorize_info' in create_kernel_params:
vec_params = create_kernel_params['cpu_vectorize_info']
if 'instruction_set' in vec_params and vec_params['instruction_set'] is not None:
......@@ -33,6 +33,8 @@ def generate_packing_kernels(generation_context, class_name: str, stencil, strea
stencil = get_stencil(stencil)
create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params)
if serial:
create_kernel_params['cpu_openmp'] = False
target = create_kernel_params.get('target', 'cpu')
if target == 'gpu':
raise NotImplementedError("Packing kernels for GPU are not yet implemented")
......