Commit c8ce4417 authored by Markus Holzer's avatar Markus Holzer
Browse files

Cleaned Turbulent Channel for benchmarking

parent 763ec5e1
Pipeline #39185 failed with stages
in 4 minutes and 29 seconds
...@@ -17,6 +17,10 @@ add_subdirectory( ProbeVsExtraMessage ) ...@@ -17,6 +17,10 @@ add_subdirectory( ProbeVsExtraMessage )
add_subdirectory( SchaeferTurek ) add_subdirectory( SchaeferTurek )
add_subdirectory( UniformGrid ) add_subdirectory( UniformGrid )
if ( WALBERLA_BUILD_WITH_CODEGEN )
add_subdirectory( TurbulentChannel )
endif()
if ( WALBERLA_BUILD_WITH_PYTHON ) if ( WALBERLA_BUILD_WITH_PYTHON )
add_subdirectory( FieldCommunication ) add_subdirectory( FieldCommunication )
......
waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_generate_target_from_python(NAME TurbulentChannelGenerated
FILE TurbulentChannel.py
OUT_FILES TurbulentChannel_LbKernel.cpp TurbulentChannel_LbKernel.h
TurbulentChannel_MacroSetter.cpp TurbulentChannel_MacroSetter.h
TurbulentChannel_MacroGetter.cpp TurbulentChannel_MacroGetter.h
TurbulentChannel_NoSlip.cpp TurbulentChannel_NoSlip.h
TurbulentChannel_Outflow.cpp TurbulentChannel_Outflow.h
TurbulentChannel_UBB.cpp TurbulentChannel_UBB.h
TurbulentChannel_PackInfoEven.cpp TurbulentChannel_PackInfoEven.h
TurbulentChannel_PackInfoOdd.cpp TurbulentChannel_PackInfoOdd.h
TurbulentChannel_InfoHeader.h)
waLBerla_add_executable(NAME TurbulentChannel
FILES TurbulentChannel.cpp
DEPENDS blockforest core field lbm geometry timeloop TurbulentChannelGenerated)
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file UniformGridCPU.cpp
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "core/Environment.h"
#include "core/OpenMP.h"
#include "core/logging/Initialization.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/timing/TimingPool.h"
#include "domain_decomposition/SharedSweep.h"
#include "field/AddToStorage.h"
#include "field/vtk/VTKWriter.h"
#include "geometry/InitBoundaryHandling.h"
#include "lbm/communication/CombinedInPlaceCpuPackInfo.h"
#include "timeloop/all.h"
#include <iomanip>
#include "TurbulentChannel_InfoHeader.h"
using namespace walberla;
using PackInfoEven_T = lbm::TurbulentChannel_PackInfoEven;
using PackInfoOdd_T = lbm::TurbulentChannel_PackInfoOdd;
using LbSweep = lbm::TurbulentChannel_LbKernel;
using FlagField_T = FlagField< uint8_t >;
auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage* const storage) {
return new PdfField_T(storage->getNumberOfXCells(*block), storage->getNumberOfYCells(*block),
storage->getNumberOfZCells(*block), uint_t(1), field::fzyx,
make_shared< field::AllocateAligned< real_t, 64 > >());
};
int main(int argc, char** argv)
{
walberla::Environment walberlaEnv(argc, argv);
auto config = walberlaEnv.config();
auto blocks = blockforest::createUniformBlockGridFromConfig(walberlaEnv.config());
WALBERLA_LOG_INFO_ON_ROOT("Blocks in x direction: " << blocks->getXSize())
WALBERLA_LOG_INFO_ON_ROOT("Blocks in y direction: " << blocks->getYSize())
WALBERLA_LOG_INFO_ON_ROOT("Blocks in z direction: " << blocks->getZSize())
WALBERLA_LOG_INFO_ON_ROOT("Cells in x direction per block: " << blocks->getNumberOfXCellsPerBlock())
WALBERLA_LOG_INFO_ON_ROOT("Cells in y direction per block: " << blocks->getNumberOfYCellsPerBlock())
WALBERLA_LOG_INFO_ON_ROOT("Cells in z direction per block: " << blocks->getNumberOfZCellsPerBlock())
Vector3< uint_t > cellsPerBlock(blocks->getNumberOfXCellsPerBlock(), blocks->getNumberOfYCellsPerBlock(), blocks->getNumberOfZCellsPerBlock());
// Reading parameters
auto parameters = config->getOneBlock("Parameters");
const real_t omega = parameters.getParameter< real_t >("omega");
const real_t force = parameters.getParameter< real_t >("force");
const uint_t timesteps = parameters.getParameter< uint_t >("timesteps");
// Creating fields
BlockDataID pdfFieldId = blocks->addStructuredBlockData< PdfField_T >(pdfFieldAdder, "pdfs");
BlockDataID velFieldId = field::addToStorage< VelocityField_T >(blocks, "vel", real_t(0), field::fzyx);
BlockDataID densityFieldId = field::addToStorage< ScalarField_T >(blocks, "density", real_t(1.0), field::fzyx);
pystencils::TurbulentChannel_MacroSetter setterSweep(densityFieldId, pdfFieldId, velFieldId, force);
pystencils::TurbulentChannel_MacroGetter getterSweep(densityFieldId, pdfFieldId, velFieldId, force);
// Set up initial PDF values
for (auto& block : *blocks)
setterSweep(&block);
Vector3< int > innerOuterSplit =
parameters.getParameter< Vector3< int > >("innerOuterSplit", Vector3< int >(1, 1, 1));
for (uint_t i = 0; i < 3; ++i)
{
if (int_c(cellsPerBlock[i]) <= innerOuterSplit[i] * 2)
{
WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock")
}
}
Cell innerOuterSplitCell(innerOuterSplit[0], innerOuterSplit[1], innerOuterSplit[2]);
LbSweep lbSweep(pdfFieldId, force, omega, innerOuterSplitCell);
// Boundaries
const FlagUID fluidFlagUID("Fluid");
BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field");
auto boundariesConfig = config->getBlock("Boundaries");
bool boundaries = false;
if (boundariesConfig)
{
WALBERLA_LOG_INFO_ON_ROOT("Setting boundary conditions")
boundaries = true;
geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldID, boundariesConfig);
geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID);
}
lbm::TurbulentChannel_NoSlip noSlip(blocks, pdfFieldId);
noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldID, FlagUID("NoSlip"), fluidFlagUID);
lbm::TurbulentChannel_UBB ubb(blocks, pdfFieldId);
ubb.fillFromFlagField< FlagField_T >(blocks, flagFieldID, FlagUID("UBB"), fluidFlagUID);
lbm::TurbulentChannel_Outflow outflow(blocks, pdfFieldId);
outflow.fillFromFlagField< FlagField_T >(blocks, flagFieldID, FlagUID("Outflow"), fluidFlagUID);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// COMMUNICATION SCHEME ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Initial setup is the post-collision state of an even time step
auto tracker = make_shared< lbm::TimestepTracker >(0);
auto packInfo = make_shared< lbm::CombinedInPlaceCpuPackInfo< PackInfoEven_T, PackInfoOdd_T > >(tracker, pdfFieldId);
blockforest::communication::UniformBufferedScheme< Stencil_T > communication(blocks);
communication.addPackInfo(packInfo);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// TIME STEP DEFINITIONS ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto boundarySweep = [&](IBlock* block, uint8_t t) {
noSlip.run(block, t);
ubb.run(block, t);
outflow.run(block, t);
};
auto boundaryInner = [&](IBlock* block, uint8_t t) {
noSlip.inner(block, t);
ubb.inner(block, t);
outflow.inner(block, t);
};
auto boundaryOuter = [&](IBlock* block, uint8_t t) {
noSlip.outer(block, t);
ubb.outer(block, t);
outflow.outer(block, t);
};
auto simpleOverlapTimeStep = [&]() {
// Communicate post-collision values of previous timestep...
communication.startCommunication();
for (auto& block : *blocks)
{
if (boundaries) boundaryInner(&block, tracker->getCounter());
lbSweep.inner(&block, tracker->getCounterPlusOne());
}
communication.wait();
for (auto& block : *blocks)
{
if (boundaries) boundaryOuter(&block, tracker->getCounter());
lbSweep.outer(&block, tracker->getCounterPlusOne());
}
tracker->advance();
};
auto normalTimeStep = [&]() {
communication.communicate();
for (auto& block : *blocks)
{
if (boundaries) boundarySweep(&block, tracker->getCounter());
lbSweep(&block, tracker->getCounterPlusOne());
}
tracker->advance();
};
// With two-fields patterns, ghost layer cells act as constant stream-in boundaries;
// with in-place patterns, ghost layer cells act as wet-node no-slip boundaries.
auto kernelOnlyFunc = [&]() {
tracker->advance();
for (auto& block : *blocks)
lbSweep(&block, tracker->getCounter());
};
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// TIME LOOP SETUP ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "noOverlap");
std::function< void() > timeStep;
if (timeStepStrategy == "noOverlap")
{
WALBERLA_LOG_INFO_ON_ROOT("Normal timestep without overlapping communication")
timeStep = normalTimeStep;
}
else if (timeStepStrategy == "Timestep with overlapping communication")
{
timeStep = simpleOverlapTimeStep;
}
else if (timeStepStrategy == "kernelOnly")
{
WALBERLA_LOG_INFO_ON_ROOT(
"Running only compute kernel without boundary - this makes only sense for benchmarking!")
// Run initial communication once to provide any missing stream-in populations
communication.communicate();
timeStep = kernelOnlyFunc;
}
else
{
WALBERLA_ABORT_NO_DEBUG_INFO("Invalid value for 'timeStepStrategy': " << timeStepStrategy << ". Allowed values are 'noOverlap', "
"'simpleOverlap', 'kernelOnly'")
}
timeLoop.add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step");
uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
if (vtkWriteFrequency > 0)
{
auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
"simulation_step", false, true, true, false, 0);
auto velWriter = make_shared< field::VTKWriter< VelocityField_T > >(velFieldId, "vel");
vtkOutput->addCellDataWriter(velWriter);
vtkOutput->addBeforeFunction([&]() {
for (auto& block : *blocks)
{
getterSweep(&block);
}
});
timeLoop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// BENCHMARK ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
int warmupSteps = parameters.getParameter< int >("warmupSteps", 2);
int outerIterations = parameters.getParameter< int >("outerIterations", 1);
for (int i = 0; i < warmupSteps; ++i)
timeLoop.singleStep();
real_t remainingTimeLoggerFrequency =
parameters.getParameter< real_t >("remainingTimeLoggerFrequency", -1.0); // in seconds
if (remainingTimeLoggerFrequency > 0)
{
auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps() * uint_c(outerIterations),
remainingTimeLoggerFrequency);
timeLoop.addFuncAfterTimeStep(logger, "remaining time logger");
}
for (int outerIteration = 0; outerIteration < outerIterations; ++outerIteration)
{
timeLoop.setCurrentTimeStepToZero();
WcTimer simTimer;
WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
simTimer.start();
timeLoop.run();
simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
real_t time = simTimer.last();
WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
auto nrOfCells = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process " << mlupsPerProcess)
WALBERLA_LOG_RESULT_ON_ROOT("Time per time step " << time / real_c(timesteps))
}
return EXIT_SUCCESS;
}
from dataclasses import replace
import sympy as sp
import pystencils as ps
from lbmpy.advanced_streaming import Timestep, is_inplace
from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow
from lbmpy.creationfunctions import LBMConfig, LBMOptimisation, LBStencil, create_lb_collision_rule
from lbmpy.enums import Method, Stencil
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep,\
generate_mpidtype_info_from_kernel, generate_info_header
from lbmpy_walberla.additional_data_handler import OutflowAdditionalDataHandler
from lbmpy_walberla import generate_alternating_lbm_sweep, generate_alternating_lbm_boundary, generate_lb_pack_info
omega = sp.symbols('omega')
omega_free = sp.Symbol('omega_free')
info_header = """
const char * infoStencil = "{stencil}";
const char * infoStreamingPattern = "{streaming_pattern}";
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""
with CodeGeneration() as ctx:
stencil = LBStencil(Stencil.D3Q19)
streaming_pattern = "pull"
openmp = True if ctx.openmp else False
field_type = "float64" if ctx.double_accuracy else "float32"
if ctx.optimize_for_localhost:
cpu_vec = {"nontemporal": True, "assume_aligned": True}
else:
cpu_vec = None
q = stencil.Q
dim = stencil.D
assert dim == 3, "This app supports only three-dimensional stencils"
pdfs, pdfs_tmp = ps.fields(f"pdfs({q}), pdfs_tmp({q}): {field_type}[3D]", layout='fzyx')
density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
lbm_config = LBMConfig(stencil=stencil, method=Method.SRT, compressible=True, relaxation_rate=omega,
force=(sp.Symbol("fx"), 0, 0),
field_name=pdfs.name, streaming_pattern=streaming_pattern)
lbm_opt = LBMOptimisation(cse_global=True, cse_pdfs=False, symbolic_field=pdfs, field_layout='fzyx')
if not is_inplace(streaming_pattern):
lbm_opt = replace(lbm_opt, symbolic_temporary_field=pdfs_tmp)
field_swaps = [(pdfs, pdfs_tmp)]
else:
field_swaps = []
# LB Sweep
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
lb_method = collision_rule.method
generate_alternating_lbm_sweep(ctx, 'TurbulentChannel_LbKernel', collision_rule, lbm_config=lbm_config,
lbm_optimisation=lbm_opt, target=ps.Target.CPU,
inner_outer_split=True, field_swaps=field_swaps,
cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
# getter & setter
setter_assignments = macroscopic_values_setter(lb_method,
density=density_field.center, velocity=velocity_field.center_vector,
pdfs=pdfs,
streaming_pattern=streaming_pattern,
previous_timestep=Timestep.EVEN)
getter_assignments = macroscopic_values_getter(lb_method,
density=density_field, velocity=velocity_field,
pdfs=pdfs,
streaming_pattern=streaming_pattern,
previous_timestep=Timestep.EVEN)
generate_sweep(ctx, 'TurbulentChannel_MacroSetter', setter_assignments, target=ps.Target.CPU, cpu_openmp=openmp)
generate_sweep(ctx, 'TurbulentChannel_MacroGetter', getter_assignments, target=ps.Target.CPU, cpu_openmp=openmp)
# Boundaries
noslip = NoSlip()
ubb = UBB((0.05, 0, 0), data_type=field_type)
outflow = ExtrapolationOutflow(stencil[4], lb_method, streaming_pattern=streaming_pattern)
outflow_data_handler = OutflowAdditionalDataHandler(stencil, outflow, target=ps.Target.CPU)
generate_alternating_lbm_boundary(ctx, 'TurbulentChannel_NoSlip', noslip, lb_method, field_name=pdfs.name,
streaming_pattern=streaming_pattern, target=ps.Target.CPU, cpu_openmp=openmp)
generate_alternating_lbm_boundary(ctx, 'TurbulentChannel_UBB', ubb, lb_method, field_name=pdfs.name,
streaming_pattern=streaming_pattern, target=ps.Target.CPU, cpu_openmp=openmp)
generate_alternating_lbm_boundary(ctx, 'TurbulentChannel_Outflow', outflow, lb_method,
target=ps.Target.CPU, streaming_pattern=streaming_pattern,
additional_data_handler=outflow_data_handler,
layout='fzyx')
# communication
generate_lb_pack_info(ctx, 'TurbulentChannel_PackInfo', stencil, pdfs,
streaming_pattern=streaming_pattern, target=ps.Target.CPU,
always_generate_separate_classes=True)
infoHeaderParams = {
'stencil': stencil.name,
'streaming_pattern': streaming_pattern,
'cse_global': int(lbm_opt.cse_global),
'cse_pdfs': int(lbm_opt.cse_pdfs),
}
stencil_typedefs = {'Stencil_T': stencil,
'CommunicationStencil_T': stencil}
field_typedefs = {'PdfField_T': pdfs,
'VelocityField_T': velocity_field,
'ScalarField_T': density_field}
# Info header containing correct template definitions for stencil and field
generate_info_header(ctx, 'TurbulentChannel_InfoHeader',
stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs,
additional_code=info_header.format(**infoHeaderParams))
Parameters
{
omega 1.8;
force 0.0;
timesteps 101;
innerOuterSplit <1, 1, 1>;
timeStepStrategy noOverlap;
remainingTimeLoggerFrequency 60;
vtkWriteFrequency 0;
}
DomainSetup
{
Cells < 32, 32, 32 >;
periodic < 1, 0, 1 >;
}
Boundaries
{
Border { direction N; walldistance -1; flag NoSlip; }
Border { direction S; walldistance -1; flag NoSlip; }
}
Parameters
{
omega 1.8;
force 0.0;
timesteps 101;
innerOuterSplit <1, 1, 1>;
timeStepStrategy noOverlap;
remainingTimeLoggerFrequency 60;
vtkWriteFrequency 0;
}
DomainSetup
{
cellsPerBlock < 32, 32, 32 >;
blocks < 1, 1, 1 >;
periodic < 1, 0, 1 >;
}
Boundaries
{
Border { direction N; walldistance -1; flag NoSlip; }
Border { direction S; walldistance -1; flag NoSlip; }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment