//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file NonUniformGridGPU.cpp
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/timing/TimingPool.h"
#include "field/AddToStorage.h"
#include "field/FlagField.h"
#include "field/vtk/VTKWriter.h"
#include "geometry/InitBoundaryHandling.h"
#include "gpu/AddGPUFieldToStorage.h"
#include "gpu/DeviceSelectMPI.h"
#include "gpu/ErrorChecking.h"
#include "gpu/FieldCopy.h"
#include "gpu/HostFieldAllocator.h"
#include "gpu/ParallelStreams.h"
#include "gpu/communication/NonUniformGPUScheme.h"
#include "python_coupling/CreateConfig.h"
#include "python_coupling/DictWrapper.h"
#include "python_coupling/PythonCallback.h"
#include <cmath>
#include "GridGeneration.h"
#include "LdcSetup.h"
#include "NonUniformGridGPUInfoHeader.h"
#include "lbm_generated/evaluation/PerformanceEvaluation.h"
#include "lbm_generated/field/AddToStorage.h"
#include "lbm_generated/field/PdfField.h"
#include "lbm_generated/gpu/AddToStorage.h"
#include "lbm_generated/gpu/BasicRecursiveTimeStepGPU.h"
#include "lbm_generated/gpu/GPUPdfField.h"
#include "lbm_generated/gpu/NonuniformGeneratedGPUPdfPackInfo.h"
using namespace walberla;
using StorageSpecification_T = lbm::NonUniformGridGPUStorageSpecification;
using Stencil_T = StorageSpecification_T::Stencil;
using CommunicationStencil_T = StorageSpecification_T::CommunicationStencil;
using PdfField_T = lbm_generated::PdfField< StorageSpecification_T >;
using GPUPdfField_T = lbm_generated::GPUPdfField< StorageSpecification_T >;
using FlagField_T = FlagField< uint8_t >;
using BoundaryCollection_T = lbm::NonUniformGridGPUBoundaryCollection< FlagField_T >;
using SweepCollection_T = lbm::NonUniformGridGPUSweepCollection;
using gpu::communication::NonUniformGPUScheme;
int main(int argc, char** argv)
{
const mpi::Environment env(argc, argv);
mpi::MPIManager::instance()->useWorldComm();
gpu::selectDeviceBasedOnMpiRank();
const std::string input_filename(argv[1]);
const bool inputIsPython = string_ends_with(input_filename, ".py");
for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
{
WALBERLA_MPI_WORLD_BARRIER()
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// SETUP AND CONFIGURATION ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto config = *cfg;
logging::configureLogging(config);
auto domainSetup = config->getOneBlock("DomainSetup");
auto blockForestSetup = config->getOneBlock("SetupBlockForest");
const bool writeSetupForestAndReturn = blockForestSetup.getParameter< bool >("writeSetupForestAndReturn", true);
Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock");
// Reading parameters
auto parameters = config->getOneBlock("Parameters");
const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.95));
const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50));
const bool gpuEnabledMPI = parameters.getParameter< bool >("gpuEnabledMPI", false);
shared_ptr< BlockForest > bfs;
createBlockForest(bfs, domainSetup, blockForestSetup);
if (writeSetupForestAndReturn && mpi::MPIManager::instance()->numProcesses() == 1)
{
WALBERLA_LOG_INFO_ON_ROOT("BlockForest has been created and writen to file. Returning program")
return EXIT_SUCCESS;
}
auto blocks =
std::make_shared< StructuredBlockForest >(bfs, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
blocks->createCellBoundingBoxes();
WALBERLA_LOG_INFO_ON_ROOT("Blocks created: " << blocks->getNumberOfBlocks() << " on " << blocks->getNumberOfLevels() << " refinement levels")
for (uint_t level = 0; level < blocks->getNumberOfLevels(); level++)
{
WALBERLA_LOG_INFO_ON_ROOT("Level " << level << " Blocks: " << blocks->getNumberOfBlocks(level))
}
WALBERLA_LOG_INFO_ON_ROOT("Start field allocation")
// Creating fields
const StorageSpecification_T StorageSpec = StorageSpecification_T();
auto allocator = make_shared< gpu::HostFieldAllocator< real_t > >();
const BlockDataID pdfFieldCpuID =
lbm_generated::addPdfFieldToStorage(blocks, "pdfs", StorageSpec, uint_c(2), field::fzyx, allocator);
const BlockDataID velFieldCpuID =
field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx, uint_c(2), allocator);
const BlockDataID densityFieldCpuID =
field::addToStorage< ScalarField_T >(blocks, "density", real_c(1.0), field::fzyx, uint_c(2), allocator);
const BlockDataID flagFieldID =
field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field", uint_c(3));
const BlockDataID pdfFieldGpuID =
lbm_generated::addGPUPdfFieldToStorage< PdfField_T >(blocks, pdfFieldCpuID, StorageSpec, "pdfs on GPU", true);
const BlockDataID velFieldGpuID =
gpu::addGPUFieldToStorage< VelocityField_T >(blocks, velFieldCpuID, "velocity on GPU", true);
const BlockDataID densityFieldGpuID =
gpu::addGPUFieldToStorage< ScalarField_T >(blocks, densityFieldCpuID, "density on GPU", true);
WALBERLA_LOG_INFO_ON_ROOT("Finished field allocation")
const Cell innerOuterSplit =
Cell(parameters.getParameter< Vector3< cell_idx_t > >("innerOuterSplit", Vector3< cell_idx_t >(1, 1, 1)));
Vector3< int32_t > gpuBlockSize =
parameters.getParameter< Vector3< int32_t > >("gpuBlockSize", Vector3< int32_t >(256, 1, 1));
SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0],
gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit);
for (auto& iBlock : *blocks){
sweepCollection.initialise(&iBlock, cell_idx_c(1));
}
sweepCollection.initialiseBlockPointer();
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Initialisation done")
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// LB SWEEPS AND BOUNDARY HANDLING ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto ldc = std::make_shared< LDC >(blocks->getDepth());
const FlagUID fluidFlagUID("Fluid");
ldc->setupBoundaryFlagField(*blocks, flagFieldID);
geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID, 0);
BoundaryCollection_T boundaryCollection(blocks, flagFieldID, pdfFieldGpuID, fluidFlagUID);
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// COMMUNICATION SCHEME ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
WALBERLA_LOG_INFO_ON_ROOT("Setting up communication...")
auto communication = std::make_shared< NonUniformGPUScheme< CommunicationStencil_T > >(blocks, gpuEnabledMPI);
auto packInfo = lbm_generated::setupNonuniformGPUPdfCommunication< GPUPdfField_T >(blocks, pdfFieldGpuID);
communication->addPackInfo(packInfo);
WALBERLA_MPI_BARRIER()
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// TIME STEP DEFINITIONS ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
int streamHighPriority = 0;
int streamLowPriority = 0;
WALBERLA_GPU_CHECK(gpuDeviceGetStreamPriorityRange(&streamLowPriority, &streamHighPriority))
sweepCollection.setOuterPriority(streamHighPriority);
auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority);
lbm_generated::BasicRecursiveTimeStepGPU< GPUPdfField_T, SweepCollection_T, BoundaryCollection_T >
LBMMeshRefinement(blocks, pdfFieldGpuID, sweepCollection, boundaryCollection, communication, packInfo);
SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);
LBMMeshRefinement.addRefinementToTimeLoop(timeLoop, uint_c(0));
// VTK
const uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
const bool useVTKAMRWriter = parameters.getParameter< bool >("useVTKAMRWriter", false);
const bool oneFilePerProcess = parameters.getParameter< bool >("oneFilePerProcess", false);
auto finalDomain = blocks->getDomain();
if (vtkWriteFrequency > 0){
auto vtkOutput =
vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", "simulation_step",
false, true, true, false, 0, useVTKAMRWriter, oneFilePerProcess);
auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float32 > >(velFieldCpuID, "vel");
vtkOutput->addCellDataWriter(velWriter);
if (parameters.getParameter< bool >("writeOnlySlice", true)){
const AABB sliceXY(finalDomain.xMin(), finalDomain.yMin(), finalDomain.center()[2] - blocks->dz(blocks->getDepth()),
finalDomain.xMax(), finalDomain.yMax(), finalDomain.center()[2] + blocks->dz(blocks->getDepth()));
vtkOutput->addCellInclusionFilter(vtk::AABBCellFilter(sliceXY));
}
vtkOutput->addBeforeFunction([&]() {
for (auto& block : *blocks)
sweepCollection.calculateMacroscopicParameters(&block);
gpu::fieldCpy< VelocityField_T, gpu::GPUField< real_t > >(blocks, velFieldCpuID, velFieldGpuID);
});
timeLoop.addFuncAfterTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/// BENCHMARK ///
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
auto remainingTimeLoggerFrequency =
parameters.getParameter< real_t >("remainingTimeLoggerFrequency", real_c(-1.0)); // in seconds
if (remainingTimeLoggerFrequency > 0){
auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency);
timeLoop.addFuncAfterTimeStep(logger, "remaining time logger");
}
lbm_generated::PerformanceEvaluation< FlagField_T > const performance(blocks, flagFieldID, fluidFlagUID);
field::CellCounter< FlagField_T > fluidCells(blocks, flagFieldID, fluidFlagUID);
fluidCells();
WALBERLA_LOG_INFO_ON_ROOT("Non uniform Grid benchmark with " << fluidCells.numberOfCells()
<< " fluid cells (in total on all levels)")
WcTimingPool timeloopTiming;
WcTimer simTimer;
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
WALBERLA_MPI_BARRIER()
simTimer.start();
timeLoop.run(timeloopTiming);
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
WALBERLA_MPI_BARRIER()
simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
double time = simTimer.max();
WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
performance.logResultOnRoot(timesteps, time);
const auto reducedTimeloopTiming = timeloopTiming.getReduced();
WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
WALBERLA_ROOT_SECTION()
{
if (inputIsPython)
{
python_coupling::PythonCallback pythonCallbackResults("results_callback");
if (pythonCallbackResults.isCallable())
{
pythonCallbackResults.data().exposeValue("numProcesses", lbm_generated::PerformanceEvaluation< FlagField_T >::processes());
pythonCallbackResults.data().exposeValue("numThreads", performance.threads());
pythonCallbackResults.data().exposeValue("numCores", performance.cores());
pythonCallbackResults.data().exposeValue("numberOfCells", performance.numberOfCells());
pythonCallbackResults.data().exposeValue("numberOfFluidCells", performance.numberOfFluidCells());
pythonCallbackResults.data().exposeValue("mlups", performance.mlups(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerCore", performance.mlupsPerCore(timesteps, time));
pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
pythonCallbackResults.data().exposeValue("stencil", infoStencil);
pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup);
pythonCallbackResults.data().exposeValue("cse_global", infoCseGlobal);
pythonCallbackResults.data().exposeValue("cse_pdfs", infoCsePdfs);
// Call Python function to report results
pythonCallbackResults();
}
}
}
}
return EXIT_SUCCESS;
}
import sympy as sp
import numpy as np
import pystencils as ps
from pystencils.typing import TypedSymbol
from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_method, create_lb_collision_rule
from lbmpy import LBMConfig, LBMOptimisation, Stencil, Method, LBStencil, SubgridScaleModel
from pystencils_walberla import CodeGeneration, generate_info_header
from lbmpy_walberla import generate_lbm_package, lbm_boundary_generator
omega = sp.symbols("omega")
omega_free = sp.Symbol("omega_free")
compile_time_block_size = False
max_threads = 256
sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
TypedSymbol("cudaBlockSize1", np.int32),
TypedSymbol("cudaBlockSize2", np.int32))
gpu_indexing_params = {'block_size': sweep_block_size}
info_header = """
const char * infoStencil = "{stencil}";
const char * infoStreamingPattern = "{streaming_pattern}";
const char * infoCollisionSetup = "{collision_setup}";
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""
with CodeGeneration() as ctx:
field_type = "float64" if ctx.double_accuracy else "float32"
streaming_pattern = 'esopull'
timesteps = get_timesteps(streaming_pattern)
stencil = LBStencil(Stencil.D3Q19)
method_enum = Method.CUMULANT
fourth_order_correction = 0.01 if method_enum == Method.CUMULANT and stencil.Q == 27 else False
collision_setup = "cumulant-K17" if fourth_order_correction else method_enum.name.lower()
assert stencil.D == 3, "This application supports only three-dimensional stencils"
pdfs, pdfs_tmp = ps.fields(f"pdfs({stencil.Q}), pdfs_tmp({stencil.Q}): {field_type}[3D]", layout='fzyx')
density_field, velocity_field = ps.fields(f"density, velocity(3) : {field_type}[3D]", layout='fzyx')
macroscopic_fields = {'density': density_field, 'velocity': velocity_field}
lbm_config = LBMConfig(stencil=stencil, method=method_enum, relaxation_rate=omega, compressible=True,
fourth_order_correction=fourth_order_correction,
streaming_pattern=streaming_pattern)
lbm_opt = LBMOptimisation(cse_global=False, field_layout='fzyx')
method = create_lb_method(lbm_config=lbm_config)
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
no_slip = lbm_boundary_generator(class_name='NoSlip', flag_uid='NoSlip',
boundary_object=NoSlip())
ubb = lbm_boundary_generator(class_name='UBB', flag_uid='UBB',
boundary_object=UBB([0.05, 0, 0], data_type=field_type))
generate_lbm_package(ctx, name="NonUniformGridGPU",
collision_rule=collision_rule,
lbm_config=lbm_config, lbm_optimisation=lbm_opt,
nonuniform=True, boundaries=[no_slip, ubb],
macroscopic_fields=macroscopic_fields,
target=ps.Target.GPU, gpu_indexing_params=gpu_indexing_params,
max_threads=max_threads)
infoHeaderParams = {
'stencil': stencil.name.lower(),
'streaming_pattern': streaming_pattern,
'collision_setup': collision_setup,
'cse_global': int(lbm_opt.cse_global),
'cse_pdfs': int(lbm_opt.cse_pdfs),
}
field_typedefs = {'VelocityField_T': velocity_field,
'ScalarField_T': density_field}
generate_info_header(ctx, 'NonUniformGridGPUInfoHeader',
field_typedefs=field_typedefs,
additional_code=info_header.format(**infoHeaderParams))
import waLBerla as wlb
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
import sqlite3
import os
import sys
try:
import machinestate as ms
except ImportError:
ms = None
DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3")
BENCHMARK = int(os.environ.get('BENCHMARK', 0))
WeakX = int(os.environ.get('WeakX', 128))
WeakY = int(os.environ.get('WeakY', 128))
WeakZ = int(os.environ.get('WeakZ', 128))
StrongX = int(os.environ.get('StrongX', 128))
StrongY = int(os.environ.get('StrongY', 128))
StrongZ = int(os.environ.get('StrongZ', 128))
class Scenario:
def __init__(self,
domain_size=(64, 64, 64),
root_blocks=(2, 2, 2),
num_processes=1,
refinement_depth=0,
cells_per_block=(32, 32, 32),
timesteps=101,
gpu_enabled_mpi=False,
vtk_write_frequency=0,
logger_frequency=30,
blockforest_filestem="blockforest",
write_setup_vtk=True,
db_file_name=None):
self.domain_size = domain_size
self.root_blocks = root_blocks
self.cells_per_block = cells_per_block
self.periodic = (0, 0, 1)
self.refinement_depth = refinement_depth
self.num_processes = num_processes
self.bfs_filestem = blockforest_filestem
self.write_setup_vtk = write_setup_vtk
self.timesteps = timesteps
self.gpu_enabled_mpi = gpu_enabled_mpi
self.vtk_write_frequency = vtk_write_frequency
self.logger_frequency = logger_frequency
self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False)
@wlb.member_callback
def config(self, print_dict=True):
from pprint import pformat
config_dict = {
'DomainSetup': {
'domainSize': self.domain_size,
'rootBlocks': self.root_blocks,
'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic,
},
'SetupBlockForest': {
'refinementDepth': self.refinement_depth,
'numProcesses': self.num_processes,
'blockForestFilestem': self.bfs_filestem,
'writeVtk': self.write_setup_vtk,
'outputStatistics': True,
'writeSetupForestAndReturn': True,
},
'Parameters': {
'omega': 1.95,
'timesteps': self.timesteps,
'remainingTimeLoggerFrequency': self.logger_frequency,
'vtkWriteFrequency': self.vtk_write_frequency,
'useVTKAMRWriter': True,
'oneFilePerProcess': False,
'writeOnlySlice': False,
'gpuEnabledMPI': self.gpu_enabled_mpi,
'gpuBlockSize': (128, 1, 1),
},
'Logging': {
'logLevel': "info",
}
}
if print_dict:
wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
return config_dict
@wlb.member_callback
def results_callback(self, **kwargs):
data = {}
data.update(self.config_dict['Parameters'])
data.update(self.config_dict['DomainSetup'])
data.update(kwargs)
data['executable'] = sys.argv[0]
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
if ms:
state = ms.MachineState(extended=False, anonymous=True)
state.generate() # generate subclasses
state.update() # read information
data["MachineState"] = str(state.get())
else:
print("MachineState module is not available. MachineState was not saved")
sequenceValuesToScalars(data)
result = data
num_tries = 4
# Retry a few times; the write may fail when several benchmark processes access the database concurrently
table_name = "runs"
table_name = table_name.replace("-", "_")
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, table_name, self.db_file_name)
storeSingle(result, table_name, self.db_file_name)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
def validation_run():
"""Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
wlb.log_info_on_root("Validation run")
domain_size = (192, 192, 64)
cells_per_block = (64, 64, 64)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
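# With the sizes above this gives root_blocks == (3, 3, 1), i.e. nine root blocks
# before refinement (192 // 64 == 3 and 64 // 64 == 1).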
scenarios = wlb.ScenarioManager()
scenario = Scenario(domain_size=domain_size,
root_blocks=root_blocks,
cells_per_block=cells_per_block,
timesteps=0,
vtk_write_frequency=0,
refinement_depth=3,
gpu_enabled_mpi=False)
scenarios.add(scenario)
def weak_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True):
wlb.log_info_on_root("Running weak scaling benchmark...")
# This benchmark is intended to be run on 16 or more GPUs
if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if uniform:
factor = 3 * num_proc
name = "uniform"
else:
if num_proc % 16 != 0:
raise RuntimeError("Number of processes must be divisible by 16")
factor = int(num_proc // 16)
name = "nonuniform"
cells_per_block = (WeakX, WeakY, WeakZ)
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
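# Hedged example of the resulting setup: for the non-uniform case with num_proc = 16 and
# the default WeakX/WeakY/WeakZ of 128, factor == 1, domain_size == (384, 384, 128) and
# root_blocks == (3, 3, 1); the refinement (depth 3) then multiplies the block count.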
scenarios = wlb.ScenarioManager()
scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks,
num_processes=num_proc,
cells_per_block=cells_per_block,
refinement_depth=0 if uniform else 3,
timesteps=10,
gpu_enabled_mpi=gpu_enabled_mpi,
db_file_name=f"weakScalingGPU{name}LDC.sqlite3")
scenarios.add(scenario)
def strong_scaling_ldc(num_proc, gpu_enabled_mpi=False, uniform=True):
wlb.log_info_on_root("Running strong scaling benchmark...")
# This benchmark is intended to be run on 64 or more GPUs
if wlb.mpi.numProcesses() > 1:
num_proc = wlb.mpi.numProcesses()
if num_proc % 64 != 0:
raise RuntimeError("Number of processes must be divisible by 64")
cells_per_block = (StrongX, StrongY, StrongZ)
if uniform:
domain_size = (cells_per_block[0] * 2, cells_per_block[1] * 2, cells_per_block[2] * 16)
name = "uniform"
else:
factor = int(num_proc / 64)
blocks64 = block_decomposition(factor)
cells_per_block = tuple([int(c / b) for c, b in zip(cells_per_block, reversed(blocks64))])
domain_size = (cells_per_block[0] * 3, cells_per_block[1] * 3, cells_per_block[2] * factor)
name = "nonuniform"
root_blocks = tuple([d // c for d, c in zip(domain_size, cells_per_block)])
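# Hedged example: in the uniform case with the default StrongX/StrongY/StrongZ of 128,
# domain_size == (256, 256, 2048) and root_blocks == (2, 2, 16), i.e. 64 root blocks,
# which is why num_proc has to be a multiple of 64.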
scenarios = wlb.ScenarioManager()
scenario = Scenario(blockforest_filestem=f"blockforest_{name}_{num_proc}",
domain_size=domain_size,
root_blocks=root_blocks,
num_processes=num_proc,
cells_per_block=cells_per_block,
refinement_depth=0 if uniform else 3,
timesteps=10,
gpu_enabled_mpi=gpu_enabled_mpi,
db_file_name=f"strongScalingGPU{name}LDC.sqlite3")
scenarios.add(scenario)
if BENCHMARK == 0:
validation_run()
elif BENCHMARK == 1:
weak_scaling_ldc(1, True, False)
elif BENCHMARK == 2:
strong_scaling_ldc(1, True, False)
else:
print(f"Invalid benchmark case {BENCHMARK}")
waLBerla_link_files_to_builddir("*.prm")
if (WALBERLA_BUILD_WITH_CODEGEN)
if (NOT WALBERLA_BUILD_WITH_GPU_SUPPORT OR (WALBERLA_BUILD_WITH_GPU_SUPPORT AND (CMAKE_CUDA_ARCHITECTURES GREATER_EQUAL 60 OR WALBERLA_BUILD_WITH_HIP)))
waLBerla_add_executable(NAME Percolation FILES Percolation.cpp
DEPENDS walberla::blockforest walberla::core walberla::field walberla::geometry walberla::gpu walberla::lbm walberla::lbm_mesapd_coupling walberla::mesa_pd walberla::sqlite walberla::vtk PSMCodegenPython_trt-smagorinsky_sc1 )
target_compile_definitions(Percolation PRIVATE Weighting=2)
endif ()
endif ()
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file Percolation.cpp
//! \ingroup lbm_mesapd_coupling
//! \author Samuel Kemmler <samuel.kemmler@fau.de>
//
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "core/DataTypes.h"
#include "core/Environment.h"
#include "core/grid_generator/SCIterator.h"
#include "core/logging/all.h"
#include "core/timing/RemainingTimeLogger.h"
#include "field/AddToStorage.h"
#include "field/vtk/all.h"
#include "geometry/InitBoundaryHandling.h"
#include "gpu/AddGPUFieldToStorage.h"
#include "gpu/DeviceSelectMPI.h"
#include "gpu/communication/UniformGPUScheme.h"
#include "lbm/PerformanceLogger.h"
#include "lbm/vtk/all.h"
#include "lbm_mesapd_coupling/DataTypesCodegen.h"
#include "lbm_mesapd_coupling/partially_saturated_cells_method/codegen/PSMSweepCollection.h"
#include "lbm_mesapd_coupling/utility/ParticleSelector.h"
#include "mesa_pd/data/DataTypes.h"
#include "mesa_pd/data/ParticleAccessorWithShape.h"
#include "mesa_pd/data/ParticleStorage.h"
#include "mesa_pd/data/ShapeStorage.h"
#include "mesa_pd/data/shape/Sphere.h"
#include "mesa_pd/domain/BlockForestDomain.h"
#include "mesa_pd/mpi/SyncNextNeighbors.h"
#include "mesa_pd/vtk/ParticleVtkOutput.h"
#include "sqlite/SQLite.h"
#include "vtk/all.h"
#include "LBMSweep.h"
#include "PSMPackInfo.h"
#include "PSMSweep.h"
#include "PSM_Density.h"
#include "PSM_InfoHeader.h"
#include "PSM_MacroGetter.h"
namespace percolation
{
///////////
// USING //
///////////
using namespace walberla;
using namespace lbm_mesapd_coupling::psm::gpu;
typedef pystencils::PSMPackInfo PackInfo_T;
using flag_t = walberla::uint8_t;
using FlagField_T = FlagField< flag_t >;
///////////
// FLAGS //
///////////
const FlagUID Fluid_Flag("Fluid");
const FlagUID Density_Flag("Density");
const FlagUID NoSlip_Flag("NoSlip");
const FlagUID Inflow_Flag("Inflow");
//////////
// MAIN //
//////////
//*******************************************************************************************************************
/*!\brief Benchmark of a percolation setup
*
* This code can be used as a percolation (useParticles=true) or as a channel flow (useParticles=false) benchmark.
* A constant inflow from west is applied and a pressure boundary condition is set at the east.
* For the percolation, mono-sized fixed spherical particles are generated on a structured grid with an offset for
* every second particle layer in flow direction to avoid channels in flow direction. The flow is described by Darcy's
* law. For the channel flow, the flow is described by the Hagen–Poiseuille equation.
*
* The domain is either periodic or bounded by (no slip) walls in the vertical directions (y and z).
*
* For the percolation, the PSM is used in combination with a two-way coupling, but no particle dynamics.
* For the channel flow, only the LBM is used.
*
* The parameters can be changed via the input file.
*
*/
//*******************************************************************************************************************
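// For orientation only (hedged, not used by the code below): in the creeping-flow regime
// targeted here and in LBM units with density close to 1, the reference solutions are
//   Darcy's law (percolation):            <u> = -(k / nu) * dp/dx,   with permeability k,
//   plane Poiseuille (channel of gap H):  u(y) = (-dp/dx) / (2 * nu) * y * (H - y).
// These textbook forms serve only as a rough sanity check for the simulated velocities.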
int main(int argc, char** argv)
{
Environment env(argc, argv);
auto cfgFile = env.config();
if (!cfgFile) { WALBERLA_ABORT("Usage: " << argv[0] << " path-to-configuration-file \n"); }
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
gpu::selectDeviceBasedOnMpiRank();
#endif
WALBERLA_LOG_INFO_ON_ROOT("waLBerla revision: " << std::string(WALBERLA_GIT_SHA1).substr(0, 8));
WALBERLA_LOG_INFO_ON_ROOT("compiler flags: " << std::string(WALBERLA_COMPILER_FLAGS));
WALBERLA_LOG_INFO_ON_ROOT("build machine: " << std::string(WALBERLA_BUILD_MACHINE));
WALBERLA_LOG_INFO_ON_ROOT(*cfgFile);
// Read config file
Config::BlockHandle numericalSetup = cfgFile->getBlock("NumericalSetup");
const uint_t numXBlocks = numericalSetup.getParameter< uint_t >("numXBlocks");
const uint_t numYBlocks = numericalSetup.getParameter< uint_t >("numYBlocks");
const uint_t numZBlocks = numericalSetup.getParameter< uint_t >("numZBlocks");
WALBERLA_CHECK_EQUAL(numXBlocks * numYBlocks * numZBlocks, uint_t(MPIManager::instance()->numProcesses()),
"When using GPUs, the number of blocks ("
<< numXBlocks * numYBlocks * numZBlocks << ") has to match the number of MPI processes ("
<< uint_t(MPIManager::instance()->numProcesses()) << ")");
const bool periodicInY = numericalSetup.getParameter< bool >("periodicInY");
const bool periodicInZ = numericalSetup.getParameter< bool >("periodicInZ");
const uint_t numXCellsPerBlock = numericalSetup.getParameter< uint_t >("numXCellsPerBlock");
const uint_t numYCellsPerBlock = numericalSetup.getParameter< uint_t >("numYCellsPerBlock");
const uint_t numZCellsPerBlock = numericalSetup.getParameter< uint_t >("numZCellsPerBlock");
const bool sendDirectlyFromGPU = numericalSetup.getParameter< bool >("sendDirectlyFromGPU");
const bool useCommunicationHiding = numericalSetup.getParameter< bool >("useCommunicationHiding");
const Vector3< uint_t > frameWidth = numericalSetup.getParameter< Vector3< uint_t > >("frameWidth");
const uint_t timeSteps = numericalSetup.getParameter< uint_t >("timeSteps");
const bool useParticles = numericalSetup.getParameter< bool >("useParticles");
const real_t particleDiameter = numericalSetup.getParameter< real_t >("particleDiameter");
const real_t particleGenerationSpacing = numericalSetup.getParameter< real_t >("particleGenerationSpacing");
const Vector3< real_t > generationDomainFraction =
numericalSetup.getParameter< Vector3< real_t > >("generationDomainFraction");
const Vector3< uint_t > generationPointOfReferenceOffset =
numericalSetup.getParameter< Vector3< uint_t > >("generationPointOfReferenceOffset");
const bool useParticleOffset = numericalSetup.getParameter< bool >("useParticleOffset");
const Vector3< uint_t > particleSubBlockSize =
numericalSetup.getParameter< Vector3< uint_t > >("particleSubBlockSize");
const real_t uInflow = numericalSetup.getParameter< real_t >("uInflow");
const real_t relaxationRate = numericalSetup.getParameter< real_t >("relaxationRate");
if ((periodicInY && numYBlocks == 1) || (periodicInZ && numZBlocks == 1))
{
WALBERLA_LOG_WARNING_ON_ROOT("Using only 1 block in periodic dimensions can lead to unexpected behavior.")
}
const real_t viscosity = lbm::collision_model::viscosityFromOmega(relaxationRate);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(viscosity)
Config::BlockHandle outputSetup = cfgFile->getBlock("Output");
const uint_t vtkSpacing = outputSetup.getParameter< uint_t >("vtkSpacing");
const std::string vtkFolder = outputSetup.getParameter< std::string >("vtkFolder");
const uint_t performanceLogFrequency = outputSetup.getParameter< uint_t >("performanceLogFrequency");
///////////////////////////
// BLOCK STRUCTURE SETUP //
///////////////////////////
const bool periodicInX = false;
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid(
numXBlocks, numYBlocks, numZBlocks, numXCellsPerBlock, numYCellsPerBlock, numZCellsPerBlock, real_t(1), uint_t(0),
false, false, periodicInX, periodicInY, periodicInZ, // periodicity
false);
auto simulationDomain = blocks->getDomain();
////////////
// MesaPD //
////////////
auto rpdDomain = std::make_shared< mesa_pd::domain::BlockForestDomain >(blocks->getBlockForestPointer());
// Init data structures
auto ps = walberla::make_shared< mesa_pd::data::ParticleStorage >(1);
auto ss = walberla::make_shared< mesa_pd::data::ShapeStorage >();
using ParticleAccessor_T = mesa_pd::data::ParticleAccessorWithShape;
auto accessor = walberla::make_shared< ParticleAccessor_T >(ps, ss);
auto sphereShape = ss->create< mesa_pd::data::Sphere >(particleDiameter * real_t(0.5));
// Create spheres
if (useParticles)
{
// Ensure that generation domain is computed correctly
WALBERLA_CHECK_FLOAT_EQUAL(simulationDomain.xMin(), real_t(0));
WALBERLA_CHECK_FLOAT_EQUAL(simulationDomain.yMin(), real_t(0));
WALBERLA_CHECK_FLOAT_EQUAL(simulationDomain.zMin(), real_t(0));
auto generationDomain = math::AABB::createFromMinMaxCorner(
math::Vector3< real_t >(simulationDomain.xMax() * (real_t(1) - generationDomainFraction[0]) / real_t(2),
simulationDomain.yMax() * (real_t(1) - generationDomainFraction[1]) / real_t(2),
simulationDomain.zMax() * (real_t(1) - generationDomainFraction[2]) / real_t(2)),
math::Vector3< real_t >(simulationDomain.xMax() * (real_t(1) + generationDomainFraction[0]) / real_t(2),
simulationDomain.yMax() * (real_t(1) + generationDomainFraction[1]) / real_t(2),
simulationDomain.zMax() * (real_t(1) + generationDomainFraction[2]) / real_t(2)));
real_t particleOffset = particleGenerationSpacing / real_t(2);
for (auto pt :
grid_generator::SCGrid(generationDomain, generationDomain.center() + generationPointOfReferenceOffset,
particleGenerationSpacing))
{
// Offset every second particle layer in flow direction to avoid channels in flow direction
if (useParticleOffset &&
uint_t(round(math::abs(generationDomain.center()[0] - pt[0]) / (particleGenerationSpacing))) % uint_t(2) !=
uint_t(0))
{
pt = pt + Vector3(real_t(0), particleOffset, particleOffset);
}
if (rpdDomain->isContainedInProcessSubdomain(uint_c(mpi::MPIManager::instance()->rank()), pt))
{
mesa_pd::data::Particle&& p = *ps->create();
p.setPosition(pt);
p.setInteractionRadius(particleDiameter * real_t(0.5));
p.setOwner(mpi::MPIManager::instance()->rank());
p.setShapeID(sphereShape);
p.setType(0);
}
}
}
////////////////////////
// ADD DATA TO BLOCKS //
////////////////////////
// Setting initial PDFs to nan helps to detect bugs in the initialization/BC handling
// Depending on WALBERLA_BUILD_WITH_GPU_SUPPORT, pdfFieldCPUGPUID is either a GPU or a CPU field
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
BlockDataID pdfFieldID =
field::addToStorage< PdfField_T >(blocks, "pdf field (fzyx)", real_c(std::nan("")), field::fzyx);
BlockDataID BFieldID = field::addToStorage< BField_T >(blocks, "B field", 0, field::fzyx, 1);
BlockDataID pdfFieldCPUGPUID = gpu::addGPUFieldToStorage< PdfField_T >(blocks, pdfFieldID, "pdf field GPU");
#else
BlockDataID pdfFieldCPUGPUID =
field::addToStorage< PdfField_T >(blocks, "pdf field CPU", real_c(std::nan("")), field::fzyx);
#endif
BlockDataID densityFieldID = field::addToStorage< DensityField_T >(blocks, "density field", real_t(0), field::fzyx);
BlockDataID velFieldID = field::addToStorage< VelocityField_T >(blocks, "velocity field", real_t(0), field::fzyx);
BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
// Synchronize particles between the blocks for the correct mapping of ghost particles
mesa_pd::mpi::SyncNextNeighbors syncNextNeighborFunc;
syncNextNeighborFunc(*ps, *rpdDomain);
// Assemble boundary block string
std::string boundariesBlockString = " Boundaries"
"{"
"Border { direction W; walldistance -1; flag Inflow; }"
"Border { direction E; walldistance -1; flag Density; }";
if (!periodicInY)
{
boundariesBlockString += "Border { direction S; walldistance -1; flag NoSlip; }"
"Border { direction N; walldistance -1; flag NoSlip; }";
}
if (!periodicInZ)
{
boundariesBlockString += "Border { direction T; walldistance -1; flag NoSlip; }"
"Border { direction B; walldistance -1; flag NoSlip; }";
}
boundariesBlockString += "}";
WALBERLA_ROOT_SECTION()
{
std::ofstream boundariesFile("boundaries.prm");
boundariesFile << boundariesBlockString;
boundariesFile.close();
}
WALBERLA_MPI_BARRIER()
auto boundariesCfgFile = Config();
boundariesCfgFile.readParameterFile("boundaries.prm");
auto boundariesConfig = boundariesCfgFile.getBlock("Boundaries");
// map boundaries into the LBM simulation
geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldID, boundariesConfig);
geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, Fluid_Flag);
lbm::PSM_Density density_bc(blocks, pdfFieldCPUGPUID, real_t(1.0));
density_bc.fillFromFlagField< FlagField_T >(blocks, flagFieldID, Density_Flag, Fluid_Flag);
lbm::PSM_NoSlip noSlip(blocks, pdfFieldCPUGPUID);
noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldID, NoSlip_Flag, Fluid_Flag);
lbm::PSM_UBB ubb(blocks, pdfFieldCPUGPUID, uInflow, real_t(0), real_t(0));
ubb.fillFromFlagField< FlagField_T >(blocks, flagFieldID, Inflow_Flag, Fluid_Flag);
///////////////
// TIME LOOP //
///////////////
// Map particles into the fluid domain
ParticleAndVolumeFractionSoA_T< Weighting > particleAndVolumeFractionSoA(blocks, relaxationRate);
PSMSweepCollection psmSweepCollection(blocks, accessor, lbm_mesapd_coupling::RegularParticlesSelector(),
particleAndVolumeFractionSoA, particleSubBlockSize);
if (useParticles)
{
for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
psmSweepCollection.particleMappingSweep(&(*blockIt));
}
}
// Initialize PDFs
pystencils::InitializeDomainForPSM pdfSetter(
particleAndVolumeFractionSoA.BsFieldID, particleAndVolumeFractionSoA.BFieldID,
particleAndVolumeFractionSoA.particleVelocitiesFieldID, pdfFieldCPUGPUID, real_t(0), real_t(0), real_t(0),
real_t(1.0), real_t(0), real_t(0), real_t(0));
for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
// pdfSetter requires particle velocities at cell centers
if (useParticles) { psmSweepCollection.setParticleVelocitiesSweep(&(*blockIt)); }
pdfSetter(&(*blockIt));
}
// Setup of the LBM communication for synchronizing the pdf field between neighboring blocks
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
gpu::communication::UniformGPUScheme< Stencil_T > com(blocks, sendDirectlyFromGPU, false);
#else
walberla::blockforest::communication::UniformBufferedScheme< Stencil_T > com(blocks);
#endif
com.addPackInfo(make_shared< PackInfo_T >(pdfFieldCPUGPUID));
auto communication = std::function< void() >([&]() { com.communicate(); });
SweepTimeloop commTimeloop(blocks->getBlockStorage(), timeSteps);
SweepTimeloop timeloop(blocks->getBlockStorage(), timeSteps);
timeloop.addFuncBeforeTimeStep(RemainingTimeLogger(timeloop.getNrOfTimeSteps()), "Remaining Time Logger");
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
pystencils::PSM_MacroGetter getterSweep(BFieldID, densityFieldID, pdfFieldID, velFieldID, real_t(0.0), real_t(0.0),
real_t(0.0));
#else
pystencils::PSM_MacroGetter getterSweep(particleAndVolumeFractionSoA.BFieldID, densityFieldID, pdfFieldCPUGPUID,
velFieldID, real_t(0.0), real_t(0.0), real_t(0.0));
#endif
// VTK output
if (vtkSpacing != uint_t(0))
{
// Spheres
auto particleVtkOutput = make_shared< mesa_pd::vtk::ParticleVtkOutput >(ps);
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleUid >("uid");
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleLinearVelocity >("velocity");
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleInteractionRadius >("radius");
// Limit output to process-local spheres
particleVtkOutput->setParticleSelector([sphereShape](const mesa_pd::data::ParticleStorage::iterator& pIt) {
return pIt->getShapeID() == sphereShape &&
!(mesa_pd::data::particle_flags::isSet(pIt->getFlags(), mesa_pd::data::particle_flags::GHOST));
});
auto particleVtkWriter = vtk::createVTKOutput_PointData(particleVtkOutput, "particles", vtkSpacing, vtkFolder);
timeloop.addFuncBeforeTimeStep(vtk::writeFiles(particleVtkWriter), "VTK (sphere data)");
// Fields
auto pdfFieldVTK = vtk::createVTKOutput_BlockData(blocks, "fluid", vtkSpacing, 0, false, vtkFolder);
pdfFieldVTK->addBeforeFunction(communication);
pdfFieldVTK->addBeforeFunction([&]() {
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
gpu::fieldCpy< PdfField_T, gpu::GPUField< real_t > >(blocks, pdfFieldID, pdfFieldCPUGPUID);
gpu::fieldCpy< GhostLayerField< real_t, 1 >, BFieldGPU_T >(blocks, BFieldID,
particleAndVolumeFractionSoA.BFieldID);
#endif
for (auto& block : *blocks)
getterSweep(&block);
});
pdfFieldVTK->addCellDataWriter(make_shared< field::VTKWriter< VelocityField_T > >(velFieldID, "Velocity"));
pdfFieldVTK->addCellDataWriter(make_shared< field::VTKWriter< DensityField_T > >(densityFieldID, "Density"));
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
pdfFieldVTK->addCellDataWriter(make_shared< field::VTKWriter< BField_T > >(BFieldID, "OverlapFraction"));
#else
pdfFieldVTK->addCellDataWriter(
make_shared< field::VTKWriter< BField_T > >(particleAndVolumeFractionSoA.BFieldID, "OverlapFraction"));
#endif
pdfFieldVTK->addCellDataWriter(make_shared< field::VTKWriter< FlagField_T > >(flagFieldID, "FlagField"));
timeloop.addFuncBeforeTimeStep(vtk::writeFiles(pdfFieldVTK), "VTK (fluid field data)");
}
if (vtkSpacing != uint_t(0)) { vtk::writeDomainDecomposition(blocks, "domain_decomposition", vtkFolder); }
// Add performance logging
lbm::PerformanceLogger< FlagField_T > performanceLogger(blocks, flagFieldID, Fluid_Flag, performanceLogFrequency);
if (performanceLogFrequency > 0)
{
timeloop.addFuncAfterTimeStep(performanceLogger, "Evaluate performance logging");
}
// Add LBM communication function and boundary handling sweep
if (useCommunicationHiding)
{
timeloop.add() << Sweep(deviceSyncWrapper(density_bc.getSweep()), "Boundary Handling (Density)");
}
else
{
timeloop.add() << BeforeFunction(communication, "LBM Communication")
<< Sweep(deviceSyncWrapper(density_bc.getSweep()), "Boundary Handling (Density)");
}
timeloop.add() << Sweep(deviceSyncWrapper(ubb.getSweep()), "Boundary Handling (UBB)");
if (!periodicInY || !periodicInZ)
{
timeloop.add() << Sweep(deviceSyncWrapper(noSlip.getSweep()), "Boundary Handling (NoSlip)");
}
// PSM kernel
pystencils::PSMSweep PSMSweep(particleAndVolumeFractionSoA.BsFieldID, particleAndVolumeFractionSoA.BFieldID,
particleAndVolumeFractionSoA.particleForcesFieldID,
particleAndVolumeFractionSoA.particleVelocitiesFieldID, pdfFieldCPUGPUID, real_t(0.0),
real_t(0.0), real_t(0.0), relaxationRate);
pystencils::PSMSweepSplit PSMSplitSweep(
particleAndVolumeFractionSoA.BsFieldID, particleAndVolumeFractionSoA.BFieldID,
particleAndVolumeFractionSoA.particleForcesFieldID, particleAndVolumeFractionSoA.particleVelocitiesFieldID,
pdfFieldCPUGPUID, real_t(0.0), real_t(0.0), real_t(0.0), relaxationRate, frameWidth);
pystencils::LBMSweep LBMSweep(pdfFieldCPUGPUID, real_t(0.0), real_t(0.0), real_t(0.0), relaxationRate);
pystencils::LBMSplitSweep LBMSplitSweep(pdfFieldCPUGPUID, real_t(0.0), real_t(0.0), real_t(0.0), relaxationRate,
frameWidth);
if (useParticles)
{
if (useCommunicationHiding)
{
addPSMSweepsToTimeloops(commTimeloop, timeloop, com, psmSweepCollection, PSMSplitSweep);
}
else { addPSMSweepsToTimeloop(timeloop, psmSweepCollection, PSMSweep); }
}
else
{
if (useCommunicationHiding)
{
commTimeloop.add() << BeforeFunction([&]() { com.startCommunication(); }, "LBM Communication (start)")
<< Sweep(deviceSyncWrapper(LBMSplitSweep.getInnerSweep()), "LBM inner sweep")
<< AfterFunction([&]() { com.wait(); }, "LBM Communication (wait)");
timeloop.add() << Sweep(deviceSyncWrapper(LBMSplitSweep.getOuterSweep()), "LBM outer sweep");
}
else { timeloop.add() << Sweep(deviceSyncWrapper(LBMSweep), "LBM sweep"); }
}
WcTimingPool timeloopTiming;
// TODO: maybe add warmup phase
for (uint_t timeStep = 0; timeStep < timeSteps; ++timeStep)
{
if (useCommunicationHiding) { commTimeloop.singleStep(timeloopTiming); }
timeloop.singleStep(timeloopTiming);
}
timeloopTiming.logResultOnRoot();
auto timeloopTimingReduced = timeloopTiming.getReduced();
// Write parameters and performance results in sqlite database
WALBERLA_ROOT_SECTION()
{
// Use DB_FILE environment variable if set
std::string dbFile;
if (std::getenv("DB_FILE") != nullptr) { dbFile = std::getenv("DB_FILE"); }
else
{
if (useParticles) { dbFile = "percolation_benchmark.sqlite3"; }
else { dbFile = "channel_flow_benchmark.sqlite3"; }
}
std::map< std::string, int > integerProperties;
std::map< std::string, double > realProperties;
std::map< std::string, std::string > stringProperties;
integerProperties["numXBlocks"] = int(numXBlocks);
integerProperties["numYBlocks"] = int(numYBlocks);
integerProperties["numZBlocks"] = int(numZBlocks);
integerProperties["numXCellsPerBlock"] = int(numXCellsPerBlock);
integerProperties["numYCellsPerBlock"] = int(numYCellsPerBlock);
integerProperties["numZCellsPerBlock"] = int(numZCellsPerBlock);
integerProperties["timeSteps"] = int(timeSteps);
integerProperties["sendDirectlyFromGPU"] = int(sendDirectlyFromGPU);
integerProperties["useCommunicationHiding"] = int(useCommunicationHiding);
integerProperties["communicationHidingXWidth"] = int(frameWidth[0]);
integerProperties["communicationHidingYWidth"] = int(frameWidth[1]);
integerProperties["communicationHidingZWidth"] = int(frameWidth[2]);
integerProperties["useParticles"] = int(useParticles);
integerProperties["numParticles"] = int(ps->size());
integerProperties["particleSubBlockXSize"] = int(particleSubBlockSize[0]);
integerProperties["particleSubBlockYSize"] = int(particleSubBlockSize[1]);
integerProperties["particleSubBlockZSize"] = int(particleSubBlockSize[2]);
realProperties["particleDiameter"] = double(particleDiameter);
realProperties["particleGenerationSpacing"] = double(particleGenerationSpacing);
performanceLogger.getBestResultsForSQLOnRoot(integerProperties, realProperties, stringProperties);
auto runId = sqlite::storeRunInSqliteDB(dbFile, integerProperties, stringProperties, realProperties);
sqlite::storeTimingPoolInSqliteDB(dbFile, runId, *timeloopTimingReduced, "Timeloop");
}
return EXIT_SUCCESS;
}
} // namespace percolation
int main(int argc, char** argv) { return percolation::main(argc, argv); }
NumericalSetup
{
// the product of the numbers of blocks must equal the number of MPI processes
numXBlocks 1;
numYBlocks 1;
numZBlocks 1;
periodicInY false;
periodicInZ false;
numXCellsPerBlock 256;
numYCellsPerBlock 128;
numZCellsPerBlock 128;
timeSteps 100;
sendDirectlyFromGPU false; // if true, use direct GPU-GPU communication
useCommunicationHiding false;
frameWidth <1, 1, 1>; // width of the outer region if splitting the LBM/PSM into inner and outer (only used if useCommunicationHiding is true)
// particle distribution in LBM units
useParticles true; // if true, use the PSM (particle mapping, velocity computation, hydrodynamic force reduction); otherwise run plain LBM
particleDiameter 20.0;
particleGenerationSpacing 21.0;
generationDomainFraction <0.8, 1.0, 1.0>; // fraction of the domain where particles are generated
generationPointOfReferenceOffset <0, 0, 0>; // offset of point of reference from domain center, see SCIterator.h
useParticleOffset true; // offset every second particle layer in flow direction
particleSubBlockSize <8, 8, 8>;
// fluid quantities in LBM units
uInflow 0.00008;
relaxationRate 0.9;
}
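// Rough sanity check (hedged, not read by the application): with relaxationRate 0.9 the
// lattice viscosity is nu = (1/omega - 0.5) / 3 ~ 0.204, so the particle Reynolds number is
// Re = uInflow * particleDiameter / nu ~ 0.00008 * 20 / 0.204 ~ 0.008, i.e. well inside the
// creeping-flow regime assumed for Darcy's law.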
Output
{
vtkSpacing 0;
vtkFolder vtk_out;
performanceLogFrequency 100;
}
waLBerla_link_files_to_builddir(*.prm)
waLBerla_link_files_to_builddir(*.py)
if (WALBERLA_BUILD_WITH_CUDA)
waLBerla_generate_target_from_python(NAME BenchmarkPhaseFieldCodeGenGPU
FILE multiphase_codegen.py
OUT_FILES initialize_phase_field_distributions.cu initialize_phase_field_distributions.h
initialize_velocity_based_distributions.cu initialize_velocity_based_distributions.h
phase_field_LB_step.cu phase_field_LB_step.h
hydro_LB_step.cu hydro_LB_step.h
PackInfo_phase_field_distributions.cu PackInfo_phase_field_distributions.h
PackInfo_phase_field.cu PackInfo_phase_field.h
PackInfo_velocity_based_distributions.cu PackInfo_velocity_based_distributions.h
GenDefines.h)
waLBerla_generate_target_from_python(NAME BenchmarkPhaseFieldCodeGen
FILE multiphase_codegen.py
OUT_FILES initialize_phase_field_distributions.${CODEGEN_FILE_SUFFIX} initialize_phase_field_distributions.h
initialize_velocity_based_distributions.${CODEGEN_FILE_SUFFIX} initialize_velocity_based_distributions.h
phase_field_LB_step.${CODEGEN_FILE_SUFFIX} phase_field_LB_step.h
hydro_LB_step.${CODEGEN_FILE_SUFFIX} hydro_LB_step.h
PackInfo_phase_field_distributions.${CODEGEN_FILE_SUFFIX} PackInfo_phase_field_distributions.h
PackInfo_phase_field.${CODEGEN_FILE_SUFFIX} PackInfo_phase_field.h
PackInfo_velocity_based_distributions.${CODEGEN_FILE_SUFFIX} PackInfo_velocity_based_distributions.h
GenDefines.h)
if (WALBERLA_BUILD_WITH_GPU_SUPPORT )
waLBerla_add_executable(NAME benchmark_multiphase
FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py
DEPENDS blockforest core cuda field postprocessing lbm geometry timeloop gui BenchmarkPhaseFieldCodeGenGPU)
set_target_properties(benchmark_multiphase PROPERTIES CXX_VISIBILITY_PRESET hidden)
DEPENDS walberla::blockforest walberla::core walberla::gpu walberla::field walberla::postprocessing walberla::python_coupling walberla::lbm_generated walberla::geometry walberla::timeloop BenchmarkPhaseFieldCodeGen )
else ()
waLBerla_generate_target_from_python(NAME BenchmarkPhaseFieldCodeGenCPU
FILE multiphase_codegen.py
OUT_FILES initialize_phase_field_distributions.cpp initialize_phase_field_distributions.h
initialize_velocity_based_distributions.cpp initialize_velocity_based_distributions.h
phase_field_LB_step.cpp phase_field_LB_step.h
hydro_LB_step.cpp hydro_LB_step.h
PackInfo_phase_field_distributions.cpp PackInfo_phase_field_distributions.h
PackInfo_phase_field.cpp PackInfo_phase_field.h
PackInfo_velocity_based_distributions.cpp PackInfo_velocity_based_distributions.h
GenDefines.h)
waLBerla_add_executable(NAME benchmark_multiphase
FILES benchmark_multiphase.cpp InitializerFunctions.cpp multiphase_codegen.py
DEPENDS blockforest core field postprocessing lbm geometry timeloop gui BenchmarkPhaseFieldCodeGenCPU)
set_target_properties(benchmark_multiphase PROPERTIES CXX_VISIBILITY_PRESET hidden)
endif (WALBERLA_BUILD_WITH_CUDA)
DEPENDS walberla::blockforest walberla::core walberla::field walberla::postprocessing walberla::python_coupling walberla::lbm_generated walberla::geometry walberla::timeloop BenchmarkPhaseFieldCodeGen )
endif (WALBERLA_BUILD_WITH_GPU_SUPPORT )
@@ -3,30 +3,50 @@ import waLBerla as wlb
import pandas as pd
from waLBerla.tools.sqlitedb import sequenceValuesToScalars
from waLBerla.tools.config import block_decomposition
import sys
from math import prod
try:
import machinestate as ms
except ImportError:
ms = None
def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, size_per_value=8):
"""Checks if a single block of given size fits into GPU memory"""
cells = prod(b + 2 * gls for b in block_size)
# two PDF sets (current and temporary) for the phase-field and the hydrodynamic LB, plus 3 velocity values and the phase field with its temporary copy
values_per_cell = 2 * q_phase + 2 * q_hydro + 3 + 2
needed_memory = values_per_cell * cells * size_per_value
return needed_memory < total_mem
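# Hedged back-of-the-envelope check of the estimate above: for the 320^3 block used in
# benchmark() below with one ghost layer, cells = 322**3 (about 3.3e7) and
# values_per_cell = 2*15 + 2*27 + 3 + 2 = 89, so a block needs roughly
# 89 * 322**3 * 8 bytes ~ 22 GiB and fits into the default 40 GB budget (GPU_MEMORY_GB).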
class Scenario:
def __init__(self, timeStepStrategy, overlappingWidth, cuda_block_size):
def __init__(self, time_step_strategy,
cuda_block_size,
cells_per_block=(256, 256, 256),
cuda_enabled_mpi=False):
# output frequencies
self.vtkWriteFrequency = 0
# simulation parameters
self.timesteps = 101
edge_size = 32
self.cells = (edge_size, edge_size, edge_size)
self.blocks = (1, 1, 1)
self.cells_per_block = cells_per_block
self.blocks = block_decomposition(wlb.mpi.numProcesses())
self.periodic = (1, 1, 1)
self.size = (self.cells[0] * self.blocks[0],
self.cells[1] * self.blocks[1],
self.cells[2] * self.blocks[2])
self.size = (self.cells_per_block[0] * self.blocks[0],
self.cells_per_block[1] * self.blocks[1],
self.cells_per_block[2] * self.blocks[2])
self.timeStepStrategy = timeStepStrategy
self.overlappingWidth = overlappingWidth
self.timeStepStrategy = time_step_strategy
self.cuda_block_size = cuda_block_size
self.warmupSteps = 10
self.cudaEnabledMpi = cuda_enabled_mpi
# bubble parameters
self.bubbleRadius = min(self.size) // 4
self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)
@@ -41,19 +61,18 @@ class Scenario:
return {
'DomainSetup': {
'blocks': self.blocks,
'cellsPerBlock': self.cells,
'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic,
},
'Parameters': {
'timesteps': self.timesteps,
'vtkWriteFrequency': self.vtkWriteFrequency,
'useGui': 0,
'remainingTimeLoggerFrequency': -1,
'timeStepStrategy': self.timeStepStrategy,
'overlappingWidth': self.overlappingWidth,
'gpuBlockSize': self.cuda_block_size,
'warmupSteps': self.warmupSteps,
'scenario': self.scenario,
'cudaEnabledMpi': self.cudaEnabledMpi
},
'Boundaries_GPU': {
'Border': []
@@ -77,6 +96,14 @@ class Scenario:
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
if ms:
state = ms.MachineState(extended=False, anonymous=True)
state.generate() # generate subclasses
state.update() # read information
data["MachineState"] = str(state.get())
else:
print("MachineState module is not available. MachineState was not saved")
sequenceValuesToScalars(data)
df = pd.DataFrame.from_records([data])
@@ -86,43 +113,22 @@ class Scenario:
df.to_csv(self.csv_file, index=False, mode='a', header=False)
def overlap_benchmark():
scenarios = wlb.ScenarioManager()
overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)]
cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), (512, 1, 1),
(32, 2, 1), (64, 2, 1), (128, 2, 1), (256, 2, 1),
(32, 4, 1), (64, 4, 1), (128, 4, 1),
(32, 4, 2), (64, 4, 2), (128, 2, 2),
(32, 8, 1), (64, 8, 1), (64, 4, 2),
(32, 16, 1), (16, 16, 1), (16, 16, 2)]
# no overlap
scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1), cuda_block_size=(16, 16, 1)))
# overlap
for overlap_strategy in ['overlap']:
for overlappingWidth in overlappingWidths:
for cuda_block in cuda_blocks:
scenario = Scenario(timeStepStrategy=overlap_strategy,
overlappingWidth=overlappingWidth,
cuda_block_size=cuda_block)
scenarios.add(scenario)
def kernel_benchmark():
def benchmark():
scenarios = wlb.ScenarioManager()
# overlap
# for overlap_strategy in ['phase_only', 'hydro_only', 'kernel_only']:
for overlap_strategy in ['overlap']:
scenario = Scenario(timeStepStrategy=overlap_strategy,
overlappingWidth=(1, 1, 1),
cuda_block_size=(128, 1, 1))
scenarios.add(scenario)
gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 40))
gpu_mem = gpu_mem_gb * (2 ** 30)
block_size = (320, 320, 320)
cuda_enabled_mpi = True
if not domain_block_size_ok(block_size, gpu_mem):
wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
else:
scenarios.add(Scenario(time_step_strategy='normal',
cuda_block_size=(128, 1, 1),
cells_per_block=block_size,
cuda_enabled_mpi=cuda_enabled_mpi))
# overlap_benchmark()
kernel_benchmark()
benchmark()
@@ -3,57 +3,55 @@ import waLBerla as wlb
import pandas as pd
from waLBerla.tools.sqlitedb import sequenceValuesToScalars
from waLBerla.tools.config import block_decomposition
import sys
class Scenario:
def __init__(self, timeStepStrategy, overlappingWidth):
def __init__(self, time_step_strategy, cells_per_block=(256, 256, 256),
cuda_enabled_mpi=False):
# output frequencies
self.vtkWriteFrequency = -1
self.vtkWriteFrequency = 0
# simulation parameters
self.timesteps = 500
edge_size = 50
self.cells = (edge_size, edge_size, edge_size)
self.blocks = (1, 1, 1)
self.timesteps = 101
self.cells_per_block = cells_per_block
self.blocks = block_decomposition(wlb.mpi.numProcesses())
self.periodic = (1, 1, 1)
self.size = (self.cells[0] * self.blocks[0],
self.cells[1] * self.blocks[1],
self.cells[2] * self.blocks[2])
self.size = (self.cells_per_block[0] * self.blocks[0],
self.cells_per_block[1] * self.blocks[1],
self.cells_per_block[2] * self.blocks[2])
self.timeStepStrategy = timeStepStrategy
self.overlappingWidth = overlappingWidth
self.cuda_block_size = (-1, -1, -1)
self.timeStepStrategy = time_step_strategy
self.warmupSteps = 10
self.cudaEnabledMpi = cuda_enabled_mpi
# bubble parameters
self.bubbleRadius = min(self.size) // 4
self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)
self.scenario = 1 # 1 rising bubble, 2 RTI
self.config_dict = self.config()
self.csv_file = "benchmark_cpu.csv"
self.csv_file = "benchmark.csv"
@wlb.member_callback
def config(self):
return {
'DomainSetup': {
'blocks': self.blocks,
'cellsPerBlock': self.cells,
'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic,
},
'Parameters': {
'timesteps': self.timesteps,
'vtkWriteFrequency': self.vtkWriteFrequency,
'useGui': 0,
'remainingTimeLoggerFrequency': 10.0,
'remainingTimeLoggerFrequency': -1,
'timeStepStrategy': self.timeStepStrategy,
'overlappingWidth': self.overlappingWidth,
'gpuBlockSize': self.cuda_block_size,
'warmupSteps': self.warmupSteps,
'scenario': self.scenario,
'scenario': 1,
'cudaEnabledMpi': self.cudaEnabledMpi
},
'Boundaries_GPU': {
'Border': []
......@@ -86,31 +84,23 @@ class Scenario:
df.to_csv(self.csv_file, index=False, mode='a', header=False)
def overlap_benchmark():
def benchmark():
scenarios = wlb.ScenarioManager()
overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32),
(64, 32, 32), (64, 64, 32), (64, 64, 64)]
# no overlap
scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1)))
# overlap
for overlap_strategy in ['overlap']:
for overlappingWidth in overlappingWidths:
scenario = Scenario(timeStepStrategy=overlap_strategy,
overlappingWidth=overlappingWidth)
scenarios.add(scenario)
block_size = (64, 64, 64)
scenarios.add(Scenario(time_step_strategy='normal', cells_per_block=block_size))
def kernel_benchmark():
scenarios = wlb.ScenarioManager()
block_sizes = [(i, i, i) for i in (8, 16, 32, 64, 128)]
# overlap
scenario = Scenario(timeStepStrategy='overlap',
overlappingWidth=(8, 8, 8))
scenarios.add(scenario)
for time_step_strategy in ['phase_only', 'hydro_only', 'kernel_only', 'normal']:
for block_size in block_sizes:
scenario = Scenario(time_step_strategy=time_step_strategy,
cells_per_block=block_size)
scenarios.add(scenario)
# overlap_benchmark()
# benchmark()
kernel_benchmark()
......@@ -18,7 +18,6 @@
//
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "core/Environment.h"
#include "core/logging/Initialization.h"
......@@ -30,6 +29,7 @@
#include "field/vtk/VTKWriter.h"
#include "geometry/InitBoundaryHandling.h"
#include "lbm_generated/evaluation/PerformanceEvaluation.h"
#include "python_coupling/CreateConfig.h"
#include "python_coupling/DictWrapper.h"
......@@ -37,8 +37,6 @@
#include "timeloop/SweepTimeloop.h"
#include <blockforest/communication/UniformBufferedScheme.h>
#include "InitializerFunctions.h"
//////////////////////////////
......@@ -46,53 +44,31 @@
////////////////////////////
#if defined(WALBERLA_BUILD_WITH_CUDA)
# include "cuda/AddGPUFieldToStorage.h"
# include "cuda/DeviceSelectMPI.h"
# include "cuda/HostFieldAllocator.h"
# include "cuda/ParallelStreams.h"
# include "cuda/communication/GPUPackInfo.h"
# include "cuda/communication/MemcpyPackInfo.h"
# include "cuda/communication/UniformGPUScheme.h"
# include "GenDefines.h"
# include "PackInfo_phase_field.h"
# include "PackInfo_phase_field_distributions.h"
# include "PackInfo_velocity_based_distributions.h"
# include "hydro_LB_step.h"
# include "initialize_phase_field_distributions.h"
# include "initialize_velocity_based_distributions.h"
# include "phase_field_LB_step.h"
# include "gpu/AddGPUFieldToStorage.h"
# include "gpu/DeviceSelectMPI.h"
# include "gpu/ParallelStreams.h"
# include "gpu/communication/MemcpyPackInfo.h"
# include "gpu/communication/UniformGPUScheme.h"
#else
# include "GenDefines.h"
# include "PackInfo_phase_field.h"
# include "PackInfo_phase_field_distributions.h"
# include "PackInfo_velocity_based_distributions.h"
# include "hydro_LB_step.h"
# include "initialize_phase_field_distributions.h"
# include "initialize_velocity_based_distributions.h"
# include "phase_field_LB_step.h"
# include <blockforest/communication/UniformBufferedScheme.h>
#endif
#include "GenDefines.h"
using namespace walberla;
using PdfField_phase_T = GhostLayerField< real_t, Stencil_phase_T::Size >;
using PdfField_hydro_T = GhostLayerField< real_t, Stencil_hydro_T::Size >;
using VelocityField_T = GhostLayerField< real_t, Stencil_hydro_T::Dimension >;
using PhaseField_T = GhostLayerField< real_t, 1 >;
using flag_t = walberla::uint8_t;
using FlagField_T = FlagField< flag_t >;
using flag_t = walberla::uint8_t;
using FlagField_T = FlagField< flag_t >;
#if defined(WALBERLA_BUILD_WITH_CUDA)
typedef cuda::GPUField< real_t > GPUField;
typedef gpu::GPUField< real_t > GPUField;
#endif
// using CommScheme_T = cuda::communication::UniformGPUScheme<stencil::D2Q9>;
int main(int argc, char** argv)
{
mpi::Environment env(argc, argv);
const mpi::Environment env(argc, argv);
#if defined(WALBERLA_BUILD_WITH_CUDA)
cuda::selectDeviceBasedOnMpiRank();
gpu::selectDeviceBasedOnMpiRank();
#endif
for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
......@@ -103,39 +79,35 @@ int main(int argc, char** argv)
logging::configureLogging(config);
shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGridFromConfig(config);
Vector3< uint_t > cellsPerBlock =
config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock");
// Reading parameters
auto parameters = config->getOneBlock("Parameters");
const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "normal");
const uint_t timesteps = parameters.getParameter< uint_t >("timesteps", uint_c(50));
const real_t remainingTimeLoggerFrequency =
parameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0);
const uint_t scenario = parameters.getParameter< uint_t >("scenario", uint_c(1));
Vector3< int > overlappingWidth =
parameters.getParameter< Vector3< int > >("overlappingWidth", Vector3< int >(1, 1, 1));
const uint_t warmupSteps = parameters.getParameter< uint_t >("warmupSteps", uint_t(2));
const uint_t warmupSteps = parameters.getParameter< uint_t >("warmupSteps", uint_t(2));
#if defined(WALBERLA_BUILD_WITH_CUDA)
// CPU fields
BlockDataID vel_field = field::addToStorage< VelocityField_T >(blocks, "vel", real_t(0), field::fzyx);
BlockDataID phase_field = field::addToStorage< PhaseField_T >(blocks, "phase", real_t(0), field::fzyx);
const BlockDataID vel_field = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx);
BlockDataID phase_field = field::addToStorage< PhaseField_T >(blocks, "phase", real_c(0.0), field::fzyx);
// GPU fields
BlockDataID lb_phase_field_gpu = cuda::addGPUFieldToStorage< cuda::GPUField< real_t > >(
const BlockDataID lb_phase_field_gpu = gpu::addGPUFieldToStorage< gpu::GPUField< real_t > >(
blocks, "lb phase field on GPU", Stencil_phase_T::Size, field::fzyx, 1);
BlockDataID lb_velocity_field_gpu = cuda::addGPUFieldToStorage< cuda::GPUField< real_t > >(
const BlockDataID lb_velocity_field_gpu = gpu::addGPUFieldToStorage< gpu::GPUField< real_t > >(
blocks, "lb velocity field on GPU", Stencil_hydro_T::Size, field::fzyx, 1);
BlockDataID vel_field_gpu =
cuda::addGPUFieldToStorage< VelocityField_T >(blocks, vel_field, "velocity field on GPU", true);
const BlockDataID vel_field_gpu =
gpu::addGPUFieldToStorage< VelocityField_T >(blocks, vel_field, "velocity field on GPU", true);
BlockDataID phase_field_gpu =
cuda::addGPUFieldToStorage< PhaseField_T >(blocks, phase_field, "phase field on GPU", true);
gpu::addGPUFieldToStorage< PhaseField_T >(blocks, phase_field, "phase field on GPU", true);
BlockDataID phase_field_tmp = gpu::addGPUFieldToStorage< PhaseField_T >(blocks, phase_field, "temporary phasefield", true);
#else
BlockDataID lb_phase_field =
field::addToStorage< PdfField_phase_T >(blocks, "lb phase field", real_t(0), field::fzyx);
field::addToStorage< PdfField_phase_T >(blocks, "lb phase field", real_c(0.0), field::fzyx);
BlockDataID lb_velocity_field =
field::addToStorage< PdfField_hydro_T >(blocks, "lb velocity field", real_t(0), field::fzyx);
BlockDataID vel_field = field::addToStorage< VelocityField_T >(blocks, "vel", real_t(0), field::fzyx);
BlockDataID phase_field = field::addToStorage< PhaseField_T >(blocks, "phase", real_t(0), field::fzyx);
field::addToStorage< PdfField_hydro_T >(blocks, "lb velocity field", real_c(0.0), field::fzyx);
BlockDataID vel_field = field::addToStorage< VelocityField_T >(blocks, "vel", real_c(0.0), field::fzyx);
BlockDataID phase_field = field::addToStorage< PhaseField_T >(blocks, "phase", real_c(0.0), field::fzyx);
BlockDataID phase_field_tmp = field::addToStorage< PhaseField_T >(blocks, "phase tmp", real_c(0.0), field::fzyx);
#endif
if (timeStepStrategy != "phase_only" && timeStepStrategy != "hydro_only" && timeStepStrategy != "kernel_only")
......@@ -146,7 +118,7 @@ int main(int argc, char** argv)
auto bubbleParameters = config->getOneBlock("Bubble");
const Vector3< real_t > bubbleMidPoint =
bubbleParameters.getParameter< Vector3< real_t > >("bubbleMidPoint");
const real_t bubbleRadius = bubbleParameters.getParameter< real_t >("bubbleRadius", 20.0);
const real_t bubbleRadius = bubbleParameters.getParameter< real_t >("bubbleRadius", real_c(20.0));
initPhaseField_bubble(blocks, phase_field, bubbleRadius, bubbleMidPoint);
}
else if (scenario == 2)
......@@ -154,7 +126,7 @@ int main(int argc, char** argv)
initPhaseField_RTI(blocks, phase_field);
}
#if defined(WALBERLA_BUILD_WITH_CUDA)
cuda::fieldCpy< GPUField, PhaseField_T >(blocks, phase_field_gpu, phase_field);
gpu::fieldCpy< GPUField, PhaseField_T >(blocks, phase_field_gpu, phase_field);
#endif
WALBERLA_LOG_INFO_ON_ROOT("initialization of the phase field done")
}
......@@ -166,54 +138,83 @@ int main(int argc, char** argv)
pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field_gpu, vel_field_gpu);
pystencils::phase_field_LB_step phase_field_LB_step(
lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1],
Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
lb_phase_field_gpu, phase_field_gpu, phase_field_tmp, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0],
gpuBlockSize[1],
Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
gpuBlockSize[1], gpuBlockSize[2]);
#else
pystencils::initialize_phase_field_distributions init_h(lb_phase_field, phase_field, vel_field);
pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field, vel_field);
pystencils::phase_field_LB_step phase_field_LB_step(
lb_phase_field, phase_field, vel_field, Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field, phase_field, vel_field,
Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
pystencils::phase_field_LB_step phase_field_LB_step(lb_phase_field, phase_field, phase_field_tmp, vel_field);
pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field, phase_field, vel_field);
#endif
// add communication
#if defined(WALBERLA_BUILD_WITH_CUDA)
auto Comm_velocity_based_distributions =
make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
auto generatedPackInfo_velocity_based_distributions =
make_shared< pystencils::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
const bool gpuEnabledMpi = parameters.getParameter< bool >("cudaEnabledMpi", false);
const int streamLowPriority = 0;
const int streamHighPriority = 0;
auto defaultStream = gpu::StreamRAII::newPriorityStream(streamLowPriority);
auto innerOuterStreams = gpu::ParallelStreams(streamHighPriority);
auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions>(lb_phase_field_gpu);
auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_gpu);
Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field);
auto Comm_phase_field_distributions =
make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
auto generatedPackInfo_phase_field_distributions =
make_shared< pystencils::PackInfo_phase_field_distributions >(lb_phase_field_gpu);
Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions);
#else
auto UniformGPUSchemeVelocityBasedDistributions = make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, gpuEnabledMpi, false);
auto UniformGPUSchemePhaseFieldDistributions = make_shared< gpu::communication::UniformGPUScheme< Full_Stencil_T > >(blocks, gpuEnabledMpi, false);
auto UniformGPUSchemePhaseField = make_shared< gpu::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, gpuEnabledMpi, false, 65432);
UniformGPUSchemeVelocityBasedDistributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
UniformGPUSchemePhaseFieldDistributions->addPackInfo(generatedPackInfo_phase_field_distributions);
UniformGPUSchemePhaseField->addPackInfo(generatedPackInfo_phase_field);
auto Comm_velocity_based_distributions_start = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->startCommunication(); });
auto Comm_velocity_based_distributions_wait = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->wait(); });
auto Comm_phase_field_distributions_start = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->startCommunication(); });
auto Comm_phase_field_distributions_wait = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->wait(); });
blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_velocity_based_distributions(blocks);
auto Comm_phase_field = std::function< void() >([&]() { UniformGPUSchemePhaseField->communicate(); });
auto swapPhaseField = std::function< void(IBlock *) >([&](IBlock * b)
{
auto phaseField = b->getData< gpu::GPUField<real_t> >(phase_field_gpu);
auto phaseFieldTMP = b->getData< gpu::GPUField<real_t> >(phase_field_tmp);
phaseField->swapDataPointers(phaseFieldTMP);
});
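// The generated phase_field_LB_step writes the updated phase field into phase_field_tmp
// (the code generation script registers C_tmp as its density output). Swapping the data
// pointers after every time step makes the new values available under phase_field_gpu
// without an extra device copy.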
#else
auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions>(lb_phase_field);
auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field);
auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field);
auto generatedPackInfo_velocity_based_distributions =
make_shared< pystencils::PackInfo_velocity_based_distributions >(lb_velocity_field);
Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_phase_field);
Comm_velocity_based_distributions.addPackInfo(generatedPackInfo_velocity_based_distributions);
auto UniformGPUSchemeVelocityBasedDistributions = make_shared< blockforest::communication::UniformBufferedScheme< Full_Stencil_T > >(blocks);
auto UniformGPUSchemePhaseFieldDistributions = make_shared< blockforest::communication::UniformBufferedScheme< Full_Stencil_T > >(blocks);
auto UniformGPUSchemePhaseField = make_shared< blockforest::communication::UniformBufferedScheme< Full_Stencil_T > >(blocks, 65432);
UniformGPUSchemeVelocityBasedDistributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
UniformGPUSchemePhaseFieldDistributions->addPackInfo(generatedPackInfo_phase_field_distributions);
UniformGPUSchemePhaseField->addPackInfo(generatedPackInfo_phase_field);
auto Comm_velocity_based_distributions_start = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->startCommunication(); });
auto Comm_velocity_based_distributions_wait = std::function< void() >([&]() { UniformGPUSchemeVelocityBasedDistributions->wait(); });
auto Comm_phase_field_distributions = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->communicate(); });
auto Comm_phase_field_distributions_start = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->startCommunication(); });
auto Comm_phase_field_distributions_wait = std::function< void() >([&]() { UniformGPUSchemePhaseFieldDistributions->wait(); });
blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_phase_field_distributions(blocks);
auto generatedPackInfo_phase_field_distributions =
make_shared< pystencils::PackInfo_phase_field_distributions >(lb_phase_field);
Comm_phase_field_distributions.addPackInfo(generatedPackInfo_phase_field_distributions);
auto Comm_phase_field = std::function< void() >([&]() { UniformGPUSchemePhaseField->communicate(); });
auto swapPhaseField = std::function< void(IBlock *) >([&](IBlock * b)
{
auto phaseField = b->getData< PhaseField_T >(phase_field);
auto phaseFieldTMP = b->getData< PhaseField_T >(phase_field_tmp);
phaseField->swapDataPointers(phaseFieldTMP);
});
#endif
BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
BlockDataID const flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
// Boundaries
const FlagUID fluidFlagUID("Fluid");
auto boundariesConfig = config->getBlock("Boundaries_GPU");
......@@ -232,177 +233,105 @@ int main(int argc, char** argv)
init_h(&block);
init_g(&block);
}
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("initialization of the distributions done")
}
SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
#if defined(WALBERLA_BUILD_WITH_CUDA)
int streamLowPriority = 0;
int streamHighPriority = 0;
auto defaultStream = cuda::StreamRAII::newPriorityStream(streamLowPriority);
auto innerOuterStreams = cuda::ParallelStreams(streamHighPriority);
#endif
timeloop.add() << BeforeFunction(Comm_velocity_based_distributions_start, "Start Hydro PDFs Communication")
<< Sweep(phase_field_LB_step.getSweep(defaultStream), "Phase LB Step")
<< AfterFunction(Comm_velocity_based_distributions_wait, "Wait Hydro PDFs Communication");
auto timeLoop = make_shared< SweepTimeloop >(blocks->getBlockStorage(), timesteps);
#if defined(WALBERLA_BUILD_WITH_CUDA)
auto normalTimeStep = [&]() {
for (auto& block : *blocks)
{
Comm_phase_field_distributions->communicate(nullptr);
phase_field_LB_step(&block);
timeloop.add() << BeforeFunction(Comm_phase_field_distributions_start, "Start Phase PDFs Communication")
<< Sweep(hydro_LB_step.getSweep(defaultStream), "Hydro LB Step");
timeloop.add() << Sweep(swapPhaseField, "Swap PhaseField")
<< AfterFunction(Comm_phase_field_distributions_wait, "Wait Phase PDFs Communication");
Comm_velocity_based_distributions->communicate(nullptr);
hydro_LB_step(&block);
}
};
auto simpleOverlapTimeStep = [&]() {
Comm_phase_field_distributions->startCommunication(defaultStream);
for (auto& block : *blocks)
phase_field_LB_step.inner(&block, defaultStream);
Comm_phase_field_distributions->wait(defaultStream);
for (auto& block : *blocks)
phase_field_LB_step.outer(&block, defaultStream);
timeloop.addFuncAfterTimeStep(Comm_phase_field, "Communication Phase field");
Comm_velocity_based_distributions->startCommunication(defaultStream);
for (auto& block : *blocks)
hydro_LB_step.inner(&block, defaultStream);
Comm_velocity_based_distributions->wait(defaultStream);
for (auto& block : *blocks)
hydro_LB_step.outer(&block, defaultStream);
};
auto phase_only = [&]() {
for (auto& block : *blocks)
phase_field_LB_step(&block);
};
auto hydro_only = [&]() {
for (auto& block : *blocks)
hydro_LB_step(&block);
};
auto without_comm = [&]() {
for (auto& block : *blocks)
phase_field_LB_step(&block);
for (auto& block : *blocks)
hydro_LB_step(&block);
};
#else
auto normalTimeStep = [&]() {
for (auto& block : *blocks)
{
Comm_phase_field_distributions();
phase_field_LB_step(&block);
Comm_velocity_based_distributions();
hydro_LB_step(&block);
}
};
auto simpleOverlapTimeStep = [&]() {
Comm_phase_field_distributions.startCommunication();
for (auto& block : *blocks)
phase_field_LB_step.inner(&block);
Comm_phase_field_distributions.wait();
for (auto& block : *blocks)
phase_field_LB_step.outer(&block);
Comm_velocity_based_distributions.startCommunication();
for (auto& block : *blocks)
hydro_LB_step.inner(&block);
Comm_velocity_based_distributions.wait();
for (auto& block : *blocks)
hydro_LB_step.outer(&block);
};
auto phase_only = [&]() {
for (auto& block : *blocks)
phase_field_LB_step(&block);
};
auto hydro_only = [&]() {
for (auto& block : *blocks)
hydro_LB_step(&block);
};
auto without_comm = [&]() {
for (auto& block : *blocks)
phase_field_LB_step(&block);
for (auto& block : *blocks)
hydro_LB_step(&block);
};
#endif
std::function< void() > timeStep;
if (timeStepStrategy == "overlap")
{
timeStep = std::function< void() >(simpleOverlapTimeStep);
WALBERLA_LOG_INFO_ON_ROOT("overlapping timestep")
}
else if (timeStepStrategy == "phase_only")
{
timeStep = std::function< void() >(phase_only);
WALBERLA_LOG_INFO_ON_ROOT("started only phasefield step without communication for benchmarking")
}
else if (timeStepStrategy == "hydro_only")
{
timeStep = std::function< void() >(hydro_only);
WALBERLA_LOG_INFO_ON_ROOT("started only hydro step without communication for benchmarking")
}
else if (timeStepStrategy == "kernel_only")
{
timeStep = std::function< void() >(without_comm);
WALBERLA_LOG_INFO_ON_ROOT("started complete phasefield model without communication for benchmarking")
}
else
{
timeStep = std::function< void() >(normalTimeStep);
WALBERLA_LOG_INFO_ON_ROOT("normal timestep with no overlapping")
}
timeloop.add() << BeforeFunction(Comm_velocity_based_distributions_start, "Start Hydro PDFs Communication")
<< Sweep(phase_field_LB_step.getSweep(), "Phase LB Step")
<< AfterFunction(Comm_velocity_based_distributions_wait, "Wait Hydro PDFs Communication");
timeLoop->add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step");
timeloop.add() << BeforeFunction(Comm_phase_field_distributions_start, "Start Phase PDFs Communication")
<< Sweep(hydro_LB_step.getSweep(), "Hydro LB Step");
timeloop.add() << Sweep(swapPhaseField, "Swap PhaseField")
<< AfterFunction(Comm_phase_field_distributions_wait, "Wait Phase PDFs Communication");
// remaining time logger
if (remainingTimeLoggerFrequency > 0)
timeLoop->addFuncAfterTimeStep(
timing::RemainingTimeLogger(timeLoop->getNrOfTimeSteps(), remainingTimeLoggerFrequency),
"remaining time logger");
timeloop.addFuncAfterTimeStep(Comm_phase_field, "Communication Phase field");
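// Each PDF communication is started before the sweep that does not need its ghost data and
// completed right after it: the hydro PDF exchange is hidden behind the phase-field LB step,
// and the phase-field PDF exchange behind the hydro LB step and the field swap, so data
// transfer overlaps with kernel execution.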
#endif
uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
uint_t const vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
if (vtkWriteFrequency > 1)
{
auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
"simulation_step", false, true, true, false, 0);
#if defined(WALBERLA_BUILD_WITH_CUDA)
vtkOutput->addBeforeFunction([&]() {
cuda::fieldCpy< PhaseField_T, GPUField >(blocks, phase_field, phase_field_gpu);
// cuda::fieldCpy<VelocityField_T, GPUField>( blocks, vel_field, vel_field_gpu );
});
vtkOutput->addBeforeFunction(
[&]() { gpu::fieldCpy< PhaseField_T, GPUField >(blocks, phase_field, phase_field_gpu); });
#endif
auto phaseWriter = make_shared< field::VTKWriter< PhaseField_T > >(phase_field, "phase");
vtkOutput->addCellDataWriter(phaseWriter);
timeLoop->addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
timeloop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
}
lbm_generated::PerformanceEvaluation< FlagField_T > const performance(blocks, flagFieldID, fluidFlagUID);
field::CellCounter< FlagField_T > fluidCells(blocks, flagFieldID, fluidFlagUID);
fluidCells();
WALBERLA_LOG_INFO_ON_ROOT("Multiphase benchmark with " << fluidCells.numberOfCells() << " fluid cells")
WALBERLA_LOG_INFO_ON_ROOT("Running " << warmupSteps << " timesteps to warm up the system")
for (uint_t i = 0; i < warmupSteps; ++i)
timeLoop->singleStep();
timeloop.singleStep();
timeLoop->setCurrentTimeStepToZero();
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Warmup timesteps done")
timeloop.setCurrentTimeStepToZero();
WALBERLA_MPI_BARRIER()
WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
WcTimingPool timeloopTiming;
WcTimer simTimer;
#if defined(WALBERLA_BUILD_WITH_CUDA)
cudaDeviceSynchronize();
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
#endif
simTimer.start();
timeLoop->run();
timeloop.run(timeloopTiming);
#if defined(WALBERLA_BUILD_WITH_CUDA)
cudaDeviceSynchronize();
WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
WALBERLA_GPU_CHECK(gpuPeekAtLastError())
#endif
WALBERLA_MPI_BARRIER()
simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")
auto time = simTimer.last();
auto nrOfCells = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process: " << mlupsPerProcess)
double time = simTimer.max();
WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }
performance.logResultOnRoot(timesteps, time);
const auto reducedTimeloopTiming = timeloopTiming.getReduced();
WALBERLA_LOG_RESULT_ON_ROOT("Time loop timing:\n" << *reducedTimeloopTiming)
WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process: " << performance.mlupsPerProcess(timesteps, time))
WALBERLA_LOG_RESULT_ON_ROOT("Time per time step: " << time / real_c(timesteps) << " s")
WALBERLA_ROOT_SECTION()
{
python_coupling::PythonCallback pythonCallbackResults("results_callback");
if (pythonCallbackResults.isCallable())
{
pythonCallbackResults.data().exposeValue("mlupsPerProcess", mlupsPerProcess);
pythonCallbackResults.data().exposeValue("mlupsPerProcess", performance.mlupsPerProcess(timesteps, time));
pythonCallbackResults.data().exposeValue("stencil_phase", StencilNamePhase);
pythonCallbackResults.data().exposeValue("stencil_hydro", StencilNameHydro);
#if defined(WALBERLA_BUILD_WITH_CUDA)
pythonCallbackResults.data().exposeValue("cuda_enabled_mpi", gpuEnabledMpi);
#endif
// Call Python function to report results
pythonCallbackResults();
}
import os
import waLBerla as wlb
import csv
from waLBerla.tools.sqlitedb import sequenceValuesToScalars
import sys
class Scenario:
def __init__(self, timeStepStrategy, overlappingWidth, cuda_block_size):
# output frequencies
self.vtkWriteFrequency = 0
# simulation parameters
self.timesteps = 201
edge_size = 100
self.cells = (edge_size, edge_size, edge_size)
self.blocks = (1, 1, 1)
self.periodic = (1, 1, 1)
self.size = (self.cells[0] * self.blocks[0],
self.cells[1] * self.blocks[1],
self.cells[2] * self.blocks[2])
self.timeStepStrategy = timeStepStrategy
self.overlappingWidth = overlappingWidth
self.cuda_block_size = cuda_block_size
self.warmupSteps = 20
# bubble parameters
self.bubbleRadius = min(self.size) // 4
self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)
self.scenario = 1 # 1 rising bubble, 2 RTI
self.config_dict = self.config()
self.csv_file = "benchmark_piz_daint.csv"
@wlb.member_callback
def config(self):
return {
'DomainSetup': {
'blocks': self.blocks,
'cellsPerBlock': self.cells,
'periodic': self.periodic,
},
'Parameters': {
'timesteps': self.timesteps,
'vtkWriteFrequency': self.vtkWriteFrequency,
'useGui': 0,
'remainingTimeLoggerFrequency': -1,
'timeStepStrategy': self.timeStepStrategy,
'overlappingWidth': self.overlappingWidth,
'gpuBlockSize': self.cuda_block_size,
'warmupSteps': self.warmupSteps,
'scenario': self.scenario,
},
'Boundaries_GPU': {
'Border': []
},
'Boundaries_CPU': {
'Border': []
},
'Bubble': {
'bubbleMidPoint': self.bubbleMidPoint,
'bubbleRadius': self.bubbleRadius,
},
}
@wlb.member_callback
def results_callback(self, **kwargs):
data = {}
data.update(self.config_dict['Parameters'])
data.update(self.config_dict['DomainSetup'])
data.update(kwargs)
data['executable'] = sys.argv[0]
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
sequenceValuesToScalars(data)
if not os.path.isfile(self.csv_file):
try:
with open(self.csv_file, 'w') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=data)
writer.writeheader()
writer.writerow(data)
except IOError:
print("could not create csv file")
else:
try:
with open(self.csv_file, 'a') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=data)
writer.writerow(data)
except IOError:
print("could not write to csv file")
def overlap_benchmark():
scenarios = wlb.ScenarioManager()
overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)]
cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1),
(32, 2, 1), (64, 2, 1), (128, 2, 1),
(32, 4, 1), (64, 4, 1),
(32, 4, 2), (32, 8, 1), (16, 16, 1)]
# no overlap
scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1), cuda_block_size=(16, 16, 1)))
# overlap
for overlap_strategy in ['overlap']:
for overlappingWidth in overlappingWidths:
for cuda_block in cuda_blocks:
scenario = Scenario(timeStepStrategy=overlap_strategy,
overlappingWidth=overlappingWidth,
cuda_block_size=cuda_block)
scenarios.add(scenario)
def kernel_benchmark():
scenarios = wlb.ScenarioManager()
# overlap
# for overlap_strategy in ['phase_only', 'hydro_only', 'kernel_only']:
for overlap_strategy in ['overlap']:
scenario = Scenario(timeStepStrategy=overlap_strategy,
overlappingWidth=(16, 16, 16),
cuda_block_size=(128, 4, 1))
scenarios.add(scenario)
def weak_scaling():
scenarios = wlb.ScenarioManager()
# overlap
# for overlap_strategy in ['phase_only', 'hydro_only', 'kernel_only']:
for overlap_strategy in ['overlap']:
scenario = Scenario(timeStepStrategy=overlap_strategy,
overlappingWidth=(32, 32, 32),
cuda_block_size=(128, 2, 1))
scenarios.add(scenario)
# overlap_benchmark()
# kernel_benchmark()
weak_scaling()
from pystencils import fields, TypedSymbol
from pystencils.simp import sympy_cse
from pystencils import AssignmentCollection
from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule
from lbmpy.stencils import get_stencil
from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_from_kernel
from lbmpy.phasefield_allen_cahn.kernel_equations import initializer_kernel_phase_field_lb, \
initializer_kernel_hydro_lb, interface_tracking_force, \
hydrodynamic_force, get_collision_assignments_hydro
from lbmpy.phasefield_allen_cahn.force_model import MultiphaseForceModel
import numpy as np
import sympy as sp
stencil_phase = get_stencil("D3Q15", "walberla")
stencil_hydro = get_stencil("D3Q27", "walberla")
q_phase = len(stencil_phase)
q_hydro = len(stencil_hydro)
maxwellian_moments = False
assert (len(stencil_phase[0]) == len(stencil_hydro[0]))
dimensions = len(stencil_hydro[0])
########################
# PARAMETER DEFINITION #
########################
density_liquid = 1.0
density_gas = 0.001
surface_tension = 0.0001
M = 0.02
# phase-field parameter
drho3 = (density_liquid - density_gas) / 3
# interface thickness
W = 5
# coefficient related to surface tension
beta = 12.0 * (surface_tension / W)
# coefficient related to surface tension
kappa = 1.5 * surface_tension * W
# relaxation rate allen cahn (h)
w_c = 1.0 / (0.5 + (3.0 * M))
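# For reference, the values chosen above (surface_tension = 1e-4, W = 5, M = 0.02) give
#   beta  = 12 * 1e-4 / 5         = 2.4e-4
#   kappa = 1.5 * 1e-4 * 5        = 7.5e-4
#   w_c   = 1 / (0.5 + 3 * 0.02)  ~= 1.786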
########################
# FIELDS #
########################
# velocity field
u = fields(f"vel_field({dimensions}): [{dimensions}D]", layout='fzyx')
# phase-field
C = fields(f"phase_field: [{dimensions}D]", layout='fzyx')
# phase-field distribution functions
h = fields(f"lb_phase_field({q_phase}): [{dimensions}D]", layout='fzyx')
h_tmp = fields(f"lb_phase_field_tmp({q_phase}): [{dimensions}D]", layout='fzyx')
# hydrodynamic distribution functions
g = fields(f"lb_velocity_field({q_hydro}): [{dimensions}D]", layout='fzyx')
g_tmp = fields(f"lb_velocity_field_tmp({q_hydro}): [{dimensions}D]", layout='fzyx')
########################################
# RELAXATION RATES AND EXTERNAL FORCES #
########################################
# calculate the relaxation rate for the hydro lb as well as the body force
density = density_gas + C.center * (density_liquid - density_gas)
# force acting on all phases of the model
body_force = np.array([0, 1e-6, 0])
relaxation_time = 0.03 + 0.5
relaxation_rate = 1.0 / relaxation_time
###############
# LBM METHODS #
###############
method_phase = create_lb_method(stencil=stencil_phase, method='srt', relaxation_rate=w_c, compressible=True)
method_hydro = create_lb_method(stencil=stencil_hydro, method="mrt", weighted=True,
relaxation_rates=[relaxation_rate, 1, 1, 1, 1, 1],
maxwellian_moments=maxwellian_moments)
# create the kernels for the initialization of the g and h field
h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W)
g_updates = initializer_kernel_hydro_lb(g, u, method_hydro)
force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W)]
force_model_h = MultiphaseForceModel(force=force_h)
force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, body_force)
h_tmp_symbol_list = [h_tmp.center(i) for i, _ in enumerate(stencil_phase)]
sum_h = np.sum(h_tmp_symbol_list[:])
####################
# LBM UPDATE RULES #
####################
method_phase.set_force_model(force_model_h)
phase_field_LB_step = create_lb_update_rule(lb_method=method_phase,
velocity_input=u,
compressible=True,
optimization={"symbolic_field": h,
"symbolic_temporary_field": h_tmp},
kernel_type='stream_pull_collide')
phase_field_LB_step.set_main_assignments_from_dict({**phase_field_LB_step.main_assignments_dict, **{C.center: sum_h}})
phase_field_LB_step = AssignmentCollection(main_assignments=phase_field_LB_step.main_assignments,
subexpressions=phase_field_LB_step.subexpressions)
phase_field_LB_step = sympy_cse(phase_field_LB_step)
# ---------------------------------------------------------------------------------------------------------
hydro_LB_step = get_collision_assignments_hydro(lb_method=method_hydro,
density=density,
velocity_input=u,
force=force_g,
sub_iterations=1,
symbolic_fields={"symbolic_field": g,
"symbolic_temporary_field": g_tmp},
kernel_type='collide_stream_push')
# streaming of the hydrodynamic distribution
stream_hydro = create_lb_update_rule(stencil=stencil_hydro,
optimization={"symbolic_field": g,
"symbolic_temporary_field": g_tmp},
kernel_type='stream_pull_only')
###################
# GENERATE SWEEPS #
###################
cpu_vec = {'assume_inner_stride_one': True, 'nontemporal': True}
vp = [('int32_t', 'cudaBlockSize0'),
('int32_t', 'cudaBlockSize1')]
from pystencils import fields, Target
from pystencils.typing import TypedSymbol
from pystencils.simp import sympy_cse
sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
TypedSymbol("cudaBlockSize1", np.int32),
1)
sweep_params = {'block_size': sweep_block_size}
from lbmpy import LBMConfig, LBStencil, Method, Stencil
from lbmpy.creationfunctions import LBMOptimisation, create_lb_method, create_lb_update_rule
from lbmpy.phasefield_allen_cahn.kernel_equations import initializer_kernel_phase_field_lb, \
initializer_kernel_hydro_lb, interface_tracking_force, hydrodynamic_force, add_interface_tracking_force, \
add_hydrodynamic_force, hydrodynamic_force_assignments
from lbmpy.phasefield_allen_cahn.parameter_calculation import AllenCahnParameters
info_header = f"""
#include "stencil/D3Q{q_phase}.h"\nusing Stencil_phase_T = walberla::stencil::D3Q{q_phase};
#include "stencil/D3Q{q_hydro}.h"\nusing Stencil_hydro_T = walberla::stencil::D3Q{q_hydro};
"""
from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_for_field, generate_info_header
from lbmpy_walberla import generate_lb_pack_info
with CodeGeneration() as ctx:
field_type = "float64" if ctx.double_accuracy else "float32"
stencil_phase = LBStencil(Stencil.D3Q15)
stencil_hydro = LBStencil(Stencil.D3Q19)
assert (stencil_phase.D == stencil_hydro.D)
########################
# PARAMETER DEFINITION #
########################
# In the code generation script we only need the symbolic parameters
parameters = AllenCahnParameters(density_heavy=1.0, density_light=0.001,
dynamic_viscosity_heavy=0.03, dynamic_viscosity_light=0.03,
surface_tension=0.0001, mobility=0.02, interface_thickness=5,
gravitational_acceleration=1e-6)
########################
# FIELDS #
########################
# velocity field
u = fields(f"vel_field({stencil_hydro.D}): {field_type}[{stencil_hydro.D}D]", layout='fzyx')
# phase-field
C = fields(f"phase_field: {field_type}[{stencil_hydro.D}D]", layout='fzyx')
C_tmp = fields(f"phase_field_tmp: {field_type}[{stencil_hydro.D}D]", layout='fzyx')
# phase-field distribution functions
h = fields(f"lb_phase_field({stencil_phase.Q}): {field_type}[{stencil_phase.D}D]", layout='fzyx')
h_tmp = fields(f"lb_phase_field_tmp({stencil_phase.Q}): {field_type}[{stencil_phase.D}D]", layout='fzyx')
# hydrodynamic distribution functions
g = fields(f"lb_velocity_field({stencil_hydro.Q}): {field_type}[{stencil_hydro.D}D]", layout='fzyx')
g_tmp = fields(f"lb_velocity_field_tmp({stencil_hydro.Q}): {field_type}[{stencil_hydro.D}D]", layout='fzyx')
########################################
# RELAXATION RATES AND EXTERNAL FORCES #
########################################
# relaxation rate for interface tracking LB step
relaxation_rate_allen_cahn = 1.0 / (0.5 + (3.0 * parameters.symbolic_mobility))
# calculate the relaxation rate for hydrodynamic LB step
rho_L = parameters.symbolic_density_light
rho_H = parameters.symbolic_density_heavy
density = rho_L + C.center * (rho_H - rho_L)
# force acting on all phases of the model
body_force = [0, 0, 0]
body_force[1] = parameters.symbolic_gravitational_acceleration * density
omega = parameters.omega(C)
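# The density is interpolated linearly in the phase field: C = 0 yields the light phase
# (rho_L), C = 1 the heavy phase (rho_H). The body force acts in the y-direction and scales
# with this local density. parameters.omega(C) presumably returns the matching
# phase-dependent relaxation rate, interpolating between the two viscosities defined above.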
###############
# LBM METHODS #
###############
rates = [0.0]
rates += [relaxation_rate_allen_cahn] * stencil_phase.D
rates += [1.0] * (stencil_phase.Q - stencil_phase.D - 1)
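# The rates list has exactly stencil_phase.Q entries: a leading zero for the conserved zeroth
# moment, the Allen-Cahn relaxation rate for the D first-order moments that track the
# interface, and 1.0 for the remaining higher-order moments. For D3Q15 this amounts to
# [0.0] + [relaxation_rate_allen_cahn] * 3 + [1.0] * 11.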
lbm_config_phase = LBMConfig(stencil=stencil_phase, method=Method.MRT, compressible=True,
delta_equilibrium=False,
force=sp.symbols(f"F_:{stencil_phase.D}"), velocity_input=u,
weighted=True, relaxation_rates=rates,
output={'density': C_tmp})
method_phase = create_lb_method(lbm_config=lbm_config_phase)
lbm_config_hydro = LBMConfig(stencil=stencil_hydro, method=Method.MRT, compressible=False,
weighted=True, relaxation_rate=omega,
force=sp.symbols(f"F_:{stencil_hydro.D}"),
output={'velocity': u})
method_hydro = create_lb_method(lbm_config=lbm_config_hydro)
# create the kernels for the initialization of the g and h field
h_updates = initializer_kernel_phase_field_lb(method_phase, C, u, h, parameters)
h_updates = h_updates.new_with_substitutions(parameters.parameter_map())
g_updates = initializer_kernel_hydro_lb(method_hydro, 1, u, g)
g_updates = g_updates.new_with_substitutions(parameters.parameter_map())
force_h = interface_tracking_force(C, stencil_phase, parameters)
hydro_force = hydrodynamic_force(C, method_hydro, parameters, body_force)
####################
# LBM UPDATE RULES #
####################
lbm_optimisation = LBMOptimisation(symbolic_field=h, symbolic_temporary_field=h_tmp)
phase_field_LB_step = create_lb_update_rule(lbm_config=lbm_config_phase,
lbm_optimisation=lbm_optimisation)
phase_field_LB_step = add_interface_tracking_force(phase_field_LB_step, force_h)
phase_field_LB_step = phase_field_LB_step.new_with_substitutions(parameters.parameter_map())
phase_field_LB_step = sympy_cse(phase_field_LB_step)
# ---------------------------------------------------------------------------------------------------------
force_Assignments = hydrodynamic_force_assignments(u, C, method_hydro, parameters, body_force)
lbm_optimisation = LBMOptimisation(symbolic_field=g, symbolic_temporary_field=g_tmp)
hydro_LB_step = create_lb_update_rule(lbm_config=lbm_config_hydro,
lbm_optimisation=lbm_optimisation)
hydro_LB_step = add_hydrodynamic_force(hydro_LB_step, force_Assignments, C, g, parameters)
hydro_LB_step = hydro_LB_step.new_with_substitutions(parameters.parameter_map())
hydro_LB_step = sympy_cse(hydro_LB_step)
###################
# GENERATE SWEEPS #
###################
# By default, non-temporal (NT) stores are deactivated because they do not work in all cases.
# They must be activated to reach full performance, for example on AVX512 CPUs.
cpu_vec = {'assume_inner_stride_one': True, 'nontemporal': False}
vp = [('int32_t', 'cudaBlockSize0'),
('int32_t', 'cudaBlockSize1'),
('int32_t', 'cudaBlockSize2')]
sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
TypedSymbol("cudaBlockSize1", np.int32),
TypedSymbol("cudaBlockSize2", np.int32))
sweep_params = {'block_size': sweep_block_size}
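# Leaving cudaBlockSize0/1/2 as typed symbols turns the GPU launch configuration into a
# runtime parameter of the generated sweeps; the application fills it from the 'gpuBlockSize'
# entry of the parameter file and passes it to the sweep constructors.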
stencil_typedefs = {'Stencil_phase_T': stencil_phase,
'Stencil_hydro_T': stencil_hydro,
'Full_Stencil_T': LBStencil(Stencil.D3Q27)}
field_typedefs = {'PdfField_phase_T': h,
'PdfField_hydro_T': g,
'VelocityField_T': u,
'PhaseField_T': C}
additional_code = f"""
const char * StencilNamePhase = "{stencil_phase.name}";
const char * StencilNameHydro = "{stencil_hydro.name}";
"""
if not ctx.cuda:
if not ctx.optimize_for_localhost:
cpu_vec = {'instruction_set': None}
generate_sweep(ctx, 'initialize_phase_field_distributions', h_updates)
generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates)
generate_sweep(ctx, 'initialize_phase_field_distributions', h_updates, target=Target.CPU)
generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates, target=Target.CPU)
generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
field_swaps=[(h, h_tmp)],
inner_outer_split=True,
cpu_vectorize_info=cpu_vec)
cpu_vectorize_info=cpu_vec,
target=Target.CPU)
generate_sweep(ctx, 'hydro_LB_step', hydro_LB_step,
field_swaps=[(g, g_tmp)],
inner_outer_split=True,
cpu_vectorize_info=cpu_vec)
cpu_vectorize_info=cpu_vec,
target=Target.CPU)
# communication
generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field_distributions',
phase_field_LB_step.main_assignments, target='cpu')
generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field',
hydro_LB_step.all_assignments, target='cpu', kind='pull')
generate_pack_info_from_kernel(ctx, 'PackInfo_velocity_based_distributions',
hydro_LB_step.all_assignments, target='cpu', kind='push')
generate_lb_pack_info(ctx, 'PackInfo_phase_field_distributions', stencil_phase, h,
streaming_pattern='pull', target=Target.CPU)
generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
streaming_pattern='pull', target=Target.CPU)
ctx.write_file("GenDefines.h", info_header)
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target=Target.CPU)
if ctx.cuda:
generate_sweep(ctx, 'initialize_phase_field_distributions',
h_updates, target='gpu')
h_updates, target=Target.GPU)
generate_sweep(ctx, 'initialize_velocity_based_distributions',
g_updates, target='gpu')
g_updates, target=Target.GPU)
generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
field_swaps=[(h, h_tmp)],
inner_outer_split=True,
target='gpu',
target=Target.GPU,
gpu_indexing_params=sweep_params,
varying_parameters=vp)
generate_sweep(ctx, 'hydro_LB_step', hydro_LB_step,
field_swaps=[(g, g_tmp)],
inner_outer_split=True,
target='gpu',
target=Target.GPU,
gpu_indexing_params=sweep_params,
varying_parameters=vp)
# communication
generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field_distributions',
phase_field_LB_step.main_assignments, target='gpu')
generate_pack_info_from_kernel(ctx, 'PackInfo_phase_field',
hydro_LB_step.all_assignments, target='gpu', kind='pull')
generate_pack_info_from_kernel(ctx, 'PackInfo_velocity_based_distributions',
hydro_LB_step.all_assignments, target='gpu', kind='push')
generate_lb_pack_info(ctx, 'PackInfo_phase_field_distributions', stencil_phase, h,
streaming_pattern='pull', target=Target.GPU)
generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
streaming_pattern='pull', target=Target.GPU)
ctx.write_file("GenDefines.h", info_header)
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target=Target.GPU)
print("finished code generation successfully")
# Info header containing correct template definitions for stencil and field
generate_info_header(ctx, 'GenDefines', stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs,
additional_code=additional_code)
......@@ -16,15 +16,13 @@ class Scenario:
self.cells[1] * self.blocks[1],
self.cells[2] * self.blocks[2])
self.timeStepStrategy = 'phase_only'
self.overlappingWidth = (1, 1, 1)
self.timeStepStrategy = 'phase_only' # 'phase_only', 'hydro_only', 'kernel_only', 'normal'
self.cuda_block_size = (128, 1, 1)
# bubble parameters
self.bubbleRadius = min(self.size) // 4
self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)
self.scenario = 1 # 1 rising bubble, 2 RTI
self.config_dict = self.config()
@wlb.member_callback
......@@ -38,13 +36,11 @@ class Scenario:
'Parameters': {
'timesteps': self.timesteps,
'vtkWriteFrequency': self.vtkWriteFrequency,
'useGui': 0,
'remainingTimeLoggerFrequency': -1,
'timeStepStrategy': self.timeStepStrategy,
'overlappingWidth': self.overlappingWidth,
'gpuBlockSize': self.cuda_block_size,
'warmupSteps': 0,
'scenario': self.scenario,
'scenario': 1,
},
'Boundaries_GPU': {
'Border': []
waLBerla_link_files_to_builddir( "*.dat" )
waLBerla_add_executable( NAME PoiseuilleChannel DEPENDS blockforest boundary core field lbm postprocessing stencil timeloop vtk sqlite)
waLBerla_add_executable( NAME PoiseuilleChannel DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::field walberla::lbm walberla::postprocessing walberla::stencil walberla::timeloop walberla::vtk walberla::sqlite )
##############
# Some tests #
##############
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestParallelPlatesNoCheckRelease COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestParallelPlatesNoCheck.dat --trt --linear-exp PROCESSES 4 CONFIGURATIONS Release RelWithDbgInfo )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestPipeNoCheckRelease COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestPipeNoCheck.dat --trt --linear-exp PROCESSES 4 CONFIGURATIONS Release RelWithDbgInfo )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestParallelPlatesNoCheckRelease COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestParallelPlatesNoCheck.dat --trt --linear-exp PROCESSES 4 CONFIGURATIONS Release RelWithDbgInfo DEPENDS_ON_TARGETS PoiseuilleChannel )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestPipeNoCheckRelease COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestPipeNoCheck.dat --trt --linear-exp PROCESSES 4 CONFIGURATIONS Release RelWithDbgInfo DEPENDS_ON_TARGETS PoiseuilleChannel )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestParallelPlatesNoCheckDebug COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestParallelPlatesNoCheck.dat --trt --linear-exp PROCESSES 4 LABELS longrun CONFIGURATIONS Debug DebugOptimized )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestPipeNoCheckDebug COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestPipeNoCheck.dat --trt --linear-exp PROCESSES 4 LABELS longrun CONFIGURATIONS Debug DebugOptimized )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestParallelPlatesNoCheckDebug COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestParallelPlatesNoCheck.dat --trt --linear-exp PROCESSES 4 LABELS longrun CONFIGURATIONS Debug DebugOptimized DEPENDS_ON_TARGETS PoiseuilleChannel )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestPipeNoCheckDebug COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestPipeNoCheck.dat --trt --linear-exp PROCESSES 4 LABELS longrun CONFIGURATIONS Debug DebugOptimized DEPENDS_ON_TARGETS PoiseuilleChannel )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestParallelPlates0 COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestParallelPlates0.dat --trt --linear-exp LABELS longrun CONFIGURATIONS Release RelWithDbgInfo )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestParallelPlates2 COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestParallelPlates2.dat --trt --linear-exp LABELS longrun verylongrun PROCESSES 4 CONFIGURATIONS Release RelWithDbgInfo )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestPipe0 COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestPipe0.dat --trt --linear-exp LABELS longrun CONFIGURATIONS Release RelWithDbgInfo )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestPipe2 COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestPipe2.dat --trt --linear-exp LABELS longrun verylongrun PROCESSES 4 CONFIGURATIONS Release RelWithDbgInfo )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestParallelPlates0 COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestParallelPlates0.dat --trt --linear-exp LABELS longrun CONFIGURATIONS Release RelWithDbgInfo DEPENDS_ON_TARGETS PoiseuilleChannel )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestParallelPlates2 COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestParallelPlates2.dat --trt --linear-exp LABELS longrun verylongrun PROCESSES 4 CONFIGURATIONS Release RelWithDbgInfo DEPENDS_ON_TARGETS PoiseuilleChannel )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestPipe0 COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestPipe0.dat --trt --linear-exp LABELS longrun CONFIGURATIONS Release RelWithDbgInfo DEPENDS_ON_TARGETS PoiseuilleChannel )
waLBerla_execute_test( NO_MODULE_LABEL NAME PoiseuilleChannelTestPipe2 COMMAND $<TARGET_FILE:PoiseuilleChannel> ${CMAKE_CURRENT_SOURCE_DIR}/TestPipe2.dat --trt --linear-exp LABELS longrun verylongrun PROCESSES 4 CONFIGURATIONS Release RelWithDbgInfo DEPENDS_ON_TARGETS PoiseuilleChannel )
......@@ -608,7 +608,7 @@ protected:
const auto & dy = this->blockStorage_->dy( level );
WALBERLA_ASSERT_FLOAT_EQUAL( dy, this->blockStorage_->dz( level ) );
const real_t acceleration_L = pdf_->latticeModel().forceModel().force()[0]; // force = acceleration (in lattice units of the current level!)
const real_t acceleration_L = pdf_->latticeModel().forceModel().forceDensity()[0]; // force = acceleration (in lattice units of the current level!)
const real_t viscosity_L = pdf_->latticeModel().collisionModel().viscosity(); // in lattice units on the current level
const real_t radius_L = channelRadius / dy; // in lattice units on the current level
......@@ -890,7 +890,7 @@ void run( const shared_ptr< Config > & config, const LatticeModel_T & latticeMod
// remaining time logger
const double remainingTimeLoggerFrequency = configBlock.getParameter< double >( "remainingTimeLoggerFrequency", 3.0 );
const real_t remainingTimeLoggerFrequency = configBlock.getParameter< real_t >( "remainingTimeLoggerFrequency", real_c(3.0) );
timeloop.addFuncAfterTimeStep( timing::RemainingTimeLogger( timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency ), "Remaining time logger" );
// logging right before the simulation starts
waLBerla_add_executable ( NAME PackPerformance
FILES PackPerformance.cpp
DEPENDS core )
DEPENDS walberla::core )
waLBerla_add_executable ( NAME ProbeVsExtraMessage
FILES ProbeVsExtraMessage.cpp
DEPENDS core postprocessing stencil sqlite)
DEPENDS walberla::core walberla::postprocessing walberla::stencil walberla::sqlite )
waLBerla_link_files_to_builddir( "*.dat" )
waLBerla_add_executable( NAME SchaeferTurek DEPENDS blockforest boundary core field lbm postprocessing stencil timeloop vtk sqlite )
waLBerla_execute_test( NO_MODULE_LABEL NAME SchaeferTurekTest COMMAND $<TARGET_FILE:SchaeferTurek> Test2D.dat PROCESSES 4 CONFIGURATIONS Release RelWithDbgInfo )
\ No newline at end of file
waLBerla_add_executable( NAME SchaeferTurek DEPENDS walberla::blockforest walberla::boundary walberla::core walberla::field walberla::lbm walberla::postprocessing walberla::stencil walberla::timeloop walberla::vtk walberla::sqlite )
waLBerla_execute_test( NO_MODULE_LABEL NAME SchaeferTurekTest COMMAND $<TARGET_FILE:SchaeferTurek> Test2D.dat PROCESSES 4 CONFIGURATIONS Release RelWithDbgInfo DEPENDS_ON_TARGETS SchaeferTurek )
\ No newline at end of file
......@@ -1064,7 +1064,7 @@ void keepInflowOutflowAtTheSameLevel( std::vector< std::pair< const Block *, uin
uint_t maxInflowLevel( uint_t(0) );
uint_t maxOutflowLevel( uint_t(0) );
// In addtion to keeping in- and outflow blocks at the same level, this callback also
// In addition to keeping in- and outflow blocks at the same level, this callback also
// prevents these blocks from coarsening.
for( auto it = minTargetLevels.begin(); it != minTargetLevels.end(); ++it )
......@@ -1472,7 +1472,7 @@ void Evaluation< LatticeModel_T >::operator()()
{
WALBERLA_LOG_RESULT_ON_ROOT( "force acting on cylinder (in dimensionless lattice units of the coarsest grid - evaluated in time step "
<< forceEvaluationExecutionCount_ << "):\n " << force_ << oss.str() <<
"\ndrag and lift coefficients (including extremas of last " << ( coefficients_[0].size() * checkFrequency_ ) << " time steps):"
"\ndrag and lift coefficients (including extrema of last " << ( coefficients_[0].size() * checkFrequency_ ) << " time steps):"
"\n \"real\" area:"
"\n c_D: " << cDRealArea << " (min = " << coefficientExtremas_[0].first << ", max = " << coefficientExtremas_[0].second << ")" <<
"\n c_L: " << cLRealArea << " (min = " << coefficientExtremas_[1].first << ", max = " << coefficientExtremas_[1].second << ")" <<
......@@ -2050,9 +2050,7 @@ void Evaluation< LatticeModel_T >::evaluate( real_t & cDRealArea, real_t & cLRea
// force on obstacle
mpi::reduceInplace( force_[0], mpi::SUM );
mpi::reduceInplace( force_[1], mpi::SUM );
mpi::reduceInplace( force_[2], mpi::SUM );
mpi::reduceInplace( force_, mpi::SUM );
if( setup_.evaluateForceComponents )
{
......@@ -2365,7 +2363,7 @@ void run( const shared_ptr< Config > & config, const LatticeModel_T & latticeMod
// add velocity field + initialize velocity field writer (only used for simulations with an adaptive block structure)
using VelocityField_T = field::GhostLayerField<Vector3<real_t>, 1>;
BlockDataID velocityFieldId = field::addToStorage< VelocityField_T >( blocks, "velocity", Vector3<real_t>(0), field::zyxf, FieldGhostLayers, true, None, Empty );
BlockDataID velocityFieldId = field::addToStorage< VelocityField_T >( blocks, "velocity", Vector3<real_t>(0), field::fzyx, FieldGhostLayers, true, None, Empty );
using VelocityFieldWriter_T = lbm::VelocityFieldWriter<typename Types<LatticeModel_T>::PdfField_T, VelocityField_T>;
BlockSweepWrapper< VelocityFieldWriter_T > velocityFieldWriter( blocks, VelocityFieldWriter_T( pdfFieldId, velocityFieldId ), None, Empty );
......@@ -2569,14 +2567,14 @@ void run( const shared_ptr< Config > & config, const LatticeModel_T & latticeMod
blockforest::DynamicDiffusionBalance< blockforest::NoPhantomData >( maxIterations, flowIterations ) );
}
// add callback functions which are executed after all block data was unpakced after the dynamic load balancing
// add callback functions which are executed after all block data was unpacked after the dynamic load balancing
// for blocks that have *not* migrated: store current flag field state (required for lbm::PostProcessing)
blockforest.addRefreshCallbackFunctionAfterBlockDataIsUnpacked( lbm::MarkerFieldGenerator< LatticeModel_T, field::FlagFieldEvaluationFilter<FlagField_T> >(
pdfFieldId, markerDataId, flagFieldFilter ) );
// (re)set boundaries = (re)initialize flag field for every block with respect to the new block structure (the size of neighbor blocks might have changed)
blockforest.addRefreshCallbackFunctionAfterBlockDataIsUnpacked( blockforest::BlockForest::RefreshCallbackWrappper( boundarySetter ) );
// treat boundary-fluid cell convertions
// treat boundary-fluid cell conversions
blockforest.addRefreshCallbackFunctionAfterBlockDataIsUnpacked( lbm::PostProcessing< LatticeModel_T, field::FlagFieldEvaluationFilter<FlagField_T> >(
pdfFieldId, markerDataId, flagFieldFilter ) );
// (re)set velocity field (velocity field data is not migrated!)
......@@ -2625,7 +2623,7 @@ void run( const shared_ptr< Config > & config, const LatticeModel_T & latticeMod
// remaining time logger
const double remainingTimeLoggerFrequency = configBlock.getParameter< double >( "remainingTimeLoggerFrequency", 3.0 );
const real_t remainingTimeLoggerFrequency = configBlock.getParameter< real_t >( "remainingTimeLoggerFrequency", real_c(3.0) );
timeloop.addFuncAfterTimeStep( timing::RemainingTimeLogger( timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency ), "Remaining time logger" );
// logging right before the simulation starts
@@ -2920,10 +2918,10 @@ int main( int argc, char **argv )
"// //\n"
"// Schaefer Turek Benchmark //\n"
"// //\n"
"// Reference: Schaefer, M. and Turek, S. (1996) 'Benchmark computations of laminar flow around a cylinder (with support //\n"
"// Reference: Schaefer, M. and Turek, S. (1996) Benchmark computations of laminar flow around a cylinder (with support //\n"
"// by F. Durst, E. Krause and R. Rannacher), in E. Hirschel (Ed.): Flow Simulation with High-Performance //\n"
"// Computers II. DFG Priority Research Program Results 1993-1995, No. 52 in Notes Numer, Fluid Mech., //\n"
"// pp.547-566, Vieweg, Weisbaden. //\n"
"// Computers II. DFG Priority Research Program Results 1993-1995, No. 48 in Notes on Numerical Fluid //\n"
"// Mechanics, pp.547-566, Vieweg, Weisbaden. //\n"
"// //\n"
"//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////" );
waLBerla_link_files_to_builddir( *.prm )
if( WALBERLA_BUILD_WITH_CODEGEN )
# Turbulent Channel generation
walberla_generate_target_from_python( NAME TurbulentChannel_CodeGeneration
FILE TurbulentChannel.py
OUT_FILES CodegenIncludes.h
TurbulentChannel_Sweep.cpp TurbulentChannel_Sweep.h
TurbulentChannel_PackInfo.cpp TurbulentChannel_PackInfo.h
TurbulentChannel_Setter.cpp TurbulentChannel_Setter.h
TurbulentChannel_NoSlip.cpp TurbulentChannel_NoSlip.h
TurbulentChannel_FreeSlip_top.cpp TurbulentChannel_FreeSlip_top.h
TurbulentChannel_WFB_bottom.cpp TurbulentChannel_WFB_bottom.h
TurbulentChannel_WFB_top.cpp TurbulentChannel_WFB_top.h
TurbulentChannel_Welford.cpp TurbulentChannel_Welford.h
TurbulentChannel_Welford_TKE_SGS.cpp TurbulentChannel_Welford_TKE_SGS.h
TurbulentChannel_TKE_SGS_Writer.cpp TurbulentChannel_TKE_SGS_Writer.h
)
walberla_add_executable( NAME TurbulentChannel_Application
FILES TurbulentChannel.cpp
DEPENDS walberla::blockforest walberla::core walberla::domain_decomposition walberla::field walberla::geometry walberla::timeloop
walberla::lbm walberla::stencil walberla::vtk TurbulentChannel_CodeGeneration )
endif()
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file TurbulentChannel.cpp
//! \author Helen Schottenhamml <helen.schottenhamml@fau.de>
//
//======================================================================================================================
#include <memory>
#include <cmath>
#include <string>
#include <iostream>
#include <blockforest/all.h>
#include <core/all.h>
#include <domain_decomposition/all.h>
#include <field/all.h>
#include <field/vtk/VTKWriter.h>
#include <geometry/all.h>
#include <timeloop/all.h>
#include <lbm/all.h>
// Codegen Includes
#include "CodegenIncludes.h"
namespace walberla {
///////////////////////
/// Typedef Aliases ///
///////////////////////
using Stencil_T = codegen::Stencil_T;
// Communication Pack Info
using PackInfo_T = pystencils::TurbulentChannel_PackInfo;
// PDF field type
using PdfField_T = field::GhostLayerField< real_t, Stencil_T::Size >;
// Field Types
using ScalarField_T = field::GhostLayerField< real_t, 1 >;
using VectorField_T = field::GhostLayerField< real_t, Stencil_T::D >;
using TensorField_T = field::GhostLayerField< real_t, Stencil_T::D*Stencil_T::D >;
using Setter_T = pystencils::TurbulentChannel_Setter;
using StreamCollideSweep_T = pystencils::TurbulentChannel_Sweep;
using WelfordSweep_T = pystencils::TurbulentChannel_Welford;
using TKEWelfordSweep_T = pystencils::TurbulentChannel_Welford_TKE_SGS;
using TkeSgsWriter_T = pystencils::TurbulentChannel_TKE_SGS_Writer;
// Boundary Handling
using flag_t = uint8_t;
using FlagField_T = FlagField< flag_t >;
using NoSlip_T = lbm::TurbulentChannel_NoSlip;
using FreeSlip_top_T = lbm::TurbulentChannel_FreeSlip_top;
using WFB_bottom_T = lbm::TurbulentChannel_WFB_bottom;
using WFB_top_T = lbm::TurbulentChannel_WFB_top;
/// DEAN CORRELATIONS
namespace dean_correlation {
real_t calculateFrictionReynoldsNumber(const real_t reynoldsBulk) {
return std::pow(0.073_r / 8_r, 1_r / 2_r) * std::pow(reynoldsBulk, 7_r / 8_r);
}
real_t calculateBulkReynoldsNumber(const real_t reynoldsFriction) {
return std::pow(8_r / 0.073_r, 4_r / 7_r) * std::pow(reynoldsFriction, 8_r / 7_r);
}
} // namespace dean_correlation
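// Rough sanity check for the correlations above (illustrative only, not used by the simulation):
// for a target Re_tau = 180, calculateBulkReynoldsNumber yields Re_b on the order of 5.5e3, and
// calculateFrictionReynoldsNumber applied to that value recovers Re_tau exactly, since the two
// power laws are algebraic inverses of each other. Note that Re_b is based on the full channel
// height 2*delta (see the viscosity definition in SimulationParameters below).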
/// VELOCITY FIELD INITIALISATION
/*
* Initialises the velocity field with a logarithmic profile and sinusoidal perturbations to trigger turbulence.
* This initialisation is provided by Henrik Asmuth.
*/
template<typename VelocityField_T>
void setVelocityFieldsAsmuth( const std::weak_ptr<StructuredBlockStorage>& forest,
const BlockDataID & velocityFieldId, const BlockDataID & meanVelocityFieldId,
const real_t frictionVelocity, const uint_t channel_half_width,
const real_t B, const real_t kappa, const real_t viscosity,
const uint_t wallAxis, const uint_t flowAxis ) {
auto blocks = forest.lock();
WALBERLA_CHECK_NOT_NULLPTR(blocks)
const auto domainSize = blocks->getDomain().max();
const auto delta = real_c(channel_half_width);
const auto remAxis = 3 - wallAxis - flowAxis;
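// The three axis indices sum to 0 + 1 + 2 = 3, so the remaining (spanwise) axis is 3 - wallAxis - flowAxis.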
for( auto block = blocks->begin(); block != blocks->end(); ++block ) {
auto * velocityField = block->template getData<VelocityField_T>(velocityFieldId);
WALBERLA_CHECK_NOT_NULLPTR(velocityField)
auto * meanVelocityField = block->template getData<VelocityField_T>(meanVelocityFieldId);
WALBERLA_CHECK_NOT_NULLPTR(meanVelocityField)
const auto ci = velocityField->xyzSizeWithGhostLayer();
for(auto cellIt = ci.begin(); cellIt != ci.end(); ++cellIt) {
Cell globalCell(*cellIt);
blocks->transformBlockLocalToGlobalCell(globalCell, *block);
Vector3<real_t> cellCenter;
blocks->getCellCenter(cellCenter, globalCell);
const auto y = cellCenter[wallAxis];
const auto rel_x = cellCenter[flowAxis] / domainSize[flowAxis];
const auto rel_z = cellCenter[remAxis] / domainSize[remAxis];
const real_t pos = std::max(delta - std::abs(y - delta - 1_r), 0.05_r);
const auto rel_y = pos / delta;
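// Mean profile: the log law u+ = (1/kappa) * ln(y+) + B in lattice units, with y+ = frictionVelocity * pos / viscosity
// and pos being (roughly) the clamped distance to the nearest wall. The sinusoidal terms added to the
// spanwise and wall-normal components below perturb this profile so the flow trips into turbulence.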
auto initialVel = frictionVelocity * (std::log(frictionVelocity * pos / viscosity) / kappa + B);
Vector3<real_t> vel;
vel[flowAxis] = initialVel;
vel[remAxis] = 2_r * frictionVelocity / kappa * std::sin(math::pi * 16_r * rel_x) *
std::sin(math::pi * 8_r * rel_y) / (std::pow(rel_y, 2_r) + 1_r);
vel[wallAxis] = 8_r * frictionVelocity / kappa *
(std::sin(math::pi * 8_r * rel_z) * std::sin(math::pi * 8_r * rel_y) +
std::sin(math::pi * 8_r * rel_x)) / (std::pow(0.5_r * delta - pos, 2_r) + 1_r);
for(uint_t d = 0; d < 3; ++d) {
velocityField->get(*cellIt, d) = vel[d];
meanVelocityField->get(*cellIt, d) = vel[d];
}
}
}
} // function setVelocityFieldsAsmuth
/// SIMULATION PARAMETERS
struct SimulationParameters {
SimulationParameters(const Config::BlockHandle & config)
{
channelHalfWidth = config.getParameter<uint_t>("channel_half_width");
fullChannel = config.getParameter<bool>("full_channel", false);
/// TARGET QUANTITIES
targetFrictionReynolds = config.getParameter<real_t>("target_friction_reynolds");
targetBulkVelocity = config.getParameter<real_t>("target_bulk_velocity", 0.05_r);
targetBulkReynolds = dean_correlation::calculateBulkReynoldsNumber(targetFrictionReynolds);
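// From the definitions Re_b = u_b * 2*delta / nu and Re_tau = u_tau * delta / nu, the (lattice)
// viscosity and the target friction velocity follow directly once the bulk velocity and the
// channel half-width delta are fixed: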
viscosity = 2_r * real_c(channelHalfWidth) * targetBulkVelocity / targetBulkReynolds;
targetFrictionVelocity = targetFrictionReynolds * viscosity / real_c(channelHalfWidth);
/// TIMESTEPS
timesteps = config.getParameter<uint_t>("timesteps", 0);
const uint_t turnoverPeriods = config.getParameter<uint_t>("turnover_periods", 0);
WALBERLA_ASSERT((timesteps != 0) != (turnoverPeriods != 0),
"Either timesteps OR turnover periods must be given.")
if(turnoverPeriods != 0) {
// turnover period defined by T = delta / u_tau
timesteps = turnoverPeriods * uint_c((real_c(channelHalfWidth) / targetFrictionVelocity));
}
/// DOMAIN DEFINITIONS
// obtained from codegen script -> adapt there
wallAxis = codegen::wallAxis;
flowAxis = codegen::flowAxis;
uint_t sizeFlowAxis = config.getParameter<uint_t>("size_flow_axis", 0);
uint_t sizeRemainingAxis = config.getParameter<uint_t>("size_remaining_axis", 0);
WALBERLA_ASSERT_NOT_IDENTICAL(wallAxis, flowAxis, "Wall and flow axis must be different.")
const auto sizeFactor = channelHalfWidth / uint_t(10);
if( !sizeFlowAxis) sizeFlowAxis = sizeFactor * 64;
if( !sizeRemainingAxis) sizeRemainingAxis = sizeFactor * 32;
domainSize[wallAxis] = fullChannel ? 2 * channelHalfWidth : channelHalfWidth;
domainSize[flowAxis] = sizeFlowAxis;
domainSize[3- wallAxis - flowAxis] = sizeRemainingAxis;
periodicity[wallAxis] = false;
boundaryCondition = config.getParameter<std::string>("wall_boundary_condition", "WFB");
/// OUTPUT
auto tsPerPeriod = uint_c((real_c(channelHalfWidth) / targetFrictionVelocity));
vtkFrequency = config.getParameter<uint_t>("vtk_frequency", 0);
vtkStart = config.getParameter<uint_t>("vtk_start", 0);
plotFrequency = config.getParameter<uint_t>("plot_frequency", 0);
plotStart = config.getParameter<uint_t>("plot_start", 0);
// vtk start
vtkStart = config.getParameter<uint_t>("vtk_start_timesteps", 0);
const uint_t vtkStartPeriods = config.getParameter<uint_t>("vtk_start_periods", 0);
if(vtkStart || vtkStartPeriods) {
WALBERLA_ASSERT((vtkStart != 0) != (vtkStartPeriods != 0),
"VTK start must be given in timesteps OR periods, not both.")
}
if(vtkStartPeriods != 0) {
// turnover period defined by T = delta / u_tau
vtkStart = vtkStartPeriods * tsPerPeriod;
}
// plot start
plotStart = config.getParameter<uint_t>("plot_start_timesteps", 0);
const uint_t plotStartPeriods = config.getParameter<uint_t>("plot_start_periods", 0);
if(plotStart || plotStartPeriods) {
WALBERLA_ASSERT((plotStart != 0) != (plotStartPeriods != 0),
"Plotting start must be given in timesteps OR periods, not both.")
}
if(plotStartPeriods != 0) {
// turnover period defined by T = delta / u_tau
plotStart = plotStartPeriods * tsPerPeriod;
}
// frequencies
if(plotFrequency) {
timesteps = uint_c(std::ceil(real_c(timesteps) / real_c(plotFrequency))) * plotFrequency;
}
// sampling start & interval
samplingStart = config.getParameter<uint_t>("sampling_start_timesteps", 0);
const uint_t samplingStartPeriods = config.getParameter<uint_t>("sampling_start_periods", 0);
if(samplingStart || samplingStartPeriods) {
WALBERLA_ASSERT((samplingStart != 0) != (samplingStartPeriods != 0),
"Sampling start must be given in timesteps OR periods, not both.")
}
if(samplingStartPeriods != 0) {
// turnover period defined by T = delta / u_tau
samplingStart = samplingStartPeriods * tsPerPeriod;
}
samplingInterval = config.getParameter<uint_t>("sampling_interval_timesteps", 0);
const uint_t samplingIntervalPeriods = config.getParameter<uint_t>("sampling_interval_periods", 0);
if(samplingInterval || samplingIntervalPeriods) {
WALBERLA_ASSERT((samplingInterval != 0) != (samplingIntervalPeriods != 0),
"Sampling start must be given in timesteps OR periods, not both.")
}
if(samplingStartPeriods != 0) {
// turnover period defined by T = delta / u_tau
samplingInterval = samplingIntervalPeriods * tsPerPeriod;
}
timesteps += 1;
}
uint_t channelHalfWidth{};
bool fullChannel{};
Vector3<uint_t> domainSize{};
Vector3<uint_t> periodicity{true};
real_t targetFrictionReynolds{};
real_t targetBulkReynolds{};
real_t targetFrictionVelocity{};
real_t targetBulkVelocity{};
real_t viscosity{};
const real_t density{1.0};
uint_t timesteps{};
std::string boundaryCondition{};
uint_t wallAxis{};
uint_t flowAxis{};
/// output
uint_t vtkFrequency{};
uint_t vtkStart{};
uint_t plotFrequency{};
uint_t plotStart{};
uint_t samplingStart{};
uint_t samplingInterval{};
};
namespace boundaries {
void createBoundaryConfig(const SimulationParameters & parameters, Config::Block & boundaryBlock) {
auto & bottomWall = boundaryBlock.createBlock("Border");
bottomWall.addParameter("direction", stencil::dirToString[stencil::directionFromAxis(parameters.wallAxis, true)]);
bottomWall.addParameter("walldistance", "-1");
if(parameters.boundaryCondition == "NoSlip") {
bottomWall.addParameter("flag", "NoSlip");
} else if(parameters.boundaryCondition == "WFB") {
bottomWall.addParameter("flag", "WFB_bottom");
}
auto & topWall = boundaryBlock.createBlock("Border");
topWall.addParameter("direction", stencil::dirToString[stencil::directionFromAxis(parameters.wallAxis, false)]);
topWall.addParameter("walldistance", "-1");
if(parameters.fullChannel) {
if (parameters.boundaryCondition == "NoSlip") {
topWall.addParameter("flag", "NoSlip");
} else if (parameters.boundaryCondition == "WFB") {
topWall.addParameter("flag", "WFB_top");
}
} else {
topWall.addParameter("flag", "FreeSlip");
}
}
} // namespace boundaries
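// For reference, the Config::Block assembled above corresponds to a parameter-file snippet of the
// form sketched below (illustrative only; the actual directions depend on codegen::wallAxis, the
// flags on the chosen wall boundary condition and on full_channel):
//
//   Border { direction <lower wall>; walldistance -1; flag WFB_bottom; }
//   Border { direction <upper wall>; walldistance -1; flag FreeSlip;   }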
/// BULK VELOCITY CALCULATION
template< typename VelocityField_T >
class ForceCalculator {
public:
ForceCalculator(const std::weak_ptr<StructuredBlockStorage> & blocks, const BlockDataID meanVelocityId,
const SimulationParameters & parameter)
: blocks_(blocks), meanVelocityId_(meanVelocityId), channelHalfWidth_(real_c(parameter.channelHalfWidth)),
targetBulkVelocity_(parameter.targetBulkVelocity), targetFrictionVelocity_(parameter.targetFrictionVelocity)
{
const auto & domainSize = parameter.domainSize;
Cell maxCell;
maxCell[parameter.wallAxis] = int_c(parameter.channelHalfWidth) - 1;
maxCell[flowDirection_] = int_c(domainSize[flowDirection_]) - 1;
const auto remainingIdx = 3 - parameter.wallAxis - flowDirection_;
maxCell[remainingIdx] = int_c(domainSize[remainingIdx]) - 1;
ci_ = CellInterval(Cell{}, maxCell);
numCells_ = real_c(parameter.channelHalfWidth * domainSize[flowDirection_] * domainSize[remainingIdx]);
}
real_t bulkVelocity() const { return bulkVelocity_; }
void setBulkVelocity(const real_t bulkVelocity) { bulkVelocity_ = bulkVelocity; }
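// Averages the streamwise component of the Welford mean-velocity field over the cell interval ci_
// set up in the constructor (the first channelHalfWidth cells in the wall-normal direction),
// reduces the partial sums across all MPI ranks and normalises by the total number of sampled cells.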
void calculateBulkVelocity() {
// reset bulk velocity
bulkVelocity_ = 0_r;
auto blocks = blocks_.lock();
WALBERLA_CHECK_NOT_NULLPTR(blocks)
for( auto block = blocks->begin(); block != blocks->end(); ++block) {
auto * meanVelocityField = block->template getData<VelocityField_T>(meanVelocityId_);
WALBERLA_CHECK_NOT_NULLPTR(meanVelocityField)
auto fieldSize = meanVelocityField->xyzSize();
CellInterval localCi;
blocks->transformGlobalToBlockLocalCellInterval(localCi, *block, ci_);
fieldSize.intersect(localCi);
auto * slicedField = meanVelocityField->getSlicedField(fieldSize);
WALBERLA_CHECK_NOT_NULLPTR(slicedField)
for(auto fieldIt = slicedField->beginXYZ(); fieldIt != slicedField->end(); ++fieldIt) {
const auto localMean = fieldIt[flowDirection_];
bulkVelocity_ += localMean;
}
}
mpi::allReduceInplace< real_t >(bulkVelocity_, mpi::SUM);
bulkVelocity_ /= numCells_;
}
real_t calculateDrivingForce() const {
// forcing term as in Malaspinas (2014) "Wall model for large-eddy simulation based on the lattice Boltzmann method"
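// The first term reproduces the wall shear stress of the target flow (tau_w / rho = u_tau^2, acting over
// the half-width delta); the second acts as a proportional controller that nudges the measured bulk
// velocity towards its target value.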
const auto force = targetFrictionVelocity_ * targetFrictionVelocity_ / channelHalfWidth_
+ (targetBulkVelocity_ - bulkVelocity_) * targetBulkVelocity_ / channelHalfWidth_;
return force;
}
private:
const std::weak_ptr<StructuredBlockStorage> blocks_{};
const BlockDataID meanVelocityId_{};
const uint_t flowDirection_{};
const real_t channelHalfWidth_{};
const real_t targetBulkVelocity_{};
const real_t targetFrictionVelocity_{};
CellInterval ci_{};
real_t numCells_{};
real_t bulkVelocity_{};
};
template< typename Welford_T >
class TurbulentChannelPlotter {
public:
TurbulentChannelPlotter(SimulationParameters const * const parameters, Timeloop * const timeloop,
ForceCalculator<VectorField_T> const * const forceCalculator,
const std::weak_ptr<StructuredBlockStorage> & blocks,
const BlockDataID velocityFieldId, const BlockDataID meanVelocityFieldId,
const BlockDataID meanTkeSGSFieldId, const BlockDataID sosFieldId, Welford_T * velocityWelford,
const bool separateFile = false)
: parameters_(parameters), forceCalculator_(forceCalculator), timeloop_(timeloop), blocks_(blocks),
velocityWelford_(velocityWelford), velocityFieldId_(velocityFieldId), meanVelocityFieldId_(meanVelocityFieldId),
meanTkeSGSFieldId_(meanTkeSGSFieldId), sosFieldId_(sosFieldId), plotFrequency_(parameters->plotFrequency), plotStart_(parameters->plotStart),
separateFiles_(separateFile)
{
if(!plotFrequency_)
return;
// prepare output folder
const filesystem::path path(baseFolder_);
std::string fileSuffix = parameters->boundaryCondition + "_";
if(parameters->fullChannel)
fileSuffix += "full_D";
else
fileSuffix += "half_D";
fileSuffix += std::to_string(parameters->channelHalfWidth) + "_Re" +
std::to_string(int(parameters->targetFrictionReynolds)) ;
velocityProfilesFilePath_ = path / ("velocity_profiles_" + fileSuffix);
forcingDataFilePath_ = path / ("forcing_data_" + fileSuffix + "_t" +
std::to_string(parameters->timesteps-1) + ".txt");
WALBERLA_ROOT_SECTION() {
// create directory if not existent; empty if existent
if( !filesystem::exists(path) ) {
filesystem::create_directories(path);
} else {
for (const auto& entry : filesystem::directory_iterator(path))
std::filesystem::remove_all(entry.path());
}
}
// write force header
std::ofstream os (forcingDataFilePath_, std::ios::out | std::ios::trunc);
if(os.is_open()) {
os << "# timestep\t bulk_velocity\t driving_force\n";
os.close();
} else {
WALBERLA_ABORT("Could not open forcing data file.")
}
}
void operator()() {
const auto ts = timeloop_->getCurrentTimeStep();
if(ts < plotStart_)
return;
if(!plotFrequency_ || (ts % plotFrequency_))
return;
const auto channelHalfWidth = real_c(parameters_->channelHalfWidth);
const auto bulkVelocity = forceCalculator_->bulkVelocity();
/// write force data
WALBERLA_ROOT_SECTION() {
std::ofstream forceOS(forcingDataFilePath_, std::ios::out | std::ios::app);
if (forceOS.is_open())
{
forceOS << ts << "\t" << bulkVelocity << "\t" << forceCalculator_->calculateDrivingForce() << "\n";
forceOS.close();
}
}
/// write velocity data
// gather velocity data
std::vector<real_t> instantaneousVelocityVector(parameters_->channelHalfWidth, 0_r);
std::vector<real_t> meanVelocityVector(parameters_->channelHalfWidth, 0_r);
std::vector<real_t> tkeSGSVector(parameters_->channelHalfWidth, 0_r);
std::vector<real_t> tkeResolvedVector(parameters_->channelHalfWidth, 0_r);
std::vector<real_t> reynoldsStressVector(parameters_->channelHalfWidth * TensorField_T::F_SIZE, 0_r);
const auto idxFlow = int_c(parameters_->domainSize[parameters_->flowAxis] / uint_t(2));
const auto idxRem = int_c(parameters_->domainSize[3 - parameters_->flowAxis - parameters_->wallAxis] / uint_t(2));
Cell point;
point[parameters_->flowAxis] = idxFlow;
point[3 - parameters_->flowAxis - parameters_->wallAxis] = idxRem;
const auto flowAxis = int_c(parameters_->flowAxis);
auto blocks = blocks_.lock();
WALBERLA_CHECK_NOT_NULLPTR(blocks)
for(auto block = blocks->begin(); block != blocks->end(); ++block) {
const auto * const velocity = block->template getData<VectorField_T>(velocityFieldId_);
WALBERLA_CHECK_NOT_NULLPTR(velocity)
const auto * const meanVelocity = block->template getData<VectorField_T>(meanVelocityFieldId_);
WALBERLA_CHECK_NOT_NULLPTR(meanVelocity)
const auto * const tkeSGS = block->template getData<ScalarField_T>(meanTkeSGSFieldId_);
WALBERLA_CHECK_NOT_NULLPTR(tkeSGS)
const auto * const sos = block->template getData<TensorField_T>(sosFieldId_);
WALBERLA_CHECK_NOT_NULLPTR(sos)
for(uint_t idx = 0; idx < parameters_->channelHalfWidth; ++idx) {
point[parameters_->wallAxis] = int_c(idx);
Cell localCell;
blocks->transformGlobalToBlockLocalCell(localCell, *block, point);
if(velocity->xyzSize().contains(localCell)){
instantaneousVelocityVector[idx] = velocity->get(localCell, flowAxis);
meanVelocityVector[idx] = meanVelocity->get(localCell, flowAxis);
tkeSGSVector[idx] = tkeSGS->get(localCell);
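// The Welford sweep accumulates sums of squared velocity fluctuations; dividing by the sample counter
// turns them into the (co)variance estimates reported as Reynolds stresses, and half the trace of that
// tensor gives the resolved turbulent kinetic energy below.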
for(uint_t i = 0; i < TensorField_T::F_SIZE; ++i) {
reynoldsStressVector[idx*TensorField_T::F_SIZE+i] = sos->get(localCell,i) / velocityWelford_->getCounter();
}
tkeResolvedVector[idx] = real_c(0.5) * (
reynoldsStressVector[idx*TensorField_T::F_SIZE+0] +
reynoldsStressVector[idx*TensorField_T::F_SIZE+4] +
reynoldsStressVector[idx*TensorField_T::F_SIZE+8]
);
}
}
}
// MPI exchange information
mpi::reduceInplace(instantaneousVelocityVector, mpi::SUM);
mpi::reduceInplace(meanVelocityVector, mpi::SUM);
mpi::reduceInplace(tkeSGSVector, mpi::SUM);
mpi::reduceInplace(tkeResolvedVector, mpi::SUM);
mpi::reduceInplace(reynoldsStressVector, mpi::SUM);
WALBERLA_ROOT_SECTION()
{
std::ofstream velocityOS;
filesystem::path path = velocityProfilesFilePath_;
if (separateFiles_) {
path.concat("_t" + std::to_string(timeloop_->getCurrentTimeStep()) + ".txt");
velocityOS.open(path, std::ios::out | std::ios::trunc);
} else {
path.concat("_t" + std::to_string(parameters_->timesteps-1) + ".txt");
velocityOS.open(path, std::ios::out | std::ios::trunc);
}
if (velocityOS.is_open()) {
if (!separateFiles_) velocityOS << "# t = " << ts << "\n";
velocityOS << "# y/delta\t y+\t u+\t u_mean\t u_instantaneous\t TKE_SGS\t TKE_resolved\t uu_rms\t uv_rms\t uw_rms\t vu_rms\t vv_rms\t vw_rms\t wu_rms\t wv_rms\t ww_rms\n";
const auto & viscosity = parameters_->viscosity;
const auto bulkReynolds = 2_r * channelHalfWidth * bulkVelocity / viscosity;
const auto frictionReynolds = dean_correlation::calculateFrictionReynoldsNumber(bulkReynolds);
const auto frictionVelocity = frictionReynolds * viscosity / channelHalfWidth;
for(uint_t idx = 0; idx < parameters_->channelHalfWidth; ++idx) {
// relative position
velocityOS << (real_c(idx)+0.5_r) / channelHalfWidth << "\t";
// y+
velocityOS << (real_c(idx)+0.5_r) * frictionVelocity / viscosity << "\t";
// u+
velocityOS << meanVelocityVector[idx] / frictionVelocity << "\t";
// mean velocity
velocityOS << meanVelocityVector[idx] << "\t";
// instantaneous velocity
velocityOS << instantaneousVelocityVector[idx] << "\t";
// subgrid-scale TKE
velocityOS << tkeSGSVector[idx] << "\t";
// resolved TKE
velocityOS << tkeResolvedVector[idx] << "\t";
// Reynolds stresses
for(uint_t i = 0; i < TensorField_T::F_SIZE; ++i) {
velocityOS << reynoldsStressVector[idx*TensorField_T::F_SIZE+i] << "\t";
}
velocityOS << "\n";
}
velocityOS.close();
} else{
WALBERLA_ABORT("Could not open velocity plot file " << path.generic_string())
}
}
}
private:
SimulationParameters const * const parameters_{};
ForceCalculator<VectorField_T> const * const forceCalculator_{};
Timeloop * const timeloop_{};
const std::weak_ptr<StructuredBlockStorage> blocks_;
Welford_T * const velocityWelford_{};
const BlockDataID velocityFieldId_{};
const BlockDataID meanVelocityFieldId_{};
const BlockDataID meanTkeSGSFieldId_{};
const BlockDataID sosFieldId_{};
const uint_t plotFrequency_{};
const uint_t plotStart_{};
const bool separateFiles_{false};
const std::string baseFolder_{"output"};
filesystem::path velocityProfilesFilePath_;
filesystem::path forcingDataFilePath_;
};
/////////////////////
/// Main Function ///
/////////////////////
int main(int argc, char** argv) {
Environment walberlaEnv(argc, argv);
if (!walberlaEnv.config()) { WALBERLA_ABORT("No configuration file specified!") }
///////////////////////////////////////////////////////
/// Block Storage Creation and Simulation Parameter ///
///////////////////////////////////////////////////////
WALBERLA_LOG_INFO_ON_ROOT("Creating block forest...")
const auto channelParameter = walberlaEnv.config()->getOneBlock("TurbulentChannel");
SimulationParameters simulationParameters(channelParameter);
// domain creation
std::shared_ptr<StructuredBlockForest> blocks;
{
Vector3< uint_t > numBlocks;
Vector3< uint_t > cellsPerBlock;
blockforest::calculateCellDistribution(simulationParameters.domainSize,
uint_c(mpi::MPIManager::instance()->numProcesses()),
numBlocks, cellsPerBlock);
const auto & periodicity = simulationParameters.periodicity;
auto & domainSize = simulationParameters.domainSize;
const Vector3<uint_t> newDomainSize(numBlocks[0] * cellsPerBlock[0], numBlocks[1] * cellsPerBlock[1], numBlocks[2] * cellsPerBlock[2]);
if(domainSize != newDomainSize) {
domainSize = newDomainSize;
WALBERLA_LOG_WARNING_ON_ROOT("\nWARNING: Domain size has changed due to the chosen domain decomposition.\n")
}
SetupBlockForest sforest;
sforest.addWorkloadMemorySUIDAssignmentFunction( blockforest::uniformWorkloadAndMemoryAssignment );
sforest.init( AABB(0_r, 0_r, 0_r, real_c(domainSize[0]), real_c(domainSize[1]), real_c(domainSize[2])),
numBlocks[0], numBlocks[1], numBlocks[2], periodicity[0], periodicity[1], periodicity[2] );
// calculate process distribution
const memory_t memoryLimit = numeric_cast< memory_t >( sforest.getNumberOfBlocks() );
const blockforest::GlobalLoadBalancing::MetisConfiguration< SetupBlock > metisConfig(
true, false, std::bind( blockforest::cellWeightedCommunicationCost, std::placeholders::_1, std::placeholders::_2,
cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2] ) );
sforest.calculateProcessDistribution_Default( uint_c( MPIManager::instance()->numProcesses() ), memoryLimit,
"hilbert", 10, false, metisConfig );
if( !MPIManager::instance()->rankValid() )
MPIManager::instance()->useWorldComm();
// create StructuredBlockForest (encapsulates a newly created BlockForest)
WALBERLA_LOG_INFO_ON_ROOT("SetupBlockForest created successfully:\n" << sforest)
sforest.writeVTKOutput("domain_decomposition");
auto bf = std::make_shared< BlockForest >( uint_c( MPIManager::instance()->rank() ), sforest, false );
blocks = std::make_shared< StructuredBlockForest >( bf, cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2] );
blocks->createCellBoundingBoxes();
}
////////////////////////////////////
/// PDF Field and Velocity Setup ///
////////////////////////////////////
WALBERLA_LOG_INFO_ON_ROOT("Creating fields...")
// Common Fields
const BlockDataID velocityFieldId = field::addToStorage< VectorField_T >(blocks, "velocity", real_c(0.0), codegen::layout);
const BlockDataID meanVelocityFieldId = field::addToStorage< VectorField_T >(blocks, "mean velocity", real_c(0.0), codegen::layout);
const BlockDataID sosFieldId = field::addToStorage< TensorField_T >(blocks, "sum of squares", real_c(0.0), codegen::layout);
const BlockDataID tkeSgsFieldId = field::addToStorage< ScalarField_T >(blocks, "tke_SGS", real_c(0.0), codegen::layout);
const BlockDataID meanTkeSgsFieldId = field::addToStorage< ScalarField_T >(blocks, "mean_tke_SGS", real_c(0.0), codegen::layout);
const BlockDataID omegaFieldId = field::addToStorage< ScalarField_T >(blocks, "omega_out", real_c(0.0), codegen::layout);
const BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
// CPU Field for PDFs
const BlockDataID pdfFieldId = field::addToStorage< PdfField_T >(blocks, "pdf field", real_c(0.0), codegen::layout);
///////////////////////////////////////////
/// Force and bulk velocity calculation ///
///////////////////////////////////////////
ForceCalculator<VectorField_T> forceCalculator(blocks, velocityFieldId, simulationParameters);
//////////////
/// Setter ///
//////////////
WALBERLA_LOG_INFO_ON_ROOT("Setting up fields...")
// Velocity field setup
setVelocityFieldsAsmuth<VectorField_T>(
blocks, velocityFieldId, meanVelocityFieldId,
simulationParameters.targetFrictionVelocity, simulationParameters.channelHalfWidth,
5.5_r, 0.41_r, simulationParameters.viscosity,
simulationParameters.wallAxis, simulationParameters.flowAxis );
forceCalculator.setBulkVelocity(simulationParameters.targetBulkVelocity);
const auto initialForce = forceCalculator.calculateDrivingForce();
// pdfs setup
Setter_T pdfSetter(pdfFieldId, velocityFieldId, initialForce, simulationParameters.density);
for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
pdfSetter(blockIt.get());
/////////////
/// Sweep ///
/////////////
WALBERLA_LOG_INFO_ON_ROOT("Creating sweeps...")
const auto omega = lbm::collision_model::omegaFromViscosity(simulationParameters.viscosity);
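// For the usual lattice with c_s^2 = 1/3 this corresponds to omega = 1 / (3 * nu + 0.5),
// i.e. the relaxation rate matching the lattice viscosity computed above.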
StreamCollideSweep_T streamCollideSweep(omegaFieldId, pdfFieldId, velocityFieldId, initialForce, omega);
WelfordSweep_T welfordSweep(meanVelocityFieldId, sosFieldId, velocityFieldId, 0_r);
TKEWelfordSweep_T welfordTKESweep(meanTkeSgsFieldId, tkeSgsFieldId, 0_r);
TkeSgsWriter_T tkeSgsWriter(omegaFieldId, pdfFieldId, tkeSgsFieldId, initialForce, omega);
/////////////////////////
/// Boundary Handling ///
/////////////////////////
WALBERLA_LOG_INFO_ON_ROOT("Creating boundary handling...")
const FlagUID fluidFlagUID("Fluid");
Config::Block boundaryBlock;
boundaries::createBoundaryConfig(simulationParameters, boundaryBlock);
std::unique_ptr<WFB_bottom_T> wfb_bottom_ptr = std::make_unique<WFB_bottom_T>(blocks, meanVelocityFieldId, pdfFieldId, omega, simulationParameters.targetFrictionVelocity);
std::unique_ptr<WFB_top_T > wfb_top_ptr = std::make_unique<WFB_top_T>(blocks, meanVelocityFieldId, pdfFieldId, omega, simulationParameters.targetFrictionVelocity);
NoSlip_T noSlip(blocks, pdfFieldId);
FreeSlip_top_T freeSlip_top(blocks, pdfFieldId);
geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldId, Config::BlockHandle(&boundaryBlock));
geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldId, fluidFlagUID);
noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("NoSlip"), fluidFlagUID);
freeSlip_top.fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("FreeSlip"), fluidFlagUID);
wfb_bottom_ptr->fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("WFB_bottom"), fluidFlagUID);
wfb_top_ptr->fillFromFlagField< FlagField_T >(blocks, flagFieldId, FlagUID("WFB_top"), fluidFlagUID);
//////////////
/// Output ///
//////////////
WALBERLA_LOG_INFO_ON_ROOT("Creating field output...")
// vtk output
auto vtkWriter = vtk::createVTKOutput_BlockData(
blocks, "field_writer", simulationParameters.vtkFrequency, 0, false, "vtk_out", "simulation_step",
false, false, true, false
);
vtkWriter->setInitialWriteCallsToSkip(simulationParameters.vtkStart);
// velocity field writer
auto velocityWriter = std::make_shared<field::VTKWriter<VectorField_T>>(velocityFieldId, "instantaneous velocity");
vtkWriter->addCellDataWriter(velocityWriter);
auto meanVelocityFieldWriter = std::make_shared<field::VTKWriter<VectorField_T>>(meanVelocityFieldId, "mean velocity");
vtkWriter->addCellDataWriter(meanVelocityFieldWriter);
// one-off VTK output of the flag field
{
auto flagOutput = vtk::createVTKOutput_BlockData(
blocks, "flag_writer", 1, 1, false, "vtk_out", "simulation_step",
false, true, true, false
);
auto flagWriter = std::make_shared<field::VTKWriter<FlagField_T>>(flagFieldId, "flag field");
flagOutput->addCellDataWriter(flagWriter);
flagOutput->write();
}
/////////////////
/// Time Loop ///
/////////////////
WALBERLA_LOG_INFO_ON_ROOT("Creating timeloop...")
SweepTimeloop timeloop(blocks->getBlockStorage(), simulationParameters.timesteps);
// Communication
blockforest::communication::UniformBufferedScheme< Stencil_T > communication(blocks);
communication.addPackInfo(make_shared< PackInfo_T >(pdfFieldId));
auto setNewForce = [&](const real_t newForce) {
streamCollideSweep.setF_x(newForce);
tkeSgsWriter.setF_x(newForce);
};
// plotting
const bool outputSeparateFiles = channelParameter.getParameter<bool>("separate_files", false);
const TurbulentChannelPlotter<WelfordSweep_T > plotter(&simulationParameters, &timeloop, &forceCalculator, blocks,
velocityFieldId, meanVelocityFieldId,
meanTkeSgsFieldId, sosFieldId, &welfordSweep,
outputSeparateFiles);
// NOTE: sweeps whose state is altered during the run must be wrapped in lambdas; otherwise the
// timeloop stores a copy and, e.g., the force update and the Welford counter would stay at 0.
auto welfordLambda = [&welfordSweep, &welfordTKESweep](IBlock * block) {
welfordSweep(block);
welfordTKESweep(block);
};
auto wfbLambda = [&wfb_bottom_ptr, &wfb_top_ptr](IBlock * block) {
wfb_bottom_ptr->operator()(block);
wfb_top_ptr->operator()(block);
};
auto streamCollideLambda = [&streamCollideSweep](IBlock * block) {
streamCollideSweep(block);
};
// Timeloop
timeloop.add() << BeforeFunction(communication, "communication")
<< BeforeFunction([&](){forceCalculator.calculateBulkVelocity();}, "bulk velocity calculation")
<< BeforeFunction([&](){
const auto newForce = forceCalculator.calculateDrivingForce();
setNewForce(newForce);
}, "new force setter")
<< Sweep([](IBlock *){}, "new force setter");
timeloop.add() << Sweep(freeSlip_top, "freeSlip");
timeloop.add() << Sweep(noSlip, "noSlip");
timeloop.add() << Sweep(wfbLambda, "wall function bounce");
timeloop.add() << Sweep(streamCollideLambda, "stream and collide");
timeloop.add() << BeforeFunction([&](){
const uint_t velCtr = uint_c(welfordSweep.getCounter());
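// Restart the running averages whenever a new sampling window begins: at samplingStart, and
// afterwards whenever the Welford sample counter completes another samplingInterval. Resetting
// the counters together with the sum-of-squares and SGS-TKE fields discards the previous window.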
if((timeloop.getCurrentTimeStep() == simulationParameters.samplingStart) ||
(timeloop.getCurrentTimeStep() > simulationParameters.samplingStart && simulationParameters.samplingInterval && (velCtr % simulationParameters.samplingInterval == 0))) {
welfordSweep.setCounter(real_c(0.0));
welfordTKESweep.setCounter(real_c(0.0));
for(auto & block : *blocks) {
auto * sopField = block.template getData<TensorField_T >(sosFieldId);
sopField->setWithGhostLayer(0.0);
auto * tkeField = block.template getData<ScalarField_T>(tkeSgsFieldId);
tkeField->setWithGhostLayer(0.0);
}
}
welfordSweep.setCounter(welfordSweep.getCounter() + real_c(1.0));
welfordTKESweep.setCounter(welfordTKESweep.getCounter() + real_c(1.0));
}, "welford sweep")
<< Sweep(welfordLambda, "welford sweep");
timeloop.add() << Sweep(tkeSgsWriter, "TKE_SGS writer");
timeloop.addFuncAfterTimeStep(vtk::writeFiles(vtkWriter), "VTK field output");
timeloop.addFuncAfterTimeStep(plotter, "Turbulent quantity plotting");
// LBM stability check
timeloop.addFuncAfterTimeStep( makeSharedFunctor( field::makeStabilityChecker< PdfField_T, FlagField_T >(
walberlaEnv.config(), blocks, pdfFieldId, flagFieldId, fluidFlagUID ) ),
"LBM stability check" );
// Time logger
timeloop.addFuncAfterTimeStep(timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), 5_r),
"remaining time logger");
WALBERLA_LOG_INFO_ON_ROOT("Running timeloop with " << timeloop.getNrOfTimeSteps() - 1 << " timesteps...")
WcTimingPool timing;
WcTimer timer;
timer.start();
timeloop.run(timing);
timer.end();
double time = timer.max();
walberla::mpi::reduceInplace(time, walberla::mpi::MAX);
const auto timeloopTiming = timing.getReduced();
WALBERLA_LOG_INFO_ON_ROOT("Timeloop timing:\n" << *timeloopTiming)
const walberla::lbm::PerformanceEvaluation<FlagField_T> performance(blocks, flagFieldId, fluidFlagUID);
performance.logResultOnRoot(simulationParameters.timesteps, time);
return EXIT_SUCCESS;
}
} // namespace walberla
int main(int argc, char** argv) { return walberla::main(argc, argv); }