Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 3199 additions and 380 deletions
#pragma once
#include "core/debug/Debug.h"
#include "blockforest/Block.h"
#include "blockforest/StructuredBlockForest.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "field/communication/StencilRestrictedMPIDatatypeInfo.h"
#include "field/communication/UniformMPIDatatypeInfo.h"
#include "gpu/communication/GPUPackInfo.h"
#include "gpu/communication/UniformGPUScheme.h"
#include "gpu/communication/MemcpyPackInfo.h"
#include "UniformGridGPU_PackInfo.h"
using namespace walberla;
enum CommunicationSchemeType {
GPUPackInfo_Baseline = 0,
GPUPackInfo_Streams = 1,
UniformGPUScheme_Baseline = 2,
UniformGPUScheme_Memcpy = 3,
MPIDatatypes = 4,
MPIDatatypesFull = 5
};
template<typename StencilType, typename GPUFieldType >
class UniformGridGPU_Communication
{
public:
explicit UniformGridGPU_Communication(weak_ptr<StructuredBlockForest> bf, const BlockDataID & bdId,
CommunicationSchemeType commSchemeType, bool cudaEnabledMPI = false)
: _commSchemeType(commSchemeType), _cpuCommunicationScheme(nullptr), _gpuPackInfo(nullptr),
_gpuCommunicationScheme(nullptr), _directScheme(nullptr)
{
auto generatedPackInfo = make_shared<pystencils::UniformGridGPU_PackInfo>( bdId );
auto memcpyPackInfo = make_shared< gpu::communication::MemcpyPackInfo< GPUFieldType > >( bdId );
auto dataTypeInfo = make_shared< field::communication::StencilRestrictedMPIDatatypeInfo< GPUFieldType, StencilType > >( bdId );
auto dataTypeInfoFull = make_shared< field::communication::UniformMPIDatatypeInfo<GPUFieldType> >( bdId );
switch(_commSchemeType)
{
case GPUPackInfo_Baseline:
_gpuPackInfo = make_shared< gpu::communication::GPUPackInfo< GPUFieldType > >( bdId );
_cpuCommunicationScheme = make_shared< blockforest::communication::UniformBufferedScheme< StencilType > >( bf );
_cpuCommunicationScheme->addPackInfo( _gpuPackInfo );
break;
case GPUPackInfo_Streams:
_gpuPackInfo = make_shared< gpu::communication::GPUPackInfo< GPUFieldType > >( bdId );
_cpuCommunicationScheme = make_shared< blockforest::communication::UniformBufferedScheme< StencilType > >( bf );
_cpuCommunicationScheme->addPackInfo( _gpuPackInfo );
break;
case UniformGPUScheme_Baseline:
_gpuCommunicationScheme = make_shared< gpu::communication::UniformGPUScheme< StencilType > >( bf, cudaEnabledMPI );
_gpuCommunicationScheme->addPackInfo( generatedPackInfo );
break;
case UniformGPUScheme_Memcpy:
_gpuCommunicationScheme = make_shared< gpu::communication::UniformGPUScheme< StencilType > >( bf, cudaEnabledMPI );
_gpuCommunicationScheme->addPackInfo( memcpyPackInfo );
break;
case MPIDatatypes:
if( ! cudaEnabledMPI ) {
WALBERLA_ABORT("MPI datatype-based communication not possible if no cudaEnabledMPI is available.");
}
_directScheme = make_shared< blockforest::communication::UniformDirectScheme< StencilType > >( bf, dataTypeInfo );
break;
case MPIDatatypesFull:
if( ! cudaEnabledMPI ) {
WALBERLA_ABORT("MPI datatype-based communication not possible if no cudaEnabledMPI is available.");
}
_directScheme = make_shared< blockforest::communication::UniformDirectScheme< StencilType > >( bf, dataTypeInfoFull );
break;
default:
WALBERLA_ABORT("Invalid GPU communication scheme specified!");
}
}
UniformGridGPU_Communication(UniformGridGPU_Communication &) = delete;
void operator()( cudaStream_t communicationStream = 0 )
{
startCommunication( communicationStream );
wait( communicationStream );
}
void startCommunication( cudaStream_t communicationStream )
{
switch( _commSchemeType )
{
case GPUPackInfo_Streams:
// Set communication stream to enable asynchronous operations
// in GPUPackInfo.
WALBERLA_ASSERT_NOT_NULLPTR( _gpuPackInfo );
_gpuPackInfo->setCommunicationStream( communicationStream );
// Start communication using UniformBufferedScheme
WALBERLA_ASSERT_NOT_NULLPTR( _cpuCommunicationScheme );
_cpuCommunicationScheme->startCommunication();
break;
case GPUPackInfo_Baseline:
// Start communication using UniformBufferedScheme
WALBERLA_ASSERT_NOT_NULLPTR( _cpuCommunicationScheme );
_cpuCommunicationScheme->startCommunication();
break;
case UniformGPUScheme_Baseline:
WALBERLA_ASSERT_NOT_NULLPTR( _gpuCommunicationScheme );
_gpuCommunicationScheme->startCommunication( communicationStream );
break;
case UniformGPUScheme_Memcpy:
WALBERLA_ASSERT_NOT_NULLPTR( _gpuCommunicationScheme );
_gpuCommunicationScheme->startCommunication( communicationStream );
break;
case MPIDatatypes:
case MPIDatatypesFull:
WALBERLA_ASSERT_NOT_NULLPTR( _directScheme );
_directScheme->startCommunication();
break;
}
}
void wait( cudaStream_t communicationStream )
{
switch( _commSchemeType )
{
case GPUPackInfo_Baseline:
WALBERLA_ASSERT_NOT_NULLPTR( _cpuCommunicationScheme );
_cpuCommunicationScheme->wait();
break;
case GPUPackInfo_Streams:
WALBERLA_ASSERT_NOT_NULLPTR( _cpuCommunicationScheme );
_gpuPackInfo->setCommunicationStream( communicationStream );
_cpuCommunicationScheme->wait();
break;
case UniformGPUScheme_Baseline:
WALBERLA_ASSERT_NOT_NULLPTR( _gpuCommunicationScheme );
_gpuCommunicationScheme->wait( communicationStream );
break;
case UniformGPUScheme_Memcpy:
WALBERLA_ASSERT_NOT_NULLPTR( _gpuCommunicationScheme );
_gpuCommunicationScheme->wait( communicationStream );
break;
case MPIDatatypes:
case MPIDatatypesFull:
WALBERLA_ASSERT_NOT_NULLPTR( _directScheme );
_directScheme->wait();
break;
}
}
private:
CommunicationSchemeType _commSchemeType;
shared_ptr< blockforest::communication::UniformBufferedScheme< StencilType > > _cpuCommunicationScheme;
shared_ptr< gpu::communication::GPUPackInfo< GPUFieldType > > _gpuPackInfo;
shared_ptr< gpu::communication::UniformGPUScheme< StencilType > > _gpuCommunicationScheme;
shared_ptr< blockforest::communication::UniformDirectScheme<StencilType> > _directScheme;
};
#!/usr/bin/env python3
"""
This is a waLBerla parameter file that tests (almost) all parameter combinations for GPU communication.
Build waLBerla with -DWALBERLA_BUILD_WITH_PYTHON=1 then run e.g.
./UniformGridGPU_d3q27_aa_srt simulation_setup/benchmark_configs.py
Look at the end of the file to select the benchmark to run
"""
import os
import waLBerla as wlb
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
from copy import deepcopy
from functools import reduce
import operator
import sys
import sqlite3
# Number of time steps run for a workload of 128^3 per GPU
# if double as many cells are on the GPU, half as many time steps are run etc.
# increase this to get more reliable measurements
TIME_STEPS_FOR_128_BLOCK = 200
DB_FILE = "gpu_benchmark.sqlite3"
BASE_CONFIG = {
'DomainSetup': {
'cellsPerBlock': (256, 128, 128),
'periodic': (1, 1, 1),
},
'Parameters': {
'omega': 1.8,
'cudaEnabledMPI': False,
'warmupSteps': 5,
'outerIterations': 3,
}
}
def prod(seq):
return reduce(operator.mul, seq, 1)
def num_time_steps(block_size, time_steps_for_128_block=200):
cells = block_size[0] * block_size[1] * block_size[2]
time_steps = (128 ** 3 / cells) * time_steps_for_128_block
return int(time_steps)
def cuda_block_size_ok(block_size, regs_per_threads=168):
"""Checks if a given CUDA block size does not exceed the SM register limit.
168 registers per thread was obtained using cuobjdump on both SRT and Cumulant
kernels. You might want to validate that for your own kernels."""
return prod(block_size) * regs_per_threads < 64 * (2 ** 10)
def domain_block_size_ok(block_size, total_mem, gls=1, q=27, size_per_value=8):
"""Checks if a single block of given size fits into GPU memory"""
return prod(b + 2 * gls for b in block_size) * q * size_per_value < total_mem
class Scenario:
def __init__(self, cells_per_block=(256, 128, 128), db_file=DB_FILE, **kwargs):
self.db_file = db_file
self.config_dict = deepcopy(BASE_CONFIG)
self.config_dict['Parameters'].update(kwargs)
self.config_dict['DomainSetup']['blocks'] = block_decomposition(wlb.mpi.numProcesses())
self.config_dict['DomainSetup']['cellsPerBlock'] = cells_per_block
@wlb.member_callback
def config(self):
from pprint import pformat
wlb.log_info_on_root("Scenario:\n" + pformat(self.config_dict))
# Write out the configuration as text-based prm:
# print(toPrm(self.config_dict))
return self.config_dict
@wlb.member_callback
def results_callback(self, **kwargs):
data = {}
data.update(self.config_dict['Parameters'])
data.update(self.config_dict['DomainSetup'])
data.update(kwargs)
data['executable'] = sys.argv[0]
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
sequenceValuesToScalars(data)
result = data
sequenceValuesToScalars(result)
num_tries = 4
# check multiple times e.g. may fail when multiple benchmark processes are running
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, "runs", self.db_file)
storeSingle(result, "runs", self.db_file)
break
except sqlite3.OperationalError as e:
wlb.log_warning("Sqlite DB writing failed: try {}/{} {}".format(num_try + 1, num_tries, str(e)))
# -------------------------------------- Functions trying different parameter sets -----------------------------------
def overlap_benchmark():
"""Tests different communication overlapping strategies"""
wlb.log_info_on_root("Running different communication overlap strategies")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
inner_outer_splits = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)]
# 'GPUPackInfo_Baseline', 'GPUPackInfo_Streams'
for comm_strategy in ['UniformGPUScheme_Baseline', 'UniformGPUScheme_Memcpy']:
# no overlap
scenarios.add(Scenario(timeStepStrategy='noOverlap', communicationScheme=comm_strategy,
innerOuterSplit=(1, 1, 1)))
# overlap
for overlap_strategy in ['simpleOverlap', 'complexOverlap']:
for inner_outer_split in inner_outer_splits:
scenario = Scenario(timeStepStrategy=overlap_strategy,
communicationScheme=comm_strategy,
innerOuterSplit=inner_outer_split,
timesteps=num_time_steps((256, 128, 128)))
scenarios.add(scenario)
def communication_compare():
"""Tests different communication strategies"""
wlb.log_info_on_root("Running benchmarks to compare communication strategies")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
for block_size in [(128, 128, 128), (32, 32, 32), (64, 64, 64), (256, 256, 256)]:
for comm_strategy in ['UniformGPUScheme_Baseline', 'UniformGPUScheme_Memcpy']:
sc = Scenario(cells_per_block=block_size,
gpuBlockSize=(128, 1, 1),
timeStepStrategy='noOverlap',
communicationScheme=comm_strategy,
timesteps=num_time_steps(block_size))
scenarios.add(sc)
for inner_outer_split in [(4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1)]:
# ensure that the inner part of the domain is still large enough
if 3 * inner_outer_split[0] > block_size[0]:
continue
sc = Scenario(cells_per_block=block_size,
gpuBlockSize=(128, 1, 1),
timeStepStrategy='simpleOverlap',
innerOuterSplit=inner_outer_split,
communicationScheme=comm_strategy,
timesteps=num_time_steps(block_size))
scenarios.add(sc)
def single_gpu_benchmark():
"""Benchmarks only the LBM compute kernel"""
wlb.log_info_on_root("Running single GPU benchmarks")
wlb.log_info_on_root("")
gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
gpu_mem = gpu_mem_gb * (2 ** 30)
gpu_type = os.environ.get('GPU_TYPE')
kwargs = {}
if gpu_type is not None:
kwargs['gpu_type'] = gpu_type
scenarios = wlb.ScenarioManager()
block_sizes = [(i, i, i) for i in (64, 128, 256, 384)] + [(512, 512, 128)]
cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), (512, 1, 1),
(32, 2, 1), (64, 2, 1), (128, 2, 1), (256, 2, 1),
(32, 4, 1), (64, 4, 1), (128, 4, 1),
(32, 8, 1), (64, 8, 1),
(32, 16, 1)]
for block_size in block_sizes:
for cuda_block_size in cuda_blocks:
if not cuda_block_size_ok(cuda_block_size):
wlb.log_info_on_root(f"Cuda block size {cuda_block_size} would exceed register limit. Skipping.")
continue
if not domain_block_size_ok(block_size, gpu_mem):
wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
continue
scenario = Scenario(cells_per_block=block_size,
gpuBlockSize=cuda_block_size,
timeStepStrategy='kernelOnly',
timesteps=num_time_steps(block_size),
**kwargs)
scenarios.add(scenario)
# -------------------------------------- Optional job script generation for PizDaint ---------------------------------
job_script_header = """
#!/bin/bash -l
#SBATCH --job-name=scaling
#SBATCH --time=0:30:00
#SBATCH --nodes={nodes}
#SBATCH -o out_scaling_{nodes}_%j.txt
#SBATCH -e err_scaling_{nodes}_%j.txt
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --partition=normal
#SBATCH --constraint=gpu
#SBATCH --account=d105
cd {folder}
source ~/env.sh
module load daint-gpu
module load craype-accel-nvidia60
export MPICH_RDMA_ENABLED_CUDA=1 # allow GPU-GPU data transfer
export CRAY_CUDA_MPS=1 # allow GPU sharing
export MPICH_G2G_PIPELINE=256 # adapt maximum number of concurrent in-flight messages
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
export CRAY_CUDA_MPS=1
export MPICH_RANK_REORDER_METHOD=3
export PMI_MMAP_SYNC_WAIT_TIME=300
# grid_order -R -H -c 1,1,8 -g 16,16,8
ulimit -c 0
"""
job_script_exe_part = """
export WALBERLA_SCENARIO_IDX=0
while srun -n {nodes} ./{app} {config}
do
((WALBERLA_SCENARIO_IDX++))
done
"""
all_executables = ('UniformGridBenchmarkGPU_mrt_d3q27',
'UniformGridBenchmarkGPU_smagorinsky_d3q27',
'UniformGridBenchmarkGPU_cumulant'
'UniformGridBenchmarkGPU_cumulant_d3q27')
def generate_jobscripts(exe_names=all_executables):
for node_count in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 2400]:
folder_name = "scaling_{:04d}".format(node_count)
os.makedirs(folder_name, exist_ok=True)
# run grid_order
import subprocess
decomposition = block_decomposition(node_count)
decomposition_str = ",".join(str(e) for e in decomposition)
subprocess.check_call(['grid_order', '-R', '-H', '-g', decomposition_str])
job_script = job_script_header.format(nodes=node_count, folder=os.path.join(os.getcwd(), folder_name))
for exe in exe_names:
job_script += job_script_exe_part.format(app="../" + exe, nodes=node_count,
config='../communication_compare.py')
with open(os.path.join(folder_name, 'job.sh'), 'w') as f:
f.write(job_script)
if __name__ == '__main__':
print("Called without waLBerla - generating job scripts for PizDaint")
generate_jobscripts()
else:
wlb.log_info_on_root("Batch run of benchmark scenarios, saving result to {}".format(DB_FILE))
# Select the benchmark you want to run
single_gpu_benchmark()
# benchmarks different CUDA block sizes and domain sizes and measures single
# GPU performance of compute kernel (no communication)
# communication_compare(): benchmarks different communication routines, with and without overlap
# overlap_benchmark(): benchmarks different communication overlap options
DomainSetup
{
blocks < 1,1,1 >; // use as many blocks as MPI processes
cellsPerBlock < 128,64,64 >; // domain size per MPI process, leave constant for weak scaling
periodic < 1,1,1 >;
}
Parameters
{
initShearFlow False;
cudaEnabledMPI False; // set to true, if MPI was compiled with CUDA
outerIterations 3; // number of measurements to make
timeStepStrategy simpleOverlap; // one of simpleOverlap, noOverlap, the non-AA version also supports complexOverlap
// fastest is simpleOverlap
innerOuterSplit <8, 1, 1>; // only important when overlapping communication
// domain is split into communication-dependent outer and inner part
// this parameter makes the outer part larger than necessary since the processing of a single outer layer is slow
// this parameter specifies the thickness of the outer layer in each direction
// make sure your block is large enough, the outer part is 2*innerOuterSplit big, make sure there is a inner part left
timesteps 2000; // time steps per measurement
warmupSteps 5; // number of time steps before starting measurement
vtkWriteFrequency 0; // how often to write VTK output
gpuBlockSize < 128,1,1 >; // size of CUDA blocks - usually large x extents are fast
omega 1.8;
// valid in the non-AA version - determines how the ghost layer exchange is done
// the AA version uses always the fastest "UniformGPUScheme_Baseline"
//communicationScheme UniformGPUScheme_Baseline
// UniformGPUScheme_Baseline: packing/unpacking in generated kernels, every direction is handled by separate CUDA stream, can make use of CUDA aware MPI, most probably the fastest version
// UniformGPUScheme_Memcpy: some as above, but packing is done with cudaMemcpy(3D)
// MPIDatatypes: use MPI datatypes for packing, needs CUDA aware MPI
// MPIDatatypesFull: same as above but sends all PDFs
// GPUPackInfo_Baseline: old implementation based on communication mechanism for CPUs
// GPUPackInfo_Streams: same as above but with CUDA streams
}
Boundaries {
Border { direction T; walldistance -1; flag UBB; }
Border { direction B; walldistance -1; flag NoSlip; }
Border { direction W; walldistance -1; flag NoSlip; }
Border { direction E; walldistance -1; flag NoSlip; }
}
File added
import os
import waLBerla as wlb
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
import sys
import sqlite3
from math import prod
try:
import machinestate as ms
except ImportError:
ms = None
# Number of time steps run for a workload of 128^3 per GPU
# if double as many cells are on the GPU, half as many time steps are run etc.
# increase this to get more reliable measurements
TIME_STEPS_FOR_128_BLOCK = 1000
DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3")
BENCHMARK = int(os.environ.get('BENCHMARK', 0))
WeakX = int(os.environ.get('WeakX', 128))
WeakY = int(os.environ.get('WeakY', 128))
WeakZ = int(os.environ.get('WeakZ', 128))
StrongX = int(os.environ.get('StrongX', 128))
StrongY = int(os.environ.get('StrongY', 128))
StrongZ = int(os.environ.get('StrongZ', 128))
BASE_CONFIG = {
'DomainSetup': {
'cellsPerBlock': (256, 128, 128),
'periodic': (1, 1, 1),
},
'Parameters': {
'omega': 1.8,
'cudaEnabledMPI': False,
'warmupSteps': 5,
'outerIterations': 3,
}
}
ldc_setup = {'Border': [
{'direction': 'N', 'walldistance': -1, 'flag': 'UBB'},
{'direction': 'W', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'E', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
]}
def num_time_steps(block_size, time_steps_for_128_block=200):
cells = block_size[0] * block_size[1] * block_size[2]
time_steps = (128 ** 3 / cells) * time_steps_for_128_block
if time_steps < 10:
time_steps = 10
return int(time_steps)
def cuda_block_size_ok(block_size, regs_per_threads=168):
"""Checks if a given CUDA block size does not exceed the SM register limit.
168 registers per thread was obtained using cuobjdump on both SRT and Cumulant
kernels. You might want to validate that for your own kernels."""
return prod(block_size) * regs_per_threads < 64 * (2 ** 10)
def domain_block_size_ok(block_size, total_mem, gls=1, q=27, size_per_value=8):
"""Checks if a single block of given size fits into GPU memory"""
return prod(b + 2 * gls for b in block_size) * q * size_per_value < total_mem
class Scenario:
def __init__(self, cells_per_block=(256, 128, 128), periodic=(1, 1, 1), cuda_blocks=(128, 1, 1),
timesteps=None, time_step_strategy="normal", omega=1.8, cuda_enabled_mpi=False,
inner_outer_split=(1, 1, 1), warmup_steps=5, outer_iterations=3,
init_shear_flow=False, boundary_setup=False,
vtk_write_frequency=0, remaining_time_logger_frequency=-1,
additional_info=None, blocks=None, db_file_name=None):
if boundary_setup:
init_shear_flow = False
periodic = (0, 0, 0)
self.blocks = blocks if blocks else block_decomposition(wlb.mpi.numProcesses())
self.cells_per_block = cells_per_block
self.periodic = periodic
self.time_step_strategy = time_step_strategy
self.omega = omega
self.timesteps = timesteps if timesteps else num_time_steps(cells_per_block)
self.cuda_enabled_mpi = cuda_enabled_mpi
self.inner_outer_split = inner_outer_split
self.init_shear_flow = init_shear_flow
self.boundary_setup = boundary_setup
self.warmup_steps = warmup_steps
self.outer_iterations = outer_iterations
self.cuda_blocks = cuda_blocks
self.vtk_write_frequency = vtk_write_frequency
self.remaining_time_logger_frequency = remaining_time_logger_frequency
self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False)
self.additional_info = additional_info
@wlb.member_callback
def config(self, print_dict=True):
from pprint import pformat
config_dict = {
'DomainSetup': {
'blocks': self.blocks,
'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic,
},
'Parameters': {
'omega': self.omega,
'cudaEnabledMPI': self.cuda_enabled_mpi,
'warmupSteps': self.warmup_steps,
'outerIterations': self.outer_iterations,
'timeStepStrategy': self.time_step_strategy,
'timesteps': self.timesteps,
'initShearFlow': self.init_shear_flow,
'gpuBlockSize': self.cuda_blocks,
'innerOuterSplit': self.inner_outer_split,
'vtkWriteFrequency': self.vtk_write_frequency,
'remainingTimeLoggerFrequency': self.remaining_time_logger_frequency
},
'Logging': {
'logLevel': 'info', # info progress detail tracing
}
}
if self.boundary_setup:
config_dict["Boundaries"] = ldc_setup
if print_dict:
wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
if self.additional_info:
wlb.log_info_on_root("Additional Info:\n" + pformat(self.additional_info))
return config_dict
@wlb.member_callback
def results_callback(self, **kwargs):
data = {}
data.update(self.config_dict['Parameters'])
data.update(self.config_dict['DomainSetup'])
data.update(kwargs)
if self.additional_info is not None:
data.update(self.additional_info)
data['executable'] = sys.argv[0]
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
if ms:
state = ms.MachineState(extended=False, anonymous=True)
state.generate() # generate subclasses
state.update() # read information
data["MachineState"] = str(state.get())
else:
print("MachineState module is not available. MachineState was not saved")
sequenceValuesToScalars(data)
result = data
sequenceValuesToScalars(result)
num_tries = 4
# check multiple times e.g. may fail when multiple benchmark processes are running
table_name = f"runs_{data['stencil']}_{data['streamingPattern']}_{data['collisionSetup']}_{prod(self.blocks)}"
table_name = table_name.replace("-", "_") # - not allowed for table name would lead to syntax error
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, table_name, self.db_file_name)
storeSingle(result, table_name, self.db_file_name)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
# -------------------------------------- Profiling -----------------------------------
def profiling():
"""Tests different communication overlapping strategies"""
wlb.log_info_on_root("Running 2 timesteps for profiling")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
cells = (128, 128, 128)
cuda_enabled_mpi = False
scenarios.add(Scenario(cells_per_block=cells, time_step_strategy='kernelOnly',
inner_outer_split=(1, 1, 1), timesteps=2, cuda_enabled_mpi=cuda_enabled_mpi,
outer_iterations=1, warmup_steps=0))
# -------------------------------------- Functions trying different parameter sets -----------------------------------
def overlap_benchmark():
"""Tests different communication overlapping strategies"""
wlb.log_info_on_root("Running different communication overlap strategies")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
cuda_enabled_mpi = False
inner_outer_splits = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)]
# no overlap
scenarios.add(Scenario(time_step_strategy='noOverlap',
inner_outer_split=(1, 1, 1),
cuda_enabled_mpi=cuda_enabled_mpi,
outer_iterations=1))
for inner_outer_split in inner_outer_splits:
scenario = Scenario(time_step_strategy='simpleOverlap',
inner_outer_split=inner_outer_split,
cuda_enabled_mpi=cuda_enabled_mpi,
outer_iterations=1)
scenarios.add(scenario)
def no_overlap_scaling(cuda_enabled_mpi=False):
"""Tests different communication overlapping strategies"""
wlb.log_info_on_root("Running scaling benchmark without communication hiding")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
# no overlap
scenarios.add(Scenario(cells_per_block=(256, 256, 256),
cuda_blocks=(128, 1, 1),
time_step_strategy='noOverlap',
inner_outer_split=(1, 1, 1),
cuda_enabled_mpi=cuda_enabled_mpi,
outer_iterations=1))
def weak_scaling_overlap(cuda_enabled_mpi=False):
"""Tests different communication overlapping strategies"""
wlb.log_info_on_root("Running scaling benchmark with communication hiding")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
# overlap
for t in ["noOverlap", "simpleOverlap"]:
scenarios.add(Scenario(cells_per_block=(WeakX, WeakY, WeakZ),
cuda_blocks=(128, 1, 1),
time_step_strategy=t,
inner_outer_split=(8, 8, 8),
cuda_enabled_mpi=cuda_enabled_mpi,
outer_iterations=1,
boundary_setup=True,
db_file_name="weakScalingUniformGrid.sqlite3"))
def strong_scaling_overlap(cuda_enabled_mpi=False):
wlb.log_info_on_root("Running strong scaling benchmark with one block per proc with communication hiding")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
domain_size = (StrongX, StrongY, StrongZ)
blocks = block_decomposition(wlb.mpi.numProcesses())
cells_per_block = tuple([d // b for d, b in zip(domain_size, reversed(blocks))])
# overlap
for t in ["noOverlap", "simpleOverlap"]:
scenarios.add(Scenario(cells_per_block=cells_per_block,
cuda_blocks=(128, 1, 1),
time_step_strategy=t,
inner_outer_split=(1, 1, 1),
cuda_enabled_mpi=cuda_enabled_mpi,
outer_iterations=1,
timesteps=50,
blocks=blocks,
boundary_setup=True,
db_file_name="strongScalingUniformGridOneBlock.sqlite3"))
def single_gpu_benchmark():
"""Benchmarks only the LBM compute kernel"""
wlb.log_info_on_root("Running single GPU benchmarks")
wlb.log_info_on_root("")
gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 40))
gpu_mem = gpu_mem_gb * (2 ** 30)
gpu_type = os.environ.get('GPU_TYPE')
additional_info = {}
if gpu_type is not None:
additional_info['gpu_type'] = gpu_type
scenarios = wlb.ScenarioManager()
block_sizes = [(i, i, i) for i in (128, 256, 320)]
cuda_blocks = [(128, 1, 1), ]
for block_size in block_sizes:
for cuda_block_size in cuda_blocks:
# cuda_block_size = (256, 1, 1) and block_size = (64, 64, 64) would be cut to cuda_block_size = (64, 1, 1)
if cuda_block_size > block_size:
continue
if not cuda_block_size_ok(cuda_block_size):
wlb.log_info_on_root(f"Cuda block size {cuda_block_size} would exceed register limit. Skipping.")
continue
if not domain_block_size_ok(block_size, gpu_mem):
wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
continue
scenario = Scenario(cells_per_block=block_size,
cuda_blocks=cuda_block_size,
time_step_strategy='kernelOnly',
timesteps=num_time_steps(block_size, 2000),
outer_iterations=1,
additional_info=additional_info)
scenarios.add(scenario)
def validation_run():
"""Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
wlb.log_info_on_root("Validation run")
wlb.log_info_on_root("")
time_step_strategy = "noOverlap" # "simpleOverlap"
scenarios = wlb.ScenarioManager()
scenario = Scenario(cells_per_block=(128, 128, 128),
time_step_strategy=time_step_strategy,
timesteps=10001,
outer_iterations=1,
warmup_steps=0,
init_shear_flow=False,
boundary_setup=True,
vtk_write_frequency=5000,
remaining_time_logger_frequency=30)
scenarios.add(scenario)
wlb.log_info_on_root(f"Batch run of benchmark scenarios, saving result to {DB_FILE}")
# Select the benchmark you want to run
# single_gpu_benchmark() # benchmarks different CUDA block sizes and domain sizes and measures single GPU
# performance of compute kernel (no communication)
# overlap_benchmark() # benchmarks different communication overlap options
# profiling() # run only two timesteps on a smaller domain for profiling only
# validation_run()
if BENCHMARK == 0:
single_gpu_benchmark()
elif BENCHMARK == 1:
weak_scaling_overlap(True)
elif BENCHMARK == 2:
strong_scaling_overlap(True)
else:
validation_run()
import os
import waLBerla as wlb
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
import sys
import sqlite3
from math import prod
try:
import machinestate as ms
except ImportError:
ms = None
# Number of time steps run for a workload of 128^3 per GPU
# if double as many cells are on the GPU, half as many time steps are run etc.
# increase this to get more reliable measurements
TIME_STEPS_FOR_128_BLOCK = int(os.environ.get('TIME_STEPS_FOR_128_BLOCK', 1000))
DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3")
BENCHMARK = int(os.environ.get('BENCHMARK', 0))
WeakX = int(os.environ.get('WeakX', 128))
WeakY = int(os.environ.get('WeakY', 128))
WeakZ = int(os.environ.get('WeakZ', 128))
StrongX = int(os.environ.get('StrongX', 128))
StrongY = int(os.environ.get('StrongY', 128))
StrongZ = int(os.environ.get('StrongZ', 128))
BASE_CONFIG = {
'DomainSetup': {
'cellsPerBlock': (256, 128, 128),
'periodic': (1, 1, 1),
},
'Parameters': {
'omega': 1.8,
'cudaEnabledMPI': False,
'warmupSteps': 5,
'outerIterations': 3,
}
}
ldc_setup = {'Border': [
{'direction': 'N', 'walldistance': -1, 'flag': 'UBB'},
{'direction': 'W', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'E', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
]}
def num_time_steps(block_size, time_steps_for_128_block=TIME_STEPS_FOR_128_BLOCK):
"""
Calculate the number of time steps based on the block size.
This function computes the number of time steps required for a given block size
by scaling the time steps that could be executed on one process within one second
for a 128x128x128 cells_per_block to the given cells_per_block size.
Parameters:
block_size (tuple): A tuple of three integers representing the dimensions of the cells_per_block (x, y, z).
time_steps_for_128_block (int, optional): The number of time steps for a 128x128x128 block. Default is 1000,
which is approximately the number of timesteps that were executed within one second on one entire MareNostrum5-acc node.
Returns:
int: The calculated number of time steps, with a minimum value of 10.
"""
cells = block_size[0] * block_size[1] * block_size[2]
time_steps = (128 ** 3 / cells) * time_steps_for_128_block
if time_steps < 10:
time_steps = 10
return int(time_steps)
def cuda_block_size_ok(block_size, regs_per_threads=168):
"""
Checks if a given CUDA block size does not exceed the SM register limit.
168 registers per thread was obtained using cuobjdump on both SRT and Cumulant
kernels. You might want to validate that for your own kernels.
"""
return prod(block_size) * regs_per_threads < 64 * (2 ** 10)
def domain_block_size_ok(block_size, total_mem, gls=1, q=27, size_per_value=8):
"""
Checks if a single block of given size fits into GPU memory.
"""
return prod(b + 2 * gls for b in block_size) * q * size_per_value < total_mem
class Scenario:
def __init__(self, cells_per_block=(256, 128, 128), periodic=(1, 1, 1), cuda_blocks=(128, 1, 1),
timesteps=None, time_step_strategy="normal", omega=1.8, cuda_enabled_mpi=False,
inner_outer_split=(1, 1, 1), warmup_steps=5, outer_iterations=3,
init_shear_flow=False, boundary_setup=False,
vtk_write_frequency=0, remaining_time_logger_frequency=-1,
additional_info=None, blocks=None, db_file_name=None):
if boundary_setup:
init_shear_flow = False
periodic = (0, 0, 0)
self.blocks = blocks if blocks else block_decomposition(wlb.mpi.numProcesses())
self.cells_per_block = cells_per_block
self.periodic = periodic
self.time_step_strategy = time_step_strategy
self.omega = omega
self.timesteps = timesteps if timesteps else num_time_steps(cells_per_block)
self.cuda_enabled_mpi = cuda_enabled_mpi
self.inner_outer_split = inner_outer_split
self.init_shear_flow = init_shear_flow
self.boundary_setup = boundary_setup
self.warmup_steps = warmup_steps
self.outer_iterations = outer_iterations
self.cuda_blocks = cuda_blocks
self.vtk_write_frequency = vtk_write_frequency
self.remaining_time_logger_frequency = remaining_time_logger_frequency
self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False)
self.additional_info = additional_info
@wlb.member_callback
def config(self, print_dict=True):
from pprint import pformat
config_dict = {
'DomainSetup': {
'blocks': self.blocks,
'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic,
},
'Parameters': {
'omega': self.omega,
'cudaEnabledMPI': self.cuda_enabled_mpi,
'warmupSteps': self.warmup_steps,
'outerIterations': self.outer_iterations,
'timeStepStrategy': self.time_step_strategy,
'timesteps': self.timesteps,
'initShearFlow': self.init_shear_flow,
'gpuBlockSize': self.cuda_blocks,
'innerOuterSplit': self.inner_outer_split,
'vtkWriteFrequency': self.vtk_write_frequency,
'remainingTimeLoggerFrequency': self.remaining_time_logger_frequency
},
'Logging': {
'logLevel': 'info', # info progress detail tracing
}
}
if self.boundary_setup:
config_dict["Boundaries"] = ldc_setup
if print_dict:
wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
if self.additional_info:
wlb.log_info_on_root("Additional Info:\n" + pformat(self.additional_info))
return config_dict
@wlb.member_callback
def results_callback(self, **kwargs):
data = {}
data.update(self.config_dict['Parameters'])
data.update(self.config_dict['DomainSetup'])
data.update(kwargs)
if self.additional_info is not None:
data.update(self.additional_info)
data['executable'] = sys.argv[0]
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
if ms:
state = ms.MachineState(extended=False, anonymous=True)
state.generate() # generate subclasses
state.update() # read information
data["MachineState"] = str(state.get())
else:
print("MachineState module is not available. MachineState was not saved")
sequenceValuesToScalars(data)
result = data
sequenceValuesToScalars(result)
num_tries = 4
# check multiple times e.g. may fail when multiple benchmark processes are running
table_name = f"runs_{data['stencil']}_{data['streamingPattern']}_{data['collisionSetup']}_{prod(self.blocks)}"
table_name = table_name.replace("-", "_") # - not allowed for table name would lead to syntax error
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, table_name, self.db_file_name)
storeSingle(result, table_name, self.db_file_name)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
# -------------------------------------- Functions trying different parameter sets -----------------------------------
def weak_scaling_overlap(cuda_enabled_mpi=False):
wlb.log_info_on_root("Running scaling benchmark with communication hiding")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
# overlap
for t in ["simpleOverlap"]:
scenarios.add(Scenario(cells_per_block=(WeakX, WeakY, WeakZ),
cuda_blocks=(128, 1, 1),
time_step_strategy=t,
inner_outer_split=(8, 8, 8),
cuda_enabled_mpi=cuda_enabled_mpi,
outer_iterations=1,
boundary_setup=True,
db_file_name="weakScalingUniformGrid.sqlite3"))
def strong_scaling_overlap(cuda_enabled_mpi=False):
wlb.log_info_on_root("Running strong scaling benchmark with one block per proc with communication hiding")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
domain_size = (StrongX, StrongY, StrongZ)
blocks = block_decomposition(wlb.mpi.numProcesses())
cells_per_block = tuple([d // b for d, b in zip(domain_size, reversed(blocks))])
# overlap
for t in ["simpleOverlap"]:
scenarios.add(Scenario(cells_per_block=cells_per_block,
cuda_blocks=(128, 1, 1),
time_step_strategy=t,
inner_outer_split=(1, 1, 1),
cuda_enabled_mpi=cuda_enabled_mpi,
outer_iterations=1,
timesteps=num_time_steps(cells_per_block),
blocks=blocks,
boundary_setup=True,
db_file_name="strongScalingUniformGridOneBlock.sqlite3"))
def single_gpu_benchmark():
"""Benchmarks only the LBM compute kernel"""
wlb.log_info_on_root("Running single GPU benchmarks")
wlb.log_info_on_root("")
gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 40))
gpu_mem = gpu_mem_gb * (2 ** 30)
gpu_type = os.environ.get('GPU_TYPE')
additional_info = {}
if gpu_type is not None:
additional_info['gpu_type'] = gpu_type
scenarios = wlb.ScenarioManager()
block_sizes = [(i, i, i) for i in (128, 256, 320)]
cuda_blocks = [(128, 1, 1), ]
for block_size in block_sizes:
for cuda_block_size in cuda_blocks:
# cuda_block_size = (256, 1, 1) and block_size = (64, 64, 64) would be cut to cuda_block_size = (64, 1, 1)
if cuda_block_size > block_size:
continue
if not cuda_block_size_ok(cuda_block_size):
wlb.log_info_on_root(f"Cuda block size {cuda_block_size} would exceed register limit. Skipping.")
continue
if not domain_block_size_ok(block_size, gpu_mem):
wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
continue
scenario = Scenario(cells_per_block=block_size,
cuda_blocks=cuda_block_size,
time_step_strategy='kernelOnly',
timesteps=num_time_steps(block_size, 2000),
outer_iterations=1,
additional_info=additional_info)
scenarios.add(scenario)
def validation_run():
"""Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
wlb.log_info_on_root("Validation run")
wlb.log_info_on_root("")
time_step_strategy = "noOverlap" # "simpleOverlap"
scenarios = wlb.ScenarioManager()
scenario = Scenario(cells_per_block=(128, 128, 128),
time_step_strategy=time_step_strategy,
timesteps=10001,
outer_iterations=1,
warmup_steps=0,
init_shear_flow=False,
boundary_setup=True,
vtk_write_frequency=5000,
remaining_time_logger_frequency=30)
scenarios.add(scenario)
wlb.log_info_on_root(f"Batch run of benchmark scenarios, saving result to {DB_FILE}")
# Select the benchmark you want to run
# single_gpu_benchmark() # benchmarks different CUDA block sizes and domain sizes and measures single GPU
# performance of compute kernel (no communication)
# overlap_benchmark() # benchmarks different communication overlap options
# profiling() # run only two timesteps on a smaller domain for profiling only
# validation_run()
if BENCHMARK == 0:
single_gpu_benchmark()
elif BENCHMARK == 1:
weak_scaling_overlap(True)
elif BENCHMARK == 2:
strong_scaling_overlap(True)
else:
validation_run()
# waLBerla Python module
if ( WALBERLA_BUILD_WITH_PYTHON_MODULE )
set(PYTHON_MODULE_DEPENDENCIES blockforest boundary domain_decomposition core field geometry lbm postprocessing python_coupling timeloop vtk)
if (WALBERLA_BUILD_WITH_CUDA)
set(PYTHON_MODULE_DEPENDENCIES ${PYTHON_MODULE_DEPENDENCIES} cuda)
if ( NOT TARGET walberla::python_coupling )
message( WARNING "python module ist not build since the walberla::python_coupling target is non-existent" )
else()
if ( WALBERLA_BUILD_WITH_PYTHON )
if ( WALBERLA_BUILD_WITH_GPU_SUPPORT )
set( PYTHON_MODULE_DEPENDENCIES walberla::blockforest walberla::boundary walberla::domain_decomposition walberla::core walberla::field walberla::python_coupling walberla::timeloop walberla::vtk walberla::gpu )
else()
set( PYTHON_MODULE_DEPENDENCIES walberla::blockforest walberla::boundary walberla::domain_decomposition walberla::core walberla::field walberla::python_coupling walberla::timeloop walberla::vtk )
endif()
if( WALBERLA_CXX_COMPILER_IS_MSVC )
set ( pythonModules ${PYTHON_MODULE_DEPENDENCIES})
set ( pythonModules ${PYTHON_MODULE_DEPENDENCIES})
elseif( APPLE )
set ( pythonModules "-Wl,-force_load" ${PYTHON_MODULE_DEPENDENCIES})
set ( pythonModules "-Wl,-force_load" ${PYTHON_MODULE_DEPENDENCIES})
else()
set ( pythonModules "-Wl,-whole-archive" ${PYTHON_MODULE_DEPENDENCIES} "-Wl,-no-whole-archive" )
set ( pythonModules "-Wl,-whole-archive" ${PYTHON_MODULE_DEPENDENCIES} "-Wl,-no-whole-archive" )
endif()
if( WALBERLA_BUILD_WITH_PYTHON_LBM )
add_library( walberla_cpp SHARED PythonModuleWithLbmModule.cpp )
else()
add_library( walberla_cpp SHARED PythonModule.cpp )
endif()
add_library( walberla_cpp SHARED PythonModule.cpp )
target_link_libraries( walberla_cpp ${pythonModules} ${SERVICE_LIBS} )
set_target_properties( walberla_cpp PROPERTIES PREFIX "")
if ( APPLE )
set_target_properties( walberla_cpp PROPERTIES SUFFIX ".so")
endif()
target_link_libraries( walberla_cpp ${WALBERLA_LINK_LIBRARIES_KEYWORD} ${pythonModules} ${SERVICE_LIBS} )
set_target_properties(
walberla_cpp PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}"
SUFFIX "${PYTHON_MODULE_EXTENSION}"
)
set_target_properties( walberla_cpp PROPERTIES MACOSX_RPATH TRUE )
configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/setup.py ${CMAKE_CURRENT_BINARY_DIR}/setup.py )
add_custom_target( pythonModule ALL ${PYTHON_EXECUTABLE} setup.py build DEPENDS walberla_cpp )
add_custom_target( pythonModuleInstall ${PYTHON_EXECUTABLE} setup.py install DEPENDS walberla_cpp )
add_test( NAME PythonModuleTest
COMMAND ${PYTHON_EXECUTABLE} -m unittest discover -v -s ${walberla_SOURCE_DIR}/python/waLBerla_tests )
add_custom_target( pythonModule ALL ${Python_EXECUTABLE} setup.py build DEPENDS walberla_cpp )
add_custom_target( pythonModuleInstall ${Python_EXECUTABLE} setup.py install --user DEPENDS walberla_cpp )
endif()
endif()
\ No newline at end of file
......@@ -15,117 +15,76 @@
//
//! \file PythonModule.cpp
//! \author Martin Bauer <martin.bauer@fau.de>
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#include "python_coupling/PythonWrapper.h"
#include "waLBerlaDefinitions.h"
#include "blockforest/python/Exports.h"
#include "field/GhostLayerField.h"
#include "field/python/Exports.h"
#include "geometry/python/Exports.h"
#include "postprocessing/python/Exports.h"
#include "python_coupling/Manager.h"
#include "python_coupling/export/BlockForestExport.h"
#include "python_coupling/export/FieldExports.h"
#include "python_coupling/export/VTKExport.h"
#include "python_coupling/helper/ModuleInit.h"
#include "stencil/all.h"
#include "timeloop/python/Exports.h"
#include "vtk/python/Exports.h"
#ifdef WALBERLA_BUILD_WITH_CUDA
#include "cuda/python/Exports.h"
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
#include "python_coupling/export/GPUExport.h"
#endif
#include <boost/mpl/vector.hpp>
#include <boost/mpl/insert_range.hpp>
namespace bmpl = boost::mpl;
using namespace walberla;
typedef bmpl::vector<
Field<walberla::real_t,1>,
Field<walberla::real_t,2>,
Field<walberla::real_t,3>,
Field<walberla::real_t,4>,
Field<walberla::real_t,5>,
Field<walberla::real_t,9>,
Field<walberla::real_t,19>,
Field<walberla::real_t,27>,
Field<walberla::int8_t,1>,
Field<walberla::int16_t,1>,
Field<walberla::int32_t,1>,
Field<walberla::int64_t,1>,
Field<walberla::int64_t,2>,
Field<walberla::int64_t,3>,
Field<walberla::int64_t,4>,
Field<walberla::int64_t,5>,
Field<walberla::uint8_t,1>,
Field<walberla::uint16_t,1>,
Field<walberla::uint32_t,1>
> FieldTypes;
typedef bmpl::vector<
GhostLayerField<walberla::real_t,1>,
GhostLayerField<walberla::real_t,3>
> FieldTypesForMeshGeneration;
typedef bmpl::vector< FlagField<walberla::uint8_t>,
FlagField<walberla::uint16_t> > FlagFieldTypes;
typedef bmpl::vector< stencil::D2Q5,
stencil::D2Q9,
stencil::D3Q7,
stencil::D3Q19,
stencil::D3Q27 > Stencils;
typedef GhostLayerField<walberla::real_t,3> VecField_T;
using namespace walberla;
#define FIELD_TYPES \
Field<walberla::real_t,1>,\
Field<walberla::real_t,2>,\
Field<walberla::real_t,3>,\
Field<walberla::real_t,4>,\
Field<walberla::real_t,9>,\
Field<walberla::real_t,15>,\
Field<walberla::real_t,19>,\
Field<walberla::real_t,27>,\
Field<walberla::int8_t,1>,\
Field<walberla::int32_t,1>,\
Field<walberla::int64_t,1>,\
Field<walberla::int64_t,2>,\
Field<walberla::int64_t,3>,\
Field<walberla::uint8_t,1>,\
Field<walberla::uint16_t,1>,\
Field<walberla::uint32_t,1>
#define GPU_FIELD_TYPES \
GPUField<real_t>,\
GPUField<int8_t>,\
GPUField<int32_t>,\
GPUField<int64_t>,\
GPUField<uint8_t>,\
GPUField<uint64_t>
struct InitObject
{
InitObject()
{
namespace bmpl = boost::mpl;
auto pythonManager = python_coupling::Manager::instance();
// Field
pythonManager->addExporterFunction( field::exportModuleToPython<FieldTypes> );
pythonManager->addExporterFunction( field::exportGatherFunctions<FieldTypes> );
pythonManager->addBlockDataConversion<FieldTypes>() ;
pythonManager->addExporterFunction( field::exportModuleToPython<FIELD_TYPES> );
pythonManager->addExporterFunction( field::exportGatherFunctions<FIELD_TYPES> );
pythonManager->addBlockDataConversion<FIELD_TYPES>();
// Blockforest
pythonManager->addExporterFunction( blockforest::exportModuleToPython<Stencils> );
// Geometry
pythonManager->addExporterFunction( geometry::exportModuleToPython );
pythonManager->addExporterFunction(blockforest::exportModuleToPython<stencil::D2Q5, stencil::D2Q9, stencil::D3Q7, stencil::D3Q19, stencil::D3Q27>);
// VTK
pythonManager->addExporterFunction( vtk::exportModuleToPython );
// Postprocessing
pythonManager->addExporterFunction( postprocessing::exportModuleToPython<FieldTypesForMeshGeneration, FlagFieldTypes> );
// Timeloop
pythonManager->addExporterFunction( timeloop::exportModuleToPython );
#ifdef WALBERLA_BUILD_WITH_CUDA
using walberla::cuda::GPUField;
typedef bmpl::vector<GPUField<double>, GPUField<float>, GPUField<int>, GPUField<uint8_t>, GPUField<uint16_t> > GPUFields;
pythonManager->addExporterFunction( cuda::exportModuleToPython<GPUFields> );
pythonManager->addBlockDataConversion<GPUFields>();
#endif
#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
using walberla::gpu::GPUField;
pythonManager->addExporterFunction(gpu::exportModuleToPython<GPU_FIELD_TYPES> );
pythonManager->addExporterFunction(gpu::exportCopyFunctionsToPython<FIELD_TYPES> );
pythonManager->addBlockDataConversion<GPU_FIELD_TYPES>();
#endif
//
python_coupling::initWalberlaForPythonModule();
}
};
InitObject globalInitObject;
InitObject globalInitObject;
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file PythonModule.cpp
//! \author Martin Bauer <martin.bauer@fau.de>
//
//======================================================================================================================
#include "python_coupling/PythonWrapper.h"
#include "waLBerlaDefinitions.h"
#include "blockforest/python/Exports.h"
#include "field/GhostLayerField.h"
#include "field/python/Exports.h"
#include "geometry/python/Exports.h"
#include "lbm/all.h"
#include "lbm/lattice_model/ForceModel.h"
#include "lbm/python/Exports.h"
#include "postprocessing/python/Exports.h"
#include "python_coupling/Manager.h"
#include "python_coupling/helper/ModuleInit.h"
#include "stencil/all.h"
#include "timeloop/python/Exports.h"
#include "vtk/python/Exports.h"
#include <boost/mpl/vector.hpp>
#include <boost/mpl/insert_range.hpp>
namespace bmpl = boost::mpl;
using namespace walberla;
typedef bmpl::vector<
Field<walberla::real_t,1>,
Field<walberla::real_t,2>,
Field<walberla::real_t,3>,
Field<walberla::real_t,4>,
Field<walberla::real_t,5>,
Field<walberla::real_t,9>,
Field<walberla::real_t,19>,
Field<walberla::real_t,27>,
Field<walberla::int8_t,1>,
Field<walberla::int16_t,1>,
Field<walberla::int32_t,1>,
Field<walberla::int64_t,1>,
Field<walberla::int64_t,2>,
Field<walberla::int64_t,3>,
Field<walberla::int64_t,4>,
Field<walberla::int64_t,5>,
Field<walberla::uint8_t,1>,
Field<walberla::uint16_t,1>,
Field<walberla::uint32_t,1>
> FieldTypes;
typedef bmpl::vector<
GhostLayerField<walberla::real_t,1>,
GhostLayerField<walberla::real_t,3>
> FieldTypesForMeshGeneration;
typedef bmpl::vector< FlagField<walberla::uint8_t>,
FlagField<walberla::uint16_t> > FlagFieldTypes;
typedef bmpl::vector< stencil::D2Q5,
stencil::D2Q9,
stencil::D3Q7,
stencil::D3Q19,
stencil::D3Q27 > Stencils;
typedef lbm::collision_model::SRTField< GhostLayerField<walberla::real_t,1> > SRTField_T;
typedef GhostLayerField<walberla::real_t,3> VecField_T;
// ------------------------- D2Q9 -----------------------------------------------------------------------------
typedef bmpl::list< lbm::D2Q9 < lbm::collision_model::SRT, false, lbm::force_model::None, 2>,
lbm::D2Q9 < lbm::collision_model::SRT, false, lbm::force_model::SimpleConstant, 2>,
lbm::D2Q9 < lbm::collision_model::SRT, false, lbm::force_model::GuoConstant, 2>,
lbm::D2Q9 < lbm::collision_model::SRT, false, lbm::force_model::LuoConstant, 2>,
lbm::D2Q9 < lbm::collision_model::SRT, true, lbm::force_model::None, 2>,
lbm::D2Q9 < lbm::collision_model::SRT, true, lbm::force_model::SimpleConstant, 2>,
lbm::D2Q9 < lbm::collision_model::SRT, true, lbm::force_model::GuoConstant, 2>,
lbm::D2Q9 < lbm::collision_model::SRT, false, lbm::force_model::GuoField<VecField_T>, 2>,
lbm::D2Q9 < lbm::collision_model::SRT, true, lbm::force_model::LuoConstant, 2>,
lbm::D2Q9 < lbm::collision_model::SRT, true, lbm::force_model::None, 1>,
lbm::D2Q9 < lbm::collision_model::SRT, false, lbm::force_model::None, 1>
> LM_D2Q19_SRT;
typedef bmpl::list< lbm::D2Q9 < lbm::collision_model::TRT, false, lbm::force_model::None, 2>,
lbm::D2Q9 < lbm::collision_model::TRT, false, lbm::force_model::SimpleConstant, 2>,
lbm::D2Q9 < lbm::collision_model::TRT, false, lbm::force_model::GuoConstant, 2>,
lbm::D2Q9 < lbm::collision_model::TRT, false, lbm::force_model::LuoConstant, 2>,
lbm::D2Q9 < lbm::collision_model::TRT, true, lbm::force_model::None, 2>,
lbm::D2Q9 < lbm::collision_model::TRT, true, lbm::force_model::SimpleConstant, 2>,
lbm::D2Q9 < lbm::collision_model::TRT, true, lbm::force_model::GuoConstant, 2>,
lbm::D2Q9 < lbm::collision_model::TRT, true, lbm::force_model::LuoConstant, 2>,
lbm::D2Q9 < lbm::collision_model::TRT, true, lbm::force_model::None, 1>,
lbm::D2Q9 < lbm::collision_model::TRT, false, lbm::force_model::None, 1>
> LM_D2Q19_TRT;
typedef lbm::collision_model::SRTField<GhostLayerField<real_t,1> > SRTFieldCollisionModel;
typedef bmpl::list< lbm::D2Q9 < SRTFieldCollisionModel, false, lbm::force_model::None, 2>,
lbm::D2Q9 < SRTFieldCollisionModel, false, lbm::force_model::SimpleConstant, 2>,
lbm::D2Q9 < SRTFieldCollisionModel, false, lbm::force_model::GuoConstant, 2>,
lbm::D2Q9 < SRTFieldCollisionModel, false, lbm::force_model::LuoConstant, 2>,
lbm::D2Q9 < SRTFieldCollisionModel, true, lbm::force_model::None, 2>,
lbm::D2Q9 < SRTFieldCollisionModel, true, lbm::force_model::SimpleConstant, 2>,
lbm::D2Q9 < SRTFieldCollisionModel, true, lbm::force_model::GuoConstant, 2>,
lbm::D2Q9 < SRTFieldCollisionModel, true, lbm::force_model::LuoConstant, 2>
> LM_D2Q9_SRTField;
// ------------------------- D3Q19 ----------------------------------------------------------------------------
typedef bmpl::list< lbm::D3Q19 < lbm::collision_model::SRT, false, lbm::force_model::None, 2>,
lbm::D3Q19 < lbm::collision_model::SRT, false, lbm::force_model::SimpleConstant, 2>,
lbm::D3Q19 < lbm::collision_model::SRT, false, lbm::force_model::GuoConstant, 2>,
lbm::D3Q19 < lbm::collision_model::SRT, false, lbm::force_model::LuoConstant, 2>,
lbm::D3Q19 < lbm::collision_model::SRT, true, lbm::force_model::None, 2>,
lbm::D3Q19 < lbm::collision_model::SRT, true, lbm::force_model::SimpleConstant, 2>,
lbm::D3Q19 < lbm::collision_model::SRT, true, lbm::force_model::GuoConstant, 2>,
lbm::D3Q19 < lbm::collision_model::SRT, true, lbm::force_model::LuoConstant, 2>
> LM_D3Q19_SRT;
typedef bmpl::list< lbm::D3Q19 < lbm::collision_model::TRT, false, lbm::force_model::None, 2>,
lbm::D3Q19 < lbm::collision_model::TRT, false, lbm::force_model::SimpleConstant, 2>,
lbm::D3Q19 < lbm::collision_model::TRT, false, lbm::force_model::GuoConstant, 2>,
lbm::D3Q19 < lbm::collision_model::TRT, false, lbm::force_model::LuoConstant, 2>,
lbm::D3Q19 < lbm::collision_model::TRT, true, lbm::force_model::None, 2>,
lbm::D3Q19 < lbm::collision_model::TRT, true, lbm::force_model::SimpleConstant, 2>,
lbm::D3Q19 < lbm::collision_model::TRT, true, lbm::force_model::GuoConstant, 2>,
lbm::D3Q19 < lbm::collision_model::TRT, true, lbm::force_model::LuoConstant, 2>
> LM_D3Q19_TRT;
typedef bmpl::list<lbm::D3Q19 < lbm::collision_model::D3Q19MRT, false, lbm::force_model::None, 2>,
lbm::D3Q19 < lbm::collision_model::D3Q19MRT, false, lbm::force_model::SimpleConstant, 2>,
lbm::D3Q19 < lbm::collision_model::D3Q19MRT, false, lbm::force_model::GuoConstant, 2>,
lbm::D3Q19 < lbm::collision_model::D3Q19MRT, false, lbm::force_model::LuoConstant, 2>,
lbm::D3Q27 < lbm::collision_model::D3Q27Cumulant, true, lbm::force_model::None, 2>
> LM_D3Q19_Extra;
typedef lbm::collision_model::SRTField<GhostLayerField<real_t,1> > SRTFieldCollisionModel;
typedef bmpl::list< lbm::D3Q19 < SRTFieldCollisionModel, false, lbm::force_model::None, 2>,
lbm::D3Q19 < SRTFieldCollisionModel, false, lbm::force_model::SimpleConstant, 2>,
lbm::D3Q19 < SRTFieldCollisionModel, false, lbm::force_model::GuoConstant, 2>,
lbm::D3Q19 < SRTFieldCollisionModel, false, lbm::force_model::LuoConstant, 2>,
lbm::D3Q19 < SRTFieldCollisionModel, true, lbm::force_model::None, 2>,
lbm::D3Q19 < SRTFieldCollisionModel, true, lbm::force_model::SimpleConstant, 2>,
lbm::D3Q19 < SRTFieldCollisionModel, true, lbm::force_model::GuoConstant, 2>,
lbm::D3Q19 < SRTFieldCollisionModel, true, lbm::force_model::LuoConstant, 2>
> LM_D3Q19_SRTField;
typedef bmpl::insert_range< LM_D2Q19_SRT, bmpl::end< LM_D2Q19_SRT >::type, LM_D2Q19_TRT> ::type D2Q9_SRT_TRT;
typedef bmpl::insert_range< D2Q9_SRT_TRT, bmpl::end< D2Q9_SRT_TRT >::type, LM_D2Q9_SRTField>::type LatticeModels2D;
typedef bmpl::insert_range< LM_D3Q19_SRT, bmpl::end< LM_D3Q19_SRT > ::type, LM_D3Q19_TRT>::type D3Q19_SRT_TRT;
typedef bmpl::insert_range< D3Q19_SRT_TRT, bmpl::end< D3Q19_SRT_TRT >::type, LM_D3Q19_Extra>::type D3Q19_SRT_TRT_MRT;
typedef bmpl::insert_range< D3Q19_SRT_TRT_MRT, bmpl::end< D3Q19_SRT_TRT_MRT >::type, LM_D3Q19_SRTField>::type LatticeModels3D;
typedef bmpl::insert_range< LatticeModels2D, bmpl::end< LatticeModels2D >::type, LatticeModels3D>::type LatticeModels;
typedef typename lbm::AdaptorsFromLatticeModels<LatticeModels>::type LBMAdaptors;
using namespace walberla;
struct InitObject
{
InitObject()
{
namespace bmpl = boost::mpl;
auto pythonManager = python_coupling::Manager::instance();
// Field
pythonManager->addExporterFunction( field::exportModuleToPython<FieldTypes> );
pythonManager->addExporterFunction( field::exportGatherFunctions<bmpl::joint_view<FieldTypes,LBMAdaptors>::type > );
pythonManager->addBlockDataConversion< bmpl::joint_view<FieldTypes,LBMAdaptors>::type >() ;
// Blockforest
pythonManager->addExporterFunction( blockforest::exportModuleToPython<Stencils> );
// Geometry
pythonManager->addExporterFunction( geometry::exportModuleToPython );
// VTK
pythonManager->addExporterFunction( vtk::exportModuleToPython );
// LBM
typedef bmpl::vector< FlagField<walberla::uint8_t> > FlagFieldTypesForLBM;
pythonManager->addExporterFunction( lbm::exportBasic<LatticeModels, FlagFieldTypesForLBM> );
pythonManager->addExporterFunction( lbm::exportBoundary<LatticeModels, FlagFieldTypesForLBM> );
pythonManager->addExporterFunction( lbm::exportSweeps<LatticeModels, FlagFieldTypesForLBM> );
// Postprocessing
pythonManager->addExporterFunction( postprocessing::exportModuleToPython<FieldTypesForMeshGeneration, FlagFieldTypes> );
// Timeloop
pythonManager->addExporterFunction( timeloop::exportModuleToPython );
python_coupling::initWalberlaForPythonModule();
}
};
InitObject globalInitObject;
from distutils.core import setup
import shutil
from os.path import exists, join
import platform
import sys
import platform
from os.path import exists, join
import shutil
from setuptools import setup
# The following variables are configure by CMake
walberla_source_dir = "${walberla_SOURCE_DIR}"
walberla_source_dir = "${walberla_SOURCE_DIR}"
walberla_binary_dir = "${CMAKE_CURRENT_BINARY_DIR}"
suffix = "${PYTHON_MODULE_EXTENSION}"
prefix = "${PYTHON_MODULE_PREFIX}"
walberla_module_file_name = prefix + "walberla_cpp" + suffix
version = "${WALBERLA_VERSION}"
if platform.system() == 'Windows':
extension = ( 'dll', 'pyd' )
configuration = 'Release'
configuration = 'Release'
else:
extension = ( 'so', 'so' )
configuration = ''
configuration = ''
def collectFiles():
src_shared_lib = join(walberla_binary_dir, configuration, 'walberla_cpp.' + extension[0] )
dst_shared_lib = join(walberla_binary_dir, 'waLBerla', 'walberla_cpp.' + extension[1] )
src_shared_lib = join(walberla_binary_dir, configuration, walberla_module_file_name)
dst_shared_lib = join(walberla_binary_dir, 'waLBerla', walberla_module_file_name)
# copy everything inplace
print( src_shared_lib )
if not exists( src_shared_lib ):
if not exists(src_shared_lib):
print("Python Module was not built yet - run 'make walberla_cpp'")
exit(1)
shutil.rmtree(join(walberla_binary_dir, 'waLBerla'), ignore_errors=True)
shutil.rmtree( join(walberla_binary_dir, 'waLBerla'), ignore_errors=True )
shutil.copytree(join(walberla_source_dir, 'python', 'waLBerla'),
join(walberla_binary_dir, 'waLBerla'))
shutil.copytree( join(walberla_source_dir, 'python', 'waLBerla'),
join(walberla_binary_dir, 'waLBerla') )
shutil.copy(src_shared_lib,
dst_shared_lib)
shutil.copy( src_shared_lib,
dst_shared_lib )
packages = ['waLBerla',
'waLBerla.evaluation',
......@@ -44,17 +45,19 @@ packages = ['waLBerla',
'waLBerla.tools.report',
'waLBerla.tools.sqlitedb',
'waLBerla.tools.lbm_unitconversion',
'waLBerla.tools.jobscripts']
'waLBerla.tools.jobscripts',
'waLBerla.tools.config']
collectFiles()
setup( name='waLBerla',
version='1.0',
author='Martin Bauer',
author_email='martin.bauer@fau.de',
url='http://www.walberla.net',
packages=packages,
package_data = {'' : ['walberla_cpp.' + extension[1]] }
setup(name='waLBerla',
version=str(version),
author='Markus Holzer',
author_email='markus.holzer@fau.de',
url='http://www.walberla.net',
packages=packages,
package_data={'': [walberla_module_file_name]}
)
if sys.argv[1] == 'build':
......@@ -62,4 +65,4 @@ if sys.argv[1] == 'build':
" - to install run 'make pythonModuleInstall'\n"
" - for development usage: \n"
" bash: export PYTHONPATH=%s:$PYTHONPATH \n"
" fish: set -x PYTHONPATH %s $PYTHONPATH \n" % ( walberla_binary_dir,walberla_binary_dir ) )
\ No newline at end of file
" fish: set -x PYTHONPATH %s $PYTHONPATH \n" % (walberla_binary_dir, walberla_binary_dir))
if __name__ == "__main__":
install()
\ No newline at end of file
install() # noqa: F821
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file AntiDunes.cpp
//! \author Samuel Kemmler <samuel.kemmler@fau.de>
//! \author Jonas Plewinski <jonas.plewinski@fau.de>
//! \author Christoph Rettinger <christoph.rettinger@fau.de>
//! \author Christoph Schwarzmeier <christoph.schwarzmeier@fau.de>
//
// This showcase simulates antidunes, i.e., particulate dunes that travel in opposite stream-wise direction. See
// simulation results published in https://doi.org/10.1017/jfm.2023.262.
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "core/Environment.h"
#include "core/SharedFunctor.h"
#include "core/timing/RemainingTimeLogger.h"
#include "domain_decomposition/SharedSweep.h"
#include "field/StabilityChecker.h"
#include "lbm/PerformanceLogger.h"
#include "lbm/field/AddToStorage.h"
#include "lbm/free_surface/BlockStateDetectorSweep.h"
#include "lbm/free_surface/SurfaceMeshWriter.h"
#include "lbm/free_surface/TotalMassComputer.h"
#include "lbm/free_surface/VtkWriter.h"
#include "lbm/free_surface/dynamics/CellConversionSweep.h"
#include "lbm/free_surface/dynamics/ConversionFlagsResetSweep.h"
#include "lbm/free_surface/dynamics/ExcessMassDistributionSweep.h"
#include "lbm/free_surface/dynamics/PdfRefillingSweep.h"
#include "lbm/free_surface/dynamics/StreamReconstructAdvectSweep.h"
#include "lbm/free_surface/surface_geometry/CurvatureSweep.h"
#include "lbm/free_surface/surface_geometry/NormalSweep.h"
#include "lbm/free_surface/surface_geometry/SmoothingSweep.h"
#include "lbm/free_surface/surface_geometry/Utility.h"
#include "lbm_mesapd_coupling/mapping/ParticleMapping.h"
#include "lbm_mesapd_coupling/momentum_exchange_method/MovingParticleMapping.h"
#include "lbm_mesapd_coupling/momentum_exchange_method/reconstruction/PdfReconstructionManager.h"
#include "lbm_mesapd_coupling/momentum_exchange_method/reconstruction/Reconstructor.h"
#include "lbm_mesapd_coupling/utility/AddHydrodynamicInteractionKernel.h"
#include "lbm_mesapd_coupling/utility/AverageHydrodynamicForceTorqueKernel.h"
#include "lbm_mesapd_coupling/utility/InitializeHydrodynamicForceTorqueForAveragingKernel.h"
#include "lbm_mesapd_coupling/utility/LubricationCorrectionKernel.h"
#include "lbm_mesapd_coupling/utility/ParticleSelector.h"
#include "lbm_mesapd_coupling/utility/ResetHydrodynamicForceTorqueKernel.h"
#include "mesa_pd/collision_detection/AnalyticContactDetection.h"
#include "mesa_pd/data/ParticleAccessorWithBaseShape.h"
#include "mesa_pd/data/ParticleStorage.h"
#include "mesa_pd/data/shape/Sphere.h"
#include "mesa_pd/domain/BlockForestDataHandling.h"
#include "mesa_pd/domain/BlockForestDomain.h"
#include "mesa_pd/kernel/DoubleCast.h"
#include "mesa_pd/kernel/LinearSpringDashpot.h"
#include "mesa_pd/kernel/ParticleSelector.h"
#include "mesa_pd/kernel/VelocityVerlet.h"
#include "mesa_pd/mpi/ClearGhostOwnerSync.h"
#include "mesa_pd/mpi/ClearNextNeighborSync.h"
#include "mesa_pd/mpi/ContactFilter.h"
#include "mesa_pd/mpi/ReduceContactHistory.h"
#include "mesa_pd/mpi/ReduceProperty.h"
#include "mesa_pd/mpi/SyncGhostOwners.h"
#include "mesa_pd/mpi/SyncNextNeighbors.h"
#include "mesa_pd/mpi/notifications/ForceTorqueNotification.h"
#include "mesa_pd/mpi/notifications/HydrodynamicForceTorqueNotification.h"
#include "mesa_pd/vtk/ParticleVtkOutput.h"
#include "vtk/VTKOutput.h"
#include <core/waLBerlaBuildInfo.h>
#include "AntidunesBoundaryHandling.h"
#include "AntidunesLatticeModel.h"
#include "PIDController.h"
#include "Utility.h"
namespace walberla
{
namespace antidunes
{
using ScalarField_T = GhostLayerField< real_t, 1 >;
using VectorField_T = GhostLayerField< Vector3< real_t >, 1 >;
using VectorFieldFlattened_T = GhostLayerField< real_t, 3 >;
using LatticeModel_T = lbm::AntidunesLatticeModel;
using LatticeModelStencil_T = LatticeModel_T::Stencil;
using PdfField_T = lbm::PdfField< LatticeModel_T >;
using PdfCommunication_T = blockforest::SimpleCommunication< LatticeModelStencil_T >;
// the geometry computations in SurfaceGeometryHandler require meaningful values in the ghost layers in corner
// directions (flag field and fill level field); this holds, even if the lattice model uses a D3Q19 stencil
using CommunicationStencil_T =
std::conditional_t< LatticeModel_T::Stencil::D == uint_t(2), stencil::D2Q9, stencil::D3Q27 >;
using Communication_T = blockforest::SimpleCommunication< CommunicationStencil_T >;
using ParticleAccessor_T = mesa_pd::data::ParticleAccessorWithBaseShape;
using flag_t = uint32_t;
using FlagField_T = FlagField< flag_t >;
using AntidunesBoundaryHandling_T =
free_surface::AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >;
using StateSweep = walberla::free_surface::BlockStateDetectorSweep< FlagField_T >;
const FlagUID FormerMO_Flag("former moving obstacle");
// empty sweep required for using selectors (e.g. StateSweep::fullFreeSurface)
struct emptySweep
{
void operator()(IBlock*) {}
};
// data handling for loading a field of type ScalarField_T from file
template< typename ScalarField_T >
class ScalarFieldHandling : public field::BlockDataHandling< ScalarField_T >
{
public:
ScalarFieldHandling(const weak_ptr< StructuredBlockStorage >& blocks, uint_t numberGhostLayer)
: blocks_(blocks), numberGhostLayer_(numberGhostLayer)
{}
protected:
ScalarField_T* allocate(IBlock* const block) override { return allocateDispatch(block); }
ScalarField_T* reallocate(IBlock* const block) override { return allocateDispatch(block); }
private:
weak_ptr< StructuredBlockStorage > blocks_;
uint_t numberGhostLayer_;
ScalarField_T* allocateDispatch(IBlock* const block)
{
WALBERLA_ASSERT_NOT_NULLPTR(block);
auto blocks = blocks_.lock();
WALBERLA_CHECK_NOT_NULLPTR(blocks);
return new ScalarField_T(blocks->getNumberOfXCells(*block), blocks->getNumberOfYCells(*block),
blocks->getNumberOfZCells(*block), numberGhostLayer_, real_c(0), field::fzyx);
}
}; // class ScalarFieldHandling
// data handling for loading a field of type VectorFieldFlattened_T from file
template< typename VectorFieldFlattened_T >
class VectorFieldFlattenedHandling : public field::BlockDataHandling< VectorFieldFlattened_T >
{
public:
VectorFieldFlattenedHandling(const weak_ptr< StructuredBlockStorage >& blocks, uint_t numberGhostLayer)
: blocks_(blocks), numberGhostLayer_(numberGhostLayer)
{}
protected:
VectorFieldFlattened_T* allocate(IBlock* const block) override { return allocateDispatch(block); }
VectorFieldFlattened_T* reallocate(IBlock* const block) override { return allocateDispatch(block); }
private:
weak_ptr< StructuredBlockStorage > blocks_;
uint_t numberGhostLayer_;
VectorFieldFlattened_T* allocateDispatch(IBlock* const block)
{
WALBERLA_ASSERT_NOT_NULLPTR(block);
auto blocks = blocks_.lock();
WALBERLA_CHECK_NOT_NULLPTR(blocks);
return new VectorFieldFlattened_T(blocks->getNumberOfXCells(*block), blocks->getNumberOfYCells(*block),
blocks->getNumberOfZCells(*block), numberGhostLayer_, field::fzyx);
}
}; // class VectorFieldFlattenedHandling
// sweep for computing the force density from the fluid density and fill level
template< typename LatticeModel_T, typename FlagField_T, typename VectorFieldFlattened_T, typename ScalarField_T >
class ForceDensityCodegenSweep
{
public:
ForceDensityCodegenSweep(BlockDataID forceDensityFieldID, ConstBlockDataID pdfFieldID, ConstBlockDataID flagFieldID,
ConstBlockDataID fillFieldID,
const walberla::free_surface::FlagInfo< FlagField_T >& flagInfo,
std::shared_ptr< Vector3< real_t > > globalAcceleration)
: forceDensityFieldID_(forceDensityFieldID), pdfFieldID_(pdfFieldID), flagFieldID_(flagFieldID),
fillFieldID_(fillFieldID), flagInfo_(flagInfo), globalAcceleration_(globalAcceleration)
{}
void operator()(IBlock* const block)
{
VectorFieldFlattened_T* const forceDensityField = block->getData< VectorFieldFlattened_T >(forceDensityFieldID_);
const PdfField_T* const pdfField = block->getData< const PdfField_T >(pdfFieldID_);
const FlagField_T* const flagField = block->getData< const FlagField_T >(flagFieldID_);
const ScalarField_T* const fillField = block->getData< const ScalarField_T >(fillFieldID_);
WALBERLA_FOR_ALL_CELLS(forceDensityFieldIt, forceDensityField, pdfFieldIt, pdfField, flagFieldIt, flagField,
fillFieldIt, fillField, {
flag_t flag = *flagFieldIt;
// set force density in cells to acceleration * density * fillLevel (see equation 15
// in Koerner et al., 2005);
if (flagInfo_.isInterface(flag))
{
const real_t density = pdfField->getDensity(pdfFieldIt.cell());
forceDensityFieldIt[0] = (*globalAcceleration_)[0] * *fillFieldIt * density;
forceDensityFieldIt[1] = (*globalAcceleration_)[1] * *fillFieldIt * density;
forceDensityFieldIt[2] = (*globalAcceleration_)[2] * *fillFieldIt * density;
}
else
{
if (flagInfo_.isLiquid(flag))
{
const real_t density = pdfField->getDensity(pdfFieldIt.cell());
forceDensityFieldIt[0] = (*globalAcceleration_)[0] * density;
forceDensityFieldIt[1] = (*globalAcceleration_)[1] * density;
forceDensityFieldIt[2] = (*globalAcceleration_)[2] * density;
}
}
}) // WALBERLA_FOR_ALL_CELLS
}
private:
using flag_t = typename FlagField_T::flag_t;
BlockDataID forceDensityFieldID_;
ConstBlockDataID pdfFieldID_;
ConstBlockDataID flagFieldID_;
ConstBlockDataID fillFieldID_;
walberla::free_surface::FlagInfo< FlagField_T > flagInfo_;
std::shared_ptr< Vector3< real_t > > globalAcceleration_;
}; // class ForceDensitySweep
// function describing the global initialization profile
inline real_t initializationProfile(real_t x, real_t amplitude, real_t offset, real_t wavelength)
{
return amplitude * std::cos(x / wavelength * real_c(2) * math::pi + math::pi) + offset;
}
real_t getHydrostaticDensity(real_t height, real_t referenceHeight, real_t gravitationalAcceleration)
{
return real_c(1) + real_c(3) * gravitationalAcceleration * (height - referenceHeight);
}
void initializePoiseuilleProfile(StructuredBlockForest& forest, const BlockDataID& pdfFieldID,
const ConstBlockDataID& fillFieldID, const real_t& averageBedHeight,
const real_t& averageFluidHeight, const real_t& accelerationX, const real_t& viscosity,
real_t amplitude, real_t wavelength)
{
WALBERLA_LOG_INFO_ON_ROOT("Initializing Poiseuille velocity profile");
const real_t rho = real_c(1);
for (auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt)
{
PdfField_T* const pdfField = blockIt->getData< PdfField_T >(pdfFieldID);
const ScalarField_T* const fillField = blockIt->getData< const ScalarField_T >(fillFieldID);
WALBERLA_FOR_ALL_CELLS_XYZ(
pdfField, const Vector3< real_t > coord = forest.getBlockLocalCellCenter(*blockIt, Cell(x, y, z));
Vector3< real_t > velocity(real_c(0));
auto localBedHeight = initializationProfile(coord[0], amplitude, averageBedHeight, wavelength);
auto heightAboveBed = coord[2] - localBedHeight;
const real_t fillLevel = fillField->get(x, y, z);
if (heightAboveBed >= real_c(0) && fillLevel > real_c(0)) {
velocity[0] = accelerationX / (real_c(2) * viscosity) * heightAboveBed *
(real_c(2) * averageFluidHeight - heightAboveBed);
} pdfField->setToEquilibrium(x, y, z, velocity, rho);)
}
}
/***********************************************************************************************************************
* Initialize the hydrostatic pressure in the direction in which a force is acting in ALL cells (regardless of a cell's
* flag). The velocity remains unchanged.
*
* The force vector must have only one component, i.e., the direction of the force can only be in x-, y- or z-axis.
* The variable fluidHeight determines the height at which the density is equal to reference density (=1).
**********************************************************************************************************************/
template< typename PdfField_T >
void initHydrostaticPressure(const std::weak_ptr< StructuredBlockForest >& blockForestPtr,
const BlockDataID& pdfFieldID,
std::function< real_t(const Vector3< real_t >&) > hydrostaticDensityFct)
{
WALBERLA_LOG_INFO_ON_ROOT("Initializing hydrostatic pressure");
const auto blockForest = blockForestPtr.lock();
WALBERLA_CHECK_NOT_NULLPTR(blockForest);
for (auto blockIt = blockForest->begin(); blockIt != blockForest->end(); ++blockIt)
{
PdfField_T* const pdfField = blockIt->getData< PdfField_T >(pdfFieldID);
CellInterval local = pdfField->xyzSizeWithGhostLayer(); // block-, i.e., process-local cell interval
for (const auto& cell : local)
{
// initialize the (hydrostatic) pressure, i.e., LBM density
// Bernoulli: p = p0 + density * gravity * height
// => LBM (density=1): rho = rho0 + gravity * height = 1 + 1/cs^2 * g * h = 1 + 3 * g * h
// shift global cell by 0.5 since density is set for cell center
Vector3< real_t > cellCenter = blockForest->getBlockLocalCellCenter(*blockIt, cell);
const real_t rho = hydrostaticDensityFct(cellCenter);
const Vector3< real_t > velocity = pdfField->getVelocity(cell);
pdfField->setDensityAndVelocity(cell, velocity, rho);
}
}
}
template< typename FreeSurfaceBoundaryHandling_T, typename PdfField_T, typename FlagField_T >
class MeanVelocityComputer
{
public:
MeanVelocityComputer(const std::weak_ptr< const StructuredBlockForest >& blockForest,
const std::weak_ptr< const FreeSurfaceBoundaryHandling_T >& freeSurfaceBoundaryHandling,
const ConstBlockDataID& pdfFieldID, const std::shared_ptr< Vector3< real_t > >& meanVelocity,
real_t averagingFactor)
: blockForest_(blockForest), freeSurfaceBoundaryHandling_(freeSurfaceBoundaryHandling), pdfFieldID_(pdfFieldID),
meanVelocity_(meanVelocity), averagingFactor_(averagingFactor)
{}
void operator()()
{
auto blockForest = blockForest_.lock();
WALBERLA_CHECK_NOT_NULLPTR(blockForest);
auto freeSurfaceBoundaryHandling = freeSurfaceBoundaryHandling_.lock();
WALBERLA_CHECK_NOT_NULLPTR(freeSurfaceBoundaryHandling);
getMeanVelocity(blockForest, freeSurfaceBoundaryHandling);
}
void getMeanVelocity(const std::shared_ptr< const StructuredBlockForest >& blockForest,
const std::shared_ptr< const FreeSurfaceBoundaryHandling_T >& freeSurfaceBoundaryHandling)
{
const BlockDataID flagFieldID = freeSurfaceBoundaryHandling->getFlagFieldID();
const typename FreeSurfaceBoundaryHandling_T::FlagInfo_T& flagInfo = freeSurfaceBoundaryHandling->getFlagInfo();
// use separate variables for the velocity in each direction; syntax meanVelocity[0] does not work in OMP-macro
real_t meanVelocityX = real_c(0);
real_t meanVelocityY = real_c(0);
real_t meanVelocityZ = real_c(0);
for (auto blockIt = blockForest->begin(); blockIt != blockForest->end(); ++blockIt)
{
const FlagField_T* const flagField = blockIt->template getData< const FlagField_T >(flagFieldID);
const PdfField_T* const pdfField = blockIt->template getData< const PdfField_T >(pdfFieldID_);
WALBERLA_FOR_ALL_CELLS_OMP(flagFieldIt, flagField, pdfFieldIt, pdfField,
omp parallel for schedule(static) reduction(+:meanVelocityX)
reduction(+:meanVelocityY) reduction(+:meanVelocityZ),
{
if (flagInfo.isLiquid(flagFieldIt) || flagInfo.isInterface(flagFieldIt))
{
const Vector3< real_t > velocity = pdfField->getVelocity(pdfFieldIt.cell());
meanVelocityX += velocity[0];
meanVelocityY += velocity[1];
meanVelocityZ += velocity[2];
//++cellCount;
}
}) // WALBERLA_FOR_ALL_CELLS_OMP
}
Vector3< real_t > meanVelocity(meanVelocityX, meanVelocityY, meanVelocityZ);
mpi::allReduceInplace< real_t >(meanVelocity, mpi::SUM);
// mpi::allReduceInplace< uint_t >(cellCount, mpi::SUM);
meanVelocity *= averagingFactor_;
*meanVelocity_ = meanVelocity;
};
private:
std::weak_ptr< const StructuredBlockForest > blockForest_;
std::weak_ptr< const FreeSurfaceBoundaryHandling_T > freeSurfaceBoundaryHandling_;
const ConstBlockDataID pdfFieldID_;
std::shared_ptr< Vector3< real_t > > meanVelocity_;
real_t averagingFactor_;
}; // class MeanVelocityComputer
class ForcingAdjuster
{
public:
ForcingAdjuster(const shared_ptr< StructuredBlockStorage >& blocks, real_t targetVelocity, real_t externalForcing,
real_t proportionalGain, real_t derivativeGain, real_t integralGain, real_t maxRamp,
real_t minActuatingVariable, real_t maxActuatingVariable)
: blocks_(blocks), currentExternalForcing_(externalForcing),
pid_(targetVelocity, externalForcing, proportionalGain, derivativeGain, integralGain, maxRamp,
minActuatingVariable, maxActuatingVariable)
{
WALBERLA_LOG_INFO_ON_ROOT("Creating PID controller with pg = " << pid_.getProportionalGain()
<< ", dg = " << pid_.getDerivateGain()
<< ", ig = " << pid_.getIntegralGain());
}
void operator()(const real_t currentMeanVelocity)
{
// compute new forcing value on root (since flow rate only known on root)
WALBERLA_ROOT_SECTION()
{
real_t newExternalForcing = pid_.update(currentMeanVelocity);
currentExternalForcing_ = newExternalForcing;
}
// send updated external forcing to all other processes
mpi::broadcastObject(currentExternalForcing_);
}
real_t getExternalForcing() { return currentExternalForcing_; }
void storePIDSnapshot(std::string filename)
{
WALBERLA_ROOT_SECTION() { pid_.writeStateToFile(filename); }
}
void loadPIDSnapshot(std::string filename) { pid_.readStateFromFile(filename); }
private:
shared_ptr< StructuredBlockStorage > blocks_;
real_t currentExternalForcing_;
PIDController pid_;
}; // ForcingAdjuster
int main(int argc, char** argv)
{
Environment walberlaEnv(argc, argv);
WALBERLA_LOG_INFO_ON_ROOT("waLBerla Revision: " << std::string(WALBERLA_GIT_SHA1).substr(0, 8))
if (argc < 2) { WALBERLA_ABORT("Please specify a parameter file as input argument.") }
WALBERLA_LOG_DEVEL_ON_ROOT("Using generated lattice model.");
auto configPtr = walberlaEnv.config();
// print content of parameter file
WALBERLA_LOG_INFO_ON_ROOT(*configPtr);
WALBERLA_ROOT_SECTION()
{
std::ofstream file;
file.open("parameterConfiguration.cfg");
file << *configPtr;
file.close();
}
// get block forest parameters from parameter file
auto blockForestParameters = configPtr->getOneBlock("BlockForestParameters");
const Vector3< uint_t > cellsPerBlock = blockForestParameters.getParameter< Vector3< uint_t > >("cellsPerBlock");
const Vector3< bool > periodicity = blockForestParameters.getParameter< Vector3< bool > >("periodicity");
const bool loadSnapshot = blockForestParameters.getParameter< bool >("loadSnapshot");
const bool storeSnapshot = blockForestParameters.getParameter< bool >("storeSnapshot");
const uint_t snapshotFrequency = blockForestParameters.getParameter< uint_t >("snapshotFrequency");
const std::string snapshotBaseFolder = blockForestParameters.getParameter< std::string >("snapshotBaseFolder");
// get domain parameters from parameter file
auto domainParameters = configPtr->getOneBlock("DomainParameters");
const Vector3< uint_t > domainSize = domainParameters.getParameter< Vector3< uint_t > >("domainSize");
const uint_t wavePeriods = domainParameters.getParameter< uint_t >("wavePeriods");
const real_t liquidHeightFactor = domainParameters.getParameter< real_t >("liquidHeightFactor");
const real_t floorHeightFactor = domainParameters.getParameter< real_t >("floorHeightFactor");
const real_t initialAmplitude = domainParameters.getParameter< real_t >("initialAmplitude");
// compute number of blocks as defined by domainSize and cellsPerBlock
Vector3< uint_t > numBlocks;
for (uint_t i = uint_c(0); i <= uint_c(2); ++i)
{
numBlocks[i] = domainSize[i] / cellsPerBlock[i];
WALBERLA_CHECK_EQUAL(numBlocks[i] * cellsPerBlock[i], domainSize[i],
"Domain size in direction " << i << " is not a multiple of cells per block.")
}
// get number of (MPI) processes
const uint_t numProcesses = uint_c(MPIManager::instance()->numProcesses());
WALBERLA_CHECK_EQUAL(numProcesses, numBlocks[0] * numBlocks[1] * numBlocks[2],
"The number of MPI processes is different from the number of blocks as defined by "
"\"domainSize/cellsPerBlock\".")
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(numProcesses);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(numBlocks);
// get PID controller parameters
auto PIDParameters = configPtr->getOneBlock("PIDParameters");
const real_t targetMeanVelocityMagnitude = PIDParameters.getParameter< real_t >("targetMeanVelocityMagnitude");
const real_t proportionalGain = PIDParameters.getParameter< real_t >("proportionalGain");
const real_t derivativeGain = PIDParameters.getParameter< real_t >("derivativeGain");
const real_t integralGain = PIDParameters.getParameter< real_t >("integralGain");
const real_t maxRamp = PIDParameters.getParameter< real_t >("maxRamp");
const real_t minActuatingVariable = PIDParameters.getParameter< real_t >("minActuatingVariable");
const real_t maxActuatingVariable = PIDParameters.getParameter< real_t >("maxActuatingVariable");
// read particle infos
const auto particleParameters = configPtr->getOneBlock("ParticleParameters");
const std::string particleInFileName = particleParameters.getParameter< std::string >("inFileName");
const uint_t bedCopiesInX = particleParameters.getParameter< uint_t >("bedCopiesInX");
const uint_t bedCopiesInY = particleParameters.getParameter< uint_t >("bedCopiesInY");
const real_t particleDensityRatio = particleParameters.getParameter< real_t >("densityRatio");
const real_t particleFixingHeightFactor = particleParameters.getParameter< real_t >("fixingHeightFactor");
const real_t particleFrictionCoefficient = particleParameters.getParameter< real_t >("frictionCoefficient");
const real_t particleRestitutionCoefficient = particleParameters.getParameter< real_t >("restitutionCoefficient");
const uint_t particleNumSubCycles = particleParameters.getParameter< uint_t >("numSubCycles");
const bool useLubricationCorrection = particleParameters.getParameter< bool >("useLubricationCorrection");
const bool useNoSlipParticles = particleParameters.getParameter< bool >("useNoSlipParticles");
const real_t particlePoissonsRatio = real_c(0.22);
const real_t particleKappa = real_c(2) * (real_c(1) - particlePoissonsRatio) / (real_c(2) - particlePoissonsRatio);
real_t particleCollisionTimeNonDim = real_c(4);
bool useOpenMP = false;
const uint_t vtkSpacingParticles =
configPtr->getOneBlock("VTK").getOneBlock("fluid_field").getParameter< uint_t >("writeFrequency");
const std::string vtkFolder =
configPtr->getOneBlock("VTK").getOneBlock("fluid_field").getParameter< std::string >("baseFolder");
// get physics parameters from parameter file
auto physicsParameters = configPtr->getOneBlock("PhysicsParameters");
const bool enableWetting = physicsParameters.getParameter< bool >("enableWetting");
const uint_t timesteps = physicsParameters.getParameter< uint_t >("timesteps");
const real_t Re = physicsParameters.getParameter< real_t >("Re");
const real_t Fr = physicsParameters.getParameter< real_t >("Fr");
const real_t We = physicsParameters.getParameter< real_t >("We");
// get avgDiameter and scaling factor
real_t avgParticleDiameter = real_c(0);
real_t particleScalingFactor = real_c(0);
getAvgDiameterScalingFactor(particleInFileName, domainSize, bedCopiesInX, bedCopiesInY, avgParticleDiameter,
particleScalingFactor);
const real_t liquidHeight = avgParticleDiameter * liquidHeightFactor;
const real_t floorHeight = avgParticleDiameter * floorHeightFactor;
const real_t particleFixingHeight = particleFixingHeightFactor * avgParticleDiameter;
WALBERLA_CHECK_FLOAT_UNEQUAL(liquidHeight, 0.0)
WALBERLA_CHECK_FLOAT_UNEQUAL(floorHeight, 0.0)
const real_t absoluteLiquidHeight = liquidHeight + floorHeight;
const real_t viscosity = targetMeanVelocityMagnitude * liquidHeight / Re;
const real_t relaxationRate = real_c(1.0) / (real_c(3) * viscosity + real_c(0.5));
const real_t gravity = (targetMeanVelocityMagnitude / Fr) * (targetMeanVelocityMagnitude / Fr) / liquidHeight;
const real_t accelerationX = real_c(3) * targetMeanVelocityMagnitude * viscosity / (liquidHeight * liquidHeight);
std::shared_ptr< Vector3< real_t > > acceleration =
std::make_shared< Vector3< real_t > >(accelerationX, real_c(0.0), -gravity);
const real_t surfaceTension =
real_c(1.0) * targetMeanVelocityMagnitude * targetMeanVelocityMagnitude * liquidHeight / We;
// compute SI dx and dt
const real_t viscosity_SI = real_c(1.0016e-6); // kinemtic viscosity of water at 20°C at 1 bar
const real_t dx_SI = real_c(1) / particleScalingFactor;
const real_t dt_SI = viscosity / viscosity_SI * dx_SI * dx_SI;
WALBERLA_LOG_INFO_ON_ROOT("\nPhysical parameters:")
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(liquidHeight);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(floorHeight);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(dx_SI);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(dt_SI);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(*acceleration);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(relaxationRate);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(viscosity);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(absoluteLiquidHeight);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(avgParticleDiameter);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(particleScalingFactor);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(particleFixingHeight);
WALBERLA_LOG_INFO_ON_ROOT("\nFree surface physical parameters")
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(surfaceTension);
if ((periodicity[0] && numBlocks[0] < uint_c(3)) || (periodicity[1] && numBlocks[1] < uint_c(3)) ||
(periodicity[2] && numBlocks[2] < uint_c(3)))
{
WALBERLA_ABORT("When using particles, use at least three blocks per periodic direction.");
}
// read model parameters from parameter file
const auto modelParameters = configPtr->getOneBlock("ModelParameters");
const std::string pdfReconstructionModel = modelParameters.getParameter< std::string >("pdfReconstructionModel");
const std::string excessMassDistributionModel =
modelParameters.getParameter< std::string >("excessMassDistributionModel");
const walberla::free_surface::ExcessMassDistributionModel excessMassModel(excessMassDistributionModel);
const bool useSimpleMassExchange = modelParameters.getParameter< bool >("useSimpleMassExchange");
const real_t cellConversionThreshold = modelParameters.getParameter< real_t >("cellConversionThreshold");
const real_t cellConversionForceThreshold = modelParameters.getParameter< real_t >("cellConversionForceThreshold");
// read evaluation parameters from parameter file
const auto evaluationParameters = configPtr->getOneBlock("EvaluationParameters");
const uint_t performanceLogFrequency = evaluationParameters.getParameter< uint_t >("performanceLogFrequency");
const uint_t evaluationFrequency = evaluationParameters.getParameter< uint_t >("evaluationFrequency");
const std::string baseFolderName = evaluationParameters.getParameter< std::string >("baseFolderName");
uint_t beginTimeStep = 0;
const std::string checkpointConfigFile("antidunesCheckpointConfig.file");
if (loadSnapshot)
{
WALBERLA_ROOT_SECTION()
{
std::ifstream file;
file.open(snapshotBaseFolder + "/" + checkpointConfigFile);
if (file.fail()) WALBERLA_ABORT("Error: " << checkpointConfigFile << " could not be opened!");
file >> beginTimeStep;
file >> (*acceleration)[0];
file.close();
}
mpi::broadcastObject(beginTimeStep);
mpi::broadcastObject(*acceleration);
WALBERLA_LOG_INFO_ON_ROOT("Successfully read config parameters from checkpoint config file:")
WALBERLA_LOG_INFO_ON_ROOT(" - beginTimeStep = " << beginTimeStep)
WALBERLA_LOG_INFO_ON_ROOT(" - acceleration = < " << (*acceleration)[0] << ", " << (*acceleration)[1] << ", "
<< (*acceleration)[2] << " >")
}
if (loadSnapshot)
{
// modify config file to start VTK output from "loadFromTimestep" rather than from 0
std::vector< config::Config::Block* > configVTKBlock;
configPtr->getWritableGlobalBlock().getWritableBlocks("VTK", configVTKBlock, 1, 1);
std::vector< config::Config::Block* > configVTKFluidFieldBlock;
configVTKBlock[0]->getWritableBlocks("fluid_field", configVTKFluidFieldBlock, 1, 1);
configVTKFluidFieldBlock[0]->setOrAddParameter("initialExecutionCount", std::to_string(beginTimeStep));
}
WALBERLA_ROOT_SECTION()
{
// create base directories if they do not yet exist
filesystem::path tpath(baseFolderName);
if (!filesystem::exists(tpath)) filesystem::create_directory(tpath);
filesystem::path snapshotPath(snapshotBaseFolder);
if (!filesystem::exists(snapshotPath)) filesystem::create_directory(snapshotPath);
}
std::shared_ptr< StructuredBlockForest > blockForest(nullptr);
const std::string blockForestFile("blockForest.file");
if (loadSnapshot)
{
// load block forest from file
MPIManager::instance()->useWorldComm();
blockForest = make_shared< StructuredBlockForest >(
make_shared< BlockForest >(uint_c(MPIManager::instance()->rank()),
(std::string(snapshotBaseFolder + "/" + blockForestFile)).c_str(), true, false),
cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
blockForest->createCellBoundingBoxes();
}
else
{
// create uniform block forest
blockForest = blockforest::createUniformBlockGrid(numBlocks[0], numBlocks[1], numBlocks[2], // blocks
cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2], // cells
real_c(1.0), // dx
true, // one block per process
periodicity[0], periodicity[1], periodicity[2]); // periodicity
}
// save block forest to file (but do not overwrite existing file if snapshot is loaded)
if (storeSnapshot && !loadSnapshot)
{
blockForest->getBlockForest().saveToFile(snapshotBaseFolder + "/" + blockForestFile);
}
const auto vtkParameters = configPtr->getOneBlock("VTK");
const auto vtkFluidParameters = vtkParameters.getOneBlock("fluid_field");
BlockDataID pdfFieldID;
const std::string pdfFieldFile("pdfField.file");
BlockDataID fillFieldID;
const std::string fillFieldFile("fillField.file");
BlockDataID excessMassFieldID;
const std::string excessMassFieldFile("excessMassField.file");
// force density field must be added here to create lattice model
BlockDataID forceDensityFieldID =
field::addToStorage< VectorFieldFlattened_T >(blockForest, "Force field", real_c(0), field::fzyx, uint_c(1));
if (excessMassModel.isEvenlyAllInterfaceFallbackLiquidType())
{
// add additional field for storing excess mass in liquid cells
excessMassFieldID =
field::addToStorage< ScalarField_T >(blockForest, "Excess mass", real_c(0), field::fzyx, uint_c(1));
}
LatticeModel_T latticeModel = LatticeModel_T(forceDensityFieldID, relaxationRate);
if (loadSnapshot)
{
// load PDF field from file
shared_ptr< lbm::internal::PdfFieldHandling< LatticeModel_T > > pdfFieldDataHandling =
make_shared< lbm::internal::PdfFieldHandling< LatticeModel_T > >(
blockForest, latticeModel, false, Vector3< real_t >(real_c(0)), real_c(1), uint_c(1), field::fzyx);
pdfFieldID = (blockForest->getBlockStorage())
.loadBlockData(snapshotBaseFolder + "/" + pdfFieldFile, pdfFieldDataHandling, "PDF field");
// load fill level field from file
std::shared_ptr< ScalarFieldHandling< ScalarField_T > > fillFieldDataHandling =
std::make_shared< ScalarFieldHandling< ScalarField_T > >(blockForest, uint_c(2));
fillFieldID =
(blockForest->getBlockStorage())
.loadBlockData(snapshotBaseFolder + "/" + fillFieldFile, fillFieldDataHandling, "Fill level field");
// load fill level field from file
std::shared_ptr< ScalarFieldHandling< ScalarField_T > > excessMassFieldDataHandling =
std::make_shared< ScalarFieldHandling< ScalarField_T > >(blockForest, uint_c(1));
excessMassFieldID = (blockForest->getBlockStorage())
.loadBlockData(snapshotBaseFolder + "/" + excessMassFieldFile, excessMassFieldDataHandling,
"Excess mass field");
}
else
{
// add PDF field
pdfFieldID =
lbm::addPdfFieldToStorage(blockForest, "PDF field", latticeModel,
Vector3< real_t >(targetMeanVelocityMagnitude, 0, 0), real_c(1.0), field::fzyx);
// add fill level field (initialized with 0, i.e., gas everywhere)
fillFieldID =
field::addToStorage< ScalarField_T >(blockForest, "Fill level field", real_c(0.0), field::fzyx, uint_c(2));
// add fill level field (initialized with 0, i.e., gas everywhere)
excessMassFieldID =
field::addToStorage< ScalarField_T >(blockForest, "Excess mass field", real_c(0.0), field::fzyx, uint_c(1));
}
// MesaPD data structures
auto particleStorage = std::make_shared< mesa_pd::data::ParticleStorage >(1);
auto particleAccessor = std::make_shared< mesa_pd::data::ParticleAccessorWithBaseShape >(particleStorage);
auto mesapdDomain = std::make_shared< mesa_pd::domain::BlockForestDomain >(blockForest->getBlockForestPointer());
BlockDataID particleStorageID;
const std::string particleStorageFile("particleStorageFile.file");
if (loadSnapshot)
{
WALBERLA_LOG_INFO_ON_ROOT("Initializing particles from checkpointing file!");
particleStorageID = blockForest->loadBlockData(snapshotBaseFolder + "/" + particleStorageFile,
mesa_pd::domain::createBlockForestDataHandling(particleStorage),
"Particle Storage");
mesa_pd::mpi::ClearNextNeighborSync CNNS;
CNNS(*particleAccessor);
mesa_pd::mpi::ClearGhostOwnerSync CGOS;
CGOS(*particleAccessor);
}
else
{
particleStorageID =
blockForest->addBlockData(mesa_pd::domain::createBlockForestDataHandling(particleStorage), "Particle Storage");
}
BlockDataID particleFieldID = field::addToStorage< lbm_mesapd_coupling::ParticleField_T >(
blockForest, "Particle field", particleAccessor->getInvalidUid(), field::fzyx, uint_c(2));
auto densityReferenceHeight = absoluteLiquidHeight;
auto hydrostaticDensityFct = [acceleration, densityReferenceHeight](const Vector3< real_t >& position) {
uint_t forceComponent = uint_c(2); // gravity is here strictly only acting in z direction!
return getHydrostaticDensity(position[forceComponent], densityReferenceHeight, (*acceleration)[forceComponent]);
};
// add boundary handling
const std::shared_ptr< AntidunesBoundaryHandling_T > antidunesBoundaryHandling =
std::make_shared< AntidunesBoundaryHandling_T >(blockForest, pdfFieldID, fillFieldID, particleFieldID,
particleAccessor, hydrostaticDensityFct);
const BlockDataID flagFieldID = antidunesBoundaryHandling->getFlagFieldID();
const typename AntidunesBoundaryHandling_T::FlagInfo_T& flagInfo = antidunesBoundaryHandling->getFlagInfo();
real_t sinusAmplitude = real_c(0.5) * initialAmplitude;
real_t sinusOffset = floorHeight;
real_t sinusWavelength = real_c(domainSize[0]) / real_c(wavePeriods);
if (!loadSnapshot)
{
// samples used in the Monte-Carlo like estimation of the fill level
const uint_t fillLevelInitSamples = uint_c(100); // actually there will be 101 since 0 is also included
const uint_t numTotalPoints = (fillLevelInitSamples + uint_c(1)) * (fillLevelInitSamples + uint_c(1));
const real_t stepsize = real_c(1) / real_c(fillLevelInitSamples);
// initialize free-surface sine profile
for (auto blockIt = blockForest->begin(); blockIt != blockForest->end(); ++blockIt)
{
ScalarField_T* const fillField = blockIt->getData< ScalarField_T >(fillFieldID);
WALBERLA_FOR_ALL_CELLS(fillFieldIt, fillField, {
// cell in block-local coordinates
const Cell localCell = fillFieldIt.cell();
// get cell in global coordinates
Cell globalCell = localCell;
blockForest->transformBlockLocalToGlobalCell(globalCell, *blockIt, localCell);
// Monte-Carlo like estimation of the fill level:
// create uniformly-distributed sample points in each cell and count the number of points below the sine
// profile; this fraction of points is used as the fill level to initialize the profile
uint_t numPointsBelow = uint_c(0);
for (uint_t xSample = uint_c(0); xSample <= fillLevelInitSamples; ++xSample)
{
// Pascal et al. (2021) defined the amplitude to span from minimum peak to maximum peak; in
// initializationProfile(), the amplitude is defined to range from the average to the maximum peak
const real_t functionValue =
initializationProfile(real_c(globalCell[0]) + real_c(xSample) * stepsize, sinusAmplitude,
absoluteLiquidHeight + real_c(0.5), sinusWavelength);
for (uint_t zSample = uint_c(0); zSample <= fillLevelInitSamples; ++zSample)
{
const real_t zPoint = real_c(globalCell[2]) + real_c(zSample) * stepsize;
// with operator <, a fill level of 1 can not be reached when the line is equal to the cell's top
// border; with operator <=, a fill level of 0 can not be reached when the line is equal to the cell's
// bottom border
if (zPoint < functionValue) { ++numPointsBelow; }
}
}
// fill level is fraction of points below sine profile
fillField->get(localCell) = real_c(numPointsBelow) / real_c(numTotalPoints);
}) // WALBERLA_FOR_ALL_CELLS
}
initializePoiseuilleProfile(*blockForest, pdfFieldID, fillFieldID, floorHeight, liquidHeight + real_c(0.5),
(*acceleration)[0], viscosity, sinusAmplitude, sinusWavelength);
}
// initialize domain boundary conditions from config file
const auto boundaryParameters = configPtr->getOneBlock("BoundaryParameters");
antidunesBoundaryHandling->initFromConfig(boundaryParameters);
std::function< void(void) > syncCall;
auto simulationDomainAABB = blockForest->getDomain();
lbm_mesapd_coupling::ParticleMappingKernel< AntidunesBoundaryHandling_T::BoundaryHandling_T > particleMappingKernel(
blockForest, antidunesBoundaryHandling->getHandlingID());
lbm_mesapd_coupling::MovingParticleMappingKernel< AntidunesBoundaryHandling_T::BoundaryHandling_T >
movingParticleMappingKernel(blockForest, antidunesBoundaryHandling->getHandlingID(), particleFieldID);
uint_t numParticles = uint_c(0);
// initialize bottom solid sine profile
if (!loadSnapshot)
{
auto createParticleFct = [sinusAmplitude, sinusOffset, sinusWavelength](Vector3< real_t > pos) {
return pos[2] < initializationProfile(pos[0], sinusAmplitude, sinusOffset, sinusWavelength);
};
real_t maxParticleHeight = real_c(0);
initSpheresFromFile(particleInFileName, *particleStorage, *mesapdDomain, particleDensityRatio, domainSize,
createParticleFct, simulationDomainAABB, bedCopiesInX, bedCopiesInY, numParticles,
maxParticleHeight, particleScalingFactor);
WALBERLA_LOG_INFO_ON_ROOT("Max particle height " << maxParticleHeight);
if ((sinusOffset + sinusAmplitude) > maxParticleHeight)
WALBERLA_ABORT("Created particle bed is below desired sinus shape!");
if (real_c(2) * sinusAmplitude > (maxParticleHeight - particleFixingHeight))
WALBERLA_ABORT("Created mobile particle bed is not high enough for desired sinus shape!");
if (useNoSlipParticles && (particleFixingHeight < maxParticleHeight))
WALBERLA_ABORT("You are using no-slip BCs on particles (which does not set hydrodynamic forces) but do not "
"fix all particles - this leads to wrong behavior and is not permitted!")
// fix lower particles
particleStorage->forEachParticle(
useOpenMP, mesa_pd::kernel::SelectAll(), *particleAccessor,
[particleFixingHeight](const size_t idx, auto& ac) {
if (ac.getPosition(idx)[2] < particleFixingHeight)
mesa_pd::data::particle_flags::set(ac.getFlagsRef(idx), mesa_pd::data::particle_flags::FIXED);
},
*particleAccessor);
}
else
{
real_t avgParticleDiameterTest = real_c(0);
particleStorage->forEachParticle(
false, mesa_pd::kernel::SelectLocal(), *particleAccessor,
[&numParticles, &avgParticleDiameterTest](const size_t idx, auto& ac) {
auto sp = static_cast< mesa_pd::data::Sphere* >(ac.getBaseShape(idx).get());
++numParticles;
avgParticleDiameterTest += real_c(2) * sp->getRadius();
},
*particleAccessor);
mpi::allReduceInplace(numParticles, mpi::SUM);
mpi::allReduceInplace(avgParticleDiameterTest, mpi::SUM);
avgParticleDiameterTest /= real_c(numParticles);
WALBERLA_LOG_INFO_ON_ROOT("Read particles from check pointing file with avg diameter of "
<< avgParticleDiameterTest)
if (std::abs(avgParticleDiameterTest - avgParticleDiameter) / avgParticleDiameterTest > real_c(0.05))
{
WALBERLA_ABORT("Particle diameters not correct.")
}
}
WALBERLA_LOG_INFO_ON_ROOT("Created " << numParticles << " particles");
// create planes
createPlane(*particleStorage, simulationDomainAABB.minCorner(), Vector3< real_t >(real_c(0), real_c(0), real_c(1)));
createPlane(*particleStorage, simulationDomainAABB.maxCorner(), Vector3< real_t >(real_c(0), real_c(0), real_c(-1)));
const real_t blockSyncExtension = real_c(2.5);
real_t maxPossibleParticleDiameter = avgParticleDiameter * real_c(1.1);
if (maxPossibleParticleDiameter < real_c(2) * real_c(cellsPerBlock.min()) - blockSyncExtension)
{
WALBERLA_LOG_INFO_ON_ROOT("Using next neighbor sync for particles");
syncCall = [particleStorage, mesapdDomain, blockSyncExtension]() {
mesa_pd::mpi::SyncNextNeighbors syncNextNeighborFunc;
syncNextNeighborFunc(*particleStorage, *mesapdDomain, blockSyncExtension);
};
syncCall();
}
else
{
WALBERLA_LOG_INFO_ON_ROOT("Using ghost owner sync for particles")
syncCall = [particleStorage, mesapdDomain, blockSyncExtension]() {
mesa_pd::mpi::SyncGhostOwners syncGhostOwnersFunc;
syncGhostOwnersFunc(*particleStorage, *mesapdDomain, blockSyncExtension);
};
for (uint_t i = uint_c(0); i < uint_c(std::ceil(maxPossibleParticleDiameter / real_c(cellsPerBlock.min()))); ++i)
syncCall();
}
if (useNoSlipParticles)
{
particleStorage->forEachParticle(useOpenMP, SphereSelector(), *particleAccessor, particleMappingKernel,
*particleAccessor, AntidunesBoundaryHandling_T::noSlipFlagID);
}
else
{
particleStorage->forEachParticle(useOpenMP, SphereSelector(), *particleAccessor, movingParticleMappingKernel,
*particleAccessor, AntidunesBoundaryHandling_T::movingObstacleFlagID);
}
// IMPORTANT REMARK: this must be only called after every solid flag has been set; otherwise, the boundary handling
// might not detect solid flags correctly
antidunesBoundaryHandling->initFlagsFromFillLevel();
// initialize hydrostatic pressure
if (!loadSnapshot) { initHydrostaticPressure< PdfField_T >(blockForest, pdfFieldID, hydrostaticDensityFct); }
// initialize force density field
walberla::free_surface::initForceDensityFieldCodegen< PdfField_T, FlagField_T, VectorFieldFlattened_T,
ScalarField_T >(
blockForest, forceDensityFieldID, fillFieldID, pdfFieldID, flagFieldID, flagInfo, *acceleration);
// communication after initialization
Communication_T communication(blockForest, flagFieldID, fillFieldID, forceDensityFieldID);
communication();
PdfCommunication_T pdfCommunication(blockForest, pdfFieldID);
pdfCommunication();
// add bubble model
std::shared_ptr< walberla::free_surface::bubble_model::BubbleModelBase > bubbleModel =
std::make_shared< walberla::free_surface::bubble_model::BubbleModelConstantPressure >(real_c(1));
// set density in non-liquid or non-interface cells to 1 (after initializing with hydrostatic pressure)
// setDensityInNonFluidCellsToOne< FlagField_T, PdfField_T >(blockForest, flagInfo, flagFieldID, pdfFieldID);
// create timeloop
SweepTimeloop timeloop(blockForest, timesteps);
timeloop.setCurrentTimeStep(beginTimeStep);
timeloop.addFuncBeforeTimeStep(RemainingTimeLogger(timeloop.getNrOfTimeSteps()), "Remaining Time Logger");
// Laplace pressure = 2 * surface tension * curvature; curvature computation is not necessary with no surface
// tension
bool computeCurvature = false;
if (!realIsEqual(surfaceTension, real_c(0), real_c(1e-14))) { computeCurvature = true; }
auto blockStateUpdate = StateSweep(blockForest, flagInfo, flagFieldID);
// add surface geometry handler
BlockDataID curvatureFieldID =
field::addToStorage< ScalarField_T >(blockForest, "Curvature field", real_c(0), field::fzyx, uint_c(1));
BlockDataID normalFieldID = field::addToStorage< VectorField_T >(
blockForest, "Normal field", Vector3< real_t >(real_c(0)), field::fzyx, uint_c(1));
BlockDataID obstacleNormalFieldID = field::addToStorage< VectorField_T >(
blockForest, "Obstacle normal field", Vector3< real_t >(real_c(0)), field::fzyx, uint_c(1));
// add field for smoothed fill levels
BlockDataID smoothFillFieldID =
field::addToStorage< ScalarField_T >(blockForest, "Smooth fill level field", real_c(0), field::fzyx, uint_c(1));
// smooth fill level field for decreasing error in finite difference normal and curvature computation (see
// dissertation of S. Bogner, 2017 (section 4.4.2.1))
walberla::free_surface::SmoothingSweep< CommunicationStencil_T, FlagField_T, ScalarField_T, VectorField_T >
smoothingSweep(smoothFillFieldID, fillFieldID, flagFieldID,
walberla::free_surface::flagIDs::liquidInterfaceGasFlagIDs, flagInfo.getObstacleIDSet(),
enableWetting);
// IMPORTANT REMARK: SmoothingSweep must be executed on all blocks, because the algorithm works on all liquid,
// interface and gas cells. This is necessary since the normals are not only computed in interface cells, but also
// in the neighborhood of interface cells. Therefore, meaningful values for the fill levels of the second
// neighbors of interface cells are also required in NormalSweep.
timeloop.add() << Sweep(smoothingSweep, "Sweep: fill level smoothing")
<< AfterFunction(Communication_T(blockForest, smoothFillFieldID),
"Communication: after smoothing sweep");
// compute interface normals (using smoothed fill level field)
walberla::free_surface::NormalSweep< CommunicationStencil_T, FlagField_T, ScalarField_T, VectorField_T > normalSweep(
normalFieldID, smoothFillFieldID, flagFieldID, walberla::free_surface::flagIDs::interfaceFlagID,
walberla::free_surface::flagIDs::liquidInterfaceGasFlagIDs, flagInfo.getObstacleIDSet(), true, false, true,
false);
timeloop.add() << Sweep(normalSweep, "Sweep: normal computation", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: normal")
<< AfterFunction(Communication_T(blockForest, normalFieldID), "Communication: after normal sweep");
if (computeCurvature)
{
// compute interface curvature using finite differences according to Brackbill et al.
walberla::free_surface::CurvatureSweepFiniteDifferences< CommunicationStencil_T, FlagField_T, ScalarField_T,
VectorField_T >
curvSweep(curvatureFieldID, normalFieldID, obstacleNormalFieldID, flagFieldID,
walberla::free_surface::flagIDs::interfaceFlagID,
walberla::free_surface::flagIDs::liquidInterfaceGasFlagIDs, flagInfo.getObstacleIDSet(), false,
real_c(0));
timeloop.add() << Sweep(curvSweep, "Sweep: curvature computation (finite difference method)",
StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: curvature")
<< AfterFunction(Communication_T(blockForest, curvatureFieldID),
"Communication: after curvature sweep");
}
// add surface dynamics handler
// add standard waLBerla boundary handling
timeloop.add() << Sweep(antidunesBoundaryHandling->getBoundarySweep(), "Sweep: boundary handling",
Set< SUID >::emptySet(), StateSweep::onlyGasAndBoundary)
<< Sweep(emptySweep(), "Empty sweep: boundary handling", StateSweep::onlyGasAndBoundary);
// add sweep for weighting force in interface cells with fill level and density
// different version for codegen because pystencils does not support 'Ghostlayerfield<Vector3(), 1>'
const ForceDensityCodegenSweep< LatticeModel_T, FlagField_T, VectorFieldFlattened_T, ScalarField_T >
forceDensityCodegenSweep(forceDensityFieldID, pdfFieldID, flagFieldID, fillFieldID, flagInfo, acceleration);
timeloop.add() << Sweep(forceDensityCodegenSweep, "Sweep: force weighting", Set< SUID >::emptySet(),
StateSweep::onlyGasAndBoundary)
<< Sweep(emptySweep(), "Empty sweep: force weighting", StateSweep::onlyGasAndBoundary)
<< AfterFunction(Communication_T(blockForest, forceDensityFieldID),
"Communication: after force weighting sweep");
// sweep for
// - reconstruction of PDFs in interface cells
// - streaming of PDFs in interface cells (and liquid cells on the same block)
// - advection of mass
// - update bubble volumes
// - marking interface cells for conversion
const walberla::free_surface::StreamReconstructAdvectSweep<
LatticeModel_T, typename AntidunesBoundaryHandling_T::BoundaryHandling_T, FlagField_T,
typename AntidunesBoundaryHandling_T::FlagInfo_T, ScalarField_T, VectorField_T, true >
streamReconstructAdvectSweep(surfaceTension, antidunesBoundaryHandling->getHandlingID(), fillFieldID, flagFieldID,
pdfFieldID, normalFieldID, curvatureFieldID, flagInfo, bubbleModel.get(),
pdfReconstructionModel, useSimpleMassExchange, cellConversionThreshold,
cellConversionForceThreshold);
// sweep acts only on blocks with at least one interface cell (due to StateSweep::fullFreeSurface)
timeloop.add() << Sweep(streamReconstructAdvectSweep, "Sweep: StreamReconstructAdvect", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: StreamReconstructAdvect")
// do not communicate PDFs here:
// - stream on blocks with "StateSweep::fullFreeSurface" was performed here using post-collision PDFs
// - stream on other blocks is performed below and should also use post-collision PDFs
// => if PDFs were communicated here, the ghost layer of other blocks would have post-stream PDFs
<< AfterFunction(Communication_T(blockForest, fillFieldID, flagFieldID),
"Communication: after StreamReconstructAdvect sweep")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< ScalarField_T >(blockForest, fillFieldID),
"Second ghost layer update: after StreamReconstructAdvect sweep (fill level field)")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< FlagField_T >(blockForest, flagFieldID),
"Second ghost layer update: after StreamReconstructAdvect sweep (flag field)");
auto lbmSweepGenerated = typename LatticeModel_T::Sweep(pdfFieldID);
// temporary class for being able to call the LBM collision with operator()
class CollideSweep
{
public:
CollideSweep(const typename LatticeModel_T::Sweep& sweep) : sweep_(sweep){};
void operator()(IBlock* const block, const uint_t numberOfGhostLayersToInclude = uint_t(0))
{
sweep_.collide(block, numberOfGhostLayersToInclude);
}
private:
typename LatticeModel_T::Sweep sweep_;
};
timeloop.add() << Sweep(CollideSweep(lbmSweepGenerated), "Sweep: collision (generated)", StateSweep::fullFreeSurface)
<< Sweep(lbmSweepGenerated, "Sweep: streamCollide (generated)", StateSweep::onlyLBM)
<< Sweep(emptySweep(), "Empty sweep: streamCollide (generated)")
<< AfterFunction(PdfCommunication_T(blockForest, pdfFieldID),
"Communication: after streamCollide (generated)");
// convert cells
// - according to the flags from StreamReconstructAdvectSweep (interface -> gas/liquid)
// - to ensure a closed layer of interface cells (gas/liquid -> interface)
// - detect and register bubble merges/splits (bubble volumes are already updated in StreamReconstructAdvectSweep)
// - convert cells and initialize PDFs near inflow boundaries
const walberla::free_surface::CellConversionSweep< LatticeModel_T, AntidunesBoundaryHandling_T::BoundaryHandling_T,
ScalarField_T >
cellConvSweep(antidunesBoundaryHandling->getHandlingID(), pdfFieldID, flagInfo, bubbleModel.get());
timeloop.add() << Sweep(cellConvSweep, "Sweep: cell conversion", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: cell conversion")
//<< AfterFunction(PdfCommunication_T(blockForest, pdfFieldID),
//
// "Communication: after cell conversion sweep (PDF field)")
// communicate the flag field also in corner directions
<< AfterFunction(Communication_T(blockForest, flagFieldID),
"Communication: after cell conversion sweep (flag field)")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< FlagField_T >(blockForest, flagFieldID),
"Second ghost layer update: after cell conversion sweep (flag field)");
// reinitialize PDFs, i.e., refill cells that were converted from gas to interface
// - when the flag "convertedFromGasToInterface" has been set (by CellConversionSweep)
// - according to the method specified with pdfRefillingModel_
const walberla::free_surface::EquilibriumRefillingSweep< LatticeModel_T, FlagField_T > equilibriumRefillingSweep(
pdfFieldID, flagFieldID, flagInfo, true);
timeloop.add() << Sweep(equilibriumRefillingSweep, "Sweep: EquilibriumRefilling", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: EquilibriumRefilling")
<< AfterFunction(PdfCommunication_T(blockForest, pdfFieldID),
"Communication: after EquilibriumRefilling sweep");
// distribute excess mass:
// - excess mass: mass that is free after conversion from interface to gas/liquid cells
// - update the bubble model
// IMPORTANT REMARK: this sweep computes the mass via the density, i.e., the PDF field must be up-to-date and the
// PdfRefillingSweep must have been performed
if (excessMassModel.isEvenlyType())
{
const walberla::free_surface::ExcessMassDistributionSweepInterfaceEvenly< LatticeModel_T, FlagField_T,
ScalarField_T, VectorField_T >
distributeMassSweep(excessMassModel, fillFieldID, flagFieldID, pdfFieldID, flagInfo);
timeloop.add()
<< Sweep(distributeMassSweep, "Sweep: excess mass distribution", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: distribute excess mass")
<< AfterFunction(Communication_T(blockForest, fillFieldID),
"Communication: after excess mass distribution sweep")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< ScalarField_T >(blockForest, fillFieldID),
"Second ghost layer update: after excess mass distribution sweep (fill level field)")
// update bubble model, i.e., perform registered bubble merges/splits; bubble merges/splits are
// already detected and registered by CellConversionSweep
<< AfterFunction([&bubbleModel]() { bubbleModel->update(); }, "Sweep: bubble model update");
}
else
{
if (excessMassModel.isWeightedType())
{
const walberla::free_surface::ExcessMassDistributionSweepInterfaceWeighted< LatticeModel_T, FlagField_T,
ScalarField_T, VectorField_T >
distributeMassSweep(excessMassModel, fillFieldID, flagFieldID, pdfFieldID, flagInfo, normalFieldID);
timeloop.add()
<< Sweep(distributeMassSweep, "Sweep: excess mass distribution", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: distribute excess mass")
<< AfterFunction(Communication_T(blockForest, fillFieldID),
"Communication: after excess mass distribution sweep")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< ScalarField_T >(blockForest, fillFieldID),
"Second ghost layer update: after excess mass distribution sweep (fill level field)")
// update bubble model, i.e., perform registered bubble merges/splits; bubble merges/splits
// are already detected and registered by CellConversionSweep
<< AfterFunction([&]() { bubbleModel->update(); }, "Sweep: bubble model update");
}
else
{
if (excessMassModel.isEvenlyAllInterfaceFallbackLiquidType())
{
const walberla::free_surface::ExcessMassDistributionSweepInterfaceAndLiquid< LatticeModel_T, FlagField_T,
ScalarField_T, VectorField_T >
distributeMassSweep(excessMassModel, fillFieldID, flagFieldID, pdfFieldID, flagInfo, excessMassFieldID);
timeloop.add()
// perform this sweep also on "onlyLBM" blocks because liquid cells also exchange excess mass here
<< Sweep(distributeMassSweep, "Sweep: excess mass distribution", StateSweep::fullFreeSurface)
<< Sweep(distributeMassSweep, "Sweep: excess mass distribution", StateSweep::onlyLBM)
<< Sweep(emptySweep(), "Empty sweep: distribute excess mass")
<< AfterFunction(Communication_T(blockForest, fillFieldID, excessMassFieldID),
"Communication: after excess mass distribution sweep")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< ScalarField_T >(blockForest, fillFieldID),
"Second ghost layer update: after excess mass distribution sweep (fill level field)")
// update bubble model, i.e., perform registered bubble merges/splits; bubble
// merges/splits are already detected and registered by CellConversionSweep
<< AfterFunction([&]() { bubbleModel->update(); }, "Sweep: bubble model update");
}
}
}
// reset all flags that signal cell conversions (except "keepInterfaceForWettingFlag")
walberla::free_surface::ConversionFlagsResetSweep< FlagField_T > resetConversionFlagsSweep(flagFieldID, flagInfo);
timeloop.add() << Sweep(resetConversionFlagsSweep, "Sweep: conversion flag reset", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: conversion flag reset")
<< AfterFunction(Communication_T(blockForest, flagFieldID),
"Communication: after excess mass distribution sweep")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< FlagField_T >(blockForest, flagFieldID),
"Second ghost layer update: after excess mass distribution sweep (flag field)");
// update block states
timeloop.add() << Sweep(blockStateUpdate, "Sweep: block state update");
// add VTK output
walberla::free_surface::addVTKOutput< LatticeModel_T, AntidunesBoundaryHandling_T, PdfField_T, FlagField_T,
ScalarField_T, VectorField_T >(
blockForest, timeloop, configPtr, flagInfo, pdfFieldID, flagFieldID, fillFieldID, BlockDataID(), curvatureFieldID,
normalFieldID, obstacleNormalFieldID);
// add triangle mesh output of free surface
walberla::free_surface::SurfaceMeshWriter< ScalarField_T, FlagField_T > surfaceMeshWriter(
blockForest, fillFieldID, flagFieldID, walberla::free_surface::flagIDs::liquidInterfaceGasFlagIDs, real_c(0),
configPtr);
surfaceMeshWriter(); // write initial mesh
timeloop.addFuncAfterTimeStep(surfaceMeshWriter, "Writer: surface mesh");
if (vtkSpacingParticles != uint_t(0))
{
// particle field
auto particleFieldVTK =
vtk::createVTKOutput_BlockData(blockForest, "particle_field", vtkSpacingParticles, 0, false, vtkFolder);
auto cellBB_filterParameters = vtkFluidParameters.getOneBlock("CellBB_filter");
const Vector3< uint_t > cellBB_filterMin = cellBB_filterParameters.getParameter< Vector3< uint_t > >("min");
const Vector3< uint_t > cellBB_filterMax = cellBB_filterParameters.getParameter< Vector3< uint_t > >("max");
AABB sliceAABB(real_c(cellBB_filterMin[0]), real_c(cellBB_filterMin[1]), real_c(cellBB_filterMin[2]),
real_c(cellBB_filterMax[0] + uint_t(1)), real_c(cellBB_filterMax[1] + uint_t(1)),
real_c(cellBB_filterMax[2] + uint_t(1)));
particleFieldVTK->addCellInclusionFilter(vtk::AABBCellFilter(sliceAABB));
particleFieldVTK->addCellDataWriter(
make_shared< field::VTKWriter< GhostLayerField< walberla::id_t, 1 > > >(particleFieldID, "particleField"));
particleFieldVTK->setSamplingResolution(vtkFluidParameters.getParameter< real_t >("samplingResolution"));
timeloop.addFuncBeforeTimeStep(vtk::writeFiles(particleFieldVTK), "VTK (particle field data");
}
if (vtkSpacingParticles != uint_t(0))
{
// sphere
auto particleVtkOutput = make_shared< mesa_pd::vtk::ParticleVtkOutput >(particleStorage);
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleUid >("uid");
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleLinearVelocity >("velocity");
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleInteractionRadius >("radius");
// limit output to process-local spheres
particleVtkOutput->setParticleSelector([](const mesa_pd::data::ParticleStorage::iterator& pIt) {
using namespace walberla::mesa_pd::data::particle_flags;
return (pIt->getBaseShape()->getShapeType() == mesa_pd::data::Sphere::SHAPE_TYPE) &&
!isSet(pIt->getFlags(), GHOST);
});
auto particleVtkWriter =
vtk::createVTKOutput_PointData(particleVtkOutput, "particles", vtkSpacingParticles, vtkFolder,
std::string("simulation_step"), false, true, true, true, beginTimeStep);
timeloop.addFuncAfterTimeStep(vtk::writeFiles(particleVtkWriter), "VTK (sphere data)");
}
// add logging for computational performance
const lbm::PerformanceLogger< FlagField_T > performanceLogger(
blockForest, flagFieldID, walberla::free_surface::flagIDs::liquidInterfaceFlagIDs, performanceLogFrequency);
timeloop.addFuncAfterTimeStep(performanceLogger, "Evaluator: performance logging");
// LBM stability check
timeloop.addFuncAfterTimeStep(makeSharedFunctor(field::makeStabilityChecker< PdfField_T, FlagField_T >(
walberlaEnv.config(), blockForest, pdfFieldID, flagFieldID,
walberla::free_surface::flagIDs::liquidInterfaceFlagIDs)),
"LBM stability check");
// add sweep for evaluating the fluid's mean velocity
const std::shared_ptr< Vector3< real_t > > meanVelocity = std::make_shared< Vector3< real_t > >(real_c(0));
const real_t velocityAveragingFactor = real_c(1) / (liquidHeight * real_c(domainSize[0]) * real_c(domainSize[1]));
MeanVelocityComputer< AntidunesBoundaryHandling_T, PdfField_T, FlagField_T > meanVelocityComputer(
blockForest, antidunesBoundaryHandling, pdfFieldID, meanVelocity, velocityAveragingFactor);
// PID Controller
shared_ptr< ForcingAdjuster > forcingAdjuster =
make_shared< ForcingAdjuster >(blockForest, targetMeanVelocityMagnitude, (*acceleration)[0], proportionalGain,
derivativeGain, integralGain, maxRamp, minActuatingVariable, maxActuatingVariable);
if (loadSnapshot) { forcingAdjuster->loadPIDSnapshot(snapshotBaseFolder + "/" + "pidState.file"); }
WcTimingPool timingPool;
// this is carried out after the particle integration, it corrects the flag field and restores missing PDF
// information then, the checkpointing file can be written, as otherwise some cells are invalid and can not be
// recovered
SweepTimeloop timeloopAfterParticles(blockForest, timesteps);
timeloopAfterParticles.setCurrentTimeStep(beginTimeStep);
// sweep for updating the particle mapping into the LBM simulation
bool strictlyConserveMomentum = false;
timeloopAfterParticles.add() << Sweep(
lbm_mesapd_coupling::makeMovingParticleMapping< PdfField_T, AntidunesBoundaryHandling_T::BoundaryHandling_T >(
blockForest, pdfFieldID, antidunesBoundaryHandling->getHandlingID(), particleFieldID, particleAccessor,
AntidunesBoundaryHandling_T::movingObstacleFlagID, FormerMO_Flag,
lbm_mesapd_coupling::RegularParticlesSelector(), strictlyConserveMomentum),
"Particle Mapping");
// sweep for restoring PDFs in cells previously occupied by particles
bool reconstruction_recomputeTargetDensity = false;
bool reconstruction_useCentralDifferences = true;
auto gradReconstructor =
lbm_mesapd_coupling::makeGradsMomentApproximationReconstructor< AntidunesBoundaryHandling_T::BoundaryHandling_T >(
blockForest, antidunesBoundaryHandling->getHandlingID(), relaxationRate, reconstruction_recomputeTargetDensity,
reconstruction_useCentralDifferences);
timeloopAfterParticles.add()
<< Sweep(makeSharedSweep(
lbm_mesapd_coupling::makePdfReconstructionManager< PdfField_T,
AntidunesBoundaryHandling_T::BoundaryHandling_T >(
blockForest, pdfFieldID, antidunesBoundaryHandling->getHandlingID(), particleFieldID,
particleAccessor, FormerMO_Flag, walberla::free_surface::flagIDs::liquidFlagID, gradReconstructor,
strictlyConserveMomentum)),
"PDF Restore")
<< AfterFunction(Communication_T(blockForest, flagFieldID, particleFieldID),
"Communication: after PDF reconstruction sweep") // unsure if necessary but added for consistency
<< AfterFunction(pdfCommunication, "PDF Communication");
real_t timeStepSizeParticles = real_c(1) / real_c(particleNumSubCycles);
mesa_pd::kernel::VelocityVerletPreForceUpdate vvIntegratorPreForce(timeStepSizeParticles);
mesa_pd::kernel::VelocityVerletPostForceUpdate vvIntegratorPostForce(timeStepSizeParticles);
mesa_pd::kernel::LinearSpringDashpot collisionResponse(1);
collisionResponse.setFrictionCoefficientDynamic(size_t(0), size_t(0), particleFrictionCoefficient);
mesa_pd::mpi::ReduceProperty reduceProperty;
mesa_pd::mpi::ReduceContactHistory reduceAndSwapContactHistory;
lbm_mesapd_coupling::ResetHydrodynamicForceTorqueKernel resetHydrodynamicForceTorque;
lbm_mesapd_coupling::AverageHydrodynamicForceTorqueKernel averageHydrodynamicForceTorque;
real_t particleCollisionTime = particleCollisionTimeNonDim * avgParticleDiameter;
lbm_mesapd_coupling::LubricationCorrectionKernel lubricationCorrectionKernel(
viscosity, [](real_t r) { return (real_c(0.001 + real_c(0.00007) * r)) * r; });
WALBERLA_LOG_INFO_ON_ROOT("Will use particle time step size of "
<< timeStepSizeParticles << " and collision time of " << particleCollisionTime);
AverageDataSliceEvaluator< PdfField_T, AntidunesBoundaryHandling_T, FlagField_T, ScalarField_T >
averageDataSliceEvaluator(blockForest, flagFieldID, fillFieldID, pdfFieldID);
std::shared_ptr< real_t > totalFluidMass = std::make_shared< real_t >(real_c(0));
walberla::free_surface::TotalMassComputer< AntidunesBoundaryHandling_T, PdfField_T, FlagField_T, ScalarField_T >
totalFluidMassEvaluator(blockForest, antidunesBoundaryHandling, pdfFieldID, fillFieldID, evaluationFrequency,
totalFluidMass);
BedloadTransportEvaluator< ParticleAccessor_T > bedloadTransportEvaluator(
particleAccessor, real_c(1) / real_c(domainSize[0] * domainSize[1]), numParticles);
auto bedLoadTransportFileName = baseFolderName + "/bedload.txt";
WALBERLA_LOG_INFO_ON_ROOT("Writing bedload info to file " << bedLoadTransportFileName);
auto fluidInfoFileName = baseFolderName + "/fluidInfo.txt";
WALBERLA_LOG_INFO_ON_ROOT("Writing fluid info to file " << fluidInfoFileName);
// write info file
WALBERLA_ROOT_SECTION()
{
std::ofstream evalInfoFile(baseFolderName + "/info.txt");
evalInfoFile << evaluationFrequency << "\n";
evalInfoFile << gravity << "\n";
evalInfoFile << viscosity << "\n";
evalInfoFile << particleDensityRatio << "\n";
evalInfoFile << avgParticleDiameter << "\n";
evalInfoFile << domainSize[0] << "\n";
evalInfoFile << domainSize[1] << "\n";
evalInfoFile << domainSize[2] << "\n";
evalInfoFile << numParticles << "\n";
evalInfoFile << dx_SI << "\n";
evalInfoFile << dt_SI << "\n";
evalInfoFile.close();
}
Vector3< real_t > totalHydrodynamicForceOnParticles(real_c(0)); // only root will have valid values
for (uint_t t = beginTimeStep; t != timesteps; ++t)
{
timeloop.singleStep(timingPool, true);
timingPool["Mesa_pd"].start();
reduceProperty.operator()< mesa_pd::HydrodynamicForceTorqueNotification >(*particleStorage);
if (t == 0)
{
lbm_mesapd_coupling::InitializeHydrodynamicForceTorqueForAveragingKernel
initializeHydrodynamicForceTorqueForAveragingKernel;
particleStorage->forEachParticle(useOpenMP, mesa_pd::kernel::SelectLocal(), *particleAccessor,
initializeHydrodynamicForceTorqueForAveragingKernel, *particleAccessor);
}
particleStorage->forEachParticle(useOpenMP, mesa_pd::kernel::SelectLocal(), *particleAccessor,
averageHydrodynamicForceTorque, *particleAccessor);
for (auto subCycle = uint_t(0); subCycle < particleNumSubCycles; ++subCycle)
{
particleStorage->forEachParticle(useOpenMP, mesa_pd::kernel::SelectLocal(), *particleAccessor,
vvIntegratorPreForce, *particleAccessor);
syncCall();
if (useLubricationCorrection)
{
// lubrication correction
particleStorage->forEachParticlePairHalf(
useOpenMP, mesa_pd::kernel::ExcludeInfiniteInfinite(), *particleAccessor,
[&lubricationCorrectionKernel, &mesapdDomain](const size_t idx1, const size_t idx2, auto& ac) {
mesa_pd::collision_detection::AnalyticContactDetection acd;
acd.getContactThreshold() = lubricationCorrectionKernel.getNormalCutOffDistance();
mesa_pd::kernel::DoubleCast double_cast;
mesa_pd::mpi::ContactFilter contact_filter;
if (double_cast(idx1, idx2, ac, acd, ac))
{
if (contact_filter(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), *mesapdDomain))
{
double_cast(acd.getIdx1(), acd.getIdx2(), ac, lubricationCorrectionKernel, ac,
acd.getContactNormal(), acd.getPenetrationDepth());
}
}
},
*particleAccessor);
}
// collision response
particleStorage->forEachParticlePairHalf(
useOpenMP, mesa_pd::kernel::ExcludeInfiniteInfinite(), *particleAccessor,
[&collisionResponse, &mesapdDomain, timeStepSizeParticles, particleRestitutionCoefficient,
particleCollisionTime, particleKappa](const size_t idx1, const size_t idx2, auto& ac) {
mesa_pd::collision_detection::AnalyticContactDetection acd;
mesa_pd::kernel::DoubleCast double_cast;
mesa_pd::mpi::ContactFilter contact_filter;
if (double_cast(idx1, idx2, ac, acd, ac))
{
if (contact_filter(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), *mesapdDomain))
{
auto meff = real_c(1) / (ac.getInvMass(idx1) + ac.getInvMass(idx2));
collisionResponse.setStiffnessAndDamping(0, 0, particleRestitutionCoefficient,
particleCollisionTime, particleKappa, meff);
collisionResponse(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), acd.getContactNormal(),
acd.getPenetrationDepth(), timeStepSizeParticles);
}
}
},
*particleAccessor);
reduceAndSwapContactHistory(*particleStorage);
// add hydrodynamic force
lbm_mesapd_coupling::AddHydrodynamicInteractionKernel addHydrodynamicInteraction;
particleStorage->forEachParticle(useOpenMP, mesa_pd::kernel::SelectLocal(), *particleAccessor,
addHydrodynamicInteraction, *particleAccessor);
// add external forces
particleStorage->forEachParticle(
useOpenMP, mesa_pd::kernel::SelectLocal(), *particleAccessor,
[particleDensityRatio, acceleration](const size_t idx, auto& ac) {
mesa_pd::addForceAtomic(idx, ac,
ac.getVolume(idx) *
Vector3< real_t >((*acceleration)[0], (*acceleration)[1],
(particleDensityRatio - real_c(1)) * (*acceleration)[2]));
},
*particleAccessor);
reduceProperty.operator()< mesa_pd::ForceTorqueNotification >(*particleStorage);
particleStorage->forEachParticle(useOpenMP, mesa_pd::kernel::SelectLocal(), *particleAccessor,
vvIntegratorPostForce, *particleAccessor);
syncCall();
}
// has to be evaluated here before the force info is erased from particles
if (t % evaluationFrequency == uint_c(0))
totalHydrodynamicForceOnParticles = getTotalHydrodynamicForceOnParticles(particleAccessor);
particleStorage->forEachParticle(useOpenMP, mesa_pd::kernel::SelectAll(), *particleAccessor,
resetHydrodynamicForceTorque, *particleAccessor);
timingPool["Mesa_pd"].end();
// update particle mapping
timeloopAfterParticles.singleStep(timingPool, true);
timingPool["Evaluation"].start();
if (t % evaluationFrequency == uint_c(0))
{
averageDataSliceEvaluator();
totalFluidMassEvaluator.computeMass(blockForest, antidunesBoundaryHandling);
bedloadTransportEvaluator();
meanVelocityComputer();
WALBERLA_ROOT_SECTION()
{
write2DVectorToFile(averageDataSliceEvaluator.getSolidVolumeFractionVector(),
averageDataSliceEvaluator.getXLen(), averageDataSliceEvaluator.getZLen(),
baseFolderName + "/svfSlice_" + std::to_string(t) + ".txt");
write2DVectorToFile(averageDataSliceEvaluator.getFillLevelVector(), averageDataSliceEvaluator.getXLen(),
averageDataSliceEvaluator.getZLen(),
baseFolderName + "/fillSlice_" + std::to_string(t) + ".txt");
write2DVectorToFile(averageDataSliceEvaluator.getVelocityXVector(), averageDataSliceEvaluator.getXLen(),
averageDataSliceEvaluator.getZLen(),
baseFolderName + "/velXSlice_" + std::to_string(t) + ".txt");
std::ofstream bedloadFile(bedLoadTransportFileName, std::ofstream::app);
bedloadFile << t << " " << bedloadTransportEvaluator.getTransportRate() << " "
<< bedloadTransportEvaluator.getAverageVelocity() << " " << totalHydrodynamicForceOnParticles[0]
<< " " << totalHydrodynamicForceOnParticles[1] << " " << totalHydrodynamicForceOnParticles[2]
<< "\n";
bedloadFile.close();
WALBERLA_LOG_DEVEL("____________________________________________________________________");
WALBERLA_LOG_DEVEL("time step = " << t);
const real_t froudeNumber =
(*meanVelocity)[0] / real_c(std::sqrt(liquidHeight * std::abs((*acceleration)[2])));
const real_t reynoldsNumber = (*meanVelocity)[0] * liquidHeight / viscosity;
const real_t weberNumber =
real_c(1.0) * (*meanVelocity)[0] * (*meanVelocity)[0] * liquidHeight / surfaceTension;
WALBERLA_LOG_DEVEL(" - Total fluid mass = " << std::setprecision(16) << (*totalFluidMass));
auto maxFluidZPos = averageDataSliceEvaluator.getMaxFluidZPos();
WALBERLA_LOG_DEVEL(" - Max fluid z-position = " << maxFluidZPos);
WALBERLA_LOG_DEVEL(" - Froude number = " << froudeNumber);
WALBERLA_LOG_DEVEL(" - Reynolds number = " << reynoldsNumber);
WALBERLA_LOG_DEVEL(" - We = " << weberNumber);
WALBERLA_LOG_DEVEL(" - meanVelocity = " << *meanVelocity);
std::ofstream fluidInfoFile(fluidInfoFileName, std::ofstream::app);
fluidInfoFile << t << " " << (*acceleration)[0] << " " << (*meanVelocity)[0] << " " << maxFluidZPos << " "
<< std::setprecision(16) << (*totalFluidMass) << "\n";
fluidInfoFile.close();
if (std::isnan(reynoldsNumber)) WALBERLA_ABORT("reynoldsNumber is inf!")
}
WALBERLA_LOG_DEVEL_ON_ROOT(
" -> CurrentExternalAcceleration in x-direction before update = " << (*acceleration)[0]);
(*forcingAdjuster)(meanVelocity->length());
(*acceleration)[0] = forcingAdjuster->getExternalForcing();
WALBERLA_LOG_DEVEL_ON_ROOT(
" -> CurrentExternalAcceleration in x-direction after update = " << (*acceleration)[0]);
}
timingPool["Evaluation"].end();
if (storeSnapshot)
{
if (t % snapshotFrequency == uint_c(0) && t > uint_c(0))
{
WALBERLA_LOG_INFO_ON_ROOT("Writing checkpointing file in time step " << t)
blockForest->saveBlockData(snapshotBaseFolder + std::string("/tmp_") += pdfFieldFile, pdfFieldID);
blockForest->saveBlockData(snapshotBaseFolder + "/tmp_" += fillFieldFile, fillFieldID);
blockForest->saveBlockData(snapshotBaseFolder + "/tmp_" += excessMassFieldFile, excessMassFieldID);
blockForest->saveBlockData(snapshotBaseFolder + "/tmp_" += particleStorageFile, particleStorageID);
WALBERLA_ROOT_SECTION()
{
std::string tmpCheckpointConfigFile = snapshotBaseFolder + "/tmp_" += checkpointConfigFile;
std::ofstream file;
file.open(tmpCheckpointConfigFile.c_str());
file << std::setprecision(16);
file << t + 1 << "\n";
file << (*acceleration)[0] << "\n";
file.close();
}
forcingAdjuster->storePIDSnapshot(snapshotBaseFolder + "/" + "pidState.file");
WALBERLA_MPI_BARRIER();
// rename checkpoint files to "real" ones
// otherwise, the checkpointed state might be incomplete if the simulation stops due to over time during
// checkpointing
WALBERLA_ROOT_SECTION()
{
renameFile(snapshotBaseFolder + "/tmp_" += pdfFieldFile, snapshotBaseFolder + "/" += pdfFieldFile);
renameFile(snapshotBaseFolder + "/tmp_" += fillFieldFile, snapshotBaseFolder + "/" += fillFieldFile);
renameFile(snapshotBaseFolder + "/tmp_" += excessMassFieldFile,
snapshotBaseFolder + "/" += excessMassFieldFile);
renameFile(snapshotBaseFolder + "/tmp_" += particleStorageFile,
snapshotBaseFolder + "/" += particleStorageFile);
renameFile(snapshotBaseFolder + "/tmp_" += checkpointConfigFile,
snapshotBaseFolder + "/" += checkpointConfigFile);
}
}
}
if (t % performanceLogFrequency == uint_c(0) && t > uint_c(0)) { timingPool.logResultOnRoot(); }
}
return EXIT_SUCCESS;
}
} // namespace antidunes
} // namespace walberla
int main(int argc, char** argv) { return walberla::antidunes::main(argc, argv); }
BlockForestParameters
{
cellsPerBlock < 50, 20, 40 >;
periodicity < 1, 1, 0 >;
loadSnapshot false;
storeSnapshot true;
snapshotFrequency 10000;
snapshotBaseFolder snapshots;
}
DomainParameters
{
domainSize <3200, 60, 160>;
wavePeriods 1; // never set to 0 -> division by zero, even if you initialize a flat particle bed
liquidHeightFactor 2.862; // h_0 / d (water height / avg particle diameter) -> from experiment [E1=2.862, E2=3.1724, E3=3.27586, E4=3.5862]
floorHeightFactor 4.1393; // from domain bottom to sine's average
initialAmplitude 0; // defined from minimum peak to maximum peak as by Pascal et al. (2021)
}
PIDParameters
{
targetMeanVelocityMagnitude 0.02;
proportionalGain 2e-4;
derivativeGain 1e-6;
integralGain 2e-4;
maxRamp 1e-4;
minActuatingVariable 0;
maxActuatingVariable 1e-3;
}
PhysicsParameters
{
enableWetting false;
timesteps 2000000;
Re 3100; // [E1=3100, E2=3772, E3=4180, E4=4800]
Fr 1.31; // [E1=1.31, E2=1.38, E3=1.44, E4=1.45]
We 15.6188; // [E1=15.6188, E2=21.48, E3=25.54, E4=30.2493]
}
ParticleParameters
{
inFileName spheres_out.dat;
bedCopiesInX 1;
bedCopiesInY 1;
densityRatio 2.55;
fixingHeightFactor 1.5; // proportional to the mean particle diameter
frictionCoefficient 0.5;
restitutionCoefficient 0.97;
numSubCycles 10;
useLubricationCorrection true;
useNoSlipParticles false;
}
ModelParameters
{
pdfReconstructionModel OnlyMissing;
pdfRefillingModel EquilibriumRefilling;
excessMassDistributionModel EvenlyNewInterfaceFallbackLiquid;
curvatureModel FiniteDifferenceMethod;
useSimpleMassExchange false;
cellConversionThreshold 1e-2;
cellConversionForceThreshold 1e-1;
}
EvaluationParameters
{
performanceLogFrequency 10000;
evaluationFrequency 1000;
baseFolderName eval;
}
StabilityChecker
{
checkFrequency 0;
streamOutput false;
vtkOutput true;
}
BoundaryParameters
{
// X
//Border { direction W; walldistance -1; NoSlip{} }
//Border { direction E; walldistance -1; NoSlip{} }
// Y
//Border { direction N; walldistance -1; NoSlip{} }
//Border { direction S; walldistance -1; NoSlip{} }
// Z
Border { direction T; walldistance -1; NoSlip{} }
Border { direction B; walldistance -1; NoSlip{} }
}
MeshOutputParameters
{
writeFrequency 0;
baseFolder mesh-out;
}
VTK
{
fluid_field
{
writeFrequency 1000;
ghostLayers 0;
baseFolder vtk-out;
samplingResolution 1;
writers
{
fill_level;
mapped_flag;
velocity;
density;
//curvature;
//normal;
//obstacle_normal;
//pdf;
//flag;
//force;
}
CellBB_filter {
min < 0, 29, 0 >;
max < 3200, 30, 160 >;
}
inclusion_filters
{
CellBB_filter;
//liquidInterfaceFilter; // only include liquid and interface cells in VTK output
}
before_functions
{
//ghost_layer_synchronization; // only needed if writing the ghost layer
gas_cell_zero_setter; // sets velocity=0 and density=1 all gas cells before writing VTK output
}
}
domain_decomposition
{
writeFrequency 10000000000;
baseFolder vtk-out;
outputDomainDecomposition true;
}
}
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file Boundary.h
//! \ingroup free_surface
//! \author Martin Bauer
//! \author Christoph Schwarzmeier <christoph.schwarzmeier@fau.de>
//! \brief Boundary handling for the free surface LBM module.
//
//======================================================================================================================
#pragma once
#include "blockforest/StructuredBlockForest.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "boundary/BoundaryHandling.h"
#include "field/GhostLayerField.h"
#include "geometry/initializer/InitializationManager.h"
#include "geometry/initializer/OverlapFieldFromBody.h"
#include "lbm/boundary/all.h"
#include "lbm/field/PdfField.h"
#include "lbm/free_surface/FlagInfo.h"
#include "lbm/free_surface/InitFunctions.h"
#include "lbm/free_surface/InterfaceFromFillLevel.h"
#include "lbm/free_surface/boundary/SimplePressureWithFreeSurface.h"
#include "lbm_mesapd_coupling/momentum_exchange_method/boundary/CurvedLinear.h"
#include "lbm_mesapd_coupling/momentum_exchange_method/boundary/SimpleBB.h"
namespace walberla
{
namespace antidunes
{
namespace free_surface
{
/***********************************************************************************************************************
* Boundary handling for the free surface LBM extension.
**********************************************************************************************************************/
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
class AntidunesBoundaryHandling
{
public:
using flag_t = typename FlagField_T::value_type;
using Stencil_T = typename LatticeModel_T::Stencil;
using CommunicationStencil_T =
std::conditional_t< LatticeModel_T::Stencil::D == uint_t(2), stencil::D2Q9, stencil::D3Q27 >;
using PdfField_T = lbm::PdfField< LatticeModel_T >;
// boundary
using NoSlip_T = lbm::NoSlip< LatticeModel_T, flag_t >;
using FreeSlip_T = lbm::FreeSlip< LatticeModel_T, FlagField_T >;
using UBB_T = lbm::UBB< LatticeModel_T, flag_t >;
using Pressure_T = walberla::free_surface::SimplePressureWithFreeSurface< LatticeModel_T, FlagField_T >;
using Outlet_T = lbm::Outlet< LatticeModel_T, FlagField_T, 4, 3 >;
using UBB_Inflow_T =
lbm::UBB< LatticeModel_T, flag_t >; // creates interface cells in the direction of the prescribed velocity, i.e.,
// represents an inflow boundary condition
using MovingObstacle_T = lbm_mesapd_coupling::SimpleBB< LatticeModel_T, FlagField_T, ParticleAccessor_T >;
// handling type
using BoundaryHandling_T =
BoundaryHandling< FlagField_T, Stencil_T, NoSlip_T, UBB_T, UBB_Inflow_T, Pressure_T, Pressure_T, Outlet_T,
FreeSlip_T,
MovingObstacle_T >; // 2 pressure boundaries with different densities, e.g., inflow-outflow
using FlagInfo_T = walberla::free_surface::FlagInfo< FlagField_T >;
// constructor
AntidunesBoundaryHandling(const std::shared_ptr< StructuredBlockForest >& blockForest, BlockDataID pdfFieldID,
BlockDataID fillLevelID, BlockDataID particleFieldID,
const shared_ptr< ParticleAccessor_T >& ac,
std::function< real_t(const Vector3< real_t >&) > hydrostaticDensityFct);
// initialize fluid field from config file using (quotes indicate the string to be used in the file):
// - "CellInterval" (see src/geometry/initializer/BoundaryFromCellInterval.h)
// - "Border" (see src/geometry/initializer/BoundaryFromDomainBorder.h)
// - "Image" (see src/geometry/initializer/BoundaryFromImage.h)
// - "Body" (see src/geometry/initializer/OverlapFieldFromBody.h)
inline void initFromConfig(const Config::BlockHandle& block);
// initialize free surface object from geometric body (see src/geometry/initializer/OverlapFieldFromBody.h)
template< typename Body_T >
inline void addFreeSurfaceObject(const Body_T& body, bool addOrSubtract = false);
// clear and initialize flags in every cell according to the fill level and initialize fill level in boundaries (with
// value 1) and obstacles such that the bubble model does not detect obstacles as gas cells
void initFlagsFromFillLevel();
inline void setNoSlipAtBorder(stencil::Direction d, cell_idx_t wallDistance = cell_idx_c(0));
inline void setNoSlipAtAllBorders(cell_idx_t wallDistance = cell_idx_c(0));
void setNoSlipInCell(const Cell& globalCell);
inline void setFreeSlipAtBorder(stencil::Direction d, cell_idx_t wallDistance = cell_idx_c(0));
inline void setFreeSlipAtAllBorders(cell_idx_t wallDistance = cell_idx_c(0));
void setFreeSlipInCell(const Cell& globalCell);
void setUBBInCell(const Cell& globalCell, const Vector3< real_t >& velocity);
// UBB that generates interface cells to resemble an inflow boundary
void setInflowInCell(const Cell& globalCell, const Vector3< real_t >& velocity);
inline void setPressure(real_t density);
void setPressureOutflow(real_t density);
void setBodyForce(const Vector3< real_t >& bodyForce);
void enableBubbleOutflow(BubbleModelBase* bubbleModel);
// checks if an obstacle cell is located in an outermost ghost layer (corners are explicitly ignored, as they do not
// influence periodic communication)
Vector3< bool > isObstacleInGlobalGhostLayer();
// flag management
const walberla::free_surface::FlagInfo< FlagField_T >& getFlagInfo() const { return flagInfo_; }
// flag IDs
static const field::FlagUID noSlipFlagID;
static const field::FlagUID ubbFlagID;
static const field::FlagUID ubbInflowFlagID;
static const field::FlagUID pressureFlagID;
static const field::FlagUID pressureOutflowFlagID;
static const field::FlagUID outletFlagID;
static const field::FlagUID freeSlipFlagID;
static const field::FlagUID movingObstacleFlagID;
// boundary IDs
static const BoundaryUID noSlipBoundaryID;
static const BoundaryUID ubbBoundaryID;
static const BoundaryUID ubbInflowBoundaryID;
static const BoundaryUID pressureBoundaryID;
static const BoundaryUID pressureOutflowBoundaryID;
static const BoundaryUID outletBoundaryID;
static const BoundaryUID freeSlipBoundaryID;
static const BoundaryUID movingObstacleBoundaryID;
inline BlockDataID getHandlingID() const { return handlingID_; }
inline BlockDataID getPdfFieldID() const { return pdfFieldID_; }
inline BlockDataID getFillFieldID() const { return fillFieldID_; }
inline BlockDataID getFlagFieldID() const { return flagFieldID_; }
inline BlockDataID getParticleFieldID() const { return particleFieldID_; }
inline std::shared_ptr< StructuredBlockForest > getBlockForest() const { return blockForest_; };
inline shared_ptr< ParticleAccessor_T > getParticleAccessor() const { return ac_; }
inline std::function< real_t(const Vector3< real_t >&) > getHydrostaticDensityFct() const
{
return hydrostaticDensityFct_;
}
// executes standard waLBerla boundary handling
class ExecuteBoundaryHandling
{
public:
ExecuteBoundaryHandling(const BlockDataID& collection) : handlingID_(collection) {}
void operator()(IBlock* const block) const
{
BoundaryHandling_T* const handling = block->getData< BoundaryHandling_T >(handlingID_);
// reset "near boundary" flags
handling->refresh();
(*handling)();
}
protected:
BlockDataID handlingID_;
}; // class ExecuteBoundaryHandling
ExecuteBoundaryHandling getBoundarySweep() const { return ExecuteBoundaryHandling(getHandlingID()); }
private:
walberla::free_surface::FlagInfo< FlagField_T > flagInfo_;
// register standard waLBerla initializers
geometry::initializer::InitializationManager getInitManager();
std::shared_ptr< StructuredBlockForest > blockForest_;
BlockDataID flagFieldID_;
BlockDataID pdfFieldID_;
BlockDataID fillFieldID_;
BlockDataID particleFieldID_;
shared_ptr< ParticleAccessor_T > ac_;
std::function< real_t(const Vector3< real_t >&) > hydrostaticDensityFct_;
BlockDataID handlingID_;
blockforest::communication::UniformBufferedScheme< CommunicationStencil_T > comm_;
}; // class AntidunesBoundaryHandling
} // namespace free_surface
} // namespace antidunes
} // namespace walberla
#include "AntidunesBoundaryHandling.impl.h"