Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 3628 additions and 832 deletions
...@@ -26,7 +26,6 @@ Parameters ...@@ -26,7 +26,6 @@ Parameters
omega 1.92; omega 1.92;
initShearFlow 1; initShearFlow 1;
useGui 0;
} }
/* /*
......
import sympy as sp import sympy as sp
import numpy as np import numpy as np
import pystencils as ps import pystencils as ps
from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule from lbmpy.enums import Method, Stencil, SubgridScaleModel
from lbmpy.fieldaccess import AAEvenTimeStepAccessor, AAOddTimeStepAccessor from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule, create_lb_collision_rule
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
from pystencils_walberla import generate_pack_info_from_kernel from pystencils_walberla import generate_pack_info_from_kernel
from lbmpy_walberla import generate_lattice_model, generate_boundary
from pystencils_walberla import CodeGeneration, generate_sweep from pystencils_walberla import CodeGeneration, generate_sweep
from pystencils.data_types import TypedSymbol from pystencils.typing import TypedSymbol
from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
omega = sp.symbols("omega") omega = sp.symbols("omega")
omega_free = sp.Symbol("omega_free") omega_free = sp.Symbol("omega_free")
omega_fill = sp.symbols("omega_:10")
compile_time_block_size = False compile_time_block_size = False
if compile_time_block_size: if compile_time_block_size:
...@@ -37,24 +41,42 @@ options_dict = { ...@@ -37,24 +41,42 @@ options_dict = {
'mrt': { 'mrt': {
'method': 'mrt', 'method': 'mrt',
'stencil': 'D3Q19', 'stencil': 'D3Q19',
'relaxation_rates': [omega, 1.3, 1.4, omega, 1.2, 1.1], 'relaxation_rates': [omega, 1.3, 1.4, 1.2, 1.1, 1.15, 1.234, 1.4235],
},
'mrt_full': {
'method': 'mrt',
'stencil': 'D3Q19',
'relaxation_rates': [omega_fill[0], omega, omega_fill[1], omega_fill[2],
omega_fill[3], omega_fill[4], omega_fill[5]],
}, },
'entropic': { 'entropic': {
'method': 'mrt', 'method': 'mrt',
'stencil': 'D3Q19', 'stencil': 'D3Q19',
'compressible': True, 'compressible': True,
'relaxation_rates': [omega, omega, omega_free, omega_free, omega_free], 'relaxation_rates': [omega, omega, omega_free, omega_free, omega_free, omega_free],
'entropic': True,
},
'entropic_kbc_n4': {
'method': 'trt-kbc-n4',
'stencil': 'D3Q27',
'compressible': True,
'relaxation_rates': [omega, omega_free],
'entropic': True, 'entropic': True,
}, },
'smagorinsky': { 'smagorinsky': {
'method': 'srt', 'method': 'srt',
'stencil': 'D3Q19', 'stencil': 'D3Q19',
'smagorinsky': True, 'subgrid_scale_model': SubgridScaleModel.SMAGORINSKY,
'relaxation_rate': omega, 'relaxation_rate': omega,
} },
'cumulant': {
'method': 'cumulant',
'stencil': 'D3Q19',
'compressible': True,
'relaxation_rate': omega,
},
} }
info_header = """ info_header = """
#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q}; #include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
const char * infoStencil = "{stencil}"; const char * infoStencil = "{stencil}";
...@@ -63,22 +85,37 @@ const bool infoCseGlobal = {cse_global}; ...@@ -63,22 +85,37 @@ const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs}; const bool infoCsePdfs = {cse_pdfs};
""" """
with CodeGeneration() as ctx: with CodeGeneration() as ctx:
accessors = { accessor = StreamPullTwoFieldsAccessor()
'Even': AAEvenTimeStepAccessor(), # accessor = StreamPushTwoFieldsAccessor()
'Odd': AAOddTimeStepAccessor() assert not accessor.is_inplace, "This app does not work for inplace accessors"
}
common_options = { common_options = {
'field_name': 'pdfs', 'field_name': 'pdfs',
'temporary_field_name': 'pdfs_tmp',
'kernel_type': accessor,
'optimization': {'cse_global': True, 'optimization': {'cse_global': True,
'cse_pdfs': False, 'cse_pdfs': False}
'field_layout': 'fzyx',
}
} }
options = options_dict.get(ctx.config, options_dict['srt']) config_name = ctx.config
noopt = False
d3q27 = False
if config_name.endswith("_noopt"):
noopt = True
config_name = config_name[:-len("_noopt")]
if config_name.endswith("_d3q27"):
d3q27 = True
config_name = config_name[:-len("_d3q27")]
options = options_dict[config_name]
options.update(common_options) options.update(common_options)
options = options.copy()
if noopt:
options['optimization']['cse_global'] = False
options['optimization']['cse_pdfs'] = False
if d3q27:
options['stencil'] = 'D3Q27'
stencil_str = options['stencil'] stencil_str = options['stencil']
q = int(stencil_str[stencil_str.find('Q') + 1:]) q = int(stencil_str[stencil_str.find('Q') + 1:])
...@@ -86,37 +123,48 @@ with CodeGeneration() as ctx: ...@@ -86,37 +123,48 @@ with CodeGeneration() as ctx:
options['optimization']['symbolic_field'] = pdfs options['optimization']['symbolic_field'] = pdfs
vp = [ vp = [
('double', 'omega_0'),
('double', 'omega_1'),
('double', 'omega_2'),
('double', 'omega_3'),
('double', 'omega_4'),
('double', 'omega_5'),
('double', 'omega_6'),
('int32_t', 'cudaBlockSize0'), ('int32_t', 'cudaBlockSize0'),
('int32_t', 'cudaBlockSize1') ('int32_t', 'cudaBlockSize1'),
] ]
lb_method = create_lb_method(**options) lb_method = create_lb_method(**options)
update_rule = create_lb_update_rule(lb_method=lb_method, **options)
# Kernels if not noopt:
options_without_opt = options.copy()
del options_without_opt['optimization']
update_rules = {}
for name, accessor in accessors.items():
update_rule = create_lb_update_rule(lb_method=lb_method, kernel_type=accessor, **options)
update_rule = insert_fast_divisions(update_rule) update_rule = insert_fast_divisions(update_rule)
update_rule = insert_fast_sqrts(update_rule) update_rule = insert_fast_sqrts(update_rule)
update_rules[name] = update_rule
generate_sweep(ctx, 'UniformGridGPU_AA_LbKernel' + name, update_rule, # CPU lattice model - required for macroscopic value computation, VTK output etc.
inner_outer_split=True, target='gpu', gpu_indexing_params=sweep_params, options_without_opt = options.copy()
varying_parameters=vp) del options_without_opt['optimization']
generate_lattice_model(ctx, 'UniformGridGPU_LatticeModel', create_lb_collision_rule(lb_method=lb_method,
**options_without_opt))
# gpu LB sweep & boundaries
generate_sweep(ctx, 'UniformGridGPU_LbKernel', update_rule,
field_swaps=[('pdfs', 'pdfs_tmp')],
inner_outer_split=True, target='gpu', gpu_indexing_params=sweep_params,
varying_parameters=vp)
generate_boundary(ctx, 'UniformGridGPU_NoSlip', NoSlip(), lb_method, target='gpu')
generate_boundary(ctx, 'UniformGridGPU_UBB', UBB([0.05, 0, 0]), lb_method, target='gpu')
# getter & setter # getter & setter
setter_assignments = macroscopic_values_setter(lb_method, velocity=velocity_field.center_vector, setter_assignments = macroscopic_values_setter(lb_method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=1.0) pdfs=pdfs.center_vector, density=1.0)
getter_assignments = macroscopic_values_getter(lb_method, velocity=velocity_field.center_vector, getter_assignments = macroscopic_values_getter(lb_method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=None) pdfs=pdfs.center_vector, density=None)
generate_sweep(ctx, 'UniformGridGPU_AA_MacroSetter', setter_assignments) generate_sweep(ctx, 'UniformGridGPU_MacroSetter', setter_assignments)
generate_sweep(ctx, 'UniformGridGPU_AA_MacroGetter', getter_assignments) generate_sweep(ctx, 'UniformGridGPU_MacroGetter', getter_assignments)
# communication # communication
generate_pack_info_from_kernel(ctx, 'UniformGridGPU_AA_PackInfoPull', update_rules['Odd'], generate_pack_info_from_kernel(ctx, 'UniformGridGPU_PackInfo', update_rule, target='gpu')
kind='pull', target='gpu')
generate_pack_info_from_kernel(ctx, 'UniformGridGPU_AA_PackInfoPush', update_rules['Odd'],
kind='push', target='gpu')
infoHeaderParams = { infoHeaderParams = {
'stencil': stencil_str, 'stencil': stencil_str,
...@@ -125,4 +173,4 @@ with CodeGeneration() as ctx: ...@@ -125,4 +173,4 @@ with CodeGeneration() as ctx:
'cse_global': int(options['optimization']['cse_global']), 'cse_global': int(options['optimization']['cse_global']),
'cse_pdfs': int(options['optimization']['cse_pdfs']), 'cse_pdfs': int(options['optimization']['cse_pdfs']),
} }
ctx.write_file("UniformGridGPU_AA_Defines.h", info_header.format(**infoHeaderParams)) ctx.write_file("UniformGridGPU_Defines.h", info_header.format(**infoHeaderParams))
...@@ -7,9 +7,9 @@ ...@@ -7,9 +7,9 @@
#include "blockforest/communication/UniformDirectScheme.h" #include "blockforest/communication/UniformDirectScheme.h"
#include "field/communication/StencilRestrictedMPIDatatypeInfo.h" #include "field/communication/StencilRestrictedMPIDatatypeInfo.h"
#include "field/communication/UniformMPIDatatypeInfo.h" #include "field/communication/UniformMPIDatatypeInfo.h"
#include "cuda/communication/GPUPackInfo.h" #include "gpu/communication/GPUPackInfo.h"
#include "cuda/communication/UniformGPUScheme.h" #include "gpu/communication/UniformGPUScheme.h"
#include "cuda/communication/MemcpyPackInfo.h" #include "gpu/communication/MemcpyPackInfo.h"
#include "UniformGridGPU_PackInfo.h" #include "UniformGridGPU_PackInfo.h"
...@@ -36,28 +36,28 @@ public: ...@@ -36,28 +36,28 @@ public:
_gpuCommunicationScheme(nullptr), _directScheme(nullptr) _gpuCommunicationScheme(nullptr), _directScheme(nullptr)
{ {
auto generatedPackInfo = make_shared<pystencils::UniformGridGPU_PackInfo>( bdId ); auto generatedPackInfo = make_shared<pystencils::UniformGridGPU_PackInfo>( bdId );
auto memcpyPackInfo = make_shared< cuda::communication::MemcpyPackInfo< GPUFieldType > >( bdId ); auto memcpyPackInfo = make_shared< gpu::communication::MemcpyPackInfo< GPUFieldType > >( bdId );
auto dataTypeInfo = make_shared< field::communication::StencilRestrictedMPIDatatypeInfo< GPUFieldType, StencilType > >( bdId ); auto dataTypeInfo = make_shared< field::communication::StencilRestrictedMPIDatatypeInfo< GPUFieldType, StencilType > >( bdId );
auto dataTypeInfoFull = make_shared< field::communication::UniformMPIDatatypeInfo<GPUFieldType> >( bdId ); auto dataTypeInfoFull = make_shared< field::communication::UniformMPIDatatypeInfo<GPUFieldType> >( bdId );
switch(_commSchemeType) switch(_commSchemeType)
{ {
case GPUPackInfo_Baseline: case GPUPackInfo_Baseline:
_gpuPackInfo = make_shared< cuda::communication::GPUPackInfo< GPUFieldType > >( bdId ); _gpuPackInfo = make_shared< gpu::communication::GPUPackInfo< GPUFieldType > >( bdId );
_cpuCommunicationScheme = make_shared< blockforest::communication::UniformBufferedScheme< StencilType > >( bf ); _cpuCommunicationScheme = make_shared< blockforest::communication::UniformBufferedScheme< StencilType > >( bf );
_cpuCommunicationScheme->addPackInfo( _gpuPackInfo ); _cpuCommunicationScheme->addPackInfo( _gpuPackInfo );
break; break;
case GPUPackInfo_Streams: case GPUPackInfo_Streams:
_gpuPackInfo = make_shared< cuda::communication::GPUPackInfo< GPUFieldType > >( bdId ); _gpuPackInfo = make_shared< gpu::communication::GPUPackInfo< GPUFieldType > >( bdId );
_cpuCommunicationScheme = make_shared< blockforest::communication::UniformBufferedScheme< StencilType > >( bf ); _cpuCommunicationScheme = make_shared< blockforest::communication::UniformBufferedScheme< StencilType > >( bf );
_cpuCommunicationScheme->addPackInfo( _gpuPackInfo ); _cpuCommunicationScheme->addPackInfo( _gpuPackInfo );
break; break;
case UniformGPUScheme_Baseline: case UniformGPUScheme_Baseline:
_gpuCommunicationScheme = make_shared< cuda::communication::UniformGPUScheme< StencilType > >( bf, cudaEnabledMPI ); _gpuCommunicationScheme = make_shared< gpu::communication::UniformGPUScheme< StencilType > >( bf, cudaEnabledMPI );
_gpuCommunicationScheme->addPackInfo( generatedPackInfo ); _gpuCommunicationScheme->addPackInfo( generatedPackInfo );
break; break;
case UniformGPUScheme_Memcpy: case UniformGPUScheme_Memcpy:
_gpuCommunicationScheme = make_shared< cuda::communication::UniformGPUScheme< StencilType > >( bf, cudaEnabledMPI ); _gpuCommunicationScheme = make_shared< gpu::communication::UniformGPUScheme< StencilType > >( bf, cudaEnabledMPI );
_gpuCommunicationScheme->addPackInfo( memcpyPackInfo ); _gpuCommunicationScheme->addPackInfo( memcpyPackInfo );
break; break;
case MPIDatatypes: case MPIDatatypes:
...@@ -151,7 +151,7 @@ public: ...@@ -151,7 +151,7 @@ public:
private: private:
CommunicationSchemeType _commSchemeType; CommunicationSchemeType _commSchemeType;
shared_ptr< blockforest::communication::UniformBufferedScheme< StencilType > > _cpuCommunicationScheme; shared_ptr< blockforest::communication::UniformBufferedScheme< StencilType > > _cpuCommunicationScheme;
shared_ptr< cuda::communication::GPUPackInfo< GPUFieldType > > _gpuPackInfo; shared_ptr< gpu::communication::GPUPackInfo< GPUFieldType > > _gpuPackInfo;
shared_ptr< cuda::communication::UniformGPUScheme< StencilType > > _gpuCommunicationScheme; shared_ptr< gpu::communication::UniformGPUScheme< StencilType > > _gpuCommunicationScheme;
shared_ptr< blockforest::communication::UniformDirectScheme<StencilType> > _directScheme; shared_ptr< blockforest::communication::UniformDirectScheme<StencilType> > _directScheme;
}; };
#!/usr/bin/env python3
"""
This is a waLBerla parameter file that tests (almost) all parameter combinations for GPU communication.
Build waLBerla with -DWALBERLA_BUILD_WITH_PYTHON=1 then run e.g.
./UniformGridGPU_d3q27_aa_srt simulation_setup/benchmark_configs.py
Look at the end of the file to select the benchmark to run
"""
import os
import waLBerla as wlb
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
from copy import deepcopy
from functools import reduce
import operator
import sys
import sqlite3
# Number of time steps run for a workload of 128^3 per GPU
# if double as many cells are on the GPU, half as many time steps are run etc.
# increase this to get more reliable measurements
TIME_STEPS_FOR_128_BLOCK = 200
DB_FILE = "gpu_benchmark.sqlite3"
BASE_CONFIG = {
'DomainSetup': {
'cellsPerBlock': (256, 128, 128),
'periodic': (1, 1, 1),
},
'Parameters': {
'omega': 1.8,
'cudaEnabledMPI': False,
'warmupSteps': 5,
'outerIterations': 3,
}
}
def prod(seq):
return reduce(operator.mul, seq, 1)
def num_time_steps(block_size, time_steps_for_128_block=200):
cells = block_size[0] * block_size[1] * block_size[2]
time_steps = (128 ** 3 / cells) * time_steps_for_128_block
return int(time_steps)
def cuda_block_size_ok(block_size, regs_per_threads=168):
"""Checks if a given CUDA block size does not exceed the SM register limit.
168 registers per thread was obtained using cuobjdump on both SRT and Cumulant
kernels. You might want to validate that for your own kernels."""
return prod(block_size) * regs_per_threads < 64 * (2 ** 10)
def domain_block_size_ok(block_size, total_mem, gls=1, q=27, size_per_value=8):
"""Checks if a single block of given size fits into GPU memory"""
return prod(b + 2 * gls for b in block_size) * q * size_per_value < total_mem
class Scenario:
def __init__(self, cells_per_block=(256, 128, 128), db_file=DB_FILE, **kwargs):
self.db_file = db_file
self.config_dict = deepcopy(BASE_CONFIG)
self.config_dict['Parameters'].update(kwargs)
self.config_dict['DomainSetup']['blocks'] = block_decomposition(wlb.mpi.numProcesses())
self.config_dict['DomainSetup']['cellsPerBlock'] = cells_per_block
@wlb.member_callback
def config(self):
from pprint import pformat
wlb.log_info_on_root("Scenario:\n" + pformat(self.config_dict))
# Write out the configuration as text-based prm:
# print(toPrm(self.config_dict))
return self.config_dict
@wlb.member_callback
def results_callback(self, **kwargs):
data = {}
data.update(self.config_dict['Parameters'])
data.update(self.config_dict['DomainSetup'])
data.update(kwargs)
data['executable'] = sys.argv[0]
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
sequenceValuesToScalars(data)
result = data
sequenceValuesToScalars(result)
num_tries = 4
# check multiple times e.g. may fail when multiple benchmark processes are running
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, "runs", self.db_file)
storeSingle(result, "runs", self.db_file)
break
except sqlite3.OperationalError as e:
wlb.log_warning("Sqlite DB writing failed: try {}/{} {}".format(num_try + 1, num_tries, str(e)))
# -------------------------------------- Functions trying different parameter sets -----------------------------------
def overlap_benchmark():
"""Tests different communication overlapping strategies"""
wlb.log_info_on_root("Running different communication overlap strategies")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
inner_outer_splits = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)]
# 'GPUPackInfo_Baseline', 'GPUPackInfo_Streams'
for comm_strategy in ['UniformGPUScheme_Baseline', 'UniformGPUScheme_Memcpy']:
# no overlap
scenarios.add(Scenario(timeStepStrategy='noOverlap', communicationScheme=comm_strategy,
innerOuterSplit=(1, 1, 1)))
# overlap
for overlap_strategy in ['simpleOverlap', 'complexOverlap']:
for inner_outer_split in inner_outer_splits:
scenario = Scenario(timeStepStrategy=overlap_strategy,
communicationScheme=comm_strategy,
innerOuterSplit=inner_outer_split,
timesteps=num_time_steps((256, 128, 128)))
scenarios.add(scenario)
def communication_compare():
"""Tests different communication strategies"""
wlb.log_info_on_root("Running benchmarks to compare communication strategies")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
for block_size in [(128, 128, 128), (32, 32, 32), (64, 64, 64), (256, 256, 256)]:
for comm_strategy in ['UniformGPUScheme_Baseline', 'UniformGPUScheme_Memcpy']:
sc = Scenario(cells_per_block=block_size,
gpuBlockSize=(128, 1, 1),
timeStepStrategy='noOverlap',
communicationScheme=comm_strategy,
timesteps=num_time_steps(block_size))
scenarios.add(sc)
for inner_outer_split in [(4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1)]:
# ensure that the inner part of the domain is still large enough
if 3 * inner_outer_split[0] > block_size[0]:
continue
sc = Scenario(cells_per_block=block_size,
gpuBlockSize=(128, 1, 1),
timeStepStrategy='simpleOverlap',
innerOuterSplit=inner_outer_split,
communicationScheme=comm_strategy,
timesteps=num_time_steps(block_size))
scenarios.add(sc)
def single_gpu_benchmark():
"""Benchmarks only the LBM compute kernel"""
wlb.log_info_on_root("Running single GPU benchmarks")
wlb.log_info_on_root("")
gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
gpu_mem = gpu_mem_gb * (2 ** 30)
gpu_type = os.environ.get('GPU_TYPE')
kwargs = {}
if gpu_type is not None:
kwargs['gpu_type'] = gpu_type
scenarios = wlb.ScenarioManager()
block_sizes = [(i, i, i) for i in (64, 128, 256, 384)] + [(512, 512, 128)]
cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), (512, 1, 1),
(32, 2, 1), (64, 2, 1), (128, 2, 1), (256, 2, 1),
(32, 4, 1), (64, 4, 1), (128, 4, 1),
(32, 8, 1), (64, 8, 1),
(32, 16, 1)]
for block_size in block_sizes:
for cuda_block_size in cuda_blocks:
if not cuda_block_size_ok(cuda_block_size):
wlb.log_info_on_root(f"Cuda block size {cuda_block_size} would exceed register limit. Skipping.")
continue
if not domain_block_size_ok(block_size, gpu_mem):
wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
continue
scenario = Scenario(cells_per_block=block_size,
gpuBlockSize=cuda_block_size,
timeStepStrategy='kernelOnly',
timesteps=num_time_steps(block_size),
**kwargs)
scenarios.add(scenario)
# -------------------------------------- Optional job script generation for PizDaint ---------------------------------
job_script_header = """
#!/bin/bash -l
#SBATCH --job-name=scaling
#SBATCH --time=0:30:00
#SBATCH --nodes={nodes}
#SBATCH -o out_scaling_{nodes}_%j.txt
#SBATCH -e err_scaling_{nodes}_%j.txt
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --partition=normal
#SBATCH --constraint=gpu
#SBATCH --account=d105
cd {folder}
source ~/env.sh
module load daint-gpu
module load craype-accel-nvidia60
export MPICH_RDMA_ENABLED_CUDA=1 # allow GPU-GPU data transfer
export CRAY_CUDA_MPS=1 # allow GPU sharing
export MPICH_G2G_PIPELINE=256 # adapt maximum number of concurrent in-flight messages
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
export CRAY_CUDA_MPS=1
export MPICH_RANK_REORDER_METHOD=3
export PMI_MMAP_SYNC_WAIT_TIME=300
# grid_order -R -H -c 1,1,8 -g 16,16,8
ulimit -c 0
"""
job_script_exe_part = """
export WALBERLA_SCENARIO_IDX=0
while srun -n {nodes} ./{app} {config}
do
((WALBERLA_SCENARIO_IDX++))
done
"""
all_executables = ('UniformGridBenchmarkGPU_mrt_d3q27',
'UniformGridBenchmarkGPU_smagorinsky_d3q27',
'UniformGridBenchmarkGPU_cumulant'
'UniformGridBenchmarkGPU_cumulant_d3q27')
def generate_jobscripts(exe_names=all_executables):
for node_count in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 2400]:
folder_name = "scaling_{:04d}".format(node_count)
os.makedirs(folder_name, exist_ok=True)
# run grid_order
import subprocess
decomposition = block_decomposition(node_count)
decomposition_str = ",".join(str(e) for e in decomposition)
subprocess.check_call(['grid_order', '-R', '-H', '-g', decomposition_str])
job_script = job_script_header.format(nodes=node_count, folder=os.path.join(os.getcwd(), folder_name))
for exe in exe_names:
job_script += job_script_exe_part.format(app="../" + exe, nodes=node_count,
config='../communication_compare.py')
with open(os.path.join(folder_name, 'job.sh'), 'w') as f:
f.write(job_script)
if __name__ == '__main__':
print("Called without waLBerla - generating job scripts for PizDaint")
generate_jobscripts()
else:
wlb.log_info_on_root("Batch run of benchmark scenarios, saving result to {}".format(DB_FILE))
# Select the benchmark you want to run
single_gpu_benchmark()
# benchmarks different CUDA block sizes and domain sizes and measures single
# GPU performance of compute kernel (no communication)
# communication_compare(): benchmarks different communication routines, with and without overlap
# overlap_benchmark(): benchmarks different communication overlap options
...@@ -8,6 +8,7 @@ DomainSetup ...@@ -8,6 +8,7 @@ DomainSetup
Parameters Parameters
{ {
initShearFlow False;
cudaEnabledMPI False; // set to true, if MPI was compiled with CUDA cudaEnabledMPI False; // set to true, if MPI was compiled with CUDA
outerIterations 3; // number of measurements to make outerIterations 3; // number of measurements to make
timeStepStrategy simpleOverlap; // one of simpleOverlap, noOverlap, the non-AA version also supports complexOverlap timeStepStrategy simpleOverlap; // one of simpleOverlap, noOverlap, the non-AA version also supports complexOverlap
...@@ -35,3 +36,10 @@ Parameters ...@@ -35,3 +36,10 @@ Parameters
// GPUPackInfo_Baseline: old implementation based on communication mechanism for CPUs // GPUPackInfo_Baseline: old implementation based on communication mechanism for CPUs
// GPUPackInfo_Streams: same as above but with CUDA streams // GPUPackInfo_Streams: same as above but with CUDA streams
} }
Boundaries {
Border { direction T; walldistance -1; flag UBB; }
Border { direction B; walldistance -1; flag NoSlip; }
Border { direction W; walldistance -1; flag NoSlip; }
Border { direction E; walldistance -1; flag NoSlip; }
}
File added
#!/usr/bin/env python3
"""
This is a waLBerla parameter file that tests (almost) all parameter combinations for GPU communication.
Build waLBerla with -DWALBERLA_BUILD_WITH_PYTHON=1 then run e.g.
./UniformGridBenchmarkGPU_AA_trt simulation_setup/benchmark_configs.py
Look at the end of the file to select the benchmark to run
"""
import os import os
import waLBerla as wlb import waLBerla as wlb
from waLBerla.tools.config import block_decomposition from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
from copy import deepcopy
import sys import sys
import sqlite3 import sqlite3
from math import prod
try:
import machinestate as ms
except ImportError:
ms = None
# Number of time steps run for a workload of 128^3 per GPU # Number of time steps run for a workload of 128^3 per GPU
# if double as many cells are on the GPU, half as many time steps are run etc. # if double as many cells are on the GPU, half as many time steps are run etc.
# increase this to get more reliable measurements # increase this to get more reliable measurements
TIME_STEPS_FOR_128_BLOCK = 200 TIME_STEPS_FOR_128_BLOCK = 1000
DB_FILE = "gpu_benchmark.sqlite3" DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3")
BENCHMARK = int(os.environ.get('BENCHMARK', 0))
WeakX = int(os.environ.get('WeakX', 128))
WeakY = int(os.environ.get('WeakY', 128))
WeakZ = int(os.environ.get('WeakZ', 128))
StrongX = int(os.environ.get('StrongX', 128))
StrongY = int(os.environ.get('StrongY', 128))
StrongZ = int(os.environ.get('StrongZ', 128))
BASE_CONFIG = { BASE_CONFIG = {
'DomainSetup': { 'DomainSetup': {
...@@ -34,27 +39,107 @@ BASE_CONFIG = { ...@@ -34,27 +39,107 @@ BASE_CONFIG = {
} }
} }
ldc_setup = {'Border': [
{'direction': 'N', 'walldistance': -1, 'flag': 'UBB'},
{'direction': 'W', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'E', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
]}
def num_time_steps(block_size):
def num_time_steps(block_size, time_steps_for_128_block=200):
cells = block_size[0] * block_size[1] * block_size[2] cells = block_size[0] * block_size[1] * block_size[2]
time_steps = (128 ** 3 / cells) * TIME_STEPS_FOR_128_BLOCK time_steps = (128 ** 3 / cells) * time_steps_for_128_block
if time_steps < 10:
time_steps = 10
return int(time_steps) return int(time_steps)
def cuda_block_size_ok(block_size, regs_per_threads=168):
"""Checks if a given CUDA block size does not exceed the SM register limit.
168 registers per thread was obtained using cuobjdump on both SRT and Cumulant
kernels. You might want to validate that for your own kernels."""
return prod(block_size) * regs_per_threads < 64 * (2 ** 10)
def domain_block_size_ok(block_size, total_mem, gls=1, q=27, size_per_value=8):
"""Checks if a single block of given size fits into GPU memory"""
return prod(b + 2 * gls for b in block_size) * q * size_per_value < total_mem
class Scenario: class Scenario:
def __init__(self, cells_per_block=(256, 128, 128), **kwargs): def __init__(self, cells_per_block=(256, 128, 128), periodic=(1, 1, 1), cuda_blocks=(128, 1, 1),
self.config_dict = deepcopy(BASE_CONFIG) timesteps=None, time_step_strategy="normal", omega=1.8, cuda_enabled_mpi=False,
self.config_dict['Parameters'].update(kwargs) inner_outer_split=(1, 1, 1), warmup_steps=5, outer_iterations=3,
self.config_dict['DomainSetup']['blocks'] = block_decomposition(wlb.mpi.numProcesses()) init_shear_flow=False, boundary_setup=False,
self.config_dict['DomainSetup']['cellsPerBlock'] = cells_per_block vtk_write_frequency=0, remaining_time_logger_frequency=-1,
additional_info=None, blocks=None, db_file_name=None):
if boundary_setup:
init_shear_flow = False
periodic = (0, 0, 0)
self.blocks = blocks if blocks else block_decomposition(wlb.mpi.numProcesses())
self.cells_per_block = cells_per_block
self.periodic = periodic
self.time_step_strategy = time_step_strategy
self.omega = omega
self.timesteps = timesteps if timesteps else num_time_steps(cells_per_block)
self.cuda_enabled_mpi = cuda_enabled_mpi
self.inner_outer_split = inner_outer_split
self.init_shear_flow = init_shear_flow
self.boundary_setup = boundary_setup
self.warmup_steps = warmup_steps
self.outer_iterations = outer_iterations
self.cuda_blocks = cuda_blocks
self.vtk_write_frequency = vtk_write_frequency
self.remaining_time_logger_frequency = remaining_time_logger_frequency
self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False)
self.additional_info = additional_info
@wlb.member_callback @wlb.member_callback
def config(self, **kwargs): def config(self, print_dict=True):
from pprint import pformat from pprint import pformat
wlb.log_info_on_root("Scenario:\n" + pformat(self.config_dict)) config_dict = {
# Write out the configuration as text-based prm: 'DomainSetup': {
# print(toPrm(self.config_dict)) 'blocks': self.blocks,
return self.config_dict 'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic,
},
'Parameters': {
'omega': self.omega,
'cudaEnabledMPI': self.cuda_enabled_mpi,
'warmupSteps': self.warmup_steps,
'outerIterations': self.outer_iterations,
'timeStepStrategy': self.time_step_strategy,
'timesteps': self.timesteps,
'initShearFlow': self.init_shear_flow,
'gpuBlockSize': self.cuda_blocks,
'innerOuterSplit': self.inner_outer_split,
'vtkWriteFrequency': self.vtk_write_frequency,
'remainingTimeLoggerFrequency': self.remaining_time_logger_frequency
},
'Logging': {
'logLevel': 'info', # info progress detail tracing
}
}
if self.boundary_setup:
config_dict["Boundaries"] = ldc_setup
if print_dict:
wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
if self.additional_info:
wlb.log_info_on_root("Additional Info:\n" + pformat(self.additional_info))
return config_dict
@wlb.member_callback @wlb.member_callback
def results_callback(self, **kwargs): def results_callback(self, **kwargs):
...@@ -62,23 +147,53 @@ class Scenario: ...@@ -62,23 +147,53 @@ class Scenario:
data.update(self.config_dict['Parameters']) data.update(self.config_dict['Parameters'])
data.update(self.config_dict['DomainSetup']) data.update(self.config_dict['DomainSetup'])
data.update(kwargs) data.update(kwargs)
if self.additional_info is not None:
data.update(self.additional_info)
data['executable'] = sys.argv[0] data['executable'] = sys.argv[0]
data['compile_flags'] = wlb.build_info.compiler_flags data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine data['build_machine'] = wlb.build_info.build_machine
if ms:
state = ms.MachineState(extended=False, anonymous=True)
state.generate() # generate subclasses
state.update() # read information
data["MachineState"] = str(state.get())
else:
print("MachineState module is not available. MachineState was not saved")
sequenceValuesToScalars(data) sequenceValuesToScalars(data)
result = data result = data
sequenceValuesToScalars(result) sequenceValuesToScalars(result)
num_tries = 4 num_tries = 4
# check multiple times e.g. may fail when multiple benchmark processes are running # check multiple times e.g. may fail when multiple benchmark processes are running
table_name = f"runs_{data['stencil']}_{data['streamingPattern']}_{data['collisionSetup']}_{prod(self.blocks)}"
table_name = table_name.replace("-", "_") # - not allowed for table name would lead to syntax error
for num_try in range(num_tries): for num_try in range(num_tries):
try: try:
checkAndUpdateSchema(result, "runs", DB_FILE) checkAndUpdateSchema(result, table_name, self.db_file_name)
storeSingle(result, "runs", DB_FILE) storeSingle(result, table_name, self.db_file_name)
break break
except sqlite3.OperationalError as e: except sqlite3.OperationalError as e:
wlb.log_warning("Sqlite DB writing failed: try {}/{} {}".format(num_try + 1, num_tries, str(e))) wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
# -------------------------------------- Profiling -----------------------------------
def profiling():
"""Tests different communication overlapping strategies"""
wlb.log_info_on_root("Running 2 timesteps for profiling")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
cells = (128, 128, 128)
cuda_enabled_mpi = False
scenarios.add(Scenario(cells_per_block=cells, time_step_strategy='kernelOnly',
inner_outer_split=(1, 1, 1), timesteps=2, cuda_enabled_mpi=cuda_enabled_mpi,
outer_iterations=1, warmup_steps=0))
# -------------------------------------- Functions trying different parameter sets ----------------------------------- # -------------------------------------- Functions trying different parameter sets -----------------------------------
...@@ -90,158 +205,153 @@ def overlap_benchmark(): ...@@ -90,158 +205,153 @@ def overlap_benchmark():
wlb.log_info_on_root("") wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager() scenarios = wlb.ScenarioManager()
cuda_enabled_mpi = False
inner_outer_splits = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1), inner_outer_splits = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1), (4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)] (4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)]
# 'GPUPackInfo_Baseline', 'GPUPackInfo_Streams' # no overlap
for comm_strategy in ['UniformGPUScheme_Baseline', 'UniformGPUScheme_Memcpy']: scenarios.add(Scenario(time_step_strategy='noOverlap',
# no overlap inner_outer_split=(1, 1, 1),
scenarios.add(Scenario(timeStepStrategy='noOverlap', communicationScheme=comm_strategy, cuda_enabled_mpi=cuda_enabled_mpi,
innerOuterSplit=(1, 1, 1))) outer_iterations=1))
# overlap
for overlap_strategy in ['simpleOverlap', 'complexOverlap']:
for inner_outer_split in inner_outer_splits:
scenario = Scenario(timeStepStrategy=overlap_strategy,
communicationScheme=comm_strategy,
innerOuterSplit=inner_outer_split,
timesteps=num_time_steps((256, 128, 128)))
scenarios.add(scenario)
def communication_compare():
"""Tests different communication strategies"""
wlb.log_info_on_root("Running benchmarks to compare communication strategies")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager() for inner_outer_split in inner_outer_splits:
for block_size in [(128, 128, 128), (32, 32, 32), (64, 64, 64), (256, 256, 256)]: scenario = Scenario(time_step_strategy='simpleOverlap',
for comm_strategy in ['UniformGPUScheme_Baseline', 'UniformGPUScheme_Memcpy']: inner_outer_split=inner_outer_split,
cuda_enabled_mpi=cuda_enabled_mpi,
sc = Scenario(cells_per_block=block_size, outer_iterations=1)
gpuBlockSize=(128, 1, 1), scenarios.add(scenario)
timeStepStrategy='noOverlap',
communicationScheme=comm_strategy,
timesteps=num_time_steps(block_size))
scenarios.add(sc)
for inner_outer_split in [(4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1)]:
# ensure that the inner part of the domain is still large enough
if 3 * inner_outer_split[0] > block_size[0]:
continue
sc = Scenario(cells_per_block=block_size,
gpuBlockSize=(128, 1, 1),
timeStepStrategy='simpleOverlap',
innerOuterSplit=inner_outer_split,
communicationScheme=comm_strategy,
timesteps=num_time_steps(block_size))
scenarios.add(sc)
def single_gpu_benchmark(): def no_overlap_scaling(cuda_enabled_mpi=False):
"""Benchmarks only the LBM compute kernel""" """Tests different communication overlapping strategies"""
wlb.log_info_on_root("Running single GPU benchmarks") wlb.log_info_on_root("Running scaling benchmark without communication hiding")
wlb.log_info_on_root("") wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager() scenarios = wlb.ScenarioManager()
block_sizes = [(i, i, i) for i in (64, 128, 256, 384)] + [(512, 512, 128)] # no overlap
cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), (512, 1, 1), scenarios.add(Scenario(cells_per_block=(256, 256, 256),
(32, 2, 1), (64, 2, 1), (128, 2, 1), (256, 2, 1), cuda_blocks=(128, 1, 1),
(32, 4, 1), (64, 4, 1), (128, 4, 1), time_step_strategy='noOverlap',
(32, 8, 1), (64, 8, 1), inner_outer_split=(1, 1, 1),
(32, 16, 1)] cuda_enabled_mpi=cuda_enabled_mpi,
for block_size in block_sizes: outer_iterations=1))
for cuda_block_size in cuda_blocks:
scenario = Scenario(cells_per_block=block_size,
gpuBlockSize=cuda_block_size,
timeStepStrategy='kernelOnly',
timesteps=num_time_steps(block_size))
scenarios.add(scenario)
# -------------------------------------- Optional job script generation for PizDaint ---------------------------------
job_script_header = """
#!/bin/bash -l
#SBATCH --job-name=scaling
#SBATCH --time=0:30:00
#SBATCH --nodes={nodes}
#SBATCH -o out_scaling_{nodes}_%j.txt
#SBATCH -e err_scaling_{nodes}_%j.txt
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --partition=normal
#SBATCH --constraint=gpu
#SBATCH --account=d105
cd {folder} def weak_scaling_overlap(cuda_enabled_mpi=False):
"""Tests different communication overlapping strategies"""
source ~/env.sh wlb.log_info_on_root("Running scaling benchmark with communication hiding")
wlb.log_info_on_root("")
module load daint-gpu
module load craype-accel-nvidia60
export MPICH_RDMA_ENABLED_CUDA=1 # allow GPU-GPU data transfer
export CRAY_CUDA_MPS=1 # allow GPU sharing
export MPICH_G2G_PIPELINE=256 # adapt maximum number of concurrent in-flight messages
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK scenarios = wlb.ScenarioManager()
export CRAY_CUDA_MPS=1
export MPICH_RANK_REORDER_METHOD=3 # overlap
export PMI_MMAP_SYNC_WAIT_TIME=300 for t in ["noOverlap", "simpleOverlap"]:
scenarios.add(Scenario(cells_per_block=(WeakX, WeakY, WeakZ),
cuda_blocks=(128, 1, 1),
time_step_strategy=t,
inner_outer_split=(8, 8, 8),
cuda_enabled_mpi=cuda_enabled_mpi,
outer_iterations=1,
boundary_setup=True,
db_file_name="weakScalingUniformGrid.sqlite3"))
# grid_order -R -H -c 1,1,8 -g 16,16,8 def strong_scaling_overlap(cuda_enabled_mpi=False):
wlb.log_info_on_root("Running strong scaling benchmark with one block per proc with communication hiding")
wlb.log_info_on_root("")
ulimit -c 0 scenarios = wlb.ScenarioManager()
"""
job_script_exe_part = """ domain_size = (StrongX, StrongY, StrongZ)
blocks = block_decomposition(wlb.mpi.numProcesses())
cells_per_block = tuple([d // b for d, b in zip(domain_size, reversed(blocks))])
export WALBERLA_SCENARIO_IDX=0 # overlap
while srun -n {nodes} ./{app} {config} for t in ["noOverlap", "simpleOverlap"]:
do scenarios.add(Scenario(cells_per_block=cells_per_block,
((WALBERLA_SCENARIO_IDX++)) cuda_blocks=(128, 1, 1),
done time_step_strategy=t,
""" inner_outer_split=(1, 1, 1),
cuda_enabled_mpi=cuda_enabled_mpi,
outer_iterations=1,
timesteps=50,
blocks=blocks,
boundary_setup=True,
db_file_name="strongScalingUniformGridOneBlock.sqlite3"))
all_executables = ('UniformGridBenchmarkGPU_mrt_d3q27', def single_gpu_benchmark():
'UniformGridBenchmarkGPU_smagorinsky_d3q27', """Benchmarks only the LBM compute kernel"""
'UniformGridBenchmarkGPU_cumulant' wlb.log_info_on_root("Running single GPU benchmarks")
'UniformGridBenchmarkGPU_cumulant_d3q27') wlb.log_info_on_root("")
gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 40))
gpu_mem = gpu_mem_gb * (2 ** 30)
gpu_type = os.environ.get('GPU_TYPE')
def generate_jobscripts(exe_names=all_executables): additional_info = {}
for node_count in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 2400]: if gpu_type is not None:
folder_name = "scaling_{:04d}".format(node_count) additional_info['gpu_type'] = gpu_type
os.makedirs(folder_name, exist_ok=True)
# run grid_order scenarios = wlb.ScenarioManager()
import subprocess block_sizes = [(i, i, i) for i in (128, 256, 320)]
decomposition = block_decomposition(node_count) cuda_blocks = [(128, 1, 1), ]
decomposition_str = ",".join(str(e) for e in decomposition) for block_size in block_sizes:
subprocess.check_call(['grid_order', '-R', '-H', '-g', decomposition_str]) for cuda_block_size in cuda_blocks:
# cuda_block_size = (256, 1, 1) and block_size = (64, 64, 64) would be cut to cuda_block_size = (64, 1, 1)
if cuda_block_size > block_size:
continue
if not cuda_block_size_ok(cuda_block_size):
wlb.log_info_on_root(f"Cuda block size {cuda_block_size} would exceed register limit. Skipping.")
continue
if not domain_block_size_ok(block_size, gpu_mem):
wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
continue
scenario = Scenario(cells_per_block=block_size,
cuda_blocks=cuda_block_size,
time_step_strategy='kernelOnly',
timesteps=num_time_steps(block_size, 2000),
outer_iterations=1,
additional_info=additional_info)
scenarios.add(scenario)
job_script = job_script_header.format(nodes=node_count, folder=os.path.join(os.getcwd(), folder_name))
for exe in exe_names:
job_script += job_script_exe_part.format(app="../" + exe, nodes=node_count,
config='../communication_compare.py')
with open(os.path.join(folder_name, 'job.sh'), 'w') as f: def validation_run():
f.write(job_script) """Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
wlb.log_info_on_root("Validation run")
wlb.log_info_on_root("")
time_step_strategy = "noOverlap" # "simpleOverlap"
if __name__ == '__main__': scenarios = wlb.ScenarioManager()
print("Called without waLBerla - generating job scripts for PizDaint") scenario = Scenario(cells_per_block=(128, 128, 128),
generate_jobscripts() time_step_strategy=time_step_strategy,
else: timesteps=10001,
wlb.log_info_on_root("Batch run of benchmark scenarios, saving result to {}".format(DB_FILE)) outer_iterations=1,
# Select the benchmark you want to run warmup_steps=0,
init_shear_flow=False,
boundary_setup=True,
vtk_write_frequency=5000,
remaining_time_logger_frequency=30)
scenarios.add(scenario)
wlb.log_info_on_root(f"Batch run of benchmark scenarios, saving result to {DB_FILE}")
# Select the benchmark you want to run
# single_gpu_benchmark() # benchmarks different CUDA block sizes and domain sizes and measures single GPU
# performance of compute kernel (no communication)
# overlap_benchmark() # benchmarks different communication overlap options
# profiling() # run only two timesteps on a smaller domain for profiling only
# validation_run()
if BENCHMARK == 0:
single_gpu_benchmark() single_gpu_benchmark()
# benchmarks different CUDA block sizes and domain sizes and measures single elif BENCHMARK == 1:
# GPU performance of compute kernel (no communication) weak_scaling_overlap(True)
# communication_compare(): benchmarks different communication routines, with and without overlap elif BENCHMARK == 2:
# overlap_benchmark(): benchmarks different communication overlap options strong_scaling_overlap(True)
else:
validation_run()
import os
import waLBerla as wlb
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
import sys
import sqlite3
from math import prod
try:
import machinestate as ms
except ImportError:
ms = None
# Number of time steps run for a workload of 128^3 per GPU
# if double as many cells are on the GPU, half as many time steps are run etc.
# increase this to get more reliable measurements
TIME_STEPS_FOR_128_BLOCK = int(os.environ.get('TIME_STEPS_FOR_128_BLOCK', 1000))
DB_FILE = os.environ.get('DB_FILE', "gpu_benchmark.sqlite3")
BENCHMARK = int(os.environ.get('BENCHMARK', 0))
WeakX = int(os.environ.get('WeakX', 128))
WeakY = int(os.environ.get('WeakY', 128))
WeakZ = int(os.environ.get('WeakZ', 128))
StrongX = int(os.environ.get('StrongX', 128))
StrongY = int(os.environ.get('StrongY', 128))
StrongZ = int(os.environ.get('StrongZ', 128))
BASE_CONFIG = {
'DomainSetup': {
'cellsPerBlock': (256, 128, 128),
'periodic': (1, 1, 1),
},
'Parameters': {
'omega': 1.8,
'cudaEnabledMPI': False,
'warmupSteps': 5,
'outerIterations': 3,
}
}
ldc_setup = {'Border': [
{'direction': 'N', 'walldistance': -1, 'flag': 'UBB'},
{'direction': 'W', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'E', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'S', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'B', 'walldistance': -1, 'flag': 'NoSlip'},
{'direction': 'T', 'walldistance': -1, 'flag': 'NoSlip'},
]}
def num_time_steps(block_size, time_steps_for_128_block=TIME_STEPS_FOR_128_BLOCK):
"""
Calculate the number of time steps based on the block size.
This function computes the number of time steps required for a given block size
by scaling the time steps that could be executed on one process within one second
for a 128x128x128 cells_per_block to the given cells_per_block size.
Parameters:
block_size (tuple): A tuple of three integers representing the dimensions of the cells_per_block (x, y, z).
time_steps_for_128_block (int, optional): The number of time steps for a 128x128x128 block. Default is 1000,
which is approximately the number of timesteps that were executed within one second on one entire MareNostrum5-acc node.
Returns:
int: The calculated number of time steps, with a minimum value of 10.
"""
cells = block_size[0] * block_size[1] * block_size[2]
time_steps = (128 ** 3 / cells) * time_steps_for_128_block
if time_steps < 10:
time_steps = 10
return int(time_steps)
def cuda_block_size_ok(block_size, regs_per_threads=168):
"""
Checks if a given CUDA block size does not exceed the SM register limit.
168 registers per thread was obtained using cuobjdump on both SRT and Cumulant
kernels. You might want to validate that for your own kernels.
"""
return prod(block_size) * regs_per_threads < 64 * (2 ** 10)
def domain_block_size_ok(block_size, total_mem, gls=1, q=27, size_per_value=8):
"""
Checks if a single block of given size fits into GPU memory.
"""
return prod(b + 2 * gls for b in block_size) * q * size_per_value < total_mem
class Scenario:
def __init__(self, cells_per_block=(256, 128, 128), periodic=(1, 1, 1), cuda_blocks=(128, 1, 1),
timesteps=None, time_step_strategy="normal", omega=1.8, cuda_enabled_mpi=False,
inner_outer_split=(1, 1, 1), warmup_steps=5, outer_iterations=3,
init_shear_flow=False, boundary_setup=False,
vtk_write_frequency=0, remaining_time_logger_frequency=-1,
additional_info=None, blocks=None, db_file_name=None):
if boundary_setup:
init_shear_flow = False
periodic = (0, 0, 0)
self.blocks = blocks if blocks else block_decomposition(wlb.mpi.numProcesses())
self.cells_per_block = cells_per_block
self.periodic = periodic
self.time_step_strategy = time_step_strategy
self.omega = omega
self.timesteps = timesteps if timesteps else num_time_steps(cells_per_block)
self.cuda_enabled_mpi = cuda_enabled_mpi
self.inner_outer_split = inner_outer_split
self.init_shear_flow = init_shear_flow
self.boundary_setup = boundary_setup
self.warmup_steps = warmup_steps
self.outer_iterations = outer_iterations
self.cuda_blocks = cuda_blocks
self.vtk_write_frequency = vtk_write_frequency
self.remaining_time_logger_frequency = remaining_time_logger_frequency
self.db_file_name = DB_FILE if db_file_name is None else db_file_name
self.config_dict = self.config(print_dict=False)
self.additional_info = additional_info
@wlb.member_callback
def config(self, print_dict=True):
from pprint import pformat
config_dict = {
'DomainSetup': {
'blocks': self.blocks,
'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic,
},
'Parameters': {
'omega': self.omega,
'cudaEnabledMPI': self.cuda_enabled_mpi,
'warmupSteps': self.warmup_steps,
'outerIterations': self.outer_iterations,
'timeStepStrategy': self.time_step_strategy,
'timesteps': self.timesteps,
'initShearFlow': self.init_shear_flow,
'gpuBlockSize': self.cuda_blocks,
'innerOuterSplit': self.inner_outer_split,
'vtkWriteFrequency': self.vtk_write_frequency,
'remainingTimeLoggerFrequency': self.remaining_time_logger_frequency
},
'Logging': {
'logLevel': 'info', # info progress detail tracing
}
}
if self.boundary_setup:
config_dict["Boundaries"] = ldc_setup
if print_dict:
wlb.log_info_on_root("Scenario:\n" + pformat(config_dict))
if self.additional_info:
wlb.log_info_on_root("Additional Info:\n" + pformat(self.additional_info))
return config_dict
@wlb.member_callback
def results_callback(self, **kwargs):
data = {}
data.update(self.config_dict['Parameters'])
data.update(self.config_dict['DomainSetup'])
data.update(kwargs)
if self.additional_info is not None:
data.update(self.additional_info)
data['executable'] = sys.argv[0]
data['compile_flags'] = wlb.build_info.compiler_flags
data['walberla_version'] = wlb.build_info.version
data['build_machine'] = wlb.build_info.build_machine
if ms:
state = ms.MachineState(extended=False, anonymous=True)
state.generate() # generate subclasses
state.update() # read information
data["MachineState"] = str(state.get())
else:
print("MachineState module is not available. MachineState was not saved")
sequenceValuesToScalars(data)
result = data
sequenceValuesToScalars(result)
num_tries = 4
# check multiple times e.g. may fail when multiple benchmark processes are running
table_name = f"runs_{data['stencil']}_{data['streamingPattern']}_{data['collisionSetup']}_{prod(self.blocks)}"
table_name = table_name.replace("-", "_") # - not allowed for table name would lead to syntax error
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, table_name, self.db_file_name)
storeSingle(result, table_name, self.db_file_name)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
# -------------------------------------- Functions trying different parameter sets -----------------------------------
def weak_scaling_overlap(cuda_enabled_mpi=False):
wlb.log_info_on_root("Running scaling benchmark with communication hiding")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
# overlap
for t in ["simpleOverlap"]:
scenarios.add(Scenario(cells_per_block=(WeakX, WeakY, WeakZ),
cuda_blocks=(128, 1, 1),
time_step_strategy=t,
inner_outer_split=(8, 8, 8),
cuda_enabled_mpi=cuda_enabled_mpi,
outer_iterations=1,
boundary_setup=True,
db_file_name="weakScalingUniformGrid.sqlite3"))
def strong_scaling_overlap(cuda_enabled_mpi=False):
wlb.log_info_on_root("Running strong scaling benchmark with one block per proc with communication hiding")
wlb.log_info_on_root("")
scenarios = wlb.ScenarioManager()
domain_size = (StrongX, StrongY, StrongZ)
blocks = block_decomposition(wlb.mpi.numProcesses())
cells_per_block = tuple([d // b for d, b in zip(domain_size, reversed(blocks))])
# overlap
for t in ["simpleOverlap"]:
scenarios.add(Scenario(cells_per_block=cells_per_block,
cuda_blocks=(128, 1, 1),
time_step_strategy=t,
inner_outer_split=(1, 1, 1),
cuda_enabled_mpi=cuda_enabled_mpi,
outer_iterations=1,
timesteps=num_time_steps(cells_per_block),
blocks=blocks,
boundary_setup=True,
db_file_name="strongScalingUniformGridOneBlock.sqlite3"))
def single_gpu_benchmark():
"""Benchmarks only the LBM compute kernel"""
wlb.log_info_on_root("Running single GPU benchmarks")
wlb.log_info_on_root("")
gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 40))
gpu_mem = gpu_mem_gb * (2 ** 30)
gpu_type = os.environ.get('GPU_TYPE')
additional_info = {}
if gpu_type is not None:
additional_info['gpu_type'] = gpu_type
scenarios = wlb.ScenarioManager()
block_sizes = [(i, i, i) for i in (128, 256, 320)]
cuda_blocks = [(128, 1, 1), ]
for block_size in block_sizes:
for cuda_block_size in cuda_blocks:
# cuda_block_size = (256, 1, 1) and block_size = (64, 64, 64) would be cut to cuda_block_size = (64, 1, 1)
if cuda_block_size > block_size:
continue
if not cuda_block_size_ok(cuda_block_size):
wlb.log_info_on_root(f"Cuda block size {cuda_block_size} would exceed register limit. Skipping.")
continue
if not domain_block_size_ok(block_size, gpu_mem):
wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
continue
scenario = Scenario(cells_per_block=block_size,
cuda_blocks=cuda_block_size,
time_step_strategy='kernelOnly',
timesteps=num_time_steps(block_size, 2000),
outer_iterations=1,
additional_info=additional_info)
scenarios.add(scenario)
def validation_run():
"""Run with full periodic shear flow or boundary scenario (ldc) to check if the code works"""
wlb.log_info_on_root("Validation run")
wlb.log_info_on_root("")
time_step_strategy = "noOverlap" # "simpleOverlap"
scenarios = wlb.ScenarioManager()
scenario = Scenario(cells_per_block=(128, 128, 128),
time_step_strategy=time_step_strategy,
timesteps=10001,
outer_iterations=1,
warmup_steps=0,
init_shear_flow=False,
boundary_setup=True,
vtk_write_frequency=5000,
remaining_time_logger_frequency=30)
scenarios.add(scenario)
wlb.log_info_on_root(f"Batch run of benchmark scenarios, saving result to {DB_FILE}")
# Select the benchmark you want to run
# single_gpu_benchmark() # benchmarks different CUDA block sizes and domain sizes and measures single GPU
# performance of compute kernel (no communication)
# overlap_benchmark() # benchmarks different communication overlap options
# profiling() # run only two timesteps on a smaller domain for profiling only
# validation_run()
if BENCHMARK == 0:
single_gpu_benchmark()
elif BENCHMARK == 1:
weak_scaling_overlap(True)
elif BENCHMARK == 2:
strong_scaling_overlap(True)
else:
validation_run()
waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_link_files_to_builddir( "*.py" )
foreach(config trt smagorinsky mrt entropic_kbc_n4 cumulant )
waLBerla_generate_target_from_python(NAME UniformGridGenerated_${config}
CODEGEN_CFG ${config}
FILE UniformGridGenerated.py
OUT_FILES GenMacroGetter.cpp GenMacroGetter.h GenMacroSetter.cpp GenMacroSetter.h
GenPackInfo.cpp GenPackInfo.h GenPackInfoAAPush.cpp GenPackInfoAAPush.h GenPackInfoAAPull.cpp GenPackInfoAAPull.h
GenLbKernel.cpp GenLbKernel.h GenLbKernelAAEven.cpp GenLbKernelAAEven.h GenLbKernelAAOdd.cpp GenLbKernelAAOdd.h
GenMpiDtypeInfo.h GenMpiDtypeInfoAAPull.h GenMpiDtypeInfoAAPush.h
GenDefines.h)
waLBerla_add_executable ( NAME UniformGridBenchmarkGenerated_${config}
FILES UniformGridGenerated.cpp
DEPENDS blockforest boundary core domain_decomposition field geometry timeloop vtk gui
UniformGridGenerated_${config} python_coupling)
set_target_properties(UniformGridBenchmarkGenerated_${config} PROPERTIES CXX_VISIBILITY_PRESET hidden)
endforeach()
DomainSetup
{
blocks < 1, 1, 1 >;
cellsPerBlock < 64, 64, 64 >;
periodic < 1, 1, 1 >;
}
Parameters
{
timesteps 200; // time steps of one performance measurement
warmupSteps 1; // number of steps to run before measurement starts
outerIterations 4; // how many measurements to conduct
vtkWriteFrequency 0; // write a VTK file every n'th step, if zero VTK output is disabled
timeStepMode twoField;
//twoFieldKernelType manualD3Q19;
remainingTimeLoggerFrequency 6; // interval in seconds to log the estimated remaining time
directComm 0;
omega 1.8;
shearVelocityMagnitude 0.02;
useGui 0;
}
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "core/OpenMP.h"
#include "python_coupling/CreateConfig.h"
#include "python_coupling/PythonCallback.h"
#include "python_coupling/DictWrapper.h"
#include "blockforest/Initialization.h"
#include "field/vtk/VTKWriter.h"
#include "field/AddToStorage.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "blockforest/communication/UniformDirectScheme.h"
#include "timeloop/all.h"
#include "core/timing/TimingPool.h"
#include "core/timing/RemainingTimeLogger.h"
#include "core/waLBerlaBuildInfo.h"
#include "domain_decomposition/SharedSweep.h"
#include "gui/Gui.h"
#include "InitShearVelocity.h"
#include "ManualKernels.h"
#include "lbm/lattice_model/D3Q19.h"
#include "GenDefines.h"
#include "GenMacroGetter.h"
#include "GenMacroSetter.h"
#include "GenLbKernel.h"
#include "GenLbKernelAAEven.h"
#include "GenLbKernelAAOdd.h"
#include "GenPackInfo.h"
#include "GenPackInfoAAPush.h"
#include "GenPackInfoAAPull.h"
#include "GenMpiDtypeInfo.h"
#include "GenMpiDtypeInfoAAPull.h"
#include "GenMpiDtypeInfoAAPush.h"
#include <iomanip>
using namespace walberla;
using PdfField_T = GhostLayerField< real_t, Stencil_T::Q >;
using VelocityField_T = GhostLayerField< real_t, 3 >;
int main( int argc, char **argv )
{
mpi::Environment env( argc, argv );
for( auto cfg = python_coupling::configBegin( argc, argv ); cfg != python_coupling::configEnd(); ++cfg )
{
WALBERLA_MPI_WORLD_BARRIER()
auto config = *cfg;
logging::configureLogging( config );
auto blocks = blockforest::createUniformBlockGridFromConfig( config );
Vector3<uint_t> cellsPerBlock = config->getBlock( "DomainSetup" ).getParameter<Vector3<uint_t> >( "cellsPerBlock" );
// Reading parameters
auto parameters = config->getOneBlock( "Parameters" );
const std::string timeStepMode = parameters.getParameter<std::string>( "timeStepMode", "twoField");
const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 ));
uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 60 ));
const real_t shearVelocityMagnitude = parameters.getParameter<real_t>("shearVelocityMagnitude", real_t(0.08));
const bool directComm = parameters.getParameter<bool>("directComm", false);
auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage * const storage) {
return new PdfField_T(storage->getNumberOfXCells(*block),
storage->getNumberOfYCells(*block),
storage->getNumberOfZCells(*block),
uint_t(1),
field::fzyx,
make_shared<field::AllocateAligned<real_t, 64>>());
};
// Creating fields
BlockDataID pdfFieldId = blocks->addStructuredBlockData<PdfField_T>(pdfFieldAdder, "pdfs");
BlockDataID velFieldId = field::addToStorage< VelocityField_T >( blocks, "vel", real_t( 0 ), field::fzyx );
pystencils::GenMacroSetter setterKernel(pdfFieldId, velFieldId);
pystencils::GenMacroGetter getterKernel(pdfFieldId, velFieldId);
if( shearVelocityMagnitude > 0 )
initShearVelocity(blocks, velFieldId, shearVelocityMagnitude);
for( auto & b : *blocks)
setterKernel(&b);
// Buffered Comm
blockforest::communication::UniformBufferedScheme< Stencil_T > twoFieldComm(blocks );
twoFieldComm.addPackInfo(make_shared< pystencils::GenPackInfo >(pdfFieldId ) );
blockforest::communication::UniformBufferedScheme< Stencil_T > aaPullComm(blocks);
aaPullComm.addPackInfo(make_shared< pystencils::GenPackInfoAAPull>(pdfFieldId));
blockforest::communication::UniformBufferedScheme< Stencil_T > aaPushComm(blocks);
aaPushComm.addPackInfo(make_shared< pystencils::GenPackInfoAAPush>(pdfFieldId));
// Direct Comm
blockforest::communication::UniformDirectScheme< Stencil_T > twoFieldCommDirect(blocks);
twoFieldCommDirect.addDataToCommunicate(make_shared<pystencils::GenMpiDtypeInfo>(pdfFieldId));
blockforest::communication::UniformDirectScheme< Stencil_T > aaPullCommDirect(blocks);
aaPullCommDirect.addDataToCommunicate(make_shared<pystencils::GenMpiDtypeInfoAAPull>(pdfFieldId));
blockforest::communication::UniformDirectScheme< Stencil_T > aaPushCommDirect(blocks);
aaPushCommDirect.addDataToCommunicate(make_shared<pystencils::GenMpiDtypeInfoAAPush>(pdfFieldId));
const std::string twoFieldKernelType = parameters.getParameter<std::string>( "twoFieldKernelType", "generated");
std::function<void(IBlock*)> twoFieldKernel;
if( twoFieldKernelType == "generated") {
twoFieldKernel = pystencils::GenLbKernel(pdfFieldId, omega);
} else if (twoFieldKernelType == "manualGeneric") {
using MyLM = lbm::D3Q19<lbm::collision_model::SRT>;
BlockDataID tmpPdfFieldId = blocks->addStructuredBlockData<PdfField_T>(pdfFieldAdder, "pdfs");
twoFieldKernel = StreamPullCollideGeneric<MyLM>(pdfFieldId, tmpPdfFieldId, omega);
} else if (twoFieldKernelType == "manualD3Q19") {
using MyLM = lbm::D3Q19<lbm::collision_model::SRT>;
BlockDataID tmpPdfFieldId = blocks->addStructuredBlockData<PdfField_T>(pdfFieldAdder, "pdfs");
twoFieldKernel = StreamPullCollideD3Q19<MyLM>(pdfFieldId, tmpPdfFieldId, omega);
} else {
WALBERLA_ABORT_NO_DEBUG_INFO("Invalid option for \"twoFieldKernelType\", "
"valid options are \"generated\", \"manualGeneric\", \"manualD3Q19\"")
}
using F = std::function<void()>;
SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps / 2 );
if( timeStepMode == "twoField")
{
timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" )
<< Sweep( twoFieldKernel, "LB stream & collide1" );
timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" )
<< Sweep( twoFieldKernel, "LB stream & collide2" );
} else if ( timeStepMode == "twoFieldKernelOnly") {
timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" );
timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide2" );
} else if ( timeStepMode == "aa") {
timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId, omega), "AA Even" );
timeLoop.add() << BeforeFunction( directComm ? F(aaPullCommDirect) : F(aaPullComm) )
<< Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId, omega), "AA Odd")
<< AfterFunction( directComm ? F(aaPushCommDirect) : F(aaPushComm) );
} else if ( timeStepMode == "aaKernelOnly") {
timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId, omega), "AA Even" );
timeLoop.add() << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId, omega), "AA Odd");
} else {
WALBERLA_ABORT("Invalid value for timeStepMode")
}
int warmupSteps = parameters.getParameter<int>( "warmupSteps", 2 );
int outerIterations = parameters.getParameter<int>( "outerIterations", 1 );
for(int i=0; i < warmupSteps; ++i )
timeLoop.singleStep();
auto remainingTimeLoggerFrequency = parameters.getParameter< double >( "remainingTimeLoggerFrequency", -1.0 ); // in seconds
if (remainingTimeLoggerFrequency > 0) {
auto logger = timing::RemainingTimeLogger( timeLoop.getNrOfTimeSteps() * uint_c( outerIterations ), remainingTimeLoggerFrequency );
timeLoop.addFuncAfterTimeStep( logger, "remaining time logger" );
}
// VTK
uint_t vtkWriteFrequency = parameters.getParameter<uint_t>( "vtkWriteFrequency", 0 );
if( vtkWriteFrequency > 0 )
{
auto vtkOutput = vtk::createVTKOutput_BlockData( *blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
"simulation_step", false, true, true, false, 0 );
auto velWriter = make_shared< field::VTKWriter< VelocityField_T > >( velFieldId, "vel" );
vtkOutput->addCellDataWriter( velWriter );
vtkOutput->addBeforeFunction( [&]()
{ for( auto & b : *blocks)
getterKernel(&b);
} );
timeLoop.addFuncAfterTimeStep( vtk::writeFiles( vtkOutput ), "VTK Output" );
}
bool useGui = parameters.getParameter<bool>( "useGui", false );
if( useGui )
{
GUI gui( timeLoop, blocks, argc, argv);
gui.run();
}
else
{
for ( int outerIteration = 0; outerIteration < outerIterations; ++outerIteration )
{
timeLoop.setCurrentTimeStepToZero();
WcTimer simTimer;
auto threads = omp_get_max_threads();
simTimer.start();
timeLoop.run();
simTimer.end();
auto time = simTimer.last();
auto nrOfCells = real_c( cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2] );
auto mlupsPerProcess = nrOfCells * real_c( timesteps ) / time * 1e-6;
using std::setw;
WALBERLA_LOG_INFO_ON_ROOT(setw(18) << timeStepMode <<
" procs: " << setw(6) << MPIManager::instance()->numProcesses() <<
" threads: " << threads <<
" direct_comm: " << directComm <<
" time steps: " << timesteps <<
setw(15) << " block size: " << cellsPerBlock <<
" mlups/core: " << int(mlupsPerProcess/ threads) <<
" mlups: " << int(mlupsPerProcess) * MPIManager::instance()->numProcesses())
WALBERLA_ROOT_SECTION()
{
python_coupling::PythonCallback pythonCallbackResults( "results_callback" );
if ( pythonCallbackResults.isCallable())
{
pythonCallbackResults.data().exposeValue( "mlupsPerProcess", mlupsPerProcess );
pythonCallbackResults.data().exposeValue( "stencil", infoStencil );
pythonCallbackResults.data().exposeValue( "configName", infoConfigName );
pythonCallbackResults.data().exposeValue( "timeStepMode", timeStepMode );
pythonCallbackResults.data().exposeValue( "twoFieldKernel", twoFieldKernelType );
pythonCallbackResults.data().exposeValue( "optimizations", optimizationDict );
pythonCallbackResults.data().exposeValue( "githash", core::buildinfo::gitSHA1() );
pythonCallbackResults.data().exposeValue( "compilerFlags", core::buildinfo::compilerFlags() );
pythonCallbackResults.data().exposeValue( "buildMachine", core::buildinfo::buildMachine() );
// Call Python function to report results
pythonCallbackResults();
}
}
}
}
}
return 0;
}
import sympy as sp
import pystencils as ps
from lbmpy.creationfunctions import create_lb_update_rule, create_lb_collision_rule
from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep,\
generate_mpidtype_info_from_kernel
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from lbmpy.fieldaccess import AAEvenTimeStepAccessor, AAOddTimeStepAccessor
omega = sp.symbols('omega')
omega_free = sp.Symbol('omega_free')
omega_fill = sp.symbols('omega_:10')
options_dict = {
'srt': {
'method': 'srt',
'stencil': 'D3Q19',
'relaxation_rate': omega,
'compressible': False,
},
'trt': {
'method': 'trt',
'stencil': 'D3Q19',
'compressible': False,
'relaxation_rate': omega,
},
'mrt': {
'method': 'mrt',
'stencil': 'D3Q19',
'relaxation_rates': [0.0, omega, 1.3, 1.4, omega, 1.2, 1.1, 1.15, 1.234, 1.4235, 1.242, 1.2567, 0.9, 0.7],
},
'mrt_full': {
'method': 'mrt',
'stencil': 'D3Q19',
'relaxation_rates': [omega_fill[0], omega, omega_fill[1], omega_fill[2],
omega_fill[3], omega_fill[4], omega_fill[5]],
},
'entropic': {
'method': 'mrt',
'stencil': 'D3Q19',
'compressible': True,
'relaxation_rates': [omega, omega, omega_free, omega_free, omega_free],
'entropic': True,
},
'entropic_kbc_n4': {
'method': 'trt-kbc-n4',
'stencil': 'D3Q27',
'compressible': True,
'relaxation_rates': [omega, omega_free],
'entropic': True,
},
'smagorinsky': {
'method': 'srt',
'stencil': 'D3Q19',
'smagorinsky': True,
'relaxation_rate': omega,
},
'cumulant': {
'method': 'cumulant',
'stencil': 'D3Q19',
'compressible': True,
'relaxation_rate': omega,
},
}
info_header = """
#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
const char * infoStencil = "{stencil}";
const char * infoConfigName = "{configName}";
const char * optimizationDict = "{optimizationDict}";
"""
with CodeGeneration() as ctx:
common_options = {
'field_name': 'pdfs',
'temporary_field_name': 'pdfs_tmp',
}
opts = {
'two_field_cse_pdfs': False,
'two_field_cse_global': False,
'two_field_split': True,
'two_field_nt_stores': True,
'aa_even_cse_pdfs': False,
'aa_even_cse_global': False,
'aa_even_split': False,
'aa_even_nt_stores': False,
'aa_odd_cse_pdfs': False,
'aa_odd_cse_global': False,
'aa_odd_split': True,
'aa_odd_nt_stores': False,
'compiled_in_boundaries': False,
}
config_name = ctx.config
noopt = False
d3q27 = False
if config_name.endswith('_d3q27'):
d3q27 = True
config_name = config_name[:-len('_d3q27')]
if config_name == '':
config_name = 'trt'
options = options_dict[config_name]
options.update(common_options)
options = options.copy()
if d3q27:
options['stencil'] = 'D3Q27'
dtype_string = 'float64' if ctx.double_accuracy else 'float32'
stencil_str = options['stencil']
q = int(stencil_str[stencil_str.find('Q') + 1:])
pdfs, velocity_field = ps.fields(f'pdfs({q}), velocity(3) : {dtype_string}[3D]', layout='fzyx')
update_rule_two_field = create_lb_update_rule(optimization={'symbolic_field': pdfs,
'split': opts['two_field_split'],
'cse_global': opts['two_field_cse_global'],
'cse_pdfs': opts['two_field_cse_pdfs']}, **options)
if opts['compiled_in_boundaries']:
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.boundaries.boundaries_in_kernel import update_rule_with_push_boundaries
from collections import OrderedDict
boundaries = OrderedDict((
((1, 0, 0), NoSlip()),
((-1, 0, 0), NoSlip()),
((0, 1, 0), NoSlip()),
((0, -1, 0), NoSlip()),
((0, 0, 1), UBB([0.05, 0, 0])),
((0, 0, -1), NoSlip()),
))
cr_even = create_lb_collision_rule(stencil='D3Q19', compressible=False,
optimization={'cse_global': opts['aa_even_cse_global'],
'cse_pdfs': opts['aa_even_cse_pdfs']})
cr_odd = create_lb_collision_rule(stencil='D3Q19', compressible=False,
optimization={'cse_global': opts['aa_odd_cse_global'],
'cse_pdfs': opts['aa_odd_cse_pdfs']})
update_rule_aa_even = update_rule_with_push_boundaries(cr_even, pdfs, boundaries,
AAEvenTimeStepAccessor, AAOddTimeStepAccessor.read)
update_rule_aa_odd = update_rule_with_push_boundaries(cr_odd, pdfs, boundaries,
AAOddTimeStepAccessor, AAEvenTimeStepAccessor.read)
else:
update_rule_aa_even = create_lb_update_rule(kernel_type=AAEvenTimeStepAccessor(),
optimization={'symbolic_field': pdfs,
'split': opts['aa_even_split'],
'cse_global': opts['aa_even_cse_global'],
'cse_pdfs': opts['aa_even_cse_pdfs']}, **options)
update_rule_aa_odd = create_lb_update_rule(kernel_type=AAOddTimeStepAccessor(),
optimization={'symbolic_field': pdfs,
'split': opts['aa_odd_split'],
'cse_global': opts['aa_odd_cse_global'],
'cse_pdfs': opts['aa_odd_cse_pdfs']}, **options)
vec = {'assume_aligned': True, 'assume_inner_stride_one': True}
# check if openmp is enabled in cmake
if ctx.openmp:
openmp_enabled = True
else:
openmp_enabled = False
# Sweeps
vec['nontemporal'] = opts['two_field_nt_stores']
generate_sweep(ctx, 'GenLbKernel', update_rule_two_field, field_swaps=[('pdfs', 'pdfs_tmp')],
cpu_vectorize_info=vec)
vec['nontemporal'] = opts['aa_even_nt_stores']
generate_sweep(ctx, 'GenLbKernelAAEven', update_rule_aa_even, cpu_vectorize_info=vec,
cpu_openmp=openmp_enabled, ghost_layers=1)
vec['nontemporal'] = opts['aa_odd_nt_stores']
generate_sweep(ctx, 'GenLbKernelAAOdd', update_rule_aa_odd, cpu_vectorize_info=vec,
cpu_openmp=openmp_enabled, ghost_layers=1)
setter_assignments = macroscopic_values_setter(update_rule_two_field.method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=1.0)
getter_assignments = macroscopic_values_getter(update_rule_two_field.method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=None)
generate_sweep(ctx, 'GenMacroSetter', setter_assignments, cpu_openmp=openmp_enabled)
generate_sweep(ctx, 'GenMacroGetter', getter_assignments, cpu_openmp=openmp_enabled)
# Communication
generate_pack_info_from_kernel(ctx, 'GenPackInfo', update_rule_two_field,
cpu_vectorize_info={'instruction_set': None}, cpu_openmp=False)
generate_pack_info_from_kernel(ctx, 'GenPackInfoAAPull', update_rule_aa_odd, kind='pull',
cpu_vectorize_info={'instruction_set': None}, cpu_openmp=False)
generate_pack_info_from_kernel(ctx, 'GenPackInfoAAPush', update_rule_aa_odd, kind='push',
cpu_vectorize_info={'instruction_set': None}, cpu_openmp=False)
generate_mpidtype_info_from_kernel(ctx, 'GenMpiDtypeInfo', update_rule_two_field)
generate_mpidtype_info_from_kernel(ctx, 'GenMpiDtypeInfoAAPull', update_rule_aa_odd, kind='pull')
generate_mpidtype_info_from_kernel(ctx, 'GenMpiDtypeInfoAAPush', update_rule_aa_odd, kind='push')
# Info Header
infoHeaderParams = {
'stencil': stencil_str,
'q': q,
'configName': ctx.config,
'optimizationDict': str(opts),
}
ctx.write_file('GenDefines.h', info_header.format(**infoHeaderParams))
import math
import os
import operator
import waLBerla as wlb
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
from waLBerla.tools.config import block_decomposition
from functools import reduce
import sqlite3
def prod(seq):
return reduce(operator.mul, seq, 1)
def get_block_decomposition(block_decomposition, num_processes):
bx = by = bz = 1
blocks_per_axis = int(math.log(num_processes, 2))
for i in range(blocks_per_axis):
decomposition_axis = block_decomposition[i % len(block_decomposition)]
if decomposition_axis == 'y':
by *= 2
elif decomposition_axis == 'z':
bz *= 2
elif decomposition_axis == 'x':
bx *= 2
assert (bx * by * bz) == num_processes
return bx, by, bz
def calculate_time_steps(runtime, expected_mlups, domain_size):
cells = prod(domain_size)
time_steps_per_second = expected_mlups * 1e6 / cells
return int(time_steps_per_second * runtime)
def domain_decomposition_func_z(processes, threads, block_size):
return {
'blocks': (1, 1, processes),
'cellsPerBlock': (block_size[0], block_size[1], block_size[2] * threads)
}
def domain_decomposition_func_full(processes, threads, block_size):
return {
'blocks': block_decomposition(processes),
'cellsPerBlock': (block_size[0], block_size[1], block_size[2] * threads)
}
class BenchmarkScenario:
def __init__(self, block_size=(256, 128, 128), direct_comm=True,
time_step_mode='aa', two_field_kernel_type='generated',
domain_decomposition_func=domain_decomposition_func_z,
db_file_name='uniform_grid_gen.sqlite'):
self.block_size = block_size
self.direct_comm = direct_comm
self.time_step_mode = time_step_mode
self.two_field_kernel_type = two_field_kernel_type
self.domain_decomposition_func = domain_decomposition_func
self.threads = int(os.environ['OMP_NUM_THREADS'])
self.processes = wlb.mpi.numProcesses()
self.db_file_name = db_file_name
@wlb.member_callback
def config(self, **kwargs):
time_steps_for_128_cubed = 10
time_steps = int(128**3 / prod(self.block_size) * time_steps_for_128_cubed)
time_steps = max(10, time_steps)
cfg = {
'DomainSetup': {
'periodic': (1, 1, 1),
},
'Parameters': {
'timesteps': time_steps,
'warmupSteps': 2,
'outerIterations': 4,
'vtkWriteFrequency': 0,
'remainingTimeLoggerFrequency': 0,
'omega': 1.6,
'timeStepMode': self.time_step_mode,
'twoFieldKernelType': self.two_field_kernel_type,
'directComm': self.direct_comm,
}
}
cfg['DomainSetup'].update(self.domain_decomposition_func(self.processes, self.threads, self.block_size))
return cfg
@wlb.member_callback
def results_callback(self, mlupsPerProcess, optimizations, **kwargs):
cfg = self.config()
result = {
'block_size': self.block_size,
'mlups_per_core': mlupsPerProcess / self.threads,
'threads': self.threads,
'processes': self.processes,
'time_step_mode': self.time_step_mode,
'direct_comm': self.direct_comm,
'time_steps': cfg['Parameters']['timesteps'],
'I_MPI_PIN_CELL': os.environ.get('I_MPI_PIN_CELL', ''),
'I_MPI_PIN_DOMAIN': os.environ.get('I_MPI_PIN_CELL', ''),
}
optimizations = eval(optimizations)
result.update(optimizations)
result.update(kwargs)
sequenceValuesToScalars(result)
num_tries = 4
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, "runs", self.db_file_name)
storeSingle(result, "runs", self.db_file_name)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/4 " + str(e))
def block_size_ok(sc):
block_size = sc.config()['DomainSetup']['cellsPerBlock']
return prod(block_size) * 19 < 2 ** 31
def single_node_benchmark():
scenarios = wlb.ScenarioManager()
for block_size in [(128, 128, 128), (128, 64, 64), (64, 64, 128), (64, 128, 64), (64, 64, 64),
(1024, 64, 32), (2048, 64, 16),
(64, 32, 32), (32, 32, 32), (16, 16, 16), (256, 128, 64), (512, 128, 32)]:
for direct_comm in (True, False):
for time_step_mode in ['aa', 'aaKernelOnly', 'twoField']:
if time_step_mode == 'twoField':
for kt in ['generated', 'manualGeneric', 'manualD3Q19']:
sc = BenchmarkScenario(block_size=block_size, direct_comm=direct_comm,
time_step_mode=time_step_mode, two_field_kernel_type=kt,
domain_decomposition_func=domain_decomposition_func_z
)
if not block_size_ok(sc):
continue
scenarios.add(sc)
else:
sc = BenchmarkScenario(block_size=block_size, direct_comm=direct_comm,
domain_decomposition_func=domain_decomposition_func_z,
time_step_mode=time_step_mode)
if not block_size_ok(sc):
continue
# scenarios.add(sc)
def single_node_benchmark_small():
scenarios = wlb.ScenarioManager()
for block_size in [(128, 128, 128), (128, 64, 64), (64, 64, 128), (64, 128, 64), (64, 64, 64),
(1024, 64, 32), (2048, 64, 16), (64, 32, 32), (32, 32, 32), (16, 16, 16),
(256, 128, 64), (512, 128, 32)]:
for direct_comm in (True, False):
for time_step_mode in ['aa', 'aaKernelOnly', 'twoField']:
sc = BenchmarkScenario(block_size=block_size, direct_comm=direct_comm, time_step_mode=time_step_mode)
if not block_size_ok(sc):
continue
scenarios.add(sc)
def weak_scaling():
scenarios = wlb.ScenarioManager()
for block_size in [(128, 128, 128), (128, 64, 64), (64, 64, 128), (64, 128, 64), (64, 64, 64),
(1024, 64, 32), (2048, 64, 16), (64, 32, 32), (32, 32, 32), (16, 16, 16),
(256, 128, 64), (512, 128, 32)]:
for direct_comm in (True, False):
sc = BenchmarkScenario(block_size=block_size, direct_comm=direct_comm,
domain_decomposition_func=domain_decomposition_func_full)
if not block_size_ok(sc):
continue
scenarios.add(sc)
single_node_benchmark()
# waLBerla Python module # waLBerla Python module
if ( NOT TARGET walberla::python_coupling )
message( WARNING "python module ist not build since the walberla::python_coupling target is non-existent" )
else()
if ( WALBERLA_BUILD_WITH_PYTHON ) if ( WALBERLA_BUILD_WITH_PYTHON )
if ( WALBERLA_BUILD_WITH_CUDA ) if ( WALBERLA_BUILD_WITH_GPU_SUPPORT )
set(PYTHON_MODULE_DEPENDENCIES blockforest boundary domain_decomposition core field python_coupling timeloop vtk cuda) set( PYTHON_MODULE_DEPENDENCIES walberla::blockforest walberla::boundary walberla::domain_decomposition walberla::core walberla::field walberla::python_coupling walberla::timeloop walberla::vtk walberla::gpu )
else() else()
set(PYTHON_MODULE_DEPENDENCIES blockforest boundary domain_decomposition core field python_coupling timeloop vtk) set( PYTHON_MODULE_DEPENDENCIES walberla::blockforest walberla::boundary walberla::domain_decomposition walberla::core walberla::field walberla::python_coupling walberla::timeloop walberla::vtk )
endif() endif()
if( WALBERLA_CXX_COMPILER_IS_MSVC ) if( WALBERLA_CXX_COMPILER_IS_MSVC )
...@@ -25,10 +26,10 @@ if ( WALBERLA_BUILD_WITH_PYTHON ) ...@@ -25,10 +26,10 @@ if ( WALBERLA_BUILD_WITH_PYTHON )
SUFFIX "${PYTHON_MODULE_EXTENSION}" SUFFIX "${PYTHON_MODULE_EXTENSION}"
) )
set_target_properties( walberla_cpp PROPERTIES MACOSX_RPATH TRUE ) set_target_properties( walberla_cpp PROPERTIES MACOSX_RPATH TRUE )
set_target_properties( walberla_cpp PROPERTIES CXX_VISIBILITY_PRESET hidden)
configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/setup.py ${CMAKE_CURRENT_BINARY_DIR}/setup.py ) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/setup.py ${CMAKE_CURRENT_BINARY_DIR}/setup.py )
add_custom_target( pythonModule ALL ${PYTHON_EXECUTABLE} setup.py build DEPENDS walberla_cpp ) add_custom_target( pythonModule ALL ${Python_EXECUTABLE} setup.py build DEPENDS walberla_cpp )
add_custom_target( pythonModuleInstall ${PYTHON_EXECUTABLE} setup.py install --user DEPENDS walberla_cpp ) add_custom_target( pythonModuleInstall ${Python_EXECUTABLE} setup.py install --user DEPENDS walberla_cpp )
endif() endif()
endif()
\ No newline at end of file
...@@ -28,8 +28,8 @@ ...@@ -28,8 +28,8 @@
#include "stencil/all.h" #include "stencil/all.h"
#ifdef WALBERLA_BUILD_WITH_CUDA #ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
#include "python_coupling/export/CUDAExport.h" #include "python_coupling/export/GPUExport.h"
#endif #endif
...@@ -75,11 +75,11 @@ struct InitObject ...@@ -75,11 +75,11 @@ struct InitObject
pythonManager->addExporterFunction(blockforest::exportModuleToPython<stencil::D2Q5, stencil::D2Q9, stencil::D3Q7, stencil::D3Q19, stencil::D3Q27>); pythonManager->addExporterFunction(blockforest::exportModuleToPython<stencil::D2Q5, stencil::D2Q9, stencil::D3Q7, stencil::D3Q19, stencil::D3Q27>);
// VTK // VTK
pythonManager->addExporterFunction( vtk::exportModuleToPython ); pythonManager->addExporterFunction( vtk::exportModuleToPython );
#ifdef WALBERLA_BUILD_WITH_CUDA #ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
using walberla::cuda::GPUField; using walberla::gpu::GPUField;
pythonManager->addExporterFunction( cuda::exportModuleToPython<GPU_FIELD_TYPES> ); pythonManager->addExporterFunction(gpu::exportModuleToPython<GPU_FIELD_TYPES> );
pythonManager->addExporterFunction( cuda::exportCopyFunctionsToPython<FIELD_TYPES> ); pythonManager->addExporterFunction(gpu::exportCopyFunctionsToPython<FIELD_TYPES> );
pythonManager->addBlockDataConversion<GPU_FIELD_TYPES>(); pythonManager->addBlockDataConversion<GPU_FIELD_TYPES>();
#endif #endif
// //
......
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file AntiDunes.cpp
//! \author Samuel Kemmler <samuel.kemmler@fau.de>
//! \author Jonas Plewinski <jonas.plewinski@fau.de>
//! \author Christoph Rettinger <christoph.rettinger@fau.de>
//! \author Christoph Schwarzmeier <christoph.schwarzmeier@fau.de>
//
// This showcase simulates antidunes, i.e., particulate dunes that travel in opposite stream-wise direction. See
// simulation results published in https://doi.org/10.1017/jfm.2023.262.
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "core/Environment.h"
#include "core/SharedFunctor.h"
#include "core/timing/RemainingTimeLogger.h"
#include "domain_decomposition/SharedSweep.h"
#include "field/StabilityChecker.h"
#include "lbm/PerformanceLogger.h"
#include "lbm/field/AddToStorage.h"
#include "lbm/free_surface/BlockStateDetectorSweep.h"
#include "lbm/free_surface/SurfaceMeshWriter.h"
#include "lbm/free_surface/TotalMassComputer.h"
#include "lbm/free_surface/VtkWriter.h"
#include "lbm/free_surface/dynamics/CellConversionSweep.h"
#include "lbm/free_surface/dynamics/ConversionFlagsResetSweep.h"
#include "lbm/free_surface/dynamics/ExcessMassDistributionSweep.h"
#include "lbm/free_surface/dynamics/PdfRefillingSweep.h"
#include "lbm/free_surface/dynamics/StreamReconstructAdvectSweep.h"
#include "lbm/free_surface/surface_geometry/CurvatureSweep.h"
#include "lbm/free_surface/surface_geometry/NormalSweep.h"
#include "lbm/free_surface/surface_geometry/SmoothingSweep.h"
#include "lbm/free_surface/surface_geometry/Utility.h"
#include "lbm_mesapd_coupling/mapping/ParticleMapping.h"
#include "lbm_mesapd_coupling/momentum_exchange_method/MovingParticleMapping.h"
#include "lbm_mesapd_coupling/momentum_exchange_method/reconstruction/PdfReconstructionManager.h"
#include "lbm_mesapd_coupling/momentum_exchange_method/reconstruction/Reconstructor.h"
#include "lbm_mesapd_coupling/utility/AddHydrodynamicInteractionKernel.h"
#include "lbm_mesapd_coupling/utility/AverageHydrodynamicForceTorqueKernel.h"
#include "lbm_mesapd_coupling/utility/InitializeHydrodynamicForceTorqueForAveragingKernel.h"
#include "lbm_mesapd_coupling/utility/LubricationCorrectionKernel.h"
#include "lbm_mesapd_coupling/utility/ParticleSelector.h"
#include "lbm_mesapd_coupling/utility/ResetHydrodynamicForceTorqueKernel.h"
#include "mesa_pd/collision_detection/AnalyticContactDetection.h"
#include "mesa_pd/data/ParticleAccessorWithBaseShape.h"
#include "mesa_pd/data/ParticleStorage.h"
#include "mesa_pd/data/shape/Sphere.h"
#include "mesa_pd/domain/BlockForestDataHandling.h"
#include "mesa_pd/domain/BlockForestDomain.h"
#include "mesa_pd/kernel/DoubleCast.h"
#include "mesa_pd/kernel/LinearSpringDashpot.h"
#include "mesa_pd/kernel/ParticleSelector.h"
#include "mesa_pd/kernel/VelocityVerlet.h"
#include "mesa_pd/mpi/ClearGhostOwnerSync.h"
#include "mesa_pd/mpi/ClearNextNeighborSync.h"
#include "mesa_pd/mpi/ContactFilter.h"
#include "mesa_pd/mpi/ReduceContactHistory.h"
#include "mesa_pd/mpi/ReduceProperty.h"
#include "mesa_pd/mpi/SyncGhostOwners.h"
#include "mesa_pd/mpi/SyncNextNeighbors.h"
#include "mesa_pd/mpi/notifications/ForceTorqueNotification.h"
#include "mesa_pd/mpi/notifications/HydrodynamicForceTorqueNotification.h"
#include "mesa_pd/vtk/ParticleVtkOutput.h"
#include "vtk/VTKOutput.h"
#include <core/waLBerlaBuildInfo.h>
#include "AntidunesBoundaryHandling.h"
#include "AntidunesLatticeModel.h"
#include "PIDController.h"
#include "Utility.h"
namespace walberla
{
namespace antidunes
{
using ScalarField_T = GhostLayerField< real_t, 1 >;
using VectorField_T = GhostLayerField< Vector3< real_t >, 1 >;
using VectorFieldFlattened_T = GhostLayerField< real_t, 3 >;
using LatticeModel_T = lbm::AntidunesLatticeModel;
using LatticeModelStencil_T = LatticeModel_T::Stencil;
using PdfField_T = lbm::PdfField< LatticeModel_T >;
using PdfCommunication_T = blockforest::SimpleCommunication< LatticeModelStencil_T >;
// the geometry computations in SurfaceGeometryHandler require meaningful values in the ghost layers in corner
// directions (flag field and fill level field); this holds, even if the lattice model uses a D3Q19 stencil
using CommunicationStencil_T =
std::conditional_t< LatticeModel_T::Stencil::D == uint_t(2), stencil::D2Q9, stencil::D3Q27 >;
using Communication_T = blockforest::SimpleCommunication< CommunicationStencil_T >;
using ParticleAccessor_T = mesa_pd::data::ParticleAccessorWithBaseShape;
using flag_t = uint32_t;
using FlagField_T = FlagField< flag_t >;
using AntidunesBoundaryHandling_T =
free_surface::AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >;
using StateSweep = walberla::free_surface::BlockStateDetectorSweep< FlagField_T >;
const FlagUID FormerMO_Flag("former moving obstacle");
// empty sweep required for using selectors (e.g. StateSweep::fullFreeSurface)
struct emptySweep
{
void operator()(IBlock*) {}
};
// data handling for loading a field of type ScalarField_T from file
template< typename ScalarField_T >
class ScalarFieldHandling : public field::BlockDataHandling< ScalarField_T >
{
public:
ScalarFieldHandling(const weak_ptr< StructuredBlockStorage >& blocks, uint_t numberGhostLayer)
: blocks_(blocks), numberGhostLayer_(numberGhostLayer)
{}
protected:
ScalarField_T* allocate(IBlock* const block) override { return allocateDispatch(block); }
ScalarField_T* reallocate(IBlock* const block) override { return allocateDispatch(block); }
private:
weak_ptr< StructuredBlockStorage > blocks_;
uint_t numberGhostLayer_;
ScalarField_T* allocateDispatch(IBlock* const block)
{
WALBERLA_ASSERT_NOT_NULLPTR(block);
auto blocks = blocks_.lock();
WALBERLA_CHECK_NOT_NULLPTR(blocks);
return new ScalarField_T(blocks->getNumberOfXCells(*block), blocks->getNumberOfYCells(*block),
blocks->getNumberOfZCells(*block), numberGhostLayer_, real_c(0), field::fzyx);
}
}; // class ScalarFieldHandling
// data handling for loading a field of type VectorFieldFlattened_T from file
template< typename VectorFieldFlattened_T >
class VectorFieldFlattenedHandling : public field::BlockDataHandling< VectorFieldFlattened_T >
{
public:
VectorFieldFlattenedHandling(const weak_ptr< StructuredBlockStorage >& blocks, uint_t numberGhostLayer)
: blocks_(blocks), numberGhostLayer_(numberGhostLayer)
{}
protected:
VectorFieldFlattened_T* allocate(IBlock* const block) override { return allocateDispatch(block); }
VectorFieldFlattened_T* reallocate(IBlock* const block) override { return allocateDispatch(block); }
private:
weak_ptr< StructuredBlockStorage > blocks_;
uint_t numberGhostLayer_;
VectorFieldFlattened_T* allocateDispatch(IBlock* const block)
{
WALBERLA_ASSERT_NOT_NULLPTR(block);
auto blocks = blocks_.lock();
WALBERLA_CHECK_NOT_NULLPTR(blocks);
return new VectorFieldFlattened_T(blocks->getNumberOfXCells(*block), blocks->getNumberOfYCells(*block),
blocks->getNumberOfZCells(*block), numberGhostLayer_, field::fzyx);
}
}; // class VectorFieldFlattenedHandling
// sweep for computing the force density from the fluid density and fill level
template< typename LatticeModel_T, typename FlagField_T, typename VectorFieldFlattened_T, typename ScalarField_T >
class ForceDensityCodegenSweep
{
public:
ForceDensityCodegenSweep(BlockDataID forceDensityFieldID, ConstBlockDataID pdfFieldID, ConstBlockDataID flagFieldID,
ConstBlockDataID fillFieldID,
const walberla::free_surface::FlagInfo< FlagField_T >& flagInfo,
std::shared_ptr< Vector3< real_t > > globalAcceleration)
: forceDensityFieldID_(forceDensityFieldID), pdfFieldID_(pdfFieldID), flagFieldID_(flagFieldID),
fillFieldID_(fillFieldID), flagInfo_(flagInfo), globalAcceleration_(globalAcceleration)
{}
void operator()(IBlock* const block)
{
VectorFieldFlattened_T* const forceDensityField = block->getData< VectorFieldFlattened_T >(forceDensityFieldID_);
const PdfField_T* const pdfField = block->getData< const PdfField_T >(pdfFieldID_);
const FlagField_T* const flagField = block->getData< const FlagField_T >(flagFieldID_);
const ScalarField_T* const fillField = block->getData< const ScalarField_T >(fillFieldID_);
WALBERLA_FOR_ALL_CELLS(forceDensityFieldIt, forceDensityField, pdfFieldIt, pdfField, flagFieldIt, flagField,
fillFieldIt, fillField, {
flag_t flag = *flagFieldIt;
// set force density in cells to acceleration * density * fillLevel (see equation 15
// in Koerner et al., 2005);
if (flagInfo_.isInterface(flag))
{
const real_t density = pdfField->getDensity(pdfFieldIt.cell());
forceDensityFieldIt[0] = (*globalAcceleration_)[0] * *fillFieldIt * density;
forceDensityFieldIt[1] = (*globalAcceleration_)[1] * *fillFieldIt * density;
forceDensityFieldIt[2] = (*globalAcceleration_)[2] * *fillFieldIt * density;
}
else
{
if (flagInfo_.isLiquid(flag))
{
const real_t density = pdfField->getDensity(pdfFieldIt.cell());
forceDensityFieldIt[0] = (*globalAcceleration_)[0] * density;
forceDensityFieldIt[1] = (*globalAcceleration_)[1] * density;
forceDensityFieldIt[2] = (*globalAcceleration_)[2] * density;
}
}
}) // WALBERLA_FOR_ALL_CELLS
}
private:
using flag_t = typename FlagField_T::flag_t;
BlockDataID forceDensityFieldID_;
ConstBlockDataID pdfFieldID_;
ConstBlockDataID flagFieldID_;
ConstBlockDataID fillFieldID_;
walberla::free_surface::FlagInfo< FlagField_T > flagInfo_;
std::shared_ptr< Vector3< real_t > > globalAcceleration_;
}; // class ForceDensitySweep
// function describing the global initialization profile
inline real_t initializationProfile(real_t x, real_t amplitude, real_t offset, real_t wavelength)
{
return amplitude * std::cos(x / wavelength * real_c(2) * math::pi + math::pi) + offset;
}
real_t getHydrostaticDensity(real_t height, real_t referenceHeight, real_t gravitationalAcceleration)
{
return real_c(1) + real_c(3) * gravitationalAcceleration * (height - referenceHeight);
}
void initializePoiseuilleProfile(StructuredBlockForest& forest, const BlockDataID& pdfFieldID,
const ConstBlockDataID& fillFieldID, const real_t& averageBedHeight,
const real_t& averageFluidHeight, const real_t& accelerationX, const real_t& viscosity,
real_t amplitude, real_t wavelength)
{
WALBERLA_LOG_INFO_ON_ROOT("Initializing Poiseuille velocity profile");
const real_t rho = real_c(1);
for (auto blockIt = forest.begin(); blockIt != forest.end(); ++blockIt)
{
PdfField_T* const pdfField = blockIt->getData< PdfField_T >(pdfFieldID);
const ScalarField_T* const fillField = blockIt->getData< const ScalarField_T >(fillFieldID);
WALBERLA_FOR_ALL_CELLS_XYZ(
pdfField, const Vector3< real_t > coord = forest.getBlockLocalCellCenter(*blockIt, Cell(x, y, z));
Vector3< real_t > velocity(real_c(0));
auto localBedHeight = initializationProfile(coord[0], amplitude, averageBedHeight, wavelength);
auto heightAboveBed = coord[2] - localBedHeight;
const real_t fillLevel = fillField->get(x, y, z);
if (heightAboveBed >= real_c(0) && fillLevel > real_c(0)) {
velocity[0] = accelerationX / (real_c(2) * viscosity) * heightAboveBed *
(real_c(2) * averageFluidHeight - heightAboveBed);
} pdfField->setToEquilibrium(x, y, z, velocity, rho);)
}
}
/***********************************************************************************************************************
* Initialize the hydrostatic pressure in the direction in which a force is acting in ALL cells (regardless of a cell's
* flag). The velocity remains unchanged.
*
* The force vector must have only one component, i.e., the direction of the force can only be in x-, y- or z-axis.
* The variable fluidHeight determines the height at which the density is equal to reference density (=1).
**********************************************************************************************************************/
template< typename PdfField_T >
void initHydrostaticPressure(const std::weak_ptr< StructuredBlockForest >& blockForestPtr,
const BlockDataID& pdfFieldID,
std::function< real_t(const Vector3< real_t >&) > hydrostaticDensityFct)
{
WALBERLA_LOG_INFO_ON_ROOT("Initializing hydrostatic pressure");
const auto blockForest = blockForestPtr.lock();
WALBERLA_CHECK_NOT_NULLPTR(blockForest);
for (auto blockIt = blockForest->begin(); blockIt != blockForest->end(); ++blockIt)
{
PdfField_T* const pdfField = blockIt->getData< PdfField_T >(pdfFieldID);
CellInterval local = pdfField->xyzSizeWithGhostLayer(); // block-, i.e., process-local cell interval
for (const auto& cell : local)
{
// initialize the (hydrostatic) pressure, i.e., LBM density
// Bernoulli: p = p0 + density * gravity * height
// => LBM (density=1): rho = rho0 + gravity * height = 1 + 1/cs^2 * g * h = 1 + 3 * g * h
// shift global cell by 0.5 since density is set for cell center
Vector3< real_t > cellCenter = blockForest->getBlockLocalCellCenter(*blockIt, cell);
const real_t rho = hydrostaticDensityFct(cellCenter);
const Vector3< real_t > velocity = pdfField->getVelocity(cell);
pdfField->setDensityAndVelocity(cell, velocity, rho);
}
}
}
template< typename FreeSurfaceBoundaryHandling_T, typename PdfField_T, typename FlagField_T >
class MeanVelocityComputer
{
public:
MeanVelocityComputer(const std::weak_ptr< const StructuredBlockForest >& blockForest,
const std::weak_ptr< const FreeSurfaceBoundaryHandling_T >& freeSurfaceBoundaryHandling,
const ConstBlockDataID& pdfFieldID, const std::shared_ptr< Vector3< real_t > >& meanVelocity,
real_t averagingFactor)
: blockForest_(blockForest), freeSurfaceBoundaryHandling_(freeSurfaceBoundaryHandling), pdfFieldID_(pdfFieldID),
meanVelocity_(meanVelocity), averagingFactor_(averagingFactor)
{}
void operator()()
{
auto blockForest = blockForest_.lock();
WALBERLA_CHECK_NOT_NULLPTR(blockForest);
auto freeSurfaceBoundaryHandling = freeSurfaceBoundaryHandling_.lock();
WALBERLA_CHECK_NOT_NULLPTR(freeSurfaceBoundaryHandling);
getMeanVelocity(blockForest, freeSurfaceBoundaryHandling);
}
void getMeanVelocity(const std::shared_ptr< const StructuredBlockForest >& blockForest,
const std::shared_ptr< const FreeSurfaceBoundaryHandling_T >& freeSurfaceBoundaryHandling)
{
const BlockDataID flagFieldID = freeSurfaceBoundaryHandling->getFlagFieldID();
const typename FreeSurfaceBoundaryHandling_T::FlagInfo_T& flagInfo = freeSurfaceBoundaryHandling->getFlagInfo();
// use separate variables for the velocity in each direction; syntax meanVelocity[0] does not work in OMP-macro
real_t meanVelocityX = real_c(0);
real_t meanVelocityY = real_c(0);
real_t meanVelocityZ = real_c(0);
for (auto blockIt = blockForest->begin(); blockIt != blockForest->end(); ++blockIt)
{
const FlagField_T* const flagField = blockIt->template getData< const FlagField_T >(flagFieldID);
const PdfField_T* const pdfField = blockIt->template getData< const PdfField_T >(pdfFieldID_);
WALBERLA_FOR_ALL_CELLS_OMP(flagFieldIt, flagField, pdfFieldIt, pdfField,
omp parallel for schedule(static) reduction(+:meanVelocityX)
reduction(+:meanVelocityY) reduction(+:meanVelocityZ),
{
if (flagInfo.isLiquid(flagFieldIt) || flagInfo.isInterface(flagFieldIt))
{
const Vector3< real_t > velocity = pdfField->getVelocity(pdfFieldIt.cell());
meanVelocityX += velocity[0];
meanVelocityY += velocity[1];
meanVelocityZ += velocity[2];
//++cellCount;
}
}) // WALBERLA_FOR_ALL_CELLS_OMP
}
Vector3< real_t > meanVelocity(meanVelocityX, meanVelocityY, meanVelocityZ);
mpi::allReduceInplace< real_t >(meanVelocity, mpi::SUM);
// mpi::allReduceInplace< uint_t >(cellCount, mpi::SUM);
meanVelocity *= averagingFactor_;
*meanVelocity_ = meanVelocity;
};
private:
std::weak_ptr< const StructuredBlockForest > blockForest_;
std::weak_ptr< const FreeSurfaceBoundaryHandling_T > freeSurfaceBoundaryHandling_;
const ConstBlockDataID pdfFieldID_;
std::shared_ptr< Vector3< real_t > > meanVelocity_;
real_t averagingFactor_;
}; // class MeanVelocityComputer
class ForcingAdjuster
{
public:
ForcingAdjuster(const shared_ptr< StructuredBlockStorage >& blocks, real_t targetVelocity, real_t externalForcing,
real_t proportionalGain, real_t derivativeGain, real_t integralGain, real_t maxRamp,
real_t minActuatingVariable, real_t maxActuatingVariable)
: blocks_(blocks), currentExternalForcing_(externalForcing),
pid_(targetVelocity, externalForcing, proportionalGain, derivativeGain, integralGain, maxRamp,
minActuatingVariable, maxActuatingVariable)
{
WALBERLA_LOG_INFO_ON_ROOT("Creating PID controller with pg = " << pid_.getProportionalGain()
<< ", dg = " << pid_.getDerivateGain()
<< ", ig = " << pid_.getIntegralGain());
}
void operator()(const real_t currentMeanVelocity)
{
// compute new forcing value on root (since flow rate only known on root)
WALBERLA_ROOT_SECTION()
{
real_t newExternalForcing = pid_.update(currentMeanVelocity);
currentExternalForcing_ = newExternalForcing;
}
// send updated external forcing to all other processes
mpi::broadcastObject(currentExternalForcing_);
}
real_t getExternalForcing() { return currentExternalForcing_; }
void storePIDSnapshot(std::string filename)
{
WALBERLA_ROOT_SECTION() { pid_.writeStateToFile(filename); }
}
void loadPIDSnapshot(std::string filename) { pid_.readStateFromFile(filename); }
private:
shared_ptr< StructuredBlockStorage > blocks_;
real_t currentExternalForcing_;
PIDController pid_;
}; // ForcingAdjuster
int main(int argc, char** argv)
{
Environment walberlaEnv(argc, argv);
WALBERLA_LOG_INFO_ON_ROOT("waLBerla Revision: " << std::string(WALBERLA_GIT_SHA1).substr(0, 8))
if (argc < 2) { WALBERLA_ABORT("Please specify a parameter file as input argument.") }
WALBERLA_LOG_DEVEL_ON_ROOT("Using generated lattice model.");
auto configPtr = walberlaEnv.config();
// print content of parameter file
WALBERLA_LOG_INFO_ON_ROOT(*configPtr);
WALBERLA_ROOT_SECTION()
{
std::ofstream file;
file.open("parameterConfiguration.cfg");
file << *configPtr;
file.close();
}
// get block forest parameters from parameter file
auto blockForestParameters = configPtr->getOneBlock("BlockForestParameters");
const Vector3< uint_t > cellsPerBlock = blockForestParameters.getParameter< Vector3< uint_t > >("cellsPerBlock");
const Vector3< bool > periodicity = blockForestParameters.getParameter< Vector3< bool > >("periodicity");
const bool loadSnapshot = blockForestParameters.getParameter< bool >("loadSnapshot");
const bool storeSnapshot = blockForestParameters.getParameter< bool >("storeSnapshot");
const uint_t snapshotFrequency = blockForestParameters.getParameter< uint_t >("snapshotFrequency");
const std::string snapshotBaseFolder = blockForestParameters.getParameter< std::string >("snapshotBaseFolder");
// get domain parameters from parameter file
auto domainParameters = configPtr->getOneBlock("DomainParameters");
const Vector3< uint_t > domainSize = domainParameters.getParameter< Vector3< uint_t > >("domainSize");
const uint_t wavePeriods = domainParameters.getParameter< uint_t >("wavePeriods");
const real_t liquidHeightFactor = domainParameters.getParameter< real_t >("liquidHeightFactor");
const real_t floorHeightFactor = domainParameters.getParameter< real_t >("floorHeightFactor");
const real_t initialAmplitude = domainParameters.getParameter< real_t >("initialAmplitude");
// compute number of blocks as defined by domainSize and cellsPerBlock
Vector3< uint_t > numBlocks;
for (uint_t i = uint_c(0); i <= uint_c(2); ++i)
{
numBlocks[i] = domainSize[i] / cellsPerBlock[i];
WALBERLA_CHECK_EQUAL(numBlocks[i] * cellsPerBlock[i], domainSize[i],
"Domain size in direction " << i << " is not a multiple of cells per block.")
}
// get number of (MPI) processes
const uint_t numProcesses = uint_c(MPIManager::instance()->numProcesses());
WALBERLA_CHECK_EQUAL(numProcesses, numBlocks[0] * numBlocks[1] * numBlocks[2],
"The number of MPI processes is different from the number of blocks as defined by "
"\"domainSize/cellsPerBlock\".")
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(numProcesses);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(numBlocks);
// get PID controller parameters
auto PIDParameters = configPtr->getOneBlock("PIDParameters");
const real_t targetMeanVelocityMagnitude = PIDParameters.getParameter< real_t >("targetMeanVelocityMagnitude");
const real_t proportionalGain = PIDParameters.getParameter< real_t >("proportionalGain");
const real_t derivativeGain = PIDParameters.getParameter< real_t >("derivativeGain");
const real_t integralGain = PIDParameters.getParameter< real_t >("integralGain");
const real_t maxRamp = PIDParameters.getParameter< real_t >("maxRamp");
const real_t minActuatingVariable = PIDParameters.getParameter< real_t >("minActuatingVariable");
const real_t maxActuatingVariable = PIDParameters.getParameter< real_t >("maxActuatingVariable");
// read particle infos
const auto particleParameters = configPtr->getOneBlock("ParticleParameters");
const std::string particleInFileName = particleParameters.getParameter< std::string >("inFileName");
const uint_t bedCopiesInX = particleParameters.getParameter< uint_t >("bedCopiesInX");
const uint_t bedCopiesInY = particleParameters.getParameter< uint_t >("bedCopiesInY");
const real_t particleDensityRatio = particleParameters.getParameter< real_t >("densityRatio");
const real_t particleFixingHeightFactor = particleParameters.getParameter< real_t >("fixingHeightFactor");
const real_t particleFrictionCoefficient = particleParameters.getParameter< real_t >("frictionCoefficient");
const real_t particleRestitutionCoefficient = particleParameters.getParameter< real_t >("restitutionCoefficient");
const uint_t particleNumSubCycles = particleParameters.getParameter< uint_t >("numSubCycles");
const bool useLubricationCorrection = particleParameters.getParameter< bool >("useLubricationCorrection");
const bool useNoSlipParticles = particleParameters.getParameter< bool >("useNoSlipParticles");
const real_t particlePoissonsRatio = real_c(0.22);
const real_t particleKappa = real_c(2) * (real_c(1) - particlePoissonsRatio) / (real_c(2) - particlePoissonsRatio);
real_t particleCollisionTimeNonDim = real_c(4);
bool useOpenMP = false;
const uint_t vtkSpacingParticles =
configPtr->getOneBlock("VTK").getOneBlock("fluid_field").getParameter< uint_t >("writeFrequency");
const std::string vtkFolder =
configPtr->getOneBlock("VTK").getOneBlock("fluid_field").getParameter< std::string >("baseFolder");
// get physics parameters from parameter file
auto physicsParameters = configPtr->getOneBlock("PhysicsParameters");
const bool enableWetting = physicsParameters.getParameter< bool >("enableWetting");
const uint_t timesteps = physicsParameters.getParameter< uint_t >("timesteps");
const real_t Re = physicsParameters.getParameter< real_t >("Re");
const real_t Fr = physicsParameters.getParameter< real_t >("Fr");
const real_t We = physicsParameters.getParameter< real_t >("We");
// get avgDiameter and scaling factor
real_t avgParticleDiameter = real_c(0);
real_t particleScalingFactor = real_c(0);
getAvgDiameterScalingFactor(particleInFileName, domainSize, bedCopiesInX, bedCopiesInY, avgParticleDiameter,
particleScalingFactor);
const real_t liquidHeight = avgParticleDiameter * liquidHeightFactor;
const real_t floorHeight = avgParticleDiameter * floorHeightFactor;
const real_t particleFixingHeight = particleFixingHeightFactor * avgParticleDiameter;
WALBERLA_CHECK_FLOAT_UNEQUAL(liquidHeight, 0.0)
WALBERLA_CHECK_FLOAT_UNEQUAL(floorHeight, 0.0)
const real_t absoluteLiquidHeight = liquidHeight + floorHeight;
const real_t viscosity = targetMeanVelocityMagnitude * liquidHeight / Re;
const real_t relaxationRate = real_c(1.0) / (real_c(3) * viscosity + real_c(0.5));
const real_t gravity = (targetMeanVelocityMagnitude / Fr) * (targetMeanVelocityMagnitude / Fr) / liquidHeight;
const real_t accelerationX = real_c(3) * targetMeanVelocityMagnitude * viscosity / (liquidHeight * liquidHeight);
std::shared_ptr< Vector3< real_t > > acceleration =
std::make_shared< Vector3< real_t > >(accelerationX, real_c(0.0), -gravity);
const real_t surfaceTension =
real_c(1.0) * targetMeanVelocityMagnitude * targetMeanVelocityMagnitude * liquidHeight / We;
// compute SI dx and dt
const real_t viscosity_SI = real_c(1.0016e-6); // kinemtic viscosity of water at 20°C at 1 bar
const real_t dx_SI = real_c(1) / particleScalingFactor;
const real_t dt_SI = viscosity / viscosity_SI * dx_SI * dx_SI;
WALBERLA_LOG_INFO_ON_ROOT("\nPhysical parameters:")
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(liquidHeight);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(floorHeight);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(dx_SI);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(dt_SI);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(*acceleration);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(relaxationRate);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(viscosity);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(absoluteLiquidHeight);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(avgParticleDiameter);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(particleScalingFactor);
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(particleFixingHeight);
WALBERLA_LOG_INFO_ON_ROOT("\nFree surface physical parameters")
WALBERLA_LOG_DEVEL_VAR_ON_ROOT(surfaceTension);
if ((periodicity[0] && numBlocks[0] < uint_c(3)) || (periodicity[1] && numBlocks[1] < uint_c(3)) ||
(periodicity[2] && numBlocks[2] < uint_c(3)))
{
WALBERLA_ABORT("When using particles, use at least three blocks per periodic direction.");
}
// read model parameters from parameter file
const auto modelParameters = configPtr->getOneBlock("ModelParameters");
const std::string pdfReconstructionModel = modelParameters.getParameter< std::string >("pdfReconstructionModel");
const std::string excessMassDistributionModel =
modelParameters.getParameter< std::string >("excessMassDistributionModel");
const walberla::free_surface::ExcessMassDistributionModel excessMassModel(excessMassDistributionModel);
const bool useSimpleMassExchange = modelParameters.getParameter< bool >("useSimpleMassExchange");
const real_t cellConversionThreshold = modelParameters.getParameter< real_t >("cellConversionThreshold");
const real_t cellConversionForceThreshold = modelParameters.getParameter< real_t >("cellConversionForceThreshold");
// read evaluation parameters from parameter file
const auto evaluationParameters = configPtr->getOneBlock("EvaluationParameters");
const uint_t performanceLogFrequency = evaluationParameters.getParameter< uint_t >("performanceLogFrequency");
const uint_t evaluationFrequency = evaluationParameters.getParameter< uint_t >("evaluationFrequency");
const std::string baseFolderName = evaluationParameters.getParameter< std::string >("baseFolderName");
uint_t beginTimeStep = 0;
const std::string checkpointConfigFile("antidunesCheckpointConfig.file");
if (loadSnapshot)
{
WALBERLA_ROOT_SECTION()
{
std::ifstream file;
file.open(snapshotBaseFolder + "/" + checkpointConfigFile);
if (file.fail()) WALBERLA_ABORT("Error: " << checkpointConfigFile << " could not be opened!");
file >> beginTimeStep;
file >> (*acceleration)[0];
file.close();
}
mpi::broadcastObject(beginTimeStep);
mpi::broadcastObject(*acceleration);
WALBERLA_LOG_INFO_ON_ROOT("Successfully read config parameters from checkpoint config file:")
WALBERLA_LOG_INFO_ON_ROOT(" - beginTimeStep = " << beginTimeStep)
WALBERLA_LOG_INFO_ON_ROOT(" - acceleration = < " << (*acceleration)[0] << ", " << (*acceleration)[1] << ", "
<< (*acceleration)[2] << " >")
}
if (loadSnapshot)
{
// modify config file to start VTK output from "loadFromTimestep" rather than from 0
std::vector< config::Config::Block* > configVTKBlock;
configPtr->getWritableGlobalBlock().getWritableBlocks("VTK", configVTKBlock, 1, 1);
std::vector< config::Config::Block* > configVTKFluidFieldBlock;
configVTKBlock[0]->getWritableBlocks("fluid_field", configVTKFluidFieldBlock, 1, 1);
configVTKFluidFieldBlock[0]->setOrAddParameter("initialExecutionCount", std::to_string(beginTimeStep));
}
WALBERLA_ROOT_SECTION()
{
// create base directories if they do not yet exist
filesystem::path tpath(baseFolderName);
if (!filesystem::exists(tpath)) filesystem::create_directory(tpath);
filesystem::path snapshotPath(snapshotBaseFolder);
if (!filesystem::exists(snapshotPath)) filesystem::create_directory(snapshotPath);
}
std::shared_ptr< StructuredBlockForest > blockForest(nullptr);
const std::string blockForestFile("blockForest.file");
if (loadSnapshot)
{
// load block forest from file
MPIManager::instance()->useWorldComm();
blockForest = make_shared< StructuredBlockForest >(
make_shared< BlockForest >(uint_c(MPIManager::instance()->rank()),
(std::string(snapshotBaseFolder + "/" + blockForestFile)).c_str(), true, false),
cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2]);
blockForest->createCellBoundingBoxes();
}
else
{
// create uniform block forest
blockForest = blockforest::createUniformBlockGrid(numBlocks[0], numBlocks[1], numBlocks[2], // blocks
cellsPerBlock[0], cellsPerBlock[1], cellsPerBlock[2], // cells
real_c(1.0), // dx
true, // one block per process
periodicity[0], periodicity[1], periodicity[2]); // periodicity
}
// save block forest to file (but do not overwrite existing file if snapshot is loaded)
if (storeSnapshot && !loadSnapshot)
{
blockForest->getBlockForest().saveToFile(snapshotBaseFolder + "/" + blockForestFile);
}
const auto vtkParameters = configPtr->getOneBlock("VTK");
const auto vtkFluidParameters = vtkParameters.getOneBlock("fluid_field");
BlockDataID pdfFieldID;
const std::string pdfFieldFile("pdfField.file");
BlockDataID fillFieldID;
const std::string fillFieldFile("fillField.file");
BlockDataID excessMassFieldID;
const std::string excessMassFieldFile("excessMassField.file");
// force density field must be added here to create lattice model
BlockDataID forceDensityFieldID =
field::addToStorage< VectorFieldFlattened_T >(blockForest, "Force field", real_c(0), field::fzyx, uint_c(1));
if (excessMassModel.isEvenlyAllInterfaceFallbackLiquidType())
{
// add additional field for storing excess mass in liquid cells
excessMassFieldID =
field::addToStorage< ScalarField_T >(blockForest, "Excess mass", real_c(0), field::fzyx, uint_c(1));
}
LatticeModel_T latticeModel = LatticeModel_T(forceDensityFieldID, relaxationRate);
if (loadSnapshot)
{
// load PDF field from file
shared_ptr< lbm::internal::PdfFieldHandling< LatticeModel_T > > pdfFieldDataHandling =
make_shared< lbm::internal::PdfFieldHandling< LatticeModel_T > >(
blockForest, latticeModel, false, Vector3< real_t >(real_c(0)), real_c(1), uint_c(1), field::fzyx);
pdfFieldID = (blockForest->getBlockStorage())
.loadBlockData(snapshotBaseFolder + "/" + pdfFieldFile, pdfFieldDataHandling, "PDF field");
// load fill level field from file
std::shared_ptr< ScalarFieldHandling< ScalarField_T > > fillFieldDataHandling =
std::make_shared< ScalarFieldHandling< ScalarField_T > >(blockForest, uint_c(2));
fillFieldID =
(blockForest->getBlockStorage())
.loadBlockData(snapshotBaseFolder + "/" + fillFieldFile, fillFieldDataHandling, "Fill level field");
// load fill level field from file
std::shared_ptr< ScalarFieldHandling< ScalarField_T > > excessMassFieldDataHandling =
std::make_shared< ScalarFieldHandling< ScalarField_T > >(blockForest, uint_c(1));
excessMassFieldID = (blockForest->getBlockStorage())
.loadBlockData(snapshotBaseFolder + "/" + excessMassFieldFile, excessMassFieldDataHandling,
"Excess mass field");
}
else
{
// add PDF field
pdfFieldID =
lbm::addPdfFieldToStorage(blockForest, "PDF field", latticeModel,
Vector3< real_t >(targetMeanVelocityMagnitude, 0, 0), real_c(1.0), field::fzyx);
// add fill level field (initialized with 0, i.e., gas everywhere)
fillFieldID =
field::addToStorage< ScalarField_T >(blockForest, "Fill level field", real_c(0.0), field::fzyx, uint_c(2));
// add fill level field (initialized with 0, i.e., gas everywhere)
excessMassFieldID =
field::addToStorage< ScalarField_T >(blockForest, "Excess mass field", real_c(0.0), field::fzyx, uint_c(1));
}
// MesaPD data structures
auto particleStorage = std::make_shared< mesa_pd::data::ParticleStorage >(1);
auto particleAccessor = std::make_shared< mesa_pd::data::ParticleAccessorWithBaseShape >(particleStorage);
auto mesapdDomain = std::make_shared< mesa_pd::domain::BlockForestDomain >(blockForest->getBlockForestPointer());
BlockDataID particleStorageID;
const std::string particleStorageFile("particleStorageFile.file");
if (loadSnapshot)
{
WALBERLA_LOG_INFO_ON_ROOT("Initializing particles from checkpointing file!");
particleStorageID = blockForest->loadBlockData(snapshotBaseFolder + "/" + particleStorageFile,
mesa_pd::domain::createBlockForestDataHandling(particleStorage),
"Particle Storage");
mesa_pd::mpi::ClearNextNeighborSync CNNS;
CNNS(*particleAccessor);
mesa_pd::mpi::ClearGhostOwnerSync CGOS;
CGOS(*particleAccessor);
}
else
{
particleStorageID =
blockForest->addBlockData(mesa_pd::domain::createBlockForestDataHandling(particleStorage), "Particle Storage");
}
BlockDataID particleFieldID = field::addToStorage< lbm_mesapd_coupling::ParticleField_T >(
blockForest, "Particle field", particleAccessor->getInvalidUid(), field::fzyx, uint_c(2));
auto densityReferenceHeight = absoluteLiquidHeight;
auto hydrostaticDensityFct = [acceleration, densityReferenceHeight](const Vector3< real_t >& position) {
uint_t forceComponent = uint_c(2); // gravity is here strictly only acting in z direction!
return getHydrostaticDensity(position[forceComponent], densityReferenceHeight, (*acceleration)[forceComponent]);
};
// add boundary handling
const std::shared_ptr< AntidunesBoundaryHandling_T > antidunesBoundaryHandling =
std::make_shared< AntidunesBoundaryHandling_T >(blockForest, pdfFieldID, fillFieldID, particleFieldID,
particleAccessor, hydrostaticDensityFct);
const BlockDataID flagFieldID = antidunesBoundaryHandling->getFlagFieldID();
const typename AntidunesBoundaryHandling_T::FlagInfo_T& flagInfo = antidunesBoundaryHandling->getFlagInfo();
real_t sinusAmplitude = real_c(0.5) * initialAmplitude;
real_t sinusOffset = floorHeight;
real_t sinusWavelength = real_c(domainSize[0]) / real_c(wavePeriods);
if (!loadSnapshot)
{
// samples used in the Monte-Carlo like estimation of the fill level
const uint_t fillLevelInitSamples = uint_c(100); // actually there will be 101 since 0 is also included
const uint_t numTotalPoints = (fillLevelInitSamples + uint_c(1)) * (fillLevelInitSamples + uint_c(1));
const real_t stepsize = real_c(1) / real_c(fillLevelInitSamples);
// initialize free-surface sine profile
for (auto blockIt = blockForest->begin(); blockIt != blockForest->end(); ++blockIt)
{
ScalarField_T* const fillField = blockIt->getData< ScalarField_T >(fillFieldID);
WALBERLA_FOR_ALL_CELLS(fillFieldIt, fillField, {
// cell in block-local coordinates
const Cell localCell = fillFieldIt.cell();
// get cell in global coordinates
Cell globalCell = localCell;
blockForest->transformBlockLocalToGlobalCell(globalCell, *blockIt, localCell);
// Monte-Carlo like estimation of the fill level:
// create uniformly-distributed sample points in each cell and count the number of points below the sine
// profile; this fraction of points is used as the fill level to initialize the profile
uint_t numPointsBelow = uint_c(0);
for (uint_t xSample = uint_c(0); xSample <= fillLevelInitSamples; ++xSample)
{
// Pascal et al. (2021) defined the amplitude to span from minimum peak to maximum peak; in
// initializationProfile(), the amplitude is defined to range from the average to the maximum peak
const real_t functionValue =
initializationProfile(real_c(globalCell[0]) + real_c(xSample) * stepsize, sinusAmplitude,
absoluteLiquidHeight + real_c(0.5), sinusWavelength);
for (uint_t zSample = uint_c(0); zSample <= fillLevelInitSamples; ++zSample)
{
const real_t zPoint = real_c(globalCell[2]) + real_c(zSample) * stepsize;
// with operator <, a fill level of 1 can not be reached when the line is equal to the cell's top
// border; with operator <=, a fill level of 0 can not be reached when the line is equal to the cell's
// bottom border
if (zPoint < functionValue) { ++numPointsBelow; }
}
}
// fill level is fraction of points below sine profile
fillField->get(localCell) = real_c(numPointsBelow) / real_c(numTotalPoints);
}) // WALBERLA_FOR_ALL_CELLS
}
initializePoiseuilleProfile(*blockForest, pdfFieldID, fillFieldID, floorHeight, liquidHeight + real_c(0.5),
(*acceleration)[0], viscosity, sinusAmplitude, sinusWavelength);
}
// initialize domain boundary conditions from config file
const auto boundaryParameters = configPtr->getOneBlock("BoundaryParameters");
antidunesBoundaryHandling->initFromConfig(boundaryParameters);
std::function< void(void) > syncCall;
auto simulationDomainAABB = blockForest->getDomain();
lbm_mesapd_coupling::ParticleMappingKernel< AntidunesBoundaryHandling_T::BoundaryHandling_T > particleMappingKernel(
blockForest, antidunesBoundaryHandling->getHandlingID());
lbm_mesapd_coupling::MovingParticleMappingKernel< AntidunesBoundaryHandling_T::BoundaryHandling_T >
movingParticleMappingKernel(blockForest, antidunesBoundaryHandling->getHandlingID(), particleFieldID);
uint_t numParticles = uint_c(0);
// initialize bottom solid sine profile
if (!loadSnapshot)
{
auto createParticleFct = [sinusAmplitude, sinusOffset, sinusWavelength](Vector3< real_t > pos) {
return pos[2] < initializationProfile(pos[0], sinusAmplitude, sinusOffset, sinusWavelength);
};
real_t maxParticleHeight = real_c(0);
initSpheresFromFile(particleInFileName, *particleStorage, *mesapdDomain, particleDensityRatio, domainSize,
createParticleFct, simulationDomainAABB, bedCopiesInX, bedCopiesInY, numParticles,
maxParticleHeight, particleScalingFactor);
WALBERLA_LOG_INFO_ON_ROOT("Max particle height " << maxParticleHeight);
if ((sinusOffset + sinusAmplitude) > maxParticleHeight)
WALBERLA_ABORT("Created particle bed is below desired sinus shape!");
if (real_c(2) * sinusAmplitude > (maxParticleHeight - particleFixingHeight))
WALBERLA_ABORT("Created mobile particle bed is not high enough for desired sinus shape!");
if (useNoSlipParticles && (particleFixingHeight < maxParticleHeight))
WALBERLA_ABORT("You are using no-slip BCs on particles (which does not set hydrodynamic forces) but do not "
"fix all particles - this leads to wrong behavior and is not permitted!")
// fix lower particles
particleStorage->forEachParticle(
useOpenMP, mesa_pd::kernel::SelectAll(), *particleAccessor,
[particleFixingHeight](const size_t idx, auto& ac) {
if (ac.getPosition(idx)[2] < particleFixingHeight)
mesa_pd::data::particle_flags::set(ac.getFlagsRef(idx), mesa_pd::data::particle_flags::FIXED);
},
*particleAccessor);
}
else
{
real_t avgParticleDiameterTest = real_c(0);
particleStorage->forEachParticle(
false, mesa_pd::kernel::SelectLocal(), *particleAccessor,
[&numParticles, &avgParticleDiameterTest](const size_t idx, auto& ac) {
auto sp = static_cast< mesa_pd::data::Sphere* >(ac.getBaseShape(idx).get());
++numParticles;
avgParticleDiameterTest += real_c(2) * sp->getRadius();
},
*particleAccessor);
mpi::allReduceInplace(numParticles, mpi::SUM);
mpi::allReduceInplace(avgParticleDiameterTest, mpi::SUM);
avgParticleDiameterTest /= real_c(numParticles);
WALBERLA_LOG_INFO_ON_ROOT("Read particles from check pointing file with avg diameter of "
<< avgParticleDiameterTest)
if (std::abs(avgParticleDiameterTest - avgParticleDiameter) / avgParticleDiameterTest > real_c(0.05))
{
WALBERLA_ABORT("Particle diameters not correct.")
}
}
WALBERLA_LOG_INFO_ON_ROOT("Created " << numParticles << " particles");
// create planes
createPlane(*particleStorage, simulationDomainAABB.minCorner(), Vector3< real_t >(real_c(0), real_c(0), real_c(1)));
createPlane(*particleStorage, simulationDomainAABB.maxCorner(), Vector3< real_t >(real_c(0), real_c(0), real_c(-1)));
const real_t blockSyncExtension = real_c(2.5);
real_t maxPossibleParticleDiameter = avgParticleDiameter * real_c(1.1);
if (maxPossibleParticleDiameter < real_c(2) * real_c(cellsPerBlock.min()) - blockSyncExtension)
{
WALBERLA_LOG_INFO_ON_ROOT("Using next neighbor sync for particles");
syncCall = [particleStorage, mesapdDomain, blockSyncExtension]() {
mesa_pd::mpi::SyncNextNeighbors syncNextNeighborFunc;
syncNextNeighborFunc(*particleStorage, *mesapdDomain, blockSyncExtension);
};
syncCall();
}
else
{
WALBERLA_LOG_INFO_ON_ROOT("Using ghost owner sync for particles")
syncCall = [particleStorage, mesapdDomain, blockSyncExtension]() {
mesa_pd::mpi::SyncGhostOwners syncGhostOwnersFunc;
syncGhostOwnersFunc(*particleStorage, *mesapdDomain, blockSyncExtension);
};
for (uint_t i = uint_c(0); i < uint_c(std::ceil(maxPossibleParticleDiameter / real_c(cellsPerBlock.min()))); ++i)
syncCall();
}
if (useNoSlipParticles)
{
particleStorage->forEachParticle(useOpenMP, SphereSelector(), *particleAccessor, particleMappingKernel,
*particleAccessor, AntidunesBoundaryHandling_T::noSlipFlagID);
}
else
{
particleStorage->forEachParticle(useOpenMP, SphereSelector(), *particleAccessor, movingParticleMappingKernel,
*particleAccessor, AntidunesBoundaryHandling_T::movingObstacleFlagID);
}
// IMPORTANT REMARK: this must be only called after every solid flag has been set; otherwise, the boundary handling
// might not detect solid flags correctly
antidunesBoundaryHandling->initFlagsFromFillLevel();
// initialize hydrostatic pressure
if (!loadSnapshot) { initHydrostaticPressure< PdfField_T >(blockForest, pdfFieldID, hydrostaticDensityFct); }
// initialize force density field
walberla::free_surface::initForceDensityFieldCodegen< PdfField_T, FlagField_T, VectorFieldFlattened_T,
ScalarField_T >(
blockForest, forceDensityFieldID, fillFieldID, pdfFieldID, flagFieldID, flagInfo, *acceleration);
// communication after initialization
Communication_T communication(blockForest, flagFieldID, fillFieldID, forceDensityFieldID);
communication();
PdfCommunication_T pdfCommunication(blockForest, pdfFieldID);
pdfCommunication();
// add bubble model
std::shared_ptr< walberla::free_surface::bubble_model::BubbleModelBase > bubbleModel =
std::make_shared< walberla::free_surface::bubble_model::BubbleModelConstantPressure >(real_c(1));
// set density in non-liquid or non-interface cells to 1 (after initializing with hydrostatic pressure)
// setDensityInNonFluidCellsToOne< FlagField_T, PdfField_T >(blockForest, flagInfo, flagFieldID, pdfFieldID);
// create timeloop
SweepTimeloop timeloop(blockForest, timesteps);
timeloop.setCurrentTimeStep(beginTimeStep);
timeloop.addFuncBeforeTimeStep(RemainingTimeLogger(timeloop.getNrOfTimeSteps()), "Remaining Time Logger");
// Laplace pressure = 2 * surface tension * curvature; curvature computation is not necessary with no surface
// tension
bool computeCurvature = false;
if (!realIsEqual(surfaceTension, real_c(0), real_c(1e-14))) { computeCurvature = true; }
auto blockStateUpdate = StateSweep(blockForest, flagInfo, flagFieldID);
// add surface geometry handler
BlockDataID curvatureFieldID =
field::addToStorage< ScalarField_T >(blockForest, "Curvature field", real_c(0), field::fzyx, uint_c(1));
BlockDataID normalFieldID = field::addToStorage< VectorField_T >(
blockForest, "Normal field", Vector3< real_t >(real_c(0)), field::fzyx, uint_c(1));
BlockDataID obstacleNormalFieldID = field::addToStorage< VectorField_T >(
blockForest, "Obstacle normal field", Vector3< real_t >(real_c(0)), field::fzyx, uint_c(1));
// add field for smoothed fill levels
BlockDataID smoothFillFieldID =
field::addToStorage< ScalarField_T >(blockForest, "Smooth fill level field", real_c(0), field::fzyx, uint_c(1));
// smooth fill level field for decreasing error in finite difference normal and curvature computation (see
// dissertation of S. Bogner, 2017 (section 4.4.2.1))
walberla::free_surface::SmoothingSweep< CommunicationStencil_T, FlagField_T, ScalarField_T, VectorField_T >
smoothingSweep(smoothFillFieldID, fillFieldID, flagFieldID,
walberla::free_surface::flagIDs::liquidInterfaceGasFlagIDs, flagInfo.getObstacleIDSet(),
enableWetting);
// IMPORTANT REMARK: SmoothingSweep must be executed on all blocks, because the algorithm works on all liquid,
// interface and gas cells. This is necessary since the normals are not only computed in interface cells, but also
// in the neighborhood of interface cells. Therefore, meaningful values for the fill levels of the second
// neighbors of interface cells are also required in NormalSweep.
timeloop.add() << Sweep(smoothingSweep, "Sweep: fill level smoothing")
<< AfterFunction(Communication_T(blockForest, smoothFillFieldID),
"Communication: after smoothing sweep");
// compute interface normals (using smoothed fill level field)
walberla::free_surface::NormalSweep< CommunicationStencil_T, FlagField_T, ScalarField_T, VectorField_T > normalSweep(
normalFieldID, smoothFillFieldID, flagFieldID, walberla::free_surface::flagIDs::interfaceFlagID,
walberla::free_surface::flagIDs::liquidInterfaceGasFlagIDs, flagInfo.getObstacleIDSet(), true, false, true,
false);
timeloop.add() << Sweep(normalSweep, "Sweep: normal computation", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: normal")
<< AfterFunction(Communication_T(blockForest, normalFieldID), "Communication: after normal sweep");
if (computeCurvature)
{
// compute interface curvature using finite differences according to Brackbill et al.
walberla::free_surface::CurvatureSweepFiniteDifferences< CommunicationStencil_T, FlagField_T, ScalarField_T,
VectorField_T >
curvSweep(curvatureFieldID, normalFieldID, obstacleNormalFieldID, flagFieldID,
walberla::free_surface::flagIDs::interfaceFlagID,
walberla::free_surface::flagIDs::liquidInterfaceGasFlagIDs, flagInfo.getObstacleIDSet(), false,
real_c(0));
timeloop.add() << Sweep(curvSweep, "Sweep: curvature computation (finite difference method)",
StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: curvature")
<< AfterFunction(Communication_T(blockForest, curvatureFieldID),
"Communication: after curvature sweep");
}
// add surface dynamics handler
// add standard waLBerla boundary handling
timeloop.add() << Sweep(antidunesBoundaryHandling->getBoundarySweep(), "Sweep: boundary handling",
Set< SUID >::emptySet(), StateSweep::onlyGasAndBoundary)
<< Sweep(emptySweep(), "Empty sweep: boundary handling", StateSweep::onlyGasAndBoundary);
// add sweep for weighting force in interface cells with fill level and density
// different version for codegen because pystencils does not support 'Ghostlayerfield<Vector3(), 1>'
const ForceDensityCodegenSweep< LatticeModel_T, FlagField_T, VectorFieldFlattened_T, ScalarField_T >
forceDensityCodegenSweep(forceDensityFieldID, pdfFieldID, flagFieldID, fillFieldID, flagInfo, acceleration);
timeloop.add() << Sweep(forceDensityCodegenSweep, "Sweep: force weighting", Set< SUID >::emptySet(),
StateSweep::onlyGasAndBoundary)
<< Sweep(emptySweep(), "Empty sweep: force weighting", StateSweep::onlyGasAndBoundary)
<< AfterFunction(Communication_T(blockForest, forceDensityFieldID),
"Communication: after force weighting sweep");
// sweep for
// - reconstruction of PDFs in interface cells
// - streaming of PDFs in interface cells (and liquid cells on the same block)
// - advection of mass
// - update bubble volumes
// - marking interface cells for conversion
const walberla::free_surface::StreamReconstructAdvectSweep<
LatticeModel_T, typename AntidunesBoundaryHandling_T::BoundaryHandling_T, FlagField_T,
typename AntidunesBoundaryHandling_T::FlagInfo_T, ScalarField_T, VectorField_T, true >
streamReconstructAdvectSweep(surfaceTension, antidunesBoundaryHandling->getHandlingID(), fillFieldID, flagFieldID,
pdfFieldID, normalFieldID, curvatureFieldID, flagInfo, bubbleModel.get(),
pdfReconstructionModel, useSimpleMassExchange, cellConversionThreshold,
cellConversionForceThreshold);
// sweep acts only on blocks with at least one interface cell (due to StateSweep::fullFreeSurface)
timeloop.add() << Sweep(streamReconstructAdvectSweep, "Sweep: StreamReconstructAdvect", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: StreamReconstructAdvect")
// do not communicate PDFs here:
// - stream on blocks with "StateSweep::fullFreeSurface" was performed here using post-collision PDFs
// - stream on other blocks is performed below and should also use post-collision PDFs
// => if PDFs were communicated here, the ghost layer of other blocks would have post-stream PDFs
<< AfterFunction(Communication_T(blockForest, fillFieldID, flagFieldID),
"Communication: after StreamReconstructAdvect sweep")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< ScalarField_T >(blockForest, fillFieldID),
"Second ghost layer update: after StreamReconstructAdvect sweep (fill level field)")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< FlagField_T >(blockForest, flagFieldID),
"Second ghost layer update: after StreamReconstructAdvect sweep (flag field)");
auto lbmSweepGenerated = typename LatticeModel_T::Sweep(pdfFieldID);
// temporary class for being able to call the LBM collision with operator()
class CollideSweep
{
public:
CollideSweep(const typename LatticeModel_T::Sweep& sweep) : sweep_(sweep){};
void operator()(IBlock* const block, const uint_t numberOfGhostLayersToInclude = uint_t(0))
{
sweep_.collide(block, numberOfGhostLayersToInclude);
}
private:
typename LatticeModel_T::Sweep sweep_;
};
timeloop.add() << Sweep(CollideSweep(lbmSweepGenerated), "Sweep: collision (generated)", StateSweep::fullFreeSurface)
<< Sweep(lbmSweepGenerated, "Sweep: streamCollide (generated)", StateSweep::onlyLBM)
<< Sweep(emptySweep(), "Empty sweep: streamCollide (generated)")
<< AfterFunction(PdfCommunication_T(blockForest, pdfFieldID),
"Communication: after streamCollide (generated)");
// convert cells
// - according to the flags from StreamReconstructAdvectSweep (interface -> gas/liquid)
// - to ensure a closed layer of interface cells (gas/liquid -> interface)
// - detect and register bubble merges/splits (bubble volumes are already updated in StreamReconstructAdvectSweep)
// - convert cells and initialize PDFs near inflow boundaries
const walberla::free_surface::CellConversionSweep< LatticeModel_T, AntidunesBoundaryHandling_T::BoundaryHandling_T,
ScalarField_T >
cellConvSweep(antidunesBoundaryHandling->getHandlingID(), pdfFieldID, flagInfo, bubbleModel.get());
timeloop.add() << Sweep(cellConvSweep, "Sweep: cell conversion", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: cell conversion")
//<< AfterFunction(PdfCommunication_T(blockForest, pdfFieldID),
//
// "Communication: after cell conversion sweep (PDF field)")
// communicate the flag field also in corner directions
<< AfterFunction(Communication_T(blockForest, flagFieldID),
"Communication: after cell conversion sweep (flag field)")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< FlagField_T >(blockForest, flagFieldID),
"Second ghost layer update: after cell conversion sweep (flag field)");
// reinitialize PDFs, i.e., refill cells that were converted from gas to interface
// - when the flag "convertedFromGasToInterface" has been set (by CellConversionSweep)
// - according to the method specified with pdfRefillingModel_
const walberla::free_surface::EquilibriumRefillingSweep< LatticeModel_T, FlagField_T > equilibriumRefillingSweep(
pdfFieldID, flagFieldID, flagInfo, true);
timeloop.add() << Sweep(equilibriumRefillingSweep, "Sweep: EquilibriumRefilling", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: EquilibriumRefilling")
<< AfterFunction(PdfCommunication_T(blockForest, pdfFieldID),
"Communication: after EquilibriumRefilling sweep");
// distribute excess mass:
// - excess mass: mass that is free after conversion from interface to gas/liquid cells
// - update the bubble model
// IMPORTANT REMARK: this sweep computes the mass via the density, i.e., the PDF field must be up-to-date and the
// PdfRefillingSweep must have been performed
if (excessMassModel.isEvenlyType())
{
const walberla::free_surface::ExcessMassDistributionSweepInterfaceEvenly< LatticeModel_T, FlagField_T,
ScalarField_T, VectorField_T >
distributeMassSweep(excessMassModel, fillFieldID, flagFieldID, pdfFieldID, flagInfo);
timeloop.add()
<< Sweep(distributeMassSweep, "Sweep: excess mass distribution", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: distribute excess mass")
<< AfterFunction(Communication_T(blockForest, fillFieldID),
"Communication: after excess mass distribution sweep")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< ScalarField_T >(blockForest, fillFieldID),
"Second ghost layer update: after excess mass distribution sweep (fill level field)")
// update bubble model, i.e., perform registered bubble merges/splits; bubble merges/splits are
// already detected and registered by CellConversionSweep
<< AfterFunction([&bubbleModel]() { bubbleModel->update(); }, "Sweep: bubble model update");
}
else
{
if (excessMassModel.isWeightedType())
{
const walberla::free_surface::ExcessMassDistributionSweepInterfaceWeighted< LatticeModel_T, FlagField_T,
ScalarField_T, VectorField_T >
distributeMassSweep(excessMassModel, fillFieldID, flagFieldID, pdfFieldID, flagInfo, normalFieldID);
timeloop.add()
<< Sweep(distributeMassSweep, "Sweep: excess mass distribution", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: distribute excess mass")
<< AfterFunction(Communication_T(blockForest, fillFieldID),
"Communication: after excess mass distribution sweep")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< ScalarField_T >(blockForest, fillFieldID),
"Second ghost layer update: after excess mass distribution sweep (fill level field)")
// update bubble model, i.e., perform registered bubble merges/splits; bubble merges/splits
// are already detected and registered by CellConversionSweep
<< AfterFunction([&]() { bubbleModel->update(); }, "Sweep: bubble model update");
}
else
{
if (excessMassModel.isEvenlyAllInterfaceFallbackLiquidType())
{
const walberla::free_surface::ExcessMassDistributionSweepInterfaceAndLiquid< LatticeModel_T, FlagField_T,
ScalarField_T, VectorField_T >
distributeMassSweep(excessMassModel, fillFieldID, flagFieldID, pdfFieldID, flagInfo, excessMassFieldID);
timeloop.add()
// perform this sweep also on "onlyLBM" blocks because liquid cells also exchange excess mass here
<< Sweep(distributeMassSweep, "Sweep: excess mass distribution", StateSweep::fullFreeSurface)
<< Sweep(distributeMassSweep, "Sweep: excess mass distribution", StateSweep::onlyLBM)
<< Sweep(emptySweep(), "Empty sweep: distribute excess mass")
<< AfterFunction(Communication_T(blockForest, fillFieldID, excessMassFieldID),
"Communication: after excess mass distribution sweep")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< ScalarField_T >(blockForest, fillFieldID),
"Second ghost layer update: after excess mass distribution sweep (fill level field)")
// update bubble model, i.e., perform registered bubble merges/splits; bubble
// merges/splits are already detected and registered by CellConversionSweep
<< AfterFunction([&]() { bubbleModel->update(); }, "Sweep: bubble model update");
}
}
}
// reset all flags that signal cell conversions (except "keepInterfaceForWettingFlag")
walberla::free_surface::ConversionFlagsResetSweep< FlagField_T > resetConversionFlagsSweep(flagFieldID, flagInfo);
timeloop.add() << Sweep(resetConversionFlagsSweep, "Sweep: conversion flag reset", StateSweep::fullFreeSurface)
<< Sweep(emptySweep(), "Empty sweep: conversion flag reset")
<< AfterFunction(Communication_T(blockForest, flagFieldID),
"Communication: after excess mass distribution sweep")
<< AfterFunction(blockforest::UpdateSecondGhostLayer< FlagField_T >(blockForest, flagFieldID),
"Second ghost layer update: after excess mass distribution sweep (flag field)");
// update block states
timeloop.add() << Sweep(blockStateUpdate, "Sweep: block state update");
// add VTK output
walberla::free_surface::addVTKOutput< LatticeModel_T, AntidunesBoundaryHandling_T, PdfField_T, FlagField_T,
ScalarField_T, VectorField_T >(
blockForest, timeloop, configPtr, flagInfo, pdfFieldID, flagFieldID, fillFieldID, BlockDataID(), curvatureFieldID,
normalFieldID, obstacleNormalFieldID);
// add triangle mesh output of free surface
walberla::free_surface::SurfaceMeshWriter< ScalarField_T, FlagField_T > surfaceMeshWriter(
blockForest, fillFieldID, flagFieldID, walberla::free_surface::flagIDs::liquidInterfaceGasFlagIDs, real_c(0),
configPtr);
surfaceMeshWriter(); // write initial mesh
timeloop.addFuncAfterTimeStep(surfaceMeshWriter, "Writer: surface mesh");
if (vtkSpacingParticles != uint_t(0))
{
// particle field
auto particleFieldVTK =
vtk::createVTKOutput_BlockData(blockForest, "particle_field", vtkSpacingParticles, 0, false, vtkFolder);
auto cellBB_filterParameters = vtkFluidParameters.getOneBlock("CellBB_filter");
const Vector3< uint_t > cellBB_filterMin = cellBB_filterParameters.getParameter< Vector3< uint_t > >("min");
const Vector3< uint_t > cellBB_filterMax = cellBB_filterParameters.getParameter< Vector3< uint_t > >("max");
AABB sliceAABB(real_c(cellBB_filterMin[0]), real_c(cellBB_filterMin[1]), real_c(cellBB_filterMin[2]),
real_c(cellBB_filterMax[0] + uint_t(1)), real_c(cellBB_filterMax[1] + uint_t(1)),
real_c(cellBB_filterMax[2] + uint_t(1)));
particleFieldVTK->addCellInclusionFilter(vtk::AABBCellFilter(sliceAABB));
particleFieldVTK->addCellDataWriter(
make_shared< field::VTKWriter< GhostLayerField< walberla::id_t, 1 > > >(particleFieldID, "particleField"));
particleFieldVTK->setSamplingResolution(vtkFluidParameters.getParameter< real_t >("samplingResolution"));
timeloop.addFuncBeforeTimeStep(vtk::writeFiles(particleFieldVTK), "VTK (particle field data");
}
if (vtkSpacingParticles != uint_t(0))
{
// sphere
auto particleVtkOutput = make_shared< mesa_pd::vtk::ParticleVtkOutput >(particleStorage);
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleUid >("uid");
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleLinearVelocity >("velocity");
particleVtkOutput->addOutput< mesa_pd::data::SelectParticleInteractionRadius >("radius");
// limit output to process-local spheres
particleVtkOutput->setParticleSelector([](const mesa_pd::data::ParticleStorage::iterator& pIt) {
using namespace walberla::mesa_pd::data::particle_flags;
return (pIt->getBaseShape()->getShapeType() == mesa_pd::data::Sphere::SHAPE_TYPE) &&
!isSet(pIt->getFlags(), GHOST);
});
auto particleVtkWriter =
vtk::createVTKOutput_PointData(particleVtkOutput, "particles", vtkSpacingParticles, vtkFolder,
std::string("simulation_step"), false, true, true, true, beginTimeStep);
timeloop.addFuncAfterTimeStep(vtk::writeFiles(particleVtkWriter), "VTK (sphere data)");
}
// add logging for computational performance
const lbm::PerformanceLogger< FlagField_T > performanceLogger(
blockForest, flagFieldID, walberla::free_surface::flagIDs::liquidInterfaceFlagIDs, performanceLogFrequency);
timeloop.addFuncAfterTimeStep(performanceLogger, "Evaluator: performance logging");
// LBM stability check
timeloop.addFuncAfterTimeStep(makeSharedFunctor(field::makeStabilityChecker< PdfField_T, FlagField_T >(
walberlaEnv.config(), blockForest, pdfFieldID, flagFieldID,
walberla::free_surface::flagIDs::liquidInterfaceFlagIDs)),
"LBM stability check");
// add sweep for evaluating the fluid's mean velocity
const std::shared_ptr< Vector3< real_t > > meanVelocity = std::make_shared< Vector3< real_t > >(real_c(0));
const real_t velocityAveragingFactor = real_c(1) / (liquidHeight * real_c(domainSize[0]) * real_c(domainSize[1]));
MeanVelocityComputer< AntidunesBoundaryHandling_T, PdfField_T, FlagField_T > meanVelocityComputer(
blockForest, antidunesBoundaryHandling, pdfFieldID, meanVelocity, velocityAveragingFactor);
// PID Controller
shared_ptr< ForcingAdjuster > forcingAdjuster =
make_shared< ForcingAdjuster >(blockForest, targetMeanVelocityMagnitude, (*acceleration)[0], proportionalGain,
derivativeGain, integralGain, maxRamp, minActuatingVariable, maxActuatingVariable);
if (loadSnapshot) { forcingAdjuster->loadPIDSnapshot(snapshotBaseFolder + "/" + "pidState.file"); }
WcTimingPool timingPool;
// this is carried out after the particle integration, it corrects the flag field and restores missing PDF
// information then, the checkpointing file can be written, as otherwise some cells are invalid and can not be
// recovered
SweepTimeloop timeloopAfterParticles(blockForest, timesteps);
timeloopAfterParticles.setCurrentTimeStep(beginTimeStep);
// sweep for updating the particle mapping into the LBM simulation
bool strictlyConserveMomentum = false;
timeloopAfterParticles.add() << Sweep(
lbm_mesapd_coupling::makeMovingParticleMapping< PdfField_T, AntidunesBoundaryHandling_T::BoundaryHandling_T >(
blockForest, pdfFieldID, antidunesBoundaryHandling->getHandlingID(), particleFieldID, particleAccessor,
AntidunesBoundaryHandling_T::movingObstacleFlagID, FormerMO_Flag,
lbm_mesapd_coupling::RegularParticlesSelector(), strictlyConserveMomentum),
"Particle Mapping");
// sweep for restoring PDFs in cells previously occupied by particles
bool reconstruction_recomputeTargetDensity = false;
bool reconstruction_useCentralDifferences = true;
auto gradReconstructor =
lbm_mesapd_coupling::makeGradsMomentApproximationReconstructor< AntidunesBoundaryHandling_T::BoundaryHandling_T >(
blockForest, antidunesBoundaryHandling->getHandlingID(), relaxationRate, reconstruction_recomputeTargetDensity,
reconstruction_useCentralDifferences);
timeloopAfterParticles.add()
<< Sweep(makeSharedSweep(
lbm_mesapd_coupling::makePdfReconstructionManager< PdfField_T,
AntidunesBoundaryHandling_T::BoundaryHandling_T >(
blockForest, pdfFieldID, antidunesBoundaryHandling->getHandlingID(), particleFieldID,
particleAccessor, FormerMO_Flag, walberla::free_surface::flagIDs::liquidFlagID, gradReconstructor,
strictlyConserveMomentum)),
"PDF Restore")
<< AfterFunction(Communication_T(blockForest, flagFieldID, particleFieldID),
"Communication: after PDF reconstruction sweep") // unsure if necessary but added for consistency
<< AfterFunction(pdfCommunication, "PDF Communication");
real_t timeStepSizeParticles = real_c(1) / real_c(particleNumSubCycles);
mesa_pd::kernel::VelocityVerletPreForceUpdate vvIntegratorPreForce(timeStepSizeParticles);
mesa_pd::kernel::VelocityVerletPostForceUpdate vvIntegratorPostForce(timeStepSizeParticles);
mesa_pd::kernel::LinearSpringDashpot collisionResponse(1);
collisionResponse.setFrictionCoefficientDynamic(size_t(0), size_t(0), particleFrictionCoefficient);
mesa_pd::mpi::ReduceProperty reduceProperty;
mesa_pd::mpi::ReduceContactHistory reduceAndSwapContactHistory;
lbm_mesapd_coupling::ResetHydrodynamicForceTorqueKernel resetHydrodynamicForceTorque;
lbm_mesapd_coupling::AverageHydrodynamicForceTorqueKernel averageHydrodynamicForceTorque;
real_t particleCollisionTime = particleCollisionTimeNonDim * avgParticleDiameter;
lbm_mesapd_coupling::LubricationCorrectionKernel lubricationCorrectionKernel(
viscosity, [](real_t r) { return (real_c(0.001 + real_c(0.00007) * r)) * r; });
WALBERLA_LOG_INFO_ON_ROOT("Will use particle time step size of "
<< timeStepSizeParticles << " and collision time of " << particleCollisionTime);
AverageDataSliceEvaluator< PdfField_T, AntidunesBoundaryHandling_T, FlagField_T, ScalarField_T >
averageDataSliceEvaluator(blockForest, flagFieldID, fillFieldID, pdfFieldID);
std::shared_ptr< real_t > totalFluidMass = std::make_shared< real_t >(real_c(0));
walberla::free_surface::TotalMassComputer< AntidunesBoundaryHandling_T, PdfField_T, FlagField_T, ScalarField_T >
totalFluidMassEvaluator(blockForest, antidunesBoundaryHandling, pdfFieldID, fillFieldID, evaluationFrequency,
totalFluidMass);
BedloadTransportEvaluator< ParticleAccessor_T > bedloadTransportEvaluator(
particleAccessor, real_c(1) / real_c(domainSize[0] * domainSize[1]), numParticles);
auto bedLoadTransportFileName = baseFolderName + "/bedload.txt";
WALBERLA_LOG_INFO_ON_ROOT("Writing bedload info to file " << bedLoadTransportFileName);
auto fluidInfoFileName = baseFolderName + "/fluidInfo.txt";
WALBERLA_LOG_INFO_ON_ROOT("Writing fluid info to file " << fluidInfoFileName);
// write info file
WALBERLA_ROOT_SECTION()
{
std::ofstream evalInfoFile(baseFolderName + "/info.txt");
evalInfoFile << evaluationFrequency << "\n";
evalInfoFile << gravity << "\n";
evalInfoFile << viscosity << "\n";
evalInfoFile << particleDensityRatio << "\n";
evalInfoFile << avgParticleDiameter << "\n";
evalInfoFile << domainSize[0] << "\n";
evalInfoFile << domainSize[1] << "\n";
evalInfoFile << domainSize[2] << "\n";
evalInfoFile << numParticles << "\n";
evalInfoFile << dx_SI << "\n";
evalInfoFile << dt_SI << "\n";
evalInfoFile.close();
}
Vector3< real_t > totalHydrodynamicForceOnParticles(real_c(0)); // only root will have valid values
for (uint_t t = beginTimeStep; t != timesteps; ++t)
{
timeloop.singleStep(timingPool, true);
timingPool["Mesa_pd"].start();
reduceProperty.operator()< mesa_pd::HydrodynamicForceTorqueNotification >(*particleStorage);
if (t == 0)
{
lbm_mesapd_coupling::InitializeHydrodynamicForceTorqueForAveragingKernel
initializeHydrodynamicForceTorqueForAveragingKernel;
particleStorage->forEachParticle(useOpenMP, mesa_pd::kernel::SelectLocal(), *particleAccessor,
initializeHydrodynamicForceTorqueForAveragingKernel, *particleAccessor);
}
particleStorage->forEachParticle(useOpenMP, mesa_pd::kernel::SelectLocal(), *particleAccessor,
averageHydrodynamicForceTorque, *particleAccessor);
for (auto subCycle = uint_t(0); subCycle < particleNumSubCycles; ++subCycle)
{
particleStorage->forEachParticle(useOpenMP, mesa_pd::kernel::SelectLocal(), *particleAccessor,
vvIntegratorPreForce, *particleAccessor);
syncCall();
if (useLubricationCorrection)
{
// lubrication correction
particleStorage->forEachParticlePairHalf(
useOpenMP, mesa_pd::kernel::ExcludeInfiniteInfinite(), *particleAccessor,
[&lubricationCorrectionKernel, &mesapdDomain](const size_t idx1, const size_t idx2, auto& ac) {
mesa_pd::collision_detection::AnalyticContactDetection acd;
acd.getContactThreshold() = lubricationCorrectionKernel.getNormalCutOffDistance();
mesa_pd::kernel::DoubleCast double_cast;
mesa_pd::mpi::ContactFilter contact_filter;
if (double_cast(idx1, idx2, ac, acd, ac))
{
if (contact_filter(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), *mesapdDomain))
{
double_cast(acd.getIdx1(), acd.getIdx2(), ac, lubricationCorrectionKernel, ac,
acd.getContactNormal(), acd.getPenetrationDepth());
}
}
},
*particleAccessor);
}
// collision response
particleStorage->forEachParticlePairHalf(
useOpenMP, mesa_pd::kernel::ExcludeInfiniteInfinite(), *particleAccessor,
[&collisionResponse, &mesapdDomain, timeStepSizeParticles, particleRestitutionCoefficient,
particleCollisionTime, particleKappa](const size_t idx1, const size_t idx2, auto& ac) {
mesa_pd::collision_detection::AnalyticContactDetection acd;
mesa_pd::kernel::DoubleCast double_cast;
mesa_pd::mpi::ContactFilter contact_filter;
if (double_cast(idx1, idx2, ac, acd, ac))
{
if (contact_filter(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), *mesapdDomain))
{
auto meff = real_c(1) / (ac.getInvMass(idx1) + ac.getInvMass(idx2));
collisionResponse.setStiffnessAndDamping(0, 0, particleRestitutionCoefficient,
particleCollisionTime, particleKappa, meff);
collisionResponse(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), acd.getContactNormal(),
acd.getPenetrationDepth(), timeStepSizeParticles);
}
}
},
*particleAccessor);
reduceAndSwapContactHistory(*particleStorage);
// add hydrodynamic force
lbm_mesapd_coupling::AddHydrodynamicInteractionKernel addHydrodynamicInteraction;
particleStorage->forEachParticle(useOpenMP, mesa_pd::kernel::SelectLocal(), *particleAccessor,
addHydrodynamicInteraction, *particleAccessor);
// add external forces
particleStorage->forEachParticle(
useOpenMP, mesa_pd::kernel::SelectLocal(), *particleAccessor,
[particleDensityRatio, acceleration](const size_t idx, auto& ac) {
mesa_pd::addForceAtomic(idx, ac,
ac.getVolume(idx) *
Vector3< real_t >((*acceleration)[0], (*acceleration)[1],
(particleDensityRatio - real_c(1)) * (*acceleration)[2]));
},
*particleAccessor);
reduceProperty.operator()< mesa_pd::ForceTorqueNotification >(*particleStorage);
particleStorage->forEachParticle(useOpenMP, mesa_pd::kernel::SelectLocal(), *particleAccessor,
vvIntegratorPostForce, *particleAccessor);
syncCall();
}
// has to be evaluated here before the force info is erased from particles
if (t % evaluationFrequency == uint_c(0))
totalHydrodynamicForceOnParticles = getTotalHydrodynamicForceOnParticles(particleAccessor);
particleStorage->forEachParticle(useOpenMP, mesa_pd::kernel::SelectAll(), *particleAccessor,
resetHydrodynamicForceTorque, *particleAccessor);
timingPool["Mesa_pd"].end();
// update particle mapping
timeloopAfterParticles.singleStep(timingPool, true);
timingPool["Evaluation"].start();
if (t % evaluationFrequency == uint_c(0))
{
averageDataSliceEvaluator();
totalFluidMassEvaluator.computeMass(blockForest, antidunesBoundaryHandling);
bedloadTransportEvaluator();
meanVelocityComputer();
WALBERLA_ROOT_SECTION()
{
write2DVectorToFile(averageDataSliceEvaluator.getSolidVolumeFractionVector(),
averageDataSliceEvaluator.getXLen(), averageDataSliceEvaluator.getZLen(),
baseFolderName + "/svfSlice_" + std::to_string(t) + ".txt");
write2DVectorToFile(averageDataSliceEvaluator.getFillLevelVector(), averageDataSliceEvaluator.getXLen(),
averageDataSliceEvaluator.getZLen(),
baseFolderName + "/fillSlice_" + std::to_string(t) + ".txt");
write2DVectorToFile(averageDataSliceEvaluator.getVelocityXVector(), averageDataSliceEvaluator.getXLen(),
averageDataSliceEvaluator.getZLen(),
baseFolderName + "/velXSlice_" + std::to_string(t) + ".txt");
std::ofstream bedloadFile(bedLoadTransportFileName, std::ofstream::app);
bedloadFile << t << " " << bedloadTransportEvaluator.getTransportRate() << " "
<< bedloadTransportEvaluator.getAverageVelocity() << " " << totalHydrodynamicForceOnParticles[0]
<< " " << totalHydrodynamicForceOnParticles[1] << " " << totalHydrodynamicForceOnParticles[2]
<< "\n";
bedloadFile.close();
WALBERLA_LOG_DEVEL("____________________________________________________________________");
WALBERLA_LOG_DEVEL("time step = " << t);
const real_t froudeNumber =
(*meanVelocity)[0] / real_c(std::sqrt(liquidHeight * std::abs((*acceleration)[2])));
const real_t reynoldsNumber = (*meanVelocity)[0] * liquidHeight / viscosity;
const real_t weberNumber =
real_c(1.0) * (*meanVelocity)[0] * (*meanVelocity)[0] * liquidHeight / surfaceTension;
WALBERLA_LOG_DEVEL(" - Total fluid mass = " << std::setprecision(16) << (*totalFluidMass));
auto maxFluidZPos = averageDataSliceEvaluator.getMaxFluidZPos();
WALBERLA_LOG_DEVEL(" - Max fluid z-position = " << maxFluidZPos);
WALBERLA_LOG_DEVEL(" - Froude number = " << froudeNumber);
WALBERLA_LOG_DEVEL(" - Reynolds number = " << reynoldsNumber);
WALBERLA_LOG_DEVEL(" - We = " << weberNumber);
WALBERLA_LOG_DEVEL(" - meanVelocity = " << *meanVelocity);
std::ofstream fluidInfoFile(fluidInfoFileName, std::ofstream::app);
fluidInfoFile << t << " " << (*acceleration)[0] << " " << (*meanVelocity)[0] << " " << maxFluidZPos << " "
<< std::setprecision(16) << (*totalFluidMass) << "\n";
fluidInfoFile.close();
if (std::isnan(reynoldsNumber)) WALBERLA_ABORT("reynoldsNumber is inf!")
}
WALBERLA_LOG_DEVEL_ON_ROOT(
" -> CurrentExternalAcceleration in x-direction before update = " << (*acceleration)[0]);
(*forcingAdjuster)(meanVelocity->length());
(*acceleration)[0] = forcingAdjuster->getExternalForcing();
WALBERLA_LOG_DEVEL_ON_ROOT(
" -> CurrentExternalAcceleration in x-direction after update = " << (*acceleration)[0]);
}
timingPool["Evaluation"].end();
if (storeSnapshot)
{
if (t % snapshotFrequency == uint_c(0) && t > uint_c(0))
{
WALBERLA_LOG_INFO_ON_ROOT("Writing checkpointing file in time step " << t)
blockForest->saveBlockData(snapshotBaseFolder + std::string("/tmp_") += pdfFieldFile, pdfFieldID);
blockForest->saveBlockData(snapshotBaseFolder + "/tmp_" += fillFieldFile, fillFieldID);
blockForest->saveBlockData(snapshotBaseFolder + "/tmp_" += excessMassFieldFile, excessMassFieldID);
blockForest->saveBlockData(snapshotBaseFolder + "/tmp_" += particleStorageFile, particleStorageID);
WALBERLA_ROOT_SECTION()
{
std::string tmpCheckpointConfigFile = snapshotBaseFolder + "/tmp_" += checkpointConfigFile;
std::ofstream file;
file.open(tmpCheckpointConfigFile.c_str());
file << std::setprecision(16);
file << t + 1 << "\n";
file << (*acceleration)[0] << "\n";
file.close();
}
forcingAdjuster->storePIDSnapshot(snapshotBaseFolder + "/" + "pidState.file");
WALBERLA_MPI_BARRIER();
// rename checkpoint files to "real" ones
// otherwise, the checkpointed state might be incomplete if the simulation stops due to over time during
// checkpointing
WALBERLA_ROOT_SECTION()
{
renameFile(snapshotBaseFolder + "/tmp_" += pdfFieldFile, snapshotBaseFolder + "/" += pdfFieldFile);
renameFile(snapshotBaseFolder + "/tmp_" += fillFieldFile, snapshotBaseFolder + "/" += fillFieldFile);
renameFile(snapshotBaseFolder + "/tmp_" += excessMassFieldFile,
snapshotBaseFolder + "/" += excessMassFieldFile);
renameFile(snapshotBaseFolder + "/tmp_" += particleStorageFile,
snapshotBaseFolder + "/" += particleStorageFile);
renameFile(snapshotBaseFolder + "/tmp_" += checkpointConfigFile,
snapshotBaseFolder + "/" += checkpointConfigFile);
}
}
}
if (t % performanceLogFrequency == uint_c(0) && t > uint_c(0)) { timingPool.logResultOnRoot(); }
}
return EXIT_SUCCESS;
}
} // namespace antidunes
} // namespace walberla
int main(int argc, char** argv) { return walberla::antidunes::main(argc, argv); }
BlockForestParameters
{
cellsPerBlock < 50, 20, 40 >;
periodicity < 1, 1, 0 >;
loadSnapshot false;
storeSnapshot true;
snapshotFrequency 10000;
snapshotBaseFolder snapshots;
}
DomainParameters
{
domainSize <3200, 60, 160>;
wavePeriods 1; // never set to 0 -> division by zero, even if you initialize a flat particle bed
liquidHeightFactor 2.862; // h_0 / d (water height / avg particle diameter) -> from experiment [E1=2.862, E2=3.1724, E3=3.27586, E4=3.5862]
floorHeightFactor 4.1393; // from domain bottom to sine's average
initialAmplitude 0; // defined from minimum peak to maximum peak as by Pascal et al. (2021)
}
PIDParameters
{
targetMeanVelocityMagnitude 0.02;
proportionalGain 2e-4;
derivativeGain 1e-6;
integralGain 2e-4;
maxRamp 1e-4;
minActuatingVariable 0;
maxActuatingVariable 1e-3;
}
PhysicsParameters
{
enableWetting false;
timesteps 2000000;
Re 3100; // [E1=3100, E2=3772, E3=4180, E4=4800]
Fr 1.31; // [E1=1.31, E2=1.38, E3=1.44, E4=1.45]
We 15.6188; // [E1=15.6188, E2=21.48, E3=25.54, E4=30.2493]
}
ParticleParameters
{
inFileName spheres_out.dat;
bedCopiesInX 1;
bedCopiesInY 1;
densityRatio 2.55;
fixingHeightFactor 1.5; // proportional to the mean particle diameter
frictionCoefficient 0.5;
restitutionCoefficient 0.97;
numSubCycles 10;
useLubricationCorrection true;
useNoSlipParticles false;
}
ModelParameters
{
pdfReconstructionModel OnlyMissing;
pdfRefillingModel EquilibriumRefilling;
excessMassDistributionModel EvenlyNewInterfaceFallbackLiquid;
curvatureModel FiniteDifferenceMethod;
useSimpleMassExchange false;
cellConversionThreshold 1e-2;
cellConversionForceThreshold 1e-1;
}
EvaluationParameters
{
performanceLogFrequency 10000;
evaluationFrequency 1000;
baseFolderName eval;
}
StabilityChecker
{
checkFrequency 0;
streamOutput false;
vtkOutput true;
}
BoundaryParameters
{
// X
//Border { direction W; walldistance -1; NoSlip{} }
//Border { direction E; walldistance -1; NoSlip{} }
// Y
//Border { direction N; walldistance -1; NoSlip{} }
//Border { direction S; walldistance -1; NoSlip{} }
// Z
Border { direction T; walldistance -1; NoSlip{} }
Border { direction B; walldistance -1; NoSlip{} }
}
MeshOutputParameters
{
writeFrequency 0;
baseFolder mesh-out;
}
VTK
{
fluid_field
{
writeFrequency 1000;
ghostLayers 0;
baseFolder vtk-out;
samplingResolution 1;
writers
{
fill_level;
mapped_flag;
velocity;
density;
//curvature;
//normal;
//obstacle_normal;
//pdf;
//flag;
//force;
}
CellBB_filter {
min < 0, 29, 0 >;
max < 3200, 30, 160 >;
}
inclusion_filters
{
CellBB_filter;
//liquidInterfaceFilter; // only include liquid and interface cells in VTK output
}
before_functions
{
//ghost_layer_synchronization; // only needed if writing the ghost layer
gas_cell_zero_setter; // sets velocity=0 and density=1 all gas cells before writing VTK output
}
}
domain_decomposition
{
writeFrequency 10000000000;
baseFolder vtk-out;
outputDomainDecomposition true;
}
}
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file Boundary.h
//! \ingroup free_surface
//! \author Martin Bauer
//! \author Christoph Schwarzmeier <christoph.schwarzmeier@fau.de>
//! \brief Boundary handling for the free surface LBM module.
//
//======================================================================================================================
#pragma once
#include "blockforest/StructuredBlockForest.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "boundary/BoundaryHandling.h"
#include "field/GhostLayerField.h"
#include "geometry/initializer/InitializationManager.h"
#include "geometry/initializer/OverlapFieldFromBody.h"
#include "lbm/boundary/all.h"
#include "lbm/field/PdfField.h"
#include "lbm/free_surface/FlagInfo.h"
#include "lbm/free_surface/InitFunctions.h"
#include "lbm/free_surface/InterfaceFromFillLevel.h"
#include "lbm/free_surface/boundary/SimplePressureWithFreeSurface.h"
#include "lbm_mesapd_coupling/momentum_exchange_method/boundary/CurvedLinear.h"
#include "lbm_mesapd_coupling/momentum_exchange_method/boundary/SimpleBB.h"
namespace walberla
{
namespace antidunes
{
namespace free_surface
{
/***********************************************************************************************************************
* Boundary handling for the free surface LBM extension.
**********************************************************************************************************************/
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
class AntidunesBoundaryHandling
{
public:
using flag_t = typename FlagField_T::value_type;
using Stencil_T = typename LatticeModel_T::Stencil;
using CommunicationStencil_T =
std::conditional_t< LatticeModel_T::Stencil::D == uint_t(2), stencil::D2Q9, stencil::D3Q27 >;
using PdfField_T = lbm::PdfField< LatticeModel_T >;
// boundary
using NoSlip_T = lbm::NoSlip< LatticeModel_T, flag_t >;
using FreeSlip_T = lbm::FreeSlip< LatticeModel_T, FlagField_T >;
using UBB_T = lbm::UBB< LatticeModel_T, flag_t >;
using Pressure_T = walberla::free_surface::SimplePressureWithFreeSurface< LatticeModel_T, FlagField_T >;
using Outlet_T = lbm::Outlet< LatticeModel_T, FlagField_T, 4, 3 >;
using UBB_Inflow_T =
lbm::UBB< LatticeModel_T, flag_t >; // creates interface cells in the direction of the prescribed velocity, i.e.,
// represents an inflow boundary condition
using MovingObstacle_T = lbm_mesapd_coupling::SimpleBB< LatticeModel_T, FlagField_T, ParticleAccessor_T >;
// handling type
using BoundaryHandling_T =
BoundaryHandling< FlagField_T, Stencil_T, NoSlip_T, UBB_T, UBB_Inflow_T, Pressure_T, Pressure_T, Outlet_T,
FreeSlip_T,
MovingObstacle_T >; // 2 pressure boundaries with different densities, e.g., inflow-outflow
using FlagInfo_T = walberla::free_surface::FlagInfo< FlagField_T >;
// constructor
AntidunesBoundaryHandling(const std::shared_ptr< StructuredBlockForest >& blockForest, BlockDataID pdfFieldID,
BlockDataID fillLevelID, BlockDataID particleFieldID,
const shared_ptr< ParticleAccessor_T >& ac,
std::function< real_t(const Vector3< real_t >&) > hydrostaticDensityFct);
// initialize fluid field from config file using (quotes indicate the string to be used in the file):
// - "CellInterval" (see src/geometry/initializer/BoundaryFromCellInterval.h)
// - "Border" (see src/geometry/initializer/BoundaryFromDomainBorder.h)
// - "Image" (see src/geometry/initializer/BoundaryFromImage.h)
// - "Body" (see src/geometry/initializer/OverlapFieldFromBody.h)
inline void initFromConfig(const Config::BlockHandle& block);
// initialize free surface object from geometric body (see src/geometry/initializer/OverlapFieldFromBody.h)
template< typename Body_T >
inline void addFreeSurfaceObject(const Body_T& body, bool addOrSubtract = false);
// clear and initialize flags in every cell according to the fill level and initialize fill level in boundaries (with
// value 1) and obstacles such that the bubble model does not detect obstacles as gas cells
void initFlagsFromFillLevel();
inline void setNoSlipAtBorder(stencil::Direction d, cell_idx_t wallDistance = cell_idx_c(0));
inline void setNoSlipAtAllBorders(cell_idx_t wallDistance = cell_idx_c(0));
void setNoSlipInCell(const Cell& globalCell);
inline void setFreeSlipAtBorder(stencil::Direction d, cell_idx_t wallDistance = cell_idx_c(0));
inline void setFreeSlipAtAllBorders(cell_idx_t wallDistance = cell_idx_c(0));
void setFreeSlipInCell(const Cell& globalCell);
void setUBBInCell(const Cell& globalCell, const Vector3< real_t >& velocity);
// UBB that generates interface cells to resemble an inflow boundary
void setInflowInCell(const Cell& globalCell, const Vector3< real_t >& velocity);
inline void setPressure(real_t density);
void setPressureOutflow(real_t density);
void setBodyForce(const Vector3< real_t >& bodyForce);
void enableBubbleOutflow(BubbleModelBase* bubbleModel);
// checks if an obstacle cell is located in an outermost ghost layer (corners are explicitly ignored, as they do not
// influence periodic communication)
Vector3< bool > isObstacleInGlobalGhostLayer();
// flag management
const walberla::free_surface::FlagInfo< FlagField_T >& getFlagInfo() const { return flagInfo_; }
// flag IDs
static const field::FlagUID noSlipFlagID;
static const field::FlagUID ubbFlagID;
static const field::FlagUID ubbInflowFlagID;
static const field::FlagUID pressureFlagID;
static const field::FlagUID pressureOutflowFlagID;
static const field::FlagUID outletFlagID;
static const field::FlagUID freeSlipFlagID;
static const field::FlagUID movingObstacleFlagID;
// boundary IDs
static const BoundaryUID noSlipBoundaryID;
static const BoundaryUID ubbBoundaryID;
static const BoundaryUID ubbInflowBoundaryID;
static const BoundaryUID pressureBoundaryID;
static const BoundaryUID pressureOutflowBoundaryID;
static const BoundaryUID outletBoundaryID;
static const BoundaryUID freeSlipBoundaryID;
static const BoundaryUID movingObstacleBoundaryID;
inline BlockDataID getHandlingID() const { return handlingID_; }
inline BlockDataID getPdfFieldID() const { return pdfFieldID_; }
inline BlockDataID getFillFieldID() const { return fillFieldID_; }
inline BlockDataID getFlagFieldID() const { return flagFieldID_; }
inline BlockDataID getParticleFieldID() const { return particleFieldID_; }
inline std::shared_ptr< StructuredBlockForest > getBlockForest() const { return blockForest_; };
inline shared_ptr< ParticleAccessor_T > getParticleAccessor() const { return ac_; }
inline std::function< real_t(const Vector3< real_t >&) > getHydrostaticDensityFct() const
{
return hydrostaticDensityFct_;
}
// executes standard waLBerla boundary handling
class ExecuteBoundaryHandling
{
public:
ExecuteBoundaryHandling(const BlockDataID& collection) : handlingID_(collection) {}
void operator()(IBlock* const block) const
{
BoundaryHandling_T* const handling = block->getData< BoundaryHandling_T >(handlingID_);
// reset "near boundary" flags
handling->refresh();
(*handling)();
}
protected:
BlockDataID handlingID_;
}; // class ExecuteBoundaryHandling
ExecuteBoundaryHandling getBoundarySweep() const { return ExecuteBoundaryHandling(getHandlingID()); }
private:
walberla::free_surface::FlagInfo< FlagField_T > flagInfo_;
// register standard waLBerla initializers
geometry::initializer::InitializationManager getInitManager();
std::shared_ptr< StructuredBlockForest > blockForest_;
BlockDataID flagFieldID_;
BlockDataID pdfFieldID_;
BlockDataID fillFieldID_;
BlockDataID particleFieldID_;
shared_ptr< ParticleAccessor_T > ac_;
std::function< real_t(const Vector3< real_t >&) > hydrostaticDensityFct_;
BlockDataID handlingID_;
blockforest::communication::UniformBufferedScheme< CommunicationStencil_T > comm_;
}; // class AntidunesBoundaryHandling
} // namespace free_surface
} // namespace antidunes
} // namespace walberla
#include "AntidunesBoundaryHandling.impl.h"
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file AntidunesBoundaryHandling.impl.h
//! \ingroup free_surface
//! \author Martin Bauer
//! \author Christoph Schwarzmeier <christoph.schwarzmeier@fau.de>
//! \brief Boundary handling for the free surface LBM module.
//
//======================================================================================================================
#pragma once
#include "field/AddToStorage.h"
#include "field/FlagField.h"
#include "field/communication/PackInfo.h"
#include "geometry/initializer/BoundaryFromCellInterval.h"
#include "geometry/initializer/BoundaryFromDomainBorder.h"
#include "geometry/initializer/BoundaryFromImage.h"
#include "geometry/structured/GrayScaleImage.h"
#include "lbm/free_surface/FlagInfo.h"
#include "lbm/free_surface/InterfaceFromFillLevel.h"
#include "lbm/lattice_model/CollisionModel.h"
#include "AntidunesBoundaryHandling.h"
namespace walberla
{
namespace antidunes
{
namespace free_surface
{
namespace internal
{
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
class BoundaryBlockDataHandling
: public domain_decomposition::BlockDataHandling< typename AntidunesBoundaryHandling<
LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::BoundaryHandling_T >
{
public:
using BoundaryHandling_T =
typename AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T,
ParticleAccessor_T >::BoundaryHandling_T; // handling as defined in
// AntidunesBoundaryHandling.h
BoundaryBlockDataHandling(
const AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >* boundary)
: boundary_(boundary)
{}
// initialize standard waLBerla boundary handling
BoundaryHandling_T* initialize(IBlock* const block)
{
using B = AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >;
using flag_t = typename B::flag_t;
// get fields
FlagField_T* const flagField = block->getData< FlagField_T >(boundary_->getFlagFieldID());
typename B::PdfField_T* const pdfField = block->getData< typename B::PdfField_T >(boundary_->getPdfFieldID());
lbm_mesapd_coupling::ParticleField_T* const particleField =
block->getData< lbm_mesapd_coupling::ParticleField_T >(boundary_->getParticleFieldID());
auto interfaceFlag = flag_t(flagField->getFlag(walberla::free_surface::flagIDs::interfaceFlagID));
auto liquidFlag = flag_t(flagField->getFlag(walberla::free_surface::flagIDs::liquidFlagID));
// domainMask is used to identify liquid and interface cells
auto domainMask = flag_t(liquidFlag | interfaceFlag);
WALBERLA_ASSERT(domainMask != 0);
// initialize boundary conditions
typename B::UBB_T ubb(B::ubbBoundaryID, B::ubbFlagID, pdfField, flagField);
typename B::UBB_Inflow_T ubbInflow(B::ubbInflowBoundaryID, B::ubbInflowFlagID, pdfField, flagField);
typename B::NoSlip_T noslip(B::noSlipBoundaryID, B::noSlipFlagID, pdfField);
typename B::Pressure_T pressure(B::pressureBoundaryID, B::pressureFlagID, block, pdfField, flagField,
interfaceFlag, real_c(1.0));
typename B::Pressure_T pressureOutflow(B::pressureOutflowBoundaryID, B::pressureOutflowFlagID, block, pdfField,
flagField, interfaceFlag, real_c(1.0));
typename B::Outlet_T outlet(B::outletBoundaryID, B::outletFlagID, pdfField, flagField, domainMask);
typename B::FreeSlip_T freeSlip(B::freeSlipBoundaryID, B::freeSlipFlagID, pdfField, flagField, domainMask);
typename B::MovingObstacle_T curvedLinear(B::movingObstacleBoundaryID, B::movingObstacleFlagID, pdfField,
flagField, particleField, boundary_->getParticleAccessor(), domainMask,
*boundary_->getBlockForest(), *block,
boundary_->getHydrostaticDensityFct());
return new BoundaryHandling_T("Boundary Handling", flagField, domainMask, noslip, ubb, ubbInflow, pressure,
pressureOutflow, outlet, freeSlip, curvedLinear);
}
void serialize(IBlock* const block, const BlockDataID& id, mpi::SendBuffer& buffer)
{
BoundaryHandling_T* const boundaryHandlingPtr = block->getData< BoundaryHandling_T >(id);
CellInterval everyCell = boundaryHandlingPtr->getFlagField()->xyzSizeWithGhostLayer();
boundaryHandlingPtr->pack(buffer, everyCell, true);
}
BoundaryHandling_T* deserialize(IBlock* const block) { return initialize(block); }
void deserialize(IBlock* const block, const BlockDataID& id, mpi::RecvBuffer& buffer)
{
BoundaryHandling_T* const boundaryHandlingPtr = block->getData< BoundaryHandling_T >(id);
CellInterval everyCell = boundaryHandlingPtr->getFlagField()->xyzSizeWithGhostLayer();
boundaryHandlingPtr->unpack(buffer, everyCell, true);
}
private:
const AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >* boundary_;
}; // class BoundaryBlockDataHandling
// helper function wrapper for adding the flag field to the block storage; since the input parameter for an
// initialization function in field::addFlagFieldToStorage() is a std::function<void(FlagField_T*,IBlock* const)>, we
// need a function wrapper that has both these input parameters; as FlagInfo< FlagField_T >::registerFlags() does not
// have both of these input parameters, a function wrapper with both input parameters is created and the second input
// parameter is simply ignored inside the function wrapper
template< typename FlagField_T >
void flagFieldInitFunction(FlagField_T* flagField, IBlock* const, const Set< field::FlagUID >& obstacleIDs,
const Set< field::FlagUID >& outflowIDs, const Set< field::FlagUID >& inflowIDs,
const Set< field::FlagUID >& freeSlipIDs)
{
// register flags in the flag field
walberla::free_surface::FlagInfo< FlagField_T >::registerFlags(flagField, obstacleIDs, outflowIDs, inflowIDs,
freeSlipIDs);
}
} // namespace internal
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::AntidunesBoundaryHandling(
const std::shared_ptr< StructuredBlockForest >& blockForest, BlockDataID pdfFieldID, BlockDataID fillLevelID,
BlockDataID particleFieldID, const shared_ptr< ParticleAccessor_T >& ac,
std::function< real_t(const Vector3< real_t >&) > hydrostaticDensityFct)
: blockForest_(blockForest), pdfFieldID_(pdfFieldID), fillFieldID_(fillLevelID), particleFieldID_(particleFieldID),
ac_(ac), hydrostaticDensityFct_(std::move(hydrostaticDensityFct)), comm_(blockForest)
{
// initialize obstacleIDs
Set< FlagUID > obstacleIDs;
obstacleIDs += noSlipFlagID;
obstacleIDs += ubbFlagID;
obstacleIDs += ubbInflowFlagID;
obstacleIDs += pressureFlagID;
obstacleIDs += pressureOutflowFlagID;
obstacleIDs += freeSlipFlagID;
obstacleIDs += outletFlagID;
obstacleIDs += movingObstacleFlagID;
// initialize outflowIDs
Set< FlagUID > outflowIDs;
outflowIDs += pressureOutflowFlagID;
outflowIDs += outletFlagID;
// initialize outflowIDs
Set< FlagUID > inflowIDs;
inflowIDs += ubbInflowFlagID;
// initialize freeSlipIDs
Set< FlagUID > freeSlipIDs;
freeSlipIDs += freeSlipFlagID;
// create callable function wrapper with input arguments 1 and 2 unset, whereas arguments 3, 4 and 5 are set to be
// obstacleIDs, outflowIDs, and inflowIDs, respectively; this is necessary for field::addFlagFieldToStorage()
auto ffInitFunc = std::bind(internal::flagFieldInitFunction< FlagField_T >, std::placeholders::_1,
std::placeholders::_2, obstacleIDs, outflowIDs, inflowIDs, freeSlipIDs);
// IMPORTANT REMARK: The flag field needs two ghost layers because of function advectMass(). There, the flags of all
// D3Q* neighbors are determined for each cell, including cells in the first ghost layer. Therefore, all D3Q*
// neighbors of the first ghost layer must be accessible. This requires a second ghost layer.
flagFieldID_ = field::addFlagFieldToStorage< FlagField_T >(blockForest, "Flags", uint_c(2), true, ffInitFunc);
// create FlagInfo
flagInfo_ = walberla::free_surface::FlagInfo< FlagField_T >(obstacleIDs, outflowIDs, inflowIDs, freeSlipIDs);
WALBERLA_ASSERT(flagInfo_.isConsistentAcrossBlocksAndProcesses(blockForest, flagFieldID_));
// add boundary handling to blockForest
handlingID_ = blockForest_->addBlockData(
std::make_shared<
internal::BoundaryBlockDataHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T > >(this),
"Boundary Handling");
// create communication object with fill level field, since fill levels determine the flags during the simulation
comm_.addPackInfo(std::make_shared< field::communication::PackInfo< ScalarField_T > >(fillFieldID_));
}
// define IDs (static const variables)
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const field::FlagUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::noSlipFlagID =
field::FlagUID("NoSlip");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const field::FlagUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::ubbFlagID =
field::FlagUID("UBB");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const field::FlagUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::ubbInflowFlagID =
field::FlagUID("UBB_Inflow");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const field::FlagUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::pressureFlagID =
field::FlagUID("Pressure");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const field::FlagUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::pressureOutflowFlagID =
field::FlagUID("PressureOutflow");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const field::FlagUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::outletFlagID =
field::FlagUID("Outlet");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const field::FlagUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::freeSlipFlagID =
field::FlagUID("FreeSlip");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const field::FlagUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::movingObstacleFlagID =
field::FlagUID("MovingObstacle");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const BoundaryUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::noSlipBoundaryID =
BoundaryUID("NoSlip");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const BoundaryUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::ubbBoundaryID =
BoundaryUID("UBB");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const BoundaryUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::ubbInflowBoundaryID =
BoundaryUID("UBB_Inflow");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const BoundaryUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::pressureBoundaryID =
BoundaryUID("Pressure");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const BoundaryUID AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T,
ParticleAccessor_T >::pressureOutflowBoundaryID =
BoundaryUID("PressureOutflow");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const BoundaryUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::outletBoundaryID =
BoundaryUID("Outlet");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const BoundaryUID
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::freeSlipBoundaryID =
BoundaryUID("FreeSlip");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
const BoundaryUID AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T,
ParticleAccessor_T >::movingObstacleBoundaryID =
BoundaryUID("MovingObstacle");
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
geometry::initializer::InitializationManager
AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::getInitManager()
{
using namespace geometry::initializer;
InitializationManager initManager(blockForest_->getBlockStorage());
// define initializers
auto cellIntvInit = std::make_shared< BoundaryFromCellInterval< BoundaryHandling_T > >(*blockForest_, handlingID_);
auto borderInit = std::make_shared< BoundaryFromDomainBorder< BoundaryHandling_T > >(*blockForest_, handlingID_);
auto imgInit =
std::make_shared< BoundaryFromImage< BoundaryHandling_T, geometry::GrayScaleImage > >(*blockForest_, handlingID_);
auto bodyInit = std::make_shared< OverlapFieldFromBody >(*blockForest_, fillFieldID_);
// register initializers
initManager.registerInitializer("CellInterval", cellIntvInit);
initManager.registerInitializer("Border", borderInit);
initManager.registerInitializer("Image", imgInit);
initManager.registerInitializer("Body", bodyInit);
return initManager;
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::initFromConfig(
const Config::BlockHandle& configBlock)
{
// initialize from config file
getInitManager().init(configBlock);
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
template< typename Body_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::addFreeSurfaceObject(
const Body_T& body, bool addOrSubtract)
{
geometry::initializer::OverlapFieldFromBody(*blockForest_, fillFieldID_).init(body, addOrSubtract);
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::setNoSlipAtBorder(
stencil::Direction d, cell_idx_t wallDistance)
{
geometry::initializer::BoundaryFromDomainBorder< BoundaryHandling_T > init(*blockForest_, handlingID_);
init.init(noSlipFlagID, d, wallDistance);
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::setNoSlipAtAllBorders(
cell_idx_t wallDistance)
{
geometry::initializer::BoundaryFromDomainBorder< BoundaryHandling_T > init(*blockForest_, handlingID_);
init.initAllBorders(noSlipFlagID, wallDistance);
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::setNoSlipInCell(
const Cell& globalCell)
{
for (auto blockIt = blockForest_->begin(); blockIt != blockForest_->end(); ++blockIt)
{
BoundaryHandling_T* const handling = blockIt->template getData< BoundaryHandling_T >(handlingID_);
// transform cell in global coordinates to cell in block local coordinates
Cell blockLocalCell;
blockForest_->transformGlobalToBlockLocalCell(blockLocalCell, *blockIt, globalCell);
handling->forceBoundary(noSlipFlagID, blockLocalCell[0], blockLocalCell[1], blockLocalCell[2]);
}
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::setFreeSlipAtBorder(
stencil::Direction d, cell_idx_t wallDistance)
{
geometry::initializer::BoundaryFromDomainBorder< BoundaryHandling_T > init(*blockForest_, handlingID_);
init.init(freeSlipFlagID, d, wallDistance);
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T,
ParticleAccessor_T >::setFreeSlipAtAllBorders(cell_idx_t wallDistance)
{
geometry::initializer::BoundaryFromDomainBorder< BoundaryHandling_T > init(*blockForest_, handlingID_);
init.initAllBorders(freeSlipFlagID, wallDistance);
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::setFreeSlipInCell(
const Cell& globalCell)
{
for (auto blockIt = blockForest_->begin(); blockIt != blockForest_->end(); ++blockIt)
{
BoundaryHandling_T* const handling = blockIt->template getData< BoundaryHandling_T >(handlingID_);
// transform cell in global coordinates to cell in block local coordinates
Cell blockLocalCell;
blockForest_->transformGlobalToBlockLocalCell(blockLocalCell, *blockIt, globalCell);
handling->forceBoundary(freeSlipFlagID, blockLocalCell[0], blockLocalCell[1], blockLocalCell[2]);
}
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::setPressure(
real_t density)
{
for (auto blockIt = blockForest_->begin(); blockIt != blockForest_->end(); ++blockIt)
{
BoundaryHandling_T* const handling = blockIt->template getData< BoundaryHandling_T >(handlingID_);
Pressure_T& pressure =
handling->template getBoundaryCondition< Pressure_T >(handling->getBoundaryUID(pressureFlagID));
pressure.setLatticeDensity(density);
}
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::setUBBInCell(
const Cell& globalCell, const Vector3< real_t >& velocity)
{
for (auto blockIt = blockForest_->begin(); blockIt != blockForest_->end(); ++blockIt)
{
BoundaryHandling_T* const handling = blockIt->template getData< BoundaryHandling_T >(handlingID_);
typename UBB_Inflow_T::Velocity ubbVel(velocity);
// transform cell in global coordinates to cell in block-local coordinates
Cell blockLocalCell;
blockForest_->transformGlobalToBlockLocalCell(blockLocalCell, *blockIt, globalCell);
// get block cell bounding box to check if cell is contained in block
CellInterval blockCellBB = blockForest_->getBlockCellBB(*blockIt);
// flag field has two ghost layers so blockCellBB is actually larger than returned; this is relevant for setups
// where the UBB is set in a ghost layer cell
blockCellBB.expand(cell_idx_c(2));
if (blockCellBB.contains(globalCell))
{
handling->forceBoundary(ubbFlagID, blockLocalCell[0], blockLocalCell[1], blockLocalCell[2], ubbVel);
}
}
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::setInflowInCell(
const Cell& globalCell, const Vector3< real_t >& velocity)
{
for (auto blockIt = blockForest_->begin(); blockIt != blockForest_->end(); ++blockIt)
{
BoundaryHandling_T* const handling = blockIt->template getData< BoundaryHandling_T >(handlingID_);
typename UBB_Inflow_T::Velocity ubbVel(velocity);
// transform cell in global coordinates to cell in block-local coordinates
Cell blockLocalCell;
blockForest_->transformGlobalToBlockLocalCell(blockLocalCell, *blockIt, globalCell);
// get block cell bounding box to check if cell is contained in block
CellInterval blockCellBB = blockForest_->getBlockCellBB(*blockIt);
// flag field has two ghost layers so blockCellBB is actually larger than returned; this is relevant for setups
// where the UBB is set in a ghost layer cell
blockCellBB.expand(cell_idx_c(2));
if (blockCellBB.contains(globalCell))
{
handling->forceBoundary(ubbInflowFlagID, blockLocalCell[0], blockLocalCell[1], blockLocalCell[2], ubbVel);
}
}
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::setPressureOutflow(
real_t density)
{
for (auto blockIt = blockForest_->begin(); blockIt != blockForest_->end(); ++blockIt)
{
BoundaryHandling_T* const handling = blockIt->template getData< BoundaryHandling_T >(handlingID_);
Pressure_T& pressure =
handling->template getBoundaryCondition< Pressure_T >(handling->getBoundaryUID(pressureOutflowFlagID));
pressure.setLatticeDensity(density);
}
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T, ParticleAccessor_T >::enableBubbleOutflow(
BubbleModelBase* bubbleModel)
{
for (auto blockIt = blockForest_->begin(); blockIt != blockForest_->end(); ++blockIt)
{
BoundaryHandling_T* const handling = blockIt->template getData< BoundaryHandling_T >(handlingID_);
// get pressure from boundary handling
Pressure_T& pressure =
handling->template getBoundaryCondition< Pressure_T >(handling->getBoundaryUID(pressureFlagID));
Pressure_T& pressureOutflow =
handling->template getBoundaryCondition< Pressure_T >(handling->getBoundaryUID(pressureOutflowFlagID));
// set pressure in bubble model
pressure.setBubbleModel(bubbleModel);
pressureOutflow.setBubbleModel(bubbleModel);
}
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
Vector3< bool > AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T,
ParticleAccessor_T >::isObstacleInGlobalGhostLayer()
{
Vector3< bool > isObstacleInGlobalGhostLayer(false, false, false);
for (auto blockIt = blockForest_->begin(); blockIt != blockForest_->end(); ++blockIt)
{
const FlagField_T* const flagField = blockIt->template getData< const FlagField_T >(flagFieldID_);
const CellInterval domainCellBB = blockForest_->getDomainCellBB();
// disable OpenMP such that loop termination works correctly
WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ_OMP(flagField, uint_c(1), omp critical, {
// get cell in global coordinates
Cell globalCell = Cell(x, y, z);
blockForest_->transformBlockLocalToGlobalCell(globalCell, *blockIt);
// check if the current cell is located in a global ghost layer
const bool isCellInGlobalGhostLayerX =
globalCell[0] < domainCellBB.xMin() || globalCell[0] > domainCellBB.xMax();
const bool isCellInGlobalGhostLayerY =
globalCell[1] < domainCellBB.yMin() || globalCell[1] > domainCellBB.yMax();
const bool isCellInGlobalGhostLayerZ =
globalCell[2] < domainCellBB.zMin() || globalCell[2] > domainCellBB.zMax();
// skip corners, as they do not influence periodic communication
if ((isCellInGlobalGhostLayerX && (isCellInGlobalGhostLayerY || isCellInGlobalGhostLayerZ)) ||
(isCellInGlobalGhostLayerY && isCellInGlobalGhostLayerZ))
{
continue;
}
if (!isObstacleInGlobalGhostLayer[0] && isCellInGlobalGhostLayerX &&
isPartOfMaskSet(flagField->get(x, y, z), flagField->getMask(flagInfo_.getObstacleIDSet())))
{
isObstacleInGlobalGhostLayer[0] = true;
}
if (!isObstacleInGlobalGhostLayer[1] && isCellInGlobalGhostLayerY &&
isPartOfMaskSet(flagField->get(x, y, z), flagField->getMask(flagInfo_.getObstacleIDSet())))
{
isObstacleInGlobalGhostLayer[1] = true;
}
if (!isObstacleInGlobalGhostLayer[2] && isCellInGlobalGhostLayerZ &&
isPartOfMaskSet(flagField->get(x, y, z), flagField->getMask(flagInfo_.getObstacleIDSet())))
{
isObstacleInGlobalGhostLayer[2] = true;
}
if (isObstacleInGlobalGhostLayer[0] && isObstacleInGlobalGhostLayer[1] && isObstacleInGlobalGhostLayer[2])
{
break; // there is no need to check other cells on this block
}
}) // WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ_OMP
}
mpi::allReduceInplace(isObstacleInGlobalGhostLayer, mpi::LOGICAL_OR);
return isObstacleInGlobalGhostLayer;
}
template< typename LatticeModel_T, typename FlagField_T, typename ScalarField_T, typename ParticleAccessor_T >
void AntidunesBoundaryHandling< LatticeModel_T, FlagField_T, ScalarField_T,
ParticleAccessor_T >::initFlagsFromFillLevel()
{
const Vector3< bool > isObstacleInGlobalGhostLayer = this->isObstacleInGlobalGhostLayer();
WALBERLA_ROOT_SECTION()
{
if ((blockForest_->isXPeriodic() && isObstacleInGlobalGhostLayer[0]) ||
(blockForest_->isYPeriodic() && isObstacleInGlobalGhostLayer[1]) ||
(blockForest_->isZPeriodic() && isObstacleInGlobalGhostLayer[2]))
{
WALBERLA_LOG_WARNING_ON_ROOT(
"WARNING: An obstacle cell is located in a global outermost ghost layer in a periodic "
"direction. Be aware that due to periodicity, this obstacle cell will be "
"overwritten during communication.");
}
}
// communicate fill level (neighborhood is used in initialization)
comm_();
// initialize fill level in boundaries (with value 1), i.e., obstacles such that the bubble model does not detect
// obstacles as gas cells
walberla::free_surface::initFillLevelsInBoundaries< BoundaryHandling_T, typename LatticeModel_T::Stencil,
ScalarField_T >(blockForest_, handlingID_, fillFieldID_);
// clear and initialize flags in every cell according to the fill level
walberla::free_surface::initFlagsFromFillLevels< BoundaryHandling_T, typename LatticeModel_T::Stencil, FlagField_T,
const ScalarField_T >(blockForest_, flagInfo_, handlingID_,
fillFieldID_);
}
} // namespace free_surface
} // namespace antidunes
} // namespace walberla
import sympy as sp
import pystencils as ps
from lbmpy.creationfunctions import LBMConfig, LBMOptimisation, create_lb_collision_rule
from lbmpy.enums import ForceModel, Method, Stencil
from lbmpy.stencils import LBStencil
from pystencils_walberla import CodeGeneration
from lbmpy_walberla import generate_lattice_model
with CodeGeneration() as ctx:
# general parameters
layout = 'fzyx'
data_type = "float64" if ctx.double_accuracy else "float32"
stencil = LBStencil(Stencil.D3Q27)
omega = sp.Symbol('omega')
force_field = ps.fields(f"force(3): {data_type}[3D]", layout=layout)
# method definition
lbm_config = LBMConfig(stencil=stencil,
method=Method.CUMULANT,
relaxation_rate=omega,
compressible=True,
force=force_field,
zero_centered=False,
streaming_pattern='pull', # free surface implementation only works with pull pattern
galilean_correction=True)
# optimizations to be used by the code generator
lbm_opt = LBMOptimisation(cse_global=True,
field_layout=layout)
collision_rule = create_lb_collision_rule(lbm_config=lbm_config,
lbm_optimisation=lbm_opt)
generate_lattice_model(ctx, "AntidunesLatticeModel", collision_rule, field_layout=layout)