Commit d5c5fac4 authored by Frederik Hennig's avatar Frederik Hennig Committed by Markus Holzer
Browse files

Revamp LB GPU Benchmark App for In-Place Streaming

parent 803a82cb
from pystencils.field import fields
from lbmpy.advanced_streaming.utility import get_timesteps, Timestep
from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil
from lbmpy.creationfunctions import create_lb_collision_rule, create_lb_method, create_lb_update_rule
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow
from pystencils_walberla import CodeGeneration, generate_sweep, generate_info_header
......
......@@ -4,49 +4,27 @@ waLBerla_link_files_to_builddir( "*.py" )
waLBerla_link_files_to_builddir( "simulation_setup" )
foreach (config srt trt mrt smagorinsky entropic smagorinsky_noopt entropic_kbc_n4
entropic_kbc_n4_noopt mrt_noopt mrt_full mrt_full_noopt
cumulant cumulant_d3q27
srt_d3q27 mrt_d3q27 mrt_d3q27_noopt smagorinsky_d3q27 smagorinsky_d3q27_noopt mrt_full_d3q27 mrt_full_d3q27_noopt)
waLBerla_generate_target_from_python(NAME UniformGridGPUGenerated_${config}
FILE UniformGridGPU.py
CODEGEN_CFG ${config}
OUT_FILES UniformGridGPU_LatticeModel.cpp UniformGridGPU_LatticeModel.h
UniformGridGPU_LbKernel.cu UniformGridGPU_LbKernel.h
UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
UniformGridGPU_PackInfo.cu UniformGridGPU_PackInfo.h
UniformGridGPU_MacroSetter.cpp UniformGridGPU_MacroSetter.h
UniformGridGPU_MacroGetter.cpp UniformGridGPU_MacroGetter.h
UniformGridGPU_Defines.h
)
waLBerla_add_executable(NAME UniformGridBenchmarkGPU_${config}
FILES UniformGridGPU.cpp
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk gui UniformGridGPUGenerated_${config})
set_target_properties( UniformGridBenchmarkGPU_${config} PROPERTIES CXX_VISIBILITY_PRESET hidden)
endforeach ()
foreach (config srt trt mrt smagorinsky entropic)
waLBerla_generate_target_from_python(NAME UniformGridGPUGenerated_AA_${config}
FILE UniformGridGPU_AA.py
CODEGEN_CFG ${config}
OUT_FILES UniformGridGPU_AA_PackInfoPull.cu UniformGridGPU_AA_PackInfoPull.h
UniformGridGPU_AA_LbKernelOdd.cu UniformGridGPU_AA_LbKernelOdd.h
UniformGridGPU_AA_LbKernelEven.cu UniformGridGPU_AA_LbKernelEven.h
UniformGridGPU_AA_PackInfoPush.cu UniformGridGPU_AA_PackInfoPush.h
UniformGridGPU_AA_MacroSetter.cpp UniformGridGPU_AA_MacroSetter.h
UniformGridGPU_AA_MacroGetter.cpp UniformGridGPU_AA_MacroGetter.h
UniformGridGPU_AA_Defines.h
)
waLBerla_add_executable(NAME UniformGridBenchmarkGPU_AA_${config}
FILES UniformGridGPU_AA.cpp
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk gui UniformGridGPUGenerated_AA_${config})
set_target_properties( UniformGridBenchmarkGPU_AA_${config} PROPERTIES CXX_VISIBILITY_PRESET hidden)
endforeach ()
foreach(streaming_pattern aa) # choose from {pull, push, aa, esotwist}
foreach(stencil d3q27) # choose from {d3q19 d3q27}
foreach (collision_setup srt trt mrt cumulant) # choose from {srt trt mrt cumulant entropic smagorinsky}
set(config ${stencil}_${streaming_pattern}_${collision_setup})
waLBerla_generate_target_from_python(NAME UniformGridGPUGenerated_${config}
FILE UniformGridGPU.py
CODEGEN_CFG ${config}
OUT_FILES UniformGridGPU_LbKernel.cu UniformGridGPU_LbKernel.h
UniformGridGPU_PackInfoEven.cu UniformGridGPU_PackInfoEven.h
UniformGridGPU_PackInfoOdd.cu UniformGridGPU_PackInfoOdd.h
UniformGridGPU_NoSlip.cu UniformGridGPU_NoSlip.h
UniformGridGPU_UBB.cu UniformGridGPU_UBB.h
UniformGridGPU_MacroSetter.cu UniformGridGPU_MacroSetter.h
UniformGridGPU_InfoHeader.h
)
waLBerla_add_executable(NAME UniformGridGPU_${config}
FILES UniformGridGPU.cpp
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk UniformGridGPUGenerated_${config})
set_target_properties( UniformGridGPU_${config} PROPERTIES CXX_VISIBILITY_PRESET hidden)
endforeach ()
endforeach()
endforeach()
\ No newline at end of file
import sympy as sp
import numpy as np
import pystencils as ps
from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule, create_lb_collision_rule
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
from pystencils_walberla import generate_pack_info_from_kernel
from lbmpy_walberla import generate_lattice_model, generate_boundary
from pystencils_walberla import CodeGeneration, generate_sweep
from pystencils.data_types import TypedSymbol
from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from lbmpy.advanced_streaming import Timestep, is_inplace
from lbmpy.advanced_streaming.utility import streaming_patterns
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil
from pystencils_walberla import CodeGeneration, generate_info_header, generate_sweep
from lbmpy_walberla import generate_alternating_lbm_sweep, generate_lb_pack_info, generate_alternating_lbm_boundary
omega = sp.symbols("omega")
omega_free = sp.Symbol("omega_free")
omega_fill = sp.symbols("omega_:10")
compile_time_block_size = False
if compile_time_block_size:
......@@ -21,156 +24,158 @@ if compile_time_block_size:
else:
sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
TypedSymbol("cudaBlockSize1", np.int32),
1)
TypedSymbol("cudaBlockSize2", np.int32))
sweep_params = {'block_size': sweep_block_size}
gpu_indexing_params = {'block_size': sweep_block_size}
options_dict = {
'srt': {
'method': 'srt',
'stencil': 'D3Q19',
'relaxation_rate': omega,
'compressible': False,
},
'trt': {
'method': 'trt',
'stencil': 'D3Q19',
'relaxation_rate': omega,
},
'mrt': {
'method': 'mrt',
'stencil': 'D3Q19',
'relaxation_rates': [omega, 1.3, 1.4, 1.2, 1.1, 1.15, 1.234, 1.4235],
'relaxation_rates': [omega, 1, 1, 1, 1, 1, 1],
},
'mrt_full': {
'mrt-overrelax': {
'method': 'mrt',
'stencil': 'D3Q19',
'relaxation_rates': [omega_fill[0], omega, omega_fill[1], omega_fill[2],
omega_fill[3], omega_fill[4], omega_fill[5]],
'relaxation_rates': [omega, 1.3, 1.4, omega, 1.2, 1.1],
},
'entropic': {
'method': 'mrt',
'stencil': 'D3Q19',
'cumulant': {
'method': 'cumulant',
'relaxation_rate': omega,
'compressible': True,
'relaxation_rates': [omega, omega, omega_free, omega_free, omega_free, omega_free],
'entropic': True,
},
'entropic_kbc_n4': {
'method': 'trt-kbc-n4',
'stencil': 'D3Q27',
'cumulant-overrelax': {
'method': 'cumulant',
'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
'compressible': True,
'relaxation_rates': [omega, omega_free],
},
'entropic': {
'method': 'mrt',
'compressible': True,
'relaxation_rates': [omega, omega, omega_free, omega_free, omega_free],
'entropic': True,
},
'smagorinsky': {
'method': 'srt',
'stencil': 'D3Q19',
'smagorinsky': True,
'relaxation_rate': omega,
},
'cumulant': {
'method': 'cumulant',
'stencil': 'D3Q19',
'compressible': True,
'relaxation_rate': omega,
},
}
}
info_header = """
#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
const char * infoStencil = "{stencil}";
const char * infoConfigName = "{configName}";
const char * infoStreamingPattern = "{streaming_pattern}";
const char * infoCollisionSetup = "{collision_setup}";
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""
# DEFAULTS
optimize = True
with CodeGeneration() as ctx:
accessor = StreamPullTwoFieldsAccessor()
# accessor = StreamPushTwoFieldsAccessor()
assert not accessor.is_inplace, "This app does not work for inplace accessors"
config_tokens = ctx.config.split('_')
assert len(config_tokens) >= 3
stencil_str = config_tokens[0]
streaming_pattern = config_tokens[1]
collision_setup = config_tokens[2]
if len(config_tokens) >= 4:
optimize = (config_tokens[3] != 'noopt')
stencil = get_stencil(stencil_str)
assert streaming_pattern in streaming_patterns, f"Invalid streaming pattern: {streaming_pattern}"
options = options_dict[collision_setup]
q = len(stencil)
dim = len(stencil[0])
assert dim == 3, "This app supports only three-dimensional stencils"
pdfs, pdfs_tmp, velocity_field = ps.fields(f"pdfs({q}), pdfs_tmp({q}), velocity(3) : double[3D]", layout='fzyx')
common_options = {
'field_name': 'pdfs',
'temporary_field_name': 'pdfs_tmp',
'kernel_type': accessor,
'optimization': {'cse_global': True,
'cse_pdfs': False}
'stencil': stencil,
'field_name': pdfs.name,
'optimization': {
'target': 'gpu',
'cse_global': True,
'cse_pdfs': False,
'symbolic_field': pdfs,
'field_layout': 'fzyx',
'gpu_indexing_params': gpu_indexing_params,
}
}
config_name = ctx.config
noopt = False
d3q27 = False
if config_name.endswith("_noopt"):
noopt = True
config_name = config_name[:-len("_noopt")]
if config_name.endswith("_d3q27"):
d3q27 = True
config_name = config_name[:-len("_d3q27")]
options = options_dict[config_name]
options.update(common_options)
options = options.copy()
if noopt:
options['optimization']['cse_global'] = False
options['optimization']['cse_pdfs'] = False
if d3q27:
options['stencil'] = 'D3Q27'
options.update(common_options)
stencil_str = options['stencil']
q = int(stencil_str[stencil_str.find('Q') + 1:])
pdfs, velocity_field = ps.fields("pdfs({q}), velocity(3) : double[3D]".format(q=q), layout='fzyx')
options['optimization']['symbolic_field'] = pdfs
if not is_inplace(streaming_pattern):
options['optimization']['symbolic_temporary_field'] = pdfs_tmp
field_swaps = [(pdfs, pdfs_tmp)]
else:
field_swaps = []
vp = [
('double', 'omega_0'),
('double', 'omega_1'),
('double', 'omega_2'),
('double', 'omega_3'),
('double', 'omega_4'),
('double', 'omega_5'),
('double', 'omega_6'),
('int32_t', 'cudaBlockSize0'),
('int32_t', 'cudaBlockSize1'),
('int32_t', 'cudaBlockSize2')
]
lb_method = create_lb_method(**options)
update_rule = create_lb_update_rule(lb_method=lb_method, **options)
if not noopt:
update_rule = insert_fast_divisions(update_rule)
update_rule = insert_fast_sqrts(update_rule)
# CPU lattice model - required for macroscopic value computation, VTK output etc.
options_without_opt = options.copy()
del options_without_opt['optimization']
generate_lattice_model(ctx, 'UniformGridGPU_LatticeModel', create_lb_collision_rule(lb_method=lb_method,
**options_without_opt))
# gpu LB sweep & boundaries
generate_sweep(ctx, 'UniformGridGPU_LbKernel', update_rule,
field_swaps=[('pdfs', 'pdfs_tmp')],
inner_outer_split=True, target='gpu', gpu_indexing_params=sweep_params,
varying_parameters=vp)
generate_boundary(ctx, 'UniformGridGPU_NoSlip', NoSlip(), lb_method, target='gpu')
generate_boundary(ctx, 'UniformGridGPU_UBB', UBB([0.05, 0, 0]), lb_method, target='gpu')
# LB Sweep
collision_rule = create_lb_collision_rule(**options)
if optimize:
collision_rule = insert_fast_divisions(collision_rule)
collision_rule = insert_fast_sqrts(collision_rule)
lb_method = collision_rule.method
generate_alternating_lbm_sweep(ctx, 'UniformGridGPU_LbKernel', collision_rule, streaming_pattern,
optimization=options['optimization'],
inner_outer_split=True, varying_parameters=vp, field_swaps=field_swaps)
# getter & setter
setter_assignments = macroscopic_values_setter(lb_method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=1.0)
getter_assignments = macroscopic_values_getter(lb_method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=None)
generate_sweep(ctx, 'UniformGridGPU_MacroSetter', setter_assignments)
generate_sweep(ctx, 'UniformGridGPU_MacroGetter', getter_assignments)
setter_assignments = macroscopic_values_setter(lb_method, density=1.0, velocity=velocity_field.center_vector,
pdfs=pdfs,
streaming_pattern=streaming_pattern,
previous_timestep=Timestep.EVEN)
generate_sweep(ctx, 'UniformGridGPU_MacroSetter', setter_assignments, target='gpu')
# Boundaries
noslip = NoSlip()
ubb = UBB((0.05, 0, 0))
generate_alternating_lbm_boundary(ctx, 'UniformGridGPU_NoSlip', noslip, lb_method, field_name=pdfs.name,
streaming_pattern=streaming_pattern, target='gpu')
generate_alternating_lbm_boundary(ctx, 'UniformGridGPU_UBB', ubb, lb_method, field_name=pdfs.name,
streaming_pattern=streaming_pattern, target='gpu')
# communication
generate_pack_info_from_kernel(ctx, 'UniformGridGPU_PackInfo', update_rule, target='gpu')
generate_lb_pack_info(ctx, 'UniformGridGPU_PackInfo', stencil, pdfs,
streaming_pattern=streaming_pattern, target='gpu',
always_generate_separate_classes=True)
infoHeaderParams = {
'stencil': stencil_str,
'q': q,
'configName': ctx.config,
'streaming_pattern': streaming_pattern,
'collision_setup': collision_setup,
'cse_global': int(options['optimization']['cse_global']),
'cse_pdfs': int(options['optimization']['cse_pdfs']),
}
ctx.write_file("UniformGridGPU_Defines.h", info_header.format(**infoHeaderParams))
stencil_typedefs = {'Stencil_T': stencil,
'CommunicationStencil_T': stencil}
field_typedefs = {'PdfField_T': pdfs,
'VelocityField_T': velocity_field}
# Info header containing correct template definitions for stencil and field
generate_info_header(ctx, 'UniformGridGPU_InfoHeader',
stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs,
additional_code=info_header.format(**infoHeaderParams))
#include "core/Environment.h"
#include "core/logging/Initialization.h"
#include "python_coupling/CreateConfig.h"
#include "python_coupling/PythonCallback.h"
#include "python_coupling/DictWrapper.h"
#include "blockforest/Initialization.h"
#include "field/FlagField.h"
#include "field/AddToStorage.h"
#include "field/vtk/VTKWriter.h"
#include "field/communication/PackInfo.h"
#include "lbm/PerformanceLogger.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "timeloop/all.h"
#include "geometry/all.h"
#include "cuda/HostFieldAllocator.h"
#include "cuda/communication/GPUPackInfo.h"
#include "cuda/ParallelStreams.h"
#include "core/timing/TimingPool.h"
#include "core/timing/RemainingTimeLogger.h"
#include "cuda/AddGPUFieldToStorage.h"
#include "cuda/communication/UniformGPUScheme.h"
#include "cuda/DeviceSelectMPI.h"
#include "domain_decomposition/SharedSweep.h"
#include "InitShearVelocity.h"
#include "gui/Gui.h"
#ifdef WALBERLA_ENABLE_GUI
#include "lbm/gui/PdfFieldDisplayAdaptor.h"
#endif
#include "UniformGridGPU_AA_PackInfoPush.h"
#include "UniformGridGPU_AA_PackInfoPull.h"
#include "UniformGridGPU_AA_MacroSetter.h"
#include "UniformGridGPU_AA_MacroGetter.h"
#include "UniformGridGPU_AA_LbKernelEven.h"
#include "UniformGridGPU_AA_LbKernelOdd.h"
#include "UniformGridGPU_AA_Defines.h"
#include <cmath>
using namespace walberla;
using CommunicationStencil_T = Stencil_T;
using PdfField_T = GhostLayerField< real_t, Stencil_T::Q >;
using VelocityField_T = GhostLayerField< real_t, 3 >;
int main( int argc, char **argv )
{
mpi::Environment env( argc, argv );
cuda::selectDeviceBasedOnMpiRank();
for ( auto cfg = python_coupling::configBegin( argc, argv ); cfg != python_coupling::configEnd(); ++cfg )
{
WALBERLA_MPI_WORLD_BARRIER();
WALBERLA_CUDA_CHECK( cudaPeekAtLastError() );
auto config = *cfg;
logging::configureLogging( config );
auto blocks = blockforest::createUniformBlockGridFromConfig( config );
Vector3< uint_t > cellsPerBlock = config->getBlock( "DomainSetup" ).getParameter< Vector3< uint_t > >( "cellsPerBlock" );
// Reading parameters
auto parameters = config->getOneBlock( "Parameters" );
const real_t omega = parameters.getParameter< real_t >( "omega", real_c( 1.4 ));
const uint_t timesteps = parameters.getParameter< uint_t >( "timesteps", uint_c( 50 ));
// Creating fields
BlockDataID pdfFieldCpuID = field::addToStorage< PdfField_T >( blocks, "pdfs cpu", real_t( std::nan("") ), field::fzyx );
BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >( blocks, "vel", real_t( 0 ), field::fzyx );
WALBERLA_LOG_INFO_ON_ROOT( "Initializing shear flow" );
initShearVelocity( blocks, velFieldCpuID );
pystencils::UniformGridGPU_AA_MacroGetter getterSweep( pdfFieldCpuID, velFieldCpuID );
pystencils::UniformGridGPU_AA_MacroSetter setterSweep( pdfFieldCpuID, velFieldCpuID );
for ( auto &block : *blocks )
setterSweep( &block );
BlockDataID pdfFieldGpuID = cuda::addGPUFieldToStorage< PdfField_T >( blocks, pdfFieldCpuID, "pdfs on GPU", true );
Vector3<int> innerOuterSplit = parameters.getParameter<Vector3<int> >("innerOuterSplit", Vector3<int>(1, 1, 1));
for(uint_t i=0; i< 3; ++i)
{
if( int_c(cellsPerBlock[i]) <= innerOuterSplit[i] * 2) {
WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock");
}
}
Cell innerOuterSplitCell (innerOuterSplit[0], innerOuterSplit[1], innerOuterSplit[2]);
bool cudaEnabledMPI = parameters.getParameter<bool>( "cudaEnabledMPI", false );
Vector3<int32_t> gpuBlockSize = parameters.getParameter<Vector3<int32_t> > ("gpuBlockSize", Vector3<int32_t>(256, 1, 1));
int streamHighPriority = 0;
int streamLowPriority = 0;
WALBERLA_CUDA_CHECK( cudaDeviceGetStreamPriorityRange( &streamLowPriority, &streamHighPriority ));
WALBERLA_CHECK( gpuBlockSize[2] == 1 );
using KernelEven = pystencils::UniformGridGPU_AA_LbKernelEven;
using KernelOdd = pystencils::UniformGridGPU_AA_LbKernelOdd;
using PackInfoPull = pystencils::UniformGridGPU_AA_PackInfoPull;
using PackInfoPush = pystencils::UniformGridGPU_AA_PackInfoPush;
using cuda::communication::UniformGPUScheme;
KernelEven kernelEven( pdfFieldGpuID, omega, gpuBlockSize[0], gpuBlockSize[1], innerOuterSplitCell );
KernelOdd kernelOdd ( pdfFieldGpuID, omega, gpuBlockSize[0], gpuBlockSize[1], innerOuterSplitCell );
kernelEven.setOuterPriority( streamHighPriority );
kernelOdd .setOuterPriority( streamHighPriority );
auto pullScheme = make_shared< UniformGPUScheme< Stencil_T > >( blocks, cudaEnabledMPI );
auto pushScheme = make_shared< UniformGPUScheme< Stencil_T > >( blocks, cudaEnabledMPI );
pullScheme->addPackInfo( make_shared< PackInfoPull >( pdfFieldGpuID ) );
pushScheme->addPackInfo( make_shared< PackInfoPush >( pdfFieldGpuID ) );
auto defaultStream = cuda::StreamRAII::newPriorityStream( streamLowPriority );
auto setupPhase = [&]() {
for ( auto &block: *blocks )
kernelEven( &block );
pullScheme->communicate();
for ( auto &block: *blocks )
kernelOdd( &block );
};
auto tearDownPhase = [&]() {
pushScheme->communicate();
cuda::fieldCpy< PdfField_T, cuda::GPUField< real_t > >( blocks, pdfFieldCpuID, pdfFieldGpuID );
for ( auto &block : *blocks )
getterSweep( &block );
};
auto simpleOverlapTimeStep = [&]()
{
// Even
pushScheme->startCommunication( defaultStream );
for ( auto &block: *blocks )
kernelEven.inner( &block, defaultStream );
pushScheme->wait( defaultStream );
for ( auto &block: *blocks )
kernelEven.outer( &block, defaultStream );
// Odd
pullScheme->startCommunication( defaultStream );
for ( auto &block: *blocks )
kernelOdd.inner( &block, defaultStream );
pullScheme->wait( defaultStream );
for ( auto &block: *blocks )
kernelOdd.outer( &block, defaultStream );
};
auto normalTimeStep = [&]()
{
pushScheme->communicate( defaultStream );
for ( auto &block: *blocks )
kernelEven( &block, defaultStream );
pullScheme->communicate( defaultStream );
for ( auto &block: *blocks )
kernelOdd( &block, defaultStream );
};
auto kernelOnlyFunc = [&]()
{
for ( auto &block: *blocks )
kernelEven( &block, defaultStream );
for ( auto &block: *blocks )
kernelOdd( &block, defaultStream );
};
SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps / 2 );
const std::string timeStepStrategy = parameters.getParameter< std::string >( "timeStepStrategy", "normal" );
std::function< void() > timeStep;
if ( timeStepStrategy == "noOverlap" )
timeStep = std::function< void() >( normalTimeStep );
else if ( timeStepStrategy == "simpleOverlap" )
timeStep = simpleOverlapTimeStep;
else if ( timeStepStrategy == "kernelOnly" )
{
WALBERLA_LOG_INFO_ON_ROOT( "Running only compute kernel without boundary - this makes only sense for benchmarking!" )
timeStep = kernelOnlyFunc;
}
else
{
WALBERLA_ABORT_NO_DEBUG_INFO(
"Invalid value for 'timeStepStrategy'. Allowed values are 'noOverlap', 'complexOverlap', 'simpleOverlap', 'kernelOnly'" );
}
timeLoop.add() << BeforeFunction( timeStep )
<< Sweep( []( IBlock * ) {}, "time step" );
// VTK
uint_t vtkWriteFrequency = parameters.getParameter< uint_t >( "vtkWriteFrequency", 0 );
if ( vtkWriteFrequency > 0 )
{
auto vtkOutput = vtk::createVTKOutput_BlockData( *blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
"simulation_step", false, true, true, false, 0 );