Commit b4b0cb4b authored by Christoph Schwarzmeier's avatar Christoph Schwarzmeier
Browse files

Merge branch 'UpdatePhaseField' into 'master'

Update phase field

See merge request !477
parents 3455bf3e 40e80f3e
Pipeline #33777 passed with stages
in 565 minutes and 16 seconds
...@@ -3,30 +3,43 @@ import waLBerla as wlb ...@@ -3,30 +3,43 @@ import waLBerla as wlb
import pandas as pd import pandas as pd
from waLBerla.tools.sqlitedb import sequenceValuesToScalars from waLBerla.tools.sqlitedb import sequenceValuesToScalars
from waLBerla.tools.config import block_decomposition
import sys import sys
from math import prod
def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, size_per_value=8):
"""Checks if a single block of given size fits into GPU memory"""
cells = prod(b + 2 * gls for b in block_size)
# 3 values for the velocity and two for the phase field and the temporary phase field
values_per_cell = 2 * q_phase + 2 * q_hydro + 3 + 2
needed_memory = values_per_cell * cells * size_per_value
return needed_memory < total_mem
class Scenario: class Scenario:
def __init__(self, timeStepStrategy, overlappingWidth, cuda_block_size): def __init__(self, time_step_strategy, cuda_block_size, cells_per_block=(256, 256, 256),
cuda_enabled_mpi=False):
# output frequencies # output frequencies
self.vtkWriteFrequency = 0 self.vtkWriteFrequency = 0
# simulation parameters # simulation parameters
self.timesteps = 101 self.timesteps = 101
edge_size = 32 self.cells_per_block = cells_per_block
self.cells = (edge_size, edge_size, edge_size) self.blocks = block_decomposition(wlb.mpi.numProcesses())
self.blocks = (1, 1, 1)
self.periodic = (1, 1, 1) self.periodic = (1, 1, 1)
self.size = (self.cells[0] * self.blocks[0], self.size = (self.cells_per_block[0] * self.blocks[0],
self.cells[1] * self.blocks[1], self.cells_per_block[1] * self.blocks[1],
self.cells[2] * self.blocks[2]) self.cells_per_block[2] * self.blocks[2])
self.timeStepStrategy = timeStepStrategy self.timeStepStrategy = time_step_strategy
self.overlappingWidth = overlappingWidth
self.cuda_block_size = cuda_block_size self.cuda_block_size = cuda_block_size
self.warmupSteps = 10 self.warmupSteps = 10
self.cudaEnabledMpi = cuda_enabled_mpi
# bubble parameters # bubble parameters
self.bubbleRadius = min(self.size) // 4 self.bubbleRadius = min(self.size) // 4
self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2) self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)
...@@ -41,19 +54,18 @@ class Scenario: ...@@ -41,19 +54,18 @@ class Scenario:
return { return {
'DomainSetup': { 'DomainSetup': {
'blocks': self.blocks, 'blocks': self.blocks,
'cellsPerBlock': self.cells, 'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic, 'periodic': self.periodic,
}, },
'Parameters': { 'Parameters': {
'timesteps': self.timesteps, 'timesteps': self.timesteps,
'vtkWriteFrequency': self.vtkWriteFrequency, 'vtkWriteFrequency': self.vtkWriteFrequency,
'useGui': 0,
'remainingTimeLoggerFrequency': -1, 'remainingTimeLoggerFrequency': -1,
'timeStepStrategy': self.timeStepStrategy, 'timeStepStrategy': self.timeStepStrategy,
'overlappingWidth': self.overlappingWidth,
'gpuBlockSize': self.cuda_block_size, 'gpuBlockSize': self.cuda_block_size,
'warmupSteps': self.warmupSteps, 'warmupSteps': self.warmupSteps,
'scenario': self.scenario, 'scenario': self.scenario,
'cudaEnabledMpi': self.cudaEnabledMpi
}, },
'Boundaries_GPU': { 'Boundaries_GPU': {
'Border': [] 'Border': []
...@@ -86,43 +98,46 @@ class Scenario: ...@@ -86,43 +98,46 @@ class Scenario:
df.to_csv(self.csv_file, index=False, mode='a', header=False) df.to_csv(self.csv_file, index=False, mode='a', header=False)
def overlap_benchmark(): def benchmark():
scenarios = wlb.ScenarioManager() scenarios = wlb.ScenarioManager()
overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1), gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)] gpu_mem = gpu_mem_gb * (2 ** 30)
cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), (512, 1, 1), block_size = (256, 256, 256)
(32, 2, 1), (64, 2, 1), (128, 2, 1), (256, 2, 1),
(32, 4, 1), (64, 4, 1), (128, 4, 1), if not domain_block_size_ok(block_size, gpu_mem):
(32, 4, 2), (64, 4, 2), (128, 2, 2), wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
(32, 8, 1), (64, 8, 1), (64, 4, 2), else:
(32, 16, 1), (16, 16, 1), (16, 16, 2)] scenarios.add(Scenario(time_step_strategy='normal', cuda_block_size=(256, 1, 1), cells_per_block=block_size))
# no overlap
scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1), cuda_block_size=(16, 16, 1)))
# overlap
for overlap_strategy in ['overlap']:
for overlappingWidth in overlappingWidths:
for cuda_block in cuda_blocks:
scenario = Scenario(timeStepStrategy=overlap_strategy,
overlappingWidth=overlappingWidth,
cuda_block_size=cuda_block)
scenarios.add(scenario)
def kernel_benchmark(): def kernel_benchmark():
scenarios = wlb.ScenarioManager() scenarios = wlb.ScenarioManager()
# overlap gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
# for overlap_strategy in ['phase_only', 'hydro_only', 'kernel_only']: gpu_mem = gpu_mem_gb * (2 ** 30)
for overlap_strategy in ['overlap']:
scenario = Scenario(timeStepStrategy=overlap_strategy, block_sizes = [(i, i, i) for i in (32, 64, 128, 256, 320, 384, 448, 512)]
overlappingWidth=(1, 1, 1),
cuda_block_size=(128, 1, 1)) cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1),
scenarios.add(scenario) (32, 2, 1), (64, 2, 1), (128, 2, 1),
(32, 4, 1), (64, 4, 1),
(32, 4, 2),
(32, 8, 1),
(16, 16, 1)]
for time_step_strategy in ['phase_only', 'hydro_only', 'kernel_only', 'normal']:
for cuda_block in cuda_blocks:
for block_size in block_sizes:
if not domain_block_size_ok(block_size, gpu_mem):
wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
continue
scenario = Scenario(time_step_strategy=time_step_strategy,
cuda_block_size=cuda_block,
cells_per_block=block_size)
scenarios.add(scenario)
# overlap_benchmark() # benchmark()
kernel_benchmark() kernel_benchmark()
...@@ -3,57 +3,55 @@ import waLBerla as wlb ...@@ -3,57 +3,55 @@ import waLBerla as wlb
import pandas as pd import pandas as pd
from waLBerla.tools.sqlitedb import sequenceValuesToScalars from waLBerla.tools.sqlitedb import sequenceValuesToScalars
from waLBerla.tools.config import block_decomposition
import sys import sys
class Scenario: class Scenario:
def __init__(self, timeStepStrategy, overlappingWidth): def __init__(self, time_step_strategy, cells_per_block=(256, 256, 256),
cuda_enabled_mpi=False):
# output frequencies # output frequencies
self.vtkWriteFrequency = -1 self.vtkWriteFrequency = 0
# simulation parameters # simulation parameters
self.timesteps = 500 self.timesteps = 101
edge_size = 50 self.cells_per_block = cells_per_block
self.cells = (edge_size, edge_size, edge_size) self.blocks = block_decomposition(wlb.mpi.numProcesses())
self.blocks = (1, 1, 1)
self.periodic = (1, 1, 1) self.periodic = (1, 1, 1)
self.size = (self.cells[0] * self.blocks[0], self.size = (self.cells_per_block[0] * self.blocks[0],
self.cells[1] * self.blocks[1], self.cells_per_block[1] * self.blocks[1],
self.cells[2] * self.blocks[2]) self.cells_per_block[2] * self.blocks[2])
self.timeStepStrategy = timeStepStrategy self.timeStepStrategy = time_step_strategy
self.overlappingWidth = overlappingWidth
self.cuda_block_size = (-1, -1, -1)
self.warmupSteps = 10 self.warmupSteps = 10
self.cudaEnabledMpi = cuda_enabled_mpi
# bubble parameters # bubble parameters
self.bubbleRadius = min(self.size) // 4 self.bubbleRadius = min(self.size) // 4
self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2) self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)
self.scenario = 1 # 1 rising bubble, 2 RTI
self.config_dict = self.config() self.config_dict = self.config()
self.csv_file = "benchmark_cpu.csv" self.csv_file = "benchmark.csv"
@wlb.member_callback @wlb.member_callback
def config(self): def config(self):
return { return {
'DomainSetup': { 'DomainSetup': {
'blocks': self.blocks, 'blocks': self.blocks,
'cellsPerBlock': self.cells, 'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic, 'periodic': self.periodic,
}, },
'Parameters': { 'Parameters': {
'timesteps': self.timesteps, 'timesteps': self.timesteps,
'vtkWriteFrequency': self.vtkWriteFrequency, 'vtkWriteFrequency': self.vtkWriteFrequency,
'useGui': 0, 'remainingTimeLoggerFrequency': -1,
'remainingTimeLoggerFrequency': 10.0,
'timeStepStrategy': self.timeStepStrategy, 'timeStepStrategy': self.timeStepStrategy,
'overlappingWidth': self.overlappingWidth,
'gpuBlockSize': self.cuda_block_size,
'warmupSteps': self.warmupSteps, 'warmupSteps': self.warmupSteps,
'scenario': self.scenario, 'scenario': 1,
'cudaEnabledMpi': self.cudaEnabledMpi
}, },
'Boundaries_GPU': { 'Boundaries_GPU': {
'Border': [] 'Border': []
...@@ -86,31 +84,23 @@ class Scenario: ...@@ -86,31 +84,23 @@ class Scenario:
df.to_csv(self.csv_file, index=False, mode='a', header=False) df.to_csv(self.csv_file, index=False, mode='a', header=False)
def overlap_benchmark(): def benchmark():
scenarios = wlb.ScenarioManager() scenarios = wlb.ScenarioManager()
overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1), block_size = (64, 64, 64)
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32), scenarios.add(Scenario(time_step_strategy='normal', cells_per_block=block_size))
(64, 32, 32), (64, 64, 32), (64, 64, 64)]
# no overlap
scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1)))
# overlap
for overlap_strategy in ['overlap']:
for overlappingWidth in overlappingWidths:
scenario = Scenario(timeStepStrategy=overlap_strategy,
overlappingWidth=overlappingWidth)
scenarios.add(scenario)
def kernel_benchmark(): def kernel_benchmark():
scenarios = wlb.ScenarioManager() scenarios = wlb.ScenarioManager()
block_sizes = [(i, i, i) for i in (8, 16, 32, 64, 128)]
# overlap for time_step_strategy in ['phase_only', 'hydro_only', 'kernel_only', 'normal']:
scenario = Scenario(timeStepStrategy='overlap', for block_size in block_sizes:
overlappingWidth=(8, 8, 8)) scenario = Scenario(time_step_strategy=time_step_strategy,
scenarios.add(scenario) cells_per_block=block_size)
scenarios.add(scenario)
# overlap_benchmark() # benchmark()
kernel_benchmark() kernel_benchmark()
...@@ -37,8 +37,6 @@ ...@@ -37,8 +37,6 @@
#include "timeloop/SweepTimeloop.h" #include "timeloop/SweepTimeloop.h"
#include <blockforest/communication/UniformBufferedScheme.h>
#include "InitializerFunctions.h" #include "InitializerFunctions.h"
////////////////////////////// //////////////////////////////
...@@ -48,40 +46,19 @@ ...@@ -48,40 +46,19 @@
#if defined(WALBERLA_BUILD_WITH_CUDA) #if defined(WALBERLA_BUILD_WITH_CUDA)
# include "cuda/AddGPUFieldToStorage.h" # include "cuda/AddGPUFieldToStorage.h"
# include "cuda/DeviceSelectMPI.h" # include "cuda/DeviceSelectMPI.h"
# include "cuda/HostFieldAllocator.h"
# include "cuda/ParallelStreams.h" # include "cuda/ParallelStreams.h"
# include "cuda/communication/GPUPackInfo.h"
# include "cuda/communication/MemcpyPackInfo.h" # include "cuda/communication/MemcpyPackInfo.h"
# include "cuda/communication/UniformGPUScheme.h" # include "cuda/communication/UniformGPUScheme.h"
# include "GenDefines.h"
# include "PackInfo_phase_field.h"
# include "PackInfo_phase_field_distributions.h"
# include "PackInfo_velocity_based_distributions.h"
# include "hydro_LB_step.h"
# include "initialize_phase_field_distributions.h"
# include "initialize_velocity_based_distributions.h"
# include "phase_field_LB_step.h"
#else #else
# include "GenDefines.h" # include <blockforest/communication/UniformBufferedScheme.h>
# include "PackInfo_phase_field.h"
# include "PackInfo_phase_field_distributions.h"
# include "PackInfo_velocity_based_distributions.h"
# include "hydro_LB_step.h"
# include "initialize_phase_field_distributions.h"
# include "initialize_velocity_based_distributions.h"
# include "phase_field_LB_step.h"
#endif #endif
#include "GenDefines.h"
using namespace walberla; using namespace walberla;
using PdfField_phase_T = GhostLayerField< real_t, Stencil_phase_T::Size >; using flag_t = walberla::uint8_t;
using PdfField_hydro_T = GhostLayerField< real_t, Stencil_hydro_T::Size >; using FlagField_T = FlagField< flag_t >;
using VelocityField_T = GhostLayerField< real_t, Stencil_hydro_T::Dimension >;
using PhaseField_T = GhostLayerField< real_t, 1 >;
using flag_t = walberla::uint8_t;
using FlagField_T = FlagField< flag_t >;
#if defined(WALBERLA_BUILD_WITH_CUDA) #if defined(WALBERLA_BUILD_WITH_CUDA)
typedef cuda::GPUField< real_t > GPUField; typedef cuda::GPUField< real_t > GPUField;
...@@ -111,9 +88,7 @@ int main(int argc, char** argv) ...@@ -111,9 +88,7 @@ int main(int argc, char** argv)
const real_t remainingTimeLoggerFrequency = const real_t remainingTimeLoggerFrequency =
parameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0); parameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0);
const uint_t scenario = parameters.getParameter< uint_t >("scenario", uint_c(1)); const uint_t scenario = parameters.getParameter< uint_t >("scenario", uint_c(1));
Vector3< int > overlappingWidth = const uint_t warmupSteps = parameters.getParameter< uint_t >("warmupSteps", uint_t(2));
parameters.getParameter< Vector3< int > >("overlappingWidth", Vector3< int >(1, 1, 1));
const uint_t warmupSteps = parameters.getParameter< uint_t >("warmupSteps", uint_t(2));
#if defined(WALBERLA_BUILD_WITH_CUDA) #if defined(WALBERLA_BUILD_WITH_CUDA)
// CPU fields // CPU fields
...@@ -165,24 +140,21 @@ int main(int argc, char** argv) ...@@ -165,24 +140,21 @@ int main(int argc, char** argv)
pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field_gpu, vel_field_gpu); pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field_gpu, vel_field_gpu);
pystencils::phase_field_LB_step phase_field_LB_step( pystencils::phase_field_LB_step phase_field_LB_step(
lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0],
gpuBlockSize[1], gpuBlockSize[1], gpuBlockSize[2]);
Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
#else #else
pystencils::initialize_phase_field_distributions init_h(lb_phase_field, phase_field, vel_field); pystencils::initialize_phase_field_distributions init_h(lb_phase_field, phase_field, vel_field);
pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field, vel_field); pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field, vel_field);
pystencils::phase_field_LB_step phase_field_LB_step( pystencils::phase_field_LB_step phase_field_LB_step(lb_phase_field, phase_field, vel_field);
lb_phase_field, phase_field, vel_field, Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2])); pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field, phase_field, vel_field);
pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field, phase_field, vel_field,
Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
#endif #endif
// add communication // add communication
#if defined(WALBERLA_BUILD_WITH_CUDA) #if defined(WALBERLA_BUILD_WITH_CUDA)
const bool cudaEnabledMpi = parameters.getParameter< bool >("cudaEnabledMpi", false);
auto Comm_velocity_based_distributions = auto Comm_velocity_based_distributions =
make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0); make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi);
auto generatedPackInfo_velocity_based_distributions = auto generatedPackInfo_velocity_based_distributions =
make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu); make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions); Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
...@@ -190,13 +162,12 @@ int main(int argc, char** argv) ...@@ -190,13 +162,12 @@ int main(int argc, char** argv)
Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field); Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field);
auto Comm_phase_field_distributions = auto Comm_phase_field_distributions =
make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0); make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi);
auto generatedPackInfo_phase_field_distributions = auto generatedPackInfo_phase_field_distributions =
make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field_gpu); make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field_gpu);
Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions); Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions);
#else #else
blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_velocity_based_distributions(blocks); blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_velocity_based_distributions(blocks);
auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field); auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field);
...@@ -244,29 +215,15 @@ int main(int argc, char** argv) ...@@ -244,29 +215,15 @@ int main(int argc, char** argv)
auto timeLoop = make_shared< SweepTimeloop >(blocks->getBlockStorage(), timesteps); auto timeLoop = make_shared< SweepTimeloop >(blocks->getBlockStorage(), timesteps);
#if defined(WALBERLA_BUILD_WITH_CUDA) #if defined(WALBERLA_BUILD_WITH_CUDA)
auto normalTimeStep = [&]() { auto normalTimeStep = [&]() {
Comm_velocity_based_distributions->startCommunication(defaultStream);
for (auto& block : *blocks) for (auto& block : *blocks)
{ phase_field_LB_step(&block, defaultStream);
Comm_phase_field_distributions->communicate(nullptr); Comm_velocity_based_distributions->wait(defaultStream);
phase_field_LB_step(&block);
Comm_velocity_based_distributions->communicate(nullptr);
hydro_LB_step(&block);
}
};
auto simpleOverlapTimeStep = [&]() {
Comm_phase_field_distributions->startCommunication(defaultStream); Comm_phase_field_distributions->startCommunication(defaultStream);
for (auto& block : *blocks) for (auto& block : *blocks)
phase_field_LB_step.inner(&block, defaultStream); hydro_LB_step(&block, defaultStream);
Comm_phase_field_distributions->wait(defaultStream); Comm_phase_field_distributions->wait(defaultStream);
for (auto& block : *blocks)
phase_field_LB_step.outer(&block, defaultStream);
Comm_velocity_based_distributions->startCommunication(defaultStream);
for (auto& block : *blocks)
hydro_LB_step.inner(&block, defaultStream);
Comm_velocity_based_distributions->wait(defaultStream);
for (auto& block : *blocks)
hydro_LB_step.outer(&block, defaultStream);
}; };
auto phase_only = [&]() { auto phase_only = [&]() {
for (auto& block : *blocks) for (auto& block : *blocks)
...@@ -284,29 +241,15 @@ int main(int argc, char** argv) ...@@ -284,29 +241,15 @@ int main(int argc, char** argv)
}; };
#else #else
auto normalTimeStep = [&]() { auto normalTimeStep = [&]() {
for (auto& block : *blocks) Comm_velocity_based_distributions.startCommunication();
{ for (auto& block : *blocks)
Comm_phase_field_distributions(); phase_field_LB_step(&block);
phase_field_LB_step(&block); Comm_velocity_based_distributions.wait();
Comm_velocity_based_distributions(); Comm_phase_field_distributions.startCommunication();
hydro_LB_step(&block); for (auto& block : *blocks)
} hydro_LB_step(&block);
}; Comm_phase_field_distributions.wait();
auto simpleOverlapTimeStep = [&]() {
Comm_phase_field_distributions.startCommunication();
for (auto& block : *blocks)
phase_field_LB_step.inner(&block);
Comm_phase_field_distributions.wait();
for (auto& block : *blocks)
phase_field_LB_step.outer(&block);
Comm_velocity_based_distributions.startCommunication();
for (auto& block : *blocks)
hydro_LB_step.inner(&block);
Comm_velocity_based_distributions.wait();
for (auto& block : *blocks)
hydro_LB_step.outer(&block);
}; };
auto phase_only = [&]() { auto phase_only = [&]() {
for (auto& block : *blocks)