diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py b/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py index 2c447a3d88f3d331da8f4bc0fb7daa082584707e..88a410c1062a0699ac93058104d6dcc46f51a5bf 100755 --- a/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py +++ b/apps/benchmarks/PhaseFieldAllenCahn/benchmark.py @@ -3,30 +3,43 @@ import waLBerla as wlb import pandas as pd from waLBerla.tools.sqlitedb import sequenceValuesToScalars +from waLBerla.tools.config import block_decomposition import sys +from math import prod + + +def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, size_per_value=8): + """Checks if a single block of given size fits into GPU memory""" + + cells = prod(b + 2 * gls for b in block_size) + # 3 values for the velocity and two for the phase field and the temporary phase field + values_per_cell = 2 * q_phase + 2 * q_hydro + 3 + 2 + needed_memory = values_per_cell * cells * size_per_value + return needed_memory < total_mem class Scenario: - def __init__(self, timeStepStrategy, overlappingWidth, cuda_block_size): + def __init__(self, time_step_strategy, cuda_block_size, cells_per_block=(256, 256, 256), + cuda_enabled_mpi=False): # output frequencies self.vtkWriteFrequency = 0 # simulation parameters self.timesteps = 101 - edge_size = 32 - self.cells = (edge_size, edge_size, edge_size) - self.blocks = (1, 1, 1) + self.cells_per_block = cells_per_block + self.blocks = block_decomposition(wlb.mpi.numProcesses()) self.periodic = (1, 1, 1) - self.size = (self.cells[0] * self.blocks[0], - self.cells[1] * self.blocks[1], - self.cells[2] * self.blocks[2]) + self.size = (self.cells_per_block[0] * self.blocks[0], + self.cells_per_block[1] * self.blocks[1], + self.cells_per_block[2] * self.blocks[2]) - self.timeStepStrategy = timeStepStrategy - self.overlappingWidth = overlappingWidth + self.timeStepStrategy = time_step_strategy self.cuda_block_size = cuda_block_size self.warmupSteps = 10 + self.cudaEnabledMpi = cuda_enabled_mpi + # bubble parameters self.bubbleRadius = min(self.size) // 4 self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2) @@ -41,19 +54,18 @@ class Scenario: return { 'DomainSetup': { 'blocks': self.blocks, - 'cellsPerBlock': self.cells, + 'cellsPerBlock': self.cells_per_block, 'periodic': self.periodic, }, 'Parameters': { 'timesteps': self.timesteps, 'vtkWriteFrequency': self.vtkWriteFrequency, - 'useGui': 0, 'remainingTimeLoggerFrequency': -1, 'timeStepStrategy': self.timeStepStrategy, - 'overlappingWidth': self.overlappingWidth, 'gpuBlockSize': self.cuda_block_size, 'warmupSteps': self.warmupSteps, 'scenario': self.scenario, + 'cudaEnabledMpi': self.cudaEnabledMpi }, 'Boundaries_GPU': { 'Border': [] @@ -86,43 +98,46 @@ class Scenario: df.to_csv(self.csv_file, index=False, mode='a', header=False) -def overlap_benchmark(): +def benchmark(): scenarios = wlb.ScenarioManager() - overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1), - (4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1), - (4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)] - - cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), (512, 1, 1), - (32, 2, 1), (64, 2, 1), (128, 2, 1), (256, 2, 1), - (32, 4, 1), (64, 4, 1), (128, 4, 1), - (32, 4, 2), (64, 4, 2), (128, 2, 2), - (32, 8, 1), (64, 8, 1), (64, 4, 2), - (32, 16, 1), (16, 16, 1), (16, 16, 2)] - - # no overlap - scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1), cuda_block_size=(16, 16, 1))) - - # overlap - for overlap_strategy in ['overlap']: - for overlappingWidth in overlappingWidths: - for cuda_block in cuda_blocks: - scenario = Scenario(timeStepStrategy=overlap_strategy, - overlappingWidth=overlappingWidth, - cuda_block_size=cuda_block) - scenarios.add(scenario) + + gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8)) + gpu_mem = gpu_mem_gb * (2 ** 30) + + block_size = (256, 256, 256) + + if not domain_block_size_ok(block_size, gpu_mem): + wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.") + else: + scenarios.add(Scenario(time_step_strategy='normal', cuda_block_size=(256, 1, 1), cells_per_block=block_size)) def kernel_benchmark(): scenarios = wlb.ScenarioManager() - # overlap - # for overlap_strategy in ['phase_only', 'hydro_only', 'kernel_only']: - for overlap_strategy in ['overlap']: - scenario = Scenario(timeStepStrategy=overlap_strategy, - overlappingWidth=(1, 1, 1), - cuda_block_size=(128, 1, 1)) - scenarios.add(scenario) + gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8)) + gpu_mem = gpu_mem_gb * (2 ** 30) + + block_sizes = [(i, i, i) for i in (32, 64, 128, 256, 320, 384, 448, 512)] + + cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), + (32, 2, 1), (64, 2, 1), (128, 2, 1), + (32, 4, 1), (64, 4, 1), + (32, 4, 2), + (32, 8, 1), + (16, 16, 1)] + + for time_step_strategy in ['phase_only', 'hydro_only', 'kernel_only', 'normal']: + for cuda_block in cuda_blocks: + for block_size in block_sizes: + if not domain_block_size_ok(block_size, gpu_mem): + wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.") + continue + scenario = Scenario(time_step_strategy=time_step_strategy, + cuda_block_size=cuda_block, + cells_per_block=block_size) + scenarios.add(scenario) -# overlap_benchmark() +# benchmark() kernel_benchmark() diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_cpu.py b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_cpu.py index e4fc118efaaaf6650ad0c756928f21796003bd2b..58cf970f4e634a5eba9fb528d1c3b407b9e86942 100755 --- a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_cpu.py +++ b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_cpu.py @@ -3,57 +3,55 @@ import waLBerla as wlb import pandas as pd from waLBerla.tools.sqlitedb import sequenceValuesToScalars +from waLBerla.tools.config import block_decomposition import sys class Scenario: - def __init__(self, timeStepStrategy, overlappingWidth): + def __init__(self, time_step_strategy, cells_per_block=(256, 256, 256), + cuda_enabled_mpi=False): # output frequencies - self.vtkWriteFrequency = -1 + self.vtkWriteFrequency = 0 # simulation parameters - self.timesteps = 500 - edge_size = 50 - self.cells = (edge_size, edge_size, edge_size) - self.blocks = (1, 1, 1) + self.timesteps = 101 + self.cells_per_block = cells_per_block + self.blocks = block_decomposition(wlb.mpi.numProcesses()) self.periodic = (1, 1, 1) - self.size = (self.cells[0] * self.blocks[0], - self.cells[1] * self.blocks[1], - self.cells[2] * self.blocks[2]) + self.size = (self.cells_per_block[0] * self.blocks[0], + self.cells_per_block[1] * self.blocks[1], + self.cells_per_block[2] * self.blocks[2]) - self.timeStepStrategy = timeStepStrategy - self.overlappingWidth = overlappingWidth - self.cuda_block_size = (-1, -1, -1) + self.timeStepStrategy = time_step_strategy self.warmupSteps = 10 + self.cudaEnabledMpi = cuda_enabled_mpi + # bubble parameters self.bubbleRadius = min(self.size) // 4 self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2) - self.scenario = 1 # 1 rising bubble, 2 RTI self.config_dict = self.config() - self.csv_file = "benchmark_cpu.csv" + self.csv_file = "benchmark.csv" @wlb.member_callback def config(self): return { 'DomainSetup': { 'blocks': self.blocks, - 'cellsPerBlock': self.cells, + 'cellsPerBlock': self.cells_per_block, 'periodic': self.periodic, }, 'Parameters': { 'timesteps': self.timesteps, 'vtkWriteFrequency': self.vtkWriteFrequency, - 'useGui': 0, - 'remainingTimeLoggerFrequency': 10.0, + 'remainingTimeLoggerFrequency': -1, 'timeStepStrategy': self.timeStepStrategy, - 'overlappingWidth': self.overlappingWidth, - 'gpuBlockSize': self.cuda_block_size, 'warmupSteps': self.warmupSteps, - 'scenario': self.scenario, + 'scenario': 1, + 'cudaEnabledMpi': self.cudaEnabledMpi }, 'Boundaries_GPU': { 'Border': [] @@ -86,31 +84,23 @@ class Scenario: df.to_csv(self.csv_file, index=False, mode='a', header=False) -def overlap_benchmark(): +def benchmark(): scenarios = wlb.ScenarioManager() - overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1), - (4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1), - (4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32), - (64, 32, 32), (64, 64, 32), (64, 64, 64)] - # no overlap - scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1))) - - # overlap - for overlap_strategy in ['overlap']: - for overlappingWidth in overlappingWidths: - scenario = Scenario(timeStepStrategy=overlap_strategy, - overlappingWidth=overlappingWidth) - scenarios.add(scenario) + block_size = (64, 64, 64) + + scenarios.add(Scenario(time_step_strategy='normal', cells_per_block=block_size)) def kernel_benchmark(): scenarios = wlb.ScenarioManager() + block_sizes = [(i, i, i) for i in (8, 16, 32, 64, 128)] - # overlap - scenario = Scenario(timeStepStrategy='overlap', - overlappingWidth=(8, 8, 8)) - scenarios.add(scenario) + for time_step_strategy in ['phase_only', 'hydro_only', 'kernel_only', 'normal']: + for block_size in block_sizes: + scenario = Scenario(time_step_strategy=time_step_strategy, + cells_per_block=block_size) + scenarios.add(scenario) -# overlap_benchmark() +# benchmark() kernel_benchmark() diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp index f9a20e01a0ddcd11a65eade491cf05a35f25cca4..5af04a35531a3bebe2123018a760aa4a00c72623 100644 --- a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp +++ b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp @@ -37,8 +37,6 @@ #include "timeloop/SweepTimeloop.h" -#include <blockforest/communication/UniformBufferedScheme.h> - #include "InitializerFunctions.h" ////////////////////////////// @@ -48,40 +46,19 @@ #if defined(WALBERLA_BUILD_WITH_CUDA) # include "cuda/AddGPUFieldToStorage.h" # include "cuda/DeviceSelectMPI.h" -# include "cuda/HostFieldAllocator.h" # include "cuda/ParallelStreams.h" -# include "cuda/communication/GPUPackInfo.h" # include "cuda/communication/MemcpyPackInfo.h" # include "cuda/communication/UniformGPUScheme.h" - -# include "GenDefines.h" -# include "PackInfo_phase_field.h" -# include "PackInfo_phase_field_distributions.h" -# include "PackInfo_velocity_based_distributions.h" -# include "hydro_LB_step.h" -# include "initialize_phase_field_distributions.h" -# include "initialize_velocity_based_distributions.h" -# include "phase_field_LB_step.h" - #else -# include "GenDefines.h" -# include "PackInfo_phase_field.h" -# include "PackInfo_phase_field_distributions.h" -# include "PackInfo_velocity_based_distributions.h" -# include "hydro_LB_step.h" -# include "initialize_phase_field_distributions.h" -# include "initialize_velocity_based_distributions.h" -# include "phase_field_LB_step.h" +# include <blockforest/communication/UniformBufferedScheme.h> #endif +#include "GenDefines.h" + using namespace walberla; -using PdfField_phase_T = GhostLayerField< real_t, Stencil_phase_T::Size >; -using PdfField_hydro_T = GhostLayerField< real_t, Stencil_hydro_T::Size >; -using VelocityField_T = GhostLayerField< real_t, Stencil_hydro_T::Dimension >; -using PhaseField_T = GhostLayerField< real_t, 1 >; -using flag_t = walberla::uint8_t; -using FlagField_T = FlagField< flag_t >; +using flag_t = walberla::uint8_t; +using FlagField_T = FlagField< flag_t >; #if defined(WALBERLA_BUILD_WITH_CUDA) typedef cuda::GPUField< real_t > GPUField; @@ -111,9 +88,7 @@ int main(int argc, char** argv) const real_t remainingTimeLoggerFrequency = parameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0); const uint_t scenario = parameters.getParameter< uint_t >("scenario", uint_c(1)); - Vector3< int > overlappingWidth = - parameters.getParameter< Vector3< int > >("overlappingWidth", Vector3< int >(1, 1, 1)); - const uint_t warmupSteps = parameters.getParameter< uint_t >("warmupSteps", uint_t(2)); + const uint_t warmupSteps = parameters.getParameter< uint_t >("warmupSteps", uint_t(2)); #if defined(WALBERLA_BUILD_WITH_CUDA) // CPU fields @@ -165,24 +140,21 @@ int main(int argc, char** argv) pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field_gpu, vel_field_gpu); pystencils::phase_field_LB_step phase_field_LB_step( - lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], - Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2])); + lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]); pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], - gpuBlockSize[1], - Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2])); + gpuBlockSize[1], gpuBlockSize[2]); #else pystencils::initialize_phase_field_distributions init_h(lb_phase_field, phase_field, vel_field); pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field, vel_field); - pystencils::phase_field_LB_step phase_field_LB_step( - lb_phase_field, phase_field, vel_field, Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2])); - pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field, phase_field, vel_field, - Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2])); + pystencils::phase_field_LB_step phase_field_LB_step(lb_phase_field, phase_field, vel_field); + pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field, phase_field, vel_field); #endif // add communication #if defined(WALBERLA_BUILD_WITH_CUDA) + const bool cudaEnabledMpi = parameters.getParameter< bool >("cudaEnabledMpi", false); auto Comm_velocity_based_distributions = - make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0); + make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi); auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu); Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions); @@ -190,13 +162,12 @@ int main(int argc, char** argv) Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field); auto Comm_phase_field_distributions = - make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0); + make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi); auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field_gpu); Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions); #else - blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_velocity_based_distributions(blocks); auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field); @@ -244,29 +215,15 @@ int main(int argc, char** argv) auto timeLoop = make_shared< SweepTimeloop >(blocks->getBlockStorage(), timesteps); #if defined(WALBERLA_BUILD_WITH_CUDA) auto normalTimeStep = [&]() { + Comm_velocity_based_distributions->startCommunication(defaultStream); for (auto& block : *blocks) - { - Comm_phase_field_distributions->communicate(nullptr); - phase_field_LB_step(&block); + phase_field_LB_step(&block, defaultStream); + Comm_velocity_based_distributions->wait(defaultStream); - Comm_velocity_based_distributions->communicate(nullptr); - hydro_LB_step(&block); - } - }; - auto simpleOverlapTimeStep = [&]() { Comm_phase_field_distributions->startCommunication(defaultStream); for (auto& block : *blocks) - phase_field_LB_step.inner(&block, defaultStream); + hydro_LB_step(&block, defaultStream); Comm_phase_field_distributions->wait(defaultStream); - for (auto& block : *blocks) - phase_field_LB_step.outer(&block, defaultStream); - - Comm_velocity_based_distributions->startCommunication(defaultStream); - for (auto& block : *blocks) - hydro_LB_step.inner(&block, defaultStream); - Comm_velocity_based_distributions->wait(defaultStream); - for (auto& block : *blocks) - hydro_LB_step.outer(&block, defaultStream); }; auto phase_only = [&]() { for (auto& block : *blocks) @@ -284,29 +241,15 @@ int main(int argc, char** argv) }; #else auto normalTimeStep = [&]() { - for (auto& block : *blocks) - { - Comm_phase_field_distributions(); - phase_field_LB_step(&block); - - Comm_velocity_based_distributions(); - hydro_LB_step(&block); - } - }; - auto simpleOverlapTimeStep = [&]() { - Comm_phase_field_distributions.startCommunication(); - for (auto& block : *blocks) - phase_field_LB_step.inner(&block); - Comm_phase_field_distributions.wait(); - for (auto& block : *blocks) - phase_field_LB_step.outer(&block); - - Comm_velocity_based_distributions.startCommunication(); - for (auto& block : *blocks) - hydro_LB_step.inner(&block); - Comm_velocity_based_distributions.wait(); - for (auto& block : *blocks) - hydro_LB_step.outer(&block); + Comm_velocity_based_distributions.startCommunication(); + for (auto& block : *blocks) + phase_field_LB_step(&block); + Comm_velocity_based_distributions.wait(); + + Comm_phase_field_distributions.startCommunication(); + for (auto& block : *blocks) + hydro_LB_step(&block); + Comm_phase_field_distributions.wait(); }; auto phase_only = [&]() { for (auto& block : *blocks) @@ -324,12 +267,7 @@ int main(int argc, char** argv) }; #endif std::function< void() > timeStep; - if (timeStepStrategy == "overlap") - { - timeStep = std::function< void() >(simpleOverlapTimeStep); - WALBERLA_LOG_INFO_ON_ROOT("overlapping timestep") - } - else if (timeStepStrategy == "phase_only") + if (timeStepStrategy == "phase_only") { timeStep = std::function< void() >(phase_only); WALBERLA_LOG_INFO_ON_ROOT("started only phasefield step without communication for benchmarking") @@ -347,7 +285,7 @@ int main(int argc, char** argv) else { timeStep = std::function< void() >(normalTimeStep); - WALBERLA_LOG_INFO_ON_ROOT("normal timestep with no overlapping") + WALBERLA_LOG_INFO_ON_ROOT("normal timestep with overlapping") } timeLoop->add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step"); @@ -364,10 +302,8 @@ int main(int argc, char** argv) auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", "simulation_step", false, true, true, false, 0); #if defined(WALBERLA_BUILD_WITH_CUDA) - vtkOutput->addBeforeFunction([&]() { - cuda::fieldCpy< PhaseField_T, GPUField >(blocks, phase_field, phase_field_gpu); - // cuda::fieldCpy<VelocityField_T, GPUField>( blocks, vel_field, vel_field_gpu ); - }); + vtkOutput->addBeforeFunction( + [&]() { cuda::fieldCpy< PhaseField_T, GPUField >(blocks, phase_field, phase_field_gpu); }); #endif auto phaseWriter = make_shared< field::VTKWriter< PhaseField_T > >(phase_field, "phase"); vtkOutput->addCellDataWriter(phaseWriter); @@ -402,6 +338,11 @@ int main(int argc, char** argv) if (pythonCallbackResults.isCallable()) { pythonCallbackResults.data().exposeValue("mlupsPerProcess", mlupsPerProcess); + pythonCallbackResults.data().exposeValue("stencil_phase", StencilNamePhase); + pythonCallbackResults.data().exposeValue("stencil_hydro", StencilNameHydro); + #if defined(WALBERLA_BUILD_WITH_CUDA) + pythonCallbackResults.data().exposeValue("cuda_enabled_mpi", cudaEnabledMpi); + #endif // Call Python function to report results pythonCallbackResults(); } diff --git a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_piz_daint.py b/apps/benchmarks/PhaseFieldAllenCahn/benchmark_piz_daint.py deleted file mode 100755 index 816d74b2787e1ca718ce7454be06b8f818011593..0000000000000000000000000000000000000000 --- a/apps/benchmarks/PhaseFieldAllenCahn/benchmark_piz_daint.py +++ /dev/null @@ -1,149 +0,0 @@ -import os -import waLBerla as wlb -import csv - -from waLBerla.tools.sqlitedb import sequenceValuesToScalars - -import sys - - -class Scenario: - def __init__(self, timeStepStrategy, overlappingWidth, cuda_block_size): - # output frequencies - self.vtkWriteFrequency = 0 - - # simulation parameters - self.timesteps = 201 - edge_size = 100 - self.cells = (edge_size, edge_size, edge_size) - self.blocks = (1, 1, 1) - self.periodic = (1, 1, 1) - self.size = (self.cells[0] * self.blocks[0], - self.cells[1] * self.blocks[1], - self.cells[2] * self.blocks[2]) - - self.timeStepStrategy = timeStepStrategy - self.overlappingWidth = overlappingWidth - self.cuda_block_size = cuda_block_size - self.warmupSteps = 20 - - # bubble parameters - self.bubbleRadius = min(self.size) // 4 - self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2) - - self.scenario = 1 # 1 rising bubble, 2 RTI - self.config_dict = self.config() - - self.csv_file = "benchmark_piz_daint.csv" - - @wlb.member_callback - def config(self): - return { - 'DomainSetup': { - 'blocks': self.blocks, - 'cellsPerBlock': self.cells, - 'periodic': self.periodic, - }, - 'Parameters': { - 'timesteps': self.timesteps, - 'vtkWriteFrequency': self.vtkWriteFrequency, - 'useGui': 0, - 'remainingTimeLoggerFrequency': -1, - 'timeStepStrategy': self.timeStepStrategy, - 'overlappingWidth': self.overlappingWidth, - 'gpuBlockSize': self.cuda_block_size, - 'warmupSteps': self.warmupSteps, - 'scenario': self.scenario, - }, - 'Boundaries_GPU': { - 'Border': [] - }, - 'Boundaries_CPU': { - 'Border': [] - }, - 'Bubble': { - 'bubbleMidPoint': self.bubbleMidPoint, - 'bubbleRadius': self.bubbleRadius, - }, - } - - @wlb.member_callback - def results_callback(self, **kwargs): - data = {} - data.update(self.config_dict['Parameters']) - data.update(self.config_dict['DomainSetup']) - data.update(kwargs) - data['executable'] = sys.argv[0] - data['compile_flags'] = wlb.build_info.compiler_flags - data['walberla_version'] = wlb.build_info.version - data['build_machine'] = wlb.build_info.build_machine - sequenceValuesToScalars(data) - - if not os.path.isfile(self.csv_file): - try: - with open(self.csv_file, 'w') as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=data) - writer.writeheader() - writer.writerow(data) - except IOError: - print("could not create csv file") - else: - try: - with open(self.csv_file, 'a') as csvfile: - writer = csv.DictWriter(csvfile, fieldnames=data) - writer.writerow(data) - except IOError: - print("could not write to csv file") - - -def overlap_benchmark(): - scenarios = wlb.ScenarioManager() - overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1), - (4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1), - (4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)] - - cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), - (32, 2, 1), (64, 2, 1), (128, 2, 1), - (32, 4, 1), (64, 4, 1), - (32, 4, 2), (32, 8, 1), (16, 16, 1)] - - # no overlap - scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1), cuda_block_size=(16, 16, 1))) - - # overlap - for overlap_strategy in ['overlap']: - for overlappingWidth in overlappingWidths: - for cuda_block in cuda_blocks: - scenario = Scenario(timeStepStrategy=overlap_strategy, - overlappingWidth=overlappingWidth, - cuda_block_size=cuda_block) - scenarios.add(scenario) - - -def kernel_benchmark(): - scenarios = wlb.ScenarioManager() - - # overlap - # for overlap_strategy in ['phase_only', 'hydro_only', 'kernel_only']: - for overlap_strategy in ['overlap']: - scenario = Scenario(timeStepStrategy=overlap_strategy, - overlappingWidth=(16, 16, 16), - cuda_block_size=(128, 4, 1)) - scenarios.add(scenario) - - -def weak_scaling(): - scenarios = wlb.ScenarioManager() - - # overlap - # for overlap_strategy in ['phase_only', 'hydro_only', 'kernel_only']: - for overlap_strategy in ['overlap']: - scenario = Scenario(timeStepStrategy=overlap_strategy, - overlappingWidth=(32, 32, 32), - cuda_block_size=(128, 2, 1)) - scenarios.add(scenario) - - -# overlap_benchmark() -# kernel_benchmark() -weak_scaling() diff --git a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py index bbd994de58a0b180293b7e643a2a27e0b6c50068..9be9c174b29b7d9452202f70d77d21ad497a8db2 100644 --- a/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py +++ b/apps/benchmarks/PhaseFieldAllenCahn/multiphase_codegen.py @@ -1,11 +1,10 @@ from pystencils import fields, TypedSymbol from pystencils.simp import sympy_cse -from pystencils import AssignmentCollection -from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule +from lbmpy.creationfunctions import create_lb_method from lbmpy.stencils import get_stencil -from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_for_field +from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_for_field, generate_info_header from lbmpy_walberla import generate_lb_pack_info from lbmpy.phasefield_allen_cahn.kernel_equations import initializer_kernel_phase_field_lb, \ @@ -16,133 +15,145 @@ from lbmpy.phasefield_allen_cahn.force_model import MultiphaseForceModel import numpy as np -stencil_phase = get_stencil("D3Q15", "walberla") -stencil_hydro = get_stencil("D3Q27", "walberla") -q_phase = len(stencil_phase) -q_hydro = len(stencil_hydro) - -maxwellian_moments = False -assert (len(stencil_phase[0]) == len(stencil_hydro[0])) -dimensions = len(stencil_hydro[0]) - -######################## -# PARAMETER DEFINITION # -######################## - -density_liquid = 1.0 -density_gas = 0.001 -surface_tension = 0.0001 -M = 0.02 - -# phase-field parameter -drho3 = (density_liquid - density_gas) / 3 -# interface thickness -W = 5 -# coefficient related to surface tension -beta = 12.0 * (surface_tension / W) -# coefficient related to surface tension -kappa = 1.5 * surface_tension * W -# relaxation rate allen cahn (h) -w_c = 1.0 / (0.5 + (3.0 * M)) - -######################## -# FIELDS # -######################## - -# velocity field -u = fields(f"vel_field({dimensions}): [{dimensions}D]", layout='fzyx') -# phase-field -C = fields(f"phase_field: [{dimensions}D]", layout='fzyx') -C_tmp = fields(f"phase_field_tmp: [{dimensions}D]", layout='fzyx') - -# phase-field distribution functions -h = fields(f"lb_phase_field({q_phase}): [{dimensions}D]", layout='fzyx') -h_tmp = fields(f"lb_phase_field_tmp({q_phase}): [{dimensions}D]", layout='fzyx') -# hydrodynamic distribution functions -g = fields(f"lb_velocity_field({q_hydro}): [{dimensions}D]", layout='fzyx') -g_tmp = fields(f"lb_velocity_field_tmp({q_hydro}): [{dimensions}D]", layout='fzyx') - -######################################## -# RELAXATION RATES AND EXTERNAL FORCES # -######################################## - -# calculate the relaxation rate for the hydro lb as well as the body force -density = density_gas + C.center * (density_liquid - density_gas) - -# force acting on all phases of the model -body_force = np.array([0, 1e-6, 0]) - -relaxation_time = 0.03 + 0.5 -relaxation_rate = 1.0 / relaxation_time - -############### -# LBM METHODS # -############### - -method_phase = create_lb_method(stencil=stencil_phase, method='srt', relaxation_rate=w_c, compressible=True) - -method_hydro = create_lb_method(stencil=stencil_hydro, method="mrt", weighted=True, - relaxation_rates=[relaxation_rate, 1, 1, 1, 1, 1], - maxwellian_moments=maxwellian_moments) - -# create the kernels for the initialization of the g and h field -h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W) -g_updates = initializer_kernel_hydro_lb(g, u, method_hydro) - - -force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W, fd_stencil=get_stencil("D3Q27"))] -force_model_h = MultiphaseForceModel(force=force_h) - -force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, body_force, - fd_stencil=get_stencil("D3Q27")) - -force_model_g = MultiphaseForceModel(force=force_g, rho=density) - -#################### -# LBM UPDATE RULES # -#################### - -phase_field_LB_step = get_collision_assignments_phase(lb_method=method_phase, - velocity_input=u, - output={'density': C_tmp}, - force_model=force_model_h, - symbolic_fields={"symbolic_field": h, - "symbolic_temporary_field": h_tmp}, - kernel_type='stream_pull_collide') - -phase_field_LB_step = sympy_cse(phase_field_LB_step) - -# --------------------------------------------------------------------------------------------------------- - -hydro_LB_step = get_collision_assignments_hydro(lb_method=method_hydro, - density=density, - velocity_input=u, - force_model=force_model_g, - sub_iterations=2, - symbolic_fields={"symbolic_field": g, - "symbolic_temporary_field": g_tmp}, - kernel_type='collide_stream_push') - -################### -# GENERATE SWEEPS # -################### - -cpu_vec = {'assume_inner_stride_one': True, 'nontemporal': True} - -vp = [('int32_t', 'cudaBlockSize0'), - ('int32_t', 'cudaBlockSize1')] +with CodeGeneration() as ctx: + field_type = "float64" if ctx.double_accuracy else "float32" -sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32), - TypedSymbol("cudaBlockSize1", np.int32), - 1) -sweep_params = {'block_size': sweep_block_size} + stencil_phase_name = "D3Q15" + stencil_hydro_name = "D3Q27" + + stencil_phase = get_stencil(stencil_phase_name, "walberla") + stencil_hydro = get_stencil(stencil_hydro_name, "walberla") + q_phase = len(stencil_phase) + q_hydro = len(stencil_hydro) + + assert (len(stencil_phase[0]) == len(stencil_hydro[0])) + dimensions = len(stencil_hydro[0]) + + ######################## + # PARAMETER DEFINITION # + ######################## + + density_liquid = 1.0 + density_gas = 0.001 + surface_tension = 0.0001 + M = 0.02 + + # phase-field parameter + drho3 = (density_liquid - density_gas) / 3 + # interface thickness + W = 5 + # coefficient related to surface tension + beta = 12.0 * (surface_tension / W) + # coefficient related to surface tension + kappa = 1.5 * surface_tension * W + # relaxation rate allen cahn (h) + w_c = 1.0 / (0.5 + (3.0 * M)) + + ######################## + # FIELDS # + ######################## + + # velocity field + u = fields(f"vel_field({dimensions}): {field_type}[{dimensions}D]", layout='fzyx') + # phase-field + C = fields(f"phase_field: {field_type}[{dimensions}D]", layout='fzyx') + C_tmp = fields(f"phase_field_tmp: {field_type}[{dimensions}D]", layout='fzyx') + + # phase-field distribution functions + h = fields(f"lb_phase_field({q_phase}): {field_type}[{dimensions}D]", layout='fzyx') + h_tmp = fields(f"lb_phase_field_tmp({q_phase}): {field_type}[{dimensions}D]", layout='fzyx') + # hydrodynamic distribution functions + g = fields(f"lb_velocity_field({q_hydro}): {field_type}[{dimensions}D]", layout='fzyx') + g_tmp = fields(f"lb_velocity_field_tmp({q_hydro}): {field_type}[{dimensions}D]", layout='fzyx') + + ######################################## + # RELAXATION RATES AND EXTERNAL FORCES # + ######################################## + + # calculate the relaxation rate for the hydro lb as well as the body force + density = density_gas + C.center * (density_liquid - density_gas) + + # force acting on all phases of the model + body_force = np.array([0, 1e-6, 0]) + + relaxation_time = 0.03 + 0.5 + relaxation_rate = 1.0 / relaxation_time + + ############### + # LBM METHODS # + ############### + + method_phase = create_lb_method(stencil=stencil_phase, method='srt', relaxation_rate=w_c, compressible=True) + + method_hydro = create_lb_method(stencil=stencil_hydro, method="mrt", weighted=True, + relaxation_rates=[relaxation_rate, 1, 1, 1, 1, 1]) + + # create the kernels for the initialization of the g and h field + h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W) + g_updates = initializer_kernel_hydro_lb(g, u, method_hydro) + + force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W, fd_stencil=get_stencil("D3Q27"))] + force_model_h = MultiphaseForceModel(force=force_h) + + force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, + body_force, + fd_stencil=get_stencil("D3Q27")) + + force_model_g = MultiphaseForceModel(force=force_g, rho=density) + + #################### + # LBM UPDATE RULES # + #################### + + phase_field_LB_step = get_collision_assignments_phase(lb_method=method_phase, + velocity_input=u, + output={'density': C_tmp}, + force_model=force_model_h, + symbolic_fields={"symbolic_field": h, + "symbolic_temporary_field": h_tmp}, + kernel_type='stream_pull_collide') + + phase_field_LB_step = sympy_cse(phase_field_LB_step) + + # --------------------------------------------------------------------------------------------------------- + + hydro_LB_step = get_collision_assignments_hydro(lb_method=method_hydro, + density=density, + velocity_input=u, + force_model=force_model_g, + sub_iterations=2, + symbolic_fields={"symbolic_field": g, + "symbolic_temporary_field": g_tmp}, + kernel_type='collide_stream_push') + + hydro_LB_step = sympy_cse(hydro_LB_step) + ################### + # GENERATE SWEEPS # + ################### + + cpu_vec = {'assume_inner_stride_one': True, 'nontemporal': True} + + vp = [('int32_t', 'cudaBlockSize0'), + ('int32_t', 'cudaBlockSize1'), + ('int32_t', 'cudaBlockSize2')] + + sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32), + TypedSymbol("cudaBlockSize1", np.int32), + TypedSymbol("cudaBlockSize2", np.int32)) + sweep_params = {'block_size': sweep_block_size} + + stencil_typedefs = {'Stencil_phase_T': stencil_phase, + 'Stencil_hydro_T': stencil_hydro} + field_typedefs = {'PdfField_phase_T': h, + 'PdfField_hydro_T': g, + 'VelocityField_T': u, + 'PhaseField_T': C} -info_header = f""" -#include "stencil/D3Q{q_phase}.h"\nusing Stencil_phase_T = walberla::stencil::D3Q{q_phase}; -#include "stencil/D3Q{q_hydro}.h"\nusing Stencil_hydro_T = walberla::stencil::D3Q{q_hydro}; -""" + additional_code = f""" + const char * StencilNamePhase = "{stencil_phase_name}"; + const char * StencilNameHydro = "{stencil_hydro_name}"; + """ -with CodeGeneration() as ctx: if not ctx.cuda: if not ctx.optimize_for_localhost: cpu_vec = {'instruction_set': None} @@ -169,8 +180,6 @@ with CodeGeneration() as ctx: generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target='cpu') - ctx.write_file("GenDefines.h", info_header) - if ctx.cuda: generate_sweep(ctx, 'initialize_phase_field_distributions', h_updates, target='gpu') @@ -179,14 +188,12 @@ with CodeGeneration() as ctx: generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step, field_swaps=[(h, h_tmp), (C, C_tmp)], - inner_outer_split=True, target='gpu', gpu_indexing_params=sweep_params, varying_parameters=vp) generate_sweep(ctx, 'hydro_LB_step', hydro_LB_step, field_swaps=[(g, g_tmp)], - inner_outer_split=True, target='gpu', gpu_indexing_params=sweep_params, varying_parameters=vp) @@ -199,6 +206,8 @@ with CodeGeneration() as ctx: generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target='gpu') - ctx.write_file("GenDefines.h", info_header) + # Info header containing correct template definitions for stencil and field + generate_info_header(ctx, 'GenDefines', stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs, + additional_code=additional_code) print("finished code generation successfully") diff --git a/apps/benchmarks/PhaseFieldAllenCahn/profiling.py b/apps/benchmarks/PhaseFieldAllenCahn/profiling.py index f8e6bed300cb1ce5e04d1ae46915bf926d90c809..d2e416f93f1bdfb21b030c840dd86e0700197bbf 100644 --- a/apps/benchmarks/PhaseFieldAllenCahn/profiling.py +++ b/apps/benchmarks/PhaseFieldAllenCahn/profiling.py @@ -16,15 +16,13 @@ class Scenario: self.cells[1] * self.blocks[1], self.cells[2] * self.blocks[2]) - self.timeStepStrategy = 'phase_only' - self.overlappingWidth = (1, 1, 1) + self.timeStepStrategy = 'phase_only' # 'phase_only', 'hydro_only', 'kernel_only', 'normal' self.cuda_block_size = (128, 1, 1) # bubble parameters self.bubbleRadius = min(self.size) // 4 self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2) - self.scenario = 1 # 1 rising bubble, 2 RTI self.config_dict = self.config() @wlb.member_callback @@ -38,13 +36,11 @@ class Scenario: 'Parameters': { 'timesteps': self.timesteps, 'vtkWriteFrequency': self.vtkWriteFrequency, - 'useGui': 0, 'remainingTimeLoggerFrequency': -1, 'timeStepStrategy': self.timeStepStrategy, - 'overlappingWidth': self.overlappingWidth, 'gpuBlockSize': self.cuda_block_size, 'warmupSteps': 0, - 'scenario': self.scenario, + 'scenario': 1, }, 'Boundaries_GPU': { 'Border': [] diff --git a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py index b3e7ac03396100609e509149f35ab7179712f945..8b503fedaabc15bd0b8bc465ac6ed7a1f6d1047d 100755 --- a/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py +++ b/apps/benchmarks/UniformGridGPU/simulation_setup/benchmark_configs.py @@ -11,10 +11,9 @@ import os import waLBerla as wlb from waLBerla.tools.config import block_decomposition from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle -from functools import reduce -import operator import sys import sqlite3 +from math import prod # Number of time steps run for a workload of 128^3 per GPU # if double as many cells are on the GPU, half as many time steps are run etc. @@ -36,10 +35,6 @@ BASE_CONFIG = { } -def prod(seq): - return reduce(operator.mul, seq, 1) - - def num_time_steps(block_size, time_steps_for_128_block=200): cells = block_size[0] * block_size[1] * block_size[2] time_steps = (128 ** 3 / cells) * time_steps_for_128_block diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/droplet_contact_angle.py b/apps/showcases/PhaseFieldAllenCahn/GPU/droplet_contact_angle.py index 34bdb9fdceb1432638a89d5670c7c8ce537158d5..2f6c36edd7d96359a6c1c7aef9eeebd9bfdcbfea 100755 --- a/apps/showcases/PhaseFieldAllenCahn/GPU/droplet_contact_angle.py +++ b/apps/showcases/PhaseFieldAllenCahn/GPU/droplet_contact_angle.py @@ -2,7 +2,7 @@ import waLBerla as wlb class Scenario: - def __init__(self): + def __init__(self, cuda_enabled_mpi=False): # output frequencies self.vtkWriteFrequency = 1000 @@ -28,6 +28,9 @@ class Scenario: self.counter = 0 self.yPositions = [] + self.cudaEnabledMpi = cuda_enabled_mpi + self.cuda_blocks = (64, 2, 2) + @wlb.member_callback def config(self): return { @@ -44,6 +47,8 @@ class Scenario: 'overlappingWidth': self.overlappingWidth, 'remainingTimeLoggerFrequency': 10.0, 'scenario': self.scenario, + 'cudaEnabledMpi': self.cudaEnabledMpi, + 'gpuBlockSize': self.cuda_blocks }, 'PhysicalParameters': { 'density_liquid': 1.0, diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase.cpp b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase.cpp index a97ea043fd4facc1010f2838b99e5269c91a6023..f2c8f0087fd54a3de147537613b8282a4b5f23a8 100644 --- a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase.cpp +++ b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase.cpp @@ -58,16 +58,6 @@ //////////////////////////// #include "GenDefines.h" -#include "PackInfo_phase_field.h" -#include "PackInfo_phase_field_distributions.h" -#include "PackInfo_velocity_based_distributions.h" -#include "hydro_LB_NoSlip.h" -#include "hydro_LB_step.h" -#include "initialize_phase_field_distributions.h" -#include "initialize_velocity_based_distributions.h" -#include "phase_field_LB_NoSlip.h" -#include "phase_field_LB_step.h" -#include "ContactAngle.h" //////////// // USING // @@ -75,31 +65,11 @@ using namespace walberla; -using NormalsField_T = GhostLayerField< int8_t, Stencil_hydro_T::Dimension >; -using FlagField_T = FlagField< uint8_t >; +using FlagField_T = FlagField< uint8_t >; typedef cuda::GPUField< real_t > GPUField; typedef cuda::GPUField< uint8_t > GPUField_int; -class Filter -{ - public: - explicit Filter(Vector3< uint_t > numberOfCells) : numberOfCells_(numberOfCells) {} - - void operator()(const IBlock& /*block*/) {} - - bool operator()(const cell_idx_t x, const cell_idx_t y, const cell_idx_t z) const - { - return x >= -1 && x <= cell_idx_t(numberOfCells_[0]) && y >= -1 && y <= cell_idx_t(numberOfCells_[1]) && - z >= -1 && z <= cell_idx_t(numberOfCells_[2]); - } - - private: - Vector3< uint_t > numberOfCells_; -}; - -using FluidFilter_T = Filter; - int main(int argc, char** argv) { mpi::Environment Env(argc, argv); @@ -120,7 +90,7 @@ int main(int argc, char** argv) auto domainSetup = config->getOneBlock("DomainSetup"); Vector3< uint_t > cellsPerBlock = domainSetup.getParameter< Vector3< uint_t > >("cellsPerBlock"); - const bool tube = domainSetup.getParameter< bool >("tube", true); + const bool tube = domainSetup.getParameter< bool >("tube", false); //////////////////////////////////////// // ADD GENERAL SIMULATION PARAMETERS // @@ -132,10 +102,9 @@ int main(int argc, char** argv) const real_t remainingTimeLoggerFrequency = parameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0); const uint_t scenario = parameters.getParameter< uint_t >("scenario", uint_c(1)); - Vector3< int > overlappingWidth = - parameters.getParameter< Vector3< int > >("overlappingWidth", Vector3< int >(1, 1, 1)); Vector3< int > gpuBlockSize = parameters.getParameter< Vector3< int > >("gpuBlockSize", Vector3< int >(128, 1, 1)); + const bool cudaEnabledMpi = parameters.getParameter< bool >("cudaEnabledMpi", false); ///////////////////////// // ADD DATA TO BLOCKS // @@ -178,7 +147,6 @@ int main(int argc, char** argv) const real_t bubbleRadius = bubbleParameters.getParameter< real_t >("bubbleRadius", 20.0); const bool bubble = bubbleParameters.getParameter< bool >("bubble", true); initPhaseField_sphere(blocks, phase_field, bubbleRadius, bubbleMidPoint, bubble, interface_thickness); - } else if (scenario == 2) { @@ -213,33 +181,32 @@ int main(int argc, char** argv) interface_thickness); pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field_gpu, vel_field_gpu); - pystencils::phase_field_LB_step phase_field_LB_step( - flagFieldID_gpu, lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, interface_thickness, mobility, - gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2], - Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2])); + pystencils::phase_field_LB_step phase_field_LB_step(flagFieldID_gpu, lb_phase_field_gpu, phase_field_gpu, + vel_field_gpu, interface_thickness, mobility, gpuBlockSize[0], + gpuBlockSize[1], gpuBlockSize[2]); pystencils::hydro_LB_step hydro_LB_step(flagFieldID_gpu, lb_velocity_field_gpu, phase_field_gpu, vel_field_gpu, gravitational_acceleration, interface_thickness, density_liquid, density_gas, surface_tension, relaxation_time_liquid, relaxation_time_gas, - gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2], - Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2])); + gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]); //////////////////////// // ADD COMMUNICATION // ////////////////////// auto Comm_velocity_based_distributions = - make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 1); + make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi); auto generatedPackInfo_velocity_based_distributions = make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu); Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions); - auto Comm_phase_field = make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 1); + auto Comm_phase_field = + make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi); auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field_gpu); Comm_phase_field->addPackInfo(generatedPackInfo_phase_field); auto Comm_phase_field_distributions = - make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 1); + make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi); auto generatedPackInfo_phase_field_distributions = make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field_gpu); Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions); @@ -261,7 +228,8 @@ int main(int argc, char** argv) const real_t eccentricity = domainSetup.getParameter< real_t >("ratio", real_c(0)); const real_t start_transition = domainSetup.getParameter< real_t >("start_transition", real_c(60)); const real_t length_transition = domainSetup.getParameter< real_t >("length_transition", real_c(10)); - const bool eccentricity_or_pipe_ration = domainSetup.getParameter< bool >("eccentricity_or_pipe_ration", true); + const bool eccentricity_or_pipe_ration = + domainSetup.getParameter< bool >("eccentricity_or_pipe_ration", true); initTubeWithCylinder(blocks, flagFieldID, wallFlagUID, inner_radius, eccentricity, start_transition, length_transition, eccentricity_or_pipe_ration); } @@ -290,56 +258,30 @@ int main(int argc, char** argv) auto timeLoop = make_shared< SweepTimeloop >(blocks->getBlockStorage(), timesteps); auto normalTimeStep = [&]() { + Comm_velocity_based_distributions->startCommunication(defaultStream); for (auto& block : *blocks) { - Comm_phase_field_distributions->communicate(nullptr); - phase_field_LB_NoSlip(&block); - - phase_field_LB_step(&block); - contact_angle(&block); - Comm_phase_field->communicate(nullptr); - - hydro_LB_step(&block); - - Comm_velocity_based_distributions->communicate(nullptr); - hydro_LB_NoSlip(&block); + phase_field_LB_NoSlip(&block, defaultStream); + phase_field_LB_step(&block, defaultStream); } - }; - auto simpleOverlapTimeStep = [&]() { + Comm_velocity_based_distributions->wait(defaultStream); + for (auto& block : *blocks) - phase_field_LB_NoSlip(&block); + { + contact_angle(&block, defaultStream); + Comm_phase_field->communicate(defaultStream); + } Comm_phase_field_distributions->startCommunication(defaultStream); for (auto& block : *blocks) - phase_field_LB_step.inner(&block, defaultStream); + { + hydro_LB_NoSlip(&block, defaultStream); + hydro_LB_step(&block, defaultStream); + } Comm_phase_field_distributions->wait(defaultStream); - for (auto& block : *blocks) - phase_field_LB_step.outer(&block, defaultStream); - - for (auto& block : *blocks) - contact_angle(&block); - - Comm_phase_field->startCommunication(defaultStream); - for (auto& block : *blocks) - hydro_LB_step.inner(&block, defaultStream); - Comm_phase_field->wait(defaultStream); - for (auto& block : *blocks) - hydro_LB_step.outer(&block, defaultStream); - - for (auto& block : *blocks) - hydro_LB_NoSlip(&block); }; std::function< void() > timeStep; - if (timeStepStrategy == "overlap") - { - timeStep = std::function< void() >(simpleOverlapTimeStep); - WALBERLA_LOG_INFO_ON_ROOT("overlapping timestep") - } - else - { - timeStep = std::function< void() >(normalTimeStep); - WALBERLA_LOG_INFO_ON_ROOT("normal timestep with no overlapping") - } + timeStep = std::function< void() >(normalTimeStep); timeLoop->add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step"); @@ -360,77 +302,87 @@ int main(int argc, char** argv) init_h(&block); init_g(&block); } + + for (auto& block : *blocks) + { + Comm_phase_field_distributions->communicate(nullptr); + phase_field_LB_NoSlip(&block); + } WALBERLA_LOG_INFO_ON_ROOT("Initialisation of the PDFs done") - uint_t dbWriteFrequency = parameters.getParameter< uint_t >("dbWriteFrequency", 10000000); + uint_t dbWriteFrequency = parameters.getParameter< uint_t >("dbWriteFrequency", 0); int targetRank = 0; - timeLoop->addFuncAfterTimeStep( - [&]() { - if (timeLoop->getCurrentTimeStep() % dbWriteFrequency == 0) - { - cuda::fieldCpy< PhaseField_T, GPUField >(blocks, phase_field, phase_field_gpu); - cuda::fieldCpy< VelocityField_T, GPUField >(blocks, vel_field, vel_field_gpu); - if (scenario == 4) + if (dbWriteFrequency > 0) + { + timeLoop->addFuncAfterTimeStep( + [&]() { + if (timeLoop->getCurrentTimeStep() % dbWriteFrequency == 0) { - std::array< real_t, 4 > total_velocity = { 0.0, 0.0, 0.0, 0.0 }; - real_t volume; - uint_t nrCells; - PhaseField_T gatheredPhaseField(0, 0, 0, 0); - VelocityField_T gatheredVelocityField(0, 0, 0, 0); - - CellInterval boundingBox = blocks->getDomainCellBB(); - if (cell_idx_t(center_of_mass[1] - cell_idx_t(cellsPerBlock[0]) * 1.5) >= 0) - boundingBox.min()[1] = cell_idx_t(center_of_mass[1] - cell_idx_t(cellsPerBlock[0]) * 1.5); - if (cell_idx_t(center_of_mass[1] + cell_idx_t(cellsPerBlock[0]) * 1.5) <= boundingBox.max()[1]) - boundingBox.max()[1] = cell_idx_t(center_of_mass[1] + cell_idx_t(cellsPerBlock[0]) * 1.5); - - field::gather< PhaseField_T >(gatheredPhaseField, blocks, phase_field, boundingBox, targetRank); - field::gather< VelocityField_T >(gatheredVelocityField, blocks, vel_field, boundingBox, targetRank); - - WALBERLA_EXCLUSIVE_WORLD_SECTION(targetRank) - { - flood_fill(gatheredPhaseField, gatheredVelocityField, boundingBox, volume, nrCells, center_of_mass, - total_velocity); - } - WALBERLA_MPI_SECTION() { walberla::mpi::broadcastObject(center_of_mass, targetRank); } - - python_coupling::PythonCallback callback("at_end_of_time_step"); - if (callback.isCallable()) + cuda::fieldCpy< PhaseField_T, GPUField >(blocks, phase_field, phase_field_gpu); + cuda::fieldCpy< VelocityField_T, GPUField >(blocks, vel_field, vel_field_gpu); + if (scenario == 4) { - callback.data().exposeValue("blocks", blocks); - callback.data().exposeValue("timeStep", timeLoop->getCurrentTimeStep()); - callback.data().exposeValue("target_rank", targetRank); - callback.data().exposeValue("bounding_box_min", boundingBox.min()[1]); - callback.data().exposeValue("bounding_box_max", boundingBox.max()[1]); - callback.data().exposeValue("total_velocity", total_velocity[0]); - callback.data().exposeValue("total_velocity_X", total_velocity[1]); - callback.data().exposeValue("total_velocity_Y", total_velocity[2]); - callback.data().exposeValue("total_velocity_Z", total_velocity[3]); - callback.data().exposeValue("center_of_mass_X", center_of_mass[0]); - callback.data().exposeValue("center_of_mass_Y", center_of_mass[1]); - callback.data().exposeValue("center_of_mass_Z", center_of_mass[2]); - callback.data().exposeValue("sum_inv_phi", volume); - callback.data().exposeValue("gas_cells_of_the_taylor_bubble", nrCells); - callback.data().exposeValue("stencil_phase", stencil_phase_name); - callback.data().exposeValue("stencil_hydro", stencil_hydro_name); - callback(); + std::array< real_t, 4 > total_velocity = { 0.0, 0.0, 0.0, 0.0 }; + real_t volume; + uint_t nrCells; + PhaseField_T gatheredPhaseField(0, 0, 0, 0); + VelocityField_T gatheredVelocityField(0, 0, 0, 0); + + CellInterval boundingBox = blocks->getDomainCellBB(); + if (cell_idx_t(center_of_mass[1] - cell_idx_t(cellsPerBlock[0]) * 1.5) >= 0) + boundingBox.min()[1] = cell_idx_t(center_of_mass[1] - cell_idx_t(cellsPerBlock[0]) * 1.5); + if (cell_idx_t(center_of_mass[1] + cell_idx_t(cellsPerBlock[0]) * 1.5) <= boundingBox.max()[1]) + boundingBox.max()[1] = cell_idx_t(center_of_mass[1] + cell_idx_t(cellsPerBlock[0]) * 1.5); + + field::gather< PhaseField_T >(gatheredPhaseField, blocks, phase_field, boundingBox, targetRank); + field::gather< VelocityField_T >(gatheredVelocityField, blocks, vel_field, boundingBox, + targetRank); + + WALBERLA_EXCLUSIVE_WORLD_SECTION(targetRank) + { + flood_fill(gatheredPhaseField, gatheredVelocityField, boundingBox, volume, nrCells, + center_of_mass, total_velocity); + } + WALBERLA_MPI_SECTION() { walberla::mpi::broadcastObject(center_of_mass, targetRank); } + + python_coupling::PythonCallback callback("at_end_of_time_step"); + if (callback.isCallable()) + { + callback.data().exposeValue("blocks", blocks); + callback.data().exposeValue("timeStep", timeLoop->getCurrentTimeStep()); + callback.data().exposeValue("target_rank", targetRank); + callback.data().exposeValue("bounding_box_min", boundingBox.min()[1]); + callback.data().exposeValue("bounding_box_max", boundingBox.max()[1]); + callback.data().exposeValue("total_velocity", total_velocity[0]); + callback.data().exposeValue("total_velocity_X", total_velocity[1]); + callback.data().exposeValue("total_velocity_Y", total_velocity[2]); + callback.data().exposeValue("total_velocity_Z", total_velocity[3]); + callback.data().exposeValue("center_of_mass_X", center_of_mass[0]); + callback.data().exposeValue("center_of_mass_Y", center_of_mass[1]); + callback.data().exposeValue("center_of_mass_Z", center_of_mass[2]); + callback.data().exposeValue("sum_inv_phi", volume); + callback.data().exposeValue("gas_cells_of_the_taylor_bubble", nrCells); + callback.data().exposeValue("stencil_phase", StencilNamePhase); + callback.data().exposeValue("stencil_hydro", StencilNameHydro); + callback(); + } } - } - else - { - python_coupling::PythonCallback callback("at_end_of_time_step"); - if (callback.isCallable()) + else { - callback.data().exposeValue("blocks", blocks); - callback.data().exposeValue("timeStep", timeLoop->getCurrentTimeStep()); - callback.data().exposeValue("stencil_phase", stencil_phase_name); - callback.data().exposeValue("stencil_hydro", stencil_hydro_name); - callback(); + python_coupling::PythonCallback callback("at_end_of_time_step"); + if (callback.isCallable()) + { + callback.data().exposeValue("blocks", blocks); + callback.data().exposeValue("timeStep", timeLoop->getCurrentTimeStep()); + callback.data().exposeValue("stencil_phase", StencilNamePhase); + callback.data().exposeValue("stencil_hydro", StencilNameHydro); + callback(); + } } } - } - }, - "Python callback"); + }, + "Python callback"); + } int meshWriteFrequency = parameters.getParameter< int >("meshWriteFrequency", 0); int counter = 0; @@ -469,7 +421,7 @@ int main(int argc, char** argv) "simulation_step", false, true, true, false, 0); vtkOutput->addBeforeFunction([&]() { cuda::fieldCpy< PhaseField_T, GPUField >(blocks, phase_field, phase_field_gpu); - cuda::fieldCpy<VelocityField_T, GPUField>( blocks, vel_field, vel_field_gpu ); + cuda::fieldCpy< VelocityField_T, GPUField >(blocks, vel_field, vel_field_gpu); }); auto phaseWriter = make_shared< field::VTKWriter< PhaseField_T, float > >(phase_field, "PhaseField"); vtkOutput->addCellDataWriter(phaseWriter); @@ -480,12 +432,6 @@ int main(int argc, char** argv) auto velWriter = make_shared< field::VTKWriter< VelocityField_T, float > >(vel_field, "Velocity"); vtkOutput->addCellDataWriter(velWriter); - FluidFilter_T filter(cellsPerBlock); - - auto QCriterionWriter = make_shared< lbm::QCriterionVTKWriter< VelocityField_T, FluidFilter_T, float > >( - blocks, filter, vel_field, "Q-Criterion"); - vtkOutput->addCellDataWriter(QCriterionWriter); - timeLoop->addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output"); } diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_RTI_3D.py b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_RTI_3D.py index 024b66a9e15c4291f045d500d4493de2448f137b..f4d6f9db166032fe1f57d60fa4ff13ec5f722f19 100755 --- a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_RTI_3D.py +++ b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_RTI_3D.py @@ -10,7 +10,7 @@ from lbmpy.phasefield_allen_cahn.parameter_calculation import calculate_paramete class Scenario: - def __init__(self): + def __init__(self, cuda_enabled_mpi=False): # output frequencies self.vtkWriteFrequency = 1000 @@ -51,13 +51,14 @@ class Scenario: self.tube = False # everything else - self.dbFile = "RTI.csv" - self.scenario = 2 # 1 rising bubble or droplet, 2 RTI, 3 bubble field, 4 taylor bubble set up self.counter = 0 self.yPositions = [] + self.cudaEnabledMpi = cuda_enabled_mpi + self.cuda_blocks = (64, 2, 2) + self.config_dict = self.config() @wlb.member_callback @@ -75,6 +76,8 @@ class Scenario: 'dbWriteFrequency': self.dbWriteFrequency, 'remainingTimeLoggerFrequency': 10.0, 'scenario': self.scenario, + 'cudaEnabledMpi': self.cudaEnabledMpi, + 'gpuBlockSize': self.cuda_blocks }, 'PhysicalParameters': { 'density_liquid': self.density_heavy, @@ -147,7 +150,8 @@ class Scenario: sequenceValuesToScalars(data) - csv_file = f"RTI_{data['stencil_phase']}_{data['stencil_hydro']}_Re_{self.reynolds_number}_tube.csv" + csv_file = f"RTI_{data['stencil_phase']}_{data['stencil_hydro']}_Re_{self.reynolds_number}" + csv_file += "_tube.csv" if self.tube else ".csv" df = pd.DataFrame.from_records([data]) if not os.path.isfile(csv_file): diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_codegen.py b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_codegen.py index 0abf47375eaa5d43924d6582761307278ecb52ea..ae42819a4b97579616086e14ba0d70391ef1839a 100644 --- a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_codegen.py +++ b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_codegen.py @@ -9,11 +9,11 @@ from lbmpy.creationfunctions import create_lb_method from lbmpy.stencils import get_stencil import pystencils_walberla -from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_for_field +from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_for_field, generate_info_header from lbmpy_walberla import generate_boundary, generate_lb_pack_info -from lbmpy.phasefield_allen_cahn.kernel_equations import initializer_kernel_phase_field_lb,\ - initializer_kernel_hydro_lb, interface_tracking_force, hydrodynamic_force, get_collision_assignments_hydro,\ +from lbmpy.phasefield_allen_cahn.kernel_equations import initializer_kernel_phase_field_lb, \ + initializer_kernel_hydro_lb, interface_tracking_force, hydrodynamic_force, get_collision_assignments_hydro, \ get_collision_assignments_phase from lbmpy.phasefield_allen_cahn.force_model import MultiphaseForceModel @@ -21,180 +21,174 @@ from lbmpy.phasefield_allen_cahn.force_model import MultiphaseForceModel import numpy as np import sympy as sp -stencil_phase_name = "D3Q27" -stencil_hydro_name = "D3Q27" - -contact_angle_in_degrees = 22 - -stencil_phase = get_stencil(stencil_phase_name) -stencil_hydro = get_stencil(stencil_hydro_name) -q_phase = len(stencil_phase) -q_hydro = len(stencil_hydro) - -assert (len(stencil_phase[0]) == len(stencil_hydro[0])) -dimensions = len(stencil_hydro[0]) - -######################## -# PARAMETER DEFINITION # -######################## - -density_liquid = sp.Symbol("rho_H") -density_gas = sp.Symbol("rho_L") - -surface_tension = sp.Symbol("sigma") -mobility = sp.Symbol("mobility") - -gravitational_acceleration = sp.Symbol("gravity") - -relaxation_time_liquid = sp.Symbol("tau_H") -relaxation_time_gas = sp.Symbol("tau_L") - -# phase-field parameter -drho3 = (density_liquid - density_gas) / 3 -# interface thickness -W = sp.Symbol("interface_thickness") -# coefficients related to surface tension -beta = 12.0 * (surface_tension / W) -kappa = 1.5 * surface_tension * W - -######################## -# FIELDS # -######################## - -# velocity field -u = fields(f"vel_field({dimensions}): [{dimensions}D]", layout='fzyx') -# phase-field -C = fields(f"phase_field: [{dimensions}D]", layout='fzyx') -C_tmp = fields(f"phase_field_tmp: [{dimensions}D]", layout='fzyx') - -flag = fields(f"flag_field: uint8[{dimensions}D]", layout='fzyx') -# phase-field distribution functions -h = fields(f"lb_phase_field({q_phase}): [{dimensions}D]", layout='fzyx') -h_tmp = fields(f"lb_phase_field_tmp({q_phase}): [{dimensions}D]", layout='fzyx') -# hydrodynamic distribution functions -g = fields(f"lb_velocity_field({q_hydro}): [{dimensions}D]", layout='fzyx') -g_tmp = fields(f"lb_velocity_field_tmp({q_hydro}): [{dimensions}D]", layout='fzyx') - -######################################## -# RELAXATION RATES AND EXTERNAL FORCES # -######################################## - -# relaxation rate for interface tracking LB step -relaxation_rate_allen_cahn = 1.0 / (0.5 + (3.0 * mobility)) -# calculate the relaxation rate for hydrodynamic LB step -density = density_gas + C.center * (density_liquid - density_gas) -# force acting on all phases of the model -body_force = np.array([0, gravitational_acceleration * density, 0]) -# calculation of the relaxation time via viscosities -# viscosity = viscosity_gas * viscosity_liquid + C.center\ -# * (density_liquid*viscosity_liquid - viscosity_liquid*viscosity_gas) -# relaxation_time = 3 * viscosity / density + 0.5 - -relaxation_time = 0.5 + relaxation_time_gas + C.center * (relaxation_time_liquid - relaxation_time_gas) -# calculate the relaxation time if the phase-field has values over one -# relaxation_rate = 1.0 / relaxation_time -relaxation_rate = sp.Symbol("s8") -relaxation_rate_cutoff = sp.Piecewise((1 / (0.5 + relaxation_time_liquid), C.center > 0.999), # True value - (1 / relaxation_time, True)) # Else value - -############### -# LBM METHODS # -############### - -# method_phase = create_lb_method(stencil=stencil_phase, method="mrt", compressible=True, weighted=True, -# relaxation_rates=[1, 1.5, 1, 1.5, 1, 1.5]) -method_phase = create_lb_method(stencil=stencil_phase, method="mrt", compressible=True, weighted=True, - relaxation_rates=[1, 1, 1, 1, 1, 1]) - -method_phase.set_conserved_moments_relaxation_rate(relaxation_rate_allen_cahn) - -method_hydro = create_lb_method(stencil=stencil_hydro, method="mrt", weighted=True, - relaxation_rates=[relaxation_rate, 1, 1, 1, 1, 1]) - - -# create the kernels for the initialization of the g and h field -h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W, fd_stencil=get_stencil("D3Q27")) -g_updates = initializer_kernel_hydro_lb(g, u, method_hydro) - -force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W, fd_stencil=get_stencil("D3Q27"))] -force_model_h = MultiphaseForceModel(force=force_h) - -force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, body_force, - fd_stencil=get_stencil("D3Q27")) - -force_model_g = MultiphaseForceModel(force=force_g, rho=density) - -#################### -# LBM UPDATE RULES # -#################### - -phase_field_LB_step = get_collision_assignments_phase(lb_method=method_phase, - velocity_input=u, - output={'density': C_tmp}, - force_model=force_model_h, - symbolic_fields={"symbolic_field": h, - "symbolic_temporary_field": h_tmp}, - kernel_type='stream_pull_collide') - -phase_field_LB_step = sympy_cse(phase_field_LB_step) - -phase_field_LB_step = [Conditional(sp.Eq(flag.center(), 2), - Block(phase_field_LB_step), - Block([Assignment(C_tmp.center, C.center)]))] -# --------------------------------------------------------------------------------------------------------- -hydro_LB_step = get_collision_assignments_hydro(lb_method=method_hydro, - density=density, - velocity_input=u, - force_model=force_model_g, - sub_iterations=2, - symbolic_fields={"symbolic_field": g, - "symbolic_temporary_field": g_tmp}, - kernel_type='collide_stream_push') - -hydro_LB_step.set_sub_expressions_from_dict({**{relaxation_rate: relaxation_rate_cutoff}, - **hydro_LB_step.subexpressions_dict}) - -hydro_LB_step = [Conditional(sp.Eq(flag.center(), 2), Block(hydro_LB_step))] - -contact_angle = ContactAngle(contact_angle_in_degrees, W) - - -################### -# GENERATE SWEEPS # -################### - -vp = [('int32_t', 'cudaBlockSize0'), - ('int32_t', 'cudaBlockSize1'), - ('int32_t', 'cudaBlockSize2')] - -sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32), - TypedSymbol("cudaBlockSize1", np.int32), - TypedSymbol("cudaBlockSize2", np.int32)) - -sweep_params = {'block_size': sweep_block_size} - -info_header = f""" -using namespace walberla; -#include "stencil/D3Q{q_phase}.h"\nusing Stencil_phase_T = walberla::stencil::D3Q{q_phase}; -#include "stencil/D3Q{q_hydro}.h"\nusing Stencil_hydro_T = walberla::stencil::D3Q{q_hydro}; -using PdfField_phase_T = GhostLayerField<real_t, {q_phase}>; -using PdfField_hydro_T = GhostLayerField<real_t, {q_hydro}>; -using VelocityField_T = GhostLayerField<real_t, {dimensions}>; -using PhaseField_T = GhostLayerField<real_t, 1>; -#ifndef UTIL_H -#define UTIL_H -const char * stencil_phase_name = "{stencil_phase_name}"; -const char * stencil_hydro_name = "{stencil_hydro_name}"; -#endif -""" - with CodeGeneration() as ctx: + field_type = "float64" if ctx.double_accuracy else "float32" + + stencil_phase_name = "D3Q27" + stencil_hydro_name = "D3Q27" + + contact_angle_in_degrees = 22 + + stencil_phase = get_stencil(stencil_phase_name) + stencil_hydro = get_stencil(stencil_hydro_name) + q_phase = len(stencil_phase) + q_hydro = len(stencil_hydro) + + assert (len(stencil_phase[0]) == len(stencil_hydro[0])) + dimensions = len(stencil_hydro[0]) + + ######################## + # PARAMETER DEFINITION # + ######################## + + density_liquid = sp.Symbol("rho_H") + density_gas = sp.Symbol("rho_L") + + surface_tension = sp.Symbol("sigma") + mobility = sp.Symbol("mobility") + + gravitational_acceleration = sp.Symbol("gravity") + + relaxation_time_liquid = sp.Symbol("tau_H") + relaxation_time_gas = sp.Symbol("tau_L") + + # phase-field parameter + drho3 = (density_liquid - density_gas) / 3 + # interface thickness + W = sp.Symbol("interface_thickness") + # coefficients related to surface tension + beta = 12.0 * (surface_tension / W) + kappa = 1.5 * surface_tension * W + + ######################## + # FIELDS # + ######################## + + # velocity field + u = fields(f"vel_field({dimensions}): {field_type}[{dimensions}D]", layout='fzyx') + # phase-field + C = fields(f"phase_field: {field_type}[{dimensions}D]", layout='fzyx') + C_tmp = fields(f"phase_field_tmp: {field_type}[{dimensions}D]", layout='fzyx') + + flag = fields(f"flag_field: uint8[{dimensions}D]", layout='fzyx') + # phase-field distribution functions + h = fields(f"lb_phase_field({q_phase}): {field_type}[{dimensions}D]", layout='fzyx') + h_tmp = fields(f"lb_phase_field_tmp({q_phase}): {field_type}[{dimensions}D]", layout='fzyx') + # hydrodynamic distribution functions + g = fields(f"lb_velocity_field({q_hydro}): {field_type}[{dimensions}D]", layout='fzyx') + g_tmp = fields(f"lb_velocity_field_tmp({q_hydro}): {field_type}[{dimensions}D]", layout='fzyx') + + ######################################## + # RELAXATION RATES AND EXTERNAL FORCES # + ######################################## + + # relaxation rate for interface tracking LB step + relaxation_rate_allen_cahn = 1.0 / (0.5 + (3.0 * mobility)) + # calculate the relaxation rate for hydrodynamic LB step + density = density_gas + C.center * (density_liquid - density_gas) + # force acting on all phases of the model + body_force = np.array([0, gravitational_acceleration * density, 0]) + # calculation of the relaxation time via viscosities + # viscosity = viscosity_gas * viscosity_liquid + C.center\ + # * (density_liquid*viscosity_liquid - viscosity_liquid*viscosity_gas) + # relaxation_time = 3 * viscosity / density + 0.5 + + relaxation_time = 0.5 + relaxation_time_gas + C.center * (relaxation_time_liquid - relaxation_time_gas) + # calculate the relaxation time if the phase-field has values over one + # relaxation_rate = 1.0 / relaxation_time + relaxation_rate = sp.Symbol("s8") + relaxation_rate_cutoff = sp.Piecewise((1 / (0.5 + relaxation_time_liquid), C.center > 0.999), # True value + (1 / relaxation_time, True)) # Else value + + ############### + # LBM METHODS # + ############### + method_phase = create_lb_method(stencil=stencil_phase, method="mrt", compressible=True, weighted=True, + relaxation_rates=[1, 1, 1, 1, 1, 1]) + + method_phase.set_first_moment_relaxation_rate(relaxation_rate_allen_cahn) + + method_hydro = create_lb_method(stencil=stencil_hydro, method="mrt", weighted=True, + relaxation_rates=[relaxation_rate, 1, 1, 1, 1, 1]) + + # create the kernels for the initialization of the g and h field + h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W, fd_stencil=get_stencil("D3Q27")) + g_updates = initializer_kernel_hydro_lb(g, u, method_hydro) + + force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W, fd_stencil=get_stencil("D3Q27"))] + force_model_h = MultiphaseForceModel(force=force_h) + + force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta, + body_force, + fd_stencil=get_stencil("D3Q27")) + + force_model_g = MultiphaseForceModel(force=force_g, rho=density) + + #################### + # LBM UPDATE RULES # + #################### + + phase_field_LB_step = get_collision_assignments_phase(lb_method=method_phase, + velocity_input=u, + output={'density': C_tmp}, + force_model=force_model_h, + symbolic_fields={"symbolic_field": h, + "symbolic_temporary_field": h_tmp}, + kernel_type='stream_pull_collide') + + phase_field_LB_step = sympy_cse(phase_field_LB_step) + + phase_field_LB_step = [Conditional(sp.Eq(flag.center(), 2), + Block(phase_field_LB_step), + Block([Assignment(C_tmp.center, C.center)]))] + # --------------------------------------------------------------------------------------------------------- + hydro_LB_step = get_collision_assignments_hydro(lb_method=method_hydro, + density=density, + velocity_input=u, + force_model=force_model_g, + sub_iterations=2, + symbolic_fields={"symbolic_field": g, + "symbolic_temporary_field": g_tmp}, + kernel_type='collide_stream_push') + + hydro_LB_step.set_sub_expressions_from_dict({**{relaxation_rate: relaxation_rate_cutoff}, + **hydro_LB_step.subexpressions_dict}) + + hydro_LB_step = [Conditional(sp.Eq(flag.center(), 2), Block(hydro_LB_step))] + + contact_angle = ContactAngle(contact_angle_in_degrees, W) + + ################### + # GENERATE SWEEPS # + ################### + + vp = [('int32_t', 'cudaBlockSize0'), + ('int32_t', 'cudaBlockSize1'), + ('int32_t', 'cudaBlockSize2')] + + sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32), + TypedSymbol("cudaBlockSize1", np.int32), + TypedSymbol("cudaBlockSize2", np.int32)) + + sweep_params = {'block_size': sweep_block_size} + + stencil_typedefs = {'Stencil_phase_T': stencil_phase, + 'Stencil_hydro_T': stencil_hydro} + field_typedefs = {'PdfField_phase_T': h, + 'PdfField_hydro_T': g, + 'VelocityField_T': u, + 'PhaseField_T': C} + + additional_code = f""" + const char * StencilNamePhase = "{stencil_phase_name}"; + const char * StencilNameHydro = "{stencil_hydro_name}"; + """ + generate_sweep(ctx, 'initialize_phase_field_distributions', h_updates, target='gpu') generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates, target='gpu') generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step, field_swaps=[(h, h_tmp), (C, C_tmp)], - inner_outer_split=True, target='gpu', gpu_indexing_params=sweep_params, varying_parameters=vp) @@ -202,7 +196,6 @@ with CodeGeneration() as ctx: generate_sweep(ctx, 'hydro_LB_step', hydro_LB_step, field_swaps=[(g, g_tmp)], - inner_outer_split=True, target='gpu', gpu_indexing_params=sweep_params, varying_parameters=vp) @@ -221,6 +214,7 @@ with CodeGeneration() as ctx: pystencils_walberla.boundary.generate_boundary(ctx, 'ContactAngle', contact_angle, C.name, stencil_hydro, index_shape=[], target='gpu') - ctx.write_file("GenDefines.h", info_header) + generate_info_header(ctx, 'GenDefines', stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs, + additional_code=additional_code) print("finished code generation successfully") diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_rising_bubble.py b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_rising_bubble.py index 186590774aa43e8624eaf891f94616aad081360f..54f63d35f9ca27d6095d07751a8bb36bd910c3a1 100755 --- a/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_rising_bubble.py +++ b/apps/showcases/PhaseFieldAllenCahn/GPU/multiphase_rising_bubble.py @@ -7,7 +7,7 @@ from lbmpy.phasefield_allen_cahn.parameter_calculation import calculate_dimensio class Scenario: - def __init__(self): + def __init__(self, cuda_enabled_mpi=False): # output frequencies self.vtkWriteFrequency = 1000 self.dbWriteFrequency = 200 @@ -49,6 +49,9 @@ class Scenario: self.counter = 0 self.yPositions = [] + self.cudaEnabledMpi = cuda_enabled_mpi + self.cuda_blocks = (64, 2, 2) + @wlb.member_callback def config(self): return { @@ -65,6 +68,8 @@ class Scenario: 'overlappingWidth': self.overlappingWidth, 'remainingTimeLoggerFrequency': 10.0, 'scenario': self.scenario, + 'cudaEnabledMpi': self.cudaEnabledMpi, + 'gpuBlockSize': self.cuda_blocks }, 'PhysicalParameters': { 'density_liquid': self.density_heavy, diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/parameters_taylor_bubble.py b/apps/showcases/PhaseFieldAllenCahn/GPU/parameters_taylor_bubble.py deleted file mode 100644 index 55db7aa693400b12f71d097470c57bed2609ee8e..0000000000000000000000000000000000000000 --- a/apps/showcases/PhaseFieldAllenCahn/GPU/parameters_taylor_bubble.py +++ /dev/null @@ -1,69 +0,0 @@ -import math - - -def calculate_parameters_taylor_bubble(reference_length=128, - reference_time=16000, - density_heavy=1.0, - d1=0.0254, - d2=0.0127): - r""" - Calculate the simulation parameters for a rising Taylor bubble. The calculation can be found in - 10.1016/S0009-2509(97)00210-8 by G. Das - - Args: - reference_length: chosen reference length - reference_time: chosen reference time - density_heavy: density of the heavier fluid - d1: diameter of the outer tube - d2: diameter of the inner cylinder - """ - - water_rho = 998 # kg/m3 - air_rho = 1.2047 # kg/m3 - surface_tension = 0.07286 # kg/s2 - water_mu = 1.002e-3 # kg/ms - - water_nu = water_mu / water_rho # m2/s - air_mu = 1.8205e-5 # kg/ms - air_nu = air_mu / air_rho # m2/s - gravity = 9.81 # m/s2 - - dh = d1 - d2 - dr = d1 / d2 - de = d1 + d2 - # ur = 0.1695 # (0.28913, 0.23882, 0.1695) - - inverse_viscosity_number = math.sqrt((water_rho - air_rho) * water_rho * gravity * dh ** 3) / water_mu - bond_number = (water_rho - air_rho) * gravity * dh ** 2 / surface_tension - morton_number = gravity * water_mu ** 4 * (water_rho - air_rho) / (water_rho ** 2 * surface_tension ** 3) - - d = reference_length / dr - - density_light = 1.0 / (water_rho / air_rho) - dh = reference_length - d - g = dh / reference_time ** 2 - - mu_h = math.sqrt((density_heavy - density_light) * density_heavy * g * dh ** 3) / inverse_viscosity_number - mu_l = mu_h / (water_mu / air_mu) - - dynamic_viscosity_heavy = mu_h / density_heavy - dynamic_viscosity_light = mu_l / density_light - - relaxation_time_heavy = 3 * dynamic_viscosity_heavy - relaxation_time_light = 3 * dynamic_viscosity_light - - sigma = (density_heavy - density_light) * g * dh ** 2 / bond_number - - parameters = { - "inverse_viscosity_number": inverse_viscosity_number, - "bond_number": bond_number, - "morton_number": morton_number, - "density_light": density_light, - "dynamic_viscosity_heavy": dynamic_viscosity_heavy, - "dynamic_viscosity_light": dynamic_viscosity_light, - "relaxation_time_heavy": relaxation_time_heavy, - "relaxation_time_light": relaxation_time_light, - "gravitational_acceleration": -g, - "surface_tension": sigma - } - return parameters diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/taylor_bubble.py b/apps/showcases/PhaseFieldAllenCahn/GPU/taylor_bubble.py index 0afb25234ac119bff33d6876e23bfec7cd79391b..9bac32e1b46874049188db142d8503aeb10848a1 100755 --- a/apps/showcases/PhaseFieldAllenCahn/GPU/taylor_bubble.py +++ b/apps/showcases/PhaseFieldAllenCahn/GPU/taylor_bubble.py @@ -11,7 +11,7 @@ from waLBerla.tools.sqlitedb import sequenceValuesToScalars from scipy.ndimage.filters import gaussian_filter import scipy.spatial as spatial -from parameters_taylor_bubble import calculate_parameters_taylor_bubble +from lbmpy.phasefield_allen_cahn.parameter_calculation import calculate_parameters_taylor_bubble def intersection(points1, points2, eps): @@ -55,16 +55,17 @@ def get_circle(midpoint, radius): class Scenario: - def __init__(self): + def __init__(self, cuda_enabled_mpi=False): self.density_liquid = 1.0 self.reference_time = 16000 self.reference_length = 128 d1 = 0.0254 # (0.0508, 0.0381, 0.0254) d2 = 0.0127 # (0.0254, 0.0127, 0.0127) - gpus = 1 - self.interface_width = 5 + self.interface_width = 4 self.mobility = 0.05 + num_processes = wlb.mpi.numProcesses() + # output frequencies self.vtkWriteFrequency = self.reference_time self.dbWriteFrequency = self.reference_time // 25 @@ -74,8 +75,8 @@ class Scenario: # simulation parameters self.diameter = self.reference_length self.timesteps = self.reference_time * 15 + 1 - self.cells = (self.diameter, (self.reference_length * 15) // gpus, self.diameter) - self.blocks = (1, gpus, 1) + self.cells = (self.diameter, (self.reference_length * 15) // num_processes, self.diameter) + self.blocks = (1, num_processes, 1) self.periodic = (0, 0, 0) self.size = (self.cells[0] * self.blocks[0], self.cells[1] * self.blocks[1], @@ -86,16 +87,13 @@ class Scenario: self.center_y = self.size[1] / 2 self.center_z = self.size[2] / 2 - self.overlappingWidth = (8, 1, 1) - self.timeStepStrategy = 'normal' - self.scenario = 4 # 1 rising bubble or droplet, 2 RTI, 3 bubble field, 4 taylor bubble set up self.counter = 0 self.yPositions = [] self.eccentricity_or_pipe_ratio = False # if True eccentricity is conducted otherwise pipe ratio - self.ratio = 0.5 + self.ratio = 0 self.start_transition = (self.size[1] // 2) - 2 * self.diameter self.length_transition = 4 * self.diameter @@ -117,8 +115,8 @@ class Scenario: parameters = calculate_parameters_taylor_bubble(reference_length=self.reference_length, reference_time=self.reference_time, density_heavy=self.density_liquid, - d1=d1, - d2=d2) + diameter_outer=d1, + diameter_inner=d2) self.density_gas = parameters["density_light"] self.surface_tension = parameters["surface_tension"] @@ -128,6 +126,9 @@ class Scenario: self.relaxation_time_liquid = parameters.get("relaxation_time_heavy") self.relaxation_time_gas = parameters.get("relaxation_time_light") + self.cudaEnabledMpi = cuda_enabled_mpi + self.cuda_blocks = (64, 2, 2) + self.config_dict = self.config() @wlb.member_callback @@ -151,6 +152,8 @@ class Scenario: 'meshWriteFrequency': self.meshWriteFrequency, 'remainingTimeLoggerFrequency': 60.0, 'scenario': self.scenario, + 'cudaEnabledMpi': self.cudaEnabledMpi, + 'gpuBlockSize': self.cuda_blocks }, 'PhysicalParameters': { 'density_liquid': self.density_liquid, @@ -275,7 +278,7 @@ class Scenario: else: theta = 360 - if t % self.pngWriteFrequency == 0: + if self.pngWriteFrequency > 0 and t % self.pngWriteFrequency == 0: plt.plot(center_x + np.linspace(0, center_x) * np.cos(np.radians(angle)), center_z + np.linspace(0, center_z) * np.sin(np.radians(angle)), 'b-') plt.plot(center_x + np.linspace(0, center_x) * np.cos(np.radians(angle1)), diff --git a/apps/showcases/PhaseFieldAllenCahn/GPU/util.h b/apps/showcases/PhaseFieldAllenCahn/GPU/util.h index cefd57d38736d9f8370a86c78e07215fa947e851..0d5689c6945247ac722c32ee61d7f19c00ce6496 100644 --- a/apps/showcases/PhaseFieldAllenCahn/GPU/util.h +++ b/apps/showcases/PhaseFieldAllenCahn/GPU/util.h @@ -17,6 +17,7 @@ //! \author Markus Holzer <markus.holzer@fau.de> // //====================================================================================================================== +#pragma once #include "python_coupling/DictWrapper.h" @@ -28,7 +29,7 @@ #include "field/FlagField.h" #include "field/vtk/VTKWriter.h" #include "GenDefines.h" -#pragma once + namespace walberla { diff --git a/python/pystencils_walberla/templates/Boundary.tmpl.h b/python/pystencils_walberla/templates/Boundary.tmpl.h index 33c355a530e9313f52d9c623a5e71da71d0a9c7c..cc5351d519cc46b618d100dba1460aeaaf8ea434 100644 --- a/python/pystencils_walberla/templates/Boundary.tmpl.h +++ b/python/pystencils_walberla/templates/Boundary.tmpl.h @@ -251,7 +251,7 @@ public: { {%- for dirIdx, dirVec, offset in additional_data_handler.stencil_info %} dx = {{dirVec[0]}}; dy = {{dirVec[1]}}; {%if dim == 3%} dz = {{dirVec[2]}}; {% endif %} - dot = dx*sum_x + dy*sum_y {%if dim == 3%} + dz*sum_z {% endif %}; + dot = real_c( dx*sum_x + dy*sum_y {%if dim == 3%} + dz*sum_z {% endif %}); if (dot > maxn) { maxn = dot; diff --git a/python/pystencils_walberla/utility.py b/python/pystencils_walberla/utility.py index cda1ddbec3a6f98a9ea75d96037866c5018875b8..3283e84c044ca24d28b1c6aac690d19787e23b5c 100644 --- a/python/pystencils_walberla/utility.py +++ b/python/pystencils_walberla/utility.py @@ -47,7 +47,9 @@ def generate_info_header(ctx: CodeGenerationContext, typedefs = {**stencil_typedefs, **field_typedefs, **additional_typedefs} typedefs = [f"using {alias} = {typename};" for alias, typename in typedefs.items()] - lines = '\n'.join(headers_to_include + [''] + typedefs) + '\n' + lines = "#pragma once\n" + + lines += '\n'.join(headers_to_include + [''] + typedefs) + '\n' if path.splitext(filename)[1] not in HEADER_EXTENSIONS: filename += '.h' diff --git a/src/cuda/communication/UniformGPUScheme.h b/src/cuda/communication/UniformGPUScheme.h index ba702c5740be3847f8e057af68d002f6fc62e21e..dcd93210ae137e8c9afadcbd6d735e460bbf6447 100644 --- a/src/cuda/communication/UniformGPUScheme.h +++ b/src/cuda/communication/UniformGPUScheme.h @@ -50,11 +50,11 @@ namespace communication { void addPackInfo( const shared_ptr<GeneratedGPUPackInfo> &pi ); - void startCommunication( cudaStream_t stream = 0); - void wait( cudaStream_t stream = 0); + void startCommunication( cudaStream_t stream = nullptr); + void wait( cudaStream_t stream = nullptr); - void operator()( cudaStream_t stream = 0 ) { communicate( stream ); } - inline void communicate( cudaStream_t stream = 0 ) { startCommunication(stream); wait(stream); } + void operator()( cudaStream_t stream = nullptr ) { communicate( stream ); } + inline void communicate( cudaStream_t stream = nullptr ) { startCommunication(stream); wait(stream); } private: void setupCommunication(); diff --git a/src/cuda/communication/UniformGPUScheme.impl.h b/src/cuda/communication/UniformGPUScheme.impl.h index c2bdb32056762ee9b68bdd68f7916ee793c11966..fa8e0c2aa7289e67280761d94ee8d98b79f3cf0b 100644 --- a/src/cuda/communication/UniformGPUScheme.impl.h +++ b/src/cuda/communication/UniformGPUScheme.impl.h @@ -43,7 +43,7 @@ UniformGPUScheme<Stencil>::UniformGPUScheme( weak_ptr <StructuredBlockForest> bf template<typename Stencil> void UniformGPUScheme<Stencil>::startCommunication( cudaStream_t stream ) { - WALBERLA_ASSERT( !communicationInProgress_ ); + WALBERLA_ASSERT( !communicationInProgress_ ) auto forest = blockForest_.lock(); auto currentBlockForestStamp = forest->getBlockForest().getModificationStamp(); @@ -79,14 +79,14 @@ UniformGPUScheme<Stencil>::UniformGPUScheme( weak_ptr <StructuredBlockForest> bf parallelSection.run([&](auto s) { auto size = pi->size( *dir, block ); auto gpuDataPtr = bufferSystemGPU_.sendBuffer( nProcess ).advanceNoResize( size ); - WALBERLA_ASSERT_NOT_NULLPTR( gpuDataPtr ); + WALBERLA_ASSERT_NOT_NULLPTR( gpuDataPtr ) pi->pack( *dir, gpuDataPtr, block, s ); if( !sendFromGPU_ ) { auto cpuDataPtr = bufferSystemCPU_.sendBuffer( nProcess ).advanceNoResize( size ); - WALBERLA_ASSERT_NOT_NULLPTR( cpuDataPtr ); - WALBERLA_CUDA_CHECK( cudaMemcpyAsync( cpuDataPtr, gpuDataPtr, size, cudaMemcpyDeviceToHost, s )); + WALBERLA_ASSERT_NOT_NULLPTR( cpuDataPtr ) + WALBERLA_CUDA_CHECK( cudaMemcpyAsync( cpuDataPtr, gpuDataPtr, size, cudaMemcpyDeviceToHost, s )) } }); } @@ -109,7 +109,7 @@ UniformGPUScheme<Stencil>::UniformGPUScheme( weak_ptr <StructuredBlockForest> bf template<typename Stencil> void UniformGPUScheme<Stencil>::wait( cudaStream_t stream ) { - WALBERLA_ASSERT( communicationInProgress_ ); + WALBERLA_ASSERT( communicationInProgress_ ) auto forest = blockForest_.lock(); @@ -127,7 +127,7 @@ UniformGPUScheme<Stencil>::UniformGPUScheme( weak_ptr <StructuredBlockForest> bf { auto size = pi->size( header.dir, block ); auto gpuDataPtr = recvInfo.buffer().advanceNoResize( size ); - WALBERLA_ASSERT_NOT_NULLPTR( gpuDataPtr ); + WALBERLA_ASSERT_NOT_NULLPTR( gpuDataPtr ) parallelSection.run([&](auto s) { pi->unpack( stencil::inverseDir[header.dir], gpuDataPtr, block, s ); }); @@ -152,12 +152,12 @@ UniformGPUScheme<Stencil>::UniformGPUScheme( weak_ptr <StructuredBlockForest> bf auto size = pi->size( header.dir, block ); auto cpuDataPtr = recvInfo.buffer().advanceNoResize( size ); auto gpuDataPtr = gpuBuffer.advanceNoResize( size ); - WALBERLA_ASSERT_NOT_NULLPTR( cpuDataPtr ); - WALBERLA_ASSERT_NOT_NULLPTR( gpuDataPtr ); + WALBERLA_ASSERT_NOT_NULLPTR( cpuDataPtr ) + WALBERLA_ASSERT_NOT_NULLPTR( gpuDataPtr ) parallelSection.run([&](auto s) { WALBERLA_CUDA_CHECK( cudaMemcpyAsync( gpuDataPtr, cpuDataPtr, size, - cudaMemcpyHostToDevice, s )); + cudaMemcpyHostToDevice, s )) pi->unpack( stencil::inverseDir[header.dir], gpuDataPtr, block, s ); }); } @@ -190,9 +190,9 @@ UniformGPUScheme<Stencil>::UniformGPUScheme( weak_ptr <StructuredBlockForest> bf continue; WALBERLA_ASSERT( block->neighborhoodSectionHasEquallySizedBlock( neighborIdx ), - "Works for uniform setups only" ); + "Works for uniform setups only" ) WALBERLA_ASSERT_EQUAL( block->getNeighborhoodSectionSize( neighborIdx ), uint_t( 1 ), - "Works for uniform setups only" ); + "Works for uniform setups only" ) const BlockID &nBlockId = block->getNeighborId( neighborIdx, uint_t( 0 )); auto nProcess = mpi::MPIRank( block->getNeighborProcess( neighborIdx, uint_t( 0 ))); @@ -235,7 +235,7 @@ UniformGPUScheme<Stencil>::UniformGPUScheme( weak_ptr <StructuredBlockForest> bf template<typename Stencil> void UniformGPUScheme<Stencil>::addPackInfo( const shared_ptr<GeneratedGPUPackInfo> &pi ) { - WALBERLA_ASSERT( !communicationInProgress_, "Cannot add pack info while communication is in progress" ); + WALBERLA_ASSERT( !communicationInProgress_, "Cannot add pack info while communication is in progress" ) packInfos_.push_back( pi ); setupBeforeNextCommunication_ = true; }