diff --git a/apps/benchmarks/UniformGridGenerated/CMakeLists.txt b/apps/benchmarks/UniformGridGenerated/CMakeLists.txt
index 4e5396f93f11ca2cedd3f4ad90db63d289ee036f..50d7a1b028c687a3c77fb8c2fee885e564866e2e 100644
--- a/apps/benchmarks/UniformGridGenerated/CMakeLists.txt
+++ b/apps/benchmarks/UniformGridGenerated/CMakeLists.txt
@@ -10,7 +10,7 @@ waLBerla_python_file_generates(UniformGridGenerated.py
         GenDefines.h)
 
 
-foreach(config trt smagorinsky mrt3 mrt entropic_kbc_n4 cumulant )
+foreach(config trt trt_nt_split trt_nt trt_split smagorinsky mrt3 mrt entropic_kbc_n4 cumulant )
     waLBerla_add_executable ( NAME UniformGridBenchmarkGenerated_${config}
                               FILES UniformGridGenerated.cpp Pinning.cpp UniformGridGenerated.py
                               DEPENDS blockforest boundary core domain_decomposition field geometry timeloop vtk gui
diff --git a/apps/benchmarks/UniformGridGenerated/UniformGrid.prm b/apps/benchmarks/UniformGridGenerated/UniformGrid.prm
index 41d29bbccba9df9ed7850b0daa7e0a71a4e157a8..0561a0c6f0a53823afc6f5828ccb78aa3ee9978d 100644
--- a/apps/benchmarks/UniformGridGenerated/UniformGrid.prm
+++ b/apps/benchmarks/UniformGridGenerated/UniformGrid.prm
@@ -26,7 +26,7 @@ Parameters
    timesteps 2000; // time steps of one performance measurement default 60
    warmupSteps 1; // number of steps to run before measurement starts
    outerIterations 1; // how many measurements to conduct
-   vtkWriteFrequency 100; // write a VTK file every n'th step, if zero VTK output is disabled
+   vtkWriteFrequency 0; // write a VTK file every n'th step, if zero VTK output is disabled
    remainingTimeLoggerFrequency 6; // interval in seconds to log the estimated remaining time
 
    fPadding 3;
diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py
index 0c8b7ff3c39003fa7efb65ba7e9376c72d7738b5..bf6736d79e9e2a246af2751ba2622252ae8edb23 100644
--- a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py
+++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py
@@ -20,6 +20,28 @@ options_dict = {
         'stencil': 'D3Q19',
         'compressible': False,
         'relaxation_rate': omega,
+        'opts': {"two_field_split": False, "two_field_nt_stores": False}
+    },
+    'trt_nt_split': {
+        'method': 'trt',
+        'stencil': 'D3Q19',
+        'compressible': False,
+        'relaxation_rate': omega,
+        'opts': {"two_field_split": True, "two_field_nt_stores": True}
+    },
+    'trt_nt': {
+        'method': 'trt',
+        'stencil': 'D3Q19',
+        'compressible': False,
+        'relaxation_rate': omega,
+        'opts': {"two_field_split": False, "two_field_nt_stores": True}
+    },
+    'trt_split': {
+        'method': 'trt',
+        'stencil': 'D3Q19',
+        'compressible': False,
+        'relaxation_rate': omega,
+        'opts': {"two_field_split": True, "two_field_nt_stores": False}
     },
     'mrt': {
         'method': 'mrt',
@@ -79,17 +101,17 @@ with CodeGeneration() as ctx:
     }
     opts = {
         'two_field_cse_pdfs': False,
-        'two_field_cse_global': False,
+        'two_field_cse_global': True,
         'two_field_split': True,
         'two_field_nt_stores': True,
 
         'aa_even_cse_pdfs': False,
-        'aa_even_cse_global': False,
+        'aa_even_cse_global': True,
         'aa_even_split': False,
         'aa_even_nt_stores': False,
 
         'aa_odd_cse_pdfs': False,
-        'aa_odd_cse_global': False,
+        'aa_odd_cse_global': True,
         'aa_odd_split': True,
         'aa_odd_nt_stores': False,
 
@@ -105,8 +127,12 @@ with CodeGeneration() as ctx:
     if config_name == '':
         config_name = 'trt'
     options = options_dict[config_name]
-    options.update(common_options)
+    # Copy before updating so the shared options_dict entry is not mutated.
     options = options.copy()
+    options.update(common_options)
+    opts = opts.copy()
+    # Per-config 'opts' override the defaults above and are removed from options.
+    opts.update(options.pop('opts', {}))
 
     if d3q27:
         options['stencil'] = 'D3Q27'
diff --git a/apps/benchmarks/UniformGridGenerated/params.py b/apps/benchmarks/UniformGridGenerated/params.py
index f701bed0b2368491119c76e5144b64855d7e5198..450e2acdda5a8301244cad7c3f657e4d8a821418 100644
--- a/apps/benchmarks/UniformGridGenerated/params.py
+++ b/apps/benchmarks/UniformGridGenerated/params.py
@@ -48,6 +48,14 @@ def domain_decomposition_func_full(processes, threads, block_size):
     }
 
 
+def domain_decomposition_func_z_strong_scaling(processes, threads, block_size):
+    """Stack one `block_size` block per process along z (`threads` is unused)."""
+    return {
+        'blocks': (1, 1, processes),
+        'cellsPerBlock': (block_size[0], block_size[1], block_size[2])
+    }
+
+
 class BenchmarkScenario:
     def __init__(self, block_size=(256, 128, 128), direct_comm=True,
                  time_step_mode='aa', two_field_kernel_type='generated',
@@ -173,6 +181,19 @@ def weak_scaling():
                 scenarios.add(sc)
 
 
+def trt_two_field_benchmark():
+    """Add one two-field scenario per kernel type on a fixed (300, 100, 100) block."""
+    scenarios = wlb.ScenarioManager()
+    common = {
+        'block_size': (300, 100, 100),
+        'time_step_mode': 'two_field',
+        'direct_comm': False,
+        'domain_decomposition_func': domain_decomposition_func_z_strong_scaling,
+    }
+    for kernel_type in ('manualGeneric', 'manualD3Q19', 'generated'):
+        scenarios.add(BenchmarkScenario(**common, two_field_kernel_type=kernel_type))
+
+
 def padding_test():
     scenarios = wlb.ScenarioManager()
     for block_size in [(300, 100, 100), (500, 100, 100)]: