diff --git a/apps/benchmarks/UniformGridGenerated/CMakeLists.txt b/apps/benchmarks/UniformGridGenerated/CMakeLists.txt index 50d7a1b028c687a3c77fb8c2fee885e564866e2e..368f9dc9e33a11708918bb566c2bfee5bcf3e8b2 100644 --- a/apps/benchmarks/UniformGridGenerated/CMakeLists.txt +++ b/apps/benchmarks/UniformGridGenerated/CMakeLists.txt @@ -10,8 +10,8 @@ waLBerla_python_file_generates(UniformGridGenerated.py GenDefines.h) -#foreach(config trt smagorinsky mrt3 mrt entropic_kbc_n4 cumulant ) -foreach(config trt trt_nt_split trt_nt trt_split) +foreach(config trt smagorinsky smagorinsky_d3q27 mrt mrt_d3q27 entropic entropic_kbc_n4 cumulant cumulant_d3q27 + trt_nt_split trt_nt trt_split trt_aa1 trt_aa2 trt_aa3 trt_aa4 trt_aa1_d3q27 trt_aa2_d3q27 trt_aa3_d3q27 trt_aa4_d3q27 ) waLBerla_add_executable ( NAME UniformGridBenchmarkGenerated_${config} FILES UniformGridGenerated.cpp Pinning.cpp UniformGridGenerated.py DEPENDS blockforest boundary core domain_decomposition field geometry timeloop vtk gui diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py index bf6736d79e9e2a246af2751ba2622252ae8edb23..6b0eed38063234a8aeedb2e7229fec54986f52a1 100644 --- a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py +++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py @@ -18,6 +18,7 @@ options_dict = { 'trt': { 'method': 'trt', 'stencil': 'D3Q19', + 'maxwellian_moments': False, 'compressible': False, 'relaxation_rate': omega, 'opts': {"two_field_split": False, "two_field_nt_stores": False} @@ -26,6 +27,7 @@ options_dict = { 'method': 'trt', 'stencil': 'D3Q19', 'compressible': False, + 'maxwellian_moments': False, 'relaxation_rate': omega, 'opts': {"two_field_split": True, "two_field_nt_stores": True} }, @@ -33,6 +35,7 @@ options_dict = { 'method': 'trt', 'stencil': 'D3Q19', 'compressible': False, + 'maxwellian_moments': False, 'relaxation_rate': omega, 'opts': {"two_field_split": False, "two_field_nt_stores": True} }, @@ -40,12 +43,46 @@ options_dict = { 'method': 'trt', 'stencil': 'D3Q19', 'compressible': False, + 'maxwellian_moments': False, 'relaxation_rate': omega, 'opts': {"two_field_split": True, "two_field_nt_stores": False} }, + 'trt_aa1': { + 'method': 'trt', + 'stencil': 'D3Q19', + 'compressible': False, + 'relaxation_rate': omega, + 'maxwellian_moments': False, + 'opts': {"aa_even_split": True, "aa_odd_split": False} + }, + 'trt_aa2': { + 'method': 'trt', + 'stencil': 'D3Q19', + 'compressible': False, + 'relaxation_rate': omega, + 'maxwellian_moments': False, + 'opts': {"aa_even_split": False, "aa_odd_split": True} + }, + 'trt_aa3': { + 'method': 'trt', + 'stencil': 'D3Q19', + 'compressible': False, + 'relaxation_rate': omega, + 'maxwellian_moments': False, + 'opts': {"aa_even_split": True, "aa_odd_split": True} + }, + 'trt_aa4': { + 'method': 'trt', + 'stencil': 'D3Q19', + 'compressible': False, + 'relaxation_rate': omega, + 'maxwellian_moments': False, + 'opts': {"aa_even_split": False, "aa_odd_split": False} + }, 'mrt': { 'method': 'mrt', 'stencil': 'D3Q19', + 'maxwellian_moments': False, 'relaxation_rates': [omega, 1.3, 1.4, 1.2, 1.1, 1.15, 1.234], }, 'mrt_full': { @@ -112,10 +149,10 @@ with CodeGeneration() as ctx: 'aa_odd_cse_pdfs': False, 'aa_odd_cse_global': True, - 'aa_odd_split': True, + 'aa_odd_split': False, 'aa_odd_nt_stores': False, - 'compiled_in_boundaries': False, + 'compiled_in_boundaries': True, } config_name = ctx.config noopt = False @@ -158,10 +195,14 @@ with CodeGeneration() as ctx: ((0, 0, 1), UBB([0.05, 0, 0])), ((0, 0, -1), NoSlip()), )) - cr_even = create_lb_collision_rule(stencil="D3Q19", compressible=False, optimization={'cse_global': opts['aa_even_cse_global'], - 'cse_pdfs': opts['aa_even_cse_pdfs']}) - cr_odd = create_lb_collision_rule(stencil="D3Q19", compressible=False, optimization={'cse_global': opts['aa_odd_cse_global'], - 'cse_pdfs': opts['aa_odd_cse_pdfs']}) + cr_even = create_lb_collision_rule(**options, optimization={'cse_global': opts['aa_even_cse_global'], + 'cse_pdfs': opts['aa_even_cse_pdfs'], + # 'split': opts['aa_even_split'] + }) + cr_odd = create_lb_collision_rule(**options, optimization={'cse_global': opts['aa_odd_cse_global'], + 'cse_pdfs': opts['aa_odd_cse_pdfs'], + #'split': opts['aa_odd_split'] + }) update_rule_aa_even = update_rule_with_push_boundaries(cr_even, pdfs, boundaries, AAEvenTimeStepAccessor, AAOddTimeStepAccessor.read) update_rule_aa_odd = update_rule_with_push_boundaries(cr_odd, pdfs, boundaries, AAOddTimeStepAccessor, AAEvenTimeStepAccessor.read) else: diff --git a/apps/benchmarks/UniformGridGenerated/params.py b/apps/benchmarks/UniformGridGenerated/params.py index 450e2acdda5a8301244cad7c3f657e4d8a821418..761eae184ee5adcde2a12ebd02b0682a1bbb8279 100644 --- a/apps/benchmarks/UniformGridGenerated/params.py +++ b/apps/benchmarks/UniformGridGenerated/params.py @@ -58,7 +58,7 @@ def domain_decomposition_func_z_strong_scaling(processes, threads, block_size): class BenchmarkScenario: def __init__(self, block_size=(256, 128, 128), direct_comm=True, time_step_mode='aa', two_field_kernel_type='generated', - domain_decomposition_func=domain_decomposition_func_z, pinning="", f_padding=0, + domain_decomposition_func=domain_decomposition_func_z, pinning="auto", f_padding=0, db_file_name='uniform_grid_gen.sqlite'): self.block_size = block_size self.direct_comm = direct_comm @@ -68,13 +68,17 @@ class BenchmarkScenario: self.threads = int(os.environ['OMP_NUM_THREADS']) self.processes = wlb.mpi.numProcesses() self.db_file_name = db_file_name + + if pinning == 'auto': + pinning = ",".join([str(i) for i in range(self.threads)]) + self.pinning = pinning self.f_padding = f_padding @wlb.member_callback def config(self, **kwargs): - time_steps_for_128_cubed = 10 - time_steps = int(128**3 / prod(self.block_size) * time_steps_for_128_cubed) + time_steps_for_128_cubed = 30 + time_steps = int(128**3 / prod(self.block_size) * time_steps_for_128_cubed * self.threads) time_steps = max(10, time_steps) cfg = { 'DomainSetup': { @@ -183,14 +187,25 @@ def weak_scaling(): def trt_two_field_benchmark(): scenarios = wlb.ScenarioManager() common = {'block_size': (300, 100, 100), - 'time_step_mode': 'two_field', + 'time_step_mode': 'twoField', 'direct_comm': False, 'domain_decomposition_func': domain_decomposition_func_z_strong_scaling, + 'db_file_name': 'trt_two_field_bench.sqlite', } scenarios.add(BenchmarkScenario(**common, two_field_kernel_type='manualGeneric')) scenarios.add(BenchmarkScenario(**common, two_field_kernel_type='manualD3Q19')) scenarios.add(BenchmarkScenario(**common, two_field_kernel_type='generated')) +def trt_single_field_benchmark(): + scenarios = wlb.ScenarioManager() + common = {'block_size': (300, 100, 100), + 'direct_comm': False, + 'domain_decomposition_func': domain_decomposition_func_z_strong_scaling, + 'db_file_name': 'trt_single_field_benchmark.sqlite', + } + for time_step_mode in ['aa', 'aaKernelOnly']: + scenarios.add(BenchmarkScenario(**common, time_step_mode=time_step_mode)) + def padding_test(): scenarios = wlb.ScenarioManager() @@ -205,6 +220,7 @@ def padding_test(): continue scenarios.add(sc) - +#trt_single_field_benchmark() +trt_two_field_benchmark() #single_node_benchmark() -padding_test() +#padding_test()