Commit 0523675f authored by Martin Bauer's avatar Martin Bauer
Browse files

UniformGridGenerated: padding after f

parent 96146de7
Pipeline #20381 failed with stages
in 218 minutes and 51 seconds
DomainSetup
{
blocks < 1, 1, 1 >;
cellsPerBlock < 64, 64, 64 >;
cellsPerBlock < 300, 64, 64 >;
periodic < 1, 1, 1 >;
}
Parameters
{
timeStepMode twoField;
timeStepMode aa;
// twoField: normal src-dst update with two fields [default]
// twoFieldKernelOnly: same as above but without communication and periodicity
// aa: AA single-field update pattern
......@@ -22,11 +22,13 @@ Parameters
// manualD3Q19: manual D3Q19
timesteps 200; // time steps of one performance measurement default 60
timesteps 2000; // time steps of one performance measurement default 60
warmupSteps 1; // number of steps to run before measurement starts
outerIterations 4; // how many measurements to conduct
vtkWriteFrequency 0; // write a VTK file every n'th step, if zero VTK output is disabled
outerIterations 1; // how many measurements to conduct
vtkWriteFrequency 100; // write a VTK file every n'th step, if zero VTK output is disabled
remainingTimeLoggerFrequency 6; // interval in seconds to log the estimated remaining time
fPadding 3;
useGui 0;
......
......@@ -150,16 +150,19 @@ with CodeGeneration() as ctx:
'cse_global': opts['aa_odd_cse_global'],
'cse_pdfs': opts['aa_odd_cse_pdfs']}, **options)
vec = { 'assume_aligned': True, 'assume_inner_stride_one': True}
vec = {'assume_aligned': True, 'assume_inner_stride_one': True}
# Sweeps
vec['nontemporal'] = opts['two_field_nt_stores']
vec['assume_aligned'] = opts['two_field_nt_stores']
generate_sweep(ctx, 'GenLbKernel', update_rule_two_field, field_swaps=[('pdfs', 'pdfs_tmp')],
cpu_vectorize_info=vec)
vec['nontemporal'] = opts['aa_even_nt_stores']
vec['assume_aligned'] = opts['aa_even_nt_stores']
generate_sweep(ctx, 'GenLbKernelAAEven', update_rule_aa_even, cpu_vectorize_info=vec,
cpu_openmp=True, ghost_layers=1)
vec['nontemporal'] = opts['aa_odd_nt_stores']
vec['assume_aligned'] = opts['aa_odd_nt_stores']
generate_sweep(ctx, 'GenLbKernelAAOdd', update_rule_aa_odd, cpu_vectorize_info=vec,
cpu_openmp=True, ghost_layers=1)
......
......@@ -51,7 +51,7 @@ def domain_decomposition_func_full(processes, threads, block_size):
class BenchmarkScenario:
def __init__(self, block_size=(256, 128, 128), direct_comm=True,
time_step_mode='aa', two_field_kernel_type='generated',
domain_decomposition_func=domain_decomposition_func_z,
domain_decomposition_func=domain_decomposition_func_z, pinning="", f_padding=0,
db_file_name='uniform_grid_gen.sqlite'):
self.block_size = block_size
self.direct_comm = direct_comm
......@@ -61,6 +61,8 @@ class BenchmarkScenario:
self.threads = int(os.environ['OMP_NUM_THREADS'])
self.processes = wlb.mpi.numProcesses()
self.db_file_name = db_file_name
self.pinning = pinning
self.f_padding = f_padding
@wlb.member_callback
def config(self, **kwargs):
......@@ -81,6 +83,8 @@ class BenchmarkScenario:
'timeStepMode': self.time_step_mode,
'twoFieldKernelType': self.two_field_kernel_type,
'directComm': self.direct_comm,
'pinning': self.pinning,
'fPadding': self.f_padding,
}
}
cfg['DomainSetup'].update(self.domain_decomposition_func(self.processes, self.threads, self.block_size))
......@@ -168,4 +172,20 @@ def weak_scaling():
continue
scenarios.add(sc)
single_node_benchmark()
def padding_test():
    """Register benchmark scenarios that sweep the ``f_padding`` setting.

    One ``BenchmarkScenario`` is built for every combination of

    * block size: ``(300, 100, 100)`` and ``(500, 100, 100)``
    * time step mode: ``'aa'`` and ``'aaKernelOnly'``
    * ``f_padding``: every integer from 0 to 15

    Every scenario uses ``direct_comm=False``, ``pinning="0"`` and
    ``domain_decomposition_func_z``. A scenario is added to the
    ``wlb.ScenarioManager`` only if ``block_size_ok`` accepts it.
    """
    manager = wlb.ScenarioManager()

    candidate_block_sizes = [(300, 100, 100), (500, 100, 100)]
    candidate_modes = ['aa', 'aaKernelOnly']

    for cells in candidate_block_sizes:
        for mode in candidate_modes:
            for padding in range(16):
                scenario = BenchmarkScenario(
                    block_size=cells,
                    direct_comm=False,
                    time_step_mode=mode,
                    domain_decomposition_func=domain_decomposition_func_z,
                    f_padding=padding,
                    pinning="0",
                )
                # Only scenarios that pass the block-size check are registered.
                if block_size_ok(scenario):
                    manager.add(scenario)
#single_node_benchmark()
padding_test()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment