# coding: utf-8

# In[32]:
"""Benchmark generated GPU kernels for the configured pygrandchem scenarios.

For every entry in ``scenarios`` the script

1. creates a GPU data handling with the phi/mu source, destination and
   staggered arrays,
2. compiles the full phi and mu kernels and times them once ("warmup"),
3. runs ``bench_kernel`` for every entry of ``kernel_types``, with and without
   fast division/sqrt approximations and with and without the liveness
   transformation sequences loaded from ``gpu_liveness_trans_sequences``.

NOTE(review): ``bench_kernel`` and ``unpack_staggered_eqs`` are not defined in
this file; they are presumably provided by one of the ``pystencils.simp`` star
imports below -- confirm.
"""

# The import order is kept exactly as it was: the star imports may shadow
# names imported earlier, so reordering them could change which object a name
# refers to.
import pickle
import warnings
import pystencils as ps
from pygrandchem.grandchem import GrandChemGenerator
from pygrandchem.scenarios import system_4_2, system_3_1
from pygrandchem.initialization import init_boxes, smooth_fields
from pygrandchem.scenarios import benchmark_configs
from sympy import Number, Symbol, Expr, preorder_traversal, postorder_traversal, Function, Piecewise, relational
from pystencils.simp import sympy_cse_on_assignment_list
from pystencils.simp.liveness_opts import *
from pystencils.simp.liveness_opts_exp import *
from pystencils.simp.liveness_permutations import *
import pycuda
import sys
from subprocess import run, PIPE
from pystencils import show_code
import pycuda.driver as drv
import importlib

configs = benchmark_configs()


def get_config(name):
    """Return the benchmark configuration stored under ``name``."""
    return configs[name]


def _time_kernel(kernel, report_format):
    """Launch ``kernel`` three times and print the mean time of the last two.

    The first launch is not timed. The next two launches are bracketed by
    CUDA events and the elapsed time is halved to get a per-launch value.

    Reads the module-level ``dh`` at call time, i.e. the data handling of
    the scenario that is currently being processed.

    Args:
        kernel: Compiled kernel; must expose ``num_regs``.
        report_format: Format string receiving ``num_regs`` and the time.
    """
    start = drv.Event()
    end = drv.Event()

    dh.run_kernel(kernel, timestep=1)  # untimed launch

    start.record()
    dh.run_kernel(kernel, timestep=1)
    dh.run_kernel(kernel, timestep=1)
    end.record()
    end.synchronize()  # wait for the GPU before reading the elapsed time

    msec = start.time_till(end) / 2
    print(report_format.format(kernel.num_regs, msec))


def bench_kernels(mu_kernel, phi_kernel):
    """Time the mu kernel and then the phi kernel, printing one line each.

    Args:
        mu_kernel: Compiled mu kernel.
        phi_kernel: Compiled phi kernel.
    """
    _time_kernel(mu_kernel, "mu_kernel: {} {:5.3f} ms")
    _time_kernel(phi_kernel, "phi_kernel: {} {:5.3f} ms")


def _kernel_equations(generator, kernel_type):
    """Build the equations for ``kernel_type`` from ``generator``.

    Args:
        generator: A ``GrandChemGenerator`` instance.
        kernel_type: One of the names listed in ``kernel_types``.

    Returns:
        A tuple ``(eqs, staggered_params)``. ``staggered_params`` is ``None``
        for every kernel type except ``"mu_partial1"`` and ``"phi_partial1"``;
        for those two, ``eqs`` is obtained by unpacking the staggered
        parameters with ``unpack_staggered_eqs``.

    Exits the process with status 1 if ``kernel_type`` is unknown.
    """
    staggered_params = None
    if kernel_type == "phi_full":
        eqs = generator.phi_full()
    elif kernel_type == "mu_full":
        eqs = generator.mu_full()
    elif kernel_type == "mu_partial1":
        staggered_params = generator.mu_partial1()
    elif kernel_type == "mu_partial2":
        eqs = generator.mu_partial2()
    elif kernel_type == "phi_partial1":
        staggered_params = generator.phi_partial1()
    elif kernel_type == "phi_partial2":
        eqs = generator.phi_partial2()
    else:
        print("Specified kernel does not exist")
        # sys.exit instead of the site-provided exit(); non-zero status so
        # callers can detect the failure.
        sys.exit(1)

    if staggered_params is not None:
        eqs = unpack_staggered_eqs(*staggered_params)
    return eqs, staggered_params


domain_size = (512, 512, 128)
periodicity = (True, True, False)
optimization = {'gpu_indexing_params': {"block_size": (32, 4, 2)}}
#bestSeqs = pickle.load(open('best_seq.pickle', 'rb'))

scenarios = ["42_varT_freeEnergy", "31_varT_aniso_rot"]
kernel_types = ["phi_full", "phi_partial1", "phi_partial2",
                "mu_full", "mu_partial1", "mu_partial2"]

liveness_trans_seqs = importlib.import_module(
    "gpu_liveness_trans_sequences").gpu_liveness_trans_sequences

for scenario in scenarios:
    config = get_config(scenario)
    phases, components = config['Parameters']['phases'], config['Parameters']['components']
    # format_args and f are not used in the visible part of this script.
    format_args = {'p': phases, 'c': components, 's': ','.join(str(e) for e in domain_size)}

    # Adding fields
    dh = ps.create_data_handling(domain_size, periodicity=periodicity, default_target='gpu')
    f = dh.fields
    phi_src = dh.add_array(
        'phi_src', values_per_cell=phases, layout='fzyx', latex_name='phi_s')
    mu_src = dh.add_array(
        'mu_src', values_per_cell=components, layout='fzyx', latex_name="mu_s")
    mu_stag = dh.add_array(
        'mu_stag', values_per_cell=(dh.dim, components), layout='f')
    phi_stag = dh.add_array(
        'phi_stag', values_per_cell=(dh.dim, phases), layout='f')
    phi_dst = dh.add_array_like('phi_dst', 'phi_src')
    mu_dst = dh.add_array_like('mu_dst', 'mu_src')

    # Arguments shared by the baseline generator and every benchmarked variant.
    generator_args = (phi_src, phi_dst, mu_src, mu_dst,
                      config['FreeEnergy'], config['Parameters'])
    generator_kwargs = dict(
        #conc=c,
        mu_staggered=mu_stag,
        phi_staggered=phi_stag,
        use_block_offsets=False,
        compile_kernel=False)

    gc = GrandChemGenerator(*generator_args, **generator_kwargs)
    mu_full_eqs = gc.mu_full()
    phi_full_eqs = gc.phi_full()
    phi_kernel = ps.create_kernel(phi_full_eqs, target='gpu', **optimization).compile()
    mu_kernel = ps.create_kernel(mu_full_eqs, target='gpu', **optimization).compile()

    c = dh.add_array('c', values_per_cell=components, layout='fzyx', gpu=False)
    init_boxes(dh)
    #initialize_concentration_field(dh, free_energy, config['Parameters']['initial_concentration'])
    smooth_fields(dh, sigma=0.4, iterations=5, dim=dh.dim)
    dh.synchronization_function(['phi_src', 'phi_dst', 'mu_src', 'mu_dst'])()

    print("warmup")
    bench_kernels(mu_kernel, phi_kernel)
    dh.swap('mu_src', 'mu_dst')
    dh.swap('phi_src', 'phi_dst')
    print()

    for kernel_type in kernel_types:
        print(scenario + " " + kernel_type)
        for div_sqrt_approx in [True, False]:
            print("Approximations for div/sqrt: " + str(div_sqrt_approx))
            for liveness_trans in [True, False]:
                gc = GrandChemGenerator(
                    *generator_args,
                    **generator_kwargs,
                    fast_divisions=div_sqrt_approx,
                    fast_sqrts=div_sqrt_approx,
                    gpu_liveness_trans_sequences=(
                        liveness_trans_seqs[scenario] if liveness_trans else None))

                # staggered_params is rebuilt for every configuration.
                # Previously it was assigned once for a "*_partial1" kernel and
                # never cleared, so every later kernel type had its equations
                # replaced by the stale staggered ones.
                eqs, staggered_params = _kernel_equations(gc, kernel_type)

                print(
                    bench_kernel(
                        eqs, dh,
                        liveness_trans_seqs[scenario][(kernel_type, liveness_trans)].blockSize,
                        staggered_params))
        print()