Commit 67e53365 authored by Markus Holzer's avatar Markus Holzer
Browse files

Adapted flake8 formatting for apps

parent ffe98bd7
Pipeline #25466 passed with stages
in 212 minutes and 38 seconds
[flake8]
max-line-length=120
exclude=apps/showcases/Mixer/GenerateModule.py, # contains only statements
apps/benchmarks/FieldCommunication/config.py # just a config file
ignore = W503 C901 E741
......@@ -34,7 +34,8 @@ sng_network = supermuc_network_spread()
class AlreadySimulated:
def __init__(self, db_file, properties=('processes0*processes1*processes2', 'layout', 'ghostLayers', 'cartesianCommunicator', 'stencil',
def __init__(self, db_file, properties=('processes0*processes1*processes2', 'layout', 'ghostLayers',
'cartesianCommunicator', 'stencil',
'cellsPerBlock0', 'cellsPerBlock1', 'cellsPerBlock2',
'blocksPerProcess', 'localCommunicationMode', 'singleMessage',
'fieldsPdf', 'fieldsPdfOpt', 'fieldsVector', 'fieldsScalar',
......
......@@ -147,7 +147,7 @@ sweep_block_size = (TypedSymbol("cudaBlockSize0", np.int32),
sweep_params = {'block_size': sweep_block_size}
info_header = f"""
#include "stencil/D3Q{q_phase}.h"\nusing Stencil_phase_T = walberla::stencil::D3Q{q_phase};
#include "stencil/D3Q{q_phase}.h"\nusing Stencil_phase_T = walberla::stencil::D3Q{q_phase};
#include "stencil/D3Q{q_hydro}.h"\nusing Stencil_hydro_T = walberla::stencil::D3Q{q_hydro};
"""
......
......@@ -3,75 +3,78 @@
import numpy as np
import matplotlib.pyplot as plt
kernels = dict()
class Kernel:
def __init__(self,name, cyclesFirstLoop=0, cyclesSecondLoop=0, cyclesRegPerLUP =0):
def __init__(self, name, cyclesFirstLoop=0, cyclesSecondLoop=0, cyclesRegPerLUP=0):
self.name = name
if cyclesRegPerLUP <= 0:
self.cyclesFirstLoop = cyclesFirstLoop
self.cyclesFirstLoop = cyclesFirstLoop
self.cyclesSecondLoop = cyclesSecondLoop
self.cyclesRegPerLUP = cyclesFirstLoop + 9* cyclesSecondLoop
self.cyclesRegPerLUP = cyclesFirstLoop + 9 * cyclesSecondLoop
else:
self.cyclesRegPerLUP = cyclesRegPerLUP
self.cyclesRegPerCacheLine = 8*self.cyclesRegPerLUP
self.cyclesL1L2 = 3*19*2
self.cyclesL2L3 = 3*19*2
self.cyclesRegPerCacheLine = 8 * self.cyclesRegPerLUP
self.cyclesL1L2 = 3 * 19 * 2
self.cyclesL2L3 = 3 * 19 * 2
self.freq = 2.7e9
self.cyclesMem = 305
#self.cyclesMem = 191
# self.cyclesMem = 191
def mlups(self, processes):
singleCoreCycles = self.cyclesRegPerCacheLine + self.cyclesL1L2 + self.cyclesL2L3 + self.cyclesMem
timeSingleCore = singleCoreCycles / self.freq
mlups = 8 / timeSingleCore * 1e-6
#todo
# todo
mlupsMax = 78
return min ( processes * mlups, mlupsMax )
def plot( self, divideByProcesses=False,processes=8, label="" ):
x = np.arange( 1, processes+1, 1 )
return min(processes * mlups, mlupsMax)
def plot(self, divideByProcesses=False, processes=8, label=""):
x = np.arange(1, processes + 1, 1)
if divideByProcesses:
y = np.array( [ self.mlups(i)/i for i in x ] )
y = np.array([self.mlups(i) / i for i in x])
else:
y = np.array( [ self.mlups(i) for i in x ] )
if label=="":
label = "ecm\_" + self.name
plt.plot( x, y, marker='^', markersize=5, label = label)
y = np.array([self.mlups(i) for i in x])
kernels=dict()
if label == "":
label = "ecm_" + self.name
plt.plot(x, y, marker='^', markersize=5, label=label)
#kernels['srt_split'] = Kernel("srt_split", 46, 12 )
kernels['srt_pure'] = Kernel("srt_pure", 40, 8 )
kernels['trt_split'] = Kernel("trt\_split", 41, 11 )
kernels['srt_nonopt'] = Kernel("srt_nonopt", cyclesRegPerLUP = 1045) #SRTStreamCollide.h - pgo and lto (20cycles first loop, 35 second)
kernels = dict()
# kernels['srt_split'] = Kernel("srt_split", 46, 12 )
#kernels['trt_pure_intelOpt'] = Kernel("trt_pure_intelOpt", 41/2, 10/2 ) # vectorized (v*pd)
kernels['srt_pure'] = Kernel("srt_pure", 40, 8)
kernels['trt_split'] = Kernel("trt_split", 41, 11)
# SRTStreamCollide.h - pgo and lto (20cycles first loop, 35 second)
kernels['srt_nonopt'] = Kernel("srt_nonopt",
cyclesRegPerLUP=1045)
def plotAllKernels( divideByProcesses = False ):
# kernels['trt_pure_intelOpt'] = Kernel("trt_pure_intelOpt", 41/2, 10/2 ) # vectorized (v*pd)
def plotAllKernels(divideByProcesses=False):
for kernel in kernels:
kernel.plot( divideByProcesses )
kernel.plot(divideByProcesses)
def plot(kernelName, divideByProcesses=False, label=""):
kernels[kernelName].plot(divideByProcesses, label=label)
def plot( kernelName, divideByProcesses = False, label = ""):
kernels[kernelName].plot( divideByProcesses, label=label )
if __name__ == "__main__":
plotAllKernels()
plt.legend()
plt.show()
\ No newline at end of file
plt.show()
......@@ -4,85 +4,87 @@ import sqlite3
import sys
import shutil
def getColumnNames ( db, tableName, dbName ):
def getColumnNames(db, tableName, dbName):
cursor = db.cursor()
cursor.execute("PRAGMA %s.table_info(%s)" % (dbName,tableName) )
cursor.execute("PRAGMA %s.table_info(%s)" % (dbName, tableName))
columns = cursor.fetchall()
res = []
for e in columns:
res.append ( (e[1], e[2].upper()) )
res.append((e[1], e[2].upper()))
return res
def mergeSqliteFiles ( targetFile, fileToMerge ):
db = sqlite3.connect( targetFile )
db.execute ('ATTACH "' + fileToMerge + '" AS toMerge')
targetColumns = getColumnNames( db, "runs", "main" )
toMergeColumns = getColumnNames( db, "runs", "toMerge" )
def mergeSqliteFiles(targetFile, fileToMerge):
db = sqlite3.connect(targetFile)
db.execute('ATTACH "' + fileToMerge + '" AS toMerge')
targetColumns = getColumnNames(db, "runs", "main")
toMergeColumns = getColumnNames(db, "runs", "toMerge")
columnsToCreate = [e for e in toMergeColumns if e not in targetColumns]
for column in columnsToCreate:
print "Adding Column %s to run table of %s " % ( column[0], targetFile )
db.execute ( "ALTER TABLE main.runs ADD COLUMN %s %s" % ( column[0], column[1] ) )
print
"Adding Column %s to run table of %s " % (column[0], targetFile)
db.execute("ALTER TABLE main.runs ADD COLUMN %s %s" % (column[0], column[1]))
# Fetch all runs from toMerge,
# check if an entry with same date exists, if not add the run and the timing pool entries
# to the targetTable
c = db.cursor()
assert( toMergeColumns[0][0] == "runId")
columns = [ e[0] for e in toMergeColumns ]
columnString = ",".join( columns )
columnStringNoRunId = ",".join( columns[1:] )
query = 'SELECT %s FROM toMerge.runs WHERE timestamp || " " || random NOT IN ' % (columnString,)
query += '( SELECT timestamp || " " || random FROM main.runs )'
timingPoolColumnsMain = getColumnNames ( db, "timingPool", "main" )
timingPoolColumnsToMerge = getColumnNames ( db, "timingPool", "toMerge" )
assert ( timingPoolColumnsMain == timingPoolColumnsToMerge )
timingPoolColumnNames = [ e[0] for e in timingPoolColumnsMain ]
assert ( timingPoolColumnNames[0] == "runId")
assert (toMergeColumns[0][0] == "runId")
columns = [e[0] for e in toMergeColumns]
columnString = ",".join(columns)
columnStringNoRunId = ",".join(columns[1:])
query = 'SELECT %s FROM toMerge.runs WHERE timestamp || " " || random NOT IN ' % (columnString,)
query += '( SELECT timestamp || " " || random FROM main.runs )'
timingPoolColumnsMain = getColumnNames(db, "timingPool", "main")
timingPoolColumnsToMerge = getColumnNames(db, "timingPool", "toMerge")
assert (timingPoolColumnsMain == timingPoolColumnsToMerge)
timingPoolColumnNames = [e[0] for e in timingPoolColumnsMain]
assert (timingPoolColumnNames[0] == "runId")
mergedRuns = 0
for run in c.execute (query):
for run in c.execute(query):
# Build up insert statement for 'runs' table
questionMarkList = ['?'] * (len(run)-1)
questionMarkString = ",".join( questionMarkList )
insertStatement = "INSERT INTO main.runs (%s) VALUES (%s);" % ( columnStringNoRunId, questionMarkString )
questionMarkList = ['?'] * (len(run) - 1)
questionMarkString = ",".join(questionMarkList)
insertStatement = "INSERT INTO main.runs (%s) VALUES (%s);" % (columnStringNoRunId, questionMarkString)
# Execute the insert
insertCursor = db.cursor()
insertCursor.execute( insertStatement, run[1:] )
insertCursor.execute(insertStatement, run[1:])
# Insert the corresponding timingPool infos
insertedRunId = insertCursor.lastrowid
originalRunId = run[0]
timingPoolQuery = "SELECT %s FROM toMerge.timingPool WHERE runId=?" % ( ",".join( timingPoolColumnNames[1:] ) )
timingPoolQuery = "SELECT %s FROM toMerge.timingPool WHERE runId=?" % (",".join(timingPoolColumnNames[1:]))
timingPoolInsertCursor = db.cursor()
timingPoolQueryCursor = db.cursor()
for tp in timingPoolQueryCursor.execute ( timingPoolQuery, ( originalRunId,) ):
questionMarkList = ['?'] * len(timingPoolColumnNames)
questionMarkString = ",".join( questionMarkList )
insertQuery = "INSERT INTO main.timingPool (%s) VALUES (%s)" % (",".join(timingPoolColumnNames), questionMarkString)
timingPoolInsertCursor.execute ( insertQuery, (insertedRunId,) + tp )
mergedRuns = mergedRuns +1
print "Merged %s runs from %s to %s " % ( mergedRuns, fileToMerge, targetFile )
timingPoolQueryCursor = db.cursor()
for tp in timingPoolQueryCursor.execute(timingPoolQuery, (originalRunId,)):
questionMarkList = ['?'] * len(timingPoolColumnNames)
questionMarkString = ",".join(questionMarkList)
insertQuery = "INSERT INTO main.timingPool (%s) VALUES (%s)" % (",".join(timingPoolColumnNames),
questionMarkString)
timingPoolInsertCursor.execute(insertQuery, (insertedRunId,) + tp)
mergedRuns = mergedRuns + 1
print("Merged %s runs from %s to %s " % (mergedRuns, fileToMerge, targetFile))
db.commit()
db.close()
if ( len(sys.argv) < 3 ):
print "Usage: mergeSqliteFiles resultFile <filesToMerge>"
if len(sys.argv) < 3:
print("Usage: mergeSqliteFiles resultFile <filesToMerge>")
else:
print "Copying " + sys.argv[2] + " to " + sys.argv[1]
shutil.copy( sys.argv[2], sys.argv[1] )
for i in range ( 3, len(sys.argv) ):
print "Merging " + sys.argv[i]
mergeSqliteFiles ( sys.argv[1], sys.argv[i] )
print("Copying " + sys.argv[2] + " to " + sys.argv[1])
shutil.copy(sys.argv[2], sys.argv[1])
for i in range(3, len(sys.argv)):
print("Merging " + sys.argv[i])
mergeSqliteFiles(sys.argv[1], sys.argv[i])
This diff is collapsed.
......@@ -3,7 +3,7 @@ import numpy as np
import pystencils as ps
from lbmpy.creationfunctions import create_lb_method, create_lb_update_rule, create_lb_collision_rule
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor, StreamPushTwoFieldsAccessor
from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
from pystencils_walberla import generate_pack_info_from_kernel
from lbmpy_walberla import generate_lattice_model, generate_boundary
from pystencils_walberla import CodeGeneration, generate_sweep
......@@ -45,7 +45,8 @@ options_dict = {
'mrt_full': {
'method': 'mrt',
'stencil': 'D3Q19',
'relaxation_rates': [omega_fill[0], omega, omega_fill[1], omega_fill[2], omega_fill[3], omega_fill[4], omega_fill[5]],
'relaxation_rates': [omega_fill[0], omega, omega_fill[1], omega_fill[2],
omega_fill[3], omega_fill[4], omega_fill[5]],
},
'entropic': {
'method': 'mrt',
......@@ -77,7 +78,7 @@ options_dict = {
}
info_header = """
#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
const char * infoStencil = "{stencil}";
const char * infoConfigName = "{configName}";
const bool infoCseGlobal = {cse_global};
......@@ -87,7 +88,7 @@ const bool infoCsePdfs = {cse_pdfs};
with CodeGeneration() as ctx:
accessor = StreamPullTwoFieldsAccessor()
#accessor = StreamPushTwoFieldsAccessor()
# accessor = StreamPushTwoFieldsAccessor()
assert not accessor.is_inplace, "This app does not work for inplace accessors"
common_options = {
......@@ -118,7 +119,7 @@ with CodeGeneration() as ctx:
options['stencil'] = 'D3Q27'
stencil_str = options['stencil']
q = int(stencil_str[stencil_str.find('Q')+1:])
q = int(stencil_str[stencil_str.find('Q') + 1:])
pdfs, velocity_field = ps.fields("pdfs({q}), velocity(3) : double[3D]".format(q=q), layout='fzyx')
options['optimization']['symbolic_field'] = pdfs
......@@ -143,7 +144,8 @@ with CodeGeneration() as ctx:
# CPU lattice model - required for macroscopic value computation, VTK output etc.
options_without_opt = options.copy()
del options_without_opt['optimization']
generate_lattice_model(ctx, 'UniformGridGPU_LatticeModel', create_lb_collision_rule(lb_method=lb_method, **options_without_opt))
generate_lattice_model(ctx, 'UniformGridGPU_LatticeModel', create_lb_collision_rule(lb_method=lb_method,
**options_without_opt))
# gpu LB sweep & boundaries
generate_sweep(ctx, 'UniformGridGPU_LbKernel', update_rule,
......@@ -158,7 +160,7 @@ with CodeGeneration() as ctx:
setter_assignments = macroscopic_values_setter(lb_method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=1)
getter_assignments = macroscopic_values_getter(lb_method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=None)
pdfs=pdfs.center_vector, density=None)
generate_sweep(ctx, 'UniformGridGPU_MacroSetter', setter_assignments)
generate_sweep(ctx, 'UniformGridGPU_MacroGetter', getter_assignments)
......
......@@ -56,7 +56,7 @@ options_dict = {
info_header = """
#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
const char * infoStencil = "{stencil}";
const char * infoConfigName = "{configName}";
const bool infoCseGlobal = {cse_global};
......@@ -113,8 +113,10 @@ with CodeGeneration() as ctx:
generate_sweep(ctx, 'UniformGridGPU_AA_MacroGetter', getter_assignments)
# communication
generate_pack_info_from_kernel(ctx, 'UniformGridGPU_AA_PackInfoPull', update_rules['Odd'], kind='pull', target='gpu')
generate_pack_info_from_kernel(ctx, 'UniformGridGPU_AA_PackInfoPush', update_rules['Odd'], kind='push', target='gpu')
generate_pack_info_from_kernel(ctx, 'UniformGridGPU_AA_PackInfoPull', update_rules['Odd'],
kind='pull', target='gpu')
generate_pack_info_from_kernel(ctx, 'UniformGridGPU_AA_PackInfoPush', update_rules['Odd'],
kind='push', target='gpu')
infoHeaderParams = {
'stencil': stencil_str,
......
......@@ -9,8 +9,8 @@ Look at the end of the file to select the benchmark to run
import os
import waLBerla as wlb
from waLBerla.tools.config import block_decomposition, toPrm
from waLBerla.tools.sqlitedb import *
from waLBerla.tools.config import block_decomposition
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
from copy import deepcopy
import sys
import sqlite3
......@@ -53,7 +53,7 @@ class Scenario:
from pprint import pformat
wlb.log_info_on_root("Scenario:\n" + pformat(self.config_dict))
# Write out the configuration as text-based prm:
#print(toPrm(self.config_dict))
# print(toPrm(self.config_dict))
return self.config_dict
@wlb.member_callback
......@@ -71,16 +71,17 @@ class Scenario:
result = data
sequenceValuesToScalars(result)
num_tries = 4
for num_try in range(num_tries): # check multiple times e.g. may fail when multiple benchmark processes are running
# check multiple times e.g. may fail when multiple benchmark processes are running
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, "runs", DB_FILE)
storeSingle(result, "runs", DB_FILE)
break
except sqlite3.OperationalError as e:
wlb.log_warning("Sqlite DB writing failed: try {}/{} {}".format(num_try+1, num_tries, str(e)))
wlb.log_warning("Sqlite DB writing failed: try {}/{} {}".format(num_try + 1, num_tries, str(e)))
# -------------------------------------- Functions trying different parameter sets -------------------------------------------------------------------
# -------------------------------------- Functions trying different parameter sets -----------------------------------
def overlap_benchmark():
......@@ -93,9 +94,11 @@ def overlap_benchmark():
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)]
for comm_strategy in ['UniformGPUScheme_Baseline', 'UniformGPUScheme_Memcpy']: # 'GPUPackInfo_Baseline', 'GPUPackInfo_Streams'
# 'GPUPackInfo_Baseline', 'GPUPackInfo_Streams'
for comm_strategy in ['UniformGPUScheme_Baseline', 'UniformGPUScheme_Memcpy']:
# no overlap
scenarios.add(Scenario(timeStepStrategy='noOverlap', communicationScheme=comm_strategy, innerOuterSplit=(1, 1, 1)))
scenarios.add(Scenario(timeStepStrategy='noOverlap', communicationScheme=comm_strategy,
innerOuterSplit=(1, 1, 1)))
# overlap
for overlap_strategy in ['simpleOverlap', 'complexOverlap']:
......@@ -123,7 +126,8 @@ def communication_compare():
timesteps=num_time_steps(block_size))
scenarios.add(sc)
for inner_outer_split in [(4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1)]:
if 3 * inner_outer_split[0] > block_size[0]: # ensure that the inner part of the domain is still large enough
# ensure that the inner part of the domain is still large enough
if 3 * inner_outer_split[0] > block_size[0]:
continue
sc = Scenario(cells_per_block=block_size,
gpuBlockSize=(128, 1, 1),
......@@ -155,7 +159,7 @@ def single_gpu_benchmark():
scenarios.add(scenario)
# -------------------------------------- Optional job script generation for PizDaint -------------------------------------------------------------------
# -------------------------------------- Optional job script generation for PizDaint ---------------------------------
job_script_header = """
......@@ -223,7 +227,8 @@ def generate_jobscripts(exe_names=all_executables):
job_script = job_script_header.format(nodes=node_count, folder=os.path.join(os.getcwd(), folder_name))
for exe in exe_names:
job_script += job_script_exe_part.format(app="../" + exe, nodes=node_count, config='../communication_compare.py')
job_script += job_script_exe_part.format(app="../" + exe, nodes=node_count,
config='../communication_compare.py')
with open(os.path.join(folder_name, 'job.sh'), 'w') as f:
f.write(job_script)
......@@ -235,6 +240,8 @@ if __name__ == '__main__':
else:
wlb.log_info_on_root("Batch run of benchmark scenarios, saving result to {}".format(DB_FILE))
# Select the benchmark you want to run
single_gpu_benchmark() # benchmarks different CUDA block sizes and domain sizes and measures single GPU performance of compute kernel (no communication)
#communication_compare() # benchmarks different communication routines, with and without overlap
#overlap_benchmark() # benchmarks different communication overlap options
single_gpu_benchmark()
# benchmarks different CUDA block sizes and domain sizes and measures single
# GPU performance of compute kernel (no communication)
# communication_compare(): benchmarks different communication routines, with and without overlap
# overlap_benchmark(): benchmarks different communication overlap options
import sympy as sp
import pystencils as ps
from lbmpy.creationfunctions import create_lb_update_rule, create_lb_collision_rule
from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep, generate_mpidtype_info_from_kernel
from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep,\
generate_mpidtype_info_from_kernel
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from lbmpy.fieldaccess import AAEvenTimeStepAccessor, AAOddTimeStepAccessor
......@@ -30,7 +31,8 @@ options_dict = {
'mrt_full': {
'method': 'mrt',
'stencil': 'D3Q19',
'relaxation_rates': [omega_fill[0], omega, omega_fill[1], omega_fill[2], omega_fill[3], omega_fill[4], omega_fill[5]],
'relaxation_rates': [omega_fill[0], omega, omega_fill[1], omega_fill[2],
omega_fill[3], omega_fill[4], omega_fill[5]],
},
'entropic': {
'method': 'mrt',
......@@ -62,7 +64,7 @@ options_dict = {
}
info_header = """
#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
const char * infoStencil = "{stencil}";
const char * infoConfigName = "{configName}";
const char * optimizationDict = "{optimizationDict}";
......@@ -108,7 +110,7 @@ with CodeGeneration() as ctx:
options['stencil'] = 'D3Q27'
stencil_str = options['stencil']
q = int(stencil_str[stencil_str.find('Q')+1:])
q = int(stencil_str[stencil_str.find('Q') + 1:])
pdfs, velocity_field = ps.fields("pdfs({q}), velocity(3) : double[3D]".format(q=q), layout='fzyx')
update_rule_two_field = create_lb_update_rule(optimization={'symbolic_field': pdfs,
......@@ -128,12 +130,16 @@ with CodeGeneration() as ctx:
((0, 0, 1), UBB([0.05, 0, 0])),
((0, 0, -1), NoSlip()),
))
cr_even = create_lb_collision_rule(stencil="D3Q19", compressible=False, optimization={'cse_global': opts['aa_even_cse_global'],
'cse_pdfs': opts['aa_even_cse_pdfs']})
cr_odd = create_lb_collision_rule(stencil="D3Q19", compressible=False, optimization={'cse_global': opts['aa_odd_cse_global'],
'cse_pdfs': opts['aa_odd_cse_pdfs']})
update_rule_aa_even = update_rule_with_push_boundaries(cr_even, pdfs, boundaries, AAEvenTimeStepAccessor, AAOddTimeStepAccessor.read)
update_rule_aa_odd = update_rule_with_push_boundaries(cr_odd, pdfs, boundaries, AAOddTimeStepAccessor, AAEvenTimeStepAccessor.read)
cr_even = create_lb_collision_rule(stencil="D3Q19", compressible=False,
optimization={'cse_global': opts['aa_even_cse_global'],
'cse_pdfs': opts['aa_even_cse_pdfs']})
cr_odd = create_lb_collision_rule(stencil="D3Q19", compressible=False,
optimization={'cse_global': opts['aa_odd_cse_global'],
'cse_pdfs': opts['aa_odd_cse_pdfs']})
update_rule_aa_even = update_rule_with_push_boundaries(cr_even, pdfs, boundaries,
AAEvenTimeStepAccessor, AAOddTimeStepAccessor.read)
update_rule_aa_odd = update_rule_with_push_boundaries(cr_odd, pdfs, boundaries,
AAOddTimeStepAccessor, AAEvenTimeStepAccessor.read)
else:
update_rule_aa_even = create_lb_update_rule(kernel_type=AAEvenTimeStepAccessor(),
optimization={'symbolic_field': pdfs,
......@@ -146,7 +152,7 @@ with CodeGeneration() as ctx:
'cse_global': opts['aa_odd_cse_global'],
'cse_pdfs': opts['aa_odd_cse_pdfs']}, **options)
vec = { 'assume_aligned': True, 'assume_inner_stride_one': True}
vec = {'assume_aligned': True, 'assume_inner_stride_one': True}
# check if openmp is enabled in cmake
if ctx.openmp:
......
......@@ -2,7 +2,7 @@ import math
import os
import operator
import waLBerla as wlb
from waLBerla.tools.sqlitedb import *
from waLBerla.tools.sqlitedb import sequenceValuesToScalars, checkAndUpdateSchema, storeSingle
from waLBerla.tools.config import block_decomposition
from functools import reduce
import sqlite3
......@@ -137,18 +137,19 @@ def single_node_benchmark():
continue
scenarios.add(sc)
else:
sc = BenchmarkScenario(block_size=block_size, direct_comm=direct_comm,
domain_decomposition_func=domain_decomposition_func_z,
time_step_mode=time_step_mode)
if not block_size_ok(sc):
continue
scenarios.add(sc)
sc = BenchmarkScenario(block_size=block_size, direct_comm=direct_comm,
domain_decomposition_func=domain_decomposition_func_z,
time_step_mode=time_step_mode)
if not block_size_ok(sc):
continue
# scenarios.add(sc)
def single_node_benchmark_small():
scenarios = wlb.ScenarioManager()
for block_size in [(128, 128, 128),