Commit 771bee84 authored by Martin Bauer's avatar Martin Bauer
Browse files

UniformGridGenerated: manual kernel & parametrization

parent 7e7a676b
......@@ -10,10 +10,9 @@ waLBerla_python_file_generates(UniformGridGenerated.py
GenDefines.h)
foreach(config trt )
foreach(config trt smagorinsky mrt3 mrt entropic_kbc_n4 cumulant )
waLBerla_add_executable ( NAME UniformGridBenchmarkGenerated_${config}
FILES UniformGridGenerated.cpp UniformGridGenerated.py
DEPENDS blockforest boundary core domain_decomposition field geometry timeloop vtk gui
CODEGEN_CFG ${config})
endforeach()
......@@ -57,4 +57,127 @@ private:
template<typename LM>
class StreamPullCollideD3Q19
{
public:
StreamPullCollideD3Q19(BlockDataID src, BlockDataID dst, real_t omega)
: src_(src), dst_(dst), omega_(omega)
{}
void operator()(IBlock * block)
{
using PdfField_T = GhostLayerField<real_t, LM::Stencil::Q>;
auto src = block->getData<PdfField_T>( src_ );
auto dst = block->getData<PdfField_T>( dst_ );
using namespace stencil;
using Stencil = typename LM::Stencil;
real_t omega_trm_ = real_t(1) - omega_;
real_t omega_w0_ = real_t(3) * ( real_t(1) / real_t( 3) ) * omega_;
real_t omega_w1_ = real_t(3) * ( real_t(1) / real_t(18) ) * omega_;
real_t omega_w2_ = real_t(3) * ( real_t(1) / real_t(36) ) * omega_;
WALBERLA_FOR_ALL_CELLS_XYZ(src, {
const real_t dd_tmp_NE = src->get(x-1, y-1, z , Stencil::idx[NE]);
const real_t dd_tmp_N = src->get(x , y-1, z , Stencil::idx[N]);
const real_t dd_tmp_NW = src->get(x+1, y-1, z , Stencil::idx[NW]);
const real_t dd_tmp_W = src->get(x+1, y , z , Stencil::idx[W]);
const real_t dd_tmp_SW = src->get(x+1, y+1, z , Stencil::idx[SW]);
const real_t dd_tmp_S = src->get(x , y+1, z , Stencil::idx[S]);
const real_t dd_tmp_SE = src->get(x-1, y+1, z , Stencil::idx[SE]);
const real_t dd_tmp_E = src->get(x-1, y , z , Stencil::idx[E]);
const real_t dd_tmp_T = src->get(x , y , z-1, Stencil::idx[T]);
const real_t dd_tmp_TE = src->get(x-1, y , z-1, Stencil::idx[TE]);
const real_t dd_tmp_TN = src->get(x , y-1, z-1, Stencil::idx[TN]);
const real_t dd_tmp_TW = src->get(x+1, y , z-1, Stencil::idx[TW]);
const real_t dd_tmp_TS = src->get(x , y+1, z-1, Stencil::idx[TS]);
const real_t dd_tmp_B = src->get(x , y , z+1, Stencil::idx[B]);
const real_t dd_tmp_BE = src->get(x-1, y , z+1, Stencil::idx[BE]);
const real_t dd_tmp_BN = src->get(x , y-1, z+1, Stencil::idx[BN]);
const real_t dd_tmp_BW = src->get(x+1, y , z+1, Stencil::idx[BW]);
const real_t dd_tmp_BS = src->get(x , y+1, z+1, Stencil::idx[BS]);
const real_t dd_tmp_C = src->get(x , y , z , Stencil::idx[C]);
const real_t velX_trm = dd_tmp_E + dd_tmp_NE + dd_tmp_SE + dd_tmp_TE + dd_tmp_BE;
const real_t velY_trm = dd_tmp_N + dd_tmp_NW + dd_tmp_TN + dd_tmp_BN;
const real_t velZ_trm = dd_tmp_T + dd_tmp_TS + dd_tmp_TW;
const real_t rho = dd_tmp_C + dd_tmp_S + dd_tmp_W + dd_tmp_B + dd_tmp_SW + dd_tmp_BS + dd_tmp_BW + velX_trm + velY_trm + velZ_trm;
const real_t velX = velX_trm - dd_tmp_W - dd_tmp_NW - dd_tmp_SW - dd_tmp_TW - dd_tmp_BW;
const real_t velY = velY_trm + dd_tmp_NE - dd_tmp_S - dd_tmp_SW - dd_tmp_SE - dd_tmp_TS - dd_tmp_BS;
const real_t velZ = velZ_trm + dd_tmp_TN + dd_tmp_TE - dd_tmp_B - dd_tmp_BN - dd_tmp_BS - dd_tmp_BW - dd_tmp_BE;
const real_t velXX = velX * velX;
const real_t velYY = velY * velY;
const real_t velZZ = velZ * velZ;
const real_t dir_indep_trm = ( real_t(1) / real_t(3) ) * rho - real_t(0.5) * ( velXX + velYY + velZZ );
dst->get(x,y,z,Stencil::idx[C]) = omega_trm_ * dd_tmp_C + omega_w0_ * dir_indep_trm;
const real_t vel_trm_E_W = dir_indep_trm + real_t(1.5) * velXX;
const real_t vel_trm_N_S = dir_indep_trm + real_t(1.5) * velYY;
const real_t vel_trm_T_B = dir_indep_trm + real_t(1.5) * velZZ;
dst->get(x,y,z,Stencil::idx[E]) = omega_trm_ * dd_tmp_E + omega_w1_ * ( vel_trm_E_W + velX );
dst->get(x,y,z,Stencil::idx[W]) = omega_trm_ * dd_tmp_W + omega_w1_ * ( vel_trm_E_W - velX );
dst->get(x,y,z,Stencil::idx[N]) = omega_trm_ * dd_tmp_N + omega_w1_ * ( vel_trm_N_S + velY );
dst->get(x,y,z,Stencil::idx[S]) = omega_trm_ * dd_tmp_S + omega_w1_ * ( vel_trm_N_S - velY );
dst->get(x,y,z,Stencil::idx[T]) = omega_trm_ * dd_tmp_T + omega_w1_ * ( vel_trm_T_B + velZ );
dst->get(x,y,z,Stencil::idx[B]) = omega_trm_ * dd_tmp_B + omega_w1_ * ( vel_trm_T_B - velZ );
const real_t velXmY = velX - velY;
const real_t vel_trm_NW_SE = dir_indep_trm + real_t(1.5) * velXmY * velXmY;
dst->get(x,y,z,Stencil::idx[NW]) = omega_trm_ * dd_tmp_NW + omega_w2_ * ( vel_trm_NW_SE - velXmY );
dst->get(x,y,z,Stencil::idx[SE]) = omega_trm_ * dd_tmp_SE + omega_w2_ * ( vel_trm_NW_SE + velXmY );
const real_t velXpY = velX + velY;
const real_t vel_trm_NE_SW = dir_indep_trm + real_t(1.5) * velXpY * velXpY;
dst->get(x,y,z,Stencil::idx[NE]) = omega_trm_ * dd_tmp_NE + omega_w2_ * ( vel_trm_NE_SW + velXpY );
dst->get(x,y,z,Stencil::idx[SW]) = omega_trm_ * dd_tmp_SW + omega_w2_ * ( vel_trm_NE_SW - velXpY );
const real_t velXmZ = velX - velZ;
const real_t vel_trm_TW_BE = dir_indep_trm + real_t(1.5) * velXmZ * velXmZ;
dst->get(x,y,z,Stencil::idx[TW]) = omega_trm_ * dd_tmp_TW + omega_w2_ * ( vel_trm_TW_BE - velXmZ );
dst->get(x,y,z,Stencil::idx[BE]) = omega_trm_ * dd_tmp_BE + omega_w2_ * ( vel_trm_TW_BE + velXmZ );
const real_t velXpZ = velX + velZ;
const real_t vel_trm_TE_BW = dir_indep_trm + real_t(1.5) * velXpZ * velXpZ;
dst->get(x,y,z,Stencil::idx[TE]) = omega_trm_ * dd_tmp_TE + omega_w2_ * ( vel_trm_TE_BW + velXpZ );
dst->get(x,y,z,Stencil::idx[BW]) = omega_trm_ * dd_tmp_BW + omega_w2_ * ( vel_trm_TE_BW - velXpZ );
const real_t velYmZ = velY - velZ;
const real_t vel_trm_TS_BN = dir_indep_trm + real_t(1.5) * velYmZ * velYmZ;
dst->get(x,y,z,Stencil::idx[TS]) = omega_trm_ * dd_tmp_TS + omega_w2_ * ( vel_trm_TS_BN - velYmZ );
dst->get(x,y,z,Stencil::idx[BN]) = omega_trm_ * dd_tmp_BN + omega_w2_ * ( vel_trm_TS_BN + velYmZ );
const real_t velYpZ = velY + velZ;
const real_t vel_trm_TN_BS = dir_indep_trm + real_t(1.5) * velYpZ * velYpZ;
dst->get(x,y,z,Stencil::idx[TN]) = omega_trm_ * dd_tmp_TN + omega_w2_ * ( vel_trm_TN_BS + velYpZ );
dst->get(x,y,z,Stencil::idx[BS]) = omega_trm_ * dd_tmp_BS + omega_w2_ * ( vel_trm_TN_BS - velYpZ );
});
src->swapDataPointers(*dst);
}
private:
BlockDataID src_;
BlockDataID dst_;
real_t omega_;
};
} // namespace walberla
\ No newline at end of file
......@@ -8,17 +8,17 @@ DomainSetup
Parameters
{
timesteps 1000; // time steps of one performance measurement
timesteps 200; // time steps of one performance measurement
warmupSteps 1; // number of steps to run before measurement starts
outerIterations 1; // how many measurements to conduct
outerIterations 4; // how many measurements to conduct
vtkWriteFrequency 100; // write a VTK file every n'th step, if zero VTK output is disabled
vtkWriteFrequency 0; // write a VTK file every n'th step, if zero VTK output is disabled
timeStepMode twoField;
//twoFieldKernelType manual_generic;
remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time
//twoFieldKernelType manualD3Q19;
remainingTimeLoggerFrequency 6; // interval in seconds to log the estimated remaining time
directComm 0;
omega 1.8;
shearVelocityMagnitude 0.05;
shearVelocityMagnitude 0.02;
useGui 0;
}
......@@ -110,12 +110,17 @@ int main( int argc, char **argv )
std::function<void(IBlock*)> twoFieldKernel;
if( twoFieldKernelType == "generated") {
twoFieldKernel = pystencils::GenLbKernel(pdfFieldId, omega);
} else if (twoFieldKernelType == "manual_generic") {
} else if (twoFieldKernelType == "manualGeneric") {
using MyLM = lbm::D3Q19<lbm::collision_model::SRT>;
BlockDataID tmpPdfFieldId = blocks->addStructuredBlockData<PdfField_T>(pdfFieldAdder, "pdfs");
twoFieldKernel = StreamPullCollideGeneric<MyLM>(pdfFieldId, tmpPdfFieldId, omega);
} else if (twoFieldKernelType == "manual_d3q19") {
} else if (twoFieldKernelType == "manualD3Q19") {
using MyLM = lbm::D3Q19<lbm::collision_model::SRT>;
BlockDataID tmpPdfFieldId = blocks->addStructuredBlockData<PdfField_T>(pdfFieldAdder, "pdfs");
twoFieldKernel = StreamPullCollideD3Q19<MyLM>(pdfFieldId, tmpPdfFieldId, omega);
} else {
WALBERLA_ABORT_NO_DEBUG_INFO("Invalid option for \"twoFieldKernelType\", "
"valid options are \"generated\", \"manualGeneric\", \"manualD3Q19\"");
}
using F = std::function<void()>;
......
......@@ -3,6 +3,7 @@ import os
import operator
import waLBerla as wlb
from waLBerla.tools.sqlitedb import *
from waLBerla.tools.setup import block_decomposition
from functools import reduce
......@@ -32,11 +33,30 @@ def calculate_time_steps(runtime, expected_mlups, domain_size):
return int(time_steps_per_second * runtime)
def domain_decomposition_func_z(processes, threads, block_size):
return {
'blocks': (1, 1, processes),
'cellsPerBlock': (block_size[0], block_size[1], block_size[2] * threads)
}
def domain_decomposition_func_full(processes, threads, block_size):
return {
'blocks': block_decomposition(processes),
'cellsPerBlock': (block_size[0], block_size[1], block_size[2] * threads)
}
class BenchmarkScenario:
def __init__(self, block_size=(256, 128, 128), direct_comm=True, time_step_mode='aa', db_file_name='uniform_grid_gen.sqlite'):
def __init__(self, block_size=(256, 128, 128), direct_comm=True,
time_step_mode='aa', two_field_kernel_type='generated',
domain_decomposition_func=domain_decomposition_func_z,
db_file_name='uniform_grid_gen.sqlite'):
self.block_size = block_size
self.direct_comm = direct_comm
self.time_step_mode = time_step_mode
self.two_field_kernel_type = two_field_kernel_type
self.domain_decomposition_func = domain_decomposition_func
self.threads = int(os.environ['OMP_NUM_THREADS'])
self.processes = wlb.mpi.numProcesses()
self.db_file_name = db_file_name
......@@ -48,8 +68,6 @@ class BenchmarkScenario:
time_steps = max(10, time_steps)
cfg = {
'DomainSetup': {
'blocks': (1, 1, self.processes),
'cellsPerBlock': (self.block_size[0], self.block_size[1], self.block_size[2] * self.threads),
'periodic': (1, 1, 1),
},
'Parameters': {
......@@ -60,9 +78,11 @@ class BenchmarkScenario:
'remainingTimeLoggerFrequency': 0,
'omega': 1.6,
'timeStepMode': self.time_step_mode,
'twoFieldKernelType': self.two_field_kernel_type,
'directComm': self.direct_comm,
}
}
cfg['DomainSetup'].update(self.domain_decomposition_func(self.processes, self.threads, self.block_size))
return cfg
@wlb.member_callback
......@@ -88,13 +108,38 @@ class BenchmarkScenario:
storeSingle(result, "runs", self.db_file_name)
def benchmark():
def single_node_benchmark():
scenarios = wlb.ScenarioManager()
for block_size in [(128, 128, 128), (128, 64, 64), (64, 64, 128), (64, 64, 64), (64, 32, 32), (32, 32, 32), (16, 16, 16), (256, 128, 64), (512, 128, 32)]:
for block_size in [(128, 128, 128), (128, 64, 64), (64, 64, 128), (64, 128, 64), (64, 64, 64),
(1024, 64, 32), (2048, 64, 16),
(64, 32, 32), (32, 32, 32), (16, 16, 16), (256, 128, 64), (512, 128, 32)]:
for direct_comm in (True, False):
for time_step_mode in ['aa', 'aaKernelOnly', 'twoField']:
sc = BenchmarkScenario(block_size=block_size, direct_comm=direct_comm, time_step_mode=time_step_mode)
scenarios.add(sc)
if time_step_mode == 'twoField':
for kt in ['generated', 'manualGeneric', 'manualD3Q19']:
sc = BenchmarkScenario(block_size=block_size, direct_comm=direct_comm,
time_step_mode=time_step_mode, two_field_kernel_type=kt,
domain_decomposition_func=domain_decomposition_func_z
)
scenarios.add(sc)
else:
sc = BenchmarkScenario(block_size=block_size, direct_comm=direct_comm,
domain_decomposition_func=domain_decomposition_func_z,
time_step_mode=time_step_mode)
scenarios.add(sc)
def weak_scaling():
scenarios = wlb.ScenarioManager()
for block_size in [(128, 128, 128), (128, 64, 64), (64, 64, 128), (64, 128, 64), (64, 64, 64),
(1024, 64, 32), (2048, 64, 16),
(64, 32, 32), (32, 32, 32), (16, 16, 16), (256, 128, 64), (512, 128, 32)]:
for direct_comm in (True, False):
sc = BenchmarkScenario(block_size=block_size, direct_comm=direct_comm,
domain_decomposition_func=domain_decomposition_func_full)
scenarios.add(sc)
benchmark()
single_node_benchmark()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment