Skip to content
Snippets Groups Projects
Commit c933bf25 authored by Martin Bauer
Browse files

Corrections in UniformGridGenerated Benchmark

parent 1b78979c
Branches
Tags
No related merge requests found
...@@ -2,7 +2,10 @@ waLBerla_link_files_to_builddir( "*.prm" ) ...@@ -2,7 +2,10 @@ waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_python_file_generates(UniformGridGenerated.py waLBerla_python_file_generates(UniformGridGenerated.py
GenLbKernel.cpp GenMacroGetter.cpp GenMacroSetter.cpp GenPackInfo.cpp GenDefines.h) GenMacroGetter.cpp GenMacroSetter.cpp
GenPackInfo.cpp GenPackInfoAAPush.cpp GenPackInfoAAPull.cpp
GenLbKernel.cpp GenLbKernelAAEven.cpp GenLbKernelAAOdd.cpp
GenDefines.h)
foreach(config trt ) foreach(config trt )
......
DomainSetup DomainSetup
{ {
blocks < 1, 1, 1 >; blocks < 1, 1, 1 >;
cellsPerBlock < 64, 64, 128 >; cellsPerBlock < 256, 128, 128 >;
periodic < 1, 1, 1 >; periodic < 1, 1, 1 >;
} }
Parameters Parameters
{ {
timesteps 200; // time steps of one performance measurement timesteps 400; // time steps of one performance measurement
warmupSteps 10; // number of steps to run before measurement starts warmupSteps 1; // number of steps to run before measurement starts
outerIterations 15; // how many measurements to conduct outerIterations 1; // how many measurements to conduct
vtkWriteFrequency 200; // write a VTK file every n'th step, if zero VTK output is disabled vtkWriteFrequency 0; // write a VTK file every n'th step, if zero VTK output is disabled
cudaEnabledMPI false; // switch on if you have a CUDA-enabled MPI implementation cudaEnabledMPI false; // switch on if you have a CUDA-enabled MPI implementation
timeStepMode aaKernelOnly; // can be: twoField, twoFieldKernelOnly, aa, aaKernelOnly
timeStepStrategy kernelOnly; // can be: noOverlap, simpleOverlap, complexOverlap, kernelOnly
innerOuterSplit < 8, 1, 1>; // slice-thickness that 'outer'-kernels process when overlapping
remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time
omega 1.8; omega 1.8;
initShearFlow 1;
useGui 0; useGui 0;
} }
#include "core/Environment.h" #include "core/Environment.h"
#include "core/logging/Initialization.h" #include "core/logging/Initialization.h"
#include "core/math/Random.h"
#include "python_coupling/CreateConfig.h" #include "python_coupling/CreateConfig.h"
#include "python_coupling/PythonCallback.h" #include "python_coupling/PythonCallback.h"
#include "python_coupling/DictWrapper.h" #include "python_coupling/DictWrapper.h"
#include "blockforest/Initialization.h" #include "blockforest/Initialization.h"
#include "field/vtk/VTKWriter.h" #include "field/vtk/VTKWriter.h"
#include "field/AddToStorage.h" #include "field/AddToStorage.h"
#include "field/communication/PackInfo.h"
#include "blockforest/communication/UniformBufferedScheme.h" #include "blockforest/communication/UniformBufferedScheme.h"
#include "timeloop/all.h" #include "timeloop/all.h"
#include "core/timing/TimingPool.h" #include "core/timing/TimingPool.h"
...@@ -17,11 +15,18 @@ ...@@ -17,11 +15,18 @@
#include "InitShearVelocity.h" #include "InitShearVelocity.h"
#include "GenDefines.h" #include "GenDefines.h"
#include "GenPackInfo.h"
#include "GenLbKernel.h"
#include "GenMacroGetter.h" #include "GenMacroGetter.h"
#include "GenMacroSetter.h" #include "GenMacroSetter.h"
#include "GenLbKernel.h"
#include "GenLbKernelAAEven.h"
#include "GenLbKernelAAOdd.h"
#include "GenPackInfo.h"
#include "GenPackInfoAAPush.h"
#include "GenPackInfoAAPull.h"
using namespace walberla; using namespace walberla;
using PdfField_T = GhostLayerField< real_t, Stencil_T::Q >; using PdfField_T = GhostLayerField< real_t, Stencil_T::Q >;
...@@ -43,13 +48,14 @@ int main( int argc, char **argv ) ...@@ -43,13 +48,14 @@ int main( int argc, char **argv )
Vector3<uint_t> cellsPerBlock = config->getBlock( "DomainSetup" ).getParameter<Vector3<uint_t> >( "cellsPerBlock" ); Vector3<uint_t> cellsPerBlock = config->getBlock( "DomainSetup" ).getParameter<Vector3<uint_t> >( "cellsPerBlock" );
// Reading parameters // Reading parameters
auto parameters = config->getOneBlock( "Parameters" ); auto parameters = config->getOneBlock( "Parameters" );
const std::string timeStepStrategy = parameters.getParameter<std::string>( "timeStepStrategy", "normal"); const std::string timeStepMode = parameters.getParameter<std::string>( "timeStepMode", "twoField");
const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 )); const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 ));
const uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 50 )); uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 60 ));
const real_t shearVelocityMagnitude = parameters.getParameter<real_t>("shearVelocityMagnitude", 0.08); const real_t shearVelocityMagnitude = parameters.getParameter<real_t>("shearVelocityMagnitude", 0.08);
// Creating fields // Creating fields
BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", real_t( std::nan("") ), field::fzyx ); //BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", real_t( std::nan("") ), field::fzyx );
BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", 0.0, field::fzyx );
BlockDataID velFieldId = field::addToStorage< VelocityField_T >( blocks, "vel", real_t( 0 ), field::fzyx ); BlockDataID velFieldId = field::addToStorage< VelocityField_T >( blocks, "vel", real_t( 0 ), field::fzyx );
pystencils::GenMacroSetter setterKernel(pdfFieldId, velFieldId); pystencils::GenMacroSetter setterKernel(pdfFieldId, velFieldId);
...@@ -59,12 +65,38 @@ int main( int argc, char **argv ) ...@@ -59,12 +65,38 @@ int main( int argc, char **argv )
for( auto & b : *blocks) for( auto & b : *blocks)
setterKernel(&b); setterKernel(&b);
SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps ); blockforest::communication::UniformBufferedScheme< Stencil_T > twoFieldComm(blocks );
blockforest::communication::UniformBufferedScheme< Stencil_T > communication( blocks ); twoFieldComm.addPackInfo(make_shared< pystencils::GenPackInfo >(pdfFieldId ) );
communication.addPackInfo( make_shared< pystencils::GenPackInfo >( pdfFieldId ) );
blockforest::communication::UniformBufferedScheme< Stencil_T > aaPullComm(blocks);
aaPullComm.addPackInfo(make_shared< pystencils::GenPackInfoAAPull>(pdfFieldId));
blockforest::communication::UniformBufferedScheme< Stencil_T > aaPushComm(blocks);
aaPushComm.addPackInfo(make_shared< pystencils::GenPackInfoAAPush>(pdfFieldId));
SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps / 2 );
if( timeStepMode == "twoField")
{
timeLoop.add() << BeforeFunction(twoFieldComm, "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide1" );
timeLoop.add() << BeforeFunction(twoFieldComm, "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide2" );
} else if ( timeStepMode == "twoFieldKernelOnly") {
timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide1" );
timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide2" );
} else if ( timeStepMode == "aa") {
timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId), "AA Even" );
timeLoop.add() << BeforeFunction( aaPullComm )
<< Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId), "AA Odd")
<< AfterFunction( aaPushComm );
} else if ( timeStepMode == "aaKernelOnly") {
timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId), "AA Even" );
timeLoop.add() << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId), "AA Odd");
} else {
WALBERLA_ABORT("Invalid value for timeStepMode ");
}
timeLoop.add() << BeforeFunction( communication, "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide" );
int warmupSteps = parameters.getParameter<int>( "warmupSteps", 2 ); int warmupSteps = parameters.getParameter<int>( "warmupSteps", 2 );
int outerIterations = parameters.getParameter<int>( "outerIterations", 1 ); int outerIterations = parameters.getParameter<int>( "outerIterations", 1 );
...@@ -108,6 +140,14 @@ int main( int argc, char **argv ) ...@@ -108,6 +140,14 @@ int main( int argc, char **argv )
WALBERLA_LOG_INFO_ON_ROOT( "Starting simulation with " << timesteps << " time steps" ); WALBERLA_LOG_INFO_ON_ROOT( "Starting simulation with " << timesteps << " time steps" );
simTimer.start(); simTimer.start();
timeLoop.run(); timeLoop.run();
/*
pystencils::GenLbKernelAAEven k1(pdfFieldId, omega);
pystencils::GenLbKernelAAOdd k2(pdfFieldId, omega);
for(int t=0; t < timesteps / 2; ++t)
{ for( auto & b : *blocks) {
k1(&b);
k2(&b);
}}*/
simTimer.end(); simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT( "Simulation finished" ); WALBERLA_LOG_INFO_ON_ROOT( "Simulation finished" );
auto time = simTimer.last(); auto time = simTimer.last();
......
...@@ -4,8 +4,9 @@ from lbmpy.creationfunctions import create_lb_update_rule ...@@ -4,8 +4,9 @@ from lbmpy.creationfunctions import create_lb_update_rule
from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from lbmpy.fieldaccess import AAEvenTimeStepAccessor, AAOddTimeStepAccessor
omega = sp.symbols("omega") omega = 1.6#sp.symbols("omega")
omega_fill = sp.symbols("omega_:10") omega_fill = sp.symbols("omega_:10")
options_dict = { options_dict = {
...@@ -18,6 +19,7 @@ options_dict = { ...@@ -18,6 +19,7 @@ options_dict = {
'trt': { 'trt': {
'method': 'trt', 'method': 'trt',
'stencil': 'D3Q19', 'stencil': 'D3Q19',
'compressible': False,
'relaxation_rate': omega, 'relaxation_rate': omega,
}, },
'mrt': { 'mrt': {
...@@ -74,16 +76,12 @@ const bool infoCsePdfs = {cse_pdfs}; ...@@ -74,16 +76,12 @@ const bool infoCsePdfs = {cse_pdfs};
with CodeGeneration() as ctx: with CodeGeneration() as ctx:
accessor = StreamPullTwoFieldsAccessor()
assert not accessor.is_inplace, "This app does not work for inplace accessors"
common_options = { common_options = {
'field_name': 'pdfs', 'field_name': 'pdfs',
'temporary_field_name': 'pdfs_tmp', 'temporary_field_name': 'pdfs_tmp',
'kernel_type': accessor,
'optimization': {'cse_global': False, 'optimization': {'cse_global': False,
'cse_pdfs': True, 'cse_pdfs': False,
'split': True} 'split': False}
} }
config_name = ctx.config config_name = ctx.config
noopt = False noopt = False
...@@ -101,9 +99,6 @@ with CodeGeneration() as ctx: ...@@ -101,9 +99,6 @@ with CodeGeneration() as ctx:
options.update(common_options) options.update(common_options)
options = options.copy() options = options.copy()
if noopt:
options['optimization']['cse_global'] = False
options['optimization']['cse_pdfs'] = False
if d3q27: if d3q27:
options['stencil'] = 'D3Q27' options['stencil'] = 'D3Q27'
...@@ -112,20 +107,32 @@ with CodeGeneration() as ctx: ...@@ -112,20 +107,32 @@ with CodeGeneration() as ctx:
pdfs, velocity_field = ps.fields("pdfs({q}), velocity(3) : double[3D]".format(q=q), layout='fzyx') pdfs, velocity_field = ps.fields("pdfs({q}), velocity(3) : double[3D]".format(q=q), layout='fzyx')
options['optimization']['symbolic_field'] = pdfs options['optimization']['symbolic_field'] = pdfs
update_rule = create_lb_update_rule(**options) update_rule_two_field = create_lb_update_rule(**options)
vec = {'nontemporal': True, 'assume_aligned': True, 'assume_inner_stride_one': True} update_rule_aa_even = create_lb_update_rule(kernel_type=AAEvenTimeStepAccessor(), **options)
options['optimization']['split'] = True
update_rule_aa_odd = create_lb_update_rule(kernel_type=AAOddTimeStepAccessor(), **options)
vec = {'nontemporal': False, 'assume_aligned': True, 'assume_inner_stride_one': True}
# Sweeps # Sweeps
generate_sweep(ctx, 'GenLbKernel', update_rule, field_swaps=[('pdfs', 'pdfs_tmp')]) generate_sweep(ctx, 'GenLbKernel', update_rule_two_field, field_swaps=[('pdfs', 'pdfs_tmp')])
setter_assignments = macroscopic_values_setter(update_rule.method, velocity=velocity_field.center_vector, generate_sweep(ctx, 'GenLbKernelAAEven', update_rule_aa_even, cpu_vectorize_info={'assume_aligned': True}, cpu_openmp=6, ghost_layers=1)
generate_sweep(ctx, 'GenLbKernelAAOdd', update_rule_aa_odd, cpu_vectorize_info={'assume_aligned': True}, cpu_openmp=6, ghost_layers=1)
setter_assignments = macroscopic_values_setter(update_rule_two_field.method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=1) pdfs=pdfs.center_vector, density=1)
getter_assignments = macroscopic_values_getter(update_rule.method, velocity=velocity_field.center_vector, getter_assignments = macroscopic_values_getter(update_rule_two_field.method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=None) pdfs=pdfs.center_vector, density=None)
generate_sweep(ctx, 'GenMacroSetter', setter_assignments) generate_sweep(ctx, 'GenMacroSetter', setter_assignments)
generate_sweep(ctx, 'GenMacroGetter', getter_assignments) generate_sweep(ctx, 'GenMacroGetter', getter_assignments)
# Communication # Communication
generate_pack_info_from_kernel(ctx, 'GenPackInfo', update_rule, cpu_vectorize_info={'instruction_set': None}) generate_pack_info_from_kernel(ctx, 'GenPackInfo', update_rule_two_field,
cpu_vectorize_info={'instruction_set': None})
generate_pack_info_from_kernel(ctx, 'GenPackInfoAAPull', update_rule_aa_odd, kind='pull',
cpu_vectorize_info={'instruction_set': None})
generate_pack_info_from_kernel(ctx, 'GenPackInfoAAPush', update_rule_aa_odd, kind='push',
cpu_vectorize_info={'instruction_set': None})
# Info Header # Info Header
infoHeaderParams = { infoHeaderParams = {
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment