Skip to content
Snippets Groups Projects
Commit c933bf25 authored by Martin Bauer
Browse files

Corrections in UniformGridGenerated Benchmark

parent 1b78979c
No related merge requests found
...@@ -2,7 +2,10 @@ waLBerla_link_files_to_builddir( "*.prm" ) ...@@ -2,7 +2,10 @@ waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_python_file_generates(UniformGridGenerated.py waLBerla_python_file_generates(UniformGridGenerated.py
GenLbKernel.cpp GenMacroGetter.cpp GenMacroSetter.cpp GenPackInfo.cpp GenDefines.h) GenMacroGetter.cpp GenMacroSetter.cpp
GenPackInfo.cpp GenPackInfoAAPush.cpp GenPackInfoAAPull.cpp
GenLbKernel.cpp GenLbKernelAAEven.cpp GenLbKernelAAOdd.cpp
GenDefines.h)
foreach(config trt ) foreach(config trt )
......
DomainSetup DomainSetup
{ {
blocks < 1, 1, 1 >; blocks < 1, 1, 1 >;
cellsPerBlock < 64, 64, 128 >; cellsPerBlock < 256, 128, 128 >;
periodic < 1, 1, 1 >; periodic < 1, 1, 1 >;
} }
Parameters Parameters
{ {
timesteps 200; // time steps of one performance measurement timesteps 400; // time steps of one performance measurement
warmupSteps 10; // number of steps to run before measurement starts warmupSteps 1; // number of steps to run before measurement starts
outerIterations 15; // how many measurements to conduct outerIterations 1; // how many measurements to conduct
vtkWriteFrequency 200; // write a VTK file every n'th step, if zero VTK output is disabled vtkWriteFrequency 0; // write a VTK file every n'th step, if zero VTK output is disabled
cudaEnabledMPI false; // switch on if you have a CUDA-enabled MPI implementation cudaEnabledMPI false; // switch on if you have a CUDA-enabled MPI implementation
timeStepMode aaKernelOnly; // can be: twoField, twoFieldKernelOnly, aa, aaKernelOnly
timeStepStrategy kernelOnly; // can be: noOverlap, simpleOverlap, complexOverlap, kernelOnly
innerOuterSplit < 8, 1, 1>; // slice-thickness that 'outer'-kernels process when overlapping
remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time
omega 1.8; omega 1.8;
initShearFlow 1;
useGui 0; useGui 0;
} }
#include "core/Environment.h" #include "core/Environment.h"
#include "core/logging/Initialization.h" #include "core/logging/Initialization.h"
#include "core/math/Random.h"
#include "python_coupling/CreateConfig.h" #include "python_coupling/CreateConfig.h"
#include "python_coupling/PythonCallback.h" #include "python_coupling/PythonCallback.h"
#include "python_coupling/DictWrapper.h" #include "python_coupling/DictWrapper.h"
#include "blockforest/Initialization.h" #include "blockforest/Initialization.h"
#include "field/vtk/VTKWriter.h" #include "field/vtk/VTKWriter.h"
#include "field/AddToStorage.h" #include "field/AddToStorage.h"
#include "field/communication/PackInfo.h"
#include "blockforest/communication/UniformBufferedScheme.h" #include "blockforest/communication/UniformBufferedScheme.h"
#include "timeloop/all.h" #include "timeloop/all.h"
#include "core/timing/TimingPool.h" #include "core/timing/TimingPool.h"
...@@ -17,11 +15,18 @@ ...@@ -17,11 +15,18 @@
#include "InitShearVelocity.h" #include "InitShearVelocity.h"
#include "GenDefines.h" #include "GenDefines.h"
#include "GenPackInfo.h"
#include "GenLbKernel.h"
#include "GenMacroGetter.h" #include "GenMacroGetter.h"
#include "GenMacroSetter.h" #include "GenMacroSetter.h"
#include "GenLbKernel.h"
#include "GenLbKernelAAEven.h"
#include "GenLbKernelAAOdd.h"
#include "GenPackInfo.h"
#include "GenPackInfoAAPush.h"
#include "GenPackInfoAAPull.h"
using namespace walberla; using namespace walberla;
using PdfField_T = GhostLayerField< real_t, Stencil_T::Q >; using PdfField_T = GhostLayerField< real_t, Stencil_T::Q >;
...@@ -43,13 +48,14 @@ int main( int argc, char **argv ) ...@@ -43,13 +48,14 @@ int main( int argc, char **argv )
Vector3<uint_t> cellsPerBlock = config->getBlock( "DomainSetup" ).getParameter<Vector3<uint_t> >( "cellsPerBlock" ); Vector3<uint_t> cellsPerBlock = config->getBlock( "DomainSetup" ).getParameter<Vector3<uint_t> >( "cellsPerBlock" );
// Reading parameters // Reading parameters
auto parameters = config->getOneBlock( "Parameters" ); auto parameters = config->getOneBlock( "Parameters" );
const std::string timeStepStrategy = parameters.getParameter<std::string>( "timeStepStrategy", "normal"); const std::string timeStepMode = parameters.getParameter<std::string>( "timeStepMode", "twoField");
const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 )); const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 ));
const uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 50 )); uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 60 ));
const real_t shearVelocityMagnitude = parameters.getParameter<real_t>("shearVelocityMagnitude", 0.08); const real_t shearVelocityMagnitude = parameters.getParameter<real_t>("shearVelocityMagnitude", 0.08);
// Creating fields // Creating fields
BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", real_t( std::nan("") ), field::fzyx ); //BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", real_t( std::nan("") ), field::fzyx );
BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", 0.0, field::fzyx );
BlockDataID velFieldId = field::addToStorage< VelocityField_T >( blocks, "vel", real_t( 0 ), field::fzyx ); BlockDataID velFieldId = field::addToStorage< VelocityField_T >( blocks, "vel", real_t( 0 ), field::fzyx );
pystencils::GenMacroSetter setterKernel(pdfFieldId, velFieldId); pystencils::GenMacroSetter setterKernel(pdfFieldId, velFieldId);
...@@ -59,12 +65,38 @@ int main( int argc, char **argv ) ...@@ -59,12 +65,38 @@ int main( int argc, char **argv )
for( auto & b : *blocks) for( auto & b : *blocks)
setterKernel(&b); setterKernel(&b);
SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps ); blockforest::communication::UniformBufferedScheme< Stencil_T > twoFieldComm(blocks );
blockforest::communication::UniformBufferedScheme< Stencil_T > communication( blocks ); twoFieldComm.addPackInfo(make_shared< pystencils::GenPackInfo >(pdfFieldId ) );
communication.addPackInfo( make_shared< pystencils::GenPackInfo >( pdfFieldId ) );
blockforest::communication::UniformBufferedScheme< Stencil_T > aaPullComm(blocks);
aaPullComm.addPackInfo(make_shared< pystencils::GenPackInfoAAPull>(pdfFieldId));
blockforest::communication::UniformBufferedScheme< Stencil_T > aaPushComm(blocks);
aaPushComm.addPackInfo(make_shared< pystencils::GenPackInfoAAPush>(pdfFieldId));
SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps / 2 );
if( timeStepMode == "twoField")
{
timeLoop.add() << BeforeFunction(twoFieldComm, "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide1" );
timeLoop.add() << BeforeFunction(twoFieldComm, "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide2" );
} else if ( timeStepMode == "twoFieldKernelOnly") {
timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide1" );
timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide2" );
} else if ( timeStepMode == "aa") {
timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId), "AA Even" );
timeLoop.add() << BeforeFunction( aaPullComm )
<< Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId), "AA Odd")
<< AfterFunction( aaPushComm );
} else if ( timeStepMode == "aaKernelOnly") {
timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId), "AA Even" );
timeLoop.add() << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId), "AA Odd");
} else {
WALBERLA_ABORT("Invalid value for timeStepMode ");
}
timeLoop.add() << BeforeFunction( communication, "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide" );
int warmupSteps = parameters.getParameter<int>( "warmupSteps", 2 ); int warmupSteps = parameters.getParameter<int>( "warmupSteps", 2 );
int outerIterations = parameters.getParameter<int>( "outerIterations", 1 ); int outerIterations = parameters.getParameter<int>( "outerIterations", 1 );
...@@ -108,6 +140,14 @@ int main( int argc, char **argv ) ...@@ -108,6 +140,14 @@ int main( int argc, char **argv )
WALBERLA_LOG_INFO_ON_ROOT( "Starting simulation with " << timesteps << " time steps" ); WALBERLA_LOG_INFO_ON_ROOT( "Starting simulation with " << timesteps << " time steps" );
simTimer.start(); simTimer.start();
timeLoop.run(); timeLoop.run();
/*
pystencils::GenLbKernelAAEven k1(pdfFieldId, omega);
pystencils::GenLbKernelAAOdd k2(pdfFieldId, omega);
for(int t=0; t < timesteps / 2; ++t)
{ for( auto & b : *blocks) {
k1(&b);
k2(&b);
}}*/
simTimer.end(); simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT( "Simulation finished" ); WALBERLA_LOG_INFO_ON_ROOT( "Simulation finished" );
auto time = simTimer.last(); auto time = simTimer.last();
......
...@@ -4,8 +4,9 @@ from lbmpy.creationfunctions import create_lb_update_rule ...@@ -4,8 +4,9 @@ from lbmpy.creationfunctions import create_lb_update_rule
from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from lbmpy.fieldaccess import AAEvenTimeStepAccessor, AAOddTimeStepAccessor
omega = sp.symbols("omega") omega = 1.6#sp.symbols("omega")
omega_fill = sp.symbols("omega_:10") omega_fill = sp.symbols("omega_:10")
options_dict = { options_dict = {
...@@ -18,6 +19,7 @@ options_dict = { ...@@ -18,6 +19,7 @@ options_dict = {
'trt': { 'trt': {
'method': 'trt', 'method': 'trt',
'stencil': 'D3Q19', 'stencil': 'D3Q19',
'compressible': False,
'relaxation_rate': omega, 'relaxation_rate': omega,
}, },
'mrt': { 'mrt': {
...@@ -74,16 +76,12 @@ const bool infoCsePdfs = {cse_pdfs}; ...@@ -74,16 +76,12 @@ const bool infoCsePdfs = {cse_pdfs};
with CodeGeneration() as ctx: with CodeGeneration() as ctx:
accessor = StreamPullTwoFieldsAccessor()
assert not accessor.is_inplace, "This app does not work for inplace accessors"
common_options = { common_options = {
'field_name': 'pdfs', 'field_name': 'pdfs',
'temporary_field_name': 'pdfs_tmp', 'temporary_field_name': 'pdfs_tmp',
'kernel_type': accessor,
'optimization': {'cse_global': False, 'optimization': {'cse_global': False,
'cse_pdfs': True, 'cse_pdfs': False,
'split': True} 'split': False}
} }
config_name = ctx.config config_name = ctx.config
noopt = False noopt = False
...@@ -101,9 +99,6 @@ with CodeGeneration() as ctx: ...@@ -101,9 +99,6 @@ with CodeGeneration() as ctx:
options.update(common_options) options.update(common_options)
options = options.copy() options = options.copy()
if noopt:
options['optimization']['cse_global'] = False
options['optimization']['cse_pdfs'] = False
if d3q27: if d3q27:
options['stencil'] = 'D3Q27' options['stencil'] = 'D3Q27'
...@@ -112,20 +107,32 @@ with CodeGeneration() as ctx: ...@@ -112,20 +107,32 @@ with CodeGeneration() as ctx:
pdfs, velocity_field = ps.fields("pdfs({q}), velocity(3) : double[3D]".format(q=q), layout='fzyx') pdfs, velocity_field = ps.fields("pdfs({q}), velocity(3) : double[3D]".format(q=q), layout='fzyx')
options['optimization']['symbolic_field'] = pdfs options['optimization']['symbolic_field'] = pdfs
update_rule = create_lb_update_rule(**options) update_rule_two_field = create_lb_update_rule(**options)
vec = {'nontemporal': True, 'assume_aligned': True, 'assume_inner_stride_one': True} update_rule_aa_even = create_lb_update_rule(kernel_type=AAEvenTimeStepAccessor(), **options)
options['optimization']['split'] = True
update_rule_aa_odd = create_lb_update_rule(kernel_type=AAOddTimeStepAccessor(), **options)
vec = {'nontemporal': False, 'assume_aligned': True, 'assume_inner_stride_one': True}
# Sweeps # Sweeps
generate_sweep(ctx, 'GenLbKernel', update_rule, field_swaps=[('pdfs', 'pdfs_tmp')]) generate_sweep(ctx, 'GenLbKernel', update_rule_two_field, field_swaps=[('pdfs', 'pdfs_tmp')])
setter_assignments = macroscopic_values_setter(update_rule.method, velocity=velocity_field.center_vector, generate_sweep(ctx, 'GenLbKernelAAEven', update_rule_aa_even, cpu_vectorize_info={'assume_aligned': True}, cpu_openmp=6, ghost_layers=1)
generate_sweep(ctx, 'GenLbKernelAAOdd', update_rule_aa_odd, cpu_vectorize_info={'assume_aligned': True}, cpu_openmp=6, ghost_layers=1)
setter_assignments = macroscopic_values_setter(update_rule_two_field.method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=1) pdfs=pdfs.center_vector, density=1)
getter_assignments = macroscopic_values_getter(update_rule.method, velocity=velocity_field.center_vector, getter_assignments = macroscopic_values_getter(update_rule_two_field.method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=None) pdfs=pdfs.center_vector, density=None)
generate_sweep(ctx, 'GenMacroSetter', setter_assignments) generate_sweep(ctx, 'GenMacroSetter', setter_assignments)
generate_sweep(ctx, 'GenMacroGetter', getter_assignments) generate_sweep(ctx, 'GenMacroGetter', getter_assignments)
# Communication # Communication
generate_pack_info_from_kernel(ctx, 'GenPackInfo', update_rule, cpu_vectorize_info={'instruction_set': None}) generate_pack_info_from_kernel(ctx, 'GenPackInfo', update_rule_two_field,
cpu_vectorize_info={'instruction_set': None})
generate_pack_info_from_kernel(ctx, 'GenPackInfoAAPull', update_rule_aa_odd, kind='pull',
cpu_vectorize_info={'instruction_set': None})
generate_pack_info_from_kernel(ctx, 'GenPackInfoAAPush', update_rule_aa_odd, kind='push',
cpu_vectorize_info={'instruction_set': None})
# Info Header # Info Header
infoHeaderParams = { infoHeaderParams = {
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment