From c933bf255788551f28740370ec0b1e1653896e90 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Sat, 5 Oct 2019 10:50:32 +0200 Subject: [PATCH] Corrections in UniformGridGenerated Benchmark --- .../UniformGridGenerated/CMakeLists.txt | 5 +- .../UniformGridGenerated/UniformGrid.prm | 16 ++--- .../UniformGridGenerated.cpp | 64 +++++++++++++++---- .../UniformGridGenerated.py | 41 +++++++----- 4 files changed, 86 insertions(+), 40 deletions(-) diff --git a/apps/benchmarks/UniformGridGenerated/CMakeLists.txt b/apps/benchmarks/UniformGridGenerated/CMakeLists.txt index 1e8028d39..836baeefa 100644 --- a/apps/benchmarks/UniformGridGenerated/CMakeLists.txt +++ b/apps/benchmarks/UniformGridGenerated/CMakeLists.txt @@ -2,7 +2,10 @@ waLBerla_link_files_to_builddir( "*.prm" ) waLBerla_python_file_generates(UniformGridGenerated.py - GenLbKernel.cpp GenMacroGetter.cpp GenMacroSetter.cpp GenPackInfo.cpp GenDefines.h) + GenMacroGetter.cpp GenMacroSetter.cpp + GenPackInfo.cpp GenPackInfoAAPush.cpp GenPackInfoAAPull.cpp + GenLbKernel.cpp GenLbKernelAAEven.cpp GenLbKernelAAOdd.cpp + GenDefines.h) foreach(config trt ) diff --git a/apps/benchmarks/UniformGridGenerated/UniformGrid.prm b/apps/benchmarks/UniformGridGenerated/UniformGrid.prm index 622515557..ae22ab6b5 100644 --- a/apps/benchmarks/UniformGridGenerated/UniformGrid.prm +++ b/apps/benchmarks/UniformGridGenerated/UniformGrid.prm @@ -1,26 +1,22 @@ DomainSetup { blocks < 1, 1, 1 >; - cellsPerBlock < 64, 64, 128 >; + cellsPerBlock < 256, 128, 128 >; periodic < 1, 1, 1 >; } Parameters { - timesteps 200; // time steps of one performance measurement - warmupSteps 10; // number of steps to run before measurement starts - outerIterations 15; // how many measurements to conduct + timesteps 400; // time steps of one performance measurement + warmupSteps 1; // number of steps to run before measurement starts + outerIterations 1; // how many measurements to conduct - vtkWriteFrequency 200; // write a VTK file every n'th 
step, if zero VTK output is disabled + vtkWriteFrequency 0; // write a VTK file every n'th step, if zero VTK output is disabled cudaEnabledMPI false; // switch on if you have a CUDA-enabled MPI implementation - - timeStepStrategy kernelOnly; // can be: noOverlap, simpleOverlap, complexOverlap, kernelOnly - innerOuterSplit < 8, 1, 1>; // slice-thickness that 'outer'-kernels process when overlapping - + timeStepMode aaKernelOnly; // can be: twoField, twoFieldKernelOnly, aa, aaKernelOnly remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time omega 1.8; - initShearFlow 1; useGui 0; } diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp index 69db6180b..8ccfa107d 100644 --- a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp +++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp @@ -1,13 +1,11 @@ #include "core/Environment.h" #include "core/logging/Initialization.h" -#include "core/math/Random.h" #include "python_coupling/CreateConfig.h" #include "python_coupling/PythonCallback.h" #include "python_coupling/DictWrapper.h" #include "blockforest/Initialization.h" #include "field/vtk/VTKWriter.h" #include "field/AddToStorage.h" -#include "field/communication/PackInfo.h" #include "blockforest/communication/UniformBufferedScheme.h" #include "timeloop/all.h" #include "core/timing/TimingPool.h" @@ -17,11 +15,18 @@ #include "InitShearVelocity.h" #include "GenDefines.h" -#include "GenPackInfo.h" -#include "GenLbKernel.h" #include "GenMacroGetter.h" #include "GenMacroSetter.h" +#include "GenLbKernel.h" +#include "GenLbKernelAAEven.h" +#include "GenLbKernelAAOdd.h" + +#include "GenPackInfo.h" +#include "GenPackInfoAAPush.h" +#include "GenPackInfoAAPull.h" + + using namespace walberla; using PdfField_T = GhostLayerField< real_t, Stencil_T::Q >; @@ -43,13 +48,14 @@ int main( int argc, char **argv ) Vector3<uint_t> cellsPerBlock = 
config->getBlock( "DomainSetup" ).getParameter<Vector3<uint_t> >( "cellsPerBlock" ); // Reading parameters auto parameters = config->getOneBlock( "Parameters" ); - const std::string timeStepStrategy = parameters.getParameter<std::string>( "timeStepStrategy", "normal"); + const std::string timeStepMode = parameters.getParameter<std::string>( "timeStepMode", "twoField"); const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 )); - const uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 50 )); + uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 60 )); const real_t shearVelocityMagnitude = parameters.getParameter<real_t>("shearVelocityMagnitude", 0.08); // Creating fields - BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", real_t( std::nan("") ), field::fzyx ); + //BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", real_t( std::nan("") ), field::fzyx ); + BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", 0.0, field::fzyx ); BlockDataID velFieldId = field::addToStorage< VelocityField_T >( blocks, "vel", real_t( 0 ), field::fzyx ); pystencils::GenMacroSetter setterKernel(pdfFieldId, velFieldId); @@ -59,12 +65,38 @@ int main( int argc, char **argv ) for( auto & b : *blocks) setterKernel(&b); - SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps ); - blockforest::communication::UniformBufferedScheme< Stencil_T > communication( blocks ); - communication.addPackInfo( make_shared< pystencils::GenPackInfo >( pdfFieldId ) ); + blockforest::communication::UniformBufferedScheme< Stencil_T > twoFieldComm(blocks ); + twoFieldComm.addPackInfo(make_shared< pystencils::GenPackInfo >(pdfFieldId ) ); + + blockforest::communication::UniformBufferedScheme< Stencil_T > aaPullComm(blocks); + aaPullComm.addPackInfo(make_shared< pystencils::GenPackInfoAAPull>(pdfFieldId)); + + blockforest::communication::UniformBufferedScheme< Stencil_T > 
aaPushComm(blocks); + aaPushComm.addPackInfo(make_shared< pystencils::GenPackInfoAAPush>(pdfFieldId)); + + SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps / 2 ); + if( timeStepMode == "twoField") + { + timeLoop.add() << BeforeFunction(twoFieldComm, "communication" ) + << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide1" ); + timeLoop.add() << BeforeFunction(twoFieldComm, "communication" ) + << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide2" ); + + } else if ( timeStepMode == "twoFieldKernelOnly") { + timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide1" ); + timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide2" ); + } else if ( timeStepMode == "aa") { + timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId), "AA Even" ); + timeLoop.add() << BeforeFunction( aaPullComm ) + << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId), "AA Odd") + << AfterFunction( aaPushComm ); + } else if ( timeStepMode == "aaKernelOnly") { + timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId), "AA Even" ); + timeLoop.add() << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId), "AA Odd"); + } else { + WALBERLA_ABORT("Invalid value for timeStepMode "); + } - timeLoop.add() << BeforeFunction( communication, "communication" ) - << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide" ); int warmupSteps = parameters.getParameter<int>( "warmupSteps", 2 ); int outerIterations = parameters.getParameter<int>( "outerIterations", 1 ); @@ -108,6 +140,14 @@ int main( int argc, char **argv ) WALBERLA_LOG_INFO_ON_ROOT( "Starting simulation with " << timesteps << " time steps" ); simTimer.start(); timeLoop.run(); + /* + pystencils::GenLbKernelAAEven k1(pdfFieldId, omega); + pystencils::GenLbKernelAAOdd k2(pdfFieldId, omega); + for(int t=0; t < timesteps / 2; ++t) + { for( auto & b : *blocks) { + k1(&b); + k2(&b); + }}*/ simTimer.end(); 
WALBERLA_LOG_INFO_ON_ROOT( "Simulation finished" ); auto time = simTimer.last(); diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py index 7027cf7ee..2afbd8d18 100644 --- a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py +++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py @@ -4,8 +4,9 @@ from lbmpy.creationfunctions import create_lb_update_rule from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter +from lbmpy.fieldaccess import AAEvenTimeStepAccessor, AAOddTimeStepAccessor -omega = sp.symbols("omega") +omega = 1.6#sp.symbols("omega") omega_fill = sp.symbols("omega_:10") options_dict = { @@ -18,6 +19,7 @@ options_dict = { 'trt': { 'method': 'trt', 'stencil': 'D3Q19', + 'compressible': False, 'relaxation_rate': omega, }, 'mrt': { @@ -74,16 +76,12 @@ const bool infoCsePdfs = {cse_pdfs}; with CodeGeneration() as ctx: - accessor = StreamPullTwoFieldsAccessor() - assert not accessor.is_inplace, "This app does not work for inplace accessors" - common_options = { 'field_name': 'pdfs', 'temporary_field_name': 'pdfs_tmp', - 'kernel_type': accessor, 'optimization': {'cse_global': False, - 'cse_pdfs': True, - 'split': True} + 'cse_pdfs': False, + 'split': False} } config_name = ctx.config noopt = False @@ -101,9 +99,6 @@ with CodeGeneration() as ctx: options.update(common_options) options = options.copy() - if noopt: - options['optimization']['cse_global'] = False - options['optimization']['cse_pdfs'] = False if d3q27: options['stencil'] = 'D3Q27' @@ -112,20 +107,32 @@ with CodeGeneration() as ctx: pdfs, velocity_field = ps.fields("pdfs({q}), velocity(3) : double[3D]".format(q=q), layout='fzyx') options['optimization']['symbolic_field'] = pdfs - update_rule = 
create_lb_update_rule(**options) - vec = {'nontemporal': True, 'assume_aligned': True, 'assume_inner_stride_one': True} + update_rule_two_field = create_lb_update_rule(**options) + update_rule_aa_even = create_lb_update_rule(kernel_type=AAEvenTimeStepAccessor(), **options) + options['optimization']['split'] = True + update_rule_aa_odd = create_lb_update_rule(kernel_type=AAOddTimeStepAccessor(), **options) + + vec = {'nontemporal': False, 'assume_aligned': True, 'assume_inner_stride_one': True} # Sweeps - generate_sweep(ctx, 'GenLbKernel', update_rule, field_swaps=[('pdfs', 'pdfs_tmp')]) - setter_assignments = macroscopic_values_setter(update_rule.method, velocity=velocity_field.center_vector, + generate_sweep(ctx, 'GenLbKernel', update_rule_two_field, field_swaps=[('pdfs', 'pdfs_tmp')]) + generate_sweep(ctx, 'GenLbKernelAAEven', update_rule_aa_even, cpu_vectorize_info={'assume_aligned': True}, cpu_openmp=6, ghost_layers=1) + generate_sweep(ctx, 'GenLbKernelAAOdd', update_rule_aa_odd, cpu_vectorize_info={'assume_aligned': True}, cpu_openmp=6, ghost_layers=1) + + setter_assignments = macroscopic_values_setter(update_rule_two_field.method, velocity=velocity_field.center_vector, pdfs=pdfs.center_vector, density=1) - getter_assignments = macroscopic_values_getter(update_rule.method, velocity=velocity_field.center_vector, - pdfs=pdfs.center_vector, density=None) + getter_assignments = macroscopic_values_getter(update_rule_two_field.method, velocity=velocity_field.center_vector, + pdfs=pdfs.center_vector, density=None) generate_sweep(ctx, 'GenMacroSetter', setter_assignments) generate_sweep(ctx, 'GenMacroGetter', getter_assignments) # Communication - generate_pack_info_from_kernel(ctx, 'GenPackInfo', update_rule, cpu_vectorize_info={'instruction_set': None}) + generate_pack_info_from_kernel(ctx, 'GenPackInfo', update_rule_two_field, + cpu_vectorize_info={'instruction_set': None}) + generate_pack_info_from_kernel(ctx, 'GenPackInfoAAPull', update_rule_aa_odd, kind='pull', + 
cpu_vectorize_info={'instruction_set': None}) + generate_pack_info_from_kernel(ctx, 'GenPackInfoAAPush', update_rule_aa_odd, kind='push', + cpu_vectorize_info={'instruction_set': None}) # Info Header infoHeaderParams = { -- GitLab