From 1a43844657a5fe44ee8abef14b4b2e34377dae80 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Tue, 1 Oct 2019 08:25:52 +0200
Subject: [PATCH] New benchmark for generated LBM on uniform grid on CPU

---
 apps/benchmarks/CMakeLists.txt                |   1 +
 .../UniformGridGenerated/CMakeLists.txt       |  12 ++
 .../UniformGridGenerated.cpp                  | 157 ++++++++++++++++++
 .../UniformGridGenerated.py                   | 133 +++++++++++++++
 4 files changed, 303 insertions(+)
 create mode 100644 apps/benchmarks/UniformGridGenerated/CMakeLists.txt
 create mode 100644 apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp
 create mode 100644 apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py

diff --git a/apps/benchmarks/CMakeLists.txt b/apps/benchmarks/CMakeLists.txt
index 8e83319ae..a59d3bc1a 100644
--- a/apps/benchmarks/CMakeLists.txt
+++ b/apps/benchmarks/CMakeLists.txt
@@ -14,3 +14,4 @@ add_subdirectory( ProbeVsExtraMessage )
 add_subdirectory( SchaeferTurek )
 add_subdirectory( UniformGrid )
 add_subdirectory( UniformGridGPU )
+add_subdirectory( UniformGridGenerated )
diff --git a/apps/benchmarks/UniformGridGenerated/CMakeLists.txt b/apps/benchmarks/UniformGridGenerated/CMakeLists.txt
new file mode 100644
index 000000000..def7d93f6
--- /dev/null
+++ b/apps/benchmarks/UniformGridGenerated/CMakeLists.txt
@@ -0,0 +1,12 @@
+waLBerla_python_file_generates(UniformGridGenerated.py
+        UniformGridGenerated_LatticeModel.cpp
+        UniformGridGenerated_Defines.h)
+
+
+foreach(config trt )
+    waLBerla_add_executable ( NAME UniformGridBenchmarkGenerated_${config}
+            FILES UniformGridGenerated.cpp UniformGridGenerated.py
+            DEPENDS blockforest boundary core domain_decomposition field geometry timeloop vtk gui
+            CODEGEN_CFG ${config})
+endforeach()
+
diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp
new file mode 100644
index 000000000..f48b0f755
--- /dev/null
+++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp
@@ -0,0 +1,157 @@
+#include "core/Environment.h"
+#include "core/logging/Initialization.h"
+#include "core/math/Random.h"
+#include "python_coupling/CreateConfig.h"
+#include "python_coupling/PythonCallback.h"
+#include "python_coupling/DictWrapper.h"
+#include "blockforest/Initialization.h"
+#include "field/vtk/VTKWriter.h"
+#include "field/communication/PackInfo.h"
+#include "blockforest/communication/UniformBufferedScheme.h"
+#include "timeloop/all.h"
+#include "core/timing/TimingPool.h"
+#include "core/timing/RemainingTimeLogger.h"
+#include "domain_decomposition/SharedSweep.h"
+#include "lbm/communication/PdfFieldPackInfo.h"
+#include "lbm/field/AddToStorage.h"
+#include "lbm/vtk/VTKOutput.h"
+#include "lbm/gui/Connection.h"
+#include "lbm/vtk/Velocity.h"
+#include "gui/Gui.h"
+
+#include "UniformGridGenerated_LatticeModel.h"
+#include "UniformGridGenerated_Defines.h"
+
+
+using namespace walberla;
+
+typedef lbm::UniformGridGenerated_LatticeModel LatticeModel_T;
+typedef LatticeModel_T::Stencil                Stencil_T;
+typedef LatticeModel_T::CommunicationStencil   CommunicationStencil_T;
+typedef lbm::PdfField< LatticeModel_T >        PdfField_T;
+
+
+void initShearVelocity(const shared_ptr<StructuredBlockStorage> & blocks, BlockDataID pdfFieldId,
+                       const real_t xMagnitude=0.1, const real_t fluctuationMagnitude=0.05 )
+{
+    math::seedRandomGenerator(0);
+    auto halfZ = blocks->getDomainCellBB().zMax() / 2;
+    for( auto & block: *blocks)
+    {
+        auto pdfField = block.getData<PdfField_T>( pdfFieldId );
+        WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(pdfField,
+            Cell globalCell;
+            blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
+            real_t randomReal = xMagnitude * math::realRandom<real_t>(-fluctuationMagnitude, fluctuationMagnitude);
+
+            if( globalCell[2] >= halfZ ) {
+                pdfField->setDensityAndVelocity(x, y, z, Vector3<real_t>(xMagnitude, 0, randomReal), real_t(1.0));
+            } else {
+                pdfField->setDensityAndVelocity(x, y, z, Vector3<real_t>(-xMagnitude, 0, randomReal), real_t(1.0));
+            }
+        );
+    }
+}
+
+
+int main( int argc, char **argv )
+{
+   mpi::Environment env( argc, argv );
+
+   for( auto cfg = python_coupling::configBegin( argc, argv ); cfg != python_coupling::configEnd(); ++cfg )
+   {
+      WALBERLA_MPI_WORLD_BARRIER();
+
+      auto config = *cfg;
+      logging::configureLogging( config );
+      auto blocks = blockforest::createUniformBlockGridFromConfig( config );
+
+      Vector3<uint_t> cellsPerBlock = config->getBlock( "DomainSetup" ).getParameter<Vector3<uint_t>  >( "cellsPerBlock" );
+      // Reading parameters
+      auto parameters = config->getOneBlock( "Parameters" );
+      const std::string timeStepStrategy = parameters.getParameter<std::string>( "timeStepStrategy", "normal");
+      const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 ));
+      const uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 50 ));
+      const bool initShearFlow = parameters.getParameter<bool>("initShearFlow", false);
+
+      // Creating fields
+      LatticeModel_T latticeModel = LatticeModel_T( omega );
+      BlockDataID pdfFieldId = lbm::addPdfFieldToStorage( blocks, "pdf field", latticeModel);
+
+      if( initShearFlow ) {
+          initShearVelocity(blocks, pdfFieldId);
+      }
+
+      SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps );
+      blockforest::communication::UniformBufferedScheme< CommunicationStencil_T > communication( blocks );
+      communication.addPackInfo( make_shared< lbm::PdfFieldPackInfo< LatticeModel_T > >( pdfFieldId ) );
+
+      timeLoop.add() << BeforeFunction( communication, "communication" )
+                     << Sweep( LatticeModel_T::Sweep( pdfFieldId ), "LB stream & collide" );
+
+      int warmupSteps = parameters.getParameter<int>( "warmupSteps", 2 );
+      int outerIterations = parameters.getParameter<int>( "outerIterations", 1 );
+      for(int i=0; i < warmupSteps; ++i )
+         timeLoop.singleStep();
+
+      auto remainingTimeLoggerFrequency = parameters.getParameter< double >( "remainingTimeLoggerFrequency", -1.0 ); // in seconds
+      if (remainingTimeLoggerFrequency > 0) {
+          auto logger = timing::RemainingTimeLogger( timeLoop.getNrOfTimeSteps() * outerIterations, remainingTimeLoggerFrequency );
+          timeLoop.addFuncAfterTimeStep( logger, "remaining time logger" );
+      }
+
+      // VTK
+      uint_t vtkWriteFrequency = parameters.getParameter<uint_t>( "vtkWriteFrequency", 0 );
+      if( vtkWriteFrequency > 0 )
+      {
+          auto vtkOutput = vtk::createVTKOutput_BlockData( *blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
+                                                           "simulation_step", false, true, true, false, 0 );
+          auto velWriter = make_shared< lbm::VelocityVTKWriter<LatticeModel_T> >(pdfFieldId, "vel");
+          vtkOutput->addCellDataWriter(velWriter);
+          timeLoop.addFuncAfterTimeStep( vtk::writeFiles( vtkOutput ), "VTK Output" );
+      }
+
+
+      bool useGui = parameters.getParameter<bool>( "useGui", false );
+      if( useGui )
+      {
+          GUI gui( timeLoop, blocks, argc, argv);
+          lbm::connectToGui<LatticeModel_T>(gui);
+          gui.run();
+      }
+      else
+      {
+          for ( int outerIteration = 0; outerIteration < outerIterations; ++outerIteration )
+          {
+              timeLoop.setCurrentTimeStepToZero();
+              WcTimer simTimer;
+              WALBERLA_LOG_INFO_ON_ROOT( "Starting simulation with " << timesteps << " time steps" );
+              simTimer.start();
+              timeLoop.run();
+              simTimer.end();
+              WALBERLA_LOG_INFO_ON_ROOT( "Simulation finished" );
+              auto time = simTimer.last();
+              auto nrOfCells = real_c( cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2] );
+              auto mlupsPerProcess = nrOfCells * real_c( timesteps ) / time * 1e-6;
+              WALBERLA_LOG_RESULT_ON_ROOT( "MLUPS per process " << mlupsPerProcess );
+              WALBERLA_LOG_RESULT_ON_ROOT( "Time per time step " << time / real_c( timesteps ));
+              WALBERLA_ROOT_SECTION()
+              {
+                  python_coupling::PythonCallback pythonCallbackResults( "results_callback" );
+                  if ( pythonCallbackResults.isCallable())
+                  {
+                      pythonCallbackResults.data().exposeValue( "mlupsPerProcess", mlupsPerProcess );
+                      pythonCallbackResults.data().exposeValue( "stencil", infoStencil );
+                      pythonCallbackResults.data().exposeValue( "configName", infoConfigName );
+                      pythonCallbackResults.data().exposeValue( "cse_global", infoCseGlobal );
+                      pythonCallbackResults.data().exposeValue( "cse_pdfs", infoCsePdfs );
+                      // Call Python function to report results
+                      pythonCallbackResults();
+                  }
+              }
+          }
+      }
+   }
+
+   return 0;
+}
diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py
new file mode 100644
index 000000000..8f7bf791c
--- /dev/null
+++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py
@@ -0,0 +1,133 @@
+import sympy as sp
+import pystencils as ps
+from lbmpy.creationfunctions import create_lb_collision_rule
+from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
+from lbmpy_walberla import generate_lattice_model
+from pystencils_walberla import CodeGeneration
+
+omega = sp.symbols("omega")
+omega_fill = sp.symbols("omega_:10")
+
+options_dict = {
+    'srt': {
+        'method': 'srt',
+        'stencil': 'D3Q19',
+        'relaxation_rate': omega,
+        'compressible': False,
+    },
+    'trt': {
+        'method': 'trt',
+        'stencil': 'D3Q19',
+        'relaxation_rate': omega,
+    },
+    'mrt': {
+        'method': 'mrt',
+        'stencil': 'D3Q19',
+        'relaxation_rates': [0, omega, 1.3, 1.4, omega, 1.2, 1.1, 1.15, 1.234, 1.4235, 1.242, 1.2567, 0.9, 0.7],
+    },
+    'mrt_full': {
+        'method': 'mrt',
+        'stencil': 'D3Q19',
+        'relaxation_rates': [omega_fill[0], omega, omega_fill[1], omega_fill[2], omega_fill[3], omega_fill[4], omega_fill[5]],
+    },
+    'mrt3': {
+        'method': 'mrt3',
+        'stencil': 'D3Q19',
+        'relaxation_rates': [omega, 1.1, 1.2],
+    },
+    'entropic': {
+        'method': 'mrt3',
+        'stencil': 'D3Q19',
+        'compressible': True,
+        'relaxation_rates': [omega, omega, sp.Symbol("omega_free")],
+        'entropic': True,
+    },
+    'entropic_kbc_n4': {
+        'method': 'trt-kbc-n4',
+        'stencil': 'D3Q27',
+        'compressible': True,
+        'relaxation_rates': [omega, sp.Symbol("omega_free")],
+        'entropic': True,
+    },
+    'smagorinsky': {
+        'method': 'srt',
+        'stencil': 'D3Q19',
+        'smagorinsky': True,
+        'relaxation_rate': omega,
+    },
+    'cumulant': {
+        'stencil': 'D3Q19',
+        'compressible': True,
+        'method': 'mrt',
+        'cumulant': True,
+        'relaxation_rates': [0, omega, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+    },
+}
+
+info_header = """
+#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q}; 
+const char * infoStencil = "{stencil}";
+const char * infoConfigName = "{configName}";
+const bool infoCseGlobal = {cse_global};
+const bool infoCsePdfs = {cse_pdfs};
+"""
+
+
+with CodeGeneration() as ctx:
+    accessor = StreamPullTwoFieldsAccessor()
+    assert not accessor.is_inplace, "This app does not work for inplace accessors"
+
+    common_options = {
+        'field_name': 'pdfs',
+        'temporary_field_name': 'pdfs_tmp',
+        'kernel_type': accessor,
+        'optimization': {'cse_global': True,
+                         'cse_pdfs': False}
+    }
+    config_name = ctx.config
+    noopt = False
+    d3q27 = False
+    if config_name.endswith("_noopt"):
+        noopt = True
+        config_name = config_name[:-len("_noopt")]
+    if config_name.endswith("_d3q27"):
+        d3q27 = True
+        config_name = config_name[:-len("_d3q27")]
+
+    options = options_dict[config_name]
+    options.update(common_options)
+    options = options.copy()
+
+    if noopt:
+        options['optimization']['cse_global'] = False
+        options['optimization']['cse_pdfs'] = False
+    if d3q27:
+        options['stencil'] = 'D3Q27'
+
+    stencil_str = options['stencil']
+    q = int(stencil_str[stencil_str.find('Q')+1:])
+    pdfs, velocity_field = ps.fields("pdfs({q}), velocity(3) : double[3D]".format(q=q), layout='fzyx')
+    options['optimization']['symbolic_field'] = pdfs
+
+    vp = [
+        ('double', 'omega_0'),
+        ('double', 'omega_1'),
+        ('double', 'omega_2'),
+        ('double', 'omega_3'),
+        ('double', 'omega_4'),
+        ('double', 'omega_5'),
+        ('double', 'omega_6'),
+        ('int32_t', 'cudaBlockSize0'),
+        ('int32_t', 'cudaBlockSize1'),
+    ]
+    update_rule = create_lb_collision_rule(**options)
+    generate_lattice_model(ctx, 'UniformGridGenerated_LatticeModel', update_rule)
+
+    infoHeaderParams = {
+        'stencil': stencil_str,
+        'q': q,
+        'configName': ctx.config,
+        'cse_global': int(options['optimization']['cse_global']),
+        'cse_pdfs': int(options['optimization']['cse_pdfs']),
+    }
+    ctx.write_file("UniformGridGenerated_Defines.h", info_header.format(**infoHeaderParams))
-- 
GitLab