From 41f8d43ac0cd67ab6774674bf86492cfebb99794 Mon Sep 17 00:00:00 2001
From: Sebastian Eibl <sebastian.eibl@fau.de>
Date: Mon, 3 Jun 2019 13:27:44 +0200
Subject: [PATCH] GranularGas benchmarks: rank-reduced timers, per-rank timings, fix blocks_y/z

---
 .../GranularGas/MESA_PD_GranularGas.cpp       | 84 +++++++++++--------
 .../GranularGas/MESA_PD_KernelBenchmark.cpp   | 61 +++++++++++++-
 .../benchmarks/GranularGas/PE_GranularGas.cpp | 42 ++++++++--
 3 files changed, 142 insertions(+), 45 deletions(-)

diff --git a/apps/benchmarks/GranularGas/MESA_PD_GranularGas.cpp b/apps/benchmarks/GranularGas/MESA_PD_GranularGas.cpp
index 09c6d7549..68cd3111b 100644
--- a/apps/benchmarks/GranularGas/MESA_PD_GranularGas.cpp
+++ b/apps/benchmarks/GranularGas/MESA_PD_GranularGas.cpp
@@ -47,7 +47,6 @@
 #include <core/logging/Logging.h>
 #include <core/OpenMP.h>
 #include <core/timing/Timer.h>
-#include <core/timing/TimingPool.h>
 #include <core/waLBerlaBuildInfo.h>
 #include <postprocessing/sqlite/SQLite.h>
 #include <vtk/VTKOutput.h>
@@ -277,6 +276,7 @@ int main( int argc, char ** argv )
       int64_t contactsDetected = 0;
       int64_t contactsTreated  = 0;
       if (bBarrier) WALBERLA_MPI_BARRIER();
+      timer.start();
       for (int64_t i=0; i < simulationSteps; ++i)
       {
          //      if (i % visSpacing == 0)
@@ -330,42 +330,49 @@ int main( int argc, char ** argv )
          if (bBarrier) WALBERLA_MPI_BARRIER();
          tp["SNN"].end();
 
-         if( i % 100 == 0 )
-         {
-            WALBERLA_LOG_DEVEL_ON_ROOT( "Timestep " << i << " / " << simulationSteps );
-            SNNBytesSent     = SNN.getBytesSent();
-            SNNBytesReceived = SNN.getBytesReceived();
-            SNNSends         = SNN.getNumberOfSends();
-            SNNReceives      = SNN.getNumberOfReceives();
-            RPBytesSent      = RP.getBytesSent();
-            RPBytesReceived  = RP.getBytesReceived();
-            RPSends          = RP.getNumberOfSends();
-            RPReceives       = RP.getNumberOfReceives();
-            walberla::mpi::reduceInplace(SNNBytesSent, walberla::mpi::SUM);
-            walberla::mpi::reduceInplace(SNNBytesReceived, walberla::mpi::SUM);
-            walberla::mpi::reduceInplace(SNNSends, walberla::mpi::SUM);
-            walberla::mpi::reduceInplace(SNNReceives, walberla::mpi::SUM);
-            walberla::mpi::reduceInplace(RPBytesSent, walberla::mpi::SUM);
-            walberla::mpi::reduceInplace(RPBytesReceived, walberla::mpi::SUM);
-            walberla::mpi::reduceInplace(RPSends, walberla::mpi::SUM);
-            walberla::mpi::reduceInplace(RPReceives, walberla::mpi::SUM);
-            auto cC = walberla::mpi::reduce(contactsChecked, walberla::mpi::SUM);
-            auto cD = walberla::mpi::reduce(contactsDetected, walberla::mpi::SUM);
-            auto cT = walberla::mpi::reduce(contactsTreated, walberla::mpi::SUM);
-            WALBERLA_LOG_DEVEL_ON_ROOT( "SNN bytes communicated:   " << SNNBytesSent << " / " << SNNBytesReceived );
-            WALBERLA_LOG_DEVEL_ON_ROOT( "SNN communication partners: " << SNNSends << " / " << SNNReceives );
-            WALBERLA_LOG_DEVEL_ON_ROOT( "RP bytes communicated:  " << RPBytesSent << " / " << RPBytesReceived );
-            WALBERLA_LOG_DEVEL_ON_ROOT( "RP communication partners: " << RPSends << " / " << RPReceives );
-            WALBERLA_LOG_DEVEL_ON_ROOT( "contacts checked/detected/treated: " << cC << " / " << cD << " / " << cT );
-            if (bBarrier) WALBERLA_MPI_BARRIER();
-         }
+         //               if( i % 100 == 0 )
+         //               {
+         //                  WALBERLA_LOG_DEVEL_ON_ROOT( "Timestep " << i << " / " << simulationSteps );
+         //                  SNNBytesSent     = SNN.getBytesSent();
+         //                  SNNBytesReceived = SNN.getBytesReceived();
+         //                  SNNSends         = SNN.getNumberOfSends();
+         //                  SNNReceives      = SNN.getNumberOfReceives();
+         //                  RPBytesSent      = RP.getBytesSent();
+         //                  RPBytesReceived  = RP.getBytesReceived();
+         //                  RPSends          = RP.getNumberOfSends();
+         //                  RPReceives       = RP.getNumberOfReceives();
+         //                  walberla::mpi::reduceInplace(SNNBytesSent, walberla::mpi::SUM);
+         //                  walberla::mpi::reduceInplace(SNNBytesReceived, walberla::mpi::SUM);
+         //                  walberla::mpi::reduceInplace(SNNSends, walberla::mpi::SUM);
+         //                  walberla::mpi::reduceInplace(SNNReceives, walberla::mpi::SUM);
+         //                  walberla::mpi::reduceInplace(RPBytesSent, walberla::mpi::SUM);
+         //                  walberla::mpi::reduceInplace(RPBytesReceived, walberla::mpi::SUM);
+         //                  walberla::mpi::reduceInplace(RPSends, walberla::mpi::SUM);
+         //                  walberla::mpi::reduceInplace(RPReceives, walberla::mpi::SUM);
+         //                  auto cC = walberla::mpi::reduce(contactsChecked, walberla::mpi::SUM);
+         //                  auto cD = walberla::mpi::reduce(contactsDetected, walberla::mpi::SUM);
+         //                  auto cT = walberla::mpi::reduce(contactsTreated, walberla::mpi::SUM);
+         //                  WALBERLA_LOG_DEVEL_ON_ROOT( "SNN bytes communicated:   " << SNNBytesSent << " / " << SNNBytesReceived );
+         //                  WALBERLA_LOG_DEVEL_ON_ROOT( "SNN communication partners: " << SNNSends << " / " << SNNReceives );
+         //                  WALBERLA_LOG_DEVEL_ON_ROOT( "RP bytes communicated:  " << RPBytesSent << " / " << RPBytesReceived );
+         //                  WALBERLA_LOG_DEVEL_ON_ROOT( "RP communication partners: " << RPSends << " / " << RPReceives );
+         //                  WALBERLA_LOG_DEVEL_ON_ROOT( "contacts checked/detected/treated: " << cC << " / " << cD << " / " << cT );
+         //               }
       }
       timer.end();
+
+      auto timer_reduced = walberla::timing::getReduced(timer, REDUCE_TOTAL, 0);
+      double PUpS = 0.0;
+      WALBERLA_ROOT_SECTION()
+      {
+         WALBERLA_LOG_INFO_ON_ROOT(*timer_reduced);
+         WALBERLA_LOG_INFO_ON_ROOT("runtime: " << timer_reduced->max());
+         PUpS = double_c(numParticles) * double_c(simulationSteps) / double_c(timer_reduced->max());
+         WALBERLA_LOG_INFO_ON_ROOT("PUpS: " << PUpS);
+      }
+
       auto tp_reduced = tp.getReduced();
       WALBERLA_LOG_INFO_ON_ROOT(*tp_reduced);
-      WALBERLA_LOG_INFO_ON_ROOT("runtime: " << timer.average());
-      auto PUpS = real_c(numParticles) * real_c(simulationSteps) / timer.average();
-      WALBERLA_LOG_INFO_ON_ROOT("PUpS: " << PUpS);
       WALBERLA_LOG_INFO_ON_ROOT("*** SIMULATION - END ***");
 
       WALBERLA_LOG_INFO_ON_ROOT("*** CHECKING RESULT - START ***");
@@ -436,8 +443,8 @@ int main( int argc, char ** argv )
          integerProperties["contacts_detected"]   = contactsDetected;
          integerProperties["contacts_treated"]    = contactsTreated;
          integerProperties["blocks_x"]            = int64_c(forest->getXSize());
-         integerProperties["blocks_y"]            = int64_c(forest->getXSize());
-         integerProperties["blocks_z"]            = int64_c(forest->getXSize());
+         integerProperties["blocks_y"]            = int64_c(forest->getYSize());
+         integerProperties["blocks_z"]            = int64_c(forest->getZSize());
          realProperties["domain_x"]               = double_c(forest->getDomain().xSize());
          realProperties["domain_y"]               = double_c(forest->getDomain().ySize());
          realProperties["domain_z"]               = double_c(forest->getDomain().zSize());
@@ -454,6 +461,10 @@ int main( int argc, char ** argv )
          integerProperties["RPReceives"]          = RPReceives;
          realProperties["linkedCellsVolume"]      = linkedCellsVolume;
          integerProperties["numLinkedCells"]      = int64_c(numLinkedCells);
+         realProperties["timer_min"]              = timer_reduced->min();
+         realProperties["timer_max"]              = timer_reduced->max();
+         realProperties["timer_average"]          = timer_reduced->average();
+         realProperties["timer_total"]            = timer_reduced->total();
          stringProperties["SLURM_CLUSTER_NAME"]       = envToString(std::getenv( "SLURM_CLUSTER_NAME" ));
          stringProperties["SLURM_CPUS_ON_NODE"]       = envToString(std::getenv( "SLURM_CPUS_ON_NODE" ));
          stringProperties["SLURM_CPUS_PER_TASK"]      = envToString(std::getenv( "SLURM_CPUS_PER_TASK" ));
@@ -466,11 +477,10 @@ int main( int argc, char ** argv )
          stringProperties["SLURM_NTASKS_PER_CORE"]    = envToString(std::getenv( "SLURM_NTASKS_PER_CORE" ));
          stringProperties["SLURM_NTASKS_PER_NODE"]    = envToString(std::getenv( "SLURM_NTASKS_PER_NODE" ));
          stringProperties["SLURM_NTASKS_PER_SOCKET"]  = envToString(std::getenv( "SLURM_NTASKS_PER_SOCKET" ));
-         stringProperties["SLURM_TASKS_PER_NODE"]     = envToString(std::getenv( "SLURM_TASKS_PER_NODE" ));
-
 
          auto runId = postprocessing::storeRunInSqliteDB( sqlFile, integerProperties, stringProperties, realProperties );
          postprocessing::storeTimingPoolInSqliteDB( sqlFile, runId, *tp_reduced, "Timeloop" );
+
       }
       WALBERLA_LOG_INFO_ON_ROOT("*** SQL OUTPUT - END ***");
    }
diff --git a/apps/benchmarks/GranularGas/MESA_PD_KernelBenchmark.cpp b/apps/benchmarks/GranularGas/MESA_PD_KernelBenchmark.cpp
index ea71d2890..723500d58 100644
--- a/apps/benchmarks/GranularGas/MESA_PD_KernelBenchmark.cpp
+++ b/apps/benchmarks/GranularGas/MESA_PD_KernelBenchmark.cpp
@@ -41,8 +41,12 @@
 #include <blockforest/Initialization.h>
 #include <core/Abort.h>
 #include <core/Environment.h>
+#include <core/Hostname.h>
 #include <core/math/Random.h>
+#include <core/mpi/Gatherv.h>
+#include <core/mpi/RecvBuffer.h>
 #include <core/mpi/Reduce.h>
+#include <core/mpi/SendBuffer.h>
 #include <core/grid_generator/SCIterator.h>
 #include <core/logging/Logging.h>
 #include <core/OpenMP.h>
@@ -132,6 +136,57 @@ std::string envToString(const char* env)
    return env != nullptr ? std::string(env) : "";
 }
 
+/// Gathers the local timing pool of every process and stores one row per process
+/// (hostname, rank and the average of each timer) in the SQLite database.
+///
+/// Must be called by all processes since it contains a gather operation; the gathered
+/// data is unpacked and written to the database inside the root section only.
+/// \param runId      id of the run the rows are attached to
+/// \param dbFile     SQLite database file
+/// \param tableName  table receiving the per-process rows
+/// \param tp         local timing pool of the calling process
+void storeNodeTimings( const uint_t runId, const std::string& dbFile,
+                       const std::string& tableName, const WcTimingPool& tp )
+{
+   // pack the local contribution: hostname, rank, timing pool
+   walberla::mpi::SendBuffer sendBuffer;
+   sendBuffer << walberla::getHostName();
+   sendBuffer << int64_t(walberla::mpi::MPIManager::instance()->rank());
+   sendBuffer << tp;
+
+   walberla::mpi::RecvBuffer recvBuffer;
+   walberla::mpi::gathervBuffer(sendBuffer, recvBuffer);
+
+   WALBERLA_ROOT_SECTION()
+   {
+      // unpack in the same order as packed above, one record per loop iteration
+      while (!recvBuffer.isEmpty())
+      {
+         std::string  senderHost;
+         int64_t      senderRank;
+         WcTimingPool senderPool;
+         recvBuffer >> senderHost;
+         recvBuffer >> senderRank;
+         recvBuffer >> senderPool;
+
+         // fresh property maps for every record
+         std::map< std::string, walberla::int64_t > integerProperties;
+         std::map< std::string, double >            realProperties;
+         std::map< std::string, std::string >       stringProperties;
+
+         stringProperties["hostname"] = senderHost;
+         integerProperties["rank"]    = senderRank;
+         for (auto it = senderPool.begin(); it != senderPool.end(); ++it)
+            realProperties[it->first] = it->second.average();
+
+         postprocessing::storeAdditionalRunInfoInSqliteDB( runId, dbFile, tableName,
+                                                           integerProperties,
+                                                           stringProperties, realProperties );
+      }
+   }
+}
+
+
 int main( int argc, char ** argv )
 {
    using namespace walberla::timing;
@@ -438,6 +493,8 @@ int main( int argc, char ** argv )
       walberla::mpi::reduceInplace(local_aabbs, walberla::mpi::SUM);
       walberla::mpi::reduceInplace(neighbor_subdomains, walberla::mpi::SUM);
       walberla::mpi::reduceInplace(neighbor_processes, walberla::mpi::SUM);
+
+      uint_t runId = uint_c(-1);
       WALBERLA_ROOT_SECTION()
       {
          std::map< std::string, walberla::int64_t > integerProperties;
@@ -450,6 +507,7 @@ int main( int argc, char ** argv )
          integerProperties["jobid"]               = jobid;
          integerProperties["mpi_num_processes"]   = mpiManager->numProcesses();
          integerProperties["omp_max_threads"]     = omp_get_max_threads();
+         integerProperties["outerIteration"]      = int64_c(outerIteration);
          integerProperties["numOuterIterations"]  = numOuterIterations;
          integerProperties["simulationSteps"]     = simulationSteps;
          integerProperties["bBarrier"]            = int64_c(bBarrier);
@@ -492,9 +550,10 @@ int main( int argc, char ** argv )
          stringProperties["SLURM_TASKS_PER_NODE"]     = envToString(std::getenv( "SLURM_TASKS_PER_NODE" ));
 
 
-         auto runId = postprocessing::storeRunInSqliteDB( sqlFile, integerProperties, stringProperties, realProperties );
+         runId = postprocessing::storeRunInSqliteDB( sqlFile, integerProperties, stringProperties, realProperties );
          postprocessing::storeTimingPoolInSqliteDB( sqlFile, runId, *tp_reduced, "Timeloop" );
       }
+      storeNodeTimings(runId, sqlFile, "NodeTiming", tp);
       WALBERLA_LOG_INFO_ON_ROOT("*** SQL OUTPUT - END ***");
    }
 
diff --git a/apps/benchmarks/GranularGas/PE_GranularGas.cpp b/apps/benchmarks/GranularGas/PE_GranularGas.cpp
index 6178e9ee9..d38f72278 100644
--- a/apps/benchmarks/GranularGas/PE_GranularGas.cpp
+++ b/apps/benchmarks/GranularGas/PE_GranularGas.cpp
@@ -43,6 +43,11 @@ using namespace walberla::timing;
 
 using BodyTuple = std::tuple<Sphere, Plane> ;
 
+std::string envToString(const char* env) // nullptr (e.g. unset variable) maps to ""
+{
+   return (env == nullptr) ? std::string() : std::string(env);
+}
+
 int main( int argc, char ** argv )
 {
    WcTimingTree tt;
@@ -250,9 +255,10 @@ int main( int argc, char ** argv )
    for (int64_t outerIteration = 0; outerIteration < numOuterIterations; ++outerIteration)
    {
       WALBERLA_LOG_INFO_ON_ROOT("*** RUNNING OUTER ITERATION " << outerIteration << " ***");
-      WALBERLA_MPI_BARRIER();
       WcTimer timer;
       WcTimingPool tp;
+      WALBERLA_MPI_BARRIER();
+      timer.start();
       for (int64_t i=0; i < simulationSteps; ++i)
       {
          if( i % 200 == 0 )
@@ -273,11 +279,17 @@ int main( int argc, char ** argv )
          //   vtkSphereOutput->write( );
          //}
       }
-      WALBERLA_MPI_BARRIER();
       timer.end();
-      WALBERLA_LOG_INFO_ON_ROOT("runtime: " << timer.average());
-      auto PUpS = real_c(numParticles) * real_c(simulationSteps) / timer.average();
-      WALBERLA_LOG_INFO_ON_ROOT("PUpS: " << PUpS);
+      auto timer_reduced = walberla::timing::getReduced(timer, REDUCE_TOTAL, 0);
+      double PUpS = 0.0;
+      WALBERLA_ROOT_SECTION()
+      {
+         WALBERLA_LOG_INFO_ON_ROOT(*timer_reduced);
+         WALBERLA_LOG_INFO_ON_ROOT("runtime: " << timer_reduced->max());
+         PUpS = double_c(numParticles) * double_c(simulationSteps) / double_c(timer_reduced->max());
+         WALBERLA_LOG_INFO_ON_ROOT("PUpS: " << PUpS);
+      }
+
       auto tp_reduced = tp.getReduced();
       WALBERLA_LOG_INFO_ON_ROOT(*tp_reduced);
       WALBERLA_LOG_INFO_ON_ROOT("*** SIMULATION - END ***");
@@ -330,14 +342,30 @@ int main( int argc, char ** argv )
          integerProperties["simulationSteps"]     = simulationSteps;
          integerProperties["bBarrier"]            = int64_c(bBarrier);
          realProperties["PUpS"]                   = double_c(PUpS);
+         realProperties["timer_min"]              = timer_reduced->min();
+         realProperties["timer_max"]              = timer_reduced->max();
+         realProperties["timer_average"]          = timer_reduced->average();
+         realProperties["timer_total"]            = timer_reduced->total();
          integerProperties["num_particles"]       = numParticles;
          integerProperties["num_ghost_particles"] = numGhostParticles;
          integerProperties["blocks_x"]            = int64_c(forest->getXSize());
-         integerProperties["blocks_y"]            = int64_c(forest->getXSize());
-         integerProperties["blocks_z"]            = int64_c(forest->getXSize());
+         integerProperties["blocks_y"]            = int64_c(forest->getYSize());
+         integerProperties["blocks_z"]            = int64_c(forest->getZSize());
          realProperties["domain_x"]               = double_c(forest->getDomain().xSize());
          realProperties["domain_y"]               = double_c(forest->getDomain().ySize());
          realProperties["domain_z"]               = double_c(forest->getDomain().zSize());
+         stringProperties["SLURM_CLUSTER_NAME"]       = envToString(std::getenv( "SLURM_CLUSTER_NAME" ));
+         stringProperties["SLURM_CPUS_ON_NODE"]       = envToString(std::getenv( "SLURM_CPUS_ON_NODE" ));
+         stringProperties["SLURM_CPUS_PER_TASK"]      = envToString(std::getenv( "SLURM_CPUS_PER_TASK" ));
+         stringProperties["SLURM_JOB_ACCOUNT"]        = envToString(std::getenv( "SLURM_JOB_ACCOUNT" ));
+         stringProperties["SLURM_JOB_ID"]             = envToString(std::getenv( "SLURM_JOB_ID" ));
+         stringProperties["SLURM_JOB_CPUS_PER_NODE"]  = envToString(std::getenv( "SLURM_JOB_CPUS_PER_NODE" ));
+         stringProperties["SLURM_JOB_NAME"]           = envToString(std::getenv( "SLURM_JOB_NAME" ));
+         stringProperties["SLURM_JOB_NUM_NODES"]      = envToString(std::getenv( "SLURM_JOB_NUM_NODES" ));
+         stringProperties["SLURM_NTASKS"]             = envToString(std::getenv( "SLURM_NTASKS" ));
+         stringProperties["SLURM_NTASKS_PER_CORE"]    = envToString(std::getenv( "SLURM_NTASKS_PER_CORE" ));
+         stringProperties["SLURM_NTASKS_PER_NODE"]    = envToString(std::getenv( "SLURM_NTASKS_PER_NODE" ));
+         stringProperties["SLURM_NTASKS_PER_SOCKET"]  = envToString(std::getenv( "SLURM_NTASKS_PER_SOCKET" ));
 
          auto runId = postprocessing::storeRunInSqliteDB( sqlFile, integerProperties, stringProperties, realProperties );
          postprocessing::storeTimingPoolInSqliteDB( sqlFile, runId, *tp_reduced, "Timeloop" );
-- 
GitLab