From f856309fc82a7ae28169c2a93193f39a89cbb53d Mon Sep 17 00:00:00 2001
From: Sebastian Eibl <sebastian.eibl@fau.de>
Date: Mon, 12 Aug 2019 14:58:05 +0200
Subject: [PATCH] updated all benchmarks

---
 apps/benchmarks/GranularGas/GranularGas.cfg   |   9 +-
 .../GranularGas/MESA_PD_LoadBalancing.cpp     | 523 ++++++++++--------
 .../benchmarks/GranularGas/PE_GranularGas.cpp |   2 +-
 apps/benchmarks/GranularGas/Parameters.cpp    |   6 +
 apps/benchmarks/GranularGas/Parameters.h      |   5 +-
 .../GranularGas/Parameters.templ.cpp          |   2 +
 .../benchmarks/GranularGas/Parameters.templ.h |   4 +-
 apps/benchmarks/GranularGas/generateConfig.py |   2 +-
 8 files changed, 325 insertions(+), 228 deletions(-)

diff --git a/apps/benchmarks/GranularGas/GranularGas.cfg b/apps/benchmarks/GranularGas/GranularGas.cfg
index 2c321312f..685b7dfc0 100644
--- a/apps/benchmarks/GranularGas/GranularGas.cfg
+++ b/apps/benchmarks/GranularGas/GranularGas.cfg
@@ -1,12 +1,13 @@
 GranularGas
 {
    simulationCorner < 0, 0, 0 >;
-   simulationDomain < 40, 40, 40 >;
+   simulationDomain < 80, 80, 80 >;
    blocks < 2,2,2 >;
-   isPeriodic < 1, 1, 1 >;
-   initialRefinementLevel 0;
-   sorting none;
+   isPeriodic < 0, 0, 0 >;
+   initialRefinementLevel 2;
+   sorting linear;
 
+   normal  <1,1,1>;
    radius  0.6;
    spacing 1.0;
    vMax    0.0;
diff --git a/apps/benchmarks/GranularGas/MESA_PD_LoadBalancing.cpp b/apps/benchmarks/GranularGas/MESA_PD_LoadBalancing.cpp
index 597eb3f7b..dd25a6bed 100644
--- a/apps/benchmarks/GranularGas/MESA_PD_LoadBalancing.cpp
+++ b/apps/benchmarks/GranularGas/MESA_PD_LoadBalancing.cpp
@@ -89,8 +89,9 @@ int main( int argc, char ** argv )
    auto mpiManager = walberla::mpi::MPIManager::instance();
    mpiManager->useWorldComm();
 
-   logging::Logging::instance()->setStreamLogLevel(logging::Logging::INFO);
-   logging::Logging::instance()->setFileLogLevel(logging::Logging::INFO);
+   //   logging::Logging::instance()->setStreamLogLevel(logging::Logging::INFO);
+   //   logging::Logging::instance()->includeLoggingToFile("LoadBalancing");
+   //   logging::Logging::instance()->setFileLogLevel(logging::Logging::DETAIL);
 
    WALBERLA_LOG_INFO_ON_ROOT( "config file: " << argv[1] );
    WALBERLA_LOG_INFO_ON_ROOT( "waLBerla Revision: " << WALBERLA_GIT_SHA1 );
@@ -184,7 +185,6 @@ int main( int argc, char ** argv )
    }
 
    auto domain = std::make_shared<domain::BlockForestDomain>(forest);
-   auto simulationDomain = forest->getDomain();
 
    WALBERLA_LOG_INFO_ON_ROOT("*** SETUP - START ***");
 
@@ -205,7 +205,7 @@ int main( int argc, char ** argv )
                                             params.spacing))
       {
          WALBERLA_CHECK(iBlk.getAABB().contains(pt));
-         auto tmp = dot( (pt - center), Vec3(real_t(1), real_t(1), real_t(1)) );
+         auto tmp = dot( (pt - center), params.normal );
          if (tmp < 0)
          {
             createSphere(*ps, pt, params.radius, smallSphere);
@@ -216,27 +216,6 @@ int main( int argc, char ** argv )
    walberla::mpi::reduceInplace(numParticles, walberla::mpi::SUM);
    WALBERLA_LOG_INFO_ON_ROOT("#particles created: " << numParticles);
 
-   auto shift = (params.spacing - params.radius - params.radius) * real_t(0.5);
-   auto confiningDomain = simulationDomain.getExtended(shift);
-
-   //   if (!forest->isPeriodic(0))
-   //   {
-   //      createPlane(*ps, *ss, confiningDomain.minCorner(), Vec3(+1,0,0));
-   //      createPlane(*ps, *ss, confiningDomain.maxCorner(), Vec3(-1,0,0));
-   //   }
-
-   //   if (!forest->isPeriodic(1))
-   //   {
-   //      createPlane(*ps, *ss, confiningDomain.minCorner(), Vec3(0,+1,0));
-   //      createPlane(*ps, *ss, confiningDomain.maxCorner(), Vec3(0,-1,0));
-   //   }
-
-   //   if (!forest->isPeriodic(2))
-   //   {
-   //      createPlane(*ps, *ss, confiningDomain.minCorner(), Vec3(0,0,+1));
-   //      createPlane(*ps, *ss, confiningDomain.maxCorner(), Vec3(0,0,-1));
-   //   }
-
    WALBERLA_LOG_INFO_ON_ROOT("*** SETUP - END ***");
 
    WALBERLA_LOG_INFO_ON_ROOT("*** VTK ***");
@@ -264,224 +243,328 @@ int main( int argc, char ** argv )
    mpi::SyncNextNeighborsBlockForest     SNN;
 
    // initial sync
-   domain::createWithNeighborhood( accessor, *forest, *ic );
-   forest->refresh();
-   domain->refresh();
-   lc = std::make_shared<data::LinkedCells>(domain->getUnionOfLocalAABBs().getExtended(params.spacing), params.spacing );
    ps->forEachParticle(true, kernel::SelectLocal(), accessor, assoc, accessor);
-   WALBERLA_LOG_DEVEL_VAR(ps->size());
    SNN(*ps, forest, domain);
    sortParticleStorage(*ps, params.sorting, lc->domain_, uint_c(lc->numCellsPerDim_[0]));
 
-   for (int64_t outerIteration = 0; outerIteration < params.numOuterIterations; ++outerIteration)
+   WcTimer      timerImbalanced;
+   WcTimer      timerLoadBalancing;
+   WcTimer      timerBalanced;
+   WcTimingPool tpImbalanced;
+   WcTimingPool tpBalanced;
+   auto    SNNBytesSent     = SNN.getBytesSent();
+   auto    SNNBytesReceived = SNN.getBytesReceived();
+   auto    SNNSends         = SNN.getNumberOfSends();
+   auto    SNNReceives      = SNN.getNumberOfReceives();
+   auto    RPBytesSent      = RP.getBytesSent();
+   auto    RPBytesReceived  = RP.getBytesReceived();
+   auto    RPSends          = RP.getNumberOfSends();
+   auto    RPReceives       = RP.getNumberOfReceives();
+   int64_t contactsChecked  = 0;
+   int64_t contactsDetected = 0;
+   int64_t contactsTreated  = 0;
+
+   WALBERLA_LOG_DEVEL_ON_ROOT("running imbalanced simulation");
+   timerImbalanced.start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
    {
-      WALBERLA_LOG_DEVEL_ON_ROOT("*** RUNNING OUTER ITERATION " << outerIteration << " ***");
-
-      WcTimer      timer;
-      WcTimingPool tp;
-      auto    SNNBytesSent     = SNN.getBytesSent();
-      auto    SNNBytesReceived = SNN.getBytesReceived();
-      auto    SNNSends         = SNN.getNumberOfSends();
-      auto    SNNReceives      = SNN.getNumberOfReceives();
-      auto    RPBytesSent      = RP.getBytesSent();
-      auto    RPBytesReceived  = RP.getBytesReceived();
-      auto    RPSends          = RP.getNumberOfSends();
-      auto    RPReceives       = RP.getNumberOfReceives();
-      int64_t contactsChecked  = 0;
-      int64_t contactsDetected = 0;
-      int64_t contactsTreated  = 0;
+      //WALBERLA_LOG_DEVEL_ON_ROOT("timestep: " << i << " / " << params.simulationSteps );
+      //         if (i % params.visSpacing == 0)
+      //         {
+      //            vtkWriter->write();
+      //         }
+
+      tpImbalanced["AssocToBlock"].start();
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, assoc, accessor);
       if (params.bBarrier) WALBERLA_MPI_BARRIER();
-      std::stringstream fout;
-      timer.start();
-      for (int64_t i=0; i < params.simulationSteps; ++i)
+      tpImbalanced["AssocToBlock"].end();
+
+      tpImbalanced["GenerateLinkedCells"].start();
+      lc->clear();
+      ps->forEachParticle(true, kernel::SelectAll(), accessor, ipilc, accessor, *lc);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpImbalanced["GenerateLinkedCells"].end();
+
+      tpImbalanced["DEM"].start();
+      contactsChecked  = 0;
+      contactsDetected = 0;
+      contactsTreated  = 0;
+      lc->forEachParticlePairHalf(true,
+                                  kernel::SelectAll(),
+                                  accessor,
+                                  [&](const size_t idx1, const size_t idx2, auto& ac)
       {
-         fout.clear();
-         //WALBERLA_LOG_DEVEL_ON_ROOT("timestep: " << i << " / " << params.simulationSteps );
-         //         if (i % params.visSpacing == 0)
-         //         {
-         //            vtkWriter->write();
-         //         }
-
-         tp["AssocToBlock"].start();
-         ps->forEachParticle(true, kernel::SelectLocal(), accessor, assoc, accessor);
-         if (params.bBarrier) WALBERLA_MPI_BARRIER();
-         tp["AssocToBlock"].end();
-
-         tp["GenerateLinkedCells"].start();
-         lc->clear();
-         ps->forEachParticle(true, kernel::SelectAll(), accessor, ipilc, accessor, *lc);
-         if (params.bBarrier) WALBERLA_MPI_BARRIER();
-         tp["GenerateLinkedCells"].end();
-
-         tp["DEM"].start();
-         contactsChecked  = 0;
-         contactsDetected = 0;
-         contactsTreated  = 0;
-         lc->forEachParticlePairHalf(true,
-                                     kernel::SelectAll(),
-                                     accessor,
-                                     [&](const size_t idx1, const size_t idx2, auto& ac)
+         ++contactsChecked;
+         if (double_cast(idx1, idx2, ac, acd, ac ))
          {
-            ++contactsChecked;
-            if (double_cast(idx1, idx2, ac, acd, ac ))
+            ++contactsDetected;
+            if (contact_filter(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), *domain))
             {
-               ++contactsDetected;
-               if (contact_filter(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), *domain))
-               {
-                  ++contactsTreated;
-                  dem(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), acd.getContactNormal(), acd.getPenetrationDepth());
-               }
+               ++contactsTreated;
+               dem(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), acd.getContactNormal(), acd.getPenetrationDepth());
             }
-         },
-         accessor );
-         if (params.bBarrier) WALBERLA_MPI_BARRIER();
-         tp["DEM"].end();
-
-         tp["ReduceForce"].start();
-         RP.operator()<ForceTorqueNotification>(*ps);
-         if (params.bBarrier) WALBERLA_MPI_BARRIER();
-         tp["ReduceForce"].end();
-
-         tp["Euler"].start();
-         //ps->forEachParticle(false, [&](const size_t idx){WALBERLA_CHECK_EQUAL(ps->getForce(idx), Vec3(0,0,0), *(*ps)[idx] << "\n" << idx);});
-         ps->forEachParticle(true, kernel::SelectLocal(), accessor, explicitEulerWithShape, accessor);
-         if (params.bBarrier) WALBERLA_MPI_BARRIER();
-         tp["Euler"].end();
-
-         tp["SNN"].start();
-         SNN(*ps, forest, domain);
-         if (params.bBarrier) WALBERLA_MPI_BARRIER();
-         tp["SNN"].end();
-      }
-      timer.end();
-
-      SNNBytesSent     = SNN.getBytesSent();
-      SNNBytesReceived = SNN.getBytesReceived();
-      SNNSends         = SNN.getNumberOfSends();
-      SNNReceives      = SNN.getNumberOfReceives();
-      RPBytesSent      = RP.getBytesSent();
-      RPBytesReceived  = RP.getBytesReceived();
-      RPSends          = RP.getNumberOfSends();
-      RPReceives       = RP.getNumberOfReceives();
-      walberla::mpi::reduceInplace(SNNBytesSent, walberla::mpi::SUM);
-      walberla::mpi::reduceInplace(SNNBytesReceived, walberla::mpi::SUM);
-      walberla::mpi::reduceInplace(SNNSends, walberla::mpi::SUM);
-      walberla::mpi::reduceInplace(SNNReceives, walberla::mpi::SUM);
-      walberla::mpi::reduceInplace(RPBytesSent, walberla::mpi::SUM);
-      walberla::mpi::reduceInplace(RPBytesReceived, walberla::mpi::SUM);
-      walberla::mpi::reduceInplace(RPSends, walberla::mpi::SUM);
-      walberla::mpi::reduceInplace(RPReceives, walberla::mpi::SUM);
-      auto cC = walberla::mpi::reduce(contactsChecked, walberla::mpi::SUM);
-      auto cD = walberla::mpi::reduce(contactsDetected, walberla::mpi::SUM);
-      auto cT = walberla::mpi::reduce(contactsTreated, walberla::mpi::SUM);
-      WALBERLA_LOG_DEVEL_ON_ROOT( "SNN bytes communicated:   " << SNNBytesSent << " / " << SNNBytesReceived );
-      WALBERLA_LOG_DEVEL_ON_ROOT( "SNN communication partners: " << SNNSends << " / " << SNNReceives );
-      WALBERLA_LOG_DEVEL_ON_ROOT( "RP bytes communicated:  " << RPBytesSent << " / " << RPBytesReceived );
-      WALBERLA_LOG_DEVEL_ON_ROOT( "RP communication partners: " << RPSends << " / " << RPReceives );
-      WALBERLA_LOG_DEVEL_ON_ROOT( "contacts checked/detected/treated: " << cC << " / " << cD << " / " << cT );
-
-      auto timer_reduced = walberla::timing::getReduced(timer, REDUCE_TOTAL, 0);
-      double PUpS = 0.0;
-      WALBERLA_ROOT_SECTION()
-      {
-         WALBERLA_LOG_INFO_ON_ROOT(*timer_reduced);
-         WALBERLA_LOG_INFO_ON_ROOT("runtime: " << timer_reduced->max());
-         PUpS = double_c(numParticles) * double_c(params.simulationSteps) / double_c(timer_reduced->max());
-         WALBERLA_LOG_INFO_ON_ROOT("PUpS: " << PUpS);
-      }
+         }
+      },
+      accessor );
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpImbalanced["DEM"].end();
 
-      auto tp_reduced = tp.getReduced();
-      WALBERLA_LOG_INFO_ON_ROOT(*tp_reduced);
-      WALBERLA_LOG_INFO_ON_ROOT("*** SIMULATION - END ***");
+      tpImbalanced["ReduceForce"].start();
+      RP.operator()<ForceTorqueNotification>(*ps);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpImbalanced["ReduceForce"].end();
 
-      if (params.checkSimulation)
-      {
-         check(*ps, *forest, params.spacing);
-      }
+      tpImbalanced["Euler"].start();
+      //ps->forEachParticle(false, [&](const size_t idx){WALBERLA_CHECK_EQUAL(ps->getForce(idx), Vec3(0,0,0), *(*ps)[idx] << "\n" << idx);});
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, explicitEulerWithShape, accessor);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpImbalanced["Euler"].end();
+
+      tpImbalanced["SNN"].start();
+      SNN(*ps, forest, domain);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpImbalanced["SNN"].end();
+   }
+   timerImbalanced.end();
+   WALBERLA_MPI_BARRIER();
 
-      WALBERLA_LOG_INFO_ON_ROOT("*** SQL OUTPUT - START ***");
-      numParticles = 0;
-      int64_t numGhostParticles = 0;
-      ps->forEachParticle(false,
-                          kernel::SelectAll(),
-                          accessor,
-                          [&numParticles, &numGhostParticles](const size_t idx, auto& ac)
+   timerLoadBalancing.start();
+   if (bRebalance)
+   {
+      WALBERLA_LOG_DEVEL_ON_ROOT("running load balancing");
+      domain::createWithNeighborhood( accessor, *forest, *ic );
+      for (auto pIt = ps->begin(); pIt != ps->end(); )
       {
-         if (data::particle_flags::isSet( ac.getFlagsRef(idx), data::particle_flags::GHOST))
+         using namespace walberla::mesa_pd::data::particle_flags;
+         if (isSet(pIt->getFlags(), GHOST))
          {
-            ++numGhostParticles;
+            pIt = ps->erase(pIt);
          } else
          {
-            ++numParticles;
+            pIt->getGhostOwnersRef().clear();
+            ++pIt;
          }
-      },
-      accessor);
-      walberla::mpi::reduceInplace(numParticles, walberla::mpi::SUM);
-      walberla::mpi::reduceInplace(numGhostParticles, walberla::mpi::SUM);
-      walberla::mpi::reduceInplace(contactsChecked, walberla::mpi::SUM);
-      walberla::mpi::reduceInplace(contactsDetected, walberla::mpi::SUM);
-      walberla::mpi::reduceInplace(contactsTreated, walberla::mpi::SUM);
-      double linkedCellsVolume = lc->domain_.volume();
-      walberla::mpi::reduceInplace(linkedCellsVolume, walberla::mpi::SUM);
-      size_t numLinkedCells = lc->cells_.size();
-      walberla::mpi::reduceInplace(numLinkedCells, walberla::mpi::SUM);
-      size_t local_aabbs         = domain->getNumLocalAABBs();
-      size_t neighbor_subdomains = domain->getNumNeighborSubdomains();
-      size_t neighbor_processes  = domain->getNumNeighborProcesses();
-      walberla::mpi::reduceInplace(local_aabbs, walberla::mpi::SUM);
-      walberla::mpi::reduceInplace(neighbor_subdomains, walberla::mpi::SUM);
-      walberla::mpi::reduceInplace(neighbor_processes, walberla::mpi::SUM);
-
-      WALBERLA_LOG_DEVEL_VAR_ON_ROOT(numParticles);
-      WALBERLA_LOG_DEVEL_VAR_ON_ROOT(numGhostParticles);
-
-      uint_t runId = uint_c(-1);
-      WALBERLA_ROOT_SECTION()
-      {
-         stringProperties["walberla_git"]         = WALBERLA_GIT_SHA1;
-         stringProperties["tag"]                  = "mesa_pd";
-         integerProperties["mpi_num_processes"]   = mpiManager->numProcesses();
-         integerProperties["omp_max_threads"]     = omp_get_max_threads();
-         realProperties["PUpS"]                   = double_c(PUpS);
-         realProperties["timer_min"]              = timer_reduced->min();
-         realProperties["timer_max"]              = timer_reduced->max();
-         realProperties["timer_average"]          = timer_reduced->average();
-         realProperties["timer_total"]            = timer_reduced->total();
-         integerProperties["outerIteration"]      = int64_c(outerIteration);
-         integerProperties["num_particles"]       = numParticles;
-         integerProperties["num_ghost_particles"] = numGhostParticles;
-         integerProperties["contacts_checked"]    = contactsChecked;
-         integerProperties["contacts_detected"]   = contactsDetected;
-         integerProperties["contacts_treated"]    = contactsTreated;
-         integerProperties["local_aabbs"]         = int64_c(local_aabbs);
-         integerProperties["neighbor_subdomains"] = int64_c(neighbor_subdomains);
-         integerProperties["neighbor_processes"]  = int64_c(neighbor_processes);
-         integerProperties["SNNBytesSent"]        = SNNBytesSent;
-         integerProperties["SNNBytesReceived"]    = SNNBytesReceived;
-         integerProperties["SNNSends"]            = SNNSends;
-         integerProperties["SNNReceives"]         = SNNReceives;
-         integerProperties["RPBytesSent"]         = RPBytesSent;
-         integerProperties["RPBytesReceived"]     = RPBytesReceived;
-         integerProperties["RPSends"]             = RPSends;
-         integerProperties["RPReceives"]          = RPReceives;
-         realProperties["linkedCellsVolume"]      = linkedCellsVolume;
-         integerProperties["numLinkedCells"]      = int64_c(numLinkedCells);
-
-         addBuildInfoToSQL( integerProperties, realProperties, stringProperties );
-         saveToSQL(params, integerProperties, realProperties, stringProperties );
-         addDomainPropertiesToSQL(*forest, integerProperties, realProperties, stringProperties);
-         addSlurmPropertiesToSQL(integerProperties, realProperties, stringProperties);
-
-         runId = postprocessing::storeRunInSqliteDB( params.sqlFile, integerProperties, stringProperties, realProperties );
-         postprocessing::storeTimingPoolInSqliteDB( params.sqlFile, runId, *tp_reduced, "Timeloop" );
       }
+      forest->refresh();
+      domain->refresh();
+      lc = std::make_shared<data::LinkedCells>(domain->getUnionOfLocalAABBs().getExtended(params.spacing), params.spacing );
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, assoc, accessor);
+      WALBERLA_LOG_DEVEL_VAR(ps->size());
+      SNN(*ps, forest, domain);
+      sortParticleStorage(*ps, params.sorting, lc->domain_, uint_c(lc->numCellsPerDim_[0]));
+   }
+   timerLoadBalancing.end();
+
+   WALBERLA_MPI_BARRIER();
+   WALBERLA_LOG_DEVEL_ON_ROOT("running balanced simulation");
+   timerBalanced.start();
+   for (int64_t i=0; i < params.simulationSteps; ++i)
+   {
+      //WALBERLA_LOG_DEVEL_ON_ROOT("timestep: " << i << " / " << params.simulationSteps );
+      //         if (i % params.visSpacing == 0)
+      //         {
+      //            vtkWriter->write();
+      //         }
+
+      tpBalanced["AssocToBlock"].start();
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, assoc, accessor);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpBalanced["AssocToBlock"].end();
+
+      tpBalanced["GenerateLinkedCells"].start();
+      lc->clear();
+      ps->forEachParticle(true, kernel::SelectAll(), accessor, ipilc, accessor, *lc);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpBalanced["GenerateLinkedCells"].end();
+
+      tpBalanced["DEM"].start();
+      contactsChecked  = 0;
+      contactsDetected = 0;
+      contactsTreated  = 0;
+      lc->forEachParticlePairHalf(true,
+                                  kernel::SelectAll(),
+                                  accessor,
+                                  [&](const size_t idx1, const size_t idx2, auto& ac)
+      {
+         ++contactsChecked;
+         if (double_cast(idx1, idx2, ac, acd, ac ))
+         {
+            ++contactsDetected;
+            if (contact_filter(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), *domain))
+            {
+               ++contactsTreated;
+               dem(acd.getIdx1(), acd.getIdx2(), ac, acd.getContactPoint(), acd.getContactNormal(), acd.getPenetrationDepth());
+            }
+         }
+      },
+      accessor );
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpBalanced["DEM"].end();
+
+      tpBalanced["ReduceForce"].start();
+      RP.operator()<ForceTorqueNotification>(*ps);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpBalanced["ReduceForce"].end();
+
+      tpBalanced["Euler"].start();
+      //ps->forEachParticle(false, [&](const size_t idx){WALBERLA_CHECK_EQUAL(ps->getForce(idx), Vec3(0,0,0), *(*ps)[idx] << "\n" << idx);});
+      ps->forEachParticle(true, kernel::SelectLocal(), accessor, explicitEulerWithShape, accessor);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpBalanced["Euler"].end();
+
+      tpBalanced["SNN"].start();
+      SNN(*ps, forest, domain);
+      if (params.bBarrier) WALBERLA_MPI_BARRIER();
+      tpBalanced["SNN"].end();
+   }
+   timerBalanced.end();
+
+   SNNBytesSent     = SNN.getBytesSent();
+   SNNBytesReceived = SNN.getBytesReceived();
+   SNNSends         = SNN.getNumberOfSends();
+   SNNReceives      = SNN.getNumberOfReceives();
+   RPBytesSent      = RP.getBytesSent();
+   RPBytesReceived  = RP.getBytesReceived();
+   RPSends          = RP.getNumberOfSends();
+   RPReceives       = RP.getNumberOfReceives();
+   walberla::mpi::reduceInplace(SNNBytesSent, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(SNNBytesReceived, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(SNNSends, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(SNNReceives, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPBytesSent, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPBytesReceived, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPSends, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(RPReceives, walberla::mpi::SUM);
+   auto cC = walberla::mpi::reduce(contactsChecked, walberla::mpi::SUM);
+   auto cD = walberla::mpi::reduce(contactsDetected, walberla::mpi::SUM);
+   auto cT = walberla::mpi::reduce(contactsTreated, walberla::mpi::SUM);
+   WALBERLA_LOG_DEVEL_ON_ROOT( "SNN bytes communicated:   " << SNNBytesSent << " / " << SNNBytesReceived );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "SNN communication partners: " << SNNSends << " / " << SNNReceives );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "RP bytes communicated:  " << RPBytesSent << " / " << RPBytesReceived );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "RP communication partners: " << RPSends << " / " << RPReceives );
+   WALBERLA_LOG_DEVEL_ON_ROOT( "contacts checked/detected/treated: " << cC << " / " << cD << " / " << cT );
+
+   auto timerImbalancedReduced = walberla::timing::getReduced(timerImbalanced, REDUCE_TOTAL, 0);
+   double PUpSImbalanced = 0.0;
+   WALBERLA_ROOT_SECTION()
+   {
+      WALBERLA_LOG_INFO_ON_ROOT(*timerImbalancedReduced);
+      PUpSImbalanced = double_c(numParticles) * double_c(params.simulationSteps) / double_c(timerImbalancedReduced->max());
+      WALBERLA_LOG_INFO_ON_ROOT("PUpS: " << PUpSImbalanced);
+   }
+
+   auto timerBalancedReduced = walberla::timing::getReduced(timerBalanced, REDUCE_TOTAL, 0);
+   double PUpSBalanced = 0.0;
+   WALBERLA_ROOT_SECTION()
+   {
+      WALBERLA_LOG_INFO_ON_ROOT(*timerBalancedReduced);
+      PUpSBalanced = double_c(numParticles) * double_c(params.simulationSteps) / double_c(timerBalancedReduced->max());
+      WALBERLA_LOG_INFO_ON_ROOT("PUpS: " << PUpSBalanced);
+   }
+
+   auto timerLoadBalancingReduced = walberla::timing::getReduced(timerLoadBalancing, REDUCE_TOTAL, 0);
+
+   auto tpImbalancedReduced = tpImbalanced.getReduced();
+   WALBERLA_LOG_INFO_ON_ROOT(*tpImbalancedReduced);
+
+   auto tpBalancedReduced = tpBalanced.getReduced();
+   WALBERLA_LOG_INFO_ON_ROOT(*tpBalancedReduced);
+   WALBERLA_LOG_INFO_ON_ROOT("*** SIMULATION - END ***");
+
+   if (params.checkSimulation)
+   {
+      check(*ps, *forest, params.spacing);
+   }
 
-      if (params.storeNodeTimings)
+   WALBERLA_LOG_INFO_ON_ROOT("*** SQL OUTPUT - START ***");
+   numParticles = 0;
+   int64_t numGhostParticles = 0;
+   ps->forEachParticle(false,
+                       kernel::SelectAll(),
+                       accessor,
+                       [&numParticles, &numGhostParticles](const size_t idx, auto& ac)
+   {
+      if (data::particle_flags::isSet( ac.getFlagsRef(idx), data::particle_flags::GHOST))
+      {
+         ++numGhostParticles;
+      } else
       {
-         storeNodeTimings(runId, params.sqlFile, "NodeTiming", tp);
+         ++numParticles;
       }
-      WALBERLA_LOG_INFO_ON_ROOT("*** SQL OUTPUT - END ***");
+   },
+   accessor);
+   walberla::mpi::reduceInplace(numParticles, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(numGhostParticles, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(contactsChecked, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(contactsDetected, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(contactsTreated, walberla::mpi::SUM);
+   double linkedCellsVolume = lc->domain_.volume();
+   walberla::mpi::reduceInplace(linkedCellsVolume, walberla::mpi::SUM);
+   size_t numLinkedCells = lc->cells_.size();
+   walberla::mpi::reduceInplace(numLinkedCells, walberla::mpi::SUM);
+   size_t local_aabbs         = domain->getNumLocalAABBs();
+   size_t neighbor_subdomains = domain->getNumNeighborSubdomains();
+   size_t neighbor_processes  = domain->getNumNeighborProcesses();
+   walberla::mpi::reduceInplace(local_aabbs, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(neighbor_subdomains, walberla::mpi::SUM);
+   walberla::mpi::reduceInplace(neighbor_processes, walberla::mpi::SUM);
+
+   uint_t runId = uint_c(-1);
+   WALBERLA_ROOT_SECTION()
+   {
+      stringProperties["walberla_git"]         = WALBERLA_GIT_SHA1;
+      stringProperties["tag"]                  = "mesa_pd";
+      integerProperties["mpi_num_processes"]   = mpiManager->numProcesses();
+      integerProperties["omp_max_threads"]     = omp_get_max_threads();
+      realProperties["imbalanced_PUpS"]          = double_c(PUpSImbalanced);
+      realProperties["imbalanced_timer_min"]     = timerImbalancedReduced->min();
+      realProperties["imbalanced_timer_max"]     = timerImbalancedReduced->max();
+      realProperties["imbalanced_timer_average"] = timerImbalancedReduced->average();
+      realProperties["imbalanced_timer_total"]   = timerImbalancedReduced->total();
+      realProperties["loadbalancing_timer_min"]     = timerLoadBalancingReduced->min();
+      realProperties["loadbalancing_timer_max"]     = timerLoadBalancingReduced->max();
+      realProperties["loadbalancing_timer_average"] = timerLoadBalancingReduced->average();
+      realProperties["loadbalancing_timer_total"]   = timerLoadBalancingReduced->total();
+      realProperties["balanced_PUpS"]          = double_c(PUpSBalanced);
+      realProperties["balanced_timer_min"]     = timerBalancedReduced->min();
+      realProperties["balanced_timer_max"]     = timerBalancedReduced->max();
+      realProperties["balanced_timer_average"] = timerBalancedReduced->average();
+      realProperties["balanced_timer_total"]   = timerBalancedReduced->total();
+      integerProperties["num_particles"]       = numParticles;
+      integerProperties["num_ghost_particles"] = numGhostParticles;
+      integerProperties["contacts_checked"]    = contactsChecked;
+      integerProperties["contacts_detected"]   = contactsDetected;
+      integerProperties["contacts_treated"]    = contactsTreated;
+      integerProperties["local_aabbs"]         = int64_c(local_aabbs);
+      integerProperties["neighbor_subdomains"] = int64_c(neighbor_subdomains);
+      integerProperties["neighbor_processes"]  = int64_c(neighbor_processes);
+      integerProperties["SNNBytesSent"]        = SNNBytesSent;
+      integerProperties["SNNBytesReceived"]    = SNNBytesReceived;
+      integerProperties["SNNSends"]            = SNNSends;
+      integerProperties["SNNReceives"]         = SNNReceives;
+      integerProperties["RPBytesSent"]         = RPBytesSent;
+      integerProperties["RPBytesReceived"]     = RPBytesReceived;
+      integerProperties["RPSends"]             = RPSends;
+      integerProperties["RPReceives"]          = RPReceives;
+      realProperties["linkedCellsVolume"]      = linkedCellsVolume;
+      integerProperties["numLinkedCells"]      = int64_c(numLinkedCells);
+
+      addBuildInfoToSQL( integerProperties, realProperties, stringProperties );
+      saveToSQL(params, integerProperties, realProperties, stringProperties );
+      addDomainPropertiesToSQL(*forest, integerProperties, realProperties, stringProperties);
+      addSlurmPropertiesToSQL(integerProperties, realProperties, stringProperties);
+
+      runId = postprocessing::storeRunInSqliteDB( params.sqlFile, integerProperties, stringProperties, realProperties );
+      postprocessing::storeTimingPoolInSqliteDB( params.sqlFile, runId, *tpImbalancedReduced, "imbalanced" );
+      postprocessing::storeTimingPoolInSqliteDB( params.sqlFile, runId, *tpImbalancedReduced, "balanced" );
+   }
+
+   if (params.storeNodeTimings)
+   {
+      storeNodeTimings(runId, params.sqlFile, "NodeTimingImbalanced", tpImbalanced);
+      storeNodeTimings(runId, params.sqlFile, "NodeTimingBalanced", tpBalanced);
    }
+   WALBERLA_LOG_INFO_ON_ROOT("*** SQL OUTPUT - END ***");
 
    return EXIT_SUCCESS;
 }
diff --git a/apps/benchmarks/GranularGas/PE_GranularGas.cpp b/apps/benchmarks/GranularGas/PE_GranularGas.cpp
index e235f348f..96ded8e71 100644
--- a/apps/benchmarks/GranularGas/PE_GranularGas.cpp
+++ b/apps/benchmarks/GranularGas/PE_GranularGas.cpp
@@ -90,7 +90,7 @@ int main( int argc, char ** argv )
    auto cfg = env.config();
    if (cfg == nullptr) WALBERLA_ABORT("No config specified!");
    const Config::BlockHandle mainConf  = cfg->getBlock( "GranularGas" );
-   Parameters params;
+   mesa_pd::Parameters params;
    loadFromConfig(params, mainConf);
 
    WALBERLA_LOG_INFO_ON_ROOT("*** GLOBALBODYSTORAGE ***");
diff --git a/apps/benchmarks/GranularGas/Parameters.cpp b/apps/benchmarks/GranularGas/Parameters.cpp
index 2215898ba..228f9e33e 100644
--- a/apps/benchmarks/GranularGas/Parameters.cpp
+++ b/apps/benchmarks/GranularGas/Parameters.cpp
@@ -29,12 +29,16 @@
 #include <core/logging/Logging.h>
 
 namespace walberla {
+namespace mesa_pd {
 
 void loadFromConfig(Parameters& params, const Config::BlockHandle& cfg)
 {
    params.sorting = cfg.getParameter<std::string>("sorting", "none" );
    WALBERLA_LOG_INFO_ON_ROOT("sorting: " << params.sorting);
    
+   params.normal = cfg.getParameter<Vec3>("normal", Vec3(real_t(1.0), real_t(1.0), real_t(1.0)) );
+   WALBERLA_LOG_INFO_ON_ROOT("normal: " << params.normal);
+   
    params.spacing = cfg.getParameter<real_t>("spacing", real_t(1.0) );
    WALBERLA_LOG_INFO_ON_ROOT("spacing: " << params.spacing);
    
@@ -107,6 +111,7 @@ void saveToSQL(const Parameters& params,
 {
    stringProperties["sorting"] = params.sorting;
    
+   
    realProperties["spacing"] = double_c(params.spacing);
    
    realProperties["radius"] = double_c(params.radius);
@@ -145,4 +150,5 @@ void saveToSQL(const Parameters& params,
    
 }
 
+} //namespace mesa_pd
 } //namespace walberla
\ No newline at end of file
diff --git a/apps/benchmarks/GranularGas/Parameters.h b/apps/benchmarks/GranularGas/Parameters.h
index 31ce0cb12..c765ab202 100644
--- a/apps/benchmarks/GranularGas/Parameters.h
+++ b/apps/benchmarks/GranularGas/Parameters.h
@@ -27,15 +27,17 @@
 #pragma once
 
 #include <core/config/Config.h>
-#include <core/DataTypes.h>
+#include <mesa_pd/data/DataTypes.h>
 
 #include <string>
 
 namespace walberla {
+namespace mesa_pd {
 
 struct Parameters
 {
    std::string sorting = "none";
+   Vec3 normal = Vec3(real_t(1.0), real_t(1.0), real_t(1.0));
    real_t spacing = real_t(1.0);
    real_t radius = real_t(0.5);
    bool bBarrier = false;
@@ -67,4 +69,5 @@ void saveToSQL(const Parameters& params,
                std::map< std::string, double >&            realProperties,
                std::map< std::string, std::string >&       stringProperties );
 
+} //namespace mesa_pd
 } //namespace walberla
\ No newline at end of file
diff --git a/apps/benchmarks/GranularGas/Parameters.templ.cpp b/apps/benchmarks/GranularGas/Parameters.templ.cpp
index 8f9be9811..c8aae96b3 100644
--- a/apps/benchmarks/GranularGas/Parameters.templ.cpp
+++ b/apps/benchmarks/GranularGas/Parameters.templ.cpp
@@ -29,6 +29,7 @@
 #include <core/logging/Logging.h>
 
 namespace walberla {
+namespace mesa_pd {
 
 void loadFromConfig(Parameters& params, const Config::BlockHandle& cfg)
 {
@@ -56,4 +57,5 @@ void saveToSQL(const Parameters& params,
    {% endfor %}
 }
 
+} //namespace mesa_pd
 } //namespace walberla
diff --git a/apps/benchmarks/GranularGas/Parameters.templ.h b/apps/benchmarks/GranularGas/Parameters.templ.h
index 29b18935c..6f0200412 100644
--- a/apps/benchmarks/GranularGas/Parameters.templ.h
+++ b/apps/benchmarks/GranularGas/Parameters.templ.h
@@ -27,11 +27,12 @@
 #pragma once
 
 #include <core/config/Config.h>
-#include <core/DataTypes.h>
+#include <mesa_pd/data/DataTypes.h>
 
 #include <string>
 
 namespace walberla {
+namespace mesa_pd {
 
 struct Parameters
 {
@@ -48,4 +49,5 @@ void saveToSQL(const Parameters& params,
                std::map< std::string, double >&            realProperties,
                std::map< std::string, std::string >&       stringProperties );
 
+} //namespace mesa_pd
 } //namespace walberla
diff --git a/apps/benchmarks/GranularGas/generateConfig.py b/apps/benchmarks/GranularGas/generateConfig.py
index 04fdf2122..c5e777106 100755
--- a/apps/benchmarks/GranularGas/generateConfig.py
+++ b/apps/benchmarks/GranularGas/generateConfig.py
@@ -5,6 +5,7 @@ from ConfigGenerator import Config
 
 cfg = Config()
 cfg.addParameter("sorting",                "std::string", '"none"')
+cfg.addParameter("normal",                 "Vec3",        "Vec3(real_t(1.0), real_t(1.0), real_t(1.0))")
 cfg.addParameter("spacing",                "real_t",      "real_t(1.0)")
 cfg.addParameter("radius",                 "real_t",      "real_t(0.5)")
 cfg.addParameter("bBarrier",               "bool",        "false")
@@ -28,5 +29,4 @@ cfg.addParameter("metisAlgorithm",         "std::string", '"PART_GEOM_KWAY"' );
 cfg.addParameter("metisWeightsToUse",      "std::string", '"BOTH_WEIGHTS"' );
 cfg.addParameter("metisEdgeSource",        "std::string", '"EDGES_FROM_EDGE_WEIGHTS"' );
 
-
 cfg.generate()
-- 
GitLab