From 7e7a676b71fdc8511b10d7ac46268c912c2c8b31 Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Mon, 21 Oct 2019 17:15:21 +0200
Subject: [PATCH] Added manual kernel to generated benchmark for comparison

---
 .../UniformGridGenerated/ManualKernels.h      | 60 +++++++++++++++++++
 .../UniformGridGenerated/UniformGrid.prm      | 11 ++--
 .../UniformGridGenerated.cpp                  | 22 ++++++-
 3 files changed, 86 insertions(+), 7 deletions(-)
 create mode 100644 apps/benchmarks/UniformGridGenerated/ManualKernels.h

diff --git a/apps/benchmarks/UniformGridGenerated/ManualKernels.h b/apps/benchmarks/UniformGridGenerated/ManualKernels.h
new file mode 100644
index 000000000..72bbf8fbf
--- /dev/null
+++ b/apps/benchmarks/UniformGridGenerated/ManualKernels.h
@@ -0,0 +1,60 @@
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "field/GhostLayerField.h"
+#include "stencil/Directions.h"
+
+
+namespace walberla {
+
+template<typename LM>
+class StreamPullCollideGeneric
+{
+public:
+    StreamPullCollideGeneric(BlockDataID src, BlockDataID dst, real_t omega)
+        : src_(src), dst_(dst), omega_(omega)
+    {}
+
+    void operator()(IBlock * block)
+    {
+        using PdfField_T = GhostLayerField<real_t, LM::Stencil::Q>;
+        auto src = block->getData<PdfField_T>( src_ );
+        auto dst = block->getData<PdfField_T>( dst_ );
+
+        WALBERLA_FOR_ALL_CELLS_XYZ(src, {
+                    // Compute conserved moments
+                    Vector3<real_t> u(0);
+                    real_t rho = 0;
+                    using stencil::cx;
+                    using stencil::cy;
+                    using stencil::cz;
+                    for( auto d = LM::Stencil::begin(); d != LM::Stencil::end(); ++d )
+                    {
+                        const real_t pdf = src->get(x - d.cx(), y - d.cy(), z - d.cz(), d.toIdx());
+                        u[0] += pdf * d.cx();
+                        u[1] += pdf * d.cy();
+                        u[2] += pdf * d.cz();
+                        rho += pdf;
+                    }
+
+                    // collide
+                    const real_t vel_sqr = u.sqrLength() * 1.5;
+                    for( auto d = LM::Stencil::begin(); d != LM::Stencil::end(); ++d )
+                    {
+                        const real_t pdf = src->get(x - d.cx(), y - d.cy(), z - d.cz(), d.toIdx());
+                        const real_t vel = d.cx()*u[0] + d.cy()*u[1] + d.cz()*u[2];
+                        dst->get(x, y, z, d.toIdx()) = ( 1.0 - omega_ ) * pdf +
+                                                       omega_   * LM::w[ d.toIdx() ] * ( rho - vel_sqr + 3.0*vel + 4.5*vel*vel );
+                    }
+                });
+        src->swapDataPointers(*dst);
+    }
+
+private:
+    BlockDataID src_;
+    BlockDataID dst_;
+    real_t omega_;
+};
+
+
+
+} // namespace walberla
\ No newline at end of file
diff --git a/apps/benchmarks/UniformGridGenerated/UniformGrid.prm b/apps/benchmarks/UniformGridGenerated/UniformGrid.prm
index aae6bc2c0..48fa74339 100644
--- a/apps/benchmarks/UniformGridGenerated/UniformGrid.prm
+++ b/apps/benchmarks/UniformGridGenerated/UniformGrid.prm
@@ -1,8 +1,8 @@
 DomainSetup
 {
    blocks        <  1,    1,   1 >;
-   cellsPerBlock <  128, 128, 128 >;
-   periodic      <  0,    0,   0 >;
+   cellsPerBlock <  64, 64, 64 >;
+   periodic      <  1,   1,   1 >;
 }
 
 Parameters 
@@ -13,11 +13,12 @@ Parameters
     outerIterations 1;      // how many measurements to conduct
 
 	vtkWriteFrequency 100;           // write a VTK file every n'th step, if zero VTK output is disabled
-	timeStepMode aa;                 // can be: noOverlap, simpleOverlap, complexOverlap, kernelOnly
+	timeStepMode twoField;
+	//twoFieldKernelType manual_generic;
 	remainingTimeLoggerFrequency 0;  // interval in seconds to log the estimated remaining time
-    directComm 1;
+    directComm 0;
 
 	omega 1.8;
-	shearVelocityMagnitude 0;
+	shearVelocityMagnitude 0.05;
 	useGui 0;
 }
diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp
index f3b620a5b..13d261a42 100644
--- a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp
+++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp
@@ -16,6 +16,8 @@
 #include "domain_decomposition/SharedSweep.h"
 #include "gui/Gui.h"
 #include "InitShearVelocity.h"
+#include "ManualKernels.h"
+#include "lbm/lattice_model/D3Q19.h"
 
 #include "GenDefines.h"
 #include "GenMacroGetter.h"
@@ -32,6 +34,7 @@
 #include "GenMpiDtypeInfoAAPull.h"
 #include "GenMpiDtypeInfoAAPush.h"
 
+
 #include <iomanip>
 
 using namespace walberla;
@@ -102,14 +105,27 @@ int main( int argc, char **argv )
       blockforest::communication::UniformDirectScheme< Stencil_T > aaPushCommDirect(blocks);
       aaPushCommDirect.addDataToCommunicate(make_shared<pystencils::GenMpiDtypeInfoAAPush>(pdfFieldId));
 
+
+      const std::string twoFieldKernelType = parameters.getParameter<std::string>( "twoFieldKernelType", "generated");
+      std::function<void(IBlock*)> twoFieldKernel;
+      if( twoFieldKernelType == "generated") {
+          twoFieldKernel = pystencils::GenLbKernel(pdfFieldId, omega);
+      } else if (twoFieldKernelType == "manual_generic") {
+          using MyLM = lbm::D3Q19<lbm::collision_model::SRT>;
+          BlockDataID tmpPdfFieldId = blocks->addStructuredBlockData<PdfField_T>(pdfFieldAdder, "pdfs");
+          twoFieldKernel = StreamPullCollideGeneric<MyLM>(pdfFieldId, tmpPdfFieldId, omega);
+      } else if (twoFieldKernelType == "manual_d3q19") {
+
+      }
+
       using F = std::function<void()>;
       SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps / 2 );
       if( timeStepMode == "twoField")
       {
           timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" )
-                         << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" );
+                         << Sweep( twoFieldKernel, "LB stream & collide1" );
           timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" )
-                         << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide2" );
+                         << Sweep( twoFieldKernel, "LB stream & collide2" );
 
       } else if ( timeStepMode == "twoFieldKernelOnly") {
           timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" );
@@ -194,6 +210,8 @@ int main( int argc, char **argv )
                       pythonCallbackResults.data().exposeValue( "mlupsPerProcess", mlupsPerProcess );
                       pythonCallbackResults.data().exposeValue( "stencil", infoStencil );
                       pythonCallbackResults.data().exposeValue( "configName", infoConfigName );
+                      pythonCallbackResults.data().exposeValue( "timeStepMode", timeStepMode );
+                      pythonCallbackResults.data().exposeValue( "twoFieldKernel", twoFieldKernelType );
                       pythonCallbackResults.data().exposeValue( "optimizations", optimizationDict );
                       pythonCallbackResults.data().exposeValue( "githash", core::buildinfo::gitSHA1() );
                       pythonCallbackResults.data().exposeValue( "compilerFlags", core::buildinfo::compilerFlags() );
-- 
GitLab