From 7e7a676b71fdc8511b10d7ac46268c912c2c8b31 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Mon, 21 Oct 2019 17:15:21 +0200 Subject: [PATCH] Added manual kernel to generated benchmark for comparison --- .../UniformGridGenerated/ManualKernels.h | 60 +++++++++++++++++++ .../UniformGridGenerated/UniformGrid.prm | 11 ++-- .../UniformGridGenerated.cpp | 22 ++++++- 3 files changed, 86 insertions(+), 7 deletions(-) create mode 100644 apps/benchmarks/UniformGridGenerated/ManualKernels.h diff --git a/apps/benchmarks/UniformGridGenerated/ManualKernels.h b/apps/benchmarks/UniformGridGenerated/ManualKernels.h new file mode 100644 index 000000000..72bbf8fbf --- /dev/null +++ b/apps/benchmarks/UniformGridGenerated/ManualKernels.h @@ -0,0 +1,60 @@ +#include "domain_decomposition/BlockDataID.h" +#include "domain_decomposition/IBlock.h" +#include "field/GhostLayerField.h" +#include "stencil/Directions.h" + + +namespace walberla { + +template<typename LM> +class StreamPullCollideGeneric +{ +public: + StreamPullCollideGeneric(BlockDataID src, BlockDataID dst, real_t omega) + : src_(src), dst_(dst), omega_(omega) + {} + + void operator()(IBlock * block) + { + using PdfField_T = GhostLayerField<real_t, LM::Stencil::Q>; + auto src = block->getData<PdfField_T>( src_ ); + auto dst = block->getData<PdfField_T>( dst_ ); + + WALBERLA_FOR_ALL_CELLS_XYZ(src, { + // Compute conserved moments + Vector3<real_t> u(0); + real_t rho = 0; + using stencil::cx; + using stencil::cy; + using stencil::cz; + for( auto d = LM::Stencil::begin(); d != LM::Stencil::end(); ++d ) + { + const real_t pdf = src->get(x - d.cx(), y - d.cy(), z - d.cz(), d.toIdx()); + u[0] += pdf * d.cx(); + u[1] += pdf * d.cy(); + u[2] += pdf * d.cz(); + rho += pdf; + } + + // collide + const real_t vel_sqr = u.sqrLength() * 1.5; + for( auto d = LM::Stencil::begin(); d != LM::Stencil::end(); ++d ) + { + const real_t pdf = src->get(x - d.cx(), y - d.cy(), z - d.cz(), d.toIdx()); + const real_t vel = d.cx()*u[0] + d.cy()*u[1] + d.cz()*u[2]; + dst->get(x, y, z, d.toIdx()) = ( 1.0 - omega_ ) * pdf + + omega_ * LM::w[ d.toIdx() ] * ( rho - vel_sqr + 3.0*vel + 4.5*vel*vel ); + } + }); + src->swapDataPointers(*dst); + } + +private: + BlockDataID src_; + BlockDataID dst_; + real_t omega_; +}; + + + +} // namespace walberla \ No newline at end of file diff --git a/apps/benchmarks/UniformGridGenerated/UniformGrid.prm b/apps/benchmarks/UniformGridGenerated/UniformGrid.prm index aae6bc2c0..48fa74339 100644 --- a/apps/benchmarks/UniformGridGenerated/UniformGrid.prm +++ b/apps/benchmarks/UniformGridGenerated/UniformGrid.prm @@ -1,8 +1,8 @@ DomainSetup { blocks < 1, 1, 1 >; - cellsPerBlock < 128, 128, 128 >; - periodic < 0, 0, 0 >; + cellsPerBlock < 64, 64, 64 >; + periodic < 1, 1, 1 >; } Parameters @@ -13,11 +13,12 @@ Parameters outerIterations 1; // how many measurements to conduct vtkWriteFrequency 100; // write a VTK file every n'th step, if zero VTK output is disabled - timeStepMode aa; // can be: noOverlap, simpleOverlap, complexOverlap, kernelOnly + timeStepMode twoField; + //twoFieldKernelType manual_generic; remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time - directComm 1; + directComm 0; omega 1.8; - shearVelocityMagnitude 0; + shearVelocityMagnitude 0.05; useGui 0; } diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp index f3b620a5b..13d261a42 100644 --- a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp +++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp @@ -16,6 +16,8 @@ #include "domain_decomposition/SharedSweep.h" #include "gui/Gui.h" #include "InitShearVelocity.h" +#include "ManualKernels.h" +#include "lbm/lattice_model/D3Q19.h" #include "GenDefines.h" #include "GenMacroGetter.h" @@ -32,6 +34,7 @@ #include "GenMpiDtypeInfoAAPull.h" #include "GenMpiDtypeInfoAAPush.h" + #include <iomanip> using namespace walberla; @@ -102,14 +105,27 @@ int main( int argc, char **argv ) blockforest::communication::UniformDirectScheme< Stencil_T > aaPushCommDirect(blocks); aaPushCommDirect.addDataToCommunicate(make_shared<pystencils::GenMpiDtypeInfoAAPush>(pdfFieldId)); + + const std::string twoFieldKernelType = parameters.getParameter<std::string>( "twoFieldKernelType", "generated"); + std::function<void(IBlock*)> twoFieldKernel; + if( twoFieldKernelType == "generated") { + twoFieldKernel = pystencils::GenLbKernel(pdfFieldId, omega); + } else if (twoFieldKernelType == "manual_generic") { + using MyLM = lbm::D3Q19<lbm::collision_model::SRT>; + BlockDataID tmpPdfFieldId = blocks->addStructuredBlockData<PdfField_T>(pdfFieldAdder, "pdfs"); + twoFieldKernel = StreamPullCollideGeneric<MyLM>(pdfFieldId, tmpPdfFieldId, omega); + } else if (twoFieldKernelType == "manual_d3q19") { + + } + using F = std::function<void()>; SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps / 2 ); if( timeStepMode == "twoField") { timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" ) - << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" ); + << Sweep( twoFieldKernel, "LB stream & collide1" ); timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" ) - << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide2" ); + << Sweep( twoFieldKernel, "LB stream & collide2" ); } else if ( timeStepMode == "twoFieldKernelOnly") { timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" ); @@ -194,6 +210,8 @@ int main( int argc, char **argv ) pythonCallbackResults.data().exposeValue( "mlupsPerProcess", mlupsPerProcess ); pythonCallbackResults.data().exposeValue( "stencil", infoStencil ); pythonCallbackResults.data().exposeValue( "configName", infoConfigName ); + pythonCallbackResults.data().exposeValue( "timeStepMode", timeStepMode ); + pythonCallbackResults.data().exposeValue( "twoFieldKernel", twoFieldKernelType ); pythonCallbackResults.data().exposeValue( "optimizations", optimizationDict ); pythonCallbackResults.data().exposeValue( "githash", core::buildinfo::gitSHA1() ); pythonCallbackResults.data().exposeValue( "compilerFlags", core::buildinfo::compilerFlags() ); -- GitLab