Skip to content
Snippets Groups Projects
Commit 7e7a676b authored by Martin Bauer's avatar Martin Bauer
Browse files

Added manual kernel to generated benchmark for comparison

parent 94387196
Branches
Tags
No related merge requests found
#include "domain_decomposition/BlockDataID.h"
#include "domain_decomposition/IBlock.h"
#include "field/GhostLayerField.h"
#include "stencil/Directions.h"
namespace walberla {
template<typename LM>
class StreamPullCollideGeneric
{
public:
StreamPullCollideGeneric(BlockDataID src, BlockDataID dst, real_t omega)
: src_(src), dst_(dst), omega_(omega)
{}
void operator()(IBlock * block)
{
using PdfField_T = GhostLayerField<real_t, LM::Stencil::Q>;
auto src = block->getData<PdfField_T>( src_ );
auto dst = block->getData<PdfField_T>( dst_ );
WALBERLA_FOR_ALL_CELLS_XYZ(src, {
// Compute conserved moments
Vector3<real_t> u(0);
real_t rho = 0;
using stencil::cx;
using stencil::cy;
using stencil::cz;
for( auto d = LM::Stencil::begin(); d != LM::Stencil::end(); ++d )
{
const real_t pdf = src->get(x - d.cx(), y - d.cy(), z - d.cz(), d.toIdx());
u[0] += pdf * d.cx();
u[1] += pdf * d.cy();
u[2] += pdf * d.cz();
rho += pdf;
}
// collide
const real_t vel_sqr = u.sqrLength() * 1.5;
for( auto d = LM::Stencil::begin(); d != LM::Stencil::end(); ++d )
{
const real_t pdf = src->get(x - d.cx(), y - d.cy(), z - d.cz(), d.toIdx());
const real_t vel = d.cx()*u[0] + d.cy()*u[1] + d.cz()*u[2];
dst->get(x, y, z, d.toIdx()) = ( 1.0 - omega_ ) * pdf +
omega_ * LM::w[ d.toIdx() ] * ( rho - vel_sqr + 3.0*vel + 4.5*vel*vel );
}
});
src->swapDataPointers(*dst);
}
private:
BlockDataID src_;
BlockDataID dst_;
real_t omega_;
};
} // namespace walberla
\ No newline at end of file
DomainSetup DomainSetup
{ {
blocks < 1, 1, 1 >; blocks < 1, 1, 1 >;
cellsPerBlock < 128, 128, 128 >; cellsPerBlock < 64, 64, 64 >;
periodic < 0, 0, 0 >; periodic < 1, 1, 1 >;
} }
Parameters Parameters
...@@ -13,11 +13,12 @@ Parameters ...@@ -13,11 +13,12 @@ Parameters
outerIterations 1; // how many measurements to conduct outerIterations 1; // how many measurements to conduct
vtkWriteFrequency 100; // write a VTK file every n'th step, if zero VTK output is disabled vtkWriteFrequency 100; // write a VTK file every n'th step, if zero VTK output is disabled
timeStepMode aa; // can be: noOverlap, simpleOverlap, complexOverlap, kernelOnly timeStepMode twoField;
//twoFieldKernelType manual_generic;
remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time
directComm 1; directComm 0;
omega 1.8; omega 1.8;
shearVelocityMagnitude 0; shearVelocityMagnitude 0.05;
useGui 0; useGui 0;
} }
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
#include "domain_decomposition/SharedSweep.h" #include "domain_decomposition/SharedSweep.h"
#include "gui/Gui.h" #include "gui/Gui.h"
#include "InitShearVelocity.h" #include "InitShearVelocity.h"
#include "ManualKernels.h"
#include "lbm/lattice_model/D3Q19.h"
#include "GenDefines.h" #include "GenDefines.h"
#include "GenMacroGetter.h" #include "GenMacroGetter.h"
...@@ -32,6 +34,7 @@ ...@@ -32,6 +34,7 @@
#include "GenMpiDtypeInfoAAPull.h" #include "GenMpiDtypeInfoAAPull.h"
#include "GenMpiDtypeInfoAAPush.h" #include "GenMpiDtypeInfoAAPush.h"
#include <iomanip> #include <iomanip>
using namespace walberla; using namespace walberla;
...@@ -102,14 +105,27 @@ int main( int argc, char **argv ) ...@@ -102,14 +105,27 @@ int main( int argc, char **argv )
blockforest::communication::UniformDirectScheme< Stencil_T > aaPushCommDirect(blocks); blockforest::communication::UniformDirectScheme< Stencil_T > aaPushCommDirect(blocks);
aaPushCommDirect.addDataToCommunicate(make_shared<pystencils::GenMpiDtypeInfoAAPush>(pdfFieldId)); aaPushCommDirect.addDataToCommunicate(make_shared<pystencils::GenMpiDtypeInfoAAPush>(pdfFieldId));
const std::string twoFieldKernelType = parameters.getParameter<std::string>( "twoFieldKernelType", "generated");
std::function<void(IBlock*)> twoFieldKernel;
if( twoFieldKernelType == "generated") {
twoFieldKernel = pystencils::GenLbKernel(pdfFieldId, omega);
} else if (twoFieldKernelType == "manual_generic") {
using MyLM = lbm::D3Q19<lbm::collision_model::SRT>;
BlockDataID tmpPdfFieldId = blocks->addStructuredBlockData<PdfField_T>(pdfFieldAdder, "pdfs");
twoFieldKernel = StreamPullCollideGeneric<MyLM>(pdfFieldId, tmpPdfFieldId, omega);
} else if (twoFieldKernelType == "manual_d3q19") {
}
using F = std::function<void()>; using F = std::function<void()>;
SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps / 2 ); SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps / 2 );
if( timeStepMode == "twoField") if( timeStepMode == "twoField")
{ {
timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" ) timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" ); << Sweep( twoFieldKernel, "LB stream & collide1" );
timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" ) timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide2" ); << Sweep( twoFieldKernel, "LB stream & collide2" );
} else if ( timeStepMode == "twoFieldKernelOnly") { } else if ( timeStepMode == "twoFieldKernelOnly") {
timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" ); timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" );
...@@ -194,6 +210,8 @@ int main( int argc, char **argv ) ...@@ -194,6 +210,8 @@ int main( int argc, char **argv )
pythonCallbackResults.data().exposeValue( "mlupsPerProcess", mlupsPerProcess ); pythonCallbackResults.data().exposeValue( "mlupsPerProcess", mlupsPerProcess );
pythonCallbackResults.data().exposeValue( "stencil", infoStencil ); pythonCallbackResults.data().exposeValue( "stencil", infoStencil );
pythonCallbackResults.data().exposeValue( "configName", infoConfigName ); pythonCallbackResults.data().exposeValue( "configName", infoConfigName );
pythonCallbackResults.data().exposeValue( "timeStepMode", timeStepMode );
pythonCallbackResults.data().exposeValue( "twoFieldKernel", twoFieldKernelType );
pythonCallbackResults.data().exposeValue( "optimizations", optimizationDict ); pythonCallbackResults.data().exposeValue( "optimizations", optimizationDict );
pythonCallbackResults.data().exposeValue( "githash", core::buildinfo::gitSHA1() ); pythonCallbackResults.data().exposeValue( "githash", core::buildinfo::gitSHA1() );
pythonCallbackResults.data().exposeValue( "compilerFlags", core::buildinfo::compilerFlags() ); pythonCallbackResults.data().exposeValue( "compilerFlags", core::buildinfo::compilerFlags() );
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment