Commit 7e7a676b authored by Martin Bauer's avatar Martin Bauer
Browse files

Added manual kernel to generated benchmark for comparison

parent 94387196
#include "domain_decomposition/BlockDataID.h"
#include "domain_decomposition/IBlock.h"
#include "field/GhostLayerField.h"
#include "stencil/Directions.h"
namespace walberla {
template<typename LM>
class StreamPullCollideGeneric
{
public:
StreamPullCollideGeneric(BlockDataID src, BlockDataID dst, real_t omega)
: src_(src), dst_(dst), omega_(omega)
{}
void operator()(IBlock * block)
{
using PdfField_T = GhostLayerField<real_t, LM::Stencil::Q>;
auto src = block->getData<PdfField_T>( src_ );
auto dst = block->getData<PdfField_T>( dst_ );
WALBERLA_FOR_ALL_CELLS_XYZ(src, {
// Compute conserved moments
Vector3<real_t> u(0);
real_t rho = 0;
using stencil::cx;
using stencil::cy;
using stencil::cz;
for( auto d = LM::Stencil::begin(); d != LM::Stencil::end(); ++d )
{
const real_t pdf = src->get(x - d.cx(), y - d.cy(), z - d.cz(), d.toIdx());
u[0] += pdf * d.cx();
u[1] += pdf * d.cy();
u[2] += pdf * d.cz();
rho += pdf;
}
// collide
const real_t vel_sqr = u.sqrLength() * 1.5;
for( auto d = LM::Stencil::begin(); d != LM::Stencil::end(); ++d )
{
const real_t pdf = src->get(x - d.cx(), y - d.cy(), z - d.cz(), d.toIdx());
const real_t vel = d.cx()*u[0] + d.cy()*u[1] + d.cz()*u[2];
dst->get(x, y, z, d.toIdx()) = ( 1.0 - omega_ ) * pdf +
omega_ * LM::w[ d.toIdx() ] * ( rho - vel_sqr + 3.0*vel + 4.5*vel*vel );
}
});
src->swapDataPointers(*dst);
}
private:
BlockDataID src_;
BlockDataID dst_;
real_t omega_;
};
} // namespace walberla
\ No newline at end of file
DomainSetup
{
blocks < 1, 1, 1 >;
cellsPerBlock < 128, 128, 128 >;
periodic < 0, 0, 0 >;
cellsPerBlock < 64, 64, 64 >;
periodic < 1, 1, 1 >;
}
Parameters
......@@ -13,11 +13,12 @@ Parameters
outerIterations 1; // how many measurements to conduct
vtkWriteFrequency 100; // write a VTK file every n'th step, if zero VTK output is disabled
timeStepMode aa; // can be: noOverlap, simpleOverlap, complexOverlap, kernelOnly
timeStepMode twoField;
//twoFieldKernelType manual_generic;
remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time
directComm 1;
directComm 0;
omega 1.8;
shearVelocityMagnitude 0;
shearVelocityMagnitude 0.05;
useGui 0;
}
......@@ -16,6 +16,8 @@
#include "domain_decomposition/SharedSweep.h"
#include "gui/Gui.h"
#include "InitShearVelocity.h"
#include "ManualKernels.h"
#include "lbm/lattice_model/D3Q19.h"
#include "GenDefines.h"
#include "GenMacroGetter.h"
......@@ -32,6 +34,7 @@
#include "GenMpiDtypeInfoAAPull.h"
#include "GenMpiDtypeInfoAAPush.h"
#include <iomanip>
using namespace walberla;
......@@ -102,14 +105,27 @@ int main( int argc, char **argv )
blockforest::communication::UniformDirectScheme< Stencil_T > aaPushCommDirect(blocks);
aaPushCommDirect.addDataToCommunicate(make_shared<pystencils::GenMpiDtypeInfoAAPush>(pdfFieldId));
const std::string twoFieldKernelType = parameters.getParameter<std::string>( "twoFieldKernelType", "generated");
std::function<void(IBlock*)> twoFieldKernel;
if( twoFieldKernelType == "generated") {
twoFieldKernel = pystencils::GenLbKernel(pdfFieldId, omega);
} else if (twoFieldKernelType == "manual_generic") {
using MyLM = lbm::D3Q19<lbm::collision_model::SRT>;
BlockDataID tmpPdfFieldId = blocks->addStructuredBlockData<PdfField_T>(pdfFieldAdder, "pdfs");
twoFieldKernel = StreamPullCollideGeneric<MyLM>(pdfFieldId, tmpPdfFieldId, omega);
} else if (twoFieldKernelType == "manual_d3q19") {
}
using F = std::function<void()>;
SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps / 2 );
if( timeStepMode == "twoField")
{
timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" );
<< Sweep( twoFieldKernel, "LB stream & collide1" );
timeLoop.add() << BeforeFunction(directComm ? F(twoFieldCommDirect) : F(twoFieldComm), "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide2" );
<< Sweep( twoFieldKernel, "LB stream & collide2" );
} else if ( timeStepMode == "twoFieldKernelOnly") {
timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" );
......@@ -194,6 +210,8 @@ int main( int argc, char **argv )
pythonCallbackResults.data().exposeValue( "mlupsPerProcess", mlupsPerProcess );
pythonCallbackResults.data().exposeValue( "stencil", infoStencil );
pythonCallbackResults.data().exposeValue( "configName", infoConfigName );
pythonCallbackResults.data().exposeValue( "timeStepMode", timeStepMode );
pythonCallbackResults.data().exposeValue( "twoFieldKernel", twoFieldKernelType );
pythonCallbackResults.data().exposeValue( "optimizations", optimizationDict );
pythonCallbackResults.data().exposeValue( "githash", core::buildinfo::gitSHA1() );
pythonCallbackResults.data().exposeValue( "compilerFlags", core::buildinfo::compilerFlags() );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment