Commit 1b78979c authored by Martin Bauer
Browse files

Generated UniformGrid Benchmark - two field version

parent 1a438446
waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_python_file_generates(UniformGridGenerated.py
UniformGridGenerated_LatticeModel.cpp
UniformGridGenerated_Defines.h)
GenLbKernel.cpp GenMacroGetter.cpp GenMacroSetter.cpp GenPackInfo.cpp GenDefines.h)
foreach(config trt )
......
#include "core/math/Random.h"
#include "domain_decomposition/StructuredBlockStorage.h"
namespace walberla {
inline void initShearVelocity(const shared_ptr<StructuredBlockStorage> & blocks, BlockDataID velFieldID,
const real_t xMagnitude=0.005, const real_t fluctuationMagnitude=0.05 )
{
math::seedRandomGenerator(0);
auto halfZ = blocks->getDomainCellBB().zMax() / 2;
for( auto & block: *blocks)
{
auto velField = block.getData<GhostLayerField<real_t, 3> >( velFieldID );
WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velField,
Cell globalCell;
blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
real_t randomReal = xMagnitude * math::realRandom<real_t>(-fluctuationMagnitude, fluctuationMagnitude);
velField->get(x, y, z, 1) = real_t(0);
velField->get(x, y, z, 2) = randomReal;
if( globalCell[2] >= halfZ ) {
velField->get(x, y, z, 0) = xMagnitude;
} else {
velField->get(x, y, z, 0) = -xMagnitude;
}
);
}
}
}
\ No newline at end of file
DomainSetup
{
blocks < 1, 1, 1 >;
cellsPerBlock < 64, 64, 128 >;
periodic < 1, 1, 1 >;
}
Parameters
{
timesteps 200; // time steps of one performance measurement
warmupSteps 10; // number of steps to run before measurement starts
outerIterations 15; // how many measurements to conduct
vtkWriteFrequency 200; // write a VTK file every n'th step, if zero VTK output is disabled
cudaEnabledMPI false; // switch on if you have a CUDA-enabled MPI implementation
timeStepStrategy kernelOnly; // can be: noOverlap, simpleOverlap, complexOverlap, kernelOnly
innerOuterSplit < 8, 1, 1>; // slice-thickness that 'outer'-kernels process when overlapping
remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time
omega 1.8;
initShearFlow 1;
useGui 0;
}
......@@ -6,52 +6,26 @@
#include "python_coupling/DictWrapper.h"
#include "blockforest/Initialization.h"
#include "field/vtk/VTKWriter.h"
#include "field/AddToStorage.h"
#include "field/communication/PackInfo.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "timeloop/all.h"
#include "core/timing/TimingPool.h"
#include "core/timing/RemainingTimeLogger.h"
#include "domain_decomposition/SharedSweep.h"
#include "lbm/communication/PdfFieldPackInfo.h"
#include "lbm/field/AddToStorage.h"
#include "lbm/vtk/VTKOutput.h"
#include "lbm/gui/Connection.h"
#include "lbm/vtk/Velocity.h"
#include "gui/Gui.h"
#include "InitShearVelocity.h"
#include "UniformGridGenerated_LatticeModel.h"
#include "UniformGridGenerated_Defines.h"
#include "GenDefines.h"
#include "GenPackInfo.h"
#include "GenLbKernel.h"
#include "GenMacroGetter.h"
#include "GenMacroSetter.h"
using namespace walberla;
typedef lbm::UniformGridGenerated_LatticeModel LatticeModel_T;
typedef LatticeModel_T::Stencil Stencil_T;
typedef LatticeModel_T::CommunicationStencil CommunicationStencil_T;
typedef lbm::PdfField< LatticeModel_T > PdfField_T;
void initShearVelocity(const shared_ptr<StructuredBlockStorage> & blocks, BlockDataID pdfFieldId,
const real_t xMagnitude=0.1, const real_t fluctuationMagnitude=0.05 )
{
math::seedRandomGenerator(0);
auto halfZ = blocks->getDomainCellBB().zMax() / 2;
for( auto & block: *blocks)
{
auto pdfField = block.getData<PdfField_T>( pdfFieldId );
WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(pdfField,
Cell globalCell;
blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z));
real_t randomReal = xMagnitude * math::realRandom<real_t>(-fluctuationMagnitude, fluctuationMagnitude);
if( globalCell[2] >= halfZ ) {
pdfField->setDensityAndVelocity(x, y, z, Vector3<real_t>(xMagnitude, 0, randomReal), real_t(1.0));
} else {
pdfField->setDensityAndVelocity(x, y, z, Vector3<real_t>(-xMagnitude, 0, randomReal), real_t(1.0));
}
);
}
}
using PdfField_T = GhostLayerField< real_t, Stencil_T::Q >;
using VelocityField_T = GhostLayerField< real_t, 3 >;
int main( int argc, char **argv )
......@@ -72,22 +46,25 @@ int main( int argc, char **argv )
const std::string timeStepStrategy = parameters.getParameter<std::string>( "timeStepStrategy", "normal");
const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 ));
const uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 50 ));
const bool initShearFlow = parameters.getParameter<bool>("initShearFlow", false);
const real_t shearVelocityMagnitude = parameters.getParameter<real_t>("shearVelocityMagnitude", 0.08);
// Creating fields
LatticeModel_T latticeModel = LatticeModel_T( omega );
BlockDataID pdfFieldId = lbm::addPdfFieldToStorage( blocks, "pdf field", latticeModel);
BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", real_t( std::nan("") ), field::fzyx );
BlockDataID velFieldId = field::addToStorage< VelocityField_T >( blocks, "vel", real_t( 0 ), field::fzyx );
if( initShearFlow ) {
initShearVelocity(blocks, pdfFieldId);
}
pystencils::GenMacroSetter setterKernel(pdfFieldId, velFieldId);
pystencils::GenMacroGetter getterKernel(pdfFieldId, velFieldId);
initShearVelocity(blocks, velFieldId, shearVelocityMagnitude);
for( auto & b : *blocks)
setterKernel(&b);
SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps );
blockforest::communication::UniformBufferedScheme< CommunicationStencil_T > communication( blocks );
communication.addPackInfo( make_shared< lbm::PdfFieldPackInfo< LatticeModel_T > >( pdfFieldId ) );
blockforest::communication::UniformBufferedScheme< Stencil_T > communication( blocks );
communication.addPackInfo( make_shared< pystencils::GenPackInfo >( pdfFieldId ) );
timeLoop.add() << BeforeFunction( communication, "communication" )
<< Sweep( LatticeModel_T::Sweep( pdfFieldId ), "LB stream & collide" );
<< Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide" );
int warmupSteps = parameters.getParameter<int>( "warmupSteps", 2 );
int outerIterations = parameters.getParameter<int>( "outerIterations", 1 );
......@@ -106,8 +83,12 @@ int main( int argc, char **argv )
{
auto vtkOutput = vtk::createVTKOutput_BlockData( *blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
"simulation_step", false, true, true, false, 0 );
auto velWriter = make_shared< lbm::VelocityVTKWriter<LatticeModel_T> >(pdfFieldId, "vel");
vtkOutput->addCellDataWriter(velWriter);
auto velWriter = make_shared< field::VTKWriter< VelocityField_T > >( velFieldId, "vel" );
vtkOutput->addCellDataWriter( velWriter );
vtkOutput->addBeforeFunction( [&]()
{ for( auto & b : *blocks)
getterKernel(&b);
} );
timeLoop.addFuncAfterTimeStep( vtk::writeFiles( vtkOutput ), "VTK Output" );
}
......@@ -116,7 +97,6 @@ int main( int argc, char **argv )
if( useGui )
{
GUI gui( timeLoop, blocks, argc, argv);
lbm::connectToGui<LatticeModel_T>(gui);
gui.run();
}
else
......
import sympy as sp
import pystencils as ps
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.creationfunctions import create_lb_update_rule
from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
from lbmpy_walberla import generate_lattice_model
from pystencils_walberla import CodeGeneration
from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
omega = sp.symbols("omega")
omega_fill = sp.symbols("omega_:10")
......@@ -81,8 +81,9 @@ with CodeGeneration() as ctx:
'field_name': 'pdfs',
'temporary_field_name': 'pdfs_tmp',
'kernel_type': accessor,
'optimization': {'cse_global': True,
'cse_pdfs': False}
'optimization': {'cse_global': False,
'cse_pdfs': True,
'split': True}
}
config_name = ctx.config
noopt = False
......@@ -94,6 +95,8 @@ with CodeGeneration() as ctx:
d3q27 = True
config_name = config_name[:-len("_d3q27")]
if config_name == '':
config_name = 'trt'
options = options_dict[config_name]
options.update(common_options)
options = options.copy()
......@@ -109,20 +112,22 @@ with CodeGeneration() as ctx:
pdfs, velocity_field = ps.fields("pdfs({q}), velocity(3) : double[3D]".format(q=q), layout='fzyx')
options['optimization']['symbolic_field'] = pdfs
vp = [
('double', 'omega_0'),
('double', 'omega_1'),
('double', 'omega_2'),
('double', 'omega_3'),
('double', 'omega_4'),
('double', 'omega_5'),
('double', 'omega_6'),
('int32_t', 'cudaBlockSize0'),
('int32_t', 'cudaBlockSize1'),
]
update_rule = create_lb_collision_rule(**options)
generate_lattice_model(ctx, 'UniformGridGenerated_LatticeModel', update_rule)
update_rule = create_lb_update_rule(**options)
vec = {'nontemporal': True, 'assume_aligned': True, 'assume_inner_stride_one': True}
# Sweeps
generate_sweep(ctx, 'GenLbKernel', update_rule, field_swaps=[('pdfs', 'pdfs_tmp')])
setter_assignments = macroscopic_values_setter(update_rule.method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=1)
getter_assignments = macroscopic_values_getter(update_rule.method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=None)
generate_sweep(ctx, 'GenMacroSetter', setter_assignments)
generate_sweep(ctx, 'GenMacroGetter', getter_assignments)
# Communication
generate_pack_info_from_kernel(ctx, 'GenPackInfo', update_rule, cpu_vectorize_info={'instruction_set': None})
# Info Header
infoHeaderParams = {
'stencil': stencil_str,
'q': q,
......@@ -130,4 +135,5 @@ with CodeGeneration() as ctx:
'cse_global': int(options['optimization']['cse_global']),
'cse_pdfs': int(options['optimization']['cse_pdfs']),
}
ctx.write_file("UniformGridGenerated_Defines.h", info_header.format(**infoHeaderParams))
ctx.write_file("GenDefines.h", info_header.format(**infoHeaderParams))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment