diff --git a/apps/benchmarks/UniformGridGenerated/CMakeLists.txt b/apps/benchmarks/UniformGridGenerated/CMakeLists.txt index def7d93f6a10d530ce4c980708f2a4c9f657026b..1e8028d3916bfcd58938da87d0d1c05226f94083 100644 --- a/apps/benchmarks/UniformGridGenerated/CMakeLists.txt +++ b/apps/benchmarks/UniformGridGenerated/CMakeLists.txt @@ -1,6 +1,8 @@ +waLBerla_link_files_to_builddir( "*.prm" ) + + waLBerla_python_file_generates(UniformGridGenerated.py - UniformGridGenerated_LatticeModel.cpp - UniformGridGenerated_Defines.h) + GenLbKernel.cpp GenMacroGetter.cpp GenMacroSetter.cpp GenPackInfo.cpp GenDefines.h) foreach(config trt ) diff --git a/apps/benchmarks/UniformGridGenerated/InitShearVelocity.h b/apps/benchmarks/UniformGridGenerated/InitShearVelocity.h new file mode 100644 index 0000000000000000000000000000000000000000..2aed66b1ade0380aed7ff59080367897937dae12 --- /dev/null +++ b/apps/benchmarks/UniformGridGenerated/InitShearVelocity.h @@ -0,0 +1,33 @@ +#include "core/math/Random.h" +#include "domain_decomposition/StructuredBlockStorage.h" + + +namespace walberla { + + +inline void initShearVelocity(const shared_ptr<StructuredBlockStorage> & blocks, BlockDataID velFieldID, + const real_t xMagnitude=0.005, const real_t fluctuationMagnitude=0.05 ) +{ + math::seedRandomGenerator(0); + auto halfZ = blocks->getDomainCellBB().zMax() / 2; + for( auto & block: *blocks) + { + auto velField = block.getData<GhostLayerField<real_t, 3> >( velFieldID ); + WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(velField, + Cell globalCell; + blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z)); + real_t randomReal = xMagnitude * math::realRandom<real_t>(-fluctuationMagnitude, fluctuationMagnitude); + velField->get(x, y, z, 1) = real_t(0); + velField->get(x, y, z, 2) = randomReal; + + if( globalCell[2] >= halfZ ) { + velField->get(x, y, z, 0) = xMagnitude; + } else { + velField->get(x, y, z, 0) = -xMagnitude; + } + ); + } +} + + +} \ No newline at end of file diff --git a/apps/benchmarks/UniformGridGenerated/UniformGrid.prm b/apps/benchmarks/UniformGridGenerated/UniformGrid.prm new file mode 100644 index 0000000000000000000000000000000000000000..6225155574aa1e6117dbb8d5ce5f8ea3b944bba8 --- /dev/null +++ b/apps/benchmarks/UniformGridGenerated/UniformGrid.prm @@ -0,0 +1,26 @@ +DomainSetup +{ + blocks < 1, 1, 1 >; + cellsPerBlock < 64, 64, 128 >; + periodic < 1, 1, 1 >; +} + +Parameters +{ + + timesteps 200; // time steps of one performance measurement + warmupSteps 10; // number of steps to run before measurement starts + outerIterations 15; // how many measurements to conduct + + vtkWriteFrequency 200; // write a VTK file every n'th step, if zero VTK output is disabled + cudaEnabledMPI false; // switch on if you have a CUDA-enabled MPI implementation + + timeStepStrategy kernelOnly; // can be: noOverlap, simpleOverlap, complexOverlap, kernelOnly + innerOuterSplit < 8, 1, 1>; // slice-thickness that 'outer'-kernels process when overlapping + + remainingTimeLoggerFrequency 0; // interval in seconds to log the estimated remaining time + + omega 1.8; + initShearFlow 1; + useGui 0; +} diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp index f48b0f7558b7caa2c7ceb9a77fc650b29d30d5cc..69db6180b0920daa9331b875ec4b4c011b94ce09 100644 --- a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp +++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp @@ -6,52 +6,26 @@ #include "python_coupling/DictWrapper.h" #include "blockforest/Initialization.h" #include "field/vtk/VTKWriter.h" +#include "field/AddToStorage.h" #include "field/communication/PackInfo.h" #include "blockforest/communication/UniformBufferedScheme.h" #include "timeloop/all.h" #include "core/timing/TimingPool.h" #include "core/timing/RemainingTimeLogger.h" #include "domain_decomposition/SharedSweep.h" -#include "lbm/communication/PdfFieldPackInfo.h" -#include "lbm/field/AddToStorage.h" -#include "lbm/vtk/VTKOutput.h" -#include "lbm/gui/Connection.h" -#include "lbm/vtk/Velocity.h" #include "gui/Gui.h" +#include "InitShearVelocity.h" -#include "UniformGridGenerated_LatticeModel.h" -#include "UniformGridGenerated_Defines.h" - +#include "GenDefines.h" +#include "GenPackInfo.h" +#include "GenLbKernel.h" +#include "GenMacroGetter.h" +#include "GenMacroSetter.h" using namespace walberla; -typedef lbm::UniformGridGenerated_LatticeModel LatticeModel_T; -typedef LatticeModel_T::Stencil Stencil_T; -typedef LatticeModel_T::CommunicationStencil CommunicationStencil_T; -typedef lbm::PdfField< LatticeModel_T > PdfField_T; - - -void initShearVelocity(const shared_ptr<StructuredBlockStorage> & blocks, BlockDataID pdfFieldId, - const real_t xMagnitude=0.1, const real_t fluctuationMagnitude=0.05 ) -{ - math::seedRandomGenerator(0); - auto halfZ = blocks->getDomainCellBB().zMax() / 2; - for( auto & block: *blocks) - { - auto pdfField = block.getData<PdfField_T>( pdfFieldId ); - WALBERLA_FOR_ALL_CELLS_INCLUDING_GHOST_LAYER_XYZ(pdfField, - Cell globalCell; - blocks->transformBlockLocalToGlobalCell(globalCell, block, Cell(x, y, z)); - real_t randomReal = xMagnitude * math::realRandom<real_t>(-fluctuationMagnitude, fluctuationMagnitude); - - if( globalCell[2] >= halfZ ) { - pdfField->setDensityAndVelocity(x, y, z, Vector3<real_t>(xMagnitude, 0, randomReal), real_t(1.0)); - } else { - pdfField->setDensityAndVelocity(x, y, z, Vector3<real_t>(-xMagnitude, 0, randomReal), real_t(1.0)); - } - ); - } -} +using PdfField_T = GhostLayerField< real_t, Stencil_T::Q >; +using VelocityField_T = GhostLayerField< real_t, 3 >; int main( int argc, char **argv ) @@ -72,22 +46,25 @@ int main( int argc, char **argv ) const std::string timeStepStrategy = parameters.getParameter<std::string>( "timeStepStrategy", "normal"); const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 )); const uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 50 )); - const bool initShearFlow = parameters.getParameter<bool>("initShearFlow", false); + const real_t shearVelocityMagnitude = parameters.getParameter<real_t>("shearVelocityMagnitude", 0.08); // Creating fields - LatticeModel_T latticeModel = LatticeModel_T( omega ); - BlockDataID pdfFieldId = lbm::addPdfFieldToStorage( blocks, "pdf field", latticeModel); + BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", real_t( std::nan("") ), field::fzyx ); + BlockDataID velFieldId = field::addToStorage< VelocityField_T >( blocks, "vel", real_t( 0 ), field::fzyx ); - if( initShearFlow ) { - initShearVelocity(blocks, pdfFieldId); - } + pystencils::GenMacroSetter setterKernel(pdfFieldId, velFieldId); + pystencils::GenMacroGetter getterKernel(pdfFieldId, velFieldId); + + initShearVelocity(blocks, velFieldId, shearVelocityMagnitude); + for( auto & b : *blocks) + setterKernel(&b); SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps ); - blockforest::communication::UniformBufferedScheme< CommunicationStencil_T > communication( blocks ); - communication.addPackInfo( make_shared< lbm::PdfFieldPackInfo< LatticeModel_T > >( pdfFieldId ) ); + blockforest::communication::UniformBufferedScheme< Stencil_T > communication( blocks ); + communication.addPackInfo( make_shared< pystencils::GenPackInfo >( pdfFieldId ) ); timeLoop.add() << BeforeFunction( communication, "communication" ) - << Sweep( LatticeModel_T::Sweep( pdfFieldId ), "LB stream & collide" ); + << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide" ); int warmupSteps = parameters.getParameter<int>( "warmupSteps", 2 ); int outerIterations = parameters.getParameter<int>( "outerIterations", 1 ); @@ -106,8 +83,12 @@ int main( int argc, char **argv ) { auto vtkOutput = vtk::createVTKOutput_BlockData( *blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out", "simulation_step", false, true, true, false, 0 ); - auto velWriter = make_shared< lbm::VelocityVTKWriter<LatticeModel_T> >(pdfFieldId, "vel"); - vtkOutput->addCellDataWriter(velWriter); + auto velWriter = make_shared< field::VTKWriter< VelocityField_T > >( velFieldId, "vel" ); + vtkOutput->addCellDataWriter( velWriter ); + vtkOutput->addBeforeFunction( [&]() + { for( auto & b : *blocks) + getterKernel(&b); + } ); timeLoop.addFuncAfterTimeStep( vtk::writeFiles( vtkOutput ), "VTK Output" ); } @@ -116,7 +97,6 @@ int main( int argc, char **argv ) if( useGui ) { GUI gui( timeLoop, blocks, argc, argv); - lbm::connectToGui<LatticeModel_T>(gui); gui.run(); } else diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py index 8f7bf791caaa1f37a65ede2782f4d09f6e7a6019..7027cf7ee7672eafff9a405df06e8bdc6389073d 100644 --- a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py +++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py @@ -1,9 +1,9 @@ import sympy as sp import pystencils as ps -from lbmpy.creationfunctions import create_lb_collision_rule +from lbmpy.creationfunctions import create_lb_update_rule from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor -from lbmpy_walberla import generate_lattice_model -from pystencils_walberla import CodeGeneration +from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep +from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter omega = sp.symbols("omega") omega_fill = sp.symbols("omega_:10") @@ -81,8 +81,9 @@ with CodeGeneration() as ctx: 'field_name': 'pdfs', 'temporary_field_name': 'pdfs_tmp', 'kernel_type': accessor, - 'optimization': {'cse_global': True, - 'cse_pdfs': False} + 'optimization': {'cse_global': False, + 'cse_pdfs': True, + 'split': True} } config_name = ctx.config noopt = False @@ -94,6 +95,8 @@ with CodeGeneration() as ctx: d3q27 = True config_name = config_name[:-len("_d3q27")] + if config_name == '': + config_name = 'trt' options = options_dict[config_name] options.update(common_options) options = options.copy() @@ -109,20 +112,22 @@ with CodeGeneration() as ctx: pdfs, velocity_field = ps.fields("pdfs({q}), velocity(3) : double[3D]".format(q=q), layout='fzyx') options['optimization']['symbolic_field'] = pdfs - vp = [ - ('double', 'omega_0'), - ('double', 'omega_1'), - ('double', 'omega_2'), - ('double', 'omega_3'), - ('double', 'omega_4'), - ('double', 'omega_5'), - ('double', 'omega_6'), - ('int32_t', 'cudaBlockSize0'), - ('int32_t', 'cudaBlockSize1'), - ] - update_rule = create_lb_collision_rule(**options) - generate_lattice_model(ctx, 'UniformGridGenerated_LatticeModel', update_rule) + update_rule = create_lb_update_rule(**options) + vec = {'nontemporal': True, 'assume_aligned': True, 'assume_inner_stride_one': True} + # Sweeps + generate_sweep(ctx, 'GenLbKernel', update_rule, field_swaps=[('pdfs', 'pdfs_tmp')]) + setter_assignments = macroscopic_values_setter(update_rule.method, velocity=velocity_field.center_vector, + pdfs=pdfs.center_vector, density=1) + getter_assignments = macroscopic_values_getter(update_rule.method, velocity=velocity_field.center_vector, + pdfs=pdfs.center_vector, density=None) + generate_sweep(ctx, 'GenMacroSetter', setter_assignments) + generate_sweep(ctx, 'GenMacroGetter', getter_assignments) + + # Communication + generate_pack_info_from_kernel(ctx, 'GenPackInfo', update_rule, cpu_vectorize_info={'instruction_set': None}) + + # Info Header infoHeaderParams = { 'stencil': stencil_str, 'q': q, @@ -130,4 +135,5 @@ with CodeGeneration() as ctx: 'cse_global': int(options['optimization']['cse_global']), 'cse_pdfs': int(options['optimization']['cse_pdfs']), } - ctx.write_file("UniformGridGenerated_Defines.h", info_header.format(**infoHeaderParams)) + ctx.write_file("GenDefines.h", info_header.format(**infoHeaderParams)) +