Skip to content
Snippets Groups Projects
Commit 16c96838 authored by Markus Holzer's avatar Markus Holzer
Browse files

Implemented tutorial 03 for GPU usage

parent 49ba0e86
Branches
Tags
No related merge requests found
......@@ -22,6 +22,16 @@
#include "core/all.h"
#if defined(WALBERLA_BUILD_WITH_CUDA)
#include "cuda/HostFieldAllocator.h"
#include "cuda/ParallelStreams.h"
#include "cuda/NVTX.h"
#include "cuda/AddGPUFieldToStorage.h"
#include "cuda/communication/GPUPackInfo.h"
#include "cuda/DeviceSelectMPI.h"
#include "cuda/communication/UniformGPUScheme.h"
#endif
#include "domain_decomposition/all.h"
#include "field/all.h"
......@@ -29,8 +39,6 @@
#include "geometry/all.h"
#include "lbm/vtk/VTKOutput.h"
#include "stencil/D2Q9.h"
#include "timeloop/all.h"
......@@ -64,6 +72,10 @@ typedef walberla::uint8_t flag_t;
typedef FlagField< flag_t > FlagField_T;
typedef lbm::CumulantMRTNoSlip NoSlip_T;
#if defined(WALBERLA_BUILD_WITH_CUDA)
typedef cuda::GPUField<double> GPUField;
#endif
//////////////////////////////////////////
/// Shear Flow Velocity Initialization ///
//////////////////////////////////////////
......@@ -119,15 +131,24 @@ int main(int argc, char** argv)
const real_t omega = parameters.getParameter< real_t >("omega", real_c(1.8));
const double remainingTimeLoggerFrequency =
parameters.getParameter< double >("remainingTimeLoggerFrequency", 3.0); // in seconds
const int32_t VTKwriteFrequency = parameters.getParameter< int32_t >("VTKwriteFrequency", 1000);
////////////////////////////////////
/// PDF Field and Velocity Setup ///
////////////////////////////////////
#if defined(WALBERLA_BUILD_WITH_CUDA)
// CPU fields
BlockDataID velocityFieldId = field::addToStorage< VectorField_T >(blocks, "velocity", real_c(0.0), field::fzyx);
BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
// GPU fields
BlockDataID pdfFieldId = cuda::addGPUFieldToStorage<cuda::GPUField<real_t >>( blocks,"pdf field on GPU", Stencil_T::Size, field::fzyx, uint_t (1));
BlockDataID velocityFieldIdGPU = cuda::addGPUFieldToStorage<VectorField_T >( blocks, velocityFieldId, "velocity on GPU", true );
#else
BlockDataID velocityFieldId = field::addToStorage< VectorField_T >(blocks, "velocity", real_c(0.0), field::fzyx);
BlockDataID pdfFieldId = field::addToStorage< PdfField_T >(blocks, "pdf field", real_c(0.0), field::fzyx);
BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field");
#endif
// Velocity field setup
auto shearFlowSetup = walberlaEnv.config()->getOneBlock("ShearFlowSetup");
......@@ -136,13 +157,28 @@ int main(int argc, char** argv)
real_t rho = shearFlowSetup.getParameter("rho", real_c(1.0));
// pdfs setup
#if defined(WALBERLA_BUILD_WITH_CUDA)
cuda::fieldCpy<GPUField, VectorField_T>(blocks, velocityFieldIdGPU, velocityFieldId);
pystencils::InitialPDFsSetter pdfSetter(pdfFieldId, velocityFieldIdGPU, rho);
#else
pystencils::InitialPDFsSetter pdfSetter(pdfFieldId, velocityFieldId, rho);
#endif
for (auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt)
{
pdfSetter(&(*blockIt));
}
/////////////
/// Sweep ///
/////////////
#if defined(WALBERLA_BUILD_WITH_CUDA)
pystencils::CumulantMRTSweep CumulantMRTSweep(pdfFieldId, velocityFieldIdGPU, omega);
#else
pystencils::CumulantMRTSweep CumulantMRTSweep(pdfFieldId, velocityFieldId, omega);
#endif
/////////////////////////
/// Boundary Handling ///
/////////////////////////
......@@ -165,29 +201,37 @@ int main(int argc, char** argv)
SweepTimeloop timeloop(blocks->getBlockStorage(), timesteps);
// Communication
#if defined(WALBERLA_BUILD_WITH_CUDA)
cuda::communication::UniformGPUScheme< Stencil_T > com( blocks, 0 );
com.addPackInfo(make_shared< PackInfo_T >(pdfFieldId));
auto simpleOverlapTimeStep = [&]()
{
com.communicate(nullptr);
};
auto communication = std::function<void()>( simpleOverlapTimeStep );
#else
blockforest::communication::UniformBufferedScheme< Stencil_T > communication(blocks);
communication.addPackInfo(make_shared< PackInfo_T >(pdfFieldId));
#endif
// Timeloop
timeloop.add() << BeforeFunction(communication, "communication") << Sweep(noSlip);
timeloop.add() << Sweep(pystencils::CumulantMRTSweep(pdfFieldId, velocityFieldId, omega));
// Stability Checker
timeloop.addFuncAfterTimeStep(makeSharedFunctor(field::makeStabilityChecker< PdfField_T, FlagField_T >(
walberlaEnv.config(), blocks, pdfFieldId, flagFieldId, fluidFlagUID)),
"LBM stability check");
timeloop.add() << Sweep(CumulantMRTSweep);
// Time logger
timeloop.addFuncAfterTimeStep(timing::RemainingTimeLogger(timeloop.getNrOfTimeSteps(), remainingTimeLoggerFrequency),
"remaining time logger");
uint_t vtkWriteFrequency = uint_c(100);
if (vtkWriteFrequency > 0)
if (VTKwriteFrequency > 0)
{
const std::string path = "vtk_out/tut03";
auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "cumulant_mrt_velocity_field", vtkWriteFrequency, 0,
false, path, "simulation_step", false, true, true, false, 0);
auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "cumulant_mrt_velocity_field", VTKwriteFrequency, 0,
false, path, "simulation_step", false, true, true, false, 0);
#if defined(WALBERLA_BUILD_WITH_CUDA)
vtkOutput->addBeforeFunction( [&]() {
cuda::fieldCpy<VectorField_T, GPUField>(blocks, velocityFieldId, velocityFieldIdGPU);
});
#endif
auto velWriter = make_shared< field::VTKWriter< VectorField_T > >(velocityFieldId, "Velocity");
vtkOutput->addCellDataWriter(velWriter);
......
......@@ -2,9 +2,10 @@
Parameters
{
omega 1.8;
timesteps 100000;
timesteps 10001;
remainingTimeLoggerFrequency 3; // in seconds
VTKwriteFrequency 1000;
}
ShearFlowSetup
......@@ -35,21 +36,3 @@ Boundaries
{
Border { direction S,N; walldistance -1; flag NoSlip; }
}
VTK
{
velocity_field
{
writeFrequency 10;
ghostLayers 1;
inclusion_filters {
DomainFilter;
}
writers {
Velocity;
}
}
}
......@@ -64,15 +64,19 @@ pdfs_setter = macroscopic_values_setter(lbm_method,
# =====================
with CodeGeneration() as ctx:
if ctx.cuda:
target = 'gpu'
else:
target = 'cpu'
# LBM Sweep
generate_sweep(ctx, "CumulantMRTSweep", lbm_update_rule, field_swaps=[(pdfs, pdfs_tmp)])
generate_sweep(ctx, "CumulantMRTSweep", lbm_update_rule, field_swaps=[(pdfs, pdfs_tmp)], target=target)
# Pack Info
generate_pack_info_from_kernel(ctx, "CumulantMRTPackInfo", lbm_update_rule)
generate_pack_info_from_kernel(ctx, "CumulantMRTPackInfo", lbm_update_rule, target=target)
# Macroscopic Values Setter
generate_sweep(ctx, "InitialPDFsSetter", pdfs_setter)
generate_sweep(ctx, "InitialPDFsSetter", pdfs_setter, target=target)
# NoSlip Boundary
generate_boundary(ctx, "CumulantMRTNoSlip", NoSlip(), lbm_method)
generate_boundary(ctx, "CumulantMRTNoSlip", NoSlip(), lbm_method, target=target)
......@@ -24,16 +24,28 @@ if( WALBERLA_BUILD_WITH_CODEGEN )
DEPENDS blockforest core domain_decomposition field geometry timeloop lbm stencil vtk 02_LBMLatticeModelGenerationPython )
# Tutorial 3: Advanced lbmpy Code Generation
walberla_generate_target_from_python( NAME 03_AdvancedLBMCodegenPython
FILE 03_AdvancedLBMCodegen.py
OUT_FILES CumulantMRTSweep.cpp CumulantMRTSweep.h
CumulantMRTPackInfo.cpp CumulantMRTPackInfo.h
InitialPDFsSetter.cpp InitialPDFsSetter.h
CumulantMRTNoSlip.cpp CumulantMRTNoSlip.h)
walberla_add_executable ( NAME 03_AdvancedLBMCodegenApp
FILES 03_AdvancedLBMCodegen.cpp
DEPENDS blockforest core domain_decomposition field geometry timeloop lbm stencil vtk 03_AdvancedLBMCodegenPython )
if(WALBERLA_BUILD_WITH_CUDA)
walberla_generate_target_from_python( NAME 03_AdvancedLBMCodegenPython
FILE 03_AdvancedLBMCodegen.py
OUT_FILES CumulantMRTSweep.cu CumulantMRTSweep.h
CumulantMRTPackInfo.cu CumulantMRTPackInfo.h
InitialPDFsSetter.cu InitialPDFsSetter.h
CumulantMRTNoSlip.cu CumulantMRTNoSlip.h)
walberla_add_executable ( NAME 03_AdvancedLBMCodegenApp
FILES 03_AdvancedLBMCodegen.cpp
DEPENDS blockforest cuda core domain_decomposition field geometry timeloop lbm stencil vtk 03_AdvancedLBMCodegenPython )
else()
walberla_generate_target_from_python( NAME 03_AdvancedLBMCodegenPython
FILE 03_AdvancedLBMCodegen.py
OUT_FILES CumulantMRTSweep.cpp CumulantMRTSweep.h
CumulantMRTPackInfo.cpp CumulantMRTPackInfo.h
InitialPDFsSetter.cpp InitialPDFsSetter.h
CumulantMRTNoSlip.cpp CumulantMRTNoSlip.h)
walberla_add_executable ( NAME 03_AdvancedLBMCodegenApp
FILES 03_AdvancedLBMCodegen.cpp
DEPENDS blockforest core domain_decomposition field geometry timeloop lbm stencil vtk 03_AdvancedLBMCodegenPython )
endif()
endif()
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment