#include "core/Environment.h"
#include "python_coupling/CreateConfig.h"
#include "blockforest/Initialization.h"
#include "lbm/field/PdfField.h"
#include "lbm/field/AddToStorage.h"
#include "field/FlagField.h"
#include "field/AddToStorage.h"
#include "lbm/communication/PdfFieldPackInfo.h"
#include "lbm/vtk/VTKOutput.h"
#include "lbm/PerformanceLogger.h"
#include "blockforest/communication/UniformBufferedScheme.h"
#include "timeloop/all.h"
#include "core/math/Random.h"
#include "geometry/all.h"
#include "cuda/HostFieldAllocator.h"
#include "cuda/communication/GPUPackInfo.h"
#include "cuda/ParallelStreams.h"
#include "core/timing/TimingPool.h"
#include "core/timing/RemainingTimeLogger.h"
#include "cuda/AddGPUFieldToStorage.h"
#include "cuda/communication/UniformGPUScheme.h"
#include "cuda/DeviceSelectMPI.h"
#include "lbm/sweeps/CellwiseSweep.h"
#include "domain_decomposition/SharedSweep.h"
#include "UniformGridGPU_LatticeModel.h"
#include "UniformGridGPU_LbKernel.h"
#include "UniformGridGPU_PackInfo.h"
#include "UniformGridGPU_UBB.h"
#include "UniformGridGPU_NoSlip.h"
using namespace walberla;
using LatticeModel_T = lbm::UniformGridGPU_LatticeModel;
using Stencil_T = LatticeModel_T::Stencil;
using CommunicationStencil_T = LatticeModel_T::CommunicationStencil;
using PdfField_T = lbm::PdfField<LatticeModel_T>;
using CommScheme_T = cuda::communication::UniformGPUScheme<CommunicationStencil_T>;
using flag_t = walberla::uint8_t;
using FlagField_T = FlagField<flag_t>;
int main( int argc, char **argv )
{
   mpi::Environment env( argc, argv );
   cuda::selectDeviceBasedOnMpiRank();
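
   // One simulation run is executed per parameter configuration supplied by the Python / config-file driver.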
   for( auto cfg = python_coupling::configBegin( argc, argv ); cfg != python_coupling::configEnd(); ++cfg )
   {
      auto config = *cfg;
      auto blocks = blockforest::createUniformBlockGridFromConfig( config );

      // Reading parameters
      auto parameters = config->getOneBlock( "Parameters" );
      const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 ));
      const uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 50 ));
      const Vector3<real_t> initialVelocity = parameters.getParameter< Vector3<real_t> >( "initialVelocity", Vector3<real_t>() );

      // Creating fields
      auto latticeModel = LatticeModel_T( omega );
      BlockDataID pdfFieldCpuID = lbm::addPdfFieldToStorage( blocks, "pdfs on CPU", latticeModel, initialVelocity, real_t(1), field::fzyx );
      BlockDataID pdfFieldGpuID = cuda::addGPUFieldToStorage<PdfField_T>( blocks, pdfFieldCpuID, "pdfs on GPU", true );
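      // The GPU field is created as a device-side copy of the CPU PDF field; all compute kernels below operate on it.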
      BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >( blocks, "flag field" );

      // Boundaries
      const FlagUID fluidFlagUID( "Fluid" );
      auto boundariesConfig = config->getOneBlock( "Boundaries" );
      geometry::initBoundaryHandling<FlagField_T>( *blocks, flagFieldID, boundariesConfig );
      geometry::setNonBoundaryCellsToDomain<FlagField_T>( *blocks, flagFieldID, fluidFlagUID );

      lbm::UniformGridGPU_UBB ubb( blocks, pdfFieldGpuID );
      lbm::UniformGridGPU_NoSlip noSlip( blocks, pdfFieldGpuID );
      //lbm::GeneratedFixedDensity pressure( blocks, pdfFieldGpuID );

      ubb.fillFromFlagField<FlagField_T>( blocks, flagFieldID, FlagUID("UBB"), fluidFlagUID );
      noSlip.fillFromFlagField<FlagField_T>( blocks, flagFieldID, FlagUID("NoSlip"), fluidFlagUID );
      //pressure.fillFromFlagField<FlagField_T>( blocks, flagFieldID, FlagUID("pressure"), fluidFlagUID );

      // Communication setup
      bool overlapCommunication = parameters.getParameter<bool>( "overlapCommunication", true );
      bool cudaEnabledMPI = parameters.getParameter<bool>( "cudaEnabledMPI", false );
      int streamHighPriority = 0;
      int streamLowPriority = 0;
      WALBERLA_CUDA_CHECK( cudaDeviceGetStreamPriorityRange( &streamLowPriority, &streamHighPriority ));
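      // The device's stream priority range is queried above; boundary and outer kernels run on high-priority
      // streams so that the communication-critical outer region finishes as early as possible.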
      pystencils::UniformGridGPU_LbKernel lbKernel( pdfFieldGpuID, omega );
      lbKernel.setOuterPriority( streamHighPriority );
      CommScheme_T gpuComm( blocks, cudaEnabledMPI );
      gpuComm.addPackInfo( make_shared<pystencils::UniformGridGPU_PackInfo>( pdfFieldGpuID ));

      auto defaultStream = cuda::StreamRAII::newPriorityStream( streamLowPriority );
      auto innerOuterStreams = cuda::ParallelStreams( streamHighPriority );
      auto boundaryOuterStreams = cuda::ParallelStreams( streamHighPriority );
      auto boundaryInnerStreams = cuda::ParallelStreams( streamHighPriority );
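      // Overlapping time step: the interior of each block is updated concurrently with the ghost-layer exchange
      // and the update of the outer cells, hiding communication latency behind bulk computation.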
      auto overlapTimeStep = [&]()
      {
         auto innerOuterSection = innerOuterStreams.parallelSection( defaultStream );
         innerOuterSection.run( [&]( auto innerStream )
         {
            for( auto &block: *blocks )
            {
               {
                  auto p = boundaryInnerStreams.parallelSection( innerStream );
                  p.run( [&]( auto s ) { ubb.inner( &block, s ); } );
                  p.run( [&]( auto s ) { noSlip.inner( &block, s ); } );
               }
               lbKernel.inner( &block, innerStream );
            }
         });
         innerOuterSection.run( [&]( auto outerStream )
         {
            gpuComm( outerStream );
            for( auto &block: *blocks )
            {
               {
                  auto p = boundaryOuterStreams.parallelSection( outerStream );
                  p.run( [&]( auto s ) { ubb.outer( &block, s ); } );
                  p.run( [&]( auto s ) { noSlip.outer( &block, s ); } );
               }
               lbKernel.outer( &block, outerStream );
            }
         });
      };
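      // Non-overlapping time step: communicate first, then apply boundaries and the LB sweep on the full domain.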
      auto boundaryStreams = cuda::ParallelStreams( streamHighPriority );
      auto normalTimeStep = [&]()
      {
         gpuComm();
         for( auto &block: *blocks )
         {
            {
               auto p = boundaryStreams.parallelSection( defaultStream );
               p.run( [&]( auto s ) { ubb( &block, s ); } );
               p.run( [&]( auto s ) { noSlip( &block, s ); } );
            }
            lbKernel( &block );
         }
      };

      SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps );
      std::function<void()> timeStep = overlapCommunication ? std::function<void()>( overlapTimeStep )
                                                            : std::function<void()>( normalTimeStep );
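      // The selected time step runs as a BeforeFunction; the timeloop itself only executes an empty dummy sweep.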
      timeLoop.add() << BeforeFunction( timeStep )
                     << Sweep( []( IBlock * ) {}, "time step" );

      // VTK
      uint_t vtkWriteFrequency = parameters.getParameter<uint_t>( "vtkWriteFrequency", 0 );
      if( vtkWriteFrequency > 0 )
      {
         auto vtkOutput = vtk::createVTKOutput_BlockData( *blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
                                                          "simulation_step", false, true, true, false, 0 );
         vtkOutput->addCellDataWriter( make_shared<lbm::VelocityVTKWriter<LatticeModel_T> >( pdfFieldCpuID, "Velocity" ));
         vtkOutput->addCellDataWriter( make_shared<lbm::DensityVTKWriter<LatticeModel_T> >( pdfFieldCpuID, "Density" ));
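         // The VTK writers read the CPU field, so the PDFs are copied from the GPU before every output step.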
         vtkOutput->addBeforeFunction(
            cuda::fieldCpyFunctor<PdfField_T, cuda::GPUField<real_t> >( blocks, pdfFieldCpuID, pdfFieldGpuID ));
         timeLoop.addFuncAfterTimeStep( vtk::writeFiles( vtkOutput ), "VTK Output" );
      }

      auto remainingTimeLoggerFrequency = parameters.getParameter< double >( "remainingTimeLoggerFrequency", 3.0 ); // in seconds
      timeLoop.addFuncAfterTimeStep( timing::RemainingTimeLogger( timeLoop.getNrOfTimeSteps(), remainingTimeLoggerFrequency ), "remaining time logger" );

      lbm::PerformanceLogger<FlagField_T> performanceLogger( blocks, flagFieldID, fluidFlagUID, 500 );
      timeLoop.addFuncAfterTimeStep( performanceLogger, "performance logger" );

      timeLoop.run();
   }

   return 0;
}