Skip to content
Snippets Groups Projects
Commit 6dc55108 authored by Martin Bauer's avatar Martin Bauer
Browse files

UniformGridGPU

- fix in time measurement
- options to disable boundaries and/or communication
- priority for communication & outer part of kernel
parent 628f2060
No related merge requests found
......@@ -87,7 +87,10 @@ int main( int argc, char **argv )
noSlip.fillFromFlagField<FlagField_T>( blocks, flagFieldID, FlagUID("NoSlip"), fluidFlagUID );
//pressure.fillFromFlagField<FlagField_T>( blocks, flagFieldID, FlagUID("pressure"), fluidFlagUID );
// Communication setup
bool disableBoundaries = parameters.getParameter<bool>( "disableBoundaries", false );
bool kernelOnly = parameters.getParameter<bool>( "kernelOnly", false );
// Communication setup
bool overlapCommunication = parameters.getParameter<bool>( "overlapCommunication", true );
bool cudaEnabledMPI = parameters.getParameter<bool>( "cudaEnabledMPI", false );
int communicationScheme = parameters.getParameter<int>( "communicationScheme", (int) CommunicationSchemeType::UniformGPUScheme_Baseline );
......@@ -98,8 +101,6 @@ int main( int argc, char **argv )
pystencils::UniformGridGPU_LbKernel lbKernel( pdfFieldGpuID, omega );
lbKernel.setOuterPriority( streamHighPriority );
//CommScheme_T gpuComm( blocks, cudaEnabledMPI );
//gpuComm.addPackInfo( make_shared<pystencils::UniformGridGPU_PackInfo>( pdfFieldGpuID ));
UniformGridGPU_Communication< CommunicationStencil_T, cuda::GPUField< double > >
gpuComm( blocks, pdfFieldGpuID, (CommunicationSchemeType) communicationScheme, cudaEnabledMPI );
......@@ -116,6 +117,7 @@ int main( int argc, char **argv )
{
for( auto &block: *blocks )
{
if(!disableBoundaries)
{
auto p = boundaryInnerStreams.parallelSection( innerStream );
p.run( [&block, &ubb]( cudaStream_t s ) { ubb.inner( &block, s ); } );
......@@ -131,6 +133,7 @@ int main( int argc, char **argv )
for( auto &block: *blocks )
{
if(!disableBoundaries)
{
auto p = boundaryOuterStreams.parallelSection( outerStream );
p.run( [&block, &ubb]( cudaStream_t s ) { ubb.outer( &block, s ); } );
......@@ -148,6 +151,7 @@ int main( int argc, char **argv )
gpuComm();
for( auto &block: *blocks )
{
if(!disableBoundaries)
{
auto p = boundaryStreams.parallelSection( defaultStream );
p.run( [&block, &ubb]( cudaStream_t s ) { ubb( &block, s ); } );
......@@ -157,9 +161,21 @@ int main( int argc, char **argv )
}
};
auto kernelOnlyFunc = [&] ()
{
for( auto &block: *blocks )
lbKernel( &block );
};
SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps );
std::function<void()> timeStep = overlapCommunication ? std::function<void()>( overlapTimeStep ) :
std::function<void()>( normalTimeStep );
if( kernelOnly )
{
WALBERLA_LOG_INFO_ON_ROOT("Running only compute kernel without boundary - this makes only sense for benchmarking!")
timeStep = kernelOnlyFunc;
}
timeLoop.add() << BeforeFunction( timeStep )
<< Sweep( []( IBlock * ) {}, "time step" );
......@@ -185,8 +201,8 @@ int main( int argc, char **argv )
cudaDeviceSynchronize();
WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps");
simTimer.start();
cudaDeviceSynchronize();
timeLoop.run();
cudaDeviceSynchronize();
simTimer.end();
WALBERLA_LOG_INFO_ON_ROOT("Simulation finished");
auto time = simTimer.last();
......
......@@ -2,13 +2,16 @@
Parameters
{
omega 1.8;
timesteps 1000;
timesteps 500;
remainingTimeLoggerFrequency 3;
vtkWriteFrequency 0;
overlapCommunication true;
cudaEnabledMPI false;
kernelOnly false;
disableBoundaries false;
}
DomainSetup
......@@ -20,8 +23,10 @@ DomainSetup
Boundaries
{
/*
Border { direction W; walldistance -1; flag NoSlip; }
Border { direction E; walldistance -1; flag NoSlip; }
Border { direction S; walldistance -1; flag NoSlip; }
Border { direction N; walldistance -1; flag UBB; }
*/
}
......@@ -6,6 +6,10 @@ from lbmpy_walberla import generate_lattice_model, generate_boundary
from pystencils_walberla import CodeGeneration, generate_sweep
sweep_block_size = (128, 1, 1)
sweep_params = {'block_size': sweep_block_size}
with CodeGeneration() as ctx:
# LB options
options = {
......@@ -16,8 +20,8 @@ with CodeGeneration() as ctx:
'compressible': False,
'temporary_field_name': 'pdfs_tmp',
'optimization': {'cse_global': True,
'cse_pdfs': True,
'gpu_indexing_params': {'block_size': (128, 1, 1)}}
'cse_pdfs': False,
}
}
lb_method = create_lb_method(**options)
update_rule = create_lb_update_rule(lb_method=lb_method, **options)
......@@ -27,7 +31,7 @@ with CodeGeneration() as ctx:
# gpu LB sweep & boundaries
generate_sweep(ctx, 'UniformGridGPU_LbKernel', update_rule, field_swaps=[('pdfs', 'pdfs_tmp')],
inner_outer_split=True, target='gpu')
inner_outer_split=True, target='gpu', gpu_indexing_params=sweep_params)
generate_boundary(ctx, 'UniformGridGPU_NoSlip', NoSlip(), lb_method, target='gpu')
generate_boundary(ctx, 'UniformGridGPU_UBB', UBB([0.05, 0, 0]), lb_method, target='gpu')
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment