Skip to content
Snippets Groups Projects
Commit 427a8c68 authored by Martin Bauer
Browse files

Refactored test for GPU communication

- includes a comparison with the old CPU-transfer communication
parent ff97334f
Branches
Tags
No related merge requests found
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
#include "field/FlagField.h" #include "field/FlagField.h"
#include "field/AddToStorage.h" #include "field/AddToStorage.h"
#include "lbm/communication/PdfFieldPackInfo.h" #include "lbm/communication/PdfFieldPackInfo.h"
#include "lbm/vtk/VTKOutput.h"
#include "lbm/PerformanceLogger.h" #include "lbm/PerformanceLogger.h"
#include "blockforest/communication/UniformBufferedScheme.h" #include "blockforest/communication/UniformBufferedScheme.h"
#include "timeloop/all.h" #include "timeloop/all.h"
...@@ -58,7 +57,8 @@ void initPdfField( const shared_ptr<StructuredBlockForest> &blocks, BlockDataID ...@@ -58,7 +57,8 @@ void initPdfField( const shared_ptr<StructuredBlockForest> &blocks, BlockDataID
auto globalZ = real_c( offset[2] + z ); auto globalZ = real_c( offset[2] + z );
auto xArg = real_c(std::sin(real_c(globalX) / real_t(4) * real_c(domainBB.size(0)) )); auto xArg = real_c(std::sin(real_c(globalX) / real_t(4) * real_c(domainBB.size(0)) ));
auto zArg = real_c(std::sin(real_c(globalZ) / real_t(4) * real_c(domainBB.size(2)) )); auto zArg = real_c(std::sin(real_c(globalZ) / real_t(4) * real_c(domainBB.size(2)) ));
pdfField->setToEquilibrium( x, y, z, Vector3<real_t>( 0.05 * std::sin(xArg), 0, 0.05 * std::cos(zArg))); pdfField->setToEquilibrium( x, y, z, Vector3<real_t>( real_t(0.05) * std::sin(xArg), 0,
real_t(0.05) * std::cos(zArg)));
); );
} }
} }
...@@ -82,6 +82,7 @@ int main( int argc, char **argv ) ...@@ -82,6 +82,7 @@ int main( int argc, char **argv )
BlockDataID flagFieldId = field::addFlagFieldToStorage<FlagField_T>( blocks, "flag field" ); BlockDataID flagFieldId = field::addFlagFieldToStorage<FlagField_T>( blocks, "flag field" );
const FlagUID fluidFlagUID( "Fluid" ); const FlagUID fluidFlagUID( "Fluid" );
geometry::setNonBoundaryCellsToDomain<FlagField_T>( *blocks, flagFieldId, fluidFlagUID ); geometry::setNonBoundaryCellsToDomain<FlagField_T>( *blocks, flagFieldId, fluidFlagUID );
GeneratedLatticeModel_T generatedLatticeModel = GeneratedLatticeModel_T( omega );
// Part 1 : Native walberla // Part 1 : Native walberla
...@@ -99,7 +100,6 @@ int main( int argc, char **argv ) ...@@ -99,7 +100,6 @@ int main( int argc, char **argv )
// Part 2: Generated CPU Version // Part 2: Generated CPU Version
GeneratedLatticeModel_T generatedLatticeModel = GeneratedLatticeModel_T( omega );
BlockDataID pdfFieldGeneratedId = lbm::addPdfFieldToStorage( blocks, "pdfGenerated", generatedLatticeModel, field::fzyx ); BlockDataID pdfFieldGeneratedId = lbm::addPdfFieldToStorage( blocks, "pdfGenerated", generatedLatticeModel, field::fzyx );
initPdfField<GeneratedPdfField_T >( blocks, pdfFieldGeneratedId ); initPdfField<GeneratedPdfField_T >( blocks, pdfFieldGeneratedId );
CpuCommScheme_T cpuComm( blocks ); CpuCommScheme_T cpuComm( blocks );
...@@ -113,6 +113,7 @@ int main( int argc, char **argv ) ...@@ -113,6 +113,7 @@ int main( int argc, char **argv )
// Part 3: Generated GPU Version // Part 3: Generated GPU Version
bool overlapCommunication = parameters.getParameter<bool>( "overlapCommunication", true ); bool overlapCommunication = parameters.getParameter<bool>( "overlapCommunication", true );
bool cudaEnabledMPI = parameters.getParameter<bool>( "cudaEnabledMPI", false ); bool cudaEnabledMPI = parameters.getParameter<bool>( "cudaEnabledMPI", false );
bool oldCommunication = parameters.getParameter<bool>( "oldCommunication", false );
BlockDataID pdfShadowCPU = lbm::addPdfFieldToStorage( blocks, "cpu shadow field", generatedLatticeModel, field::fzyx ); BlockDataID pdfShadowCPU = lbm::addPdfFieldToStorage( blocks, "cpu shadow field", generatedLatticeModel, field::fzyx );
initPdfField<GeneratedPdfField_T >( blocks, pdfShadowCPU ); initPdfField<GeneratedPdfField_T >( blocks, pdfShadowCPU );
...@@ -126,10 +127,23 @@ int main( int argc, char **argv ) ...@@ -126,10 +127,23 @@ int main( int argc, char **argv )
gpuComm.addPackInfo( make_shared<pystencils::EquivalenceTest_GPUPackInfo>( pdfGpuFieldId )); gpuComm.addPackInfo( make_shared<pystencils::EquivalenceTest_GPUPackInfo>( pdfGpuFieldId ));
auto runCommunication = [&]() { gpuComm(); }; auto runCommunication = [&]() { gpuComm(); };
CpuCommScheme_T oldGpuScheme( blocks );
std::vector<cudaStream_t > streams;
for(uint_t i=0; i < Stencil_T::Size; ++i ) {
cudaStream_t s;
cudaStreamCreate(&s);
streams.push_back(s);
}
using OldPackInfo = cuda::communication::GPUPackInfo<cuda::GPUField<real_t> >;
oldGpuScheme.addPackInfo( make_shared<OldPackInfo>(pdfGpuFieldId, streams) );
SweepTimeloop gpuTimeLoop( blocks->getBlockStorage(), timesteps ); SweepTimeloop gpuTimeLoop( blocks->getBlockStorage(), timesteps );
if( !overlapCommunication ) if( !overlapCommunication )
{ {
gpuTimeLoop.add() << BeforeFunction( runCommunication, "gpu communication" ) gpuTimeLoop.add() << (oldCommunication ? BeforeFunction(oldGpuScheme) :
BeforeFunction( runCommunication, "gpu communication" ))
<< Sweep( cudaLbKernel, "LB stream & collide gpu" ); << Sweep( cudaLbKernel, "LB stream & collide gpu" );
} }
else else
......
...@@ -3,6 +3,8 @@ from lbmpy_walberla import generate_lattice_model_files ...@@ -3,6 +3,8 @@ from lbmpy_walberla import generate_lattice_model_files
from lbmpy.creationfunctions import create_lb_update_rule from lbmpy.creationfunctions import create_lb_update_rule
from pystencils_walberla.sweep import Sweep from pystencils_walberla.sweep import Sweep
dtype = 'float64'
# LB options # LB options
options = { options = {
'method': 'srt', 'method': 'srt',
...@@ -12,14 +14,14 @@ options = { ...@@ -12,14 +14,14 @@ options = {
'compressible': False, 'compressible': False,
'maxwellian_moments': False, 'maxwellian_moments': False,
'temporary_field_name': 'pdfs_tmp', 'temporary_field_name': 'pdfs_tmp',
'optimization': {'cse_global': False, 'optimization': {'cse_global': True,
'cse_pdfs': False, 'cse_pdfs': True,
'double_precision': True} 'double_precision': dtype == 'float64'}
} }
# GPU optimization options # GPU optimization options
opt = {'gpu_indexing_params': {'block_size': (128, 2, 1)}, 'data_type': 'float64'} opt = {'gpu_indexing_params': {'block_size': (128, 1, 1)}, 'data_type': dtype}
outer_opt = {'gpu_indexing_params': {'block_size': (32, 32, 32)}, 'data_type': 'float64'} outer_opt = {'gpu_indexing_params': {'block_size': (32, 32, 32)}, 'data_type': dtype}
def lb_assignments(): def lb_assignments():
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment