Skip to content
Snippets Groups Projects
Commit e33c19be authored by Martin Bauer
Browse files

UniformGridGPU more configurations

parent 2ad061e8
Branches
Tags
No related merge requests found
...@@ -13,7 +13,7 @@ waLBerla_python_file_generates(UniformGridGPU.py ...@@ -13,7 +13,7 @@ waLBerla_python_file_generates(UniformGridGPU.py
UniformGridGPU_Defines.h UniformGridGPU_Defines.h
) )
foreach(config srt trt mrt smagorinsky entropic ) foreach(config srt trt mrt smagorinsky entropic smagorinsky_noopt entropic_kbc_n4 entropic_kbc_n4_noopt mrt_noopt mrt_full mrt_full_noopt)
waLBerla_add_executable ( NAME UniformGridBenchmarkGPU_${config} waLBerla_add_executable ( NAME UniformGridBenchmarkGPU_${config}
FILES UniformGridGPU.cpp UniformGridGPU.py FILES UniformGridGPU.cpp UniformGridGPU.py
DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk gui DEPENDS blockforest boundary core cuda domain_decomposition field geometry timeloop vtk gui
......
...@@ -95,26 +95,32 @@ int main( int argc, char **argv ) ...@@ -95,26 +95,32 @@ int main( int argc, char **argv )
Vector3<uint_t> cellsPerBlock = config->getBlock( "DomainSetup" ).getParameter<Vector3<uint_t> >( "cellsPerBlock" ); Vector3<uint_t> cellsPerBlock = config->getBlock( "DomainSetup" ).getParameter<Vector3<uint_t> >( "cellsPerBlock" );
// Reading parameters // Reading parameters
auto parameters = config->getOneBlock( "Parameters" ); auto parameters = config->getOneBlock( "Parameters" );
const std::string timeStepStrategy = parameters.getParameter<std::string>( "timeStepStrategy", "normal");
const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 )); const real_t omega = parameters.getParameter<real_t>( "omega", real_c( 1.4 ));
const uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 50 )); const uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 50 ));
const bool initShearFlow = parameters.getParameter<bool>("initShearFlow", false); const bool initShearFlow = parameters.getParameter<bool>("initShearFlow", false);
// Creating fields // Creating fields
BlockDataID pdfFieldCpuID = field::addToStorage< PdfField_T >( blocks, "pdfs cpu", real_t(99.8), field::fzyx); BlockDataID pdfFieldCpuID = field::addToStorage< PdfField_T >( blocks, "pdfs cpu", real_t(0), field::fzyx);
BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >( blocks, "vel", real_t(0), field::fzyx); BlockDataID velFieldCpuID = field::addToStorage< VelocityField_T >( blocks, "vel", real_t(0), field::fzyx);
if( initShearFlow ) { if( timeStepStrategy != "kernelOnlyNoInit")
WALBERLA_LOG_INFO_ON_ROOT("Initializing shear flow"); {
initShearVelocity( blocks, velFieldCpuID ); if ( initShearFlow )
} {
pystencils::UniformGridGPU_MacroSetter setterSweep(pdfFieldCpuID, velFieldCpuID); WALBERLA_LOG_INFO_ON_ROOT( "Initializing shear flow" );
for( auto & block : *blocks ) initShearVelocity( blocks, velFieldCpuID );
setterSweep( &block ); }
// setter sweep only initializes interior of domain - for push schemes to work a first communication is required here
blockforest::communication::UniformBufferedScheme<CommunicationStencil_T> initialComm(blocks); pystencils::UniformGridGPU_MacroSetter setterSweep(pdfFieldCpuID, velFieldCpuID);
initialComm.addPackInfo( make_shared< field::communication::PackInfo<PdfField_T> >( pdfFieldCpuID ) ); for( auto & block : *blocks )
initialComm(); setterSweep( &block );
// setter sweep only initializes interior of domain - for push schemes to work a first communication is required here
blockforest::communication::UniformBufferedScheme<CommunicationStencil_T> initialComm(blocks);
initialComm.addPackInfo( make_shared< field::communication::PackInfo<PdfField_T> >( pdfFieldCpuID ) );
initialComm();
}
BlockDataID pdfFieldGpuID = cuda::addGPUFieldToStorage<PdfField_T >( blocks, pdfFieldCpuID, "pdfs on GPU", true ); BlockDataID pdfFieldGpuID = cuda::addGPUFieldToStorage<PdfField_T >( blocks, pdfFieldCpuID, "pdfs on GPU", true );
BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >( blocks, "flag field" ); BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >( blocks, "flag field" );
...@@ -165,7 +171,9 @@ int main( int argc, char **argv ) ...@@ -165,7 +171,9 @@ int main( int argc, char **argv )
int streamLowPriority = 0; int streamLowPriority = 0;
WALBERLA_CUDA_CHECK( cudaDeviceGetStreamPriorityRange(&streamLowPriority, &streamHighPriority) ); WALBERLA_CUDA_CHECK( cudaDeviceGetStreamPriorityRange(&streamLowPriority, &streamHighPriority) );
WALBERLA_CHECK(gpuBlockSize[2] == 1); WALBERLA_CHECK(gpuBlockSize[2] == 1);
pystencils::UniformGridGPU_LbKernel lbKernel( pdfFieldGpuID, omega, gpuBlockSize[0], gpuBlockSize[1], pystencils::UniformGridGPU_LbKernel lbKernel( pdfFieldGpuID, omega,
1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7,
gpuBlockSize[0], gpuBlockSize[1],
Cell(innerOuterSplit[0], innerOuterSplit[1], innerOuterSplit[2]) ); Cell(innerOuterSplit[0], innerOuterSplit[1], innerOuterSplit[2]) );
lbKernel.setOuterPriority( streamHighPriority ); lbKernel.setOuterPriority( streamHighPriority );
UniformGridGPU_Communication< CommunicationStencil_T, cuda::GPUField< double > > UniformGridGPU_Communication< CommunicationStencil_T, cuda::GPUField< double > >
...@@ -252,7 +260,6 @@ int main( int argc, char **argv ) ...@@ -252,7 +260,6 @@ int main( int argc, char **argv )
SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps ); SweepTimeloop timeLoop( blocks->getBlockStorage(), timesteps );
const std::string timeStepStrategy = parameters.getParameter<std::string>( "timeStepStrategy", "normal");
std::function<void()> timeStep; std::function<void()> timeStep;
if (timeStepStrategy == "noOverlap") if (timeStepStrategy == "noOverlap")
timeStep = std::function<void()>( normalTimeStep ); timeStep = std::function<void()>( normalTimeStep );
...@@ -260,7 +267,7 @@ int main( int argc, char **argv ) ...@@ -260,7 +267,7 @@ int main( int argc, char **argv )
timeStep = std::function<void()>( overlapTimeStep ); timeStep = std::function<void()>( overlapTimeStep );
else if (timeStepStrategy == "simpleOverlap") else if (timeStepStrategy == "simpleOverlap")
timeStep = simpleOverlapTimeStep; timeStep = simpleOverlapTimeStep;
else if (timeStepStrategy == "kernelOnly") { else if (timeStepStrategy == "kernelOnly" or timeStepStrategy == "kernelOnlyNoInit") {
WALBERLA_LOG_INFO_ON_ROOT("Running only compute kernel without boundary - this makes only sense for benchmarking!") WALBERLA_LOG_INFO_ON_ROOT("Running only compute kernel without boundary - this makes only sense for benchmarking!")
timeStep = kernelOnlyFunc; timeStep = kernelOnlyFunc;
} }
......
...@@ -12,6 +12,7 @@ from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisio ...@@ -12,6 +12,7 @@ from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisio
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
omega = sp.symbols("omega") omega = sp.symbols("omega")
omega_fill = sp.symbols("omega_:10")
compile_time_block_size = False compile_time_block_size = False
if compile_time_block_size: if compile_time_block_size:
...@@ -40,6 +41,11 @@ options_dict = { ...@@ -40,6 +41,11 @@ options_dict = {
'stencil': 'D3Q19', 'stencil': 'D3Q19',
'relaxation_rates': [0, omega, 1.3, 1.4, omega, 1.2, 1.1], 'relaxation_rates': [0, omega, 1.3, 1.4, omega, 1.2, 1.1],
}, },
'mrt_full': {
'method': 'mrt',
'stencil': 'D3Q19',
'relaxation_rates': [omega_fill[0], omega, omega_fill[1], omega_fill[2], omega_fill[3], omega_fill[4], omega_fill[5]],
},
'entropic': { 'entropic': {
'method': 'mrt3', 'method': 'mrt3',
'stencil': 'D3Q19', 'stencil': 'D3Q19',
...@@ -47,6 +53,13 @@ options_dict = { ...@@ -47,6 +53,13 @@ options_dict = {
'relaxation_rates': [omega, omega, sp.Symbol("omega_free")], 'relaxation_rates': [omega, omega, sp.Symbol("omega_free")],
'entropic': True, 'entropic': True,
}, },
'entropic_kbc_n4': {
'method': 'trt-kbc-n4',
'stencil': 'D3Q27',
'compressible': True,
'relaxation_rates': [omega, sp.Symbol("omega_free")],
'entropic': True,
},
'smagorinsky': { 'smagorinsky': {
'method': 'srt', 'method': 'srt',
'stencil': 'D3Q19', 'stencil': 'D3Q19',
...@@ -76,8 +89,19 @@ with CodeGeneration() as ctx: ...@@ -76,8 +89,19 @@ with CodeGeneration() as ctx:
'optimization': {'cse_global': True, 'optimization': {'cse_global': True,
'cse_pdfs': False} 'cse_pdfs': False}
} }
options = options_dict.get(ctx.config, options_dict['srt']) config_name = ctx.config
noopt = False
if config_name.endswith("_noopt"):
noopt = True
config_name = config_name[:-len("_noopt")]
options = options_dict[config_name]
options.update(common_options) options.update(common_options)
options = options.copy()
if noopt:
options['optimization']['cse_global'] = False
options['optimization']['cse_pdfs'] = False
stencil_str = options['stencil'] stencil_str = options['stencil']
q = int(stencil_str[stencil_str.find('Q')+1:]) q = int(stencil_str[stencil_str.find('Q')+1:])
...@@ -85,14 +109,22 @@ with CodeGeneration() as ctx: ...@@ -85,14 +109,22 @@ with CodeGeneration() as ctx:
options['optimization']['symbolic_field'] = pdfs options['optimization']['symbolic_field'] = pdfs
vp = [ vp = [
('double', 'omega_0'),
('double', 'omega_1'),
('double', 'omega_2'),
('double', 'omega_3'),
('double', 'omega_4'),
('double', 'omega_5'),
('double', 'omega_6'),
('int32_t', 'cudaBlockSize0'), ('int32_t', 'cudaBlockSize0'),
('int32_t', 'cudaBlockSize1') ('int32_t', 'cudaBlockSize1'),
] ]
lb_method = create_lb_method(**options) lb_method = create_lb_method(**options)
update_rule = create_lb_update_rule(lb_method=lb_method, **options) update_rule = create_lb_update_rule(lb_method=lb_method, **options)
update_rule = insert_fast_divisions(update_rule) if not noopt:
update_rule = insert_fast_sqrts(update_rule) update_rule = insert_fast_divisions(update_rule)
update_rule = insert_fast_sqrts(update_rule)
# CPU lattice model - required for macroscopic value computation, VTK output etc. # CPU lattice model - required for macroscopic value computation, VTK output etc.
options_without_opt = options.copy() options_without_opt = options.copy()
......
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment