Commit 2d6fd6f0 authored by Martin Bauer
Browse files

UniformGridGenerated: parallel initialization for NUMA first touch

parent c933bf25
......@@ -53,9 +53,18 @@ int main( int argc, char **argv )
uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 60 ));
const real_t shearVelocityMagnitude = parameters.getParameter<real_t>("shearVelocityMagnitude", 0.08);
// Factory callback that creates the PDF field for one block. The field is
// sized from the block's x/y/z cell counts as reported by the storage, uses
// the fzyx layout and an AllocateAligned<real_t, 64> allocator. Unlike the
// field::addToStorage variant, no initial value is passed to the field here.
// NOTE(review): presumably this leaves the data untouched at allocation so
// that the OpenMP-parallel setter kernel performs the NUMA first touch —
// confirm against the PdfField_T constructor.
auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage * const storage) {
return new PdfField_T(storage->getNumberOfXCells(*block),
storage->getNumberOfYCells(*block),
storage->getNumberOfZCells(*block),
uint_t(1), // NOTE(review): presumably the number of ghost layers — confirm
field::fzyx,
make_shared<field::AllocateAligned<real_t, 64>>());
};
// Creating fields
//BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", real_t( std::nan("") ), field::fzyx );
BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", 0.0, field::fzyx );
BlockDataID pdfFieldId = blocks->addStructuredBlockData<PdfField_T>(pdfFieldAdder, "pdfs");
BlockDataID velFieldId = field::addToStorage< VelocityField_T >( blocks, "vel", real_t( 0 ), field::fzyx );
pystencils::GenMacroSetter setterKernel(pdfFieldId, velFieldId);
......@@ -78,21 +87,21 @@ int main( int argc, char **argv )
if( timeStepMode == "twoField")
{
timeLoop.add() << BeforeFunction(twoFieldComm, "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide1" );
<< Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" );
timeLoop.add() << BeforeFunction(twoFieldComm, "communication" )
<< Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide2" );
<< Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide2" );
} else if ( timeStepMode == "twoFieldKernelOnly") {
timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide1" );
timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide2" );
timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" );
timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide2" );
} else if ( timeStepMode == "aa") {
timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId), "AA Even" );
timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId, omega), "AA Even" );
timeLoop.add() << BeforeFunction( aaPullComm )
<< Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId), "AA Odd")
<< Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId, omega), "AA Odd")
<< AfterFunction( aaPushComm );
} else if ( timeStepMode == "aaKernelOnly") {
timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId), "AA Even" );
timeLoop.add() << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId), "AA Odd");
timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId, omega), "AA Even" );
timeLoop.add() << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId, omega), "AA Odd");
} else {
WALBERLA_ABORT("Invalid value for timeStepMode ");
}
......
import sympy as sp
import pystencils as ps
from lbmpy.creationfunctions import create_lb_update_rule
from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from lbmpy.fieldaccess import AAEvenTimeStepAccessor, AAOddTimeStepAccessor
omega = 1.6#sp.symbols("omega")
omega = sp.symbols("omega")
omega_fill = sp.symbols("omega_:10")
options_dict = {
......@@ -116,15 +115,17 @@ with CodeGeneration() as ctx:
# Sweeps
generate_sweep(ctx, 'GenLbKernel', update_rule_two_field, field_swaps=[('pdfs', 'pdfs_tmp')])
generate_sweep(ctx, 'GenLbKernelAAEven', update_rule_aa_even, cpu_vectorize_info={'assume_aligned': True}, cpu_openmp=6, ghost_layers=1)
generate_sweep(ctx, 'GenLbKernelAAOdd', update_rule_aa_odd, cpu_vectorize_info={'assume_aligned': True}, cpu_openmp=6, ghost_layers=1)
generate_sweep(ctx, 'GenLbKernelAAEven', update_rule_aa_even, cpu_vectorize_info={'assume_aligned': True},
cpu_openmp=True, ghost_layers=1)
generate_sweep(ctx, 'GenLbKernelAAOdd', update_rule_aa_odd, cpu_vectorize_info={'assume_aligned': True},
cpu_openmp=True, ghost_layers=1)
setter_assignments = macroscopic_values_setter(update_rule_two_field.method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=1)
getter_assignments = macroscopic_values_getter(update_rule_two_field.method, velocity=velocity_field.center_vector,
pdfs=pdfs.center_vector, density=None)
generate_sweep(ctx, 'GenMacroSetter', setter_assignments)
generate_sweep(ctx, 'GenMacroGetter', getter_assignments)
generate_sweep(ctx, 'GenMacroSetter', setter_assignments, cpu_openmp=True)
generate_sweep(ctx, 'GenMacroGetter', getter_assignments, cpu_openmp=True)
# Communication
generate_pack_info_from_kernel(ctx, 'GenPackInfo', update_rule_two_field,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment