From 2d6fd6f00c3b43744c30ec31a8581c97b763391e Mon Sep 17 00:00:00 2001
From: Martin Bauer <martin.bauer@fau.de>
Date: Sun, 6 Oct 2019 12:53:10 +0200
Subject: [PATCH] UniformGridGenerated: parallel initialization for NUMA first
 touch

---
 .../UniformGridGenerated.cpp                  | 29 ++++++++++++-------
 .../UniformGridGenerated.py                   | 13 +++++----
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp
index 8ccfa107d..bbc904de3 100644
--- a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp
+++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.cpp
@@ -53,9 +53,18 @@ int main( int argc, char **argv )
             uint_t timesteps = parameters.getParameter<uint_t>( "timesteps", uint_c( 60 ));
       const real_t shearVelocityMagnitude = parameters.getParameter<real_t>("shearVelocityMagnitude", 0.08);
 
+
+      auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage * const storage) { // per-block factory for the PDF field; handed to addStructuredBlockData below
+          return new PdfField_T(storage->getNumberOfXCells(*block), // field extents are taken from this block's cell counts; NOTE(review): raw `new` - presumably the block storage takes ownership, confirm
+                                storage->getNumberOfYCells(*block),
+                                storage->getNumberOfZCells(*block),
+                                uint_t(1), // NOTE(review): presumably the ghost-layer count - confirm against the PdfField_T constructor
+                                field::fzyx, // same layout the replaced addToStorage call requested
+                                make_shared<field::AllocateAligned<real_t, 64>>()); // no initial value is passed, unlike the replaced addToStorage call (0.0); per the commit subject the first touch is meant to happen in parallel later. NOTE(review): 64 looks like a byte alignment - confirm
+      };
+
       // Creating fields
-      //BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", real_t( std::nan("") ), field::fzyx );
-      BlockDataID pdfFieldId = field::addToStorage< PdfField_T >( blocks, "pdfs", 0.0, field::fzyx );
+      BlockDataID pdfFieldId = blocks->addStructuredBlockData<PdfField_T>(pdfFieldAdder, "pdfs");
       BlockDataID velFieldId = field::addToStorage< VelocityField_T >( blocks, "vel", real_t( 0 ), field::fzyx );
 
       pystencils::GenMacroSetter setterKernel(pdfFieldId, velFieldId);
@@ -78,21 +87,21 @@ int main( int argc, char **argv )
       if( timeStepMode == "twoField")
       {
           timeLoop.add() << BeforeFunction(twoFieldComm, "communication" )
-                         << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide1" );
+                         << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" );
           timeLoop.add() << BeforeFunction(twoFieldComm, "communication" )
-                         << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide2" );
+                         << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide2" );
 
       } else if ( timeStepMode == "twoFieldKernelOnly") {
-          timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide1" );
-          timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId), "LB stream & collide2" );
+          timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide1" );
+          timeLoop.add() << Sweep( pystencils::GenLbKernel(pdfFieldId, omega), "LB stream & collide2" );
       } else if ( timeStepMode == "aa") {
-          timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId), "AA Even" );
+          timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId, omega), "AA Even" );
           timeLoop.add() << BeforeFunction( aaPullComm )
-                         << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId), "AA Odd")
+                         << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId, omega), "AA Odd")
                          << AfterFunction( aaPushComm );
       } else if ( timeStepMode == "aaKernelOnly") {
-          timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId), "AA Even" );
-          timeLoop.add() << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId), "AA Odd");
+          timeLoop.add() << Sweep( pystencils::GenLbKernelAAEven(pdfFieldId, omega), "AA Even" );
+          timeLoop.add() << Sweep( pystencils::GenLbKernelAAOdd(pdfFieldId, omega), "AA Odd");
       } else {
           WALBERLA_ABORT("Invalid value for timeStepMode ");
       }
diff --git a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py
index 2afbd8d18..74315df21 100644
--- a/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py
+++ b/apps/benchmarks/UniformGridGenerated/UniformGridGenerated.py
@@ -1,12 +1,11 @@
 import sympy as sp
 import pystencils as ps
 from lbmpy.creationfunctions import create_lb_update_rule
-from lbmpy.fieldaccess import StreamPullTwoFieldsAccessor
 from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep
 from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
 from lbmpy.fieldaccess import AAEvenTimeStepAccessor, AAOddTimeStepAccessor
 
-omega = 1.6#sp.symbols("omega")
+omega = sp.symbols("omega")
 omega_fill = sp.symbols("omega_:10")
 
 options_dict = {
@@ -116,15 +115,17 @@ with CodeGeneration() as ctx:
 
     # Sweeps
     generate_sweep(ctx, 'GenLbKernel', update_rule_two_field, field_swaps=[('pdfs', 'pdfs_tmp')])
-    generate_sweep(ctx, 'GenLbKernelAAEven', update_rule_aa_even, cpu_vectorize_info={'assume_aligned': True}, cpu_openmp=6, ghost_layers=1)
-    generate_sweep(ctx, 'GenLbKernelAAOdd', update_rule_aa_odd, cpu_vectorize_info={'assume_aligned': True}, cpu_openmp=6, ghost_layers=1)
+    generate_sweep(ctx, 'GenLbKernelAAEven', update_rule_aa_even, cpu_vectorize_info={'assume_aligned': True},
+                   cpu_openmp=True, ghost_layers=1)
+    generate_sweep(ctx, 'GenLbKernelAAOdd', update_rule_aa_odd, cpu_vectorize_info={'assume_aligned': True},
+                   cpu_openmp=True, ghost_layers=1)
 
     setter_assignments = macroscopic_values_setter(update_rule_two_field.method, velocity=velocity_field.center_vector,
                                                    pdfs=pdfs.center_vector, density=1)
     getter_assignments = macroscopic_values_getter(update_rule_two_field.method, velocity=velocity_field.center_vector,
                                                    pdfs=pdfs.center_vector, density=None)
-    generate_sweep(ctx, 'GenMacroSetter', setter_assignments)
-    generate_sweep(ctx, 'GenMacroGetter', getter_assignments)
+    generate_sweep(ctx, 'GenMacroSetter', setter_assignments, cpu_openmp=True)
+    generate_sweep(ctx, 'GenMacroGetter', getter_assignments, cpu_openmp=True)
 
     # Communication
     generate_pack_info_from_kernel(ctx, 'GenPackInfo', update_rule_two_field,
-- 
GitLab