From f9cee3f38b23e962535dafb9f362923756f88bab Mon Sep 17 00:00:00 2001
From: Markus Holzer <markus.holzer@fau.de>
Date: Mon, 29 Apr 2024 14:40:26 +0200
Subject: [PATCH] [Fix] PDF initialisation

---
 .../NonUniformGridCPU/NonUniformGridCPU.cpp         |  2 +-
 .../NonUniformGridGPU/NonUniformGridGPU.cpp         |  2 +-
 apps/benchmarks/UniformGridCPU/UniformGridCPU.py    |  3 ++-
 apps/benchmarks/UniformGridGPU/UniformGridGPU.py    |  2 +-
 python/lbmpy_walberla/sweep_collection.py           | 13 ++++++++-----
 python/lbmpy_walberla/walberla_lbm_package.py       |  3 ++-
 tests/lbm_generated/Example.cpp                     |  4 ++--
 tests/lbm_generated/FreeSlipRefinement.cpp          |  4 ++--
 tests/lbm_generated/InterpolationNoSlip.py          |  3 ++-
 9 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp
index 6f6e03781..34fde13e3 100644
--- a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp
+++ b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp
@@ -132,7 +132,7 @@ int main(int argc, char** argv)
       SweepCollection_T sweepCollection(blocks, pdfFieldID, densityFieldID, velFieldID, omega, innerOuterSplit);
       for (auto& block : *blocks)
       {
-         sweepCollection.initialise(&block, 2);
+         sweepCollection.initialise(&block, cell_idx_c(1));
       }
       WALBERLA_MPI_BARRIER()
       WALBERLA_LOG_INFO_ON_ROOT("Initialisation done")
diff --git a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp
index e76c0b511..233103342 100644
--- a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp
+++ b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp
@@ -190,7 +190,7 @@ int main(int argc, char** argv)
       SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit);
       for (auto& iBlock : *blocks)
       {
-         sweepCollection.initialise(&iBlock, 2, nullptr);
+         sweepCollection.initialise(&iBlock, cell_idx_c(1), nullptr);
       }
       WALBERLA_GPU_CHECK(gpuDeviceSynchronize())
       WALBERLA_GPU_CHECK(gpuPeekAtLastError())
diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
index ae9ec4f1b..676d5f0f4 100644
--- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
+++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py
@@ -152,7 +152,8 @@ with CodeGeneration() as ctx:
                          lbm_config=lbm_config, lbm_optimisation=lbm_opt,
                          nonuniform=False, boundaries=[no_slip, ubb],
                          macroscopic_fields=macroscopic_fields,
-                         cpu_openmp=openmp, cpu_vectorize_info=cpu_vec)
+                         cpu_openmp=openmp, cpu_vectorize_info=cpu_vec,
+                         set_pre_collision_pdfs=False)
 
     # Stream only kernel
     generate_sweep(ctx, 'UniformGridCPU_StreamOnlyKernel', stream_only_kernel,
diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
index 3d7579e5b..6d7d7648b 100644
--- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
+++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py
@@ -168,7 +168,7 @@ with CodeGeneration() as ctx:
                          nonuniform=False, boundaries=[no_slip, ubb],
                          macroscopic_fields=macroscopic_fields,
                          target=ps.Target.GPU, gpu_indexing_params=gpu_indexing_params,
-                         max_threads=max_threads)
+                         max_threads=max_threads, set_pre_collision_pdfs=False)
 
     # Stream only kernel
     vp = [('int32_t', 'cudaBlockSize0'), ('int32_t', 'cudaBlockSize1'), ('int32_t', 'cudaBlockSize2')]
diff --git a/python/lbmpy_walberla/sweep_collection.py b/python/lbmpy_walberla/sweep_collection.py
index 5fe4892ab..bc8bdda49 100644
--- a/python/lbmpy_walberla/sweep_collection.py
+++ b/python/lbmpy_walberla/sweep_collection.py
@@ -28,7 +28,7 @@ def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmColli
                                   lbm_config: LBMConfig, lbm_optimisation: LBMOptimisation,
                                   refinement_scaling=None, macroscopic_fields: Dict[str, Field] = None,
                                   target=Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None,
-                                  max_threads=None,
+                                  max_threads=None, set_pre_collision_pdfs=True,
                                   **create_kernel_params):
 
     config = config_from_context(ctx, target=target, data_type=data_type,
@@ -76,7 +76,7 @@ def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmColli
     config_unoptimized = replace(config, cpu_vectorize_info=None, cpu_prepend_optimizations=[], cpu_blocking=None)
 
     setter_family = get_setter_family(class_name, lb_method, src_field, streaming_pattern, macroscopic_fields,
-                                      config_unoptimized)
+                                      config_unoptimized, set_pre_collision_pdfs)
     setter_generator = kernel_family_function_generator('initialise', setter_family,
                                                         namespace='lbm', max_threads=max_threads)
     function_generators.append(setter_generator)
@@ -167,7 +167,8 @@ def lbm_kernel_family(class_name, kernel_name,
     return family
 
 
-def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopic_fields, config: CreateKernelConfig):
+def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopic_fields,
+                      config: CreateKernelConfig, set_pre_collision_pdfs: bool):
     dim = lb_method.stencil.D
     density = macroscopic_fields.get('density', 1.0)
     velocity = macroscopic_fields.get('velocity', [0.0] * dim)
@@ -184,7 +185,8 @@ def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
             timestep_suffix = str(timestep)
             setter = macroscopic_values_setter(lb_method,
                                                density=density, velocity=velocity, pdfs=pdfs,
-                                               streaming_pattern=streaming_pattern, previous_timestep=timestep)
+                                               streaming_pattern=streaming_pattern, previous_timestep=timestep,
+                                               set_pre_collision_pdfs=set_pre_collision_pdfs)
 
             if default_dtype != pdfs.dtype:
                 setter = add_subexpressions_for_field_reads(setter, data_type=default_dtype)
@@ -198,7 +200,8 @@ def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi
         timestep = Timestep.BOTH
         setter = macroscopic_values_setter(lb_method,
                                            density=density, velocity=velocity, pdfs=pdfs,
-                                           streaming_pattern=streaming_pattern, previous_timestep=timestep)
+                                           streaming_pattern=streaming_pattern, previous_timestep=timestep,
+                                           set_pre_collision_pdfs=set_pre_collision_pdfs)
 
         setter_ast = create_kernel(setter, config=config)
         setter_ast.function_name = 'kernel_initialise'
diff --git a/python/lbmpy_walberla/walberla_lbm_package.py b/python/lbmpy_walberla/walberla_lbm_package.py
index 80b37a4a9..ea583181f 100644
--- a/python/lbmpy_walberla/walberla_lbm_package.py
+++ b/python/lbmpy_walberla/walberla_lbm_package.py
@@ -20,7 +20,7 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
                          target: Target = Target.CPU,
                          data_type=None, pdfs_data_type=None,
                          cpu_openmp=None, cpu_vectorize_info=None,
-                         max_threads=None,
+                         max_threads=None, set_pre_collision_pdfs=True,
                          **kernel_parameters):
 
     if macroscopic_fields is None:
@@ -48,6 +48,7 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str,
                                   target=target, data_type=data_type,
                                   cpu_openmp=cpu_openmp, cpu_vectorize_info=cpu_vectorize_info,
                                   max_threads=max_threads,
+                                  set_pre_collision_pdfs=set_pre_collision_pdfs,
                                   **kernel_parameters)
 
     spatial_shape = None
diff --git a/tests/lbm_generated/Example.cpp b/tests/lbm_generated/Example.cpp
index 4dfd69b55..2e77ddcb2 100644
--- a/tests/lbm_generated/Example.cpp
+++ b/tests/lbm_generated/Example.cpp
@@ -177,14 +177,14 @@ int main(int argc, char** argv)
 
    StorageSpecification_T StorageSpec = StorageSpecification_T();
    BlockDataID pdfFieldId = lbm_generated::addPdfFieldToStorage(blocks, "pdf field", StorageSpec, uint_c(2));
-   BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx);
+   BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx, uint_c(2));
 
    BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field", uint_c(3));
 
    SweepCollection_T sweepCollection(blocks, pdfFieldId, velFieldId, omega);
    for (auto& block : *blocks)
    {
-      sweepCollection.initialise(&block);
+      sweepCollection.initialise(&block, cell_idx_c(1));
    }
 
    const FlagUID fluidFlagUID("Fluid");
diff --git a/tests/lbm_generated/FreeSlipRefinement.cpp b/tests/lbm_generated/FreeSlipRefinement.cpp
index 42d91a6ee..4a6d51a91 100644
--- a/tests/lbm_generated/FreeSlipRefinement.cpp
+++ b/tests/lbm_generated/FreeSlipRefinement.cpp
@@ -201,14 +201,14 @@ int main(int argc, char** argv)
 
    StorageSpecification_T StorageSpec = StorageSpecification_T();
    BlockDataID pdfFieldId = lbm_generated::addPdfFieldToStorage(blocks, "pdf field", StorageSpec, uint_c(2));
-   BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx);
+   BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx, uint_c(2));
 
    BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field", uint_c(3));
 
    SweepCollection_T sweepCollection(blocks, pdfFieldId, velFieldId, omega);
    for (auto& block : *blocks)
    {
-      sweepCollection.initialise(&block);
+      sweepCollection.initialise(&block, cell_idx_c(1));
    }
 
    const FlagUID fluidFlagUID("Fluid");
diff --git a/tests/lbm_generated/InterpolationNoSlip.py b/tests/lbm_generated/InterpolationNoSlip.py
index 62463033e..891892f43 100644
--- a/tests/lbm_generated/InterpolationNoSlip.py
+++ b/tests/lbm_generated/InterpolationNoSlip.py
@@ -48,6 +48,7 @@ with CodeGeneration() as ctx:
                          collision_rule=collision_rule,
                          lbm_config=lbm_config, lbm_optimisation=lbm_opt,
                          nonuniform=True, boundaries=[no_slip, no_slip_bouzidi, no_slip_quadraticbb, ubb],
-                         macroscopic_fields=macroscopic_fields, data_type=data_type)
+                         macroscopic_fields=macroscopic_fields, data_type=data_type,
+                         set_pre_collision_pdfs=False)
 
     generate_info_header(ctx, 'InterpolationNoSlipHeader')
-- 
GitLab