From f9cee3f38b23e962535dafb9f362923756f88bab Mon Sep 17 00:00:00 2001 From: Markus Holzer <markus.holzer@fau.de> Date: Mon, 29 Apr 2024 14:40:26 +0200 Subject: [PATCH] [Fix] PDF initialisation --- .../NonUniformGridCPU/NonUniformGridCPU.cpp | 2 +- .../NonUniformGridGPU/NonUniformGridGPU.cpp | 2 +- apps/benchmarks/UniformGridCPU/UniformGridCPU.py | 3 ++- apps/benchmarks/UniformGridGPU/UniformGridGPU.py | 2 +- python/lbmpy_walberla/sweep_collection.py | 13 ++++++++----- python/lbmpy_walberla/walberla_lbm_package.py | 3 ++- tests/lbm_generated/Example.cpp | 4 ++-- tests/lbm_generated/FreeSlipRefinement.cpp | 4 ++-- tests/lbm_generated/InterpolationNoSlip.py | 3 ++- 9 files changed, 21 insertions(+), 15 deletions(-) diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp index 6f6e03781..34fde13e3 100644 --- a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp +++ b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp @@ -132,7 +132,7 @@ int main(int argc, char** argv) SweepCollection_T sweepCollection(blocks, pdfFieldID, densityFieldID, velFieldID, omega, innerOuterSplit); for (auto& block : *blocks) { - sweepCollection.initialise(&block, 2); + sweepCollection.initialise(&block, cell_idx_c(1)); } WALBERLA_MPI_BARRIER() WALBERLA_LOG_INFO_ON_ROOT("Initialisation done") diff --git a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp index e76c0b511..233103342 100644 --- a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp +++ b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp @@ -190,7 +190,7 @@ int main(int argc, char** argv) SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit); for (auto& iBlock : *blocks) { - sweepCollection.initialise(&iBlock, 2, nullptr); + sweepCollection.initialise(&iBlock, cell_idx_c(1), nullptr); } WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) WALBERLA_GPU_CHECK(gpuPeekAtLastError()) diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py index ae9ec4f1b..676d5f0f4 100644 --- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py +++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py @@ -152,7 +152,8 @@ with CodeGeneration() as ctx: lbm_config=lbm_config, lbm_optimisation=lbm_opt, nonuniform=False, boundaries=[no_slip, ubb], macroscopic_fields=macroscopic_fields, - cpu_openmp=openmp, cpu_vectorize_info=cpu_vec) + cpu_openmp=openmp, cpu_vectorize_info=cpu_vec, + set_pre_collision_pdfs=False) # Stream only kernel generate_sweep(ctx, 'UniformGridCPU_StreamOnlyKernel', stream_only_kernel, diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py index 3d7579e5b..6d7d7648b 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py @@ -168,7 +168,7 @@ with CodeGeneration() as ctx: nonuniform=False, boundaries=[no_slip, ubb], macroscopic_fields=macroscopic_fields, target=ps.Target.GPU, gpu_indexing_params=gpu_indexing_params, - max_threads=max_threads) + max_threads=max_threads, set_pre_collision_pdfs=False) # Stream only kernel vp = [('int32_t', 'cudaBlockSize0'), ('int32_t', 'cudaBlockSize1'), ('int32_t', 'cudaBlockSize2')] diff --git a/python/lbmpy_walberla/sweep_collection.py b/python/lbmpy_walberla/sweep_collection.py index 5fe4892ab..bc8bdda49 100644 --- a/python/lbmpy_walberla/sweep_collection.py +++ b/python/lbmpy_walberla/sweep_collection.py @@ -28,7 +28,7 @@ def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmColli lbm_config: LBMConfig, lbm_optimisation: LBMOptimisation, refinement_scaling=None, macroscopic_fields: Dict[str, Field] = None, target=Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None, - max_threads=None, + max_threads=None, set_pre_collision_pdfs=True, **create_kernel_params): config = config_from_context(ctx, target=target, data_type=data_type, @@ -76,7 +76,7 @@ def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmColli config_unoptimized = replace(config, cpu_vectorize_info=None, cpu_prepend_optimizations=[], cpu_blocking=None) setter_family = get_setter_family(class_name, lb_method, src_field, streaming_pattern, macroscopic_fields, - config_unoptimized) + config_unoptimized, set_pre_collision_pdfs) setter_generator = kernel_family_function_generator('initialise', setter_family, namespace='lbm', max_threads=max_threads) function_generators.append(setter_generator) @@ -167,7 +167,8 @@ def lbm_kernel_family(class_name, kernel_name, return family -def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopic_fields, config: CreateKernelConfig): +def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopic_fields, + config: CreateKernelConfig, set_pre_collision_pdfs: bool): dim = lb_method.stencil.D density = macroscopic_fields.get('density', 1.0) velocity = macroscopic_fields.get('velocity', [0.0] * dim) @@ -184,7 +185,8 @@ def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi timestep_suffix = str(timestep) setter = macroscopic_values_setter(lb_method, density=density, velocity=velocity, pdfs=pdfs, - streaming_pattern=streaming_pattern, previous_timestep=timestep) + streaming_pattern=streaming_pattern, previous_timestep=timestep, + set_pre_collision_pdfs=set_pre_collision_pdfs) if default_dtype != pdfs.dtype: setter = add_subexpressions_for_field_reads(setter, data_type=default_dtype) @@ -198,7 +200,8 @@ def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi timestep = Timestep.BOTH setter = macroscopic_values_setter(lb_method, density=density, velocity=velocity, pdfs=pdfs, - streaming_pattern=streaming_pattern, previous_timestep=timestep) + streaming_pattern=streaming_pattern, previous_timestep=timestep, + set_pre_collision_pdfs=set_pre_collision_pdfs) setter_ast = create_kernel(setter, config=config) setter_ast.function_name = 'kernel_initialise' diff --git a/python/lbmpy_walberla/walberla_lbm_package.py b/python/lbmpy_walberla/walberla_lbm_package.py index 80b37a4a9..ea583181f 100644 --- a/python/lbmpy_walberla/walberla_lbm_package.py +++ b/python/lbmpy_walberla/walberla_lbm_package.py @@ -20,7 +20,7 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str, target: Target = Target.CPU, data_type=None, pdfs_data_type=None, cpu_openmp=None, cpu_vectorize_info=None, - max_threads=None, + max_threads=None, set_pre_collision_pdfs=True, **kernel_parameters): if macroscopic_fields is None: @@ -48,6 +48,7 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str, target=target, data_type=data_type, cpu_openmp=cpu_openmp, cpu_vectorize_info=cpu_vectorize_info, max_threads=max_threads, + set_pre_collision_pdfs=set_pre_collision_pdfs, **kernel_parameters) spatial_shape = None diff --git a/tests/lbm_generated/Example.cpp b/tests/lbm_generated/Example.cpp index 4dfd69b55..2e77ddcb2 100644 --- a/tests/lbm_generated/Example.cpp +++ b/tests/lbm_generated/Example.cpp @@ -177,14 +177,14 @@ int main(int argc, char** argv) StorageSpecification_T StorageSpec = StorageSpecification_T(); BlockDataID pdfFieldId = lbm_generated::addPdfFieldToStorage(blocks, "pdf field", StorageSpec, uint_c(2)); - BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx); + BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx, uint_c(2)); BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field", uint_c(3)); SweepCollection_T sweepCollection(blocks, pdfFieldId, velFieldId, omega); for (auto& block : *blocks) { - sweepCollection.initialise(&block); + sweepCollection.initialise(&block, cell_idx_c(1)); } const FlagUID fluidFlagUID("Fluid"); diff --git a/tests/lbm_generated/FreeSlipRefinement.cpp b/tests/lbm_generated/FreeSlipRefinement.cpp index 42d91a6ee..4a6d51a91 100644 --- a/tests/lbm_generated/FreeSlipRefinement.cpp +++ b/tests/lbm_generated/FreeSlipRefinement.cpp @@ -201,14 +201,14 @@ int main(int argc, char** argv) StorageSpecification_T StorageSpec = StorageSpecification_T(); BlockDataID pdfFieldId = lbm_generated::addPdfFieldToStorage(blocks, "pdf field", StorageSpec, uint_c(2)); - BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx); + BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx, uint_c(2)); BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field", uint_c(3)); SweepCollection_T sweepCollection(blocks, pdfFieldId, velFieldId, omega); for (auto& block : *blocks) { - sweepCollection.initialise(&block); + sweepCollection.initialise(&block, cell_idx_c(1)); } const FlagUID fluidFlagUID("Fluid"); diff --git a/tests/lbm_generated/InterpolationNoSlip.py b/tests/lbm_generated/InterpolationNoSlip.py index 62463033e..891892f43 100644 --- a/tests/lbm_generated/InterpolationNoSlip.py +++ b/tests/lbm_generated/InterpolationNoSlip.py @@ -48,6 +48,7 @@ with CodeGeneration() as ctx: collision_rule=collision_rule, lbm_config=lbm_config, lbm_optimisation=lbm_opt, nonuniform=True, boundaries=[no_slip, no_slip_bouzidi, no_slip_quadraticbb, ubb], - macroscopic_fields=macroscopic_fields, data_type=data_type) + macroscopic_fields=macroscopic_fields, data_type=data_type, + set_pre_collision_pdfs=False) generate_info_header(ctx, 'InterpolationNoSlipHeader') -- GitLab