diff --git a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp index 6f6e0378176c28468f09b359932ea9d78152ec24..34fde13e360e3190c3ec811b0f810432f47adf00 100644 --- a/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp +++ b/apps/benchmarks/NonUniformGridCPU/NonUniformGridCPU.cpp @@ -132,7 +132,7 @@ int main(int argc, char** argv) SweepCollection_T sweepCollection(blocks, pdfFieldID, densityFieldID, velFieldID, omega, innerOuterSplit); for (auto& block : *blocks) { - sweepCollection.initialise(&block, 2); + sweepCollection.initialise(&block, cell_idx_c(1)); } WALBERLA_MPI_BARRIER() WALBERLA_LOG_INFO_ON_ROOT("Initialisation done") diff --git a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp index e76c0b51184e8486d9d688a498c9ac4f65dd86b6..233103342cdde4e21ab443cd3aadbece48b42c85 100644 --- a/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp +++ b/apps/benchmarks/NonUniformGridGPU/NonUniformGridGPU.cpp @@ -190,7 +190,7 @@ int main(int argc, char** argv) SweepCollection_T sweepCollection(blocks, pdfFieldGpuID, densityFieldGpuID, velFieldGpuID, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2], omega, innerOuterSplit); for (auto& iBlock : *blocks) { - sweepCollection.initialise(&iBlock, 2, nullptr); + sweepCollection.initialise(&iBlock, cell_idx_c(1), nullptr); } WALBERLA_GPU_CHECK(gpuDeviceSynchronize()) WALBERLA_GPU_CHECK(gpuPeekAtLastError()) diff --git a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py index ae9ec4f1bd6e26a8474099cd6f03d4c40f114854..676d5f0f4f45b0dfdf0d39a108abbbd73b8a7e47 100644 --- a/apps/benchmarks/UniformGridCPU/UniformGridCPU.py +++ b/apps/benchmarks/UniformGridCPU/UniformGridCPU.py @@ -152,7 +152,8 @@ with CodeGeneration() as ctx: lbm_config=lbm_config, lbm_optimisation=lbm_opt, nonuniform=False, boundaries=[no_slip, ubb], macroscopic_fields=macroscopic_fields, - cpu_openmp=openmp, cpu_vectorize_info=cpu_vec) + cpu_openmp=openmp, cpu_vectorize_info=cpu_vec, + set_pre_collision_pdfs=False) # Stream only kernel generate_sweep(ctx, 'UniformGridCPU_StreamOnlyKernel', stream_only_kernel, diff --git a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py index 3d7579e5bcb3f3713f59a9afd94d7fed790c21e9..6d7d7648bdf18729b3f7206828902b1115509199 100644 --- a/apps/benchmarks/UniformGridGPU/UniformGridGPU.py +++ b/apps/benchmarks/UniformGridGPU/UniformGridGPU.py @@ -168,7 +168,7 @@ with CodeGeneration() as ctx: nonuniform=False, boundaries=[no_slip, ubb], macroscopic_fields=macroscopic_fields, target=ps.Target.GPU, gpu_indexing_params=gpu_indexing_params, - max_threads=max_threads) + max_threads=max_threads, set_pre_collision_pdfs=False) # Stream only kernel vp = [('int32_t', 'cudaBlockSize0'), ('int32_t', 'cudaBlockSize1'), ('int32_t', 'cudaBlockSize2')] diff --git a/python/lbmpy_walberla/sweep_collection.py b/python/lbmpy_walberla/sweep_collection.py index 5fe4892ab3bc9740f3ca04775363586b42134e96..bc8bdda49dcb88f897f7fa1ce23c9a9b101660a3 100644 --- a/python/lbmpy_walberla/sweep_collection.py +++ b/python/lbmpy_walberla/sweep_collection.py @@ -28,7 +28,7 @@ def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmColli lbm_config: LBMConfig, lbm_optimisation: LBMOptimisation, refinement_scaling=None, macroscopic_fields: Dict[str, Field] = None, target=Target.CPU, data_type=None, cpu_openmp=None, cpu_vectorize_info=None, - max_threads=None, + max_threads=None, set_pre_collision_pdfs=True, **create_kernel_params): config = config_from_context(ctx, target=target, data_type=data_type, @@ -76,7 +76,7 @@ def generate_lbm_sweep_collection(ctx, class_name: str, collision_rule: LbmColli config_unoptimized = replace(config, cpu_vectorize_info=None, cpu_prepend_optimizations=[], cpu_blocking=None) setter_family = get_setter_family(class_name, lb_method, src_field, streaming_pattern, macroscopic_fields, - config_unoptimized) + config_unoptimized, set_pre_collision_pdfs) setter_generator = kernel_family_function_generator('initialise', setter_family, namespace='lbm', max_threads=max_threads) function_generators.append(setter_generator) @@ -167,7 +167,8 @@ def lbm_kernel_family(class_name, kernel_name, return family -def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopic_fields, config: CreateKernelConfig): +def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopic_fields, + config: CreateKernelConfig, set_pre_collision_pdfs: bool): dim = lb_method.stencil.D density = macroscopic_fields.get('density', 1.0) velocity = macroscopic_fields.get('velocity', [0.0] * dim) @@ -184,7 +185,8 @@ def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi timestep_suffix = str(timestep) setter = macroscopic_values_setter(lb_method, density=density, velocity=velocity, pdfs=pdfs, - streaming_pattern=streaming_pattern, previous_timestep=timestep) + streaming_pattern=streaming_pattern, previous_timestep=timestep, + set_pre_collision_pdfs=set_pre_collision_pdfs) if default_dtype != pdfs.dtype: setter = add_subexpressions_for_field_reads(setter, data_type=default_dtype) @@ -198,7 +200,8 @@ def get_setter_family(class_name, lb_method, pdfs, streaming_pattern, macroscopi timestep = Timestep.BOTH setter = macroscopic_values_setter(lb_method, density=density, velocity=velocity, pdfs=pdfs, - streaming_pattern=streaming_pattern, previous_timestep=timestep) + streaming_pattern=streaming_pattern, previous_timestep=timestep, + set_pre_collision_pdfs=set_pre_collision_pdfs) setter_ast = create_kernel(setter, config=config) setter_ast.function_name = 'kernel_initialise' diff --git a/python/lbmpy_walberla/walberla_lbm_package.py b/python/lbmpy_walberla/walberla_lbm_package.py index 80b37a4a90f717e79ea60890a802a363c45dde52..ea583181f6c4165863d68a703acd341c8c41d71e 100644 --- a/python/lbmpy_walberla/walberla_lbm_package.py +++ b/python/lbmpy_walberla/walberla_lbm_package.py @@ -20,7 +20,7 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str, target: Target = Target.CPU, data_type=None, pdfs_data_type=None, cpu_openmp=None, cpu_vectorize_info=None, - max_threads=None, + max_threads=None, set_pre_collision_pdfs=True, **kernel_parameters): if macroscopic_fields is None: @@ -48,6 +48,7 @@ def generate_lbm_package(ctx: CodeGenerationContext, name: str, target=target, data_type=data_type, cpu_openmp=cpu_openmp, cpu_vectorize_info=cpu_vectorize_info, max_threads=max_threads, + set_pre_collision_pdfs=set_pre_collision_pdfs, **kernel_parameters) spatial_shape = None diff --git a/tests/lbm_generated/Example.cpp b/tests/lbm_generated/Example.cpp index 4dfd69b553d88d268efb0c49c857eb391f6277ea..2e77ddcb2bfa24c924553afdf71fc5b8081a49ab 100644 --- a/tests/lbm_generated/Example.cpp +++ b/tests/lbm_generated/Example.cpp @@ -177,14 +177,14 @@ int main(int argc, char** argv) StorageSpecification_T StorageSpec = StorageSpecification_T(); BlockDataID pdfFieldId = lbm_generated::addPdfFieldToStorage(blocks, "pdf field", StorageSpec, uint_c(2)); - BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx); + BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx, uint_c(2)); BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field", uint_c(3)); SweepCollection_T sweepCollection(blocks, pdfFieldId, velFieldId, omega); for (auto& block : *blocks) { - sweepCollection.initialise(&block); + sweepCollection.initialise(&block, cell_idx_c(1)); } const FlagUID fluidFlagUID("Fluid"); diff --git a/tests/lbm_generated/FreeSlipRefinement.cpp b/tests/lbm_generated/FreeSlipRefinement.cpp index 42d91a6eecbd9ad8e25a88c9a4dd4c70986d347e..4a6d51a91ef1419a703596ffbd79cabb4f95cc32 100644 --- a/tests/lbm_generated/FreeSlipRefinement.cpp +++ b/tests/lbm_generated/FreeSlipRefinement.cpp @@ -201,14 +201,14 @@ int main(int argc, char** argv) StorageSpecification_T StorageSpec = StorageSpecification_T(); BlockDataID pdfFieldId = lbm_generated::addPdfFieldToStorage(blocks, "pdf field", StorageSpec, uint_c(2)); - BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx); + BlockDataID velFieldId = field::addToStorage< VectorField_T >(blocks, "Velocity", real_c(0.0), field::fzyx, uint_c(2)); BlockDataID flagFieldId = field::addFlagFieldToStorage< FlagField_T >(blocks, "flag field", uint_c(3)); SweepCollection_T sweepCollection(blocks, pdfFieldId, velFieldId, omega); for (auto& block : *blocks) { - sweepCollection.initialise(&block); + sweepCollection.initialise(&block, cell_idx_c(1)); } const FlagUID fluidFlagUID("Fluid"); diff --git a/tests/lbm_generated/InterpolationNoSlip.py b/tests/lbm_generated/InterpolationNoSlip.py index 62463033edaab74efff765a7e412a273b44bccbf..891892f43c3a5f0acbf187cd064ae0f85b559036 100644 --- a/tests/lbm_generated/InterpolationNoSlip.py +++ b/tests/lbm_generated/InterpolationNoSlip.py @@ -48,6 +48,7 @@ with CodeGeneration() as ctx: collision_rule=collision_rule, lbm_config=lbm_config, lbm_optimisation=lbm_opt, nonuniform=True, boundaries=[no_slip, no_slip_bouzidi, no_slip_quadraticbb, ubb], - macroscopic_fields=macroscopic_fields, data_type=data_type) + macroscopic_fields=macroscopic_fields, data_type=data_type, + set_pre_collision_pdfs=False) generate_info_header(ctx, 'InterpolationNoSlipHeader')