diff --git a/python/lbmpy_walberla/boundary.py b/python/lbmpy_walberla/boundary.py index 1ca58b37a8da98f2c925a135aacc22b6f2b22124..89c0a7d97d8aea3c1693c2ad59b2c91143d7cf58 100644 --- a/python/lbmpy_walberla/boundary.py +++ b/python/lbmpy_walberla/boundary.py @@ -29,6 +29,7 @@ def generate_boundary(generation_context, class_name, boundary_object, lb_method kernel = create_lattice_boltzmann_boundary_kernel(pdf_field, index_field, lb_method, boundary_object, target=target, openmp=generation_context.openmp) kernel.function_name = "boundary_" + boundary_object.name + kernel.assumed_inner_stride_one = False # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D if lb_method.dim == 2: diff --git a/python/lbmpy_walberla/walberla_lbm_generation.py b/python/lbmpy_walberla/walberla_lbm_generation.py index b234c8b3351c956ec775c4603f395ed8fb806b10..a191c47e9a45d037a5110084127b05c140d7dcd7 100644 --- a/python/lbmpy_walberla/walberla_lbm_generation.py +++ b/python/lbmpy_walberla/walberla_lbm_generation.py @@ -149,14 +149,17 @@ def generate_lattice_model(generation_context, class_name, collision_rule, field stream_collide_update_rule = create_lbm_kernel(collision_rule, src_field, dst_field, StreamPullTwoFieldsAccessor()) stream_collide_ast = create_kernel(stream_collide_update_rule, **create_kernel_params) stream_collide_ast.function_name = 'kernel_streamCollide' + stream_collide_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] collide_update_rule = create_lbm_kernel(collision_rule, src_field, dst_field, CollideOnlyInplaceAccessor()) collide_ast = create_kernel(collide_update_rule, **create_kernel_params) collide_ast.function_name = 'kernel_collide' + collide_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] stream_update_rule = create_stream_pull_only_kernel(lb_method.stencil, None, 'pdfs', 'pdfs_tmp', field_layout, dtype) stream_ast = create_kernel(stream_update_rule, **create_kernel_params) stream_ast.function_name = 'kernel_stream' + stream_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] __lattice_model(generation_context, class_name, lb_method, stream_collide_ast, collide_ast, stream_ast, refinement_scaling) diff --git a/python/pystencils_walberla/boundary.py b/python/pystencils_walberla/boundary.py index a0086890bb4bb47cb5b9d915886dcee38e2a906c..2df49896e38ae1c34f8266e9007369d1e28f3791 100644 --- a/python/pystencils_walberla/boundary.py +++ b/python/pystencils_walberla/boundary.py @@ -29,6 +29,7 @@ def generate_staggered_boundary(generation_context, class_name, boundary_object, kernel = create_boundary_kernel(staggered_field, index_field, neighbor_stencil, boundary_object, target=target, openmp=generation_context.openmp) kernel.function_name = "boundary_" + boundary_object.name + kernel.assumed_inner_stride_one = False # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D if dim == 2: @@ -82,6 +83,7 @@ def generate_staggered_flux_boundary(generation_context, class_name, boundary_ob kernel = create_boundary_kernel(staggered_field, index_field, neighbor_stencil, boundary_object, target=target, openmp=generation_context.openmp) kernel.function_name = "boundary_" + boundary_object.name + kernel.assumed_inner_stride_one = False # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D if dim == 2: diff --git a/python/pystencils_walberla/codegen.py b/python/pystencils_walberla/codegen.py index 692377ffbbe6920fe5e3df6fe6e4a3857587ccd8..d73b502c570f23d049224588fc66dc9e6e4024f6 100644 --- a/python/pystencils_walberla/codegen.py +++ b/python/pystencils_walberla/codegen.py @@ -57,6 +57,7 @@ def generate_sweep(generation_context, class_name, assignments, ast = create_kernel(assignments, **create_kernel_params) else: ast = create_staggered_kernel(assignments, **create_kernel_params) + ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] def to_name(f): return f.name if isinstance(f, Field) else f @@ -223,15 +224,18 @@ def generate_pack_info(generation_context, class_name: str, pack_assignments = [Assignment(buffer(i), term) for i, term in enumerate(terms)] pack_ast = create_kernel(pack_assignments, **create_kernel_params, ghost_layers=0) pack_ast.function_name = 'pack_{}'.format("_".join(direction_strings)) + pack_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] unpack_assignments = [Assignment(term, buffer(i)) for i, term in enumerate(terms)] unpack_ast = create_kernel(unpack_assignments, **create_kernel_params, ghost_layers=0) unpack_ast.function_name = 'unpack_{}'.format("_".join(direction_strings)) + unpack_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] pack_kernels[direction_strings] = KernelInfo(pack_ast) unpack_kernels[direction_strings] = KernelInfo(unpack_ast) elements_per_cell[direction_strings] = len(terms) fused_kernel = create_kernel([Assignment(buffer.center, t) for t in all_accesses], **create_kernel_params) + fused_kernel.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] jinja_context = { 'class_name': class_name, diff --git a/python/pystencils_walberla/jinja_filters.py b/python/pystencils_walberla/jinja_filters.py index ee79ef903fae7add43ac4b791f0e5cdccf8cc718..656cd022e1132da53d248de4631bc989d2ac8110 100644 --- a/python/pystencils_walberla/jinja_filters.py +++ b/python/pystencils_walberla/jinja_filters.py @@ -252,6 +252,8 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non coordinates = tuple(coordinates) kernel_call_lines.append("%s %s = %s->dataAt(%s, %s, %s, %s);" % ((param.symbol.dtype, param.symbol.name, param.field_name) + coordinates)) + if ast.assumed_inner_stride_one: + kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (param.field_name,)) elif param.is_field_stride: casted_stride = get_field_stride(param) type_str = param.symbol.dtype.base_name @@ -265,6 +267,8 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non max_value = "%s->%sSizeWithGhostLayer()" % (field.name, ('x', 'y', 'z')[coord]) kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, %s);" % (max_value, shape)) kernel_call_lines.append("const %s %s = %s;" % (type_str, param.symbol.name, shape)) + if ast.assumed_inner_stride_one: + kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (field.name,)) call_parameters = ", ".join([p.symbol.name for p in ast_params])