diff --git a/apps/benchmarks/FluidParticleCoupling/GeneratedLBM.py b/apps/benchmarks/FluidParticleCoupling/GeneratedLBM.py index e9ecd8375b583c1e40965c5f570183e9a5fdebbc..7992bcdd92bbbbd99cd4a3ad9a948dbcd8eb7e8d 100644 --- a/apps/benchmarks/FluidParticleCoupling/GeneratedLBM.py +++ b/apps/benchmarks/FluidParticleCoupling/GeneratedLBM.py @@ -10,6 +10,7 @@ from lbmpy.stencils import get_stencil with CodeGeneration() as ctx: + omegaVisc = sp.Symbol("omega_visc") omegaBulk = ps.fields("omega_bulk: [3D]", layout='fzyx') omegaMagic = sp.Symbol("omega_magic") @@ -37,8 +38,8 @@ with CodeGeneration() as ctx: #print(methodWithForce.relaxation_rates) #print(methodWithForce.moment_matrix) - collision_rule = create_lb_collision_rule(lb_method=methodWithForce) - generate_lattice_model(ctx, 'GeneratedLBM', collision_rule) + collision_rule = create_lb_collision_rule(lb_method=methodWithForce, optimization={'cse_global': True}) + generate_lattice_model(ctx, 'GeneratedLBM', collision_rule, field_layout='fzyx') diff --git a/apps/benchmarks/FluidParticleCoupling/GeneratedLBMWithForce.py b/apps/benchmarks/FluidParticleCoupling/GeneratedLBMWithForce.py index c63157fdab56e999fbd75fd818f95ddbc3c5a35e..9162d1cfeb55b6f850292ea3ce7fa6edac6c5aef 100644 --- a/apps/benchmarks/FluidParticleCoupling/GeneratedLBMWithForce.py +++ b/apps/benchmarks/FluidParticleCoupling/GeneratedLBMWithForce.py @@ -41,5 +41,5 @@ with CodeGeneration() as ctx: #print(methodWithForce.relaxation_rates) #print(methodWithForce.moment_matrix) - collision_rule = create_lb_collision_rule(lb_method=methodWithForce) - generate_lattice_model(ctx, 'GeneratedLBMWithForce', collision_rule) + collision_rule = create_lb_collision_rule(lb_method=methodWithForce, optimization={'cse_global': True}) + generate_lattice_model(ctx, 'GeneratedLBMWithForce', collision_rule, field_layout='fzyx') diff --git a/python/lbmpy_walberla/boundary.py b/python/lbmpy_walberla/boundary.py index 1ca58b37a8da98f2c925a135aacc22b6f2b22124..89c0a7d97d8aea3c1693c2ad59b2c91143d7cf58 100644 --- a/python/lbmpy_walberla/boundary.py +++ b/python/lbmpy_walberla/boundary.py @@ -29,6 +29,7 @@ def generate_boundary(generation_context, class_name, boundary_object, lb_method kernel = create_lattice_boltzmann_boundary_kernel(pdf_field, index_field, lb_method, boundary_object, target=target, openmp=generation_context.openmp) kernel.function_name = "boundary_" + boundary_object.name + kernel.assumed_inner_stride_one = False # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D if lb_method.dim == 2: diff --git a/python/lbmpy_walberla/walberla_lbm_generation.py b/python/lbmpy_walberla/walberla_lbm_generation.py index 7e32867b05b1d8b9bd15b4da09b5c4d20d3cb3cd..a191c47e9a45d037a5110084127b05c140d7dcd7 100644 --- a/python/lbmpy_walberla/walberla_lbm_generation.py +++ b/python/lbmpy_walberla/walberla_lbm_generation.py @@ -122,7 +122,7 @@ def __lattice_model(generation_context, class_name, lb_method, stream_collide_as generation_context.write_file("{}.cpp".format(class_name), source) -def generate_lattice_model(generation_context, class_name, collision_rule, refinement_scaling=None, +def generate_lattice_model(generation_context, class_name, collision_rule, field_layout='zyxf', refinement_scaling=None, **create_kernel_params): # usually a numpy layout is chosen by default i.e. xyzf - which is bad for waLBerla where at least the spatial @@ -138,20 +138,28 @@ def generate_lattice_model(generation_context, class_name, collision_rule, refin if create_kernel_params['target'] == 'gpu': raise ValueError("Lattice Models can only be generated for CPUs. To generate LBM on GPUs use sweeps directly") - src_field = ps.Field.create_generic('pdfs', dim, dtype, index_dimensions=1, layout='fzyx', index_shape=(q,)) - dst_field = ps.Field.create_generic('pdfs_tmp', dim, dtype, index_dimensions=1, layout='fzyx', index_shape=(q,)) + if field_layout == 'fzyx': + create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] = True + elif field_layout == 'zyxf': + create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] = False + + src_field = ps.Field.create_generic('pdfs', dim, dtype, index_dimensions=1, layout=field_layout, index_shape=(q,)) + dst_field = ps.Field.create_generic('pdfs_tmp', dim, dtype, index_dimensions=1, layout=field_layout, index_shape=(q,)) stream_collide_update_rule = create_lbm_kernel(collision_rule, src_field, dst_field, StreamPullTwoFieldsAccessor()) stream_collide_ast = create_kernel(stream_collide_update_rule, **create_kernel_params) stream_collide_ast.function_name = 'kernel_streamCollide' + stream_collide_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] collide_update_rule = create_lbm_kernel(collision_rule, src_field, dst_field, CollideOnlyInplaceAccessor()) collide_ast = create_kernel(collide_update_rule, **create_kernel_params) collide_ast.function_name = 'kernel_collide' + collide_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] - stream_update_rule = create_stream_pull_only_kernel(lb_method.stencil, None, 'pdfs', 'pdfs_tmp', 'fzyx', dtype) + stream_update_rule = create_stream_pull_only_kernel(lb_method.stencil, None, 'pdfs', 'pdfs_tmp', field_layout, dtype) stream_ast = create_kernel(stream_update_rule, **create_kernel_params) stream_ast.function_name = 'kernel_stream' + stream_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] __lattice_model(generation_context, class_name, lb_method, stream_collide_ast, collide_ast, stream_ast, refinement_scaling) diff --git a/python/pystencils_walberla/__init__.py b/python/pystencils_walberla/__init__.py index 88eb440a617713fdf636221c80485bfcd655cf48..b6c310169019dcc215268c60cd90e914dfed3648 100644 --- a/python/pystencils_walberla/__init__.py +++ b/python/pystencils_walberla/__init__.py @@ -2,8 +2,9 @@ from .boundary import generate_staggered_boundary, generate_staggered_flux_bound from .cmake_integration import CodeGeneration from .codegen import ( generate_pack_info, generate_pack_info_for_field, generate_pack_info_from_kernel, - generate_mpidtype_info_from_kernel, generate_sweep) + generate_mpidtype_info_from_kernel, generate_sweep, get_vectorize_instruction_set) __all__ = ['CodeGeneration', 'generate_sweep', 'generate_pack_info_from_kernel', 'generate_pack_info_for_field', 'generate_pack_info', - 'generate_mpidtype_info_from_kernel', 'generate_staggered_boundary', 'generate_staggered_flux_boundary'] + 'generate_mpidtype_info_from_kernel', 'generate_staggered_boundary', 'generate_staggered_flux_boundary', + 'get_vectorize_instruction_set'] diff --git a/python/pystencils_walberla/boundary.py b/python/pystencils_walberla/boundary.py index a0086890bb4bb47cb5b9d915886dcee38e2a906c..2df49896e38ae1c34f8266e9007369d1e28f3791 100644 --- a/python/pystencils_walberla/boundary.py +++ b/python/pystencils_walberla/boundary.py @@ -29,6 +29,7 @@ def generate_staggered_boundary(generation_context, class_name, boundary_object, kernel = create_boundary_kernel(staggered_field, index_field, neighbor_stencil, boundary_object, target=target, openmp=generation_context.openmp) kernel.function_name = "boundary_" + boundary_object.name + kernel.assumed_inner_stride_one = False # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D if dim == 2: @@ -82,6 +83,7 @@ def generate_staggered_flux_boundary(generation_context, class_name, boundary_ob kernel = create_boundary_kernel(staggered_field, index_field, neighbor_stencil, boundary_object, target=target, openmp=generation_context.openmp) kernel.function_name = "boundary_" + boundary_object.name + kernel.assumed_inner_stride_one = False # waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D if dim == 2: diff --git a/python/pystencils_walberla/codegen.py b/python/pystencils_walberla/codegen.py index fb665277b56b62a41063334a0cc5e2968cb1a7e4..7b94dc95548468a217d4ddcfb73b4f9aa072d32b 100644 --- a/python/pystencils_walberla/codegen.py +++ b/python/pystencils_walberla/codegen.py @@ -1,3 +1,4 @@ +import warnings from collections import OrderedDict, defaultdict from itertools import product from typing import Dict, Optional, Sequence, Tuple @@ -13,7 +14,8 @@ from pystencils.stencil import inverse_direction, offset_to_direction_string from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env __all__ = ['generate_sweep', 'generate_pack_info', 'generate_pack_info_for_field', 'generate_pack_info_from_kernel', - 'generate_mpidtype_info_from_kernel', 'default_create_kernel_parameters', 'KernelInfo'] + 'generate_mpidtype_info_from_kernel', 'default_create_kernel_parameters', 'KernelInfo', + 'get_vectorize_instruction_set'] def generate_sweep(generation_context, class_name, assignments, @@ -56,6 +58,7 @@ def generate_sweep(generation_context, class_name, assignments, ast = create_kernel(assignments, **create_kernel_params) else: ast = create_staggered_kernel(assignments, **create_kernel_params) + ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] def to_name(f): return f.name if isinstance(f, Field) else f @@ -222,15 +225,18 @@ def generate_pack_info(generation_context, class_name: str, pack_assignments = [Assignment(buffer(i), term) for i, term in enumerate(terms)] pack_ast = create_kernel(pack_assignments, **create_kernel_params, ghost_layers=0) pack_ast.function_name = 'pack_{}'.format("_".join(direction_strings)) + pack_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] unpack_assignments = [Assignment(term, buffer(i)) for i, term in enumerate(terms)] unpack_ast = create_kernel(unpack_assignments, **create_kernel_params, ghost_layers=0) unpack_ast.function_name = 'unpack_{}'.format("_".join(direction_strings)) + unpack_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] pack_kernels[direction_strings] = KernelInfo(pack_ast) unpack_kernels[direction_strings] = KernelInfo(unpack_ast) elements_per_cell[direction_strings] = len(terms) fused_kernel = create_kernel([Assignment(buffer.center, t) for t in all_accesses], **create_kernel_params) + fused_kernel.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] jinja_context = { 'class_name': class_name, @@ -322,26 +328,29 @@ class KernelInfo: self.parameters = ast.get_parameters() # cache parameters here -def default_create_kernel_parameters(generation_context, params): - default_dtype = "float64" if generation_context.double_accuracy else 'float32' - +def get_vectorize_instruction_set(generation_context): if generation_context.optimize_for_localhost: supported_instruction_sets = get_supported_instruction_sets() if supported_instruction_sets: - default_vec_is = get_supported_instruction_sets()[-1] + return supported_instruction_sets[-1] else: # if cpuinfo package is not installed - default_vec_is = 'sse' + warnings.warn("Could not obtain supported vectorization instruction sets - defaulting to sse") + return 'sse' else: - default_vec_is = None + return None + +def default_create_kernel_parameters(generation_context, params): + default_dtype = "float64" if generation_context.double_accuracy else 'float32' params['target'] = params.get('target', 'cpu') params['data_type'] = params.get('data_type', default_dtype) params['cpu_openmp'] = params.get('cpu_openmp', generation_context.openmp) params['cpu_vectorize_info'] = params.get('cpu_vectorize_info', {}) + default_vec_is = get_vectorize_instruction_set(generation_context) vec = params['cpu_vectorize_info'] vec['instruction_set'] = vec.get('instruction_set', default_vec_is) - vec['assume_inner_stride_one'] = True + vec['assume_inner_stride_one'] = vec.get('assume_inner_stride_one', True) vec['assume_aligned'] = vec.get('assume_aligned', False) vec['nontemporal'] = vec.get('nontemporal', False) return params diff --git a/python/pystencils_walberla/jinja_filters.py b/python/pystencils_walberla/jinja_filters.py index ee79ef903fae7add43ac4b791f0e5cdccf8cc718..b05eefd2297b2d3ab1f1c2e23100db3125e170e7 100644 --- a/python/pystencils_walberla/jinja_filters.py +++ b/python/pystencils_walberla/jinja_filters.py @@ -252,6 +252,8 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non coordinates = tuple(coordinates) kernel_call_lines.append("%s %s = %s->dataAt(%s, %s, %s, %s);" % ((param.symbol.dtype, param.symbol.name, param.field_name) + coordinates)) + if ast.assumed_inner_stride_one and field.index_dimensions > 0: + kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (param.field_name,)) elif param.is_field_stride: casted_stride = get_field_stride(param) type_str = param.symbol.dtype.base_name @@ -265,6 +267,8 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non max_value = "%s->%sSizeWithGhostLayer()" % (field.name, ('x', 'y', 'z')[coord]) kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, %s);" % (max_value, shape)) kernel_call_lines.append("const %s %s = %s;" % (type_str, param.symbol.name, shape)) + if ast.assumed_inner_stride_one and field.index_dimensions > 0: + kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (field.name,)) call_parameters = ", ".join([p.symbol.name for p in ast_params]) diff --git a/tests/lbm/CMakeLists.txt b/tests/lbm/CMakeLists.txt index 4d732d1cf5aa74d47533e1e377584fa4091e5c00..221d6ff7330bfa83a3f437aaaa5db25ae024dd0a 100644 --- a/tests/lbm/CMakeLists.txt +++ b/tests/lbm/CMakeLists.txt @@ -72,13 +72,23 @@ waLBerla_execute_test( NAME SuViscoelasticityTest COMMAND $<TARGET_FILE:SuVisco # Code Generation if( WALBERLA_BUILD_WITH_CODEGEN ) + waLBerla_generate_target_from_python(NAME LbCodeGenerationExampleGenerated FILE codegen/LbCodeGenerationExample.py OUT_FILES LbCodeGenerationExample_LatticeModel.cpp LbCodeGenerationExample_LatticeModel.h LbCodeGenerationExample_NoSlip.cpp LbCodeGenerationExample_NoSlip.h LbCodeGenerationExample_UBB.cpp LbCodeGenerationExample_UBB.h ) waLBerla_compile_test( FILES codegen/LbCodeGenerationExample.cpp DEPENDS LbCodeGenerationExampleGenerated) + waLBerla_generate_target_from_python(NAME FluctuatingMRTGenerated FILE codegen/FluctuatingMRT.py OUT_FILES FluctuatingMRT_LatticeModel.cpp FluctuatingMRT_LatticeModel.h ) waLBerla_compile_test( FILES codegen/FluctuatingMRT.cpp DEPENDS FluctuatingMRTGenerated) + +waLBerla_generate_target_from_python(NAME FieldLayoutAndVectorizationTestGenerated FILE codegen/FieldLayoutAndVectorizationTest.py + OUT_FILES FieldLayoutAndVectorizationTest_FZYX_Vec_LatticeModel.cpp FieldLayoutAndVectorizationTest_FZYX_Vec_LatticeModel.h + FieldLayoutAndVectorizationTest_FZYX_NoVec_LatticeModel.cpp FieldLayoutAndVectorizationTest_FZYX_NoVec_LatticeModel.h + FieldLayoutAndVectorizationTest_ZYXF_Vec_LatticeModel.cpp FieldLayoutAndVectorizationTest_ZYXF_Vec_LatticeModel.h + FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel.cpp FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel.h ) +waLBerla_compile_test( FILES codegen/FieldLayoutAndVectorizationTest.cpp DEPENDS FieldLayoutAndVectorizationTestGenerated) + endif() \ No newline at end of file diff --git a/tests/lbm/codegen/FieldLayoutAndVectorizationTest.cpp b/tests/lbm/codegen/FieldLayoutAndVectorizationTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..09ff983b2325f63dd4401216c43658b5bb1fce30 --- /dev/null +++ b/tests/lbm/codegen/FieldLayoutAndVectorizationTest.cpp @@ -0,0 +1,127 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldLayoutAndVectorizationTest.cpp +//! \author Christoph Rettinger <christoph.rettinger@fau.de> +// +//====================================================================================================================== + +#include "blockforest/Initialization.h" + +#include "core/DataTypes.h" +#include "core/Environment.h" +#include "core/debug/Debug.h" +#include "core/debug/TestSubsystem.h" +#include "core/math/all.h" +#include "core/logging/all.h" + +#include "lbm/field/AddToStorage.h" +#include "lbm/field/PdfField.h" + +#include "FieldLayoutAndVectorizationTest_FZYX_Vec_LatticeModel.h" +#include "FieldLayoutAndVectorizationTest_FZYX_NoVec_LatticeModel.h" +#include "FieldLayoutAndVectorizationTest_ZYXF_Vec_LatticeModel.h" +#include "FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel.h" + +namespace field_layout_vectorization_test { + +using namespace walberla; + +typedef walberla::uint8_t flag_t; +typedef FlagField<flag_t> FlagField_T; + +template<typename LM1_T, typename LM2_T> +void checkEquivalence(const shared_ptr<StructuredBlockStorage> & blocks, BlockDataID pdfField1ID, BlockDataID pdfField2ID, std::string identifier) +{ + for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) { + auto pdfField1 = blockIt->getData<lbm::PdfField<LM1_T> >(pdfField1ID); + auto pdfField2 = blockIt->getData<lbm::PdfField<LM2_T> >(pdfField2ID); + + WALBERLA_FOR_ALL_CELLS_XYZ(pdfField1, + for( uint_t f = 0; f < pdfField1->fSize(); ++f) + { + WALBERLA_CHECK_FLOAT_EQUAL(pdfField1->get(x,y,z,f), pdfField2->get(x,y,z,f), + identifier << " - mismatch in " << x << " " << y << " " << z << " " << f); + } + ) + + } + +} + +int main(int argc, char **argv) { + + debug::enterTestMode(); + + mpi::Environment env( argc, argv ); + + walberla::Environment walberlaEnv(argc, argv); + + auto blocks = blockforest::createUniformBlockGrid( 1, 1, 1, + 32, 32, 32, real_t(1), + 0, false, false, + true, true, true, //periodicity + false ); + + + real_t omega = real_t(1.6); + Vector3<real_t> initialVel(real_t(0.1), real_t(0), real_t(0)); + + using LatticeModel1_T = lbm::FieldLayoutAndVectorizationTest_FZYX_Vec_LatticeModel; + LatticeModel1_T latticeModel1(omega); + BlockDataID pdfField1ID = lbm::addPdfFieldToStorage(blocks, "pdf field1", latticeModel1, initialVel, real_t(1), field::fzyx); + for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) { + auto sweep = LatticeModel1_T::Sweep(pdfField1ID); + sweep(&(*blockIt)); + } + + using LatticeModel2_T = lbm::FieldLayoutAndVectorizationTest_FZYX_NoVec_LatticeModel; + LatticeModel2_T latticeModel2(omega); + BlockDataID pdfField2ID = lbm::addPdfFieldToStorage(blocks, "pdf field2", latticeModel2, initialVel, real_t(1), field::fzyx); + for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) { + auto sweep = LatticeModel2_T::Sweep(pdfField2ID); + sweep(&(*blockIt)); + } + + checkEquivalence<LatticeModel1_T,LatticeModel2_T>(blocks, pdfField1ID, pdfField2ID, "1 vs 2"); + + using LatticeModel3_T = lbm::FieldLayoutAndVectorizationTest_ZYXF_Vec_LatticeModel; + LatticeModel3_T latticeModel3(omega); + BlockDataID pdfField3ID = lbm::addPdfFieldToStorage(blocks, "pdf field3", latticeModel3, initialVel, real_t(1), field::zyxf); + for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) { + auto sweep = LatticeModel3_T::Sweep(pdfField3ID); + sweep(&(*blockIt)); + } + + checkEquivalence<LatticeModel2_T,LatticeModel3_T>(blocks, pdfField2ID, pdfField3ID, "2 vs 3"); + + using LatticeModel4_T = lbm::FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel; + LatticeModel4_T latticeModel4(omega); + BlockDataID pdfField4ID = lbm::addPdfFieldToStorage(blocks, "pdf field4", latticeModel4, initialVel, real_t(1), field::zyxf); + for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) { + auto sweep = LatticeModel4_T::Sweep(pdfField4ID); + sweep(&(*blockIt)); + } + + checkEquivalence<LatticeModel3_T,LatticeModel4_T>(blocks, pdfField3ID, pdfField4ID, "3 vs 4"); + + return EXIT_SUCCESS; +} + +} //namespace field_layout_vectorization_test + +int main( int argc, char **argv ){ + field_layout_vectorization_test::main(argc, argv); +} diff --git a/tests/lbm/codegen/FieldLayoutAndVectorizationTest.py b/tests/lbm/codegen/FieldLayoutAndVectorizationTest.py new file mode 100644 index 0000000000000000000000000000000000000000..5e43eee556cbb985a524201565bc4a0abfd1b73b --- /dev/null +++ b/tests/lbm/codegen/FieldLayoutAndVectorizationTest.py @@ -0,0 +1,25 @@ +import sympy as sp +from lbmpy.creationfunctions import create_lb_collision_rule +from pystencils_walberla import CodeGeneration +from lbmpy_walberla import generate_lattice_model +from pystencils_walberla import get_vectorize_instruction_set + +from collections import namedtuple + +with CodeGeneration() as ctx: + omega_shear = sp.symbols("omega") + collision_rule = create_lb_collision_rule( stencil='D2Q9', compressible=False, method='srt' ) + + SetupDefinition = namedtuple('SetupDefinition', ['name', 'field_layout', 'vectorization_dict']) + + default_vectorize_instruction_set = get_vectorize_instruction_set(ctx) + + configurations = [SetupDefinition('FZYX_Vec', 'fzyx', {'instruction_set': default_vectorize_instruction_set} ), + SetupDefinition('FZYX_NoVec', 'fzyx', {'instruction_set': None} ), + SetupDefinition('ZYXF_Vec', 'zyxf', {'instruction_set': default_vectorize_instruction_set} ), # does/should not vectorize, but instead yield warning + SetupDefinition('ZYXF_NoVec', 'zyxf', {'instruction_set': None} )] + + for conf in configurations: + generate_lattice_model(ctx, 'FieldLayoutAndVectorizationTest_'+conf.name+'_LatticeModel', collision_rule, + field_layout=conf.field_layout, refinement_scaling=None, cpu_vectorize_info=conf.vectorization_dict) + diff --git a/tests/lbm/codegen/FluctuatingMRT.py b/tests/lbm/codegen/FluctuatingMRT.py index f6b0074f27e6dee6e38c4828eb3161146e67de98..a5ffa9bafb0c998bec3908f88d89ce0c463519b5 100644 --- a/tests/lbm/codegen/FluctuatingMRT.py +++ b/tests/lbm/codegen/FluctuatingMRT.py @@ -9,7 +9,7 @@ from lbmpy_walberla import generate_lattice_model with CodeGeneration() as ctx: omega_shear = sp.symbols("omega_shear") temperature = sp.symbols("temperature") - force_field, vel_field = ps.fields("force(3), velocity(3): [3D]", layout='fzyx') + force_field, vel_field = ps.fields("force(3), velocity(3): [3D]", layout='zyxf') def rr_getter(moment_group): is_shear = [is_shear_moment(m, 3) for m in moment_group] diff --git a/tests/lbm/codegen/LbCodeGenerationExample.py b/tests/lbm/codegen/LbCodeGenerationExample.py index 67aed714ee565b149adc0aacd0f5f07c8eb5e054..259a9787e017e9d44d0150aa3816f646403bcb0f 100644 --- a/tests/lbm/codegen/LbCodeGenerationExample.py +++ b/tests/lbm/codegen/LbCodeGenerationExample.py @@ -7,7 +7,7 @@ from lbmpy_walberla import RefinementScaling, generate_boundary, generate_lattic with CodeGeneration() as ctx: omega, omega_free = sp.symbols("omega, omega_free") - force_field, vel_field, omega_out = ps.fields("force(3), velocity(3), omega_out: [3D]", layout='fzyx') + force_field, vel_field, omega_out = ps.fields("force(3), velocity(3), omega_out: [3D]", layout='zyxf') # the collision rule of the LB method where the some advanced features collision_rule = create_lb_collision_rule(