Commit 64a4d686 authored by Michael Kuron's avatar Michael Kuron 🔬

Merge branch 'codegen_layout' into 'master'

Allowed different field layouts in LBM codegen

Closes #119

See merge request !294
parents 8347ddc8 1aad0dcb
Pipeline #24575 passed with stages
in 302 minutes and 48 seconds
......@@ -10,6 +10,7 @@ from lbmpy.stencils import get_stencil
with CodeGeneration() as ctx:
omegaVisc = sp.Symbol("omega_visc")
omegaBulk = ps.fields("omega_bulk: [3D]", layout='fzyx')
omegaMagic = sp.Symbol("omega_magic")
......@@ -37,8 +38,8 @@ with CodeGeneration() as ctx:
#print(methodWithForce.relaxation_rates)
#print(methodWithForce.moment_matrix)
collision_rule = create_lb_collision_rule(lb_method=methodWithForce)
generate_lattice_model(ctx, 'GeneratedLBM', collision_rule)
collision_rule = create_lb_collision_rule(lb_method=methodWithForce, optimization={'cse_global': True})
generate_lattice_model(ctx, 'GeneratedLBM', collision_rule, field_layout='fzyx')
......
......@@ -41,5 +41,5 @@ with CodeGeneration() as ctx:
#print(methodWithForce.relaxation_rates)
#print(methodWithForce.moment_matrix)
collision_rule = create_lb_collision_rule(lb_method=methodWithForce)
generate_lattice_model(ctx, 'GeneratedLBMWithForce', collision_rule)
collision_rule = create_lb_collision_rule(lb_method=methodWithForce, optimization={'cse_global': True})
generate_lattice_model(ctx, 'GeneratedLBMWithForce', collision_rule, field_layout='fzyx')
......@@ -29,6 +29,7 @@ def generate_boundary(generation_context, class_name, boundary_object, lb_method
kernel = create_lattice_boltzmann_boundary_kernel(pdf_field, index_field, lb_method, boundary_object, target=target,
openmp=generation_context.openmp)
kernel.function_name = "boundary_" + boundary_object.name
kernel.assumed_inner_stride_one = False
# waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D
if lb_method.dim == 2:
......
......@@ -122,7 +122,7 @@ def __lattice_model(generation_context, class_name, lb_method, stream_collide_as
generation_context.write_file("{}.cpp".format(class_name), source)
def generate_lattice_model(generation_context, class_name, collision_rule, refinement_scaling=None,
def generate_lattice_model(generation_context, class_name, collision_rule, field_layout='zyxf', refinement_scaling=None,
**create_kernel_params):
# usually a numpy layout is chosen by default i.e. xyzf - which is bad for waLBerla where at least the spatial
......@@ -138,20 +138,28 @@ def generate_lattice_model(generation_context, class_name, collision_rule, refin
if create_kernel_params['target'] == 'gpu':
raise ValueError("Lattice Models can only be generated for CPUs. To generate LBM on GPUs use sweeps directly")
src_field = ps.Field.create_generic('pdfs', dim, dtype, index_dimensions=1, layout='fzyx', index_shape=(q,))
dst_field = ps.Field.create_generic('pdfs_tmp', dim, dtype, index_dimensions=1, layout='fzyx', index_shape=(q,))
if field_layout == 'fzyx':
create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] = True
elif field_layout == 'zyxf':
create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one'] = False
src_field = ps.Field.create_generic('pdfs', dim, dtype, index_dimensions=1, layout=field_layout, index_shape=(q,))
dst_field = ps.Field.create_generic('pdfs_tmp', dim, dtype, index_dimensions=1, layout=field_layout, index_shape=(q,))
stream_collide_update_rule = create_lbm_kernel(collision_rule, src_field, dst_field, StreamPullTwoFieldsAccessor())
stream_collide_ast = create_kernel(stream_collide_update_rule, **create_kernel_params)
stream_collide_ast.function_name = 'kernel_streamCollide'
stream_collide_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one']
collide_update_rule = create_lbm_kernel(collision_rule, src_field, dst_field, CollideOnlyInplaceAccessor())
collide_ast = create_kernel(collide_update_rule, **create_kernel_params)
collide_ast.function_name = 'kernel_collide'
collide_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one']
stream_update_rule = create_stream_pull_only_kernel(lb_method.stencil, None, 'pdfs', 'pdfs_tmp', 'fzyx', dtype)
stream_update_rule = create_stream_pull_only_kernel(lb_method.stencil, None, 'pdfs', 'pdfs_tmp', field_layout, dtype)
stream_ast = create_kernel(stream_update_rule, **create_kernel_params)
stream_ast.function_name = 'kernel_stream'
stream_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one']
__lattice_model(generation_context, class_name, lb_method, stream_collide_ast, collide_ast, stream_ast,
refinement_scaling)
......
......@@ -2,8 +2,9 @@ from .boundary import generate_staggered_boundary, generate_staggered_flux_bound
from .cmake_integration import CodeGeneration
from .codegen import (
generate_pack_info, generate_pack_info_for_field, generate_pack_info_from_kernel,
generate_mpidtype_info_from_kernel, generate_sweep)
generate_mpidtype_info_from_kernel, generate_sweep, get_vectorize_instruction_set)
__all__ = ['CodeGeneration',
'generate_sweep', 'generate_pack_info_from_kernel', 'generate_pack_info_for_field', 'generate_pack_info',
'generate_mpidtype_info_from_kernel', 'generate_staggered_boundary', 'generate_staggered_flux_boundary']
'generate_mpidtype_info_from_kernel', 'generate_staggered_boundary', 'generate_staggered_flux_boundary',
'get_vectorize_instruction_set']
......@@ -29,6 +29,7 @@ def generate_staggered_boundary(generation_context, class_name, boundary_object,
kernel = create_boundary_kernel(staggered_field, index_field, neighbor_stencil, boundary_object, target=target,
openmp=generation_context.openmp)
kernel.function_name = "boundary_" + boundary_object.name
kernel.assumed_inner_stride_one = False
# waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D
if dim == 2:
......@@ -82,6 +83,7 @@ def generate_staggered_flux_boundary(generation_context, class_name, boundary_ob
kernel = create_boundary_kernel(staggered_field, index_field, neighbor_stencil, boundary_object, target=target,
openmp=generation_context.openmp)
kernel.function_name = "boundary_" + boundary_object.name
kernel.assumed_inner_stride_one = False
# waLBerla is a 3D framework. Therefore, a zero for the z index has to be added if we work in 2D
if dim == 2:
......
import warnings
from collections import OrderedDict, defaultdict
from itertools import product
from typing import Dict, Optional, Sequence, Tuple
......@@ -13,7 +14,8 @@ from pystencils.stencil import inverse_direction, offset_to_direction_string
from pystencils_walberla.jinja_filters import add_pystencils_filters_to_jinja_env
__all__ = ['generate_sweep', 'generate_pack_info', 'generate_pack_info_for_field', 'generate_pack_info_from_kernel',
'generate_mpidtype_info_from_kernel', 'default_create_kernel_parameters', 'KernelInfo']
'generate_mpidtype_info_from_kernel', 'default_create_kernel_parameters', 'KernelInfo',
'get_vectorize_instruction_set']
def generate_sweep(generation_context, class_name, assignments,
......@@ -56,6 +58,7 @@ def generate_sweep(generation_context, class_name, assignments,
ast = create_kernel(assignments, **create_kernel_params)
else:
ast = create_staggered_kernel(assignments, **create_kernel_params)
ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one']
def to_name(f):
return f.name if isinstance(f, Field) else f
......@@ -222,15 +225,18 @@ def generate_pack_info(generation_context, class_name: str,
pack_assignments = [Assignment(buffer(i), term) for i, term in enumerate(terms)]
pack_ast = create_kernel(pack_assignments, **create_kernel_params, ghost_layers=0)
pack_ast.function_name = 'pack_{}'.format("_".join(direction_strings))
pack_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one']
unpack_assignments = [Assignment(term, buffer(i)) for i, term in enumerate(terms)]
unpack_ast = create_kernel(unpack_assignments, **create_kernel_params, ghost_layers=0)
unpack_ast.function_name = 'unpack_{}'.format("_".join(direction_strings))
unpack_ast.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one']
pack_kernels[direction_strings] = KernelInfo(pack_ast)
unpack_kernels[direction_strings] = KernelInfo(unpack_ast)
elements_per_cell[direction_strings] = len(terms)
fused_kernel = create_kernel([Assignment(buffer.center, t) for t in all_accesses], **create_kernel_params)
fused_kernel.assumed_inner_stride_one = create_kernel_params['cpu_vectorize_info']['assume_inner_stride_one']
jinja_context = {
'class_name': class_name,
......@@ -322,26 +328,29 @@ class KernelInfo:
self.parameters = ast.get_parameters() # cache parameters here
def default_create_kernel_parameters(generation_context, params):
default_dtype = "float64" if generation_context.double_accuracy else 'float32'
def get_vectorize_instruction_set(generation_context):
if generation_context.optimize_for_localhost:
supported_instruction_sets = get_supported_instruction_sets()
if supported_instruction_sets:
default_vec_is = get_supported_instruction_sets()[-1]
return supported_instruction_sets[-1]
else: # if cpuinfo package is not installed
default_vec_is = 'sse'
warnings.warn("Could not obtain supported vectorization instruction sets - defaulting to sse")
return 'sse'
else:
default_vec_is = None
return None
def default_create_kernel_parameters(generation_context, params):
default_dtype = "float64" if generation_context.double_accuracy else 'float32'
params['target'] = params.get('target', 'cpu')
params['data_type'] = params.get('data_type', default_dtype)
params['cpu_openmp'] = params.get('cpu_openmp', generation_context.openmp)
params['cpu_vectorize_info'] = params.get('cpu_vectorize_info', {})
default_vec_is = get_vectorize_instruction_set(generation_context)
vec = params['cpu_vectorize_info']
vec['instruction_set'] = vec.get('instruction_set', default_vec_is)
vec['assume_inner_stride_one'] = True
vec['assume_inner_stride_one'] = vec.get('assume_inner_stride_one', True)
vec['assume_aligned'] = vec.get('assume_aligned', False)
vec['nontemporal'] = vec.get('nontemporal', False)
return params
......
......@@ -252,6 +252,8 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non
coordinates = tuple(coordinates)
kernel_call_lines.append("%s %s = %s->dataAt(%s, %s, %s, %s);" %
((param.symbol.dtype, param.symbol.name, param.field_name) + coordinates))
if ast.assumed_inner_stride_one and field.index_dimensions > 0:
kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (param.field_name,))
elif param.is_field_stride:
casted_stride = get_field_stride(param)
type_str = param.symbol.dtype.base_name
......@@ -265,6 +267,8 @@ def generate_call(ctx, kernel_info, ghost_layers_to_include=0, cell_interval=Non
max_value = "%s->%sSizeWithGhostLayer()" % (field.name, ('x', 'y', 'z')[coord])
kernel_call_lines.append("WALBERLA_ASSERT_GREATER_EQUAL(%s, %s);" % (max_value, shape))
kernel_call_lines.append("const %s %s = %s;" % (type_str, param.symbol.name, shape))
if ast.assumed_inner_stride_one and field.index_dimensions > 0:
kernel_call_lines.append("WALBERLA_ASSERT_EQUAL(%s->layout(), field::fzyx);" % (field.name,))
call_parameters = ", ".join([p.symbol.name for p in ast_params])
......
......@@ -72,13 +72,23 @@ waLBerla_execute_test( NAME SuViscoelasticityTest COMMAND $<TARGET_FILE:SuVisco
# Code Generation
if( WALBERLA_BUILD_WITH_CODEGEN )
waLBerla_generate_target_from_python(NAME LbCodeGenerationExampleGenerated
FILE codegen/LbCodeGenerationExample.py
OUT_FILES LbCodeGenerationExample_LatticeModel.cpp LbCodeGenerationExample_LatticeModel.h
LbCodeGenerationExample_NoSlip.cpp LbCodeGenerationExample_NoSlip.h
LbCodeGenerationExample_UBB.cpp LbCodeGenerationExample_UBB.h )
waLBerla_compile_test( FILES codegen/LbCodeGenerationExample.cpp DEPENDS LbCodeGenerationExampleGenerated)
waLBerla_generate_target_from_python(NAME FluctuatingMRTGenerated FILE codegen/FluctuatingMRT.py
OUT_FILES FluctuatingMRT_LatticeModel.cpp FluctuatingMRT_LatticeModel.h )
waLBerla_compile_test( FILES codegen/FluctuatingMRT.cpp DEPENDS FluctuatingMRTGenerated)
waLBerla_generate_target_from_python(NAME FieldLayoutAndVectorizationTestGenerated FILE codegen/FieldLayoutAndVectorizationTest.py
OUT_FILES FieldLayoutAndVectorizationTest_FZYX_Vec_LatticeModel.cpp FieldLayoutAndVectorizationTest_FZYX_Vec_LatticeModel.h
FieldLayoutAndVectorizationTest_FZYX_NoVec_LatticeModel.cpp FieldLayoutAndVectorizationTest_FZYX_NoVec_LatticeModel.h
FieldLayoutAndVectorizationTest_ZYXF_Vec_LatticeModel.cpp FieldLayoutAndVectorizationTest_ZYXF_Vec_LatticeModel.h
FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel.cpp FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel.h )
waLBerla_compile_test( FILES codegen/FieldLayoutAndVectorizationTest.cpp DEPENDS FieldLayoutAndVectorizationTestGenerated)
endif()
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file FieldLayoutAndVectorizationTest.cpp
//! \author Christoph Rettinger <christoph.rettinger@fau.de>
//
//======================================================================================================================
#include "blockforest/Initialization.h"
#include "core/DataTypes.h"
#include "core/Environment.h"
#include "core/debug/Debug.h"
#include "core/debug/TestSubsystem.h"
#include "core/math/all.h"
#include "core/logging/all.h"
#include "lbm/field/AddToStorage.h"
#include "lbm/field/PdfField.h"
#include "FieldLayoutAndVectorizationTest_FZYX_Vec_LatticeModel.h"
#include "FieldLayoutAndVectorizationTest_FZYX_NoVec_LatticeModel.h"
#include "FieldLayoutAndVectorizationTest_ZYXF_Vec_LatticeModel.h"
#include "FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel.h"
namespace field_layout_vectorization_test {
using namespace walberla;
typedef walberla::uint8_t flag_t;
typedef FlagField<flag_t> FlagField_T;
template<typename LM1_T, typename LM2_T>
void checkEquivalence(const shared_ptr<StructuredBlockStorage> & blocks, BlockDataID pdfField1ID, BlockDataID pdfField2ID, std::string identifier)
{
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
auto pdfField1 = blockIt->getData<lbm::PdfField<LM1_T> >(pdfField1ID);
auto pdfField2 = blockIt->getData<lbm::PdfField<LM2_T> >(pdfField2ID);
WALBERLA_FOR_ALL_CELLS_XYZ(pdfField1,
for( uint_t f = 0; f < pdfField1->fSize(); ++f)
{
WALBERLA_CHECK_FLOAT_EQUAL(pdfField1->get(x,y,z,f), pdfField2->get(x,y,z,f),
identifier << " - mismatch in " << x << " " << y << " " << z << " " << f);
}
)
}
}
int main(int argc, char **argv) {
debug::enterTestMode();
mpi::Environment env( argc, argv );
walberla::Environment walberlaEnv(argc, argv);
auto blocks = blockforest::createUniformBlockGrid( 1, 1, 1,
32, 32, 32, real_t(1),
0, false, false,
true, true, true, //periodicity
false );
real_t omega = real_t(1.6);
Vector3<real_t> initialVel(real_t(0.1), real_t(0), real_t(0));
using LatticeModel1_T = lbm::FieldLayoutAndVectorizationTest_FZYX_Vec_LatticeModel;
LatticeModel1_T latticeModel1(omega);
BlockDataID pdfField1ID = lbm::addPdfFieldToStorage(blocks, "pdf field1", latticeModel1, initialVel, real_t(1), field::fzyx);
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
auto sweep = LatticeModel1_T::Sweep(pdfField1ID);
sweep(&(*blockIt));
}
using LatticeModel2_T = lbm::FieldLayoutAndVectorizationTest_FZYX_NoVec_LatticeModel;
LatticeModel2_T latticeModel2(omega);
BlockDataID pdfField2ID = lbm::addPdfFieldToStorage(blocks, "pdf field2", latticeModel2, initialVel, real_t(1), field::fzyx);
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
auto sweep = LatticeModel2_T::Sweep(pdfField2ID);
sweep(&(*blockIt));
}
checkEquivalence<LatticeModel1_T,LatticeModel2_T>(blocks, pdfField1ID, pdfField2ID, "1 vs 2");
using LatticeModel3_T = lbm::FieldLayoutAndVectorizationTest_ZYXF_Vec_LatticeModel;
LatticeModel3_T latticeModel3(omega);
BlockDataID pdfField3ID = lbm::addPdfFieldToStorage(blocks, "pdf field3", latticeModel3, initialVel, real_t(1), field::zyxf);
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
auto sweep = LatticeModel3_T::Sweep(pdfField3ID);
sweep(&(*blockIt));
}
checkEquivalence<LatticeModel2_T,LatticeModel3_T>(blocks, pdfField2ID, pdfField3ID, "2 vs 3");
using LatticeModel4_T = lbm::FieldLayoutAndVectorizationTest_ZYXF_NoVec_LatticeModel;
LatticeModel4_T latticeModel4(omega);
BlockDataID pdfField4ID = lbm::addPdfFieldToStorage(blocks, "pdf field4", latticeModel4, initialVel, real_t(1), field::zyxf);
for(auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt) {
auto sweep = LatticeModel4_T::Sweep(pdfField4ID);
sweep(&(*blockIt));
}
checkEquivalence<LatticeModel3_T,LatticeModel4_T>(blocks, pdfField3ID, pdfField4ID, "3 vs 4");
return EXIT_SUCCESS;
}
} //namespace field_layout_vectorization_test
int main( int argc, char **argv ){
field_layout_vectorization_test::main(argc, argv);
}
import sympy as sp
from lbmpy.creationfunctions import create_lb_collision_rule
from pystencils_walberla import CodeGeneration
from lbmpy_walberla import generate_lattice_model
from pystencils_walberla import get_vectorize_instruction_set
from collections import namedtuple
with CodeGeneration() as ctx:
omega_shear = sp.symbols("omega")
collision_rule = create_lb_collision_rule( stencil='D2Q9', compressible=False, method='srt' )
SetupDefinition = namedtuple('SetupDefinition', ['name', 'field_layout', 'vectorization_dict'])
default_vectorize_instruction_set = get_vectorize_instruction_set(ctx)
configurations = [SetupDefinition('FZYX_Vec', 'fzyx', {'instruction_set': default_vectorize_instruction_set} ),
SetupDefinition('FZYX_NoVec', 'fzyx', {'instruction_set': None} ),
SetupDefinition('ZYXF_Vec', 'zyxf', {'instruction_set': default_vectorize_instruction_set} ), # does/should not vectorize, but instead yield warning
SetupDefinition('ZYXF_NoVec', 'zyxf', {'instruction_set': None} )]
for conf in configurations:
generate_lattice_model(ctx, 'FieldLayoutAndVectorizationTest_'+conf.name+'_LatticeModel', collision_rule,
field_layout=conf.field_layout, refinement_scaling=None, cpu_vectorize_info=conf.vectorization_dict)
......@@ -9,7 +9,7 @@ from lbmpy_walberla import generate_lattice_model
with CodeGeneration() as ctx:
omega_shear = sp.symbols("omega_shear")
temperature = sp.symbols("temperature")
force_field, vel_field = ps.fields("force(3), velocity(3): [3D]", layout='fzyx')
force_field, vel_field = ps.fields("force(3), velocity(3): [3D]", layout='zyxf')
def rr_getter(moment_group):
is_shear = [is_shear_moment(m, 3) for m in moment_group]
......
......@@ -7,7 +7,7 @@ from lbmpy_walberla import RefinementScaling, generate_boundary, generate_lattic
with CodeGeneration() as ctx:
omega, omega_free = sp.symbols("omega, omega_free")
force_field, vel_field, omega_out = ps.fields("force(3), velocity(3), omega_out: [3D]", layout='fzyx')
force_field, vel_field, omega_out = ps.fields("force(3), velocity(3), omega_out: [3D]", layout='zyxf')
# the collision rule of the LB method where the some advanced features
collision_rule = create_lb_collision_rule(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment