Commit fc54e898 authored by Helen Schottenhamml
Browse files

Merge branch 'UpdateNewLbmpyVersion' into 'master'

Update new lbmpy version

See merge request !481
parents d4acb932 9fe4bbd5
Pipeline #34512 passed with stages in 296 minutes and 3 seconds
This diff is collapsed.
from pystencils import Target
from pystencils.field import fields
from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil
from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow
from pystencils_walberla import CodeGeneration, generate_sweep, generate_info_header
from lbmpy_walberla.additional_data_handler import UBBAdditionalDataHandler, OutflowAdditionalDataHandler
from lbmpy_walberla import generate_boundary, generate_lb_pack_info
from lbmpy_walberla import generate_lb_pack_info
from lbmpy_walberla import generate_alternating_lbm_sweep, generate_alternating_lbm_boundary
import sympy as sp
with CodeGeneration() as ctx:
data_type = "float64" if ctx.double_accuracy else "float32"
stencil = get_stencil("D3Q27")
q = len(stencil)
dim = len(stencil[0])
stencil = LBStencil(Stencil.D3Q27)
q = stencil.Q
dim = stencil.D
streaming_pattern = 'esotwist'
timesteps = get_timesteps(streaming_pattern)
......@@ -32,21 +33,12 @@ with CodeGeneration() as ctx:
'velocity': velocity_field
}
opt = {'symbolic_field': pdfs,
'cse_global': False,
'cse_pdfs': False,
'double_precision': True if ctx.double_accuracy else False}
method_params = {'method': 'cumulant',
'stencil': stencil,
'relaxation_rate': omega,
'galilean_correction': True,
'field_name': 'pdfs',
'streaming_pattern': streaming_pattern,
'output': output,
'optimization': opt}
collision_rule = create_lb_collision_rule(**method_params)
lbm_config = LBMConfig(stencil=stencil, method=Method.CUMULANT, relaxation_rate=omega, galilean_correction=True,
field_name='pdfs', streaming_pattern=streaming_pattern, output=output)
lbm_optimisation = LBMOptimisation(symbolic_field=pdfs, cse_global=False, cse_pdfs=False)
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_optimisation)
lb_method = collision_rule.method
# getter & setter
......@@ -63,15 +55,14 @@ with CodeGeneration() as ctx:
'ScalarField_T': density_field}
if ctx.cuda:
target = 'gpu'
target = Target.GPU
else:
target = 'cpu'
opt['target'] = target
target = Target.CPU
# sweeps
generate_alternating_lbm_sweep(ctx, 'FlowAroundSphereCodeGen_LbSweep',
collision_rule, streaming_pattern, optimization=opt)
collision_rule, lbm_config=lbm_config, lbm_optimisation=lbm_optimisation,
target=target)
generate_sweep(ctx, 'FlowAroundSphereCodeGen_MacroSetter', setter_assignments, target=target)
# boundaries
......
from pystencils import fields, TypedSymbol
from pystencils import fields, Target, TypedSymbol
from pystencils.simp import sympy_cse
from lbmpy import LBMConfig, LBStencil, Method, Stencil
from lbmpy.creationfunctions import create_lb_method
from lbmpy.stencils import get_stencil
from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_for_field, generate_info_header
from lbmpy_walberla import generate_lb_pack_info
......@@ -18,16 +18,9 @@ import numpy as np
with CodeGeneration() as ctx:
field_type = "float64" if ctx.double_accuracy else "float32"
stencil_phase_name = "D3Q15"
stencil_hydro_name = "D3Q27"
stencil_phase = get_stencil(stencil_phase_name, "walberla")
stencil_hydro = get_stencil(stencil_hydro_name, "walberla")
q_phase = len(stencil_phase)
q_hydro = len(stencil_hydro)
assert (len(stencil_phase[0]) == len(stencil_hydro[0]))
dimensions = len(stencil_hydro[0])
stencil_phase = LBStencil(Stencil.D3Q15)
stencil_hydro = LBStencil(Stencil.D3Q27)
assert (stencil_phase.D == stencil_hydro.D)
########################
# PARAMETER DEFINITION #
......@@ -54,17 +47,17 @@ with CodeGeneration() as ctx:
########################
# velocity field
u = fields(f"vel_field({dimensions}): {field_type}[{dimensions}D]", layout='fzyx')
u = fields(f"vel_field({stencil_hydro.D}): {field_type}[{stencil_hydro.D}D]", layout='fzyx')
# phase-field
C = fields(f"phase_field: {field_type}[{dimensions}D]", layout='fzyx')
C_tmp = fields(f"phase_field_tmp: {field_type}[{dimensions}D]", layout='fzyx')
C = fields(f"phase_field: {field_type}[{stencil_hydro.D}D]", layout='fzyx')
C_tmp = fields(f"phase_field_tmp: {field_type}[{stencil_hydro.D}D]", layout='fzyx')
# phase-field distribution functions
h = fields(f"lb_phase_field({q_phase}): {field_type}[{dimensions}D]", layout='fzyx')
h_tmp = fields(f"lb_phase_field_tmp({q_phase}): {field_type}[{dimensions}D]", layout='fzyx')
h = fields(f"lb_phase_field({stencil_phase.Q}): {field_type}[{stencil_phase.D}D]", layout='fzyx')
h_tmp = fields(f"lb_phase_field_tmp({stencil_phase.Q}): {field_type}[{stencil_phase.D}D]", layout='fzyx')
# hydrodynamic distribution functions
g = fields(f"lb_velocity_field({q_hydro}): {field_type}[{dimensions}D]", layout='fzyx')
g_tmp = fields(f"lb_velocity_field_tmp({q_hydro}): {field_type}[{dimensions}D]", layout='fzyx')
g = fields(f"lb_velocity_field({stencil_hydro.Q}): {field_type}[{stencil_hydro.D}D]", layout='fzyx')
g_tmp = fields(f"lb_velocity_field_tmp({stencil_hydro.Q}): {field_type}[{stencil_hydro.D}D]", layout='fzyx')
########################################
# RELAXATION RATES AND EXTERNAL FORCES #
......@@ -83,21 +76,23 @@ with CodeGeneration() as ctx:
# LBM METHODS #
###############
method_phase = create_lb_method(stencil=stencil_phase, method='srt', relaxation_rate=w_c, compressible=True)
lbm_config_phase = LBMConfig(stencil=stencil_phase, method=Method.SRT, relaxation_rate=w_c, compressible=True)
method_phase = create_lb_method(lbm_config=lbm_config_phase)
method_hydro = create_lb_method(stencil=stencil_hydro, method="mrt", weighted=True,
relaxation_rates=[relaxation_rate, 1, 1, 1, 1, 1])
lbm_config_hydro = LBMConfig(stencil=stencil_hydro, method=Method.MRT, weighted=True,
relaxation_rates=[relaxation_rate, 1, 1, 1, 1, 1])
method_hydro = create_lb_method(lbm_config=lbm_config_hydro)
# create the kernels for the initialization of the g and h field
h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W)
g_updates = initializer_kernel_hydro_lb(g, u, method_hydro)
force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W, fd_stencil=get_stencil("D3Q27"))]
force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W, fd_stencil=LBStencil(Stencil.D3Q27))]
force_model_h = MultiphaseForceModel(force=force_h)
force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta,
body_force,
fd_stencil=get_stencil("D3Q27"))
fd_stencil=LBStencil(Stencil.D3Q27))
force_model_g = MultiphaseForceModel(force=force_g, rho=density)
......@@ -150,64 +145,64 @@ with CodeGeneration() as ctx:
'PhaseField_T': C}
additional_code = f"""
const char * StencilNamePhase = "{stencil_phase_name}";
const char * StencilNameHydro = "{stencil_hydro_name}";
const char * StencilNamePhase = "{stencil_phase.name}";
const char * StencilNameHydro = "{stencil_hydro.name}";
"""
if not ctx.cuda:
if not ctx.optimize_for_localhost:
cpu_vec = {'instruction_set': None}
generate_sweep(ctx, 'initialize_phase_field_distributions', h_updates)
generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates)
generate_sweep(ctx, 'initialize_phase_field_distributions', h_updates, target=Target.CPU)
generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates, target=Target.CPU)
generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
field_swaps=[(h, h_tmp), (C, C_tmp)],
inner_outer_split=True,
cpu_vectorize_info=cpu_vec)
cpu_vectorize_info=cpu_vec,
target=Target.CPU)
generate_sweep(ctx, 'hydro_LB_step', hydro_LB_step,
field_swaps=[(g, g_tmp)],
inner_outer_split=True,
cpu_vectorize_info=cpu_vec)
cpu_vectorize_info=cpu_vec,
target=Target.CPU)
# communication
generate_lb_pack_info(ctx, 'PackInfo_phase_field_distributions', stencil_phase, h,
streaming_pattern='pull', target='cpu')
streaming_pattern='pull', target=Target.CPU)
generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
streaming_pattern='push', target='cpu')
streaming_pattern='push', target=Target.CPU)
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target='cpu')
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target=Target.CPU)
if ctx.cuda:
generate_sweep(ctx, 'initialize_phase_field_distributions',
h_updates, target='gpu')
h_updates, target=Target.GPU)
generate_sweep(ctx, 'initialize_velocity_based_distributions',
g_updates, target='gpu')
g_updates, target=Target.GPU)
generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
field_swaps=[(h, h_tmp), (C, C_tmp)],
target='gpu',
target=Target.GPU,
gpu_indexing_params=sweep_params,
varying_parameters=vp)
generate_sweep(ctx, 'hydro_LB_step', hydro_LB_step,
field_swaps=[(g, g_tmp)],
target='gpu',
target=Target.GPU,
gpu_indexing_params=sweep_params,
varying_parameters=vp)
# communication
generate_lb_pack_info(ctx, 'PackInfo_phase_field_distributions', stencil_phase, h,
streaming_pattern='pull', target='gpu')
streaming_pattern='pull', target=Target.GPU)
generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
streaming_pattern='push', target='gpu')
streaming_pattern='push', target=Target.GPU)
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target='gpu')
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target=Target.GPU)
# Info header containing correct template definitions for stencil and field
generate_info_header(ctx, 'GenDefines', stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs,
additional_code=additional_code)
print("finished code generation successfully")
......@@ -2,15 +2,17 @@ import sympy as sp
import numpy as np
import pystencils as ps
from dataclasses import replace
from pystencils.data_types import TypedSymbol
from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil
from lbmpy.advanced_streaming import Timestep, is_inplace
from lbmpy.advanced_streaming.utility import streaming_patterns
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil
from lbmpy.updatekernels import create_stream_only_kernel
from lbmpy.fieldaccess import *
......@@ -32,40 +34,40 @@ gpu_indexing_params = {'block_size': sweep_block_size}
options_dict = {
'srt': {
'method': 'srt',
'method': Method.SRT,
'relaxation_rate': omega,
'compressible': False,
},
'trt': {
'method': 'trt',
'method': Method.TRT,
'relaxation_rate': omega,
},
'mrt': {
'method': 'mrt',
'method': Method.MRT,
'relaxation_rates': [omega, 1, 1, 1, 1, 1, 1],
},
'mrt-overrelax': {
'method': 'mrt',
'method': Method.MRT,
'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
},
'cumulant': {
'method': 'cumulant',
'method': Method.CUMULANT,
'relaxation_rate': omega,
'compressible': True,
},
'cumulant-overrelax': {
'method': 'cumulant',
'method': Method.CUMULANT,
'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
'compressible': True,
},
'entropic': {
'method': 'mrt',
'method': Method.MRT,
'compressible': True,
'relaxation_rates': [omega, omega] + [omega_free] * 6,
'entropic': True,
},
'smagorinsky': {
'method': 'srt',
'method': Method.SRT,
'smagorinsky': True,
'relaxation_rate': omega,
}
......@@ -94,34 +96,26 @@ with CodeGeneration() as ctx:
if len(config_tokens) >= 4:
optimize = (config_tokens[3] != 'noopt')
stencil = get_stencil(stencil_str)
if stencil_str == "D3Q27":
stencil = LBStencil(Stencil.D3Q27)
else:
stencil = LBStencil(Stencil.D3Q19)
assert streaming_pattern in streaming_patterns, f"Invalid streaming pattern: {streaming_pattern}"
options = options_dict[collision_setup]
q = len(stencil)
dim = len(stencil[0])
q = stencil.Q
dim = stencil.D
assert dim == 3, "This app supports only three-dimensional stencils"
pdfs, pdfs_tmp, velocity_field = ps.fields(f"pdfs({q}), pdfs_tmp({q}), velocity(3) : {field_type}[3D]",
layout='fzyx')
common_options = {
'stencil': stencil,
'field_name': pdfs.name,
'optimization': {
'target': 'gpu',
'cse_global': True,
'cse_pdfs': False,
'symbolic_field': pdfs,
'field_layout': 'fzyx',
'gpu_indexing_params': gpu_indexing_params
}
}
options.update(common_options)
lbm_config = LBMConfig(stencil=stencil, field_name=pdfs.name, streaming_pattern=streaming_pattern, **options)
lbm_opt = LBMOptimisation(cse_global=True, cse_pdfs=False, symbolic_field=pdfs, field_layout='fzyx')
if not is_inplace(streaming_pattern):
options['optimization']['symbolic_temporary_field'] = pdfs_tmp
lbm_opt = replace(lbm_opt, symbolic_temporary_field=pdfs_tmp)
field_swaps = [(pdfs, pdfs_tmp)]
else:
field_swaps = []
......@@ -141,7 +135,7 @@ with CodeGeneration() as ctx:
stream_only_kernel = create_stream_only_kernel(stencil, pdfs, pdfs_tmp, accessor=accessor)
# LB Sweep
collision_rule = create_lb_collision_rule(**options)
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
if optimize:
collision_rule = insert_fast_divisions(collision_rule)
......@@ -149,8 +143,8 @@ with CodeGeneration() as ctx:
lb_method = collision_rule.method
generate_alternating_lbm_sweep(ctx, 'UniformGridGPU_LbKernel', collision_rule, streaming_pattern,
optimization=options['optimization'],
generate_alternating_lbm_sweep(ctx, 'UniformGridGPU_LbKernel', collision_rule, lbm_config=lbm_config,
lbm_optimisation=lbm_opt, target=ps.Target.GPU,
inner_outer_split=True, varying_parameters=vp, field_swaps=field_swaps)
# getter & setter
......@@ -158,32 +152,32 @@ with CodeGeneration() as ctx:
pdfs=pdfs,
streaming_pattern=streaming_pattern,
previous_timestep=Timestep.EVEN)
generate_sweep(ctx, 'UniformGridGPU_MacroSetter', setter_assignments, target='gpu')
generate_sweep(ctx, 'UniformGridGPU_MacroSetter', setter_assignments, target=ps.Target.GPU)
# Stream only kernel
generate_sweep(ctx, 'UniformGridGPU_StreamOnlyKernel', stream_only_kernel, field_swaps=field_swaps_stream_only,
gpu_indexing_params=gpu_indexing_params, varying_parameters=vp, target='gpu')
gpu_indexing_params=gpu_indexing_params, varying_parameters=vp, target=ps.Target.GPU)
# Boundaries
noslip = NoSlip()
ubb = UBB((0.05, 0, 0))
generate_alternating_lbm_boundary(ctx, 'UniformGridGPU_NoSlip', noslip, lb_method, field_name=pdfs.name,
streaming_pattern=streaming_pattern, target='gpu')
streaming_pattern=streaming_pattern, target=ps.Target.GPU)
generate_alternating_lbm_boundary(ctx, 'UniformGridGPU_UBB', ubb, lb_method, field_name=pdfs.name,
streaming_pattern=streaming_pattern, target='gpu')
streaming_pattern=streaming_pattern, target=ps.Target.GPU)
# communication
generate_lb_pack_info(ctx, 'UniformGridGPU_PackInfo', stencil, pdfs,
streaming_pattern=streaming_pattern, target='gpu',
streaming_pattern=streaming_pattern, target=ps.Target.GPU,
always_generate_separate_classes=True)
infoHeaderParams = {
'stencil': stencil_str,
'streaming_pattern': streaming_pattern,
'collision_setup': collision_setup,
'cse_global': int(options['optimization']['cse_global']),
'cse_pdfs': int(options['optimization']['cse_pdfs']),
'cse_global': int(lbm_opt.cse_global),
'cse_pdfs': int(lbm_opt.cse_pdfs),
}
stencil_typedefs = {'Stencil_T': stencil,
......
......@@ -128,10 +128,11 @@ class Scenario:
sequenceValuesToScalars(result)
num_tries = 4
# check multiple times e.g. may fail when multiple benchmark processes are running
table_name = f"runs_{data['stencil']}_{data['streamingPattern']}_{data['collisionSetup']}_{prod(self.blocks)}"
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, "runs", DB_FILE)
storeSingle(result, "runs", DB_FILE)
checkAndUpdateSchema(result, table_name, DB_FILE)
storeSingle(result, table_name, DB_FILE)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
......@@ -220,23 +221,18 @@ def single_gpu_benchmark():
job_script_header = """
#!/bin/bash -l
#SBATCH --job-name=scaling
#SBATCH --time=0:30:00
#SBATCH --time=01:00:00
#SBATCH --nodes={nodes}
#SBATCH -o out_scaling_{nodes}_%j.txt
#SBATCH -e err_scaling_{nodes}_%j.txt
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --partition=normal
#SBATCH --constraint=gpu
#SBATCH --account=d105
cd {folder}
#SBATCH --account=s1042
source ~/env.sh
module load daint-gpu
module load craype-accel-nvidia60
export MPICH_RDMA_ENABLED_CUDA=1 # allow GPU-GPU data transfer
export CRAY_CUDA_MPS=1 # allow GPU sharing
export MPICH_G2G_PIPELINE=256 # adapt maximum number of concurrent in-flight messages
......@@ -247,7 +243,7 @@ export CRAY_CUDA_MPS=1
export MPICH_RANK_REORDER_METHOD=3
export PMI_MMAP_SYNC_WAIT_TIME=300
cd {folder}
# grid_order -R -H -c 1,1,8 -g 16,16,8
ulimit -c 0
......@@ -262,10 +258,18 @@ do
done
"""
all_executables = ('UniformGridBenchmarkGPU_mrt_d3q27',
'UniformGridBenchmarkGPU_smagorinsky_d3q27',
'UniformGridBenchmarkGPU_cumulant'
'UniformGridBenchmarkGPU_cumulant_d3q27')
# Axes of the executable matrix: one executable name is generated per
# (stencil, streaming pattern, method) combination.
streaming_patterns = ['pull', 'push', 'aa', 'esotwist']
stencils = ['d3q27', 'd3q19']
methods = ['srt', 'mrt', 'cumulant', 'entropic']

# Names follow "UniformGridGPU_<stencil>_<streaming pattern>_<method>".
# Ordering: stencil varies slowest, method varies fastest.
all_executables = tuple(
    f"UniformGridGPU_{stencil_name}_{pattern_name}_{method_name}"
    for stencil_name in stencils
    for pattern_name in streaming_patterns
    for method_name in methods
)
def generate_jobscripts(exe_names=all_executables):
......
......@@ -2,7 +2,7 @@ waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_link_files_to_builddir( "*.py" )
foreach(config trt smagorinsky mrt entropic_kbc_n4 cumulant )
foreach(config srt trt smagorinsky mrt entropic_kbc_n4 cumulant )
waLBerla_generate_target_from_python(NAME UniformGridGenerated_${config}
CODEGEN_CFG ${config}
FILE UniformGridGenerated.py
......
......@@ -20,29 +20,11 @@
#include "lbm/lattice_model/D3Q19.h"
#include "GenDefines.h"
#include "GenMacroGetter.h"
#include "GenMacroSetter.h"
#include "GenLbKernel.h"
#include "GenLbKernelAAEven.h"
#include "GenLbKernelAAOdd.h"
#include "GenPackInfo.h"
#include "GenPackInfoAAPush.h"
#include "GenPackInfoAAPull.h"
#include "GenMpiDtypeInfo.h"
#include "GenMpiDtypeInfoAAPull.h"
#include "GenMpiDtypeInfoAAPush.h"
#include <iomanip>
using namespace walberla;
using PdfField_T = GhostLayerField< real_t, Stencil_T::Q >;
using VelocityField_T = GhostLayerField< real_t, 3 >;
int main( int argc, char **argv )
{
mpi::Environment env( argc, argv );
......
......@@ -2,7 +2,9 @@ import sympy as sp
import pystencils as ps
from lbmpy.creationfunctions import create_lb_update_rule, create_lb_collision_rule
from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep,\
generate_mpidtype_info_from_kernel
generate_mpidtype_info_from_kernel, generate_info_header
from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from lbmpy.fieldaccess import AAEvenTimeStepAccessor, AAOddTimeStepAccessor
......@@ -42,7 +44,7 @@ options_dict = {
'entropic': True,
},
'entropic_kbc_n4': {
'method': 'trt-kbc-n4',
'method': 'trt_kbc_n4',
'stencil': 'D3Q27',
'compressible': True,
'relaxation_rates': [omega, omega_free],
......@@ -62,13 +64,6 @@ options_dict = {
},
}
info_header = """
#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
const char * infoStencil = "{stencil}";
const char * infoConfigName = "{configName}";
const char * optimizationDict = "{optimizationDict}";
"""
with CodeGeneration() as ctx:
common_options = {
'field_name': 'pdfs',
......@@ -106,18 +101,20 @@ with CodeGeneration() as ctx:
options = options.copy()
if d3q27:
options['stencil'] = 'D3Q27'
stencil = LBStencil(Stencil.D3Q27)
options['stencil'] = stencil
else:
stencil = LBStencil(options['stencil'])
dtype_string = 'float64' if ctx.double_accuracy else 'float32'
stencil_str = options['stencil']