Commit 9fe4bbd5 authored by Markus Holzer, committed by Helen Schottenhamml
Browse files

Update new lbmpy version

parent d4acb932
This diff is collapsed.
from pystencils import Target
from pystencils.field import fields
from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil
from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow
from pystencils_walberla import CodeGeneration, generate_sweep, generate_info_header
from lbmpy_walberla.additional_data_handler import UBBAdditionalDataHandler, OutflowAdditionalDataHandler
from lbmpy_walberla import generate_boundary, generate_lb_pack_info
from lbmpy_walberla import generate_lb_pack_info
from lbmpy_walberla import generate_alternating_lbm_sweep, generate_alternating_lbm_boundary
import sympy as sp
with CodeGeneration() as ctx:
data_type = "float64" if ctx.double_accuracy else "float32"
stencil = get_stencil("D3Q27")
q = len(stencil)
dim = len(stencil[0])
stencil = LBStencil(Stencil.D3Q27)
q = stencil.Q
dim = stencil.D
streaming_pattern = 'esotwist'
timesteps = get_timesteps(streaming_pattern)
......@@ -32,21 +33,12 @@ with CodeGeneration() as ctx:
'velocity': velocity_field
}
opt = {'symbolic_field': pdfs,
'cse_global': False,
'cse_pdfs': False,
'double_precision': True if ctx.double_accuracy else False}
method_params = {'method': 'cumulant',
'stencil': stencil,
'relaxation_rate': omega,
'galilean_correction': True,
'field_name': 'pdfs',
'streaming_pattern': streaming_pattern,
'output': output,
'optimization': opt}
collision_rule = create_lb_collision_rule(**method_params)
lbm_config = LBMConfig(stencil=stencil, method=Method.CUMULANT, relaxation_rate=omega, galilean_correction=True,
field_name='pdfs', streaming_pattern=streaming_pattern, output=output)
lbm_optimisation = LBMOptimisation(symbolic_field=pdfs, cse_global=False, cse_pdfs=False)
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_optimisation)
lb_method = collision_rule.method
# getter & setter
......@@ -63,15 +55,14 @@ with CodeGeneration() as ctx:
'ScalarField_T': density_field}
if ctx.cuda:
target = 'gpu'
target = Target.GPU
else:
target = 'cpu'
opt['target'] = target
target = Target.CPU
# sweeps
generate_alternating_lbm_sweep(ctx, 'FlowAroundSphereCodeGen_LbSweep',
collision_rule, streaming_pattern, optimization=opt)
collision_rule, lbm_config=lbm_config, lbm_optimisation=lbm_optimisation,
target=target)
generate_sweep(ctx, 'FlowAroundSphereCodeGen_MacroSetter', setter_assignments, target=target)
# boundaries
......
from pystencils import fields, TypedSymbol
from pystencils import fields, Target, TypedSymbol
from pystencils.simp import sympy_cse
from lbmpy import LBMConfig, LBStencil, Method, Stencil
from lbmpy.creationfunctions import create_lb_method
from lbmpy.stencils import get_stencil
from pystencils_walberla import CodeGeneration, generate_sweep, generate_pack_info_for_field, generate_info_header
from lbmpy_walberla import generate_lb_pack_info
......@@ -18,16 +18,9 @@ import numpy as np
with CodeGeneration() as ctx:
field_type = "float64" if ctx.double_accuracy else "float32"
stencil_phase_name = "D3Q15"
stencil_hydro_name = "D3Q27"
stencil_phase = get_stencil(stencil_phase_name, "walberla")
stencil_hydro = get_stencil(stencil_hydro_name, "walberla")
q_phase = len(stencil_phase)
q_hydro = len(stencil_hydro)
assert (len(stencil_phase[0]) == len(stencil_hydro[0]))
dimensions = len(stencil_hydro[0])
stencil_phase = LBStencil(Stencil.D3Q15)
stencil_hydro = LBStencil(Stencil.D3Q27)
assert (stencil_phase.D == stencil_hydro.D)
########################
# PARAMETER DEFINITION #
......@@ -54,17 +47,17 @@ with CodeGeneration() as ctx:
########################
# velocity field
u = fields(f"vel_field({dimensions}): {field_type}[{dimensions}D]", layout='fzyx')
u = fields(f"vel_field({stencil_hydro.D}): {field_type}[{stencil_hydro.D}D]", layout='fzyx')
# phase-field
C = fields(f"phase_field: {field_type}[{dimensions}D]", layout='fzyx')
C_tmp = fields(f"phase_field_tmp: {field_type}[{dimensions}D]", layout='fzyx')
C = fields(f"phase_field: {field_type}[{stencil_hydro.D}D]", layout='fzyx')
C_tmp = fields(f"phase_field_tmp: {field_type}[{stencil_hydro.D}D]", layout='fzyx')
# phase-field distribution functions
h = fields(f"lb_phase_field({q_phase}): {field_type}[{dimensions}D]", layout='fzyx')
h_tmp = fields(f"lb_phase_field_tmp({q_phase}): {field_type}[{dimensions}D]", layout='fzyx')
h = fields(f"lb_phase_field({stencil_phase.Q}): {field_type}[{stencil_phase.D}D]", layout='fzyx')
h_tmp = fields(f"lb_phase_field_tmp({stencil_phase.Q}): {field_type}[{stencil_phase.D}D]", layout='fzyx')
# hydrodynamic distribution functions
g = fields(f"lb_velocity_field({q_hydro}): {field_type}[{dimensions}D]", layout='fzyx')
g_tmp = fields(f"lb_velocity_field_tmp({q_hydro}): {field_type}[{dimensions}D]", layout='fzyx')
g = fields(f"lb_velocity_field({stencil_hydro.Q}): {field_type}[{stencil_hydro.D}D]", layout='fzyx')
g_tmp = fields(f"lb_velocity_field_tmp({stencil_hydro.Q}): {field_type}[{stencil_hydro.D}D]", layout='fzyx')
########################################
# RELAXATION RATES AND EXTERNAL FORCES #
......@@ -83,21 +76,23 @@ with CodeGeneration() as ctx:
# LBM METHODS #
###############
method_phase = create_lb_method(stencil=stencil_phase, method='srt', relaxation_rate=w_c, compressible=True)
lbm_config_phase = LBMConfig(stencil=stencil_phase, method=Method.SRT, relaxation_rate=w_c, compressible=True)
method_phase = create_lb_method(lbm_config=lbm_config_phase)
method_hydro = create_lb_method(stencil=stencil_hydro, method="mrt", weighted=True,
relaxation_rates=[relaxation_rate, 1, 1, 1, 1, 1])
lbm_config_hydro = LBMConfig(stencil=stencil_hydro, method=Method.MRT, weighted=True,
relaxation_rates=[relaxation_rate, 1, 1, 1, 1, 1])
method_hydro = create_lb_method(lbm_config=lbm_config_hydro)
# create the kernels for the initialization of the g and h field
h_updates = initializer_kernel_phase_field_lb(h, C, u, method_phase, W)
g_updates = initializer_kernel_hydro_lb(g, u, method_hydro)
force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W, fd_stencil=get_stencil("D3Q27"))]
force_h = [f / 3 for f in interface_tracking_force(C, stencil_phase, W, fd_stencil=LBStencil(Stencil.D3Q27))]
force_model_h = MultiphaseForceModel(force=force_h)
force_g = hydrodynamic_force(g, C, method_hydro, relaxation_time, density_liquid, density_gas, kappa, beta,
body_force,
fd_stencil=get_stencil("D3Q27"))
fd_stencil=LBStencil(Stencil.D3Q27))
force_model_g = MultiphaseForceModel(force=force_g, rho=density)
......@@ -150,64 +145,64 @@ with CodeGeneration() as ctx:
'PhaseField_T': C}
additional_code = f"""
const char * StencilNamePhase = "{stencil_phase_name}";
const char * StencilNameHydro = "{stencil_hydro_name}";
const char * StencilNamePhase = "{stencil_phase.name}";
const char * StencilNameHydro = "{stencil_hydro.name}";
"""
if not ctx.cuda:
if not ctx.optimize_for_localhost:
cpu_vec = {'instruction_set': None}
generate_sweep(ctx, 'initialize_phase_field_distributions', h_updates)
generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates)
generate_sweep(ctx, 'initialize_phase_field_distributions', h_updates, target=Target.CPU)
generate_sweep(ctx, 'initialize_velocity_based_distributions', g_updates, target=Target.CPU)
generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
field_swaps=[(h, h_tmp), (C, C_tmp)],
inner_outer_split=True,
cpu_vectorize_info=cpu_vec)
cpu_vectorize_info=cpu_vec,
target=Target.CPU)
generate_sweep(ctx, 'hydro_LB_step', hydro_LB_step,
field_swaps=[(g, g_tmp)],
inner_outer_split=True,
cpu_vectorize_info=cpu_vec)
cpu_vectorize_info=cpu_vec,
target=Target.CPU)
# communication
generate_lb_pack_info(ctx, 'PackInfo_phase_field_distributions', stencil_phase, h,
streaming_pattern='pull', target='cpu')
streaming_pattern='pull', target=Target.CPU)
generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
streaming_pattern='push', target='cpu')
streaming_pattern='push', target=Target.CPU)
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target='cpu')
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target=Target.CPU)
if ctx.cuda:
generate_sweep(ctx, 'initialize_phase_field_distributions',
h_updates, target='gpu')
h_updates, target=Target.GPU)
generate_sweep(ctx, 'initialize_velocity_based_distributions',
g_updates, target='gpu')
g_updates, target=Target.GPU)
generate_sweep(ctx, 'phase_field_LB_step', phase_field_LB_step,
field_swaps=[(h, h_tmp), (C, C_tmp)],
target='gpu',
target=Target.GPU,
gpu_indexing_params=sweep_params,
varying_parameters=vp)
generate_sweep(ctx, 'hydro_LB_step', hydro_LB_step,
field_swaps=[(g, g_tmp)],
target='gpu',
target=Target.GPU,
gpu_indexing_params=sweep_params,
varying_parameters=vp)
# communication
generate_lb_pack_info(ctx, 'PackInfo_phase_field_distributions', stencil_phase, h,
streaming_pattern='pull', target='gpu')
streaming_pattern='pull', target=Target.GPU)
generate_lb_pack_info(ctx, 'PackInfo_velocity_based_distributions', stencil_hydro, g,
streaming_pattern='push', target='gpu')
streaming_pattern='push', target=Target.GPU)
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target='gpu')
generate_pack_info_for_field(ctx, 'PackInfo_phase_field', C, target=Target.GPU)
# Info header containing correct template definitions for stencil and field
generate_info_header(ctx, 'GenDefines', stencil_typedefs=stencil_typedefs, field_typedefs=field_typedefs,
additional_code=additional_code)
print("finished code generation successfully")
......@@ -2,15 +2,17 @@ import sympy as sp
import numpy as np
import pystencils as ps
from dataclasses import replace
from pystencils.data_types import TypedSymbol
from pystencils.fast_approximation import insert_fast_sqrts, insert_fast_divisions
from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil
from lbmpy.advanced_streaming import Timestep, is_inplace
from lbmpy.advanced_streaming.utility import streaming_patterns
from lbmpy.boundaries import NoSlip, UBB
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil
from lbmpy.updatekernels import create_stream_only_kernel
from lbmpy.fieldaccess import *
......@@ -32,40 +34,40 @@ gpu_indexing_params = {'block_size': sweep_block_size}
options_dict = {
'srt': {
'method': 'srt',
'method': Method.SRT,
'relaxation_rate': omega,
'compressible': False,
},
'trt': {
'method': 'trt',
'method': Method.TRT,
'relaxation_rate': omega,
},
'mrt': {
'method': 'mrt',
'method': Method.MRT,
'relaxation_rates': [omega, 1, 1, 1, 1, 1, 1],
},
'mrt-overrelax': {
'method': 'mrt',
'method': Method.MRT,
'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
},
'cumulant': {
'method': 'cumulant',
'method': Method.CUMULANT,
'relaxation_rate': omega,
'compressible': True,
},
'cumulant-overrelax': {
'method': 'cumulant',
'method': Method.CUMULANT,
'relaxation_rates': [omega] + [1 + x * 1e-2 for x in range(1, 11)],
'compressible': True,
},
'entropic': {
'method': 'mrt',
'method': Method.MRT,
'compressible': True,
'relaxation_rates': [omega, omega] + [omega_free] * 6,
'entropic': True,
},
'smagorinsky': {
'method': 'srt',
'method': Method.SRT,
'smagorinsky': True,
'relaxation_rate': omega,
}
......@@ -94,34 +96,26 @@ with CodeGeneration() as ctx:
if len(config_tokens) >= 4:
optimize = (config_tokens[3] != 'noopt')
stencil = get_stencil(stencil_str)
if stencil_str == "D3Q27":
stencil = LBStencil(Stencil.D3Q27)
else:
stencil = LBStencil(Stencil.D3Q19)
assert streaming_pattern in streaming_patterns, f"Invalid streaming pattern: {streaming_pattern}"
options = options_dict[collision_setup]
q = len(stencil)
dim = len(stencil[0])
q = stencil.Q
dim = stencil.D
assert dim == 3, "This app supports only three-dimensional stencils"
pdfs, pdfs_tmp, velocity_field = ps.fields(f"pdfs({q}), pdfs_tmp({q}), velocity(3) : {field_type}[3D]",
layout='fzyx')
common_options = {
'stencil': stencil,
'field_name': pdfs.name,
'optimization': {
'target': 'gpu',
'cse_global': True,
'cse_pdfs': False,
'symbolic_field': pdfs,
'field_layout': 'fzyx',
'gpu_indexing_params': gpu_indexing_params
}
}
options.update(common_options)
lbm_config = LBMConfig(stencil=stencil, field_name=pdfs.name, streaming_pattern=streaming_pattern, **options)
lbm_opt = LBMOptimisation(cse_global=True, cse_pdfs=False, symbolic_field=pdfs, field_layout='fzyx')
if not is_inplace(streaming_pattern):
options['optimization']['symbolic_temporary_field'] = pdfs_tmp
lbm_opt = replace(lbm_opt, symbolic_temporary_field=pdfs_tmp)
field_swaps = [(pdfs, pdfs_tmp)]
else:
field_swaps = []
......@@ -141,7 +135,7 @@ with CodeGeneration() as ctx:
stream_only_kernel = create_stream_only_kernel(stencil, pdfs, pdfs_tmp, accessor=accessor)
# LB Sweep
collision_rule = create_lb_collision_rule(**options)
collision_rule = create_lb_collision_rule(lbm_config=lbm_config, lbm_optimisation=lbm_opt)
if optimize:
collision_rule = insert_fast_divisions(collision_rule)
......@@ -149,8 +143,8 @@ with CodeGeneration() as ctx:
lb_method = collision_rule.method
generate_alternating_lbm_sweep(ctx, 'UniformGridGPU_LbKernel', collision_rule, streaming_pattern,
optimization=options['optimization'],
generate_alternating_lbm_sweep(ctx, 'UniformGridGPU_LbKernel', collision_rule, lbm_config=lbm_config,
lbm_optimisation=lbm_opt, target=ps.Target.GPU,
inner_outer_split=True, varying_parameters=vp, field_swaps=field_swaps)
# getter & setter
......@@ -158,32 +152,32 @@ with CodeGeneration() as ctx:
pdfs=pdfs,
streaming_pattern=streaming_pattern,
previous_timestep=Timestep.EVEN)
generate_sweep(ctx, 'UniformGridGPU_MacroSetter', setter_assignments, target='gpu')
generate_sweep(ctx, 'UniformGridGPU_MacroSetter', setter_assignments, target=ps.Target.GPU)
# Stream only kernel
generate_sweep(ctx, 'UniformGridGPU_StreamOnlyKernel', stream_only_kernel, field_swaps=field_swaps_stream_only,
gpu_indexing_params=gpu_indexing_params, varying_parameters=vp, target='gpu')
gpu_indexing_params=gpu_indexing_params, varying_parameters=vp, target=ps.Target.GPU)
# Boundaries
noslip = NoSlip()
ubb = UBB((0.05, 0, 0))
generate_alternating_lbm_boundary(ctx, 'UniformGridGPU_NoSlip', noslip, lb_method, field_name=pdfs.name,
streaming_pattern=streaming_pattern, target='gpu')
streaming_pattern=streaming_pattern, target=ps.Target.GPU)
generate_alternating_lbm_boundary(ctx, 'UniformGridGPU_UBB', ubb, lb_method, field_name=pdfs.name,
streaming_pattern=streaming_pattern, target='gpu')
streaming_pattern=streaming_pattern, target=ps.Target.GPU)
# communication
generate_lb_pack_info(ctx, 'UniformGridGPU_PackInfo', stencil, pdfs,
streaming_pattern=streaming_pattern, target='gpu',
streaming_pattern=streaming_pattern, target=ps.Target.GPU,
always_generate_separate_classes=True)
infoHeaderParams = {
'stencil': stencil_str,
'streaming_pattern': streaming_pattern,
'collision_setup': collision_setup,
'cse_global': int(options['optimization']['cse_global']),
'cse_pdfs': int(options['optimization']['cse_pdfs']),
'cse_global': int(lbm_opt.cse_global),
'cse_pdfs': int(lbm_opt.cse_pdfs),
}
stencil_typedefs = {'Stencil_T': stencil,
......
......@@ -128,10 +128,11 @@ class Scenario:
sequenceValuesToScalars(result)
num_tries = 4
# check multiple times e.g. may fail when multiple benchmark processes are running
table_name = f"runs_{data['stencil']}_{data['streamingPattern']}_{data['collisionSetup']}_{prod(self.blocks)}"
for num_try in range(num_tries):
try:
checkAndUpdateSchema(result, "runs", DB_FILE)
storeSingle(result, "runs", DB_FILE)
checkAndUpdateSchema(result, table_name, DB_FILE)
storeSingle(result, table_name, DB_FILE)
break
except sqlite3.OperationalError as e:
wlb.log_warning(f"Sqlite DB writing failed: try {num_try + 1}/{num_tries} {str(e)}")
......@@ -220,23 +221,18 @@ def single_gpu_benchmark():
job_script_header = """
#!/bin/bash -l
#SBATCH --job-name=scaling
#SBATCH --time=0:30:00
#SBATCH --time=01:00:00
#SBATCH --nodes={nodes}
#SBATCH -o out_scaling_{nodes}_%j.txt
#SBATCH -e err_scaling_{nodes}_%j.txt
#SBATCH --ntasks-per-core=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#SBATCH --partition=normal
#SBATCH --constraint=gpu
#SBATCH --account=d105
cd {folder}
#SBATCH --account=s1042
source ~/env.sh
module load daint-gpu
module load craype-accel-nvidia60
export MPICH_RDMA_ENABLED_CUDA=1 # allow GPU-GPU data transfer
export CRAY_CUDA_MPS=1 # allow GPU sharing
export MPICH_G2G_PIPELINE=256 # adapt maximum number of concurrent in-flight messages
......@@ -247,7 +243,7 @@ export CRAY_CUDA_MPS=1
export MPICH_RANK_REORDER_METHOD=3
export PMI_MMAP_SYNC_WAIT_TIME=300
cd {folder}
# grid_order -R -H -c 1,1,8 -g 16,16,8
ulimit -c 0
......@@ -262,10 +258,18 @@ do
done
"""
# Names of the benchmark executables to generate job scripts for.
# Every entry carries its own trailing comma: adjacent string literals are
# silently concatenated by Python, which previously fused the two cumulant
# entries into one non-existent executable name.
all_executables = (
    'UniformGridBenchmarkGPU_mrt_d3q27',
    'UniformGridBenchmarkGPU_smagorinsky_d3q27',
    'UniformGridBenchmarkGPU_cumulant',
    'UniformGridBenchmarkGPU_cumulant_d3q27',
)
# Benchmark matrix: one executable exists per combination of the values below.
streaming_patterns = ['pull', 'push', 'aa', 'esotwist']
stencils = ['d3q27', 'd3q19']
methods = ['srt', 'mrt', 'cumulant', 'entropic']

# Executable names for the full matrix. The stencil varies slowest and the
# collision method fastest, matching the order of the former nested loops.
# A generator expression keeps the loop variables out of the module namespace.
all_executables = tuple(
    f"UniformGridGPU_{stencil}_{streaming_pattern}_{method}"
    for stencil in stencils
    for streaming_pattern in streaming_patterns
    for method in methods
)
def generate_jobscripts(exe_names=all_executables):
......
......@@ -2,7 +2,7 @@ waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_link_files_to_builddir( "*.py" )
foreach(config trt smagorinsky mrt entropic_kbc_n4 cumulant )
foreach(config srt trt smagorinsky mrt entropic_kbc_n4 cumulant )
waLBerla_generate_target_from_python(NAME UniformGridGenerated_${config}
CODEGEN_CFG ${config}
FILE UniformGridGenerated.py
......
......@@ -20,29 +20,11 @@
#include "lbm/lattice_model/D3Q19.h"
#include "GenDefines.h"
#include "GenMacroGetter.h"
#include "GenMacroSetter.h"
#include "GenLbKernel.h"
#include "GenLbKernelAAEven.h"
#include "GenLbKernelAAOdd.h"
#include "GenPackInfo.h"
#include "GenPackInfoAAPush.h"
#include "GenPackInfoAAPull.h"
#include "GenMpiDtypeInfo.h"
#include "GenMpiDtypeInfoAAPull.h"
#include "GenMpiDtypeInfoAAPush.h"
#include <iomanip>
using namespace walberla;
using PdfField_T = GhostLayerField< real_t, Stencil_T::Q >;
using VelocityField_T = GhostLayerField< real_t, 3 >;
int main( int argc, char **argv )
{
mpi::Environment env( argc, argv );
......
......@@ -2,7 +2,9 @@ import sympy as sp
import pystencils as ps
from lbmpy.creationfunctions import create_lb_update_rule, create_lb_collision_rule
from pystencils_walberla import CodeGeneration, generate_pack_info_from_kernel, generate_sweep,\
generate_mpidtype_info_from_kernel
generate_mpidtype_info_from_kernel, generate_info_header
from lbmpy import LBMConfig, LBMOptimisation, LBStencil, Method, Stencil
from lbmpy.macroscopic_value_kernels import macroscopic_values_getter, macroscopic_values_setter
from lbmpy.fieldaccess import AAEvenTimeStepAccessor, AAOddTimeStepAccessor
......@@ -42,7 +44,7 @@ options_dict = {
'entropic': True,
},
'entropic_kbc_n4': {
'method': 'trt-kbc-n4',
'method': 'trt_kbc_n4',
'stencil': 'D3Q27',
'compressible': True,
'relaxation_rates': [omega, omega_free],
......@@ -62,13 +64,6 @@ options_dict = {
},
}
# Template for the generated C++ info header: it includes the D3Q{q} stencil
# header, aliases that stencil as Stencil_T and defines string constants for
# the stencil name, the configuration name and the optimization dictionary.
# The {q}, {stencil}, {configName} and {optimizationDict} placeholders are
# presumably filled via str.format -- the call site is not visible here.
info_header = """
#include "stencil/D3Q{q}.h"\nusing Stencil_T = walberla::stencil::D3Q{q};
const char * infoStencil = "{stencil}";
const char * infoConfigName = "{configName}";
const char * optimizationDict = "{optimizationDict}";
"""
with CodeGeneration() as ctx:
common_options = {
'field_name': 'pdfs',
......@@ -106,18 +101,20 @@ with CodeGeneration() as ctx:
options = options.copy()
if d3q27:
options['stencil'] = 'D3Q27'
stencil = LBStencil(Stencil.D3Q27)
options['stencil'] = stencil
else:
stencil = LBStencil(options['stencil'])
dtype_string = 'float64' if ctx.double_accuracy else 'float32'
stencil_str = options['stencil']
q = int(stencil_str[stencil_str.find('Q') + 1:])
pdfs, velocity_field = ps.fields(f'pdfs({q}), velocity(3) : {dtype_string}[3D]', layout='fzyx')