Commit 79d35512 authored by Frederik Hennig's avatar Frederik Hennig
Browse files

Merge branch 'master' of i10git.cs.fau.de:walberla/walberla

parents dfcc0aaf 3b00f584
Pipeline #34128 failed with stages
in 250 minutes and 44 seconds
......@@ -1011,11 +1011,18 @@ endif()
option ( WALBERLA_THREAD_SAFE_LOGGING "Enables/Disables thread-safe logging" ON )
if ( WALBERLA_BUILD_WITH_OPENMP )
if( APPLE AND EXISTS /opt/local/lib/libomp AND EXISTS /opt/local/include/libomp ) # find libomp from MacPorts
set( CMAKE_FRAMEWORK_PATH /opt/local/lib/libomp )
set( CMAKE_INCLUDE_PATH /opt/local/include/libomp )
endif()
find_package( OpenMP )
if (OpenMP_FOUND)
add_flag ( CMAKE_C_FLAGS "${OpenMP_C_FLAGS}" )
add_flag ( CMAKE_CXX_FLAGS "${OpenMP_CXX_FLAGS}" )
list ( APPEND SERVICE_LIBS ${OpenMP_CXX_LIBRARIES} )
if( OpenMP_CXX_INCLUDE_DIRS )
include_directories( ${OpenMP_CXX_INCLUDE_DIRS} )
endif()
else()
#workarounds
if ( WALBERLA_CXX_COMPILER_IS_NEC )
......
......@@ -32,4 +32,4 @@ endif()
# Python module
if ( WALBERLA_BUILD_WITH_PYTHON )
add_subdirectory( pythonmodule )
endif()
\ No newline at end of file
endif()
......@@ -70,21 +70,29 @@ auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage* const stora
};
auto VelocityCallback = [](const Cell& pos, const shared_ptr< StructuredBlockForest >& SbF, IBlock& block,
real_t inflow_velocity) {
Cell globalCell;
CellInterval domain = SbF->getDomainCellBB();
real_t h_y = domain.yMax() - domain.yMin();
real_t h_z = domain.zMax() - domain.zMin();
SbF->transformBlockLocalToGlobalCell(globalCell, block, pos);
real_t inflow_velocity, const bool constant_inflow = true) {
if (constant_inflow)
{
Vector3< real_t > result(inflow_velocity, 0.0, 0.0);
return result;
}
else
{
Cell globalCell;
CellInterval domain = SbF->getDomainCellBB();
real_t h_y = real_c(domain.ySize());
real_t h_z = real_c(domain.zSize());
SbF->transformBlockLocalToGlobalCell(globalCell, block, pos);
real_t y1 = globalCell[1] - (h_y / 2.0 + 0.5);
real_t z1 = globalCell[2] - (h_z / 2.0 + 0.5);
real_t y1 = real_c(globalCell[1] - (h_y / 2.0 - 0.5));
real_t z1 = real_c(globalCell[2] - (h_z / 2.0 - 0.5));
real_t u = (inflow_velocity * 16) / (h_y * h_y * h_z * h_z) * (h_y / 2.0 - y1) * (h_y / 2 + y1) * (h_z / 2 - z1) *
(h_z / 2 + z1);
real_t u = (inflow_velocity * real_c(16)) / (h_y * h_y * h_z * h_z) * (h_y / real_c(2.0) - y1) *
(h_y / real_c(2.0) + y1) * (h_z / real_c(2.0) - z1) * (h_z / real_c(2.0) + z1);
Vector3< real_t > result(u, 0.0, 0.0);
return result;
Vector3< real_t > result(u, 0.0, 0.0);
return result;
}
};
class AlternatingBeforeFunction
......@@ -147,6 +155,7 @@ int main(int argc, char** argv)
const real_t u_max = parameters.getParameter< real_t >("u_max", real_t(0.05));
const real_t reynolds_number = parameters.getParameter< real_t >("reynolds_number", real_t(1000));
const uint_t diameter_sphere = parameters.getParameter< uint_t >("diameter_sphere", uint_t(5));
const bool constant_inflow = parameters.getParameter< bool >("constant_inflow", true);
const double remainingTimeLoggerFrequency =
parameters.getParameter< double >("remainingTimeLoggerFrequency", 3.0); // in seconds
......@@ -204,7 +213,7 @@ int main(int argc, char** argv)
auto boundariesConfig = config->getOneBlock("Boundaries");
std::function< Vector3< real_t >(const Cell&, const shared_ptr< StructuredBlockForest >&, IBlock&) >
velocity_initialisation = std::bind(VelocityCallback, _1, _2, _3, u_max);
velocity_initialisation = std::bind(VelocityCallback, _1, _2, _3, u_max, constant_inflow);
#if defined(WALBERLA_BUILD_WITH_CUDA)
lbm::FlowAroundSphereCodeGen_UBB ubb(blocks, pdfFieldIDGPU, velocity_initialisation);
......@@ -236,10 +245,9 @@ int main(int argc, char** argv)
AlternatingBeforeFunction communication(evenComm, oddComm, tracker);
// add LBM sweep and communication to time loop
timeloop.add() << BeforeFunction(communication, "communication")
<< Sweep(noSlip.getSweep(tracker), "noSlip boundary");
timeloop.add() << BeforeFunction(communication, "communication") << Sweep(ubb.getSweep(tracker), "ubb boundary");
timeloop.add() << Sweep(outflow.getSweep(tracker), "outflow boundary");
timeloop.add() << Sweep(ubb.getSweep(tracker), "ubb boundary");
timeloop.add() << Sweep(noSlip.getSweep(tracker), "noSlip boundary");
timeloop.add() << BeforeFunction(tracker->getAdvancementFunction(), "Timestep Advancement")
<< Sweep(lbSweep.getSweep(tracker), "LB update rule");
......
from pystencils.field import fields
from lbmpy.advanced_streaming.utility import get_timesteps, Timestep
from lbmpy.advanced_streaming.utility import get_timesteps
from lbmpy.macroscopic_value_kernels import macroscopic_values_setter
from lbmpy.stencils import get_stencil
from lbmpy.creationfunctions import create_lb_collision_rule, create_lb_method, create_lb_update_rule
from lbmpy.creationfunctions import create_lb_collision_rule
from lbmpy.boundaries import NoSlip, UBB, ExtrapolationOutflow
from pystencils_walberla import CodeGeneration, generate_sweep, generate_info_header
......@@ -14,51 +14,54 @@ from lbmpy_walberla import generate_alternating_lbm_sweep, generate_alternating_
import sympy as sp
stencil = get_stencil("D3Q27")
q = len(stencil)
dim = len(stencil[0])
streaming_pattern = 'esotwist'
timesteps = get_timesteps(streaming_pattern)
pdfs, velocity_field, density_field = fields(f"pdfs({q}), velocity({dim}), density(1) : double[{dim}D]", layout='fzyx')
omega = sp.Symbol("omega")
u_max = sp.Symbol("u_max")
output = {
'density': density_field,
'velocity': velocity_field
}
opt = {'symbolic_field': pdfs,
'cse_global': False,
'cse_pdfs': False}
method_params = {'method': 'cumulant',
'stencil': stencil,
'relaxation_rate': omega,
'galilean_correction': True,
'field_name': 'pdfs',
'streaming_pattern': streaming_pattern,
'output': output,
'optimization': opt}
collision_rule = create_lb_collision_rule(**method_params)
lb_method = collision_rule.method
# getter & setter
setter_assignments = macroscopic_values_setter(lb_method, velocity=velocity_field.center_vector,
pdfs=pdfs, density=1,
streaming_pattern=streaming_pattern,
previous_timestep=timesteps[0])
# opt = {'instruction_set': 'sse', 'assume_aligned': True, 'nontemporal': False, 'assume_inner_stride_one': True}
stencil_typedefs = {'Stencil_T': stencil}
field_typedefs = {'PdfField_T': pdfs,
'VelocityField_T': velocity_field,
'ScalarField_T': density_field}
with CodeGeneration() as ctx:
data_type = "float64" if ctx.double_accuracy else "float32"
stencil = get_stencil("D3Q27")
q = len(stencil)
dim = len(stencil[0])
streaming_pattern = 'esotwist'
timesteps = get_timesteps(streaming_pattern)
pdfs, velocity_field, density_field = fields(f"pdfs({q}), velocity({dim}), density(1) : {data_type}[{dim}D]",
layout='fzyx')
omega = sp.Symbol("omega")
u_max = sp.Symbol("u_max")
output = {
'density': density_field,
'velocity': velocity_field
}
opt = {'symbolic_field': pdfs,
'cse_global': False,
'cse_pdfs': False,
'double_precision': True if ctx.double_accuracy else False}
method_params = {'method': 'cumulant',
'stencil': stencil,
'relaxation_rate': omega,
'galilean_correction': True,
'field_name': 'pdfs',
'streaming_pattern': streaming_pattern,
'output': output,
'optimization': opt}
collision_rule = create_lb_collision_rule(**method_params)
lb_method = collision_rule.method
# getter & setter
setter_assignments = macroscopic_values_setter(lb_method, velocity=velocity_field.center_vector,
pdfs=pdfs, density=1.0,
streaming_pattern=streaming_pattern,
previous_timestep=timesteps[0])
# opt = {'instruction_set': 'sse', 'assume_aligned': True, 'nontemporal': False, 'assume_inner_stride_one': True}
stencil_typedefs = {'Stencil_T': stencil}
field_typedefs = {'PdfField_T': pdfs,
'VelocityField_T': velocity_field,
'ScalarField_T': density_field}
if ctx.cuda:
target = 'gpu'
else:
......
......@@ -4,13 +4,15 @@ from lbmpy.relaxationrates import relaxation_rate_from_lattice_viscosity
class Scenario:
def __init__(self):
self.timesteps = 101
self.vtkWriteFrequency = 2000
self.timesteps = 1001
self.vtkWriteFrequency = 100
self.cells = (384, 128, 128)
self.blocks = (1, 1, 1)
self.periodic = (0, 0, 0)
self.constant_inflow = True
self.diameter_sphere = min(self.cells) // 2
self.u_max = 0.1
self.reynolds_number = 1000000
......@@ -38,7 +40,8 @@ class Scenario:
'omega': self.omega,
'u_max': self.u_max,
'reynolds_number': self.reynolds_number,
'diameter_sphere': self.diameter_sphere
'diameter_sphere': self.diameter_sphere,
'constant_inflow': self.constant_inflow
},
'Boundaries': {
'Border': [
......@@ -54,7 +57,7 @@ class Scenario:
'midpoint': (int(0.40 * self.total_cells[0]), self.total_cells[1] // 2, self.total_cells[2] // 2),
'radius': self.diameter_sphere // 2,
'flag': 'NoSlip'}
]
],
},
}
......
......@@ -3,30 +3,43 @@ import waLBerla as wlb
import pandas as pd
from waLBerla.tools.sqlitedb import sequenceValuesToScalars
from waLBerla.tools.config import block_decomposition
import sys
from math import prod
def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, size_per_value=8):
"""Checks if a single block of given size fits into GPU memory"""
cells = prod(b + 2 * gls for b in block_size)
# 3 values for the velocity and two for the phase field and the temporary phase field
values_per_cell = 2 * q_phase + 2 * q_hydro + 3 + 2
needed_memory = values_per_cell * cells * size_per_value
return needed_memory < total_mem
class Scenario:
def __init__(self, timeStepStrategy, overlappingWidth, cuda_block_size):
def __init__(self, time_step_strategy, cuda_block_size, cells_per_block=(256, 256, 256),
cuda_enabled_mpi=False):
# output frequencies
self.vtkWriteFrequency = 0
# simulation parameters
self.timesteps = 101
edge_size = 32
self.cells = (edge_size, edge_size, edge_size)
self.blocks = (1, 1, 1)
self.cells_per_block = cells_per_block
self.blocks = block_decomposition(wlb.mpi.numProcesses())
self.periodic = (1, 1, 1)
self.size = (self.cells[0] * self.blocks[0],
self.cells[1] * self.blocks[1],
self.cells[2] * self.blocks[2])
self.size = (self.cells_per_block[0] * self.blocks[0],
self.cells_per_block[1] * self.blocks[1],
self.cells_per_block[2] * self.blocks[2])
self.timeStepStrategy = timeStepStrategy
self.overlappingWidth = overlappingWidth
self.timeStepStrategy = time_step_strategy
self.cuda_block_size = cuda_block_size
self.warmupSteps = 10
self.cudaEnabledMpi = cuda_enabled_mpi
# bubble parameters
self.bubbleRadius = min(self.size) // 4
self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)
......@@ -41,19 +54,18 @@ class Scenario:
return {
'DomainSetup': {
'blocks': self.blocks,
'cellsPerBlock': self.cells,
'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic,
},
'Parameters': {
'timesteps': self.timesteps,
'vtkWriteFrequency': self.vtkWriteFrequency,
'useGui': 0,
'remainingTimeLoggerFrequency': -1,
'timeStepStrategy': self.timeStepStrategy,
'overlappingWidth': self.overlappingWidth,
'gpuBlockSize': self.cuda_block_size,
'warmupSteps': self.warmupSteps,
'scenario': self.scenario,
'cudaEnabledMpi': self.cudaEnabledMpi
},
'Boundaries_GPU': {
'Border': []
......@@ -86,43 +98,46 @@ class Scenario:
df.to_csv(self.csv_file, index=False, mode='a', header=False)
def overlap_benchmark():
def benchmark():
scenarios = wlb.ScenarioManager()
overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)]
cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), (512, 1, 1),
(32, 2, 1), (64, 2, 1), (128, 2, 1), (256, 2, 1),
(32, 4, 1), (64, 4, 1), (128, 4, 1),
(32, 4, 2), (64, 4, 2), (128, 2, 2),
(32, 8, 1), (64, 8, 1), (64, 4, 2),
(32, 16, 1), (16, 16, 1), (16, 16, 2)]
# no overlap
scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1), cuda_block_size=(16, 16, 1)))
# overlap
for overlap_strategy in ['overlap']:
for overlappingWidth in overlappingWidths:
for cuda_block in cuda_blocks:
scenario = Scenario(timeStepStrategy=overlap_strategy,
overlappingWidth=overlappingWidth,
cuda_block_size=cuda_block)
scenarios.add(scenario)
gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
gpu_mem = gpu_mem_gb * (2 ** 30)
block_size = (256, 256, 256)
if not domain_block_size_ok(block_size, gpu_mem):
wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
else:
scenarios.add(Scenario(time_step_strategy='normal', cuda_block_size=(256, 1, 1), cells_per_block=block_size))
def kernel_benchmark():
scenarios = wlb.ScenarioManager()
# overlap
# for overlap_strategy in ['phase_only', 'hydro_only', 'kernel_only']:
for overlap_strategy in ['overlap']:
scenario = Scenario(timeStepStrategy=overlap_strategy,
overlappingWidth=(1, 1, 1),
cuda_block_size=(128, 1, 1))
scenarios.add(scenario)
gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
gpu_mem = gpu_mem_gb * (2 ** 30)
block_sizes = [(i, i, i) for i in (32, 64, 128, 256, 320, 384, 448, 512)]
cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1),
(32, 2, 1), (64, 2, 1), (128, 2, 1),
(32, 4, 1), (64, 4, 1),
(32, 4, 2),
(32, 8, 1),
(16, 16, 1)]
for time_step_strategy in ['phase_only', 'hydro_only', 'kernel_only', 'normal']:
for cuda_block in cuda_blocks:
for block_size in block_sizes:
if not domain_block_size_ok(block_size, gpu_mem):
wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
continue
scenario = Scenario(time_step_strategy=time_step_strategy,
cuda_block_size=cuda_block,
cells_per_block=block_size)
scenarios.add(scenario)
# overlap_benchmark()
# benchmark()
kernel_benchmark()
......@@ -3,57 +3,55 @@ import waLBerla as wlb
import pandas as pd
from waLBerla.tools.sqlitedb import sequenceValuesToScalars
from waLBerla.tools.config import block_decomposition
import sys
class Scenario:
def __init__(self, timeStepStrategy, overlappingWidth):
def __init__(self, time_step_strategy, cells_per_block=(256, 256, 256),
cuda_enabled_mpi=False):
# output frequencies
self.vtkWriteFrequency = -1
self.vtkWriteFrequency = 0
# simulation parameters
self.timesteps = 500
edge_size = 50
self.cells = (edge_size, edge_size, edge_size)
self.blocks = (1, 1, 1)
self.timesteps = 101
self.cells_per_block = cells_per_block
self.blocks = block_decomposition(wlb.mpi.numProcesses())
self.periodic = (1, 1, 1)
self.size = (self.cells[0] * self.blocks[0],
self.cells[1] * self.blocks[1],
self.cells[2] * self.blocks[2])
self.size = (self.cells_per_block[0] * self.blocks[0],
self.cells_per_block[1] * self.blocks[1],
self.cells_per_block[2] * self.blocks[2])
self.timeStepStrategy = timeStepStrategy
self.overlappingWidth = overlappingWidth
self.cuda_block_size = (-1, -1, -1)
self.timeStepStrategy = time_step_strategy
self.warmupSteps = 10
self.cudaEnabledMpi = cuda_enabled_mpi
# bubble parameters
self.bubbleRadius = min(self.size) // 4
self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)
self.scenario = 1 # 1 rising bubble, 2 RTI
self.config_dict = self.config()
self.csv_file = "benchmark_cpu.csv"
self.csv_file = "benchmark.csv"
@wlb.member_callback
def config(self):
return {
'DomainSetup': {
'blocks': self.blocks,
'cellsPerBlock': self.cells,
'cellsPerBlock': self.cells_per_block,
'periodic': self.periodic,
},
'Parameters': {
'timesteps': self.timesteps,
'vtkWriteFrequency': self.vtkWriteFrequency,
'useGui': 0,
'remainingTimeLoggerFrequency': 10.0,
'remainingTimeLoggerFrequency': -1,
'timeStepStrategy': self.timeStepStrategy,
'overlappingWidth': self.overlappingWidth,
'gpuBlockSize': self.cuda_block_size,
'warmupSteps': self.warmupSteps,
'scenario': self.scenario,
'scenario': 1,
'cudaEnabledMpi': self.cudaEnabledMpi
},
'Boundaries_GPU': {
'Border': []
......@@ -86,31 +84,23 @@ class Scenario:
df.to_csv(self.csv_file, index=False, mode='a', header=False)
def overlap_benchmark():
def benchmark():
scenarios = wlb.ScenarioManager()
overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
(4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
(4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32),
(64, 32, 32), (64, 64, 32), (64, 64, 64)]
# no overlap
scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1)))
# overlap
for overlap_strategy in ['overlap']:
for overlappingWidth in overlappingWidths:
scenario = Scenario(timeStepStrategy=overlap_strategy,
overlappingWidth=overlappingWidth)
scenarios.add(scenario)
block_size = (64, 64, 64)
scenarios.add(Scenario(time_step_strategy='normal', cells_per_block=block_size))
def kernel_benchmark():
scenarios = wlb.ScenarioManager()
block_sizes = [(i, i, i) for i in (8, 16, 32, 64, 128)]
# overlap
scenario = Scenario(timeStepStrategy='overlap',
overlappingWidth=(8, 8, 8))
scenarios.add(scenario)
for time_step_strategy in ['phase_only', 'hydro_only', 'kernel_only', 'normal']:
for block_size in block_sizes:
scenario = Scenario(time_step_strategy=time_step_strategy,
cells_per_block=block_size)
scenarios.add(scenario)
# overlap_benchmark()
# benchmark()
kernel_benchmark()
......@@ -37,8 +37,6 @@
#include "timeloop/SweepTimeloop.h"
#include <blockforest/communication/UniformBufferedScheme.h>
#include "InitializerFunctions.h"
//////////////////////////////
......@@ -48,45 +46,23 @@
#if defined(WALBERLA_BUILD_WITH_CUDA)
# include "cuda/AddGPUFieldToStorage.h"
# include "cuda/DeviceSelectMPI.h"
# include "cuda/HostFieldAllocator.h"
# include "cuda/ParallelStreams.h"
# include "cuda/communication/GPUPackInfo.h"
# include "cuda/communication/MemcpyPackInfo.h"
# include "cuda/communication/UniformGPUScheme.h"
# include "GenDefines.h"
# include "PackInfo_phase_field.h"
# include "PackInfo_phase_field_distributions.h"
# include "PackInfo_velocity_based_distributions.h"
# include "hydro_LB_step.h"
# include "initialize_phase_field_distributions.h"
# include "initialize_velocity_based_distributions.h"
# include "phase_field_LB_step.h"
#else
# include "GenDefines.h"
# include "PackInfo_phase_field.h"
# include "PackInfo_phase_field_distributions.h"
# include "PackInfo_velocity_based_distributions.h"
# include "hydro_LB_step.h"
# include "initialize_phase_field_distributions.h"
# include "initialize_velocity_based_distributions.h"
# include "phase_field_LB_step.h"
# include <blockforest/communication/UniformBufferedScheme.h>
#endif
#include "GenDefines.h"
using namespace walberla;
using PdfField_phase_T = GhostLayerField< real_t, Stencil_phase_T::Size >;
using PdfField_hydro_T = GhostLayerField< real_t, Stencil_hydro_T::Size >;
using VelocityField_T = GhostLayerField< real_t, Stencil_hydro_T::Dimension >;
using PhaseField_T = GhostLayerField< real_t, 1 >;
using flag_t = walberla::uint8_t;
using FlagField_T = FlagField< flag_t >;
using flag_t = walberla::uint8_t;
using FlagField_T = FlagField< flag_t >;