waLBerla / waLBerla / Commits / b4b0cb4b

Commit b4b0cb4b, authored Aug 16, 2021 by Christoph Schwarzmeier

Merge branch 'UpdatePhaseField' into 'master'

Update phase field

See merge request !477

Parents: 3455bf3e, 40e80f3e
Pipeline #33777 passed with stages in 565 minutes and 16 seconds
Changes: 19, Pipelines: 2
apps/benchmarks/PhaseFieldAllenCahn/benchmark.py
@@ -3,30 +3,43 @@ import waLBerla as wlb
 import pandas as pd

 from waLBerla.tools.sqlitedb import sequenceValuesToScalars
+from waLBerla.tools.config import block_decomposition

 import sys
+from math import prod
+
+
+def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, size_per_value=8):
+    """Checks if a single block of given size fits into GPU memory"""
+    cells = prod(b + 2 * gls for b in block_size)
+    # 3 values for the velocity and two for the phase field and the temporary phase field
+    values_per_cell = 2 * q_phase + 2 * q_hydro + 3 + 2
+    needed_memory = values_per_cell * cells * size_per_value
+    return needed_memory < total_mem


 class Scenario:
-    def __init__(self, timeStepStrategy, overlappingWidth, cuda_block_size):
+    def __init__(self, time_step_strategy, cuda_block_size, cells_per_block=(256, 256, 256),
+                 cuda_enabled_mpi=False):
         # output frequencies
         self.vtkWriteFrequency = 0

         # simulation parameters
         self.timesteps = 101
-        edge_size = 32
-        self.cells = (edge_size, edge_size, edge_size)
-        self.blocks = (1, 1, 1)
+        self.cells_per_block = cells_per_block
+        self.blocks = block_decomposition(wlb.mpi.numProcesses())
         self.periodic = (1, 1, 1)
-        self.size = (self.cells[0] * self.blocks[0],
-                     self.cells[1] * self.blocks[1],
-                     self.cells[2] * self.blocks[2])
+        self.size = (self.cells_per_block[0] * self.blocks[0],
+                     self.cells_per_block[1] * self.blocks[1],
+                     self.cells_per_block[2] * self.blocks[2])

-        self.timeStepStrategy = timeStepStrategy
-        self.overlappingWidth = overlappingWidth
+        self.timeStepStrategy = time_step_strategy
         self.cuda_block_size = cuda_block_size
         self.warmupSteps = 10
+        self.cudaEnabledMpi = cuda_enabled_mpi

         # bubble parameters
         self.bubbleRadius = min(self.size) // 4
         self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)
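The new domain_block_size_ok guard makes the per-block GPU footprint explicit: every cell, ghost layers included, stores two copies of the 15-population phase-field PDFs, two copies of the 27-population hydrodynamic PDFs, a 3-component velocity, and two phase-field values, 89 doubles in total. A standalone sanity check, reusing the function exactly as added above (the 8 GiB budget mirrors the script's GPU_MEMORY_GB fallback and is only an assumption here):

from math import prod


def domain_block_size_ok(block_size, total_mem, gls=1, q_phase=15, q_hydro=27, size_per_value=8):
    """Checks if a single block of given size fits into GPU memory"""
    cells = prod(b + 2 * gls for b in block_size)
    values_per_cell = 2 * q_phase + 2 * q_hydro + 3 + 2  # 89 values of 8 bytes each
    return values_per_cell * cells * size_per_value < total_mem


gpu_mem = 8 * 2 ** 30                                  # 8 GiB, the script's fallback budget
print(domain_block_size_ok((256, 256, 256), gpu_mem))  # False: 89 * 258**3 * 8 B ~ 11.4 GiB
print(domain_block_size_ok((128, 128, 128), gpu_mem))  # True:  89 * 130**3 * 8 B ~ 1.5 GiB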
@@ -41,19 +54,18 @@ class Scenario:
         return {
             'DomainSetup': {
                 'blocks': self.blocks,
-                'cellsPerBlock': self.cells,
+                'cellsPerBlock': self.cells_per_block,
                 'periodic': self.periodic,
             },
             'Parameters': {
                 'timesteps': self.timesteps,
                 'vtkWriteFrequency': self.vtkWriteFrequency,
-                'useGui': 0,
                 'remainingTimeLoggerFrequency': -1,
                 'timeStepStrategy': self.timeStepStrategy,
-                'overlappingWidth': self.overlappingWidth,
                 'gpuBlockSize': self.cuda_block_size,
                 'warmupSteps': self.warmupSteps,
                 'scenario': self.scenario,
+                'cudaEnabledMpi': self.cudaEnabledMpi
             },
             'Boundaries_GPU': {
                 'Border': []
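Each key in this dict is read back on the C++ side with parameters.getParameter<T>(key, default) (see benchmark_multiphase.cpp below), so a key that disappears here, such as 'overlappingWidth', simply stops being looked up there. A minimal stand-in for that lookup-with-default behaviour (a hypothetical helper, not the walberla::Config API):

def get_parameter(block, key, default):
    # Stored value wins; otherwise the caller-supplied default applies.
    return block.get(key, default)


params = {'timesteps': 101, 'warmupSteps': 10}
assert get_parameter(params, 'warmupSteps', 2) == 10  # supplied by the Python config
assert get_parameter(params, 'scenario', 1) == 1      # falls back to the default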
@@ -86,43 +98,46 @@ class Scenario:
         df.to_csv(self.csv_file, index=False, mode='a', header=False)


-def overlap_benchmark():
+def benchmark():
     scenarios = wlb.ScenarioManager()
-    overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
-                         (4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
-                         (4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32)]

-    cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1), (512, 1, 1),
-                   (32, 2, 1), (64, 2, 1), (128, 2, 1), (256, 2, 1),
-                   (32, 4, 1), (64, 4, 1), (128, 4, 1),
-                   (32, 4, 2), (64, 4, 2), (128, 2, 2),
-                   (32, 8, 1), (64, 8, 1), (64, 4, 2),
-                   (32, 16, 1), (16, 16, 1), (16, 16, 2)]
+    gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
+    gpu_mem = gpu_mem_gb * (2 ** 30)

-    # no overlap
-    scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1), cuda_block_size=(16, 16, 1)))
+    block_size = (256, 256, 256)

-    # overlap
-    for overlap_strategy in ['overlap']:
-        for overlappingWidth in overlappingWidths:
-            for cuda_block in cuda_blocks:
-                scenario = Scenario(timeStepStrategy=overlap_strategy,
-                                    overlappingWidth=overlappingWidth,
-                                    cuda_block_size=cuda_block)
-                scenarios.add(scenario)
+    if not domain_block_size_ok(block_size, gpu_mem):
+        wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
+    else:
+        scenarios.add(Scenario(time_step_strategy='normal',
+                               cuda_block_size=(256, 1, 1),
+                               cells_per_block=block_size))


 def kernel_benchmark():
     scenarios = wlb.ScenarioManager()

-    # overlap
-    # for overlap_strategy in ['phase_only', 'hydro_only', 'kernel_only']:
-    for overlap_strategy in ['overlap']:
-        scenario = Scenario(timeStepStrategy=overlap_strategy,
-                            overlappingWidth=(1, 1, 1),
-                            cuda_block_size=(128, 1, 1))
-        scenarios.add(scenario)
+    gpu_mem_gb = int(os.environ.get('GPU_MEMORY_GB', 8))
+    gpu_mem = gpu_mem_gb * (2 ** 30)
+
+    block_sizes = [(i, i, i) for i in (32, 64, 128, 256, 320, 384, 448, 512)]
+
+    cuda_blocks = [(32, 1, 1), (64, 1, 1), (128, 1, 1), (256, 1, 1),
+                   (32, 2, 1), (64, 2, 1), (128, 2, 1),
+                   (32, 4, 1), (64, 4, 1),
+                   (32, 4, 2),
+                   (32, 8, 1),
+                   (16, 16, 1)]
+
+    for time_step_strategy in ['phase_only', 'hydro_only', 'kernel_only', 'normal']:
+        for cuda_block in cuda_blocks:
+            for block_size in block_sizes:
+                if not domain_block_size_ok(block_size, gpu_mem):
+                    wlb.log_info_on_root(f"Block size {block_size} would exceed GPU memory. Skipping.")
+                    continue
+                scenario = Scenario(time_step_strategy=time_step_strategy,
+                                    cuda_block_size=cuda_block,
+                                    cells_per_block=block_size)
+                scenarios.add(scenario)


-# overlap_benchmark()
+# benchmark()
 kernel_benchmark()
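For a sense of scale, the rewritten kernel_benchmark() sweeps 4 time-step strategies x 12 CUDA block shapes x 8 cubic domain sizes, and the memory guard now prunes oversized domains up front instead of letting the run fail inside CUDA. A rough count under the assumed 8 GiB default budget:

strategies = ['phase_only', 'hydro_only', 'kernel_only', 'normal']
num_cuda_blocks = 12                                 # CUDA block shapes listed above
edges = [32, 64, 128, 256, 320, 384, 448, 512]       # cubic cells_per_block edges
fitting = [e for e in edges if 89 * (e + 2) ** 3 * 8 < 8 * 2 ** 30]
print(fitting)                                           # [32, 64, 128]
print(len(strategies) * num_cuda_blocks * len(fitting))  # 144 runs survive the guard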
apps/benchmarks/PhaseFieldAllenCahn/benchmark_cpu.py
@@ -3,57 +3,55 @@ import waLBerla as wlb
 import pandas as pd

 from waLBerla.tools.sqlitedb import sequenceValuesToScalars
+from waLBerla.tools.config import block_decomposition

 import sys


 class Scenario:
-    def __init__(self, timeStepStrategy, overlappingWidth):
+    def __init__(self, time_step_strategy, cells_per_block=(256, 256, 256), cuda_enabled_mpi=False):
         # output frequencies
-        self.vtkWriteFrequency = -1
+        self.vtkWriteFrequency = 0

         # simulation parameters
-        self.timesteps = 500
-        edge_size = 50
-        self.cells = (edge_size, edge_size, edge_size)
-        self.blocks = (1, 1, 1)
+        self.timesteps = 101
+        self.cells_per_block = cells_per_block
+        self.blocks = block_decomposition(wlb.mpi.numProcesses())
         self.periodic = (1, 1, 1)
-        self.size = (self.cells[0] * self.blocks[0],
-                     self.cells[1] * self.blocks[1],
-                     self.cells[2] * self.blocks[2])
+        self.size = (self.cells_per_block[0] * self.blocks[0],
+                     self.cells_per_block[1] * self.blocks[1],
+                     self.cells_per_block[2] * self.blocks[2])

-        self.timeStepStrategy = timeStepStrategy
-        self.overlappingWidth = overlappingWidth
-        self.cuda_block_size = (-1, -1, -1)
+        self.timeStepStrategy = time_step_strategy
         self.warmupSteps = 10
+        self.cudaEnabledMpi = cuda_enabled_mpi

         # bubble parameters
         self.bubbleRadius = min(self.size) // 4
         self.bubbleMidPoint = (self.size[0] / 2, self.size[1] / 2, self.size[2] / 2)

-        self.scenario = 1  # 1 rising bubble, 2 RTI
-
         self.config_dict = self.config()

-        self.csv_file = "benchmark_cpu.csv"
+        self.csv_file = "benchmark.csv"

     @wlb.member_callback
     def config(self):
         return {
             'DomainSetup': {
                 'blocks': self.blocks,
-                'cellsPerBlock': self.cells,
+                'cellsPerBlock': self.cells_per_block,
                 'periodic': self.periodic,
             },
             'Parameters': {
                 'timesteps': self.timesteps,
                 'vtkWriteFrequency': self.vtkWriteFrequency,
-                'useGui': 0,
-                'remainingTimeLoggerFrequency': -1,
+                'remainingTimeLoggerFrequency': 10.0,
                 'timeStepStrategy': self.timeStepStrategy,
-                'overlappingWidth': self.overlappingWidth,
-                'gpuBlockSize': self.cuda_block_size,
                 'warmupSteps': self.warmupSteps,
-                'scenario': self.scenario,
+                'scenario': 1,
+                'cudaEnabledMpi': self.cudaEnabledMpi
             },
             'Boundaries_GPU': {
                 'Border': []
@@ -86,31 +84,23 @@ class Scenario:
         df.to_csv(self.csv_file, index=False, mode='a', header=False)


-def overlap_benchmark():
+def benchmark():
     scenarios = wlb.ScenarioManager()
-    overlappingWidths = [(1, 1, 1), (4, 1, 1), (8, 1, 1), (16, 1, 1), (32, 1, 1),
-                         (4, 4, 1), (8, 8, 1), (16, 16, 1), (32, 32, 1),
-                         (4, 4, 4), (8, 8, 8), (16, 16, 16), (32, 32, 32),
-                         (64, 32, 32), (64, 64, 32), (64, 64, 64)]
-    # no overlap
-    scenarios.add(Scenario(timeStepStrategy='normal', overlappingWidth=(1, 1, 1)))
-    # overlap
-    for overlap_strategy in ['overlap']:
-        for overlappingWidth in overlappingWidths:
-            scenario = Scenario(timeStepStrategy=overlap_strategy,
-                                overlappingWidth=overlappingWidth)
-            scenarios.add(scenario)
+    block_size = (64, 64, 64)
+
+    scenarios.add(Scenario(time_step_strategy='normal', cells_per_block=block_size))


 def kernel_benchmark():
     scenarios = wlb.ScenarioManager()
+    block_sizes = [(i, i, i) for i in (8, 16, 32, 64, 128)]

-    # overlap
-    scenario = Scenario(timeStepStrategy='overlap',
-                        overlappingWidth=(8, 8, 8))
-    scenarios.add(scenario)
+    for time_step_strategy in ['phase_only', 'hydro_only', 'kernel_only', 'normal']:
+        for block_size in block_sizes:
+            scenario = Scenario(time_step_strategy=time_step_strategy,
+                                cells_per_block=block_size)
+            scenarios.add(scenario)


-# overlap_benchmark()
+# benchmark()
 kernel_benchmark()
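Both scripts persist results through the pandas call kept in the context lines above. Because mode='a' appends and header=False suppresses the column row, successive scenario runs accumulate in one CSV; the columns below are made up purely for illustration:

import pandas as pd

row = pd.DataFrame([{'timeStepStrategy': 'normal', 'timesteps': 101, 'mlups': 42.0}])
row.to_csv('benchmark.csv', index=False, mode='a', header=False)  # append, no header row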
apps/benchmarks/PhaseFieldAllenCahn/benchmark_multiphase.cpp
@@ -37,8 +37,6 @@
 #include "timeloop/SweepTimeloop.h"

-#include <blockforest/communication/UniformBufferedScheme.h>
-
 #include "InitializerFunctions.h"

 //////////////////////////////
@@ -48,40 +46,19 @@
 #if defined(WALBERLA_BUILD_WITH_CUDA)
 #   include "cuda/AddGPUFieldToStorage.h"
 #   include "cuda/DeviceSelectMPI.h"
-#   include "cuda/HostFieldAllocator.h"
 #   include "cuda/ParallelStreams.h"
-#   include "cuda/communication/GPUPackInfo.h"
 #   include "cuda/communication/MemcpyPackInfo.h"
 #   include "cuda/communication/UniformGPUScheme.h"
-
-#   include "GenDefines.h"
-#   include "PackInfo_phase_field.h"
-#   include "PackInfo_phase_field_distributions.h"
-#   include "PackInfo_velocity_based_distributions.h"
-#   include "hydro_LB_step.h"
-#   include "initialize_phase_field_distributions.h"
-#   include "initialize_velocity_based_distributions.h"
-#   include "phase_field_LB_step.h"
 #else
-#   include "GenDefines.h"
-#   include "PackInfo_phase_field.h"
-#   include "PackInfo_phase_field_distributions.h"
-#   include "PackInfo_velocity_based_distributions.h"
-#   include "hydro_LB_step.h"
-#   include "initialize_phase_field_distributions.h"
-#   include "initialize_velocity_based_distributions.h"
-#   include "phase_field_LB_step.h"
+#   include <blockforest/communication/UniformBufferedScheme.h>
 #endif

+#include "GenDefines.h"
+
 using namespace walberla;

-using PdfField_phase_T = GhostLayerField< real_t, Stencil_phase_T::Size >;
-using PdfField_hydro_T = GhostLayerField< real_t, Stencil_hydro_T::Size >;
-using VelocityField_T  = GhostLayerField< real_t, Stencil_hydro_T::Dimension >;
-using PhaseField_T     = GhostLayerField< real_t, 1 >;
 using flag_t      = walberla::uint8_t;
 using FlagField_T = FlagField< flag_t >;

 #if defined(WALBERLA_BUILD_WITH_CUDA)
 typedef cuda::GPUField< real_t > GPUField;
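The removed PDF-field aliases tie back to the Python memory estimate: Stencil_phase_T::Size and Stencil_hydro_T::Size are presumably 15 and 27 (the q_phase/q_hydro defaults of domain_block_size_ok), which is exactly where its 89-values-per-cell figure comes from:

q_phase, q_hydro = 15, 27      # assumed D3Q15 phase / D3Q27 hydro stencil sizes
velocity_components = 3        # VelocityField_T: one vector per cell
phase_copies = 2               # phase field plus its temporary copy
print(2 * q_phase + 2 * q_hydro + velocity_components + phase_copies)  # 89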
@@ -111,9 +88,7 @@ int main(int argc, char** argv)
       const real_t remainingTimeLoggerFrequency =
          parameters.getParameter< real_t >("remainingTimeLoggerFrequency", 3.0);
       const uint_t scenario = parameters.getParameter< uint_t >("scenario", uint_c(1));
-      Vector3< int > overlappingWidth =
-         parameters.getParameter< Vector3< int > >("overlappingWidth", Vector3< int >(1, 1, 1));
       const uint_t warmupSteps = parameters.getParameter< uint_t >("warmupSteps", uint_t(2));

 #if defined(WALBERLA_BUILD_WITH_CUDA)
       // CPU fields
@@ -165,24 +140,21 @@ int main(int argc, char** argv)
       pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field_gpu, vel_field_gpu);

       pystencils::phase_field_LB_step phase_field_LB_step(
-         lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1],
-         Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
+         lb_phase_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0], gpuBlockSize[1], gpuBlockSize[2]);
       pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field_gpu, phase_field_gpu, vel_field_gpu, gpuBlockSize[0],
-                                              gpuBlockSize[1],
-                                              Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
+                                              gpuBlockSize[1], gpuBlockSize[2]);
 #else
       pystencils::initialize_phase_field_distributions init_h(lb_phase_field, phase_field, vel_field);
       pystencils::initialize_velocity_based_distributions init_g(lb_velocity_field, vel_field);

       pystencils::phase_field_LB_step phase_field_LB_step(
-         lb_phase_field, phase_field, vel_field, Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
-      pystencils::hydro_LB_step hydro_LB_step(
-         lb_velocity_field, phase_field, vel_field, Cell(overlappingWidth[0], overlappingWidth[1], overlappingWidth[2]));
+         lb_phase_field, phase_field, vel_field);
+      pystencils::hydro_LB_step hydro_LB_step(lb_velocity_field, phase_field, vel_field);
 #endif

    // add communication
 #if defined(WALBERLA_BUILD_WITH_CUDA)
+   const bool cudaEnabledMpi = parameters.getParameter< bool >("cudaEnabledMpi", false);
    auto Comm_velocity_based_distributions =
-      make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
+      make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi);
    auto generatedPackInfo_velocity_based_distributions =
       make_shared< lbm::PackInfo_velocity_based_distributions >(lb_velocity_field_gpu);
    Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_velocity_based_distributions);
@@ -190,13 +162,12 @@ int main(int argc, char** argv)
    Comm_velocity_based_distributions->addPackInfo(generatedPackInfo_phase_field);

    auto Comm_phase_field_distributions =
-      make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, 0);
+      make_shared< cuda::communication::UniformGPUScheme< Stencil_hydro_T > >(blocks, cudaEnabledMpi);
    auto generatedPackInfo_phase_field_distributions =
       make_shared< lbm::PackInfo_phase_field_distributions >(lb_phase_field_gpu);
    Comm_phase_field_distributions->addPackInfo(generatedPackInfo_phase_field_distributions);
 #else
    blockforest::communication::UniformBufferedScheme< Stencil_hydro_T > Comm_velocity_based_distributions(blocks);
    auto generatedPackInfo_phase_field = make_shared< pystencils::PackInfo_phase_field >(phase_field);
@@ -244,29 +215,15 @@ int main(int argc, char** argv)
    auto timeLoop = make_shared< SweepTimeloop >(blocks->getBlockStorage(), timesteps);
 #if defined(WALBERLA_BUILD_WITH_CUDA)
    auto normalTimeStep = [&]() {
-      for (auto& block : *blocks)
-      {
-         Comm_phase_field_distributions->communicate(nullptr);
-         phase_field_LB_step(&block);
-
-         Comm_velocity_based_distributions->communicate(nullptr);
-         hydro_LB_step(&block);
-      }
-   };
-
-   auto simpleOverlapTimeStep = [&]() {
+      Comm_velocity_based_distributions->startCommunication(defaultStream);
+      for (auto& block : *blocks)
+         phase_field_LB_step(&block, defaultStream);
+      Comm_velocity_based_distributions->wait(defaultStream);
+
       Comm_phase_field_distributions->startCommunication(defaultStream);
       for (auto& block : *blocks)
-         phase_field_LB_step.inner(&block, defaultStream);
-      Comm_phase_field_distributions->wait(defaultStream);
-      for (auto& block : *blocks)
-         phase_field_LB_step.outer(&block, defaultStream);
-
-      Comm_velocity_based_distributions->startCommunication(defaultStream);
-      for (auto& block : *blocks)
-         hydro_LB_step.inner(&block, defaultStream);
-      Comm_velocity_based_distributions->wait(defaultStream);
-      for (auto& block : *blocks)
-         hydro_LB_step.outer(&block, defaultStream);
+         hydro_LB_step(&block, defaultStream);
+      Comm_phase_field_distributions->wait(defaultStream);
    };
+
    auto phase_only = [&]() {
       for (auto& block : *blocks)
@@ -284,29 +241,15 @@ int main(int argc, char** argv)
    };
 #else
    auto normalTimeStep = [&]() {
-      for (auto& block : *blocks)
-      {
-         Comm_phase_field_distributions();
-         phase_field_LB_step(&block);
-
-         Comm_velocity_based_distributions();
-         hydro_LB_step(&block);
-      }
-   };
-
-   auto simpleOverlapTimeStep = [&]() {
-      Comm_phase_field_distributions.startCommunication();
+      Comm_velocity_based_distributions.startCommunication();
       for (auto& block : *blocks)
-         phase_field_LB_step.inner(&block);
-      Comm_phase_field_distributions.wait();
-      for (auto& block : *blocks)
-         phase_field_LB_step.outer(&block);
-
-      Comm_velocity_based_distributions.startCommunication();
-      for (auto& block : *blocks)
-         hydro_LB_step.inner(&block);
-      Comm_velocity_based_distributions.wait();
-      for (auto& block : *blocks)
-         hydro_LB_step.outer(&block);
+         phase_field_LB_step(&block);
+      Comm_velocity_based_distributions.wait();
+
+      Comm_phase_field_distributions.startCommunication();
+      for (auto& block : *blocks)
+         hydro_LB_step(&block);
+      Comm_phase_field_distributions.wait();
    };
+
    auto phase_only = [&]() {
       for (auto& block : *blocks)
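The deleted simpleOverlapTimeStep lambdas implemented communication hiding: start the halo exchange, sweep the interior cells that need no halo data, wait, then sweep the outer layer. The surviving normalTimeStep instead brackets each whole sweep with startCommunication/wait. A compact sketch of the removed schedule (Python pseudocode mirroring the inner/outer split above, not the waLBerla API):

def overlap_step(comm, sweep, blocks):
    comm.start_communication()  # asynchronous halo exchange begins
    for block in blocks:
        sweep.inner(block)      # interior cells need no halo data
    comm.wait()                 # halo values are now valid
    for block in blocks:
        sweep.outer(block)      # boundary layer consumes the fresh halos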