Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Frederik Hennig
waLBerla
Commits
a5a51ac0
Commit
a5a51ac0
authored
Mar 16, 2022
by
Markus Holzer
Committed by
Helen Schottenhamml
Mar 16, 2022
Browse files
UniformGridCPU
parent
5e29f153
Changes
22
Hide whitespace changes
Inline
Side-by-side
apps/benchmarks/CMakeLists.txt
View file @
a5a51ac0
...
...
@@ -22,7 +22,7 @@ if ( WALBERLA_BUILD_WITH_PYTHON )
if
(
WALBERLA_BUILD_WITH_CODEGEN
)
add_subdirectory
(
FlowAroundSphereCodeGen
)
add_subdirectory
(
UniformGrid
Generated
)
add_subdirectory
(
UniformGrid
CPU
)
add_subdirectory
(
PhaseFieldAllenCahn
)
endif
()
...
...
apps/benchmarks/UniformGridCPU/CMakeLists.txt
0 → 100644
View file @
a5a51ac0
# Make parameter files, Python scripts and the simulation setups available in the build directory.
waLBerla_link_files_to_builddir( "*.prm" )
waLBerla_link_files_to_builddir( "*.py" )
waLBerla_link_files_to_builddir( "simulation_setup" )

# One code-generation target and one benchmark executable are created for every
# (streaming pattern, stencil, collision setup) combination.
foreach( streaming_pattern pull push aa esotwist )
    foreach( stencil d3q19 d3q27 )
        foreach( collision_setup srt trt mrt mrt-overrelax central central-overrelax cumulant cumulant-overrelax entropic smagorinsky )
            # KBC methods only for D2Q9 and D3Q27 defined
            # Bare variable names are used on purpose: writing ${collision_setup} would hand if()
            # the already expanded value, which if() dereferences a second time whenever a
            # variable with that name happens to exist.
            if( collision_setup STREQUAL "entropic" AND stencil STREQUAL "d3q19" )
                continue()
            endif()

            # The config string is passed to UniformGridCPU.py and is also the target-name suffix.
            set( config ${stencil}_${streaming_pattern}_${collision_setup} )

            waLBerla_generate_target_from_python( NAME UniformGridCPUGenerated_${config}
                    FILE UniformGridCPU.py
                    CODEGEN_CFG ${config}
                    OUT_FILES UniformGridCPU_LbKernel.cpp UniformGridCPU_LbKernel.h
                    UniformGridCPU_PackInfoEven.cpp UniformGridCPU_PackInfoEven.h
                    UniformGridCPU_PackInfoOdd.cpp UniformGridCPU_PackInfoOdd.h
                    UniformGridCPU_NoSlip.cpp UniformGridCPU_NoSlip.h
                    UniformGridCPU_UBB.cpp UniformGridCPU_UBB.h
                    UniformGridCPU_MacroSetter.cpp UniformGridCPU_MacroSetter.h
                    UniformGridCPU_MacroGetter.cpp UniformGridCPU_MacroGetter.h
                    UniformGridCPU_StreamOnlyKernel.cpp UniformGridCPU_StreamOnlyKernel.h
                    UniformGridCPU_InfoHeader.h
                    )

            waLBerla_add_executable( NAME UniformGridCPU_${config}
                    FILES UniformGridCPU.cpp
                    DEPENDS blockforest boundary core domain_decomposition field geometry python_coupling timeloop vtk UniformGridCPUGenerated_${config} )

            # all configs are excluded from all except for pull d3q27.
            if( streaming_pattern STREQUAL "pull" AND stencil STREQUAL "d3q27" )
                set( exclude_from_all FALSE )
            else()
                set( exclude_from_all TRUE )
            endif()

            set_target_properties( UniformGridCPUGenerated_${config} UniformGridCPU_${config}
                    PROPERTIES EXCLUDE_FROM_ALL ${exclude_from_all} )
        endforeach()
    endforeach()
endforeach()
apps/benchmarks/UniformGrid
Generated
/InitShearVelocity.h
→
apps/benchmarks/UniformGrid
CPU
/InitShearVelocity.h
View file @
a5a51ac0
File moved
apps/benchmarks/UniformGrid
Generated
/ManualKernels.h
→
apps/benchmarks/UniformGrid
CPU
/ManualKernels.h
View file @
a5a51ac0
File moved
apps/benchmarks/UniformGridCPU/UniformGridCPU.cpp
0 → 100644
View file @
a5a51ac0
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file UniformGridCPU.cpp
//! \author Markus Holzer <markus.holzer@fau.de>
//
//======================================================================================================================
#include
"blockforest/Initialization.h"
#include
"blockforest/communication/UniformBufferedScheme.h"
#include
"core/Environment.h"
#include
"core/OpenMP.h"
#include
"core/logging/Initialization.h"
#include
"core/timing/RemainingTimeLogger.h"
#include
"core/timing/TimingPool.h"
#include
"domain_decomposition/SharedSweep.h"
#include
"field/AddToStorage.h"
#include
"field/vtk/VTKWriter.h"
#include
"geometry/InitBoundaryHandling.h"
#include
"lbm/communication/CombinedInPlaceCpuPackInfo.h"
#include
"python_coupling/CreateConfig.h"
#include
"python_coupling/DictWrapper.h"
#include
"python_coupling/PythonCallback.h"
#include
"timeloop/all.h"
#include
<iomanip>
#include
"InitShearVelocity.h"
#include
"ManualKernels.h"
#include
"UniformGridCPU_InfoHeader.h"
using namespace walberla;

// Short aliases for the generated communication and collision kernel classes.
using PackInfoEven_T = lbm::UniformGridCPU_PackInfoEven;
using PackInfoOdd_T  = lbm::UniformGridCPU_PackInfoOdd;
using LbSweep        = lbm::UniformGridCPU_LbKernel;

// Flag field with 8-bit flags, used for the boundary setup.
using FlagField_T = FlagField< uint8_t >;

// Creates the PDF field of one block: sized to the block's cell counts, stored in fzyx layout and
// allocated through field::AllocateAligned< real_t, 64 >.
// The field is returned as a raw pointer because this lambda is handed to addStructuredBlockData().
auto pdfFieldAdder = [](IBlock* const block, StructuredBlockStorage* const storage) {
   const auto xCells = storage->getNumberOfXCells(*block);
   const auto yCells = storage->getNumberOfYCells(*block);
   const auto zCells = storage->getNumberOfZCells(*block);

   const auto alignedAllocator = make_shared< field::AllocateAligned< real_t, 64 > >();

   // NOTE(review): the fourth argument, uint_t(1), is presumably the number of ghost layers -
   // confirm against the PdfField_T constructor.
   return new PdfField_T(xCells, yCells, zCells, uint_t(1), field::fzyx, alignedAllocator);
};
// Benchmark driver. For every configuration provided through the Python coupling it
//   1. creates a uniform block grid and the PDF / velocity / density fields,
//   2. optionally initializes a shear flow and sets the initial PDF values,
//   3. sets up the boundary handling and the ghost layer communication,
//   4. selects a time step variant via the parameter "timeStepStrategy"
//      ('noOverlap', 'simpleOverlap', 'kernelOnly' or 'StreamOnly'; default: 'noOverlap'),
//   5. runs "warmupSteps" single steps followed by "outerIterations" timed runs and reports the
//      measured MLUPS to the log and, if callable, to the Python function "results_callback".
int main(int argc, char** argv)
{
   mpi::Environment env(argc, argv);

   for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg)
   {
      WALBERLA_MPI_WORLD_BARRIER()

      auto config = *cfg;
      logging::configureLogging(config);
      auto blocks = blockforest::createUniformBlockGridFromConfig(config);

      Vector3< uint_t > cellsPerBlock =
         config->getBlock("DomainSetup").getParameter< Vector3< uint_t > >("cellsPerBlock");

      // Reading parameters
      auto parameters          = config->getOneBlock("Parameters");
      const real_t omega       = parameters.getParameter< real_t >("omega", real_c(1.4));
      const uint_t timesteps   = parameters.getParameter< uint_t >("timesteps", uint_c(50));
      const bool initShearFlow = parameters.getParameter< bool >("initShearFlow", true);

      // Creating fields
      BlockDataID pdfFieldId     = blocks->addStructuredBlockData< PdfField_T >(pdfFieldAdder, "pdfs");
      BlockDataID velFieldId     = field::addToStorage< VelocityField_T >(blocks, "vel", real_t(0), field::fzyx);
      BlockDataID densityFieldId = field::addToStorage< ScalarField_T >(blocks, "density", real_t(1.0), field::fzyx);

      // Initialize velocity on cpu
      if (initShearFlow)
      {
         WALBERLA_LOG_INFO_ON_ROOT("Initializing shear flow")
         initShearVelocity(blocks, velFieldId);
      }

      pystencils::UniformGridCPU_MacroSetter setterSweep(densityFieldId, pdfFieldId, velFieldId);
      pystencils::UniformGridCPU_MacroGetter getterSweep(densityFieldId, pdfFieldId, velFieldId);

      // Set up initial PDF values
      for (auto& block : *blocks)
         setterSweep(&block);

      Vector3< int > innerOuterSplit =
         parameters.getParameter< Vector3< int > >("innerOuterSplit", Vector3< int >(1, 1, 1));

      // A block must have more than 2 * innerOuterSplit cells in every direction.
      for (uint_t i = 0; i < 3; ++i)
      {
         if (int_c(cellsPerBlock[i]) <= innerOuterSplit[i] * 2)
         {
            WALBERLA_ABORT_NO_DEBUG_INFO("innerOuterSplit too large - make it smaller or increase cellsPerBlock")
         }
      }
      Cell innerOuterSplitCell(innerOuterSplit[0], innerOuterSplit[1], innerOuterSplit[2]);

      LbSweep lbSweep(pdfFieldId, omega, innerOuterSplitCell);
      pystencils::UniformGridCPU_StreamOnlyKernel StreamOnlyKernel(pdfFieldId);

      // Boundaries
      const FlagUID fluidFlagUID("Fluid");
      BlockDataID flagFieldID = field::addFlagFieldToStorage< FlagField_T >(blocks, "Boundary Flag Field");
      auto boundariesConfig   = config->getBlock("Boundaries");
      bool boundaries         = false;
      if (boundariesConfig)
      {
         WALBERLA_LOG_INFO_ON_ROOT("Setting boundary conditions")
         boundaries = true;
         geometry::initBoundaryHandling< FlagField_T >(*blocks, flagFieldID, boundariesConfig);
         geometry::setNonBoundaryCellsToDomain< FlagField_T >(*blocks, flagFieldID, fluidFlagUID);
      }

      lbm::UniformGridCPU_NoSlip noSlip(blocks, pdfFieldId);
      noSlip.fillFromFlagField< FlagField_T >(blocks, flagFieldID, FlagUID("NoSlip"), fluidFlagUID);

      lbm::UniformGridCPU_UBB ubb(blocks, pdfFieldId);
      ubb.fillFromFlagField< FlagField_T >(blocks, flagFieldID, FlagUID("UBB"), fluidFlagUID);

      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
      ///                                          COMMUNICATION SCHEME                                              ///
      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////

      // Initial setup is the post-collision state of an even time step
      auto tracker  = make_shared< lbm::TimestepTracker >(0);
      auto packInfo =
         make_shared< lbm::CombinedInPlaceCpuPackInfo< PackInfoEven_T, PackInfoOdd_T > >(tracker, pdfFieldId);

      blockforest::communication::UniformBufferedScheme< Stencil_T > communication(blocks);
      communication.addPackInfo(packInfo);

      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
      ///                                          TIME STEP DEFINITIONS                                             ///
      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////

      // Boundary handling on the whole block, on its inner part and on its outer part, respectively.
      auto boundarySweep = [&](IBlock* block, uint8_t t) {
         noSlip.run(block, t);
         ubb.run(block, t);
      };

      auto boundaryInner = [&](IBlock* block, uint8_t t) {
         noSlip.inner(block, t);
         ubb.inner(block, t);
      };

      auto boundaryOuter = [&](IBlock* block, uint8_t t) {
         noSlip.outer(block, t);
         ubb.outer(block, t);
      };

      // The inner part of every block is processed between starting the communication and waiting
      // for it; the outer part is processed once the communication has finished.
      auto simpleOverlapTimeStep = [&]() {
         // Communicate post-collision values of previous timestep...
         communication.startCommunication();
         for (auto& block : *blocks)
         {
            if (boundaries) boundaryInner(&block, tracker->getCounter());
            lbSweep.inner(&block, tracker->getCounterPlusOne());
         }
         communication.wait();
         for (auto& block : *blocks)
         {
            if (boundaries) boundaryOuter(&block, tracker->getCounter());
            lbSweep.outer(&block, tracker->getCounterPlusOne());
         }

         tracker->advance();
      };

      // Blocking communication followed by boundary handling and the LB sweep on the whole block.
      auto normalTimeStep = [&]() {
         communication.communicate();
         for (auto& block : *blocks)
         {
            if (boundaries) boundarySweep(&block, tracker->getCounter());
            lbSweep(&block, tracker->getCounterPlusOne());
         }

         tracker->advance();
      };

      // With two-fields patterns, ghost layer cells act as constant stream-in boundaries;
      // with in-place patterns, ghost layer cells act as wet-node no-slip boundaries.
      auto kernelOnlyFunc = [&]() {
         tracker->advance();
         for (auto& block : *blocks)
            lbSweep(&block, tracker->getCounter());
      };

      // Stream only function to test a streaming pattern without executing lbm operations inside
      auto StreamOnlyFunc = [&]() {
         for (auto& block : *blocks)
            StreamOnlyKernel(&block);
      };

      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
      ///                                             TIME LOOP SETUP                                                ///
      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////

      SweepTimeloop timeLoop(blocks->getBlockStorage(), timesteps);

      // The default has to be one of the values accepted below; otherwise a parameter file that
      // does not set 'timeStepStrategy' always ends in the abort branch.
      const std::string timeStepStrategy = parameters.getParameter< std::string >("timeStepStrategy", "noOverlap");
      std::function< void() > timeStep;
      if (timeStepStrategy == "noOverlap")
         timeStep = normalTimeStep;
      else if (timeStepStrategy == "simpleOverlap")
         timeStep = simpleOverlapTimeStep;
      else if (timeStepStrategy == "kernelOnly")
      {
         WALBERLA_LOG_INFO_ON_ROOT(
            "Running only compute kernel without boundary - this makes only sense for benchmarking!")
         // Run initial communication once to provide any missing stream-in populations
         communication.communicate();
         timeStep = kernelOnlyFunc;
      }
      else if (timeStepStrategy == "StreamOnly")
      {
         WALBERLA_LOG_INFO_ON_ROOT(
            "Running only streaming kernel without LBM - this makes only sense for benchmarking!")
         // No initial communication is performed for this strategy.
         timeStep = StreamOnlyFunc;
      }
      else
      {
         WALBERLA_ABORT_NO_DEBUG_INFO("Invalid value for 'timeStepStrategy'. Allowed values are 'noOverlap', "
                                      "'simpleOverlap', 'kernelOnly', 'StreamOnly'")
      }

      // The whole time step runs as a "before function"; the sweep itself is an empty placeholder.
      timeLoop.add() << BeforeFunction(timeStep) << Sweep([](IBlock*) {}, "time step");

      uint_t vtkWriteFrequency = parameters.getParameter< uint_t >("vtkWriteFrequency", 0);
      if (vtkWriteFrequency > 0)
      {
         auto vtkOutput = vtk::createVTKOutput_BlockData(*blocks, "vtk", vtkWriteFrequency, 0, false, "vtk_out",
                                                         "simulation_step", false, true, true, false, 0);
         auto velWriter = make_shared< field::VTKWriter< VelocityField_T > >(velFieldId, "vel");
         vtkOutput->addCellDataWriter(velWriter);

         // Update the density and velocity fields from the PDFs before they are written.
         vtkOutput->addBeforeFunction([&]() {
            for (auto& block : *blocks)
            {
               getterSweep(&block);
            }
         });
         timeLoop.addFuncBeforeTimeStep(vtk::writeFiles(vtkOutput), "VTK Output");
      }

      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////
      ///                                               BENCHMARK                                                    ///
      //////////////////////////////////////////////////////////////////////////////////////////////////////////////////

      int warmupSteps     = parameters.getParameter< int >("warmupSteps", 2);
      int outerIterations = parameters.getParameter< int >("outerIterations", 1);
      for (int i = 0; i < warmupSteps; ++i)
         timeLoop.singleStep();

      real_t remainingTimeLoggerFrequency =
         parameters.getParameter< real_t >("remainingTimeLoggerFrequency", -1.0); // in seconds
      if (remainingTimeLoggerFrequency > 0)
      {
         auto logger = timing::RemainingTimeLogger(timeLoop.getNrOfTimeSteps() * uint_c(outerIterations),
                                                   remainingTimeLoggerFrequency);
         timeLoop.addFuncAfterTimeStep(logger, "remaining time logger");
      }

      for (int outerIteration = 0; outerIteration < outerIterations; ++outerIteration)
      {
         timeLoop.setCurrentTimeStepToZero();

         WcTimer simTimer;
         WALBERLA_LOG_INFO_ON_ROOT("Starting simulation with " << timesteps << " time steps")
         simTimer.start();
         timeLoop.run();
         simTimer.end();
         WALBERLA_LOG_INFO_ON_ROOT("Simulation finished")

         // Reduce the measured wall-clock time with MAX over the MPI processes.
         real_t time = simTimer.last();
         WALBERLA_MPI_SECTION() { walberla::mpi::reduceInplace(time, walberla::mpi::MAX); }

         // NOTE(review): nrOfCells is the cell count of a single block, so this is a per-process
         // figure only if every process holds exactly one block - confirm against the setups used.
         auto nrOfCells       = real_c(cellsPerBlock[0] * cellsPerBlock[1] * cellsPerBlock[2]);
         auto mlupsPerProcess = nrOfCells * real_c(timesteps) / time * 1e-6;
         WALBERLA_LOG_RESULT_ON_ROOT("MLUPS per process " << mlupsPerProcess)
         WALBERLA_LOG_RESULT_ON_ROOT("Time per time step " << time / real_c(timesteps))
         WALBERLA_ROOT_SECTION()
         {
            python_coupling::PythonCallback pythonCallbackResults("results_callback");
            if (pythonCallbackResults.isCallable())
            {
               pythonCallbackResults.data().exposeValue("mlupsPerProcess", mlupsPerProcess);
               pythonCallbackResults.data().exposeValue("stencil", infoStencil);
               pythonCallbackResults.data().exposeValue("streamingPattern", infoStreamingPattern);
               pythonCallbackResults.data().exposeValue("collisionSetup", infoCollisionSetup);
               pythonCallbackResults.data().exposeValue("cse_global", infoCseGlobal);
               pythonCallbackResults.data().exposeValue("cse_pdfs", infoCsePdfs);
               // Call Python function to report results
               pythonCallbackResults();
            }
         }
      }
   }
   return EXIT_SUCCESS;
}
apps/benchmarks/UniformGridCPU/UniformGridCPU.py
0 → 100644
View file @
a5a51ac0
from
dataclasses
import
replace
import
sympy
as
sp
import
pystencils
as
ps
from
pystencils.fast_approximation
import
insert_fast_divisions
,
insert_fast_sqrts
from
pystencils.simp.subexpression_insertion
import
insert_zeros
,
insert_aliases
,
insert_constants
,
\
insert_symbol_times_minus_one
from
lbmpy.advanced_streaming
import
Timestep
,
is_inplace
from
lbmpy.advanced_streaming.utility
import
streaming_patterns
from
lbmpy.boundaries
import
NoSlip
,
UBB
from
lbmpy.creationfunctions
import
LBMConfig
,
LBMOptimisation
,
LBStencil
,
create_lb_collision_rule
from
lbmpy.enums
import
Method
,
Stencil
from
lbmpy.fieldaccess
import
CollideOnlyInplaceAccessor
from
lbmpy.macroscopic_value_kernels
import
macroscopic_values_getter
,
macroscopic_values_setter
from
lbmpy.updatekernels
import
create_stream_only_kernel
from
pystencils_walberla
import
CodeGeneration
,
generate_pack_info_from_kernel
,
generate_sweep
,
\
generate_mpidtype_info_from_kernel
,
generate_info_header
from
lbmpy_walberla
import
generate_alternating_lbm_sweep
,
generate_alternating_lbm_boundary
,
generate_lb_pack_info
# Symbolic relaxation rates; 'omega_free' is only used by the entropic setup below.
omega = sp.Symbol('omega')
omega_free = sp.Symbol('omega_free')


def _overrelaxed_rates(num_extra):
    """Return [omega, 1.01, 1.02, ...] with ``num_extra`` numeric rates following omega."""
    return [omega] + [1 + x * 1e-2 for x in range(1, num_extra + 1)]


# best configs in terms of FLOPS
# Maps the collision-setup token of the config string to the keyword arguments
# that are forwarded to LBMConfig.
options_dict = {
    'srt': dict(
        method=Method.SRT,
        relaxation_rate=omega,
        compressible=False,
    ),
    'trt': dict(
        method=Method.TRT,
        relaxation_rate=omega,
        compressible=False,
    ),
    'mrt': dict(
        method=Method.MRT,
        relaxation_rates=[omega, 1, 1, 1, 1, 1, 1],
        compressible=False,
    ),
    'mrt-overrelax': dict(
        method=Method.MRT,
        relaxation_rates=_overrelaxed_rates(10),
        compressible=False,
    ),
    'central': dict(
        method=Method.CENTRAL_MOMENT,
        relaxation_rate=omega,
        compressible=True,
    ),
    'central-overrelax': dict(
        method=Method.CENTRAL_MOMENT,
        relaxation_rates=_overrelaxed_rates(10),
        compressible=True,
    ),
    'cumulant': dict(
        method=Method.MONOMIAL_CUMULANT,
        relaxation_rate=omega,
        compressible=True,
    ),
    'cumulant-overrelax': dict(
        method=Method.MONOMIAL_CUMULANT,
        relaxation_rates=_overrelaxed_rates(17),
        compressible=True,
    ),
    'entropic': dict(
        method=Method.TRT_KBC_N4,
        compressible=True,
        relaxation_rates=[omega, omega_free],
        entropic=True,
        entropic_newton_iterations=False,
    ),
    # NOTE(review): the setup is called 'smagorinsky' but passes smagorinsky=False, which
    # presumably leaves the Smagorinsky model switched off - confirm whether this is intended.
    'smagorinsky': dict(
        method=Method.SRT,
        smagorinsky=False,
        relaxation_rate=omega,
    ),
}

# Template of C++ constants describing the generated configuration; the placeholders
# in braces are meant to be filled in by name.
info_header = """
const char * infoStencil = "{stencil}";
const char * infoStreamingPattern = "{streaming_pattern}";
const char * infoCollisionSetup = "{collision_setup}";
const bool infoCseGlobal = {cse_global};
const bool infoCsePdfs = {cse_pdfs};
"""

# DEFAULTS
# A fourth config token equal to 'noopt' switches this off further below.
optimize = True
with
CodeGeneration
()
as
ctx
:
openmp
=
True
if
ctx
.
openmp
else
False
field_type
=
"float64"
if
ctx
.
double_accuracy
else
"float32"
if
ctx
.
optimize_for_localhost
:
cpu_vec
=
{
"nontemporal"
:
True
,
"assume_aligned"
:
True
}
else
:
cpu_vec
=
None
config_tokens
=
ctx
.
config
.
split
(
'_'
)
assert
len
(
config_tokens
)
>=
3
stencil_str
=
config_tokens
[
0
]
streaming_pattern
=
config_tokens
[
1
]
collision_setup
=
config_tokens
[
2
]
if
len
(
config_tokens
)
>=
4
:
optimize
=
(
config_tokens
[
3
]
!=
'noopt'
)
if
stencil_str
==
"d3q27"
:
stencil
=
LBStencil
(
Stencil
.
D3Q27
)
elif
stencil_str
==
"d3q19"
:
stencil
=
LBStencil
(
Stencil
.
D3Q19
)
else
:
raise
ValueError
(
"Only D3Q27 and D3Q19 stencil are supported at the moment"
)
assert
streaming_pattern
in
streaming_patterns
,
f
"Invalid streaming pattern:
{
streaming_pattern
}
"
options
=
options_dict
[
collision_setup
]
q
=
stencil
.
Q
dim
=
stencil
.
D
assert
dim
==
3
,
"This app supports only three-dimensional stencils"
pdfs
,
pdfs_tmp
=
ps
.
fields
(
f
"pdfs(
{
q
}
), pdfs_tmp(
{
q
}
):
{
field_type
}
[3D]"
,
layout
=
'fzyx'
)
density_field
,
velocity_field
=
ps
.
fields
(
f
"density, velocity(3) :
{
field_type
}
[3D]"
,
layout
=
'fzyx'
)
lbm_config
=
LBMConfig
(
stencil
=
stencil
,
field_name
=
pdfs
.
name
,
streaming_pattern
=
streaming_pattern
,
**
options
)
lbm_opt
=
LBMOptimisation
(
cse_global
=
True
,
cse_pdfs
=
False
,
symbolic_field
=
pdfs
,
field_layout
=
'fzyx'
)
if
not
is_inplace
(
streaming_pattern
):
lbm_opt
=
replace
(
lbm_opt
,
symbolic_temporary_field
=
pdfs_tmp
)
field_swaps
=
[(
pdfs
,
pdfs_tmp
)]
else
:
field_swaps
=
[]
# Sweep for Stream only. This is for benchmarking an empty streaming pattern without LBM.
# is_inplace is set to False to ensure that the streaming is done with src and dst field.
# If this is not the case the compiler might simplify the streaming in a way that benchmarking makes no sense.
accessor
=
CollideOnlyInplaceAccessor
()
accessor
.
is_inplace
=
False
field_swaps_stream_only
=
[(
pdfs
,
pdfs_tmp
)]
stream_only_kernel
=
create_stream_only_kernel
(
stencil
,
pdfs
,
pdfs_tmp
,
accessor
=
accessor
)
# LB Sweep
collision_rule
=
create_lb_collision_rule
(
lbm_config
=
lbm_config
,
lbm_optimisation
=
lbm_opt
)
if
optimize
:
collision_rule
=
insert_fast_divisions
(
collision_rule
)
collision_rule
=
insert_fast_sqrts
(
collision_rule
)
collision_rule
=
insert_constants
(
collision_rule
)
collision_rule
=
insert_zeros
(
collision_rule
)
collision_rule
=
insert_aliases
(
collision_rule
)
collision_rule
=
insert_symbol_times_minus_one
(
collision_rule
)
lb_method
=
collision_rule
.
method
generate_alternating_lbm_sweep
(
ctx
,
'UniformGridCPU_LbKernel'
,
collision_rule
,
lbm_config
=
lbm_config
,
lbm_optimisation
=
lbm_opt
,
target
=
ps
.
Target
.
CPU
,
inner_outer_split
=
True
,
field_swaps
=
field_swaps
,
cpu_openmp
=
openmp
,
cpu_vectorize_info
=
cpu_vec
)
# getter & setter
setter_assignments
=
macroscopic_values_setter
(
lb_method
,
density
=
density_field
.
center
,
velocity
=
velocity_field
.
center_vector
,
pdfs
=
pdfs
,
streaming_pattern
=
streaming_pattern
,
previous_timestep
=
Timestep
.
EVEN
)
getter_assignments
=
macroscopic_values_getter
(
lb_method
,
density
=
density_field
,
velocity
=
velocity_field
,
pdfs
=
pdfs
,
streaming_pattern
=
streaming_pattern
,
previous_timestep
=
Timestep
.
EVEN
)
generate_sweep
(
ctx
,
'UniformGridCPU_MacroSetter'
,
setter_assignments
,
target
=
ps
.
Target
.
CPU
,
cpu_openmp
=
openmp
)
generate_sweep
(
ctx
,
'UniformGridCPU_MacroGetter'
,
getter_assignments
,
target
=
ps
.
Target
.
CPU
,
cpu_openmp
=
openmp
)
# Stream only kernel
generate_sweep
(
ctx
,
'UniformGridCPU_StreamOnlyKernel'
,
stream_only_kernel
,
field_swaps
=
field_swaps_stream_only
,
target
=
ps
.
Target
.
CPU
,
cpu_openmp
=
openmp
)
# Boundaries
noslip
=
NoSlip
()
ubb
=
UBB
((
0.05
,
0
,
0
),
data_type
=
field_type
)
generate_alternating_lbm_boundary
(
ctx
,
'UniformGridCPU_NoSlip'
,
noslip
,
lb_method
,
field_name
=
pdfs
.
name
,
streaming_pattern
=
streaming_pattern
,
target
=
ps
.
Target
.
CPU
,
cpu_openmp
=
openmp
)
generate_alternating_lbm_boundary
(
ctx
,
'UniformGridCPU_UBB'
,
ubb
,
lb_method
,
field_name
=
pdfs
.
name
,
streaming_pattern
=
streaming_pattern
,
target
=
ps
.
Target
.
CPU
,
cpu_openmp
=
openmp
)
# communication
generate_lb_pack_info
(
ctx
,
'UniformGridCPU_PackInfo'
,
stencil
,
pdfs
,
streaming_pattern
=
streaming_pattern
,
target
=
ps
.
Target
.
CPU
,
always_generate_separate_classes
=
True
)
infoHeaderParams
=
{
'stencil'
:
stencil_str
,
'streaming_pattern'
:
streaming_pattern
,
'collision_setup'
:
collision_setup
,
'cse_global'
:
int
(
lbm_opt
.
cse_global
),
'cse_pdfs'
:
int
(
lbm_opt
.
cse_pdfs
),
}
stencil_typedefs
=