Commit f8b09b27 authored by Andrei Dumitrasc's avatar Andrei Dumitrasc
Browse files

Merge branch 'master' into PETSc_CG

parents 3bedde7d 3ca56cbe
Pipeline #18150 canceled with stages
......@@ -9,6 +9,8 @@ stages:
- test
- deploy
variables:
GIT_SUBMODULE_STRATEGY: normal
###############################################################################
## ##
......@@ -24,12 +26,11 @@ stages:
- cmake --version
- ccache --version
- mpirun --version
- git clone --depth 1 --branch master https://i10git.cs.fau.de/walberla/walberla.git
- export CCACHE_BASEDIR=$CI_PROJECT_DIR
- mkdir $CI_PROJECT_DIR/build
- cd $CI_PROJECT_DIR/build
- if dpkg --compare-versions `ompi_info | head -2 | tail -1 | sed 's/[^0-9.]*\([0-9.]*\).*/\1/'` ge 1.10; then export MPIEXEC_PREFLAGS="--allow-run-as-root" ; fi
- cmake .. -DWALBERLA_DIR=$CI_PROJECT_DIR/walberla -DWARNING_ERROR=ON -DWALBERLA_BUILD_WITH_MPI=$WALBERLA_BUILD_WITH_MPI -DWALBERLA_BUILD_WITH_OPENMP=$WALBERLA_BUILD_WITH_OPENMP -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE -DMPIEXEC_PREFLAGS=$MPIEXEC_PREFLAGS -DPE_DIR=/builds/pe -DHHG_BUILD_WITH_PETSC=$HHG_BUILD_WITH_PETSC -DPETSC_DIR=$PETSC_DIR -DHHG_BUILD_WITH_EIGEN=$HHG_BUILD_WITH_EIGEN -DWALBERLA_SANITIZE_ADDRESS=$WALBERLA_SANITIZE_ADDRESS -DWALBERLA_SANITIZE_UNDEFINED=$WALBERLA_SANITIZE_UNDEFINED
- cmake .. -DWARNING_ERROR=ON -DWALBERLA_BUILD_WITH_MPI=$WALBERLA_BUILD_WITH_MPI -DWALBERLA_BUILD_WITH_OPENMP=$WALBERLA_BUILD_WITH_OPENMP -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE -DMPIEXEC_PREFLAGS=$MPIEXEC_PREFLAGS -DPE_DIR=/builds/pe -DHHG_BUILD_WITH_PETSC=$HHG_BUILD_WITH_PETSC -DPETSC_DIR=$PETSC_DIR -DHHG_BUILD_WITH_EIGEN=$HHG_BUILD_WITH_EIGEN -DWALBERLA_SANITIZE_ADDRESS=$WALBERLA_SANITIZE_ADDRESS -DWALBERLA_SANITIZE_UNDEFINED=$WALBERLA_SANITIZE_UNDEFINED
- cmake . -LAH
- cd $CI_PROJECT_DIR/build/apps
- make -j $(nproc) -l $(nproc)
......@@ -837,9 +838,9 @@ clang_7.0_serial_dbg:
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:7.0
variables:
<<: *build_serial_dbg_variables
except:
only:
variables:
- $DISABLE_PER_COMMIT_BUILDS
- $ENABLE_NIGHTLY_BUILDS
tags:
- docker
......@@ -848,9 +849,9 @@ clang_7.0_mpionly_dbg:
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:7.0
variables:
<<: *build_mpionly_dbg_variables
except:
only:
variables:
- $DISABLE_PER_COMMIT_BUILDS
- $ENABLE_NIGHTLY_BUILDS
tags:
- docker
......@@ -859,15 +860,81 @@ clang_7.0_hybrid_dbg_sp:
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:7.0
variables:
<<: *build_hybrid_dbg_sp_variables
except:
only:
variables:
- $DISABLE_PER_COMMIT_BUILDS
- $ENABLE_NIGHTLY_BUILDS
tags:
- docker
# Clang 7.0 job that merges the build_eigen_petsc_variables anchor.
# Nightly-only: created only when $ENABLE_NIGHTLY_BUILDS is set (defined and non-empty).
clang_7.0_eigen_petsc:
<<: *build_definition
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:7.0
variables:
<<: *build_eigen_petsc_variables
only:
variables:
- $ENABLE_NIGHTLY_BUILDS
tags:
- docker
# Clang 8.0 job that merges the build_serial_variables anchor.
# Nightly-only: created only when $ENABLE_NIGHTLY_BUILDS is set (defined and non-empty).
clang_8.0_serial:
<<: *build_definition
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:8.0
variables:
<<: *build_serial_variables
only:
variables:
- $ENABLE_NIGHTLY_BUILDS
tags:
- docker
# Clang 8.0 job that merges the build_mpionly_variables anchor.
# Nightly-only: created only when $ENABLE_NIGHTLY_BUILDS is set (defined and non-empty).
clang_8.0_mpionly:
<<: *build_definition
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:8.0
variables:
<<: *build_mpionly_variables
only:
variables:
- $ENABLE_NIGHTLY_BUILDS
tags:
- docker
# Clang 8.0 job that merges the build_serial_dbg_variables anchor.
# Excluded from pipelines in which $DISABLE_PER_COMMIT_BUILDS is set.
clang_8.0_serial_dbg:
<<: *build_definition
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:8.0
variables:
<<: *build_serial_dbg_variables
except:
variables:
- $DISABLE_PER_COMMIT_BUILDS
tags:
- docker
# Clang 8.0 job that merges the build_mpionly_dbg_variables anchor.
# Excluded from pipelines in which $DISABLE_PER_COMMIT_BUILDS is set.
clang_8.0_mpionly_dbg:
<<: *build_definition
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:8.0
variables:
<<: *build_mpionly_dbg_variables
except:
variables:
- $DISABLE_PER_COMMIT_BUILDS
tags:
- docker
# Clang 8.0 job that merges the build_hybrid_dbg_sp_variables anchor.
# Excluded from pipelines in which $DISABLE_PER_COMMIT_BUILDS is set.
clang_8.0_hybrid_dbg_sp:
<<: *build_definition
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:8.0
variables:
<<: *build_hybrid_dbg_sp_variables
except:
variables:
- $DISABLE_PER_COMMIT_BUILDS
tags:
- docker
clang_8.0_eigen_petsc:
<<: *build_definition
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:8.0
stage: pretest
variables:
<<: *build_eigen_petsc_variables
......@@ -888,11 +955,10 @@ clang_7.0_eigen_petsc:
pages:
image: walberla/buildenv-ubuntu-basic:16.04
script:
- git clone --depth 1 --branch master https://i10git.cs.fau.de/walberla/walberla.git
- export CCACHE_BASEDIR=$CI_PROJECT_DIR
- mkdir $CI_PROJECT_DIR/build
- cd $CI_PROJECT_DIR/build
- cmake .. -DWALBERLA_DIR=$CI_PROJECT_DIR/walberla
- cmake ..
- make doc_tinyhhg
- mkdir $CI_PROJECT_DIR/public
- mv doc/html/* $CI_PROJECT_DIR/public/
......@@ -936,12 +1002,11 @@ coverage:
- mpirun --version
- gcov --version
- echo $CI_PROJECT_DIR
- git clone --depth 1 --branch master https://i10git.cs.fau.de/walberla/walberla.git
# - export CCACHE_BASEDIR=$CI_PROJECT_DIR
- mkdir build
- cd build
- if dpkg --compare-versions `ompi_info | head -2 | tail -1 | sed 's/[^0-9.]*\([0-9.]*\).*/\1/'` ge 1.10; then export MPIEXEC_PREFLAGS="--allow-run-as-root" ; fi
- cmake .. -DWALBERLA_DIR=$CI_PROJECT_DIR/walberla -DWARNING_ERROR=ON -DWALBERLA_BUILD_WITH_MPI=$WALBERLA_BUILD_WITH_MPI -DWALBERLA_BUILD_WITH_OPENMP=$WALBERLA_BUILD_WITH_OPENMP -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE -DMPIEXEC_PREFLAGS=$MPIEXEC_PREFLAGS -DPE_DIR=/builds/pe -DHHG_BUILD_WITH_PETSC=$HHG_BUILD_WITH_PETSC -DPETSC_DIR=$PETSC_DIR
- cmake .. -DWARNING_ERROR=ON -DWALBERLA_BUILD_WITH_MPI=$WALBERLA_BUILD_WITH_MPI -DWALBERLA_BUILD_WITH_OPENMP=$WALBERLA_BUILD_WITH_OPENMP -DCMAKE_BUILD_TYPE=$CMAKE_BUILD_TYPE -DMPIEXEC_PREFLAGS=$MPIEXEC_PREFLAGS -DPE_DIR=/builds/pe -DHHG_BUILD_WITH_PETSC=$HHG_BUILD_WITH_PETSC -DPETSC_DIR=$PETSC_DIR
- cd tests
- make -j $(nproc) -l $(nproc)
- ctest -LE $CTEST_EXCLUDE_LABELS -C $CMAKE_BUILD_TYPE --output-on-failure -j 2
......@@ -979,10 +1044,9 @@ coverage:
- export MSMPI_DISABLE_SHM=1
- export
- cmake --version
- git clone --depth 1 --branch master https://i10git.cs.fau.de/walberla/walberla.git
- mkdir build
- cd build
- cmake -LAH -DWALBERLA_DIR=../walberla -DWALBERLA_BUILD_BENCHMARKS=OFF -DWALBERLA_BUILD_TUTORIALS=OFF -DWALBERLA_BUILD_WITH_MPI=$WALBERLA_BUILD_WITH_MPI -DWALBERLA_BUILD_WITH_OPENMP=$WALBERLA_BUILD_WITH_OPENMP -DWALBERLA_DOUBLE_ACCURACY=$WALBERLA_DOUBLE_ACCURACY -DWARNING_ERROR=OFF -G "$CMAKE_GENERATOR" ..
- cmake -LAH -DWALBERLA_BUILD_BENCHMARKS=OFF -DWALBERLA_BUILD_TUTORIALS=OFF -DWALBERLA_BUILD_WITH_MPI=$WALBERLA_BUILD_WITH_MPI -DWALBERLA_BUILD_WITH_OPENMP=$WALBERLA_BUILD_WITH_OPENMP -DWALBERLA_DOUBLE_ACCURACY=$WALBERLA_DOUBLE_ACCURACY -DWARNING_ERROR=OFF -G "$CMAKE_GENERATOR" ..
- cmake --build . --config $BUILD_CONFIGURATION -- /maxcpucount:4
#- cd tests
#- ctest -LE $CTEST_EXCLUDE_LABELS -C $BUILD_CONFIGURATION --output-on-failure -j 4
......@@ -1032,12 +1096,11 @@ msvc-14.1_MpiOnly_Dbg:
- cmake --version
- ccache --version
- mpirun --version
- git clone --depth 1 --branch master https://i10git.cs.fau.de/walberla/walberla.git
- export CCACHE_BASEDIR=$CI_PROJECT_DIR
- mkdir $CI_PROJECT_DIR/build
- cd $CI_PROJECT_DIR/build
- if dpkg --compare-versions `ompi_info | head -2 | tail -1 | sed 's/[^0-9.]*\([0-9.]*\).*/\1/'` ge 1.10; then export MPIEXEC_PREFLAGS="--allow-run-as-root" ; fi
- cmake .. -DWALBERLA_DIR=$CI_PROJECT_DIR/walberla -DWALBERLA_BUFFER_DEBUG=OFF -DWALBERLA_BUILD_TESTS=OFF -DWALBERLA_BUILD_BENCHMARKS=OFF -DWALBERLA_BUILD_TUTORIALS=OFF -DWALBERLA_BUILD_TOOLS=OFF -DWALBERLA_BUILD_WITH_MPI=ON -DWALBERLA_BUILD_WITH_CUDA=OFF -DWALBERLA_BUILD_WITH_PYTHON=OFF -DWALBERLA_BUILD_WITH_OPENMP=OFF -DCMAKE_BUILD_TYPE=RELEASE -DMPIEXEC_PREFLAGS=$MPIEXEC_PREFLAGS -DWALBERLA_DOUBLE_ACCURACY=ON -DWARNING_ERROR=ON -DWALBERLA_BUILD_WITH_METIS=OFF -DWALBERLA_BUILD_WITH_PARMETIS=OFF -DWALBERLA_OPTIMIZE_FOR_LOCALHOST=ON -DWALBERLA_BUILD_WITH_FASTMATH=OFF -DWALBERLA_BUILD_WITH_LTO=ON
- cmake .. -DWALBERLA_BUFFER_DEBUG=OFF -DWALBERLA_BUILD_TESTS=OFF -DWALBERLA_BUILD_BENCHMARKS=OFF -DWALBERLA_BUILD_TUTORIALS=OFF -DWALBERLA_BUILD_TOOLS=OFF -DWALBERLA_BUILD_WITH_MPI=ON -DWALBERLA_BUILD_WITH_CUDA=OFF -DWALBERLA_BUILD_WITH_PYTHON=OFF -DWALBERLA_BUILD_WITH_OPENMP=OFF -DCMAKE_BUILD_TYPE=RELEASE -DMPIEXEC_PREFLAGS=$MPIEXEC_PREFLAGS -DWALBERLA_DOUBLE_ACCURACY=ON -DWARNING_ERROR=ON -DWALBERLA_BUILD_WITH_METIS=OFF -DWALBERLA_BUILD_WITH_PARMETIS=OFF -DWALBERLA_OPTIMIZE_FOR_LOCALHOST=ON -DWALBERLA_BUILD_WITH_FASTMATH=OFF -DWALBERLA_BUILD_WITH_LTO=OFF -DWALBERLA_BUILD_WITH_LIKWID_MARKERS=ON
- cmake . -LAH
- time make core -j20
- time make tinyhhg_core -j20
......@@ -1074,20 +1137,29 @@ msvc-14.1_MpiOnly_Dbg:
- likwid-setFrequencies -f 3.3 # set frequency to 3.3
- mpirun --allow-run-as-root -np 1 --map-by core --bind-to core --report-bindings ./P2SolverBenchmark P2SolverBenchmark.prm -Parameters.level=8 | tee P2SolverBenchmarkOutput.txt
- python3 upload.py
# only:
# variables:
# - $ENABLE_BENCHMARKS
- cd $CI_PROJECT_DIR/build/apps/benchmarks/ApplyPerformanceAnalysis-2D-P2/
- make -j 20
- python3 createLevelScalingDataOnSwarm.py
- mkdir -p ~/.config/matplotlib
- echo backend:Agg > ~/.config/matplotlib/matplotlibrc
- cat $DOCKER_IMAGE_NAME.txt
- python3 createLevelScalingPlot.py $DOCKER_IMAGE_NAME.txt 2 15 --perfgroup "DP MFLOP/s STAT"
tags:
- docker-benchmark
artifacts:
paths:
- $CI_PROJECT_DIR/build/apps/benchmarks/ApplyPerformanceAnalysis-2D-P2/*.pdf
- $CI_PROJECT_DIR/build/apps/benchmarks/ApplyPerformanceAnalysis-2D-P2/*.txt
expire_in: 1 weeks
benchmark_intel17:
benchmark_intel19:
<<: *benchmark_definition
image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:17
image: i10git.cs.fau.de:5005/walberla/buildenvs/intel:19
benchmark_gcc7:
benchmark_gcc8:
<<: *benchmark_definition
image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:7
image: i10git.cs.fau.de:5005/walberla/buildenvs/gcc:8
benchmark_clang6:
benchmark_clang8:
<<: *benchmark_definition
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:6.0
\ No newline at end of file
image: i10git.cs.fau.de:5005/walberla/buildenvs/clang:8.0
\ No newline at end of file
# waLBerla is tracked as a git submodule checked out into ./walberla.
[submodule "walberla"]
path = walberla
url = https://i10git.cs.fau.de/walberla/walberla
......@@ -5,12 +5,12 @@ enable_testing()
option ( HHG_BUILD_WITH_PETSC "Build with PETSc" OFF)
option ( HHG_BUILD_WITH_EIGEN "Build with Eigen" OFF)
option ( HHG_REMOVE_WALBERLA "Remove unnecessary walberla components" OFF)
option ( HHG_USE_GENERATED_KERNELS "Use generated pystencil kernels if available" ON)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
# Default waLBerla's localhost-specific compiler optimizations to ON.
# No FORCE is given, so a value already present in the CMake cache takes precedence.
set(WALBERLA_OPTIMIZE_FOR_LOCALHOST ON CACHE BOOL "Enable compiler optimizations specific to localhost")
set(WALBERLA_DIR ${tinyhhg_SOURCE_DIR}/walberla CACHE PATH "waLBerla path")
include_directories ( src )
......@@ -33,10 +33,6 @@ if ( HHG_BUILD_WITH_EIGEN )
link_directories ( ${EIGEN3_LIBRARY_DIR} )
endif()
if( HHG_REMOVE_WALBERLA )
execute_process(COMMAND ${tinyhhg_SOURCE_DIR}/cmake/patchWalberla.sh WORKING_DIRECTORY ${WALBERLA_DIR})
endif()
find_package( waLBerla )
if(WALBERLA_CXX_COMPILER_IS_GNU)
......
......@@ -31,13 +31,14 @@ Optional:
To build TinyHHG, clone the TinyHHG and the waLBerla source code:
$ git clone https://i10git.cs.fau.de/terraneo/tinyhhg
$ git clone https://i10git.cs.fau.de/walberla/walberla
$ git clone --recurse-submodules https://i10git.cs.fau.de/terraneo/tinyhhg
Then run cmake (out-of-source build recommended) and supply the waLBerla source code directory via `-DWALBERLA_DIR=<walberla-source-directory>`:
The `--recurse-submodules` option automatically initializes and clones waLBerla, which is included as a git submodule.
$ mkdir tinyhhg-build && cd tinyhhg-build
$ cmake ../tinyhhg -DWALBERLA_DIR=../walberla
$ mkdir tinyhhg-build
$ cd tinyhhg-build
$ cmake ../tinyhhg
CMake will then produce Makefiles for the included tests and applications. To build and run an application (e.g. a stabilized stokes solver) invoke:
......
......@@ -35,11 +35,11 @@ waLBerla_add_executable( NAME show_mesh
DEPENDS tinyhhg_core)
waLBerla_add_executable( NAME PolarLaplacian
FILES polar_laplacian.cpp
FILES polar_laplacian.cpp
DEPENDS tinyhhg_core)
waLBerla_add_executable( NAME PrimitiveSizes
FILES PrimitiveSizes.cpp
FILES PrimitiveSizes.cpp
DEPENDS tinyhhg_core)
if( HHG_BUILD_WITH_EIGEN )
......@@ -52,6 +52,12 @@ if( HHG_BUILD_WITH_EIGEN )
DEPENDS tinyhhg_core)
endif()
# The exportOperatorMatrix executable is only added as a target when
# HHG_BUILD_WITH_PETSC is enabled (presumably because it relies on PETSc; confirm in exportOperatorMatrix.cpp).
if( HHG_BUILD_WITH_PETSC )
waLBerla_add_executable( NAME exportOperatorMatrix
FILES exportOperatorMatrix.cpp
DEPENDS tinyhhg_core)
endif()
add_subdirectory(benchmarks)
add_subdirectory(stokesSphere)
add_subdirectory(stokesCubeTransport)
......
......@@ -32,7 +32,7 @@ int main( int argc, char* argv[] )
walberla::Environment walberlaEnv( argc, argv );
walberla::MPIManager::instance()->useWorldComm();
WALBERLA_LOG_INFO_ON_ROOT( " --- Primitive Sizes --- " );
WALBERLA_LOG_INFO_ON_ROOT( " --- Primitive Sizes (number of INNER DoFs) --- " );
WALBERLA_LOG_INFO_ON_ROOT( "P1:" );
WALBERLA_LOG_INFO_ON_ROOT( "" );
hhg::printPrimitiveSizes< hhg::P1FunctionTag >();
......
#include "core/DataTypes.h"
#include "core/Environment.h"
#include "core/config/Config.h"
#include "core/math/Constants.h"
#include "core/mpi/MPIManager.h"
#include "core/timing/TimingJSON.h"
#include "core/math/Constants.h"
#include "tinyhhg_core/Format.hpp"
#include "tinyhhg_core/LikwidWrapper.hpp"
#include "tinyhhg_core/VTKWriter.hpp"
#include "tinyhhg_core/edgedofspace/generatedKernels/GeneratedKernelsEdgeToEdgeMacroFace2D.hpp"
#include "tinyhhg_core/mesh/MeshInfo.hpp"
#include "tinyhhg_core/misc/dummy.hpp"
#include "tinyhhg_core/mixedoperators/EdgeDoFToVertexDoFOperator/EdgeDoFToVertexDoFApply.hpp"
#include "tinyhhg_core/mixedoperators/EdgeDoFToVertexDoFOperator/generatedKernels/GeneratedKernelsEdgeToVertexMacroFace2D.hpp"
#include "tinyhhg_core/mixedoperators/VertexDoFToEdgeDoFOperator/generatedKernels/GeneratedKernelsVertexToEdgeMacroFace2D.hpp"
......@@ -20,8 +21,6 @@
#include "tinyhhg_core/primitivestorage/SetupPrimitiveStorage.hpp"
#include "tinyhhg_core/primitivestorage/loadbalancing/SimpleBalancer.hpp"
const int USE_GENERATED_KERNELS = 1;
using walberla::real_t;
using namespace hhg;
......@@ -30,116 +29,218 @@ static void performBenchmark( hhg::P2Function< double >& src,
hhg::P2ConstantLaplaceOperator& laplace,
const uint_t& level,
Face& face,
const uint_t& sampleSize,
walberla::WcTimingTree& timingTree )
{
const std::string benchInfoString = "level" + ( level < 10 ? "0" + std::to_string( level ) : std::to_string( level ) ) + "-" +
"sampleSize" + std::to_string( sampleSize ) +
"numProcs" + std::to_string(walberla::mpi::MPIManager::instance()->numProcesses());
const std::string benchInfoString = "level" + ( level < 10 ? "0" + std::to_string( level ) : std::to_string( level ) ) +
"-numProcs" + std::to_string( walberla::mpi::MPIManager::instance()->numProcesses() );
double time = 0, mlups, mflops;
#ifdef LIKWID_PERFMON
double events;
int nevents = 0, count;
#endif
uint_t iterations = 1;
std::string vvname, vename, eename, evname;
vvname = "Vertex-to-Vertex-Apply-" + benchInfoString;
evname = "Edge-to-Vertex-Apply-" + benchInfoString;
eename = "Edge-to-Edge-Apply-" + benchInfoString;
vename = "Vertex-to-Edge-Apply-" + benchInfoString;
LIKWID_MARKER_REGISTER( vvname.c_str() );
LIKWID_MARKER_REGISTER( evname.c_str() );
LIKWID_MARKER_REGISTER( eename.c_str() );
LIKWID_MARKER_REGISTER( vename.c_str() );
uint_t innerIterationsVertex =
levelinfo::num_microvertices_per_face_from_width( levelinfo::num_microvertices_per_edge( level ) - 3 );
WALBERLA_LOG_INFO_ON_ROOT(
hhg::format( "%18s|%10s|%10s|%10s|%6s|%5s", "kernel", "Time (s)", "MLUPs", "MFLOPs", " Iter", " Level" ) );
typedef edgedof::EdgeDoFOrientation eo;
std::map< eo, uint_t > firstIdx;
for ( auto e : edgedof::faceLocalEdgeDoFOrientations )
firstIdx[e] = edgedof::macroface::index( level, 0, 0, e );
std::string name;
/// Vertex to Vertex
for( uint_t i = 0; i < sampleSize; i++ )
do
{
name = "Vertex-to-Vertex-Apply-" + benchInfoString;
timingTree.start( name );
LIKWID_MARKER_START( name.c_str() );
if( USE_GENERATED_KERNELS )
{
auto dstPtr = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
auto srcPtr = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
auto stencilPtr = face.getData( laplace.getVertexToVertexOpr().getFaceStencilID() )->getPointer( level );
hhg::vertexdof::macroface::generated::apply_2D_macroface_vertexdof_to_vertexdof_replace( dstPtr, srcPtr, stencilPtr, static_cast< int64_t >( level ) );
} else
timingTree.start( vvname );
///only works with likwid 4.3.3 and higher
LIKWID_MARKER_RESET( vvname.c_str() );
LIKWID_MARKER_START( vvname.c_str() );
auto dstPtr = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
auto srcPtr = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
auto stencilPtr = face.getData( laplace.getVertexToVertexOpr().getFaceStencilID() )->getPointer( level );
for ( uint_t i = 0; i < iterations; ++i )
{
hhg::vertexdof::macroface::apply( level,
face,
laplace.getVertexToVertexOpr().getFaceStencilID(),
src.getVertexDoFFunction().getFaceDataID(),
dst.getVertexDoFFunction().getFaceDataID(),
hhg::Replace );
hhg::vertexdof::macroface::generated::apply_2D_macroface_vertexdof_to_vertexdof_replace(
dstPtr, srcPtr, stencilPtr, static_cast< int64_t >( level ) );
hhg::misc::dummy( srcPtr, dstPtr );
}
LIKWID_MARKER_STOP( name.c_str() );
timingTree.stop( name );
}
LIKWID_MARKER_STOP( vvname.c_str() );
timingTree.stop( vvname );
iterations *= 2;
} while ( timingTree[vvname].last() < 0.5 );
iterations /= 2;
#ifdef LIKWID_PERFMON
LIKWID_MARKER_GET( vvname.c_str(), &nevents, &events, &time, &count );
#else
time = timingTree[vvname].last();
#endif
mlups = real_t( innerIterationsVertex * iterations ) / time / 1e6;
/// 13 Flops: 7 Mults and 6 Adds
mflops = real_t( innerIterationsVertex * iterations * 13 ) / time / 1e6;
WALBERLA_LOG_INFO_ON_ROOT(
hhg::format( "%18s|%10.3e|%10.3e|%10.3e|%6u|%5u", "vertex to vertex", time, mlups, mflops, iterations, level ) );
/// Edge to Vertex
for( uint_t i = 0; i < sampleSize; i++ )
iterations = 1;
do
{
name = "Edge-to-Vertex-Apply-" + benchInfoString;
timingTree.start( name );
LIKWID_MARKER_START( name.c_str() );
if( USE_GENERATED_KERNELS )
{
auto dstPtr = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
auto srcPtr = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
auto stencilPtr = face.getData( laplace.getEdgeToVertexOpr().getFaceStencilID() )->getPointer( level );
hhg::EdgeDoFToVertexDoF::generated::apply_2D_macroface_edgedof_to_vertexdof_replace( srcPtr, stencilPtr, dstPtr, static_cast< int64_t >( level ) );
} else
timingTree.start( evname );
///only works with likwid 4.3.3 and higher
LIKWID_MARKER_RESET( evname.c_str() );
LIKWID_MARKER_START( evname.c_str() );
auto dstPtr = face.getData( dst.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
auto srcPtr = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
auto stencilPtr = face.getData( laplace.getEdgeToVertexOpr().getFaceStencilID() )->getPointer( level );
for ( uint_t i = 0; i < iterations; i++ )
{
hhg::EdgeDoFToVertexDoF::applyFace( level,
face,
laplace.getEdgeToVertexOpr().getFaceStencilID(),
src.getEdgeDoFFunction().getFaceDataID(),
dst.getVertexDoFFunction().getFaceDataID(),
hhg::Replace );
hhg::EdgeDoFToVertexDoF::generated::apply_2D_macroface_edgedof_to_vertexdof_replace( &srcPtr[firstIdx[eo::X]],
&srcPtr[firstIdx[eo::XY]],
&srcPtr[firstIdx[eo::Y]],
stencilPtr,
dstPtr,
static_cast< int64_t >( level ) );
hhg::misc::dummy( srcPtr, dstPtr );
}
LIKWID_MARKER_STOP( name.c_str() );
timingTree.stop( name );
}
LIKWID_MARKER_STOP( evname.c_str() );
timingTree.stop( evname );
iterations *= 2;
} while ( timingTree[evname].last() < 0.5 );
#ifdef LIKWID_PERFMON
LIKWID_MARKER_GET( evname.c_str(), &nevents, &events, &time, &count );
#else
time = timingTree[evname].last();
#endif
mlups = real_t( innerIterationsVertex * iterations ) / time / 1e6;
/// 4 DoFs for each subgroup; 23 Flops: 12 Mults and 11 Adds
mflops = real_t( innerIterationsVertex * iterations * 23 ) / time / 1e6;
WALBERLA_LOG_INFO_ON_ROOT(
hhg::format( "%18s|%10.3e|%10.3e|%10.3e|%6u|%5u", "edge to vertex", time, mlups, mflops, iterations, level ) );
/// Edge to Edge
for( uint_t i = 0; i < sampleSize; i++ )
iterations = 1;
do
{
name = "Edge-to-Edge-Apply-" + benchInfoString;
timingTree.start( name );
LIKWID_MARKER_START( name.c_str() );
if( USE_GENERATED_KERNELS )
{
auto dstPtr = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
auto srcPtr = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
auto stencilPtr = face.getData( laplace.getEdgeToEdgeOpr().getFaceStencilID() )->getPointer( level );
hhg::edgedof::macroface::generated::apply_2D_macroface_edgedof_to_edgedof_replace( dstPtr, srcPtr, &stencilPtr[5], &stencilPtr[0], &stencilPtr[10], static_cast< int64_t >( level ) );
} else
timingTree.start( eename );
///only works with likwid 4.3.3 and higher
LIKWID_MARKER_RESET( eename.c_str() );
LIKWID_MARKER_START( eename.c_str() );
auto dstPtr = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
auto srcPtr = face.getData( src.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
auto stencilPtr = face.getData( laplace.getEdgeToEdgeOpr().getFaceStencilID() )->getPointer( level );
for ( uint_t i = 0; i < iterations; i++ )
{
hhg::edgedof::macroface::apply( level,
face,
laplace.getEdgeToEdgeOpr().getFaceStencilID(),
src.getEdgeDoFFunction().getFaceDataID(),
dst.getEdgeDoFFunction().getFaceDataID(),
hhg::Replace );
hhg::edgedof::macroface::generated::apply_2D_macroface_edgedof_to_edgedof_replace( &dstPtr[firstIdx[eo::X]],
&dstPtr[firstIdx[eo::XY]],
&dstPtr[firstIdx[eo::Y]],
&srcPtr[firstIdx[eo::X]],
&srcPtr[firstIdx[eo::XY]],
&srcPtr[firstIdx[eo::Y]],
&stencilPtr[5],
&stencilPtr[0],
&stencilPtr[10],
static_cast< int64_t >( level ) );
hhg::misc::dummy( srcPtr, dstPtr );
}
LIKWID_MARKER_STOP( name.c_str() );
timingTree.stop( name );
}
LIKWID_MARKER_STOP( eename.c_str() );
timingTree.stop( eename );
iterations *= 2;
} while ( timingTree[eename].last() < 0.5 );
iterations /= 2;
#ifdef LIKWID_PERFMON
LIKWID_MARKER_GET( eename.c_str(), &nevents, &events, &time, &count );
#else
time = timingTree[eename].last();
#endif
mlups = real_t( innerIterationsVertex * iterations ) / time / 1e6;
/// 5 DoFs for each subgroup; 29 Flops: 15 Mults and 14 Adds
mflops = real_t( innerIterationsVertex * iterations * 29 ) / time / 1e6;
WALBERLA_LOG_INFO_ON_ROOT(
hhg::format( "%18s|%10.3e|%10.3e|%10.3e|%6u|%5u", "edge to edge", time, mlups, mflops, iterations, level ) );
/// Vertex to Edge
for( uint_t i = 0; i < sampleSize; i++ )
iterations = 1;
do
{
name = "Vertex-to-Edge-Apply-" + benchInfoString;
timingTree.start( name );
LIKWID_MARKER_START( name.c_str() );
if( USE_GENERATED_KERNELS )
{
auto dstPtr = face.getData( dst.getEdgeDoFFunction().getFaceDataID() )->getPointer( level );
auto srcPtr = face.getData( src.getVertexDoFFunction().getFaceDataID() )->getPointer( level );
auto stencilPtr = face.getData( laplace.getVertexToEdgeOpr().getFaceStencilID() )->getPointer( level );
auto vertexToDiagonalEdgeStencil = &stencilPtr[4];
auto vertexToHorizontalEdgeStencil = &stencilPtr[0];
auto vertexToVerticalEdgeStencil = &stencilPtr[8];