Skip to content
Snippets Groups Projects
Commit c5f6979f authored by Nils Kohl's avatar Nils Kohl :full_moon_with_face: Committed by Markus Holzer
Browse files

Experimental x86 half precision support.

parent 55e06038
No related merge requests found
......@@ -102,6 +102,8 @@ option ( WALBERLA_LOG_SKIPPED "Log skipped cmake targets"
# Check the git submodules as part of every cmake run (on by default).
option( WALBERLA_GIT_SUBMODULE_AUTO
        "Check submodules during cmake run"
        ON )

# Opt-in switch for the experimental float16 (half precision) support; off by default.
option( WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
        "Experimental half precision support"
        OFF )

# Installation Directory
set( CMAKE_INSTALL_PREFIX
     /usr/local/waLBerla
     CACHE STRING "The default installation directory." )
......@@ -476,18 +478,18 @@ endif()
# disable Xcode 7.3+ linker deduplication pass to speed up linking in debug mode
if ( APPLE )
   # Ask the linker for its version banner; stdout and stderr are both captured into LINKER_VERSION.
   execute_process( COMMAND ${CMAKE_LINKER} -v OUTPUT_VARIABLE LINKER_VERSION ERROR_VARIABLE LINKER_VERSION )

   # Extract the "ld64-<version>" token and split it at '-' into "<type>;<version>".
   # The expansions are quoted on purpose: if the banner contains no "ld64-..." token the match
   # result is empty, and an unquoted empty expansion would drop the input argument and abort
   # the configure run with a "needs at least 5 arguments" error.
   string( REGEX MATCH "ld64-[0-9\\.\\-]+" LINKER_VERSION "${LINKER_VERSION}" )
   string( REGEX MATCHALL "[^\\-]+" LINKER_VERSION "${LINKER_VERSION}" )

   # Only inspect the fields if both of them are present; list( GET ) on a shorter list is an error.
   list( LENGTH LINKER_VERSION LINKER_VERSION_FIELDS )
   if ( LINKER_VERSION_FIELDS GREATER 1 )
      list( GET LINKER_VERSION 0 LINKER_TYPE )
      list( GET LINKER_VERSION 1 LINKER_VERSION )
      # Skip the deduplication pass in debug builds for ld64 versions newer than 264.3.101.
      if( LINKER_TYPE STREQUAL "ld64" AND LINKER_VERSION VERSION_GREATER 264.3.101 )
         add_flag( CMAKE_EXE_LINKER_FLAGS_DEBUG "-Wl,-no_deduplicate")
         add_flag( CMAKE_MODULE_LINKER_FLAGS_DEBUG "-Wl,-no_deduplicate")
         add_flag( CMAKE_SHARED_LINKER_FLAGS_DEBUG "-Wl,-no_deduplicate")
      endif()
   endif()
endif()
#if ( APPLE )
# execute_process( COMMAND ${CMAKE_LINKER} -v OUTPUT_VARIABLE LINKER_VERSION ERROR_VARIABLE LINKER_VERSION )
# string( REGEX MATCH "ld64-[0-9\\.\\-]+" LINKER_VERSION ${LINKER_VERSION} )
# string( REGEX MATCHALL "[^\\-]+" LINKER_VERSION ${LINKER_VERSION} )
# list( GET LINKER_VERSION 0 LINKER_TYPE )
# list( GET LINKER_VERSION 1 LINKER_VERSION )
# if( LINKER_TYPE STREQUAL "ld64" AND LINKER_VERSION VERSION_GREATER 264.3.101 )
# add_flag( CMAKE_EXE_LINKER_FLAGS_DEBUG "-Wl,-no_deduplicate")
# add_flag( CMAKE_MODULE_LINKER_FLAGS_DEBUG "-Wl,-no_deduplicate")
# add_flag( CMAKE_SHARED_LINKER_FLAGS_DEBUG "-Wl,-no_deduplicate")
# endif()
#endif()
############################################################################################################################
......@@ -1271,6 +1273,34 @@ if ( WALBERLA_SANITIZE_UNDEFINED )
endif()
endif()
############################################################################################################################
##
## Half precision
##
############################################################################################################################
if (WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT)
   # Guard: only gcc and clang are accepted. FATAL_ERROR stops processing right here,
   # so everything below runs for a supported compiler only.
   if (NOT (WALBERLA_CXX_COMPILER_IS_GNU OR WALBERLA_CXX_COMPILER_IS_CLANG))
      message(FATAL_ERROR "[WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT] "
                          "Half precision support is currently only available for gcc and clang.")
   endif ()

   message(STATUS "Configuring with *experimental* half precision (float16) support. You better know what you are doing.")

   # Warn (but continue) for gcc versions older than the tested one.
   if (WALBERLA_CXX_COMPILER_IS_GNU AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.0.0)
      message(WARNING "[WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT] "
                      "Half precision support for gcc has only been tested with version >= 12. "
                      "You are using a previous version - it may not work correctly.")
   endif ()

   # Warn (but continue) for clang versions older than the tested one.
   if (WALBERLA_CXX_COMPILER_IS_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.0)
      message(WARNING "[WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT] "
                      "Half precision support for clang has only been tested with version >= 15. "
                      "You are using a previous version - it may not work correctly.")
   endif ()

   # Warn when the build is not tuned for the local machine.
   if (NOT WALBERLA_OPTIMIZE_FOR_LOCALHOST)
      message(WARNING "[WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT] "
                      "You are not optimizing for localhost. You may encounter linker errors, or WORSE: silent incorrect fp16 arithmetic! Consider also enabling WALBERLA_OPTIMIZE_FOR_LOCALHOST!")
   endif ()
endif ()
############################################################################################################################
# Documentation Generation
#
......
# Sub-projects that live in their own directories.
add_subdirectory(MixedPrecision)
add_subdirectory(povrayFileCompressor)
\ No newline at end of file
# Executable CheckFP16, built from CheckFP16.cpp and depending on the core module.
waLBerla_add_executable(
   NAME    CheckFP16
   FILES   CheckFP16.cpp
   DEPENDS core
)
\ No newline at end of file
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file CheckFP16.cpp
//! \brief Checks the availability of float16 (half precision) and verifies some properties.
//! \author Nils Kohl <nils.kohl@fau.de>
//
//======================================================================================================================
#include <core/DataTypes.h>
#include <core/Environment.h>
#include <core/logging/Logging.h>
#include <core/perf_analysis/extern/likwid.h>
namespace walberla
{
/// Element-wise vector addition: r[i] = v[i] + vv[i] for every i in [0, vsize).
///
/// The loop is deliberately kept as a plain index loop; the caller uses this function to
/// measure how the compiler handles it for different element types.
///
/// \tparam T      element type; must support operator+ and assignment
/// \param  v      first input array with at least vsize elements (read-only)
/// \param  vv     second input array with at least vsize elements (read-only)
/// \param  r      output array with at least vsize elements
/// \param  vsize  number of elements to process; 0 is allowed and does nothing
template< typename T >
void kernel(const T* v, const T* vv, T* r, size_t vsize)
{
   for (size_t i = 0; i < vsize; i++)
   {
      r[i] = v[i] + vv[i];
   }
}
/// Runs the float16 availability checks and, if built with likwid markers, the memory traffic
/// and vectorization measurements.
///
/// Without WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT only a hint is logged and the run fails.
/// Otherwise the type sizes, output via promotion and a basic addition are checked. With
/// WALBERLA_BUILD_WITH_LIKWID_MARKERS three alternating-sign reductions and three element-wise
/// additions (float64 / float32 / float16) are executed inside named likwid marker regions.
/// Each region is closed directly after the measured loop, so that log output is not part of
/// the measurement.
///
/// \param argc  argument count, forwarded to the Environment
/// \param argv  argument vector, forwarded to the Environment
/// \return EXIT_FAILURE if half precision support is not compiled in or a type has an unexpected
///         size, EXIT_SUCCESS otherwise
int main(int argc, char** argv)
{
   Environment const env(argc, argv);

   WALBERLA_LOG_INFO_ON_ROOT("-------------")
   WALBERLA_LOG_INFO_ON_ROOT(" FP16 checks ")
   WALBERLA_LOG_INFO_ON_ROOT("-------------")

#ifndef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
   WALBERLA_LOG_INFO_ON_ROOT(" - Apparently you have not enabled half precision support.")
   WALBERLA_LOG_INFO_ON_ROOT(" Reconfigure by setting the respective CMake variable to ON.")
   WALBERLA_LOG_INFO_ON_ROOT(" At the time of writing this it's called WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT.")

   return EXIT_FAILURE;
#else
   WALBERLA_LOG_INFO_ON_ROOT(" - Half precision support enabled via CMake!")

   // --- storage size of the three floating point types ---
   WALBERLA_LOG_INFO_ON_ROOT(" - Sizeof checks: ")
   const auto sfloat64 = sizeof(float64);
   const auto sfloat32 = sizeof(float32);
   const auto sfloat16 = sizeof(float16);
   WALBERLA_LOG_INFO_ON_ROOT(" + sizeof( float64 ) == " << sfloat64)
   WALBERLA_LOG_INFO_ON_ROOT(" + sizeof( float32 ) == " << sfloat32)
   WALBERLA_LOG_INFO_ON_ROOT(" + sizeof( float16 ) == " << sfloat16)
   if (sfloat64 != 8 || sfloat32 != 4 || sfloat16 != 2)
   {
      WALBERLA_LOG_INFO_ON_ROOT(" Your types don't seem to have the expected sizes.")
      return EXIT_FAILURE;
   }
   WALBERLA_LOG_INFO_ON_ROOT(" -> works out!")

   // --- output: float16 is promoted to double before it is streamed ---
   WALBERLA_LOG_INFO_ON_ROOT(" - Casting checks (promotion is required to format strings): ")
   const float64 a64 = 42;
   const float32 a32 = 42;
   const float16 a16 = 42;
   WALBERLA_LOG_INFO_ON_ROOT(" + float64: " << a64)
   WALBERLA_LOG_INFO_ON_ROOT(" + float32: " << a32)
   WALBERLA_LOG_INFO_ON_ROOT(" + float16: " << (double) a16)
   WALBERLA_LOG_INFO_ON_ROOT(" Casting and output compiles.")

   // --- basic arithmetic: 1.2 + (-1.8) must be -0.6 up to float16 rounding (tolerance 1e-3) ---
   WALBERLA_LOG_INFO_ON_ROOT(" - Basic arithmetic check: ")
   const auto x      = float16(1.2);
   const auto y      = float16(-1.8);
   const float64 z   = -0.6;
   const float16 sum = x + y;
   WALBERLA_LOG_INFO_ON_ROOT(" " << (double) x << " + " << (double) y << " == " << (float64) (x + y) << "")
   WALBERLA_CHECK(std::abs((float64) sum - z) < 1e-3, "Float16 arithmetic is broken.");
   WALBERLA_LOG_INFO_ON_ROOT("")

#   ifdef WALBERLA_BUILD_WITH_LIKWID_MARKERS
   WALBERLA_LOG_INFO_ON_ROOT(" - Memory traffic test. You have built with likwid enabled. Make sure to run ")
   WALBERLA_LOG_INFO_ON_ROOT(" $ likwid-perfctr -g MEM_DP -m ./CheckFP16")
   WALBERLA_LOG_INFO_ON_ROOT(" to compare the memory traffic, and")
   WALBERLA_LOG_INFO_ON_ROOT(" $ likwid-perfctr -g FLOPS_AVX -m ./CheckFP16")
   WALBERLA_LOG_INFO_ON_ROOT(
      " for the stream-triad-like benchmark to check whether automatic float32 vectorization works.")
   WALBERLA_LOG_INFO_ON_ROOT("")
   WALBERLA_LOG_INFO_ON_ROOT(" The only real benefit of using float16 is reduced memory traffic since internally,\n"
                             "all arithmetic operations are preceded by promotions to float32 (likely - depends on "
                             "the machine).")
   WALBERLA_LOG_INFO_ON_ROOT(" + Stream test ... ")

   LIKWID_MARKER_INIT;
   LIKWID_MARKER_THREADINIT;

   LIKWID_MARKER_REGISTER("float64-mem");
   LIKWID_MARKER_REGISTER("float32-mem");
   LIKWID_MARKER_REGISTER("float16-mem");

   LIKWID_MARKER_REGISTER("float64-vec");
   LIKWID_MARKER_REGISTER("float32-vec");
   LIKWID_MARKER_REGISTER("float16-vec");

   // Nine vectors of this length are allocated below: three per element type.
   const size_t vsize = 100000000;

   std::vector< float64 > v64(vsize, 0.01);
   std::vector< float32 > v32(vsize, 0.01f);
   std::vector< float16 > v16(vsize, float16(0.01));

   std::vector< float64 > vv64(vsize, 0.02);
   std::vector< float32 > vv32(vsize, 0.02f);
   std::vector< float16 > vv16(vsize, float16(0.02));

   std::vector< float64 > r64(vsize);
   std::vector< float32 > r32(vsize);
   std::vector< float16 > r16(vsize);

   // Memory traffic: alternating-sign reduction over one vector per type. The result is logged
   // after the region has been closed, so the output is not part of the measurement.
   LIKWID_MARKER_START("float64-mem");
   float64 sum64 = 0;
   for (size_t j = 0; j < vsize; j++)
   {
      if (0 == j % 2) { sum64 += v64[j]; }
      else { sum64 -= v64[j]; }
   }
   LIKWID_MARKER_STOP("float64-mem");
   WALBERLA_LOG_INFO_ON_ROOT(
      " + Printing sum of float64 vector entries. Should be zero up to rounding errors: " << sum64);

   LIKWID_MARKER_START("float32-mem");
   float32 sum32 = 0;
   for (size_t j = 0; j < vsize; j++)
   {
      if (0 == j % 2) { sum32 += v32[j]; }
      else { sum32 -= v32[j]; }
   }
   LIKWID_MARKER_STOP("float32-mem");
   WALBERLA_LOG_INFO_ON_ROOT(
      " + Printing sum of float32 vector entries. Should be zero up to rounding errors: " << sum32);

   LIKWID_MARKER_START("float16-mem");
   float16 sum16 = 0;
   for (size_t j = 0; j < vsize; j++)
   {
      if (0 == j % 2) { sum16 += v16[j]; }
      else { sum16 -= v16[j]; }
   }
   LIKWID_MARKER_STOP("float16-mem");
   WALBERLA_LOG_INFO_ON_ROOT(
      " + Printing sum of float16 vector entries. Should be zero up to rounding errors: " << (double) sum16);

   // Vectorization: element-wise addition through raw pointers, one region per type.
   WALBERLA_LOG_INFO_ON_ROOT(" + Vectorization test ... ")

   float64* v64_ptr  = v64.data();
   float64* vv64_ptr = vv64.data();
   float64* r64_ptr  = r64.data();
   LIKWID_MARKER_START("float64-vec");
   kernel(v64_ptr, vv64_ptr, r64_ptr, vsize);
   LIKWID_MARKER_STOP("float64-vec");
   WALBERLA_LOG_INFO_ON_ROOT(" + Printing entry of float64 vector sum: " << r64[vsize / 2]);

   float32* v32_ptr  = v32.data();
   float32* vv32_ptr = vv32.data();
   float32* r32_ptr  = r32.data();
   LIKWID_MARKER_START("float32-vec");
   kernel(v32_ptr, vv32_ptr, r32_ptr, vsize);
   LIKWID_MARKER_STOP("float32-vec");
   WALBERLA_LOG_INFO_ON_ROOT(" + Printing entry of float32 vector sum: " << r32[vsize / 2]);

   float16* v16_ptr  = v16.data();
   float16* vv16_ptr = vv16.data();
   float16* r16_ptr  = r16.data();
   LIKWID_MARKER_START("float16-vec");
   kernel(v16_ptr, vv16_ptr, r16_ptr, vsize);
   LIKWID_MARKER_STOP("float16-vec");
   WALBERLA_LOG_INFO_ON_ROOT(" + Printing entry of float16 vector sum: " << (double) r16[vsize / 2]);

   LIKWID_MARKER_CLOSE;
#   else
   WALBERLA_LOG_INFO_ON_ROOT(" - Build and run with likwid to run memory traffic test.")
#   endif
#endif
   return EXIT_SUCCESS;
}
} // namespace walberla
/// Program entry point: delegates to walberla::main and passes its result on as exit code.
int main(int argc, char** argv)
{
   const int exitCode = walberla::main(argc, argv);
   return exitCode;
}
......@@ -167,6 +167,33 @@ using real_t = double;
using real_t = float;
#endif
/// Half precision support. Experimental. Use carefully.
///
/// This feature is experimental, since it strictly depends on the underlying architecture and compiler support.
/// On x86 architectures, what you can expect is that the data format is supported natively only for storage and
/// interchange. Arithmetic operations will likely involve casting to fp32 (C++ float) and truncation to fp16.
/// Only bandwidth bound code may therefore benefit. None of this is guaranteed, and may change in the future.
///
#ifdef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
#   if !defined(WALBERLA_CXX_COMPILER_IS_CLANG) && !defined(WALBERLA_CXX_COMPILER_IS_GNU)
// Neither clang nor gcc: there is no half precision implementation for this compiler, so stop the build.
static_assert(false, "\n\n### Attempting to built walberla with half precision support.\n"
                     "### However, the compiler you chose is not suited for that, or we simply have not implemented "
                     "support for half precision and your compiler.\n");
#   else
/// Half precision type, mapped to the compiler's _Float16.
///
/// Requirements for x86 half precision support:
///  - clang: version 15 or higher
///  - gcc:   version 12 or higher
///  - support seems to require SSE, so the respective instruction sets have to be enabled
///
/// Further reading:
///   https://clang.llvm.org/docs/LanguageExtensions.html#half-precision-floating-point
///   https://gcc.gnu.org/onlinedocs/gcc/Half-Precision.html
using half    = _Float16;
using float16 = half;
#   endif
#endif
/// Fixed-width style aliases for the built-in single and double precision types.
using float32 = float;
using float64 = double;

// The literal operators are spelled `operator""_r` without whitespace before the suffix:
// the form with a space (`operator"" _r`) is deprecated and triggers warnings on recent compilers.

/// User-defined literal `_r` for floating point literals, e.g. `1.5_r`; converts the value to real_t.
inline constexpr real_t operator""_r( long double t ) { return static_cast< real_t >(t); }
/// User-defined literal `_r` for integer literals, e.g. `2_r`; converts the value to real_t.
inline constexpr real_t operator""_r( unsigned long long int t ) { return static_cast< real_t >(t); }
template< typename T > inline real_t real_c ( T t ) { return numeric_cast< real_t >(t); } ///< cast to type real_t using "real_c(x)"
......
......@@ -13,6 +13,8 @@
// double or single precision
#cmakedefine WALBERLA_DOUBLE_ACCURACY
// Experimental half precision support.
// Code that provides or uses the float16 type is guarded by this macro
// (#ifdef / #ifndef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT).
#cmakedefine WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
// Debugging options
#cmakedefine WALBERLA_ENABLE_GUI
......@@ -25,6 +27,7 @@
// Optional build features; each macro is emitted by CMake's #cmakedefine mechanism.
#cmakedefine WALBERLA_BUILD_WITH_OPENMP
#cmakedefine WALBERLA_BUILD_WITH_METIS
#cmakedefine WALBERLA_BUILD_WITH_PARMETIS
// Checked by code that places likwid marker regions (#ifdef WALBERLA_BUILD_WITH_LIKWID_MARKERS).
#cmakedefine WALBERLA_BUILD_WITH_LIKWID_MARKERS
#cmakedefine WALBERLA_BUILD_WITH_PYTHON
......
......@@ -193,6 +193,9 @@ waLBerla_compile_test( FILES DebugSTLTest.cpp )
# DebugSTLTest is registered as a test that is expected to fail.
waLBerla_execute_test(NAME DebugSTLTest)
set_tests_properties(DebugSTLTest
                     PROPERTIES WILL_FAIL TRUE)

# float16 checks.
waLBerla_compile_test(FILES FP16Test.cpp)
waLBerla_execute_test(NAME FP16Test)

waLBerla_compile_test(FILES FunctionTraitsTest.cpp)
waLBerla_execute_test(NAME FunctionTraitsTest)
......
//======================================================================================================================
//
// This file is part of waLBerla. waLBerla is free software: you can
// redistribute it and/or modify it under the terms of the GNU General Public
// License as published by the Free Software Foundation, either version 3 of
// the License, or (at your option) any later version.
//
// waLBerla is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// for more details.
//
// You should have received a copy of the GNU General Public License along
// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
//
//! \file FP16Test.cpp
//! \ingroup core
//! \author Nils Kohl <nils.kohl@fau.de>
//
//======================================================================================================================
#include "core/DataTypes.h"
#include "core/debug/Debug.h"
#include "core/debug/TestSubsystem.h"
#include "core/logging/Logging.h"
#include "core/Environment.h"
#include <cstdlib>
#include <iostream>
namespace walberla {
/// Checks the float16 type if half precision support is compiled in; otherwise only logs a hint.
///
/// With WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT defined, the function verifies the storage
/// sizes of float64 / float32 / float16, that a float16 can be printed after promotion to double,
/// and that 1.2 + (-1.8) evaluated in float16 equals -0.6 up to float16 rounding.
///
/// \param argc  argument count, forwarded to the Environment
/// \param argv  argument vector, forwarded to the Environment
void fp16Test( int argc, char ** argv )
{
   Environment const env( argc, argv );

   WALBERLA_LOG_INFO_ON_ROOT("-------------")
   WALBERLA_LOG_INFO_ON_ROOT(" FP16 checks ")
   WALBERLA_LOG_INFO_ON_ROOT("-------------")

#ifndef WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT
   WALBERLA_LOG_INFO_ON_ROOT(" - Test does nothing as it was not built with fp16 support.")
   WALBERLA_LOG_INFO_ON_ROOT(" - Apparently you have not enabled half precision support.")
   WALBERLA_LOG_INFO_ON_ROOT(" - Reconfigure by setting the respective CMake variable "
                             "(at the time of writing this it's called WALBERLA_BUILD_WITH_HALF_PRECISION_SUPPORT) "
                             "to ON.")
#else
   WALBERLA_LOG_INFO_ON_ROOT(" - Half precision support enabled via CMake!")

   WALBERLA_LOG_INFO_ON_ROOT(" - Sizeof checks: ")
   auto sfloat64 = sizeof(float64);
   auto sfloat32 = sizeof(float32);
   auto sfloat16 = sizeof(float16);
   WALBERLA_CHECK_EQUAL( sfloat64, 8, "Your types don't seem to have the expected sizes." );
   WALBERLA_CHECK_EQUAL( sfloat32, 4, "Your types don't seem to have the expected sizes." );
   WALBERLA_CHECK_EQUAL( sfloat16, 2, "Your types don't seem to have the expected sizes." );

   WALBERLA_LOG_INFO_ON_ROOT(" - Casting checks (promotion is required to format strings): ")
   const float64 a64 = 42;
   const float32 a32 = 42;
   const float16 a16 = 42;
   WALBERLA_LOG_INFO_ON_ROOT(" + float64: " << a64)
   WALBERLA_LOG_INFO_ON_ROOT(" + float32: " << a32)
   WALBERLA_LOG_INFO_ON_ROOT(" + float16: " << (double) a16)
   WALBERLA_LOG_INFO_ON_ROOT(" Casting and output compiles.")

   WALBERLA_LOG_INFO_ON_ROOT(" - Basic arithmetic check: ")
   // Constructed via float16(...) exactly as in CheckFP16, instead of the f16 literal suffix,
   // which is not part of standard C++ before C++23.
   const float16 x = float16(1.2);
   const float16 y = float16(-1.8);
   const float64 z = -0.6;
   WALBERLA_LOG_INFO_ON_ROOT(" + " << (double) x << " + " << (double) y << " == " << (float64) (x + y) << " ? ")

   // Neither operand nor the result is exactly representable in binary16: the float16 sum is
   // -0.599609375, i.e. about 3.9e-4 away from -0.6 (the spacing of binary16 values in [0.5, 1)
   // is 2^-11). The comparison therefore uses the same explicit 1e-3 tolerance as CheckFP16.
   // NOTE(review): the default tolerance of WALBERLA_CHECK_FLOAT_EQUAL is not visible here;
   // it is presumably tuned for float32 / float64 and too strict for this check - confirm.
   const float64 deviation = (float64) (x + y) - z;
   WALBERLA_CHECK(deviation < 1e-3 && deviation > -1e-3, "float16 addition does not work correctly.");
#endif
}
}
/// Test driver: enters waLBerla's test mode, runs the float16 checks and reports success.
int main( int argc, char** argv )
{
   // Enter test mode before any check is executed.
   walberla::debug::enterTestMode();

   walberla::fp16Test( argc, argv );

   return EXIT_SUCCESS;
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment