From 3111e9d43cde17b56f313654a1dc8d86a5123ca4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Thu, 4 Nov 2021 13:03:37 +0000
Subject: [PATCH] New hash algorithm for Cell and Vector<int> objects

---
 src/core/cell/Cell.h         | 24 ++++++++++----
 src/core/math/Vector2.h      | 22 ++++++++-----
 src/core/math/Vector3.h      | 23 ++++++++-----
 tests/core/cell/CellTest.cpp | 62 ++++++++++++++++++++++++++++++++++++
 4 files changed, 108 insertions(+), 23 deletions(-)

diff --git a/src/core/cell/Cell.h b/src/core/cell/Cell.h
index 27097bfd4..3af43b5ff 100644
--- a/src/core/cell/Cell.h
+++ b/src/core/cell/Cell.h
@@ -330,18 +330,28 @@ inline std::istream & operator>>( std::istream & is, Cell & cell )
  *
  * \param [in]   cell  The cell to be hashed.
  *
- * \return  a hopefully unique hash.
+ * \return  a hash that is unique for cell indices from 0 to 2^21-1 = 2097151
+ *          (64bit architectures) or from 0 to 2^10-1 = 1023 (32bit architectures)
+ *          in all three dimensions.
  **********************************************************************************************************************/
 inline std::size_t hash_value( const Cell & cell )
 {
-  std::size_t seed = 0;
-  std::hash<cell_idx_t> hasher;
+   std::size_t seed; // set by exactly one of the two branches below
 
-  seed ^= hasher(cell.x()) + 0x9e3779b9 + (seed<<6) + (seed>>2);
-  seed ^= hasher(cell.y()) + 0x9e3779b9 + (seed<<6) + (seed>>2);
-  seed ^= hasher(cell.z()) + 0x9e3779b9 + (seed<<6) + (seed>>2);
+   if constexpr( sizeof(std::size_t) >= 8 ) // wide size_t: coordinates are placed 21 bits apart
+   {
+      seed = (static_cast<std::size_t>(cell.x()) << 42) + // x starts at bit 42
+             (static_cast<std::size_t>(cell.y()) << 21) + // y starts at bit 21
+             (static_cast<std::size_t>(cell.z()) << 0); // z starts at bit 0
+   }
+   else // narrower size_t: smaller shifts of 21 and 10 bits
+   {
+      seed = (static_cast<std::size_t>(cell.x()) << 21) + // x starts at bit 21
+             (static_cast<std::size_t>(cell.y()) << 10) + // y starts at bit 10
+             (static_cast<std::size_t>(cell.z()) << 0); // z starts at bit 0
+   }
 
-  return seed;
+   return seed; // the shifted coordinates are summed, not hashed individually
 }
 
 
diff --git a/src/core/math/Vector2.h b/src/core/math/Vector2.h
index c6a2707aa..68a43e309 100644
--- a/src/core/math/Vector2.h
+++ b/src/core/math/Vector2.h
@@ -1557,21 +1557,27 @@ struct Vector2LexicographicalyLess
 /**
 // \brief Function providing a hash value for Vector2.
 //
-// \tparam  T Datatype of the Vector2's elements.
+// \tparam  T Datatype of the Vector2's elements (only integers are supported).
 // \param   v The vector the hash is computed for.
 // \returns   A hash for the entire Vector2.
 */
-template< typename T >
+template< typename T, typename Enable = std::enable_if_t<std::is_integral_v<T>> > // SFINAE: no overload for non-integral T
 std::size_t hash_value( const Vector2<T> & v )
 {
-   std::size_t seed = 0;
-   std::hash<T> hasher;
+   std::size_t seed; // set by exactly one of the two branches below
 
-   seed ^= hasher(v[0]) + 0x9e3779b9 + (seed<<6) + (seed>>2);
-   seed ^= hasher(v[1]) + 0x9e3779b9 + (seed<<6) + (seed>>2);
+   if constexpr( sizeof(std::size_t) >= 8 ) // wide size_t: components are placed 21 bits apart
+   {
+      seed = (static_cast<std::size_t>(v[0]) << 42) + // v[0] starts at bit 42
+             (static_cast<std::size_t>(v[1]) << 21); // v[1] starts at bit 21; nothing is added below it
+   }
+   else // narrower size_t: smaller shifts of 21 and 10 bits
+   {
+      seed = (static_cast<std::size_t>(v[0]) << 21) + // v[0] starts at bit 21
+             (static_cast<std::size_t>(v[1]) << 10); // v[1] starts at bit 10; nothing is added below it
+   }
 
    return seed;
-
 }
 //**********************************************************************************************************************
 
@@ -1687,7 +1693,7 @@ namespace std
     {
         std::size_t operator()( walberla::Vector2<T> const & v ) const noexcept
         {
-            return walberla::Vector2<T>::hash_value( v );
+            return walberla::math::hash_value( v ); // forwards to the hash_value overload in walberla::math
         }
     };
 } // namespace std
diff --git a/src/core/math/Vector3.h b/src/core/math/Vector3.h
index 350c33c18..f377bec15 100644
--- a/src/core/math/Vector3.h
+++ b/src/core/math/Vector3.h
@@ -1817,22 +1817,29 @@ struct Vector3LexicographicalyLess
 /**
 // \brief Function providing a hash value for Vector3.
 //
-// \tparam  T Datatype of the Vector3's elements.
+// \tparam  T Datatype of the Vector3's elements (only integers are supported).
 // \param   v The vector the hash is computed for.
 // \returns   A hash for the entire Vector3.
 */
-template< typename T >
+template< typename T, typename Enable = std::enable_if_t<std::is_integral_v<T>> > // SFINAE: no overload for non-integral T
 std::size_t hash_value( const Vector3<T> & v )
 {
-   std::size_t seed = 0;
-   std::hash<T> hasher;
+   std::size_t seed; // set by exactly one of the two branches below
 
-   seed ^= hasher(v[0]) + 0x9e3779b9 + (seed<<6) + (seed>>2);
-   seed ^= hasher(v[1]) + 0x9e3779b9 + (seed<<6) + (seed>>2);
-   seed ^= hasher(v[2]) + 0x9e3779b9 + (seed<<6) + (seed>>2);
+   if constexpr( sizeof(std::size_t) >= 8 ) // wide size_t: components are placed 21 bits apart
+   {
+      seed = (static_cast<std::size_t>(v[0]) << 42) + // v[0] starts at bit 42
+             (static_cast<std::size_t>(v[1]) << 21) + // v[1] starts at bit 21
+             (static_cast<std::size_t>(v[2]) << 0); // v[2] starts at bit 0
+   }
+   else // narrower size_t: smaller shifts of 21 and 10 bits
+   {
+      seed = (static_cast<std::size_t>(v[0]) << 21) + // v[0] starts at bit 21
+             (static_cast<std::size_t>(v[1]) << 10) + // v[1] starts at bit 10
+             (static_cast<std::size_t>(v[2]) << 0); // v[2] starts at bit 0
+   }
 
    return seed;
-
 }
 } // namespace math
 
diff --git a/tests/core/cell/CellTest.cpp b/tests/core/cell/CellTest.cpp
index 55c6097ba..59fc28c00 100644
--- a/tests/core/cell/CellTest.cpp
+++ b/tests/core/cell/CellTest.cpp
@@ -21,9 +21,12 @@
 //======================================================================================================================
 
 #include "core/cell/Cell.h"
+#include "core/math/Vector2.h"
+#include "core/math/Vector3.h"
 #include "core/debug/TestSubsystem.h"
 
 #include <random>
+#include <unordered_set>
 
 
 using namespace walberla;
@@ -129,6 +132,63 @@ void testBinaryOperators( const Cell & c0, const Cell & c1 )
    WALBERLA_CHECK( ss.eof() );
 }
 
+// Checks the Cell hash: z is added verbatim to the hash of (x, y, 0), cells of a
+// small block with ghost layers never collide, and Vector2<int>/Vector3<int>
+// hash consistently with Cell.
+void testHashAlgorithm()
+{
+   auto const hasher = std::hash< walberla::Cell >();
+
+   // check hash concatenates individual elements: z fills the lowest 21 bits
+   // (64bit) or 10 bits (32bit), so only z values below that width are probed
+   std::size_t const prefix = hasher(walberla::Cell{15, 6, 0});
+   cell_idx_t const max_z = (sizeof(std::size_t) >= 8) ? (1 << 21) : (1 << 10);
+   std::size_t mismatches = 0;
+   for( cell_idx_t z = 0; z < max_z; z += 5 )
+   {
+      auto const cell = walberla::Cell{15, 6, z};
+      auto const expected_hash = prefix + static_cast<std::size_t>(z);
+      if( hasher(cell) != expected_hash )
+      {
+         mismatches++;
+      }
+   }
+   WALBERLA_CHECK_EQUAL( mismatches, 0 );
+
+   // check hash collisions (use a small block size to limit memory footprint)
+   cell_idx_t const block_size = 128;
+   cell_idx_t const ghost_layer = 8;
+   std::unordered_set<std::size_t> keys{};
+   std::size_t collisions = 0;
+   for( auto x = -ghost_layer; x < block_size + ghost_layer; ++x )
+   {
+      for( auto y = -ghost_layer; y < block_size + ghost_layer; ++y )
+      {
+         for( auto z = -ghost_layer; z < block_size + ghost_layer; ++z )
+         {
+            // insert() reports through .second whether the hash was new
+            auto const hash = hasher(walberla::Cell{x, y, z});
+            if( !keys.insert(hash).second )
+            {
+               collisions++;
+            }
+         }
+      }
+   }
+   WALBERLA_CHECK_EQUAL( collisions, 0 );
+
+   // check hash matches with Vector2 and Vector3
+   auto const hasher2 = std::hash< walberla::Vector2<int> >();
+   auto const hasher3 = std::hash< walberla::Vector3<int> >();
+
+   auto const cell = walberla::Cell{15, 6, 42};
+   auto const vec3 = walberla::Vector3<int>{15, 6, 42};
+   WALBERLA_CHECK_EQUAL( hasher(cell), hasher3(vec3) );
+   auto const vec2 = walberla::Vector2<int>{15, 6};
+   auto const vec3b = walberla::Vector3<int>{15, 6, 0};
+   WALBERLA_CHECK_EQUAL( hasher2(vec2), hasher3(vec3b) );
+}
+
 
 int main( int /*argc*/, char** /*argv*/ ) {
 
@@ -153,5 +213,7 @@ int main( int /*argc*/, char** /*argv*/ ) {
       testBinaryOperators( Cell(x0, y0, z0), Cell(x1, y1, z1) );
    }
 
+   testHashAlgorithm();
+
    return 0;
 }
-- 
GitLab