From 0e026808036f59d3ea4e5954b1a19fcfcb9a9702 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 8 May 2023 16:31:58 -0600 Subject: [PATCH 01/23] [hash] Add hash impl for integers Part of https://github.com/harfbuzz/harfbuzz/issues/4227 --- src/hb-algs.hh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index da383e050..4bebad771 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -236,7 +236,11 @@ struct private: template constexpr auto - impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, hb_deref (v).hash ()) + impl (const T& v, hb_priority<2>) const HB_RETURN (uint32_t, hb_deref (v).hash ()) + + // Horrible: std:hash() of integers seems to be identity in gcc / clang?! + template constexpr auto + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 2654435761u) template constexpr auto impl (const T& v, hb_priority<0>) const HB_RETURN (uint32_t, std::hash>{} (hb_deref (v))) From c005e3a2e30eaea700e67907dc816709263b4046 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 8 May 2023 16:33:31 -0600 Subject: [PATCH 02/23] [bytes] Simplify hash function Part of https://github.com/harfbuzz/harfbuzz/issues/4227 --- src/hb-array.hh | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/src/hb-array.hh b/src/hb-array.hh index 1a22e15c0..a7553fc4e 100644 --- a/src/hb-array.hh +++ b/src/hb-array.hh @@ -458,21 +458,9 @@ inline uint32_t hb_array_t::hash () const { // FNV-1a hash function uint32_t current = /*cbf29ce4*/0x84222325; - unsigned i = 0; - -#if defined(__OPTIMIZE__) && !defined(HB_NO_PACKED) && \ - ((defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__)) - struct __attribute__((packed)) packed_uint32_t { uint32_t v; }; - for (; i + 4 <= this->length; i += 4) + for (auto &v : *this) { - current = current ^ hb_hash ((uint32_t) ((const packed_uint32_t *) &this->arrayZ[i])->v); - current = current * 16777619; - } -#endif - - for (; i < 
this->length; i++) - { - current = current ^ hb_hash (this->arrayZ[i]); + current = current ^ v; current = current * 16777619; } return current; @@ -483,21 +471,9 @@ inline uint32_t hb_array_t::hash () const { // FNV-1a hash function uint32_t current = /*cbf29ce4*/0x84222325; - unsigned i = 0; - -#if defined(__OPTIMIZE__) && !defined(HB_NO_PACKED) && \ - ((defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__)) - struct __attribute__((packed)) packed_uint32_t { uint32_t v; }; - for (; i + 4 <= this->length; i += 4) + for (auto &v : *this) { - current = current ^ hb_hash ((uint32_t) ((const packed_uint32_t *) &this->arrayZ[i])->v); - current = current * 16777619; - } -#endif - - for (; i < this->length; i++) - { - current = current ^ hb_hash (this->arrayZ[i]); + current = current ^ v; current = current * 16777619; } return current; From fe0f7dc57bc7411c7cc7eb80fa44c8dd8c5e4644 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 8 May 2023 22:19:02 -0600 Subject: [PATCH 03/23] [bytes] Use fasthash as hash algorithm Part of https://github.com/harfbuzz/harfbuzz/pull/4228 --- src/hb-algs.hh | 76 +++++++++++++++++++++++++++++++++++++++++++++++++ src/hb-array.hh | 18 ++---------- 2 files changed, 78 insertions(+), 16 deletions(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index 4bebad771..dbb18dc2b 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -231,6 +231,82 @@ struct } HB_FUNCOBJ (hb_bool); + +/* The MIT License + + Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com) + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this 
permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + + +// Compression function for Merkle-Damgard construction. +// This function is generated using the framework provided. +#define mix(h) ({ \ + (h) ^= (h) >> 23; \ + (h) *= 0x2127599bf4325c37ULL; \ + (h) ^= (h) >> 47; }) + +static inline uint64_t fasthash64(const void *buf, size_t len, uint64_t seed) +{ + const uint64_t m = 0x880355f21e6d1965ULL; + const uint64_t *pos = (const uint64_t *)buf; + const uint64_t *end = pos + (len / 8); + const unsigned char *pos2; + uint64_t h = seed ^ (len * m); + uint64_t v; + + while (pos != end) { + v = *pos++; + h ^= mix(v); + h *= m; + } + + pos2 = (const unsigned char*)pos; + v = 0; + + switch (len & 7) { + case 7: v ^= (uint64_t)pos2[6] << 48; HB_FALLTHROUGH; + case 6: v ^= (uint64_t)pos2[5] << 40; HB_FALLTHROUGH; + case 5: v ^= (uint64_t)pos2[4] << 32; HB_FALLTHROUGH; + case 4: v ^= (uint64_t)pos2[3] << 24; HB_FALLTHROUGH; + case 3: v ^= (uint64_t)pos2[2] << 16; HB_FALLTHROUGH; + case 2: v ^= (uint64_t)pos2[1] << 8; HB_FALLTHROUGH; + case 1: v ^= (uint64_t)pos2[0]; + h ^= mix(v); + h *= m; + } + + return mix(h); +} + +static inline uint32_t fasthash32(const void *buf, size_t len, uint32_t seed) +{ + // the following trick converts the 64-bit hashcode to Fermat + // residue, which shall retain information from both the higher + // and lower parts of hashcode. 
+ uint64_t h = fasthash64(buf, len, seed); + return h - (h >> 32); +} + struct { private: diff --git a/src/hb-array.hh b/src/hb-array.hh index a7553fc4e..50b6617e2 100644 --- a/src/hb-array.hh +++ b/src/hb-array.hh @@ -456,27 +456,13 @@ inline bool hb_array_t::operator == (const hb_array_t inline uint32_t hb_array_t::hash () const { - // FNV-1a hash function - uint32_t current = /*cbf29ce4*/0x84222325; - for (auto &v : *this) - { - current = current ^ v; - current = current * 16777619; - } - return current; + return fasthash32(arrayZ, length, 0xf437ffe6 /* magic? */); } template <> inline uint32_t hb_array_t::hash () const { - // FNV-1a hash function - uint32_t current = /*cbf29ce4*/0x84222325; - for (auto &v : *this) - { - current = current ^ v; - current = current * 16777619; - } - return current; + return fasthash32(arrayZ, length, 0xf437ffe6 /* magic? */); } From 078b2a510189088ceda4cf23bc7c0197518831dd Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 8 May 2023 22:28:48 -0600 Subject: [PATCH 04/23] [hash] Use a Mersenne prime for int hash And hope that compiler optimizes to int ops instead of modulo. Improves chaining it seems. Part of https://github.com/harfbuzz/harfbuzz/pull/4228 --- src/hb-algs.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index dbb18dc2b..e4889a6d8 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -316,7 +316,7 @@ struct // Horrible: std:hash() of integers seems to be identity in gcc / clang?! 
template constexpr auto - impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 2654435761u) + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 8388607) template constexpr auto impl (const T& v, hb_priority<0>) const HB_RETURN (uint32_t, std::hash>{} (hb_deref (v))) From fa64e42d755709df2837fcfb8d60ff6d8b1179fb Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 8 May 2023 23:31:52 -0600 Subject: [PATCH 05/23] [algs] Adjust int hash --- src/hb-algs.hh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index e4889a6d8..0324d0ac1 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -315,7 +315,8 @@ struct impl (const T& v, hb_priority<2>) const HB_RETURN (uint32_t, hb_deref (v).hash ()) // Horrible: std:hash() of integers seems to be identity in gcc / clang?! - template constexpr auto + template ::value)> constexpr auto impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 8388607) template constexpr auto From e2fd49ff1a419dad6d6dd077aa25c20d054530ff Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 8 May 2023 23:37:47 -0600 Subject: [PATCH 06/23] [hash] Comment --- src/hb-algs.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index 0324d0ac1..08e3daa2c 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -317,7 +317,7 @@ struct // Horrible: std:hash() of integers seems to be identity in gcc / clang?! 
template ::value)> constexpr auto - impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 8388607) + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 8388607 /* Mersenne prime */) template constexpr auto impl (const T& v, hb_priority<0>) const HB_RETURN (uint32_t, std::hash>{} (hb_deref (v))) From 9fbab46f2636aabf70ff10acc6c141b147794a2a Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 01:42:44 -0600 Subject: [PATCH 07/23] [cairo] Fix a clang warning --- src/hb-cairo-utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-cairo-utils.cc b/src/hb-cairo-utils.cc index 0f94d8169..23970890c 100644 --- a/src/hb-cairo-utils.cc +++ b/src/hb-cairo-utils.cc @@ -763,7 +763,7 @@ _hb_cairo_add_sweep_gradient_patches (hb_color_stop_t *stops, } //assert (angles[0] + k * span <= 0 && 0 < angles[n_stops - 1] + k * span); - span = fabs (span); + span = fabsf (span); for (signed l = k; l < 1000; l++) { From 075ecff750088854854147d0b32b88b120693a48 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 01:43:39 -0600 Subject: [PATCH 08/23] [hash] Work around g++ bug?! I kid you not. Revert this and see src/test-map loop forever eating your memory freezing your machine. In this loop: { hb_hashmap_t m0; hb_hashmap_t m1; hb_hashmap_t m2; hb_hashmap_t m3; std::string s; for (unsigned i = 1; i < 1000; i++) { s += "x"; m0.set (i, i); m1.set (s, i); m2.set (i, s); m3.set (s, s); } } i will not stop at 1000 and just keeps going. If you figure out what's going on, please enlighten me! --- src/hb-algs.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index 08e3daa2c..39f239496 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -317,7 +317,7 @@ struct // Horrible: std:hash() of integers seems to be identity in gcc / clang?! 
template ::value)> constexpr auto - impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 8388607 /* Mersenne prime */) + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 8388607u /* Mersenne prime */) template constexpr auto impl (const T& v, hb_priority<0>) const HB_RETURN (uint32_t, std::hash>{} (hb_deref (v))) From da619c69c8f1a4be9e29bbc95cf684bf38641468 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 02:06:37 -0600 Subject: [PATCH 09/23] [fasthash] Try to fix unaligned access --- src/hb-algs.hh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index 39f239496..6a675e559 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -267,15 +267,16 @@ HB_FUNCOBJ (hb_bool); static inline uint64_t fasthash64(const void *buf, size_t len, uint64_t seed) { + struct __attribute__((packed)) packed_uint64_t { uint64_t v; }; const uint64_t m = 0x880355f21e6d1965ULL; - const uint64_t *pos = (const uint64_t *)buf; - const uint64_t *end = pos + (len / 8); + const packed_uint64_t *pos = (const packed_uint64_t *)buf; + const packed_uint64_t *end = pos + (len / 8); const unsigned char *pos2; uint64_t h = seed ^ (len * m); uint64_t v; while (pos != end) { - v = *pos++; + v = pos++->v; h ^= mix(v); h *= m; } From 99f5050ccd35e6e447661af2ed330f509ae9bcd8 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 02:07:54 -0600 Subject: [PATCH 10/23] [algs] Remove HB_NO_PACKED We depend on packed attribute in fasthash now. 
--- src/hb-algs.hh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index 6a675e559..51cf6a12d 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -107,9 +107,8 @@ struct BEInt uint8_t ((V ) & 0xFF)} {} struct __attribute__((packed)) packed_uint16_t { uint16_t v; }; - constexpr operator Type () const - { -#if defined(__OPTIMIZE__) && !defined(HB_NO_PACKED) && \ + constexpr operator Type () const { +#if defined(__OPTIMIZE__) && \ defined(__BYTE_ORDER) && \ (__BYTE_ORDER == __BIG_ENDIAN || \ (__BYTE_ORDER == __LITTLE_ENDIAN && \ @@ -155,7 +154,7 @@ struct BEInt struct __attribute__((packed)) packed_uint32_t { uint32_t v; }; constexpr operator Type () const { -#if defined(__OPTIMIZE__) && !defined(HB_NO_PACKED) && \ +#if defined(__OPTIMIZE__) && \ defined(__BYTE_ORDER) && \ (__BYTE_ORDER == __BIG_ENDIAN || \ (__BYTE_ORDER == __LITTLE_ENDIAN && \ From f04d08b883d9b4894d5329cec351d2f0ea50590b Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 02:14:30 -0600 Subject: [PATCH 11/23] [fasthash] Remove GNU extension --- src/hb-algs.hh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index 51cf6a12d..dde075243 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -259,10 +259,10 @@ HB_FUNCOBJ (hb_bool); // Compression function for Merkle-Damgard construction. // This function is generated using the framework provided. 
-#define mix(h) ({ \ - (h) ^= (h) >> 23; \ - (h) *= 0x2127599bf4325c37ULL; \ - (h) ^= (h) >> 47; }) +#define mix(h) ( \ + (h) ^= (h) >> 23, \ + (h) *= 0x2127599bf4325c37ULL, \ + (h) ^= (h) >> 47) static inline uint64_t fasthash64(const void *buf, size_t len, uint64_t seed) { From 1fa4b415315257bdbae08e6539f2ca63423572e8 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 10:52:58 -0600 Subject: [PATCH 12/23] [map] Adjust resizing criteria --- src/hb-map.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-map.hh b/src/hb-map.hh index c685a9a3e..d42b2be86 100644 --- a/src/hb-map.hh +++ b/src/hb-map.hh @@ -204,7 +204,7 @@ struct hb_hashmap_t bool set_with_hash (KK&& key, uint32_t hash, VV&& value, bool is_delete=false) { if (unlikely (!successful)) return false; - if (unlikely ((occupancy + occupancy / 2) >= mask && !resize ())) return false; + if (unlikely ((occupancy + occupancy) >= mask && !resize ())) return false; item_t &item = item_for_hash (key, hash); if (is_delete && !(item == key)) From abb92388cc8bebff0cf40cbed0045292e038bcd8 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 10:54:54 -0600 Subject: [PATCH 13/23] Revert "[map] Adjust resizing criteria" This reverts commit 1fa4b415315257bdbae08e6539f2ca63423572e8. 
--- src/hb-map.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-map.hh b/src/hb-map.hh index d42b2be86..c685a9a3e 100644 --- a/src/hb-map.hh +++ b/src/hb-map.hh @@ -204,7 +204,7 @@ struct hb_hashmap_t bool set_with_hash (KK&& key, uint32_t hash, VV&& value, bool is_delete=false) { if (unlikely (!successful)) return false; - if (unlikely ((occupancy + occupancy) >= mask && !resize ())) return false; + if (unlikely ((occupancy + occupancy / 2) >= mask && !resize ())) return false; item_t &item = item_for_hash (key, hash); if (is_delete && !(item == key)) From 33ef96b649fd249808af6a13f376efb819e31882 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 11:13:51 -0600 Subject: [PATCH 14/23] [glyf] Micro-optimize a few hash operations --- src/OT/glyf/Glyph.hh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/OT/glyf/Glyph.hh b/src/OT/glyf/Glyph.hh index 2bd5fe820..7b0bf4afa 100644 --- a/src/OT/glyf/Glyph.hh +++ b/src/OT/glyf/Glyph.hh @@ -112,10 +112,12 @@ struct Glyph if (!plan->new_gid_for_old_gid (gid, &new_gid)) return; + uint32_t hash = hb_hash (new_gid); + if (type != EMPTY) { - plan->bounds_width_map.set (new_gid, xMax - xMin); - plan->bounds_height_map.set (new_gid, yMax - yMin); + plan->bounds_width_map.set_with_hash (new_gid, hash, xMax - xMin); + plan->bounds_height_map.set_with_hash (new_gid, hash, yMax - yMin); } unsigned len = all_points.length; @@ -127,7 +129,7 @@ struct Glyph signed hori_aw = roundf (rightSideX - leftSideX); if (hori_aw < 0) hori_aw = 0; int lsb = roundf (xMin - leftSideX); - plan->hmtx_map.set (new_gid, hb_pair ((unsigned) hori_aw, lsb)); + plan->hmtx_map.set_with_hash (new_gid, hash, hb_pair ((unsigned) hori_aw, lsb)); //flag value should be computed using non-empty glyphs if (type != EMPTY && lsb != xMin) plan->head_maxp_info.allXMinIsLsb = false; @@ -135,7 +137,7 @@ struct Glyph signed vert_aw = roundf (topSideY - bottomSideY); if (vert_aw < 0) vert_aw = 0; int 
tsb = roundf (topSideY - yMax); - plan->vmtx_map.set (new_gid, hb_pair ((unsigned) vert_aw, tsb)); + plan->vmtx_map.set_with_hash (new_gid, hash, hb_pair ((unsigned) vert_aw, tsb)); } bool compile_header_bytes (const hb_subset_plan_t *plan, From fe3339ea241528652f3480fb061abca3c6bb2ed8 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 11:31:06 -0600 Subject: [PATCH 15/23] [algs] Add hash for 64bit ints --- src/hb-algs.hh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index dde075243..5629d7594 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -315,10 +315,15 @@ struct impl (const T& v, hb_priority<2>) const HB_RETURN (uint32_t, hb_deref (v).hash ()) // Horrible: std:hash() of integers seems to be identity in gcc / clang?! + // template ::value)> constexpr auto + hb_enable_if (std::is_integral::value && sizeof (T) <= sizeof (uint32_t))> constexpr auto impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 8388607u /* Mersenne prime */) + template ::value && sizeof (T) > sizeof (uint32_t))> constexpr auto + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, (v * 8388607u) ^ ((v >> 32) * 8388607u) /* Mersenne prime */) + template constexpr auto impl (const T& v, hb_priority<0>) const HB_RETURN (uint32_t, std::hash>{} (hb_deref (v))) From a58bbe5408b76c6b22d3b097649b7eef530c3e13 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 12:06:35 -0600 Subject: [PATCH 16/23] [set] Use better hash --- src/hb-bit-page.hh | 5 +---- src/hb-bit-set.hh | 6 +++++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/hb-bit-page.hh b/src/hb-bit-page.hh index 9b027ac59..e578d2643 100644 --- a/src/hb-bit-page.hh +++ b/src/hb-bit-page.hh @@ -104,10 +104,7 @@ struct hb_bit_page_t } uint32_t hash () const { - return - + hb_iter (v) - | hb_reduce ([] (uint32_t h, const elt_t &_) { return h * 31 + hb_hash (_); }, (uint32_t) 0u) - ; + return hb_bytes_t ((const char 
*) &v, sizeof (v)).hash (); } void add (hb_codepoint_t g) { elt (g) |= mask (g); } diff --git a/src/hb-bit-set.hh b/src/hb-bit-set.hh index d290f6114..aad81e25a 100644 --- a/src/hb-bit-set.hh +++ b/src/hb-bit-set.hh @@ -134,7 +134,11 @@ struct hb_bit_set_t { uint32_t h = 0; for (auto &map : page_map) - h = h * 31 + hb_hash (map.major) + hb_hash (pages[map.index]); + { + auto &page = pages.arrayZ[map.index]; + if (unlikely (page.is_empty ())) continue; + h = h * 31 + hb_hash (map.major) + hb_hash (page); + } return h; } From bdaa74d25ff5477c72f69249181b5d840cb4cb59 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 11:58:35 -0600 Subject: [PATCH 17/23] [hash] Use fasthash for integer hash This seems to speed things up surprisingly. --- src/hb-algs.hh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index 5629d7594..e34885fe2 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -307,6 +307,12 @@ static inline uint32_t fasthash32(const void *buf, size_t len, uint32_t seed) return h - (h >> 32); } +template // This line speeds things up. Go figure... 
+static inline uint32_t _hb_hash32 (uint32_t v) +{ + return fasthash32 (&v, sizeof (v), 0); +} + struct { private: @@ -318,11 +324,11 @@ struct // template ::value && sizeof (T) <= sizeof (uint32_t))> constexpr auto - impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 8388607u /* Mersenne prime */) + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, _hb_hash32 (v)) template ::value && sizeof (T) > sizeof (uint32_t))> constexpr auto - impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, (v * 8388607u) ^ ((v >> 32) * 8388607u) /* Mersenne prime */) + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, _hb_hash32 (v) ^ _hb_hash32 (v >> 32)) template constexpr auto impl (const T& v, hb_priority<0>) const HB_RETURN (uint32_t, std::hash>{} (hb_deref (v))) From 05567da082b59bc422356c8c10cbe8fc87a6bd13 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 12:22:43 -0600 Subject: [PATCH 18/23] Revert "[hash] Use fasthash for integer hash" This reverts commit 3bf758a57071572a0ffae3c359b4cfec5a096312. This was resulting in long chains again :(. --- src/hb-algs.hh | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index e34885fe2..5629d7594 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -307,12 +307,6 @@ static inline uint32_t fasthash32(const void *buf, size_t len, uint32_t seed) return h - (h >> 32); } -template // This line speeds things up. Go figure... 
-static inline uint32_t _hb_hash32 (uint32_t v) -{ - return fasthash32 (&v, sizeof (v), 0); -} - struct { private: @@ -324,11 +318,11 @@ struct // template ::value && sizeof (T) <= sizeof (uint32_t))> constexpr auto - impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, _hb_hash32 (v)) + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 8388607u /* Mersenne prime */) template ::value && sizeof (T) > sizeof (uint32_t))> constexpr auto - impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, _hb_hash32 (v) ^ _hb_hash32 (v >> 32)) + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, (v * 8388607u) ^ ((v >> 32) * 8388607u) /* Mersenne prime */) template constexpr auto impl (const T& v, hb_priority<0>) const HB_RETURN (uint32_t, std::hash>{} (hb_deref (v))) From b2b15fa30aeaad022a7953f2a150442d69e30e5b Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 13:39:52 -0600 Subject: [PATCH 19/23] [hash] Links --- src/hb-algs.hh | 3 ++- src/hb-array.hh | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index 5629d7594..1dfa2d092 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -315,7 +315,8 @@ struct impl (const T& v, hb_priority<2>) const HB_RETURN (uint32_t, hb_deref (v).hash ()) // Horrible: std:hash() of integers seems to be identity in gcc / clang?! 
- // + // https://github.com/harfbuzz/harfbuzz/pull/4228 + template ::value && sizeof (T) <= sizeof (uint32_t))> constexpr auto impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 8388607u /* Mersenne prime */) diff --git a/src/hb-array.hh b/src/hb-array.hh index 50b6617e2..a0c826206 100644 --- a/src/hb-array.hh +++ b/src/hb-array.hh @@ -123,6 +123,7 @@ struct hb_array_t : hb_iter_with_fallback_t, Type&> uint32_t hash () const { // FNV-1a hash function + // https://github.com/harfbuzz/harfbuzz/pull/4228 uint32_t current = /*cbf29ce4*/0x84222325; for (auto &v : *this) { @@ -456,12 +457,14 @@ inline bool hb_array_t::operator == (const hb_array_t inline uint32_t hb_array_t::hash () const { + // https://github.com/harfbuzz/harfbuzz/pull/4228 return fasthash32(arrayZ, length, 0xf437ffe6 /* magic? */); } template <> inline uint32_t hb_array_t::hash () const { + // https://github.com/harfbuzz/harfbuzz/pull/4228 return fasthash32(arrayZ, length, 0xf437ffe6 /* magic? */); } From 826fe2c9f78932af8c5aed4ba4db6328f83f44fe Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 13:48:38 -0600 Subject: [PATCH 20/23] [hash] Wrap specialization in HB_OPTIMIZE_SIZE_MORE --- src/hb-array.hh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/hb-array.hh b/src/hb-array.hh index a0c826206..6b43962a4 100644 --- a/src/hb-array.hh +++ b/src/hb-array.hh @@ -454,6 +454,7 @@ inline bool hb_array_t::operator == (const hb_array_t inline uint32_t hb_array_t::hash () const { @@ -467,6 +468,7 @@ inline uint32_t hb_array_t::hash () const // https://github.com/harfbuzz/harfbuzz/pull/4228 return fasthash32(arrayZ, length, 0xf437ffe6 /* magic? 
*/); } +#endif typedef hb_array_t hb_bytes_t; From 9cc7eb80ffac20cc5cfa90b80bcff2872f6c466b Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 16:13:46 -0600 Subject: [PATCH 21/23] [hash] Speed-up int64 hash --- src/hb-algs.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index 1dfa2d092..14a1ac55a 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -323,7 +323,7 @@ struct template ::value && sizeof (T) > sizeof (uint32_t))> constexpr auto - impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, (v * 8388607u) ^ ((v >> 32) * 8388607u) /* Mersenne prime */) + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, (v ^ (v >> 32)) * 8388607u /* Mersenne prime */) template constexpr auto impl (const T& v, hb_priority<0>) const HB_RETURN (uint32_t, std::hash>{} (hb_deref (v))) From 1fbb08584b172553651a7842ec9ee977991c93c7 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 17:11:05 -0600 Subject: [PATCH 22/23] [hash] Adjust prime number Previous one wasn't a prime. Ouch! 
--- src/hb-algs.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index 14a1ac55a..e8ce0a36c 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -319,11 +319,11 @@ struct template ::value && sizeof (T) <= sizeof (uint32_t))> constexpr auto - impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 8388607u /* Mersenne prime */) + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 131071u /* Mersenne prime */) template ::value && sizeof (T) > sizeof (uint32_t))> constexpr auto - impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, (v ^ (v >> 32)) * 8388607u /* Mersenne prime */) + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, (v ^ (v >> 32)) * 131071u /* Mersenne prime */) template constexpr auto impl (const T& v, hb_priority<0>) const HB_RETURN (uint32_t, std::hash>{} (hb_deref (v))) From 5d0cc0062a75013a388f6929b59cbfa7939dc6e1 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 9 May 2023 18:40:35 -0600 Subject: [PATCH 23/23] [hash] Disable int hash as it has negative performance gain --- src/hb-algs.hh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/hb-algs.hh b/src/hb-algs.hh index e8ce0a36c..1130219c8 100644 --- a/src/hb-algs.hh +++ b/src/hb-algs.hh @@ -314,6 +314,13 @@ struct template constexpr auto impl (const T& v, hb_priority<2>) const HB_RETURN (uint32_t, hb_deref (v).hash ()) +#if 0 + // The following, unfortunately, while keeps the probing chains short, slows + // down the overall hash table performance. Not because of the extra operation + // itself in my opinion, but something else going on that we have not been able + // to track down. So for now, this is disabled. Discuss: + // https://github.com/harfbuzz/harfbuzz/pull/4228 + // Horrible: std:hash() of integers seems to be identity in gcc / clang?! 
// https://github.com/harfbuzz/harfbuzz/pull/4228 @@ -324,6 +331,7 @@ struct template ::value && sizeof (T) > sizeof (uint32_t))> constexpr auto impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, (v ^ (v >> 32)) * 131071u /* Mersenne prime */) +#endif template constexpr auto impl (const T& v, hb_priority<0>) const HB_RETURN (uint32_t, std::hash>{} (hb_deref (v)))