Merge pull request #4228 from harfbuzz/better-hash

Better hash
This commit is contained in:
Behdad Esfahbod 2023-05-09 19:02:26 -06:00 committed by GitHub
commit 5d543d6422
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 120 additions and 55 deletions

View file

@ -112,10 +112,12 @@ struct Glyph
if (!plan->new_gid_for_old_gid (gid, &new_gid))
return;
uint32_t hash = hb_hash (new_gid);
if (type != EMPTY)
{
plan->bounds_width_map.set (new_gid, xMax - xMin);
plan->bounds_height_map.set (new_gid, yMax - yMin);
plan->bounds_width_map.set_with_hash (new_gid, hash, xMax - xMin);
plan->bounds_height_map.set_with_hash (new_gid, hash, yMax - yMin);
}
unsigned len = all_points.length;
@ -127,7 +129,7 @@ struct Glyph
signed hori_aw = roundf (rightSideX - leftSideX);
if (hori_aw < 0) hori_aw = 0;
int lsb = roundf (xMin - leftSideX);
plan->hmtx_map.set (new_gid, hb_pair ((unsigned) hori_aw, lsb));
plan->hmtx_map.set_with_hash (new_gid, hash, hb_pair ((unsigned) hori_aw, lsb));
//flag value should be computed using non-empty glyphs
if (type != EMPTY && lsb != xMin)
plan->head_maxp_info.allXMinIsLsb = false;
@ -135,7 +137,7 @@ struct Glyph
signed vert_aw = roundf (topSideY - bottomSideY);
if (vert_aw < 0) vert_aw = 0;
int tsb = roundf (topSideY - yMax);
plan->vmtx_map.set (new_gid, hb_pair ((unsigned) vert_aw, tsb));
plan->vmtx_map.set_with_hash (new_gid, hash, hb_pair ((unsigned) vert_aw, tsb));
}
bool compile_header_bytes (const hb_subset_plan_t *plan,

View file

@ -107,9 +107,8 @@ struct BEInt<Type, 2>
uint8_t ((V ) & 0xFF)} {}
struct __attribute__((packed)) packed_uint16_t { uint16_t v; };
constexpr operator Type () const
{
#if defined(__OPTIMIZE__) && !defined(HB_NO_PACKED) && \
constexpr operator Type () const {
#if defined(__OPTIMIZE__) && \
defined(__BYTE_ORDER) && \
(__BYTE_ORDER == __BIG_ENDIAN || \
(__BYTE_ORDER == __LITTLE_ENDIAN && \
@ -155,7 +154,7 @@ struct BEInt<Type, 4>
struct __attribute__((packed)) packed_uint32_t { uint32_t v; };
constexpr operator Type () const {
#if defined(__OPTIMIZE__) && !defined(HB_NO_PACKED) && \
#if defined(__OPTIMIZE__) && \
defined(__BYTE_ORDER) && \
(__BYTE_ORDER == __BIG_ENDIAN || \
(__BYTE_ORDER == __LITTLE_ENDIAN && \
@ -231,12 +230,108 @@ struct
}
HB_FUNCOBJ (hb_bool);
/* The MIT License
Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use, copy,
modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
// Compression function for Merkle-Damgard construction.
// This function is generated using the framework provided.
//
// Written as a function instead of a function-like macro called `mix`:
// a macro with such a generic name, defined in a header and never
// undefined, rewrites every later `mix (...)` in any translation unit
// that includes it.  The argument is taken by value and the mixed word
// is returned.
static inline uint64_t fasthash_mix (uint64_t h)
{
  h ^= h >> 23;
  h *= 0x2127599bf4325c37ULL;
  h ^= h >> 47;
  return h;
}

/* Computes a 64-bit hash of the `len` bytes starting at `buf`.
 *
 * buf:  start of the input; only the first `len` bytes are read.
 * len:  number of bytes to hash.  It is also folded into the initial
 *       state, so inputs of different length start from different states.
 * seed: initial value XOR-ed into the state.
 *
 * Returns the mixed 64-bit state.  With len == 0 nothing is read from
 * `buf` and the result is fasthash_mix (seed).
 */
static inline uint64_t fasthash64(const void *buf, size_t len, uint64_t seed)
{
  // Declared packed so that loading a word through it does not assume
  // that `buf` is 8-byte aligned.
  struct __attribute__((packed)) packed_uint64_t { uint64_t v; };
  const uint64_t m = 0x880355f21e6d1965ULL;
  const packed_uint64_t *pos = (const packed_uint64_t *)buf;
  const packed_uint64_t *end = pos + (len / 8);
  uint64_t h = seed ^ (len * m);

  // Whole 8-byte words, loaded as stored in memory.
  while (pos != end) {
    h ^= fasthash_mix (pos++->v);
    h *= m;
  }

  // Trailing 1..7 bytes: byte 0 lands in the lowest 8 bits.  When
  // len is a multiple of 8 no case matches and the tail is skipped.
  const unsigned char *pos2 = (const unsigned char*)pos;
  uint64_t v = 0;
  switch (len & 7) {
  case 7: v ^= (uint64_t)pos2[6] << 48; HB_FALLTHROUGH;
  case 6: v ^= (uint64_t)pos2[5] << 40; HB_FALLTHROUGH;
  case 5: v ^= (uint64_t)pos2[4] << 32; HB_FALLTHROUGH;
  case 4: v ^= (uint64_t)pos2[3] << 24; HB_FALLTHROUGH;
  case 3: v ^= (uint64_t)pos2[2] << 16; HB_FALLTHROUGH;
  case 2: v ^= (uint64_t)pos2[1] << 8; HB_FALLTHROUGH;
  case 1: v ^= (uint64_t)pos2[0];
	  h ^= fasthash_mix (v);
	  h *= m;
  }

  return fasthash_mix (h);
}
/* 32-bit variant: hashes the `len` bytes at `buf` with fasthash64 (),
 * using `seed` as the seed, then folds the 64-bit result into 32 bits. */
static inline uint32_t fasthash32(const void *buf, size_t len, uint32_t seed)
{
  const uint64_t hash64 = fasthash64(buf, len, seed);
  // Fermat-residue fold: subtracting the upper half from the full
  // value keeps a contribution from both the high and the low 32 bits
  // of the 64-bit hash in the truncated result.
  const uint64_t folded = hash64 - (hash64 >> 32);
  return (uint32_t) folded;
}
struct
{
private:
template <typename T> constexpr auto
impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, hb_deref (v).hash ())
impl (const T& v, hb_priority<2>) const HB_RETURN (uint32_t, hb_deref (v).hash ())
#if 0
// The following, unfortunately, while it keeps the probing chains short, slows
// down the overall hash table performance. Not because of the extra operation
// itself, in my opinion, but because of something else going on that we have
// not been able to track down. So for now, this is disabled. Discuss:
// https://github.com/harfbuzz/harfbuzz/pull/4228
// Horrible: std::hash() of integers seems to be identity in gcc / clang?!
// https://github.com/harfbuzz/harfbuzz/pull/4228
template <typename T,
hb_enable_if (std::is_integral<T>::value && sizeof (T) <= sizeof (uint32_t))> constexpr auto
impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, v * 131071u /* Mersenne prime */)
template <typename T,
hb_enable_if (std::is_integral<T>::value && sizeof (T) > sizeof (uint32_t))> constexpr auto
impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, (v ^ (v >> 32)) * 131071u /* Mersenne prime */)
#endif
template <typename T> constexpr auto
impl (const T& v, hb_priority<0>) const HB_RETURN (uint32_t, std::hash<hb_decay<decltype (hb_deref (v))>>{} (hb_deref (v)))

View file

@ -123,6 +123,7 @@ struct hb_array_t : hb_iter_with_fallback_t<hb_array_t<Type>, Type&>
uint32_t hash () const
{
// FNV-1a hash function
// https://github.com/harfbuzz/harfbuzz/pull/4228
uint32_t current = /*cbf29ce4*/0x84222325;
for (auto &v : *this)
{
@ -453,55 +454,21 @@ inline bool hb_array_t<const unsigned char>::operator == (const hb_array_t<const
/* Specialize hash() for byte arrays. */
#ifndef HB_OPTIMIZE_SIZE_MORE
template <>
inline uint32_t hb_array_t<const char>::hash () const
{
// FNV-1a hash function
uint32_t current = /*cbf29ce4*/0x84222325;
unsigned i = 0;
#if defined(__OPTIMIZE__) && !defined(HB_NO_PACKED) && \
((defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__))
struct __attribute__((packed)) packed_uint32_t { uint32_t v; };
for (; i + 4 <= this->length; i += 4)
{
current = current ^ hb_hash ((uint32_t) ((const packed_uint32_t *) &this->arrayZ[i])->v);
current = current * 16777619;
}
#endif
for (; i < this->length; i++)
{
current = current ^ hb_hash (this->arrayZ[i]);
current = current * 16777619;
}
return current;
// https://github.com/harfbuzz/harfbuzz/pull/4228
return fasthash32(arrayZ, length, 0xf437ffe6 /* magic? */);
}
template <>
inline uint32_t hb_array_t<const unsigned char>::hash () const
{
// FNV-1a hash function
uint32_t current = /*cbf29ce4*/0x84222325;
unsigned i = 0;
#if defined(__OPTIMIZE__) && !defined(HB_NO_PACKED) && \
((defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__))
struct __attribute__((packed)) packed_uint32_t { uint32_t v; };
for (; i + 4 <= this->length; i += 4)
{
current = current ^ hb_hash ((uint32_t) ((const packed_uint32_t *) &this->arrayZ[i])->v);
current = current * 16777619;
}
#endif
for (; i < this->length; i++)
{
current = current ^ hb_hash (this->arrayZ[i]);
current = current * 16777619;
}
return current;
// https://github.com/harfbuzz/harfbuzz/pull/4228
return fasthash32(arrayZ, length, 0xf437ffe6 /* magic? */);
}
#endif
typedef hb_array_t<const char> hb_bytes_t;

View file

@ -104,10 +104,7 @@ struct hb_bit_page_t
}
uint32_t hash () const
{
return
+ hb_iter (v)
| hb_reduce ([] (uint32_t h, const elt_t &_) { return h * 31 + hb_hash (_); }, (uint32_t) 0u)
;
return hb_bytes_t ((const char *) &v, sizeof (v)).hash ();
}
void add (hb_codepoint_t g) { elt (g) |= mask (g); }

View file

@ -134,7 +134,11 @@ struct hb_bit_set_t
{
uint32_t h = 0;
for (auto &map : page_map)
h = h * 31 + hb_hash (map.major) + hb_hash (pages[map.index]);
{
auto &page = pages.arrayZ[map.index];
if (unlikely (page.is_empty ())) continue;
h = h * 31 + hb_hash (map.major) + hb_hash (page);
}
return h;
}

View file

@ -763,7 +763,7 @@ _hb_cairo_add_sweep_gradient_patches (hb_color_stop_t *stops,
}
//assert (angles[0] + k * span <= 0 && 0 < angles[n_stops - 1] + k * span);
span = fabs (span);
span = fabsf (span);
for (signed l = k; l < 1000; l++)
{