[set-digest] Inline combiner

Instead of defining one digest and then combining three of them at
different shifts, inline the code. The compiler can optimize it better.

3% speedup on the Amiri benchmark.
This commit is contained in:
Behdad Esfahbod 2025-02-01 19:11:20 +00:00
parent c4c8eb4f8c
commit 92f13bbdd7

View file

@ -64,45 +64,56 @@
* check is done using four bitwise operations only.
*/
template <typename mask_t, unsigned int shift>
struct hb_set_digest_bits_pattern_t
struct hb_set_digest_t
{
using mask_t = uint64_t;
static constexpr unsigned shifts[] = {4, 0, 9};
static constexpr unsigned mask_bytes = sizeof (mask_t);
static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
static constexpr unsigned num_bits = 0
+ (mask_bytes >= 1 ? 3 : 0)
+ (mask_bytes >= 2 ? 1 : 0)
+ (mask_bytes >= 4 ? 1 : 0)
+ (mask_bytes >= 8 ? 1 : 0)
+ (mask_bytes >= 16? 1 : 0)
+ 0;
static constexpr unsigned n = sizeof (shifts) / sizeof (shifts[0]);
static constexpr hb_codepoint_t mb1 = mask_bits - 1;
static constexpr mask_t one = 1;
static constexpr mask_t all = (mask_t) -1;
static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");
void init ()
{ for (unsigned i = 0; i < n; i++) masks[i] = 0; }
void init () { mask = 0; }
static hb_set_digest_t full ()
{
hb_set_digest_t d;
for (unsigned i = 0; i < n; i++) d.masks[i] = all;
return d;
}
static hb_set_digest_bits_pattern_t full () { hb_set_digest_bits_pattern_t d; d.mask = (mask_t) -1; return d; }
void union_ (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; }
void add (hb_codepoint_t g) { mask |= mask_for (g); }
void union_ (const hb_set_digest_t &o)
{ for (unsigned i = 0; i < n; i++) masks[i] |= o.masks[i]; }
bool add_range (hb_codepoint_t a, hb_codepoint_t b)
{
if (mask == (mask_t) -1) return false;
if ((b >> shift) - (a >> shift) >= mask_bits - 1)
bool ret;
ret = false;
for (unsigned i = 0; i < n; i++)
if (masks[i] != all)
ret = true;
if (!ret) return false;
ret = false;
for (unsigned i = 0; i < n; i++)
{
mask = (mask_t) -1;
return false;
}
else
{
mask_t ma = mask_for (a);
mask_t mb = mask_for (b);
mask |= mb + (mb - ma) - (mb < ma);
return true;
mask_t shift = shifts[i];
if ((b >> shift) - (a >> shift) >= mb1)
masks[i] = all;
else
{
mask_t ma = one << ((a >> shift) & mb1);
mask_t mb = one << ((b >> shift) & mb1);
masks[i] |= mb + (mb - ma) - (mb < ma);
ret = true;
}
}
return ret;
}
template <typename T>
@ -125,103 +136,36 @@ struct hb_set_digest_bits_pattern_t
template <typename T>
bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
bool may_have (const hb_set_digest_bits_pattern_t &o) const
{ return mask & o.mask; }
bool may_have (hb_codepoint_t g) const
{ return mask & mask_for (g); }
bool operator [] (hb_codepoint_t g) const
{ return may_have (g); }
private:
static mask_t mask_for (hb_codepoint_t g)
{ return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
mask_t mask = 0;
};
template <typename head_t, typename tail_t>
struct hb_set_digest_combiner_t
{
void init ()
{
head.init ();
tail.init ();
}
static hb_set_digest_combiner_t full () { hb_set_digest_combiner_t d; d.head = head_t::full(); d.tail = tail_t::full (); return d; }
void union_ (const hb_set_digest_combiner_t &o)
{
head.union_ (o.head);
tail.union_(o.tail);
}
void add (hb_codepoint_t g)
{
head.add (g);
tail.add (g);
}
bool add_range (hb_codepoint_t a, hb_codepoint_t b)
{
return (int) head.add_range (a, b) | (int) tail.add_range (a, b);
}
template <typename T>
void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
head.add_array (array, count, stride);
tail.add_array (array, count, stride);
}
template <typename T>
void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
template <typename T>
bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
{
return head.add_sorted_array (array, count, stride) &&
tail.add_sorted_array (array, count, stride);
}
template <typename T>
bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
bool may_have (const hb_set_digest_combiner_t &o) const
{
return head.may_have (o.head) && tail.may_have (o.tail);
for (unsigned i = 0; i < n; i++)
masks[i] |= one << ((g >> shifts[i]) & mb1);
}
bool may_have (hb_codepoint_t g) const
{
return head.may_have (g) && tail.may_have (g);
for (unsigned i = 0; i < n; i++)
if (!(masks[i] & (one << ((g >> shifts[i]) & mb1))))
return false;
return true;
}
bool operator [] (hb_codepoint_t g) const
{ return may_have (g); }
bool may_have (const hb_set_digest_t &o) const
{
for (unsigned i = 0; i < n; i++)
if (!(masks[i] & o.masks[i]))
return false;
return true;
}
private:
head_t head;
tail_t tail;
mask_t masks[n] = {};
};
/*
* hb_set_digest_t
*
* This is a combination of digests that performs "best".
* There is not much science to this: it's a result of intuition
* and testing.
*/
using hb_set_digest_t =
hb_set_digest_combiner_t
<
hb_set_digest_bits_pattern_t<unsigned long, 4>,
hb_set_digest_combiner_t
<
hb_set_digest_bits_pattern_t<unsigned long, 0>,
hb_set_digest_bits_pattern_t<unsigned long, 9>
>
>
;
#endif /* HB_SET_DIGEST_HH */