[set-digest] Inline combiner

Instead of defining one digest and then combining three of them at different shifts, inline the code. The compiler can optimize it better. 3% speedup with the Amiri benchmark.
2025-04-15 01:18:13 +00:00 · 2025-02-01 19:11:20 +00:00 · 2025-02-01 19:11:20 +00:00 · 92f13bbdd7
commit 92f13bbdd7
parent c4c8eb4f8c
1 changed files with 54 additions and 110 deletions
--- a/src/hb-set-digest.hh
+++ b/src/hb-set-digest.hh
@@ -64,45 +64,56 @@
 * check is done using four bitwise operations only.
 */

-template <typename mask_t, unsigned int shift>
-struct hb_set_digest_bits_pattern_t
+struct hb_set_digest_t
 {
+  using mask_t = uint64_t;
+  static constexpr unsigned shifts[] = {4, 0, 9};
+
  static constexpr unsigned mask_bytes = sizeof (mask_t);
  static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
-  static constexpr unsigned num_bits = 0
-				     + (mask_bytes >= 1 ? 3 : 0)
-				     + (mask_bytes >= 2 ? 1 : 0)
-				     + (mask_bytes >= 4 ? 1 : 0)
-				     + (mask_bytes >= 8 ? 1 : 0)
-				     + (mask_bytes >= 16? 1 : 0)
-				     + 0;
+  static constexpr unsigned n = sizeof (shifts) / sizeof (shifts[0]);
+  static constexpr hb_codepoint_t mb1 = mask_bits - 1;
+  static constexpr mask_t one = 1;
+  static constexpr mask_t all = (mask_t) -1;

-  static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
-  static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");
+  void init ()
+  { for (unsigned i = 0; i < n; i++) masks[i] = 0; }

-  void init () { mask = 0; }
+  static hb_set_digest_t full ()
+  {
+    hb_set_digest_t d;
+    for (unsigned i = 0; i < n; i++) d.masks[i] = all;
+    return d;
+  }

-  static hb_set_digest_bits_pattern_t full () { hb_set_digest_bits_pattern_t d; d.mask = (mask_t) -1; return d; }
-
-  void union_ (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; }
-
-  void add (hb_codepoint_t g) { mask |= mask_for (g); }
+  void union_ (const hb_set_digest_t &o)
+  { for (unsigned i = 0; i < n; i++) masks[i] |= o.masks[i]; }

  bool add_range (hb_codepoint_t a, hb_codepoint_t b)
  {
-    if (mask == (mask_t) -1) return false;
-    if ((b >> shift) - (a >> shift) >= mask_bits - 1)
+    bool ret;
+
+    ret = false;
+    for (unsigned i = 0; i < n; i++)
+      if (masks[i] != all)
+	ret = true;
+    if (!ret) return false;
+
+    ret = false;
+    for (unsigned i = 0; i < n; i++)
    {
-      mask = (mask_t) -1;
-      return false;
-    }
-    else
-    {
-      mask_t ma = mask_for (a);
-      mask_t mb = mask_for (b);
-      mask |= mb + (mb - ma) - (mb < ma);
-      return true;
+      mask_t shift = shifts[i];
+      if ((b >> shift) - (a >> shift) >= mb1)
+	masks[i] = all;
+      else
+      {
+	mask_t ma = one << ((a >> shift) & mb1);
+	mask_t mb = one << ((b >> shift) & mb1);
+	masks[i] |= mb + (mb - ma) - (mb < ma);
+	ret = true;
+      }
    }
+    return ret;
  }

  template <typename T>
@@ -125,103 +136,36 @@ struct hb_set_digest_bits_pattern_t
  template <typename T>
  bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }

-  bool may_have (const hb_set_digest_bits_pattern_t &o) const
-  { return mask & o.mask; }
-
-  bool may_have (hb_codepoint_t g) const
-  { return mask & mask_for (g); }
-
  bool operator [] (hb_codepoint_t g) const
  { return may_have (g); }

-  private:
-
-  static mask_t mask_for (hb_codepoint_t g)
-  { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
-  mask_t mask = 0;
-};
-
-template <typename head_t, typename tail_t>
-struct hb_set_digest_combiner_t
-{
-  void init ()
-  {
-    head.init ();
-    tail.init ();
-  }
-
-  static hb_set_digest_combiner_t full () { hb_set_digest_combiner_t d; d.head = head_t::full(); d.tail = tail_t::full (); return d; }
-
-  void union_ (const hb_set_digest_combiner_t &o)
-  {
-    head.union_ (o.head);
-    tail.union_(o.tail);
-  }

  void add (hb_codepoint_t g)
  {
-    head.add (g);
-    tail.add (g);
-  }
-
-  bool add_range (hb_codepoint_t a, hb_codepoint_t b)
-  {
-    return (int) head.add_range (a, b) | (int) tail.add_range (a, b);
-  }
-  template <typename T>
-  void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
-  {
-    head.add_array (array, count, stride);
-    tail.add_array (array, count, stride);
-  }
-  template <typename T>
-  void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
-  template <typename T>
-  bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
-  {
-    return head.add_sorted_array (array, count, stride) &&
-	   tail.add_sorted_array (array, count, stride);
-  }
-  template <typename T>
-  bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
-
-  bool may_have (const hb_set_digest_combiner_t &o) const
-  {
-    return head.may_have (o.head) && tail.may_have (o.tail);
+    for (unsigned i = 0; i < n; i++)
+      masks[i] |= one << ((g >> shifts[i]) & mb1);
  }

  bool may_have (hb_codepoint_t g) const
  {
-    return head.may_have (g) && tail.may_have (g);
+    for (unsigned i = 0; i < n; i++)
+      if (!(masks[i] & (one << ((g >> shifts[i]) & mb1))))
+	return false;
+    return true;
  }

-  bool operator [] (hb_codepoint_t g) const
-  { return may_have (g); }
+  bool may_have (const hb_set_digest_t &o) const
+  {
+    for (unsigned i = 0; i < n; i++)
+      if (!(masks[i] & o.masks[i]))
+	return false;
+    return true;
+  }

  private:
-  head_t head;
-  tail_t tail;
+
+  mask_t masks[n] = {};
 };


-/*
- * hb_set_digest_t
- *
- * This is a combination of digests that performs "best".
- * There is not much science to this: it's a result of intuition
- * and testing.
- */
-using hb_set_digest_t =
-  hb_set_digest_combiner_t
-  <
-    hb_set_digest_bits_pattern_t<unsigned long, 4>,
-    hb_set_digest_combiner_t
-    <
-      hb_set_digest_bits_pattern_t<unsigned long, 0>,
-      hb_set_digest_bits_pattern_t<unsigned long, 9>
-    >
-  >
-;
-
-
 #endif /* HB_SET_DIGEST_HH */