From 0e13e78153c0d6842705066252ae48cdbac42cc5 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 2 Feb 2025 16:50:04 +0000 Subject: [PATCH 01/10] [aat] Add a class-cache to the machine Speeds up Times.ttc benchmark by 20%. --- src/hb-aat-layout-common.hh | 22 ++++++++++++++++++---- src/hb-aat-layout-morx-table.hh | 24 ++++++++++++++++++++++++ src/hb-atomic.hh | 1 + 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/hb-aat-layout-common.hh b/src/hb-aat-layout-common.hh index e241f9f17..540b2dd3b 100644 --- a/src/hb-aat-layout-common.hh +++ b/src/hb-aat-layout-common.hh @@ -30,6 +30,7 @@ #include "hb-aat-layout.hh" #include "hb-aat-map.hh" #include "hb-open-type.hh" +#include "hb-cache.hh" namespace OT { struct GDEF; @@ -43,6 +44,8 @@ using namespace OT; struct ankr; +using hb_aat_class_cache_t = hb_cache_t<24, 16, 8>; + struct hb_aat_apply_context_t : hb_dispatch_context_t { @@ -65,6 +68,7 @@ struct hb_aat_apply_context_t : hb_set_digest_t machine_glyph_set = hb_set_digest_t::full (); hb_set_digest_t left_set = hb_set_digest_t::full (); hb_set_digest_t right_set = hb_set_digest_t::full (); + hb_aat_class_cache_t *machine_class_cache = nullptr; hb_mask_t subtable_flags = 0; /* Unused. For debug tracing only. */ @@ -636,14 +640,24 @@ struct StateTable int new_state (unsigned int newState) const { return Types::extended ? newState : ((int) newState - (int) stateArrayTable) / (int) nClasses; } - template + template unsigned int get_class (hb_codepoint_t glyph_id, unsigned int num_glyphs, - const set_t &glyphs) const + const set_t &glyphs, + cache_t *cache) const { if (unlikely (glyph_id == DELETED_GLYPH)) return CLASS_DELETED_GLYPH; if (!glyphs[glyph_id]) return CLASS_OUT_OF_BOUNDS; - return (this+classTable).get_class (glyph_id, num_glyphs, CLASS_OUT_OF_BOUNDS); + if (cache) + { + unsigned int klass; + if (cache->get (glyph_id, &klass)) + return klass; + } + unsigned klass = (this+classTable).get_class (glyph_id, num_glyphs, CLASS_OUT_OF_BOUNDS); + if (cache) + cache->set (glyph_id, klass); + return klass; } const Entry *get_entries () const @@ -977,7 +991,7 @@ struct StateTableDriver } unsigned int klass = likely (buffer->idx < buffer->len) ? - machine.get_class (buffer->cur().codepoint, num_glyphs, ac->machine_glyph_set) : + machine.get_class (buffer->cur().codepoint, num_glyphs, ac->machine_glyph_set, ac->machine_class_cache) : (unsigned) CLASS_END_OF_TEXT; DEBUG_MSG (APPLY, nullptr, "c%u at %u", klass, buffer->idx); const EntryT &entry = machine.get_entry (state, klass); diff --git a/src/hb-aat-layout-morx-table.hh b/src/hb-aat-layout-morx-table.hh index 532766ff0..f9bda6b3d 100644 --- a/src/hb-aat-layout-morx-table.hh +++ b/src/hb-aat-layout-morx-table.hh @@ -936,6 +936,7 @@ struct hb_accelerate_subtables_context_t : public: hb_set_digest_t digest; + hb_aat_class_cache_t *class_cache; template auto init_ (const T &obj_, unsigned num_glyphs, hb_priority<1>) HB_AUTO_RETURN @@ -953,6 +954,15 @@ struct hb_accelerate_subtables_context_t : void init (const T &obj_, unsigned num_glyphs) { init_ (obj_, num_glyphs, hb_prioritize); + class_cache = (hb_aat_class_cache_t *) malloc (sizeof (hb_aat_class_cache_t)); + if (class_cache) + class_cache->clear (); + } + + void + fini () + { + free (class_cache); } }; @@ -999,12 +1009,21 @@ struct hb_aat_layout_chain_accelerator_t if (unlikely (!thiz)) return nullptr; + thiz->count = count; + hb_accelerate_subtables_context_t c_accelerate_subtables (thiz->subtables, num_glyphs); chain.dispatch (&c_accelerate_subtables); return thiz; } + void destroy () + { + for (unsigned i = 0; i < count; i++) + subtables[i].fini (); + } + + unsigned count; hb_accelerate_subtables_context_t::hb_applicable_t subtables[HB_VAR_ARRAY]; }; @@ -1157,6 +1176,7 @@ struct Chain goto skip; c->subtable_flags = subtable->subFeatureFlags; c->machine_glyph_set = accel ? accel->subtables[i].digest : hb_set_digest_t::full (); + c->machine_class_cache = accel ? accel->subtables[i].class_cache : nullptr; if (!(subtable->get_coverage() & ChainSubtable::AllDirections) && HB_DIRECTION_IS_VERTICAL (c->buffer->props.direction) != @@ -1317,7 +1337,11 @@ struct mortmorx ~accelerator_t () { for (unsigned int i = 0; i < this->chain_count; i++) + { + if (this->accels[i]) + this->accels[i]->destroy (); hb_free (this->accels[i]); + } hb_free (this->accels); this->table.destroy (); } diff --git a/src/hb-atomic.hh b/src/hb-atomic.hh index 366fb32b7..121c463a5 100644 --- a/src/hb-atomic.hh +++ b/src/hb-atomic.hh @@ -212,6 +212,7 @@ struct hb_atomic_ptr_t T *get_acquire () const { return (T *) hb_atomic_ptr_impl_get ((void **) &v); } bool cmpexch (const T *old, T *new_) const { return hb_atomic_ptr_impl_cmpexch ((void **) &v, (void *) old, (void *) new_); } + operator bool () const { return get_acquire () != nullptr; } T * operator -> () const { return get_acquire (); } template operator C * () const { return get_acquire (); } From 832f199607292a41b4621b87a646c30cddc00124 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 2 Feb 2025 17:04:06 +0000 Subject: [PATCH 02/10] [aat] Remove set-digest Now that we have a class cache, this was just speeding things down. --- src/hb-aat-layout-common.hh | 9 +++------ src/hb-aat-layout-morx-table.hh | 31 ------------------------------- 2 files changed, 3 insertions(+), 37 deletions(-) diff --git a/src/hb-aat-layout-common.hh b/src/hb-aat-layout-common.hh index 540b2dd3b..0e694407b 100644 --- a/src/hb-aat-layout-common.hh +++ b/src/hb-aat-layout-common.hh @@ -65,7 +65,6 @@ struct hb_aat_apply_context_t : const OT::GDEF *gdef_table; const hb_sorted_vector_t *range_flags = nullptr; hb_set_digest_t buffer_digest = hb_set_digest_t::full (); - hb_set_digest_t machine_glyph_set = hb_set_digest_t::full (); hb_set_digest_t left_set = hb_set_digest_t::full (); hb_set_digest_t right_set = hb_set_digest_t::full (); hb_aat_class_cache_t *machine_class_cache = nullptr; @@ -640,14 +639,12 @@ struct StateTable int new_state (unsigned int newState) const { return Types::extended ? newState : ((int) newState - (int) stateArrayTable) / (int) nClasses; } - template + template unsigned int get_class (hb_codepoint_t glyph_id, unsigned int num_glyphs, - const set_t &glyphs, cache_t *cache) const { if (unlikely (glyph_id == DELETED_GLYPH)) return CLASS_DELETED_GLYPH; - if (!glyphs[glyph_id]) return CLASS_OUT_OF_BOUNDS; if (cache) { unsigned int klass; @@ -948,7 +945,7 @@ struct StateTableDriver { const auto entry = machine.get_entry (StateTableT::STATE_START_OF_TEXT, CLASS_OUT_OF_BOUNDS); return !c->is_actionable (ac->buffer, this, entry) && - machine.new_state (entry.newState) == StateTableT::STATE_START_OF_TEXT; + machine.new_state (entry.newState) == StateTableT::STATE_START_OF_TEXT; } template @@ -991,7 +988,7 @@ struct StateTableDriver } unsigned int klass = likely (buffer->idx < buffer->len) ? - machine.get_class (buffer->cur().codepoint, num_glyphs, ac->machine_glyph_set, ac->machine_class_cache) : + machine.get_class (buffer->cur().codepoint, num_glyphs, ac->machine_class_cache) : (unsigned) CLASS_END_OF_TEXT; DEBUG_MSG (APPLY, nullptr, "c%u at %u", klass, buffer->idx); const EntryT &entry = machine.get_entry (state, klass); diff --git a/src/hb-aat-layout-morx-table.hh b/src/hb-aat-layout-morx-table.hh index f9bda6b3d..6e2b33565 100644 --- a/src/hb-aat-layout-morx-table.hh +++ b/src/hb-aat-layout-morx-table.hh @@ -171,10 +171,6 @@ struct RearrangementSubtable StateTableDriver driver (machine, c->face); - if (driver.is_idempotent_on_all_out_of_bounds (&dc, c) && - !c->buffer_digest.may_have (c->machine_glyph_set)) - return_trace (false); - driver.drive (&dc, c); return_trace (dc.ret); @@ -336,10 +332,6 @@ struct ContextualSubtable StateTableDriver driver (machine, c->face); - if (driver.is_idempotent_on_all_out_of_bounds (&dc, c) && - !c->buffer_digest.may_have (c->machine_glyph_set)) - return_trace (false); - driver.drive (&dc, c); return_trace (dc.ret); @@ -599,10 +591,6 @@ struct LigatureSubtable StateTableDriver driver (machine, c->face); - if (driver.is_idempotent_on_all_out_of_bounds (&dc, c) && - !c->buffer_digest.may_have (c->machine_glyph_set)) - return_trace (false); - driver.drive (&dc, c); return_trace (dc.ret); @@ -875,10 +863,6 @@ struct InsertionSubtable StateTableDriver driver (machine, c->face); - if (driver.is_idempotent_on_all_out_of_bounds (&dc, c) && - !c->buffer_digest.may_have (c->machine_glyph_set)) - return_trace (false); - driver.drive (&dc, c); return_trace (dc.ret); @@ -935,25 +919,11 @@ struct hb_accelerate_subtables_context_t : friend struct hb_aat_layout_lookup_accelerator_t; public: - hb_set_digest_t digest; hb_aat_class_cache_t *class_cache; - template - auto init_ (const T &obj_, unsigned num_glyphs, hb_priority<1>) HB_AUTO_RETURN - ( - obj_.machine.collect_glyphs (this->digest, num_glyphs) - ) - - template - void init_ (const T &obj_, unsigned num_glyphs, hb_priority<0>) - { - digest = digest.full (); - } - template void init (const T &obj_, unsigned num_glyphs) { - init_ (obj_, num_glyphs, hb_prioritize); class_cache = (hb_aat_class_cache_t *) malloc (sizeof (hb_aat_class_cache_t)); if (class_cache) class_cache->clear (); @@ -1175,7 +1145,6 @@ struct Chain hb_map ([&subtable] (const hb_aat_map_t::range_flags_t _) -> bool { return subtable->subFeatureFlags & (_.flags); }))) goto skip; c->subtable_flags = subtable->subFeatureFlags; - c->machine_glyph_set = accel ? accel->subtables[i].digest : hb_set_digest_t::full (); c->machine_class_cache = accel ? accel->subtables[i].class_cache : nullptr; if (!(subtable->get_coverage() & ChainSubtable::AllDirections) && From 4fb0ac7728e5d3ad20a63f00e636b5a6c180c46a Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 2 Feb 2025 17:06:45 +0000 Subject: [PATCH 03/10] [aat] Minor simplify --- src/hb-aat-layout-common.hh | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/src/hb-aat-layout-common.hh b/src/hb-aat-layout-common.hh index 0e694407b..df4c87637 100644 --- a/src/hb-aat-layout-common.hh +++ b/src/hb-aat-layout-common.hh @@ -639,21 +639,15 @@ struct StateTable int new_state (unsigned int newState) const { return Types::extended ? newState : ((int) newState - (int) stateArrayTable) / (int) nClasses; } - template unsigned int get_class (hb_codepoint_t glyph_id, unsigned int num_glyphs, - cache_t *cache) const + hb_aat_class_cache_t *cache = nullptr) const { if (unlikely (glyph_id == DELETED_GLYPH)) return CLASS_DELETED_GLYPH; - if (cache) - { - unsigned int klass; - if (cache->get (glyph_id, &klass)) - return klass; - } - unsigned klass = (this+classTable).get_class (glyph_id, num_glyphs, CLASS_OUT_OF_BOUNDS); - if (cache) - cache->set (glyph_id, klass); + unsigned klass; + if (cache && cache->get (glyph_id, &klass)) return klass; + klass = (this+classTable).get_class (glyph_id, num_glyphs, CLASS_OUT_OF_BOUNDS); + if (cache) cache->set (glyph_id, klass); return klass; } From b89ab7d0fb2a8e8cd109e6b6239a611763b14d26 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 2 Feb 2025 17:08:20 +0000 Subject: [PATCH 04/10] [aat] Shrink the class cache to be 512 bytes instead of 1kb --- src/hb-aat-layout-common.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-aat-layout-common.hh b/src/hb-aat-layout-common.hh index df4c87637..b4ad51b38 100644 --- a/src/hb-aat-layout-common.hh +++ b/src/hb-aat-layout-common.hh @@ -44,7 +44,7 @@ using namespace OT; struct ankr; -using hb_aat_class_cache_t = hb_cache_t<24, 16, 8>; +using hb_aat_class_cache_t = hb_cache_t<16, 8, 8>; struct hb_aat_apply_context_t : hb_dispatch_context_t From 7b44a94a5592a651598fb83d289e0fedbba31b90 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 2 Feb 2025 17:33:46 +0000 Subject: [PATCH 05/10] [aat] Shrink cache to 256 bytes per subtable --- src/hb-aat-layout-common.hh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hb-aat-layout-common.hh b/src/hb-aat-layout-common.hh index b4ad51b38..9f4495d95 100644 --- a/src/hb-aat-layout-common.hh +++ b/src/hb-aat-layout-common.hh @@ -44,7 +44,8 @@ using namespace OT; struct ankr; -using hb_aat_class_cache_t = hb_cache_t<16, 8, 8>; +using hb_aat_class_cache_t = hb_cache_t<15, 8, 7>; +static_assert (sizeof (hb_aat_class_cache_t) == 256, ""); struct hb_aat_apply_context_t : hb_dispatch_context_t From d9a25bc4ee6da22c002b8a612b8943d29edaa103 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 2 Feb 2025 17:37:43 +0000 Subject: [PATCH 06/10] [aat] Allocate caches together No separate malloc. --- src/hb-aat-layout-morx-table.hh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/hb-aat-layout-morx-table.hh b/src/hb-aat-layout-morx-table.hh index 6e2b33565..689621418 100644 --- a/src/hb-aat-layout-morx-table.hh +++ b/src/hb-aat-layout-morx-table.hh @@ -919,20 +919,17 @@ struct hb_accelerate_subtables_context_t : friend struct hb_aat_layout_lookup_accelerator_t; public: - hb_aat_class_cache_t *class_cache; + mutable hb_aat_class_cache_t class_cache; template void init (const T &obj_, unsigned num_glyphs) { - class_cache = (hb_aat_class_cache_t *) malloc (sizeof (hb_aat_class_cache_t)); - if (class_cache) - class_cache->clear (); + class_cache.clear (); } void fini () { - free (class_cache); } }; @@ -1145,7 +1142,7 @@ struct Chain hb_map ([&subtable] (const hb_aat_map_t::range_flags_t _) -> bool { return subtable->subFeatureFlags & (_.flags); }))) goto skip; c->subtable_flags = subtable->subFeatureFlags; - c->machine_class_cache = accel ? accel->subtables[i].class_cache : nullptr; + c->machine_class_cache = accel ? &accel->subtables[i].class_cache : nullptr; if (!(subtable->get_coverage() & ChainSubtable::AllDirections) && HB_DIRECTION_IS_VERTICAL (c->buffer->props.direction) != From 556eb0297733f673fe59e7eed97cbc5792478198 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 2 Feb 2025 18:19:32 +0000 Subject: [PATCH 07/10] [aat] Remove a lambda This lambda was added to "simplify" the logic. But has a lot of overhead. --- src/hb-aat-layout-common.hh | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/hb-aat-layout-common.hh b/src/hb-aat-layout-common.hh index 9f4495d95..a3d2d63a2 100644 --- a/src/hb-aat-layout-common.hh +++ b/src/hb-aat-layout-common.hh @@ -1032,26 +1032,24 @@ struct StateTableDriver && (entry.flags & context_t::DontAdvance) == (wouldbe_entry.flags & context_t::DontAdvance); }; - const auto is_safe_to_break = [&]() - { + bool is_safe_to_break = + ( /* 1. */ - if (c->is_actionable (buffer, this, entry)) - return false; + !c->is_actionable (buffer, this, entry) && /* 2. */ // This one is meh, I know... - const auto ok = + ( state == StateTableT::STATE_START_OF_TEXT || ((entry.flags & context_t::DontAdvance) && next_state == StateTableT::STATE_START_OF_TEXT) - || is_safe_to_break_extra(); - if (!ok) - return false; + || is_safe_to_break_extra() + ) && /* 3. */ - return !c->is_actionable (buffer, this, machine.get_entry (state, CLASS_END_OF_TEXT)); - }; + !c->is_actionable (buffer, this, machine.get_entry (state, CLASS_END_OF_TEXT)) + ); - if (!is_safe_to_break () && buffer->backtrack_len () && buffer->idx < buffer->len) + if (!is_safe_to_break && buffer->backtrack_len () && buffer->idx < buffer->len) buffer->unsafe_to_break_from_outbuffer (buffer->backtrack_len () - 1, buffer->idx + 1); c->transition (buffer, this, entry); From 91572945bb1bf53dcaa9beabc784ed68cac00298 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 2 Feb 2025 18:25:45 +0000 Subject: [PATCH 08/10] [aat] More delambda Again, this was costly. --- src/hb-aat-layout-common.hh | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/hb-aat-layout-common.hh b/src/hb-aat-layout-common.hh index a3d2d63a2..eab51eb3f 100644 --- a/src/hb-aat-layout-common.hh +++ b/src/hb-aat-layout-common.hh @@ -1017,21 +1017,7 @@ struct StateTableDriver * * https://github.com/harfbuzz/harfbuzz/issues/2860 */ - - const auto is_safe_to_break_extra = [&]() - { - /* 2c. */ - const auto &wouldbe_entry = machine.get_entry(StateTableT::STATE_START_OF_TEXT, klass); - - /* 2c'. */ - if (c->is_actionable (buffer, this, wouldbe_entry)) - return false; - - /* 2c". */ - return next_state == machine.new_state(wouldbe_entry.newState) - && (entry.flags & context_t::DontAdvance) == (wouldbe_entry.flags & context_t::DontAdvance); - }; - + const EntryT *wouldbe_entry; bool is_safe_to_break = ( /* 1. */ @@ -1042,7 +1028,18 @@ struct StateTableDriver ( state == StateTableT::STATE_START_OF_TEXT || ((entry.flags & context_t::DontAdvance) && next_state == StateTableT::STATE_START_OF_TEXT) - || is_safe_to_break_extra() + || ( + /* 2c. */ + wouldbe_entry = &machine.get_entry(StateTableT::STATE_START_OF_TEXT, klass) + , + /* 2c'. */ + !c->is_actionable (buffer, this, *wouldbe_entry) && + /* 2c". */ + ( + next_state == machine.new_state(wouldbe_entry->newState) && + (entry.flags & context_t::DontAdvance) == (wouldbe_entry->flags & context_t::DontAdvance) + ) + ) ) && /* 3. */ From ed37725e00a94fa4fa730a5e973eac23374ab8c4 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 3 Feb 2025 12:59:58 +0000 Subject: [PATCH 09/10] [aat] Micro-optimize get_class --- src/hb-aat-layout-common.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hb-aat-layout-common.hh b/src/hb-aat-layout-common.hh index eab51eb3f..76aa690b5 100644 --- a/src/hb-aat-layout-common.hh +++ b/src/hb-aat-layout-common.hh @@ -644,9 +644,9 @@ struct StateTable unsigned int num_glyphs, hb_aat_class_cache_t *cache = nullptr) const { - if (unlikely (glyph_id == DELETED_GLYPH)) return CLASS_DELETED_GLYPH; unsigned klass; if (cache && cache->get (glyph_id, &klass)) return klass; + if (unlikely (glyph_id == DELETED_GLYPH)) return CLASS_DELETED_GLYPH; klass = (this+classTable).get_class (glyph_id, num_glyphs, CLASS_OUT_OF_BOUNDS); if (cache) cache->set (glyph_id, klass); return klass; From ee4ca63b6dc579baf9d55efe911389d263fa77b6 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 3 Feb 2025 13:23:08 +0000 Subject: [PATCH 10/10] [aat] Micro-optimize --- src/hb-aat-layout-common.hh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/hb-aat-layout-common.hh b/src/hb-aat-layout-common.hh index 76aa690b5..3e7c98aab 100644 --- a/src/hb-aat-layout-common.hh +++ b/src/hb-aat-layout-common.hh @@ -657,13 +657,14 @@ struct StateTable const Entry &get_entry (int state, unsigned int klass) const { - if (unlikely (klass >= nClasses)) + unsigned n_classes = nClasses; + if (unlikely (klass >= n_classes)) klass = CLASS_OUT_OF_BOUNDS; const HBUSHORT *states = (this+stateArrayTable).arrayZ; const Entry *entries = (this+entryTable).arrayZ; - unsigned int entry = states[state * nClasses + klass]; + unsigned int entry = states[state * n_classes + klass]; DEBUG_MSG (APPLY, nullptr, "e%u", entry); return entries[entry];