From 0aa400b1d8d8bf933396e74af9a4248b6c92287b Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sat, 15 Feb 2025 23:19:44 -0700 Subject: [PATCH 01/15] [decycler] Implement an efficient graph cycle detector This is an algorithm I came up with, based on the Floyd's Tortoise-Hare constant-memory linear-time linked-list cycle-detection algorithm. https://en.wikipedia.org/wiki/Cycle_detection#Floyd's_tortoise_and_hare It is linear-time and malloc-free. It *eventually* detects cycles, not immediately. The main different with Floyd's algorithm is that this algorithm detects cycles when one is traversing down a graph, not just a linked list. Our existing cycle-detection algorithms use a set-of-integers, either hb_set_t, or more efficient in this case, hb_map_t. Those include at least one malloc, and as such show up on profiles. Port hb-ot-font COLRv1 to use the decycler instead of previous hb_map_t usage for cycle detection. benchmark-font paint_glyph on NotoColorEmoji-Regular.ttf: Before: 8ms; After: 5.5ms. No cycle detection: 5.5ms. FT COLRv1 API is so slow (174ms) it's not worth porting to this. Other graphs (VARC, etc) to be ported. Test and documentation to be added. --- src/OT/Color/COLR/COLR.hh | 26 +++++----- src/hb-decycler.hh | 100 ++++++++++++++++++++++++++++++++++++++ src/meson.build | 1 + 3 files changed, 112 insertions(+), 15 deletions(-) create mode 100644 src/hb-decycler.hh diff --git a/src/OT/Color/COLR/COLR.hh b/src/OT/Color/COLR/COLR.hh index 6af7be990..bc0d6756a 100644 --- a/src/OT/Color/COLR/COLR.hh +++ b/src/OT/Color/COLR/COLR.hh @@ -29,6 +29,7 @@ #define OT_COLOR_COLR_COLR_HH #include "../../../hb.hh" +#include "../../../hb-decycler.hh" #include "../../../hb-open-type.hh" #include "../../../hb-ot-var-common.hh" #include "../../../hb-paint.hh" @@ -71,8 +72,8 @@ public: hb_array_t palette; hb_color_t foreground; ItemVarStoreInstancer &instancer; - hb_map_t current_glyphs; - hb_map_t current_layers; + hb_decycler_t glyphs_decycler; + hb_decycler_t layers_decycler; int depth_left = HB_MAX_NESTING_LEVEL; int edge_count = HB_MAX_GRAPH_EDGE_COUNT; @@ -2576,7 +2577,9 @@ struct COLR &(get_delta_set_index_map ()), hb_array (font->coords, font->num_coords)); hb_paint_context_t c (this, funcs, data, font, palette_index, foreground, instancer); - c.current_glyphs.add (glyph); + + hb_decycler_node_t node (c.glyphs_decycler); + node.visit (glyph); if (version >= 1) { @@ -2692,19 +2695,16 @@ void PaintColrLayers::paint_glyph (hb_paint_context_t *c) const { TRACE_PAINT (this); const LayerList &paint_offset_lists = c->get_colr_table ()->get_layerList (); + hb_decycler_node_t node (c->layers_decycler); for (unsigned i = firstLayerIndex; i < firstLayerIndex + numLayers; i++) { - if (unlikely (c->current_layers.has (i))) - continue; - - c->current_layers.add (i); + if (unlikely (!node.visit (i))) + return; const Paint &paint = paint_offset_lists.get_paint (i); c->funcs->push_group (c->data); c->recurse (paint); c->funcs->pop_group (c->data, HB_PAINT_COMPOSITE_MODE_SRC_OVER); - - c->current_layers.del (i); } } @@ -2712,16 +2712,14 @@ void PaintColrGlyph::paint_glyph (hb_paint_context_t *c) const { TRACE_PAINT (this); - if (unlikely (c->current_glyphs.has (gid))) + hb_decycler_node_t node (c->glyphs_decycler); + if (unlikely (!node.visit (gid))) return; - c->current_glyphs.add (gid); - c->funcs->push_inverse_root_transform (c->data, c->font); if (c->funcs->color_glyph (c->data, gid, c->font)) { c->funcs->pop_transform (c->data); - c->current_glyphs.del (gid); return; } c->funcs->pop_transform (c->data); @@ -2744,8 +2742,6 @@ void PaintColrGlyph::paint_glyph (hb_paint_context_t *c) const if (has_clip_box) c->funcs->pop_clip (c->data); - - c->current_glyphs.del (gid); } } /* namespace OT */ diff --git a/src/hb-decycler.hh b/src/hb-decycler.hh new file mode 100644 index 000000000..faee8ad08 --- /dev/null +++ b/src/hb-decycler.hh @@ -0,0 +1,100 @@ +/* + * Copyright © 2025 Behdad Esfahbod + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Author(s): Behdad Esfahbod + */ + +#ifndef HB_DECYCLER_HH +#define HB_DECYCLER_HH + +#include "hb.hh" + +struct hb_decycler_node_t; + +struct hb_decycler_t +{ + friend struct hb_decycler_node_t; + + private: + hb_decycler_node_t *tortoise = nullptr; + hb_decycler_node_t *hare = nullptr; + bool tortoise_asleep = false; +}; + +struct hb_decycler_node_t +{ + hb_decycler_node_t (hb_decycler_t &decycler) + : decycler (decycler) + { + snapshot = decycler; + + if (!decycler.tortoise) + { + // First node. + decycler.tortoise = decycler.hare = this; + return; + } + + decycler.hare->next = this; + decycler.hare = this; + + if (decycler.tortoise_asleep) + { + // Wake up toirtoise. + decycler.tortoise_asleep = false; + // Time to move. + decycler.tortoise = decycler.tortoise->next; + } + else + { + // Put toirtoise to sleep. + decycler.tortoise_asleep = true; + } + } + + ~hb_decycler_node_t () + { + decycler = snapshot; + } + + bool visit (unsigned value_) + { + value = value_; + + if (decycler.tortoise == this) + return true; // First node; not a cycle. + + if (decycler.tortoise->value == value) + return false; // Cycle detected. + + return true; + } + + private: + hb_decycler_t &decycler; + hb_decycler_t snapshot; + hb_decycler_node_t *next = nullptr; + unsigned value = (unsigned) -1; +}; + +#endif /* HB_DECYCLER_HH */ diff --git a/src/meson.build b/src/meson.build index b9daabf01..00132b011 100644 --- a/src/meson.build +++ b/src/meson.build @@ -43,6 +43,7 @@ hb_base_sources = files( 'hb-common.cc', 'hb-config.hh', 'hb-debug.hh', + 'hb-decycler.hh', 'hb-dispatch.hh', 'hb-draw.cc', 'hb-draw.hh', From d5faabe7ea3f4025dbe6d76d8f99ad7d1b2fdb23 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 16 Feb 2025 09:47:58 -0700 Subject: [PATCH 02/15] [decycler] Add test --- src/meson.build | 1 + src/test-decycler.cc | 88 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 src/test-decycler.cc diff --git a/src/meson.build b/src/meson.build index 00132b011..8e40e0498 100644 --- a/src/meson.build +++ b/src/meson.build @@ -726,6 +726,7 @@ if get_option('tests').enabled() 'test-bimap': ['test-bimap.cc', 'hb-static.cc'], 'test-cff': ['test-cff.cc', 'hb-static.cc'], 'test-classdef-graph': ['graph/test-classdef-graph.cc', 'hb-static.cc', 'graph/gsubgpos-context.cc'], + 'test-decycler': ['test-decycler.cc', 'hb-static.cc'], 'test-iter': ['test-iter.cc', 'hb-static.cc'], 'test-machinery': ['test-machinery.cc', 'hb-static.cc'], 'test-map': ['test-map.cc', 'hb-static.cc'], diff --git a/src/test-decycler.cc b/src/test-decycler.cc new file mode 100644 index 000000000..b6fa0f747 --- /dev/null +++ b/src/test-decycler.cc @@ -0,0 +1,88 @@ +/* + * Copyright © 2025 Behdad Esfahbod + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Author(s): Behdad Esfahbod + */ + +#include "hb.hh" +#include "hb-decycler.hh" + +static void +tree_recurse (unsigned value, + unsigned max_value, + hb_decycler_t &decycler) +{ + if (value >= max_value) + return; + + hb_decycler_node_t node (decycler); + + bool ret = node.visit (value); + assert (ret); + + tree_recurse (value * 2 + 1, max_value, decycler); + tree_recurse (value * 2 + 2, max_value, decycler); +} + +static void +test_tree () +{ + hb_decycler_t decycler; + tree_recurse (0, 64, decycler); +} + +static void +cycle_recurse (signed value, + signed cycle_length, + hb_decycler_t &decycler) +{ + assert (cycle_length > 0); + + hb_decycler_node_t node (decycler); + + if (!node.visit (value)) + return; + + if (value >= cycle_length) + value = value % cycle_length; + + cycle_recurse (value + 1, cycle_length, decycler); +} + +static void +test_cycle () +{ + hb_decycler_t decycler; + cycle_recurse (2, 3, decycler); + cycle_recurse (-20, 8, decycler); +} + +int +main (int argc, char **argv) +{ + test_tree (); + test_cycle (); + + return 0; +} + From a0f83e783f3eba1ec1edf777b031fa5b01358945 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 16 Feb 2025 09:55:12 -0700 Subject: [PATCH 03/15] [decycler] Reduce stack use 48bytes -> 40bytes per node. --- src/hb-decycler.hh | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/hb-decycler.hh b/src/hb-decycler.hh index faee8ad08..368cc37ae 100644 --- a/src/hb-decycler.hh +++ b/src/hb-decycler.hh @@ -38,7 +38,6 @@ struct hb_decycler_t private: hb_decycler_node_t *tortoise = nullptr; hb_decycler_node_t *hare = nullptr; - bool tortoise_asleep = false; }; struct hb_decycler_node_t @@ -55,21 +54,12 @@ struct hb_decycler_node_t return; } + tortoise_asleep = !decycler.hare->tortoise_asleep; decycler.hare->next = this; decycler.hare = this; - if (decycler.tortoise_asleep) - { - // Wake up toirtoise. - decycler.tortoise_asleep = false; - // Time to move. - decycler.tortoise = decycler.tortoise->next; - } - else - { - // Put toirtoise to sleep. - decycler.tortoise_asleep = true; - } + if (!tortoise_asleep) + decycler.tortoise = decycler.tortoise->next; // Time to move. } ~hb_decycler_node_t () @@ -95,6 +85,7 @@ struct hb_decycler_node_t hb_decycler_t snapshot; hb_decycler_node_t *next = nullptr; unsigned value = (unsigned) -1; + bool tortoise_asleep = false; }; #endif /* HB_DECYCLER_HH */ From 4335e49a029f6233da093d8a56a889a87648fa4e Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 16 Feb 2025 10:33:49 -0700 Subject: [PATCH 04/15] [VARC] Port to hb-decycler-t 5% faster on varc-hanzi test case. --- src/OT/Var/VARC/VARC.cc | 20 +++++++++----------- src/OT/Var/VARC/VARC.hh | 9 +++++---- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/OT/Var/VARC/VARC.cc b/src/OT/Var/VARC/VARC.cc index 26a052705..9414c5a2b 100644 --- a/src/OT/Var/VARC/VARC.cc +++ b/src/OT/Var/VARC/VARC.cc @@ -134,7 +134,7 @@ VarComponent::get_path_at (hb_font_t *font, hb_array_t coords, hb_transform_t total_transform, hb_ubytes_t total_record, - hb_set_t *visited, + hb_decycler_t *decycler, signed *edges_left, signed depth_left, VarRegionList::cache_t *cache) const @@ -319,7 +319,7 @@ VarComponent::get_path_at (hb_font_t *font, VARC.get_path_at (font, gid, draw_session, component_coords, total_transform, parent_gid, - visited, edges_left, depth_left - 1); + decycler, edges_left, depth_left - 1); } #undef PROCESS_TRANSFORM_COMPONENTS @@ -335,13 +335,13 @@ VARC::get_path_at (hb_font_t *font, hb_array_t coords, hb_transform_t transform, hb_codepoint_t parent_glyph, - hb_set_t *visited, + hb_decycler_t *decycler, signed *edges_left, signed depth_left) const { - hb_set_t stack_set; - if (visited == nullptr) - visited = &stack_set; + hb_decycler_t stack_decycler; + if (decycler == nullptr) + decycler = &stack_decycler; signed stack_edges = HB_MAX_GRAPH_EDGE_COUNT; if (edges_left == nullptr) edges_left = &stack_edges; @@ -377,9 +377,9 @@ VARC::get_path_at (hb_font_t *font, return true; (*edges_left)--; - if (visited->has (glyph) || visited->in_error ()) + hb_decycler_node_t node (*decycler); + if (unlikely (!node.visit (glyph))) return true; - visited->add (glyph); hb_ubytes_t record = (this+glyphRecords)[idx]; @@ -392,13 +392,11 @@ VARC::get_path_at (hb_font_t *font, VarCompositeGlyph::get_path_at (font, glyph, draw_session, coords, transform, record, - visited, edges_left, depth_left, + decycler, edges_left, depth_left, cache); (this+varStore).destroy_cache (cache); - visited->del (glyph); - return true; } diff --git a/src/OT/Var/VARC/VARC.hh b/src/OT/Var/VARC/VARC.hh index d6e7e2c84..a1d0f43e0 100644 --- a/src/OT/Var/VARC/VARC.hh +++ b/src/OT/Var/VARC/VARC.hh @@ -1,6 +1,7 @@ #ifndef OT_VAR_VARC_VARC_HH #define OT_VAR_VARC_VARC_HH +#include "../../../hb-decycler.hh" #include "../../../hb-geometry.hh" #include "../../../hb-ot-layout-common.hh" #include "../../../hb-ot-glyf-table.hh" @@ -49,7 +50,7 @@ struct VarComponent hb_array_t coords, hb_transform_t transform, hb_ubytes_t record, - hb_set_t *visited, + hb_decycler_t *decycler, signed *edges_left, signed depth_left, VarRegionList::cache_t *cache = nullptr) const; @@ -64,7 +65,7 @@ struct VarCompositeGlyph hb_array_t coords, hb_transform_t transform, hb_ubytes_t record, - hb_set_t *visited, + hb_decycler_t *decycler, signed *edges_left, signed depth_left, VarRegionList::cache_t *cache = nullptr) @@ -75,7 +76,7 @@ struct VarCompositeGlyph record = comp.get_path_at (font, glyph, draw_session, coords, transform, record, - visited, edges_left, depth_left, cache); + decycler, edges_left, depth_left, cache); } } }; @@ -95,7 +96,7 @@ struct VARC hb_array_t coords, hb_transform_t transform = HB_TRANSFORM_IDENTITY, hb_codepoint_t parent_glyph = HB_CODEPOINT_INVALID, - hb_set_t *visited = nullptr, + hb_decycler_t *decycler = nullptr, signed *edges_left = nullptr, signed depth_left = HB_MAX_NESTING_LEVEL) const; From 0667ceae8756eeba13e6c941a9cbe8afa1507003 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 16 Feb 2025 10:37:21 -0700 Subject: [PATCH 05/15] [VARC] Reduce stack use --- src/OT/Var/VARC/VARC.cc | 7 ------- src/OT/Var/VARC/VARC.hh | 24 ++++++++++++++++++------ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/OT/Var/VARC/VARC.cc b/src/OT/Var/VARC/VARC.cc index 9414c5a2b..d15073f75 100644 --- a/src/OT/Var/VARC/VARC.cc +++ b/src/OT/Var/VARC/VARC.cc @@ -339,13 +339,6 @@ VARC::get_path_at (hb_font_t *font, signed *edges_left, signed depth_left) const { - hb_decycler_t stack_decycler; - if (decycler == nullptr) - decycler = &stack_decycler; - signed stack_edges = HB_MAX_GRAPH_EDGE_COUNT; - if (edges_left == nullptr) - edges_left = &stack_edges; - // Don't recurse on the same glyph. unsigned idx = glyph == parent_glyph ? NOT_COVERED : diff --git a/src/OT/Var/VARC/VARC.hh b/src/OT/Var/VARC/VARC.hh index a1d0f43e0..0772d2864 100644 --- a/src/OT/Var/VARC/VARC.hh +++ b/src/OT/Var/VARC/VARC.hh @@ -94,15 +94,27 @@ struct VARC hb_codepoint_t glyph, hb_draw_session_t &draw_session, hb_array_t coords, - hb_transform_t transform = HB_TRANSFORM_IDENTITY, - hb_codepoint_t parent_glyph = HB_CODEPOINT_INVALID, - hb_decycler_t *decycler = nullptr, - signed *edges_left = nullptr, - signed depth_left = HB_MAX_NESTING_LEVEL) const; + hb_transform_t transform, + hb_codepoint_t parent_glyph, + hb_decycler_t *decycler, + signed *edges_left, + signed depth_left) const; bool get_path (hb_font_t *font, hb_codepoint_t gid, hb_draw_session_t &draw_session) const - { return get_path_at (font, gid, draw_session, hb_array (font->coords, font->num_coords)); } + { + hb_decycler_t decycler; + signed edges = HB_MAX_GRAPH_EDGE_COUNT; + + return get_path_at (font, + gid, + draw_session, + hb_array (font->coords, font->num_coords), + HB_TRANSFORM_IDENTITY, + HB_CODEPOINT_INVALID, + &decycler, + &edges, + HB_MAX_NESTING_LEVEL); } bool paint_glyph (hb_font_t *font, hb_codepoint_t gid, hb_paint_funcs_t *funcs, void *data, hb_color_t foreground) const { From 3cb49717299aba61a60d3ff5e8d7bca915c972ca Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 16 Feb 2025 10:54:11 -0700 Subject: [PATCH 06/15] [decycler] Add some documentation --- src/hb-decycler.hh | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/hb-decycler.hh b/src/hb-decycler.hh index 368cc37ae..f4f8888b8 100644 --- a/src/hb-decycler.hh +++ b/src/hb-decycler.hh @@ -29,6 +29,32 @@ #include "hb.hh" +/* + * hb_decycler_t is an efficient cycle detector for graph traversal. + * It's a simple tortoise-and-hare algorithm with a twist: it's + * designed to detect cycles while traversing a graph in a DFS manner, + * instead of just a linked list. + * + * For Floyd's tortoise and hare algorithm, see: + * https://en.wikipedia.org/wiki/Cycle_detection#Floyd's_tortoise_and_hare + * + * Like Floyd's algorithm, hb_decycler_t is O(n) in the number of nodes + * in the graph. Unlike Floyd's algorithm, hb_decycler_t is designed + * to be used in a DFS traversal, where the graph is not a simple + * linked list, but a tree with cycles. The decycler works by creating + * an implicit linked-list on the stack, of the path from the root to + * the current node, and apply Floyd's algorithm on that list as it goes. + * + * The decycler is malloc-free, and as such, much faster to use than a + * hb_set_t or hb_map_t equivalent. + * + * The decycler detects cycles in the graph *eventually*, not *immediately*. + * That is, it may not detect a cycle until the cycle is fully traversed, + * even multiple times. See Floyd's algorithm analysis for details. + * + * For usage examples see test-decycler.cc. + */ + struct hb_decycler_node_t; struct hb_decycler_t From 2bdf9850225225c65c92b4bf99f97696894a20e9 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 16 Feb 2025 12:14:42 -0700 Subject: [PATCH 07/15] [decycler] Add alternative way of using it to tests --- src/test-decycler.cc | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/src/test-decycler.cc b/src/test-decycler.cc index b6fa0f747..b09faf48d 100644 --- a/src/test-decycler.cc +++ b/src/test-decycler.cc @@ -28,9 +28,9 @@ #include "hb-decycler.hh" static void -tree_recurse (unsigned value, - unsigned max_value, - hb_decycler_t &decycler) +tree_recurse_binary (unsigned value, + unsigned max_value, + hb_decycler_t &decycler) { if (value >= max_value) return; @@ -40,15 +40,41 @@ tree_recurse (unsigned value, bool ret = node.visit (value); assert (ret); - tree_recurse (value * 2 + 1, max_value, decycler); - tree_recurse (value * 2 + 2, max_value, decycler); + tree_recurse_binary (value * 2 + 1, max_value, decycler); + tree_recurse_binary (value * 2 + 2, max_value, decycler); +} + +static void +tree_recurse_tertiary (unsigned value, + unsigned max_value, + hb_decycler_t &decycler) +{ + /* This function implements an alternative way to use the + * decycler. It checks for each node before visiting it. + * It demonstrates reusing a node for multiple visits. */ + + if (value >= max_value) + return; + + hb_decycler_node_t node (decycler); + + value *= 3; + + for (unsigned i = 1; i <= 3; i++) + { + bool ret = node.visit (value + i); + assert (ret); + + tree_recurse_tertiary (value + i, max_value, decycler); + } } static void test_tree () { hb_decycler_t decycler; - tree_recurse (0, 64, decycler); + tree_recurse_binary (0, 64, decycler); + tree_recurse_tertiary (0, 1000, decycler); } static void From bedc8d93735c004b9ec4b440374c1bca251fab5b Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 16 Feb 2025 12:30:18 -0700 Subject: [PATCH 08/15] [decycler] Document algorithm --- src/hb-decycler.hh | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/hb-decycler.hh b/src/hb-decycler.hh index f4f8888b8..dfa596224 100644 --- a/src/hb-decycler.hh +++ b/src/hb-decycler.hh @@ -41,9 +41,12 @@ * Like Floyd's algorithm, hb_decycler_t is O(n) in the number of nodes * in the graph. Unlike Floyd's algorithm, hb_decycler_t is designed * to be used in a DFS traversal, where the graph is not a simple - * linked list, but a tree with cycles. The decycler works by creating - * an implicit linked-list on the stack, of the path from the root to - * the current node, and apply Floyd's algorithm on that list as it goes. + * linked list, but a tree with cycles. Like Floyd's algorithm, it is + * constant-memory (just two pointers). + * + * The decycler works by creating an implicit linked-list on the stack, + * of the path from the root to the current node, and apply Floyd's + * algorithm on that list as it goes. * * The decycler is malloc-free, and as such, much faster to use than a * hb_set_t or hb_map_t equivalent. @@ -52,6 +55,24 @@ * That is, it may not detect a cycle until the cycle is fully traversed, * even multiple times. See Floyd's algorithm analysis for details. * + * There are three method's: + * + * - hb_decycler_node_t() constructor: Creates a new node in the traversal. + * The constructor takes a reference to the decycler object and takes a + * snapshot of it, and advances the hare pointer, and for every other + * descent, advances the tortoise pointer. + * + * - ~hb_decycler_node_t() destructor: Restores the decycler object to the + * snapshot taken in the constructor, effectively removing the node from + * the traversal path. + * + * - bool visit(unsigned value): Called on every node in the graph. Returns + * true if the node is not part of a cycle, and false if it is. The value + * parameter is used to detect cycles. It's the caller's responsibility + * to ensure that the value is unique for each node in the graph. + * The cycle detection is as simple as comparing the value to the value + * held by the tortoise pointer, which is the Floyd's algorithm. + * * For usage examples see test-decycler.cc. */ From c7fc03a33acec9d108757bf6a4ac0311b2626483 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 16 Feb 2025 14:43:09 -0700 Subject: [PATCH 09/15] [ft-colr] Use hb_decycler_t --- src/hb-ft-colr.hh | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/hb-ft-colr.hh b/src/hb-ft-colr.hh index b9c39eda4..aa515f564 100644 --- a/src/hb-ft-colr.hh +++ b/src/hb-ft-colr.hh @@ -27,6 +27,7 @@ #include "hb.hh" +#include "hb-decycler.hh" #include "hb-paint-extents.hh" #include FT_COLOR_H @@ -105,8 +106,8 @@ struct hb_ft_paint_context_t FT_Color *palette; unsigned palette_index; hb_color_t foreground; - hb_map_t current_glyphs; - hb_map_t current_layers; + hb_decycler_t glyphs_decycler; + hb_decycler_t layers_decycler; int depth_left = HB_MAX_NESTING_LEVEL; int edge_count = HB_MAX_GRAPH_EDGE_COUNT; }; @@ -218,6 +219,7 @@ _hb_ft_paint (hb_ft_paint_context_t *c, case FT_COLR_PAINTFORMAT_COLR_LAYERS: { FT_OpaquePaint other_paint = {0}; + hb_decycler_node_t node (c->layers_decycler); while (FT_Get_Paint_Layers (ft_face, &paint.u.colr_layers.layer_iterator, &other_paint)) @@ -226,16 +228,12 @@ _hb_ft_paint (hb_ft_paint_context_t *c, // for cycle detection. unsigned i = (unsigned) (uintptr_t) other_paint.p; - if (unlikely (c->current_layers.has (i))) + if (unlikely (!node.visit (i))) continue; - c->current_layers.add (i); - c->funcs->push_group (c->data); c->recurse (other_paint); c->funcs->pop_group (c->data, HB_PAINT_COMPOSITE_MODE_SRC_OVER); - - c->current_layers.del (i); } } break; @@ -335,18 +333,16 @@ _hb_ft_paint (hb_ft_paint_context_t *c, { hb_codepoint_t gid = paint.u.colr_glyph.glyphID; - if (unlikely (c->current_glyphs.has (gid))) + hb_decycler_node_t node (c->glyphs_decycler); + if (unlikely (!node.visit (gid))) return; - c->current_glyphs.add (gid); - c->funcs->push_inverse_root_transform (c->data, c->font); c->ft_font->lock.unlock (); if (c->funcs->color_glyph (c->data, gid, c->font)) { c->ft_font->lock.lock (); c->funcs->pop_transform (c->data); - c->current_glyphs.del (gid); return; } c->ft_font->lock.lock (); @@ -382,8 +378,6 @@ _hb_ft_paint (hb_ft_paint_context_t *c, if (has_clip_box) c->funcs->pop_clip (c->data); - - c->current_glyphs.del (gid); } } break; @@ -508,7 +502,8 @@ hb_ft_paint_glyph_colr (hb_font_t *font, hb_ft_paint_context_t c (ft_font, font, paint_funcs, paint_data, palette, palette_index, foreground); - c.current_glyphs.add (gid); + hb_decycler_node_t node (c.glyphs_decycler); + node.visit (gid); bool is_bounded = true; FT_ClipBox clip_box; @@ -532,7 +527,8 @@ hb_ft_paint_glyph_colr (hb_font_t *font, hb_ft_paint_context_t ce (ft_font, font, extents_funcs, &extents_data, palette, palette_index, foreground); - ce.current_glyphs.add (gid); + hb_decycler_node_t node2 (ce.glyphs_decycler); + node2.visit (gid); ce.funcs->push_root_transform (ce.data, font); ce.recurse (paint); ce.funcs->pop_transform (ce.data); From 5aea89b5c433f297d6252c7974325a1903f180eb Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 17 Feb 2025 14:32:23 -0700 Subject: [PATCH 10/15] [decycler] Don't leave a dangling pointer around Even if it was never accessed. --- src/hb-decycler.hh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/hb-decycler.hh b/src/hb-decycler.hh index dfa596224..dd192022d 100644 --- a/src/hb-decycler.hh +++ b/src/hb-decycler.hh @@ -112,6 +112,8 @@ struct hb_decycler_node_t ~hb_decycler_node_t () { decycler = snapshot; + if (decycler.hare) + decycler.hare->next = nullptr; } bool visit (unsigned value_) From 646da80c41cb45799f97aa5c81cb0ff5f0f475f2 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 17 Feb 2025 14:49:06 -0700 Subject: [PATCH 11/15] [decycler] Reduce stack use Down from 5 pointers to 4. --- src/hb-decycler.hh | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/hb-decycler.hh b/src/hb-decycler.hh index dd192022d..710daeb80 100644 --- a/src/hb-decycler.hh +++ b/src/hb-decycler.hh @@ -58,13 +58,12 @@ * There are three method's: * * - hb_decycler_node_t() constructor: Creates a new node in the traversal. - * The constructor takes a reference to the decycler object and takes a - * snapshot of it, and advances the hare pointer, and for every other - * descent, advances the tortoise pointer. + * The constructor takes a reference to the decycler object and inserts + * itself as the latest node in the traversal path, by advancing the hare + * pointer, and for every other descent, advancing the tortoise pointer. * - * - ~hb_decycler_node_t() destructor: Restores the decycler object to the - * snapshot taken in the constructor, effectively removing the node from - * the traversal path. + * - ~hb_decycler_node_t() destructor: Restores the decycler object to its + * previous state by removing the node from the traversal path. * * - bool visit(unsigned value): Called on every node in the graph. Returns * true if the node is not part of a cycle, and false if it is. The value @@ -92,8 +91,6 @@ struct hb_decycler_node_t hb_decycler_node_t (hb_decycler_t &decycler) : decycler (decycler) { - snapshot = decycler; - if (!decycler.tortoise) { // First node. @@ -102,6 +99,7 @@ struct hb_decycler_node_t } tortoise_asleep = !decycler.hare->tortoise_asleep; + this->prev = decycler.hare; decycler.hare->next = this; decycler.hare = this; @@ -111,9 +109,13 @@ struct hb_decycler_node_t ~hb_decycler_node_t () { - decycler = snapshot; - if (decycler.hare) - decycler.hare->next = nullptr; + assert (decycler.hare == this); + decycler.hare = prev; + if (prev) + prev->next = nullptr; + + if (!tortoise_asleep) + decycler.tortoise = decycler.tortoise->prev; } bool visit (unsigned value_) @@ -131,8 +133,8 @@ struct hb_decycler_node_t private: hb_decycler_t &decycler; - hb_decycler_t snapshot; hb_decycler_node_t *next = nullptr; + hb_decycler_node_t *prev = nullptr; unsigned value = (unsigned) -1; bool tortoise_asleep = false; }; From fb0e181a3e98760d86581f5da1a5e6ef78977738 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 17 Feb 2025 14:57:20 -0700 Subject: [PATCH 12/15] [decycler] Reduce stack use further Down to three pointers. Exercise for the reader to prove this is optimal. --- src/hb-decycler.hh | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/hb-decycler.hh b/src/hb-decycler.hh index 710daeb80..364e8c464 100644 --- a/src/hb-decycler.hh +++ b/src/hb-decycler.hh @@ -55,6 +55,13 @@ * That is, it may not detect a cycle until the cycle is fully traversed, * even multiple times. See Floyd's algorithm analysis for details. * + * The implementation saves a pointer storage on the stack by combining + * this->u.decycler and this->u.next into a union. This is possible because + * at any point we only need one of those values. The invariant is that + * after construction, and before destruction, of a node, the u.decycler + * field is always valid. The u.next field is only valid when the node is + * in the traversal path, parent to another node. + * * There are three method's: * * - hb_decycler_node_t() constructor: Creates a new node in the traversal. @@ -89,8 +96,9 @@ struct hb_decycler_t struct hb_decycler_node_t { hb_decycler_node_t (hb_decycler_t &decycler) - : decycler (decycler) { + u.decycler = &decycler; + if (!decycler.tortoise) { // First node. @@ -100,19 +108,21 @@ struct hb_decycler_node_t tortoise_asleep = !decycler.hare->tortoise_asleep; this->prev = decycler.hare; - decycler.hare->next = this; + decycler.hare->u.next = this; decycler.hare = this; if (!tortoise_asleep) - decycler.tortoise = decycler.tortoise->next; // Time to move. + decycler.tortoise = decycler.tortoise->u.next; // Time to move. } ~hb_decycler_node_t () { + hb_decycler_t &decycler = *u.decycler; + assert (decycler.hare == this); decycler.hare = prev; if (prev) - prev->next = nullptr; + prev->u.decycler = &decycler; if (!tortoise_asleep) decycler.tortoise = decycler.tortoise->prev; @@ -122,6 +132,8 @@ struct hb_decycler_node_t { value = value_; + hb_decycler_t &decycler = *u.decycler; + if (decycler.tortoise == this) return true; // First node; not a cycle. @@ -132,8 +144,10 @@ struct hb_decycler_node_t } private: - hb_decycler_t &decycler; - hb_decycler_node_t *next = nullptr; + union { + hb_decycler_t *decycler; + hb_decycler_node_t *next; + } u = {nullptr}; hb_decycler_node_t *prev = nullptr; unsigned value = (unsigned) -1; bool tortoise_asleep = false; From 1c18646dd6d5a737e8b831dc5e8e8d6dda93a607 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 17 Feb 2025 15:06:27 -0700 Subject: [PATCH 13/15] [decycler] Reduce stack use, kinda Move the bool to the decycler from the node. The value can now become a full pointer size (next commit). --- src/hb-decycler.hh | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/hb-decycler.hh b/src/hb-decycler.hh index 364e8c464..51cbaf03c 100644 --- a/src/hb-decycler.hh +++ b/src/hb-decycler.hh @@ -89,6 +89,7 @@ struct hb_decycler_t friend struct hb_decycler_node_t; private: + bool tortoise_asleep = true; hb_decycler_node_t *tortoise = nullptr; hb_decycler_node_t *hare = nullptr; }; @@ -99,33 +100,38 @@ struct hb_decycler_node_t { u.decycler = &decycler; + decycler.tortoise_asleep = !decycler.tortoise_asleep; + if (!decycler.tortoise) { // First node. decycler.tortoise = decycler.hare = this; return; } + if (!decycler.tortoise_asleep) + decycler.tortoise = decycler.tortoise->u.next; // Time to move. - tortoise_asleep = !decycler.hare->tortoise_asleep; this->prev = decycler.hare; decycler.hare->u.next = this; decycler.hare = this; - - if (!tortoise_asleep) - decycler.tortoise = decycler.tortoise->u.next; // Time to move. } ~hb_decycler_node_t () { hb_decycler_t &decycler = *u.decycler; + // Inverse of the constructor. + assert (decycler.hare == this); decycler.hare = prev; if (prev) prev->u.decycler = &decycler; - if (!tortoise_asleep) + assert (decycler.tortoise); + if (!decycler.tortoise_asleep) decycler.tortoise = decycler.tortoise->prev; + + decycler.tortoise_asleep = !decycler.tortoise_asleep; } bool visit (unsigned value_) @@ -150,7 +156,6 @@ struct hb_decycler_node_t } u = {nullptr}; hb_decycler_node_t *prev = nullptr; unsigned value = (unsigned) -1; - bool tortoise_asleep = false; }; #endif /* HB_DECYCLER_HH */ From c84e9b95beda05e20c93145dc14e7547fbd76f29 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Mon, 17 Feb 2025 15:08:03 -0700 Subject: [PATCH 14/15] [decycler] Change value type from unsigned to uintptr_t Since the node struct is gonna be 3*sizeof(void*) bytes anyway, change value type to use the full space available. --- src/hb-decycler.hh | 6 +++--- src/hb-ft-colr.hh | 4 +--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/hb-decycler.hh b/src/hb-decycler.hh index 51cbaf03c..e944f350f 100644 --- a/src/hb-decycler.hh +++ b/src/hb-decycler.hh @@ -72,7 +72,7 @@ * - ~hb_decycler_node_t() destructor: Restores the decycler object to its * previous state by removing the node from the traversal path. * - * - bool visit(unsigned value): Called on every node in the graph. Returns + * - bool visit(uintptr_t value): Called on every node in the graph. Returns * true if the node is not part of a cycle, and false if it is. The value * parameter is used to detect cycles. It's the caller's responsibility * to ensure that the value is unique for each node in the graph. @@ -134,7 +134,7 @@ struct hb_decycler_node_t decycler.tortoise_asleep = !decycler.tortoise_asleep; } - bool visit (unsigned value_) + bool visit (uintptr_t value_) { value = value_; @@ -155,7 +155,7 @@ struct hb_decycler_node_t hb_decycler_node_t *next; } u = {nullptr}; hb_decycler_node_t *prev = nullptr; - unsigned value = (unsigned) -1; + uintptr_t value = 0; }; #endif /* HB_DECYCLER_HH */ diff --git a/src/hb-ft-colr.hh b/src/hb-ft-colr.hh index aa515f564..c96698369 100644 --- a/src/hb-ft-colr.hh +++ b/src/hb-ft-colr.hh @@ -226,9 +226,7 @@ _hb_ft_paint (hb_ft_paint_context_t *c, { // FreeType doesn't provide a way to get the layer index, so we use the pointer // for cycle detection. - unsigned i = (unsigned) (uintptr_t) other_paint.p; - - if (unlikely (!node.visit (i))) + if (unlikely (!node.visit ((uintptr_t) other_paint.p))) continue; c->funcs->push_group (c->data); From 0bb72eeed0116bd11c35c4874e7f6e3854d70191 Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Tue, 18 Feb 2025 00:44:29 -0700 Subject: [PATCH 15/15] [decycler] Turn off compiler warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ../src/OT/Var/VARC/../../../hb-decycler.hh:108:25: warning: storing the address of local variable ‘node’ in ‘*&c_15(D)->layers_decycler.hb_decycler_t::tortoise’ [-Wdangling-pointer=] --- src/hb.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/hb.hh b/src/hb.hh index fe466fe1f..8e1dde8dd 100644 --- a/src/hb.hh +++ b/src/hb.hh @@ -131,6 +131,7 @@ #pragma GCC diagnostic ignored "-Wclass-memaccess" #pragma GCC diagnostic ignored "-Wcast-function-type-strict" // https://github.com/harfbuzz/harfbuzz/pull/3859#issuecomment-1295409126 #pragma GCC diagnostic ignored "-Wdangling-reference" // https://github.com/harfbuzz/harfbuzz/issues/4043 +#pragma GCC diagnostic ignored "-Wdangling-pointer" // Trigerred by hb_decycler_node_t(). #pragma GCC diagnostic ignored "-Wformat-nonliteral" #pragma GCC diagnostic ignored "-Wformat-zero-length" #pragma GCC diagnostic ignored "-Wmissing-field-initializers"