diff --git a/src/hb-subset-plan.cc b/src/hb-subset-plan.cc index 11a40fccd..a56965246 100644 --- a/src/hb-subset-plan.cc +++ b/src/hb-subset-plan.cc @@ -29,6 +29,8 @@ #include "hb-map.hh" #include "hb-multimap.hh" #include "hb-set.hh" +#include "hb-subset.h" +#include "hb-unicode.h" #include "hb-ot-cmap-table.hh" #include "hb-ot-glyf-table.hh" @@ -209,15 +211,46 @@ _fill_unicode_and_glyph_map(hb_subset_plan_t *plan, _fill_unicode_and_glyph_map(plan, unicode_iterator, unicode_to_gid_for_iterator, unicode_to_gid_for_iterator); } +/* + * Finds additional unicode codepoints which are reachable from the input unicode set. + * Currently this adds in mirrored variants (needed for bidi) of any input unicodes. + */ +static hb_set_t +_unicode_closure (const hb_set_t* unicodes, bool bidi_closure) { + // TODO: we may want to also consider pulling in reachable unicode compositions and decompositions. + // see: https://github.com/harfbuzz/harfbuzz/issues/2283 + hb_set_t out = *unicodes; + if (!bidi_closure) return out; + + if (out.is_inverted()) { + // Don't compute the closure of inverted sets: they are asking to specifically exclude certain codepoints, + // and everything else is already included. 
+ return out; + } + + auto unicode_funcs = hb_unicode_funcs_get_default (); + for (hb_codepoint_t cp : *unicodes) { + hb_codepoint_t mirror = hb_unicode_mirroring(unicode_funcs, cp); + if (unlikely (mirror != cp)) { + out.add(mirror); + } + } + + return out; +} + static void -_populate_unicodes_to_retain (const hb_set_t *unicodes, +_populate_unicodes_to_retain (const hb_set_t *unicodes_in, const hb_set_t *glyphs, hb_subset_plan_t *plan) { + hb_set_t unicodes = _unicode_closure(unicodes_in, + !(plan->flags & HB_SUBSET_FLAGS_NO_BIDI_CLOSURE)); + OT::cmap::accelerator_t cmap (plan->source); unsigned size_threshold = plan->source->get_num_glyphs (); - if (glyphs->is_empty () && unicodes->get_population () < size_threshold) + if (glyphs->is_empty () && unicodes.get_population () < size_threshold) { const hb_map_t* unicode_to_gid = nullptr; @@ -227,9 +260,9 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, // This is approach to collection is faster, but can only be used if glyphs // are not being explicitly added to the subset and the input unicodes set is // not excessively large (eg. an inverted set). - plan->unicode_to_new_gid_list.alloc (unicodes->get_population ()); + plan->unicode_to_new_gid_list.alloc (unicodes.get_population ()); if (!unicode_to_gid) { - _fill_unicode_and_glyph_map(plan, unicodes->iter(), [&] (hb_codepoint_t cp) { + _fill_unicode_and_glyph_map(plan, unicodes.iter(), [&] (hb_codepoint_t cp) { hb_codepoint_t gid; if (!cmap.get_nominal_glyph (cp, &gid)) { return HB_MAP_VALUE_INVALID; @@ -241,7 +274,7 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, // the map. This code is mostly duplicated from above to avoid doing // conditionals on the presence of the unicode_to_gid map each // iteration. 
- _fill_unicode_and_glyph_map(plan, unicodes->iter(), [&] (hb_codepoint_t cp) { + _fill_unicode_and_glyph_map(plan, unicodes.iter(), [&] (hb_codepoint_t cp) { return unicode_to_gid->get (cp); }); } @@ -258,7 +291,7 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, if (!plan->accelerator) { cmap.collect_mapping (&cmap_unicodes_storage, &unicode_glyphid_map_storage); - plan->unicode_to_new_gid_list.alloc (hb_min(unicodes->get_population () + plan->unicode_to_new_gid_list.alloc (hb_min(unicodes.get_population () + glyphs->get_population (), cmap_unicodes->get_population ())); } else { @@ -267,10 +300,10 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, } if (plan->accelerator && - unicodes->get_population () < cmap_unicodes->get_population () && + unicodes.get_population () < cmap_unicodes->get_population () && glyphs->get_population () < cmap_unicodes->get_population ()) { - plan->codepoint_to_glyph->alloc (unicodes->get_population () + glyphs->get_population ()); + plan->codepoint_to_glyph->alloc (unicodes.get_population () + glyphs->get_population ()); auto &gid_to_unicodes = plan->accelerator->gid_to_unicodes; @@ -285,7 +318,7 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, }); } - _fill_unicode_and_glyph_map(plan, unicodes->iter(), [&] (hb_codepoint_t cp) { + _fill_unicode_and_glyph_map(plan, unicodes.iter(), [&] (hb_codepoint_t cp) { /* Don't double-add entry. 
*/ if (plan->codepoint_to_glyph->has (cp)) return HB_MAP_VALUE_INVALID; @@ -306,7 +339,7 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, { _fill_unicode_and_glyph_map(plan, hb_range(first, last + 1), [&] (hb_codepoint_t cp) { hb_codepoint_t gid = (*unicode_glyphid_map)[cp]; - if (!unicodes->has (cp) && !glyphs->has (gid)) + if (!unicodes.has (cp) && !glyphs->has (gid)) return HB_MAP_VALUE_INVALID; return gid; }, diff --git a/src/hb-subset.h b/src/hb-subset.h index 51bc7e793..1ce9c6934 100644 --- a/src/hb-subset.h +++ b/src/hb-subset.h @@ -71,10 +71,12 @@ typedef struct hb_subset_plan_t hb_subset_plan_t; * in the final subset. * @HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES: If set then the unicode ranges in * OS/2 will not be recalculated. - * @HB_SUBSET_FLAGS_NO_LAYOUT_CLOSURE: If set don't perform glyph closure on layout + * @HB_SUBSET_FLAGS_NO_LAYOUT_CLOSURE: If set do not perform glyph closure on layout * substitution rules (GSUB). Since: 7.2.0. * @HB_SUBSET_FLAGS_OPTIMIZE_IUP_DELTAS: If set perform IUP delta optimization on the * remaining gvar table's deltas. Since: 8.5.0 + * @HB_SUBSET_FLAGS_NO_BIDI_CLOSURE: If set do not pull mirrored versions of input + * codepoints into the subset. Since: REPLACEME * @HB_SUBSET_FLAGS_IFTB_REQUIREMENTS: If set enforce requirements on the output subset * to allow it to be used with incremental font transfer IFTB patches. Primarily, * this forces all outline data to use long (32 bit) offsets. 
Since: EXPERIMENTAL @@ -96,8 +98,9 @@ typedef enum { /*< flags >*/ HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES = 0x00000100u, HB_SUBSET_FLAGS_NO_LAYOUT_CLOSURE = 0x00000200u, HB_SUBSET_FLAGS_OPTIMIZE_IUP_DELTAS = 0x00000400u, + HB_SUBSET_FLAGS_NO_BIDI_CLOSURE = 0x00000800u, #ifdef HB_EXPERIMENTAL_API - HB_SUBSET_FLAGS_IFTB_REQUIREMENTS = 0x00000800u, + HB_SUBSET_FLAGS_IFTB_REQUIREMENTS = 0x00001000u, #endif } hb_subset_flags_t; diff --git a/test/subset/data/expected/bidi/Roboto-Regular.default.61,62,63,3C.ttf b/test/subset/data/expected/bidi/Roboto-Regular.default.61,62,63,3C.ttf new file mode 100644 index 000000000..743de85d5 Binary files /dev/null and b/test/subset/data/expected/bidi/Roboto-Regular.default.61,62,63,3C.ttf differ diff --git a/test/subset/data/expected/bidi/Roboto-Regular.default.61,62,63,3E.ttf b/test/subset/data/expected/bidi/Roboto-Regular.default.61,62,63,3E.ttf new file mode 100644 index 000000000..743de85d5 Binary files /dev/null and b/test/subset/data/expected/bidi/Roboto-Regular.default.61,62,63,3E.ttf differ diff --git a/test/subset/data/expected/bidi/Roboto-Regular.no_bidi_closure.61,62,63,3C.ttf b/test/subset/data/expected/bidi/Roboto-Regular.no_bidi_closure.61,62,63,3C.ttf new file mode 100644 index 000000000..3586ec85a Binary files /dev/null and b/test/subset/data/expected/bidi/Roboto-Regular.no_bidi_closure.61,62,63,3C.ttf differ diff --git a/test/subset/data/expected/bidi/Roboto-Regular.no_bidi_closure.61,62,63,3E.ttf b/test/subset/data/expected/bidi/Roboto-Regular.no_bidi_closure.61,62,63,3E.ttf new file mode 100644 index 000000000..0810670ef Binary files /dev/null and b/test/subset/data/expected/bidi/Roboto-Regular.no_bidi_closure.61,62,63,3E.ttf differ diff --git a/test/subset/data/profiles/layout-test-retain-gids.txt b/test/subset/data/profiles/layout-test-retain-gids.txt index 63c67eb97..4fe71eb7e 100644 --- a/test/subset/data/profiles/layout-test-retain-gids.txt +++ b/test/subset/data/profiles/layout-test-retain-gids.txt @@ -1,3 
+1,4 @@ --layout-features=* --notdef-outline +--no-bidi-closure --retain-gids diff --git a/test/subset/data/profiles/layout-test.txt b/test/subset/data/profiles/layout-test.txt index d98633a00..895f69db5 100644 --- a/test/subset/data/profiles/layout-test.txt +++ b/test/subset/data/profiles/layout-test.txt @@ -1,2 +1,3 @@ --layout-features=* --notdef-outline +--no-bidi-closure diff --git a/test/subset/data/profiles/no_bidi_closure.txt b/test/subset/data/profiles/no_bidi_closure.txt new file mode 100644 index 000000000..31adb7199 --- /dev/null +++ b/test/subset/data/profiles/no_bidi_closure.txt @@ -0,0 +1 @@ +--no-bidi-closure diff --git a/test/subset/data/tests/bidi.tests b/test/subset/data/tests/bidi.tests new file mode 100644 index 000000000..fb5732871 --- /dev/null +++ b/test/subset/data/tests/bidi.tests @@ -0,0 +1,10 @@ +FONTS: +Roboto-Regular.ttf + +PROFILES: +default.txt +no_bidi_closure.txt + +SUBSETS: +abc< +abc> diff --git a/test/subset/meson.build b/test/subset/meson.build index 7037223c7..f87bab543 100644 --- a/test/subset/meson.build +++ b/test/subset/meson.build @@ -1,5 +1,6 @@ tests = [ 'basics', + 'bidi', 'cmap', 'cmap14', 'preprocess', diff --git a/util/hb-subset.cc b/util/hb-subset.cc index 431edda34..bdb71c332 100644 --- a/util/hb-subset.cc +++ b/util/hb-subset.cc @@ -985,6 +985,7 @@ subset_main_t::add_options () {"notdef-outline", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag, "Keep the outline of \'.notdef\' glyph", nullptr}, {"no-prune-unicode-ranges", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag, "Don't change the 'OS/2 ulUnicodeRange*' bits.", nullptr}, {"no-layout-closure", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag, "Don't perform glyph closure for layout substitution (GSUB).", nullptr}, + {"no-bidi-closure", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag, "Don't perform bidi closure (adding mirrored variants) for input codepoints.", nullptr}, 
{"glyph-names", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag, "Keep PS glyph names in TT-flavored fonts. ", nullptr}, {"passthrough-tables", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, (gpointer) &set_flag, "Do not drop tables that the tool does not know how to subset.", nullptr}, {"preprocess-face", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &this->preprocess,