diff --git a/.github/workflows/fontations.yml b/.github/workflows/fontations.yml index ba8ff9aa3..371265b2d 100644 --- a/.github/workflows/fontations.yml +++ b/.github/workflows/fontations.yml @@ -56,7 +56,6 @@ jobs: -Dchafa=disabled \ -Dgraphite=enabled \ -Doptimization=2 \ - -Ddoc_tests=true \ -Dfontations=enabled - name: Build run: meson compile -Cbuild diff --git a/docs/harfbuzz-sections.txt b/docs/harfbuzz-sections.txt index 5eb940ee8..f09c85128 100644 --- a/docs/harfbuzz-sections.txt +++ b/docs/harfbuzz-sections.txt @@ -114,6 +114,9 @@ hb_glyph_position_t hb_buffer_content_type_t hb_buffer_flags_t hb_buffer_cluster_level_t +HB_BUFFER_CLUSTER_LEVEL_IS_CHARACTERS +HB_BUFFER_CLUSTER_LEVEL_IS_GRAPHEMES +HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE hb_segment_properties_t hb_buffer_serialize_format_t hb_buffer_serialize_flags_t diff --git a/src/hb-buffer-verify.cc b/src/hb-buffer-verify.cc index 345f08d26..76f821637 100644 --- a/src/hb-buffer-verify.cc +++ b/src/hb-buffer-verify.cc @@ -63,24 +63,25 @@ static bool buffer_verify_monotone (hb_buffer_t *buffer, hb_font_t *font) { - /* Check that clusters are monotone. */ - if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES || - buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) + if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level)) { - bool is_forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); - - unsigned int num_glyphs; - hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); - - for (unsigned int i = 1; i < num_glyphs; i++) - if (info[i-1].cluster != info[i].cluster && - (info[i-1].cluster < info[i].cluster) != is_forward) - { - buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "clusters are not monotone."); - return false; - } + /* Cannot perform this check without monotone clusters. 
*/ + return true; } + bool is_forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); + + unsigned int num_glyphs; + hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); + + for (unsigned int i = 1; i < num_glyphs; i++) + if (info[i-1].cluster != info[i].cluster && + (info[i-1].cluster < info[i].cluster) != is_forward) + { + buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "clusters are not monotone."); + return false; + } + return true; } @@ -92,8 +93,7 @@ buffer_verify_unsafe_to_break (hb_buffer_t *buffer, unsigned int num_features, const char * const *shapers) { - if (buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES && - buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) + if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level)) { /* Cannot perform this check without monotone clusters. */ return true; @@ -207,8 +207,7 @@ buffer_verify_unsafe_to_concat (hb_buffer_t *buffer, unsigned int num_features, const char * const *shapers) { - if (buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES && - buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) + if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level)) { /* Cannot perform this check without monotone clusters. 
*/ return true; diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index d0c40664a..7fb95fe7e 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -518,7 +518,7 @@ void hb_buffer_t::merge_clusters_impl (unsigned int start, unsigned int end) { - if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS) + if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (cluster_level)) { unsafe_to_break (start, end); return; @@ -551,7 +551,7 @@ void hb_buffer_t::merge_out_clusters (unsigned int start, unsigned int end) { - if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS) + if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (cluster_level)) return; if (unlikely (end - start < 2)) diff --git a/src/hb-buffer.h b/src/hb-buffer.h index dd0edb9b7..14063ccd0 100644 --- a/src/hb-buffer.h +++ b/src/hb-buffer.h @@ -422,18 +422,34 @@ hb_buffer_get_flags (const hb_buffer_t *buffer); * @HB_BUFFER_CLUSTER_LEVEL_CHARACTERS: Don't group cluster values. * @HB_BUFFER_CLUSTER_LEVEL_DEFAULT: Default cluster level, * equal to @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES. - * + * @HB_BUFFER_CLUSTER_LEVEL_GRAPHEMES: Only group clusters, but don't enforce monotone order. + * * Data type for holding HarfBuzz's clustering behavior options. The cluster level - * dictates one aspect of how HarfBuzz will treat non-base characters + * dictates one aspect of how HarfBuzz will treat non-base characters * during shaping. * * In @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES, non-base * characters are merged into the cluster of the base character that precedes them. + * There is also cluster merging every time the clusters will otherwise become non-monotone. * * In @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS, non-base characters are initially * assigned their own cluster values, which are not merged into preceding base * clusters. This allows HarfBuzz to perform additional operations like reorder - * sequences of adjacent marks. + * sequences of adjacent marks. 
The output is still monotone, but the cluster + * values are more granular. + * + * In @HB_BUFFER_CLUSTER_LEVEL_CHARACTERS, non-base characters are assigned their + * own cluster values, which are not merged into preceding base clusters. Moreover, + * the cluster values are not merged into monotone order. This is the most granular + * cluster level, and it is useful for clients that need to know the exact cluster + * values of each character, but is harder to use for clients, since clusters + * might appear in any order. + * + * In @HB_BUFFER_CLUSTER_LEVEL_GRAPHEMES, non-base characters are merged into the + * cluster of the base character that precedes them. This is similar to the Unicode + * Grapheme Cluster algorithm, but it is not exactly the same. The output is + * not forced to be monotone. This is useful for clients that want to use HarfBuzz + * as a cheap implementation of the Unicode Grapheme Cluster algorithm. * * @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES is the default, because it maintains * backward compatibility with older versions of HarfBuzz. New client programs that @@ -446,9 +462,52 @@ typedef enum { HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES = 0, HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS = 1, HB_BUFFER_CLUSTER_LEVEL_CHARACTERS = 2, + HB_BUFFER_CLUSTER_LEVEL_GRAPHEMES = 3, HB_BUFFER_CLUSTER_LEVEL_DEFAULT = HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES } hb_buffer_cluster_level_t; +/** + * HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE: + * @level: #hb_buffer_cluster_level_t to test + * + * Tests whether a cluster level groups cluster values into monotone order. + * Requires that the level be valid. 
+ * + * XSince: REPLACEME + */ +#define HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE(level) \ + ((bool) ((1u << (unsigned) (level)) & \ + ((1u << HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) | \ + (1u << HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS)))) + +/** + * HB_BUFFER_CLUSTER_LEVEL_IS_GRAPHEMES: + * @level: #hb_buffer_cluster_level_t to test + * + * Tests whether a cluster level groups cluster values by graphemes. Requires + * that the level be valid. + * + * XSince: REPLACEME + */ +#define HB_BUFFER_CLUSTER_LEVEL_IS_GRAPHEMES(level) \ + ((bool) ((1u << (unsigned) (level)) & \ + ((1u << HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) | \ + (1u << HB_BUFFER_CLUSTER_LEVEL_GRAPHEMES)))) + +/** + * HB_BUFFER_CLUSTER_LEVEL_IS_CHARACTERS: + * @level: #hb_buffer_cluster_level_t to test + * + * Tests whether a cluster level does not group cluster values by graphemes. + * Requires that the level be valid. + * + * XSince: REPLACEME + */ +#define HB_BUFFER_CLUSTER_LEVEL_IS_CHARACTERS(level) \ + ((bool) ((1u << (unsigned) (level)) & \ + ((1u << HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) | \ + (1u << HB_BUFFER_CLUSTER_LEVEL_CHARACTERS)))) + HB_EXTERN void hb_buffer_set_cluster_level (hb_buffer_t *buffer, hb_buffer_cluster_level_t cluster_level); diff --git a/src/hb-coretext-shape.cc b/src/hb-coretext-shape.cc index 0bb235f6d..2e029fe2b 100644 --- a/src/hb-coretext-shape.cc +++ b/src/hb-coretext-shape.cc @@ -646,7 +646,7 @@ _hb_coretext_shape (hb_shape_plan_t *shape_plan, * B1 M1 B2 M2, and B1-B2 form a ligature, M2's cluster will * continue pointing to B2 even though B2 was merged into B1's * cluster... */ - if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) + if (HB_BUFFER_CLUSTER_LEVEL_IS_GRAPHEMES (buffer->cluster_level)) { hb_unicode_funcs_t *unicode = buffer->unicode; unsigned int count = buffer->len; @@ -1292,7 +1292,7 @@ resize_and_retry: * or the native OT backend, only that the cluster indices will be * monotonic in the output buffer. 
*/ if (count > 1 && (status_or & kCTRunStatusNonMonotonic) && - buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_CHARACTERS) + HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level)) { hb_glyph_info_t *info = buffer->info; if (HB_DIRECTION_IS_FORWARD (buffer->props.direction)) diff --git a/src/hb-ot-layout.hh b/src/hb-ot-layout.hh index cf0307995..ddec57e06 100644 --- a/src/hb-ot-layout.hh +++ b/src/hb-ot-layout.hh @@ -387,6 +387,8 @@ _hb_grapheme_group_func (const hb_glyph_info_t& a HB_UNUSED, static inline void _hb_ot_layout_reverse_graphemes (hb_buffer_t *buffer) { + // MONOTONE_GRAPHEMES was already applied and is taken care of by _hb_grapheme_group_func. + // So we just check for MONOTONE_CHARACTERS here. buffer->reverse_groups (_hb_grapheme_group_func, buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS); } diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc index 6eecc1a10..0469d5981 100644 --- a/src/hb-ot-shape.cc +++ b/src/hb-ot-shape.cc @@ -551,7 +551,7 @@ hb_form_clusters (hb_buffer_t *buffer) if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_NON_ASCII)) return; - if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) + if (HB_BUFFER_CLUSTER_LEVEL_IS_GRAPHEMES (buffer->cluster_level)) foreach_grapheme (buffer, start, end) buffer->merge_clusters (start, end); else @@ -609,7 +609,7 @@ hb_ensure_native_direction (hb_buffer_t *buffer) * Ogham fonts are supposed to be implemented BTT or not. Need to research that * first. 
*/ if ((HB_DIRECTION_IS_HORIZONTAL (direction) && - direction != horiz_dir && horiz_dir != HB_DIRECTION_INVALID) || + direction != horiz_dir && HB_DIRECTION_IS_VALID (horiz_dir)) || (HB_DIRECTION_IS_VERTICAL (direction) && direction != HB_DIRECTION_TTB)) { diff --git a/src/hb-ot-shaper-hangul.cc b/src/hb-ot-shaper-hangul.cc index c90476bc4..50ea53272 100644 --- a/src/hb-ot-shaper-hangul.cc +++ b/src/hb-ot-shaper-hangul.cc @@ -298,8 +298,7 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan HB_UNUSED, end = start + 2; if (unlikely (!buffer->successful)) break; - if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) - buffer->merge_out_clusters (start, end); + buffer->merge_out_clusters (start, end); continue; } } @@ -372,8 +371,7 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan HB_UNUSED, if (i < end) info[i++].hangul_shaping_feature() = TJMO; - if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) - buffer->merge_out_clusters (start, end); + buffer->merge_out_clusters (start, end); continue; } else if ((!tindex && buffer->idx + 1 < count && isT (buffer->cur(+1).codepoint))) diff --git a/src/hb-ot-shaper-thai.cc b/src/hb-ot-shaper-thai.cc index 6cd67cde3..6124a2114 100644 --- a/src/hb-ot-shaper-thai.cc +++ b/src/hb-ot-shaper-thai.cc @@ -360,7 +360,7 @@ preprocess_text_thai (const hb_ot_shape_plan_t *plan, { /* Since we decomposed, and NIKHAHIT is combining, merge clusters with the * previous cluster. */ - if (start && buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) + if (start) buffer->merge_out_clusters (start - 1, end); } } diff --git a/test/shape/data/in-house/tests/cluster.tests b/test/shape/data/in-house/tests/cluster.tests index 2663e2362..544a561be 100644 --- a/test/shape/data/in-house/tests/cluster.tests +++ b/test/shape/data/in-house/tests/cluster.tests @@ -1,3 +1,5 @@ +# Our cluster-level=3 doesn't really test anything here. 
+../fonts/4fac3929fc3332834e93673780ec0fe94342d193.ttf;--cluster-level=3;U+0078,U+030A,U+0058,U+030A;[gid2=0+1083|gid3=1@-1132,-8+0|gid1=2+1200|gid3=3@-1190,349+0] ../fonts/4fac3929fc3332834e93673780ec0fe94342d193.ttf;--cluster-level=2;U+0078,U+030A,U+0058,U+030A;[gid2=0+1083|gid3=1@-1132,-8+0|gid1=2+1200|gid3=3@-1190,349+0] ../fonts/43ef465752be9af900745f72fe29cb853a1401a5.ttf;--cluster-level=1;U+05D4,U+05B7,U+05E9,U+05BC,U+05C1,U+05B8,U+05DE,U+05B4,U+05DD;[uni05DD=8+1359|uni05B4=7@111,0+0|uni05DE=6+1391|uni05B8=5+0|uni05BC=3+0|uni05C1=3+0|uni05E9=2+1451|uni05B7=1@28,0+0|uni05D4=0+1338] ../fonts/6f36d056bad6d478fc0bf7397bd52dc3bd197d5f.ttf;--cluster-level=1;U+099B,U+09CB,U+09C8,U+09C2,U+09CB,U+098C;[evowelsigninibeng=0+346|aivowelsignbeng=0+346|evowelsignbeng=0+346|chabeng=0+687|uuvowelsignlongbeng=0@-96,0+0|aavowelsignbeng=0+266|aavowelsignbeng=4+266|lvocalicbeng=5+639] diff --git a/util/shape-options.hh b/util/shape-options.hh index caad5a999..a663008ff 100644 --- a/util/shape-options.hh +++ b/util/shape-options.hh @@ -383,7 +383,7 @@ shape_options_t::add_options (option_parser_t *parser) 0, 0, G_OPTION_ARG_INT, &this->not_found_variation_selector_glyph, "Glyph value to replace not-found variation-selector characters with", nullptr}, {"utf8-clusters", 0, 0, G_OPTION_ARG_NONE, &this->utf8_clusters, "Use UTF8 byte indices, not char indices", nullptr}, - {"cluster-level", 0, 0, G_OPTION_ARG_INT, &this->cluster_level, "Cluster merging level (default: 0)", "0/1/2"}, + {"cluster-level", 0, 0, G_OPTION_ARG_INT, &this->cluster_level, "Cluster merging level (default: 0)", "0/1/2/3"}, {"normalize-glyphs",0, 0, G_OPTION_ARG_NONE, &this->normalize_glyphs, "Rearrange glyph clusters in nominal order", nullptr}, {"unsafe-to-concat",0, 0, G_OPTION_ARG_NONE, &this->unsafe_to_concat, "Produce unsafe-to-concat glyph flag", nullptr}, {"safe-to-insert-tatweel",0, 0, G_OPTION_ARG_NONE, &this->safe_to_insert_tatweel, "Produce safe-to-insert-tatweel glyph flag", nullptr},