mirror of
https://github.com/harfbuzz/harfbuzz.git
synced 2025-04-05 13:35:06 +00:00
Merge pull request #5069 from harfbuzz/cluster-level-graphemes
[buffer] Add HB_BUFFER_CLUSTER_LEVEL_GRAPHEMES
This commit is contained in:
commit
9c0ac9aec4
12 changed files with 97 additions and 35 deletions
1
.github/workflows/fontations.yml
vendored
1
.github/workflows/fontations.yml
vendored
|
@ -56,7 +56,6 @@ jobs:
|
|||
-Dchafa=disabled \
|
||||
-Dgraphite=enabled \
|
||||
-Doptimization=2 \
|
||||
-Ddoc_tests=true \
|
||||
-Dfontations=enabled
|
||||
- name: Build
|
||||
run: meson compile -Cbuild
|
||||
|
|
|
@ -114,6 +114,9 @@ hb_glyph_position_t
|
|||
hb_buffer_content_type_t
|
||||
hb_buffer_flags_t
|
||||
hb_buffer_cluster_level_t
|
||||
HB_BUFFER_CLUSTER_LEVEL_IS_CHARACTERS
|
||||
HB_BUFFER_CLUSTER_LEVEL_IS_GRAPHEMES
|
||||
HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE
|
||||
hb_segment_properties_t
|
||||
hb_buffer_serialize_format_t
|
||||
hb_buffer_serialize_flags_t
|
||||
|
|
|
@ -63,24 +63,25 @@ static bool
|
|||
buffer_verify_monotone (hb_buffer_t *buffer,
|
||||
hb_font_t *font)
|
||||
{
|
||||
/* Check that clusters are monotone. */
|
||||
if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES ||
|
||||
buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS)
|
||||
if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level))
|
||||
{
|
||||
bool is_forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer));
|
||||
|
||||
unsigned int num_glyphs;
|
||||
hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs);
|
||||
|
||||
for (unsigned int i = 1; i < num_glyphs; i++)
|
||||
if (info[i-1].cluster != info[i].cluster &&
|
||||
(info[i-1].cluster < info[i].cluster) != is_forward)
|
||||
{
|
||||
buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "clusters are not monotone.");
|
||||
return false;
|
||||
}
|
||||
/* Cannot perform this check without monotone clusters. */
|
||||
return true;
|
||||
}
|
||||
|
||||
bool is_forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer));
|
||||
|
||||
unsigned int num_glyphs;
|
||||
hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs);
|
||||
|
||||
for (unsigned int i = 1; i < num_glyphs; i++)
|
||||
if (info[i-1].cluster != info[i].cluster &&
|
||||
(info[i-1].cluster < info[i].cluster) != is_forward)
|
||||
{
|
||||
buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "clusters are not monotone.");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -92,8 +93,7 @@ buffer_verify_unsafe_to_break (hb_buffer_t *buffer,
|
|||
unsigned int num_features,
|
||||
const char * const *shapers)
|
||||
{
|
||||
if (buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES &&
|
||||
buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS)
|
||||
if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level))
|
||||
{
|
||||
/* Cannot perform this check without monotone clusters. */
|
||||
return true;
|
||||
|
@ -207,8 +207,7 @@ buffer_verify_unsafe_to_concat (hb_buffer_t *buffer,
|
|||
unsigned int num_features,
|
||||
const char * const *shapers)
|
||||
{
|
||||
if (buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES &&
|
||||
buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS)
|
||||
if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level))
|
||||
{
|
||||
/* Cannot perform this check without monotone clusters. */
|
||||
return true;
|
||||
|
|
|
@ -518,7 +518,7 @@ void
|
|||
hb_buffer_t::merge_clusters_impl (unsigned int start,
|
||||
unsigned int end)
|
||||
{
|
||||
if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS)
|
||||
if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (cluster_level))
|
||||
{
|
||||
unsafe_to_break (start, end);
|
||||
return;
|
||||
|
@ -551,7 +551,7 @@ void
|
|||
hb_buffer_t::merge_out_clusters (unsigned int start,
|
||||
unsigned int end)
|
||||
{
|
||||
if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS)
|
||||
if (!HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (cluster_level))
|
||||
return;
|
||||
|
||||
if (unlikely (end - start < 2))
|
||||
|
|
|
@ -422,18 +422,34 @@ hb_buffer_get_flags (const hb_buffer_t *buffer);
|
|||
* @HB_BUFFER_CLUSTER_LEVEL_CHARACTERS: Don't group cluster values.
|
||||
* @HB_BUFFER_CLUSTER_LEVEL_DEFAULT: Default cluster level,
|
||||
* equal to @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES.
|
||||
*
|
||||
* @HB_BUFFER_CLUSTER_LEVEL_GRAPHEMES: Only group clusters, but don't enforce monotone order.
|
||||
*
|
||||
* Data type for holding HarfBuzz's clustering behavior options. The cluster level
|
||||
* dictates one aspect of how HarfBuzz will treat non-base characters
|
||||
* dictates one aspect of how HarfBuzz will treat non-base characters
|
||||
* during shaping.
|
||||
*
|
||||
* In @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES, non-base
|
||||
* characters are merged into the cluster of the base character that precedes them.
|
||||
* There is also cluster merging every time the clusters will otherwise become non-monotone.
|
||||
*
|
||||
* In @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS, non-base characters are initially
|
||||
* assigned their own cluster values, which are not merged into preceding base
|
||||
* clusters. This allows HarfBuzz to perform additional operations like reorder
|
||||
* sequences of adjacent marks.
|
||||
* sequences of adjacent marks. The output is still monotone, but the cluster
|
||||
* values are more granular.
|
||||
*
|
||||
* In @HB_BUFFER_CLUSTER_LEVEL_CHARACTERS, non-base characters are assigned their
|
||||
* own cluster values, which are not merged into preceding base clusters. Moreover,
|
||||
* the cluster values are not merged into monotone order. This is the most granular
|
||||
* cluster level, and it is useful for clients that need to know the exact cluster
|
||||
* values of each character, but is harder to use for clients, since clusters
|
||||
* might appear in any order.
|
||||
*
|
||||
* In @HB_BUFFER_CLUSTER_LEVEL_GRAPHEMES, non-base characters are merged into the
|
||||
* cluster of the base character that precedes them. This is similar to the Unicode
|
||||
* Grapheme Cluster algorithm, but it is not exactly the same. The output is
|
||||
* not forced to be monotone. This is useful for clients that want to use HarfBuzz
|
||||
* as a cheap implementation of the Unicode Grapheme Cluster algorithm.
|
||||
*
|
||||
* @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES is the default, because it maintains
|
||||
* backward compatibility with older versions of HarfBuzz. New client programs that
|
||||
|
@ -446,9 +462,52 @@ typedef enum {
|
|||
HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES = 0,
|
||||
HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS = 1,
|
||||
HB_BUFFER_CLUSTER_LEVEL_CHARACTERS = 2,
|
||||
HB_BUFFER_CLUSTER_LEVEL_GRAPHEMES = 3,
|
||||
HB_BUFFER_CLUSTER_LEVEL_DEFAULT = HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES
|
||||
} hb_buffer_cluster_level_t;
|
||||
|
||||
/**
 * HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE:
 * @level: #hb_buffer_cluster_level_t to test
 *
 * Checks whether @level is one of the cluster levels that put cluster
 * values into monotone order, namely
 * @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES or
 * @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS. The level passed in
 * must be a valid one.
 *
 * XSince: REPLACEME
 */
#define HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE(level) \
  ((bool) (((1u << HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) | \
            (1u << HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS)) & \
           (1u << (unsigned) (level))))
|
||||
|
||||
/**
 * HB_BUFFER_CLUSTER_LEVEL_IS_GRAPHEMES:
 * @level: #hb_buffer_cluster_level_t to test
 *
 * Checks whether @level is one of the cluster levels that group cluster
 * values by graphemes, namely
 * @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES or
 * @HB_BUFFER_CLUSTER_LEVEL_GRAPHEMES. The level passed in must be a
 * valid one.
 *
 * XSince: REPLACEME
 */
#define HB_BUFFER_CLUSTER_LEVEL_IS_GRAPHEMES(level) \
  ((bool) (((1u << HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) | \
            (1u << HB_BUFFER_CLUSTER_LEVEL_GRAPHEMES)) & \
           (1u << (unsigned) (level))))
|
||||
|
||||
/**
 * HB_BUFFER_CLUSTER_LEVEL_IS_CHARACTERS:
 * @level: #hb_buffer_cluster_level_t to test
 *
 * Tests whether a cluster level does not group cluster values by graphemes,
 * that is, whether it is @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS or
 * @HB_BUFFER_CLUSTER_LEVEL_CHARACTERS. Requires that the level be valid.
 *
 * XSince: REPLACEME
 */
#define HB_BUFFER_CLUSTER_LEVEL_IS_CHARACTERS(level) \
  ((bool) ((1u << (unsigned) (level)) & \
           ((1u << HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) | \
            (1u << HB_BUFFER_CLUSTER_LEVEL_CHARACTERS))))
|
||||
|
||||
HB_EXTERN void
|
||||
hb_buffer_set_cluster_level (hb_buffer_t *buffer,
|
||||
hb_buffer_cluster_level_t cluster_level);
|
||||
|
|
|
@ -646,7 +646,7 @@ _hb_coretext_shape (hb_shape_plan_t *shape_plan,
|
|||
* B1 M1 B2 M2, and B1-B2 form a ligature, M2's cluster will
|
||||
* continue pointing to B2 even though B2 was merged into B1's
|
||||
* cluster... */
|
||||
if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
|
||||
if (HB_BUFFER_CLUSTER_LEVEL_IS_GRAPHEMES (buffer->cluster_level))
|
||||
{
|
||||
hb_unicode_funcs_t *unicode = buffer->unicode;
|
||||
unsigned int count = buffer->len;
|
||||
|
@ -1292,7 +1292,7 @@ resize_and_retry:
|
|||
* or the native OT backend, only that the cluster indices will be
|
||||
* monotonic in the output buffer. */
|
||||
if (count > 1 && (status_or & kCTRunStatusNonMonotonic) &&
|
||||
buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_CHARACTERS)
|
||||
HB_BUFFER_CLUSTER_LEVEL_IS_MONOTONE (buffer->cluster_level))
|
||||
{
|
||||
hb_glyph_info_t *info = buffer->info;
|
||||
if (HB_DIRECTION_IS_FORWARD (buffer->props.direction))
|
||||
|
|
|
@ -387,6 +387,8 @@ _hb_grapheme_group_func (const hb_glyph_info_t& a HB_UNUSED,
|
|||
static inline void
|
||||
_hb_ot_layout_reverse_graphemes (hb_buffer_t *buffer)
|
||||
{
|
||||
// MONOTONE_GRAPHEMES was already applied and is taken care of by _hb_grapheme_group_func.
|
||||
// So we just check for MONOTONE_CHARACTERS here.
|
||||
buffer->reverse_groups (_hb_grapheme_group_func,
|
||||
buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS);
|
||||
}
|
||||
|
|
|
@ -551,7 +551,7 @@ hb_form_clusters (hb_buffer_t *buffer)
|
|||
if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_NON_ASCII))
|
||||
return;
|
||||
|
||||
if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
|
||||
if (HB_BUFFER_CLUSTER_LEVEL_IS_GRAPHEMES (buffer->cluster_level))
|
||||
foreach_grapheme (buffer, start, end)
|
||||
buffer->merge_clusters (start, end);
|
||||
else
|
||||
|
@ -609,7 +609,7 @@ hb_ensure_native_direction (hb_buffer_t *buffer)
|
|||
* Ogham fonts are supposed to be implemented BTT or not. Need to research that
|
||||
* first. */
|
||||
if ((HB_DIRECTION_IS_HORIZONTAL (direction) &&
|
||||
direction != horiz_dir && horiz_dir != HB_DIRECTION_INVALID) ||
|
||||
direction != horiz_dir && HB_DIRECTION_IS_VALID (horiz_dir)) ||
|
||||
(HB_DIRECTION_IS_VERTICAL (direction) &&
|
||||
direction != HB_DIRECTION_TTB))
|
||||
{
|
||||
|
|
|
@ -298,8 +298,7 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan HB_UNUSED,
|
|||
end = start + 2;
|
||||
if (unlikely (!buffer->successful))
|
||||
break;
|
||||
if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
|
||||
buffer->merge_out_clusters (start, end);
|
||||
buffer->merge_out_clusters (start, end);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@ -372,8 +371,7 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan HB_UNUSED,
|
|||
if (i < end)
|
||||
info[i++].hangul_shaping_feature() = TJMO;
|
||||
|
||||
if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
|
||||
buffer->merge_out_clusters (start, end);
|
||||
buffer->merge_out_clusters (start, end);
|
||||
continue;
|
||||
}
|
||||
else if ((!tindex && buffer->idx + 1 < count && isT (buffer->cur(+1).codepoint)))
|
||||
|
|
|
@ -360,7 +360,7 @@ preprocess_text_thai (const hb_ot_shape_plan_t *plan,
|
|||
{
|
||||
/* Since we decomposed, and NIKHAHIT is combining, merge clusters with the
|
||||
* previous cluster. */
|
||||
if (start && buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES)
|
||||
if (start)
|
||||
buffer->merge_out_clusters (start - 1, end);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
# Our cluster-level=3 doesn't really test anything here.
|
||||
../fonts/4fac3929fc3332834e93673780ec0fe94342d193.ttf;--cluster-level=3;U+0078,U+030A,U+0058,U+030A;[gid2=0+1083|gid3=1@-1132,-8+0|gid1=2+1200|gid3=3@-1190,349+0]
|
||||
../fonts/4fac3929fc3332834e93673780ec0fe94342d193.ttf;--cluster-level=2;U+0078,U+030A,U+0058,U+030A;[gid2=0+1083|gid3=1@-1132,-8+0|gid1=2+1200|gid3=3@-1190,349+0]
|
||||
../fonts/43ef465752be9af900745f72fe29cb853a1401a5.ttf;--cluster-level=1;U+05D4,U+05B7,U+05E9,U+05BC,U+05C1,U+05B8,U+05DE,U+05B4,U+05DD;[uni05DD=8+1359|uni05B4=7@111,0+0|uni05DE=6+1391|uni05B8=5+0|uni05BC=3+0|uni05C1=3+0|uni05E9=2+1451|uni05B7=1@28,0+0|uni05D4=0+1338]
|
||||
../fonts/6f36d056bad6d478fc0bf7397bd52dc3bd197d5f.ttf;--cluster-level=1;U+099B,U+09CB,U+09C8,U+09C2,U+09CB,U+098C;[evowelsigninibeng=0+346|aivowelsignbeng=0+346|evowelsignbeng=0+346|chabeng=0+687|uuvowelsignlongbeng=0@-96,0+0|aavowelsignbeng=0+266|aavowelsignbeng=4+266|lvocalicbeng=5+639]
|
||||
|
|
|
@ -383,7 +383,7 @@ shape_options_t::add_options (option_parser_t *parser)
|
|||
0, 0, G_OPTION_ARG_INT, &this->not_found_variation_selector_glyph,
|
||||
"Glyph value to replace not-found variation-selector characters with", nullptr},
|
||||
{"utf8-clusters", 0, 0, G_OPTION_ARG_NONE, &this->utf8_clusters, "Use UTF8 byte indices, not char indices", nullptr},
|
||||
{"cluster-level", 0, 0, G_OPTION_ARG_INT, &this->cluster_level, "Cluster merging level (default: 0)", "0/1/2"},
|
||||
{"cluster-level", 0, 0, G_OPTION_ARG_INT, &this->cluster_level, "Cluster merging level (default: 0)", "0/1/2/3"},
|
||||
{"normalize-glyphs",0, 0, G_OPTION_ARG_NONE, &this->normalize_glyphs, "Rearrange glyph clusters in nominal order", nullptr},
|
||||
{"unsafe-to-concat",0, 0, G_OPTION_ARG_NONE, &this->unsafe_to_concat, "Produce unsafe-to-concat glyph flag", nullptr},
|
||||
{"safe-to-insert-tatweel",0, 0, G_OPTION_ARG_NONE, &this->safe_to_insert_tatweel, "Produce safe-to-insert-tatweel glyph flag", nullptr},
|
||||
|
|
Loading…
Add table
Reference in a new issue