mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 15:42:14 +00:00
ICU-22707 fix hst=V: hst=NA for Kirat Rai
This commit is contained in:
parent
47e9389b8e
commit
6543634649
4 changed files with 29 additions and 2 deletions
|
@ -590,7 +590,11 @@ static int32_t scriptGetMaxValue(const IntProperty &/*prop*/, UProperty /*which*
|
|||
|
||||
/*
|
||||
* Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
|
||||
* Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
|
||||
* Hangul_Syllable_Type is redundant with a subset of Grapheme_Cluster_Break.
|
||||
*
|
||||
* Starting with Unicode 16, there is an exception:
|
||||
* Some Kirat Rai vowels are given GCB=V for proper grapheme clustering, but
|
||||
* they are of course not related to Hangul syllables.
|
||||
*/
|
||||
static const UHangulSyllableType gcbToHst[]={
|
||||
U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */
|
||||
|
@ -610,6 +614,11 @@ static const UHangulSyllableType gcbToHst[]={
|
|||
};
|
||||
|
||||
static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
|
||||
// Ignore supplementary code points: They all have HST=NA.
|
||||
// This is a simple way to handle the GCB!=hst cases since Unicode 16 (Kirat Rai vowels).
|
||||
if(c>0xffff) {
|
||||
return U_HST_NOT_APPLICABLE;
|
||||
}
|
||||
/* see comments on gcbToHst[] above */
|
||||
int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
|
||||
if(gcb<UPRV_LENGTHOF(gcbToHst)) {
|
||||
|
|
|
@ -2699,6 +2699,10 @@ TestAdditionalProperties(void) {
|
|||
|
||||
{ 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
|
||||
|
||||
// Kirat Rai vowels (Unicode 16): GCB=V but hst=NA (exception to GCB=hst for relevant values)
|
||||
{ 0x16D67, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_NOT_APPLICABLE },
|
||||
{ 0x16D6A, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_NOT_APPLICABLE },
|
||||
|
||||
{ -1, 0x410, 0 }, /* version break for Unicode 4.1 */
|
||||
|
||||
{ 0x00d7, UCHAR_PATTERN_SYNTAX, true },
|
||||
|
|
|
@ -648,7 +648,11 @@ public final class UCharacterProperty
|
|||
|
||||
/*
|
||||
* Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
|
||||
* Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
|
||||
* Hangul_Syllable_Type is redundant with a subset of Grapheme_Cluster_Break.
|
||||
*
|
||||
* Starting with Unicode 16, there is an exception:
|
||||
* Some Kirat Rai vowels are given GCB=V for proper grapheme clustering, but
|
||||
* they are of course not related to Hangul syllables.
|
||||
*/
|
||||
private static final int /* UHangulSyllableType */ gcbToHst[]={
|
||||
HangulSyllableType.NOT_APPLICABLE, /* U_GCB_OTHER */
|
||||
|
@ -809,6 +813,12 @@ public final class UCharacterProperty
|
|||
new IntProperty(SRC_PROPSVEC) { // HANGUL_SYLLABLE_TYPE
|
||||
@Override
|
||||
int getValue(int c) {
|
||||
// Ignore supplementary code points: They all have HST=NA.
|
||||
// This is a simple way to handle the GCB!=hst cases since Unicode 16
|
||||
// (Kirat Rai vowels).
|
||||
if(c>0xffff) {
|
||||
return HangulSyllableType.NOT_APPLICABLE;
|
||||
}
|
||||
/* see comments on gcbToHst[] above */
|
||||
int gcb=(getAdditional(c, 2)&GCB_MASK)>>>GCB_SHIFT;
|
||||
if(gcb<gcbToHst.length) {
|
||||
|
|
|
@ -2109,6 +2109,10 @@ public final class UCharacterTest extends CoreTestFmwk
|
|||
|
||||
{ 0xd7a4, UProperty.HANGUL_SYLLABLE_TYPE, 0 },
|
||||
|
||||
// Kirat Rai vowels (Unicode 16): GCB=V but hst=NA (exception to GCB=hst for relevant values)
|
||||
{ 0x16D67, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.NOT_APPLICABLE },
|
||||
{ 0x16D6A, UProperty.HANGUL_SYLLABLE_TYPE, UCharacter.HangulSyllableType.NOT_APPLICABLE },
|
||||
|
||||
{ -1, 0x410, 0 }, /* version break for Unicode 4.1 */
|
||||
|
||||
{ 0x00d7, UProperty.PATTERN_SYNTAX, 1 },
|
||||
|
|
Loading…
Add table
Reference in a new issue