mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-12725 Update u_isIDStart and u_isIDPart to TR31
ICU-12725 move to uprops.cpp
ICU-12725 change dependency
ICU-12725 Fix Java implementation
This commit is contained in:
parent
bb0e745e25
commit
de9cb9a133
7 changed files with 47 additions and 73 deletions
|
@ -304,30 +304,6 @@ u_ispunct(UChar32 c) {
|
|||
return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0);
|
||||
}
|
||||
|
||||
/* Checks if the Unicode character can start a Unicode identifier.*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isIDStart(UChar32 c) {
|
||||
/* same as u_isalpha() */
|
||||
uint32_t props;
|
||||
GET_PROPS(c, props);
|
||||
return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0);
|
||||
}
|
||||
|
||||
/* Checks if the Unicode character can be a Unicode identifier part other than starting the
|
||||
identifier.*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isIDPart(UChar32 c) {
|
||||
uint32_t props;
|
||||
GET_PROPS(c, props);
|
||||
return (UBool)(
|
||||
(CAT_MASK(props)&
|
||||
(U_GC_ND_MASK|U_GC_NL_MASK|
|
||||
U_GC_L_MASK|
|
||||
U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK)
|
||||
)!=0 ||
|
||||
u_isIDIgnorable(c));
|
||||
}
|
||||
|
||||
/*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isIDIgnorable(UChar32 c) {
|
||||
|
|
|
@ -3837,9 +3837,8 @@ u_getPropertyValueEnum(UProperty property,
|
|||
|
||||
/**
|
||||
* Determines if the specified character is permissible as the
|
||||
* first character in an identifier according to Unicode
|
||||
* (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers).
|
||||
* True for characters with general categories "L" (letters) and "Nl" (letter numbers).
|
||||
* first character in an identifier as ID_Start according to
|
||||
* Unicode® Standard Annex #31 UNICODE IDENTIFIER AND PATTERN SYNTAX
|
||||
*
|
||||
* Same as java.lang.Character.isUnicodeIdentifierStart().
|
||||
* Same as UCHAR_ID_START
|
||||
|
@ -3856,12 +3855,9 @@ U_CAPI UBool U_EXPORT2
|
|||
u_isIDStart(UChar32 c);
|
||||
|
||||
/**
|
||||
* Determines if the specified character is permissible
|
||||
* in an identifier according to Java.
|
||||
* True for characters with general categories "L" (letters),
|
||||
* "Nl" (letter numbers), "Nd" (decimal digits),
|
||||
* "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and
|
||||
* u_isIDIgnorable(c).
|
||||
* Determines if the specified character is permissible as a
|
||||
* character other than the first character in an identifier as ID_Continue
|
||||
* according to Unicode® Standard Annex #31 UNICODE IDENTIFIER AND PATTERN SYNTAX
|
||||
*
|
||||
* Same as java.lang.Character.isUnicodeIdentifierPart().
|
||||
* Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE)
|
||||
|
@ -3869,7 +3865,8 @@ u_isIDStart(UChar32 c);
|
|||
* u_isIDIgnorable(c).
|
||||
*
|
||||
* @param c the code point to be tested
|
||||
* @return true if the code point may occur in an identifier according to Java
|
||||
* @return true if the code point may occur in an identifier other than the
|
||||
* first character.
|
||||
*
|
||||
* @see UCHAR_ID_CONTINUE
|
||||
* @see u_isIDStart
|
||||
|
|
|
@ -423,6 +423,19 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
|
|||
}
|
||||
}
|
||||
|
||||
/* Checks if the Unicode character can start a Unicode identifier.*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isIDStart(UChar32 c) {
|
||||
return u_hasBinaryProperty(c, UCHAR_ID_START);
|
||||
}
|
||||
|
||||
/* Checks if the Unicode character can be a Unicode identifier part other than starting the
|
||||
identifier.*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_isIDPart(UChar32 c) {
|
||||
return u_hasBinaryProperty(c, UCHAR_ID_CONTINUE);
|
||||
}
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_stringHasBinaryProperty(const UChar *s, int32_t length, UProperty which) {
|
||||
if (s == nullptr && length != 0) { return false; }
|
||||
|
|
|
@ -895,10 +895,10 @@ static void TestIdentifier()
|
|||
const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
|
||||
const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
|
||||
const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
|
||||
const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
|
||||
const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
|
||||
const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
|
||||
const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
|
||||
const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061, 0x1885, 0x212e, 0x309b};
|
||||
const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019, 0x2e2f};
|
||||
const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045, 0x1886, 0x212e, 0x309c};
|
||||
const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020, 0x2019, 0x2e2f};
|
||||
const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85};
|
||||
const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
|
||||
|
||||
|
|
|
@ -351,7 +351,7 @@ group: uniset_core
|
|||
group: icu_utility_with_props
|
||||
util_props.o
|
||||
deps
|
||||
icu_utility uchar ucase
|
||||
icu_utility uchar ucase uprops
|
||||
|
||||
group: icu_utility
|
||||
util.o
|
||||
|
|
|
@ -4550,20 +4550,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
public static boolean isUnicodeIdentifierPart(int ch)
|
||||
{
|
||||
// if props == 0, it will just fall through and return false
|
||||
// cat == format
|
||||
return ((1 << getType(ch))
|
||||
& ((1 << UCharacterCategory.UPPERCASE_LETTER)
|
||||
| (1 << UCharacterCategory.LOWERCASE_LETTER)
|
||||
| (1 << UCharacterCategory.TITLECASE_LETTER)
|
||||
| (1 << UCharacterCategory.MODIFIER_LETTER)
|
||||
| (1 << UCharacterCategory.OTHER_LETTER)
|
||||
| (1 << UCharacterCategory.LETTER_NUMBER)
|
||||
| (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
|
||||
| (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
|
||||
| (1 << UCharacterCategory.COMBINING_SPACING_MARK)
|
||||
| (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
|
||||
|| isIdentifierIgnorable(ch);
|
||||
return hasBinaryProperty(ch, UProperty.ID_CONTINUE); // single code point
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -4588,15 +4575,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
|||
*/
|
||||
public static boolean isUnicodeIdentifierStart(int ch)
|
||||
{
|
||||
/*int cat = getType(ch);*/
|
||||
// if props == 0, it will just fall through and return false
|
||||
return ((1 << getType(ch))
|
||||
& ((1 << UCharacterCategory.UPPERCASE_LETTER)
|
||||
| (1 << UCharacterCategory.LOWERCASE_LETTER)
|
||||
| (1 << UCharacterCategory.TITLECASE_LETTER)
|
||||
| (1 << UCharacterCategory.MODIFIER_LETTER)
|
||||
| (1 << UCharacterCategory.OTHER_LETTER)
|
||||
| (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
|
||||
return hasBinaryProperty(ch, UProperty.ID_START); // single code point
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -623,15 +623,14 @@ public final class UCharacterTest extends TestFmwk
|
|||
@Test
|
||||
public void TestIdentifier()
|
||||
{
|
||||
int unicodeidstart[] = {0x0250, 0x0000e2, 0x000061};
|
||||
int nonunicodeidstart[] = {0x2000, 0x00000a, 0x002019};
|
||||
int unicodeidpart[] = {0x005f, 0x000032, 0x000045};
|
||||
int nonunicodeidpart[] = {0x2030, 0x0000a3, 0x000020};
|
||||
int unicodeidstart[] = {0x0250, 0x0000e2, 0x000061, 0x001885, 0x00212e, 0x00309b};
|
||||
int nonunicodeidstart[] = {0x2000, 0x00000a, 0x002019, 0x002e2f};
|
||||
int unicodeidpart[] = {0x005f, 0x000032, 0x000045, 0x001886, 0x00212e, 0x00309c};
|
||||
int nonunicodeidpart[] = {0x2030, 0x0000a3, 0x000020, 0x002019, 0x002e2f};
|
||||
int idignore[] = {0x0006, 0x0010, 0x206b};
|
||||
int nonidignore[] = {0x0075, 0x0000a3, 0x000061};
|
||||
|
||||
int size = unicodeidstart.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
for (int i = 0; i < unicodeidstart.length; i ++)
|
||||
{
|
||||
if (!UCharacter.isUnicodeIdentifierStart(unicodeidstart[i]))
|
||||
{
|
||||
|
@ -639,6 +638,9 @@ public final class UCharacterTest extends TestFmwk
|
|||
" expected to be a unicode identifier start character");
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < nonunicodeidstart.length; i ++)
|
||||
{
|
||||
if (UCharacter.isUnicodeIdentifierStart(nonunicodeidstart[i]))
|
||||
{
|
||||
errln("FAIL \\u" + hex(nonunicodeidstart[i]) +
|
||||
|
@ -646,12 +648,18 @@ public final class UCharacterTest extends TestFmwk
|
|||
"character");
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < unicodeidpart.length; i ++)
|
||||
{
|
||||
if (!UCharacter.isUnicodeIdentifierPart(unicodeidpart[i]))
|
||||
{
|
||||
errln("FAIL \\u" + hex(unicodeidpart[i]) +
|
||||
" expected to be a unicode identifier part character");
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < nonunicodeidpart.length; i ++)
|
||||
{
|
||||
if (UCharacter.isUnicodeIdentifierPart(nonunicodeidpart[i]))
|
||||
{
|
||||
errln("FAIL \\u" + hex(nonunicodeidpart[i]) +
|
||||
|
@ -659,23 +667,24 @@ public final class UCharacterTest extends TestFmwk
|
|||
"character");
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < idignore.length; i ++)
|
||||
{
|
||||
if (!UCharacter.isIdentifierIgnorable(idignore[i]))
|
||||
{
|
||||
errln("FAIL \\u" + hex(idignore[i]) +
|
||||
" expected to be a ignorable unicode character");
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < nonidignore.length; i ++)
|
||||
{
|
||||
if (UCharacter.isIdentifierIgnorable(nonidignore[i]))
|
||||
{
|
||||
errln("FAIL \\u" + hex(nonidignore[i]) +
|
||||
" expected not to be a ignorable unicode character");
|
||||
break;
|
||||
}
|
||||
logln("Ok \\u" + hex(unicodeidstart[i]) + " and \\u" +
|
||||
hex(nonunicodeidstart[i]) + " and \\u" +
|
||||
hex(unicodeidpart[i]) + " and \\u" +
|
||||
hex(nonunicodeidpart[i]) + " and \\u" +
|
||||
hex(idignore[i]) + " and \\u" + hex(nonidignore[i]));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue