mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-22503 add property Indic_Conjunct_Break
This commit is contained in:
parent
2cbfd134ef
commit
06c077bd35
22 changed files with 3406 additions and 3287 deletions
.bazeliskrc
icu4c/source
common
data
test
icu4j/main/core/src
main
java/com/ibm/icu
resources/com/ibm/icu/impl/data/icudata
test/java/com/ibm/icu/dev/test/lang
tools/unicode
|
@ -6,4 +6,4 @@
|
||||||
# for running Bazel commands while ensuring, through configuration, that only a
|
# for running Bazel commands while ensuring, through configuration, that only a
|
||||||
# specific version of Bazel is executed.
|
# specific version of Bazel is executed.
|
||||||
|
|
||||||
USE_BAZEL_VERSION=7.1.1
|
USE_BAZEL_VERSION=7.2.1
|
||||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -677,13 +677,19 @@ typedef enum UProperty {
|
||||||
* @draft ICU 75
|
* @draft ICU 75
|
||||||
*/
|
*/
|
||||||
UCHAR_IDENTIFIER_STATUS=0x1019,
|
UCHAR_IDENTIFIER_STATUS=0x1019,
|
||||||
|
/**
|
||||||
|
* Enumerated property Indic_Conjunct_Break.
|
||||||
|
* Used in the grapheme cluster break algorithm in UAX #29.
|
||||||
|
* @draft ICU 76
|
||||||
|
*/
|
||||||
|
UCHAR_INDIC_CONJUNCT_BREAK=0x101A,
|
||||||
#endif // U_HIDE_DRAFT_API
|
#endif // U_HIDE_DRAFT_API
|
||||||
#ifndef U_HIDE_DEPRECATED_API
|
#ifndef U_HIDE_DEPRECATED_API
|
||||||
/**
|
/**
|
||||||
* One more than the last constant for enumerated/integer Unicode properties.
|
* One more than the last constant for enumerated/integer Unicode properties.
|
||||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||||
*/
|
*/
|
||||||
UCHAR_INT_LIMIT=0x101A,
|
UCHAR_INT_LIMIT=0x101B,
|
||||||
#endif // U_HIDE_DEPRECATED_API
|
#endif // U_HIDE_DEPRECATED_API
|
||||||
|
|
||||||
/** Bitmask property General_Category_Mask.
|
/** Bitmask property General_Category_Mask.
|
||||||
|
@ -2729,6 +2735,31 @@ typedef enum UIndicSyllabicCategory {
|
||||||
U_INSC_REORDERING_KILLER,
|
U_INSC_REORDERING_KILLER,
|
||||||
} UIndicSyllabicCategory;
|
} UIndicSyllabicCategory;
|
||||||
|
|
||||||
|
#ifndef U_HIDE_DRAFT_API
|
||||||
|
/**
|
||||||
|
* Indic Conjunct Break constants.
|
||||||
|
*
|
||||||
|
* @see UCHAR_INDIC_CONJUNCT_BREAK
|
||||||
|
* @draft ICU 76
|
||||||
|
*/
|
||||||
|
typedef enum UIndicConjunctBreak {
|
||||||
|
/*
|
||||||
|
* Note: UIndicConjunctBreak constants are parsed by preparseucd.py.
|
||||||
|
* It matches lines like
|
||||||
|
* U_INCB_<Unicode Indic_Conjunct_Break value name>
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** @draft ICU 76 */
|
||||||
|
U_INCB_NONE,
|
||||||
|
/** @draft ICU 76 */
|
||||||
|
U_INCB_CONSONANT,
|
||||||
|
/** @draft ICU 76 */
|
||||||
|
U_INCB_EXTEND,
|
||||||
|
/** @draft ICU 76 */
|
||||||
|
U_INCB_LINKER,
|
||||||
|
} UIndicConjunctBreak;
|
||||||
|
#endif // U_HIDE_DRAFT_API
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Vertical Orientation constants.
|
* Vertical Orientation constants.
|
||||||
*
|
*
|
||||||
|
|
|
@ -728,6 +728,7 @@ static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={
|
||||||
{ UPROPS_SRC_INSC, 0, 0, getInSC, layoutGetMaxValue },
|
{ UPROPS_SRC_INSC, 0, 0, getInSC, layoutGetMaxValue },
|
||||||
{ UPROPS_SRC_VO, 0, 0, getVo, layoutGetMaxValue },
|
{ UPROPS_SRC_VO, 0, 0, getVo, layoutGetMaxValue },
|
||||||
{ UPROPS_SRC_PROPSVEC, 0, static_cast<int32_t>(U_ID_STATUS_ALLOWED), getIDStatusValue, getMaxValueFromShift },
|
{ UPROPS_SRC_PROPSVEC, 0, static_cast<int32_t>(U_ID_STATUS_ALLOWED), getIDStatusValue, getMaxValueFromShift },
|
||||||
|
{ 0, UPROPS_INCB_MASK, UPROPS_INCB_SHIFT,defaultGetValue, defaultGetMaxValue },
|
||||||
};
|
};
|
||||||
|
|
||||||
U_CAPI int32_t U_EXPORT2
|
U_CAPI int32_t U_EXPORT2
|
||||||
|
|
|
@ -127,7 +127,8 @@ namespace {
|
||||||
// Bits
|
// Bits
|
||||||
// 31..26 Age major version (major=0..63)
|
// 31..26 Age major version (major=0..63)
|
||||||
// 25..24 Age minor version (minor=0..3)
|
// 25..24 Age minor version (minor=0..3)
|
||||||
// 23..15 reserved
|
// 23..17 reserved
|
||||||
|
// 16..15 Indic Conjunct Break
|
||||||
// 14..12 East Asian Width
|
// 14..12 East Asian Width
|
||||||
// 11..10 3..1: Bits 9..0 = Script_Extensions index
|
// 11..10 3..1: Bits 9..0 = Script_Extensions index
|
||||||
// 3: Script value from Script_Extensions
|
// 3: Script value from Script_Extensions
|
||||||
|
@ -158,6 +159,9 @@ inline constexpr uint8_t UPROPS_AGE_MINOR_MAX = 3;
|
||||||
inline constexpr uint32_t UPROPS_EA_MASK = 0x00007000;
|
inline constexpr uint32_t UPROPS_EA_MASK = 0x00007000;
|
||||||
inline constexpr int32_t UPROPS_EA_SHIFT = 12;
|
inline constexpr int32_t UPROPS_EA_SHIFT = 12;
|
||||||
|
|
||||||
|
inline constexpr uint32_t UPROPS_INCB_MASK = 0x00018000;
|
||||||
|
inline constexpr int32_t UPROPS_INCB_SHIFT = 15;
|
||||||
|
|
||||||
/** Script_Extensions: mask includes Script */
|
/** Script_Extensions: mask includes Script */
|
||||||
inline constexpr uint32_t UPROPS_SCRIPT_X_MASK = 0x00000fff;
|
inline constexpr uint32_t UPROPS_SCRIPT_X_MASK = 0x00000fff;
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -103,6 +103,9 @@ export UNICODE_TOOLS=~/oss/unicodetools/mine/src
|
||||||
so that the makefiles see the new version number.
|
so that the makefiles see the new version number.
|
||||||
cd $ICU_OUT/icu4c
|
cd $ICU_OUT/icu4c
|
||||||
ICU_DATA_BUILDTOOL_OPTS=--include_uni_core_data CXXFLAGS="-DU_USING_ICU_NAMESPACE=0 -Wimplicit-fallthrough" CPPFLAGS="-DU_NO_DEFAULT_INCLUDE_UTF_HEADERS=1 -fsanitize=bounds" LDFLAGS=-fsanitize=bounds ../../src/icu4c/source/runConfigureICU --enable-debug --disable-release Linux/clang --prefix=/usr/local/google/home/mscherer/icu/mine/inst/icu4c > config.out 2>&1 ; tail config.out
|
ICU_DATA_BUILDTOOL_OPTS=--include_uni_core_data CXXFLAGS="-DU_USING_ICU_NAMESPACE=0 -Wimplicit-fallthrough" CPPFLAGS="-DU_NO_DEFAULT_INCLUDE_UTF_HEADERS=1 -fsanitize=bounds" LDFLAGS=-fsanitize=bounds ../../src/icu4c/source/runConfigureICU --enable-debug --disable-release Linux/clang --prefix=/usr/local/google/home/mscherer/icu/mine/inst/icu4c > config.out 2>&1 ; tail config.out
|
||||||
|
+ Elango's version (diff default C++ compiler & in-source build paths):
|
||||||
|
cd $ICU_OUT/icu4c/source
|
||||||
|
ICU_DATA_BUILDTOOL_OPTS=--include_uni_core_data CXXFLAGS="-DU_USING_ICU_NAMESPACE=0 -Wimplicit-fallthrough" CPPFLAGS="-DU_NO_DEFAULT_INCLUDE_UTF_HEADERS=1 -fsanitize=bounds" LDFLAGS=-fsanitize=bounds ./runConfigureICU --enable-debug --disable-release Linux/gcc --prefix=/usr/local/google/home/elango/oss/icu/icu4c > config.out 2>&1 ; tail config.out
|
||||||
|
|
||||||
*** data files & enums & parser code
|
*** data files & enums & parser code
|
||||||
|
|
||||||
|
@ -360,8 +363,6 @@ copying that version number into the $ICU_SRC/.bazeliskrc config file.
|
||||||
|
|
||||||
* run & fix ICU4J tests
|
* run & fix ICU4J tests
|
||||||
|
|
||||||
TODO
|
|
||||||
|
|
||||||
*** API additions
|
*** API additions
|
||||||
- send notice to icu-design about new born-@stable API (enum constants etc.)
|
- send notice to icu-design about new born-@stable API (enum constants etc.)
|
||||||
|
|
||||||
|
|
|
@ -2802,6 +2802,14 @@ TestAdditionalProperties(void) {
|
||||||
{ 0x0606, UCHAR_PREPENDED_CONCATENATION_MARK, false },
|
{ 0x0606, UCHAR_PREPENDED_CONCATENATION_MARK, false },
|
||||||
{ 0x110BD, UCHAR_PREPENDED_CONCATENATION_MARK, true },
|
{ 0x110BD, UCHAR_PREPENDED_CONCATENATION_MARK, true },
|
||||||
|
|
||||||
|
/* Indic_Conjunct_Break values */
|
||||||
|
{ 0x094D, UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_LINKER },
|
||||||
|
{ 0x09B9, UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_CONSONANT },
|
||||||
|
{ 0x05BE, UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_NONE },
|
||||||
|
{ 0x05BF, UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_EXTEND },
|
||||||
|
{ 0x05C0, UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_NONE },
|
||||||
|
{ 0xD800, UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_NONE },
|
||||||
|
|
||||||
/* undefined UProperty values */
|
/* undefined UProperty values */
|
||||||
{ 0x61, 0x4a7, 0 },
|
{ 0x61, 0x4a7, 0 },
|
||||||
{ 0x234bc, 0x15ed, 0 }
|
{ 0x234bc, 0x15ed, 0 }
|
||||||
|
|
|
@ -21,6 +21,9 @@
|
||||||
#include "testutil.h"
|
#include "testutil.h"
|
||||||
#include "uparse.h"
|
#include "uparse.h"
|
||||||
#include "ucdtest.h"
|
#include "ucdtest.h"
|
||||||
|
#include "usettest.h"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
static const char *ignorePropNames[]={
|
static const char *ignorePropNames[]={
|
||||||
"FC_NFKC",
|
"FC_NFKC",
|
||||||
|
@ -1092,6 +1095,10 @@ void UnicodeTest::TestPropertiesUsingPpucd() {
|
||||||
{ UCHAR_NFC_QUICK_CHECK, UNORM_MAYBE },
|
{ UCHAR_NFC_QUICK_CHECK, UNORM_MAYBE },
|
||||||
{ UCHAR_NFKC_QUICK_CHECK, UNORM_MAYBE },
|
{ UCHAR_NFKC_QUICK_CHECK, UNORM_MAYBE },
|
||||||
#endif // !UCONFIG_NO_NORMALIZATION
|
#endif // !UCONFIG_NO_NORMALIZATION
|
||||||
|
{ UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_NONE },
|
||||||
|
{ UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_CONSONANT },
|
||||||
|
{ UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_EXTEND },
|
||||||
|
{ UCHAR_INDIC_CONJUNCT_BREAK, U_INCB_LINKER },
|
||||||
};
|
};
|
||||||
|
|
||||||
// Iterate through PPUCD file, accumulating each line's data into each UnicodeSet per property
|
// Iterate through PPUCD file, accumulating each line's data into each UnicodeSet per property
|
||||||
|
@ -1133,7 +1140,7 @@ void UnicodeTest::TestPropertiesUsingPpucd() {
|
||||||
if (!tp.isBinary()) {
|
if (!tp.isBinary()) {
|
||||||
msg = msg + "=" + u_getPropertyValueName(tp.prop, tp.value, U_LONG_PROPERTY_NAME);
|
msg = msg + "=" + u_getPropertyValueName(tp.prop, tp.value, U_LONG_PROPERTY_NAME);
|
||||||
}
|
}
|
||||||
assertTrue(msg.c_str(), tp.set == icuPropSet);
|
UnicodeSetTest::checkEqual(*this, tp.set, icuPropSet, msg.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2114,20 +2114,26 @@ void UnicodeSetTest::copyWithIterator(UnicodeSet& t, const UnicodeSet& s, UBool
|
||||||
}
|
}
|
||||||
|
|
||||||
UBool UnicodeSetTest::checkEqual(const UnicodeSet& s, const UnicodeSet& t, const char* message) {
|
UBool UnicodeSetTest::checkEqual(const UnicodeSet& s, const UnicodeSet& t, const char* message) {
|
||||||
assertEquals(UnicodeString("RangeCount: ","") + message, s.getRangeCount(), t.getRangeCount());
|
return checkEqual(*this, s, t, message);
|
||||||
assertEquals(UnicodeString("size: ","") + message, s.size(), t.size());
|
}
|
||||||
|
|
||||||
|
UBool UnicodeSetTest::checkEqual(
|
||||||
|
IntlTest& intlTest,
|
||||||
|
const UnicodeSet& s, const UnicodeSet& t, const char* message) {
|
||||||
|
intlTest.assertEquals(UnicodeString("RangeCount: ","") + message, s.getRangeCount(), t.getRangeCount());
|
||||||
|
intlTest.assertEquals(UnicodeString("size: ","") + message, s.size(), t.size());
|
||||||
UnicodeString source; s.toPattern(source, true);
|
UnicodeString source; s.toPattern(source, true);
|
||||||
UnicodeString result; t.toPattern(result, true);
|
UnicodeString result; t.toPattern(result, true);
|
||||||
if (s != t) {
|
if (s != t) {
|
||||||
errln(UnicodeString("FAIL: ") + message
|
intlTest.errln((UnicodeString)"FAIL: " + message
|
||||||
+ "; source = " + source
|
+ "\nsource = " + source
|
||||||
+ "; result = " + result
|
+ "\nresult = " + result
|
||||||
);
|
);
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
logln(UnicodeString("Ok: ") + message
|
intlTest.logln((UnicodeString)"Ok: " + message
|
||||||
+ "; source = " + source
|
+ "\nsource = " + source
|
||||||
+ "; result = " + result
|
+ "\nresult = " + result
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -33,6 +33,8 @@ public:
|
||||||
UnicodeSetTest();
|
UnicodeSetTest();
|
||||||
~UnicodeSetTest();
|
~UnicodeSetTest();
|
||||||
|
|
||||||
|
static UBool checkEqual(IntlTest& intlTest, const UnicodeSet& s, const UnicodeSet& t, const char* message);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par=nullptr) override;
|
void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par=nullptr) override;
|
||||||
|
|
||||||
|
|
|
@ -904,6 +904,7 @@ public final class UCharacterProperty
|
||||||
return IdentifierStatus.ALLOWED.ordinal();
|
return IdentifierStatus.ALLOWED.ordinal();
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
new IntProperty(0, INCB_MASK, INCB_SHIFT), // INDIC_CONJUNCT_BREAK
|
||||||
};
|
};
|
||||||
|
|
||||||
public int getIntPropertyValue(int c, int which) {
|
public int getIntPropertyValue(int c, int which) {
|
||||||
|
@ -1378,7 +1379,8 @@ public final class UCharacterProperty
|
||||||
// Bits
|
// Bits
|
||||||
// 31..26 Age major version (major=0..63)
|
// 31..26 Age major version (major=0..63)
|
||||||
// 25..24 Age minor version (minor=0..3)
|
// 25..24 Age minor version (minor=0..3)
|
||||||
// 23..15 reserved
|
// 23..17 reserved
|
||||||
|
// 16..15 Indic Conjunct Break
|
||||||
// 14..12 East Asian Width
|
// 14..12 East Asian Width
|
||||||
// 11..10 3..1: Bits 9..0 = Script_Extensions index
|
// 11..10 3..1: Bits 9..0 = Script_Extensions index
|
||||||
// 3: Script value from Script_Extensions
|
// 3: Script value from Script_Extensions
|
||||||
|
@ -1390,6 +1392,9 @@ public final class UCharacterProperty
|
||||||
private static final int EAST_ASIAN_MASK_ = 0x00007000;
|
private static final int EAST_ASIAN_MASK_ = 0x00007000;
|
||||||
private static final int EAST_ASIAN_SHIFT_ = 12;
|
private static final int EAST_ASIAN_SHIFT_ = 12;
|
||||||
|
|
||||||
|
private static final int INCB_MASK = 0x00018000;
|
||||||
|
private static final int INCB_SHIFT = 15;
|
||||||
|
|
||||||
/** Script_Extensions: mask includes Script */
|
/** Script_Extensions: mask includes Script */
|
||||||
public static final int SCRIPT_X_MASK = 0x00000fff;
|
public static final int SCRIPT_X_MASK = 0x00000fff;
|
||||||
|
|
||||||
|
|
|
@ -4124,6 +4124,24 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
|
||||||
public static final int REORDERING_KILLER = 36;
|
public static final int REORDERING_KILLER = 36;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indic Conjunct Break constants.
|
||||||
|
* See https://unicode.org/reports/tr44/#Indic_Conjunct_Break
|
||||||
|
*
|
||||||
|
* @see UProperty#INDIC_CONJUNCT_BREAK
|
||||||
|
* @draft ICU 76
|
||||||
|
*/
|
||||||
|
public enum IndicConjunctBreak {
|
||||||
|
/** @draft ICU 76 */
|
||||||
|
NONE,
|
||||||
|
/** @draft ICU 76 */
|
||||||
|
CONSONANT,
|
||||||
|
/** @draft ICU 76 */
|
||||||
|
EXTEND,
|
||||||
|
/** @draft ICU 76 */
|
||||||
|
LINKER,
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Vertical Orientation constants.
|
* Vertical Orientation constants.
|
||||||
*
|
*
|
||||||
|
|
|
@ -859,12 +859,19 @@ public interface UProperty
|
||||||
*/
|
*/
|
||||||
public static final int IDENTIFIER_STATUS = 0x1019;
|
public static final int IDENTIFIER_STATUS = 0x1019;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enumerated property Indic_Conjunct_Break.
|
||||||
|
* Used in the grapheme cluster break algorithm in UAX #29.
|
||||||
|
* @draft ICU 76
|
||||||
|
*/
|
||||||
|
public static final int INDIC_CONJUNCT_BREAK = 0x101A;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* One more than the last constant for enumerated/integer Unicode properties.
|
* One more than the last constant for enumerated/integer Unicode properties.
|
||||||
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public static final int INT_LIMIT = 0x101A;
|
public static final int INT_LIMIT = 0x101B;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bitmask property General_Category_Mask.
|
* Bitmask property General_Category_Mask.
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -2212,6 +2212,14 @@ public final class UCharacterTest extends CoreTestFmwk
|
||||||
{ 0x0606, UProperty.PREPENDED_CONCATENATION_MARK, FALSE },
|
{ 0x0606, UProperty.PREPENDED_CONCATENATION_MARK, FALSE },
|
||||||
{ 0x110BD, UProperty.PREPENDED_CONCATENATION_MARK, TRUE },
|
{ 0x110BD, UProperty.PREPENDED_CONCATENATION_MARK, TRUE },
|
||||||
|
|
||||||
|
/* Indic_Conjunct_Break values */
|
||||||
|
{ 0x094D, UProperty.INDIC_CONJUNCT_BREAK, UCharacter.IndicConjunctBreak.LINKER.ordinal() },
|
||||||
|
{ 0x09B9, UProperty.INDIC_CONJUNCT_BREAK, UCharacter.IndicConjunctBreak.CONSONANT.ordinal() },
|
||||||
|
{ 0x05BE, UProperty.INDIC_CONJUNCT_BREAK, UCharacter.IndicConjunctBreak.NONE.ordinal() },
|
||||||
|
{ 0x05BF, UProperty.INDIC_CONJUNCT_BREAK, UCharacter.IndicConjunctBreak.EXTEND.ordinal() },
|
||||||
|
{ 0x05C0, UProperty.INDIC_CONJUNCT_BREAK, UCharacter.IndicConjunctBreak.NONE.ordinal() },
|
||||||
|
{ 0xD800, UProperty.INDIC_CONJUNCT_BREAK, UCharacter.IndicConjunctBreak.NONE.ordinal() },
|
||||||
|
|
||||||
/* undefined UProperty values */
|
/* undefined UProperty values */
|
||||||
{ 0x61, 0x4a7, 0 },
|
{ 0x61, 0x4a7, 0 },
|
||||||
{ 0x234bc, 0x15ed, 0 }
|
{ 0x234bc, 0x15ed, 0 }
|
||||||
|
|
|
@ -312,6 +312,10 @@ although the trie can hold 16-bit values.
|
||||||
|
|
||||||
Props vector 0 bits shuffled so that script and script extensions bits are contiguous.
|
Props vector 0 bits shuffled so that script and script extensions bits are contiguous.
|
||||||
|
|
||||||
|
Used 2 bits from props vector 0 to add Indic_Conjunct_Break. The bits used were freed up
|
||||||
|
by the preceding move of the Block property out of props vector 0 and the bit shuffling
|
||||||
|
("defragmentation") of Script and Script_Extensions.
|
||||||
|
|
||||||
----------------------------------------------------------------------------- */
|
----------------------------------------------------------------------------- */
|
||||||
|
|
||||||
U_NAMESPACE_USE
|
U_NAMESPACE_USE
|
||||||
|
@ -712,6 +716,7 @@ struct PropToEnum {
|
||||||
const PropToEnum
|
const PropToEnum
|
||||||
propToEnums[]={
|
propToEnums[]={
|
||||||
{ UCHAR_EAST_ASIAN_WIDTH, 0, UPROPS_EA_SHIFT, UPROPS_EA_MASK },
|
{ UCHAR_EAST_ASIAN_WIDTH, 0, UPROPS_EA_SHIFT, UPROPS_EA_MASK },
|
||||||
|
{ UCHAR_INDIC_CONJUNCT_BREAK, 0, UPROPS_INCB_SHIFT, UPROPS_INCB_MASK },
|
||||||
{ UCHAR_DECOMPOSITION_TYPE, 2, 0, UPROPS_DT_MASK },
|
{ UCHAR_DECOMPOSITION_TYPE, 2, 0, UPROPS_DT_MASK },
|
||||||
{ UCHAR_GRAPHEME_CLUSTER_BREAK, 2, UPROPS_GCB_SHIFT, UPROPS_GCB_MASK },
|
{ UCHAR_GRAPHEME_CLUSTER_BREAK, 2, UPROPS_GCB_SHIFT, UPROPS_GCB_MASK },
|
||||||
{ UCHAR_WORD_BREAK, 2, UPROPS_WB_SHIFT, UPROPS_WB_MASK },
|
{ UCHAR_WORD_BREAK, 2, UPROPS_WB_SHIFT, UPROPS_WB_MASK },
|
||||||
|
|
|
@ -1186,6 +1186,13 @@ static const Value VALUES_ID_Status[2] = {
|
||||||
Value(U_ID_STATUS_ALLOWED, "Allowed Allowed"),
|
Value(U_ID_STATUS_ALLOWED, "Allowed Allowed"),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const Value VALUES_InCB[4] = {
|
||||||
|
Value(U_INCB_NONE, "None None"),
|
||||||
|
Value(U_INCB_CONSONANT, "Consonant Consonant"),
|
||||||
|
Value(U_INCB_EXTEND, "Extend Extend"),
|
||||||
|
Value(U_INCB_LINKER, "Linker Linker"),
|
||||||
|
};
|
||||||
|
|
||||||
static const Value VALUES_gcm[38] = {
|
static const Value VALUES_gcm[38] = {
|
||||||
Value((int32_t)U_GC_C_MASK, "C Other"),
|
Value((int32_t)U_GC_C_MASK, "C Other"),
|
||||||
Value((int32_t)U_GC_CC_MASK, "Cc Control cntrl"),
|
Value((int32_t)U_GC_CC_MASK, "Cc Control cntrl"),
|
||||||
|
@ -1242,7 +1249,7 @@ static const Value VALUES_ID_Type[12] = {
|
||||||
Value(U_ID_TYPE_RECOMMENDED, "Recommended Recommended"),
|
Value(U_ID_TYPE_RECOMMENDED, "Recommended Recommended"),
|
||||||
};
|
};
|
||||||
|
|
||||||
static const Property PROPERTIES[119] = {
|
static const Property PROPERTIES[120] = {
|
||||||
Property(UCHAR_ALPHABETIC, "Alpha Alphabetic"),
|
Property(UCHAR_ALPHABETIC, "Alpha Alphabetic"),
|
||||||
Property(UCHAR_ASCII_HEX_DIGIT, "AHex ASCII_Hex_Digit"),
|
Property(UCHAR_ASCII_HEX_DIGIT, "AHex ASCII_Hex_Digit"),
|
||||||
Property(UCHAR_BIDI_CONTROL, "Bidi_C Bidi_Control"),
|
Property(UCHAR_BIDI_CONTROL, "Bidi_C Bidi_Control"),
|
||||||
|
@ -1344,6 +1351,7 @@ static const Property PROPERTIES[119] = {
|
||||||
Property(UCHAR_INDIC_SYLLABIC_CATEGORY, "InSC Indic_Syllabic_Category", VALUES_InSC, 37),
|
Property(UCHAR_INDIC_SYLLABIC_CATEGORY, "InSC Indic_Syllabic_Category", VALUES_InSC, 37),
|
||||||
Property(UCHAR_VERTICAL_ORIENTATION, "vo Vertical_Orientation", VALUES_vo, 4),
|
Property(UCHAR_VERTICAL_ORIENTATION, "vo Vertical_Orientation", VALUES_vo, 4),
|
||||||
Property(UCHAR_IDENTIFIER_STATUS, "ID_Status Identifier_Status", VALUES_ID_Status, 2),
|
Property(UCHAR_IDENTIFIER_STATUS, "ID_Status Identifier_Status", VALUES_ID_Status, 2),
|
||||||
|
Property(UCHAR_INDIC_CONJUNCT_BREAK, "InCB Indic_Conjunct_Break", VALUES_InCB, 4),
|
||||||
Property(UCHAR_GENERAL_CATEGORY_MASK, "gcm General_Category_Mask", VALUES_gcm, 38),
|
Property(UCHAR_GENERAL_CATEGORY_MASK, "gcm General_Category_Mask", VALUES_gcm, 38),
|
||||||
Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value"),
|
Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value"),
|
||||||
Property(UCHAR_AGE, "age Age"),
|
Property(UCHAR_AGE, "age Age"),
|
||||||
|
|
|
@ -2012,7 +2012,7 @@ _ublock_re = re.compile(" *(UBLOCK_[0-9A-Z_]+) *= *[0-9]+,")
|
||||||
# Sample line to match:
|
# Sample line to match:
|
||||||
# U_EA_AMBIGUOUS,
|
# U_EA_AMBIGUOUS,
|
||||||
_prop_and_value_re = re.compile(
|
_prop_and_value_re = re.compile(
|
||||||
" *(U_(BPT|DT|EA|GCB|HST|ID_STATUS|ID_TYPE|INPC|INSC|LB|JG|JT|NT|SB|VO|WB)_([0-9A-Z_]+))")
|
" *(U_(BPT|DT|EA|GCB|HST|ID_STATUS|ID_TYPE|INCB|INPC|INSC|LB|JG|JT|NT|SB|VO|WB)_([0-9A-Z_]+))")
|
||||||
|
|
||||||
# Sample line to match if it has matched _prop_and_value_re
|
# Sample line to match if it has matched _prop_and_value_re
|
||||||
# (we want to exclude aliases):
|
# (we want to exclude aliases):
|
||||||
|
|
Loading…
Add table
Reference in a new issue