mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-6232 Add character class for Al-Lakuna, rename fVirama -> fAlLakuna, handle Al-Lakuna in state table, remove SF_MPRE_FIXUP from SINH_SCRIPT_FLAGS, add Al-Lakuna test cases.
X-SVN-Rev: 23999
This commit is contained in:
parent
6eed22616f
commit
0c67eefa93
5 changed files with 101 additions and 34 deletions
|
@ -48,6 +48,7 @@ U_NAMESPACE_BEGIN
|
|||
#define _m2 (CC_SPLIT_VOWEL_PIECE_2 | CF_LENGTH_MARK)
|
||||
#define _m3 (CC_SPLIT_VOWEL_PIECE_3 | CF_LENGTH_MARK)
|
||||
#define _vr (CC_VIRAMA)
|
||||
#define _al (CC_AL_LAKUNA)
|
||||
|
||||
// split matras
|
||||
#define _s1 (_dv | _x1)
|
||||
|
@ -206,7 +207,7 @@ static const IndicClassTable::CharClass sinhCharClasses[] =
|
|||
_iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _xx, _xx, _ct, _ct, _ct, _ct, _ct, _ct, // 0D90 - 0D9F
|
||||
_ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, // 0DA0 - 0DAF
|
||||
_ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _xx, _xx, // 0DB0 - 0DBF
|
||||
_ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _xx, _xx, _vr, _xx, _xx, _xx, _xx, _dr, // 0DC0 - 0DCF
|
||||
_ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _xx, _xx, _al, _xx, _xx, _xx, _xx, _dr, // 0DC0 - 0DCF
|
||||
_dr, _dr, _da, _da, _db, _xx, _db, _xx, _dr, _dl, _s1, _dl, _s2, _s3, _s4, _dr, // 0DD0 - 0DDF
|
||||
_xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0DE0 - 0DEF
|
||||
_xx, _xx, _dr, _dr, _xx // 0DF0 - 0DF4
|
||||
|
@ -229,7 +230,7 @@ static const SplitMatra kndaSplitTable[] = {{0x0CBF, 0x0CD5}, {0x0CC6, 0x0CD5},
|
|||
static const SplitMatra mlymSplitTable[] = {{0x0D46, 0x0D3E}, {0x0D47, 0x0D3E}, {0x0D46, 0x0D57}};
|
||||
|
||||
|
||||
static const SplitMatra sinhSplitTable[] = {{0x0DD9, 0x0DCA}, {0x0DD9, 0x0DCF}, {0x0DD9, 0x0DCF,0x0DCA},
|
||||
static const SplitMatra sinhSplitTable[] = {{0x0DD9, 0x0DCA}, {0x0DD9, 0x0DCF}, {0x0DD9, 0x0DCF, 0x0DCA},
|
||||
{0x0DD9, 0x0DDF}};
|
||||
//
|
||||
// Script Flags
|
||||
|
@ -248,7 +249,7 @@ static const SplitMatra sinhSplitTable[] = {{0x0DD9, 0x0DCA}, {0x0DD9, 0x0DCF},
|
|||
#define TELU_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | SF_FILTER_ZERO_WIDTH | 3)
|
||||
#define KNDA_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | SF_FILTER_ZERO_WIDTH | 3)
|
||||
#define MLYM_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT /*| SF_FILTER_ZERO_WIDTH*/)
|
||||
#define SINH_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT)
|
||||
#define SINH_SCRIPT_FLAGS (SF_NO_POST_BASE_LIMIT)
|
||||
|
||||
//
|
||||
// Indic Class Tables
|
||||
|
|
|
@ -73,8 +73,8 @@ private:
|
|||
LEUnicode fLengthMark;
|
||||
le_int32 fLengthMarkIndex;
|
||||
|
||||
LEUnicode fVirama;
|
||||
le_int32 fViramaIndex;
|
||||
LEUnicode fAlLakuna;
|
||||
le_int32 fAlLakunaIndex;
|
||||
|
||||
FeatureMask fMatraFeatures;
|
||||
|
||||
|
@ -97,9 +97,9 @@ private:
|
|||
if (IndicClassTable::isLengthMark(matraClass)) {
|
||||
fLengthMark = matra;
|
||||
fLengthMarkIndex = matraIndex;
|
||||
} else if (IndicClassTable::isVirama(matraClass)) {
|
||||
fVirama = matra;
|
||||
fViramaIndex = matraIndex;
|
||||
} else if (IndicClassTable::isAlLakuna(matraClass)) {
|
||||
fAlLakuna = matra;
|
||||
fAlLakunaIndex = matraIndex;
|
||||
} else {
|
||||
switch (matraClass & CF_POS_MASK) {
|
||||
case CF_POS_BEFORE:
|
||||
|
@ -133,7 +133,7 @@ public:
|
|||
IndicReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage, MPreFixups *mpreFixups)
|
||||
: fSyllableCount(0), fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage),
|
||||
fMpre(0), fMpreIndex(0), fMbelow(0), fMbelowIndex(0), fMabove(0), fMaboveIndex(0),
|
||||
fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fVirama(0), fViramaIndex(0),
|
||||
fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fAlLakuna(0), fAlLakunaIndex(0),
|
||||
fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups),
|
||||
fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0),
|
||||
fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0)
|
||||
|
@ -150,7 +150,7 @@ public:
|
|||
{
|
||||
fSyllableCount += 1;
|
||||
|
||||
fMpre = fMbelow = fMabove = fMpost = fLengthMark = fVirama = 0;
|
||||
fMpre = fMbelow = fMabove = fMpost = fLengthMark = fAlLakuna = 0;
|
||||
fMPreOutIndex = -1;
|
||||
|
||||
fVMabove = fVMpost = 0;
|
||||
|
@ -255,11 +255,11 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
// Handles virama in Sinhala split vowels.
|
||||
void writeVirama()
|
||||
// Handles Al-Lakuna in Sinhala split vowels.
|
||||
void writeAlLakuna()
|
||||
{
|
||||
if (fVirama != 0) {
|
||||
writeChar(fVirama, fViramaIndex, fMatraFeatures);
|
||||
if (fAlLakuna != 0) {
|
||||
writeChar(fAlLakuna, fAlLakunaIndex, fMatraFeatures);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -371,20 +371,21 @@ static const le_int32 featureCount = LE_ARRAY_SIZE(featureMap);
|
|||
|
||||
static const le_int8 stateTable[][CC_COUNT] =
|
||||
{
|
||||
// xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw
|
||||
{ 1, 6, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 5, 1, 1}, // 0 - ground state
|
||||
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12}, // 2 - consonant with nukta
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12}, // 3 - consonant
|
||||
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7}, // 4 - consonant virama
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
|
||||
{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
|
||||
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1}, // 8 - independent vowels that can take a virama
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1}, // 9 - first part of split vowel
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1}, // 10 - second part of split vowel
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1}, // 11 - independent vowels that can take an iv
|
||||
{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1} // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?)
|
||||
// xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw al
|
||||
{ 1, 6, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 5, 1, 1, 1}, // 0 - ground state
|
||||
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12, -1}, // 2 - consonant with nukta
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12, 13}, // 3 - consonant
|
||||
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7, -1}, // 4 - consonant virama
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
|
||||
{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
|
||||
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1}, // 8 - independent vowels that can take a virama
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1, -1}, // 9 - first part of split vowel
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1, -1}, // 10 - second part of split vowel
|
||||
{-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1, -1}, // 11 - independent vowels that can take an iv
|
||||
{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, 7}, // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?)
|
||||
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1} // 13 - consonant al-lakuna ZWJ consonant
|
||||
};
|
||||
|
||||
|
||||
|
@ -511,7 +512,7 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
|
|||
}
|
||||
|
||||
output.writeLengthMark();
|
||||
output.writeVirama();
|
||||
output.writeAlLakuna();
|
||||
|
||||
if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) {
|
||||
output.writeVMabove();
|
||||
|
@ -643,7 +644,8 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
|
|||
bcSpan += 1;
|
||||
}
|
||||
|
||||
if (baseConsonant == lastConsonant && bcSpan < markStart && classTable->isVirama(chars[bcSpan])) {
|
||||
if (baseConsonant == lastConsonant && bcSpan < markStart &&
|
||||
(classTable->isVirama(chars[bcSpan]) || classTable->isAlLakuna(chars[bcSpan]))) {
|
||||
bcSpan += 1;
|
||||
|
||||
if (bcSpan < markStart && chars[bcSpan] == C_SIGN_ZWNJ) {
|
||||
|
@ -719,7 +721,7 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
|
|||
}
|
||||
|
||||
output.writeLengthMark();
|
||||
output.writeVirama();
|
||||
output.writeAlLakuna();
|
||||
|
||||
// write reph
|
||||
if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*
|
||||
* (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
|
||||
* (C) Copyright IBM Corp. 1998-2008 - All Rights Reserved
|
||||
*
|
||||
*/
|
||||
|
||||
|
@ -37,7 +37,8 @@ U_NAMESPACE_BEGIN
|
|||
#define CC_SPLIT_VOWEL_PIECE_3 12U
|
||||
#define CC_VIRAMA 13U
|
||||
#define CC_ZERO_WIDTH_MARK 14U
|
||||
#define CC_COUNT 15U
|
||||
#define CC_AL_LAKUNA 15U
|
||||
#define CC_COUNT 16U
|
||||
|
||||
// Character class flags
|
||||
#define CF_CLASS_MASK 0x0000FFFFU
|
||||
|
@ -98,6 +99,7 @@ struct IndicClassTable
|
|||
inline le_bool isConsonant(LEUnicode ch) const;
|
||||
inline le_bool isReph(LEUnicode ch) const;
|
||||
inline le_bool isVirama(LEUnicode ch) const;
|
||||
inline le_bool isAlLakuna(LEUnicode ch) const;
|
||||
inline le_bool isNukta(LEUnicode ch) const;
|
||||
inline le_bool isVattu(LEUnicode ch) const;
|
||||
inline le_bool isMatra(LEUnicode ch) const;
|
||||
|
@ -112,6 +114,7 @@ struct IndicClassTable
|
|||
inline static le_bool isConsonant(CharClass charClass);
|
||||
inline static le_bool isReph(CharClass charClass);
|
||||
inline static le_bool isVirama(CharClass charClass);
|
||||
inline static le_bool isAlLakuna(CharClass charClass);
|
||||
inline static le_bool isNukta(CharClass charClass);
|
||||
inline static le_bool isVattu(CharClass charClass);
|
||||
inline static le_bool isMatra(CharClass charClass);
|
||||
|
@ -193,6 +196,11 @@ inline le_bool IndicClassTable::isVirama(CharClass charClass)
|
|||
return (charClass & CF_CLASS_MASK) == CC_VIRAMA;
|
||||
}
|
||||
|
||||
inline le_bool IndicClassTable::isAlLakuna(CharClass charClass)
|
||||
{
|
||||
return (charClass & CF_CLASS_MASK) == CC_AL_LAKUNA;
|
||||
}
|
||||
|
||||
inline le_bool IndicClassTable::isVattu(CharClass charClass)
|
||||
{
|
||||
return (charClass & CF_VATTU) != 0;
|
||||
|
@ -255,6 +263,11 @@ inline le_bool IndicClassTable::isVirama(LEUnicode ch) const
|
|||
return isVirama(getCharClass(ch));
|
||||
}
|
||||
|
||||
inline le_bool IndicClassTable::isAlLakuna(LEUnicode ch) const
|
||||
{
|
||||
return isAlLakuna(getCharClass(ch));
|
||||
}
|
||||
|
||||
inline le_bool IndicClassTable::isNukta(LEUnicode ch) const
|
||||
{
|
||||
return isNukta(getCharClass(ch));
|
||||
|
|
|
@ -123,4 +123,9 @@
|
|||
<test-font name="ANGSA.TTF"/>
|
||||
<test-text>บทที่๑พายุไซโคลนโดโรธีอาศัยอยู่ท่ามกลางทุ่งใหญ่ในแคนซัสกับลุงเฮนรีชาวไร่และป้าเอ็มภรรยาชาวไร่บ้านของพวกเขาหลังเล็กเพราะไม้สร้างบ้านต้องขนมาด้วยเกวียนเป็นระยะทางหลายไมล์</test-text>
|
||||
</test-case>
|
||||
|
||||
<test-case id="Sinhala Al-Lakuna Test" script="sinh">
|
||||
<test-font name="lklug.hj.ttf"/>
|
||||
<test-text>ක්රෙ ක්යෙ ක්ෂෙ ක්ෂ්යෙ ක්ෂෙ කර්මෙ ස්ට්රේ ස්සෙ ස්ස</test-text>
|
||||
</test-case>
|
||||
</layout-tests>
|
48
icu4c/source/test/testdata/letest.xml
vendored
48
icu4c/source/test/testdata/letest.xml
vendored
|
@ -8,7 +8,7 @@
|
|||
UNLESS YOU REALLY KNOW WHAT YOU'RE DOING.
|
||||
|
||||
file name: letest.xml
|
||||
generated on: 05/26/2008 02:49:25 PM Hawaiian Standard Time
|
||||
generated on: 05/27/2008 12:57:14 PM Hawaiian Standard Time
|
||||
generated by: gendata.cpp
|
||||
-->
|
||||
|
||||
|
@ -1406,4 +1406,50 @@
|
|||
</result-positions>
|
||||
</test-case>
|
||||
|
||||
<test-case id="Sinhala Al-Lakuna Test" script="sinh">
|
||||
<test-font name="lklug.hj.ttf" version="Version 0.3 " checksum="0x2A8B3DA2"/>
|
||||
|
||||
<test-text>ක්රෙ ක්යෙ ක්ෂෙ ක්ෂ්යෙ ක්ෂෙ කර්මෙ ස්ට්රේ ස්සෙ ස්ස</test-text>
|
||||
|
||||
<result-glyphs>
|
||||
0x0000004A, 0x000001D3, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000003, 0x0000004A, 0x00000018,
|
||||
0x00000089, 0x0000FFFF, 0x0000FFFF, 0x00000003, 0x0000004A, 0x00000088, 0x0000FFFF, 0x0000FFFF,
|
||||
0x0000FFFF, 0x00000003, 0x0000004A, 0x00000088, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000089,
|
||||
0x0000FFFF, 0x0000FFFF, 0x00000003, 0x000001D4, 0x0000FFFF, 0x0000004A, 0x0000003C, 0x00000003,
|
||||
0x00000018, 0x0000004A, 0x000001F6, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000003, 0x000000A7,
|
||||
0x0000FFFF, 0x0000004A, 0x00000078, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000003,
|
||||
0x0000004A, 0x00000201, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000003, 0x000000A7, 0x0000FFFF,
|
||||
0x0000003D
|
||||
</result-glyphs>
|
||||
|
||||
<result-indices>
|
||||
0x00000004, 0x00000000, 0x00000001, 0x00000002, 0x00000003, 0x00000005, 0x0000000A, 0x00000006,
|
||||
0x00000007, 0x00000008, 0x00000009, 0x0000000B, 0x00000010, 0x0000000C, 0x0000000D, 0x0000000E,
|
||||
0x0000000F, 0x00000011, 0x00000019, 0x00000012, 0x00000013, 0x00000014, 0x00000015, 0x00000016,
|
||||
0x00000017, 0x00000018, 0x0000001A, 0x0000001B, 0x0000001C, 0x0000001E, 0x0000001D, 0x0000001F,
|
||||
0x00000020, 0x00000025, 0x00000021, 0x00000022, 0x00000023, 0x00000024, 0x00000026, 0x00000027,
|
||||
0x00000028, 0x0000002D, 0x00000029, 0x0000002A, 0x0000002B, 0x0000002C, 0x0000002D, 0x0000002E,
|
||||
0x00000033, 0x0000002F, 0x00000030, 0x00000031, 0x00000032, 0x00000034, 0x00000035, 0x00000036,
|
||||
0x00000037
|
||||
</result-indices>
|
||||
|
||||
<result-positions>
|
||||
0.000000, 0.000000, 8.520000, 0.000000, 19.224001, 0.000000, 19.224001, 0.000000,
|
||||
19.224001, 0.000000, 19.224001, 0.000000, 26.640001, 0.000000, 35.160004, 0.000000,
|
||||
45.864006, 0.000000, 51.936005, 0.000000, 51.936005, 0.000000, 51.936005, 0.000000,
|
||||
59.352005, 0.000000, 67.872009, 0.000000, 82.704010, 0.000000, 82.704010, 0.000000,
|
||||
82.704010, 0.000000, 82.704010, 0.000000, 90.120010, 0.000000, 98.640015, 0.000000,
|
||||
113.472015, 0.000000, 113.472015, 0.000000, 113.472015, 0.000000, 113.472015, 0.000000,
|
||||
119.544014, 0.000000, 119.544014, 0.000000, 119.544014, 0.000000, 126.960014, 0.000000,
|
||||
137.664017, 0.000000, 137.664017, 0.000000, 146.184021, 0.000000, 154.296021, 0.000000,
|
||||
161.712021, 0.000000, 172.416016, 0.000000, 180.936020, 0.000000, 189.552017, 0.000000,
|
||||
189.552017, 0.000000, 189.552017, 0.000000, 189.552017, 0.000000, 196.968018, 0.000000,
|
||||
205.584015, 0.000000, 205.584015, 0.000000, 214.104019, 0.000000, 222.720016, 0.000000,
|
||||
222.720016, 0.000000, 222.720016, 0.000000, 222.720016, 0.000000, 222.720016, 0.000000,
|
||||
230.136017, 0.000000, 238.656021, 0.000000, 254.784027, 0.000000, 254.784027, 0.000000,
|
||||
254.784027, 0.000000, 254.784027, 0.000000, 262.200012, 0.000000, 270.816010, 0.000000,
|
||||
270.816010, 0.000000, 279.432007, 0.000000
|
||||
</result-positions>
|
||||
</test-case>
|
||||
|
||||
</layout-tests>
|
||||
|
|
Loading…
Add table
Reference in a new issue