ICU-6232 Add character class for Al-Lakuna, fVirama -> fAlLakuna, handle Al-Lakuna in state table, remove SF_MPRE_FIXUP from SINH_SCRIPT_FLAGS, add Al-Lakuna test cases.

X-SVN-Rev: 23999
This commit is contained in:
Eric Mader 2008-05-27 22:44:40 +00:00
parent 6eed22616f
commit 0c67eefa93
5 changed files with 101 additions and 34 deletions

View file

@ -48,6 +48,7 @@ U_NAMESPACE_BEGIN
#define _m2 (CC_SPLIT_VOWEL_PIECE_2 | CF_LENGTH_MARK)
#define _m3 (CC_SPLIT_VOWEL_PIECE_3 | CF_LENGTH_MARK)
#define _vr (CC_VIRAMA)
#define _al (CC_AL_LAKUNA)
// split matras
#define _s1 (_dv | _x1)
@ -206,7 +207,7 @@ static const IndicClassTable::CharClass sinhCharClasses[] =
_iv, _iv, _iv, _iv, _iv, _iv, _iv, _xx, _xx, _xx, _ct, _ct, _ct, _ct, _ct, _ct, // 0D90 - 0D9F
_ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, // 0DA0 - 0DAF
_ct, _ct, _xx, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _ct, _xx, _xx, // 0DB0 - 0DBF
_ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _xx, _xx, _vr, _xx, _xx, _xx, _xx, _dr, // 0DC0 - 0DCF
_ct, _ct, _ct, _ct, _ct, _ct, _ct, _xx, _xx, _xx, _al, _xx, _xx, _xx, _xx, _dr, // 0DC0 - 0DCF
_dr, _dr, _da, _da, _db, _xx, _db, _xx, _dr, _dl, _s1, _dl, _s2, _s3, _s4, _dr, // 0DD0 - 0DDF
_xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0DE0 - 0DEF
_xx, _xx, _dr, _dr, _xx // 0DF0 - 0DF4
@ -229,7 +230,7 @@ static const SplitMatra kndaSplitTable[] = {{0x0CBF, 0x0CD5}, {0x0CC6, 0x0CD5},
static const SplitMatra mlymSplitTable[] = {{0x0D46, 0x0D3E}, {0x0D47, 0x0D3E}, {0x0D46, 0x0D57}};
static const SplitMatra sinhSplitTable[] = {{0x0DD9, 0x0DCA}, {0x0DD9, 0x0DCF}, {0x0DD9, 0x0DCF,0x0DCA},
static const SplitMatra sinhSplitTable[] = {{0x0DD9, 0x0DCA}, {0x0DD9, 0x0DCF}, {0x0DD9, 0x0DCF, 0x0DCA},
{0x0DD9, 0x0DDF}};
//
// Script Flags
@ -248,7 +249,7 @@ static const SplitMatra sinhSplitTable[] = {{0x0DD9, 0x0DCA}, {0x0DD9, 0x0DCF},
#define TELU_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | SF_FILTER_ZERO_WIDTH | 3)
#define KNDA_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | SF_FILTER_ZERO_WIDTH | 3)
#define MLYM_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT /*| SF_FILTER_ZERO_WIDTH*/)
#define SINH_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT)
#define SINH_SCRIPT_FLAGS (SF_NO_POST_BASE_LIMIT)
//
// Indic Class Tables

View file

@ -73,8 +73,8 @@ private:
LEUnicode fLengthMark;
le_int32 fLengthMarkIndex;
LEUnicode fVirama;
le_int32 fViramaIndex;
LEUnicode fAlLakuna;
le_int32 fAlLakunaIndex;
FeatureMask fMatraFeatures;
@ -97,9 +97,9 @@ private:
if (IndicClassTable::isLengthMark(matraClass)) {
fLengthMark = matra;
fLengthMarkIndex = matraIndex;
} else if (IndicClassTable::isVirama(matraClass)) {
fVirama = matra;
fViramaIndex = matraIndex;
} else if (IndicClassTable::isAlLakuna(matraClass)) {
fAlLakuna = matra;
fAlLakunaIndex = matraIndex;
} else {
switch (matraClass & CF_POS_MASK) {
case CF_POS_BEFORE:
@ -133,7 +133,7 @@ public:
IndicReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage, MPreFixups *mpreFixups)
: fSyllableCount(0), fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage),
fMpre(0), fMpreIndex(0), fMbelow(0), fMbelowIndex(0), fMabove(0), fMaboveIndex(0),
fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fVirama(0), fViramaIndex(0),
fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fAlLakuna(0), fAlLakunaIndex(0),
fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups),
fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0),
fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0)
@ -150,7 +150,7 @@ public:
{
fSyllableCount += 1;
fMpre = fMbelow = fMabove = fMpost = fLengthMark = fVirama = 0;
fMpre = fMbelow = fMabove = fMpost = fLengthMark = fAlLakuna = 0;
fMPreOutIndex = -1;
fVMabove = fVMpost = 0;
@ -255,11 +255,11 @@ public:
}
}
// Handles virama in Sinhala split vowels.
void writeVirama()
// Handles Al-Lakuna in Sinhala split vowels.
void writeAlLakuna()
{
if (fVirama != 0) {
writeChar(fVirama, fViramaIndex, fMatraFeatures);
if (fAlLakuna != 0) {
writeChar(fAlLakuna, fAlLakunaIndex, fMatraFeatures);
}
}
@ -371,20 +371,21 @@ static const le_int32 featureCount = LE_ARRAY_SIZE(featureMap);
static const le_int8 stateTable[][CC_COUNT] =
{
// xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw
{ 1, 6, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 5, 1, 1}, // 0 - ground state
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
{-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12}, // 2 - consonant with nukta
{-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12}, // 3 - consonant
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7}, // 4 - consonant virama
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1}, // 8 - independent vowels that can take a virama
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1}, // 9 - first part of split vowel
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1}, // 10 - second part of split vowel
{-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1}, // 11 - independent vowels that can take an iv
{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1} // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?)
// xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw al
{ 1, 6, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 5, 1, 1, 1}, // 0 - ground state
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
{-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12, -1}, // 2 - consonant with nukta
{-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12, 13}, // 3 - consonant
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7, -1}, // 4 - consonant virama
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
{-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1}, // 8 - independent vowels that can take a virama
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1, -1}, // 9 - first part of split vowel
{-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1, -1}, // 10 - second part of split vowel
{-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1, -1}, // 11 - independent vowels that can take an iv
{-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, 7}, // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?)
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1} // 13 - consonant al-lakuna ZWJ consonant
};
@ -511,7 +512,7 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
}
output.writeLengthMark();
output.writeVirama();
output.writeAlLakuna();
if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) {
output.writeVMabove();
@ -643,7 +644,8 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
bcSpan += 1;
}
if (baseConsonant == lastConsonant && bcSpan < markStart && classTable->isVirama(chars[bcSpan])) {
if (baseConsonant == lastConsonant && bcSpan < markStart &&
(classTable->isVirama(chars[bcSpan]) || classTable->isAlLakuna(chars[bcSpan]))) {
bcSpan += 1;
if (bcSpan < markStart && chars[bcSpan] == C_SIGN_ZWNJ) {
@ -719,7 +721,7 @@ le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le
}
output.writeLengthMark();
output.writeVirama();
output.writeAlLakuna();
// write reph
if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) {

View file

@ -1,6 +1,6 @@
/*
*
* (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
* (C) Copyright IBM Corp. 1998-2008 - All Rights Reserved
*
*/
@ -37,7 +37,8 @@ U_NAMESPACE_BEGIN
#define CC_SPLIT_VOWEL_PIECE_3 12U
#define CC_VIRAMA 13U
#define CC_ZERO_WIDTH_MARK 14U
#define CC_COUNT 15U
#define CC_AL_LAKUNA 15U
#define CC_COUNT 16U
// Character class flags
#define CF_CLASS_MASK 0x0000FFFFU
@ -98,6 +99,7 @@ struct IndicClassTable
inline le_bool isConsonant(LEUnicode ch) const;
inline le_bool isReph(LEUnicode ch) const;
inline le_bool isVirama(LEUnicode ch) const;
inline le_bool isAlLakuna(LEUnicode ch) const;
inline le_bool isNukta(LEUnicode ch) const;
inline le_bool isVattu(LEUnicode ch) const;
inline le_bool isMatra(LEUnicode ch) const;
@ -112,6 +114,7 @@ struct IndicClassTable
inline static le_bool isConsonant(CharClass charClass);
inline static le_bool isReph(CharClass charClass);
inline static le_bool isVirama(CharClass charClass);
inline static le_bool isAlLakuna(CharClass charClass);
inline static le_bool isNukta(CharClass charClass);
inline static le_bool isVattu(CharClass charClass);
inline static le_bool isMatra(CharClass charClass);
@ -193,6 +196,11 @@ inline le_bool IndicClassTable::isVirama(CharClass charClass)
return (charClass & CF_CLASS_MASK) == CC_VIRAMA;
}
// Tests whether a character class value denotes an Al-Lakuna.
// The value is masked with CF_CLASS_MASK before the comparison, so only the
// class-code portion of charClass is compared against CC_AL_LAKUNA; bits
// outside the mask do not affect the result.
inline le_bool IndicClassTable::isAlLakuna(CharClass charClass)
{
return (charClass & CF_CLASS_MASK) == CC_AL_LAKUNA;
}
inline le_bool IndicClassTable::isVattu(CharClass charClass)
{
return (charClass & CF_VATTU) != 0;
@ -255,6 +263,11 @@ inline le_bool IndicClassTable::isVirama(LEUnicode ch) const
return isVirama(getCharClass(ch));
}
// Tests whether the character ch is an Al-Lakuna according to this table.
// Obtains the class of ch through getCharClass() and forwards it to the
// isAlLakuna(CharClass) overload.
inline le_bool IndicClassTable::isAlLakuna(LEUnicode ch) const
{
return isAlLakuna(getCharClass(ch));
}
inline le_bool IndicClassTable::isNukta(LEUnicode ch) const
{
return isNukta(getCharClass(ch));

View file

@ -123,4 +123,9 @@
<test-font name="ANGSA.TTF"/>
<test-text>บทที่๑พายุไซโคลนโดโรธีอาศัยอยู่ท่ามกลางทุ่งใหญ่ในแคนซัสกับลุงเฮนรีชาวไร่และป้าเอ็มภรรยาชาวไร่บ้านของพวกเขาหลังเล็กเพราะไม้สร้างบ้านต้องขนมาด้วยเกวียนเป็นระยะทางหลายไมล์</test-text>
</test-case>
<test-case id="Sinhala Al-Lakuna Test" script="sinh">
<test-font name="lklug.hj.ttf"/>
<test-text>ක්‍රෙ ක්‍යෙ ක්‍ෂෙ ක්‍ෂ්‍යෙ ක්ෂෙ කර්‍මෙ ස්ට්‍රේ ස‍්සෙ ස්ස</test-text>
</test-case>
</layout-tests>

View file

@ -8,7 +8,7 @@
UNLESS YOU REALLY KNOW WHAT YOU'RE DOING.
file name: letest.xml
generated on: 05/26/2008 02:49:25 PM Hawaiian Standard Time
generated on: 05/27/2008 12:57:14 PM Hawaiian Standard Time
generated by: gendata.cpp
-->
@ -1406,4 +1406,50 @@
</result-positions>
</test-case>
<test-case id="Sinhala Al-Lakuna Test" script="sinh">
<test-font name="lklug.hj.ttf" version="Version 0.3 " checksum="0x2A8B3DA2"/>
<test-text>ක්‍රෙ ක්‍යෙ ක්‍ෂෙ ක්‍ෂ්‍යෙ ක්ෂෙ කර්‍මෙ ස්ට්‍රේ ස‍්සෙ ස්ස</test-text>
<result-glyphs>
0x0000004A, 0x000001D3, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000003, 0x0000004A, 0x00000018,
0x00000089, 0x0000FFFF, 0x0000FFFF, 0x00000003, 0x0000004A, 0x00000088, 0x0000FFFF, 0x0000FFFF,
0x0000FFFF, 0x00000003, 0x0000004A, 0x00000088, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000089,
0x0000FFFF, 0x0000FFFF, 0x00000003, 0x000001D4, 0x0000FFFF, 0x0000004A, 0x0000003C, 0x00000003,
0x00000018, 0x0000004A, 0x000001F6, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000003, 0x000000A7,
0x0000FFFF, 0x0000004A, 0x00000078, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000003,
0x0000004A, 0x00000201, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000003, 0x000000A7, 0x0000FFFF,
0x0000003D
</result-glyphs>
<result-indices>
0x00000004, 0x00000000, 0x00000001, 0x00000002, 0x00000003, 0x00000005, 0x0000000A, 0x00000006,
0x00000007, 0x00000008, 0x00000009, 0x0000000B, 0x00000010, 0x0000000C, 0x0000000D, 0x0000000E,
0x0000000F, 0x00000011, 0x00000019, 0x00000012, 0x00000013, 0x00000014, 0x00000015, 0x00000016,
0x00000017, 0x00000018, 0x0000001A, 0x0000001B, 0x0000001C, 0x0000001E, 0x0000001D, 0x0000001F,
0x00000020, 0x00000025, 0x00000021, 0x00000022, 0x00000023, 0x00000024, 0x00000026, 0x00000027,
0x00000028, 0x0000002D, 0x00000029, 0x0000002A, 0x0000002B, 0x0000002C, 0x0000002D, 0x0000002E,
0x00000033, 0x0000002F, 0x00000030, 0x00000031, 0x00000032, 0x00000034, 0x00000035, 0x00000036,
0x00000037
</result-indices>
<result-positions>
0.000000, 0.000000, 8.520000, 0.000000, 19.224001, 0.000000, 19.224001, 0.000000,
19.224001, 0.000000, 19.224001, 0.000000, 26.640001, 0.000000, 35.160004, 0.000000,
45.864006, 0.000000, 51.936005, 0.000000, 51.936005, 0.000000, 51.936005, 0.000000,
59.352005, 0.000000, 67.872009, 0.000000, 82.704010, 0.000000, 82.704010, 0.000000,
82.704010, 0.000000, 82.704010, 0.000000, 90.120010, 0.000000, 98.640015, 0.000000,
113.472015, 0.000000, 113.472015, 0.000000, 113.472015, 0.000000, 113.472015, 0.000000,
119.544014, 0.000000, 119.544014, 0.000000, 119.544014, 0.000000, 126.960014, 0.000000,
137.664017, 0.000000, 137.664017, 0.000000, 146.184021, 0.000000, 154.296021, 0.000000,
161.712021, 0.000000, 172.416016, 0.000000, 180.936020, 0.000000, 189.552017, 0.000000,
189.552017, 0.000000, 189.552017, 0.000000, 189.552017, 0.000000, 196.968018, 0.000000,
205.584015, 0.000000, 205.584015, 0.000000, 214.104019, 0.000000, 222.720016, 0.000000,
222.720016, 0.000000, 222.720016, 0.000000, 222.720016, 0.000000, 222.720016, 0.000000,
230.136017, 0.000000, 238.656021, 0.000000, 254.784027, 0.000000, 254.784027, 0.000000,
254.784027, 0.000000, 254.784027, 0.000000, 262.200012, 0.000000, 270.816010, 0.000000,
270.816010, 0.000000, 279.432007, 0.000000
</result-positions>
</test-case>
</layout-tests>