diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java b/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java
index cf1c99d5bc6..9f7237e516b 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java,v $
-* $Date: 2004/02/18 03:08:59 $
-* $Revision: 1.11 $
+* $Date: 2004/04/17 18:21:39 $
+* $Revision: 1.12 $
*
*******************************************************************************
*/
@@ -782,11 +782,11 @@ abstract public class GenerateBreakTest implements UCD_Types {
public boolean isBreak(String source, int offset) {
- setRule("1: sot ");
+ setRule("1: sot ÷");
if (offset < 0 || offset > source.length()) return false;
if (offset == 0) return true;
- setRule("2: eot");
+ setRule("2: ÷ eot");
if (offset == source.length()) return true;
// UTF-16: never break in the middle of a code point
@@ -801,29 +801,29 @@ abstract public class GenerateBreakTest implements UCD_Types {
byte before = getResolvedType(cpBefore);
byte after = getResolvedType(cpAfter);
- setRule("3: CR LF");
+ setRule("3: CR × LF");
if (before == CR && after == LF) return false;
- setRule("4: ( Control | CR | LF ) ");
+ setRule("4: ( Control | CR | LF ) ÷");
if (before == CR || before == LF || before == Control) return true;
- setRule("5: ( Control | CR | LF )");
+ setRule("5: ÷ ( Control | CR | LF )");
if (after == Control || after == LF || after == CR) return true;
- setRule("6: L ( L | V | LV | LVT )");
+ setRule("6: L × ( L | V | LV | LVT )");
if (before == L && (after == L || after == V || after == LV || after == LVT)) return false;
- setRule("7: ( LV | V ) ( V | T )");
+ setRule("7: ( LV | V ) × ( V | T )");
if ((before == LV || before == V) && (after == V || after == T)) return false;
- setRule("8: ( LVT | T ) T");
+ setRule("8: ( LVT | T ) × T");
if ((before == LVT || before == T) && (after == T)) return false;
- setRule("9: Extend");
+ setRule("9: × Extend");
if (after == Extend) return false;
// Otherwise break after all characters.
- setRule("10: Any Any");
+ setRule("10: Any ÷ Any");
return true;
}
@@ -914,12 +914,12 @@ abstract public class GenerateBreakTest implements UCD_Types {
public boolean isBreak(String source, int offset) {
- setRule("1: sot ");
+ setRule("1: sot ÷");
if (offset < 0 || offset > source.length()) return false;
if (offset == 0) return true;
- setRule("2: eot");
+ setRule("2: ÷ eot");
if (offset == source.length()) return true;
// Treat a grapheme cluster as if it were a single character:
@@ -943,43 +943,43 @@ abstract public class GenerateBreakTest implements UCD_Types {
//Don't break between most letters
- setRule("5: ALetter ALetter");
+ setRule("5: ALetter × ALetter");
if (before == ALetter && after == ALetter) return false;
- // Dont break letters across certain punctuation
+ // Don’t break letters across certain punctuation
- setRule("6: ALetter (MidLetter | MidNumLet) ALetter");
+ setRule("6: ALetter × (MidLetter | MidNumLet) ALetter");
if (before == ALetter && (after == MidLetter || after == MidNumLet) && after2 == ALetter) return false;
- setRule("7: ALetter (MidLetter | MidNumLet) ALetter");
+ setRule("7: ALetter (MidLetter | MidNumLet) × ALetter");
if (before2 == ALetter && (before == MidLetter || before == MidNumLet) && after == ALetter) return false;
- // Dont break within sequences of digits, or digits adjacent to letters.
+ // Don’t break within sequences of digits, or digits adjacent to letters.
- setRule("8: Numeric Numeric");
+ setRule("8: Numeric × Numeric");
if (before == Numeric && after == Numeric) return false;
- setRule("9: ALetter Numeric");
+ setRule("9: ALetter × Numeric");
if (before == ALetter && after == Numeric) return false;
- setRule("10: Numeric ALetter");
+ setRule("10: Numeric × ALetter");
if (before == Numeric && after == ALetter) return false;
- // Dont break within sequences like: '-3.2'
- setRule("11: Numeric (MidNum | MidNumLet) Numeric");
+ // Don’t break within sequences like: '-3.2'
+ setRule("11: Numeric (MidNum | MidNumLet) × Numeric");
if (before2 == Numeric && (before == MidNum || before == MidNumLet) && after == Numeric) return false;
- setRule("12: Numeric (MidNum | MidNumLet) Numeric");
+ setRule("12: Numeric × (MidNum | MidNumLet) Numeric");
if (before == Numeric && (after == MidNum || after == MidNumLet) && after2 == Numeric) return false;
// Don't break between Katakana
- setRule("13: Katakana Katakana");
+ setRule("13: Katakana × Katakana");
if (before == Katakana && after == Katakana) return false;
// Otherwise break always.
- setRule("14: Any Any");
+ setRule("14: Any ÷ Any");
return true;
}
@@ -1235,7 +1235,7 @@ abstract public class GenerateBreakTest implements UCD_Types {
// LB 2a Never break at the start of text
- setRule("2a: sot");
+ setRule("2a: × sot");
if (offset <= 0) return false;
// LB 2b Always break at the end of text
@@ -1269,26 +1269,26 @@ abstract public class GenerateBreakTest implements UCD_Types {
//byte after = getResolvedType(cpAfter);
- setRule("3a: CR LF ; ( BK | CR | LF | NL ) !");
+ setRule("3a: CR × LF ; ( BK | CR | LF | NL ) !");
// Always break after hard line breaks (but never between CR and LF).
// CR ^ LF
if (before == LB_CR && after == LB_LF) return false;
if (before == LB_BK || before == LB_LF || before == LB_CR) return true;
- //LB 3b Dont break before hard line breaks.
- setRule("3b: ( BK | CR | LF )");
+ //LB 3b Don’t break before hard line breaks.
+ setRule("3b: × ( BK | CR | LF )");
if (after == LB_BK || after == LB_LF || after == LB_CR) return false;
- // LB 4 Dont break before spaces or zero-width space.
- setRule("4: ( SP | ZW )");
+ // LB 4 Don’t break before spaces or zero-width space.
+ setRule("4: × ( SP | ZW )");
if (after == LB_SP || after == LB_ZW) return false;
// LB 5 Break after zero-width space.
- setRule("5: ZW ");
+ setRule("5: ZW ÷");
if (before == LB_ZW) return true;
- // LB 6 Dont break graphemes (before combining marks, around virama or on sequences of conjoining Jamos.
+ // LB 6 Don’t break graphemes (before combining marks, around virama or on sequences of conjoining Jamos).
setRule("6: DGC -> FC");
if (!grapheme.isBreak( source, offset)) return false;
@@ -1324,9 +1324,9 @@ abstract public class GenerateBreakTest implements UCD_Types {
if (setBase && backBase == -1) before = LB_AL;
- // LB 8 Dont break before ] or ! or ; or /, even after spaces.
- // CL, EX, IS, SY
- setRule("8: ( CL | EX | IS | SY )");
+ // LB 8 Don’t break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces.
+ // × CL, × EX, × IS, × SY
+ setRule("8: × ( CL | EX | IS | SY )");
if (after == LB_CL || after == LB_EX || after == LB_SY | after == LB_IS) return false;
@@ -1339,97 +1339,97 @@ abstract public class GenerateBreakTest implements UCD_Types {
}
}
- // LB 9 Dont break after [, even after spaces.
- // OP SP*
- setRule("9: OP SP* ");
+ // LB 9 Don’t break after ‘[’, even after spaces.
+ // OP SP* ×
+ setRule("9: OP SP* ×");
if (lastNonSpace == LB_OP) return false;
- // LB 10 Dont break within [, , even with intervening spaces.
- // QU SP* OP
- setRule("10: QU SP* OP");
+ // LB 10 Don’t break within ‘”[’, even with intervening spaces.
+ // QU SP* × OP
+ setRule("10: QU SP* × OP");
if (lastNonSpace == LB_QU && after == LB_OP) return false;
- // LB 11 Dont break within ]h, even with intervening spaces.
- // CL SP* NS
- setRule("11: CL SP* NS");
+ // LB 11 Don’t break within ‘]h’, even with intervening spaces.
+ // CL SP* × NS
+ setRule("11: CL SP* × NS");
if (lastNonSpace == LB_CL && after == LB_NS) return false;
- // LB 11a Dont break within , even with intervening spaces.
- // B2 B2
- setRule("11a: B2 B2");
+ // LB 11a Don’t break within ‘——’, even with intervening spaces.
+ // B2 × B2
+ setRule("11a: B2 × B2");
if (lastNonSpace == LB_B2 && after == LB_B2) return false;
- // LB 13 Dont break before or after NBSP or WORD JOINER
- // GL
- // GL
+ // LB 13 Don’t break before or after NBSP or WORD JOINER
+ // × GL
+ // GL ×
- setRule("11b: WJ ; WJ ");
+ setRule("11b: × WJ ; WJ ×");
if (after == LB_WJ || before == LB_WJ) return false;
// [Note: by this time, all of the "X" in the table are accounted for. We can safely break after spaces.]
// LB 12 Break after spaces
- setRule("12: SP ");
+ setRule("12: SP ÷");
if (before == LB_SP) return true;
- // LB 13 Dont break before or after NBSP or WORD JOINER
- setRule("13: GL ; GL ");
+ // LB 13 Don’t break before or after NBSP or WORD JOINER
+ setRule("13: × GL ; GL ×");
if (after == LB_GL || before == LB_GL) return false;
- // LB 14 Dont break before or after
- setRule("14: QU ; QU ");
+ // LB 14 Don’t break before or after ‘”’
+ setRule("14: × QU ; QU ×");
if (before == LB_QU || after == LB_QU) return false;
// LB 14a Break before and after CB
- setRule("14a: CB ; CB ");
+ setRule("14a: ÷ CB ; CB ÷");
if (before == LB_CB || after == LB_CB) return true;
- // LB 15 Dont break before hyphen-minus, other hyphens, fixed-width spaces,
+ // LB 15 Don’t break before hyphen-minus, other hyphens, fixed-width spaces,
// small kana and other non- starters, or after acute accents:
- setRule("15: ( BA | HY | NS ) ; BB ");
+ setRule("15: × ( BA | HY | NS ) ; BB ×");
if (after == LB_NS) return false;
if (after == LB_HY) return false;
if (after == LB_BA) return false;
if (before == LB_BB) return false;
- //setRule("15a: HY NU"); // NEW
+ //setRule("15a: HY × NU"); // NEW
//if (before == LB_HY && after == LB_NU) return false;
- // LB 16 Dont break between two ellipses, or between letters or numbers and ellipsis:
- // Examples: 9..., a..., H...
- setRule("16: ( AL | ID | IN | NU ) IN");
+ // LB 16 Don’t break between two ellipses, or between letters or numbers and ellipsis:
+ // Examples: ‘9...’, ‘a...’, ‘H...’
+ setRule("16: ( AL | ID | IN | NU ) × IN");
if ((before == LB_NU || before == LB_AL || before == LB_ID) && after == LB_IN) return false;
if (before == LB_IN && after == LB_IN) return false;
// Don't break alphanumerics.
- // LB 17 Dont break within a9, 3a, or H%
+ // LB 17 Don’t break within ‘a9’, ‘3a’, or ‘H%’
// Numbers are of the form PR ? ( OP | HY ) ? NU (NU | IS) * CL ? PO ?
- // Examples: $(12.35) 2,1234 (12) 12.54
+ // Examples: $(12.35) 2,1234 (12)¢ 12.54¢
// This is approximated with the following rules. (Some cases already handled above,
- // like 9,, [9.)
- setRule("17: ID PO ; AL NU; NU AL");
+ // like ‘9,’, ‘[9’.)
+ setRule("17: ID × PO ; AL × NU; NU × AL");
if (before == LB_ID && after == LB_PO) return false;
if (before == LB_AL && after == LB_NU) return false;
if (before == LB_NU && after == LB_AL) return false;
- // LB 18 Dont break between the following pairs of classes.
- // CL PO
- // HY NU
- // IS NU
- // NU NU
- // NU PO
- // PR AL
- // PR HY
- // PR ID
- // PR NU
- // PR OP
- // SY NU
- // Example pairs: $9, $[, $-, -9, /9, 99, ,9, 9% ]%
+ // LB 18 Don’t break between the following pairs of classes.
+ // CL × PO
+ // HY × NU
+ // IS × NU
+ // NU × NU
+ // NU × PO
+ // PR × AL
+ // PR × HY
+ // PR × ID
+ // PR × NU
+ // PR × OP
+ // SY × NU
+ // Example pairs: ‘$9’, ‘$[’, ‘$-’, ‘-9’, ‘/9’, ‘99’, ‘,9’, ‘9%’, ‘]%’
- setRule("18: CL PO ; NU PO ; ( IS | NU | HY | PR | SY ) NU ; PR ( AL | HY | ID | OP )");
+ setRule("18: CL × PO ; NU × PO ; ( IS | NU | HY | PR | SY ) × NU ; PR × ( AL | HY | ID | OP )");
if (before == LB_CL && after == LB_PO) return false;
if (before == LB_IS && after == LB_NU) return false;
if (before == LB_NU && after == LB_NU) return false;
@@ -1446,30 +1446,30 @@ abstract public class GenerateBreakTest implements UCD_Types {
if (before == LB_SY && after == LB_NU) return false;
// LB 15b Break after hyphen-minus, and before acute accents:
- setRule("18b: HY ; BB");
+ setRule("18b: HY ÷ ; ÷ BB");
if (before == LB_HY) return true;
if (after == LB_BB) return true;
- // LB 19 Dont break between alphabetics (at)
- // AL AL
+ // LB 19 Don’t break between alphabetics (“at”)
+ // AL × AL
- setRule("19: AL AL");
+ setRule("19: AL × AL");
if (before == LB_AL && after == LB_AL) return false;
// LB 20 Break everywhere else
- // ALL
- // ALL
+ // ALL ÷
+ // ÷ ALL
if (ucd.getCompositeVersion() > 0x040000) {
- setRule("19b: IS AL");
+ setRule("19b: IS × AL");
if (before == LB_IS && after == LB_AL) return false;
}
// LB 20 Break everywhere else
- // ALL
- // ALL
+ // ALL ÷
+ // ÷ ALL
- setRule("20: ALL ; ALL");
+ setRule("20: ALL ÷ ; ÷ ALL");
return true;
}
}
@@ -1498,8 +1498,8 @@ abstract public class GenerateBreakTest implements UCD_Types {
"U.S.A\u0300.",
"3.4",
"c.d",
- "etc.)\u2019\u2018(the",
- "etc.)\u2019\u2018(The",
+ "etc.)\u2019 \u2018(the",
+ "etc.)\u2019 \u2018(The",
"the resp. leaders are",
"\u5B57.\u5B57",
"etc.\u5B83",
@@ -1631,15 +1631,15 @@ abstract public class GenerateBreakTest implements UCD_Types {
public boolean isBreak(String source, int offset) {
// Break at the start and end of text.
- setRule("1: sot ");
+ setRule("1: sot ÷");
if (offset < 0 || offset > source.length()) return false;
if (offset == 0) return true;
- setRule("2: eot");
+ setRule("2: ÷ eot");
if (offset == source.length()) return true;
- setRule("3: Sep ");
+ setRule("3: Sep ÷");
byte beforeChar = getResolvedType(source.charAt(offset-1));
if (beforeChar == Sep) return true;
@@ -1662,22 +1662,22 @@ abstract public class GenerateBreakTest implements UCD_Types {
// HACK COPY for rule collection!
if (collectingRules) {
- setRule("6: ATerm ( Numeric | Lower )");
- setRule("7: Upper ATerm Upper");
- setRule("8: ATerm Close* Sp* ( (OLetter | Upper | Lower) )* Lower");
- setRule("9: ( Term | ATerm ) Close* ( Close | Sp | Sep )");
- setRule("10: ( Term | ATerm ) Close* Sp ( Sp | Sep )");
- setRule("11: ( Term | ATerm ) Close* Sp* ");
- setRule("12: Any Any");
+ setRule("6: ATerm × ( Numeric | Lower )");
+ setRule("7: Upper ATerm × Upper");
+ setRule("8: ATerm Close* Sp* × ( ¬(OLetter | Upper | Lower) )* Lower");
+ setRule("9: ( Term | ATerm ) Close* × ( Close | Sp | Sep )");
+ setRule("10: ( Term | ATerm ) Close* Sp × ( Sp | Sep )");
+ setRule("11: ( Term | ATerm ) Close* Sp* ÷");
+ setRule("12: Any × Any");
collectingRules = false;
}
// Do not break after ambiguous terminators like period, if immediately followed by a number or lowercase letter, is between uppercase letters, or if the first following letter (optionally after certain punctuation) is lowercase. For example, a period may be an abbreviation or numeric period, and not mark the end of a sentence.
if (before == ATerm) {
- setRule("6: ATerm ( Numeric | Lower )");
+ setRule("6: ATerm × ( Numeric | Lower )");
if (after == Lower || after == Numeric) return false;
- setRule("7: Upper ATerm Upper");
+ setRule("7: Upper ATerm × Upper");
if (DEBUG_GRAPHEMES) System.out.println(context + ", " + Upper);
if (before2 == Upper && after == Upper) return false;
}
@@ -1736,17 +1736,17 @@ abstract public class GenerateBreakTest implements UCD_Types {
if (lookAfter == -1) {
// Otherwise, do not break
- // Any Any (11)
- setRule("12: Any Any");
+ // Any × Any (12)
+ setRule("12: Any × Any");
return false;
}
- // ATerm Close* Sp*(( OLetter))* Lower(8)
+ // ATerm Close* Sp*×(¬( OLetter))* Lower(8)
// Break after sentence terminators, but include closing punctuation, trailing spaces, and (optionally) a paragraph separator.
- // ( Term | ATerm ) Close*( Close | Sp | Sep )(9)
- // ( Term | ATerm ) Close* Sp( Sp | Sep )(10)
- // ( Term | ATerm ) Close* Sp*(11)
+ // ( Term | ATerm ) Close*×( Close | Sp | Sep )(9)
+ // ( Term | ATerm ) Close* Sp×( Sp | Sep )(10)
+ // ( Term | ATerm ) Close* Sp*÷(11)
// We DID find one. Loop to see if the right side is ok.
@@ -1764,16 +1764,16 @@ abstract public class GenerateBreakTest implements UCD_Types {
if (isFirst) {
isFirst = false;
if (lookAfter == ATerm && t == Upper) {
- setRule("8: ATerm Close* Sp* ( (OLetter | Upper | Lower) )* Lower");
+ setRule("8: ATerm Close* Sp* × ( ¬(OLetter | Upper | Lower) )* Lower");
return false;
}
if (gotSpace) {
if (t == Sp || t == Sep) {
- setRule("10: ( Term | ATerm ) Close* Sp ( Sp | Sep )");
+ setRule("10: ( Term | ATerm ) Close* Sp × ( Sp | Sep )");
return false;
}
} else if (t == Close || t == Sp || t == Sep) {
- setRule("9: ( Term | ATerm ) Close* ( Close | Sp | Sep )");
+ setRule("9: ( Term | ATerm ) Close* × ( Close | Sp | Sep )");
return false;
}
if (lookAfter == Term) break;
@@ -1782,12 +1782,12 @@ abstract public class GenerateBreakTest implements UCD_Types {
// at this point, we have an ATerm. All other conditions are ok, but we need to verify 6
if (t != OLetter && t != Upper && t != Lower) continue;
if (t == Lower) {
- setRule("8: ATerm Close* Sp* ( (OLetter | Upper | Lower) )* Lower");
+ setRule("8: ATerm Close* Sp* × ( ¬(OLetter | Upper | Lower) )* Lower");
return false;
}
break;
}
- setRule("11: ( Term | ATerm ) Close* Sp* ");
+ setRule("11: ( Term | ATerm ) Close* Sp* ÷");
return true;
}
}
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
index 3c4fc7a9a65..6816bd49df8 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $
-* $Date: 2004/02/07 01:01:15 $
-* $Revision: 1.14 $
+* $Date: 2004/04/17 18:21:39 $
+* $Revision: 1.15 $
*
*******************************************************************************
*/
@@ -547,18 +547,18 @@ public final class GenerateHanTransliterator implements UCD_Types {
"e", "ei", "er", "en", "eng",
"i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong",
"u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng",
- "", "e", "an", "n"
+ "ü", "üe", "üan", "ün"
};
// Don't bother with the following rules; just add w,y to initials
- // When i stands alone, a y will be added before it as yi.
- // If i is the first letter of the syllable it will be changed to y.
- // When u stands alone, a w will be added before it as wu.
- // If u is the first letter of the syllable it will be changed to w. e.g. uang -> wang.
- // When stands alone, a y will be added before it and will be changed to u as yu.
- // If is the first letter of the syllable, then the spelling will be changed to yu. e.g. an -> yuan.
- //Note: The nasal final ueng never occurs after an initial but always form a syllable by itself.
- // The o in iou is hidden, so it will be wrote as iu. But, dont forget to pronounce it.
- // The e in uei is hidden, so it will be wrote as ui. But, dont forget to pronounce it.
+ // When “i” stands alone, a “y” will be added before it as “yi”.
+ // If “i” is the first letter of the syllable it will be changed to “y”.
+ // When “u” stands alone, a “w” will be added before it as “wu”.
+ // If “u” is the first letter of the syllable it will be changed to “w”. e.g. “uang -> wang”.
+ // When “ü” stands alone, a “y” will be added before it and “ü” will be changed to “u” as “yu”.
+ // If “ü” is the first letter of the syllable, then the spelling will be changed to “yu”. e.g. “üan -> yuan”.
+ //Note: The nasal final “ueng” never occurs after an initial but always forms a syllable by itself.
+ // The “o” in “iou” is hidden, so it will be written as “iu”. But, don’t forget to pronounce it.
+ // The “e” in “uei” is hidden, so it will be written as “ui”. But, don’t forget to pronounce it.
public static final String[] pinyin_bopomofo = {
@@ -749,9 +749,9 @@ public final class GenerateHanTransliterator implements UCD_Types {
"long", "\u310c\u3128\u3125",
"lou", "\u310c\u3121",
"lu", "\u310c\u3128",
- "l", "\u310c\u3129",
+ "lü", "\u310c\u3129",
"luan", "\u310c\u3128\u3122",
- "le", "\u310c\u3129\u311d",
+ "lüe", "\u310c\u3129\u311d",
"lun", "\u310c\u3128\u3123",
"luo", "\u310c\u3128\u311b",
"m", "\u3107",
@@ -796,9 +796,9 @@ public final class GenerateHanTransliterator implements UCD_Types {
"nong", "\u310b\u3128\u3125",
"nou", "\u310b\u3121",
"nu", "\u310b\u3128",
- "n", "\u310b\u3129",
+ "nü", "\u310b\u3129",
"nuan", "\u310b\u3128\u3122",
- "ne", "\u310b\u3129\u311d",
+ "nüe", "\u310b\u3129\u311d",
"nuo", "\u310b\u3128\u311b",
"o", "\u311b",
"ou", "\u3121",
@@ -1007,52 +1007,52 @@ public final class GenerateHanTransliterator implements UCD_Types {
}
/*
- U+347C li #lyu
-U+3500 l #lv
-U+3527 li #ly
-U+3729 o #u
-U+380E j #jj
-U+3825 l #lv
-U+3A3C l #lu
-U+3B5A li #ly *** l?
-U+3CB6 l #lv
-U+3D56 ni #ny *** n?
-U+3D88 ling #ling
-U+3EF2 li #ly*** l?
-U+3F94 li #ly*** l?
-U+4071 o #u
-U+40AE li #lyu *** le?
-U+430E li #lyu *** le?
-U+451E li #ly *** l?
-U+4588 n #nu
-U+458B n #nu
-U+45A1 ni #ny *** n?
-U+4610 ni #ny *** n?
-U+46BC ni #ny *** n?
-U+46DA li #lyu *** le?
-U+4896 li #ly *** l?
-U+4923 li #lyu *** le?
-U+4968 li #ly *** l?
-U+4A0B ni #nyu *** ne?
-U+4AC4 chu #chu
-U+4D08 o #u
-U+4D8A ni #ny *** n?
-U+51CA qng #qng
-U+51D6 zhun #zhun *** this is probably zhn
-U+5481 gn #gm
-U+5838 fng #fng
-U+639F l #lu *** this pronunciation surprises me, but I don't know...
-U+66D5 yn #yin
-U+6B3B chu #chu *** chua _is_ ok after all, my table missed an entry
-U+6B56 chu #chu *** chua
-U+6C7C ni #niu
-U+6E6D qi #qiu
-U+6F71 y #yi
-U+7493 xi #xiu
-U+7607 zhng #zhng *** I suspect zhng
-U+7674 lun #ln
-U+7867 yng #ing
-U+7878 n #nu
+ U+347C · liù #lyuè
+U+3500 · lüè #lvè
+U+3527 · liù #lyù
+U+3729 · ào #àu
+U+380E · jí #jjí
+U+3825 · l· #lv·
+U+3A3C · lüè #luè
+U+3B5A · li· #ly· *** lü?
+U+3CB6 · l· #lv·
+U+3D56 · niù #nyù *** nü?
+U+3D88 · li·ng #li·ng
+U+3EF2 · li· #ly·*** lü?
+U+3F94 · li· #ly·*** lü?
+U+4071 · ào #àu
+U+40AE · liù #lyuè *** lüe?
+U+430E · liù #lyuè *** lüe?
+U+451E · liù #lyù *** lü?
+U+4588 · nüè #nuè
+U+458B · nüè #nuè
+U+45A1 · niù #nyù *** nü?
+U+4610 · niù #nyù *** nü?
+U+46BC · niù #nyù *** nü?
+U+46DA · liù #lyuè *** lüe?
+U+4896 · liù #lyù *** lü?
+U+4923 · liù #lyuè *** lüe?
+U+4968 · liù #lyù *** lü?
+U+4A0B · niù #nyuè *** nüe?
+U+4AC4 · chuò #chuà
+U+4D08 · ·o #·u
+U+4D8A · niù #nyù *** nü?
+U+51CA · qíng #qýng
+U+51D6 · zhu·n #zhu·n *** this is probably zh·n
+U+5481 · gàn #gèm
+U+5838 · féng #fúng
+U+639F · lü· #lu· *** this pronunciation surprises me, but I don't know...
+U+66D5 · yàn #yiàn
+U+6B3B · chu· #chu· *** chua _is_ ok after all, my table missed an entry
+U+6B56 · chu· #chu· *** chua
+U+6C7C · ni· #ni·u
+U+6E6D · qiú #qióu
+U+6F71 · y· #yi·
+U+7493 · xiù #xiòu
+U+7607 · zh·ng #zh·ng *** I suspect zh·ng
+U+7674 · luán #lüán
+U+7867 · y·ng #i·ng
+U+7878 · nüè #nuè
*/
static Transliterator fixTypos = Transliterator.createFromRules("fix_typos",
@@ -1061,12 +1061,12 @@ U+7878
+"$cons{iou}$nlet > iu;"
+"$cons{em}$nlet > an;"
+"$cons{uen}$nlet > ueng;"
- +"$cons{ve}$nlet > e;"
- +"$cons{v}$nlet > ;"
+ +"$cons{ve}$nlet > üe;"
+ +"$cons{v}$nlet > ü;"
+"$cons{yue}$nlet > iu;"
+"$cons{yng}$nlet > ing;"
+"$cons{yu}$nlet > iu;"
- //+"$cons{ue} > e;"
+ //+"$cons{ue} > üe;"
+"jj > j;"
//+"$nlet{ng}$nlet > eng;"
//+"$nlet{n}$nlet > en;"
@@ -1076,13 +1076,13 @@ U+7878
// new fixes
+"zhueng}$nlet > zhong;"
+"zhuen}$nlet > zhuan;"
- +"lue > le;"
+ +"lue > lüe;"
+"liong > liang;"
- +"nue > ne;"
+ +"nue > nüe;"
+"chua > chuo;"
+"yian > yan;"
+"yie > ye;"
- +"lan > luan;"
+ +"lüan > luan;"
+"iong > yong;"
, Transliterator.FORWARD);
@@ -1113,7 +1113,7 @@ U+7878
try {
// chinese_frequency.txt
- // 1 的 1588561 1588561 3.5008%
+ // 1 的 1588561 1588561 3.5008%
// japanese_frequency.txt
// 1 ? 17176
@@ -1421,7 +1421,7 @@ U+7878
@Unihan Data
Bad pinyin data: \u4E7F ? LE
-\u7684 ? de, de, d, d
+\u7684 ? de, de, dí, dì
*/
static void fixChineseOverrides() throws IOException {
@@ -2024,7 +2024,7 @@ Bad pinyin data: \u4E7F ? LE
+ "# otherwise 'o'\n"
+ "# otherwise last vowel\n"
+ "::NFC;\n"
- + "$vowel = [aAeEiIoOuU];\n"
+ + "$vowel = [aAeEiIoOuUüÜ];\n"
+ "$consonant = [[a-z A-Z] - [$vowel]];\n"
+ "$digit = [1-5];\n"
+ "([aAeE]) ($vowel* $consonant*) ($digit) > $1 &digit-tone($3) $2;\n"
@@ -2054,10 +2054,10 @@ Bad pinyin data: \u4E7F ? LE
if (i > 0) {
char last = result.charAt(result.length()-1);
if (last == 'u') {
- result.setCharAt(result.length()-1, '');
+ result.setCharAt(result.length()-1, 'ü');
continue main;
} else if (last == 'U') {
- result.setCharAt(result.length()-1, '');
+ result.setCharAt(result.length()-1, 'Ü');
continue main;
}
}
@@ -2085,22 +2085,22 @@ Bad pinyin data: \u4E7F ? LE
for (int i = source.length()-2; i >= 0; --i) {
ch = source.charAt(i);
if (ch == ':') {
- ch = '';
+ ch = 'Ü';
--i;
}
if ('0' <= ch && ch <= '9') break;
- if (ch != '' && (ch < 'A' || ch > 'Z')) {
+ if (ch != 'Ü' && (ch < 'A' || ch > 'Z')) {
Utility.fixDot();
System.out.println("Warning: non-ASCII in " + hex.transliterate(source) + " (" + hex.transliterate(debugLine) + ")");
break;
}
if (!gotIt) switch (ch) {
- case 'A': ch = "A\u0102\u0100".charAt(num); gotIt = true; break;
- case 'E': ch = "E\u0114\u0112".charAt(num); gotIt = true; break;
- case 'I': ch = "I\u012C\u012A".charAt(num); gotIt = true; break;
- case 'O': ch = "O\u014E\u014C".charAt(num); gotIt = true; break;
- case 'U': ch = "U\u016C\u016A".charAt(num); gotIt = true; break;
- case '': ch = "\u01D7\u01D9\u01DB\u01D5".charAt(num); gotIt = true; break;
+ case 'A': ch = "AÁ\u0102À\u0100".charAt(num); gotIt = true; break;
+ case 'E': ch = "EÉ\u0114È\u0112".charAt(num); gotIt = true; break;
+ case 'I': ch = "IÍ\u012CÌ\u012A".charAt(num); gotIt = true; break;
+ case 'O': ch = "OÓ\u014EÒ\u014C".charAt(num); gotIt = true; break;
+ case 'U': ch = "UÚ\u016CÙ\u016A".charAt(num); gotIt = true; break;
+ case 'Ü': ch = "Ü\u01D7\u01D9\u01DB\u01D5".charAt(num); gotIt = true; break;
}
handlePinyinTemp.insert(0,ch);
}
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java b/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java
index fbb451d2465..f77a3760400 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java,v $
-* $Date: 2004/02/07 01:01:15 $
-* $Revision: 1.4 $
+* $Date: 2004/04/17 18:21:39 $
+* $Revision: 1.5 $
*
*******************************************************************************
*/
@@ -398,23 +398,23 @@ public class GenerateLineBreakTest implements UCD_Types {
if (before == LB_CR && after == LB_LF) return false;
if (before == LB_BK || before == LB_LF || before == LB_CR) return true;
- //LB 3b Dont break before hard line breaks.
+ //LB 3b Don’t break before hard line breaks.
rule="3b";
if (after == LB_BK || after == LB_LF | after == LB_CR) return false;
- // LB 4 Dont break before spaces or zero-width space.
- // SP
- // ZW
+ // LB 4 Don’t break before spaces or zero-width space.
+ // × SP
+ // × ZW
rule="4";
if (after == LB_SP || after == LB_ZW) return false;
// LB 5 Break after zero-width space.
- // ZW
+ // ZW ÷
rule="5";
if (before == LB_ZW) return true;
- // LB 6 Dont break graphemes (before combining marks, around virama or on sequences of conjoining Jamos.
+ // LB 6 Don’t break graphemes (before combining marks, around virama or on sequences of conjoining Jamos).
rule="6";
if (after == LB_CM) return false;
@@ -441,8 +441,8 @@ public class GenerateLineBreakTest implements UCD_Types {
rule="7";
if (setBase && before == LB_SP) before = LB_ID;
- // LB 8 Dont break before ] or ! or ; or /, even after spaces.
- // CL, EX, IS, SY
+ // LB 8 Don’t break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces.
+ // × CL, × EX, × IS, × SY
rule="8";
if (after == LB_CL || after == LB_EX || after == LB_SY | after == LB_IS) return false;
@@ -456,31 +456,31 @@ public class GenerateLineBreakTest implements UCD_Types {
}
}
- // LB 9 Dont break after [, even after spaces.
- // OP SP*
+ // LB 9 Don’t break after ‘[’, even after spaces.
+ // OP SP* ×
rule="9";
if (lastNonSpace == LB_OP) return false;
- // LB 10 Dont break within [, , even with intervening spaces.
- // QU SP* OP
+ // LB 10 Don’t break within ‘”[’, even with intervening spaces.
+ // QU SP* × OP
rule="10";
if (lastNonSpace == LB_QU && after == LB_OP) return false;
- // LB 11 Dont break within ]h, even with intervening spaces.
- // CL SP* NS
+ // LB 11 Don’t break within ‘]h’, even with intervening spaces.
+ // CL SP* × NS
rule="11";
if (lastNonSpace == LB_CL && after == LB_NS) return false;
- // LB 11a Dont break within , even with intervening spaces.
- // B2 B2
+ // LB 11a Don’t break within ‘——’, even with intervening spaces.
+ // B2 × B2
rule="11a";
if (lastNonSpace == LB_B2 && after == LB_B2) return false;
if (recommended) {
- // LB 13 Dont break before or after NBSP or WORD JOINER
- // GL
- // GL
+ // LB 13 Don’t break before or after NBSP or WORD JOINER
+ // × GL
+ // GL ×
rule="11b";
if (after == LB_GL || before == LB_GL) return false;
@@ -490,36 +490,36 @@ public class GenerateLineBreakTest implements UCD_Types {
rule="12";
// LB 12 Break after spaces
- // SP
+ // SP ÷
if (before == LB_SP) return true;
if (!recommended) {
- // LB 13 Dont break before or after NBSP or WORD JOINER
- // GL
- // GL
+ // LB 13 Don’t break before or after NBSP or WORD JOINER
+ // × GL
+ // GL ×
rule="13";
if (after == LB_GL || before == LB_GL) return false;
}
rule="14";
- // LB 14 Dont break before or after
- // QU
- // QU
+ // LB 14 Don’t break before or after ‘”’
+ // × QU
+ // QU ×
if (before == LB_QU || after == LB_QU) return false;
- // LB 15 Dont break before hyphen-minus, other hyphens, fixed-width spaces,
+ // LB 15 Don’t break before hyphen-minus, other hyphens, fixed-width spaces,
// small kana and other non- starters, or after acute accents:
- // BA
- // HY
- // NS
- // BB
+ // × BA
+ // × HY
+ // × NS
+ // BB ×
if (recommended) {
// LB 14a Break before and after CB
- // CB
- // CB
+ // CB ÷
+ // ÷ CB
if (before == LB_CB || after == LB_CB) return true;
}
@@ -532,51 +532,51 @@ public class GenerateLineBreakTest implements UCD_Types {
if (!recommended) {
// LB 15b Break after hyphen-minus, and before acute accents:
- // HY
- // BB
+ // HY ÷
+ // ÷ BB
rule="15b";
if (before == LB_HY) return true;
if (after == LB_BB) return true;
}
- // LB 16 Dont break between two ellipses, or between letters or numbers and ellipsis:
- // AL IN
- // ID IN
- // IN IN
- // NU IN
- // Examples: 9..., a..., H...
+ // LB 16 Don’t break between two ellipses, or between letters or numbers and ellipsis:
+ // AL × IN
+ // ID × IN
+ // IN × IN
+ // NU × IN
+ // Examples: ‘9...’, ‘a...’, ‘H...’
rule="16";
if ((before == LB_NU || before == LB_AL || before == LB_ID) && after == LB_IN) return false;
if (before == LB_IN && after == LB_IN) return false;
// Don't break alphanumerics.
- // LB 17 Dont break within a9, 3a, or H%
- // ID PO
- // AL NU
- // NU AL
+ // LB 17 Don’t break within ‘a9’, ‘3a’, or ‘H%’
+ // ID × PO
+ // AL × NU
+ // NU × AL
// Numbers are of the form PR ? ( OP | HY ) ? NU (NU | IS) * CL ? PO ?
- // Examples: $(12.35) 2,1234 (12) 12.54
+ // Examples: $(12.35) 2,1234 (12)¢ 12.54¢
// This is approximated with the following rules. (Some cases already handled above,
- // like 9,, [9.)
+ // like ‘9,’, ‘[9’.)
rule="17";
if (before == LB_ID && after == LB_PO) return false;
if (before == LB_AL && after == LB_NU) return false;
if (before == LB_NU && after == LB_AL) return false;
- // LB 18 Dont break between the following pairs of classes.
- // CL PO
- // HY NU
- // IS NU
- // NU NU
- // NU PO
- // PR AL
- // PR HY
- // PR ID
- // PR NU
- // PR OP
- // SY NU
- // Example pairs: $9, $[, $-, -9, /9, 99, ,9, 9% ]%
+ // LB 18 Don’t break between the following pairs of classes.
+ // CL × PO
+ // HY × NU
+ // IS × NU
+ // NU × NU
+ // NU × PO
+ // PR × AL
+ // PR × HY
+ // PR × ID
+ // PR × NU
+ // PR × OP
+ // SY × NU
+ // Example pairs: ‘$9’, ‘$[’, ‘$-’, ‘-9’, ‘/9’, ‘99’, ‘,9’, ‘9%’, ‘]%’
rule="18";
if (before == LB_CL && after == LB_PO) return false;
@@ -595,23 +595,23 @@ public class GenerateLineBreakTest implements UCD_Types {
if (recommended) {
// LB 15b Break after hyphen-minus, and before acute accents:
- // HY
- // BB
+ // HY ÷
+ // ÷ BB
rule="18b";
if (before == LB_HY) return true;
if (after == LB_BB) return true;
}
- // LB 19 Dont break between alphabetics (at)
- // AL AL
+ // LB 19 Don’t break between alphabetics (“at”)
+ // AL × AL
rule="19";
if (before == LB_AL && after == LB_AL) return false;
// LB 20 Break everywhere else
- // ALL
- // ALL
+ // ALL ÷
+ // ÷ ALL
rule="20";
return true;
@@ -754,7 +754,7 @@ public class GenerateLineBreakTest implements UCD_Types {
// Do not break between linking characters and letters, or before linking characters. This provides for Indic graphemes, where virama (halant) will link character clusters together.
rule = "12";
- //Link Extend* LetterBase (12)
+ //Link Extend* × LetterBase (12)
if (after == LetterBase || after == L || after == V || after == T || after == LV || after == LVT) {
int backOffset = findLastNon(source, offset, Extend, recommended);
if (backOffset >= 0) {
diff --git a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
index f175b5e531e..9e272ecfdcb 100644
--- a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
@@ -1169,6 +1169,7 @@ public class MakeUnicodeFiles {
String line = in.readLine();
if (line == null) break;
if (line.startsWith("\uFEFF")) line = line.substring(1);
+ out.println(line);
line = line.trim();
int pos = line.indexOf('#');
if (pos >= 0) line = line.substring(0,pos).trim();
@@ -1232,9 +1233,9 @@ public class MakeUnicodeFiles {
break;
default: throw new IllegalArgumentException("Internal Error");
}
- out.println(String.valueOf(ok).toUpperCase(Locale.ENGLISH) + ":\t" + line);
if (ok) continue;
out.println();
+ out.println(String.valueOf(ok).toUpperCase(Locale.ENGLISH));
out.println("**** START Error Info ****");
bf.showSetDifferences(out, rightSide, rightSet, leftSide, leftSet);
out.println("**** END Error Info ****");
diff --git a/tools/unicodetools/com/ibm/text/UCD/Normalizer.java b/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
index beedcc1dd69..163bf2de87a 100644
--- a/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Normalizer.java,v $
-* $Date: 2004/02/06 18:30:20 $
-* $Revision: 1.15 $
+* $Date: 2004/04/17 18:21:39 $
+* $Revision: 1.16 $
*
*******************************************************************************
*/
@@ -22,7 +22,7 @@ import com.ibm.text.utility.*;
/**
* Implements Unicode Normalization Forms C, D, KC, KD.
* See UTR#15 for details.
- * Copyright 1998-1999 Unicode, Inc. All Rights Reserved.
+ * Copyright © 1998-1999 Unicode, Inc. All Rights Reserved.
* The Unicode Consortium makes no expressed or implied warranty of any
* kind, and assumes no liability for errors or omissions.
* No liability is assumed for incidental and consequential damages
diff --git a/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java b/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
index acd52ebe4bf..9c780e844e4 100644
--- a/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
+++ b/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
@@ -10,7 +10,7 @@ import com.ibm.text.utility.*;
/**
* Implements Unicode Normalization Forms C, D, KC, KD.
* See UTR#15 for details.
- * Copyright 1998-1999 Unicode, Inc. All Rights Reserved.
+ * Copyright © 1998-1999 Unicode, Inc. All Rights Reserved.
* The Unicode Consortium makes no expressed or implied warranty of any
* kind, and assumes no liability for errors or omissions.
* No liability is assumed for incidental and consequential damages
diff --git a/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt b/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt
index bc101f29ef0..6d587175267 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt
@@ -41,6 +41,8 @@
#$East_Asian_Width:Neutral ? $GC:Uppercase_Letter
$GC:Zs ? $Name:«.*SPACE.*»
+[$script:greek&$gc:«.*letter.*»] = [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126]
+
# Examples of parsing errors
# $LBA:Neutral = $GC:Zp # example of non-existant property
@@ -54,7 +56,35 @@ $Line_Break:Unknown = [$General_Category:Unassigned $GeneralCategory:PrivateUse]
$LB:OP = $GC:Ps
$General_Category:Decimal_Number = $Numeric_Type:Decimal
$Whitespace ⊃ [$GC:Zs $GC:Zp $GC:Zl]
+
+# Comparisons across versions
+
$ID_Start ⊇ $×ID_Start
$ID_Continue ⊇ $×ID_Continue
+#$age:4.0.1 = $age4.0.0
+# Derivations
+
+$Math = [$GC:Sm $Other_Math]
+$Alphabetic = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_Alphabetic]
+$Lowercase = [$GC:Ll $Other_Lowercase]
+$Uppercase = [$GC:Lu $Other_Uppercase]
+$ID_Start = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_ID_Start]
+$ID_Continue = [$ID_Start $GC:Mn $GC:Mc $GC:Nd $GC:Pc]
+$Default_Ignorable_Code_Point = [[$Other_Default_Ignorable_Code_Point $GC:Cf $GC:Cc $GC:Cs $Variation_Selector $Noncharacter_Code_Point] - [$White_Space\uFFF9-\uFFFB]]
+$Grapheme_Extend = [$GC:Me $GC:Mn $Other_Grapheme_Extend]
+$Grapheme_Base = [^$GC:Cc $GC:Cf $GC:Cs $GC:Co $GC:Cn $GC:Zl $GC:Zp $Grapheme_Extend]
+
+# "Minimal" Other_: NOT hard requirements; just if we want to be minimal
+
+$Other_Math = [$Math - $GC:Sm]
+$Other_Alphabetic = [$Alphabetic - [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl]]
+$Other_Lowercase = [$Lowercase - $GC:Ll]
+$Other_Uppercase = [$Uppercase - $GC:Lu]
+$Other_ID_Start = [$ID_Start - [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl]]
+$Other_Default_Ignorable_Code_Point = [$Default_Ignorable_Code_Point - [[$GC:Cf $GC:Cc $GC:Cs $Variation_Selector $Noncharacter_Code_Point] - [$White_Space\uFFF9-\uFFFB]]]
+$Other_Grapheme_Extend = [$Grapheme_Extend - [$GC:Me $GC:Mn]]
+
+# Testing
+$script:greek = $×script:greek
diff --git a/tools/unicodetools/com/ibm/text/utility/UTF32.java b/tools/unicodetools/com/ibm/text/utility/UTF32.java
index 5e34251930c..138abbcfa15 100644
--- a/tools/unicodetools/com/ibm/text/utility/UTF32.java
+++ b/tools/unicodetools/com/ibm/text/utility/UTF32.java
@@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/UTF32.java,v $
-* $Date: 2001/08/31 00:19:16 $
-* $Revision: 1.2 $
+* $Date: 2004/04/17 18:21:38 $
+* $Revision: 1.3 $
*
*******************************************************************************
*/
@@ -53,28 +53,28 @@ package com.ibm.text.utility;
// iteration forwards: Original for (int i = 0; i < s.length(); ++i) { -char ch = s.charAt(i); -doSomethingWith(ch); + char ch = s.charAt(i); + doSomethingWith(ch); } // iteration forwards: Changes for UTF-32 int ch; for (int i = 0; i < s.length(); i+=UTF32.count16(ch)) { -ch = UTF32.char32At(s,i); -doSomethingWith(ch); + ch = UTF32.char32At(s,i); + doSomethingWith(ch); } // iteration backwards: Original for (int i = s.length()-1; i >= 0; --i) { -char ch = s.charAt(i); -doSomethingWith(ch); + char ch = s.charAt(i); + doSomethingWith(ch); } // iteration backwards: Changes for UTF-32 int ch; for (int i = s.length()-1; i > 0; i-=UTF32.count16(ch)) { -ch = UTF32.char32At(s,i); -doSomethingWith(ch); + ch = UTF32.char32At(s,i); + doSomethingWith(ch); } *