From b8870b2691155cd1dc67244fa6303c653511bdae Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Fri, 16 Apr 2004 14:16:48 +0000 Subject: [PATCH] ICU-3170 fixed the roundtrip test and two greek transliterators. Excluded some characters; added rules for others. X-SVN-Rev: 14994 --- .../icu/dev/test/translit/RoundTripTest.java | 15 ++++++++----- .../impl/data/Transliterator_Greek_Latin.txt | 21 ++++++++++++------- .../Transliterator_Greek_Latin_UNGEGN.txt | 20 ++++++++++++------ 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/icu4j/src/com/ibm/icu/dev/test/translit/RoundTripTest.java b/icu4j/src/com/ibm/icu/dev/test/translit/RoundTripTest.java index ef69175c8f6..500a374f7da 100755 --- a/icu4j/src/com/ibm/icu/dev/test/translit/RoundTripTest.java +++ b/icu4j/src/com/ibm/icu/dev/test/translit/RoundTripTest.java @@ -160,16 +160,21 @@ public class RoundTripTest extends TestFmwk { String getGreekSet() { // Time bomb - return isICU28() ? - "[[\u003B\u00B7[:Greek:]-[\u03D7-\u03EF]]&[:Age=3.2:]]" : - "[\u003B\u00B7[:Greek:]-[\u03D7-\u03EF]]"; + return + // isICU28() ? 
"[[\u003B\u00B7[:Greek:]-[\u03D7-\u03EF]]&[:Age=3.2:]]" : + "[\u003B\u00B7[[:Greek:]&[:Letter:]]-[" + + "\u1D26-\u1D2A" + // L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI + "\u1D5D-\u1D61" + // Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI + "\u1D66-\u1D6A" + // L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI + "\u03D7-\u03EF" + // \N{GREEK KAI SYMBOL}..\N{COPTIC SMALL LETTER DEI} + "]]"; } - + public void TestGreek() throws IOException, ParseException { long start = System.currentTimeMillis(); new Test("Latin-Greek", 50) .test("[a-zA-Z]", getGreekSet(), - "[\u00B5\u037A\u03D0-\u03F5]", /* roundtrip exclusions */ + "[\u00B5\u037A\u03D0-\u03F5\u03F9]", /* roundtrip exclusions */ this, new LegalGreek(true)); showElapsed(start, "TestGreek"); } diff --git a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin.txt b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin.txt index c2401282455..75f28271da8 100755 --- a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin.txt +++ b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin.txt @@ -3,14 +3,14 @@ # Corporation and others. All Rights Reserved. 
#-------------------------------------------------------------------- # $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Greek_Latin.txt,v $ -# $Date: 2002/07/21 08:39:23 $ -# $Revision: 1.21 $ +# $Date: 2004/04/16 14:16:48 $ +# $Revision: 1.22 $ #-------------------------------------------------------------------- # Rules are predicated on running NFD first, and NFC afterwards # :: [\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ; # MINIMAL FILTER GENERATED FOR: Greek-Latin -:: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126] ; +:: 
[;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u03F7-\u03FB\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u03F9] ; :: NFD (NFC) ; @@ -251,10 +251,16 @@ $smooth > ; ρ <> r ; Ρ <> R ; -# insert separator +# insert separator before things that turn into s -[Pp] { } ς > \' ; -[Pp] { } σ > \' ; +[Pp] { } [ςσΣϷϸϺϻ] > \' ; + +# special S variants + +Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L +ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L +Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L +ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L # underbar means exception @@ -270,7 +276,7 @@ $afterLetter { ς <> $afterLetter { s ; ς <> s $underbar; σ <> s ; -[Pp] { Σ <> \'S ; +# [Pp] { Σ <> \'S ; Σ <> S ; τ <> t ; @@ -322,6 +328,7 @@ $rough <> h ; ϰ > | κ ; ϱ > | ρ ; ϲ > | σ ; +Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL ϳ > j ; ϴ > | Θ ; ϵ > | ε ; diff --git a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin_UNGEGN.txt b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin_UNGEGN.txt index c98768f33a2..26ef88dd705 100644 --- a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin_UNGEGN.txt +++ 
b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Greek_Latin_UNGEGN.txt @@ -3,15 +3,15 @@ # Corporation and others. All Rights Reserved. #-------------------------------------------------------------------- # $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Greek_Latin_UNGEGN.txt,v $ -# $Date: 2002/07/26 16:09:04 $ -# $Revision: 1.2 $ +# $Date: 2004/04/16 14:16:47 $ +# $Revision: 1.3 $ #-------------------------------------------------------------------- # For modern Greek, based on UNGEGN rules. # Rules are predicated on running NFD first, and NFC afterwards # MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN # WARNING: need to add accents to both filters ### -# :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ] ; +# :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ; :: [[[:Greek:][:Mn:][:Me:]] [\:-;?\u00B7\u037E\u0387]] ; ::NFD (NFC) ; @@ -169,8 +169,15 @@ $fmaker { Υ <> U $under ; ρ <> r ; Ρ <> R ; -[Pp] { } ς > \' ; -[Pp] { } σ > \' ; +# insert separator before things that turn into s +[Pp] { } [ςσΣϷϸϺϻ] > \' ; + +# special S variants + +Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L +ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L +Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L +ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L # Caron means exception @@ -186,7 +193,7 @@ $afterLetter { ς <> $afterLetter { s ; ς <> s $under; σ <> s ; -[Pp] { Σ <> \'S ; +# [Pp] { Σ <> \'S ; Σ <> S ; τ <> t ; @@ -232,6 +239,7 @@ $afterLetter { ς <> $afterLetter { s ; ϰ > | κ ; ϱ > | ρ ; ϲ > | σ ; +Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL ϳ > j ; ϴ > | Θ ; ϵ > | ε ;