From 820a963b3b5bd95adcea88a9180e7338e0ab9b91 Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Fri, 30 Jun 2000 00:00:30 +0000 Subject: [PATCH] Add Hiragana-Katakana transliterator X-SVN-Rev: 1715 --- .../dev/test/translit/TransliteratorTest.java | 40 +++- .../icu/dev/tool/translit/dumpICUrules.bat | 1 + .../com/ibm/icu/impl/data/LocaleElements.java | 7 +- .../ibm/test/translit/TransliteratorTest.java | 40 +++- .../ibm/text/resources/LocaleElements.java | 7 +- ...TransliterationRule_Hiragana_Katakana.java | 221 ++++++++++++++++++ .../com/ibm/tools/translit/dumpICUrules.bat | 1 + 7 files changed, 309 insertions(+), 8 deletions(-) create mode 100755 icu4j/src/com/ibm/text/resources/TransliterationRule_Hiragana_Katakana.java diff --git a/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java index 987f701356d..739d604e4f9 100755 --- a/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java +++ b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $ - * $Date: 2000/06/29 21:59:36 $ - * $Revision: 1.23 $ + * $Date: 2000/06/30 00:00:21 $ + * $Revision: 1.24 $ * ***************************************************************************************** */ @@ -604,6 +604,42 @@ public class TransliteratorTest extends TestFmwk { } } + /** + * Test the Hiragana-Katakana transliterator. + */ + public void TestHiraganaKatakana() { + Transliterator hk = Transliterator.getInstance("Hiragana-Katakana"); + Transliterator kh = Transliterator.getInstance("Katakana-Hiragana"); + + // Array of 3n items + // Each item is "hk"|"kh"|"both", , + String[] DATA = { + "both", + "\u3042\u3090\u3099\u3092\u3050", + "\u30A2\u30F8\u30F2\u30B0", + + "kh", + "\u307C\u3051\u3060\u3042\u3093\u30FC", + "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC", + }; + + for (int i=0; i, + String[] DATA = { + "both", + "\u3042\u3090\u3099\u3092\u3050", + "\u30A2\u30F8\u30F2\u30B0", + + "kh", + "\u307C\u3051\u3060\u3042\u3093\u30FC", + "\u30DC\u30F6\u30C0\u30FC\u30F3\u30FC", + }; + + for (int i=0; i \u30F7;" + + "\u3090\u3099 <> \u30F8;" + + "\u3091\u3099 <> \u30F9;" + + "\u3092\u3099 <> \u30FA;" + + + // One-to-one mappings, main block + // 3041:3094 <> 30A1:30F4 + // 309D,E <> 30FD,E + "\u3041 <> \u30A1;" + + "\u3042 <> \u30A2;" + + "\u3043 <> \u30A3;" + + "\u3044 <> \u30A4;" + + "\u3045 <> \u30A5;" + + "\u3046 <> \u30A6;" + + "\u3047 <> \u30A7;" + + "\u3048 <> \u30A8;" + + "\u3049 <> \u30A9;" + + "\u304A <> \u30AA;" + + "\u304B <> \u30AB;" + + "\u304C <> \u30AC;" + + "\u304D <> \u30AD;" + + "\u304E <> \u30AE;" + + "\u304F <> \u30AF;" + + "\u3050 <> \u30B0;" + + "\u3051 <> \u30B1;" + + "\u3052 <> \u30B2;" + + "\u3053 <> \u30B3;" + + "\u3054 <> \u30B4;" + + "\u3055 <> \u30B5;" + + "\u3056 <> \u30B6;" + + "\u3057 <> \u30B7;" + + "\u3058 <> \u30B8;" + + "\u3059 <> \u30B9;" + + "\u305A <> \u30BA;" + + "\u305B <> \u30BB;" + + "\u305C <> \u30BC;" + + "\u305D <> \u30BD;" + + "\u305E <> \u30BE;" + + "\u305F <> \u30BF;" + + "\u3060 <> \u30C0;" + + "\u3061 <> \u30C1;" + + "\u3062 <> \u30C2;" + + "\u3063 <> \u30C3;" + + "\u3064 <> \u30C4;" + + "\u3065 <> \u30C5;" + + "\u3066 <> \u30C6;" + + "\u3067 <> \u30C7;" + + "\u3068 <> \u30C8;" + + "\u3069 <> \u30C9;" + + "\u306A <> \u30CA;" + + "\u306B <> \u30CB;" + + "\u306C <> \u30CC;" + + "\u306D <> \u30CD;" + + "\u306E <> \u30CE;" + + "\u306F <> \u30CF;" + + "\u3070 <> \u30D0;" + + "\u3071 <> \u30D1;" + + "\u3072 <> \u30D2;" + + "\u3073 <> \u30D3;" + + "\u3074 <> \u30D4;" + + "\u3075 <> \u30D5;" + + "\u3076 <> \u30D6;" + + "\u3077 <> \u30D7;" + + "\u3078 <> \u30D8;" + + "\u3079 <> \u30D9;" + + "\u307A <> \u30DA;" + + "\u307B <> \u30DB;" + + "\u307C <> \u30DC;" + + "\u307D <> \u30DD;" + + "\u307E <> \u30DE;" + + "\u307F <> \u30DF;" + + "\u3080 <> \u30E0;" + + "\u3081 <> \u30E1;" + + "\u3082 <> \u30E2;" + + "\u3083 <> \u30E3;" + + "\u3084 <> \u30E4;" + + "\u3085 <> \u30E5;" + + "\u3086 <> \u30E6;" + + "\u3087 <> \u30E7;" + + "\u3088 <> \u30E8;" + + "\u3089 <> \u30E9;" + + "\u308A <> \u30EA;" + + "\u308B <> \u30EB;" + + "\u308C <> \u30EC;" + + "\u308D <> \u30ED;" + + "\u308E <> \u30EE;" + + "\u308F <> \u30EF;" + + "\u3090 <> \u30F0;" + + "\u3091 <> \u30F1;" + + "\u3092 <> \u30F2;" + + "\u3093 <> \u30F3;" + + "\u3094 <> \u30F4;" + + "\u309D <> \u30FD;" + + "\u309E <> \u30FE;" + + + // Fallback; this is a one-way Katakana-Hiragana xform. + "\u304B < \u30F5;" + + "\u3051 < \u30F6;" + + + // Anything followed by a prolonged sound mark 30FC has + // its final vowel doubled. This is a Katakana-Hiragana + // one-way information-losing transformation. We + // include the small Katakana (e.g., small A 3041) and + // do not distinguish them from their large + // counterparts. It doesn't make sense to double a + // small counterpart vowel as a small Hiragana vowel, so + // we don't do so. In natural text this should never + // occur anyway. If a 30FC is seen without a preceding + // vowel sound (e.g., after n 30F3) we do not change it. + + "$long = \u30FC;" + + + // The following categories are Hiragana, not Katakana + // as might be expected, since by the time we get to the + // 30FC, the preceding character will have already been + // transformed to Hiragana. + + // {The following mechanically generated from the + // Unicode 3.0 data:} + + "$xa = [" + + "\u3041 \u3042 \u304B \u304C \u3055 \u3056" + + "\u305F \u3060 \u306A \u306F \u3070 \u3071" + + "\u307E \u3083 \u3084 \u3089 \u308E \u308F" + + "];" + + + "$xi = [" + + "\u3043 \u3044 \u304D \u304E \u3057 \u3058" + + "\u3061 \u3062 \u306B \u3072 \u3073 \u3074" + + "\u307F \u308A \u3090" + + "];" + + + "$xu = [" + + "\u3045 \u3046 \u304F \u3050 \u3059 \u305A" + + "\u3063 \u3064 \u3065 \u306C \u3075 \u3076" + + "\u3077 \u3080 \u3085 \u3086 \u308B \u3094" + + "];" + + + "$xe = [" + + "\u3047 \u3048 \u3051 \u3052 \u305B \u305C" + + "\u3066 \u3067 \u306D \u3078 \u3079 \u307A" + + "\u3081 \u308C \u3091" + + "];" + + + "$xo = [" + + "\u3049 \u304A \u3053 \u3054 \u305D \u305E" + + "\u3068 \u3069 \u306E \u307B \u307C \u307D" + + "\u3082 \u3087 \u3088 \u308D \u3092" + + "];" + + + "\u3042 < $xa {$long};" + + "\u3044 < $xi {$long};" + + "\u3046 < $xu {$long};" + + "\u3048 < $xe {$long};" + + "\u304A < $xo {$long};" + + + "" + } + }; + } +} diff --git a/icu4j/src/com/ibm/tools/translit/dumpICUrules.bat b/icu4j/src/com/ibm/tools/translit/dumpICUrules.bat index f637006b27c..a26c308c42e 100755 --- a/icu4j/src/com/ibm/tools/translit/dumpICUrules.bat +++ b/icu4j/src/com/ibm/tools/translit/dumpICUrules.bat @@ -58,6 +58,7 @@ $NAME_MAP = <<'END'; { "Latin-Hebrew", "Hebrew-Latin", "lhebrew" } { "Latin-Jamo", "Jamo-Latin", "ljamo" } { "Latin-Kana", "Kana-Latin", "lkana" } + { "Hiragana-Katakana", "Katakana-Hiragana", "kana" } // Other miscellaneous rules { "StraightQuotes-CurlyQuotes", "CurlyQuotes-StraightQuotes", "quotes" }