From 123132b8e69b81f8755eedf137af1347ea0a6e0c Mon Sep 17 00:00:00 2001 From: Ram Viswanadha Date: Mon, 2 Aug 2004 20:06:55 +0000 Subject: [PATCH] ICU-3925 separate Transliterator data to its own tree X-SVN-Rev: 16095 --- icu4c/source/data/makedata.mak | 55 +- icu4c/source/data/translit/Any_Accents.txt | 290 ++++ icu4c/source/data/translit/Any_Publishing.txt | 34 + icu4c/source/data/translit/Arabic_Latin.txt | 146 ++ .../data/translit/Bengali_InterIndic.txt | 103 ++ icu4c/source/data/translit/Cyrillic_Latin.txt | 306 ++++ .../data/translit/Devanagari_InterIndic.txt | 117 ++ .../data/translit/Fullwidth_Halfwidth.txt | 271 +++ icu4c/source/data/translit/Greek_Latin.txt | 345 ++++ .../data/translit/Greek_Latin_UNGEGN.txt | 252 +++ .../data/translit/Gujarati_InterIndic.txt | 91 ++ .../data/translit/Gurmukhi_InterIndic.txt | 95 ++ icu4c/source/data/translit/Han_Latin.txt | 1440 ++++++++++++++++ icu4c/source/data/translit/Han_Spacedhan.txt | 24 + icu4c/source/data/translit/Hebrew_Latin.txt | 109 ++ .../data/translit/Hiragana_Katakana.txt | 207 +++ icu4c/source/data/translit/Hiragana_Latin.txt | 14 + .../data/translit/InterIndic_Bengali.txt | 147 ++ .../data/translit/InterIndic_Devanagari.txt | 158 ++ .../data/translit/InterIndic_Gujarati.txt | 138 ++ .../data/translit/InterIndic_Gurmukhi.txt | 147 ++ .../data/translit/InterIndic_Kannada.txt | 141 ++ .../source/data/translit/InterIndic_Latin.txt | 529 ++++++ .../data/translit/InterIndic_Malayalam.txt | 141 ++ .../source/data/translit/InterIndic_Oriya.txt | 137 ++ .../source/data/translit/InterIndic_Tamil.txt | 151 ++ .../data/translit/InterIndic_Telugu.txt | 141 ++ .../data/translit/Kannada_InterIndic.txt | 92 ++ .../source/data/translit/Latin_InterIndic.txt | 383 +++++ icu4c/source/data/translit/Latin_Jamo.txt | 522 ++++++ icu4c/source/data/translit/Latin_Katakana.txt | 495 ++++++ .../data/translit/Latin_NumericPinyin.txt | 41 + .../data/translit/Malayalam_InterIndic.txt | 85 + 
.../source/data/translit/Oriya_InterIndic.txt | 95 ++ .../source/data/translit/Tamil_InterIndic.txt | 76 + .../data/translit/Telugu_InterIndic.txt | 90 + .../data/translit/ThaiLogical_Latin.txt | 187 +++ .../source/data/translit/Thai_ThaiLogical.txt | 26 + icu4c/source/data/translit/Thai_ThaiSemi.txt | 11 + icu4c/source/data/translit/Tone_Digit.txt | 11 + icu4c/source/data/translit/el.txt | 8 + icu4c/source/data/translit/en.txt | 22 + icu4c/source/data/translit/root.txt | 752 +++++++++ icu4c/source/data/translit/t_Any_Accents.txt | 306 ---- .../source/data/translit/t_Any_Publishing.txt | 50 - icu4c/source/data/translit/t_Arab_Latn.txt | 162 -- .../data/translit/t_Beng_InterIndic.txt | 119 -- icu4c/source/data/translit/t_Cyrl_Latn.txt | 322 ---- .../data/translit/t_Deva_InterIndic.txt | 133 -- .../source/data/translit/t_FWidth_HWidth.txt | 287 ---- icu4c/source/data/translit/t_Grek_Latn.txt | 361 ---- .../data/translit/t_Grek_Latn_UNGEGN.txt | 268 --- .../data/translit/t_Gujr_InterIndic.txt | 107 -- .../data/translit/t_Guru_InterIndic.txt | 111 -- icu4c/source/data/translit/t_Hani_Latn.txt | 1455 ----------------- icu4c/source/data/translit/t_Hani_SpHan.txt | 39 - icu4c/source/data/translit/t_Hebr_Latn.txt | 124 -- icu4c/source/data/translit/t_Hira_Kana.txt | 223 --- icu4c/source/data/translit/t_Hira_Latn.txt | 30 - .../data/translit/t_InterIndic_Beng.txt | 163 -- .../data/translit/t_InterIndic_Deva.txt | 174 -- .../data/translit/t_InterIndic_Gujr.txt | 154 -- .../data/translit/t_InterIndic_Guru.txt | 163 -- .../data/translit/t_InterIndic_Knda.txt | 157 -- .../data/translit/t_InterIndic_Latn.txt | 545 ------ .../data/translit/t_InterIndic_Mlym.txt | 157 -- .../data/translit/t_InterIndic_Orya.txt | 153 -- .../data/translit/t_InterIndic_Taml.txt | 167 -- .../data/translit/t_InterIndic_Telu.txt | 157 -- .../data/translit/t_Knda_InterIndic.txt | 108 -- .../data/translit/t_Latn_InterIndic.txt | 399 ----- icu4c/source/data/translit/t_Latn_Jamo.txt | 538 ------ 
icu4c/source/data/translit/t_Latn_Kana.txt | 511 ------ icu4c/source/data/translit/t_Latn_NPinyn.txt | 56 - .../data/translit/t_Mlym_InterIndic.txt | 101 -- .../data/translit/t_Orya_InterIndic.txt | 111 -- .../data/translit/t_Taml_InterIndic.txt | 92 -- .../data/translit/t_Telu_InterIndic.txt | 106 -- icu4c/source/data/translit/t_Tone_Digit.txt | 26 - icu4c/source/data/translit/translit_index.txt | 275 ---- icu4c/source/data/translit/trnsfiles.mk | 40 +- icu4c/source/i18n/rbt.h | 3 + icu4c/source/i18n/translit.cpp | 45 +- icu4c/source/i18n/transreg.cpp | 10 +- icu4c/source/test/intltest/intltest.cpp | 11 + icu4c/source/test/intltest/intltest.h | 6 + icu4c/source/test/intltest/intltest.vcproj | 20 +- icu4c/source/test/intltest/transapi.cpp | 6 + icu4c/source/test/intltest/transtst.cpp | 10 +- 89 files changed, 8683 insertions(+), 8503 deletions(-) create mode 100644 icu4c/source/data/translit/Any_Accents.txt create mode 100644 icu4c/source/data/translit/Any_Publishing.txt create mode 100644 icu4c/source/data/translit/Arabic_Latin.txt create mode 100644 icu4c/source/data/translit/Bengali_InterIndic.txt create mode 100644 icu4c/source/data/translit/Cyrillic_Latin.txt create mode 100644 icu4c/source/data/translit/Devanagari_InterIndic.txt create mode 100644 icu4c/source/data/translit/Fullwidth_Halfwidth.txt create mode 100644 icu4c/source/data/translit/Greek_Latin.txt create mode 100644 icu4c/source/data/translit/Greek_Latin_UNGEGN.txt create mode 100644 icu4c/source/data/translit/Gujarati_InterIndic.txt create mode 100644 icu4c/source/data/translit/Gurmukhi_InterIndic.txt create mode 100644 icu4c/source/data/translit/Han_Latin.txt create mode 100644 icu4c/source/data/translit/Han_Spacedhan.txt create mode 100644 icu4c/source/data/translit/Hebrew_Latin.txt create mode 100644 icu4c/source/data/translit/Hiragana_Katakana.txt create mode 100644 icu4c/source/data/translit/Hiragana_Latin.txt create mode 100644 icu4c/source/data/translit/InterIndic_Bengali.txt create mode 
100644 icu4c/source/data/translit/InterIndic_Devanagari.txt create mode 100644 icu4c/source/data/translit/InterIndic_Gujarati.txt create mode 100644 icu4c/source/data/translit/InterIndic_Gurmukhi.txt create mode 100644 icu4c/source/data/translit/InterIndic_Kannada.txt create mode 100644 icu4c/source/data/translit/InterIndic_Latin.txt create mode 100644 icu4c/source/data/translit/InterIndic_Malayalam.txt create mode 100644 icu4c/source/data/translit/InterIndic_Oriya.txt create mode 100644 icu4c/source/data/translit/InterIndic_Tamil.txt create mode 100644 icu4c/source/data/translit/InterIndic_Telugu.txt create mode 100644 icu4c/source/data/translit/Kannada_InterIndic.txt create mode 100644 icu4c/source/data/translit/Latin_InterIndic.txt create mode 100644 icu4c/source/data/translit/Latin_Jamo.txt create mode 100644 icu4c/source/data/translit/Latin_Katakana.txt create mode 100644 icu4c/source/data/translit/Latin_NumericPinyin.txt create mode 100644 icu4c/source/data/translit/Malayalam_InterIndic.txt create mode 100644 icu4c/source/data/translit/Oriya_InterIndic.txt create mode 100644 icu4c/source/data/translit/Tamil_InterIndic.txt create mode 100644 icu4c/source/data/translit/Telugu_InterIndic.txt create mode 100644 icu4c/source/data/translit/ThaiLogical_Latin.txt create mode 100644 icu4c/source/data/translit/Thai_ThaiLogical.txt create mode 100644 icu4c/source/data/translit/Thai_ThaiSemi.txt create mode 100644 icu4c/source/data/translit/Tone_Digit.txt create mode 100644 icu4c/source/data/translit/el.txt create mode 100644 icu4c/source/data/translit/en.txt create mode 100644 icu4c/source/data/translit/root.txt delete mode 100644 icu4c/source/data/translit/t_Any_Accents.txt delete mode 100644 icu4c/source/data/translit/t_Any_Publishing.txt delete mode 100644 icu4c/source/data/translit/t_Arab_Latn.txt delete mode 100644 icu4c/source/data/translit/t_Beng_InterIndic.txt delete mode 100644 icu4c/source/data/translit/t_Cyrl_Latn.txt delete mode 100644 
icu4c/source/data/translit/t_Deva_InterIndic.txt delete mode 100644 icu4c/source/data/translit/t_FWidth_HWidth.txt delete mode 100644 icu4c/source/data/translit/t_Grek_Latn.txt delete mode 100644 icu4c/source/data/translit/t_Grek_Latn_UNGEGN.txt delete mode 100644 icu4c/source/data/translit/t_Gujr_InterIndic.txt delete mode 100644 icu4c/source/data/translit/t_Guru_InterIndic.txt delete mode 100644 icu4c/source/data/translit/t_Hani_Latn.txt delete mode 100644 icu4c/source/data/translit/t_Hani_SpHan.txt delete mode 100644 icu4c/source/data/translit/t_Hebr_Latn.txt delete mode 100644 icu4c/source/data/translit/t_Hira_Kana.txt delete mode 100644 icu4c/source/data/translit/t_Hira_Latn.txt delete mode 100644 icu4c/source/data/translit/t_InterIndic_Beng.txt delete mode 100644 icu4c/source/data/translit/t_InterIndic_Deva.txt delete mode 100644 icu4c/source/data/translit/t_InterIndic_Gujr.txt delete mode 100644 icu4c/source/data/translit/t_InterIndic_Guru.txt delete mode 100644 icu4c/source/data/translit/t_InterIndic_Knda.txt delete mode 100644 icu4c/source/data/translit/t_InterIndic_Latn.txt delete mode 100644 icu4c/source/data/translit/t_InterIndic_Mlym.txt delete mode 100644 icu4c/source/data/translit/t_InterIndic_Orya.txt delete mode 100644 icu4c/source/data/translit/t_InterIndic_Taml.txt delete mode 100644 icu4c/source/data/translit/t_InterIndic_Telu.txt delete mode 100644 icu4c/source/data/translit/t_Knda_InterIndic.txt delete mode 100644 icu4c/source/data/translit/t_Latn_InterIndic.txt delete mode 100644 icu4c/source/data/translit/t_Latn_Jamo.txt delete mode 100644 icu4c/source/data/translit/t_Latn_Kana.txt delete mode 100644 icu4c/source/data/translit/t_Latn_NPinyn.txt delete mode 100644 icu4c/source/data/translit/t_Mlym_InterIndic.txt delete mode 100644 icu4c/source/data/translit/t_Orya_InterIndic.txt delete mode 100644 icu4c/source/data/translit/t_Taml_InterIndic.txt delete mode 100644 icu4c/source/data/translit/t_Telu_InterIndic.txt delete mode 100644 
icu4c/source/data/translit/t_Tone_Digit.txt delete mode 100644 icu4c/source/data/translit/translit_index.txt diff --git a/icu4c/source/data/makedata.mak b/icu4c/source/data/makedata.mak index 4978f9695d1..b881f0ffa57 100644 --- a/icu4c/source/data/makedata.mak +++ b/icu4c/source/data/makedata.mak @@ -73,7 +73,7 @@ ICUCOL=coll # ICURBNF=rbnf -# ICUTRANSLIT +# ICUTRNS # The directory that contains trfiles.mk files along with *.txt transliterator files # ICUTRNS=translit @@ -248,7 +248,10 @@ TRANLIT_SOURCE=$(TRANSLIT_SOURCE) $(TRANSLIT_SOURCE_LOCAL) !MESSAGE Warning: cannot find "trnsfiles.mk" !ENDIF -TRANSLIT_FILES = $(TRANSLIT_SOURCE:.txt=.res) +TRANSLIT_FILES = $(ICUTRNS)\root.txt $(TRANSLIT_ALIAS_SOURCE) $(TRANSLIT_SOURCE) +TRANSLIT_RES_FILES = $(TRANSLIT_FILES:.txt =.res translit\) +TRANSLIT_RES_FILES = $(TRANSLIT_RES_FILES:.txt=.res) +TRANSLIT_RES_FILES = $(TRANSLIT_RES_FILES:translit\ =translit\) # Read list of miscellaneous resource bundle files !IF EXISTS("$(ICUSRCDATA)\$(ICUMISC2)\miscfiles.mk") @@ -268,6 +271,7 @@ MISC_FILES = $(MISC_SOURCE:.txt=.res) INDEX_RES_FILES = res_index.res INDEX_COL_FILES = $(ICUCOL)\res_index.res INDEX_RBNF_FILES = $(ICURBNF)\res_index.res +#INDEX_TRANSLIT_FILES = $(ICUTRNS)\res_index.res # # Break iterator data files. 
@@ -276,7 +280,7 @@ BRK_SOURCE_FILES = sent.txt char.txt line.txt word.txt title.txt line_th.txt wor BRK_FILES=$(BRK_SOURCE_FILES:.txt=.brk) # don't include COL_FILES -ALL_RES = $(INDEX_RES_FILES) $(RB_FILES) $(TRANSLIT_FILES) $(MISC_FILES) +ALL_RES = $(INDEX_RES_FILES) $(RB_FILES) $(MISC_FILES) ############################################################################# # @@ -293,7 +297,7 @@ ALL : GODATA "$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" "$(TESTDATAOUT)\testdata.dat" # # testdata - nmake will invoke pkgdata, which will create testdata.dat # -"$(TESTDATAOUT)\testdata.dat": "$(ICUBLD)\ucadata.icu" $(TRANSLIT_FILES) $(MISC_FILES) $(RB_FILES) {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe +"$(TESTDATAOUT)\testdata.dat": "$(ICUBLD)\ucadata.icu" $(TRANSLIT_RES_FILES) $(MISC_FILES) $(RB_FILES) {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe @cd "$(TESTDATA)" @echo building testdata... nmake /nologo /f "$(TESTDATA)\testdata.mak" TESTDATA=. ICUTOOLS="$(ICUTOOLS)" ICUP="$(ICUP)" CFG=$(CFG) TESTDATAOUT="$(TESTDATAOUT)" ICUDATA="$(ICUDATA)" TESTDATABLD="$(TESTDATABLD)" @@ -303,7 +307,7 @@ ALL : GODATA "$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" "$(TESTDATAOUT)\testdata.dat" # move the .dll and .lib files to their final destination afterwards. # The $(U_ICUDATA_NAME).lib and $(U_ICUDATA_NAME).exp should already be in the right place due to stubdata. 
# -"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUP)\bin\pkgdata.exe" $(CNV_FILES) "$(ICUBLD)\uprops.icu" "$(ICUBLD)\unames.icu" "$(ICUBLD)\pnames.icu" "$(ICUBLD)\unorm.icu" "$(ICUBLD)\cnvalias.icu" "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\invuca.icu" "$(ICUBLD)\uidna.spp" $(BRK_FILES) $(INDEX_COL_FILES) $(COL_COL_FILES) $(INDEX_RBNF_FILES) $(RBNF_RES_FILES) $(ALL_RES) "$(ICUTMP)\icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt" +"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUP)\bin\pkgdata.exe" $(CNV_FILES) "$(ICUBLD)\uprops.icu" "$(ICUBLD)\unames.icu" "$(ICUBLD)\pnames.icu" "$(ICUBLD)\unorm.icu" "$(ICUBLD)\cnvalias.icu" "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\invuca.icu" "$(ICUBLD)\uidna.spp" $(BRK_FILES) $(INDEX_COL_FILES) $(COL_COL_FILES) $(INDEX_RBNF_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(ALL_RES) "$(ICUTMP)\icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt" @echo Building icu data cd "$(ICUBLD)" @"$(ICUP)\bin\pkgdata" -f -e $(U_ICUDATA_NAME) -v $(ICU_PACKAGE_MODE) -c -p $(ICUPKG) -T "$(ICUTMP)" -L $(U_ICUDATA_NAME) -d "$(ICUBLD)" -s . <<"$(ICUTMP)\pkgdatain.txt" @@ -325,6 +329,8 @@ $(ICUCOL)\res_index.res $(RBNF_RES_FILES:.res =.res ) $(ICURBNF)\res_index.res +$(TRANSLIT_RES_FILES:.res =.res +) $(BRK_FILES:.brk =.brk ) < ; + +# Provide keyboard equivalents for common diacritics used in transliteration + +$pre \` $post <> \u0300 ; # COMBINING GRAVE ACCENT +$pre \' $post <> \u0301 ; # COMBINING ACUTE ACCENT +$pre \^ $post <> \u0302 ; # COMBINING CIRCUMFLEX ACCENT +$pre \~ $post <> \u0303 ; # COMBINING TILDE +$pre \- $post <> \u0304 ; # COMBINING MACRON +$pre \" $post <> \u0308 ; # COMBINING DIAERESIS +$pre \* $post <> \u030A ; # COMBINING RING ABOVE +$pre \, $post <> \u0327 ; # COMBINING CEDILLA +$pre '/' $post <> \u0338 ; # COMBINING LONG SOLIDUS OVERLAY +$pre \. 
$post <> \u0323 ; # COMBINING DOT BELOW + +# Combine common characters + +$pre AE $post <> \u00C6 ; # LATIN CAPITAL LETTER AE +$pre ae $post <> \u00E6 ; # LATIN SMALL LETTER AE +$pre D $post <> \u00D0 ; # LATIN CAPITAL LETTER ETH +$pre d $post <> \u00F0 ; # LATIN SMALL LETTER ETH +$pre O'/' $post <> \u00D8 ; # LATIN CAPITAL LETTER O WITH STROKE +$pre o'/' $post <> \u00F8 ; # LATIN SMALL LETTER O WITH STROKE +$pre TH $post <> \u00DE ; # LATIN CAPITAL LETTER THORN +$pre th $post <> \u00FE ; # LATIN SMALL LETTER THORN +$pre OE $post <> \u0152 ; # LATIN CAPITAL LIGATURE OE +$pre oe $post <> \u0153 ; # LATIN SMALL LIGATURE OE + +$pre ss $post <> \u00DF ; # LATIN SMALL LETTER SHARP S + +$pre NG $post <> \u014A ; # LATIN CAPITAL LETTER ENG +$pre ng $post <> \u014B ; # LATIN SMALL LETTER ENG + +$pre T $post <> \u0398 ; # THETA +$pre t $post <> \u03B8 ; # THETA +$pre SH $post <> \u01A9 ; # LATIN CAPITAL LETTER ESH +$pre sh $post <> \u0283 ; # LATIN SMALL LETTER ESH +$pre ZH $post <> \u01B7 ; # LATIN CAPITAL LETTER EZH +$pre zh $post <> \u0292 ; # LATIN SMALL LETTER EZH + +$pre U $post <> \u01B1 ; # LATIN CAPITAL LETTER UPSILON +$pre u $post <> \u028A ; # LATIN SMALL LETTER UPSILON +$pre A $post <> \u018F ; # LATIN CAPITAL LETTER SCHWA +$pre a $post <> \u0259 ; # LATIN SMALL LETTER SCHWA +$pre O $post <> \u0186 ; # LATIN CAPITAL LETTER OPEN O +$pre o $post <> \u0254 ; # LATIN SMALL LETTER OPEN O +$pre E $post <> \u0190 ; # LATIN CAPITAL LETTER OPEN E +$pre e $post <> \u025B ; # LATIN SMALL LETTER OPEN E + +# three that don't have uppercases + +$pre '?' 
$post <> \u0294 ; # LATIN LETTER GLOTTAL STOP +$pre i $post <> \u026A ; # LATIN LETTER SMALL CAPITAL I +$pre v $post <> \u028C ; # LATIN SMALL LETTER TURNED V + +# Additional Characters that may be added in the future + +# $pre XXX $post <> \u0306 ; # COMBINING BREVE +# $pre XXX $post <> \u0307 ; # COMBINING DOT ABOVE +# $pre XXX $post <> \u0309 ; # COMBINING HOOK ABOVE +# $pre XXX $post <> \u030B ; # COMBINING DOUBLE ACUTE ACCENT +# $pre XXX $post <> \u030C ; # COMBINING CARON +# $pre XXX $post <> \u030F ; # COMBINING DOUBLE GRAVE ACCENT +# $pre XXX $post <> \u0311 ; # COMBINING INVERTED BREVE +# $pre XXX $post <> \u0313 ; # COMBINING COMMA ABOVE +# $pre XXX $post <> \u0314 ; # COMBINING REVERSED COMMA ABOVE +# $pre XXX $post <> \u031B ; # COMBINING HORN +# $pre XXX $post <> \u0324 ; # COMBINING DIAERESIS BELOW +# $pre XXX $post <> \u0325 ; # COMBINING RING BELOW +# $pre XXX $post <> \u0326 ; # COMBINING COMMA BELOW +# $pre XXX $post <> \u0328 ; # COMBINING OGONEK +# $pre XXX $post <> \u032D ; # COMBINING CIRCUMFLEX ACCENT BELOW +# $pre XXX $post <> \u032E ; # COMBINING BREVE BELOW +# $pre XXX $post <> \u0330 ; # COMBINING TILDE BELOW +# $pre XXX $post <> \u0331 ; # COMBINING MACRON BELOW + +# $pre YYY $post <> \u00AA ; # FEMININE ORDINAL INDICATOR +# $pre YYY $post <> \u00BA ; # MASCULINE ORDINAL INDICATOR +# $pre YYY $post <> \u0110 ; # LATIN CAPITAL LETTER D WITH STROKE +# $pre YYY $post <> \u0111 ; # LATIN SMALL LETTER D WITH STROKE +# $pre YYY $post <> \u0126 ; # LATIN CAPITAL LETTER H WITH STROKE +# $pre YYY $post <> \u0127 ; # LATIN SMALL LETTER H WITH STROKE +# $pre YYY $post <> \u0131 ; # LATIN SMALL LETTER DOTLESS I +# $pre YYY $post <> \u0138 ; # LATIN SMALL LETTER KRA +# $pre YYY $post <> \u013F ; # LATIN CAPITAL LETTER L WITH MIDDLE DOT +# $pre YYY $post <> \u0140 ; # LATIN SMALL LETTER L WITH MIDDLE DOT +# $pre YYY $post <> \u0141 ; # LATIN CAPITAL LETTER L WITH STROKE +# $pre YYY $post <> \u0142 ; # LATIN SMALL LETTER L WITH STROKE +# $pre YYY $post 
<> \u0149 ; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +# $pre YYY $post <> \u0166 ; # LATIN CAPITAL LETTER T WITH STROKE +# $pre YYY $post <> \u0167 ; # LATIN SMALL LETTER T WITH STROKE +# $pre YYY $post <> \u017F ; # LATIN SMALL LETTER LONG S +# $pre YYY $post <> \u0180 ; # LATIN SMALL LETTER B WITH STROKE +# $pre YYY $post <> \u0181 ; # LATIN CAPITAL LETTER B WITH HOOK +# $pre YYY $post <> \u0182 ; # LATIN CAPITAL LETTER B WITH TOPBAR +# $pre YYY $post <> \u0183 ; # LATIN SMALL LETTER B WITH TOPBAR +# $pre YYY $post <> \u0184 ; # LATIN CAPITAL LETTER TONE SIX +# $pre YYY $post <> \u0185 ; # LATIN SMALL LETTER TONE SIX +# $pre YYY $post <> \u0187 ; # LATIN CAPITAL LETTER C WITH HOOK +# $pre YYY $post <> \u0188 ; # LATIN SMALL LETTER C WITH HOOK +# $pre YYY $post <> \u0189 ; # LATIN CAPITAL LETTER AFRICAN D +# $pre YYY $post <> \u018A ; # LATIN CAPITAL LETTER D WITH HOOK +# $pre YYY $post <> \u018B ; # LATIN CAPITAL LETTER D WITH TOPBAR +# $pre YYY $post <> \u018C ; # LATIN SMALL LETTER D WITH TOPBAR +# $pre YYY $post <> \u018D ; # LATIN SMALL LETTER TURNED DELTA +# $pre YYY $post <> \u018E ; # LATIN CAPITAL LETTER REVERSED E +# $pre YYY $post <> \u0191 ; # LATIN CAPITAL LETTER F WITH HOOK +# $pre YYY $post <> \u0192 ; # LATIN SMALL LETTER F WITH HOOK +# $pre YYY $post <> \u0193 ; # LATIN CAPITAL LETTER G WITH HOOK +# $pre YYY $post <> \u0194 ; # LATIN CAPITAL LETTER GAMMA +# $pre YYY $post <> \u0195 ; # LATIN SMALL LETTER HV +# $pre YYY $post <> \u0196 ; # LATIN CAPITAL LETTER IOTA +# $pre YYY $post <> \u0197 ; # LATIN CAPITAL LETTER I WITH STROKE +# $pre YYY $post <> \u0198 ; # LATIN CAPITAL LETTER K WITH HOOK +# $pre YYY $post <> \u0199 ; # LATIN SMALL LETTER K WITH HOOK +# $pre YYY $post <> \u019A ; # LATIN SMALL LETTER L WITH BAR +# $pre YYY $post <> \u019B ; # LATIN SMALL LETTER LAMBDA WITH STROKE +# $pre YYY $post <> \u019C ; # LATIN CAPITAL LETTER TURNED M +# $pre YYY $post <> \u019D ; # LATIN CAPITAL LETTER N WITH LEFT HOOK +# $pre YYY $post <> \u019E 
; # LATIN SMALL LETTER N WITH LONG RIGHT LEG +# $pre YYY $post <> \u019F ; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE +# $pre YYY $post <> \u01A2 ; # LATIN CAPITAL LETTER OI +# $pre YYY $post <> \u01A3 ; # LATIN SMALL LETTER OI +# $pre YYY $post <> \u01A4 ; # LATIN CAPITAL LETTER P WITH HOOK +# $pre YYY $post <> \u01A5 ; # LATIN SMALL LETTER P WITH HOOK +# $pre YYY $post <> \u01A6 ; # LATIN LETTER YR +# $pre YYY $post <> \u01A7 ; # LATIN CAPITAL LETTER TONE TWO +# $pre YYY $post <> \u01A8 ; # LATIN SMALL LETTER TONE TWO +# $pre YYY $post <> \u01AA ; # LATIN LETTER REVERSED ESH LOOP +# $pre YYY $post <> \u01AB ; # LATIN SMALL LETTER T WITH PALATAL HOOK +# $pre YYY $post <> \u01AC ; # LATIN CAPITAL LETTER T WITH HOOK +# $pre YYY $post <> \u01AD ; # LATIN SMALL LETTER T WITH HOOK +# $pre YYY $post <> \u01AE ; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +# $pre YYY $post <> \u01B2 ; # LATIN CAPITAL LETTER V WITH HOOK +# $pre YYY $post <> \u01B3 ; # LATIN CAPITAL LETTER Y WITH HOOK +# $pre YYY $post <> \u01B4 ; # LATIN SMALL LETTER Y WITH HOOK +# $pre YYY $post <> \u01B5 ; # LATIN CAPITAL LETTER Z WITH STROKE +# $pre YYY $post <> \u01B6 ; # LATIN SMALL LETTER Z WITH STROKE +# $pre YYY $post <> \u01B8 ; # LATIN CAPITAL LETTER EZH REVERSED +# $pre YYY $post <> \u01B9 ; # LATIN SMALL LETTER EZH REVERSED +# $pre YYY $post <> \u01BA ; # LATIN SMALL LETTER EZH WITH TAIL +# $pre YYY $post <> \u01BB ; # LATIN LETTER TWO WITH STROKE +# $pre YYY $post <> \u01BC ; # LATIN CAPITAL LETTER TONE FIVE +# $pre YYY $post <> \u01BD ; # LATIN SMALL LETTER TONE FIVE +# $pre YYY $post <> \u01BE ; # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE +# $pre YYY $post <> \u01BF ; # LATIN LETTER WYNN +# $pre YYY $post <> \u01C0 ; # LATIN LETTER DENTAL CLICK +# $pre YYY $post <> \u01C1 ; # LATIN LETTER LATERAL CLICK +# $pre YYY $post <> \u01C2 ; # LATIN LETTER ALVEOLAR CLICK +# $pre YYY $post <> \u01C3 ; # LATIN LETTER RETROFLEX CLICK +# $pre YYY $post <> \u01C4 ; # LATIN CAPITAL LETTER DZ WITH 
CARON +# $pre YYY $post <> \u01C5 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +# $pre YYY $post <> \u01C6 ; # LATIN SMALL LETTER DZ WITH CARON +# $pre YYY $post <> \u01C7 ; # LATIN CAPITAL LETTER LJ +# $pre YYY $post <> \u01C8 ; # LATIN CAPITAL LETTER L WITH SMALL LETTER J +# $pre YYY $post <> \u01C9 ; # LATIN SMALL LETTER LJ +# $pre YYY $post <> \u01CA ; # LATIN CAPITAL LETTER NJ +# $pre YYY $post <> \u01CB ; # LATIN CAPITAL LETTER N WITH SMALL LETTER J +# $pre YYY $post <> \u01CC ; # LATIN SMALL LETTER NJ +# $pre YYY $post <> \u01DD ; # LATIN SMALL LETTER TURNED E +# $pre YYY $post <> \u01E4 ; # LATIN CAPITAL LETTER G WITH STROKE +# $pre YYY $post <> \u01E5 ; # LATIN SMALL LETTER G WITH STROKE +# $pre YYY $post <> \u01F1 ; # LATIN CAPITAL LETTER DZ +# $pre YYY $post <> \u01F2 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z +# $pre YYY $post <> \u01F3 ; # LATIN SMALL LETTER DZ +# $pre YYY $post <> \u01F6 ; # LATIN CAPITAL LETTER HWAIR +# $pre YYY $post <> \u01F7 ; # LATIN CAPITAL LETTER WYNN +# $pre YYY $post <> \u021C ; # LATIN CAPITAL LETTER YOGH +# $pre YYY $post <> \u021D ; # LATIN SMALL LETTER YOGH +# $pre YYY $post <> \u0222 ; # LATIN CAPITAL LETTER OU +# $pre YYY $post <> \u0223 ; # LATIN SMALL LETTER OU +# $pre YYY $post <> \u0224 ; # LATIN CAPITAL LETTER Z WITH HOOK +# $pre YYY $post <> \u0225 ; # LATIN SMALL LETTER Z WITH HOOK +# $pre YYY $post <> \u0250 ; # LATIN SMALL LETTER TURNED A +# $pre YYY $post <> \u0251 ; # LATIN SMALL LETTER ALPHA +# $pre YYY $post <> \u0252 ; # LATIN SMALL LETTER TURNED ALPHA +# $pre YYY $post <> \u0253 ; # LATIN SMALL LETTER B WITH HOOK +# $pre YYY $post <> \u0255 ; # LATIN SMALL LETTER C WITH CURL +# $pre YYY $post <> \u0256 ; # LATIN SMALL LETTER D WITH TAIL +# $pre YYY $post <> \u0257 ; # LATIN SMALL LETTER D WITH HOOK +# $pre YYY $post <> \u0258 ; # LATIN SMALL LETTER REVERSED E +# $pre YYY $post <> \u025A ; # LATIN SMALL LETTER SCHWA WITH HOOK +# $pre YYY $post <> \u025C ; # LATIN SMALL LETTER REVERSED OPEN 
E +# $pre YYY $post <> \u025D ; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK +# $pre YYY $post <> \u025E ; # LATIN SMALL LETTER CLOSED REVERSED OPEN E +# $pre YYY $post <> \u025F ; # LATIN SMALL LETTER DOTLESS J WITH STROKE +# $pre YYY $post <> \u0260 ; # LATIN SMALL LETTER G WITH HOOK +# $pre YYY $post <> \u0261 ; # LATIN SMALL LETTER SCRIPT G +# $pre YYY $post <> \u0262 ; # LATIN LETTER SMALL CAPITAL G +# $pre YYY $post <> \u0263 ; # LATIN SMALL LETTER GAMMA +# $pre YYY $post <> \u0264 ; # LATIN SMALL LETTER RAMS HORN +# $pre YYY $post <> \u0265 ; # LATIN SMALL LETTER TURNED H +# $pre YYY $post <> \u0266 ; # LATIN SMALL LETTER H WITH HOOK +# $pre YYY $post <> \u0267 ; # LATIN SMALL LETTER HENG WITH HOOK +# $pre YYY $post <> \u0268 ; # LATIN SMALL LETTER I WITH STROKE +# $pre YYY $post <> \u0269 ; # LATIN SMALL LETTER IOTA +# $pre YYY $post <> \u026B ; # LATIN SMALL LETTER L WITH MIDDLE TILDE +# $pre YYY $post <> \u026C ; # LATIN SMALL LETTER L WITH BELT +# $pre YYY $post <> \u026D ; # LATIN SMALL LETTER L WITH RETROFLEX HOOK +# $pre YYY $post <> \u026E ; # LATIN SMALL LETTER LEZH +# $pre YYY $post <> \u026F ; # LATIN SMALL LETTER TURNED M +# $pre YYY $post <> \u0270 ; # LATIN SMALL LETTER TURNED M WITH LONG LEG +# $pre YYY $post <> \u0271 ; # LATIN SMALL LETTER M WITH HOOK +# $pre YYY $post <> \u0272 ; # LATIN SMALL LETTER N WITH LEFT HOOK +# $pre YYY $post <> \u0273 ; # LATIN SMALL LETTER N WITH RETROFLEX HOOK +# $pre YYY $post <> \u0274 ; # LATIN LETTER SMALL CAPITAL N +# $pre YYY $post <> \u0275 ; # LATIN SMALL LETTER BARRED O +# $pre YYY $post <> \u0276 ; # LATIN LETTER SMALL CAPITAL OE +# $pre YYY $post <> \u0277 ; # LATIN SMALL LETTER CLOSED OMEGA +# $pre YYY $post <> \u0278 ; # LATIN SMALL LETTER PHI +# $pre YYY $post <> \u0279 ; # LATIN SMALL LETTER TURNED R +# $pre YYY $post <> \u027A ; # LATIN SMALL LETTER TURNED R WITH LONG LEG +# $pre YYY $post <> \u027B ; # LATIN SMALL LETTER TURNED R WITH HOOK +# $pre YYY $post <> \u027C ; # LATIN SMALL LETTER R 
WITH LONG LEG +# $pre YYY $post <> \u027D ; # LATIN SMALL LETTER R WITH TAIL +# $pre YYY $post <> \u027E ; # LATIN SMALL LETTER R WITH FISHHOOK +# $pre YYY $post <> \u027F ; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK +# $pre YYY $post <> \u0280 ; # LATIN LETTER SMALL CAPITAL R +# $pre YYY $post <> \u0281 ; # LATIN LETTER SMALL CAPITAL INVERTED R +# $pre YYY $post <> \u0282 ; # LATIN SMALL LETTER S WITH HOOK +# $pre YYY $post <> \u0284 ; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK +# $pre YYY $post <> \u0285 ; # LATIN SMALL LETTER SQUAT REVERSED ESH +# $pre YYY $post <> \u0286 ; # LATIN SMALL LETTER ESH WITH CURL +# $pre YYY $post <> \u0287 ; # LATIN SMALL LETTER TURNED T +# $pre YYY $post <> \u0288 ; # LATIN SMALL LETTER T WITH RETROFLEX HOOK +# $pre YYY $post <> \u0289 ; # LATIN SMALL LETTER U BAR +# $pre YYY $post <> \u028B ; # LATIN SMALL LETTER V WITH HOOK +# $pre YYY $post <> \u028D ; # LATIN SMALL LETTER TURNED W +# $pre YYY $post <> \u028E ; # LATIN SMALL LETTER TURNED Y +# $pre YYY $post <> \u028F ; # LATIN LETTER SMALL CAPITAL Y +# $pre YYY $post <> \u0290 ; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK +# $pre YYY $post <> \u0291 ; # LATIN SMALL LETTER Z WITH CURL +# $pre YYY $post <> \u0293 ; # LATIN SMALL LETTER EZH WITH CURL +# $pre YYY $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP +# $pre YYY $post <> \u0295 ; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE +# $pre YYY $post <> \u0296 ; # LATIN LETTER INVERTED GLOTTAL STOP +# $pre YYY $post <> \u0297 ; # LATIN LETTER STRETCHED C +# $pre YYY $post <> \u0298 ; # LATIN LETTER BILABIAL CLICK +# $pre YYY $post <> \u0299 ; # LATIN LETTER SMALL CAPITAL B +# $pre YYY $post <> \u029A ; # LATIN SMALL LETTER CLOSED OPEN E +# $pre YYY $post <> \u029B ; # LATIN LETTER SMALL CAPITAL G WITH HOOK +# $pre YYY $post <> \u029C ; # LATIN LETTER SMALL CAPITAL H +# $pre YYY $post <> \u029D ; # LATIN SMALL LETTER J WITH CROSSED-TAIL +# $pre YYY $post <> \u029E ; # LATIN SMALL LETTER TURNED K +# $pre YYY $post <> \u029F ; 
# LATIN LETTER SMALL CAPITAL L +# $pre YYY $post <> \u02A0 ; # LATIN SMALL LETTER Q WITH HOOK +# $pre YYY $post <> \u02A1 ; # LATIN LETTER GLOTTAL STOP WITH STROKE +# $pre YYY $post <> \u02A2 ; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE +# $pre YYY $post <> \u02A3 ; # LATIN SMALL LETTER DZ DIGRAPH +# $pre YYY $post <> \u02A4 ; # LATIN SMALL LETTER DEZH DIGRAPH +# $pre YYY $post <> \u02A5 ; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL +# $pre YYY $post <> \u02A6 ; # LATIN SMALL LETTER TS DIGRAPH +# $pre YYY $post <> \u02A7 ; # LATIN SMALL LETTER TESH DIGRAPH +# $pre YYY $post <> \u02A8 ; # LATIN SMALL LETTER TC DIGRAPH WITH CURL +# $pre YYY $post <> \u02A9 ; # LATIN SMALL LETTER FENG DIGRAPH +# $pre YYY $post <> \u02AA ; # LATIN SMALL LETTER LS DIGRAPH +# $pre YYY $post <> \u02AB ; # LATIN SMALL LETTER LZ DIGRAPH +# $pre YYY $post <> \u02AC ; # LATIN LETTER BILABIAL PERCUSSIVE +# $pre YYY $post <> \u02AD ; # LATIN LETTER BIDENTAL PERCUSSIVE +# $pre YYY $post <> \u02B0 ; # MODIFIER LETTER SMALL H +# $pre YYY $post <> \u02B1 ; # MODIFIER LETTER SMALL H WITH HOOK +# $pre YYY $post <> \u02B2 ; # MODIFIER LETTER SMALL J +# $pre YYY $post <> \u02B3 ; # MODIFIER LETTER SMALL R +# $pre YYY $post <> \u02B4 ; # MODIFIER LETTER SMALL TURNED R +# $pre YYY $post <> \u02B5 ; # MODIFIER LETTER SMALL TURNED R WITH HOOK +# $pre YYY $post <> \u02B6 ; # MODIFIER LETTER SMALL CAPITAL INVERTED R +# $pre YYY $post <> \u02B7 ; # MODIFIER LETTER SMALL W +# $pre YYY $post <> \u02B8 ; # MODIFIER LETTER SMALL Y +# $pre YYY $post <> \u02E0 ; # MODIFIER LETTER SMALL GAMMA +# $pre YYY $post <> \u02E1 ; # MODIFIER LETTER SMALL L +# $pre YYY $post <> \u02E2 ; # MODIFIER LETTER SMALL S +# $pre YYY $post <> \u02E3 ; # MODIFIER LETTER SMALL X +# $pre YYY $post <> \u02E4 ; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +# $pre YYY $post <> \u1E9A ; # LATIN SMALL LETTER A WITH RIGHT HALF RING +# $pre YYY $post <> \u207F ; # SUPERSCRIPT LATIN SMALL LETTER N + +:: NFC (NFD) ; \ No newline at end of 
file diff --git a/icu4c/source/data/translit/Any_Publishing.txt b/icu4c/source/data/translit/Any_Publishing.txt new file mode 100644 index 00000000000..5e3c36b2144 --- /dev/null +++ b/icu4c/source/data/translit/Any_Publishing.txt @@ -0,0 +1,34 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# Test case +# "The" "(quick)" ('brown') `fox' ` jumped -- "over?" + +# Variables + +$single = \' ; +$space = ' ' ; +$double = \" ; +$back = \` ; +$tab = '\u0008' ; +$makeRight = [[:Z:][:Ps:][:Pi:]$] ; + +# fix UNIX quotes + +$back $back > “ ; +$back > ‘ ; + +# fix typewriter quotes, by context + +$makeRight {$double} <> “ ; +$double <> ” ; + +$makeRight {$single} <> ‘ ; +$single <> ’; + +# fix multiple spaces and hyphens + +$space {$space} > ; +'--' <> — ; diff --git a/icu4c/source/data/translit/Arabic_Latin.txt b/icu4c/source/data/translit/Arabic_Latin.txt new file mode 100644 index 00000000000..3b8cdcf0cd4 --- /dev/null +++ b/icu4c/source/data/translit/Arabic_Latin.txt @@ -0,0 +1,146 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# Generally follows UNGEGN +# Occasionally deviates in the direction of ISO 233 +# a) where required for disambiguation. +# b) with underdot instead of cedilla for letter like SAD, since +# those are explicitly in Unicode for transliteration. +# c) with extra non-Arabic-language letters, like PEH + +# Does *not* do assimilation of "al", nor hyphenation. +# While it could be done, we need to determine whether a prefix "al" could +# occur other than as the definite article (since no space is used). 
+ +:: [[:Arabic:] [‎ⁿ\u060C\u061B\u061F\u0640\u064B-\u0655\u0660-\u066C\u06F0-\u06F9\uFDFC]] ; +:: NFKD (NFC); +$disambig = ̱ ; +$disambig2 = ̰ ; +$under = ̣ ; + +$notAbove = [[:^ccc=0:]&[:^ccc=230:]]; + +# non-letters + + ٫ <> '.' $disambig ; # ARABIC DECIMAL SEPARATOR + ٬ <> ',' $disambig ; # ARABIC THOUSANDS SEPARATOR +# ٭ <> ; # ARABIC FIVE POINTED STAR // no need to transliterate + +، <> ',' ; # ARABIC COMMA + ؛ <> ';' ; # ARABIC SEMICOLON + ؟ <> '?' ; # ARABIC QUESTION MARK + ٪ <> '%' ; # ARABIC PERCENT SIGN + + ۰ <> 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO + ۱ <> 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE + ۲ <> 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO + ۳ <> 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE + ۴ <> 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR + ۵ <> 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE + ۶ <> 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX + ۷ <> 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN + ۸ <> 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT + ۹ <> 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE + + ٠ <> 0 ; # ARABIC-INDIC DIGIT ZERO + ١ <> 1 ; # ARABIC-INDIC DIGIT ONE + ٢ <> 2 ; # ARABIC-INDIC DIGIT TWO + ٣ <> 3 ; # ARABIC-INDIC DIGIT THREE + ٤ <> 4 ; # ARABIC-INDIC DIGIT FOUR + ٥ <> 5 ; # ARABIC-INDIC DIGIT FIVE + ٦ <> 6 ; # ARABIC-INDIC DIGIT SIX + ٧ <> 7 ; # ARABIC-INDIC DIGIT SEVEN + ٨ <> 8 ; # ARABIC-INDIC DIGIT EIGHT + ٩ <> 9 ; # ARABIC-INDIC DIGIT NINE + +# letters + +# long vowels + َا<> ā ; # ARABIC FATHA, ARABIC LETTER ALEF + ُو <> ū ; # ARABIC DAMMA, ARABIC LETTER WAW + ِي <> ī ; # ARABIC KASRA, ARABIC LETTER YEH + +# longer items moved here to prevent masking + ث <> t h $disambig ; # ARABIC LETTER THEH + ذ <> d h $disambig ; # ARABIC LETTER THAL + ش <> s h $disambig ; # ARABIC LETTER SHEEN + ص <> s $under ; # ARABIC LETTER SAD + ض <> d $under ; # ARABIC LETTER DAD + ط <> t $under ; # ARABIC LETTER TAH + ظ <> z $under ; # ARABIC LETTER ZAH + غ <> g h $disambig ; # ARABIC LETTER GHAIN 
+ +# WARNING: special case +# will be canonically ordered as +# so on the return, we have to skip over (but preserve) the half-ring below (or others like it) +# ةٕ < ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS + + ة <> t \u0308 ; # ARABIC LETTER TEH MARBUTA + ة | $1 < t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA + +# non-Arabic language + ژ <> z h $disambig ; # ARABIC LETTER JEH + ڭ <> n $disambig g ; # ARABIC LETTER NG + ۋ <> v $disambig ; # ARABIC LETTER VE + ی <> y $disambig2 ; # ARABIC LETTER FARSI YEH + +# Arabic language + + ء <> ʾ ; # ARABIC LETTER HAMZA + ا <> a $under; # ARABIC LETTER ALEF + ب <> b ; # ARABIC LETTER BEH + ت <> t ; # ARABIC LETTER TEH + ج <> j ; # ARABIC LETTER JEEM + ح <> h $under ; # ARABIC LETTER HAH + خ <> k h $disambig ; # ARABIC LETTER KHAH + د <> d ; # ARABIC LETTER DAL + ر <> r ; # ARABIC LETTER REH + ز <> z ; # ARABIC LETTER ZAIN + س <> s ; # ARABIC LETTER SEEN + ع <> ʿ ; # ARABIC LETTER AIN + ـ > ; # ARABIC TATWEEL + ف <> f ; # ARABIC LETTER FEH + ق <> q ; # ARABIC LETTER QAF + ك <> k ; # ARABIC LETTER KAF + ل <> l ; # ARABIC LETTER LAM + م <> m ; # ARABIC LETTER MEEM + ن <> n ; # ARABIC LETTER NOON + ه <> h ; # ARABIC LETTER HEH + و <> w ; # ARABIC LETTER WAW + ى <> y $disambig ; # ARABIC LETTER ALEF MAKSURA + ي <> y ; # ARABIC LETTER YEH + ً <> aⁿ ; # ARABIC FATHATAN + ٌ <> uⁿ ; # ARABIC DAMMATAN + ٍ <> iⁿ ; # ARABIC KASRATAN + َ <> a ; # ARABIC FATHA + ُ <> u ; # ARABIC DAMMA + ِ <> i ; # ARABIC KASRA + ّ <> ̃ ; # ARABIC SHADDA + ْ <> ̊ ; # ARABIC SUKUN + +# special combining marks + ٓ <> ̂ ; # ARABIC MADDAH ABOVE + ٔ <> ̉ ; # ARABIC HAMZA ABOVE + ٕ <> ̹ ; # ARABIC HAMZA BELOW + +# Some non-Arabic language (not in UNGEGN) + پ <> p ; # ARABIC LETTER PEH + چ <> c h $disambig ; # ARABIC LETTER TCHEH + ڤ <> v ; # ARABIC LETTER VEH +# ڥ <> v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW +# ڢ <> f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW + گ <> g ; # ARABIC LETTER GAF + 
+# fallbacks +| s < c } [eiy]; +| k < c ; +| i < e ; +| u < o ; +| ks < x ; +| n < ‎ⁿ; + +:: (lower) ; +::NFC (NFD); +:: ( [[:Latin:] [%,.0-9;?\u02BE-\u02BF\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339\u037E]] ); \ No newline at end of file diff --git a/icu4c/source/data/translit/Bengali_InterIndic.txt b/icu4c/source/data/translit/Bengali_InterIndic.txt new file mode 100644 index 00000000000..fed1236a10b --- /dev/null +++ b/icu4c/source/data/translit/Bengali_InterIndic.txt @@ -0,0 +1,103 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# Bengali-InterIndic + +\u09C7\u09BE>\uE04B; # VOWEL SIGN O +\u09C7\u09D7>\uE04C; # VOWEL SIGN AU +\u0981>\uE001; # SIGN CANDRABINDU +\u0982>\uE002; # SIGN ANUSVARA +\u0983>\uE003; # SIGN VISARGA +\u0985>\uE005; # LETTER A +\u0986>\uE006; # LETTER AA +\u0987>\uE007; # LETTER I +\u0988>\uE008; # LETTER II +\u0989>\uE009; # LETTER U +\u098A>\uE00A; # LETTER UU +\u098B>\uE00B; # LETTER VOCALIC R +\u098C>\uE00C; # LETTER VOCALIC L +\u098F>\uE00F; # LETTER E +\u0990>\uE010; # LETTER AI +\u0993>\uE013; # LETTER O +\u0994>\uE014; # LETTER AU +\u0995>\uE015; # LETTER KA +\u0996>\uE016; # LETTER KHA +\u0997>\uE017; # LETTER GA +\u0998>\uE018; # LETTER GHA +\u0999>\uE019; # LETTER NGA +\u099A>\uE01A; # LETTER CA +\u099B>\uE01B; # LETTER CHA +\u099C>\uE01C; # LETTER JA +\u099D>\uE01D; # LETTER JHA +\u099E>\uE01E; # LETTER NYA +\u099F>\uE01F; # LETTER TTA +\u09A0>\uE020; # LETTER TTHA +\u09A1>\uE021; # LETTER DDA +\u09A2>\uE022; # LETTER DDHA +\u09A3>\uE023; # LETTER NNA +\u09A4>\uE024; # LETTER TA +\u09A5>\uE025; # LETTER THA +\u09A6>\uE026; # LETTER DA +\u09A7>\uE027; # LETTER DHA +\u09A8>\uE028; # LETTER NA +\u09AA>\uE02A; # LETTER PA +\u09AB>\uE02B; # LETTER PHA +\u09AC>\uE02C; # LETTER BA +\u09AD>\uE02D; # LETTER BHA 
+\u09AE>\uE02E; # LETTER MA +\u09AF>\uE02F; # LETTER YA +\u09B0>\uE030; # LETTER RA +\u09B2>\uE032; # LETTER LA +\u09B6>\uE036; # LETTER SHA +\u09B7>\uE037; # LETTER SSA +\u09B8>\uE038; # LETTER SA +\u09B9>\uE039; # LETTER HA +\u09BC>\uE03C; # SIGN NUKTA +\u09BD>\uE03D; # SIGN AVAGRAHA +\u09BE>\uE03E; # VOWEL SIGN AA +\u09BF>\uE03F; # VOWEL SIGN I +\u09C0>\uE040; # VOWEL SIGN II +\u09C1>\uE041; # VOWEL SIGN U +\u09C2>\uE042; # VOWEL SIGN UU +\u09C3>\uE043; # VOWEL SIGN VOCALIC R +\u09C4>\uE044; # VOWEL SIGN VOCALIC RR +\u09C7>\uE047; # VOWEL SIGN E +\u09C8>\uE048; # VOWEL SIGN AI +\u09CB>\uE04B; +\u09CC>\uE04C; +# +\u09CD>\uE04D; # SIGN VIRAMA +\u09D7>\uE057; # AU LENGTH MARK +# +\u09E0>\uE060; # LETTER VOCALIC RR +\u09E1>\uE061; # LETTER VOCALIC LL +\u09E2>\uE062; # VOWEL SIGN VOCALIC L +\u09E3>\uE063; # VOWEL SIGN VOCALIC LL +\u09E6>\uE066; # DIGIT ZERO +\u09E7>\uE067; # DIGIT ONE +\u09E8>\uE068; # DIGIT TWO +\u09E9>\uE069; # DIGIT THREE +\u09EA>\uE06A; # DIGIT FOUR +\u09EB>\uE06B; # DIGIT FIVE +\u09EC>\uE06C; # DIGIT SIX +\u09ED>\uE06D; # DIGIT SEVEN +\u09EE>\uE06E; # DIGIT EIGHT +\u09EF>\uE06F; # DIGIT NINE +\u09F0>\ue071; # Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL +\u09F1>\ue072; # Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL +\u09F2>\ue073; # Bengali-InterIndic: RUPEE MARK +\u09F3>\ue074; # Bengali-InterIndic: RUPEE SIGN +\u09F4>\ue075; # Bengali-InterIndic: CURRENCY NUMERATOR ONE +\u09F5>\ue076; # Bengali-InterIndic: CURRENCY NUMERATOR TWO +\u09F6>\ue077; # Bengali-InterIndic: CURRENCY NUMERATOR THREE +\u09F7>\ue078; # Bengali-InterIndic: CURRENCY NUMERATOR FOUR +\u09F8>\ue079; # Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\u09F9>\ue07A; # Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN +\u09FA>\ue07B; # ISSHAR + +\u0964>\ue064; # DANDA +\u0965>\ue065; # DOUBLE DANDA +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/Cyrillic_Latin.txt b/icu4c/source/data/translit/Cyrillic_Latin.txt new file mode 100644 
index 00000000000..c50e9d6849e --- /dev/null +++ b/icu4c/source/data/translit/Cyrillic_Latin.txt @@ -0,0 +1,306 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- +# TODO: add remaining characters +# Should add variants for Russian-English, Russian-German +# Those can use this as a base, and then remap cases +# like a $hat to ya or ja. + +# :: [\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ; +### WARNING, \u0308 must be added to the generated filters, in both directions ### +# MINIMAL FILTER +:: [\u0308\u0102-\u0103\u0114-\u0115\u011E-\u011F\u012C-\u012D\u014E-\u014F\u016C-\u016D\u0306\u0400-\u045F\u0490-\u0495\u0498-\u0499\u04C1-\u04C2\u04D0-\u04DF\u04E2-\u04E7\u04EC-\u04F5\u04F8-\u04F9\u1E1C-\u1E1D\u1EAE-\u1EB7\u1FB0\u1FB8\u1FD0\u1FD8\u1FE0\u1FE8] ; +:: NFD (NFC) ; + +$modprime = \u02B9; +$modprime2 = \u02BA; + +$grave = \u0300; +$acute = \u0301; +$hat = \u0302; +$breve = \u0306 ; +$dot = \u0307 ; +$caron = \u030C ; +$comma = \u0326 ; +$under = \u0331 ; + +# move up so not masked + +я <> a $hat ; # CYRILLIC SMALL LETTER YA +Я <> A $hat ; # CYRILLIC CAPITAL LETTER YA + +ч <> c $caron ; # CYRILLIC SMALL LETTER CHE +Ч <> C $caron; # CYRILLIC CAPITAL LETTER CHE +# ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER +# Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +# ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE +# Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +# ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE +# Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE + +э <> e $acute; # CYRILLIC SMALL LETTER E +Э <> E $acute; # CYRILLIC CAPITAL LETTER E +є <> e $hat; # CYRILLIC SMALL LETTER UKRAINIAN IE +Є <> E $hat; # CYRILLIC CAPITAL LETTER UKRAINIAN IE + +ш <> s $caron ; # CYRILLIC SMALL LETTER SHA +Ш 
<> S $caron ; # CYRILLIC CAPITAL LETTER SHA +щ <> s $hat ; # CYRILLIC SMALL LETTER SHCHA +Щ <> S $hat; # CYRILLIC CAPITAL LETTER SHCHA + +ѕ <> z $hat ; # CYRILLIC SMALL LETTER DZE +Ѕ <> Z $hat; # CYRILLIC CAPITAL LETTER DZE +# ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE +# Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE + +ю <> u $hat ; # CYRILLIC SMALL LETTER YU +Ю <> U $hat ; # CYRILLIC CAPITAL LETTER YU + +і <> i $acute; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +І <> I $acute; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +ј <> j $caron; # CYRILLIC SMALL LETTER JE +Ј <> J $caron; # CYRILLIC CAPITAL LETTER JE + +љ <> l $hat ; # CYRILLIC SMALL LETTER LJE +Љ <> L $hat ; # CYRILLIC CAPITAL LETTER LJE +њ <> n $hat ; # CYRILLIC SMALL LETTER NJE +Њ <> N $hat ; # CYRILLIC CAPITAL LETTER NJE + +ћ <> c $acute ; # CYRILLIC SMALL LETTER TSHE +Ћ <> C $acute ; # CYRILLIC CAPITAL LETTER TSHE + +џ <> d $hat ; # CYRILLIC SMALL LETTER DZHE +Џ <> D $hat ; # CYRILLIC CAPITAL LETTER DZHE + +# Normal order + +а <> a ; # CYRILLIC SMALL LETTER A +А <> A ; # CYRILLIC CAPITAL LETTER A +ә <> \u0259 ; # CYRILLIC SMALL LETTER SCHWA +Ә <> \u018F ; # CYRILLIC CAPITAL LETTER SCHWA +ӕ <> \u00E6 ; # CYRILLIC SMALL LIGATURE A IE +Ӕ <> \u00C6 ; # CYRILLIC CAPITAL LIGATURE A IE +б <> b ; # CYRILLIC SMALL LETTER BE +Б <> B ; # CYRILLIC CAPITAL LETTER BE +в <> v ; # CYRILLIC SMALL LETTER VE +В <> V ; # CYRILLIC CAPITAL LETTER VE + +ґ <> g $grave ; # CYRILLIC SMALL LETTER GHE WITH UPTURN +Ґ <> G $grave ; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +ғ <> g $dot ; # CYRILLIC SMALL LETTER GHE WITH STROKE +Ғ <> G $dot; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +ҕ <> g $breve; # CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK +Ҕ <> G $breve; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +г <> g ; # CYRILLIC SMALL LETTER GHE +Г <> G ; # CYRILLIC CAPITAL LETTER GHE + +д <> d; # CYRILLIC SMALL LETTER DE +Д <> D; # CYRILLIC CAPITAL LETTER DE +ђ <> đ ; # CYRILLIC SMALL LETTER DJE +Ђ <> Đ ; # 
CYRILLIC CAPITAL LETTER DJE +ҙ <> z $comma ; # CYRILLIC SMALL LETTER ZE WITH DESCENDER +Ҙ <> Z $comma ; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +е <> e ; # CYRILLIC SMALL LETTER IE +Е <> E; # CYRILLIC CAPITAL LETTER IE + +ж <> z $caron; # CYRILLIC SMALL LETTER ZHE +Ж <> Z $caron; # CYRILLIC CAPITAL LETTER ZHE + +# җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER +# Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER + +з <> z ; # CYRILLIC SMALL LETTER ZE +З <> Z; # CYRILLIC CAPITAL LETTER ZE + +й <> j ; # CYRILLIC SMALL LETTER I +Й <> J ; # CYRILLIC CAPITAL LETTER I +и <> i ; # CYRILLIC SMALL LETTER I +И <> I ; # CYRILLIC CAPITAL LETTER I + +к <> k ; # CYRILLIC SMALL LETTER KA +К <> K; # CYRILLIC CAPITAL LETTER KA + +# қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER +# Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +# ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK +# Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK +# ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA +# Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA +# ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE +# Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE +# ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE +# Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +л <> l ; # CYRILLIC SMALL LETTER EL +Л <> L; # CYRILLIC CAPITAL LETTER EL + +м <> m ; # CYRILLIC SMALL LETTER EM +М <> M ; # CYRILLIC CAPITAL LETTER EM +н <> n ; # CYRILLIC SMALL LETTER EN +Н <> N; # CYRILLIC CAPITAL LETTER EN +# ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER +# Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +# ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK +# Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK +# ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE +# Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE + +о <> o ; # CYRILLIC SMALL LETTER O +О <> O ; # CYRILLIC CAPITAL LETTER O +# ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O +# Ө <> XXX ; # CYRILLIC CAPITAL LETTER 
BARRED O +п <> p ; # CYRILLIC SMALL LETTER PE +П <> P ; # CYRILLIC CAPITAL LETTER PE +# ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK +# Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +# ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA +# Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA +р <> r ; # CYRILLIC SMALL LETTER ER +Р <> R ; # CYRILLIC CAPITAL LETTER ER +# ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK +# Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK +с <> s ; # CYRILLIC SMALL LETTER ES +С <> S ; # CYRILLIC CAPITAL LETTER ES +# ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER +# Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER +т <> t ; # CYRILLIC SMALL LETTER TE +Т <> T ; # CYRILLIC CAPITAL LETTER TE +# ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER +# Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER + +у <> u ; # CYRILLIC SMALL LETTER U +У <> U ; # CYRILLIC CAPITAL LETTER U +# ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U +# Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U +# ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE +# Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +# ѹ <> XXX ; # CYRILLIC SMALL LETTER UK +# Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK +ф <> f ; # CYRILLIC SMALL LETTER EF +Ф <> F ; # CYRILLIC CAPITAL LETTER EF +х <> h ; # CYRILLIC SMALL LETTER HA +Х <> H; # CYRILLIC CAPITAL LETTER HA +# ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER +# Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +# һ <> XXX ; # CYRILLIC SMALL LETTER SHHA +# Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA +# ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA +# Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA +# ѿ <> XXX ; # CYRILLIC SMALL LETTER OT +# Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT +# ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO +# Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +# ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA +# Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA +ц <> c ; # CYRILLIC 
SMALL LETTER TSE +Ц <> C; # CYRILLIC CAPITAL LETTER TSE +# ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE +# Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE + +# ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE +# Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE +# ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER +# Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER + + +Ъ <> $modprime2 $under ; # CYRILLIC CAPITAL LETTER HARD SIGN +ъ <> $modprime2 ; # CYRILLIC SMALL LETTER HARD SIGN +Ь <> $modprime $under ; # CYRILLIC CAPITAL LETTER SOFT SIGN +ь <> $modprime ; # CYRILLIC SMALL LETTER SOFT SIGN + +ы <> y ; # CYRILLIC SMALL LETTER YERU +Ы <> Y ; # CYRILLIC CAPITAL LETTER YERU + +# ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN +# Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN +# ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT +# Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT + +# ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E +# Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E +# ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS +# Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS +# ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS +# Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS +# ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS +# Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +# ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS +# Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +# ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI +# Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI +# ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI +# Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI +# ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA +# Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA +# ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA +# Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA +# ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA +# Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA +# Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA +### ӑ <> XXX ; # CYRILLIC SMALL LETTER A +### Ӑ <> XXX ; # CYRILLIC 
CAPITAL LETTER A +### ӓ <> XXX ; # CYRILLIC SMALL LETTER A +### Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A +### ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA +### Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA +### ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE +### Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE +### ѐ <> XXX ; # CYRILLIC SMALL LETTER IE +### Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE +### ё <> XXX ; # CYRILLIC SMALL LETTER IE +### Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE +### ӗ <> XXX ; # CYRILLIC SMALL LETTER IE +### Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE +### ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE +### Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE +### ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE +### Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE +### ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE +### Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE +### ѝ <> XXX ; # CYRILLIC SMALL LETTER I +### Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I +### ӣ <> XXX ; # CYRILLIC SMALL LETTER I +### Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I +### ӥ <> XXX ; # CYRILLIC SMALL LETTER I +### Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I +### ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +### Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +### ӧ <> XXX ; # CYRILLIC SMALL LETTER O +### Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O +### ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O +### Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O +### ќ <> XXX ; # CYRILLIC SMALL LETTER KA +### Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA +### ӯ <> XXX ; # CYRILLIC SMALL LETTER U +### Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U +### ў <> XXX ; # CYRILLIC SMALL LETTER U +### Ў <> XXX ; # CYRILLIC CAPITAL LETTER U +### ӱ <> XXX ; # CYRILLIC SMALL LETTER U +### Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U +### ӳ <> XXX ; # CYRILLIC SMALL LETTER U +### Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U +### ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE +### Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE +### ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU +### Ӹ <> XXX ; # CYRILLIC CAPITAL 
LETTER YERU +### ӭ <> XXX ; # CYRILLIC SMALL LETTER E +### Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E +### ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA +### Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA + +# Completeness +$ignore = [[:Mark:]''] * ; +| k < q ; +| K < Q ; +| u < w ; +| U < W ; +| KS < X } $ignore [:UppercaseLetter:] ; +| KS < [:UppercaseLetter:] $ignore { X ; +| Ks < X ; +| ks < x ; + +:: NFC (NFD) ; +# note: a global filter is more efficient, but MUST include all source chars!! +# :: ([\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]]); +# MINIMAL FILTER: Latin-Cyrillic +:: ( [\u0308A-Za-z\u00C0-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u018F\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0259\u02B9-\u02BA\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344\u0374\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0400\u0403\u040C-\u040E\u0419\u0439\u0450\u0453\u045C-\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F02-\u1F05\u1F0A-\u1F0D\u1F12-\u1F15\u1F1A-\u1F1D\u1F22-\u1F25\u1F2A-\u1F2D\u1F32-\u1F35\u1F3A-\u1F3D\u1F42-\u1F45\u1F4A-\u1F4D\u1F52-\u1F55\u1F5B\u1F5D\u1F62-\u1F65\u1F6A-\u1F6D\u1F70-\u1F7D\u1F82-\u1F85\u1F8A-\u1F8D\u1F92-\u1F95\u1F9A-\u1F9D\u1FA2-\u1FA5\u1FAA-\u1FAD\u1FB0\u1FB2\u1FB4\u1FB8\u1FBA-\u1FBB\u1FC2\u1FC4\u1FC8-\u1FCB\u1FCD-\u1FCE\u1FD0\u1FD2-\u1FD3\u1FD8\u1FDA-\u1FDB\u1FDD-\u1FDE\u1FE0\u1FE2-\u1FE3\u1FE8\u1FEA-\u1FEB\u1FED-\u1FEE\u1FF2\u1FF4\u1FF8-\u1FFB\u212A-\u212B] ) ; diff --git a/icu4c/source/data/translit/Devanagari_InterIndic.txt b/icu4c/source/data/translit/Devanagari_InterIndic.txt new file mode 100644 index 00000000000..ebbe468e6a2 --- /dev/null +++ b/icu4c/source/data/translit/Devanagari_InterIndic.txt @@ -0,0 +1,117 @@ 
+#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# Devanagari-InterIndic +# :: NFD; +#Rules for Decomposed characters + + + \u0901>\uE001; # SIGN CANDRABINDU + \u0902>\uE002; # SIGN ANUSVARA + \u0903>\uE003; # SIGN VISARGA + \u0904>\uE004; # SIGN SHORT A + \u0905>\uE005; # LETTER A + \u0906>\uE006; # LETTER AA + \u0907>\uE007; # LETTER I + \u0908>\uE008; # LETTER II + \u0909>\uE009; # LETTER U + \u090A>\uE00A; # LETTER UU + \u090B>\uE00B; # LETTER VOCALIC R + \u090C>\uE00C; # LETTER VOCALIC L + \u090D>\uE00D; # LETTER CANDRA E (For representing English sounds) + \u090E>\uE00E; # UNMAPPED LETTER SHORT E(For Southern Scripts) + \u090F>\uE00F; # LETTER E + \u0910>\uE010; # LETTER AI + \u0911>\uE011; # LETTER CANDRA O (For representing English sounds) + \u0912>\uE012; # UNMAPPED LETTER SHORT O (For Southern Scripts) + \u0913>\uE013; # LETTER O + \u0914>\uE014; # LETTER AU + \u0915>\uE015; # LETTER KA + \u0916>\uE016; # LETTER KHA + \u0917>\uE017; # LETTER GA + \u0918>\uE018; # LETTER GHA + \u0919>\uE019; # LETTER NGA + \u091A>\uE01A; # LETTER CA + \u091B>\uE01B; # LETTER CHA + \u091C>\uE01C; # LETTER JA + \u091D>\uE01D; # LETTER JHA + \u091E>\uE01E; # LETTER NYA + \u091F>\uE01F; # LETTER TTA + \u0920>\uE020; # LETTER TTHA + \u0921>\uE021; # LETTER DDA + \u0922>\uE022; # LETTER DDHA + \u0923>\uE023; # LETTER NNA + \u0924>\uE024; # LETTER TA + \u0925>\uE025; # LETTER THA + \u0926>\uE026; # LETTER DA + \u0927>\uE027; # LETTER DHA + \u0928>\uE028; # LETTER NA + \u0929>\uE029; + \u092A>\uE02A; # LETTER PA + \u092B>\uE02B; # LETTER PHA + \u092C>\uE02C; # LETTER BA + \u092D>\uE02D; # LETTER BHA + \u092E>\uE02E; # LETTER MA + \u092F>\uE02F; # LETTER YA + \u0930>\uE030; # LETTER RA + \u0931>\uE031; + \u0932>\uE032; # LETTER LA + \u0933>\uE033; # LETTER LLA + \u0934>\uE034; + + 
\u0935>\uE035; # LETTER VA + \u0936>\uE036; # LETTER SHA + \u0937>\uE037; # LETTER SSA + \u0938>\uE038; # LETTER SA + \u0939>\uE039; # LETTER HA + \u093C>\uE03C; # SIGN NUKTA + \u093D>\uE03D; # SIGN AVAGRAHA + \u093E>\uE03E; # VOWEL SIGN AA + \u093F>\uE03F; # VOWEL SIGN I + \u0940>\uE040; # VOWEL SIGN II + \u0941>\uE041; # VOWEL SIGN U + \u0942>\uE042; # VOWEL SIGN UU + \u0943>\uE043; # VOWEL SIGN VOCALIC R + \u0944>\uE044; # VOWEL SIGN VOCALIC RR + \u0945>\uE045; # VOWEL SIGN CANDRA E + \u0946>\uE046; # UNMAPPED VOWEL SIGN SHORT E + \u0947>\uE047; # VOWEL SIGN E + \u0948>\uE048; # VOWEL SIGN AI + \u0949>\uE049; # VOWEL SIGN CANDRA O + \u094A>\uE04A; # UNMAPPED VOWEL SIGN SHORT O + \u094B>\uE04B; # VOWEL SIGN O + \u094C>\uE04C; # VOWEL SIGN AU + \u094D>\uE04D; # SIGN VIRAMA + \u0950>\uE050; # OM + \u0951>\uE051; # UNMAPPED STRESS SIGN UDATTA + \u0952>\uE052; # UNMAPPED STRESS SIGN ANUDATTA + \u0953>\uE053; # UNMAPPED GRAVE ACCENT + \u0954>\uE054; # UNMAPPED ACUTE ACCENT + \u0958>\uE058; + \u0959>\uE059; + \u095A>\uE05a; + \u095B>\uE05b; + \u095C>\uE05c; + \u095D>\uE05d; + \u095E>\uE05e; + \u095F>\uE05f; + \u0960>\uE060; # LETTER VOCALIC RR + \u0961>\uE061; # LETTER VOCALIC LL + \u0962>\uE062; # VOWEL SIGN VOCALIC L + \u0963>\uE063; # VOWEL SIGN VOCALIC LL + \u0964>\ue064; # DANDA + \u0965>\ue065; # DOUBLE DANDA + \u0966>\uE066; # DIGIT ZERO + \u0967>\uE067; # DIGIT ONE + \u0968>\uE068; # DIGIT TWO + \u0969>\uE069; # DIGIT THREE + \u096A>\uE06A; # DIGIT FOUR + \u096B>\uE06B; # DIGIT FIVE + \u096C>\uE06C; # DIGIT SIX + \u096D>\uE06D; # DIGIT SEVEN + \u096E>\uE06E; # DIGIT EIGHT + \u096F>\uE06F; # DIGIT NINE + \u0970>\uE070; # Devanagari-InterIndic: ABBREVIATION SIGN +# :: NFC (NFD) ; diff --git a/icu4c/source/data/translit/Fullwidth_Halfwidth.txt b/icu4c/source/data/translit/Fullwidth_Halfwidth.txt new file mode 100644 index 00000000000..e09a13e1fd9 --- /dev/null +++ b/icu4c/source/data/translit/Fullwidth_Halfwidth.txt @@ -0,0 +1,271 @@ 
+#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# Fullwidth-Halfwidth + +# Mechanically generated from Unicode Character Database +# IDEOGRAPHIC SPACE then added, and +# FULLWIDTH MACRON changed to map to MACRON, not SPACE + COMBINING MACRON + +# multicharacter + +ガ<>ガ; # to KATAKANA LETTER GA +ギ<>ギ; # to KATAKANA LETTER GI +グ<>グ; # to KATAKANA LETTER GU +ゲ<>ゲ; # to KATAKANA LETTER GE +ゴ<>ゴ; # to KATAKANA LETTER GO +ザ<>ザ; # to KATAKANA LETTER ZA +ジ<>ジ; # to KATAKANA LETTER ZI +ズ<>ズ; # to KATAKANA LETTER ZU +ゼ<>ゼ; # to KATAKANA LETTER ZE +ゾ<>ゾ; # to KATAKANA LETTER ZO +ダ<>ダ; # to KATAKANA LETTER DA +ヂ<>ヂ; # to KATAKANA LETTER DI +ヅ<>ヅ; # to KATAKANA LETTER DU +デ<>デ; # to KATAKANA LETTER DE +ド<>ド; # to KATAKANA LETTER DO +バ<>バ; # to KATAKANA LETTER BA +パ<>パ; # to KATAKANA LETTER PA +ビ<>ビ; # to KATAKANA LETTER BI +ピ<>ピ; # to KATAKANA LETTER PI +ブ<>ブ; # to KATAKANA LETTER BU +プ<>プ; # to KATAKANA LETTER PU +ベ<>ベ; # to KATAKANA LETTER BE +ペ<>ペ; # to KATAKANA LETTER PE +ボ<>ボ; # to KATAKANA LETTER BO +ポ<>ポ; # to KATAKANA LETTER PO +ヴ<>ヴ; # to KATAKANA LETTER VU +ヷ<>ヷ; # to KATAKANA LETTER VA +ヺ<>ヺ; # to KATAKANA LETTER VO + +# single character + +!<>'!'; # from FULLWIDTH EXCLAMATION MARK +"<>'\"'; # from FULLWIDTH QUOTATION MARK +#<>'#'; # from FULLWIDTH NUMBER SIGN +$<>'$'; # from FULLWIDTH DOLLAR SIGN +%<>'%'; # from FULLWIDTH PERCENT SIGN +&<>'&'; # from FULLWIDTH AMPERSAND +'<>''; # from FULLWIDTH APOSTROPHE +(<>'('; # from FULLWIDTH LEFT PARENTHESIS +)<>')'; # from FULLWIDTH RIGHT PARENTHESIS +*<>'*'; # from FULLWIDTH ASTERISK ++<>'+'; # from FULLWIDTH PLUS SIGN +,<>','; # from FULLWIDTH COMMA +-<>'-'; # from FULLWIDTH HYPHEN-MINUS +.<>'.'; # from FULLWIDTH FULL STOP +/<>'/'; # from FULLWIDTH SOLIDUS +0<>'0'; # from FULLWIDTH DIGIT ZERO +1<>'1'; # from FULLWIDTH 
DIGIT ONE +2<>'2'; # from FULLWIDTH DIGIT TWO +3<>'3'; # from FULLWIDTH DIGIT THREE +4<>'4'; # from FULLWIDTH DIGIT FOUR +5<>'5'; # from FULLWIDTH DIGIT FIVE +6<>'6'; # from FULLWIDTH DIGIT SIX +7<>'7'; # from FULLWIDTH DIGIT SEVEN +8<>'8'; # from FULLWIDTH DIGIT EIGHT +9<>'9'; # from FULLWIDTH DIGIT NINE +:<>':'; # from FULLWIDTH COLON +;<>';'; # from FULLWIDTH SEMICOLON +<<>'<'; # from FULLWIDTH LESS-THAN SIGN +=<>'='; # from FULLWIDTH EQUALS SIGN +><>'>'; # from FULLWIDTH GREATER-THAN SIGN +?<>'?'; # from FULLWIDTH QUESTION MARK +@<>'@'; # from FULLWIDTH COMMERCIAL AT +A<>A; # from FULLWIDTH LATIN CAPITAL LETTER A +B<>B; # from FULLWIDTH LATIN CAPITAL LETTER B +C<>C; # from FULLWIDTH LATIN CAPITAL LETTER C +D<>D; # from FULLWIDTH LATIN CAPITAL LETTER D +E<>E; # from FULLWIDTH LATIN CAPITAL LETTER E +F<>F; # from FULLWIDTH LATIN CAPITAL LETTER F +G<>G; # from FULLWIDTH LATIN CAPITAL LETTER G +H<>H; # from FULLWIDTH LATIN CAPITAL LETTER H +I<>I; # from FULLWIDTH LATIN CAPITAL LETTER I +J<>J; # from FULLWIDTH LATIN CAPITAL LETTER J +K<>K; # from FULLWIDTH LATIN CAPITAL LETTER K +L<>L; # from FULLWIDTH LATIN CAPITAL LETTER L +M<>M; # from FULLWIDTH LATIN CAPITAL LETTER M +N<>N; # from FULLWIDTH LATIN CAPITAL LETTER N +O<>O; # from FULLWIDTH LATIN CAPITAL LETTER O +P<>P; # from FULLWIDTH LATIN CAPITAL LETTER P +Q<>Q; # from FULLWIDTH LATIN CAPITAL LETTER Q +R<>R; # from FULLWIDTH LATIN CAPITAL LETTER R +S<>S; # from FULLWIDTH LATIN CAPITAL LETTER S +T<>T; # from FULLWIDTH LATIN CAPITAL LETTER T +U<>U; # from FULLWIDTH LATIN CAPITAL LETTER U +V<>V; # from FULLWIDTH LATIN CAPITAL LETTER V +W<>W; # from FULLWIDTH LATIN CAPITAL LETTER W +X<>X; # from FULLWIDTH LATIN CAPITAL LETTER X +Y<>Y; # from FULLWIDTH LATIN CAPITAL LETTER Y +Z<>Z; # from FULLWIDTH LATIN CAPITAL LETTER Z +[<>'['; # from FULLWIDTH LEFT SQUARE BRACKET +\<>'\\'; # from FULLWIDTH REVERSE SOLIDUS {double escape - aliu} +]<>']'; # from FULLWIDTH RIGHT SQUARE BRACKET +^<>'^'; # from FULLWIDTH CIRCUMFLEX 
ACCENT +_<>'_'; # from FULLWIDTH LOW LINE +`<>'`'; # from FULLWIDTH GRAVE ACCENT +a<>a; # from FULLWIDTH LATIN SMALL LETTER A +b<>b; # from FULLWIDTH LATIN SMALL LETTER B +c<>c; # from FULLWIDTH LATIN SMALL LETTER C +d<>d; # from FULLWIDTH LATIN SMALL LETTER D +e<>e; # from FULLWIDTH LATIN SMALL LETTER E +f<>f; # from FULLWIDTH LATIN SMALL LETTER F +g<>g; # from FULLWIDTH LATIN SMALL LETTER G +h<>h; # from FULLWIDTH LATIN SMALL LETTER H +i<>i; # from FULLWIDTH LATIN SMALL LETTER I +j<>j; # from FULLWIDTH LATIN SMALL LETTER J +k<>k; # from FULLWIDTH LATIN SMALL LETTER K +l<>l; # from FULLWIDTH LATIN SMALL LETTER L +m<>m; # from FULLWIDTH LATIN SMALL LETTER M +n<>n; # from FULLWIDTH LATIN SMALL LETTER N +o<>o; # from FULLWIDTH LATIN SMALL LETTER O +p<>p; # from FULLWIDTH LATIN SMALL LETTER P +q<>q; # from FULLWIDTH LATIN SMALL LETTER Q +r<>r; # from FULLWIDTH LATIN SMALL LETTER R +s<>s; # from FULLWIDTH LATIN SMALL LETTER S +t<>t; # from FULLWIDTH LATIN SMALL LETTER T +u<>u; # from FULLWIDTH LATIN SMALL LETTER U +v<>v; # from FULLWIDTH LATIN SMALL LETTER V +w<>w; # from FULLWIDTH LATIN SMALL LETTER W +x<>x; # from FULLWIDTH LATIN SMALL LETTER X +y<>y; # from FULLWIDTH LATIN SMALL LETTER Y +z<>z; # from FULLWIDTH LATIN SMALL LETTER Z +{<>'{'; # from FULLWIDTH LEFT CURLY BRACKET +|<>'|'; # from FULLWIDTH VERTICAL LINE +}<>'}'; # from FULLWIDTH RIGHT CURLY BRACKET +~<>'~'; # from FULLWIDTH TILDE +。<>。; # to HALFWIDTH IDEOGRAPHIC FULL STOP +「<>「; # to HALFWIDTH LEFT CORNER BRACKET +」<>」; # to HALFWIDTH RIGHT CORNER BRACKET +、<>、; # to HALFWIDTH IDEOGRAPHIC COMMA +・<>・; # to HALFWIDTH KATAKANA MIDDLE DOT +ヲ<>ヲ; # to HALFWIDTH KATAKANA LETTER WO +ァ<>ァ; # to HALFWIDTH KATAKANA LETTER SMALL A +ィ<>ィ; # to HALFWIDTH KATAKANA LETTER SMALL I +ゥ<>ゥ; # to HALFWIDTH KATAKANA LETTER SMALL U +ェ<>ェ; # to HALFWIDTH KATAKANA LETTER SMALL E +ォ<>ォ; # to HALFWIDTH KATAKANA LETTER SMALL O +ャ<>ャ; # to HALFWIDTH KATAKANA LETTER SMALL YA +ュ<>ュ; # to HALFWIDTH KATAKANA LETTER SMALL YU +ョ<>ョ; # 
to HALFWIDTH KATAKANA LETTER SMALL YO +ッ<>ッ; # to HALFWIDTH KATAKANA LETTER SMALL TU +ー<>ー; # to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +ア<>ア; # to HALFWIDTH KATAKANA LETTER A +イ<>イ; # to HALFWIDTH KATAKANA LETTER I +ウ<>ウ; # to HALFWIDTH KATAKANA LETTER U +エ<>エ; # to HALFWIDTH KATAKANA LETTER E +オ<>オ; # to HALFWIDTH KATAKANA LETTER O +カ<>カ; # to HALFWIDTH KATAKANA LETTER KA +キ<>キ; # to HALFWIDTH KATAKANA LETTER KI +ク<>ク; # to HALFWIDTH KATAKANA LETTER KU +ケ<>ケ; # to HALFWIDTH KATAKANA LETTER KE +コ<>コ; # to HALFWIDTH KATAKANA LETTER KO +サ<>サ; # to HALFWIDTH KATAKANA LETTER SA +シ<>シ; # to HALFWIDTH KATAKANA LETTER SI +ス<>ス; # to HALFWIDTH KATAKANA LETTER SU +セ<>セ; # to HALFWIDTH KATAKANA LETTER SE +ソ<>ソ; # to HALFWIDTH KATAKANA LETTER SO +タ<>タ; # to HALFWIDTH KATAKANA LETTER TA +チ<>チ; # to HALFWIDTH KATAKANA LETTER TI +ツ<>ツ; # to HALFWIDTH KATAKANA LETTER TU +テ<>テ; # to HALFWIDTH KATAKANA LETTER TE +ト<>ト; # to HALFWIDTH KATAKANA LETTER TO +ナ<>ナ; # to HALFWIDTH KATAKANA LETTER NA +ニ<>ニ; # to HALFWIDTH KATAKANA LETTER NI +ヌ<>ヌ; # to HALFWIDTH KATAKANA LETTER NU +ネ<>ネ; # to HALFWIDTH KATAKANA LETTER NE +ノ<>ノ; # to HALFWIDTH KATAKANA LETTER NO +ハ<>ハ; # to HALFWIDTH KATAKANA LETTER HA +ヒ<>ヒ; # to HALFWIDTH KATAKANA LETTER HI +フ<>フ; # to HALFWIDTH KATAKANA LETTER HU +ヘ<>ヘ; # to HALFWIDTH KATAKANA LETTER HE +ホ<>ホ; # to HALFWIDTH KATAKANA LETTER HO +マ<>マ; # to HALFWIDTH KATAKANA LETTER MA +ミ<>ミ; # to HALFWIDTH KATAKANA LETTER MI +ム<>ム; # to HALFWIDTH KATAKANA LETTER MU +メ<>メ; # to HALFWIDTH KATAKANA LETTER ME +モ<>モ; # to HALFWIDTH KATAKANA LETTER MO +ヤ<>ヤ; # to HALFWIDTH KATAKANA LETTER YA +ユ<>ユ; # to HALFWIDTH KATAKANA LETTER YU +ヨ<>ヨ; # to HALFWIDTH KATAKANA LETTER YO +ラ<>ラ; # to HALFWIDTH KATAKANA LETTER RA +リ<>リ; # to HALFWIDTH KATAKANA LETTER RI +ル<>ル; # to HALFWIDTH KATAKANA LETTER RU +レ<>レ; # to HALFWIDTH KATAKANA LETTER RE +ロ<>ロ; # to HALFWIDTH KATAKANA LETTER RO +ワ<>ワ; # to HALFWIDTH KATAKANA LETTER WA +ン<>ン; # to HALFWIDTH KATAKANA LETTER N +゙<>゙; # to 
HALFWIDTH KATAKANA VOICED SOUND MARK +゚<>゚; # to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +ᅠ<>ᅠ; # to HALFWIDTH HANGUL FILLER +ᄀ<>ᄀ; # to HALFWIDTH HANGUL LETTER KIYEOK +ᄁ<>ᄁ; # to HALFWIDTH HANGUL LETTER SSANGKIYEOK +ᆪ<>ᆪ; # to HALFWIDTH HANGUL LETTER KIYEOK-SIOS +ᄂ<>ᄂ; # to HALFWIDTH HANGUL LETTER NIEUN +ᆬ<>ᆬ; # to HALFWIDTH HANGUL LETTER NIEUN-CIEUC +ᆭ<>ᆭ; # to HALFWIDTH HANGUL LETTER NIEUN-HIEUH +ᄃ<>ᄃ; # to HALFWIDTH HANGUL LETTER TIKEUT +ᄄ<>ᄄ; # to HALFWIDTH HANGUL LETTER SSANGTIKEUT +ᄅ<>ᄅ; # to HALFWIDTH HANGUL LETTER RIEUL +ᆰ<>ᆰ; # to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK +ᆱ<>ᆱ; # to HALFWIDTH HANGUL LETTER RIEUL-MIEUM +ᆲ<>ᆲ; # to HALFWIDTH HANGUL LETTER RIEUL-PIEUP +ᆳ<>ᆳ; # to HALFWIDTH HANGUL LETTER RIEUL-SIOS +ᆴ<>ᆴ; # to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH +ᆵ<>ᆵ; # to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH +ᄚ<>ᄚ; # to HALFWIDTH HANGUL LETTER RIEUL-HIEUH +ᄆ<>ᄆ; # to HALFWIDTH HANGUL LETTER MIEUM +ᄇ<>ᄇ; # to HALFWIDTH HANGUL LETTER PIEUP +ᄈ<>ᄈ; # to HALFWIDTH HANGUL LETTER SSANGPIEUP +ᄡ<>ᄡ; # to HALFWIDTH HANGUL LETTER PIEUP-SIOS +ᄉ<>ᄉ; # to HALFWIDTH HANGUL LETTER SIOS +ᄊ<>ᄊ; # to HALFWIDTH HANGUL LETTER SSANGSIOS +ᄋ<>ᄋ; # to HALFWIDTH HANGUL LETTER IEUNG +ᄌ<>ᄌ; # to HALFWIDTH HANGUL LETTER CIEUC +ᄍ<>ᄍ; # to HALFWIDTH HANGUL LETTER SSANGCIEUC +ᄎ<>ᄎ; # to HALFWIDTH HANGUL LETTER CHIEUCH +ᄏ<>ᄏ; # to HALFWIDTH HANGUL LETTER KHIEUKH +ᄐ<>ᄐ; # to HALFWIDTH HANGUL LETTER THIEUTH +ᄑ<>ᄑ; # to HALFWIDTH HANGUL LETTER PHIEUPH +ᄒ<>ᄒ; # to HALFWIDTH HANGUL LETTER HIEUH +ᅡ<>ᅡ; # to HALFWIDTH HANGUL LETTER A +ᅢ<>ᅢ; # to HALFWIDTH HANGUL LETTER AE +ᅣ<>ᅣ; # to HALFWIDTH HANGUL LETTER YA +ᅤ<>ᅤ; # to HALFWIDTH HANGUL LETTER YAE +ᅥ<>ᅥ; # to HALFWIDTH HANGUL LETTER EO +ᅦ<>ᅦ; # to HALFWIDTH HANGUL LETTER E +ᅧ<>ᅧ; # to HALFWIDTH HANGUL LETTER YEO +ᅨ<>ᅨ; # to HALFWIDTH HANGUL LETTER YE +ᅩ<>ᅩ; # to HALFWIDTH HANGUL LETTER O +ᅪ<>ᅪ; # to HALFWIDTH HANGUL LETTER WA +ᅫ<>ᅫ; # to HALFWIDTH HANGUL LETTER WAE +ᅬ<>ᅬ; # to HALFWIDTH HANGUL LETTER OE +ᅭ<>ᅭ; # to HALFWIDTH HANGUL LETTER YO 
+ᅮ<>ᅮ; # to HALFWIDTH HANGUL LETTER U +ᅯ<>ᅯ; # to HALFWIDTH HANGUL LETTER WEO +ᅰ<>ᅰ; # to HALFWIDTH HANGUL LETTER WE +ᅱ<>ᅱ; # to HALFWIDTH HANGUL LETTER WI +ᅲ<>ᅲ; # to HALFWIDTH HANGUL LETTER YU +ᅳ<>ᅳ; # to HALFWIDTH HANGUL LETTER EU +ᅴ<>ᅴ; # to HALFWIDTH HANGUL LETTER YI +ᅵ<>ᅵ; # to HALFWIDTH HANGUL LETTER I +¢<>'¢'; # from FULLWIDTH CENT SIGN +£<>'£'; # from FULLWIDTH POUND SIGN +¬<>'¬'; # from FULLWIDTH NOT SIGN + ̄<>'¯'; # from FULLWIDTH MACRON +' '<>' '; # ideographic space (place this after MACRON) +¦<>'¦'; # from FULLWIDTH BROKEN BAR +¥<>'¥'; # from FULLWIDTH YEN SIGN +₩<>₩; # from FULLWIDTH WON SIGN +│<>│; # to HALFWIDTH FORMS LIGHT VERTICAL +'←'<>'←'; # to HALFWIDTH LEFTWARDS ARROW +↑<>↑; # to HALFWIDTH UPWARDS ARROW +'→'<>'→'; # to HALFWIDTH RIGHTWARDS ARROW +↓<>↓; # to HALFWIDTH DOWNWARDS ARROW +■<>■; # to HALFWIDTH BLACK SQUARE +○<>○; # to HALFWIDTH WHITE CIRCLE + +# eof + diff --git a/icu4c/source/data/translit/Greek_Latin.txt b/icu4c/source/data/translit/Greek_Latin.txt new file mode 100644 index 00000000000..221e5d084e6 --- /dev/null +++ b/icu4c/source/data/translit/Greek_Latin.txt @@ -0,0 +1,345 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# Rules are predicated on running NFD first, and NFC afterwards +# :: [\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ; +# MINIMAL FILTER GENERATED FOR: Greek-Latin +:: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u03F7-\u07FB\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u03F9] ; + +:: NFD (NFC) ; + +# TEST CASES + +# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος +# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ +# ᾳ ῃ ῳ ὃ ὄ +# ὠς ὡς ὢς ὣς +# Ὠς Ὡς Ὢς Ὣς +# ὨΣ ὩΣ ὪΣ ὫΣ +# Ạ, ạ, Ẹ, ẹ, Ọ, ọ + +# Useful variables + +$lower = [[:latin:][:greek:] & [:Ll:]]; +$glower = [[:greek:] & [:Ll:]]; +$upper = [[:latin:][:greek:] & [:Lu:]] ; +$accent = [:M:] ; + +# NOTE: restrict to just the Greek & Latin accents that we care about +# TODO: broaden out once interation is fixed +$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ; + +$macron = \u0304 ; +$ddot = \u0308 ; +$ddotmac = [$ddot$macron]; + +$lcgvowel = [αεηιουω] ; +$ucgvowel = [ΑΕΗΙΟΥΩ] ; +$gvowel = [$lcgvowel $ucgvowel] ; +$lcgvowelC = [$lcgvowel $accent] ; + +$evowel = [aeiouyAEIOUY]; +$evowel2 = [iuyIUY]; +$vowel = [ $evowel $gvowel] ; + +$gammaLike = [ΓΚΞΧγκξχϰ] ; +$egammaLike = [GKXCgkxc] ; +$smooth = ̓ ; 
+$rough = ̔ ; +$iotasub = ͅ ; + +$evowel_i = [$evowel-[iI]] ; +$evowel2_i = [uyUY]; + +$underbar = \u0331; + +$afterLetter = [:L:] [[:M:]\']* ; +$beforeLetter = [[:M:]\']* [:L:] ; +$beforeLower = $accent * $lower ; + +$notLetter = [^[:L:][:M:]] ; +$under = ̱; + +# Fix punctuation +# preserve original +\: <> \: $under ; +\? <> \? $under ; + +\; <> \? ; +· <> \: ; + +# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve + +\u0342 <> \u0302 ; + +# IOTA: convert iota subscript to iota +# first make previous alpha long! + +$accent_minus = [[$accent]-[$iotasub$macron]]; + +Α } $accent_minus * $iotasub > | Α $macron ; +α } $accent_minus * $iotasub > | α $macron ; + +# now convert to uppercase if after uppercase, ow to lowercase + +$upper $accent * { $iotasub > I ; +$iotasub > i ; + +| $1 $iotasub < ($evowel $macron $accentMinus *) i ; +| $1 $iotasub < ($evowel $macron $accentMinus *) I ; + +# BREATHING + +# Convert rough breathing to h, and move before letters. + +# Make A ` x = > H a x + + Α ($macron?) $rough } $beforeLower > H | α $1; + Ε $rough } $beforeLower > H | ε; + Η $rough } $beforeLower > H | η ; + Ι ($ddot?) $rough } $beforeLower > H | ι $1; + Ο $rough } $beforeLower > H | ο ; + Υ $rough } $beforeLower > H | υ ; + Ω ($ddot?) $rough } $beforeLower > H | ω $1; + +# Make A x ` = > H a x + +Α ($glower $macron?) $rough > H | α $1 ; +Ε ($glower) $rough > H | ε $1 ; +Η ($glower) $rough > H | η $1 ; +Ι ($glower $ddot?) $rough > H | ι $1 ; +Ο ($glower) $rough > H | ο $1 ; +Υ ($glower) $rough > H | υ $1 ; +Ω ($glower $ddot?) $rough > H | ω $1 ; + +#Otherwise, make x ` into h x and X ` into H X + +($lcgvowel + $ddotmac? ) $rough > h | $1 ; +($gvowel + $ddotmac? ) $rough > H | $1 ; + +# Go backwards with H + +| $1 $rough < h ($evowel $macron $ddot? $evowel2_i $macron?) ; +| $1 $rough < h ($evowel $ddot? $evowel2 $macron?) ; +| $1 $rough < h ($evowel $macron? $ddot?) ; + +| $1 $rough < H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) 
; +| $1 $rough < H ([AEIOUY] $ddot? $evowel2 $macron?) ; +| $1 $rough < H ([AEIOUY] $macron? $ddot?) ; + +# titlecase, have to fix individually +# in the future, we should add &uppercase() to make this easier + +| A $1 $rough < H a ($macron $ddot? $evowel2_i $macron?) ; +| E $1 $rough < H e ($macron $ddot? $evowel2_i $macron?) ; +| I $1 $rough < H i ($macron $ddot? $evowel2_i $macron?) ; +| O $1 $rough < H o ($macron $ddot? $evowel2_i $macron?) ; +| U $1 $rough < H u ($macron $ddot? $evowel2_i $macron?) ; +| Y $1 $rough < H y ($macron $ddot? $evowel2_i $macron?) ; + +| A $1 $rough < H a ($ddot? $evowel2 $macron?) ; +| E $1 $rough < H e ($ddot? $evowel2 $macron?) ; +| I $1 $rough < H i ($ddot? $evowel2 $macron?) ; +| O $1 $rough < H o ($ddot? $evowel2 $macron?) ; +| U $1 $rough < H u ($ddot? $evowel2 $macron?) ; +| Y $1 $rough < H y ($ddot? $evowel2 $macron?) ; + +| A $1 $rough < H a ($macron? $ddot? ) ; +| E $1 $rough < H e ($macron? $ddot? ) ; +| I $1 $rough < H i ($macron? $ddot? ) ; +| O $1 $rough < H o ($macron? $ddot? ) ; +| U $1 $rough < H u ($macron? $ddot? ) ; +| Y $1 $rough < H y ($macron? $ddot? ) ; + +# Now do smooth + +#delete smooth breathing for Latin +$smooth > ; + +# insert in Greek +# the assumption is that all Marks are on letters. + + | $1 $smooth < $notLetter { ([rR]) } [^hH$smooth$rough] ; + | $1 $smooth < $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ; + | $1 $smooth < $notLetter { ($evowel $macron?) 
} [^$evowel2$smooth$rough] ; + +# TODO: preserve smooth/rough breathing if not +# on initial vowel sequence + +# need to have these up here so the rules don't mask + +# remove now superfluous macron when returning + +Α < A $macron ; +α < a $macron ; + +η <> e $macron ; +Η <> E $macron ; + +φ <> ph ; +Ψ } $beforeLower <> Ps ; +Ψ <> PS ; + +Φ } $beforeLower <> Ph ; +Φ <> PH ; +ψ <> ps ; + +ω <> o $macron ; +Ω <> O $macron; + +# NORMAL + +α <> a ; +Α <> A ; + +β <> b ; +Β <> B ; + +γ } $gammaLike <> n } $egammaLike ; +γ <> g ; +Γ } $gammaLike <> N } $egammaLike ; +Γ <> G ; + +δ <> d ; +Δ <> D ; + +ε <> e ; +Ε <> E ; + +ζ <> z ; +Ζ <> Z ; + +θ <> th ; +Θ } $beforeLower <> Th ; +Θ <> TH ; + +ι <> i ; +Ι <> I ; + +κ <> k ; +Κ <> K ; + +λ <> l ; +Λ <> L ; + +μ <> m ; +Μ <> M ; + +ν } $gammaLike > n\' ; +ν <> n ; +Ν } $gammaLike <> N\' ; +Ν <> N ; + +ξ <> x ; +Ξ <> X ; + +ο <> o ; +Ο <> O ; + +π <> p ; +Π <> P ; + +ρ $rough <> rh; +Ρ $rough } $beforeLower <> Rh ; +Ρ $rough <> RH ; +ρ <> r ; +Ρ <> R ; + +# insert separator before things that turn into s + +[Pp] { } [ςσΣϷϸϺϻ] > \' ; + +# special S variants + +Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L +ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L +Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L +ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L + +# underbar means exception + +# before a letter, initial +ς } $beforeLetter <> s $underbar } $beforeLetter; +σ } $beforeLetter <> s } $beforeLetter; + +# otherwise, after a letter = final +$afterLetter { σ <> $afterLetter { s $underbar; +$afterLetter { ς <> $afterLetter { s ; + +# otherwise (isolated) = initial +ς <> s $underbar; +σ <> s ; + +# [Pp] { Σ <> \'S ; +Σ <> S ; + +τ <> t ; +Τ <> T ; + +$vowel {υ } <> u ; +υ <> y ; +$vowel { Υ <> U ; +Υ <> Y ; + +χ <> ch ; +Χ } $beforeLower <> Ch ; +Χ <> CH ; + +# Completeness for ASCII + +$ignore = [[:Mark:]''] * ; + +| k < c ; +| ph < f ; +| i < j ; +| k < q ; +| b < v } 
$vowel ; +| b < w } $vowel; +| u < v ; +| u < w; +| K < C ; +| Ph < F ; +| I < J ; +| K < Q ; +| B < V } $vowel ; +| B < W } $vowel ; +| U < V ; +| U < W ; + +$rough } $ignore [:UppercaseLetter:] > H ; +$ignore [:UppercaseLetter:] { $rough > H ; +$rough < H ; +$rough <> h ; + +# Completeness for Greek + +ϐ > | β ; +ϑ > | θ ; +ϒ > | Υ ; +ϕ > | φ ; +ϖ > | π ; + +ϰ > | κ ; +ϱ > | ρ ; +ϲ > | σ ; +Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL +ϳ > j ; +ϴ > | Θ ; +ϵ > | ε ; + +µ > | μ ; + + ͺ > i; + +# delete any trailing ' marks used for roundtripping + + < [Ππ] { \' } [Ss] ; + < [Νν] { \' } $egammaLike ; + +::NFC (NFD) ; +# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ; +# ([\u0000-\u007F \u00B7 [:Latin:] [:nonspacing mark:]]) ; +# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD +:: ( [':?A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0300-\u0337\u0339-\u0345\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEE\u1FF2-\u1FF4\u1FF6-\u1FFC\u212A-\u212B] ) ; diff --git a/icu4c/source/data/translit/Greek_Latin_UNGEGN.txt b/icu4c/source/data/translit/Greek_Latin_UNGEGN.txt new file mode 100644 index 00000000000..f14bc236cef --- /dev/null +++ b/icu4c/source/data/translit/Greek_Latin_UNGEGN.txt @@ -0,0 +1,252 @@ 
+#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- +# For modern Greek, based on UNGEGN rules. + +# Rules are predicated on running NFD first, and NFC afterwards +# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN +# WARNING: need to add accents to both filters ### +# :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ; + +:: [[[:Greek:][:Mn:][:Me:]] [\:-;?\u00B7\u037E\u0387]] ; +::NFD (NFC) ; + +# Useful variables + +$lower = [[:latin:][:greek:] & [:Ll:]] ; +$upper = [[:latin:][:greek:] & [:Lu:]] ; +$accent = [[:Mn:][:Me:]] ; + +$macron = ̄ ; +$ddot = ̈ ; + +$lcgvowel = [αεηιουω] ; +$ucgvowel = [ΑΕΗΙΟΥΩ] ; +$gvowel = [$lcgvowel $ucgvowel] ; +$lcgvowelC = [$lcgvowel $accent] ; + +$evowel = [aeiouyAEIOUY]; +$vowel = [ $evowel $gvowel] ; + +$beforeLower = $accent * $lower ; + +$gammaLike = [ΓΚΞΧγκξχϰ] ; +$egammaLike = [GKXCgkxc] ; +$smooth = ̓ ; +$rough = ̔ ; +$iotasub = ͅ ; + +$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ; + +$under = ̱; + +$caron = ̌; + +$afterLetter = [:L:] [\'$accent]* ; +$beforeLetter = [\'$accent]* [:L:] ; + +# Fix punctuation + +# preserve orginal +\: <> \: $under ; +\? <> \? $under ; + +\; <> \? 
; +· <> \: ; + +# Fix any ancient characters that creep in + +͂ > ́ ; +̂ > ́ ; +̀ > ́ ; +$smooth > ; +$rough > ; +$iotasub > ; +ͺ > ; + +# need to have these up here so the rules don't mask + +η <> i $under ; +Η <> I $under ; + +Ψ } $beforeLower <> Ps ; +Ψ <> PS ; +ψ <> ps ; + +ω <> o $under ; +Ω <> O $under; + +# at begining or end of word, convert mp to b + +[^[:L:]$accent] { μπ > b ; +μπ } [^[:L:]$accent] > b ; +[^[:L:]$accent] { [Μμ][Ππ] > B ; +[Μμ][Ππ] } [^[:L:]$accent] > B ; + +μπ < b ; +Μπ < B } $beforeLower ; +ΜΠ < B ; + +# handle diphthongs ending with upsilon + +ου <> ou ; +ΟΥ <> OU ; +Ου <> Ou ; +οΥ <> oU ; + +$fmaker = [aeiAEI] $under ? ; +$shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate + +$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ; +υ $1 < ( $shiftForwardVowels )* v $under ; + +$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under; +υ $1 < ( $shiftForwardVowels )* f $under ; + +$fmaker { Υ } $softener <> V $under ; +$fmaker { Υ <> U $under ; + +υ <> y ; +Υ <> Y ; + +# NORMAL + +α <> a ; +Α <> A ; + +β <> v ; +Β <> V ; + +γ } $gammaLike <> n } $egammaLike ; +γ <> g ; +Γ } $gammaLike <> N } $egammaLike ; +Γ <> G ; + +δ <> d ; +Δ <> D ; + +ε <> e ; +Ε <> E ; + +ζ <> z ; +Ζ <> Z ; + +θ <> th ; +Θ } $beforeLower <> Th ; +Θ <> TH ; + +ι <> i ; +Ι <> I ; + +κ <> k ; +Κ <> K ; + +λ <> l ; +Λ <> L ; + +μ <> m ; +Μ <> M ; + +ν } $gammaLike > n\' ; +ν <> n ; +Ν } $gammaLike <> N\' ; +Ν <> N ; + +ξ <> x ; +Ξ <> X ; + +ο <> o ; +Ο <> O ; + +π <> p ; +Π <> P ; + +ρ <> r ; +Ρ <> R ; + +# insert separator before things that turn into s +[Pp] { } [ςσΣϷϸϺϻ] > \' ; + +# special S variants + +Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L +ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L +Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L +ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L + +# Caron means exception + +# before a letter, initial +ς } 
$beforeLetter <> s $under } $beforeLetter; +σ } $beforeLetter <> s } $beforeLetter; + +# otherwise, after a letter = final +$afterLetter { σ <> $afterLetter { s $under; +$afterLetter { ς <> $afterLetter { s ; + +# otherwise (isolated) = initial +ς <> s $under; +σ <> s ; + +# [Pp] { Σ <> \'S ; +Σ <> S ; + +τ <> t ; +Τ <> T ; + +φ <> f ; +Φ <> F ; + +χ <> ch ; +Χ } $beforeLower <> Ch ; +Χ <> CH ; + +# Completeness for ASCII + +# $ignore = [[:Mark:]''] * ; + +| ch < h ; +| k < c ; +| i < j ; +| k < q ; +| b < u } $vowel ; +| b < w } $vowel ; +| y < u ; +| y < w ; + +| Ch < H ; +| K < C ; +| I < J ; +| K < Q ; +| B < W } $vowel ; +| B < U } $vowel ; +| Y < W ; +| Y < U ; + +# Completeness for Greek + +ϐ > | β ; +ϑ > | θ ; +ϒ > | Υ ; +ϕ > | φ ; +ϖ > | π ; + +ϰ > | κ ; +ϱ > | ρ ; +ϲ > | σ ; +Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL +ϳ > j ; +ϴ > | Θ ; +ϵ > | ε ; +µ > | μ ; + +# delete any trailing ' marks used for roundtripping + + < [Ππ] { \' } [Ss] ; + < [Νν] { \' } $egammaLike ; + +::NFC (NFD) ; + +# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD +:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ; diff --git a/icu4c/source/data/translit/Gujarati_InterIndic.txt b/icu4c/source/data/translit/Gujarati_InterIndic.txt new file mode 100644 index 00000000000..2053427608a --- /dev/null +++ b/icu4c/source/data/translit/Gujarati_InterIndic.txt @@ -0,0 +1,91 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# Gujarati-InterIndic +#:: NFD (NFC) ; +\u0a81>\ue001; # SIGN CANDRABINDU +\u0a82>\ue002; # SIGN ANUSVARA +\u0a83>\ue003; # SIGN VISARGA +\u0a85>\ue005; # LETTER A +\u0a86>\ue006; # LETTER AA +\u0a87>\ue007; # LETTER I +\u0a88>\ue008; # LETTER II +\u0a89>\ue009; # LETTER U +\u0a8a>\ue00a; # LETTER UU +\u0a8b>\ue00b; # LETTER VOCALIC R +\u0a8c>\ue00c; # LETTER VOCALIC L +\u0a8d>\ue00d; # VOWEL CANDRA E +\u0a8f>\ue00f; # LETTER E +\u0a90>\ue010; # LETTER AI +\u0a91>\ue011; # VOWEL CANDRA O +\u0a93>\ue013; # LETTER O +\u0a94>\ue014; # LETTER AU +\u0a95>\ue015; # LETTER KA +\u0a96>\ue016; # LETTER KHA +\u0a97>\ue017; # LETTER GA +\u0a98>\ue018; # LETTER GHA +\u0a99>\ue019; # LETTER NGA +\u0a9a>\ue01a; # LETTER CA +\u0a9b>\ue01b; # LETTER CHA +\u0a9c>\ue01c; # LETTER JA +\u0a9d>\ue01d; # LETTER JHA +\u0a9e>\ue01e; # LETTER NYA +\u0a9f>\ue01f; # LETTER TTA +\u0aa0>\ue020; # LETTER TTHA +\u0aa1>\ue021; # LETTER DDA +\u0aa2>\ue022; # LETTER DDHA +\u0aa3>\ue023; # LETTER NNA +\u0aa4>\ue024; # LETTER TA +\u0aa5>\ue025; # LETTER THA +\u0aa6>\ue026; # LETTER DA +\u0aa7>\ue027; # LETTER DHA +\u0aa8>\ue028; # LETTER NA +\u0aaa>\ue02a; # LETTER PA +\u0aab>\ue02b; # LETTER PHA +\u0aac>\ue02c; # LETTER BA +\u0aad>\ue02d; # LETTER BHA +\u0aae>\ue02e; # LETTER MA +\u0aaf>\ue02f; # LETTER YA +\u0ab0>\ue030; # LETTER RA +\u0ab2>\ue032; # LETTER LA +\u0ab3>\ue033; # LETTER LLA +\u0ab5>\ue035; # LETTER VA +\u0ab6>\ue036; # LETTER SHA +\u0ab7>\ue037; # LETTER SSA +\u0ab8>\ue038; # LETTER SA +\u0ab9>\ue039; # LETTER HA +\u0abc>\ue03c; # SIGN NUKTA +\u0abd>\ue03d; # SIGN AVAGRAHA +\u0abe>\ue03e; # VOWEL SIGN AA +\u0abf>\ue03f; # VOWEL SIGN I +\u0ac0>\ue040; # VOWEL SIGN II +\u0ac1>\ue041; # VOWEL SIGN U +\u0ac2>\ue042; # VOWEL SIGN UU +\u0ac3>\ue043; # VOWEL SIGN VOCALIC R +\u0ac4>\ue044; # VOWEL SIGN VOCALIC RR +\u0ac5>\ue045; # VOWEL SIGN CANDRA E +\u0ac7>\ue047; # VOWEL SIGN E +\u0ac8>\ue048; # VOWEL SIGN AI 
+\u0ac9>\ue049; # VOWEL SIGN CANDRA O +\u0acb>\ue04b; # VOWEL SIGN O +\u0acc>\ue04c; # VOWEL SIGN AU +\u0acd>\ue04d; # SIGN VIRAMA +\u0ad0>\ue050; # OM +\u0ae0>\ue060; # LETTER VOCALIC RR +\u0ae1>\ue061; # LETTER VOCALIC LL +\u0ae6>\ue066; # DIGIT ZERO +\u0ae7>\ue067; # DIGIT ONE +\u0ae8>\ue068; # DIGIT TWO +\u0ae9>\ue069; # DIGIT THREE +\u0aea>\ue06a; # DIGIT FOUR +\u0aeb>\ue06b; # DIGIT FIVE +\u0aec>\ue06c; # DIGIT SIX +\u0aed>\ue06d; # DIGIT SEVEN +\u0aee>\ue06e; # DIGIT EIGHT +\u0aef>\ue06f; # DIGIT NINE +\u0964>\ue064; # DANDA +\u0965>\ue065; # DOUBLE DANDA +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/Gurmukhi_InterIndic.txt b/icu4c/source/data/translit/Gurmukhi_InterIndic.txt new file mode 100644 index 00000000000..2c93789836b --- /dev/null +++ b/icu4c/source/data/translit/Gurmukhi_InterIndic.txt @@ -0,0 +1,95 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# Gurmukhi-InterIndic +#:: NFD (NFC) ; + +#\u0A16\u0A3C>\uE059; # LETTER KHHA +#\u0A17\u0A3C>\uE05A; # LETTER GHHA +#\u0A1C\u0A3C>\uE05B; # LETTER ZA +#\u0A38\u0A3C>\uE036; # LETTER SHA +#\u0A32\u0A3C>\uE033; # LETTER LLA +#\u0A2B\u0A3C>\uE05E; # LETTER FA +\u0A01>\ue001; # SIGN CHANDRABINDU +\u0A02>\uE002; # SIGN BINDI +\u0A05>\uE005; # LETTER A +\u0A06>\uE006; # LETTER AA +\u0A07>\uE007; # LETTER I +\u0A08>\uE008; # LETTER II +\u0A09>\uE009; # LETTER U +\u0A0A>\uE00A; # LETTER UU +\u0A0C>\uE032; # FALLBACK : VOCALIC LA +\u0A0F>\uE00F; # LETTER EE +\u0A10>\uE010; # LETTER AI +\u0A13>\uE013; # LETTER OO +\u0A14>\uE014; # LETTER AU +\u0A15>\uE015; # LETTER KA +\u0A16>\uE016; # LETTER KHA +\u0A17>\uE017; # LETTER GA +\u0A18>\uE018; # LETTER GHA +\u0A19>\uE019; # LETTER NGA +\u0A1A>\uE01A; # LETTER CA +\u0A1B>\uE01B; # LETTER CHA +\u0A1C>\uE01C; # LETTER JA +\u0A1D>\uE01D; # LETTER JHA +\u0A1E>\uE01E; # LETTER NYA +\u0A1F>\uE01F; # LETTER TTA +\u0A20>\uE020; # LETTER TTHA +\u0A21>\uE021; # LETTER DDA +\u0A22>\uE022; # LETTER DDHA +\u0A23>\uE023; # LETTER NNA +\u0A24>\uE024; # LETTER TA +\u0A25>\uE025; # LETTER THA +\u0A26>\uE026; # LETTER DA +\u0A27>\uE027; # LETTER DHA +\u0A28>\uE028; # LETTER NA +\u0A2A>\uE02A; # LETTER PA +\u0A2B>\uE02B; # LETTER PHA +\u0A2C>\uE02C; # LETTER BA +\u0A2D>\uE02D; # LETTER BHA +\u0A2E>\uE02E; # LETTER MA +\u0A2F>\uE02F; # LETTER YA +\u0A30>\uE030; # LETTER RA +\u0A32>\uE032; # LETTER LA +\u0a33>\uE033; # FALLBACK +\u0A35>\uE035; # LETTER VA +\u0a36>\ue036; # LETTER SHA +\u0A38\u0a3c>\ue036; # FALLBACK: decomposed SHA (SA + NUKTA); must precede the plain SA rule +\u0A38>\uE038; # LETTER SA +\u0A39>\uE039; # LETTER HA +\u0A3C>\uE03C; # SIGN NUKTA +\u0A3E>\uE03E; # VOWEL SIGN AA +\u0A3F>\uE03F; # VOWEL SIGN I +\u0A40>\uE040; # VOWEL SIGN II +\u0A41>\uE041; # VOWEL SIGN U +\u0A42>\uE042; # VOWEL SIGN UU +\u0A47>\uE047; # VOWEL SIGN EE +\u0A48>\uE048; # VOWEL SIGN AI +\u0A4B>\uE04B; # VOWEL SIGN OO +\u0A4C>\uE04C; # VOWEL SIGN AU 
+\u0A4D>\uE04D; # SIGN VIRAMA + +\u0A5C>\uE05C; # LETTER RRA + +\u0A66>\uE066; # DIGIT ZERO +\u0A67>\uE067; # DIGIT ONE +\u0A68>\uE068; # DIGIT TWO +\u0A69>\uE069; # DIGIT THREE +\u0A6A>\uE06A; # DIGIT FOUR +\u0A6B>\uE06B; # DIGIT FIVE +\u0A6C>\uE06C; # DIGIT SIX +\u0A6D>\uE06D; # DIGIT SEVEN +\u0A6E>\uE06E; # DIGIT EIGHT +\u0A6F>\uE06F; # DIGIT NINE +\u0A70>\uE07C; # TIPPI +\u0A71>\uE07D; # ADDAK +\u0A72>\uE07E; # IRI +\u0A73>\uE07F; # URA +\u0A74>\uE080; # EK ONKAR +\u0964>\ue064; # DANDA +\u0965>\ue065; # DOUBLE DANDA +# :: NFC (NFD) ; +# eof + diff --git a/icu4c/source/data/translit/Han_Latin.txt b/icu4c/source/data/translit/Han_Latin.txt new file mode 100644 index 00000000000..665609710e9 --- /dev/null +++ b/icu4c/source/data/translit/Han_Latin.txt @@ -0,0 +1,1440 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# Warning: does not do round-trip mapping!! 
+ +# Convert CJK characters +::Han-Spacedhan(); + +# Start RAW data for converting CJK characters +[吖呵錒锕阿]>ā; +嗄>á; +啊>a; +[哀哎唉埃挨銰鎄锿]>āi; +[㱯䠹䶣啀嘊嵦捱敱敳癌皑皚騃]>ái; +[㑸㗨㢊䑂䨠佁娾昹欸毐矮蔼藹躷霭靄馤]>ǎi; +[㕌㗒㘷㝶㤅㿄䀳䅬䔽䝽䬵伌僾叆嗳噯塧壒嫒嬡愛懓懝暧曖濭爱瑷璦皧瞹砨硋碍礙艾薆譪賹鑀隘靉鴱]>ài; +[侒媕安峎峖庵氨痷盦盫腌腤菴萻葊蓭誝諳谙銨铵鞌鞍馣鵪鶕鹌]>ān; +[䜙啽玵雸]>án; +[㜝㽢䁆䅖俺唵垵埯揞晻罯隌]>ǎn; +[㟁㱘㸩䅁䎏䎨䬓䮗䯥儑匎堓岸按暗案桉洝犴荌錌闇鮟黬黯]>àn; +骯>āng; +[㭿䀚䒢䩕䭹䭺卬岇昂昻]>áng; +[䍩軮雵]>ǎng; +[㦹㼜枊盎醠]>àng; +[凹柪梎軪]>āo; +[㟼㠂㿰䐿䚫䥝䦋䵅厫嗷嗸嶅廒摮敖滶熬爊獒獓璈磝翱翺聱蔜螯謷謸遨鏊鏖隞驁骜鰲鳌鷔鼇𦪈]>áo; +[㑃㤇䞝䯠䴈媪媼抝拗狕芺袄襖镺𥜌]>ǎo; +[㕭㘬㘭㜜㜩㠗㥿䁱䜒䫨䮯傲坳垇奡奥奧嫯岙嶴慠懊扷擙澳詏𩼈]>ào; +[仈八哵岜扒捌朳玐疤粑羓芭豝釟鲃]>bā; +[㔜㧊䟦䳁䳊叐坺墢妭抜拔炦犮秡胈茇菝詙跋軷鈸钹颰馛魃鼥]>bá; +[㞎把鈀钯靶]>bǎ; +[㖠㶚䃻䆉䇑䎬䎱䥯䩗䩻䰾䱝坝垻壩弝欛灞爸猈覇霸]>bà; +[叭吧巴笆罢罷]>ba; +掰>bāi; +[㼟㿟䳆白]>bái; +[㗗㼣䙓佰捭摆擺柏百矲粨絔襬]>bǎi; +[㗑㠔䒔䢙䴽庍拜拝敗稗粺蛽贁败]>bài; +[扳搬攽斑斒朌班瘢癍般虨螌褩辬頒颁]>bān; +[㩯㸞㺜䉽䬳坂岅昄板版粄舨蝂鈑钣闆阪]>bǎn; +[㚘㪵䕰伴办半姅怑拌湴瓣秚絆绊辦鉡靽]>bàn; +扮>ban; +[垹帮幇幚幫捠梆浜縍邦邫鞤]>bāng; +[㔙㮄䟺榜牓綁绑膀]>bǎng; +[㭋㯁㾦䂜䎧䖫䧛䰷傍塝棒磅稖艕蚌蜯謗谤鎊镑]>bàng; +[勹包孢笣胞苞蕔褒襃闁骲]>bāo; +[㵡㿺䈏䥤䨌䨔䪨嫑瓟窇薄雹]>báo; +[㙅㲏㻄䎂䭋䳈䳰䴐保堡堢宝宲寚寳寶怉珤緥葆褓褴賲靌飽饱駂鳵鴇鸨]>bǎo; +[㙸㫧㲒䤖儤勽報忁报抱暴曓煲爆犦菢虣袌豹趵鉋鑤铇鮑鲍鸔]>bào; +[伓卑岥庳悲揹杯桮椑盃碑禆綼萆藣錃鵯鹎]>bēi; +[㤳北鉳]>běi; +[㓈㔨㛝㣁㰆㶔㷶㸢㸬㸽㻗㼎㾱䁅䋳䔒䠙䡶䩀䰽俻倍偝偹備僃备孛悖惫愂憊昁梖焙牬犕狈狽珼琲糒紴背蓓被褙貝贝軰輩辈邶郥鄁鋇鐾钡鞴韛]>bèi; +[呗唄]>bei; +[奔泍渀犇錛锛]>bēn; +[㡷㮺夲奙本楍畚苯]>běn; +[㤓㨧㱵䬱倴坌捹撪桳笨輽逩]>bèn; +[伻嘣崩嵭祊綳閍]>bēng; +甭>béng; +[㑟䋽䙀䩬䭰䳞埲玤琫繃绷菶誁鞛]>běng; +[㱶㷯䨻塴搒槰泵蠯跰蹦迸逬鏰镚]>bèng; +[偪屄毴皀皕稫芘蓖螕豍逼颷]>bī; +[㮰䨆䵄荸魮鼻]>bí; +[㚰㠲㪏㻶䃾䇷䏢䘡䠋䣥佊俾匕吡啚夶妣彼朼柀比沘滗潷疕秕笔筆粃蚍貏鄙]>bǐ; +[㓖㗉㘠㘩㙄㡀㡙㢰㢶㢸㧙㪤㮿㯇㱸㳼㵥㵨㹃㻫㿫䀣䁹䄶䊧䋔䌟䎵䏶䕗䖩䟆䟤䦘䧗䩛䪐䫁䫾䬛䭮䮡䯗佖咇哔嗶坒堛壁奰妼婢嬖币幣庇廦弊弻弼彃必怭愊愎敝斃枈柲梐楅檘毕毖毙湢滭煏熚狴獘獙珌璧畀畢疪痹痺睤睥碧筚箄-箆篦篳粊縪罼聛腷臂苾荜蓽蔽薜蜌袐裨襞襣觱詖诐貱賁贔贲赑跸蹕躃躄避邲鄨鄪鉍鎞鏎鐴铋閇閉閟闭陛鞸韠飶饆馝駜驆骳髀魓鮅鰏鲾鵖鷝鷩鼊]>bì; +[煸牑猵甂砭稨笾箯籩糄編编蝙边辺邉邊鍽鞭鯾鯿鳊]>biān; +[㦚䁵匾惼扁碥窆藊褊貶贬]>biǎn; +[㝸㣐㭓㲢㳎㳒㴜㵷㺹㻞䉸䒪䛒䡢䪻便匥卞变変弁徧忭抃揙昪汳汴玣緶缏艑苄覍變辡辧-辩辫辮辯遍釆閞鴘]>biàn; +[儦墂幖彪标標滮瀌灬熛爂猋瘭磦穮脿臕謤贆鏢鑣镖镳颩颮飆飇飍飑飙飚驫骉髟麃麅]>biāo; +[㟽㠒㯱㯹䔸婊檦表裱褾諘錶飈]>biǎo; +[㧼䞄俵覅鰾鳔]>biào; +[憋虌鱉鳖鼈龞]>biē; +[䠥䭱別别莂蟞襒蹩]>bié; +[㿜瘪癟蛂]>biě; +[㢼䉲䋢䏟彆徶]>biè; +[宾彬斌椕槟檳汃滨濒濱瀕瑸璸矉繽缤蠙豩豳賓賔邠鑌镔霦顮馪驞]>bīn; +[䐔傧儐摈擯殡殯膑臏髌髕髩鬂鬓鬢]>bìn; +[仌兵冫冰掤栟梹氷絣]>bīng; +[㨀䋑䓑䴵丙偋怲抦昞昺柄炳眪禀秉稟窉苪蛃邴鉼陃鞞餅餠饼]>bǐng; +[䈂䗒並併倂傡寎并幷摒栤棅病竝鈵靐鮩]>bìng; +[剝剥哱嶓拨撥播柭波玻癶盋砵碆缽菠袚蹳鉢钵驋髉鮁鱍鲅]>bō; +[㗘㝿㟑㧳㩧㩭㪍㬍㬧㱟㴾㶿㹀䂍䊿䍨䍸䑈䒄䗚䙏䞳䟛䢌䢪䥬䪇䪬䫊䬪䭦䭯䮀䮂䯋䰊䶈亳伯侼僰勃博嚗壆嶏帛愽懪挬搏敀栢桲欂泺浡渤煿牔犻猼瓝礡礴箔簙簿糪胉脖舶艊苩葧蔔袯襏襮誖謈豰踄踣郣鉑鋍鎛鑮铂镈餑餺饽馎馞駁駮驳髆鮊鲌鵓鹁]>bó; +[箥簸蚾跛駊]>bǒ; 
+[㖕孹挀擘檗疈繴蘗譒]>bò; +[卜啵膊]>bo; +[峬庯晡逋餔鵏]>bū; +[㙛㨐䀯䋠䒈䪁䪔卟哺捕补補鳪]>bǔ; +[㘵㚴㳍㻉㾟䊇䍌䏽䑰䝵䬏䳝䴝䴺不佈吥咘埔埗埠布怖悑捗步歨歩篰荹蔀部郶鈽钚钸餢鮬]>bù; +[嚓擦]>cā; +礤>cǎ; +[䟃䵽囃]>cà; +猜>cāi; +[㒲䴭才材溨犲纔裁財财]>cái; +[㥒䌽䐆䣋倸婇彩採棌睬綵跴踩采]>cǎi; +[䌨䰂埰寀縩菜蔡]>cài; +[傪参-叅喰湌蓡謲飡餐驂骖]>cān; +[㥇㨻㱚㺑䍼䏼䑶䗝䗞䘉䙁䝳䣟䫮䳻嬠嬱惭慙慚残殘蚕蝅蠶蠺]>cán; +[㘔㜗㦧㿊䅟䬫惨慘憯朁黪黲]>cǎn; +[㛑㣓㻮㽩䛹澯灿燦璨粲薒]>càn; +[仓仺伧倉傖凔嵢沧滄舱艙苍蒼螥鶬鸧]>cāng; +[㵴㶓藏鑶]>cáng; +[䅮䢢賶]>càng; +[撡操糙鄵]>cāo; +[㜖㯥䄚䏆䐬嘈嶆曹曺槽漕艚蓸螬褿鏪]>cáo; +[䒑愺懆艸草騲]>cǎo; +[䒃肏襙]>cào; +[㥽㨲㩍䇲䈟䊂䔴䜺侧側冊册厕厠嫧廁恻惻憡拺敇测測畟矠笧策筞筴箣粣茦萗蓛遪頙]>cè; +[嵾梫]>cēn; +[㞥㻸䃡䅾䤁䨙䯔䲋埁岑梣橬涔笒]>cén; +曽>cēng; +[㬝䁬䉕层層嶒曾碀竲鄫]>céng; +[㣒蹭]>cèng; +[偛嗏嫅扠挿插揷杈槎疀肞臿艖銟靫餷馇]>chā; +[㢉㢒㪯㫅䁟䆛䑘䕓䤩䲦䶪垞察嵖搽查査檫痄碴秅茶詧鍤锸𦉆]>chá; +[䰈蹅鑔镲]>chǎ; +[㛳㢎㣾㤞䊬䒲䓭䟕䡨侘奼姹岔差汊紁衩訍詫诧]>chà; +叉>cha; +[拆芆釵钗]>chāi; +[㑪㾹䓱侪儕喍柴祡豺]>chái; +茝>chǎi; +[㳗䘍囆瘥虿蠆袃]>chài; +[幨惉搀攙梴袩裧襜辿鋓]>chān; +[㔆㙻㢆㶣㸥㺥䂁䜛䡪䡲䣑䤫䧯僝儃劖嚵婵嬋孱巉廛棎欃毚湹潹潺澶瀍瀺煘獑磛禪緾繵纏纒缠艬蝉蟬蟾誗讒谗躔鄽酁鋋鑱镵饞馋]>chán; +[㢟㦃㯆㹌㹽䊲䐮䑎䤘䥀䩶䴼䵐丳产冁刬剗剷囅嵼幝摌旵浐滻灛燀產産簅繟蒇蕆諂譂讇谄鏟铲閳闡阐驏骣]>chǎn; +[㙴㬄㵌䀡䠨䪜䱿儳忏懴懺硟羼韂顫颤]>chàn; +[伥倀娼昌晿淐猖琩菖裮錩锠閶阊鯧鲳鼚]>chāng; +[㙊㦂䗅䠆䯴仧偿償兏嘗嚐塲嫦尝常徜瑺瓺甞肠腸膓苌萇鋿鏛镸鱨鲿]>cháng; +[㫤䕋䠀僘厂厰场場廠昶氅鋹]>chǎng; +[䩨倡唱怅悵暢焻畅畼誯韔鬯]>chàng; +敞>chang; +[弨怊抄欩罺訬超鈔钞]>chāo; +[䄻䬤䰫嘲巢巣晁朝樔潮窲謿轈鄛鼂鼌]>cháo; +[㶤㷅䎐䏚吵巐炒煼眧麨]>chǎo; +[仦仯耖觘]>chào; +[伡俥唓砗硨莗蛼車车]>chē; +[㨋㵔䋲䞣䰩偖扯撦]>chě; +[㔭㥉㬚㯙㱌㵃㾝㿭䁤䑲䒆䚢䛸䜠䧪䨁勶坼屮彻徹掣撤澈烲爡瞮硩聅轍辙迠]>chè; +[棽琛瞋諃謓賝郴]>chēn; +[㕴㫳㲀㴴㽸䆣䒞䚘䜟䟢䢅䢈䢻䣅䤟塵宸尘屒忱愖敐曟樄沈沉煁臣茞莀莐蔯薼螴訦諶谌軙辰迧鈂陈陳霃鷐麎]>chén; +[䫖墋捵硶碜磣祳贂趻踸鍖]>chěn; +[㧱䞋儬儭嚫夦榇櫬疢藽衬襯讖谶趁趂齓齔龀]>chèn; +晨>chen; +[䞓偁噌埥憆撐撑棦橕檉泟浾爯琤瞠称稱穪竀緽蛏蟶赪赬鏿阷頳饓]>chēng; +[㞼㨃㲂㼩䁎䄇䆑䆵䆸䇸䔲䗊䧕䫆䮪丞乗乘呈城埕堘塍塖娍宬峸惩憕懲成承挰掁揨晟枨棖椉橙洆浈湞澂澄瀓珵珹程窚筬絾脀脭荿虰裎誠诚郕酲鋮铖騬]>chéng; +[侱塣庱徎悜睈逞騁骋]>chěng; +[㐼䀕牚秤靗]>chèng; +[吃哧喫嗤噄媸彨彲摛欫瓻痴癡眵瞝笞絺蚩螭訵誺郗骴魑黐齝]>chī; +[㓾㙜㞴㢮㮛䈕䐤䔟䙙䛂䜄䞾䪧䮈䶔䶵倁坻墀岻弛彽徥徲持歭池汦泜竾筂箈箎篪耛茌茬荎蚳謘貾赿趍踟迟遅遟遲馳驰]>chí; +[㘜㟂㢁㢋㱀㳏㶴䊼䑛䜵䜻侈叺呎垑姼尺恀恥拸搋欼歯粎耻蚇袳裭褫誃鉹齒齿]>chǐ; +[㒆㓼㔑㞿㡿㽚䇼䗖䟷䠠䤲䮻䰡䳵乿侙傺勅叱啻彳恜慗憏懘抶敕斥杘栻淔灻炽烾熾痓痸瘛眙翄翅翤觢赤趩跮踅遫鉓銐飭饎饬鶒鷘]>chì; +[充冲嘃徸忡憃憧摏沖浺珫舂茺衝蹖𢥞]>chōng; +[㓽㹐䌬䖝䳯崇崈漴痋翀虫蝩蟲褈隀]>chóng; +[埫宠寵]>chǒng; +[㧤揰銃铳𣀒]>chòng; +[婤怞抽搊犨犫瘳篘霌]>chōu; +[㐜㛶㤽㦞㨶㵞㿧䇺䊭䌧䌷䓓䛬䥒䲖仇俦儔嚋嬦帱幬惆愁懤栦椆焘燽燾畴疇皗稠筹籌紬絒綢绸菗薵裯詶讎讐踌躊酧醻雔雠]>chóu; +[䪮丑丒侴偢吜杻杽瞅矁醜魗]>chǒu; +[䔏殠簉臭臰霔]>chòu; +酬>chou; +[出初岀貙齣]>chū; +[㕏㕑㡡㶆㼥䅳䎝䎤䟞䠂䠧刍厨幮廚曯橱櫉櫥滁犓篨耝耡芻蒢蒭蕏藸蜍趎蹰躇躕鉏鋤锄除雏雛鶵𦷝]>chú; +[㹼䊰䖏䙘储儲処憷杵椘楮檚濋础礎褚齭齼]>chǔ; +[㔘㗰㙇㤕㤘䙕䜴䟣䦌䧁䮞亍俶傗儊处怵拀搐敊斣斶欪歜泏滀琡畜矗竌竐臅荲處触觸豖踀遚鄐閦黜]>chù; +楚>chu; 
+䫄>chuà; +揣>chuāi; +[㪓膗]>chuái; +㪜>chuǎi; +[䦟䦤䦷踹]>chuài; +[巛川氚瑏穿]>chuān; +[㯌㼷䁣伝传傳圌暷椽歂舩船諯輲遄]>chuán; +[㱛僢喘堾腨舛]>chuǎn; +[串汌賗釧钏]>chuàn; +[䆫刅戧摐牎牕疮瘡窓窗窻]>chuāng; +[㡖䃥䚒䡴䭚噇幢床橦牀疒]>chuáng; +[㵂䇬摤漺闖闯]>chuǎng; +[䎫创刱剏剙創怆愴獊]>chuàng; +[吹炊]>chuī; +[㓃㝽㥨㩾䄲䍋䳠倕垂埀捶搥棰槌湷箠篅腄菙錘鎚锤陲顀]>chuí; +䞼>chuǐ; +龡>chuì; +[媋旾春暙椿櫄膥萅鶞]>chūn; +[㝄㝇㵮㸪䏝䐇䓐䔚䣨䣩䥎䫃唇憌浱淳湻滣純纯脣莼蒓蓴醇醕陙鯙]>chún; +[㖺㿤䄝䏛䐏䞐䦮偆惷睶箺萶蠢踳]>chǔn; +[鶉鹑]>chun; +[戳鎈齹]>chuō; +犳>chuó; +[㚟㲋䂐䃗䄪䆯䇍䋘䍳䓎䮕啜嚽娖婥惙擉歠涰珿畷磭綽繛绰腏諁趠輟辍辵辶逴酫醊鑡齪龊]>chuò; +[偨庛疵薋蠀赼趀趑髊]>cī; +[㓨㘂㘹㤵䂣䆅䈘䖪䛐䧳䨏䭣䲿䳄垐嬨慈柌濨珁瓷甆磁礠祠糍茨詞词辝辞辤辭雌飺餈]>cí; +[㠿佌此泚玼皉紪跐]>cǐ; +[㞖㡹㢀㩞㹂䓧䗹䦻䯸䰍䳐伺佽刺刾朿次絘莿蚝蛓螆]>cì; +[匆囪囱忩怱悤憁暰枞棇樅樬樷漗焧燪瑽璁瞛篵緫繱聡聦聪聰苁葱蓯蔥蟌鍐鍯鏓鏦騘驄骢]>cōng; +[㼻䉘䕺䳷丛从叢婃孮従徖從悰慒淙漎潀灇爜琮誴賨賩錝]>cóng; +[欉藂謥]>còng; +㫶>cǒu; +[傶凑楱湊腠輳辏]>còu; +[粗觕麁麄麤]>cū; +[䓚䢐徂殂豠]>cú; +[㗤㰗䃚䎌䙯䛤䟟䠓䠞䥄䥘䬨促噈塶憱梀槭殧猝瘄瘯簇縬脨蔟趗踧蹙蹴醋鼀]>cù; +[撺攛汆蹿躥鋑]>cuān; +[㠝㭫䆘䰖劗巑櫕]>cuán; +[㵀㸑殩熶爨窜竄篡簒鑹镩]>cuàn; +[催嗺墔崔摧榱槯獕磪竴鏙]>cuī; +[凗慛]>cuí; +[㵏㷃䊫䧽漼璀皠趡]>cuǐ; +[㝮㥞㧘㯔㯜㱖㳃㷪䂱䃀䄟䆊伜倅啐啛忰悴毳淬焠琗疩瘁竁粋粹紣綷翆翠脃脆脺膬膵臎萃顇]>cuì; +[村澊皴邨]>cūn; +[存拵袸]>cún; +[刌忖]>cǔn; +[䍎吋寸籿]>cùn; +[搓撮睉磋蒫蹉遳]>cuō; +[㟇㽨䠡䣜䴾嵯嵳痤矬蔖虘醝鹺鹾]>cuó; +[䂳瑳縒脞]>cuǒ; +[䐣䟶䱜剉剒厝挫措歵莝莡蓌逪銼錯锉错齚齰]>cuò; +[哒噠墶搭撘耷荅褡]>dā; +[㜓㯚㾑㿯䃮䐊䑽䩢䳴䵣匒呾妲怛溚炟畗畣笪答繨荙薘蟽褟詚达迖逹達鐽靼鞑韃]>dá; +打>dǎ; +[大眔]>dà; +[㟷瘩]>da; +[呆呔懛獃]>dāi; +[䚞䚟傣歹歺逮]>dǎi; +[㐲㞭㫹㯂㶡㻖㿃䈆䒫代叇埭岱帒带帯帶廗待怠戴柋殆汏瀻玳瑇甙簤紿緿绐艜袋襶貸贷跢蹛軚軩迨霴靆黛黱]>dài; +[丹儋勯匰单単單噡妉媅担擔殚殫甔眈砃箪簞耼耽聃聸襌躭郸鄲酖頕黕]>dān; +[㔊㕪㽎䃫䉞䮰䱋亶伔刐掸撢撣澸玬瓭疸紞胆膽衴黵]>dǎn; +[㗖㡺㫜㱽㲷㵅㺗䄷䉷䨢䨵䩥䭛䳉但僤啖啗啿噉嚪帎弹弾彈惮憚憺旦暺柦氮沊淡潬澹狚疍瘅癉癚窞腅舕萏蛋蜑觛誕诞鉭钽霮饏馾駳髧鴠]>dàn; +[儅噹嵣当澢珰璫當筜簹艡蟷裆襠]>dāng; +[䣊䣣党挡擋攩欓灙譡讜谠黨]>dǎng; +[䑗䦒凼圵垱壋婸宕愓档檔氹潒璗瓽盪瞊砀碭礑簜荡菪蕩蘯趤逿闣雼]>dàng; +[鐺铛]>dang; +[刀刂幍忉朷氘舠釖魛鱽]>dāo; +[㠀㿒䆃䌦䲽壔导導岛島嶋嶌嶹捣捯搗擣祷禂禱蹈隝隯]>dǎo; +[䧂倒到噵悼檤瓙盗盜稲稻纛翿菿衜衟軇道]>dào; +[㝵㤫㥀㥁㯖䙷䙸嘚徳德恴惪淂鍀锝]>dé; +[地得的]>de; +[噔嬁灯燈璒登竳簦覴豋蹬鐙镫]>dēng; +[䒭戥朩等]>děng; +[䠬䮴僜凳墱嶝櫈瞪磴邓鄧隥霯]>dèng; +[仾低堤墑滴眡磾羝菂袛趆鍉鞮]>dī; +[㣙㰅㹍䊮䨀䨤䮤䯼䴞䵠仢唙啇嘀嚁嫡廸敌敵梑涤滌潪狄笛篴籴糴翟荻蔋蔐藡覿觌豴蹢迪鏑镝靮頔鸐]>dí; +[㡳㪆㭽䂡䍕䢑䣌䱃呧坘埞底弤抵拞掋柢氐牴砥聜苖茋菧觝詆诋軧邸阺骶]>dǐ; +[㢩㦅㼵䀸䀿䏑䑭䑯䞶䟡䧝䩘䩚䱱䶍偙僀哋啲坔埊墆墬奃娣嶳帝弟怟慸摕旳杕梊棣楴樀渧焍玓甋睇碲祶禘第締缔肑腣蒂蔕虳蝃螮諦谛踶軑轪递逓遞遰釱鉪馰]>dì; +嗲>diǎ; +[傎厧嵮巅巓巔掂攧敁槇槙滇甸瘨癫癲蹎顚顛颠齻]>diān; +䟍>dián; +[㚲㸃䍄䓦典嚸奌婰敟点琠痶碘蕇踮點]>diǎn; +[㓠㝪㞟㥆㵤㶘㼭䧃佃坫垫墊壂奠婝店惦扂橂殿淀澱玷琔电痁癜磹簟蜔鈿钿阽電靛]>diàn; +[凋刁叼奝彫扚殦汈琱瞗碉虭蛁貂雕鮉鯛鲷鳭鵰鼦]>diāo; +[㹿䉆屌釕钌]>diǎo; +[㒛㪕䂪䂽䔙䠼䵲伄吊弔掉瘹窎窵竨莜蓧藋訋調调釣銱鋽鑃钓铞魡]>diào; +[爹跌]>diē; 
+[㑙㥈㦶㩸㩹㫼㬪㭯㲲㲳㷸㻡䏲䘭䞇䞕䠟䪥䮢䲀䳀䴑叠喋垤堞峌嵽恎戜挕昳曡氎牃牒瓞畳疉疊眣眰碟絰绖聑胅臷艓苵蜨蝶褋褺詄諜谍趃跕蹀迭镻鰈鲽]>dié; +[惵耊耋]>diè; +[丁仃叮奵帄庰玎疔盯釘钉靪]>dīng; +婈>díng; +[㫀㴿㼗嵿檙濎薡酊鐤頂顶鼎鼑]>dǐng; +[㝎啶娗定忊椗矴碇碠磸腚訂订錠锭顁飣饤]>dìng; +[丟丢乣銩铥]>diū; +[东倲冬咚埬娻岽崠崬昸東氡氭涷笗苳菄蝀鯟鶇鸫鼕]>dōng; +[㖦㨂䂢䵔墥嬞懂箽董]>dǒng; +[㑈㓊㗢㜱㢥㼯䅍䍶䞒働冻凍动動垌戙挏栋棟洞眮胨胴腖霘駧]>dòng; +[兜兠吺唗橷篼蔸郖都]>dōu; +[㞳㪷䕱唞抖敨枓枡蚪阧陡]>dǒu; +[㛒㢄㷆䄈䕆䛠䬦斗梪毭浢痘窦竇脰荳豆逗鋀閗闘餖饾鬥鬦鬪鬬鬭]>dòu; +[厾嘟督醏闍阇都]>dū; +[㱩㸿㾄䓯䙱䢱䪅䫳䮷儥凟匵嬻椟櫝殰毒涜渎瀆牍牘犊犢独獨瓄皾碡蝳読讀讟读豄贕鑟韇韣韥騳髑黩黷]>dú; +[䀾䈞䐗堵帾暏琽睹笃篤裻覩賭赌錖]>dǔ; +[䄍䅊䟻䲧喥妒妬度杜殬渡秺簬簵肚荰螙蠧蠹鍍镀靯]>dù; +[偳剬媏端褍鍴]>duān; +短>duǎn; +[㫁㱭䠪塅断斷椴段毈煅瑖碫簖籪緞缎腶葮躖鍛锻]>duàn; +[垖堆塠嵟痽磓頧]>duī; +陮>duǐ; +[㙂㟋㠚㬣㳔㵽䇏䇤䔪䨴䨺䬈䬽䯟兊兌兑对対對怼憝憞懟濧瀩碓祋綐薱譈轛鐓鐜镦队隊]>duì; +[吨噸墩-墫弴惇撉撴敦犜礅蜳蹲蹾驐]>dūn; +[趸躉𣎴]>dǔn; +[㬿䤜伅囤崸庉扽沌潡燉盹盾砘踲逇遁遯鈍钝頓顿鶨]>dùn; +[剟咄哆嚉多夛崜]>duō; +[㣞䐾凙剫夺奪悳掇敓敚敠敪椯毲痥莌裰襗踱鈬鐸铎鮵]>duó; +[㔍㖼㙐㛆㛊㥩㻔䒳䙤䠤䤪䩣䫂䯬亸哚嚲垛垜埵奲憜挅挆朶痑綞缍趓躱躲鍺锗鬌]>duǒ; +[㧷㻧䅜䍴䑨䙃䙟䤻䩔刴剁堕墮墯尮嶞惰柁柮桗炧炨舵跥跺陊陏飿饳]>duò; +朵>duo; +[妸妿娿婀婐屙峉痾鈳钶]>ē; +[㼂㼰䄉䕏䖸䩹䱮䳗䳘俄吪娥峨峩枙涐珴皒睋硪磀莪蛾訛誐譌讹迗鈋隲頟額额騀魤鵝鵞鹅]>é; +[㼢噁]>ě; +[㓵㔩㕎㖾㗁㟧㠋㡋㦍㧖㩵㮙㱦㷈䆓䑥䑪䓊䔾䙳䛖䝈䞩䣞䫷䳬偔僫卾厄呃呝咢咹噩圔垩堊堨堮岋崿嶭恶悪惡愕戹扼掠搤搹櫮湂琧略砐硆胺腭苊萼蕚蚅蝁覨詻諤讍谔豟貖軛軶轭遌遏鄂鍔鑩锷閼阏阨阸頞顎颚餓餩饿魥鰐鱷鳄鶚鹗齶]>è; +[誒诶]>éi; +[奀恩煾蒽]>ēn; +䅰>ěn; +[䊐䬶䭓䭡摁]>èn; +[㖇㜨㧫㮕䋩䎟䎠䮘侕児唲栭檽洏而耏聏胹荋袻輀轜陑隭髵鮞鲕鴯鸸]>ér; +[㚷㢽䋙䌺尒-尔峏栮洱爾珥耳薾迩邇餌饵駬]>ěr; +[㒃㛅䎶䏪䣵二佴刵咡弍弐樲毦眲衈誀貮貳贰鉺铒]>èr; +[傠发彂橃沷発發瞂]>fā; +[㕹㘺䇅䣹乏伐垡姂栰疺砝筏罚罰罸茷藅閥阀]>fá; +[䂲佱法灋髮]>fǎ; +[㛲䒥珐琺蕟髪]>fà; +[僠噃嬏帆幡旙旛杋番笲籓繙翻轓飜鱕]>fān; +[㠶㸋㺕䀀䀟䉒䊩䋣䋦䌓䡊䪛䪤䫶䭵䮳凡-凣勫墦忛憣柉棥樊橎瀪瀿烦煩燔璠矾礬籵緐繁羳膰舧蕃薠藩蘩蠜襎蹯釩鐇钒颿鷭]>fán; +[䒠䛀反瓪軡返魬]>fǎn; +[㕨㝃㤆㴀㶗㼝㽹䉊䐪䒦䣲奿嬎梵氾汎泛滼犯畈盕笵範范訉販贩軓軬鄤飯飰饭飯]>fàn; +[匚方枋汸淓牥芳蚄邡鈁钫鴋]>fāng; +[㤃埅妨房肪防魴鰟鲂鳑]>fáng; +[㑂㕫㧍㯐䢍䦈䲱仿倣旊昉昘瓬眆紡纺舫訪访髣鶭]>fǎng; +[放趽]>fàng; +坊>fang; +[啡妃婓扉渄猆緋绯菲蜚裶霏非靟飛飝飞馡騑騛鯡鲱]>fēi; +[䈈淝肥腓萉蜰]>féi; +[㥱䕁䨽䨾匪奜悱斐朏棐榧篚翡蕜誹诽餥]>fěi; +[㔗㩌㭭㵒䆏䉬䑔䕠䚨䛍䠊䤵䰁俷剕厞吠屝废廃廢昲曊杮櫠沸濷狒疿痱癈砩肺胇芾蟦費费鐨镄靅鼣]>fèi; +[兝分吩岎帉昐朆氛燓砏紛纷翂芬衯訜酚鈖隫雰餴饙鳻𦐈]>fēn; +[㷊㸮䩿䯨䴅坆坟墳妢幩弅枌梤棻棼橨汾濆炃焚燌獖玢秎羒肦蒶蕡蚠蚡豮豶轒鐼馚馩魵黂鼖鼢]>fén; +[㥹粉羵黺]>fěn; +[㖹㮥㿎份偾僨坋奋奮忿愤憤瀵粪糞膹鱝鲼]>fèn; +[丰仹偑僼凨凬凮堼夆妦寷封峯峰崶捀枫桻楓檒沣灃烽熢犎猦疯瘋盽砜碸篈莑葑蘴蜂蠭豐鄷酆鋒鏠锋靊風飌风麷]>fēng; +[㦀㵯䏎䙜䩼冯堸摓沨浲渢漨綘艂逢馮]>féng; +[䟪唪覂諷讽]>fěng; +[㡝俸凤奉湗焨煈甮縫缝賵赗鳯鳳鴌]>fèng; +[仏坲]>fó; +[䳕剻哹紑裦]>fóu; +[否殕缶缹缻芣雬鴀]>fǒu; +[椱竎]>fòu; +[伕呋妋姇孵尃怤懯敷旉枹柎泭玞砆稃筟糐綒罦肤膚荴衭豧趺跗邞鄜鈇鳺麩麬麱麸]>fū; +[㚕㜑㟊㠅㤔㪄㫙䃽䋹䌿䍖䑧䒀䔰䕎䘠䞞䟮䡍䨗䪙䵗䵾乀伏佛俘冹凫刜匐咈嚩垘孚岪巿帗幅幞弗彿怫払扶拂服枎柫栿桴棴氟洑浮涪澓炥烰玸琈甶畉畐癁祓福符笰箙粰紱紼絥綍绂绋罘翇艀艴芙苻茀茯莩菔葍虙蚨蜉蝠諨踾輻辐郛鉘鉜韍韨颫髴鮄鳧鳬鴔鵩鶝福]>fú; 
+[㓡㕮䋨䌗䓛䗄䩉䫍䫝䯽乶俌俛俯呒嘸府弣抚拊捬撫斧暊滏焤盙秿簠腐腑莆蚥蜅輔辅郙釜釡頫鬴鯆黼]>fǔ; +[㙏㚆㤱㬼㳇㵗㽬㾈䂤䎅䒇䘀䘄䝾䞜䞯䞸䟔䠵䦣䧞䨱䭸䮛䯱付偩冨副圑坿复妇婏婦媍嬔富峊復榑父祔稪紨緮縛缚胕腹萯蕧蚹蛗蝜蝮袝複褔覆訃詂讣負賦賻负赋赙赴輹鍑鍢阜阝附陚馥駙驸鮒鰒鲋鳆黻𦱖]>fù; +[傅咐夫甫袱]>fu; +[嘎嘠]>gā; +[尜錷]>gá; +[尕玍𠁥]>gǎ; +尬>gà; +[侅垓姟峐晐畡祴胲荄該该豥賅赅郂陔隑]>gāi; +[䪱忋改絠]>gǎi; +[㕢㧉㮣䏗丐乢匃匄戤摡概槩槪溉漑瓂盖葢蓋賌鈣钙𩕭]>gài; +[乹乾亁凲坩尲尴尶尷嵅忓攼杆柑泔玕甘疳矸竿筸粓肝芉苷虷蜬迀鳱]>gān; +仠>gán; +[㺂䃭䇞䔈䤗䵟感擀敢桿橄澉灨皯盰秆稈衦贑赶趕鱤鳡]>gǎn; +[䯎䲺倝凎干幹旰榦檊汵涻淦簳紺绀詌贛赣骭]>gàn; +[冈冮刚剛堈堽岡掆杠棡牨犅碙笐綱纲缸罁罓罡肛舡鋼鎠钢]>gāng; +[㟠㟵㽘䴚岗崗港]>gǎng; +[戅-戇槓焵筻]>gàng; +[槔槹橰櫜滜皋皐睾篙糕羔羙膏臯餻高髙鷎鼛𦤎]>gāo; +[㚏㚖㾸䗣夰搞暠杲槁檺稾稿筶縞缟菒藁藳]>gǎo; +[叝吿告煰祮祰禞誥诰郜鋯锆鯌]>gào; +[割咯哥圪戈戓戨搁擱歌渮滒牁牫牱疙肐胳謌鎶鴚鴿鸽麧𪃿]>gē; +[㗆㝓㠷㦴㨰㪾㵧㷴䆟䈓䐙䕻䗘䘁䛋䛿䢔䨣䩐䪂䪺䫦佮匌呄嗝噶塥愅挌搿敋格槅獦膈臵茖蛒裓觡諽輵轕郃鎘镉閣閤阁阖隔革鞷韐韚骼鮯鰪齃]>gé; +[哿擖笴舸葛]>gě; +[䧄个個各嗰箇虼鉻铬]>gè; +[給给]>gěi; +[刯剆根跟]>gēn; +哏>gén; +䫀>gěn; +[㫔㮓亘亙艮茛]>gèn; +[庚揯搄浭畊秔稉粳絙絚緪縆羮羹耕菮賡赓鶊鹒]>gēng; +[㾘䋁䌄哽埂峺挭梗綆绠耿莄郠骾鯁鲠]>gěng; +[䱍䱎䱭䱴堩更]>gèng; +[供公功匑厷塨宫宮工幊弓恭愩攻杛玜疘碽篢糼肱觥觵躬躳釭魟龏龔龚]>gōng; +[㤨㧬㫒㭟㺬㼦䂬䡗巩拱拲栱汞珙穬蛬銾鋛鞏]>gǒng; +[㓋㔶㯯䇨䢚共廾羾貢贡]>gòng; +蚣>gong; +[勾沟溝篝緱缑耩芶褠鈎鉤钩鞲韝]>gōu; +[㺃岣枸狗玽笱耇-耉苟茩蚼豿]>gǒu; +[㗕㜌㝅㝤㨌㳶䃓䝭䞀傋冓坸垢够夠姤媾彀搆撀构構煹覯觏訽詬诟購购遘雊骺]>gòu; +[估呱夃姑嫴孤柧橭沽泒痼笟箍箛罛苽菰蛄觚軱辜酤鈲鮕鴣鸪]>gū; +[䜼䮩鶻鹘]>gú; +[㒴㚉㯏㼋㾶䀇䀜䀦䀰䅽䊺䍍䐨䡩䵻古唂唃嘏尳愲扢杚榖毂淈濲瀔焸牯狜皷皼盬瞽穀糓縎罟羖股脵臌蓇薣蛊蛌蠱詁诂谷轂鈷钴餶馉骨鼓鼔]>gǔ; +[㧽㽽䍛䓢䶜僱凅固堌崓崮怘故梏棝榾牿祻稒錮锢雇頋顧顾鯝鲴鶮]>gù; +[咕菇]>gu; +[刮劀懖栝桰煱瓜瘑筈緺聒胍脶腡葀趏踻銽頢颳騧鴰鸹]>guā; +[㒷䈑冎剐剮叧寡]>guǎ; +[卦啩坬挂掛絓罣罫褂詿诖髺]>guà; +乖>guāi; +[拐枴柺箉]>guǎi; +[㧔㷇㽇䂯䊽叏夬怪恠旝癐]>guài; +[倌关冠官棺瘝癏蒄覌観觀观関闗關鱞]>guān; +[䏓䗆䘾䦎䩪䪀䲘琯痯筦管脘舘輨錧館馆鳤館]>guǎn; +[㮡㴦䌯䎚䗰䙛䙮䝺丱悹悺惯慣掼摜樌毌泴涫潅灌爟瓘盥矔礶祼罆罐謴貫贯遦鏆鑵雚鱹鸛鹳]>guàn; +[侊僙光咣垙姯洸灮炗炛烡珖胱茪輄銧黆]>guāng; +[广広廣犷獷]>guǎng; +[㫛桄櫎臦臩逛]>guàng; +[亀傀圭妫媯嫢嬀帰归摫椝槻槼歸溈珪瑰璝瓌皈硅窐膭袿規规邽郌閨闺鬶鬹鮭鲑龜龟]>guī; +潙>guí; +[㔳㧪㨳㩻㲹㸵䁛䍯䞈䞨䣀䤥佹匦匭厬垝姽宄庋庪恑攱晷氿癸祪簋蛫蟡觤詭诡軌轨陒鬼]>guǐ; +[㙺㪈䇈䌆䍷䐴䖯䙆䝿䠩䯣䰎䳏刽刿劊劌匮匱嶡巜攰昋暩柜桂櫃溎炔筀蓕襘貴贵跪鞼鱥]>guì; +裩>gūn; +[㙥㫎㯻䃂䎾䜇丨掍滚滾磙緄绲蓘蔉衮袞袬輥辊鮌鯀鲧]>gǔn; +[䵪棍璭睴]>gùn; +[埚堝墎崞濄蝈蟈郭鈛鍋锅]>guō; +[㕵㖪㚍㶁䂸䆐䐸䤋䬎囯囶囻国圀國帼幗慖掴摑漍簂聝腘膕虢馘]>guó; +[㞅䙨䴹惈果椁槨淉猓粿綶菓蜾裹輠鐹餜馃]>guǒ; +[㳀腂过過]>guò; +[啯嘓]>guo; +[哈𠀀]>hā; +蛤>há; +[咍咳嗨]>hāi; +[㜾㨟䠽䯐䱺孩还還頦颏骸]>hái; +[海烸酼醢]>hǎi; +[㤥㦟㧡㺔䇋亥嗐害氦餀饚駭骇𠀅𥩲]>hài; +[唅嫨憨炶甝蚶谽酣頇顸馠魽鼾]>hān; +[㖤㙈㙔㟏㟔㮀㶰㼨䈄䗙䤴䥁䨡䮧䶃佄函凾含圅娢寒崡晗梒涵焓琀筨肣邗邯鋡韓韩]>hán; +[㘎㘕㘚㙳㵎㸁㺖㽉䍐䍑䓍䓿䛞厈喊浫罕蔊豃鬫]>hǎn; +[㑵㒈㜦㢨㨔㪋㲦㵄㽳䁔䌍䎯䏷䐄䕿䖔䘶䧲䫲傼哻垾娨屽悍憾扞捍撼攌旱晘晥暵汉汗浛涆淊漢澏瀚焊熯猂皔睅翰莟菡蛿蜭螒譀豻貋釬銲鋎閈闬雗頷顄颌颔馯駻鶾]>hàn; +夯>hāng; +[㤚䀪䘕䲳斻杭桁沆肮航苀蚢貥迒頏颃魧]>háng; 
+[汻酐]>hǎng; +[䟘䣈䦳䴂]>hàng; +[侾嚆蒿薅]>hāo; +[㕺㠙㩝㬔䝥䧫儫勂嗥嘷噑嚎壕椃毫濠獆獋獔籇蠔諕譹豪]>háo; +[好恏郝]>hǎo; +[㘪㙱㚪㝀㞻㬶㵆䒵䚽䝞䧚䪽䬉䯫傐号哠峼悎昊昦晧暤暭曍浩淏滈澔瀥灏灝皓皜皞皡皥秏耗聕薃號鄗鎬镐顥颢鰝]>hào; +[喝嗬峆抲訶诃]>hē; +[㓭㔠㕡㥺㪉㭘㭱㮝㮫㹇㿣䃒䅂䒩䕣䞦䢗䫘䳚䶅何劾厒合咊和哬啝姀廅惒敆曷柇核楁毼河涸滆澕熆狢盇盉盍盒礉禾秴篕紇纥翮耠荷菏萂蒚蚵蝎螛覈訸詥貈貉釛鉌閡闔阂鞨頜餲魺鲄鶡鹖齕龁龢]>hé; +[㕰㦦㬞㵑㷎㷤䎋䓼䚂䪚䳽䴳䵱佫俰嗃壑暍焃煂熇爀癋皬碋翯蠚袔褐賀贺赫隺靎靏鶴鸖鹤鶴]>hè; +[嘿潶黑黒]>hēi; +[㯊拫痕鞎]>hén; +[䓳佷很狠詪]>hěn; +恨>hèn; +[亨哼悙脝諻]>hēng; +[㔰㶇䄓䒛䬖䬝䯒姮恆恒揘楻横橫珩蘅衡誙鐄鑅韹鴴鸻黉黌]>héng; +[啈撔澋絎绗]>hèng; +[叿呍哄揈渹烘焢薨訇谾軣輷轟轰鍧顭鬨]>hōng; +[㖓㢬䂫䃔䆖䉺䍔䜫䞑䡌䡏䧆䨎䩑䪦䫹䫺䲨仜吰垬妅娂宏宖峵嵤弘彋汯泓洪浤渱潂灴玒硔硡竑竤篊粠紅紘紭綋红纮翃翝耾舼苰荭葒葓虹訌讧谹谼鈜鉷鋐閎闳霟鞃鴻鸿]>hóng; +[㬴䀧唝嗊晎]>hǒng; +[㶹澒蕻鍙閧]>hòng; +[㗋㤧㬋㮢㺅䂉䗔䙈䫛䳧侯喉帿猴瘊睺矦篌糇翭葔銗鍭餱鯸齁]>hóu; +[㖃㸸吼犼]>hǒu; +[㕈㫗䞧䪷厚后垕堠後洉缿豞逅郈鄇鮜鱟鲎鲘]>hòu; +候>hou; +[匢匫呼唿啒嘑垀寣峘幠忽惚昒曶欻歑泘滹烀烼苸虍虖軤轷雐]>hū; +[㗅㪶㯛㹱㾰㿥䁫䈸䉉䉿䊀䎁䔯䚛䞱䠒䧼䩴䭅䭌䭍䮸喖嘝囫壶壷壺媩弧抇搰斛楜槲湖瀫焀煳狐猢瑚瓳箶絗縠胡葫蔛蝴螜衚觳醐鍸頶餬鬍魱鰗鵠鶘鶦鹄鹕]>hú; +[䗂乕浒滸琥萀虎虝]>hǔ; +[㕆㦿㨭㸦㺉䇘䍓䕶䛎䨥䨼䪝䲵互冱冴嚛婟嫭嫮岵帍弖怙戶-戸戽扈护摢擭昈枑楛槴沍沪滬熩瓠祜笏簄綔蔰謼護鄠頀鳸]>hù; +[乎唬糊]>hu; +[哗嘩花芲錵鷨𢄶]>huā; +[㕲㟆㠏㦊㭉㮯䅿䏦䔢䱻䶤华嬅崋滑狯猾磆華蕐螖譁釫鏵铧驊骅]>huá; +蘳>huǎ; +[㓰㕦㕷㚌㠢㦎㩇䛡䠉划劃化婳嫿摦杹桦槬樺澅画畫畵繣舙觟話諣譮话]>huà; +竵>huāi; +[㜳䃶䈭䴜佪徊怀懐懷槐櫰淮瀤耲褢褱踝]>huái; +[咶坏壊壞孬蘹蘾諙]>huài; +[嚾懽歓犿獾讙貛酄驩鴅鵍]>huān; +[㡲㦥㵹㶎㿪䍺䝠䥧䦡䭴䮝䴟嬛寏寰懁捖桓梡洹澴狟环環瓛糫繯缳羦肒荁萈萑豲貆鍰鐶锾镮闤阛雈鬟]>huán; +[㣪㬊䈠唍嵈睆緩缓藧輐]>huǎn; +[㓉㕕㪱㬇㹖㼫䀓䀨䆠䯘唤喚喛垸奂奐宦幻患愌换換擐梙槵涣渙漶烉焕煥瑍痪瘓豢轘逭鰀]>huàn; +[欢歡]>huan; +[嚝塃巟慌朚肓荒衁]>huāng; +[㞷㾠㾮䅣䊗䊣䍿䐵䑟䞹䪄䮲䳨偟兤凰喤堭墴媓崲徨惶撗湟潢煌熿獚瑝璜癀皇磺穔篁簧艎葟蝗蟥趪遑鍠锽隍餭騜鰉鱑鳇鷬黃黄𪏙]>huáng; +[㤺㬻䁜䌙䐠宺幌怳恍愰晄榥滉炾熀皝詤謊谎鎤𣄙]>huǎng; +[㨪曂皩軦]>huàng; +晃>huang; +[咴噅噕婎幑徽恢悝拻挥揮撝晖暉洃瀈灰烣煇禈翚翬袆褘詼诙豗輝辉隓隳顪鰴麾]>huī; +[囘回囬廻廽恛洄痐茴蚘蛔蛕蜖迴逥鮰]>huí; +[㩓㷐䃣䏨䛼悔檓毀毁毇烠燬芔虺蝰譭𠧩]>huǐ; +[㑰㑹㒑㜇㞧㤬㥣㨤㨹㩨㬩㰥㱱㷄㻅䂕䅏䇻䌇䕇䙌䙡䛛䜋䤧䧥䩈䫭会僡儶匯卉喙嘒噧嚖圚嬇寭廆彗彙彚恚恵惠慧憓晦暳會槥橞櫘殨汇泋湏滙潓濊烩燴獩璯瞺秽穢篲絵繐繢繪绘缋翙翽荟蔧蕙薈藱螝蟪詯誨諱譓譿讳诲賄贿鏸鐬闠阓靧韢頮颒餯饖]>huì; +[婚惛惽昏昬棔殙涽睧睯荤葷閽阍]>hūn; +[㑮㨡䛰䫟䰟䴷堚忶棞楎浑渾珲琿繉轋顐餛餫馄魂鼲]>hún; +[睔鯶]>hǔn; +[㥵䅙䅱䚠䧰俒倱圂慁混溷焝觨諢诨]>hùn; +[劐豁鍃锪]>huō; +[䄆䄑䣶䯏佸活秮秳萿鈥钬]>huó; +[伙夥火邩]>huǒ; +[㗲㘞㦜㦯㨯㯉㸌䁨䂄䄀䉟䋭䦚䰥剨咟嗀嚄嚯嚿奯彟彠惑或捇掝攉旤曤楇檴沎湱濩瀖獲瓁癨眓矆矐矱砉礊祸禍穫耯臒臛艧获蓃藿蠖謋讗貨货鑊镬閄雘霍霩靃韄騞鱯鳠鸌鹱嗀]>huò; +硧>iǒng; +[䤠丌乩僟击刉刏剞勣叽咭唧喞嗘嘰圾基墼姫姬尐屐峜嵆嵇擊朞机枅樍機櫅毄激犄玑璣畸畿矶磯禨积稘稽積笄筓箕簊績绩羁羇羈耭聻肌虀虮蛣襀覉覊觭諅譏譤讥賫賷赍跡跻蹟躋躸迹銈鐖鑇鑙隮雞鞿韲飢饑饥鰿鳮鶏鷄鸡麡齎齏齑𠼻]>jī; +[㔕㖢㗊㗱㘍㙫㞃㠍㠎㡇㡮㤂㥛㧀㭲㮟㮨㱞㲺㴕㻷㽺㾊䁒䐕䐚䚐䞘䟌䣢䩯䯂䲯䳭亟亼伋佶偮卙即卽及吉堲塉姞嫉岌嵴嶯庴彶忣急愱戢揤撃擮极棘楫極槉橶檝殛汲湒漃潗濈焏狤疾瘠皍礏禝笈箿籍級级耤脊膌芨莋蒺蓻蕀蕺蝍螏襋觙踖蹐轚郆鈒銡鍓鏶钑集雦雧霵鞊鴶鶺鸄鹡]>jí; +[㚡㞆㞛㞦㦸㨈㴉䍤䢳䤒丮几妀己幾戟挤掎撠擠泲犱玘穖蟣踦鈘魕魢鱾麂]>jǐ; 
+[㑧㒫㙨㠖㠱㡭㡶㥍㭰㰟㲅㳵㸄㹄㻑㾒㾵䋟䐀䒁䓫䓽䗁䜞䝸䠏䢋䦇䨖䮺䰏䶓䶩伎兾冀剂剤劑哜嚌坖垍塈妓季寂寄彐彑忌悸惎懻技旡-旣暨曁梞檕檵洎济済漈濟瀱猤璾痵瘈癠瞡祭稩稷穄穊穧紀継繼纪继罽臮芰茍茤葪蓟蔇薊蘎蘮蘻裚褀覬觊計記誋计记跽际際霁霽驥骥髻鬾魝鮆鯚鯽鰶鱀鱭鲚鲫鵋齌𡜱]>jì; +[輯辑]>ji; +[乫伽佳傢加嘉夹夾家幏拁枷毠泇犌猳珈痂笳耞腵茄葭袈豭貑跏迦鉫鎵镓鴐麚]>jiā; +[㪴㮖㼪㿓䀫䀹䕛䛟䩡唊圿埉恝戛戞扴梜浃浹硈舺荚莢蛱蛺袷裌跲郏郟鉿鋏铗铪鞂鞈頬頰颊餄饸鵊]>jiá; +[㕅䑝仮假叚婽岬徦斚斝椵榎槚檟玾甲瘕胛賈贾鉀钾]>jiǎ; +[䁍价価價嫁架榢駕驾]>jià; +稼>jia; +[偂兼冿囏坚堅奸姦姧尖惤戋戔揃搛椷樫櫼歼殲湔瀐瀸煎熞熸牋犍猏玪监監睷碊礛笺箋篯籛緘縑缄缣肩艰艱菅菺葌蒹蕑蕳虃豜豣鑯間间靬鞬鞯韀韉顅餰馢騝鬋魐鰜鰹鲣鳒鳽鵳鶼鹣麉]>jiān; +[㔓㨵㳨㶕䄯䅐䉍䛳䟰䩆䭠䮿䯛䯡䵡䵤䶠俭倹儉减剪囝堿寋弿戩戬拣挸捡揀撿暕柬检検檢減湕瀽瑐睑瞼硷碱笕筧简簡絸繭翦茧藆蠒裥襇襺謇謭譾谫趼蹇鐗鐧锏鰔鹸鹻鹼]>jiǎn; +[㓺㔋㣤㦗㨴㯺㰄㺝䇟䟅䤔䥜䧖䬻䭈䭕䵖䵛件侟俴俿健僣僭剑剣剱劍劎劒劔建徤擶旔枧栫梘楗榗毽洊涧渐溅漸澗濺瀳牮珔磵箭糋繝腱臶舰艦荐蔪薦螹襉見覵覸见諌諓諫譼谏賎賤贱趝践踐踺釼鉴鋻鍳鍵鏩鑑鑒鑬鑳键餞饯鰎]>jiàn; +[僵壃姜将將摪橿殭江浆漿瓨畕畺疅疆礓繮缰翞茳葁薑螀螿豇韁鱂鳉𤕭]>jiāng; +[㢡㯍䁰䉃䋌䒂䙹奖奨奬桨槳滰獎膙蒋蔣襁講讲顜]>jiǎng; +[䞪匞夅嵹弜弶摾洚犟糡糨絳绛蔃袶謽酱醤醬降𢘸]>jiàng; +匠>jiang; +[䴔交僬嘄姣娇嬌峧嶕憍憿椒浇澆焦燋礁穚胶膠膲芁茭茮蕉蛟蟂蟭詨跤郊鐎驕骄鱎鴵鵁鷍鷦鷮鹪𨺹]>jiāo; +嫶>jiáo; +[㩰㭂㳅㽱㽲䀊䁶䘨䚩䠛䥞䴛佼侥僥儌剿劋勦孂徺挢捁搅摷撟撹攪敽敿晈暞曒湬漅灚烄煍狡璬皎皦矫矯筊絞繳绞缴脚腳臫虠蟜角譑賋踋鉸铰餃饺鮫鲛]>jiǎo; +[㠐㬭㰾䂃䆗䣤䪒叫呌嘂嘦噍噭嬓峤嶠徼挍敎教敫斠滘漖潐珓皭窌窖訆譥較轎轿较酵醮醶釂]>jiào; +[喈嗟堦巀接掲揭擑椄湝瑎皆秸稭腉菨薢蝔街謯阶階鶛]>jiē; +[㓗㓤㔾㘶㛃㝌㞯㦢㨗㨩㮞㮮㸅䀷䂒䂝䂶䅥䌖䕙䗻䣠䥛䲙倢偈偼傑刦刧刼劫劼卩卪喼婕孑岊崨嵥幯截拮捷搩擳昅杰栉栨桀楬楶榤櫛櫭洁滐潔瀄犵疖癤睫碣竭節結结羯脻节莭葜蓵蜐蠘蠞蠽衱袺訐詰誱讦诘趌踕迼鉣鍻鐑頡颉騔鮚鲒]>jié; +[媎檞解觧飷]>jiě; +[㑘㝏㠹㾏㿍䁓䇒䔿䛺䯰䰺䱄䲸丯介借吤唶堺屆届岕庎徣悈戒楐犗玠琾界畍疥砎紒繲艥芥藉蚧褯誡诫躤鎅魀魪𡽱]>jiè; +姐>jie; +[今埐嶜巾惍斤津珒瑧矜祲筋紟荕菳衿襟觔金钅鹶黅]>jīn; +[㝻㬐㯸㹏䐶䒺䤐䥆䭙仅侭僅儘卺堇尽巹慬槿殣漌瑾盡紧緊菫覲觐謹谨錦锦饉馑]>jǐn; +[㨷㬜㯲㰹㱈㴆㶦㶳䀆䆮䋮䌝䑤䖐䗯䝲䫴䶖伒僸凚劤劲勁噤嚍墐妗嫤嬧搢晉晋暜枃歏浕浸溍濅濜烬燼琎瑨璡璶瘽禁縉缙肵荩蓳藎賮贐赆近进進靳齽]>jìn; +[䴖京亰兢坕坙婛巠惊旌旍晶泾涇猄箐精経經经聙腈茎荆荊莖菁葏驚鯨鲸鵛鶁鶄麖麠黥鼱精]>jīng; +[㘫䜘丼井儆刭剄宑憬憼景暻汬烃烴燛璟璥穽肼蟼警阱頚頸颈]>jǐng; +[㕋㢣㣏㬌㵾㹵䔔䡖䵞俓倞傹净凈境妌婙婧弪弳径徑擏敬曔桱梷浄淨濪瀞獍痉痙竞竟竧竫競竸胫脛葝誩踁迳逕鏡镜靓靖静靚靜靖]>jìng; +睛>jing; +[冂冋坰垧埛扃蘏蘔駉駫]>jiōng; +[㓏㖥㢠㤯㷗㷡䌹䐃䢛侰僒冏囧幜泂炅炯烱煚煛熲皛窘絅綗褧迥逈顈颎]>jiǒng; +[㑋澃]>jiòng; +[丩勼啾揂揪揫朻樛湫牞究糺糾纠萛觓轇醔阄鬏鬮鳩鸠]>jiū; +㺵>jiú; +[㡱久乆九奺灸玖紤舏赳酒镹韭韮]>jiǔ; +[㠇㧕㩆㲃㶭㺩䅢䆒䊆䊘䓘䛮䡂䳎䳔僦匓匛匶厩咎媨就廄廏廐慦捄救旧柩柾桕疚臼舅舊鯦鷲鹫麔齨]>jiù; +[㞐凥刟娵居岨崌抅拘椐檋沮涺狙琚疽眗罝腒艍苴菹葅蜛裾趄跔踙陱雎鞠駒驹鮈鴡鶋]>jū; +[㘲㥌㩴㮂㽤䋰䏱䕮䗇䜯䡞䤎䪕䰬䱡䴗侷匊婅局巈挶掬桔梮椈橘毩毱泦淗焗犑狊箤粷菊蘜諊趜跼踘蹫躹輂郹鄓鋦锔閰鞫駶驧鵙鵴鶪鼳]>jú; +[䃊䄔䅓䈮䢹䶥举咀弆挙擧椇榉榘櫸欅竘筥舉莒蒟蝺袓跙踽齟龃]>jǔ; +[㘌㜘㞫㠪㨿㩀㬬㳥䆽䛯䣰䱟䵕䶙俱倨倶具冣剧劇勮句埧埾壉姖寠屦屨岠巨怇怚惧愳懼拒拠据據昛歫泃洰澽炬焣犋秬窭窶簴粔絇耟聚苣虡蚷詎讵貗距踞躆遽邭醵鉅鋸鐻钜锯颶飓駏鮔]>jù; +矩>ju; +[剶姢娟捐朘涓蠲裐鎸鐫镌鵑鹃]>juān; +[㷷卷埢捲臇菤𩜇]>juǎn; +[㢧㢾㪻㯞䄅䌸䖭䚈䡓䳪倦劵勌勬巻帣慻桊淃狷獧瓹眷睊睠絹绢罥羂鄄錈锩韏飬鬳]>juàn; +[噘屩撅]>juē; 
+[㓸㔃㔢㟲㤜㩱㭈㭾㰐㵐㷾㸕㹟㻕䀗䁷䆕䆢䇶䋉䍊䏐䏣䐘䖼䘿䙠䝌䞷䠇䡈䦆䦼亅倔傕僪决刔劂勪厥噱嚼孒孓屫崛崫嵑嶥弡彏憠憰戄抉挗捔掘撧攫斍柽桷橛橜欔欮殌氒決潏焆焳熦爑爝爴爵獗玃玦玨珏瑴疦瘚矍矡砄絕絶绝臄芵蕝蕨蚗蟨蟩蠼覐覚覺觉觖觼訣譎诀谲谻貜赽趹蹶蹷躩逫鈌鐍鐝钁镢鱊鱖鳜鴃鷢龣𧽸𩪗]>jué; +[䞵䟾]>juě; +[鴂𠢤]>juè; +[军君均姰桾汮皲皸皹碅莙菌蚐袀覠軍鈞銁銞钧頵鮶鲪麇麏麕]>jūn; +蜠>jǔn; +[㑺㒞㓴㕙㝦㴫㻒㽙䇹䕑䜭䝍俊儁呁寯峻懏捃攈攟晙殾浚濬焌珺畯睃竣箘箟葰蔨郡隽雋餕馂駿骏鵘]>jùn; +咖>kā; +[佧卡咔胩鉲]>kǎ; +髂>kà; +[奒开揩痎鐦锎開]>kāi; +[㡁䁗䐩䒓凯凱剀剴嘅垲塏恺愷慨暟楷蒈輆鍇鎧铠锴闓闿颽]>kǎi; +[㲉䡷勓忾愒愾欬炌炏烗礚]>kài; +[刊勘堪嵁戡栞龕龛]>kān; +[㸝䶫侃偘冚坎埳塪崁惂槛檻欿歁歞砍莰輡轗顑]>kǎn; +[䀍䘓墈看瞰矙磡竷衎闞阚]>kàn; +[嫝康忼慷槺漮穅粇糠躿鏮鱇]>kāng; +[扛摃]>káng; +䡉>kǎng; +[㰠亢伉匟囥抗炕犺砊邟鈧钪閌闶]>kàng; +[尻髛鷱]>kāo; +攷>káo; +[䯪丂拷栲槀洘烤燺稁考薧鮳鲓𥬯]>kǎo; +[䐧犒銬铐靠]>kào; +[峇柯棵樖犐珂疴瞌磕科稞窠簻胢苛萪薖蝌趷軻轲顆颗髁]>kē; +[壳殼]>ké; +[㞹㪃㪙㪡㪼㰤㵣可坷岢嶱敤渇渴炣礍]>kě; +[㕉㤩㾧䙐䶗克刻剋勀勊喀嗑垎堁娔客尅恪愘愙揢搕榼氪溘碦緙缂衉課课醘騍骒]>kè; +[啃垦墾恳懇肎肯肻豤錹齗齦龂龈]>kěn; +[㸧掯珢硍裉褃]>kèn; +[劥吭坈坑奟妔挳摼牼硁硜硻銵鍞鏗铿阬]>kēng; +䡰>kěng; +[倥埪崆悾涳硿空箜錓鵼]>kōng; +[㤟孔恐]>kǒng; +[㸜控鞚]>kòng; +[剾彄抠摳芤袧]>kōu; +[㔚劶口]>kǒu; +[㓂㰯㲄㽛䳟䳹佝冦叩宼寇怐扣敂滱瞉窛筘簆蔲蔻釦鷇𦶲]>kòu; +[刳哭圐堀扝枯桍窟胐跍軲轱郀顝骷]>kū; +[䇢苦]>kǔ; +[㒂㠸俈喾嚳库庫焅瘔矻秙絝绔袴裤褲趶酷]>kù; +[侉咵夸姱荂誇]>kuā; +[垮恗銙錁锞]>kuǎ; +[㐄䋀挎胯跨骻]>kuà; +呙>kuāi; +[㧟䓒擓蒯]>kuǎi; +[㔞㙕㙗㟴㬮㱮䈛䭝䯤䶐侩儈凷哙噲块塊墤廥快桧檜欳浍澮獪禬筷糩脍膾郐鄶駃鬠鱠鲙]>kuài; +[宽寛寬臗髋髖]>kuān; +[㯘䕀䥗䲌欵款歀窾]>kuǎn; +䤭>kuàn; +[劻匡匩哐恇框洭硄筐誆诓軭邼]>kuāng; +[㤮抂狂誑诳鵟]>kuáng; +[俇夼黋]>kuǎng; +[䊯䵃儣况卝圹壙岲懬懭旷昿曠況爌眖眶矌矿礦絖纊纩貺贶躀邝鄺鉱鑛]>kuàng; +[亏刲岿巋盔窥窺茥藈蘬虧鍷闚]>kuī; +[㙓㚝㨒䕫䟸䤆䧶䯓䳫喹夔奎戣揆晆暌楏楑櫆湀犪睽聧葵蘷虁躨逵鄈鍨頯馗騤骙魁]>kuí; +[㛻䠑䦱䫥煃跬蹞頍]>kuǐ; +[㕟䈐䍪䕚喟嘳媿尯愦愧憒撌槶樻溃潰瞆瞶篑簣籄聩聭聵腃蒉蔮蕢鐀鑎餽饋馈騩]>kuì; +[坤堃婫崐崑惃昆晜焜猑琨瑻菎蜫裈褌貇醌錕锟騉髠髡髨鯤鲲鵾鹍]>kūn; +[㩲䠅壸壼悃捆梱硱祵稇稛綑裍閫閸阃齫]>kǔn; +[㫻困涃睏]>kùn; +擃>kuǒ; +[㗥䟯䦢䯺姡廓彉彍扩拡括挄擴漷濶蛞闊阔鞟鞹]>kuò; +[嚹垃拉柆磖翋菈]>lā; +[㕇揦邋]>lá; +[䟑喇藞]>lǎ; +[㸊㻋㻝䂰䃳䏀䓥䗶䝓䪉䱫䶛剌揧攋楋爉瓎瘌腊臈臘蜡蝋蝲蠟辢辣鑞镴鬎鯻癩]>là; +[啦鞡]>la; +[㚓㥎䅘䋱䚅䠭䧒來俫倈婡孻崃崍庲徕徠来梾棶涞淶猍琜筙箂莱萊逨郲錸铼騋鯠鶆麳]>lái; +[䂾唻]>lǎi; +[㠣㾢䄤䓶䲚勑娕櫴濑瀨瀬癞癩睐睞籁籟藾襰賚賴赉赖頼顂鵣]>lài; +[㑣㘓㞩㦨㳕䆾䍀䑌䦨䪍䰐䳿儖兰厱啉囒婪岚嵐幱懢拦攔斓斕栏欄欗澜瀾灆灡燣燷璼礷篮籃籣糷繿葻蓝藍蘫蘭襕襤襴譋讕谰躝钄闌阑韊]>lán; +[㛦㨫㩜㰖䊖䌫壈嬾孄孏懒懶揽擥攬榄欖浨漤灠覧覽览醂顲𡒄]>lǎn; +[㜮㱫䃹嚂壏滥濫烂燗爁爛爤瓓纜缆鑭镧]>làn; +啷>lāng; +[㝗㟍㢃㱢㾿䆡䡙䯖䱶俍勆嫏廊桹榔樃欴狼琅瑯稂筤艆莨蓈蓢蜋螂踉躴郎郞鋃锒]>láng; +[㓪㙟㫰㮾㾗䀶䁁塱朖朗朤烺硠誏閬阆]>lǎng; +[䍚䕞埌崀浪蒗]>làng; +[捞撈]>lāo; +[㗦㞠㟉㟹㨓䃕䜎䝁䲏僗劳労勞哰唠嘮崂嶗憥浶牢痨癆磱窂簩蟧醪鐒铹]>láo; +[㟙㧯䇭䕩䝤䳓䵏佬咾恅栳橑狫老荖轑銠铑]>lǎo; +[嫪憦橯涝澇耢耮軂髝]>lào; +[㔹㖀㦡乐仂叻哷忇扐捋楽樂氻泐玏砳竻簕肋艻阞韷頱鰳鳓]>lè; +[了餎饹]>le; +勒>lēi; +[㒍㔣㵢㹎䉓䍣䐯䨓壨嫘擂檑欙瓃畾縲纍纝缧罍羸蘲虆蠝鐳鑘镭雷靁鼺]>léi; 
+[㑍㒦㙼㡞㶟㼍㿔䉂䛶䣂䴎傫儡儽厽垒壘樏櫐櫑洡漯灅瘣癗磊磥礨絫耒蕌蕾藟蘽誄讄诔轠鑸頛鸓]>lěi; +[㭩㲕㴃䉪䍥䒹䢮䣦䮑埒攂泪涙淚礌礧禷类累纇蘱酹銇錑頪類颣𩔗]>lèi; +嘞>lei; +[䉄䬋倰塄崚棱楞稜薐]>léng; +冷>lěng; +[䚏䮚堎愣踜]>lèng; +唎>lī; +[㒿㓯㠟㦒㰀㰚㴝㷰㹈㿛䄜䅻䉫䊍䋥䍠䍦䔆䔣䔧䖥䖽䖿䙰䣓䣫䱘䴻䵓䵩刕剓剺劙厘喱嚟囄嫠孋孷廲悡攡梨梩梸棃樆氂漓漦灕犁犂璃瓈睝离穲篱籬粍粚糎縭缡罹艃菞蓠蔾藜蘺蜊蟍褵謧貍邌醨釐鋫錅鏫鑗離騹驪骊鯬鱺鲡鵹鸝鹂黎黧]>lí; +[㸚㾖䗍䤚䧉俚兣娌峛峢峲欐欚浬澧理盠礼禮粴蟸蠡裏豊逦邐醴里鋰锂鯉鱧鲤鳢礼]>lǐ; +[㑦㒧㔏㕸㗚㘑㟳㡂㤡㤦㧰㬏㮚㯤㱹㺡㻎㻺㼖㽁㽝㾐㿨䁻䃯䅄䇐䊪䍽䓞䔁䔉䘈䚕䟏䟐䡃䤙䥶䬅䬆䮋䮥䰛䰜䲞䴄䴡䶘丽例俐俪傈儮儷凓利力励勵历厉厤厯厲吏呖唳嚦囇坜塛壢婯屴岦巁悧悷慄戻戾搮攊攦攭斄暦曆曞朸枥栎栗栛栵棙櫔櫟櫪歴歷沥沴涖溧濿瀝爄爏犡猁珕琍瑮瓅瓑瓥疠疬痢癘癧皪盭矋砅砬砺砾磿礪礫礰禲秝立笠筣篥粒粝糲綟纅脷苈苙茘荔莅莉蒞藶蚸蛎蛠蜧蝷蠇蠣蠫詈讈赲跞躒轢轣轹郦酈鉝隶-隸雳雴靂靋鬁鬲鱱鱳鳨鴗鷅鷑麗麜𥝢]>lì; +[哩李狸裡]>li; +[俩倆]>liǎ; +[㓎㜕㝺㟀㡘㢘㥕㦁㶌㺦㼓㾾䁠䃛䆂䏈䙺䥥䨬䭑亷僆劆匲匳嗹噒奁奩奱嫾帘廉怜慩憐梿槤櫣涟溓漣濂濓熑燫琏璉磏稴簾籢籨縺翴联聫聮聯臁莲蓮薕螊蠊裢褳覝謰譧蹥连連鄻鎌鐮镰鬑鰱鲢𢅏]>lián; +[㦑㪘㯬㰈㰸䇜䌞嬚摙羷脸膦臉莶薟]>liǎn; +[㜃㜻㪝㱨㶑㼑堜媡恋戀敛斂楝歛殓殮浰湅潋澰瀲炼煉瑓練纞练萰蔹蘝蘞裣襝錬鍊鏈链鰊]>liàn; +[㹁䝶䣼䭪凉墚梁椋樑涼粮粱糧綡良輬辌駺]>liáng; +[㒳㔝䓣䠃䩫両两兩唡啢掚緉脼蜽裲魉魎]>liǎng; +[亮哴喨悢晾湸諒谅輌輛辆量鍄]>liàng; +[撩蹽]>liāo; +[㙩㝋㵳䜍䜮䝀䨅僚嘹嫽寥寮屪嵺嶚嶛廫憀摎敹暸漻潦熮獠璙疗療窷簝繚缭聊膋膫蟟豂賿蹘辽遼鐐镣顟飂飉髎鷚鷯鹨鹩]>liáo; +[㶫䄦䑠䩍憭瞭蓼鄝镽]>liǎo; +[㡻㺒䉼䍡䎆䢧尞尥尦廖撂料炓燎爒]>liào; +巤>liē; +䟩>liě; +[㤠㧜㬯㭞㯿㲱㸹㼲㽟䁽䅀䉭䓟䜲䟹䴕儠冽列劣劽埓姴挒捩擸洌浖烈煭犣猎獵睙聗脟茢蛚蛶裂趔躐迾颲鬛鬣鮤鱲鴷]>liè; +咧>lie; +[㔂㝝㷠䚬䢯䫐䮼临亃厸壣嶙惏斴晽暽林淋潾瀶燐獜琳璘甐疄痳瞵矝碄磷箖粼綝繗罧翷臨轔辚遴邻鄰鏻隣霖驎鱗鳞麐麟]>lín; +[㐭㨆䕲僯凛凜廩廪懍懔撛檁檩澟癛癝菻]>lǐn; +[㖁䉮䗲䫰吝恡悋橉焛粦蔺藺蹸躏躙躪轥閵]>lìn; +[拎昤]>līng; +[㖫㡵㥄㦭㪮㬡㯪㱥㲆㸳㻏㾉䄥䈊䉁䉖䉹䌢䍅䔖䕘䖅䙥䚖䠲䡼䡿䧙䨩䯍䰱䴇䴒䴫凌刢囹坽夌姈孁岺朎柃棂櫺欞泠淩澪灵燯爧狑玲琌瓴皊睖砱碐祾秢竛笭紷綾绫羚翎聆舲苓菱蔆蕶蘦蛉衑裬詅跉軨輘酃醽鈴錂铃閝陵零霊霛霝靈駖魿鯪鲮鴒鸰鹷麢齡齢龄龗]>líng; +[呤岭嶺彾袊阾領领]>lǐng; +[令另掕炩]>lìng; +伶>ling; +㶈>liǒng; +[溜熘蹓]>liū; +[㐬㽞䉧䋷䗜䚧䬟䭷䰘䱖䱞䶉刘劉嚠媹嵧懰旈旒榴橊沠流浏瀏琉瑠瑬璢畄留畱疁瘤癅硫蒥蓅藰蟉裗遛鎏鎦鏐镏镠飀飅飗駠駵騮驑骝鰡鶹鹠麍]>liú; +[嬼柳栁桺橮珋綹绺罶羀鉚鋶铆锍飹]>liǔ; +[㙀㨨㶯㽌䄂六坴塯廇澑畂磂翏鐂雡霤餾馏鬸]>liù; +[㚅㝫㡣㦕㰍䃧䆍䏊䙪䥢䪊咙嚨屸嶐巃巄昽曨朧栊櫳泷湰滝漋瀧爖珑瓏癃眬矓砻礱礲竜笼篭簼籠聋聾胧茏蕯蘢蠪蠬襱豅躘鏧鑨隆霳靇鸗龍龒龓龙]>lóng; +[㙙㴳䡁儱垄垅壟壠拢攏竉陇隴]>lǒng; +[㑝㛞㟖㢅哢徿挵梇硦衖贚]>lòng; +窿>long; +[䁖瞜]>lōu; +[㟺㥪㲎㺏䄛䅹䝏䣚䫫䮫䱾剅娄婁廔慺楼樓熡耧耬艛蒌蔞蝼螻謱軁遱鞻髅髏鷜]>lóu; +[㪹塿嵝嶁搂摟甊篓簍]>lǒu; +[㔷屚漏瘺鏤镂陋]>lòu; +[喽嘍]>lou; +[噜嚕]>lū; +[㠠㢳㪭㭔㱺㿖䡎䮉䰕卢嚧垆壚庐廬攎曥栌櫨泸瀘炉爐獹玈璷瓐盧矑籚纑罏胪臚艫芦蘆蠦轤轳鑪顱颅髗魲鱸鲈鸕鸬黸]>lú; +[㔪㢚㯭䕡䲐卤塷掳撸擄擼樐橹櫓滷瀂硵磠舻艣艪蓾虏虜鏀鐪鑥镥魯鲁鹵]>lǔ; +[㓐㖨㛬㜙㟤㦇㪐㪖㫽㯝㯟㼾䃙䌒䎑䎼䐂䘵䚄䟿䡜䩮䱚䴪侓僇剹勎勠圥垏娽峍廘彔录戮摝椂樚淕淥渌漉潞熝琭璐甪盝睩硉碌磟祿禄稑穋箓簏簶籙粶膔菉蔍蕗虂螰觮觻賂赂趢路踛蹗輅轆辂辘逯醁錄録錴鏕鏴陆陸露騄騼鯥鵦鵱鷺鹭鹿麓]>lù; +[榈櫖櫚氀爈瘘瘻膢藘閭闾馿驢驴]>lǘ; +[㭚㻲㾔侣侶偻僂儢吕呂屡屢履挔捛旅梠溇漊祣稆穞穭絽縷缕膂膐褛褸郘鋁铝]>lǚ; +[㔧㠥㲶䔞䢖䥨勴寽嵂律慮氯滤濾率箻綠緑绿膟葎虑鑢]>lǜ; +[㝈㡩㱍䖂䜌圝圞娈孌孪孿峦巒挛曫栾欒滦灓灤癵羉脔臠虊銮鑾鵉鸞鸾]>luán; +卵>luǎn; +[乱亂薍釠]>luàn; +[攣癴]>lüán; 
+㨼>luè; +[㑼㔀䂮䌎䛚䤣圙擽畧稤鋝鋢锊]>lüè; +[抡掄]>lūn; +[㖮㷍䈁䑳仑伦侖倫囵圇婨崘崙惀沦淪溣綸纶腀菕蜦踚輪轮錀陯鯩]>lún; +[埨稐耣]>lǔn; +[碖論论]>lùn; +絯>lǜn; +[啰囉罗]>luō; +[㑩㼈㽋䊨䯁儸摞椤欏氇氌猡玀箩籮羅萝蔂蘿螺覙覶覼逻邏鏍鑼锣镙饠騾驘骡鸁]>luó; +[㒩㦬㩡㰁㱻倮攞曪瘰癳砢臝蓏蠃裸躶鎯]>luǒ; +[㓢㴖㿚䀩䇔䈷䌱䌴嗠峈洛濼烙犖珞硌笿絡纙络荦落袼酪雒駱骆鮥鴼鵅]>luò; +[妈媽嬤嬷]>mā; +[㦄䗫䳸犘痲蔴蟇麻]>má; +[㐷䣕䣖溤玛瑪码碼蚂螞鎷馬马鰢鷌]>mǎ; +[㑻㜫㨸㾺䯦傌唛嘜帓杩榪犸獁睰祃禡罵閁駡骂]>mà; +[吗嗎嘛蟆]>ma; +[㜥㼮䁲䚑䨪埋薶霾]>mái; +[买嘪荬蕒買鷶]>mǎi; +[䈿䘑䜕䨫䮮佅劢勱卖売眿脈脉蝐賣迈邁霢麥麦]>mài; +[㒼㗄㙢䅼䊡䐽䑱䛲䟂䯶䰋姏悗慲摱槾璊瞒瞞蛮蠻謾谩蹒蹣顢颟饅馒鬘鰻鳗]>mán; +[㛧䜱屘満满滿矕螨蟎襔鏋鮸]>mǎn; +[㗈㡢㬅㵘䕕䝡䝢䡬僈墁嫚幔慢曼漫澫澷熳獌縵缦蔄蔓鏝镘鬗]>màn; +[㝑㟌㟿㡛㻊䀮䅒䈍䟥䵨吂哤娏尨忙恾杗杧氓汒浝牤牻狵痝盲盳硭笀芒茫蘉蛖邙釯鋩铓駹鼆]>máng; +[㙁㟐㬒䁳䒎䖟壾漭硥茻莽莾蟒蠎]>mǎng; +[猫貓]>māo; +[㝟㲠䅦兞堥媌嫹旄枆毛渵牦犛矛罞茅蝥蟊覒軞酕錨锚髦髳鶜]>máo; +[㚹㧇冇卯夘峁戼昴泖笷茆]>mǎo; +[㒵㒻㡌㧌㪞㫯㮘㴘㺺㿞䀤䋃䓮䡚䫉冐冒媢帽懋暓柕楙毷瑁皃眊瞐耄艒芼茂萺蓩袤貌貿贸鄮]>mào; +[么麼]>me; +[㶬㺳䊈䍙䒽䤂呅堳塺媒嵋徾攗枚栂梅楣楳槑沒没湄湈煤猸玫珻瑂眉睂禖篃脄脢腜苺莓葿郿酶鋂鎇镅霉鶥鹛黴𪃏]>méi; +[䆀䓺䜸凂媄媺嬍嵄挴毎每浼渼燘美鎂镁黣]>měi; +[㭑䀛䉋䊊䰨䰪䵢妹媚寐抺昧沬煝痗眛睸祙蝞袂跊韎鬽魅]>mèi; +[㡈㨺䊟䝧䫒扪捫樠穈菛虋鍆钔門閅门]>mén; +暪>měn; +[㥃㦖㱪㵍悶懑懣焖燜闷]>mèn; +[们們]>men; +[㙹㠓㩚䀄䇇䉚䑃䑅䒐䓝䗈䙦䙩䤓䰒䲛䴌䴿䵆儚冡幪懞懵曚朦橗檬氋濛獴甍甿盟瞢矇矒礞艨苎莔萌萠蒙蕄虻蝱鄳鄸雺靀饛鯍鸏鹲]>méng; +[㚞䏵勐猛瓾艋蜢蠓錳锰鯭]>měng; +[㜴㝱䠢䥂䥰夢夣孟懜梦溕霥霿]>mèng; +[咪嘧眯瞇]>mī; +[㜷㟜㠧㣆㩢㸏䊳䋛䌕䌘䍘䕳䕷䛧䤍䥸䪾䴢冞弥彌戂擟攠檷瀰爢猕獼瓕祢籋糜縻罙蒾蘪蘼詸謎谜迷醚醾醿釄镾鸍麊麋麛𨢥]>mí; +[㝥㥝㰽㳽䭧䱊侎孊弭敉沵洣渳濔灖眫米脒葞蔝銤靡]>mǐ; +[㜆㨠㫘㳴㴵㵋㸓䁇䉾䌏䌐䌩䖑䛉䛑䣾䤉䭩䮭冖冪塓宓宻密峚幂幎幦榓樒櫁汨泌淧淿滵漞濗熐祕秘簚糸羃蔤藌蜜蠠覓覔覛觅謐谧鼏]>mì; +[㒙㝰㬆㮌㰃䃇䏃䫵䰓婂媔嬵宀棉檰櫋眠矈矊矏綿緜绵臱芇蝒醎]>mián; +[㛯㤁㻰䀎䤄丏偭免冕勉勔喕娩愐汅沔湎眄絻緬缅腼葂麪麫]>miǎn; +[㴐糆面靣麵麺]>miàn; +喵>miāo; +[㑤㠺䁧䖢描瞄緢苗鱙鶓鹋]>miáo; +[㦝䅺劰杪淼渺眇秒篎緲缈藐邈]>miǎo; +[妙庙庿廟玅竗]>miào; +[乜吀咩哶孭羋芈]>miē; +[㒝䁾䈼䘊䩏幭懱搣櫗滅灭礣篾蔑薎蠛衊覕鑖鱴鴓]>miè; +[㟩㟭㢯䁕䂥䃉䋋䟨䡑䡻䪸䲄姄岷崏忞忟怋捪敯旻旼民珉琘瑉痻盿砇碈緍緡缗罠苠鈱錉鍲閺]>mín; +[㞶㥸㨉䡅僶冺刡勄悯愍慜憫抿敃敏暋泯渑湣潣澠皿笢簢閔閩闵闽鰵鳘黽黾]>mǐn; +榠>mīng; +[㝠䄙䆨䆩䊅䫤佲冥名嫇明暝朙洺溟猽眀瞑蓂螟覭鄍酩銘铭鳴鸣]>míng; +[㟰㫥凕姳慏眳茗]>mǐng; +[䒌命詺]>mìng; +[謬谬]>miù; +摸>mō; +[䃺䉑䯢劘嚤嚰嫫摩摹擵模橅磨糢膜蘑謨谟饃饝馍髍魔]>mó; +[䩋懡抹]>mǒ; +[㱄㱳㷬㷵㹮䁼䁿䏞䒬䘃䜆䬴䮬䱅䳮䴲嗼嚜圽塻墨妺嫼寞帞慔昩暯末枺歾歿殁沫洦湐漠瀎爅瘼皌眜眽瞙砞礳秣粖絈縸纆耱茉莈莫蓦藦蛨蟔衇袹謩貃貊貘鄚銆鏌镆陌霡靺驀鬕魩默黙]>mò; +[庅麽]>mo; +哞>mōu; +[㭌䏬䗋䥐䱕侔劺洠牟眸瞴繆缪蛑蟱謀谋鉾鍪鴾麰]>móu; +[䍒某踇]>mǒu; +[愗瞀]>mòu; +[䱯恈毪氁獏譕]>mú; +[䥈亩姆姥峔母牡牳畆畒畝畞畮砪胟鉧]>mǔ; +[㒇㜈㣎㧅㾇䀲䊾䑵䧔仫募坶墓幕幙慕暮木楘毣沐炑牧狇目睦穆苜莯蚞鉬钼雮霂鞪鶩鹜]>mù; +拇>mu; +嗯>ń; +㐻>ň; +[䏧䛔䫱嗱拏拿誽鎿镎]>ná; +[乸哪雫]>nǎ; +[㗙㨥㴸䀑䅞䇣䇱䈫䎎䖓䖧䟜䪏䱹妠捺笝納纳肭蒳衲袦豽貀軜那鈉钠靹魶]>nà; +[㾍䍲䘅䯮摨熋釢]>nái; +[乃奶妳嬭廼氖疓艿迺]>nǎi; +[㮈㮏㲡倷奈柰榒渿耐萘螚褦錼鼐]>nài; +囡>nān; +[㓓㽖䔜䕼䛁䶲侽南喃奻娚暔枏枬柟楠男畘莮萳諵难難]>nán; 
+[㫱䁪䈒䔳戁揇湳罱腩蝻赧]>nǎn; +婻>nàn; +[乪囔]>nāng; +[䂇嚢囊欜]>náng; +[㶞攮曩灢饢馕]>nǎng; +[㒄儾齉]>nàng; +[㞪㺀䃩䄩䑋䛝䫸䴃呶夒峱嶩巎巙怓憹挠撓桡橈猱獶獿硇繷詉譊鐃铙髐𥑪]>náo; +[㑎㛴㺁䜀䜧匘垴堖嫐恼悩惱瑙碯脑腦]>nǎo; +[淖閙闹鬧]>nào; +[䎪䭆訥讷]>nè; +[吶呐呢]>ne; +[㼏䲎娞脮腇餒馁鮾鯘]>něi; +[㕯㖏㘨㨅䡾䳖內内氝]>nèi; +黁>nēn; +齳>něn; +[㜛㯎㶧嫩嫰]>nèn; +[㴰䏻嬣能薴]>néng; +㲌>nèng; +㕶>ng̀; +妮>nī; +[㞾㪒㹸䘦䘽䛏䝚倪坭埿婗尼屔怩泥淣狋猊秜籾臡蚭蜺觬貎跜輗郳霓鯓鯢鲵麑齯]>ní; +[㣇㵫䕥䦵䧇䭲䰯伱伲你儗儞孴抳拟擬旎柅狔禰苨薿鈮鉨铌隬馜]>nǐ; +[㠜㥾㦐㲻䁥䘌䵑䵒匿堄嫟嬺屰嶷惄愵昵暱氼溺痆睨糑縌胒腻膩迡逆鷁鷊鹝鹢]>nì; +[䄭䄹䩞䬯年拈秊秥鮎鯰鲇鲶黏]>nián; +[㘝㞋䚓捻撚撵攆碾簐跈蹨躎輦辇]>niǎn; +[㲽卄唸埝姩廿念涊淰艌鼰齞]>niàn; +[嬢孃]>niáng; +[䖆酿醸釀]>niàng; +娘>niang; +[㒟㜵㠡㭤䃵䐁䙚䦊䮍嫋嬝嬲樢茑蔦袅裊褭鳥鸟𢶑𢸣]>niǎo; +[㞙㳮尿脲]>niào; +[惗捏揑踗鈢鉩錜鑈]>niē; +[㡪苶]>nié; +[㖖㘿㙞㚔㜸㩶㮆㴪㸎䂼䄒䌜䜓䯀䯅䯵啮喦嗫噛嚙囁囓圼孼孽嵲帇摰敜枿棿槸櫱涅湼疌篞糱糵聂聶臬臲菍蘖蠥踂蹑躡鎳鑷钀镊镍闑陧隉顳颞齧]>niè; +[㤛䋻䚾䛘囜您]>nín; +拰>nǐn; +[㝕㲰䗿䭢儜凝咛嚀宁寍寕寗寜寧拧擰柠檸狞獰甯聍聹鑏鬡鸋]>níng; +[橣矃]>nǐng; +[㣷㿦䔭佞侫泞濘]>nìng; +妞>niū; +[䀔䒜牛]>niú; +[㺲䏔忸扭炄狃紐纽莥鈕钮靵]>niǔ; +[䋴衂]>niù; +[㶶䁸䢉侬儂农哝噥檂欁浓濃癑禯秾穠脓膿蕽襛農辳醲鬞齈]>nóng; +䵜>nǒng; +[弄挊]>nòng; +[㝹䨲羺譨]>nóu; +[䅶䘫䰭啂槈獳耨譳鎒鐞]>nòu; +[㚢伮奴孥笯蒘駑驽鴑]>nú; +[努弩砮胬]>nǔ; +[䢞怒抐搙]>nù; +[女籹釹钕]>nǚ; +[㵖䖡䘐䚼䶊恧朒衄]>nǜ; +[㬉䎡䙇暖渜煖煗餪]>nuǎn; +偄>nuàn; +[䖈䖋硸]>nuè; +[䨋疟瘧虐謔谑]>nüè; +[㑚㔮㰙傩儺娜挪捼梛郍]>nuó; +[㛂㡅橠砈砹]>nuǒ; +[㐡䚥愞懦懧挼掿搦搻稬穤糥糯諾诺蹃逽]>nuò; +[喔噢]>ō; +哦>ó; +[呕嘔櫙欧歐殴毆熰瓯甌眍瞘謳讴鏂鴎鷗鸥]>ōu; +[吽齵]>óu; +[㒖㼴䚆䯚偶吘塸耦腢蕅藕]>ǒu; +[㛏䌂䌔怄慪沤漚]>òu; +[啪夿妑皅舥葩蚆趴]>pā; +[䯲䶕杷潖爬琶筢耙跁]>pá; +[帊帕怕袙]>pà; +拍>pāi; +[㵺俳徘排棑牌猅篺簰簲輫]>pái; +俖>pǎi; +[㭛䖰哌汖派湃蒎鎃]>pài; +[㐴攀潘畨眅砙]>pān; +[䃑䃲䈲䰉䰔媻幋搫柈槃瀊盘盤磐磻縏蒰蟠跘鎜鞶]>pán; +[坢奤]>pǎn; +[㳪冸判叛拚沜泮溿炍牉畔盼袢襻詊鋬鑻頄頖]>pàn; +[乓滂膖]>pāng; +[㤶㥬㫄䅭䨦䮾厐厖嫎庞庬彷徬旁篣胮舽蒡螃逄雱霶騯髈龎龐]>páng; +[䒍嗙耪覫]>pǎng; +[㜊炐肨胖]>pàng; +[抛拋脬]>pāo; +[㚿䛌䩝刨匏咆垉庖炰爮狍袍軳鞄齙龅]>páo; +跑>pǎo; +[㘐㯡䶌奅泡炮疱皰砲礟礮靤髱麭]>pào; +[呸垺妚娝岯怌柸肧胚衃醅]>pēi; +[㟝䣙䪹䫠䲹培毰碚裴裵賠赔邳阫陪陫]>péi; +[昢琣]>pěi; +[㤄㧩㫲㳈䊃伂佩姵帔斾旆沛浿珮笩苝轡辔配霈馷]>pèi; +[呠喯喷噴]>pēn; +[湓瓫盆葐]>pén; +[翉翸]>pěn; +歕>pèn; +[匉嘭怦恲抨梈漰澎烹砰硑磞軯閛駍]>pēng; +[㛔㥊䄘䡫䰃䴶倗傰埄堋塜塳弸彭憉挷朋棚椖樥淜痭硼稝竼纄膨芃蓬蟚蟛輣錋鑝韸韼驡髼鬅鬔鵬鹏]>péng; +[捧淎皏]>pěng; +[㼞掽椪甏碰踫]>pèng; +篷>peng; +[丕伾劈噼坯怶悂憵批披抷旇炋狉狓砒磇礕秛秠翍耚豾釽鈈鈹鉟銔铍霹髬髲鮍鲏鴄]>pī; +[㔥㯅啤埤壀朇枇毗毘焷犤玭琵疲皮笓紕纰罴羆肶脾腗膍蚽蜱螷豼貔郫鈚錍阰陴駓魾鼙]>pí; +[㨽䏘䚰䚹䤏䫌䰦仳匹噽嚭圮崥庀疋痞癖脴苉苤諀銢隦]>pǐ; +[㿙䑀䑄䠘䡟䤨䴙僻媲嫓屁揊擗淠渒潎澼濞甓礔譬辟闢鷿鸊]>pì; +[偏囨媥犏篇翩萹頨鶣]>piān; +[㛹㼐䮁楄楩胼腁諚諞谝賆蹁輧駢騈骈骿]>pián; +[覑貵]>piǎn; +[㓲㸤䏒片騗騙骗]>piàn; +[嘌彯旚翲薸螵飃飄飘魒]>piāo; +[㼼䕯䴩淲瓢竂蚫闝]>piáo; 
+[㩠㵱㹾摽殍犥皫瞟縹缥膘蔈藨醥]>piǎo; +[㬓㺓䏇僄剽勡嫖徱慓漂票篻顠驃骠]>piào; +[撆暼氕瞥]>piē; +[䥕丿撇鐅]>piě; +嫳>piè; +[姘拼涄礗]>pīn; +[㰋嚬娦嫔嬪獱琕薲貧贫頻顰频颦]>pín; +[品榀]>pǐn; +[䀻牝聘]>pìn; +[乒俜娉焩砯頩]>pīng; +[㺸㻂䍈䶄凭凴呯坪屏屛帡帲幈平慿憑枰泙洴玶瓶甁甹竮箳簈缾聠胓艵苹荓萍蓱蘋蚲蛢評评軿郱鮃鲆鵧𦚓]>píng; +[坡泊溌癹鉕鏺钷陂頗颇]>pō; +[㨇㰴嘙婆櫇皤蔢鄱]>pó; +[叵尀笸]>pǒ; +[㛘䄸䎊䞟䣪䣮䨰䪖䯙岶廹炇烞狛珀破砶粕蒪迫酦醗醱釙钋魄]>pò; +[泼潑]>po; +剖>pōu; +[㧵抔抙捊掊裒錇锫]>póu; +[㕻勏哣婄廍棓犃瓿]>pǒu; +咅>pòu; +[仆噗墣扑抪撲擈攴濮痡醭陠]>pū; +[㒒㯷㲫㺪䈬䈻䑑䔕䗱䧤䴆僕匍樸毞獛璞瞨穙箁纀脯菐菩葡蒱蒲襆襥蹼轐酺鏷镤]>pú; +[㹒䲕圃圤普朴檏氆浦溥潽烳誧諩譜谱鐠镨]>pǔ; +[㬥曝瀑舖舗鋪铺]>pù; +[七倛僛凄唭嘁墄妻娸悽慼慽攲期柒栖桤桼棲榿欺沏淒漆磎磩粞緀萋諆郪霋顣魌鶈鸂𠀁]>qī; +[㜎㟓㟚㟢㩽㯦䄢䅲䉻䐡䑴䓅䞚䟚䡋䧘䧵䩓䭶䭼䰇䱈䲬䳢䶒䶞亓亝其剘圻埼墘奇岐岓崎帺忯愭懠掑斉斊旂旗棊棋檱櫀歧淇濝猉玂琦琪璂畁畦疷碁碕祁祈祺禥竒粸綦綨纃翗耆脐臍艩芪荠萁萕蕲薺藄蘄蚑蚔蚚蛴蜝蜞蠐衹跂踑軝迉鄿釮錡錤锜頎颀騎騏骐骑鬐鬿鮨鯕鰭鲯鳍鵸鶀麒齊齐]>qí; +[㒅㞓㥓㩩㫓㾨䄎䄫䉝䋯䎢䏿䒻䔇䛴䡔䭫䭬乞企启呇唘啓啔啟婍屺岂敧晵杞棨槣盀綮綺绮芑芞裿諬豈起邔闙]>qǐ; +[㓞㞚㣬㮑㼤䀈䀙䁈䁉䅤䌌䏅䏌䏠䒗䙄䚉䚍䟄䢀䫔䬣䰴呮咠噐器夡契弃徛忔憇憩摖暣栔棄气気氣汔汽泣湆湇炁甈盵矵砌碛碶磜磧礘綥緝缉罊葺蟿訖諿讫迄鏚鼜]>qì; +戚>qi; +掐>qiā; +䠍>qiá; +[跒酠]>qiǎ; +[㓣㡊㤉䜑䨐䯊䶝匼圶帢恰殎洽]>qià; +[㡨仟佥僉兛千圱圲奷婜孯岍幵悭愆慳扦拪掔搴撁攐攑攓杄檶櫏欦汘汧牵牽签簽籤粁縴羟羥肷膁臤芊蚈褰諐謙譣谦谸迁遷釺鉛鋟鐱钎铅锓阡韆顩騫骞鬜鬝鵮鹐]>qiān; +[㦮㨜㩮㸫䁮䈤䍉䕭䖍䨿仱前媊岒忴扲拑掮揵榩歬漧潛潜濳灊燂箝葥虔鈐鉗銭錢钤钱钳雂騚鬵鰬黔黚]>qián; +[㧄䪈䭤嵰撖槏浅淺繾缱蜸譴谴遣]>qiǎn; +[㐸㜞㟻㪠㯠䈴䊴䥅䦲䫡倩傔儙刋堑塹壍嵌悓慊棈椠槧欠歉皘篏篟綪芡茜蒨蔳輤]>qiàn; +[呛嗆嶈戕戗斨枪椌槍溬牄猐玱瑲矼篬羌羗羫腔蜣謒跄蹌蹡錆鎗鏘鏹锖锵镪]>qiāng; +[㩖丬墙墻嫱嬙廧強强彊樯檣漒爿牆蔷薔蘠軖]>qiáng; +[㛨䅚傸勥墏抢搶磢繈繦]>qiǎng; +[䵁唴炝熗羻]>qiàng; +[劁墝墽嵪幧悄敲橇硗磽繑跷踍蹺蹻郻鄡鄥鍫鍬鏒鐰锹鞒頝骹髜]>qiāo; +[㚁㝯䀉䎗䩌䱁乔侨僑喬嘺嶣憔桥樵橋燆癄瞧硚礄簥荞蕎藮譙谯趫趬鐈鞽顦]>qiáo; +[㚽㡑㤍䲾巧愀釥]>qiǎo; +[㢗㪣㴥䃝䆻䇌俏僺峭帩撬撽窍竅翘翹誚诮躈陗鞘韒髚]>qiào; +[㚗䦧癿]>qié; +且>qiě; +[㓶㗫㛍㛙㤲㥦㫸㰰㰼㹤㾀㾜䟙䤿䬊切匧妾怯悏惬愜挈朅洯淁穕窃竊笡箧篋緁藒蛪踥鍥锲鯜]>qiè; +[亲侵媇嵚嶔欽瀙綅衾親誛钦顉駸骎鮼]>qīn; +[㘦㢙㩒㪁㮗䔷䖌䦦勤厪嗪噙嫀庈廑懃懄捦擒斳檎澿珡琴琹禽秦耹芩芹菦蚙螓蠄赺鈙靲鳹鵭]>qín; +[㝲㾛䠴坅寑寝寢昑曋螼赾]>qǐn; +[㞬㤈䈜吢吣唚寖寴抋揿搇撳沁菣]>qìn; +[倾傾卿啨圊寈氢氫氰淸清狅蜻軽輕轻郬靑青鯖鲭]>qīng; +[㯳䝼䞍䲔剠勍夝情擎晴暒樈檠殑甠晴]>qíng; +[㷫䔛䯧庼廎檾請请頃顷]>qǐng; +[䋜䌠凊庆慶掅殸汫漀碃磘磬罄謦鑋靘]>qìng; +[匔焪穹芎銎]>qiōng; +[㒌㧭㮪㷀㼇䅃䆳䊄䓖䛪䠻儝卭宆惸桏棾橩焭煢琼璚瓊睘瞏穷窮竆笻筇茕藑藭蛩赹跫輁邛]>qióng; +苘>qiǒng; +[㐀䆋丘丠坵媝恘楸秋秌穐篍緧萩蓲蚯蝵蟗蠤趥邱鞦鞧鰍鱃鳅鶖鹙龝𠀉𩝠]>qiū; +[㐤㕤㞗㟈㤹㥢㧨㭝㷕㺫䊵䎿䜪䟵䣇䤛俅厹叴唒囚崷巯巰扏朹梂毬求汓泅浗渞煪犰玌球璆皳盚紌絿肍脙艽苬莍虬虯蛷蝤裘觩訄訅賕赇逎逑遒酋釓釚銶钆鮂鯄鰌鰽鼽]>qiú; +[㼒䞭搝糗]>qiǔ; +[䟬䠗殏螑]>qiù; +[㠊伹佉凵匤区區呿坥屈岖岴嶇憈抾敺曲瞿砠祛胠蛆蛐袪誳趋趨躯軀镼阹駆駈驅驱髷魼鰸鱋鶌黢]>qū; +[㖆㜹㣄㯫㲘䀠䂂䋧䝣䞤䟊䵶佢劬匷忂懅戵斪朐欋氍淭渠灈璖璩痀癯磲籧翑翵胊臞菃葋蕖蘧螶蟝蠷衢豦躣軥鑺鴝鸜鸲麯麴麹鼩𪍸]>qú; +[䶚取娶浀竬筁紶詓齲龋]>qǔ; +[㧁㫢㰦䁦䒧䠐刞厺去湨覰覷覻觑詘诎趣閴闃阒麮鼁]>qù; +[圈圏姾弮悛棬箞絟鐉]>quān; +[㒰㟨㟫䀬䑏䟒䠰佺全啳婘孉峑巏恮惓拳搼权権權泉洤湶牷犈瑔痊硂筌荃葲蜷蠸觠詮诠踡輇辁醛銓铨顴颧駩騡鬈鰁鳈鷤齤]>quán; 
+[䊎呟奆汱犬琄甽畎綣縓绻虇詃]>quǎn; +[䄐券劝勧勸烇牶玔絭荈]>quàn; +[缺蒛𩨭]>quē; +瘸>qué; +[㕁㩁㰌㱋㱿㴶㾡䇎䦬䧿却卻咑埆塙墧寉崅悫愨慤搉榷殻毃灍燩琷皵硞确碏確碻礐礭舭趞闋闕阕阙雀]>què; +[鵲鹊]>que; +[囷夋峮杶踆輴逡鰆]>qūn; +[㪊㿏䭽宭帬漘羣群裙裠錞]>qún; +蝽>qǔn; +[儿兒]>r; +[㜣㲯㸐㾆䑙䖄䫇嘫然燃繎肰蚦蚺蛅衻袇袡髥髯𤡮]>rán; +[㚩㯗㿵䎃䒣䣸䤡冄冉呥姌媣染橪珃苒]>rǎn; +[㚂䉴儴勷攘瀼獽瓤禳穣穰蘘躟鬤]>ráng; +[嚷壌壤爙]>rǎng; +[懹譲讓让]>ràng; +[㹛䫞荛蕘蛲蟯襓饒饶]>ráo; +[㑱娆嬈扰擾]>rǎo; +[繞绕遶隢顤]>rào; +[喏惹]>rě; +[渃热熱]>rè; +[䌾䴦人亻仁壬忈忎朲秂紝絍纴芢鈓銋魜鴹]>rén; +[㣼䏕䏰䭃忍栠栣棯秹稔腍荏荵]>rěn; +[㠴㶵㸾䀼䇮䋕仞仭任刃刄妊姙屻恁扨杒梕牣紉纫肕衽袵訒認认讱賃赁軔軠轫靭靱韌韧飪餁饪鵀]>rèn; +扔>rēng; +[㭁㺱䄧䚮仍礽辸陾]>réng; +芿>rèng; +[䒤囸日氜衵釰鈤馹驲]>rì; +茸>rōng; +[㘇㝐㣑㭜㲓㲨㺎㼸䇀䇯䈶䘬䠜䡆䡥䤊䩸媶容嵘嶸戎曧栄榕榮榵毧溶烿熔爃狨瑢穁絨縙绒羢肜茙荣蓉蝾融螎蠑褣鎔镕駥髶鰫鷛]>róng; +[㲝䢇傇冗宂搑氄軵]>rǒng; +鴧>ròng; +[㖻㽥䐓䧷䰆媃揉柔楺沑渘瑈瓇禸脜腬葇蝚蹂輮鍒鍕鞣韖騥鰇鶔]>róu; +[煣粈糅]>rǒu; +[䄾宍肉]>ròu; +[㨎㹘䋈䰰儒嚅如孺帤挐曘桇渪濡燸筎臑茹蕠薷蠕袽襦邚醹顬颥鱬鴽]>rú; +[㦺乳侞擩汝肗辱銣铷]>rǔ; +[傉入嗕媷洳溽縟缛蓐褥鄏]>rù; +[䓴堧壖撋]>ruán; +[㼱㽭䞂䪭媆朊瑌瓀盶碝礝緛耎腝蝡軟輭软阮]>ruǎn; +[㮃䅑甤緌蕤]>ruí; +[惢桵橤繠蕊蕋蘂蘃]>ruǐ; +[㓹㛱㪫㲊䌼䓲叡壡枘汭瑞睿芮蚋蜹銳鋭锐]>ruì; +[犉瞤]>rún; +[㠈䦞橍润潤膶閏閠闰]>rùn; +[䐞偌叒婼弱楉焫爇箬篛若蒻鄀鰙鰯鶸]>ruò; +[仨撒]>sā; +[㒎䊛洒灑靸]>sǎ; +[㪪㳐㽂䘮䙣䬃卅摋攃櫒脎萨薩趿鎝颯飒馺]>sà; +[塞愢揌毢腮顋鰓鳃]>sāi; +[㗷䈢]>sǎi; +[僿嗮簺賽赛]>sài; +噻>sai; +[三叁嘇弎攕毵毿犙糁糝鬖]>sān; +[㧲䀐䉈䊉䫩仐伞傘糂糣糤繖鏾饊馓]>sǎn; +[㤾㪔㪚䫅俕散潵閐]>sàn; +[桑桒]>sāng; +[䡦䫙嗓搡磉褬鎟顙颡]>sǎng; +[丧喪]>sàng; +[慅掻搔溞瘙繅缫臊颾騒騷骚鰠鰺鱢鳋]>sāo; +[㛮䕅嫂扫掃]>sǎo; +[㿋䐹䖣喿埽氉矂]>sào; +[㒊㥶㮦㱇㴔㻭䉢䔼䨛啬嗇懎歮歰涩澀澁濇瀒瑟璱穑穡繬翜色譅趇轖銫铯雭飋]>sè; +[曑森椮槮甧穼篸蔘襂]>sēn; +[僧鬙]>sēng; +[乷刹剎唦帴杀桬榝樧殺沙煞猀痧砂硰粆紗纱莎蔱裟鎩铩髿魦鮻鯊鯋鲨]>shā; +[傻儍訯]>shǎ; +[㚫㛼㰱䈉䝊䮜䵘䶎倽厦唼啑啥喢帹廈歃箑翣萐閯霎]>shà; +[筛篩]>shāi; +繺>shǎi; +[㬠晒曬閷]>shài; +[删刪剼姍姗山幓彡挻搧杉檆潸澘烻煽狦珊穇笘縿羴羶脠舢芟苫衫跚軕邖]>shān; +[㚒㣣㨛㪎㶒䠾晱睒覢醦閃闪陕陝陿]>shǎn; +[㣌㪨䄠䆄䚲䥇䦂䦅䱇䱉䴮傓僐善墠墡嬗扇掞摲擅汕灗熌疝磰禅繕缮膳蟮蟺訕謆譱讪贍赡赸鄯釤銏鐥钐饍騸骟鱓鱔鳝]>shàn; +[伤傷商墒慯殇殤滳漡熵蔏螪觞觴謪鬺]>shāng; +[䬕扄晌賞赏]>shǎng; +[丄上姠尙尚蠰銄鑜]>shàng; +裳>shang; +[弰捎旓梢烧焼燒稍筲艄莦蛸輎髾]>shāo; +[㲈㸛勺杓牊玿竰韶]>sháo; +[㪢䒚䔠少]>shǎo; +[㷹䏴䙼䬰佋劭卲哨娋潲睄紹綤绍袑邵颵]>shào; +[奢檨譇賒賖赊輋]>shē; +[㭙㰒䁋䂠䞌佘揲舌虵蛇蛥鉈鍦铊]>shé; +[䬷捨舍騇]>shě; +[㒤㢵㴇䀅䁯䄕䌰䠶䤮䵥厍厙射弽慑慴懾摂摄攝欇涉渉滠灄猞社蔎蠂設设赦韘麝]>shè; +[伸侁兟呻妽姺娠屾峷扟柛氠深燊珅申砷籶籸紳绅胂葠薓裑訷詵诜身鉮駪鯵鲹鵢]>shēn; +[䰠什甚神神]>shén; +[㔤㜤㰂㵊㾕吲哂婶嬸审宷審弞渖瀋瞫矤矧覾訠諗讅谂谉邥頣頥魫]>shěn; +[㥲㰮㵕䆦侺愼慎抻昚椹涁渗滲瘆瘎瘮眒眘肾脤腎葚蜃鋠]>shèn; +[升呏声斘昇殅泩湦焺牲狌珄生甡甥笙聲苼鉎阩陞鵿鼪]>shēng; +[䱆憴晠溗畻繩绳譝鱦]>shéng; +[㗂㼳㾪䁞䚇䪿偗省眚箵]>shěng; +[䎴䞉剩剰勝圣墭嵊榺琞盛聖胜蕂貹賸]>shèng; +[䴓呞失尸屍师師施浉湤湿溮溼濕狮獅箷絁葹蒒蓍虱蝨褷襹詩诗邿釶鈟鉇鯴鰤鲺鳲鳾鶳鸤]>shī; 
+[㖷㫑㵓䂖䖨䦹䶡乭十埘塒实実寔實峕时旹時榯湁湜溡炻石祏莳蒔蚀蝕遈鉐食飠饣鰣鲥鼫鼭]>shí; +[㕜㹬㹷䒨䦠乨使兘史始宩屎矢笶豕鉂駛驶]>shǐ; +[㒾㔺㮶㱁㸷㹝䁺䊓䏡䛈䟗䤱䩃䭄䰄世丗亊事仕似侍冟势勢卋卶叓呩嗜噬士奭嬕室崼市式弑弒忕恃戠戺拭揓是昰枾柹柿氏澨烒眂眎睗示筮簭翨舐舓襫視视試誓諟諡謚试谥豉貰贳軾轼适逝適遾釈释釋鈰鉃鉽銴铈飾餙餝饰鯷鳀齛𠀍]>shì; +[匙拾識识]>shi; +[収收荍]>shōu; +[㝊䭭垨守手掱艏首]>shǒu; +[㖟㥅䛵兽受售壽夀寿授涭狩獸璹痩瘦綬绶鏉]>shòu; +[书倏倐儵叔姝抒摅摴攄書杸枢梳樗樞橾殊殳疎疏紓綀纾舒蔬踈軗輸输陎鵨]>shū; +[䃞䝪䴰埱塾孰尗掓淑焂熟秫菽虪襡贖赎跾鸀]>shú; +[㒔㟬㯮㳆㻿䑕䞖䠱䩳婌属屬暑潻癙盨署薥薯藷蜀襩钃黍鼠鼡]>shǔ; +[㛸㜐㣽㶖㷂㽰㾁䆝䉀䎉䘤䜹䝂䢤䩱侸凁墅尌庶庻恕戍数數曙朮术束树樜樹沭漱潄澍濖竖竪絉荗蒁術袕裋豎述鉥錰鏣鮛鶐]>shù; +[刷唰]>shuā; +耍>shuǎ; +誜>shuà; +[孈摔縗缞衰]>shuāi; +甩>shuǎi; +[䢦卛咰帅帥繂蟀]>shuài; +[拴栓閂闩]>shuān; +[䧠涮]>shuàn; +[双孀孇欆礵艭雙霜驦骦鷞鸘鹴]>shuāng; +[㼽䗮䫪塽慡樉爽縔騻]>shuǎng; +[㦼䡯灀]>shuàng; +[誰谁]>shuí; +水>shuǐ; +[㽷䭨娷帨涗涚睡瞓祱稅税蛻蜕裞]>shuì; +[吮揗楯賰]>shǔn; +[㥧䀢䀵䑞橓瞚瞬舜蕣順顺鬊]>shùn; +[哾說説说]>shuō; +[䀥䈾䌃妁揱搠朔槊欶洬烁爍獡矟硕碩芍蒴鎙鑠铄]>shuò; +[丝偲凘厮厶司咝嘶噝媤廝思撕斯楒榹泀澌燍磃禗禠私簛籭糹絲緦纟缌罳蕬虒蛳蜤螄蟖蟴覗鉰鋖鐁颸飔騦鷥鸶]>sī; +𥐘>sí; +死>sǐ; +[㐌㕽㚶㣈㭒㸻㹑㾅䇃䎣䏤䦙亖佀俟儩兕嗣四姒娰孠寺巳杫柶汜泗泤洍涘瀃牭祀禩竢笥耜肂肆蕼貄賜赐釲鈻飤飼饲駟驷飼]>sì; +[倯凇娀崧嵩庺憽松枀柗梥檧淞濍硹菘蜙鍶锶鬆]>sōng; +[㧐㨦㩳䉥䜬傱嵷怂悚愯慫捒楤竦耸聳駷]>sǒng; +[㕬㮸䛦䢠宋訟誦讼诵送頌颂餸]>sòng; +[嗖廀廋捜搜摉溲獀艘蒐螋鄋醙鎪锼颼飕餿馊騪]>sōu; +[㖩㛐䈹䉤䏂䮟傁叜叟嗾擞擻櫢滫瞍籔薮藪]>sǒu; +[㵻瘶膄]>sòu; +嗽>sou; +[囌櫯甦稣穌苏蘇蘓酥]>sū; +俗>sú; +[㑉㑛㓘㔄㕖㜚㝛㨞㩋㪩㬘㯈㴋㴑㴼䃤䅇䌚䎘䏋䑿䔎䘻䛾䥔傃僳嗉塐塑夙嫊宿愫愬憟栜榡樕橚殐泝涑溯溸溹潥玊珟璛碿窣簌粛粟素縤肃肅膆蓿蔌藗蜶觫誎謖谡趚速遡遬鋉餗驌骕鱐鷫鹔]>sù; +[訴诉]>su; +[狻痠酸]>suān; +[㔯匴]>suǎn; +[祘笇筭算蒜]>suàn; +[倠哸夊攵毸浽滖濉熣眭睢綏绥芕荽荾虽雖鞖]>suī; +[㵦㻟䜔䢫遀隋随隨]>suí; +[䭉䯝巂瀡膸髄髓]>suǐ; +[㒸㞸㴚㻪㻽䅗䉌䍁䔹䠔䡵䥙亗埣嬘岁嵗旞檅檖歲歳澻煫燧璲瓍睟砕碎祟禭穂穗穟繀繸襚誶譢谇賥遂邃鐆鐩隧𡑞]>suì; +[孙孫搎槂狲猻荪蓀蕵薞飧飱]>sūn; +[㔼㡄㦏䁚损損榫笋筍箰簨鎨]>sǔn; +愻>sùn; +[傞唆嗍娑挱挲摍桫梭樎簑簔縮缩莏蓑趖蹜]>suō; +[㪽䂹䅴䈗䐝䖛䗢䞆䞽䣔䵀唢嗩所摵擌暛洓溑琐瑣璅瘷索褨鎍鎖鎻鏁鏼锁]>suǒ; +逤>suò; +嗦>suo; +[他嚃塌她它祂禢]>tā; +[㗳㺚塔墖榙]>tǎ; +[㒓㛥㣛㣵㧺㭼㯓㳠㳫㹺㿹䂿䈋䈳䌈䍇䍝䎓䑜䓠䜚䵬䶀䶁亣嗒嚺崉挞搨撻榻橽毾沓涾溻澾濌狧獭獺羍誻譶跶踏蹋蹹躂躢遝遢錔鎉鑉闒闥闼阘鞜鞳鮙鰨鳎龖龘]>tà; +[囼孡胎]>tāi; +[㒗㘆㙵㣍㬃㷘㸀䈚䑓䢰儓冭台坮嬯抬擡旲檯炱炲籉臺苔菭薹跆邰颱駘骀鮐鲐]>tái; +[㑷㥭䣭太夳忲态態曃汰泰溙燤肽舦酞鈦钛]>tài; +[啴嘽坍怹抩摊擹攤橝滩灘瘫癱緂舑舚譠貪贪]>tān; +[㲜㷋㽑䃪䉡䊤䕊倓嘾坛墰墵壇壜婒惔憛昙曇榃檀潭痰罈罎艢藫覃談譚谈谭貚郯醈醰錟鐔锬镡餤驔]>tán; +[㲭䆱䏙䞡䦔嗿坦忐憳憻毯璮禫膻菼袒襢贉醓黮]>tǎn; +[䐺䜖傝僋叹嘆埮探歎湠炭碳賧赕]>tàn; +[劏嘡坣汤湯羰蝪蹚鏜镗鼞]>tāng; +[㑽㙶㜍㭻㲥㼺䅯䉎䌅䣘䧜傏唐啺堂塘搪摚棠榶樘橖溏漟煻瑭磄禟篖糃糖糛膅膛蓎薚螗螳赯踼鄌醣鎕闛隚餹饄鶶]>táng; +[㒉㿩伖倘偒傥儻帑惝戃曭淌爣矘耥躺鎲钂镋𢠵]>tǎng; +[䟖摥烫燙趟鐋铴]>tàng; +[叨嫍弢慆掏搯槄洮涛滔濤瑫絛縚縧绦翢蜪詜謟轁鞱韜韬飸饕]>tāo; +[㹗䬞匋咷啕桃梼檮淘祹綯绹萄裪迯逃醄鋾錭陶鞀鞉饀駣騊鼗]>táo; +[䚯䚵䵚討讨]>tǎo; +[㚐套]>tào; +[㥂㧹忑忒慝棏特脦蚮蟘貣鋱铽鴏]>tè; +[膯鼟]>tēng; 
+[䒅䕨䠮䲍䲢儯幐滕漛疼籐籘縢腾蕛藤螣誊謄邆駦騰驣鰧]>téng; +[䴘剔梯踢鷈鷉]>tī; +[㖒㡗㣢䅠䔶䚣䛱䝰䣡䨑䬾偍厗啼嗁媂媞崹惿提漽珶瑅碮禔禵稊綈緹绨缇罤荑蝭褆謕趧蹄蹏遆醍鍗隄題题騠鮷鵜鶗鶙鹈鼶]>tí; +[䌡䣽䪆䶏体挮躰軆骵體]>tǐ; +[㗣㬱㯩䎮䙗䧅䯜䶑俤倜剃嚏嚔悌悐惕惖掦揥替歒殢涕睼籊薙褅趯迏逖逷銻鐟锑髢髰鬀鬄]>tì; +[屉屜]>ti; +[倎兲天婖沗添酟靔靝黇]>tiān; +[㧂䑚䟧䡒䡘䥖嗔塡填屇恬搷沺湉璳甛甜田畋畑盷窴胋菾闐阗鷆鷏]>tián; +[㐁㖭㙉㥏䄼䄽䐌䠄䣯䩄唺忝悿晪殄淟睓腆舔覥觍賟錪鍩锘靦餂]>tiǎn; +[㮇㶺掭煔]>tiàn; +[頲颋]>tian; +[佻庣恌挑旫祧聎蓨鮡]>tiāo; +[㑿㟘䎄䒒䖺䟭䩦䯾䱔䳂岧岹条條樤祒笤芀苕萔蜩趒迢鋚鎥鞗髫鰷鲦齠龆]>tiáo; +[㸠䠷嬥宨晀窕誂]>tiǎo; +[朓眺窱粜糶絩脁覜跳]>tiào; +[呫帖怗萜貼贴]>tiē; +[䥫鉄銕鋨鐡鐵铁锇驖鴩]>tiě; +[䴴䵿蛈飻餮]>tiè; +[厅厛听圢庁廰廳桯汀綎耓耵聴聼聽艼鞓]>tīng; +[㹶䗴䱓亭停婷嵉庭廷朾楟榳渟筳聤莛葶蜓蝏諪邒閮霆鯅鼮]>tíng; +[䋼䦐䵺侹挺梃涏烶珽町甼脡艇誔鋌铤]>tǐng; +[嗵恫樋炵熥狪痌蓪通]>tōng; +[㠉㠽㣚㣠㤏㮔㸗㼧㼿䂈䆚䮵䳋䴀䶱仝佟侗僮勭同哃峂峒峝庝彤晍曈朣桐氃浵潼烔燑爞犝獞瞳砼硐秱穜童筩粡絧罿膧艟茼蕫蚒詷迵酮鉖鉵銅铜餇鮦鲖鼨]>tóng; +[㛚㪌䆹姛捅桶筒統綂统]>tǒng; +[恸慟憅痛蘣衕]>tòng; +[偷偸婾媮鍮]>tōu; +[㓱㡏㢏䵉亠匬坄头投牏酘頭骰]>tóu; +[㪗䱏妵斢紏鈄钭黈]>tǒu; +[㖣䞬䟝透]>tòu; +[凸堗嶀廜捸涋痜禿秃突鋵]>tū; +[㭸㻌㻠㻬㻯䅷䖘䠈䣄䣝䤅䳜凃図图圖圗塗宊屠峹嵞庩徒怢捈揬梌湥潳瑹瘏稌筡腯荼葖蒤跿途酴鈯鍎馟駼鵚鶟鷋鷵鼵]>tú; +[吐唋土圡芏釷钍]>tǔ; +[兎兔堍菟鵵]>tù; +涂>tu; +[湍煓猯貒]>tuān; +[㩛䊜剸团団團慱抟摶槫漙糰鏄鷻]>tuán; +[䜝䵯畽疃黗]>tuǎn; +[彖褖]>tuàn; +[推蓷藬]>tuī; +[㢈㢑㾯㾽㿉㿗䀃䅪䍾䫋尵弚橔穨蘈蹪隤頹頺頽颓魋]>tuí; +[㞂㱣㾼俀僓腿蹆骽]>tuǐ; +[㦌㷟娧煺退駾]>tuì; +[吞呑啍噋旽暾朜涒炖焞]>tūn; +[㞘㩔㹠㼊坉屯忳臀臋芚訰豘豚軘霕飩饨魨鲀]>tún; +[㖔氽]>tǔn; +褪>tùn; +[仛佗侂侻咃托扥拕拖挩捝杔汑沰涶牠矺脫脱託讬飥饦馲驝]>tuō; +[㸰㸱㼠㾃䍫䡐䪑䭾䰿䴱坨堶岮沱沲狏砣砤碢紽袉詑跎迱酡阤陀陁鞁馱駄駞騨驒驮魠鮀鴕鸵鼉鼍鼧]>tuó; +[㟎䓕䲊妥媠嫷庹彵撱椭楕橢軃鰖鵎]>tuǒ; +[唾拓柝槖橐毤毻箨籜萚蘀跅]>tuò; +[駝驼]>tuo; +[劸嗗娲媧徍挖搲攨洼溛漥畖穵窊窪聉蛙鼃]>wā; +[㰪娃]>wá; +[㧚㼘佤咓瓦邷]>wǎ; +[䎳䚴䠚嗢婠淴腽膃袜襪韈韤]>wà; +哇>wa; +[㖞咼喎歪]>wāi; +[䠿外懀]>wài; +[剜帵弯彎湾潫灣蜿豌]>wān; +[㝴䯈丸刓完抏汍烷玩琓紈纨芄頑顽]>wán; +[㜶㽜㿸䂺䅋䖤䗕䘼䛷䝹䩊䳃倇埦婉宛惋挽晚晩晼梚椀浣澣琬畹皖盌睌睕碗綩綰绾脕莞菀萖踠輓鋄鋔錽鞔鯇鲩]>wǎn; +[㸘䥑万仴卍卐忨捥綄翫腕萬蟃貦贎鎫]>wàn; +[㑌尢尣尩-尫汪]>wāng; +[亡亾仼兦彺王莣蚟]>wáng; +[㓁㲿㳹㴏䋄䋞䰣往徃忹惘暀棢瀇網网罒罔菵蛧蝄誷輞辋迬魍]>wǎng; +[䛃䤑妄忘旺望朢迋]>wàng; +枉>wang; +[倭偎危喴威媁媙崴巍微愄揋椳楲渨溾烓煨燰碨萎葨葳蝛覣詴逶隇隈鰃鰄鳂]>wēi; +[㕒㣲㧑䉠䑊䔺䜅䝐䥩䧦唯囗囲围圍圩壝峗峞嵬帏帷幃惟桅欈沩洈涠湋溦潍潿濰瀢琟癓矀維维蒍蔿薇覹违違鄬醀鍏闈闱霺韋韦鮠]>wéi; +[㖐㞇㞑㟪㠕㢻㨊㬙㭏㱬䃬䈧䞔䪘䬐䬿䵋亹伟伪偉偽僞儰喡委娓寪尾崣嵔徫愇斖暐梶椲洧浘濻炜煒猥玮瑋痏痿硊磈緯纬腲艉芛苇荱葦蓶薳蘤蜲諉诿踓鍡隗韑韙韡韪頠颹骩-骫鮪鲔]>wěi; +[㥜㦣㷉䗽䘙䙿䜜䡺䪋䬑䭳䮹䲁䵳为位卫叞味喂媦嬒尉徻慰未渭為煟熭爲犚犩畏硙磑緭罻胃苿菋蔚薉藯蘶蜼螱衛衞褽謂讆讏谓贀躗躛軎轊錗鏏霨餧餵魏鮇鳚]>wèi; +[猬蝟]>wei; +[塭昷殟温溫瑥瘟瞃豱輼轀辒馧鰛鰮鳁]>wēn; +[䎹䎽䘇䰚彣文炆玟珳琝瘒紋纹聞芠蚉蚊螡蟁閿闅闦闻阌雯馼魰鳼鴍鴖鼤]>wén; +[㒚㖧㗃㝧㳷刎吻呡桽稳穏穩肳脗]>wěn; +[伆問妏抆揾搵汶渂熓璺紊莬问]>wèn; +[嗡嵡翁螉鎓霐鶲鹟]>wēng; +[㘢㜲䐥䤰勜塕奣暡浻滃瞈聬蓊]>wěng; +[瓮甕罋齆𦧅]>wèng; +[唩涡涹渦猧窝窩莴萵蜗蝸踒]>wō; +[㦱㧴䰀婑我捰]>wǒ; 
+[㠛㱧䁊䠎偓卧幄捾握斡楃沃涴渥濣焥肟臥蒦齷龌]>wò; +[乌剭呜嗚圬媉屋巫弙杇歍汙汚污洿烏瑦窏箼腛螐誣诬邬鄔鰞鴮鼿]>wū; +[㷻㹳㻍䍢䦜䫓䮏吳吴吾呉唔娪无梧毋洖浯無牾珸璑祦禑膴芜茣莁蕪蜈誈郚鋘鋙铻鯃鵐鷡鹀麌鼯]>wú; +[㐅㑄㬳㵲䒉䟼䡧䳇五仵侮倵儛午啎墲妩娒娬嫵嵨庑廡忤怃憮捂摀旿橆武潕玝珷甒碔舞躌鵡鹉]>wǔ; +[㐳㡔㽾䃖䎸䑁䛩䦍䳱俉兀勿卼坞垭塢奦婺寤屼岉嵍忢悞悟悮戊扤敄晤杌沕溩焐熃物痦矹窹粅芴蘁誤误迕逜遻鋈鎢钨阢隖雾霚霧靰騖骛齀兀]>wù; +[伍务務]>wu; +[䂀俙傒僁僖兮凞卥厀吸唏唽嘻噏夕奚娭媐嬆嬉屖嵠嶲巇希徆徯忚怷怸恓悉悕惁惜扱扸捿擕晞晰曦析桸榽樨橀欷歖氥浠淅溪潝烯焈煕熄熈熙熹-熻燨爔牺犀犧琋瓗皙睎睳瞦硒禧稀窸糦縘繥羲肹膝舾莃菥蒵蜥螅螇蟋蠵覀觹觽觿譆谿豀豨豯貕蹊酅醯錫鏭鑴锡隵雟騱驨鵗黊鼷凞𥋟]>xī; +[㔒㠄㤴㦻㩗㳧㵿㽯㿇䀘䏮䫣习媳嶍席昔棤椺槢檄欯漝焟焬獥瘜習蒠蓆薂袭裼襲覡觋謵趘郋鎴钖隰霫飁騽鰼鳛]>xí; +[䢄匸喜囍壐屣徙憘憙敼暿枲洗漇狶玺璽矖簁縰纚葈葸蓰蟢諰蹝躧酾釃霼鱚]>xǐ; +[㑶㙾㚛㞒㣟㤸㥡㭡㸍㹫䈪䊠䐼䓇䙽䚷䛥䜁䧍䨳䩤䮎䲪係卌呬咥喺嚊嚱墍屃屓屭忥怬恄戏戯戱戲晳椞歙汐滊潟澙熂犔盻矽磶禊稧穸系細綌繋繫细绤翕翖肸舃舄蕮虩衋覤謑赥赩郄郤鄎釳釸鎎闟阋隙隟餼饩鬩黖齂]>xì; +[息西]>xi; +[傄呷煆煵疨瞎虾蝦谺鍜閕颬]>xiā; +[㗇㘡㰺㽠䖎䖖䘥䛅䦖䪗䫗侠俠冾匣峡峽搳敮暇柙炠烚狎狭狹珨瑕硖硤碬磍祫笚筪縀縖翈舝蕸赮轄辖遐鎋陜霞騢魻鰕黠]>xiá; +閜>xiǎ; +[㗿㙤丅下吓嚇夏夓懗欱疜睱罅芐鏬鶷]>xià; +[仚僊先嘕奾嬐孅屳廯忺憸掀掺摻暹杴枮氙澖珗祅祆秈籼纎纖纤苮蓒蘐褼襳訮跹蹮躚酰銛鍁铦锨韯韱馦鮮鱻鲜]>xiān; +[㘅㘋㛾㡉㢺㭹㮭㳄㳭㵪䒸䕔䝨䦥䲗䶢咸唌啣嗛娴娹婱嫌嫺嫻弦憪涎燅甉痃痫癇癎瞯瞷礥絃羬胘舷葴藖蚿蛝衔衘諴賢贒贤輱銜閑閒闲鷳鷴鷼鹇鹹]>xián; +[㜪㧋㧥㫫㬎㭠㯀㶍㿅䉳䗾䘆䚚䜢䢾䥪䧋䧮冼尟尠崄嶮幰搟攇显櫶毨灦烍燹狝猃獫獮玁癣癬礆禒筅箲藓蘚赻跣銑鍌铣险険險韅顕顯鼸齴]>xiǎn; +[㔵㡾㦓㩈㪇㬗㺌䀏䁂䃱䃸䉯䏹䞁䤼䧟䨘䨷䱤䵇䶟伣俔僩僴县咞哯垷姭娊宪岘峴憲撊晛橌涀瀗献獻现現県睍粯糮絤綫線縣线缐羡羨腺臔臽苋莧蚬蜆袨誢豏轞鋧錎限陥陷霰餡馅麲𠜎]>xiàn; +仙>xian; +[㐮乡厢啌廂忀欀湘瓖相箱緗纕缃膷芗葙薌襄郷鄉鄊鄕鑲镶香驤骧麘]>xiāng; +[㟄䔗䜶佭庠栙祥絴翔詳详跭祥]>xiáng; +[㗽䊑䖮享亯响奛嶑想晑響飨餉饗饟饷鮝鯗鱶鲞]>xiǎng; +[㟟䐟䢽像向嚮塂巷恦曏橡珦萫蚃蟓蠁襐象鐌闀闂項项鱌]>xiàng; +[呺哓哮啋嘋嘐嘵嚣嚻囂婋宯宵庨彇憢撨枭枵梟櫹歊毊消潇潚瀟灱灲烋焇獢痚痟硝硣穘窙箫箾簘簫綃绡翛膮萧萷蕭藃虈虓蟏蟰蠨踃逍銷销霄驍骁髇魈鮹鴞鸮]>xiāo; +[㑾㚣㬵䒝洨笅訤誵郩]>xiáo; +[䒕䥵小晓暁曉皢筱筿篠謏]>xiǎo; +[㔅㗛㤊㹲䊥䕧俲傚効咲啸嘨嘯娎孝恔效敩斅斆校歗涍熽笑肖藠誟鞩]>xiào; +[些歇猲薛蠍褉]>xiē; +[㖿㙝㥟㨙㩉㩦㩪㭨䔑䕵䙎䡡䭎偕劦勰协協嗋垥奊恊愶慀拹挟挾搚携撷擷攜斜旪熁燲籺絜綊緳纈缬翓胁脅脇脥膎蝢衺襭諧谐邪鋣鞋鞵龤𩋘𩋧]>xié; +[㕐㝍䥱䥾写冩寫藛躠]>xiě; +[㒠㓔㔎㖑㙰㞕㣯㣰㦪㨝㰔㰡㳦㳿㴬㴮㴽㸉㽊䉏䉣䊝䕈䙊䙝䚳䚸䢡䦏䦑䩧䲒䵦亵伳偰卨卸塮妎媟屑屟屧嶰廨徢懈揳斺暬械楔榍榭泄泻洩渫澥瀉瀣灺焎燮爕獬疶祄禼糏紲絏絬緤绁缷薤蟹蠏衸褻謝谢躞邂鞢韰駴骱齘齥]>xiè; +[䜣俽兓噷妡廞心忻惞新昕杺欣歆炘盺芯莘薪訢辛鈊鋅鑫锌馨騂骍]>xīn; +[㚯䰼攳樳襑鄩]>xín; +伈>xǐn; +[㐰㛛㭄䒖䚱䛨䜗伩信卂囟孞焮煡脪舋衅訫軐釁阠顖馸]>xìn; +[垶惺星曐煋猩瑆皨篂腥蛵觪觲鍟馫鮏鯹]>xīng; +[㐩㓝㣜㼛䣆䤯侀刑坓型娙形洐濴烆硎筕胻行邢郉鈃鉶銒鋞钘铏陉陘雽餳饧]>xíng; +[㝭㨘㮐䳙擤渻睲醒]>xǐng; +[㓑㼬䁄䂔䓷䛭䰢倖兴姓婞嬹幸性悻杏涬緈臖興荇莕]>xìng; +[㐫兄兇凶匈哅忷恟汹洶胷胸訩詾讻賯]>xiōng; +[䧺熊赨雄]>xióng; +[夐敻詗诇]>xiòng; +[休俢修咻庥樇烌羞脩臹茠蓚貅銝鎀鏅饈馐髤髹鵂鸺]>xiū; +[㱙朽糔綇]>xiǔ; +[㗜㾋嗅嘼岫峀溴珛琇秀綉繍繡绣袖褎褏銹鏥鏽锈齅]>xiù; +[偦吁呴嘘噓墟媭嬃嬬幁戌揟旴晇楈欨欰歔歘疞盱稰籲縃繻胥蕦虗虚虛蝑訏譃鑐需須须驉鬚魖]>xū; +[䍱俆徐蒣]>xú; +[㑔㑯㞰㥠䅡䔓冔喣姁昫栩湑煦珝糈許詡諝许诩谞鄦醑]>xǔ; +[㐨㕛㖅㗵㘧㚜㜅㜿㞊㤢㦽㰲㵰㷦㺷㾥䂆䋶䘏䙒䛙䜡䢕䣱䣴䦗䦽䬔䱛䳳伵侐勖勗卹叙垿壻婿序怴恤慉敍敘旭-旯朂槒殈汿沀洫溆漵潊烅珬盢瞁瞲稸窢絮緒緖續绪续聟芧蓄藇藚訹賉酗銊頊顼魆魣鱮]>xù; 
+[佡儇吅咺塇媗宣弲愃愋揎昍晅暄梋煊瑄睻矎禤翧翾萱蕿藼蝖蠉諠諼譞谖軒轩鋗鍹鶱]>xuān; +[㘣㳬㹡䁢䗠䮄䲂䲻䴉䴋伭妶嫙悬懸旋暶檈漩玄玹琁璇璿蜁誸鹮]>xuán; +[㾌䍻䠣喧暅烜选選]>xuǎn; +[㧦㳙䍗䘩䝮䧎䩙䩰埍怰昡楥楦泫渲潠炫眩眴碹絢縼繏绚蔙衒讂贙鉉鏇铉镟鞙颴駽]>xuàn; +[削吙屵蒆辥靴鞾]>xuē; +[㖸㧒㶅㿱䫻䱑乴学學峃嶨斈泶澩燢茓觷雤鷽鸴𥄴]>xué; +[㡜䨮雪鱈鳕]>xuě; +[㞽䎀䤕䫼䬂䭥吷坹岤桖泬烕穴血謞趐]>xuè; +[勋勛勲勳坃埙塤壎壦曛焄熏燻獯矄纁臐蔒薫薰蘍醺𤑕]>xūn; +[㜄㝁㨚㰊㰬㽦䋸䖲䙉偱噚寻尋峋巡廵循恂挦撏旬杊枔栒桪槆橁毥洵浔潃潯灥燖珣璕畃紃荀荨蕁蟳詢询郇馴駨驯鱏鱘鲟]>xún; +[㢲䛜䞊䭀伨侚噀埈奞巺巽徇殉汛爋狥蕈訊訙讯賐迅迿逊遜鑂陖韗顨鵔鵕]>xùn; +[訓训]>xun; +[丫劜压圧壓孲庘押椏鴉鴨鵶鸦鸭]>yā; +[㧎䄰䊦伢厓堐岈崕崖枒桠涯漄牙犽猚笌芽蚜衙齖]>yá; +[㿿䪵厊哑唖啞庌痖瘂蕥雃雅]>yǎ; +[㝞㰳䅉䝟䢝䦪䯉䰲䵝亚亜亞俹嚈圠埡娅婭挜掗揠氩氬猰玡砑稏窫聐襾訝讶軋轧迓錏鐚铔齾]>yà; +呀>ya; +睚>yái; +[偣剦啱嫣嬮崦懕懨淹漹烟焉煙猒珚篶胭臙菸蔫鄢酀醃閹阉黭]>yān; +[㗴㘖㘙㫟㳂㶄㿕㿼䀋䀽䂴䇾䊙䌪䓂䕾䖗䗡䢥䤷䱲䶮严厳嚴埏塩壛壧妍姸娫娮孍岩嵒嵓巌巖巗延揅昖楌檐櫩沿湺炎狿琂盐研硏碞礹筵簷綖莚葕蔅虤蜒言詽讠郔鈆閻阎顃顏顔颜鹽麙麣]>yán; +[㓧㕣㚧㢂㫃㭺䁙䄋䊻䎦䗺䣍䲓乵俨偃儼兖兗剡匽厣厴噞夵奄姶嵃嶖巘巚弇愝戭扊抁掩揜曮棪椼檿沇渰渷演琰甗眼硽罨萒蝘衍裺褗躽遃郾酓隒験魇魘鰋鶠黡黤黶鼴鼹龑]>yǎn; +[㛪㢛㦔㬫㷔㷳㷼䂩䅧䑍䜩䢭䨄䭘䳛䳺䴏偐傿厌厭咽唁喭嚥堰墕妟姲婩嬊嬿宴彥彦恹敥晏暥曣椻滟灎灔灧灩焔焰焱燄燕爓牪砚硯艳艶艷覎觃觾諺讌讞谚谳豓豔贋贗赝酽醼釅閆闫隁雁餍饜騐騴驗驠验鳫鴈鴳鷃鷰]>yàn; +[佒咉央姎柍殃泱眏秧紻胦鉠鴦鸯]>yāng; +[㟅㬕䁑䖹䬗佯劷垟崵徉扬揚敭旸昜暘杨楊洋炀烊煬玚珜瑒疡瘍眻禓羊羏蛘諹輰鍚鐊阦阳陽霷颺飏鰑鸉]>yáng; +[㔦䇦䑆䒋䬬仰傟养勨坱岟慃懩抰攁氧氱炴痒癢蝆鞅養餋駚]>yǎng; +[㨾㺊㿮䬺䭐䵮怏恙样様樣漾瀁羕詇]>yàng; +[吆喓妖幺枖楆腰葽訞邀]>yāo; +[㨱㮁䂚䆙䉰䋂䌊䌛䍃䔄䖴䚺䚻䢣䬙倄傜嗂垚堯姚媱尧尭峣崤嶢嶤徭愮揺搖摇暚榣殽淆烑爻猇猺珧瑤瑶窑窯窰肴蘨謠謡谣軺轺遙遥邎銚铫颻飖餆餚鰩鳐𨍳]>yáo; +[㟱㢓㫏㫐㴭䁏䁘䆞䴠䶧仸偠咬夭婹宎岆崾抭杳柼榚殀溔眑窅窈窔舀苭蓔闄騕鷕鼼齩]>yǎo; +[㔽㝔㞁㵸㿑㿢䋤䑬䙅曜熎燿獟矅穾筄耀艞药葯薬藥袎要覞讑趭鑰钥靿鷂鹞]>yào; +[倻噎晔蠮]>yē; +[䓉䥺峫捓揶擨椰琊瑘耶釾鎁铘]>yé; +[㙒也冶吔嘢埜壄漜野]>yě; +[㐖㖡㖶㗼㙪㝣㥷㩎㪑㱉㸣䈎䤳䤶䥟䥡䧨䭟䲜业亱偞僷叶啘墷夜嶪嶫忦擛擪擫曄曅曗曵枼枽楪業殗殜液澲烨煠燁爗皣瞱瞸葉謁譺谒邺鄴鍱鎑鐷靥靨頁页餣饁馌驜鵺鸈]>yè; +[爷爺]>ye; +[㘈一伊依医吚咿噫壱壹夁嫛弌悘揖檹欹毉泆洢溰漪燚猗瑿祎禕稦繄蛜衣譩郼醫陭餏饻鷖鹥黟黳𣘦]>yī; +[㚦㝖㞔㥴㦾㰘㺿䄬䇵䐅䐖䖊䞅䩟䬁䬮䮊䱌䲑䴊乁仪侇儀冝凒匜咦圯夷姨宐宧寲峓嶬巸弬彛-彞怡恞扅暆杝枱柂桋椸歋沂沶洟熪珆瓵疑痍眤眱移簃羠胰苐萓蛦螔袘袲觺訑詒謻讉诒貤貽贻跠迻遗遺酏鈶鏔頉頤顊颐飴饴鮧鴺鸃]>yí; +[㕥㠯㩘㫊㰝㰻䝝䧧䰙乙以倚偯崺已庡扆扡掜攺旑旖晲栘椅檥矣礒笖肔胣舣艤苡苢蚁螘蟻衪輢轙迆迤逘釔鉯銥钇铱顗鳦齮]>yǐ; +[㐹㑊㑜㑥㓷㔴㖂㘁㘊㙠㙯㚤㛕㜋㜒㡫㡼㢞㣂㣻㦉㦤㱅㱲㲼㳑㴁㴒㵝㵩㶠㹓㹭㽈䄁䄿䆿䇩䉨䋚䋵䌻䎈䓃䓈䓹䔬䕍䖁䗑䗟䗷䘝䘸䝘䝯䢃䣧䦴䬥䭂䭇䭞䭿䯆䱒䴬乂义亄亦亿仡伇伿佚佾俋億兿刈劓劮勚勩呓呭呹唈嗌囈圛垼埶埸墿奕嫕嬑嬟寱屹峄嶧帟帠幆廙异弈弋役忆怈怿悒悥意憶懌懿抑抴挹捙掖撎敡斁易晹曀曎曳杙枍枻栧棭榏槷檍欭歝殔殪殹毅洂浂浥浳湙溢潩澺瀷炈焲熠熤熼燡燱獈玴異疫痬瘗瘞瘱癔益睪瞖秇穓竩緆縊繶繹绎缢羛義羿翊翌翳翼肄肊腋膉臆艗艺芅苅蓺薏藙藝蘙虉蛡蜴螠袣裔裛褹襼訲訳詍詣誼譯議讛议译诣谊豙豛豷跇軼轶逸邑醳醷釴鈠鎰鐿镒镱阣隿霬靾鞥顡饐駅驛驿骮鯣鶂鶃鷧鷾黓齸益逸𥜥]>yì; +宜>yi; +曕>yiàn; +鴁>yiāo; +[侌凐喑噾囙因垔堙姻婣峾廕愔慇摿歅殷氤洇洕湮溵瘖禋秵筃絪緸茵蒑裀諲銦铟闉阥阴陰陻隂霒霠鞇音韽韾駰骃黫]>yīn; +[㕂㖗㙬㝙㞤㸒㹜㹞䓄䖜䪩冘吟唫噖嚚圁垠夤婬寅崟崯斦檭殥泿淫滛烎犾狺璌碒苂荶蔩蟫訔訚訡誾鄞釿鈝銀银霪鰥鳏鷣]>yín; +[㐆㡥㥯㥼㦩㧈㱃㾙䇙䌥䒡䤺䨸乚尹嶾廴引檃櫽淾濦瘾癮磤紖縯纼蘟蚓螾讔趛鈏隐隠隱靷飮飲饮馻]>yǐn; +[㣧㪦㴈㼉䕃䚿䡛䤃䲟印垽堷慭憖憗懚朄檼湚濥猌癊窨胤茚荫蔭酳鮣]>yìn; 
+[偀嘤嚶婴媖嫈嬰孆孾应応應撄攖朠桜楧樱櫻渶煐瑛璎瓔甇甖碤礯緓纓绬缨罂罃罌膺英莺蘡蝧蠳褮譻鍈鑍锳霙韺鴬鶧鶯鷪鷹鸎鸚鹦鹰]>yīng; +[㢍㨕㴄㵬㹙㹚㿘䁝䃷䑉䕦䪯僌営塋嬴巆廮攍楹櫿溁滎潆濙濚瀛瀠瀯熒營瑩盁盈籝籯縈茔荥荧莹萤-萦萾藀蛍蝿螢覮謍贏赢迎]>yíng; +[㯋㲟䀴䨍䭊䭗巊影梬浧潁瀴璄瘿癭矨穎郢頴颍颕颖]>yǐng; +[㑞䁐䙬䤝噟媵摬映暎滢瀅硬膡蓥譍賏鎣鐛鞕]>yìng; +[蝇蠅]>ying; +[哟唷喲]>yō; +[㐯傭嗈噰墉壅庸廱慵拥擁澭瀜灉痈癕癰蕹邕郺鄘鏞镛雍雝饔𠆌]>yōng; +[㝘䗤喁嫆嫞槦滽牅顒颙鱅鳙]>yóng; +[㙲㦷㷏㽫䞻俑傛勇勈咏埇塎嵱彮恿悀惥愑愹慂搈柡栐永泳涌湧甬禜臃蛹詠踊踴鯒鲬]>yǒng; +[㞲㶲佣用苚醟]>yòng; +[优優呦嚘幽忧怮悠憂攸櫌瀀纋耰鄾麀]>yōu; +[㒡㕱㘥㚭㛜㫍㳺㻀㽕䑻䖻䚃䢊䢟囮尤峳怣斿楢櫾沋油浟游滺犹猶猷由疣秞肬莤莸蕕蚰蝣訧輏輶逌逰遊邮郵鈾铀駀魷鮋鯈鱿鲉]>yóu; +[㮋㰶㾞䅎䒴䬀䱂䳑丣偤卣唀岰庮懮有栯梄槱泑湵牖禉羐羑聈苃莠蚴蜏酉銪铕黝]>yǒu; +[㓜㕗㤑㹨㺠䀁䆜䛻䞥亴佑侑又右囿姷宥峟幼柚牰狖祐糿誘诱貁迶酭釉鴢鼬]>yòu; +友>you; +[唹扜毹毺淤瘀盓穻箊紆纡虶迂迃陓]>yū; +[㒜㚥㤤㥔㥚㥥㦛㪀㬂㬰㳛㶛㷒㺞㺮㼶䁩䂛䃋䄏䄨䍂䏸䐳䔡䗨䜽䢓䩒䰻䱷䲣于亐伃余俞兪堣堬妤娛娯娱媀嬩崳嵎嵛愉愚扵揄於旟杅桙楡楰榆欤歈歟歶渔渝湡漁澞狳玗玙瑜璵畬畭畲盂睮硢禺窬竽籅緰羭腧腴臾舁舆艅茰萸蕍蘛虞蝓螸衧褕覦觎諛謣谀踰輿轝逾邘鄃釪鍝隃隅雓雩餘馀骬髃魚鮽鰅鱼鵌鷠鸆鸒]>yú; +[㑨㒁㔱㙑㝢㠘㡰㣃㲾㺄㼌䣁䥏䨞与予伛俁俣偊傴噳圄圉宇寙屿峿嶼庾懙敔斔斞楀瑀瘐祤禹窳篽羽聥與萭蓹蘌螤語语貐鄅酑雨齬龉羽]>yǔ; +[㠨㳚㽣䁌䂊䆷䈅䉛䋖䍞䖇䘘䘱䛕䢩䨒䬄䮇䮙䴁䵫俼哊喅喐喩喻噊圫域堉墺妪嫗寓峪嶎庽彧御忬悆悇惐愈慾戫昱棛棜棫櫲欎欝欥欲毓汩浴淢淯滪澦灪焴煜燏燠爩狱獄獝玉琙瘉癒矞砡硲礇礖礜禦秗稢稶穥籞緎繘罭聿肀育芋芌茟萮蒮蓣蕷薁蜟蜮裕誉諭譽谕豫軉輍逳遇遹郁醧鈺銉鋊錥鐭钰閾阈隩霱預预飫饇饫馭騟驈驭鬰鬱鬻魊鳿鴥鴪鵒鷸鹆鹬黦龥]>yù; +[冤剈噮囦嬽寃悁惌棩淵渁渆渊渕灁眢箢肙葾蒬蜎蜵裷鋺駌鳶鴛鵷鸢鸳鹓鼘鼝]>yuān; +[㟶㥳㹉䖠䬧䲮䳒䳣元原厡厵员員园圆圎園圓圜垣塬妧媴嫄岏援杬榞榬橼櫞沅湲源溒爰猨猭猿獂笎緣縁缘羱芫萲蒝薗蚖蝝蝯螈袁貟贠轅辕邍邧鎱騵魭鶢鶰黿鼋]>yuán; +[䛄䛇䩩妴远逺遠]>yuǎn; +[㤪㥐㭇䅈䏍䬇䬼傆夗媛怨愿掾瑗禐苑衏裫褑褤謜院願]>yuàn; +[曰曱箹約约]>yuē; +[哕噦]>yuě; +[㜧㜰㬦㰛㹊䋐䖃䟠䠯䡇䢁䢲䤦䥃䶳刖妜岄岳嶽恱悅悦戉抈捳月枂樾泧瀹爚狘玥礿禴篗籆籥籰粤粵蘥蚎蚏越跀跃躍軏鈅鉞钺閱閲阅鸑鸙龠]>yuè; +[奫晕暈氲氳煴緼縕缊蒀蒕蝹贇赟]>yūn; +[㚃㜏䉙䢵云勻匀妘愪昀榅榲橒沄涢溳澐熅熉畇眃秐筠筼篔紜縜纭耘耺芸荺蒷蕓郧鄖鋆雲饂]>yún; +[䆬䇖䞫䡝䤞䦾䨶䪳傊允喗抎殒殞狁玧磒褞賱輑鈗阭陨隕霣]>yǔn; +[㚺㞌㟦䚋䩵䲰䵴囩夽孕恽惲愠慍枟熨緷腪蕰蕴薀藴蘊运運郓鄆酝醖醞韞韫韵韻鶤]>yùn; +[匝咂帀抸沞迊鉔]>zā; +[䕹䞙䪞偺囐嶻杂砸磼襍雑雜雥韴魳]>zá; +[咋鮺鲝]>zǎ; +[哉栽渽災灾烖賳𢦏]>zāi; +[㞨㱰㴓䏁䣬䮨宰崽縡]>zǎi; +[䵧侢傤儎再在扗載载酨]>zài; +[兂簪簮鐕]>zān; +[倃咱喒糌]>zán; +[㤰儧儹噆寁拶撍攅攒攢昝桚沯礸禶趱趲]>zǎn; +[㜺㟛㣅囋暂暫欑濽灒瓉瓒瓚穳襸讃讚賛贊赞鄼酂酇錾鏨]>zàn; +[匨牂羘臜臢蔵賍賘贓贜赃髒]>zāng; +臧>záng; +[駔驵]>zǎng; +[㘸塟奘弉脏臓臟葬銺]>zàng; +[傮糟蹧遭]>zāo; +[䥣凿醩鑿]>záo; +[䲃早枣棗澡璪繰缲薻藻蚤]>zǎo; +[㲧㿷䜊唕唣噪慥梍灶燥皁皂竃竈艁譟趮躁造髞]>zào; +[㖽㣱㳻䃎䇥䕉䕪䰹䶦则則啧嘖崱帻幘択择捑擇沢泽溭澤皟瞔笮箦簀耫舴荝萴蠈蠌諎謮責賾责赜迮鰂鲗]>zé; +[㳁仄夨庂昃昗汄稄]>zè; +[戝賊贼鱡]>zéi; +怎>zěn; +[䫈譖譛谮]>zèn; +[増增憎橧熷璔矰磳繒缯罾譄驓]>zēng; +㽪>zěng; +[䙢䰝甑贈赠]>zèng; +[偧剳哳喳扎抯挓揸摣柤楂樝渣猹皶皻觰齄齇]>zhā; +[㱜㴙䥷䵵劄札牐甴箚紥紮蚻蠿譗鍘铡閘闸霅]>zhá; +[㒀㡸㷢䋾䕢䛽䵙厏眨苲鮓鲊]>zhǎ; +[䖳䞢乍吒咤奓宱搾柵栅榨溠灹炸砟簎膪蚱詐诈醡]>zhà; +[捚摘斋斎榸齋]>zhāi; +[㡯宅礋]>zhái; +[窄鉙飵]>zhǎi; +[㩟债債寨瘵砦]>zhài; +[厃嶦旃旜栴毡氈氊沾瞻粘覘觇詀詹譫讝谵趈邅鉆霑飦饘驙鱣鳣鸇鹯]>zhān; +薝>zhán; 
+[㞡㠭䁴䎒䟋䡀䩅䱼嫸展崭嶃嶄拃搌斩斬椫榐樿橏琖皽盏盞蹍輾辗醆颭飐魙]>zhǎn; +[㟞㺘㻵䋎䗃䘺䪌䱠佔偡占嶘战戦戰栈桟棧椾湛站綻绽菚蘸虥虦覱蹔輚轏]>zhàn; +[傽墇嫜张張彰慞暲樟漳獐璋章粻蔁蟑遧鄣餦騿鱆麞]>zhāng; +[仉掌涨漲礃绱長长鞝]>zhǎng; +[㕩㙣㽴丈仗嶂帐帳幛扙杖涱痮瘬瘴瞕緔胀脹賬账障]>zhàng; +[妱巶招昭柖盄窼釗鉊鍣钊駋]>zhāo; +[㕚㺐䈃䝖找沼爪瑵菬]>zhǎo; +[㡽㨄㷖䃍䈇䍜䍮䮓兆召垗旐曌枛櫂炤照燳狣瞾笊箌罩羄肁肇肈詔诏赵趙雿鵫]>zhào; +[嗻嫬晢晣螫遮]>zhē; +[㞏㪿㯰䊞䎲䐑䐲䓆䝃䝕乇厇哲啠喆嚞埑悊折摺歽瓋砓磔籷耴虴蜇褶襵詟謫謺讁讋讘谪輒輙辄銸鮿鸅]>zhé; +[啫禇者赭踷]>zhě; +[䂞䏳䗪䠦䩾䵭柘檡浙烢蟅这這鷓鹧]>zhè; +[着著蔗]>zhe; +[侦偵堻媜嫃寊帪揁搸斟栕桢桭楨榛樼溱潧澵獉珍珎瑊甄眞真砧碪磌祯禎禛箴籈縝缜胗臻蒖蒧蓁薽貞贞轃遉酙針鉁錱鍼针靕駗鱵]>zhēn; +[㐱㪛䂦䂧䑐䪴䫬弫抌抮昣晸枕畛疹眕稹笉紾絼縥聄袗裖診诊軫轸辴鬒黰]>zhěn; +[㓄㣀㮳㯢㴨䊶䏖䝩䟴䨯䲴䳲侲圳塦挋振揕敒敶朕栚瑱眹蜄誫賑赈鎭鎮镇阵陣震鴆鸩]>zhèn; +[争佂埩姃峥崝崢征徰徴徵怔挣掙炡烝爭狰猙癥眐睁睜筝箏篜聇蒸諍诤踭鉦錚鏳钲铮鬇]>zhēng; +[䡕愸抍拯掟撜整氶糽]>zhěng; +[㡠㡧㱏㽀䂻䈣䛫䥌䥭䦛䦶帧幀政正症証證证郑鄭鋥锃鴊𠔻]>zhèng; +[之卮吱坧墌嬂巵搘支枝枳栀梔椥榰汁汥疧知祇祗祬秓秖秪綕織织肢胑胝脂芝蜘衼跖隻馶鳷鴟鴲鵄鸱鼅]>zhī; +[㙷㜼㨁䐈䟈䱥䵂侄値值儨劕埴執妷姪慹懫执摭擿柣桎植樴殖漐犆瓆瓡直禃秷稙絷縶聀职職膱蘵蛰蟄蟙褁貭踯蹠躑軄釞馽]>zhí; +[㧻㮹㲛䅩䇛䌤䎺䛗䳅劧厎只咫址坁夂帋恉扺抧指旨晊栺止沚洔淽滍疻砋祉紙纸芷藢襧訨趾軹轵酯阯黹]>zhǐ; +[㕄㗌㗧㘉㛿㝂㣥㨖㴛䄺䆈䇽䉅䉜䏄䏯䐭䑇䓌䕌䚦䝷䞃䡹䥍䦯䫕䬹䭁䱨偫傂制厔垁娡寘峙崻帙帜幟庢庤廌彘徏徝志忮憄懥挃挚掷搱摯擲旘智梽楖櫍治洷淛滞滯潌炙熫狾猘璏畤疐痔痣礩祑秩秲稚稺穉窒筫紩緻置翐胵腟膣至致臸芖蛭螲袟袠製覟觗觯觶誌豑豒豸質贄质贽跱踬躓輊轾迣郅銍鋕鑕铚锧陟雉駤騭騺驇骘鯯鴙鷙鸷]>zhì; +[中伀刣妐幒彸忠忪柊汷泈潨炂煄盅籦終终舯蔠螽衳衷蹱鈡銿鍾鐘钟锺鴤]>zhōng; +[㣫冢喠塚尰歱瘇种種肿腫踵塚]>zhǒng; +[㐺㲴䱰乑仲众偅堹妕媑湩狆眾祌筗緟茽蚛衆衶諥重]>zhòng; +[侜周啁喌州徟洀洲淍烐珘盩矪粥脽舟譸诪賙赒輈輖辀週銂騆鵃鸼]>zhōu; +[㛩妯軸轴]>zhóu; +[䎻䖞晭疛睭箒肘菷鯞]>zhǒu; +[㑇㑳㔌㥮㼙㾭䇠䈙䋓䐍䛆䩜䶇伷僽冑呪咒咮噣宙昼晝甃皱皺籀籒籕粙紂縐繇纣绉胄荮葤詋酎駎驟骤𤏲]>zhòu; +帚>zhou; +[侏劯朱株槠橥櫧櫫洙潴瀦猪珠硃祩秼絑茱蛛蝫蠩袾誅諸诛诸豬跦邾銖铢鮢鯺鴸鼄猪諸]>zhū; +[㔉䌵䕽䘚䟉䥮䮱劚灟炢烛燭爥窋竹竺笁笜築篫舳茿蓫蠋蠾趉躅逐鱁]>zhú; +[㵭䘢䰞丶主嘱囑壴孎宔拄斸欘渚濐煑煮瘃瞩矚砫罜詝貯贮陼麈]>zhǔ; +[㑏㝉㤖㧣㫂㹥㺛㾻㿾䇡䇧䍆䎷䐢䝒䝬䬡䭖伫佇住助坾嵀杼柱柷樦殶注炷疰眝祝竚筑筯箸紵紸纻羜翥苧莇蛀註諔跓軴鉒鋳鑄铸飳馵駐驻麆]>zhù; +[抓挝撾檛膼髽]>zhuā; +拽>zhuāi; +[跩𨋯]>zhuǎi; +[专叀塼嫥専專瑼甎砖磚篿耑膞蟤跧鄟顓颛鱄鷒]>zhuān; +[䡱孨竱転轉转]>zhuǎn; +[䉵僎啭囀堟撰灷瑑篆篹籑縳腞蒃襈譔賺贃赚饌馔]>zhuàn; +[妆妝娤庄桩梉樁粧糚荘莊装裝]>zhuāng; +[壮壯壵撞焋状狀]>zhuàng; +[娺椎追錐锥隹騅骓鴭鵻]>zhuī; +沝>zhuǐ; +[䄌坠墜惴桘甀硾礈窡笍縋缒膇諈譵贅赘鑆餟鵽]>zhuì; +[綴缀]>zhui; +[宒棆窀肫衠諄谆迍]>zhūn; +[准埻準綧隼鶽]>zhǔn; +稕>zhùn; +[㑁倬卓捉桌棹穛穱𠭴]>zhuō; +[㣿㪬㭬㺟䅵䕴䶂剢叕啄啅圴妰彴拙撯擆擢斀斫斮斱斲斵晫梲棁棳椓槕櫡汋泎浊浞涿濁濯灂灼炪烵焯琸硺禚篧籗籱絀绌罬茁蠗諑謶诼踔酌鋜錣鐯鐲镯鷟]>zhuó; +丵>zhuǒ; +劅>zhuò; +[兹呲咨嗞姕姿孖孜孳孶嵫栥椔淄湽滋澬玆甾禌秶稵粢紎緇缁茊茲菑葘諮谘貲資赀资趦輜輺辎鄑鈭錙鍿鎡锱镃頾頿髭鯔鲻鴜鶅鶿鷀鹚鼒齍齜龇]>zī; +[㜽㧗㺭䔂䘣仔吇呰啙姉姊杍梓榟滓矷秄秭笫籽紫耔胏芓茈虸訾訿釨]>zǐ; +[㰣㰷㱴䅆䐉倳剚字恣扻渍漬牸眥眦胔胾自茡]>zì; +子>zi; +[倧堫宗嵏嵕嵸惾朡棕椶熧猣磫稯綜緃緵综翪腙艐葼蝬豵踨踪蹤鑁騌騣骔鬃鬉鬷鯮鯼]>zōng; +[㢔㷓㹅䰌偬傯总惣愡捴揔搃摠熜総縂總蓗]>zǒng; +[䍟䝋倊昮猔疭瘲碂粽糉糭縦縱纵]>zòng; 
+[媰掫棷棸箃緅菆諏謅诌诹邹郰鄒鄹陬騶驺鯫鲰黀齱齺]>zōu; +[走赱]>zǒu; +[㔿㵵䠫奏揍]>zòu; +[租蒩]>zū; +[㞺㰵㲞䅸䚝䯿䱣卆卒哫崒崪捽族稡足踤踿蹵鎐鏃镞]>zú; +[䔃䖕俎爼珇祖組组詛诅阻靻]>zǔ; +[躜躦鑽钻]>zuān; +[䂎䌣籫繤纂纉纘缵]>zuǎn; +[䤸揝攥鑚饡]>zuàn; +[厜嶉樶纗羧脧蟕]>zuī; +[䮔嘴噿嶊嶵洅濢璻觜]>zuǐ; +[㝡㠑㰎䘹晬最栬槜檇檌祽絊罪蕞襊辠酔酻醉鋷錊]>zuì; +[尊嶟樽繜罇遵鐏鷷]>zūn; +[僔噂壿撙譐]>zǔn; +[捘燇銌鱒鳟]>zùn; +嘬>zuō; +[㸲䎰䝫䞰昨椊琢秨稓筰葃鈼]>zuó; +[㝾佐咗唨左毑繓]>zuǒ; +[㑅㘀㘴㛗㭮䋏䔘作侳做唑坐夎岝岞座怍柞祚糳胙葄袏酢阼]>zuò; +# End RAW data for converting CJK characters + +# fallbacks + +## | yi < i; +## | wu < u; +## | bi < b; +## | ci < c; +## | di < d; +## | fu < f; +## | gu < g; +## | he < h; +## | ji < j; +## | ku < k; +## | li < l; +## | mi < m; +## | pi < p; +## | qi < q; +## | l < r; +## | si < s; +## | ti < t; +## | f < v; +## | wa < w; +## | xi < x; +## | yi < y; +## | zi < z; + +# filter out the half-width hangul +# :: [^\uFFBE-\uFFEE] fullwidth-halfwidth (); +## :: (lower) ; diff --git a/icu4c/source/data/translit/Han_Spacedhan.txt b/icu4c/source/data/translit/Han_Spacedhan.txt new file mode 100644 index 00000000000..85607b58e72 --- /dev/null +++ b/icu4c/source/data/translit/Han_Spacedhan.txt @@ -0,0 +1,24 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# Only intended for internal use +:: fullwidth-halfwidth; + +。 > '.'; + +$terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]]; +$initialPunct = [:Ps:][:Pi:]; + +# add space between any Han or terminal punctuation and letters, and +# between letters and Han or initial punct + +[[:Ideographic:] $terminalPunct] {} [:Letter:] > ' ' ; +[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] > ' ' ; + +# remove spacing between ideographs and other letters + + < [:Ideographic:] { ' ' } [:Letter:] ; + < [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ; + diff --git a/icu4c/source/data/translit/Hebrew_Latin.txt b/icu4c/source/data/translit/Hebrew_Latin.txt new file mode 100644 index 00000000000..4e0635cc0a0 --- /dev/null +++ b/icu4c/source/data/translit/Hebrew_Latin.txt @@ -0,0 +1,109 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# Transliteration table for Hebrew +# Based on the UNGEGN table at: +# http://www.eki.ee/wgrs/rom1_he.pdf +# +# Exceptions: +# - Accents are added to disambiguate letters +# - Combinations of dagesh, shin/sin dot that produce different +# letters are not yet encoded. +# +# To test, open: +# http://oss.software.ibm.com/cgi-bin/icu/tr +# Click Edit, paste in this file, Save As hebrew-latin/XXX +# (where XXX is a username) +# Now go back to the main window, and try it out. +# Use hebrew-latin/XXX for Output 1, and (Inverse) for Output 2 +# Paste in hebrew text in Input, and hit Transliterate. 
+# +# For more information, see" +# http://oss.software.ibm.com/icu/userguide/Transliteration.html + +:: [[:Hebrew:] [:^ccc=0:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138̄\u05BF] - [\u05BD]] ; +:: nfkd (nfc) ; +$letterAfter = [:M:]* [:L:] ; + +# move longer items here to avoid masking + +ח <> ẖ ; +צ <> ẕ } $letterAfter; +ץ <> ẕ ; +ש <> ş ; +ת <> ţ ; + +א <> ʼ ; +ב <> b ; +ג <> g ; +ד <> d ; +ה <> h ; +ו <> w ; +ז <> z ; +ט <> t ; +י <> y ; +כ <> k } $letterAfter; +ך <> k ; +ל <> l ; +מ <> m } $letterAfter; +ם <> m ; +נ <> n } $letterAfter; +ן <> n ; +ס <> s ; +ע <> ʻ ; +פ <> p } $letterAfter; +ף <> p ; +ק <> q ; +ר <> r ; + + װ > | וו; # HEBREW LIGATURE YIDDISH DOUBLE VAV + ױ > | וי; # HEBREW LIGATURE YIDDISH VAV YOD + ײ > | יי ; # HEBREW LIGATURE YIDDISH DOUBLE YOD + + +ּ <> ̇ ; # dagesh just goes to overdot for now +ׁ <> ̌ ; # shin dot -> sh +ׂ <> ̂ ; # sin dot -> s + +# points +$above = [^[:ccc=0:][:ccc=230:]]*; + +‎ֲ‎ > à ; +‎ֲ‎ $1< a ($above) ̀; + +‎ָ‎ > á ; +‎ָ‎ $1 < a ($above) ́; + +‎ֱ‎ > è ; +‎ֱ‎ $1 < e ($above) ̀; + +‎ֵ‎ > é ; +‎ֵ‎ $1 < e ($above) ́; + +‎ְ‎ > e ̆ ; +‎ְ‎ $1 < e ($above) ̆; + +‎ֹ‎ > ò ; +‎ֹ‎ $1 < o ($above) ̀; + +ִ <> i ; +ֻ <> u ; +ַ <> a ; +ֶ <> e ; +ֳ <> o ; + +\u05BF <> ̄ ; + +# fallbacks +ק < c ; +פ < f } $letterAfter; +ף < f ; +ז < j ; +ו < v ; +כס < x ; + +:: (lower); +:: nfc (nfd) ; +:: ([[:Latin:] [:^ccc=0:] [\u02BB-\u02BC\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 ̄ ]]); \ No newline at end of file diff --git a/icu4c/source/data/translit/Hiragana_Katakana.txt b/icu4c/source/data/translit/Hiragana_Katakana.txt new file mode 100644 index 00000000000..78a97165311 --- /dev/null +++ b/icu4c/source/data/translit/Hiragana_Katakana.txt @@ -0,0 +1,207 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# note: a global filter is more efficient, but MUST include all source chars +:: [\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ; +:: NFKC (); + +# Hiragana-Katakana + +# This is largely a one-to-one mapping, but it has a +# few kinks: + +# 1. The Katakana va/vi/ve/vo (30F7-30FA) have no +# Hiragana equivalents. We use Hiragana wa/wi/we/wo +# (308F-3092) with a voicing mark (3099), which is +# semantically equivalent. However, this is a non- +# roundtripping transformation. + +# 2. The Katakana small ka/ke (30F5,30F6) have no +# Hiragana equivalents. We convert them to normal +# Hiragana ka/ke (304B,3051). This is a one-way +# information-losing transformation and precludes +# round-tripping of 30F5 and 30F6. + +# 3. The combining marks 3099-309C are in the Hiragana +# block, but they apply to Katakana as well, so we +# leave them untouched. + +# 4. The Katakana prolonged sound mark 30FC doubles the +# preceding vowel. This is a one-way information- +# losing transformation from Katakana to Hiragana. + +# 5. The Katakana middle dot separates words in foreign +# expressions; we leave this unmodified. + +# The above points preclude successful round-trip +# transformations of arbitrary input text. However, +# they provide naturalistic results that should conform +# to user expectations. 
+ + +# Combining equivalents va/vi/ve/vo +わ゙ <> ヷ; +ゐ゙ <> ヸ; +ゑ゙ <> ヹ; +を゙ <> ヺ; + +# One-to-one mappings, main block +# 3041:3094 <> 30A1:30F4 +# 309D,E <> 30FD,E +ぁ <> ァ; +あ <> ア; +ぃ <> ィ; +い <> イ; +ぅ <> ゥ; +う <> ウ; +ぇ <> ェ; +え <> エ; +ぉ <> ォ; +お <> オ; +か <> カ; +が <> ガ; +き <> キ; +ぎ <> ギ; +く <> ク; +ぐ <> グ; +け <> ケ; +げ <> ゲ; +こ <> コ; +ご <> ゴ; +さ <> サ; +ざ <> ザ; +し <> シ; +じ <> ジ; +す <> ス; +ず <> ズ; +せ <> セ; +ぜ <> ゼ; +そ <> ソ; +ぞ <> ゾ; +た <> タ; +だ <> ダ; +ち <> チ; +ぢ <> ヂ; +っ <> ッ; +つ <> ツ; +づ <> ヅ; +て <> テ; +で <> デ; +と <> ト; +ど <> ド; +な <> ナ; +に <> ニ; +ぬ <> ヌ; +ね <> ネ; +の <> ノ; +は <> ハ; +ば <> バ; +ぱ <> パ; +ひ <> ヒ; +び <> ビ; +ぴ <> ピ; +ふ <> フ; +ぶ <> ブ; +ぷ <> プ; +へ <> ヘ; +べ <> ベ; +ぺ <> ペ; +ほ <> ホ; +ぼ <> ボ; +ぽ <> ポ; +ま <> マ; +み <> ミ; +む <> ム; +め <> メ; +も <> モ; +ゃ <> ャ; +や <> ヤ; +ゅ <> ュ; +ゆ <> ユ; +ょ <> ョ; +よ <> ヨ; +ら <> ラ; +り <> リ; +る <> ル; +れ <> レ; +ろ <> ロ; +ゎ <> ヮ; +わ <> ワ; +ゐ <> ヰ; +ゑ <> ヱ; +を <> ヲ; +ん <> ン; +ゔ <> ヴ; +ゝ <> ヽ; +ゞ <> ヾ; + +# One-way Katakana-Hiragana xform of small K ka/ke to +# normal H ka/ke. +か < ヵ; +け < ヶ; + +# Katakana followed by a prolonged sound mark 30FC has +# its final vowel doubled. This is a Katakana-Hiragana +# one-way information-losing transformation. We +# include the small Katakana (e.g., small A 3041) and +# do not distinguish them from their large +# counterparts. It doesn't make sense to double a +# small counterpart vowel as a small Hiragana vowel, so +# we don't do so. In natural text this should never +# occur anyway. If a 30FC is seen without a preceding +# vowel sound (e.g., after n 30F3) we do not change it. + +### $long = ー; + +# The following categories are Hiragana, not Katakana +# as might be expected, since by the time we get to the +# 30FC, the preceding character will have already been +# transformed to Hiragana. 
+ +# {The following mechanically generated from the +# Unicode 3.0 data:} + +$xa = [ \ +ぁ あ か が さ ざ \ +た だ な は ば ぱ \ +ま ゃ や ら ゎ わ \ +]; + +$xi = [ \ +ぃ い き ぎ し じ \ +ち ぢ に ひ び ぴ \ +み り ゐ \ +]; + +$xu = [ \ +ぅ う く ぐ す ず \ +っ つ づ ぬ ふ ぶ \ +ぷ む ゅ ゆ る ゔ \ +]; + +$xe = [ \ +ぇ え け げ せ ぜ \ +て で ね へ べ ぺ \ +め れ ゑ \ +]; + +$xo = [ \ +ぉ お こ ご そ ぞ \ +と ど の ほ ぼ ぽ \ +も ょ よ ろ を \ +]; + +あ < $xa {ー}; +い < $xi {ー}; +う < $xu {ー}; +え < $xe {ー}; +お < $xo {ー}; + +:: (NFKC) ; + +# note: a global filter is more efficient, but MUST include all source chars!! +:: ([\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]); + +# eof diff --git a/icu4c/source/data/translit/Hiragana_Latin.txt b/icu4c/source/data/translit/Hiragana_Latin.txt new file mode 100644 index 00000000000..ed0688173c1 --- /dev/null +++ b/icu4c/source/data/translit/Hiragana_Latin.txt @@ -0,0 +1,14 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +:: [\u3041-\u3094\u3099\u309D-\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0\u30D3\u30D6\u30D9\u30DC\u30F4\u30F7-\u30FA\u30FE] ; +:: NFD ; + +:: Hiragana-Katakana; +:: Katakana-Latin; + +:: NFC ; +:: (Lower) ; +:: ([',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]) ; diff --git a/icu4c/source/data/translit/InterIndic_Bengali.txt b/icu4c/source/data/translit/InterIndic_Bengali.txt new file mode 100644 index 00000000000..5f31228b9c7 --- /dev/null +++ b/icu4c/source/data/translit/InterIndic_Bengali.txt @@ -0,0 +1,147 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# InterIndic-Bengali +#:: NFD (NFC) ; +\uE001>\u0981; # SIGN CANDRABINDU +\uE002>\u0982; # SIGN ANUSVARA +\uE003>\u0983; # SIGN VISARGA +\uE004>\u0985; # FALLBACK TO LETTER A +\uE005>\u0985; # LETTER A +\uE006>\u0986; # LETTER AA +\uE007>\u0987; # LETTER I +\uE008>\u0988; # LETTER II +\uE009>\u0989; # LETTER U +\uE00A>\u098A; # LETTER UU +\uE00B>\u098B; # LETTER VOCALIC R +\uE00C>\u098C; # LETTER VOCALIC L +\uE00D>\u098F; # FALLBACK +\uE00E>\u098F; # FALLBACK +\uE00F>\u098F; # LETTER E +\uE010>\u0990; # LETTER AI +\uE011>\u0993; # FALLBACK +\uE012>\u0993; # FALLBACK +\uE013>\u0993; # LETTER O +\uE014>\u0994; # LETTER AU +\uE015>\u0995; # LETTER KA +\uE016>\u0996; # LETTER KHA +\uE017>\u0997; # LETTER GA +\uE018>\u0998; # LETTER GHA +\uE019>\u0999; # LETTER NGA +\uE01A>\u099A; # LETTER CA +\uE01B>\u099B; # LETTER CHA +\uE01C>\u099C; # LETTER JA +\uE01D>\u099D; # LETTER JHA +\uE01E>\u099E; # LETTER NYA +\uE01F>\u099F; # LETTER TTA +\uE020>\u09A0; # LETTER TTHA +\uE021>\u09A1; # LETTER DDA +\uE022>\u09A2; # LETTER DDHA +\uE023>\u09A3; # LETTER NNA +\uE024>\u09A4; # LETTER TA +\uE025>\u09A5; # LETTER THA +\uE026>\u09A6; # LETTER DA +\uE027>\u09A7; # LETTER DHA +\uE028>\u09A8; # LETTER NA +\uE029>\u09A8\u09BC; # REMAP (indicExceptions.txt): \u09a9>\u09a8 = LETTER NNNA>LETTER NA +\uE02A>\u09AA; # LETTER PA +\uE02B>\u09AB; # LETTER PHA +\uE02C>\u09AC; # LETTER BA +\uE02D>\u09AD; # LETTER BHA +\uE02E>\u09AE; # LETTER MA +\uE02F>\u09AF; # LETTER YA +\uE030>\u09B0; # LETTER RA +\uE031>\u09B0\u09BC; # FALLBACK to RA +\uE032>\u09B2; # LETTER LA +\uE033>\u09B2; # REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA +\uE034>\u09B2; # REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA +\uE035>\u09AC; # REMAP (indicExceptions.txt): \u09b5>\u09ac = LETTER VA>LETTER BA +\uE036>\u09B6; # LETTER SHA +\uE037>\u09B7; # LETTER SSA +\uE038>\u09B8; # LETTER SA +\uE039>\u09B9; # LETTER HA 
+\uE03C>\u09BC; # SIGN NUKTA +\uE03D>\u09bd; # SIGN AVAGRAHA +\uE03E>\u09BE; # VOWEL SIGN AA +\uE03F>\u09BF; # VOWEL SIGN I +\uE040>\u09C0; # VOWEL SIGN II +\uE041>\u09C1; # VOWEL SIGN U +\uE042>\u09C2; # VOWEL SIGN UU +\uE043>\u09C3; # VOWEL SIGN VOCALIC R +\uE044>\u09C4; # VOWEL SIGN VOCALIC RR +\uE045>\u09C7; # REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E +\uE046>\u09C7; # FALLBACK +\uE047>\u09C7; # VOWEL SIGN E +\uE048>\u09C8; # VOWEL SIGN AI +\uE049>\u09C7\u09BE; # REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O +\uE04A>\u09C7\u09BE; # FALLBACK +\uE04B>\u09C7\u09BE; # VOWEL SIGN O +\uE04C>\u09C7\u09D7; # VOWEL SIGN AU +\uE04D>\u09CD; # SIGN VIRAMA +\uE050>\u0993\u0982; # InterIndic-Bengali: OM +\ue051>; +\ue052>; +\ue053>; +\ue054>; +\uE055>; # LENGTH MARK +\uE056>\u09C8; # REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI +\uE057>\u09D7; # AU LENGTH MARK +\uE058>\u0995\u09BC; # FALLBACK +\uE059>\u0996\u09BC; # REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA +\uE05A>\u0997\u09BC; # REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA +\uE05B>\u099C\u09BC; # REMAP (indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA +\uE05C>\u09A1\u09BC; # FALLBACK +\uE05D>\u09A2\u09BC; # LETTER RHA +\uE05E>\u09AB\u09BC; # REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA +\uE05F>\u09AF\u09BC; # LETTER YYA +\uE060>\u09E0; # LETTER VOCALIC RR +\uE061>\u09E1; # LETTER VOCALIC LL +\uE062>\u09E2; # VOWEL SIGN VOCALIC L +\uE063>\u09E3; # VOWEL SIGN VOCALIC LL +\uE064>\u0964; # DANDA +\uE065>\u0965; # DOUBLE DANDA +\uE066>\u09E6; # DIGIT ZERO +\uE067>\u09E7; # DIGIT ONE +\uE068>\u09E8; # DIGIT TWO +\uE069>\u09E9; # DIGIT THREE +\uE06A>\u09EA; # DIGIT FOUR +\uE06B>\u09EB; # DIGIT FIVE +\uE06C>\u09EC; # DIGIT SIX +\uE06D>\u09ED; # DIGIT SEVEN +\uE06E>\u09EE; # DIGIT EIGHT +\uE06F>\u09EF; # DIGIT NINE +\ue070>; # ABBREVIATION SIGN +\ue071>\u09F0; 
# LETTER RA WITH MIDDLE DIAGONAL +\ue072>\u09F1; # LETTER RA WITH LOWER DIAGONAL +\ue073>\u09F2; # RUPEE MARK +\ue074>\u09F3; # RUPEE SIGN +\ue075>\u09F4; # CURRENCY NUMERATOR ONE +\ue076>\u09F5; # CURRENCY NUMERATOR TWO +\ue077>\u09F6; # CURRENCY NUMERATOR THREE +\ue078>\u09F7; # CURRENCY NUMERATOR FOUR +\ue079>\u09F8; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\ue07A>\u09F9; # CURRENCY DENOMINATOR SIXTEEN +\ue07B>\u09FA; # ISSHAR +\uE07C>; # TIPPI +\uE07D>; # ADDAK +\uE07E>; # IRI +\uE07F>; # URA +\uE080>; # EK ONKAR +\uE081>\u09AC; # FALLBACK FOR ORIYA LETTER WA +0 > \u09E6; # FALLBACK FOR TAMIL +1 > \u09E7; + + + + + + + + + + + +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/InterIndic_Devanagari.txt b/icu4c/source/data/translit/InterIndic_Devanagari.txt new file mode 100644 index 00000000000..5b921c4f19b --- /dev/null +++ b/icu4c/source/data/translit/InterIndic_Devanagari.txt @@ -0,0 +1,158 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# InterIndic-Devanagari +#:: NFD (NFC) ; +#Rules for Decomposed characters + \ue028\ue03c > \u0929; #\ue029 + \ue030\ue03c > \u0931; #\ue031 + \ue033\ue03c > \u0934; #\ue034 + \ue015\ue03c > \u0958; #\ue058 LETTER QA (For Urdu) + \ue016\ue03c > \u0959; #\ue059 LETTER KHHA (For Urdu) + \ue017\ue03c > \u095a; #\ue05a LETTER GHHA (For Urdu) + \ue01c\ue03c > \u095b; #\ue05b LETTER ZA (For Urdu) + \ue021\ue03c > \u095c; #\ue05c LETTER DDDHA (pronounced RRA) + \ue022\ue03c > \u095d; #\ue05d LETTER RHA (pronounced RRHA) + \ue02b\ue03c > \u095e; #\ue05e LETTER FA + \ue02f\ue03c > \u095f; #\ue05f LETTER YYA + + #Decomposed compatibility transliterations + \ue012\ue057>\u0914; # FALLBACK FOR TAMIL AU + 0 > \u0966; # FALLBACK FOR TAMIL + 1 > \u0967; + + \ue055>; # FALLBACK BLOW AWAY KANNADA AND TELUGU LENGTH MARK + \ue056>; # FALLBACK BLOW AWAY KANNADA AND TELUGU AI LENGTH MARK + \ue057>; # FALLBACK BLOW AWAY TAMIL AU LENGTH MARK + + \ue001 > \u0901; # SIGN CANDRABINDU + \ue002 > \u0902; # SIGN ANUSVARA + \ue003 > \u0903; # SIGN VISARGA + \ue004 > \u0904; # SIGN SHORT A + \ue005 > \u0905; # LETTER A + \ue006 > \u0906; # LETTER AA + \ue007 > \u0907; # LETTER I + \ue008 > \u0908; # LETTER II + \ue009 > \u0909; # LETTER U + \ue00a > \u090a; # LETTER UU + \ue00b > \u090b; # LETTER VOCALIC R + \ue00c > \u090c; # LETTER VOCALIC L + \ue00d > \u090d; # LETTER CANDRA E (For representing English sounds) + \ue00e > \u090e; # LETTER SHORT E(For Southern Scripts) + \ue00f > \u090f; # LETTER E + \ue010 > \u0910; # LETTER AI + \ue011 > \u0911; # LETTER CANDRA O (For representing English sounds) + \ue012 > \u0912; # LETTER SHORT O (For Southern Scripts) + \ue013 > \u0913; # LETTER O + \ue014 > \u0914; # LETTER AU + \ue015 > \u0915; # LETTER KA + \ue016 > \u0916; # LETTER KHA + \ue017 > \u0917; # LETTER GA + \ue018 > \u0918; # LETTER GHA + \ue019 > \u0919; # LETTER NGA + \ue01a > \u091a; # LETTER CA + \ue01b > \u091b; # 
LETTER CHA + \ue01c > \u091c; # LETTER JA + \ue01d > \u091d; # LETTER JHA + \ue01e > \u091e; # LETTER NYA + \ue01f > \u091f; # LETTER TTA + \ue020 > \u0920; # LETTER TTHA + \ue021 > \u0921; # LETTER DDA + \ue022 > \u0922; # LETTER DDHA + \ue023 > \u0923; # LETTER NNA + \ue024 > \u0924; # LETTER TA + \ue025 > \u0925; # LETTER THA + \ue026 > \u0926; # LETTER DA + \ue027 > \u0927; # LETTER DHA + \ue028 > \u0928; # LETTER NA + \ue029 > \u0929; # LETTER NNNA + \ue02a > \u092a; # LETTER PA + \ue02b > \u092b; # LETTER PHA + \ue02c > \u092c; # LETTER BA + \ue02d > \u092d; # LETTER BHA + \ue02e > \u092e; # LETTER MA + \ue02f > \u092f; # LETTER YA + \ue030 > \u0930; # LETTER RA + \ue031 > \u0931; # LETTER RRA (Eyelash RA for Southern scripts) + #\ue031 > \u0930; + \ue032 > \u0932; # LETTER LA + \ue033 > \u0933; # LETTER LLA + \ue034 > \u0934; # LETTER LLLA (LLLA for Southern scripts) + #\ue034 > \u0933; + \ue035 > \u0935; # LETTER VA + \ue036 > \u0936; # LETTER SHA + \ue037 > \u0937; # LETTER SSA + \ue038 > \u0938; # LETTER SA + \ue039 > \u0939; # LETTER HA + \ue03c > \u093c; # SIGN NUKTA + \ue03d > \u093d; # SIGN AVAGRAHA + \ue03e > \u093e; # VOWEL SIGN AA + \ue03f > \u093f; # VOWEL SIGN I + \ue040 > \u0940; # VOWEL SIGN II + \ue041 > \u0941; # VOWEL SIGN U + \ue042 > \u0942; # VOWEL SIGN UU + \ue043 > \u0943; # VOWEL SIGN VOCALIC R + \ue044 > \u0944; # VOWEL SIGN VOCALIC RR + \ue045 > \u0945; # VOWEL SIGN CANDRA E + \ue046 > \u0946; # VOWEL SIGN SHORT E + \ue047 > \u0947; # VOWEL SIGN E + \ue048 > \u0948; # VOWEL SIGN AI + \ue049 > \u0949; # VOWEL SIGN CANDRA O + \ue04a > \u094a; # VOWEL SIGN SHORT O + \ue04b > \u094b; # VOWEL SIGN O + \ue04c > \u094c; # VOWEL SIGN AU + \ue04d > \u094d; # SIGN VIRAMA + \ue050 > \u0950; # OM + \ue051 > \u0951; # STRESS SIGN UDATTA + \ue052 > \u0952; # STRESS SIGN ANUDATTA + \ue053 > \u0953; # GRAVE ACCENT + \ue054 > \u0954; # ACUTE ACCENT + \ue058 > \u0958; # LETTER QA (For Urdu) + \ue059 > \u0959; # LETTER KHHA (For Urdu) + \ue05a > 
\u095a; # LETTER GHHA (For Urdu) + \ue05b > \u095b; # LETTER ZA (For Urdu) + \ue05c > \u095c; # LETTER DDDHA (pronounced RRA) + \ue05d > \u095d; # LETTER RHA (pronounced RRHA) + \ue05e > \u095e; # LETTER FA + \ue05f > \u095f; # LETTER YYA + \ue060 > \u0960; # LETTER VOCALIC RR + \ue061 > \u0961; # LETTER VOCALIC LL + \ue062 > \u0962; # VOWEL SIGN VOCALIC L + \ue063 > \u0963; # VOWEL SIGN VOCALIC LL + \ue064 > \u0964; # DANDA + \ue065 > \u0965; # DOUBLE DANDA + \ue066 > \u0966; # DIGIT ZERO + \ue067 > \u0967; # DIGIT ONE + \ue068 > \u0968; # DIGIT TWO + \ue069 > \u0969; # DIGIT THREE + \ue06a > \u096a; # DIGIT FOUR + \ue06b > \u096b; # DIGIT FIVE + \ue06c > \u096c; # DIGIT SIX + \ue06d > \u096d; # DIGIT SEVEN + \ue06e > \u096e; # DIGIT EIGHT + \ue06f > \u096f; # DIGIT NINE + + \ue070>\u0970; # ABBREVIATION SIGN + \ue071>\u0930; # LETTER RA WITH MIDDLE DIAGONAL + \ue072>\u0930; # LETTER RA WITH LOWER DIAGONAL + \ue073>; # RUPEE MARK + \ue074>\u0930\u0942; # RUPEE SIGN + \ue075>; # CURRENCY NUMERATOR ONE + \ue076>; # CURRENCY NUMERATOR TWO + \ue077>; # CURRENCY NUMERATOR THREE + \ue078>; # CURRENCY NUMERATOR FOUR + \ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR + \ue07A>; # CURRENCY DENOMINATOR SIXTEEN + \ue07B>; # ISSHAR + \uE07C>; # TIPPI + \uE07D>; # ADDAK + \uE07E>; # IRI + \uE07F>; # URA + \uE080>; # EK ONKAR + \uE081>\u0935; # FALLBACK FOR ORIYA LETTER WA + +# \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN +# :: NFC; +# eof diff --git a/icu4c/source/data/translit/InterIndic_Gujarati.txt b/icu4c/source/data/translit/InterIndic_Gujarati.txt new file mode 100644 index 00000000000..92382453213 --- /dev/null +++ b/icu4c/source/data/translit/InterIndic_Gujarati.txt @@ -0,0 +1,138 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# InterIndic-Gujarati +#:: NFD (NFC) ; +\ue001>\u0a81; # SIGN CANDRABINDU +\ue002>\u0a82; # SIGN ANUSVARA +\ue003>\u0a83; # SIGN VISARGA +\uE004>\u0a85; # FALLBACK TO LETTER A +\ue005>\u0a85; # LETTER A +\ue006>\u0a86; # LETTER AA +\ue007>\u0a87; # LETTER I +\ue008>\u0a88; # LETTER II +\ue009>\u0a89; # LETTER U +\ue00a>\u0a8a; # LETTER UU +\ue00b>\u0a8b; # LETTER VOCALIC R +\ue00c>\u0a8c; # LETTER VOCALIC L +\ue00d>\u0a8d; # GUJARATI VOWEL CANDRA E +\ue00e>\u0a8f; # FALLBACK +\ue00f>\u0a8f; # InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E) +\ue010>\u0a90; # LETTER AI +\ue011>\u0a91; # FALLBACK +\ue012>\u0a93; # FALLBACK +\ue013>\u0a93; # UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O) +\ue014>\u0a94; # LETTER AU +\ue015>\u0a95; # LETTER KA +\ue016>\u0a96; # LETTER KHA +\ue017>\u0a97; # LETTER GA +\ue018>\u0a98; # LETTER GHA +\ue019>\u0a99; # LETTER NGA +\ue01a>\u0a9a; # LETTER CA +\ue01b>\u0a9b; # LETTER CHA +\ue01c>\u0a9c; # LETTER JA +\ue01d>\u0a9d; # LETTER JHA +\ue01e>\u0a9e; # LETTER NYA +\ue01f>\u0a9f; # LETTER TTA +\ue020>\u0aa0; # LETTER TTHA +\ue021>\u0aa1; # LETTER DDA +\ue022>\u0aa2; # LETTER DDHA +\ue023>\u0aa3; # LETTER NNA +\ue024>\u0aa4; # LETTER TA +\ue025>\u0aa5; # LETTER THA +\ue026>\u0aa6; # LETTER DA +\ue027>\u0aa7; # LETTER DHA +\ue028>\u0aa8; # LETTER NA +\ue029>\u0aa8\u0abc; # FALLBACK to NA+NUKTA +\ue02a>\u0aaa; # LETTER PA +\ue02b>\u0aab; # LETTER PHA +\ue02c>\u0aac; # LETTER BA +\ue02d>\u0aad; # LETTER BHA +\ue02e>\u0aae; # LETTER MA +\ue02f>\u0aaf; # LETTER YA +\ue030>\u0ab0; # LETTER RA +\ue031>\u0ab0\u0abc; # FALLBACK +\ue032>\u0ab2; # LETTER LA +\ue033>\u0ab3; # LETTER LLA +\ue034>\u0ab3\u0abc; # LETTER LLLA>LETTER LLA+NUKTA +\ue035>\u0ab5; # LETTER VA +\ue036>\u0ab6; # LETTER SHA +\ue037>\u0ab7; # LETTER SSA +\ue038>\u0ab8; # LETTER SA +\ue039>\u0ab9; # LETTER HA +\ue03c>\u0abc; # SIGN NUKTA +\ue03d>\u0abd; # SIGN AVAGRAHA +\ue03e>\u0abe; # VOWEL 
SIGN AA +\ue03f>\u0abf; # VOWEL SIGN I +\ue040>\u0ac0; # VOWEL SIGN II +\ue041>\u0ac1; # VOWEL SIGN U +\ue042>\u0ac2; # VOWEL SIGN UU +\ue043>\u0ac3; # VOWEL SIGN VOCALIC R +\ue044>\u0ac4; # VOWEL SIGN VOCALIC RR +\ue045>\u0ac5; # VOWEL SIGN CANDRA E +\ue046>\u0ac7; # FALLBACK +\ue047>\u0ac7; # InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E) +\ue048>\u0ac8; # VOWEL SIGN AI +\ue049>\u0ac9; # VOWEL SIGN CANDRA O +\ue04a>\u0acb; # FALLBACK +\ue04b>\u0acb; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O) +\ue04c>\u0acc; # VOWEL SIGN AU +\ue04d>\u0acd; # SIGN VIRAMA +\ue050>\u0ad0; # OM +\ue051>; +\ue052>; +\ue053>; +\ue054>; +\ue055>; # UNMAPPED InterIndic-Gujarati: LENGTH MARK +\ue056>\u0ac8; # REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI +\ue057>\u0acc; # REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU +\ue058>\u0a95\u0abc; # FALLBACK +\ue059>\u0a96\u0abc; # REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA +\ue05a>\u0a97\u0abc; # REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA +\ue05b>\u0a9c\u0abc; # REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA +\ue05c>\u0aa1\u0abc; # FALLBACK +\ue05d>\u0aa2\u0abc; # REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA +\ue05e>\u0aab\u0abc; # REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER FA>LETTER PHA.SIGN NUKTA +\ue05f>\u0aaf\u0abc; # REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA +\ue060>\u0ae0; # LETTER VOCALIC RR +\ue061>\u0ae1; # LETTER VOCALIC LL +\ue062>\u0abf\u0abc; # REMAP (indicExceptions.txt): \u0ae2>\u0abf\u0abc = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA +\ue063>\u0ac0\u0abc; # REMAP (indicExceptions.txt): \u0ae3>\u0ac0\u0abc = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA +\uE064>\u0964; # DANDA +\uE065>\u0965; # DOUBLE DANDA +\ue066>\u0ae6; # 
DIGIT ZERO +\ue067>\u0ae7; # DIGIT ONE +\ue068>\u0ae8; # DIGIT TWO +\ue069>\u0ae9; # DIGIT THREE +\ue06a>\u0aea; # DIGIT FOUR +\ue06b>\u0aeb; # DIGIT FIVE +\ue06c>\u0aec; # DIGIT SIX +\ue06d>\u0aed; # DIGIT SEVEN +\ue06e>\u0aee; # DIGIT EIGHT +\ue06f>\u0aef; # DIGIT NINE +\ue070>; # ABBREVIATION SIGN +\ue071>\u0ab0; # LETTER RA WITH MIDDLE DIAGONAL +\ue072>\u0ab0; # LETTER RA WITH LOWER DIAGONAL +\ue073>; # RUPEE MARK +\ue074>; # RUPEE SIGN +\ue075>; # CURRENCY NUMERATOR ONE +\ue076>; # CURRENCY NUMERATOR TWO +\ue077>; # CURRENCY NUMERATOR THREE +\ue078>; # CURRENCY NUMERATOR FOUR +\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\ue07A>; # CURRENCY DENOMINATOR SIXTEEN +\ue07B>; # ISSHAR +\uE07C>; # TIPPI +\uE07D>; # ADDAK +\uE07E>; # IRI +\uE07F>; # URA +\uE080>; # EK ONKAR +\uE081>\u0ab5; # FALLBACK FOR ORIYA LETTER WA +0 > \u0ae6; # FALLBACK FOR TAMIL +1 > \u0ae7; + +#\ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/InterIndic_Gurmukhi.txt b/icu4c/source/data/translit/InterIndic_Gurmukhi.txt new file mode 100644 index 00000000000..a2236da96a0 --- /dev/null +++ b/icu4c/source/data/translit/InterIndic_Gurmukhi.txt @@ -0,0 +1,147 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# InterIndic-Gurmukhi +#:: NFD (NFC) ; +$vowel = [\u0A05-\u0A14 \u0A3e-\u0A4D]; +$consonant = [\u0A15-\u0A39]; + +\ue001>\u0a01; # SIGN CHANDRABINDU +#rules for BINDI + +# Anusvara is equivalent to BINDI when preceeded by a vowel +$vowel{\ue002>\u0a02; # SIGN ANUSVARA (\u0a02 = SIGN BINDI) +# else is equivalent to TIPPI +$consonant{\ue002>\u0a70; # SIGN TIPPI +\ue002>\u0a02; + +\ue003>; # FALLBACK BLOW AWAY SIGN VISARGA +\uE004>\u0a05; # FALLBACK TO LETTER A +\ue005>\u0a05; # LETTER A +\ue006>\u0a06; # LETTER AA +\ue007>\u0a07; # LETTER I +\ue008>\u0a08; # LETTER II +\ue009>\u0a09; # LETTER U +\ue00a>\u0a0a; # LETTER UU +\ue00b>\u0a30\u0a3f; # REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I +\ue00c>\u0a33; # FALLBACK +\ue00d>\u0a0f; # FALLBACK +\ue00e>\u0a0f; # FALLBACK +\ue00f>\u0a0f; # LETTER EE +\ue010>\u0a10; # LETTER AI +\ue011>\u0a13; # FALLBACK +\ue012>\u0a13; # FALLBACK +\ue013>\u0a13; # LETTER OO +\ue014>\u0a14; # LETTER AU +\ue015>\u0a15; # LETTER KA +\ue016>\u0a16; # LETTER KHA +\ue017>\u0a17; # LETTER GA +\ue018>\u0a18; # LETTER GHA +\ue019>\u0a19; # LETTER NGA +\ue01a>\u0a1a; # LETTER CA +\ue01b>\u0a1b; # LETTER CHA +\ue01c>\u0a1c; # LETTER JA +\ue01d>\u0a1d; # LETTER JHA +\ue01e>\u0a1e; # LETTER NYA +\ue01f>\u0a1f; # LETTER TTA +\ue020>\u0a20; # LETTER TTHA +\ue021>\u0a21; # LETTER DDA +\ue022>\u0a22; # LETTER DDHA +\ue023>\u0a23; # LETTER NNA +\ue024>\u0a24; # LETTER TA +\ue025>\u0a25; # LETTER THA +\ue026>\u0a26; # LETTER DA +\ue027>\u0a27; # LETTER DHA +\ue028>\u0a28; # LETTER NA +\ue029>\u0a28\u0a3c; # REMAP (indicExceptions.txt): \u0a29>\u0a28 = LETTER NNNA>LETTER NA +\ue02a>\u0a2a; # LETTER PA +\ue02b>\u0a2b; # LETTER PHA +\ue02c>\u0a2c; # LETTER BA +\ue02d>\u0a2d; # LETTER BHA +\ue02e>\u0a2e; # LETTER MA +\ue02f>\u0a2f; # LETTER YA +\ue030>\u0a30; # LETTER RA +\ue031>\u0a30\u0a3c; # FALLBACK LETTER RA+NUKTA +\ue032>\u0a32; # 
LETTER LA +\ue033>\u0a33; # LETTER LLA +\ue034>\u0a33; # REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA +\ue035>\u0a35; # LETTER VA +\ue036>\u0a36; # LETTER SHA +\ue037>\u0a36; # REMAP (indicExceptions.txt): \u0a37>\u0a36 = LETTER SSA>LETTER SHA +\ue038>\u0a38; # LETTER SA +\ue039>\u0a39; # LETTER HA +\ue03c>\u0a3c; # SIGN NUKTA +\ue03d>; # FALLBACK BLOW AWAY SIGN AVAGRAHA +\ue03e>\u0a3e; # VOWEL SIGN AA +\ue03f>\u0a3f; # VOWEL SIGN I +\ue040>\u0a40; # VOWEL SIGN II +\ue041>\u0a41; # VOWEL SIGN U +\ue042>\u0a42; # VOWEL SIGN UU +\ue043>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC R +\ue044>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR +\ue045>\u0a48; # REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI +\ue046>\u0a47; # FALLBACK +\ue047>\u0a47; # VOWEL SIGN EE +\ue048>\u0a48; # VOWEL SIGN AI +\ue049>\u0a4c; # REMAP (indicExceptions.txt): \u0a49>\u0a4c = VOWEL SIGN CANDRA O>VOWEL SIGN AU +\ue04a>\u0a4b; # FALLBACK +\ue04b>\u0a4b; # VOWEL SIGN OO +\ue04c>\u0a4c; # VOWEL SIGN AU +\ue04d>\u0a4d; # SIGN VIRAMA +\ue050>\u0a0f\u0a02; # FALLBACK to OO+BINDI : OM (NOTE(review): \u0a0f is LETTER EE, LETTER OO is \u0a13 - confirm intended target) +\ue051>; +\ue052>; +\ue053>; +\ue054>; +\ue055>; # FALLBACK BLOW AWAY LENGTH MARK +\ue056>\u0a48; # REMAP (indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI +\ue057>\u0a4c; # REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU +\ue058>\u0a15\u0a3c; # FALLBACK KA+NUKTA +\ue059>\u0a59; # LETTER KHHA +\ue05a>\u0a5a; # LETTER GHHA +\ue05b>\u0a5b; # LETTER ZA +\ue05c>\u0a5c; # LETTER RRA +\ue05d>\u0a22\u0a3c; # REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA +\ue05e>\u0a5e; # LETTER FA +\ue05f>\u0a2f\u0a3c; # REMAP (indicExceptions.txt): \u0a5f>\u0a2f = LETTER YYA>LETTER YA +\ue060>\u0a30\u0a3f; # REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I +\ue061>\u0a32\u0a3c; # +\ue062>\u0a3f\u0a3c; # REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN
VOCALIC L>VOWEL SIGN I.SIGN NUKTA +\ue063>\u0a40\u0a3c; # REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA +\uE064>\u0964; # DANDA +\uE065>\u0965; # DOUBLE DANDA +\ue066>\u0a66; # DIGIT ZERO +\ue067>\u0a67; # DIGIT ONE +\ue068>\u0a68; # DIGIT TWO +\ue069>\u0a69; # DIGIT THREE +\ue06a>\u0a6a; # DIGIT FOUR +\ue06b>\u0a6b; # DIGIT FIVE +\ue06c>\u0a6c; # DIGIT SIX +\ue06d>\u0a6d; # DIGIT SEVEN +\ue06e>\u0a6e; # DIGIT EIGHT +\ue06f>\u0a6f; # DIGIT NINE +\ue070>; # ABBREVIATION SIGN +\ue071>\u0a30; # LETTER RA WITH MIDDLE DIAGONAL +\ue072>\u0a30; # LETTER RA WITH LOWER DIAGONAL +\ue073>; # RUPEE MARK +\ue074>; # RUPEE SIGN +\ue075>; # CURRENCY NUMERATOR ONE +\ue076>; # CURRENCY NUMERATOR TWO +\ue077>; # CURRENCY NUMERATOR THREE +\ue078>; # CURRENCY NUMERATOR FOUR +\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\ue07A>; # CURRENCY DENOMINATOR SIXTEEN +\ue07B>; # ISSHAR +\uE07C>\u0a70; # TIPPI +\uE07D>\u0a71; # ADDAK +\uE07E>\u0a72; # IRI +\uE07F>\u0a73; # URA +\uE080>\u0a74; # EK ONKAR +\uE081>\u0a35; # FALLBACK FOR ORIYA LETTER WA + +0 > \u0a66; # FALLBACK FOR TAMIL +1 > \u0a67; +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/InterIndic_Kannada.txt b/icu4c/source/data/translit/InterIndic_Kannada.txt new file mode 100644 index 00000000000..18ea84ae2bc --- /dev/null +++ b/icu4c/source/data/translit/InterIndic_Kannada.txt @@ -0,0 +1,141 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# InterIndic-Kannada +#:: NFD (NFC) ; +\ue033\ue03c>\u0cde; # LETTER FA +\ue001>\u0c82; # REMAP (indicExceptions.txt): \u0c81>\u0c82 = SIGN CANDRABINDU>SIGN ANUSVARA +\ue002>\u0c82; # SIGN ANUSVARA +\ue003>\u0c83; # SIGN VISARGA +\uE004>\u0c85; # FALLBACK TO LETTER A +\ue005>\u0c85; # LETTER A +\ue006>\u0c86; # LETTER AA +\ue007>\u0c87; # LETTER I +\ue008>\u0c88; # LETTER II +\ue009>\u0c89; # LETTER U +\ue00a>\u0c8a; # LETTER UU +\ue00b>\u0c8b; # LETTER VOCALIC R +\ue00c>\u0c8c; # LETTER VOCALIC L +\ue00d>\u0c8e; # LETTER E +\ue00e>\u0c8e; # FALLBACK +\ue00f>\u0c8f; # LETTER EE +\ue010>\u0c90; # LETTER AI +\ue011>\u0c92; # FALLBACK +\ue012>\u0c92; # LETTER O +\ue013>\u0c93; # LETTER OO +\ue014>\u0c94; # LETTER AU +\ue015>\u0c95; # LETTER KA +\ue016>\u0c96; # LETTER KHA +\ue017>\u0c97; # LETTER GA +\ue018>\u0c98; # LETTER GHA +\ue019>\u0c99; # LETTER NGA +\ue01a>\u0c9a; # LETTER CA +\ue01b>\u0c9b; # LETTER CHA +\ue01c>\u0c9c; # LETTER JA +\ue01d>\u0c9d; # LETTER JHA +\ue01e>\u0c9e; # LETTER NYA +\ue01f>\u0c9f; # LETTER TTA +\ue020>\u0ca0; # LETTER TTHA +\ue021>\u0ca1; # LETTER DDA +\ue022>\u0ca2; # LETTER DDHA +\ue023>\u0ca3; # LETTER NNA +\ue024>\u0ca4; # LETTER TA +\ue025>\u0ca5; # LETTER THA +\ue026>\u0ca6; # LETTER DA +\ue027>\u0ca7; # LETTER DHA +\ue028>\u0ca8; # LETTER NA +\ue029>\u0ca8; # REMAP (indicExceptions.txt): \u0ca9>\u0ca8 = LETTER NNNA>LETTER NA +\ue02a>\u0caa; # LETTER PA +\ue02b>\u0cab; # LETTER PHA +\ue02c>\u0cac; # LETTER BA +\ue02d>\u0cad; # LETTER BHA +\ue02e>\u0cae; # LETTER MA +\ue02f>\u0caf; # LETTER YA +\ue030\ue03c>\u0cb1; +\ue030>\u0cb0; # LETTER RA +\ue031>\u0cb1; # LETTER RRA +\ue032>\u0cb2; # LETTER LA +\ue033>\u0cb3; # LETTER LLA +\ue034>\u0cde; # REMAP (indicExceptions.txt): \u0cb4>\u0cb3 = LETTER LLLA>LETTER LLA +\ue035>\u0cb5; # LETTER VA +\ue036>\u0cb6; # LETTER SHA +\ue037>\u0cb7; # LETTER SSA +\ue038>\u0cb8; # LETTER SA +\ue039>\u0cb9; # LETTER HA + 
+\ue03c>\u0cbc; # NUKTA +\ue03d>\u0cbd; # AVAGRAHA + +\ue03e>\u0cbe; # VOWEL SIGN AA +\ue03f>\u0cbf; # VOWEL SIGN I +\ue040>\u0cc0; # VOWEL SIGN II +\ue041>\u0cc1; # VOWEL SIGN U +\ue042>\u0cc2; # VOWEL SIGN UU +\ue043>\u0cc3; # VOWEL SIGN VOCALIC R +\ue044>\u0cc4; # VOWEL SIGN VOCALIC RR +\ue045>\u0cc6; # REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E +\ue046>\u0cc6; # VOWEL SIGN E +\ue047>\u0cc7; # VOWEL SIGN EE +\ue048>\u0cc8; # VOWEL SIGN AI +\ue049>\u0cca; # REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O +\ue04a>\u0cca; # VOWEL SIGN O +\ue04b>\u0ccb; # VOWEL SIGN OO +\ue04c>\u0ccc; # VOWEL SIGN AU +\ue04d>\u0ccd; # SIGN VIRAMA +\ue050>\u0c93\u0c82; # REMAP (indicExceptions.txt): \u0cd0>\u0c93\u0c82 = OM>LETTER OO.SIGN ANUSVARA +\ue051>; +\ue052>; +\ue053>; +\ue054>; +\ue055>\u0cd5; # LENGTH MARK +\ue056>\u0cd6; # AI LENGTH MARK +\ue057>\u0ccc; # REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU +\ue058>\u0c95; # FALLBACK +\ue059>\u0c96; # REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA +\ue05a>\u0c97; # REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA +\ue05b>\u0c9c; # REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA +\ue05c>\u0ca2; # FALLBACK +\ue05d>\u0ca2; # REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA +\ue05e>\u0cde; # LETTER FA +\ue05f>\u0caf; # REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA +\ue060>\u0ce0; # LETTER VOCALIC RR +\ue061>\u0ce1; # LETTER VOCALIC LL +\ue062>\u0cbf; # REMAP (indicExceptions.txt): \u0ce2>\u0cbf = VOWEL SIGN VOCALIC L>VOWEL SIGN I +\ue063>\u0cc0; # REMAP (indicExceptions.txt): \u0ce3>\u0cc0 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II +\ue064>'.' ; # FALLBACK FOR DANDA +\ue065>'.' 
; # FALLBACK FOR DOUBLE DANDA +\ue066>\u0ce6; # DIGIT ZERO +\ue067>\u0ce7; # DIGIT ONE +\ue068>\u0ce8; # DIGIT TWO +\ue069>\u0ce9; # DIGIT THREE +\ue06a>\u0cea; # DIGIT FOUR +\ue06b>\u0ceb; # DIGIT FIVE +\ue06c>\u0cec; # DIGIT SIX +\ue06d>\u0ced; # DIGIT SEVEN +\ue06e>\u0cee; # DIGIT EIGHT +\ue06f>\u0cef; # DIGIT NINE +\ue070>; # ABBREVIATION SIGN +\ue071>\u0cb0; # LETTER RA WITH MIDDLE DIAGONAL +\ue072>\u0cb0; # LETTER RA WITH LOWER DIAGONAL +\ue073>; # RUPEE MARK +\ue074>; # RUPEE SIGN +\ue075>; # CURRENCY NUMERATOR ONE +\ue076>; # CURRENCY NUMERATOR TWO +\ue077>; # CURRENCY NUMERATOR THREE +\ue078>; # CURRENCY NUMERATOR FOUR +\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\ue07A>; # CURRENCY DENOMINATOR SIXTEEN +\ue07B>; # ISSHAR +\uE07C>; # TIPPI +\uE07D>; # ADDAK +\uE07E>; # IRI +\uE07F>; # URA +\uE080>; # EK ONKAR +\uE081>\u0cb5; # FALLBACK FOR ORIYA LETTER WA +0 > \u0ce6; # FALLBACK FOR TAMIL +1 > \u0ce7; + +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/InterIndic_Latin.txt b/icu4c/source/data/translit/InterIndic_Latin.txt new file mode 100644 index 00000000000..f734a0db6e9 --- /dev/null +++ b/icu4c/source/data/translit/InterIndic_Latin.txt @@ -0,0 +1,529 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# InterIndic-Latin + #\u0e00 reserved + #consonants + $chandrabindu=\ue001; + $anusvara=\ue002; + $visarga=\ue003; + #\u0e004 reserved + # w represents the stand-alone form + $wa=\ue005; + $waa=\ue006; + $wi=\ue007; + $wii=\ue008; + $wu=\ue009; + $wuu=\ue00a; + $wr=\ue00b; + $wl=\ue00c; + $wce=\ue00d; # LETTER CANDRA E + $wse=\ue00e; # LETTER SHORT E + $we=\ue00f; # \u090f LETTER E + $wai=\ue010; + $wco=\ue011; # LETTER CANDRA O + $wso=\ue012; # LETTER SHORT O + $wo=\ue013; # \u0913 LETTER O + $wau=\ue014; + $ka=\ue015; + $kha=\ue016; + $ga=\ue017; + $gha=\ue018; + $nga=\ue019; + $ca=\ue01a; + $cha=\ue01b; + $ja=\ue01c; + $jha=\ue01d; + $nya=\ue01e; + $tta=\ue01f; + $ttha=\ue020; + $dda=\ue021; + $ddha=\ue022; + $nna=\ue023; + $ta=\ue024; + $tha=\ue025; + $da=\ue026; + $dha=\ue027; + $na=\ue028; + $ena=\ue029; #compatibility + $pa=\ue02a; + $pha=\ue02b; + $ba=\ue02c; + $bha=\ue02d; + $ma=\ue02e; + $ya=\ue02f; + $ra=\ue030; + $vva=\ue081; + $rra=\ue031; + $la=\ue032; + $lla=\ue033; + $ela=\ue034; #compatibility + $va=\ue035; + $sha=\ue036; + $ssa=\ue037; + $sa=\ue038; + $ha=\ue039; +#\u093a Reserved +#\u093b Reserved + $nukta=\ue03c; + $avagraha=\ue03d; # SIGN AVAGRAHA + # represents the dependent form + $aa=\ue03e; + $i=\ue03f; + $ii=\ue040; + $u=\ue041; + $uu=\ue042; + $rh=\ue043; + $lh=\ue044; + $ce=\ue045; #VOWEL SIGN CANDRA E + $se=\ue046; #VOWEL SIGN SHORT E + $e=\ue047; + $ai=\ue048; + $co=\ue049; # VOWEL SIGN CANDRA O + $so=\ue04a; # VOWEL SIGN SHORT O + $o=\ue04b; # \u094b + $au=\ue04c; + $virama=\ue04d; +# \u094e Reserved +# \u094f Reserved + $om=\ue050; # OM + \ue051>; # UNMAPPED STRESS SIGN UDATTA + \ue052>; # UNMAPPED STRESS SIGN ANUDATTA + \ue053>; # UNMAPPED GRAVE ACCENT + \ue054>; # UNMAPPED ACUTE ACCENT + $lm = \ue055;# Telugu Length Mark + $ailm=\ue056;# AI Length Mark + $aulm=\ue057;# AU Length Mark + #urdu compatibity forms + $uka=\ue058; + $ukha=\ue059; + $ugha=\ue05a; + 
$ujha=\ue05b; + $uddha=\ue05c; + $udha=\ue05d; + $ufa=\ue05e; + $uya=\ue05f; + $wrr=\ue060; + $wll=\ue061; + $rrh=\ue062; + $llh=\ue063; + $danda=\ue064; + $doubleDanda=\ue065; + $zero=\ue066; # DIGIT ZERO + $one=\ue067; # DIGIT ONE + $two=\ue068; # DIGIT TWO + $three=\ue069; # DIGIT THREE + $four=\ue06a; # DIGIT FOUR + $five=\ue06b; # DIGIT FIVE + $six=\ue06c; # DIGIT SIX + $seven=\ue06d; # DIGIT SEVEN + $eight=\ue06e; # DIGIT EIGHT + $nine=\ue06f; # DIGIT NINE + +# \u0970>; # UNMAPPED ABBREVIATION SIGN + $depVowelAbove=[\ue03e-\ue040\ue045-\ue04c]; + $depVowelBelow=[\ue041-\ue044]; + # $x was originally called '&'; $z was '%' + $x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co]; + $z=[bcdfghjklmnpqrstvwxyz]; + $vowels=[aeiour\u0304\u0325\u0306]; + $forceIndependentMatra = [^[[:L:][\u0300-\u034c]]]; + ###################################################################### + # convert from Native letters to Latin letters + ###################################################################### + #transliterations for anusvara + $anusvara} [$ka$kha$ga$gha$nga] > n\u0307; + $anusvara} [$ca$cha$ja$jha$nya] > n\u0304; + $anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323; + $anusvara} [$ta$tha$da$dha$na] > n ; + $anusvara} [$pa$pha$ba$bha$ma] > m ; + $anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ; + $anusvara> m\u0307; + + # Urdu compatibility + $ya$nukta}$x > y\u0307 ; + $ya$nukta$virama > y\u0307 ; + $ya$nukta > y\u0307a ; + + $la$nukta }$x > l\u0331 ; + $la$nukta$virama > l\u0331 ; + $la$nukta > l\u0331a ; + + $na$nukta }$x > n\u0331 ; + $na$nukta$virama > n\u0331 ; + $na$nukta > n\u0331a ; + + $ena }$x > n\u0331 ; + $ena$virama > n\u0331 ; + $ena > n\u0331a ; + $uka > qa ; + $ka$nukta }$x > q ; + $ka$nukta$virama > q ; + $ka$nukta > qa ; + $kha$nukta }$x > k\u0331h\u0331 ; + $kha$nukta$virama > k\u0331h\u0331 ; + $kha$nukta > k\u0331h\u0331a ; + $ukha$virama > k\u0331h\u0331; + $ukha > k\u0331h\u0331a; + $ugha > g\u0307a ; + $ga$nukta }$x > g\u0307 ; + 
$ga$nukta$virama > g\u0307 ; + $ga$nukta > g\u0307a ; + + $ujha > za ; + $ja$nukta }$x > z ; + $ja$nukta$virama > z ; + $ja$nukta > za ; + $ddha$nukta}$x > r\u0323h ; + $ddha$nukta$virama > r\u0323h ; + $ddha$nukta > r\u0323ha; + + $uddha}$x > r\u0323 ; + $uddha$virama > r\u0323 ; + $uddha > r\u0323a; + + $udha > r\u0323a ; + $dda$nukta}$x > r\u0323 ; + $dda$nukta$virama > r\u0323 ; + $dda$nukta > r\u0323a ; + $pha$nukta }$x > f ; + $pha$nukta$virama > f ; + $pha$nukta > fa ; + $ufa }$x > f ; + $ufa$virama > f ; + $ufa > fa ; + + $ra$nukta}$x > r\u0331; + $ra$nukta$virama > r\u0331; + $ra$nukta > r\u0331a; + $lla$nukta}$x > l\u0331; + $lla$nukta$virama > l\u0331; + $lla$nukta > l\u0331a; + + $ela}$x > l\u0331; + $ela$virama > l\u0331; + $ela > l\u0331a; + + $uya}$x > y\u0307; + $uya$virama > y\u0307; + $uya > y\u0307a; + + + # normal consonants + $ka$virama}$ha>k''; + $ka}$x>k; + $ka$virama>k; + $ka>ka; + $kha}$x>kh; + $kha$virama>kh; + $kha>kha; + $ga$virama}$ha>g''; + $ga}$x>g; + $ga$virama>g; + $ga>ga; + + $gha}$x>gh; + $gha$virama>gh; + $gha>gha; + + $nga}$x>n\u0307; + $nga$virama>n\u0307; + $nga>n\u0307a ; + $ca$virama}$ha>c''; + $ca}$x>c; + $ca$virama>c; + $ca>ca; + + $cha}$x>ch; + $cha$virama>ch; + $cha>cha; + $ja$virama}$ha>j''; + $ja}$x>j; + $ja$virama>j; + $ja>ja; + + $jha}$x>jh; + $jha$virama>jh; + $jha>jha; + + $nya }$x>n\u0303 ; + $nya$virama>n\u0303; + $nya > n\u0303a ; + + + $tta$virama}$ha>t\u0323''; + $tta}$x>t\u0323; + $tta$virama>t\u0323; + $tta>t\u0323a; + + $ttha}$x>t\u0323h; + $ttha$virama>t\u0323h; + $ttha>t\u0323ha; + $dda}$x$ha>d\u0323''; + $dda}$x>d\u0323; + $dda$virama>d\u0323; + $dda>d\u0323a; + + $ddha}$x>d\u0323h; + $ddha$virama>d\u0323h; + $ddha>d\u0323ha; + + $nna}$x>n\u0323 ; + $nna$virama>n\u0323; + $nna>n\u0323a ; + + + $ta$virama}$ha>t''; + $ta$virama}$ttha>t''; + $ta$virama}$tta>t''; + $ta$virama}$tha>t''; + $ta}$x>t; + $ta$virama>t; + $ta>ta; + $tha}$x>th; + $tha$virama>th; + $tha>tha; + + $da$virama}$ha>d''; + 
$da$virama}$ddha>d''; + $da$virama}$dda>d''; + $da$virama}$dha>d''; + $da}$x>d; + $da$virama>d; + $da>da; + $dha}$x>dh; + $dha$virama>dh; + $dha>dha; + $na$virama}$ga>n''; + $na$virama}$ya>n''; + $na}$x>n; + $na$virama>n; + $na>na; + + + $pa$virama}$ha>p''; + $pa}$x>p; + $pa$virama>p; + $pa>pa; + $pha}$x>ph; + $pha$virama>ph; + $pha>pha; + $ba$virama}$ha>b''; + $ba}$x>b; + $ba$virama>b; + $ba>ba; + + $bha}$x>bh; + $bha$virama>bh; + $bha>bha; + + $ma$virama}$ma>m''; + $ma}$x>m; + $ma$virama>m; + $ma>ma; + + $ya}$x>y; + $ya$virama>y; + $ya>ya; + $ra$virama}$ha>r''; + $ra}$x>r; + $ra$virama>r; + $ra>ra; + $vva$virama}$ha>w\u0307''; + $vva}$x>w\u0307; + $vva$virama>w\u0307; + $vva>w\u0307a; + $rra$virama}$ha>r\u0331''; + $rra}$x>r\u0331; + $rra$virama>r\u0331; + $rra>r\u0331a; + $la$virama}$ha>l''; + $la}$x>l; + $la$virama>l; + $la>la; + $lla$virama}$ha>l\u0323''; + $lla}$x>l\u0323; + $lla$virama>l\u0323; + $lla>l\u0323a; + $va}$x>v; + $va$virama>v; + $va>va; + $sa$virama}$ha>s''; + $sa$virama}$sha>s''; + $sa$virama}$ssa>s''; + $sa$virama}$sa>s''; + $sa}$x>s; + $sa$virama>s; + + #for gurmukhi + $sa$nukta}$x>s\u0301; + $sa$nukta$virama>s\u0301; + $sa$nukta>s\u0301a; + $sa>sa; + + $sha}$x>s\u0301; + $sha$virama>s\u0301; + $sha>s\u0301a; + + $ssa}$x>s\u0323; + $ssa$virama>s\u0323; + $ssa>s\u0323a; + $ha}$x>h; + $ha$virama>h; + $ha>ha; + + # dependent vowels (should never occur except following consonants) + $forceIndependentMatra{$aa > \u0314a\u0304 ; + $forceIndependentMatra{$ai > \u0314ai ; + $forceIndependentMatra{$au > \u0314au ; + $forceIndependentMatra{$ii > \u0314i\u0304 ; + $forceIndependentMatra{$i > \u0314i ; + $forceIndependentMatra{$uu > \u0314u\u0304 ; + $forceIndependentMatra{$u > \u0314u ; + $forceIndependentMatra{$rrh > \u0314r\u0325\u0304 ; + $forceIndependentMatra{$rh > \u0314r\u0325 ; + $forceIndependentMatra{$llh > \u0314l\u0325\u0304 ; + $forceIndependentMatra{$lh > \u0314l\u0325 ; + $forceIndependentMatra{$e > \u0314e\u0304 ; + 
$forceIndependentMatra{$o > \u0314o\u0304 ; + #extra vowels + $forceIndependentMatra{$ce > \u0314e\u0306 ; + $forceIndependentMatra{$co > \u0314o\u0306 ; + $forceIndependentMatra{$se > \u0314e ; + $forceIndependentMatra{$so > \u0314o ; + $forceIndependentMatra{$nukta >; # Nukta cannot appear independently or as first character + $forceIndependentMatra{$virama >; # Virama cannot appear independently or as first character + $aa > a\u0304 ; + $ai > ai ; + $au > au ; + $ii > i\u0304 ; + $i > i ; + $uu > u\u0304 ; + $u > u ; + $rrh > r\u0325\u0304 ; + $rh > r\u0325 ; + $llh > l\u0325\u0304 ; + $lh > l\u0325 ; + $e > e\u0304 ; + $o > o\u0304 ; + #extra vowels + $ce > e\u0306 ; + $co > o\u0306 ; + $se > e ; + $so > o ; + #dependent vowels when following independent vowels. Generally Illegal only for roundtripping + $waa} $x > a\u0304\u0314 ; + $wai} $x > ai\u0314 ; + $wau} $x > au\u0314 ; + $wii} $x > i\u0304\u0314 ; + $wi } $x > i\u0314 ; + $wuu} $x > u\u0304\u0314 ; + $wu } $x > u\u0314 ; + $wrr} $x > r\u0325\u0304\u0314 ; + $wr } $x > r\u0325\u0314 ; + $wll} $x > l\u0325\u0304\u0314 ; + $wl } $x > l\u0325\u0314 ; + $we } $x > e\u0304\u0314 ; + $wo } $x > o\u0304\u0314 ; + $wa } $x > a\u0314 ; + #extra vowels + $wce} $x > e\u0306\u0314 ; + $wco} $x > o\u0306\u0314 ; + $wse} $x > e\u0314 ; + $wso} $x > o\u0314 ; + $om} $x > ''om\u0314 ; + + # independent vowels when preceded by vowels + $vowels{$waa > ''a\u0304 ; + $vowels{$wai > ''ai ; + $vowels{$wau > ''au ; + $vowels{$wii > ''i\u0304 ; + $vowels{$wi > ''i ; + $vowels{$wuu > ''u\u0304 ; + $vowels{$wu > ''u ; + $vowels{$wrr > ''r\u0325\u0304 ; + $vowels{$wr > ''r\u0325 ; + $vowels{$wll > ''l\u0325\u0304 ; + $vowels{$wl > ''l\u0325 ; + $vowels{$we > ''e\u0304 ; + $vowels{$wo > ''o\u0304 ; + $vowels{$wa > ''a ; + #extra vowels + $vowels{$wce > ''e\u0306 ; + $vowels{$wco > ''o\u0306 ; + $vowels{$wse > ''e ; + $vowels{$wso > ''o ; + + # independent vowels (otherwise) + $waa > a\u0304 ; + $wai > ai ; + $wau > au ; + $wii > 
i\u0304 ; + $wi > i ; + $wuu > u\u0304 ; + $wu > u ; + $wrr > r\u0325\u0304 ; + $wr > r\u0325 ; + $wll > l\u0325\u0304 ; + $wl > l\u0325 ; + $we > e\u0304 ; + $wo > o\u0304 ; + $wa > a ; + #extra vowels + $wce > e\u0306 ; + $wco > o\u0306 ; + $wse > e ; + $wso > o ; + $om > ''om ; + + #stress marks + $avagraha > \u0315; + $chandrabindu$anusvara>\u0303; + $chandrabindu > m\u0310; + $visarga>h\u0323; + #numbers + $zero > 0; + $one > 1; + $two > 2; + $three > 3; + $four > 4; + $five > 5; + $six > 6; + $seven > 7; + $eight > 8; + $nine > 9; + $lm >; + $ailm >; + $aulm >; + + $danda>'.'; + $doubleDanda>'.'; + + \ue070>; # ABBREVIATION SIGN + # LETTER RA WITH MIDDLE DIAGONAL + \ue071}$x>ra; + \ue071$virama>r; + \ue071>ra; + # LETTER RA WITH LOWER DIAGONAL + \ue072}$x>ra; + \ue072$virama>r; + \ue072>ra; + + \ue073>; # RUPEE MARK + \ue074>; # RUPEE SIGN + \ue075>; # CURRENCY NUMERATOR ONE + \ue076>; # CURRENCY NUMERATOR TWO + \ue077>; # CURRENCY NUMERATOR THREE + \ue078>; # CURRENCY NUMERATOR FOUR + \ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR + \ue07A>; # CURRENCY DENOMINATOR SIXTEEN + \ue07B>; # ISSHAR + \uE07C>; # TIPPI + \uE07D>; # ADDAK + \uE07E>; # IRI + \uE07F>; # URA + \uE080>; # EK ONKAR + \uE004>; # DEVANAGARI VOWEL SIGN SHORT A + \ No newline at end of file diff --git a/icu4c/source/data/translit/InterIndic_Malayalam.txt b/icu4c/source/data/translit/InterIndic_Malayalam.txt new file mode 100644 index 00000000000..7b8c77324d1 --- /dev/null +++ b/icu4c/source/data/translit/InterIndic_Malayalam.txt @@ -0,0 +1,141 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# InterIndic-Malayalam +#:: NFD (NFC) ; +\ue001>\u0d02; # REMAP (indicExceptions.txt): \u0d01>\u0d02 = SIGN CANDRABINDU>SIGN ANUSVARA +\ue002>\u0d02; # SIGN ANUSVARA +\ue003>\u0d03; # SIGN VISARGA +\uE004>\u0d05; # FALLBACK TO LETTER A +\ue005>\u0d05; # LETTER A +\ue006>\u0d06; # LETTER AA +\ue007>\u0d07; # LETTER I +\ue008>\u0d08; # LETTER II +\ue009>\u0d09; # LETTER U +\ue00a>\u0d0a; # LETTER UU +\ue00b>\u0d0b; # LETTER VOCALIC R +\ue00c>\u0d0c; # LETTER VOCALIC L +\ue00d>\u0d0e; # FALLBACK LETTER E +\ue00e>\u0d0e; # LETTER E +\ue00f>\u0d0f; # LETTER EE +\ue010>\u0d10; # LETTER AI +\ue011>\u0d12; # FALLBACK TO O +\ue012>\u0d12; # LETTER O +\ue013>\u0d13; # LETTER OO +\ue014>\u0d14; # LETTER AU +\ue015>\u0d15; # LETTER KA +\ue016>\u0d16; # LETTER KHA +\ue017>\u0d17; # LETTER GA +\ue018>\u0d18; # LETTER GHA +\ue019>\u0d19; # LETTER NGA +\ue01a>\u0d1a; # LETTER CA +\ue01b>\u0d1b; # LETTER CHA +\ue01c>\u0d1c; # LETTER JA +\ue01d>\u0d1d; # LETTER JHA +\ue01e>\u0d1e; # LETTER NYA +\ue01f>\u0d1f; # LETTER TTA +\ue020>\u0d20; # LETTER TTHA +\ue021>\u0d21; # LETTER DDA +\ue022>\u0d22; # LETTER DDHA +\ue023>\u0d23; # LETTER NNA +\ue024>\u0d24; # LETTER TA +\ue025>\u0d25; # LETTER THA +\ue026>\u0d26; # LETTER DA +\ue027>\u0d27; # LETTER DHA +\ue028>\u0d28; # LETTER NA +\ue029>\u0d28; # REMAP (indicExceptions.txt): \u0d29>\u0d28 = LETTER NNNA>LETTER NA +\ue02a>\u0d2a; # LETTER PA +\ue02b>\u0d2b; # LETTER PHA +\ue02c>\u0d2c; # LETTER BA +\ue02d>\u0d2d; # LETTER BHA +\ue02e>\u0d2e; # LETTER MA +\ue02f>\u0d2f; # LETTER YA +\ue030\ue03c>\u0d31; +\ue030>\u0d30; # LETTER RA +\ue031>\u0d31; # LETTER RRA +\ue032>\u0d32; # LETTER LA +\ue033\ue03c>\u0d34; +\ue033>\u0d33; # LETTER LLA +\ue034>\u0d34; # LETTER LLLA +\ue035>\u0d35; # LETTER VA +\ue036>\u0d36; # LETTER SHA +\ue037>\u0d37; # LETTER SSA +\ue038>\u0d38; # LETTER SA +\ue039>\u0d39; # LETTER HA + +\ue03c>; # FALLBACK BLOW AWAY NUKTA +\ue03d>; # FALLBACK 
BLOW AWAY AVAGRAHA + +\ue03e>\u0d3e; # VOWEL SIGN AA +\ue03f>\u0d3f; # VOWEL SIGN I +\ue040>\u0d40; # VOWEL SIGN II +\ue041>\u0d41; # VOWEL SIGN U +\ue042>\u0d42; # VOWEL SIGN UU +\ue043>\u0d43; # VOWEL SIGN VOCALIC R +\ue044>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR +\ue045>\u0d3e; # REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA +\ue046>\u0d46; # VOWEL SIGN E +\ue047>\u0d47; # VOWEL SIGN EE +\ue048>\u0d48; # VOWEL SIGN AI +\ue049>\u0d4b; # REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO +\ue04a>\u0d4a; # VOWEL SIGN O +\ue04b>\u0d4b; # VOWEL SIGN OO +\ue04c>\u0d4c; # VOWEL SIGN AU +\ue04d>\u0d4d; # SIGN VIRAMA +\ue050>\u0d13\u0d02; # UNMAPPED InterIndic-Malayalam: OM +\ue051>; +\ue052>; +\ue053>; +\ue054>; +\ue055>; # FALLBACK BLOW AWAY LENGTH MARK +\ue056>\u0d48; # REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI +\ue057>\u0d57; # AU LENGTH MARK +\ue058>\u0d15; # FALLBACK +\ue059>\u0d16; # REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA +\ue05a>\u0d17; # REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA +\ue05b>\u0d1c; # REMAP (indicExceptions.txt): \u0d5b>\u0d1c = LETTER ZA>LETTER JA +\ue05d>\u0d22; # REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA +\ue05c>\u0d21; # FALLBACK +\ue05e>\u0d2b; # REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA +\ue05f>\u0d2f; # REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA +\ue060>\u0d60; # LETTER VOCALIC RR +\ue061>\u0d61; # LETTER VOCALIC LL +\ue062>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC L +\ue063>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC LL +\ue064>'.' ; # FALLBACK FOR DANDA +\ue065>'.' 
; # FALLBACK FOR DOUBLE DANDA +\ue066>\u0d66; # DIGIT ZERO +\ue067>\u0d67; # DIGIT ONE +\ue068>\u0d68; # DIGIT TWO +\ue069>\u0d69; # DIGIT THREE +\ue06a>\u0d6a; # DIGIT FOUR +\ue06b>\u0d6b; # DIGIT FIVE +\ue06c>\u0d6c; # DIGIT SIX +\ue06d>\u0d6d; # DIGIT SEVEN +\ue06e>\u0d6e; # DIGIT EIGHT +\ue06f>\u0d6f; # DIGIT NINE +\ue070>; # ABBREVIATION SIGN +\ue071>\u0d30; # LETTER RA WITH MIDDLE DIAGONAL +\ue072>\u0d30; # LETTER RA WITH LOWER DIAGONAL +\ue073>; # RUPEE MARK +\ue074>; # RUPEE SIGN +\ue075>; # CURRENCY NUMERATOR ONE +\ue076>; # CURRENCY NUMERATOR TWO +\ue077>; # CURRENCY NUMERATOR THREE +\ue078>; # CURRENCY NUMERATOR FOUR +\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\ue07A>; # CURRENCY DENOMINATOR SIXTEEN +\ue07B>; # ISSHAR +\uE07C>; # TIPPI +\uE07D>; # ADDAK +\uE07E>; # IRI +\uE07F>; # URA +\uE080>; # EK ONKAR +\uE081>\u0d35; # FALLBACK FOR ORIYA LETTER WA +0 > \u0d66; # FALLBACK FOR TAMIL +1 > \u0d67; + +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/InterIndic_Oriya.txt b/icu4c/source/data/translit/InterIndic_Oriya.txt new file mode 100644 index 00000000000..0726e279828 --- /dev/null +++ b/icu4c/source/data/translit/InterIndic_Oriya.txt @@ -0,0 +1,137 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# InterIndic-Oriya +#:: NFD (NFC) ; +\ue001>\u0b01; # SIGN CANDRABINDU +\ue002>\u0b02; # SIGN ANUSVARA +\ue003>\u0b03; # SIGN VISARGA +\uE004>\u0b05; # FALLBACK TO LETTER A +\ue005>\u0b05; # LETTER A +\ue006>\u0b06; # LETTER AA +\ue007>\u0b07; # LETTER I +\ue008>\u0b08; # LETTER II +\ue009>\u0b09; # LETTER U +\ue00a>\u0b0a; # LETTER UU +\ue00b>\u0b0b; # LETTER VOCALIC R +\ue00c>\u0b0c; # LETTER VOCALIC L +\ue00d>\u0b0f; # FALLBACK +\ue00e>\u0b0f; # FALLBACK +\ue00f>\u0b0f; # LETTER E +\ue010>\u0b10; # LETTER AI +\ue011>\u0b13; # FALLBACK +\ue012>\u0b13; # FALLBACK +\ue013>\u0b13; # FALLBACK LETTER OO (\u0b13 = LETTER O) +\ue014>\u0b14; # LETTER AU +\ue015>\u0b15; # LETTER KA +\ue016>\u0b16; # LETTER KHA +\ue017>\u0b17; # LETTER GA +\ue018>\u0b18; # LETTER GHA +\ue019>\u0b19; # LETTER NGA +\ue01a>\u0b1a; # LETTER CA +\ue01b>\u0b1b; # LETTER CHA +\ue01c>\u0b1c; # LETTER JA +\ue01d>\u0b1d; # LETTER JHA +\ue01e>\u0b1e; # LETTER NYA +\ue01f>\u0b1f; # LETTER TTA +\ue020>\u0b20; # LETTER TTHA +\ue021>\u0b21; # LETTER DDA +\ue022>\u0b22; # LETTER DDHA +\ue023>\u0b23; # LETTER NNA +\ue024>\u0b24; # LETTER TA +\ue025>\u0b25; # LETTER THA +\ue026>\u0b26; # LETTER DA +\ue027>\u0b27; # LETTER DHA +\ue028>\u0b28; # LETTER NA +\ue029>\u0b28\u0b3c; # FALLBACK \u0b29>\u0b28 = LETTER NNNA>LETTER NA +\ue02a>\u0b2a; # LETTER PA +\ue02b>\u0b2b; # LETTER PHA +\ue02c>\u0b2c; # LETTER BA +\ue02d>\u0b2d; # LETTER BHA +\ue02e>\u0b2e; # LETTER MA +\ue02f>\u0b2f; # LETTER YA +\ue030>\u0b30; # LETTER RA +\ue031>\u0b5c; # LETTER RRA +\ue032>\u0b32; # LETTER LA +\ue033>\u0b33; # LETTER LLA +\ue034>\u0b33\u0b3c; # FALLBACK LETTER LLLA>LETTER LLA +\ue035>\u0b35; # LETTER VA +\ue036>\u0b36; # LETTER SHA +\ue037>\u0b37; # LETTER SSA +\ue038>\u0b38; # LETTER SA +\ue039>\u0b39; # LETTER HA +\ue03c>\u0b3c; # SIGN NUKTA +\ue03d>\u0b3d; # SIGN AVAGRAHA +\ue03e>\u0b3e; # VOWEL SIGN AA +\ue03f>\u0b3f; # VOWEL SIGN I +\ue040>\u0b40; # 
VOWEL SIGN II +\ue041>\u0b41; # VOWEL SIGN U +\ue042>\u0b42; # VOWEL SIGN UU +\ue043>\u0b43; # VOWEL SIGN VOCALIC R +\ue044>\u0b43\u0b3c; # FALLBACK \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA +\ue045>\u0b47; # FALLBACK +\ue046>\u0b47; # FALLBACK +\ue047>\u0b47; # VOWEL SIGN E +\ue048>\u0b48; # VOWEL SIGN AI +\ue049>\u0b4b; # FALLBACK +\ue04a>\u0b4b; # FALLBACK +\ue04b>\u0b4b; # VOWEL SIGN O +\ue04c>\u0b4c; # VOWEL SIGN AU +\ue04d>\u0b4d; # SIGN VIRAMA +\ue050>\u0b13\u0b01; # FALLBACK \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU +\ue051>; +\ue052>; +\ue053>; +\ue054>; +\ue055>; # UNMAPPED InterIndic-Oriya: LENGTH MARK +\ue056>\u0b56; # AI LENGTH MARK +\ue057>\u0b57; # AU LENGTH MARK +\ue059>\u0b16\u0b3c; # FALLBACK \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA +\ue058>\u0b15\u0b3c; # FALLBACK +\ue05a>\u0b17\u0b3c; # FALLBACK \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA +\ue05b>\u0b1c\u0b3c; # FALLBACK \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA +\ue05c>\u0b21\u0b3c; # FALLBACK +\ue05d>\u0b5d; # LETTER RHA +\ue05e>\u0b2b\u0b3c; # FALLBACK \u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA +\ue05f>\u0b5f; # LETTER YYA +\ue060>\u0b60; # LETTER VOCALIC RR +\ue061>\u0b61; # LETTER VOCALIC LL +\ue062>\u0b56\u0b3c; # FALLBACK \u0b62>\u0b56\u0b3c = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA +\ue063>\u0b57\u0b3c; # FALLBACK \u0b63>\u0b57\u0b3c = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA +\uE064>\u0964; # DANDA +\uE065>\u0965; # DOUBLE DANDA +\ue066>\u0b66; # DIGIT ZERO +\ue067>\u0b67; # DIGIT ONE +\ue068>\u0b68; # DIGIT TWO +\ue069>\u0b69; # DIGIT THREE +\ue06a>\u0b6a; # DIGIT FOUR +\ue06b>\u0b6b; # DIGIT FIVE +\ue06c>\u0b6c; # DIGIT SIX +\ue06d>\u0b6d; # DIGIT SEVEN +\ue06e>\u0b6e; # DIGIT EIGHT +\ue06f>\u0b6f; # DIGIT NINE +\ue070>; # ABBREVIATION SIGN +\ue071>\u0b30; # LETTER RA WITH MIDDLE DIAGONAL +\ue072>\u0b30; # LETTER RA WITH LOWER DIAGONAL +\ue073>; # RUPEE MARK +\ue074>; # RUPEE SIGN 
+\ue075>; # CURRENCY NUMERATOR ONE +\ue076>; # CURRENCY NUMERATOR TWO +\ue077>; # CURRENCY NUMERATOR THREE +\ue078>; # CURRENCY NUMERATOR FOUR +\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\ue07A>; # CURRENCY DENOMINATOR SIXTEEN +\ue07B>\u0B70; # ISSHAR +\uE07C>; # TIPPI +\uE07D>; # ADDAK +\uE07E>; # IRI +\uE07F>; # URA +\uE080>; # EK ONKAR +\uE081>\u0B71; # LETTER WA +0 > \u0b66; # FALLBACK FOR TAMIL +1 > \u0b67; + +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/InterIndic_Tamil.txt b/icu4c/source/data/translit/InterIndic_Tamil.txt new file mode 100644 index 00000000000..08889cba931 --- /dev/null +++ b/icu4c/source/data/translit/InterIndic_Tamil.txt @@ -0,0 +1,151 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# InterIndic-Tamil +#:: NFD (NFC) ; +\ue001>\u0b82; # FALLBACK SIGN CANDRABINDU +\ue002>\u0b82; # SIGN ANUSVARA +\ue003>\u0b83; # SIGN VISARGA +\uE004>\u0b85; # FALLBACK TO LETTER A +\ue005>\u0b85; # LETTER A +\ue006>\u0b86; # LETTER AA +\ue007>\u0b87; # LETTER I +\ue008>\u0b88; # LETTER II +\ue009>\u0b89; # LETTER U +\ue00a>\u0b8a; # LETTER UU +\ue00b>\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I +\ue00c>\u0bb2; # FALLBACK LETTER LA +\ue00d>\u0b8f; # FALLBACK +\ue00e>\u0b8e; # LETTER E +\ue00f>\u0b8f; # LETTER EE +\ue010>\u0b90; # LETTER AI +\ue011>\u0b92; # FALLBACK +\ue012>\u0b92; # LETTER O +\ue013>\u0b93; # LETTER OO +\ue014>\u0b94; # LETTER AU +\ue015>\u0b95; # LETTER KA +\ue016>\u0b95; # REMAP (indicExceptions.txt): \u0b96>\u0b95 = LETTER KHA>LETTER KA +\ue017>\u0b95; # REMAP (indicExceptions.txt): \u0b97>\u0b95 = LETTER GA>LETTER KA +\ue018>\u0b95; # REMAP (indicExceptions.txt): \u0b98>\u0b95 = LETTER GHA>LETTER KA +\ue019>\u0b99; # LETTER NGA 
+\ue01a>\u0b9a; # LETTER CA +\ue01b>\u0b9a; # REMAP (indicExceptions.txt): \u0b9b>\u0b9a = LETTER CHA>LETTER CA +\ue01c>\u0b9c; # LETTER JA +\ue01d>\u0b9a; # REMAP (indicExceptions.txt): \u0b9d>\u0b9a = LETTER JHA>LETTER CA +\ue01e>\u0b9e; # LETTER NYA +\ue01f>\u0b9f; # LETTER TTA +\ue020>\u0b9f; # REMAP (indicExceptions.txt): \u0ba0>\u0b9f = LETTER TTHA>LETTER TTA +\ue021>\u0b9f; # REMAP (indicExceptions.txt): \u0ba1>\u0b9f = LETTER DDA>LETTER TTA +\ue022>\u0b9f; # REMAP (indicExceptions.txt): \u0ba2>\u0b9f = LETTER DDHA>LETTER TTA +\ue023>\u0ba3; # LETTER NNA +\ue024>\u0ba4; # LETTER TA +\ue025>\u0ba4; # REMAP (indicExceptions.txt): \u0ba5>\u0ba4 = LETTER THA>LETTER TA +\ue026>\u0ba4; # REMAP (indicExceptions.txt): \u0ba6>\u0ba4 = LETTER DA>LETTER TA +\ue027>\u0ba4; # REMAP (indicExceptions.txt): \u0ba7>\u0ba4 = LETTER DHA>LETTER TA +\ue028\ue03c>\u0ba9; +\ue028>\u0ba8; # LETTER NA +\ue029>\u0ba9; # LETTER NNNA +\ue02a>\u0baa; # LETTER PA +\ue02b>\u0baa; # REMAP (indicExceptions.txt): \u0bab>\u0baa = LETTER PHA>LETTER PA +\ue02c>\u0baa; # REMAP (indicExceptions.txt): \u0bac>\u0baa = LETTER BA>LETTER PA +\ue02d>\u0baa; # REMAP (indicExceptions.txt): \u0bad>\u0baa = LETTER BHA>LETTER PA +\ue02e>\u0bae; # LETTER MA +\ue02f>\u0baf; # LETTER YA +\ue030\ue03c>\u0bb1; +\ue030>\u0bb0; # LETTER RA +\ue031>\u0bb1; # LETTER RRA +\ue032>\u0bb2; # LETTER LA +\ue033\ue03c>\u0bb4; +\ue033>\u0bb3; # LETTER LLA +\ue034>\u0bb4; # LETTER LLLA +\ue035>\u0bb5; # LETTER VA +\ue036>\u0bb7; # REMAP (indicExceptions.txt): \u0bb6>\u0bb7 = LETTER SHA>LETTER SSA +\ue037>\u0bb7; # LETTER SSA +\ue038>\u0bb8; # LETTER SA +\ue039>\u0bb9; # LETTER HA + +\ue03c>; # FALLBACK BLOW AWAY NUKTA +\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA + +\ue03e>\u0bbe; # VOWEL SIGN AA +\ue03f>\u0bbf; # VOWEL SIGN I +\ue040>\u0bc0; # VOWEL SIGN II +\ue041>\u0bc1; # VOWEL SIGN U +\ue042>\u0bc2; # VOWEL SIGN UU +\ue043>\u0bcd\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN 
VIRAMA.LETTER RA.VOWEL SIGN I +\ue044>\u0bcd\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I +\ue045>\u0bbe; # REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA +\ue046>\u0bc6; # VOWEL SIGN E +\ue047>\u0bc7; # VOWEL SIGN EE +\ue048>\u0bc8; # VOWEL SIGN AI +\ue049>\u0bbe; # REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA O>VOWEL SIGN AA +\ue04a>\u0bca; # VOWEL SIGN O +\ue04b>\u0bcb; # VOWEL SIGN OO +\ue04c>\u0bcc; # VOWEL SIGN AU +\ue04d>\u0bcd; # SIGN VIRAMA +\ue050>\u0b93\u0bae\u0bcd; # REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA +\ue051>; +\ue052>; +\ue053>; +\ue054>; +\ue055>; # UNMAPPED InterIndic-Tamil: LENGTH MARK +\ue056>\u0bc8; # REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI +\ue057>\u0bd7; # AU LENGTH MARK +\ue058>\u0b95; # FALLBACK +\ue059>\u0b95; # REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA +\ue05a>\u0b95; # REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA +\ue05b>\u0b9c; # REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA +\ue05c>\u0ba4; # FALLBACK +\ue05d>\u0b9f; # REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA +\ue05e>\u0baa; # REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA +\ue05f>\u0baf; # REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA +\ue060>\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I +\ue061>\u0bb3; # FALLBACK LETTER LLA +\ue062>\u0bbf; # FALLBACK VOWEL SIGN VOCALIC L +\ue063>\u0bc0; # FALLBACK VOWEL SIGN VOCALIC LL +\ue064>'.' ; # FALLBACK FOR DANDA +\ue065>'.' 
; # FALLBACK FOR DOUBLE DANDA + +\ue066>\u0030; # FALLBACK DIGIT ZERO + +\ue067\ue066\ue066\ue066>\u0bF2; +\ue067\ue066\ue066>\u0bf1; +\ue067\ue066>\u0bF0; + +\ue067>\u0be7; # DIGIT ONE +\ue068>\u0be8; # DIGIT TWO +\ue069>\u0be9; # DIGIT THREE +\ue06a>\u0bea; # DIGIT FOUR +\ue06b>\u0beb; # DIGIT FIVE +\ue06c>\u0bec; # DIGIT SIX +\ue06d>\u0bed; # DIGIT SEVEN +\ue06e>\u0bee; # DIGIT EIGHT +\ue06f>\u0bef; # DIGIT NINE + +\ue070>; # ABBREVIATION SIGN +\ue071>\u0bc0; # LETTER RA WITH MIDDLE DIAGONAL +\ue072>\u0bc0; # LETTER RA WITH LOWER DIAGONAL +\ue073>; # RUPEE MARK +\ue074>; # RUPEE SIGN +\ue075>; # CURRENCY NUMERATOR ONE +\ue076>; # CURRENCY NUMERATOR TWO +\ue077>; # CURRENCY NUMERATOR THREE +\ue078>; # CURRENCY NUMERATOR FOUR +\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\ue07A>; # CURRENCY DENOMINATOR SIXTEEN +\ue07B>; # ISSHAR +\uE07C>; # TIPPI +\uE07D>; # ADDAK +\uE07E>; # IRI +\uE07F>; # URA +\uE080>; # EK ONKAR +\uE081>\u0bb5; # FALLBACK FOR ORIYA LETTER WA + +1000 >\u0BF2; # NUMBER ONE THOUSAND +100 >\u0BF1; # NUMBER ONE HUNDRED +10 >\u0BF0; # NUMBER TEN + +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/InterIndic_Telugu.txt b/icu4c/source/data/translit/InterIndic_Telugu.txt new file mode 100644 index 00000000000..026cd209fb5 --- /dev/null +++ b/icu4c/source/data/translit/InterIndic_Telugu.txt @@ -0,0 +1,141 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# InterIndic-Telugu +#:: NFD (NFC) ; +\ue001>\u0c01; # SIGN CANDRABINDU +\ue002>\u0c02; # SIGN ANUSVARA +\ue003>\u0c03; # SIGN VISARGA +\uE004>\u0c05; # FALLBACK TO LETTER A +\ue005>\u0c05; # LETTER A +\ue006>\u0c06; # LETTER AA +\ue007>\u0c07; # LETTER I +\ue008>\u0c08; # LETTER II +\ue009>\u0c09; # LETTER U +\ue00a>\u0c0a; # LETTER UU +\ue00b>\u0c0b; # LETTER VOCALIC R +\ue00c>\u0c0c; # LETTER VOCALIC L +\ue00d>\u0c0E; # FALLBACK MAPPING +\ue00e>\u0c0E; # LETTER E +\ue00f>\u0c0f; # LETTER EE +\ue010>\u0c10; # LETTER AI +\ue011>\u0c12; # FALLBACK MAPPING +\ue012>\u0c12; # LETTER O +\ue013>\u0c13; # LETTER OO +\ue014>\u0c14; # LETTER AU +\ue015>\u0c15; # LETTER KA +\ue016>\u0c16; # LETTER KHA +\ue017>\u0c17; # LETTER GA +\ue018>\u0c18; # LETTER GHA +\ue019>\u0c19; # LETTER NGA +\ue01a>\u0c1a; # LETTER CA +\ue01b>\u0c1b; # LETTER CHA +\ue01c>\u0c1c; # LETTER JA +\ue01d>\u0c1d; # LETTER JHA +\ue01e>\u0c1e; # LETTER NYA +\ue01f>\u0c1f; # LETTER TTA +\ue020>\u0c20; # LETTER TTHA +\ue021>\u0c21; # LETTER DDA +\ue022>\u0c22; # LETTER DDHA +\ue023>\u0c23; # LETTER NNA +\ue024>\u0c24; # LETTER TA +\ue025>\u0c25; # LETTER THA +\ue026>\u0c26; # LETTER DA +\ue027>\u0c27; # LETTER DHA +\ue028>\u0c28; # LETTER NA +\ue029>\u0c28; # REMAP (indicExceptions.txt): \u0c29>\u0c28 = LETTER NNNA>LETTER NA +\ue02a>\u0c2a; # LETTER PA +\ue02b>\u0c2b; # LETTER PHA +\ue02c>\u0c2c; # LETTER BA +\ue02d>\u0c2d; # LETTER BHA +\ue02e>\u0c2e; # LETTER MA +\ue02f>\u0c2f; # LETTER YA +\ue030\ue03c>\u0c31; +\ue030>\u0c30; # LETTER RA +\ue031>\u0c31; # LETTER RRA +\ue032>\u0c32; # LETTER LA +\ue033>\u0c33; # LETTER LLA +\ue034>\u0c33; # REMAP (indicExceptions.txt): \u0c34>\u0c33 = LETTER LLLA>LETTER LLA +\ue035>\u0c35; # LETTER VA +\ue036>\u0c36; # LETTER SHA +\ue037>\u0c37; # LETTER SSA +\ue038>\u0c38; # LETTER SA +\ue039>\u0c39; # LETTER HA + +\ue03c>; # FALLBACK BLOW AWAY NUKTA +\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA + 
+\ue03e>\u0c3e; # VOWEL SIGN AA +\ue03f>\u0c3f; # VOWEL SIGN I +\ue040>\u0c40; # VOWEL SIGN II +\ue041>\u0c41; # VOWEL SIGN U +\ue042>\u0c42; # VOWEL SIGN UU +\ue043>\u0c43; # VOWEL SIGN VOCALIC R +\ue044>\u0c44; # VOWEL SIGN VOCALIC RR +\ue045>\u0c46; # VOWEL SIGN CANDRA E>VOWEL SIGN E +\ue046>\u0c46; # VOWEL SIGN E +\ue047>\u0c47; # VOWEL SIGN EE +\ue048>\u0c48; # VOWEL SIGN AI +\ue049>\u0c4a; # REMAP (indicExceptions.txt): \u0c49>\u0c4a = VOWEL SIGN CANDRA O>VOWEL SIGN O +\ue04a>\u0c4a; # VOWEL SIGN O +\ue04b>\u0c4b; # VOWEL SIGN OO +\ue04c>\u0c4c; # VOWEL SIGN AU +\ue04d>\u0c4d; # SIGN VIRAMA +\ue050>\u0c13\u0c02; # REMAP (indicExceptions.txt): \u0c50>\u0c13\u0c02 = OM>LETTER OO.SIGN ANUSVARA +\ue051>; +\ue052>; +\ue053>; +\ue054>; +\ue055>\u0c55; # LENGTH MARK +\ue056>\u0c56; # AI LENGTH MARK +\ue057>\u0c4c; # REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU +\ue058>\u0c15; # REMAP +\ue059>\u0c16; # REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA +\ue05a>\u0c17; # REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA +\ue05b>\u0c1c; # REMAP (indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA +\ue05c>\u0c22; # REMAP +\ue05d>\u0c22; # REMAP (indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA +\ue05e>\u0c2b; # REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA +\ue05f>\u0c2f; # REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA +\ue060>\u0c60; # LETTER VOCALIC RR +\ue061>\u0c61; # LETTER VOCALIC LL +\ue062>\u0c3f; # REMAP (indicExceptions.txt): \u0c62>\u0c3f = VOWEL SIGN VOCALIC L>VOWEL SIGN I +\ue063>\u0c40; # REMAP (indicExceptions.txt): \u0c63>\u0c40 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II +\ue064>'.' ; # FALLBACK FOR DANDA +\ue065>'.' 
; # FALLBACK FOR DOUBLE DANDA +\ue066>\u0c66; # DIGIT ZERO +\ue067>\u0c67; # DIGIT ONE +\ue068>\u0c68; # DIGIT TWO +\ue069>\u0c69; # DIGIT THREE +\ue06a>\u0c6a; # DIGIT FOUR +\ue06b>\u0c6b; # DIGIT FIVE +\ue06c>\u0c6c; # DIGIT SIX +\ue06d>\u0c6d; # DIGIT SEVEN +\ue06e>\u0c6e; # DIGIT EIGHT +\ue06f>\u0c6f; # DIGIT NINE + +\ue070>; # ABBREVIATION SIGN +\ue071>\u0c30; # LETTER RA WITH MIDDLE DIAGONAL +\ue072>\u0c30; # LETTER RA WITH LOWER DIAGONAL +\ue073>; # RUPEE MARK +\ue074>; # RUPEE SIGN +\ue075>; # CURRENCY NUMERATOR ONE +\ue076>; # CURRENCY NUMERATOR TWO +\ue077>; # CURRENCY NUMERATOR THREE +\ue078>; # CURRENCY NUMERATOR FOUR +\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR +\ue07A>; # CURRENCY DENOMINATOR SIXTEEN +\ue07B>; # ISSHAR +\uE07C>; # TIPPI +\uE07D>; # ADDAK +\uE07E>; # IRI +\uE07F>; # URA +\uE080>; # EK ONKAR +\uE081>\u0c35; # FALLBACK FOR ORIYA LETTER WA +0 > \u0c66; # FALLBACK FOR TAMIL +1 > \u0c67; + +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/Kannada_InterIndic.txt b/icu4c/source/data/translit/Kannada_InterIndic.txt new file mode 100644 index 00000000000..bc42c7caeac --- /dev/null +++ b/icu4c/source/data/translit/Kannada_InterIndic.txt @@ -0,0 +1,92 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# Kannada-InterIndic +\u0CC6\u0CD5>\uE047; # VOWEL SIGN EE +\u0CC6\u0CCD\u0CD6>\uE048\ue04d; # VOWEL SIGN AI +\u0CC6\u0CD6>\uE048; # VOWEL SIGN AI +\u0CC6\u0CC2\u0CD5>\uE04B; # VOWEL SIGN OO +\u0CC6\u0CC2>\uE04A; # VOWEL SIGN O +\u0CBF\u0CD5>\uE040; # VOWEL SIGN II + +\u0C82>\uE002; # SIGN ANUSVARA +\u0C83>\uE003; # SIGN VISARGA +\u0C85>\uE005; # LETTER A +\u0C86>\uE006; # LETTER AA +\u0C87>\uE007; # LETTER I +\u0C88>\uE008; # LETTER II +\u0C89>\uE009; # LETTER U +\u0C8A>\uE00A; # LETTER UU +\u0C8B>\uE00B; # LETTER VOCALIC R +\u0C8C>\uE00C; # LETTER VOCALIC L +\u0C8E>\uE00E; # LETTER E +\u0C8F>\uE00F; # LETTER EE +\u0C90>\uE010; # LETTER AI +\u0C92>\uE012; # LETTER O +\u0C93>\uE013; # LETTER OO +\u0C94>\uE014; # LETTER AU +\u0C95>\uE015; # LETTER KA +\u0C96>\uE016; # LETTER KHA +\u0C97>\uE017; # LETTER GA +\u0C98>\uE018; # LETTER GHA +\u0C99>\uE019; # LETTER NGA +\u0C9A>\uE01A; # LETTER CA +\u0C9B>\uE01B; # LETTER CHA +\u0C9C>\uE01C; # LETTER JA +\u0C9D>\uE01D; # LETTER JHA +\u0C9E>\uE01E; # LETTER NYA +\u0C9F>\uE01F; # LETTER TTA +\u0CA0>\uE020; # LETTER TTHA +\u0CA1>\uE021; # LETTER DDA +\u0CA2>\uE022; # LETTER DDHA +\u0CA3>\uE023; # LETTER NNA +\u0CA4>\uE024; # LETTER TA +\u0CA5>\uE025; # LETTER THA +\u0CA6>\uE026; # LETTER DA +\u0CA7>\uE027; # LETTER DHA +\u0CA8>\uE028; # LETTER NA +\u0CAA>\uE02A; # LETTER PA +\u0CAB>\uE02B; # LETTER PHA +\u0CAC>\uE02C; # LETTER BA +\u0CAD>\uE02D; # LETTER BHA +\u0CAE>\uE02E; # LETTER MA +\u0CAF>\uE02F; # LETTER YA +\u0CB0>\uE030; # LETTER RA +\u0CB1>\uE031; # LETTER RRA +\u0CB2>\uE032; # LETTER LA +\u0CB3>\uE033; # LETTER LLA +\u0CB5>\uE035; # LETTER VA +\u0CB6>\uE036; # LETTER SHA +\u0CB7>\uE037; # LETTER SSA +\u0CB8>\uE038; # LETTER SA +\u0CB9>\uE039; # LETTER HA +\u0CBC>\uE03C; # SIGN NUKTA +\u0CBD>\uE03D; # AVAGRAHA +\u0CBE>\uE03E; # VOWEL SIGN AA +\u0CBF>\uE03F; # VOWEL SIGN I +\u0CC1>\uE041; # VOWEL SIGN U +\u0CC2>\uE042; # VOWEL SIGN UU 
+\u0CC3>\uE043; # VOWEL SIGN VOCALIC R +\u0CC4>\uE044; # VOWEL SIGN VOCALIC RR +\u0CC6>\uE046; # VOWEL SIGN E +\u0CCC>\uE04C; # VOWEL SIGN AU +\u0CCD>\uE04D; # SIGN VIRAMA +\u0CD5>\uE055; # LENGTH MARK +\u0CD6>\uE056; # AI LENGTH MARK +\u0CDE>\uE034; # LETTER LLLA +\u0CE0>\uE060; # LETTER VOCALIC RR +\u0CE1>\uE061; # LETTER VOCALIC LL +\u0CE6>\uE066; # DIGIT ZERO +\u0CE7>\uE067; # DIGIT ONE +\u0CE8>\uE068; # DIGIT TWO +\u0CE9>\uE069; # DIGIT THREE +\u0CEA>\uE06A; # DIGIT FOUR +\u0CEB>\uE06B; # DIGIT FIVE +\u0CEC>\uE06C; # DIGIT SIX +\u0CED>\uE06D; # DIGIT SEVEN +\u0CEE>\uE06E; # DIGIT EIGHT +\u0CEF>\uE06F; # DIGIT NINE + +# eof diff --git a/icu4c/source/data/translit/Latin_InterIndic.txt b/icu4c/source/data/translit/Latin_InterIndic.txt new file mode 100644 index 00000000000..d8bfbdd9f05 --- /dev/null +++ b/icu4c/source/data/translit/Latin_InterIndic.txt @@ -0,0 +1,383 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# Latin-InterIndic + #:: NFD; + #\u0e00 reserved + #consonants + $chandrabindu=\ue001; + $anusvara=\ue002; + $visarga=\ue003; + #\u0e004 reserved + # w represents the stand-alone form + $wa=\ue005; + $waa=\ue006; + $wi=\ue007; + $wii=\ue008; + $wu=\ue009; + $wuu=\ue00a; + $wr=\ue00b; + $wl=\ue00c; + $wce=\ue00d; # LETTER CANDRA E + $wse=\ue00e; # LETTER SHORT E + $we=\ue00f; # \u090f LETTER E + $wai=\ue010; + $wco=\ue011; # LETTER CANDRA O + $wso=\ue012; # LETTER SHORT O + $wo=\ue013; # \u0913 LETTER O + $wau=\ue014; + $ka=\ue015; + $kha=\ue016; + $ga=\ue017; + $gha=\ue018; + $nga=\ue019; + $ca=\ue01a; + $cha=\ue01b; + $ja=\ue01c; + $jha=\ue01d; + $nya=\ue01e; + $tta=\ue01f; + $ttha=\ue020; + $dda=\ue021; + $ddha=\ue022; + $nna=\ue023; + $ta=\ue024; + $tha=\ue025; + $da=\ue026; + $dha=\ue027; + $na=\ue028; + $ena=\ue029; #compatibility + $pa=\ue02a; + $pha=\ue02b; + $ba=\ue02c; + $bha=\ue02d; + $ma=\ue02e; + $ya=\ue02f; + $ra=\ue030; + $rra=\ue031; + $la=\ue032; + $lla=\ue033; + $ela=\ue034; #compatibility + $va=\ue035; + $vva=\ue081; + $sha=\ue036; + $ssa=\ue037; + $sa=\ue038; + $ha=\ue039; +#\u093a Reserved +#\u093b Reserved + $nukta=\ue03c; + $avagraha=\ue03d; # SIGN AVAGRAHA + # represents the dependent form + $aa=\ue03e; + $i=\ue03f; + $ii=\ue040; + $u=\ue041; + $uu=\ue042; + $rh=\ue043; + $lh=\ue044; + $ce=\ue045; #VOWEL SIGN CANDRA E + $se=\ue046; #VOWEL SIGN SHORT E + $e=\ue047; + $ai=\ue048; + $co=\ue049; # VOWEL SIGN CANDRA O + $so=\ue04a; # VOWEL SIGN SHORT O + $o=\ue04b; # \u094b + $au=\ue04c; + $virama=\ue04d; +# \u094e Reserved +# \u094f Reserved + $om = \ue050; # OM +# \u0951>; # UNMAPPED STRESS SIGN UDATTA +# \u0952>; # UNMAPPED STRESS SIGN ANUDATTA +# \u0953>; # UNMAPPED GRAVE ACCENT +# \u0954>; # UNMAPPED ACUTE ACCENT + $lm = \ue055;# Telugu Length Mark + $ailm=\ue056;# AI Length Mark + $aulm=\ue057;# AU Length Mark + #urdu compatibity forms + $uka=\ue058; + $ukha=\ue059; + 
$ugha=\ue05a; + $ujha=\ue05b; + $uddha=\ue05c; + $udha=\ue05d; + $ufa=\ue05e; + $uya=\ue05f; + $wrr=\ue060; + $wll=\ue061; + $rrh=\ue062; + $llh=\ue063; + $danda=\ue064; + $doubleDanda=\ue065; + $zero=\ue066; # DIGIT ZERO + $one=\ue067; # DIGIT ONE + $two=\ue068; # DIGIT TWO + $three=\ue069; # DIGIT THREE + $four=\ue06a; # DIGIT FOUR + $five=\ue06b; # DIGIT FIVE + $six=\ue06c; # DIGIT SIX + $seven=\ue06d; # DIGIT SEVEN + $eight=\ue06e; # DIGIT EIGHT + $nine=\ue06f; # DIGIT NINE + # For all other scripts + $ecp0=\ue070; + $ecp1=\ue071; + $ecp2=\ue072; + $ecp3=\ue073; + $ecp4=\ue074; + $ecp5=\ue075; + $ecp6=\ue076; + $ecp7=\ue077; + $ecp8=\ue078; + $ecp9=\ue079; + $ecpA=\ue07a; + $ecpB=\ue07b; + $ecpC=\ue07c; + $ecpD=\ue07d; + $ecpE=\ue07e; + $ecpF=\ue07f; +# \u0970>; # UNMAPPED ABBREVIATION SIGN + $depVowelAbove=[\ue03e-\ue040\ue045-\ue04c]; + $depVowelBelow=[\ue041-\ue044]; + $endThing=[$danda$doubleDanda]; + # $x was originally called '&'; $z was '%' + $x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co]; + $z=[bcdfghjklmnpqrstvwxyz]; + $consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]]; + \u0315 > $avagraha; + \u0303>$chandrabindu$anusvara; + m\u0310>$chandrabindu; + h\u0323>$visarga; + x>$ka$virama$sa; +# convert to independent forms at start of word or syllable: +# dependent forms for roundtrip + \u0314a\u0304>$aa; + \u0314ai>$ai; + \u0314au>$au; + \u0314ii>$ii; + \u0314i\u0304>$ii; + \u0314i>$i; + \u0314u\u0304>$uu; + \u0314u>$u; + \u0314r\u0325\u0304>$rrh; + \u0314r\u0325>$rh; + \u0314l\u0325\u0304>$llh; + \u0314lh>$lh; + \u0314l\u0325>$lh; + \u0314e\u0304>$e; + \u0314o\u0304>$o; + \u0314a>; + \u0314e\u0306>$ce; + \u0314o\u0306>$co; + \u0314e>$se; + \u0314o>$so; + +# preceeded by consonants + $consonants{ a\u0304>$aa; + $consonants{ ai>$ai; + $consonants{ au>$au; + $consonants{ ii>$ii; + $consonants{ i\u0304>$ii; + $consonants{ i>$i; + 
$consonants{ u\u0304>$uu; + $consonants{ u>$u; + $consonants{ r\u0325\u0304>$rrh; + $consonants{ r\u0325a>$rh; + $consonants{ r\u0325>$rh; + $consonants{ l\u0325\u0304>$llh; + $consonants{ lh>$lh; + $consonants{ l\u0325>$lh; + $consonants{ e\u0304>$e; + $consonants{ o\u0304>$o; + $consonants{ e\u0306>$ce; + $consonants{ o\u0306>$co; + $consonants{ e>$se; + $consonants{ o>$so; + +# e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai}) + a\u0304>$waa; + ai>$wai; + au>$wau; + i\u0304>$wii; + i>$wi; + u\u0304>$wuu; + u>$wu; + r\u0325\u0304>$wrr; + r\u0325>$wr; + l\u0325\u0304>$wll; + lh>$wl; + l\u0325>$wl; + e\u0304>$we; + o\u0304>$wo; + a>$wa; + e\u0306>$wce; + o\u0306>$wco; + e>$wse; + ''om>$om; + o>$wso; + + # rules for anusvara + n}r\u0325 > $na|$virama; + n}l\u0325 > $na|$virama; + n}na > $na|$virama; + n\u0307}[kg] > $anusvara; + n\u0307}n\u0307 > $anusvara; + n\u0304}[cj] > $anusvara; + n\u0304}n\u0303 > $anusvara; + n\u0323}[tdn]\u0323 > $anusvara; + n}[tdn] > $anusvara; + m}[pbm] > $anusvara; + n}[ylvshr] > $anusvara; + m\u0307 > $anusvara; + + #urdu compatibility + q>$uka|$virama; + k\u0331h\u0331>$ukha |$virama; + g\u0307> $ugha | $virama; + z > $ujha |$virama; + f > $ufa|$virama; + + # dev + y\u0307>$uya|$virama; + l\u0331>$ela|$virama; + n\u0331>$ena|$virama; + n\u0307>$nga|$virama; + n\u0303>$nya|$virama; + n\u0323>$nna|$virama; + t\u0323h>$ttha|$virama; + t\u0323>$tta|$virama; + r\u0323h>$udha|$virama; + r\u0323>$uddha|$virama; + d\u0323h>$ddha|$virama; + d\u0323>$dda|$virama; + kh>$kha|$virama; + k>$ka|$virama; + gh>$gha|$virama; + g>$ga|$virama; + ch>$cha|$virama; + c>$ca|$virama; + jh>$jha|$virama; + j>$ja|$virama; + ny>$nya|$virama; + tth>$ttha|$virama; + ddh>$ddha|$virama; + th>$tha|$virama; + t>$ta|$virama; + dh>$dha|$virama; + d>$da|$virama; + n>$na|$virama; + ph>$pha|$virama; + p>$pa|$virama; + bh>$bha|$virama; + b>$ba|$virama; + m>$ma|$virama; + y>$ya|$virama; + r\u0331>$rra|$virama; + r>$ra|$virama; + l\u0323>$lla|$virama; + 
l>$la|$virama; + v>$va|$virama; + w\u0307>$vva|$virama; + w>$va|$virama; + sh>$sha|$virama; + ss>$ssa|$virama; + s\u0323>$ssa|$virama; + s\u0301>$sha|$virama; + s>$sa|$virama; + h>$ha|$virama; + '.'>$danda; + $danda'.'>$doubleDanda; + $depVowelAbove{'~'>$anusvara; + $depVowelBelow{'~'>$chandrabindu; +# convert to dependent forms after consonant with no vowel: +# e.g. kai -> {ka}{virama}ai -> {ka}{ai} + #$virama aa>$aa; + $virama a\u0304>$aa; + $virama ai>$ai; + $virama au>$au; + $virama ii>$ii; + $virama i\u0304>$ii; + $virama i>$i; + #$virama uu>$uu; + $virama u\u0304>$uu; + $virama u>$u; + #$virama rrh>$rrh; + $virama r\u0325\u0304>$rrh; + #$virama rh>$rh; + $virama r\u0325a>$rh; + $virama r\u0325>$rh; + $virama l\u0325\u0304>$llh; + $virama lh>$lh; + $virama l\u0325>$lh; + $virama e\u0304>$e; + $virama o\u0304>$o; + $virama a>; + $virama e\u0306>$ce; + $virama o\u0306>$co; + $virama e>$se; + $virama o>$so; + + +# otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai} + #$virama''aa>$waa; + $virama''a\u0304>$waa; + $virama''ai>$wai; + $virama''au>$wau; + #$virama''ii>$wii; + $virama''i\u0304>$wii; + $virama''i>$wi; + #$virama''uu>$wuu; + $virama''u\u0304>$wuu; + $virama''u>$wu; + #$virama''rrh>$wrr; + $virama''r\u0325\u0304>$wrr; + #$virama''rh>$wr; + $virama''r\u0325>$wr; + $virama''l\u0325\u0304>$wll; + #$virama''lh>$wl; + $virama''l\u0325>$wl; + $virama''e\u0304>$we; + $virama''o\u0304>$wo; + $virama''a>$wa; + $virama''e\u0306>$wce; + $virama''o\u0306>$wco; + $virama''e>$wse; + $virama''o>$wso; +# no virama + ''a\u0304>$waa; + ''ai>$wai; + ''au>$wau; + ''i\u0304>$wii; + ''i>$wi; + ''u\u0304>$wuu; + ''u>$wu; + ''r\u0325\u0304>$wrr; + ''r\u0325>$wr; + ''l\u0325\u0304>$wll; + ''l\u0325>$wl; + ''e\u0304>$we; + ''o\u0304>$wo; + ''a>$wa; + ''e\u0306>$wce; + ''o\u0306>$wco; + ''e>$wse; + ''o>$wso; + + $virama } [$z] > $virama; + $virama } ' ' > $virama ; + $virama}$endThing>; + 0>$zero; + 1>$one; + 2>$two; + 3>$three; + 4>$four; + 5>$five; 
+ 6>$six; + 7>$seven; + 8>$eight; + 9>$nine; + ''>; + #:: NFC (NFD) ; diff --git a/icu4c/source/data/translit/Latin_Jamo.txt b/icu4c/source/data/translit/Latin_Jamo.txt new file mode 100644 index 00000000000..9d6591d7a6f --- /dev/null +++ b/icu4c/source/data/translit/Latin_Jamo.txt @@ -0,0 +1,522 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in +#- the INDEX file. This transliterator is, by itself, not +#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or +#- inverses thereof. + +# Transliteration from Latin characters to Korean script is done in +# two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul +# transliteration is done algorithmically following Unicode 3.0 +# section 3.11. This file implements the Latin to Jamo +# transliteration using rules. + +# Jamo occupy the block 1100-11FF. Within this block there are three +# groups of characters: initial consonants or choseong (I), medial +# vowels or jungseong (M), and trailing consonants or jongseong (F). +# Standard Korean syllables are of the form I+M+F*. + +# Section 3.11 describes the use of 'filler' jamo to convert +# nonstandard syllables to standard form: the choseong filler 115F and +# the junseong filler 1160. In this transliterator, we will not use +# 115F or 1160. + +# We will, however, insert two 'null' jamo to make foreign words +# conform to Korean syllable structure. These are the null initial +# consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text, +# we will use the separator in order to disambiguate strings, +# e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G). + +# We will not use all of the characters in the jamo block. 
We will +# only use the 19 initials, 21 medials, and 27 finals possessing a +# jamo short name as defined in section 4.4 of the Unicode book. + +# Rules of thumb. These guidelines provide the basic framework +# for the rules. They are phrased in terms of Latin-Jamo transliteration. +# The Jamo-Latin rules derive from these, since the Jamo-Latin rules are +# just context-free transliteration of jamo to corresponding short names, +# with the addition of separators to maintain round-trip integrity +# in the context of the Latin-Jamo rules. + +# A sequence of vowels: +# - Take the longest sequence you can. If there are too many, or you don't +# have a starting consonant, introduce a 110B as necessary. + +# A sequence of consonants. +# - First join the double consonants: G + G -> GG +# - In the remaining list, +# -- If there is no preceding vowel, take the first consonant, and insert EU +# after it. Continue with the rest of the consonants. +# -- If there is one consonant, attach to the following vowel +# -- If there are two consonants and a following vowel, attach one to the +# preceding vowel, and one to the following vowel. +# -- If there are more than two consonants, join the first two together if you +# can: L + G => LG +# -- If you still end up with more than 2 consonants, insert EU after the +# first one, and continue with the rest of the consonants. + +#---------------------------------------------------------------------- +# Variables + +# Some latin consonants or consonant pairs only occur as initials, and +# some only as finals, but some occur as both. This makes some jamo +# consonants ambiguous when transliterated into latin. 
+# Initial only: IEUNG BB DD JJ R +# Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ +# Initial and Final: B C D G GG H J K M N P S SS T + + $Gi = \u1100; + $GGi = \u1101; + $Ni = \u1102; + $Di = \u1103; + $DD = \u1104; + $R = \u1105; + $Mi = \u1106; + $Bi = \u1107; + $BB = \u1108; + $Si = \u1109; + $SSi = \u110A; + $IEUNG = \u110B; # null initial, inserted during Latin-Jamo + $Ji = \u110C; + $JJ = \u110D; + $Ci = \u110E; + $Ki = \u110F; + $Ti = \u1110; + $Pi = \u1111; + $Hi = \u1112; + + $A = \u1161; + $AE = \u1162; + $YA = \u1163; + $YAE = \u1164; + $EO = \u1165; + $E = \u1166; + $YEO = \u1167; + $YE = \u1168; + $O = \u1169; + $WA = \u116A; + $WAE = \u116B; + $OE = \u116C; + $YO = \u116D; + $U = \u116E; + $WEO = \u116F; + $WE = \u1170; + $WI = \u1171; + $YU = \u1172; + $EU = \u1173; # null medial, inserted during Latin-Jamo + $YI = \u1174; + $I = \u1175; + + $Gf = \u11A8; + $GGf = \u11A9; + $GS = \u11AA; + $Nf = \u11AB; + $NJ = \u11AC; + $NH = \u11AD; + $Df = \u11AE; + $L = \u11AF; + $LG = \u11B0; + $LM = \u11B1; + $LB = \u11B2; + $LS = \u11B3; + $LT = \u11B4; + $LP = \u11B5; + $LH = \u11B6; + $Mf = \u11B7; + $Bf = \u11B8; + $BS = \u11B9; + $Sf = \u11BA; + $SSf = \u11BB; + $NG = \u11BC; + $Jf = \u11BD; + $Cf = \u11BE; + $Kf = \u11BF; + $Tf = \u11C0; + $Pf = \u11C1; + $Hf = \u11C2; + + $jamoInitial = [\u1100-\u1112]; + + $jamoMedial = [\u1161-\u1175]; + + $latinInitial = [bcdghjkmnprst]; + + # Any character in the latin transliteration of a medial + $latinMedial = [aeiouwy]; + + # The last character of the latin transliteration of a medial + $latinMedialEnd = [aeiou]; + + # Disambiguation separator + $sep = \'; + +#---------------------------------------------------------------------- +# Jamo-Latin + +# Jamo to latin is relatively simple, since it is the latin that is +# ambiguous. 
Most rules are straightforward, and we encode them below +# as simple add-on back rule, e.g.: + +# $jamoMedial {bs} > $BS; + +# becomes + +# $jamoMedial {bs} <> $BS; + +# Furthermore, we don't care about the ordering for Jamo-Latin because +# we are going from single characters, so we can very easily piggyback +# on the Latin-Jamo. + +# The main issue with Jamo-Latin is when to insert separators. +# Separators are inserted to obtain correct round trip behavior. For +# example, the sequence Ki A Gf Gi E, if transliterated to "kagge", +# would then round trip to Ki A GGi E. To prevent this, we insert a +# separator: "kag-ge". IMPORTANT: The need for separators depends +# very specifically on the behavior of the Latin-Jamo rules. A change +# in the Latin-Jamo behavior can completely change the way the +# separator insertion must be done. + +# First try to preserve actual separators in the jamo text by doubling +# them. This fixes problems like: +# (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol +# => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional +# -- if we don't care about losing separators in the jamo, we can delete +# this rule. + + $sep $sep <> $sep; + +# Triple consonants. For three consonants "axxx" we insert a +# separator between the first and second "x" if XXf, Xf, and Xi all +# exist, and we have A Xf XXi. This prevents the reverse +# transliteration to A XXf Xi. + + $sep < $latinMedialEnd g {} $GGi; + $sep < $latinMedialEnd s {} $SSi; + +# For vowels the rule is similar. If there is a vowel "ae" such that +# "a" by itself and "e" by itself are vowels, then we want to map A E +# to "a-e" so as not to round trip to AE. However, in the text Ki EO +# IEUNG E we don't need to map to "keo-e". "keoe" suffices. For +# vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be +# tested. NOTE: These rules used to have a left context of +# $latinInitial instead of [^$latinMedial]. 
The problem with this is +# sequences where an initial IEUNG is transliterated away: +# (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O) + + $sep < [^$latinMedial] [y w] e {} [$O $OE]; + $sep < [^$latinMedial] e {} [$O $OE $U]; + $sep < [^$latinMedial] [o a] {} [$E $EO $EU]; + $sep < [^$latinMedial] [w y] a {} [$E $EO $EU]; + +# Similar to the above, but with an intervening $IEUNG. + + $sep < [^$latinMedial] [y w] e {} $IEUNG [$O $OE]; + $sep < [^$latinMedial] e {} $IEUNG [$O $OE $U]; + $sep < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU]; + $sep < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU]; + +# Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E, +# where Xi also exists, must be transliterated as "ax-e" to prevent +# the round trip conversion to A Xi E. + + $sep < $latinMedialEnd b {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd c {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd d {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd g {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd h {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd j {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd k {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd m {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd n {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd p {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd s {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd t {} $IEUNG $jamoMedial; + +# Double finals followed by IEUNG. Similar to the single finals +# followed by IEUNG. Any latin consonant pair X Y, between medials, +# that we would split by Latin-Jamo, we must handle when it occurs as +# part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi +# E. 
+ + $sep < $latinMedialEnd b s {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd g g {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd g s {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd l b {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd l g {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd l h {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd l m {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd l p {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd l s {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd l t {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd n g {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd n h {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd n j {} $IEUNG $jamoMedial; + $sep < $latinMedialEnd s s {} $IEUNG $jamoMedial; + +# Split doubles. Text of the form A Xf Xi E, where XXi also occurs, +# we transliterate as "ax-xe" to prevent round trip transliteration as +# A XXi E. + + $sep < $latinMedialEnd b {} $Bi $jamoMedial; + $sep < $latinMedialEnd d {} $Di $jamoMedial; + $sep < $latinMedialEnd j {} $Ji $jamoMedial; + $sep < $latinMedialEnd g {} $Gi $jamoMedial; + $sep < $latinMedialEnd s {} $Si $jamoMedial; + +# XYY. This corresponds to the XYY rule in Latin-Jamo. By default +# Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result, +# "xyy" forms that correspond to XYf Yi must be transliterated as +# "xy-y". + + $sep < $latinMedialEnd b s {} [$Si $SSi]; + $sep < $latinMedialEnd g s {} [$Si $SSi]; + $sep < $latinMedialEnd l b {} [$Bi $BB]; + $sep < $latinMedialEnd l g {} [$Gi $GGi]; + $sep < $latinMedialEnd l s {} [$Si $SSi]; + $sep < $latinMedialEnd n g {} [$Gi $GGi]; + $sep < $latinMedialEnd n j {} [$Ji $JJ]; + +# Deletion of IEUNG is handled below. + +#---------------------------------------------------------------------- +# Latin-Jamo + +# [Basic, context-free Jamo-Latin rules are embedded here too. See +# above.] 
+ +# Split digraphs: Text of the form 'axye', where 'xy' is a final +# digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and +# 'e' are medials, we want to transliterate this as A Xf Yi E rather +# than A XYf IEUNG E. We do NOT include text of the form "axxe", +# since that is handled differently below. These rules are generated +# programmatically from the jamo data. + + $jamoMedial {b s} $latinMedial > $Bf $Si; + $jamoMedial {g s} $latinMedial > $Gf $Si; + $jamoMedial {l b} $latinMedial > $L $Bi; + $jamoMedial {l g} $latinMedial > $L $Gi; + $jamoMedial {l h} $latinMedial > $L $Hi; + $jamoMedial {l m} $latinMedial > $L $Mi; + $jamoMedial {l p} $latinMedial > $L $Pi; + $jamoMedial {l s} $latinMedial > $L $Si; + $jamoMedial {l t} $latinMedial > $L $Ti; + $jamoMedial {n g} $latinMedial > $Nf $Gi; + $jamoMedial {n h} $latinMedial > $Nf $Hi; + $jamoMedial {n j} $latinMedial > $Nf $Ji; + +# Single consonants are initials: Text of the form 'axe', where 'x' +# can be an initial or a final, and 'a' and 'e' are medials, we want +# to transliterate as A Xi E rather than A Xf IEUNG E. + + $jamoMedial {b} $latinMedial > $Bi; + $jamoMedial {c} $latinMedial > $Ci; + $jamoMedial {d} $latinMedial > $Di; + $jamoMedial {g} $latinMedial > $Gi; + $jamoMedial {h} $latinMedial > $Hi; + $jamoMedial {j} $latinMedial > $Ji; + $jamoMedial {k} $latinMedial > $Ki; + $jamoMedial {m} $latinMedial > $Mi; + $jamoMedial {n} $latinMedial > $Ni; + $jamoMedial {p} $latinMedial > $Pi; + $jamoMedial {s} $latinMedial > $Si; + $jamoMedial {t} $latinMedial > $Ti; + +# Doubled initials. The sequence "axxe", where XX exists as an initial +# (XXi), and also Xi and Xf exist (true of all digraphs XX), we want +# to transliterate as A XXi E, rather than split to A Xf Xi E. + + $jamoMedial {b b} $latinMedial > $BB; + $jamoMedial {d d} $latinMedial > $DD; + $jamoMedial {j j} $latinMedial > $JJ; + $jamoMedial {g g} $latinMedial > $GGi; + $jamoMedial {s s} $latinMedial > $SSi; + +# XYY. 
Because doubled consonants bind more strongly than XY +# consonants, we must handle the sequence "axyy" specially. Here XYf +# and YYi must exist. In these cases, we map to Xf YYi rather than +# XYf. + + $jamoMedial {b} s s > $Bf; + $jamoMedial {g} s s > $Gf; + $jamoMedial {l} b b > $L; + $jamoMedial {l} g g > $L; + $jamoMedial {l} s s > $L; + $jamoMedial {n} g g > $Nf; + $jamoMedial {n} j j > $Nf; + +# Finals: Attach consonant with preceding medial to preceding medial. +# Do this BEFORE mapping consonants to initials. Longer keys must +# precede shorter keys that they start with, e.g., the rule for 'bs' +# must precede 'b'. + +# [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this +# block for Jamo-Latin.] + + $jamoMedial {bs} <> $BS; + $jamoMedial {b} <> $Bf; + $jamoMedial {c} <> $Cf; + $jamoMedial {d} <> $Df; + $jamoMedial {gg} <> $GGf; + $jamoMedial {gs} <> $GS; + $jamoMedial {g} <> $Gf; + $jamoMedial {h} <> $Hf; + $jamoMedial {j} <> $Jf; + $jamoMedial {k} <> $Kf; + $jamoMedial {lb} <> $LB; $jamoMedial {lg} <> $LG; + $jamoMedial {lh} <> $LH; + $jamoMedial {lm} <> $LM; + $jamoMedial {lp} <> $LP; + $jamoMedial {ls} <> $LS; + $jamoMedial {lt} <> $LT; + $jamoMedial {l} <> $L; + $jamoMedial {m} <> $Mf; + $jamoMedial {ng} <> $NG; + $jamoMedial {nh} <> $NH; + $jamoMedial {nj} <> $NJ; + $jamoMedial {n} <> $Nf; + $jamoMedial {p} <> $Pf; + $jamoMedial {ss} <> $SSf; + $jamoMedial {s} <> $Sf; + $jamoMedial {t} <> $Tf; + +# Initials: Attach single consonant to following medial. Do this +# AFTER mapping finals. Longer keys must precede shorter keys that +# they start with, e.g., the rule for 'gg' must precede 'g'. + +# [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within +# this block for Jamo-Latin.] 
+ + {gg} $latinMedial <> $GGi; + {g} $latinMedial <> $Gi; + {n} $latinMedial <> $Ni; + {dd} $latinMedial <> $DD; + {d} $latinMedial <> $Di; + {r} $latinMedial <> $R; + {m} $latinMedial <> $Mi; + {bb} $latinMedial <> $BB; + {b} $latinMedial <> $Bi; + {ss} $latinMedial <> $SSi; + {s} $latinMedial <> $Si; + {jj} $latinMedial <> $JJ; + {j} $latinMedial <> $Ji; + {c} $latinMedial <> $Ci; + {k} $latinMedial <> $Ki; + {t} $latinMedial <> $Ti; + {p} $latinMedial <> $Pi; + {h} $latinMedial <> $Hi; + +# 'r' in final position. Because of the equivalency of the 'l' and +# 'r' jamo (the glyphs are the same), we try to provide the same +# equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled +# below. If we see an 'r' in an apparent final position, treat it +# like 'l'. For example, "karka" => Ki A R EU Ki A without this rule. +# Instead, we want Ki A L Ki A. + + $jamoMedial {r} $latinInitial > | l; + +# Initial + Final: If we match the next rule, we have initial then +# final consonant with no intervening medial. We insert the null +# vowel BEFORE it to create a well-formed syllable. (In the next rule +# we insert a null vowel AFTER an anomalous initial.) + + $jamoInitial {} [bcdghjklmnpst] > $EU; + +# Initial + X: This block matches an initial consonant not followed by +# a medial. We insert the null vowel after it. We handle double +# initials explicitly here; for single initial consonants we insert EU +# (as Latin) after them and let standard rules do the rest. + +# BREAKS ROUND TRIP INTEGRITY + + gg > $GGi $EU; + dd > $DD $EU; + bb > $BB $EU; + ss > $SSi $EU; + jj > $JJ $EU; + + ([bcdghjkmnprst]) > | $1 eu; + +# X + Final: Finally we have to deal with a consonant that can only be +# interpreted as a final (not an initial) and which is preceded +# neither by an initial nor a medial. It is the start of the +# syllable, but cannot be. Most of these will already be handled by +# the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng' +# 'nh' 'nj'. 
The only problem is 'l' and digraphs starting with 'l'. +# For this isolated case, we could add a null initial and medial, +# which would give "la" => IEUNG EU L IEUNG A, for example. A more +# economical solution is to transliterate isolated "l" (that is, +# initial "l") to "r". (Other similar conversions of consonants that +# occur neither as initials nor as finals are handled below.) + + l > | r; + +# Medials. If a medial is preceded by an initial, then we proceed +# normally. As usual, longer keys must precede shorter ones. + +# [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within +# this block for Jamo-Latin.] + + $jamoInitial {ae} <> $AE; + $jamoInitial {a} <> $A; + $jamoInitial {eo} <> $EO; + $jamoInitial {eu} <> $EU; + $jamoInitial {e} <> $E; + $jamoInitial {i} <> $I; + $jamoInitial {oe} <> $OE; + $jamoInitial {o} <> $O; + $jamoInitial {u} <> $U; + $jamoInitial {wae} <> $WAE; + $jamoInitial {wa} <> $WA; + $jamoInitial {weo} <> $WEO; + $jamoInitial {we} <> $WE; + $jamoInitial {wi} <> $WI; + $jamoInitial {yae} <> $YAE; + $jamoInitial {ya} <> $YA; + $jamoInitial {yeo} <> $YEO; + $jamoInitial {ye} <> $YE; + $jamoInitial {yi} <> $YI; + $jamoInitial {yo} <> $YO; + $jamoInitial {yu} <> $YU; + +# We may see an anomalous isolated 'w' or 'y'. In that case, we +# interpret it as 'wi' and 'yu', respectively. + +# BREAKS ROUND TRIP INTEGRITY + + $jamoInitial {w} > | wi; + $jamoInitial {y} > | yu; + +# Otherwise, insert a null consonant IEUNG before the medial (which is +# still an untransliterated latin vowel). + + ($latinMedial) > $IEUNG | $1; + +# Convert non-jamo latin consonants to equivalents. These occur as +# neither initials nor finals in jamo. 'l' occurs as a final, but not +# an initial; it is handled above. The following letters (left hand +# side) will never be output by Jamo-Latin. + + f > | p; + q > | k; + v > | b; + x > | ks; + z > | s; + +# Delete separators (Latin-Jamo). + + $sep > ; + +# Delete null consonants (Jamo-Latin). 
Do NOT delete null EU vowels, +# since these may also occur in text. + + < $IEUNG; + +#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in +#- the INDEX file. This transliterator is, by itself, not +#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or +#- inverses thereof. + +# eof diff --git a/icu4c/source/data/translit/Latin_Katakana.txt b/icu4c/source/data/translit/Latin_Katakana.txt new file mode 100644 index 00000000000..5b7fc17605c --- /dev/null +++ b/icu4c/source/data/translit/Latin_Katakana.txt @@ -0,0 +1,495 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# note: a global filter is more efficient, but MUST include all source chars +#:: [\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]] ; +# MINIMAL FILTER GENERATED FOR: Latin-Katakana +### WARNING -- must add width filter, both here and below!!! 
### +:: [[\u1100-\u1112\u111A\u1121\u1160-\u1175\u11AA\u11AC-\u11AD\u11B0-\u11B5\u2190-\u2193\u2502\u25A0\u25CB\u3000-\u3002\u300C-\u300D\u3099-\u309A\u30A1-\u30ED\u30EF\u30F2-\u30F4\u30F7\u30FA-\u30FC\uFF01-\uFF5E\uFFE0-\uFFE6][',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]] ; + +:: [:Latin:] fullwidth-halfwidth (); +:: NFD (NFC); +:: Lower (); # whenever transliterating from cased to uncased script, include this +# :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese + +# Uses modified Hepburn. Small changes to make unambiguous. + +# | Kunrei-shiki: Hepburn/MHepburn +# | ------------------------------ +# | si: shi +# | si ~ya: sha +# | si ~yu: shu +# | si ~yo: sho +# | zi: ji +# | zi ~ya: ja +# | zi ~yu: ju +# | zi ~yo: jo +# | ti: chi +# | ti ~ya: cha +# | ti ~yu: chu +# | ti ~yo: cho +# | tu: tsu +# | di: ji/dji +# | du: zu/dzu +# | hu: fu + +# | For foreign words: +# | ----------------- +# | se ~i si +# | si ~e she +# | +# | ze ~i zi +# | zi ~e je +# | +# | te ~i ti +# | ti ~e che +# | te ~u tu +# | +# | de ~i di +# | de ~u du +# | de ~i di +# | +# | he ~u: hu +# | hu ~a fa +# | hu ~i fi +# | hu ~e fe +# | hu ~o fo + +# Most small forms are generated, but if necessary +# explicit small forms are given with ~a, ~ya, etc. 
+ +#------------------------------------------------------ +# Variables + +$vowel = [aeiou] ; +$consonant = [bcdfghjklmnpqrstvwxyz] ; +$macron = \u0304 ; + +# Variables used for doubled-consonants with tsu + +$kana = [\u3041-\u3094] ; + +$voice = [\u3099\u309B]; +$semivoice = [\u309A\u309C]; + +$k_start = [カキクケコかきくけこ] ; + +$s_start = [サシスセソさしすせそ] ; + +$j_start = [シし] $voice ; + +$t_start = [タチツテトたちつてと] ; + +$n_start = [ナニヌネノンなにぬねの] ; + +$h_start = [ハヒヘホはひへほ] ; +$f_start = [フふ] ; + +$m_start = [マミムメモまみむめも] ; + +$y_start = [ヤユヨやゆよ] ; + +$r_start = [ラリルレロらりるれろ] ; + +$w_start = [ワヰヱヲわゐゑを] ; + +$v_start = [ワヰヱヲ]゙ ; + +# if ン is followed by $n_quoter, then it needs an +# apostrophe after its romaji form to disambiguate it. +# e.g., ン ア ! = ナ, so represent as "n'a", not "na". + +$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ; + +$small_y = [ャィュェョ] ; + +$iteration = \u309D ; + +#------------------------------------------------------ +# katakana rules + +# Punctuation + +'.' <> 。; +',' <> 、; +# ' ' } [a-z] > ; # delete spaces before latin +# ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before hiragana + +# Iteration Mark +# Copy previous letter & marks + +# TODO +# | $1 $1 < ($kana [[:M:]$voice$semivoice]?) 
$iteration + +# Specials for katakana -- not shared with hiragana + +va <> ヷ ; +vi <> ヸ ; +ve <> ヹ ; +vo <> ヺ ; +'~ka' <> ヵ ; +'~ke' <> ヶ ; + +# ~~~ begin shared rules ~~~ + +#special + +ya < '~'ャ; +yi < '~'ィ ; +yu < '~'ュ; +ye < '~'ェ; +yo < '~'ョ; + +#normal + +a <> ア ; + +b | '~' < ヒ ゙} $small_y ; +by } $vowel > ビ | '~y' ; + +ba <> バ ; +bi <> ビ ; +bu <> ブ ; +be <> ベ ; +bo <> ボ ; + +c } i > | s ; +c } e > | s ; + +da <> ダ ; +di <> ディ ; +du <> デゥ ; +de <> デ ; +do <> ド ; +dzu <> ヅ ; +dja < ヂャ ; +dji'~i' < ヂィ ; # liu +dju < ヂュ ; +dje < ヂェ ; +djo < ヂョ ; +dji <> ヂ ; +dj } $vowel > ヂ | '~y' ; + +# TODO: QUESTION: use ĵĴżŻ instead of dj, dz + +cha < チャ ; +chi'~i' < チィ ; # liu +chu < チュ ; +che < チェ ; +cho < チョ ; +chi <> チ ; +ch } $vowel > チ | '~y' ; + +e <> エ ; + +g | '~' < ギ} $small_y ; +gy } $vowel > ギ | '~y' ; + +ga <> ガ ; +gi <> ギ ; +gu <> グ ; +ge <> ゲ ; +go <> ゴ ; + +i <> イ ; + +# j } $vowel > ジ | '~y' ; + +ja <> ジャ ; +ji'~i' < ジィ ; # liu +ju <> ジュ ; +je <> ジェ ; +jo <> ジョ ; +ji <> ジ ; + +k | '~' < キ} $small_y ; +ky } $vowel > キ | '~y' ; + +ka <> カ ; +ki <> キ ; +ku <> ク ; +ke <> ケ ; +ko <> コ ; + +m | '~' < ミ} $small_y ; +my } $vowel > ミ | '~y' ; + +ma <> マ ; +mi <> ミ ; +mu <> ム ; +me <> メ ; +mo <> モ ; + +m } [pbfv] > ン ; + +n | '~' < ニ } $small_y ; +ny } $vowel > ニ | '~y' ; + +na <> ナ ; +ni <> ニ ; +nu <> ヌ ; +ne <> ネ ; +no <> ノ ; + +o <> オ ; + +p | '~' < ピ } $small_y ; +py } $vowel > ピ | '~y' ; + +pa <> パ ; +pi <> ピ ; +pu <> プ ; +pe <> ペ ; +po <> ポ ; + +h | '~' < ヒ } $small_y ; +hy } $vowel > ヒ | '~y' ; + +ha <> ハ ; +hi <> ヒ ; +hu <> ヘゥ ; +he <> ヘ ; +ho <> ホ ; + +# f | '~' < フ } $small_y ; +# f } $vowel > フ | '~' ; + +fa <> ファ ; +fi <> フィ ; +fe <> フェ ; +fo <> フォ ; +fu <> フ ; + +r | '~' < リ } $small_y ; +ry } $vowel > リ | '~y' ; + +ra <> ラ ; +ri <> リ ; +ru <> ル ; +re <> レ ; +ro <> ロ ; + +za <> ザ ; +zi <> ゼィ ; +zu <> ズ ; +ze <> ゼ ; +zo <> ゾ ; + +sa <> サ ; +si <> セィ ; +su <> ス ; +se <> セ ; +so <> ソ ; + +sha < シャ ; +shi'~i' < シィ ; # liu +shu < シュ ; +she < シェ ; +sho < ショ ; 
+shi <> シ ; +sh } $vowel > シ | '~y' ; + +ta <> タ ; +ti <> ティ ; +tu <> テゥ ; +te <> テ ; +to <> ト ; + +tsu <> ツ ; + +# v } $vowel > ヴ | '~' ; + +#'v~a' < ヴァ ; # liu +#'v~i' < ヴィ ; # liu +#'v~e' < ヴェ ; # liu +#'v~o' < ヴォ ; # liu +vu <> ヴ ; + +u <> ウ ; + +# w } $vowel > ウ | '~' ; + +wa <> ワ ; +wi <> ヰ ; +wu > ウ ; +we <> ヱ ; +wo <> ヲ ; + +ya <> ヤ ; +yi > イ ; +yu <> ユ ; +ye > エ ; +yo <> ヨ ; + +# double consonants + +#specials +s } sh > ッ ; +t } ch > ッ ; + +#voiced + +j } j <> ッ } $j_start ; +b } b <> ッ } [$h_start$f_start] $voice; +d } d <> ッ } $t_start $voice; +g } g <> ッ } $k_start $voice; +p } p <> ッ } [$h_start$f_start] $semivoice; +# v } v <> ッ } [ワヰウヱヲう] $voice ; +z } z <> ッ } $s_start $voice; +v } v <> ッ } $v_start; + +# normal + +k } k <> ッ } $k_start ; +m } m <> ッ } $m_start ; +n } n <> ッ } $n_start ; +h } h <> ッ } $h_start ; +f } f <> ッ } $f_start ; +r } r <> ッ } $r_start ; +t } t <> ッ } $t_start ; +s } s <> ッ } $s_start ; + +w } w <> ッ } $w_start; +y } y <> ッ } $y_start; + +# completeness +x } x > ッ ; +c } k > ッ ; +c } c > ッ ; +c } q > ッ ; +l } l > ッ ; +q } q > ッ ; +# y } y > ッ ; +# w } w > ッ ; + +# prolonged vowel mark. 
this indicates a doubling of +# the preceding vowel sound + +#a < a { ー ; # liu +#e < e { ー ; # liu +#i < i { ー ; # liu +#o < o { ー ; # liu +#u < u { ー ; # liu + +$macron <> ー ; + +# small forms + +'~a' <> ァ ; +'~i' <> ィ ; +'~u' <> ゥ ; +'~e' <> ェ ; +'~o' <> ォ ; +'~tsu' <> ッ ; +'~wa' <> ヮ ; +'~ya' <> ャ ; +'~yi' > ィ ; +'~yu' <> ュ ; +'~ye' > ェ ; +'~yo' <> ョ ; + +# iteration marks +# TODO: make more accurate + +j $1 < sh (y* $vowel) {ヽ$voice ; +dj $1 < ch (y* $vowel) {ヽ$voice ; +dz $1 < ts (y* $vowel) {ヽ$voice ; + +g $1 < k (y* $vowel) {ヽ$voice ; +z $1 < s (y* $vowel) {ヽ$voice ; +d $1 < t (y* $vowel) {ヽ$voice ; +h $1 < b (y* $vowel) {ヽ$voice ; +v $1 < w (y* $vowel) {ヽ$voice ; + +sh $1 < sh (y* $vowel) {ヽ$voice ; +j $1 < j (y* $vowel) {ヽ$voice ; +ch $1 < ch (y* $vowel) {ヽ$voice ; +dj $1 < dj(y* $vowel) {ヽ$voice ; +ts $1 < ts (y* $vowel) {ヽ$voice ; +dz $1 < dz (y* $vowel) {ヽ$voice ; + +$1 < ($consonant y* $vowel) {ヽ$voice? ; +$1 < (.) {ヽ $voice? ; # otherwise repeat last character + < ヽ $voice? ; # delete if no characters found + +# h- rule: lengthens vowel if not followed by a vowel + +[aeiou] } h > ー ; + +# one-way latin- > kana rules. these do not occur in +# well-formed romaji representing actual japanese text. +# their purpose is to make all romaji map to kana of +# some sort. + +# the following are not really necessary, but produce +# slightly more natural results. + +cy > セィ ; +dy > ディ ; +hy > ヒ ; +sy > セィ ; +ty > ティ ; +zy > ゼィ ; + +h > ヘ ; + +# isolated consonants listed here so as not to mask +# longer rules above. 
+ +ch > チ; +sh > シ ; +dz > ヅ ; +dj > ヂ; + +b > ブ ; +d > デ ; +g > グ ; +k > ク ; +m > ム ; +n'' < ン } $n_quoter ; +n <> ン ; +p > プ ; +r > ル ; +s > ス ; +t > テ ; +y > イ ; +z > ズ ; +v > ヴ ; + +f > フ; +j > ジ; +w > ウ; + +ß > | ss ; +æ > | e ; +ð > | d ; +ø > | u ; +þ > | th ; + +# simple substitutions using backup + +c > | k ; +l > | r ; +q > | k ; +x > | ks ; + +# ~~~ END shared rules ~~~ + +#------------------------------------------------------ +# Final cleanup + +'~' > ; # delete stray tildes between letters +[:Katakana:] { '' } [:Latin:] > ; # delete stray quotes between letters +# [\u02BE[:Nonspacing Mark:]-[\u3099-\u309C]] > ; # delete any non-spacing marks that we didn't use + +:: NFC (NFD) ; +:: ([:Katakana:] halfwidth-fullwidth); + +# note: a global filter is more efficient, but MUST include all source chars!! +#:: ([\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]]); +# MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD +:: ( [[\ -~\u00A2-\u00A3\u00A5-\u00A6\u00AC\u0304\u20A9\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE8-\uFFEE][~\u3001-\u3002\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u3099-\u309B\u309E\u30A1-\u30FA\u30FC-\u30FE]] ) ; + +# eof diff --git a/icu4c/source/data/translit/Latin_NumericPinyin.txt b/icu4c/source/data/translit/Latin_NumericPinyin.txt new file mode 100644 index 00000000000..c1bfda8582b --- /dev/null +++ b/icu4c/source/data/translit/Latin_NumericPinyin.txt @@ -0,0 +1,41 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# According to the pinyin definitions I've been able to find: +# 'a', 'e' are the preferred bases +# otherwise 'o' +# otherwise last vowel + +# The trailing form of syllables are the following: +# "a", "ai", "ao", "an", "ang", +# "o", "ou", "ong", +# "e", "ei", "er", "en", "eng", +# "i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong", +# "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng", +# "ü", "üe", "üan", "ün" +# so the letters the tone will 'hop' are: + +::NFD (NFC); +$tone = [\u0304\u0301\u030C\u0300\u0306] ; + +# Move the tone to the end of a syllable, and convert to number +e {($tone) r} > r &tone-digit($1); +($tone) ( [i o n u {o n} {n g}]) > $2 &tone-digit($1); +($tone) > &tone-digit($1); + +# The following backs up until it finds the right vowel, then deposits the tone + +$vowel = [aAeEiIoOuUüÜ]; +$consonant = [[a-z A-Z] - [$vowel]]; +$digit = [1-5]; +$1 &digit-tone($3) $2 < ([aAeE]) ($vowel* $consonant*) ($digit); +$1 &digit-tone($3) $2 < ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit); +$1 &digit-tone($3) $2 < ($vowel) ($consonant*) ($digit); +&digit-tone($1) < [:letter:] {($digit)}; + +::NFC (NFD); + + + diff --git a/icu4c/source/data/translit/Malayalam_InterIndic.txt b/icu4c/source/data/translit/Malayalam_InterIndic.txt new file mode 100644 index 00000000000..3fd42e086c7 --- /dev/null +++ b/icu4c/source/data/translit/Malayalam_InterIndic.txt @@ -0,0 +1,85 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# Malayalam-InterIndic +#:: NFD (NFC) ; + +\u0D02>\uE002; # SIGN ANUSVARA +\u0D03>\uE003; # SIGN VISARGA +\u0D05>\uE005; # LETTER A +\u0D06>\uE006; # LETTER AA +\u0D07>\uE007; # LETTER I +\u0D08>\uE008; # LETTER II +\u0D09>\uE009; # LETTER U +\u0D0A>\uE00A; # LETTER UU +\u0D0B>\uE00B; # LETTER VOCALIC R +\u0D0C>\uE00C; # LETTER VOCALIC L +\u0D0E>\uE00E; # LETTER E +\u0D0F>\uE00F; # LETTER EE +\u0D10>\uE010; # LETTER AI +\u0D12>\uE012; # LETTER O +\u0D13>\uE013; # LETTER OO +\u0D14>\uE014; # LETTER AU +\u0D15>\uE015; # LETTER KA +\u0D16>\uE016; # LETTER KHA +\u0D17>\uE017; # LETTER GA +\u0D18>\uE018; # LETTER GHA +\u0D19>\uE019; # LETTER NGA +\u0D1A>\uE01A; # LETTER CA +\u0D1B>\uE01B; # LETTER CHA +\u0D1C>\uE01C; # LETTER JA +\u0D1D>\uE01D; # LETTER JHA +\u0D1E>\uE01E; # LETTER NYA +\u0D1F>\uE01F; # LETTER TTA +\u0D20>\uE020; # LETTER TTHA +\u0D21>\uE021; # LETTER DDA +\u0D22>\uE022; # LETTER DDHA +\u0D23>\uE023; # LETTER NNA +\u0D24>\uE024; # LETTER TA +\u0D25>\uE025; # LETTER THA +\u0D26>\uE026; # LETTER DA +\u0D27>\uE027; # LETTER DHA +\u0D28>\uE028; # LETTER NA +\u0D2A>\uE02A; # LETTER PA +\u0D2B>\uE02B; # LETTER PHA +\u0D2C>\uE02C; # LETTER BA +\u0D2D>\uE02D; # LETTER BHA +\u0D2E>\uE02E; # LETTER MA +\u0D2F>\uE02F; # LETTER YA +\u0D30>\uE030; # LETTER RA +\u0D31>\uE031; # LETTER RRA +\u0D32>\uE032; # LETTER LA +\u0D33>\uE033; # LETTER LLA +\u0D34>\uE034; # LETTER LLLA +\u0D35>\uE035; # LETTER VA +\u0D36>\uE036; # LETTER SHA +\u0D37>\uE037; # LETTER SSA +\u0D38>\uE038; # LETTER SA +\u0D39>\uE039; # LETTER HA +\u0D3E>\uE03E; # VOWEL SIGN AA +\u0D3F>\uE03F; # VOWEL SIGN I +\u0D40>\uE040; # VOWEL SIGN II +\u0D41>\uE041; # VOWEL SIGN U +\u0D42>\uE042; # VOWEL SIGN UU +\u0D43>\uE043; # VOWEL SIGN VOCALIC R +\u0D46>\uE046; # VOWEL SIGN E +\u0D47>\uE047; # VOWEL SIGN EE +\u0D48>\uE048; # VOWEL SIGN AI +\u0D4D>\uE04D; # SIGN VIRAMA +\u0D57>\uE057; # AU LENGTH MARK +\u0D60>\uE060; # LETTER VOCALIC 
RR +\u0D61>\uE061; # LETTER VOCALIC LL +\u0D66>\uE066; # DIGIT ZERO +\u0D67>\uE067; # DIGIT ONE +\u0D68>\uE068; # DIGIT TWO +\u0D69>\uE069; # DIGIT THREE +\u0D6A>\uE06A; # DIGIT FOUR +\u0D6B>\uE06B; # DIGIT FIVE +\u0D6C>\uE06C; # DIGIT SIX +\u0D6D>\uE06D; # DIGIT SEVEN +\u0D6E>\uE06E; # DIGIT EIGHT +\u0D6F>\uE06F; # DIGIT NINE +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/Oriya_InterIndic.txt b/icu4c/source/data/translit/Oriya_InterIndic.txt new file mode 100644 index 00000000000..937d919cdf7 --- /dev/null +++ b/icu4c/source/data/translit/Oriya_InterIndic.txt @@ -0,0 +1,95 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# Oriya-InterIndic +#:: NFD (NFC) ; +#\u0B21\u0B3C>\uE05C;# LETTER RRA +#\u0B22\u0B3C>\uE05D;# LETTER RHA +\u0B47\u0B56>\uE048;# VOWEL SIGN AI +\u0B47\u0B3E>\uE04B;# VOWEL SIGN O +\u0B47\u0B57>\uE04C;# VOWEL SIGN AU + +\u0B01>\uE001; # SIGN CANDRABINDU +\u0B02>\uE002; # SIGN ANUSVARA +\u0B03>\uE003; # SIGN VISARGA +\u0B05>\uE005; # LETTER A +\u0B06>\uE006; # LETTER AA +\u0B07>\uE007; # LETTER I +\u0B08>\uE008; # LETTER II +\u0B09>\uE009; # LETTER U +\u0B0A>\uE00A; # LETTER UU +\u0B0B>\uE00B; # LETTER VOCALIC R +\u0B0C>\uE00C; # LETTER VOCALIC L +\u0B0F>\uE00F; # LETTER E +\u0B10>\uE010; # LETTER AI +\u0B13>\uE013; # LETTER O +\u0B14>\uE014; # LETTER AU +\u0B15>\uE015; # LETTER KA +\u0B16>\uE016; # LETTER KHA +\u0B17>\uE017; # LETTER GA +\u0B18>\uE018; # LETTER GHA +\u0B19>\uE019; # LETTER NGA +\u0B1A>\uE01A; # LETTER CA +\u0B1B>\uE01B; # LETTER CHA +\u0B1C>\uE01C; # LETTER JA +\u0B1D>\uE01D; # LETTER JHA +\u0B1E>\uE01E; # LETTER NYA +\u0B1F>\uE01F; # LETTER TTA +\u0B20>\uE020; # LETTER TTHA +\u0B21>\uE021; # LETTER DDA +\u0B22>\uE022; # LETTER DDHA +\u0B23>\uE023; # LETTER NNA +\u0B24>\uE024; # LETTER TA +\u0B25>\uE025; # 
LETTER THA +\u0B26>\uE026; # LETTER DA +\u0B27>\uE027; # LETTER DHA +\u0B28>\uE028; # LETTER NA +\u0B2A>\uE02A; # LETTER PA +\u0B2B>\uE02B; # LETTER PHA +\u0B2C>\uE02C; # LETTER BA +\u0B2D>\uE02D; # LETTER BHA +\u0B2E>\uE02E; # LETTER MA +\u0B2F>\uE02F; # LETTER YA +\u0B30>\uE030; # LETTER RA +\u0B32>\uE032; # LETTER LA +\u0B33>\uE033; # LETTER LLA +\u0B35>\uE035; # LETTER VA +\u0B36>\uE036; # LETTER SHA +\u0B37>\uE037; # LETTER SSA +\u0B38>\uE038; # LETTER SA +\u0B39>\uE039; # LETTER HA +\u0B3C>\uE03C; # SIGN NUKTA +\u0B3D>\uE03D; # SIGN AVAGRAHA +\u0B3E>\uE03E; # VOWEL SIGN AA +\u0B3F>\uE03F; # VOWEL SIGN I +\u0B40>\uE040; # VOWEL SIGN II +\u0B41>\uE041; # VOWEL SIGN U +\u0B42>\uE042; # VOWEL SIGN UU +\u0B43>\uE043; # VOWEL SIGN VOCALIC R +\u0B47>\uE047; # VOWEL SIGN E +# +\u0B4D>\uE04D; # SIGN VIRAMA +\u0B56>\uE056; # AI LENGTH MARK +\u0B57>\uE057; # AU LENGTH MARK +\u0964>\ue064; # DANDA +\u0965>\ue065; # DOUBLE DANDA +# +\u0B5F>\uE05F; # LETTER YYA +\u0B60>\uE060; # LETTER VOCALIC RR +\u0B61>\uE061; # LETTER VOCALIC LL +\u0B66>\uE066; # DIGIT ZERO +\u0B67>\uE067; # DIGIT ONE +\u0B68>\uE068; # DIGIT TWO +\u0B69>\uE069; # DIGIT THREE +\u0B6A>\uE06A; # DIGIT FOUR +\u0B6B>\uE06B; # DIGIT FIVE +\u0B6C>\uE06C; # DIGIT SIX +\u0B6D>\uE06D; # DIGIT SEVEN +\u0B6E>\uE06E; # DIGIT EIGHT +\u0B6F>\uE06F; # DIGIT NINE +\u0B70>\ue07B; # ISSHAR +\u0B71>\ue081; # LETTER WA +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/Tamil_InterIndic.txt b/icu4c/source/data/translit/Tamil_InterIndic.txt new file mode 100644 index 00000000000..1fb92631eb1 --- /dev/null +++ b/icu4c/source/data/translit/Tamil_InterIndic.txt @@ -0,0 +1,76 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. 
+#-------------------------------------------------------------------- + +# Tamil-InterIndic +#:: NFD (NFC) ; + +\u0BC6\u0BBE>\uE04A;# VOWEL SIGN O +\u0BC7\u0BBE>\uE04B;# VOWEL SIGN OO +\u0BC6\u0BD7>\uE04C;# VOWEL SIGN AU +\u0B92\u0BD7>\uE014;# LETTER AU + +\u0B82>\uE002; # SIGN ANUSVARA +\u0B83>\uE003; # SIGN VISARGA +\u0B85>\uE005; # LETTER A +\u0B86>\uE006; # LETTER AA +\u0B87>\uE007; # LETTER I +\u0B88>\uE008; # LETTER II +\u0B89>\uE009; # LETTER U +\u0B8A>\uE00A; # LETTER UU +\u0B8E>\uE00E; # LETTER E +\u0B8F>\uE00F; # LETTER EE +\u0B90>\uE010; # LETTER AI +\u0B92>\uE012; # LETTER O +\u0B93>\uE013; # LETTER OO +\u0B94>\uE014; # LETTER AU +\u0B95>\uE015; # LETTER KA +\u0B99>\uE019; # LETTER NGA +\u0B9A>\uE01A; # LETTER CA +\u0B9C>\uE01C; # LETTER JA +\u0B9E>\uE01E; # LETTER NYA +\u0B9F>\uE01F; # LETTER TTA +\u0BA3>\uE023; # LETTER NNA +\u0BA4>\uE024; # LETTER TA +\u0BA8>\uE028; # LETTER NA +\u0BA9>\uE029; # LETTER NNNA +\u0BAA>\uE02A; # LETTER PA +\u0BAE>\uE02E; # LETTER MA +\u0BAF>\uE02F; # LETTER YA +\u0BB0>\uE030; # LETTER RA +\u0BB1>\uE031; # LETTER RRA +\u0BB2>\uE032; # LETTER LA +\u0BB3>\uE033; # LETTER LLA +\u0BB4>\uE034; # LETTER LLLA +\u0BB5>\uE035; # LETTER VA +\u0BB7>\uE037; # LETTER SSA +\u0BB8>\uE038; # LETTER SA +\u0BB9>\uE039; # LETTER HA +\u0BBE>\uE03E; # VOWEL SIGN AA +\u0BBF>\uE03F; # VOWEL SIGN I +\u0BC0>\uE040; # VOWEL SIGN II +\u0BC1>\uE041; # VOWEL SIGN U +\u0BC2>\uE042; # VOWEL SIGN UU +\u0BC6>\uE046; # VOWEL SIGN E +\u0BC7>\uE047; # VOWEL SIGN EE +\u0BC8>\uE048; # VOWEL SIGN AI + +\u0BCD>\uE04D; # SIGN VIRAMA +\u0BD7>\uE057; # AU LENGTH MARK +\u0BE7>\uE067; # DIGIT ONE +\u0BE8>\uE068; # DIGIT TWO +\u0BE9>\uE069; # DIGIT THREE +\u0BEA>\uE06A; # DIGIT FOUR +\u0BEB>\uE06B; # DIGIT FIVE +\u0BEC>\uE06C; # DIGIT SIX +\u0BED>\uE06D; # DIGIT SEVEN +\u0BEE>\uE06E; # DIGIT EIGHT +\u0BEF>\uE06F; # DIGIT NINE +\u0BF0>\uE067\uE066; # UNMAPPED Tamil-InterIndic: NUMBER TEN +\u0BF1>\uE067\uE066\uE066; # UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED 
+\u0BF2>\uE067\uE066\uE066\uE066;# UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND +0>\ue066; + +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/Telugu_InterIndic.txt b/icu4c/source/data/translit/Telugu_InterIndic.txt new file mode 100644 index 00000000000..b8ce53064d5 --- /dev/null +++ b/icu4c/source/data/translit/Telugu_InterIndic.txt @@ -0,0 +1,90 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# Telugu-InterIndic +#:: NFD (NFC) ; +\u0c46\u0c4d\u0c56>\ue048\ue04d; +\u0C46\u0C56>\uE048;# VOWEL SIGN AI +\u0C01>\uE001; # SIGN CANDRABINDU +\u0C02>\uE002; # SIGN ANUSVARA +\u0C03>\uE003; # SIGN VISARGA +\u0C05>\uE005; # LETTER A +\u0C06>\uE006; # LETTER AA +\u0C07>\uE007; # LETTER I +\u0C08>\uE008; # LETTER II +\u0C09>\uE009; # LETTER U +\u0C0A>\uE00A; # LETTER UU +\u0C0B>\uE00B; # LETTER VOCALIC R +\u0C0C>\uE00C; # LETTER VOCALIC L +\u0C0E>\uE00E; # LETTER E +\u0C0F>\uE00F; # LETTER EE +\u0C10>\uE010; # LETTER AI +\u0C12>\uE012; # LETTER O +\u0C13>\uE013; # LETTER OO +\u0C14>\uE014; # LETTER AU +\u0C15>\uE015; # LETTER KA +\u0C16>\uE016; # LETTER KHA +\u0C17>\uE017; # LETTER GA +\u0C18>\uE018; # LETTER GHA +\u0C19>\uE019; # LETTER NGA +\u0C1A>\uE01A; # LETTER CA +\u0C1B>\uE01B; # LETTER CHA +\u0C1C>\uE01C; # LETTER JA +\u0C1D>\uE01D; # LETTER JHA +\u0C1E>\uE01E; # LETTER NYA +\u0C1F>\uE01F; # LETTER TTA +\u0C20>\uE020; # LETTER TTHA +\u0C21>\uE021; # LETTER DDA +\u0C22>\uE022; # LETTER DDHA +\u0C23>\uE023; # LETTER NNA +\u0C24>\uE024; # LETTER TA +\u0C25>\uE025; # LETTER THA +\u0C26>\uE026; # LETTER DA +\u0C27>\uE027; # LETTER DHA +\u0C28>\uE028; # LETTER NA +\u0C2A>\uE02A; # LETTER PA +\u0C2B>\uE02B; # LETTER PHA +\u0C2C>\uE02C; # LETTER BA +\u0C2D>\uE02D; # LETTER BHA +\u0C2E>\uE02E; # LETTER MA +\u0C2F>\uE02F; # LETTER YA +\u0C30>\uE030; # 
LETTER RA +\u0C31>\uE031; # LETTER RRA +\u0C32>\uE032; # LETTER LA +\u0C33>\uE033; # LETTER LLA +\u0C35>\uE035; # LETTER VA +\u0C36>\uE036; # LETTER SHA +\u0C37>\uE037; # LETTER SSA +\u0C38>\uE038; # LETTER SA +\u0C39>\uE039; # LETTER HA +\u0C3E>\uE03E; # VOWEL SIGN AA +\u0C3F>\uE03F; # VOWEL SIGN I +\u0C40>\uE040; # VOWEL SIGN II +\u0C41>\uE041; # VOWEL SIGN U +\u0C42>\uE042; # VOWEL SIGN UU +\u0C43>\uE043; # VOWEL SIGN VOCALIC R +\u0C44>\uE044; # VOWEL SIGN VOCALIC RR +\u0C46>\uE046; # VOWEL SIGN E +\u0C47>\uE047; # VOWEL SIGN EE +\u0C4A>\uE04A; # VOWEL SIGN O +\u0C4B>\uE04B; # VOWEL SIGN OO +\u0C4C>\uE04C; # VOWEL SIGN AU +\u0C4D>\uE04D; # SIGN VIRAMA +\u0C55>\uE055; # LENGTH MARK +\u0C56>\uE056; # AI LENGTH MARK +\u0C60>\uE060; # LETTER VOCALIC RR +\u0C61>\uE061; # LETTER VOCALIC LL +\u0C66>\uE066; # DIGIT ZERO +\u0C67>\uE067; # DIGIT ONE +\u0C68>\uE068; # DIGIT TWO +\u0C69>\uE069; # DIGIT THREE +\u0C6A>\uE06A; # DIGIT FOUR +\u0C6B>\uE06B; # DIGIT FIVE +\u0C6C>\uE06C; # DIGIT SIX +\u0C6D>\uE06D; # DIGIT SEVEN +\u0C6E>\uE06E; # DIGIT EIGHT +\u0C6F>\uE06F; # DIGIT NINE +# :: NFC (NFD) ; +# eof diff --git a/icu4c/source/data/translit/ThaiLogical_Latin.txt b/icu4c/source/data/translit/ThaiLogical_Latin.txt new file mode 100644 index 00000000000..2c651077b63 --- /dev/null +++ b/icu4c/source/data/translit/ThaiLogical_Latin.txt @@ -0,0 +1,187 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# Thai-Latin +# This set of rules follows ISO 11940 +# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf +# except that that does not mention an implicit vowel, so we use ọ +# +# The transcription is fairly ugly, so we ought to also do the UNGEGN version +# see: http://www.eki.ee/wgrs/rom1_th.pdf +# and probably make that the main variant. + +# Note: this is an internal file. 
The NFD/NFC is handled externally, in the index +# The insertion of spaces between words, the reversal of the vowels +# and the conversion of space to semicolon are done *outside* of these rules. +# So as far as these rules are concerned, the vowels are in logical order! + +# insert implicit vowel (and remove it going the other way) +# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically +#$consonant = [ก-ฮ]; +#$vowel = [ะ-ฺเ-ไ็]; + +#{ ( $consonant ) } [^$vowel ] > | $1  ; +# > ọ ; +# < ọ ; + +$notAbove = [^\p{ccc=0}\p{ccc=above}] ; +$notBelow = [^\p{ccc=0}\p{ccc=below}] ; + +# Consonants +# Warning: the 'h's need to be handled carefully! +# What we really want to say is the following, but we can't +# $notHAccent = !($notAbove* ̄ | $notBelow* ̣) ; + +# Since the only accents we care about that could cause problems are free-standing accents below, we use instead: +$freeStandingBelow = [\u0325 ]; +$hAccent = [ ̄ ̣] +$notHAccent0 = [^$freeStandingBelow$hAccent]; +$notHAccent1 = $freeStandingBelow [^$hAccent]; + +ห > h̄ ; # THAI CHARACTER HO HIP + ห | $1 < h ($notAbove*) ̄; # backward case, account for reordering +ฮ <> ḥ ; # THAI CHARACTER HO NOKHUK + +ข <> k̄h ; # THAI CHARACTER KHO KHAI +ฃ <> ḳ̄h ; # THAI CHARACTER KHO KHUAT +ฅ <> kʹh ; # THAI CHARACTER KHO KHON +ฆ <> ḳh ; # THAI CHARACTER KHO RAKHANG +ค < kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI +ค <> kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI +ก <> k ; # THAI CHARACTER KO KAI + +ภ <> p̣h ; # THAI CHARACTER PHO SAMPHAO +ผ <> p̄h ; # THAI CHARACTER PHO PHUNG +พ < ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN +พ <> ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN +ป <> p ; # THAI CHARACTER PO PLA + +ฉ <> c̄h ; # THAI CHARACTER CHO CHING +ฌ <> c̣h ; # THAI CHARACTER CHO CHOE +ช < ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG +ช <> ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG +จ <> c ; # THAI CHARACTER CHO CHAN + +ฐ <> ṭ̄h ; # THAI CHARACTER THO THAN +ฑ <> ṯh ; # THAI CHARACTER 
THO NANGMONTHO +ฒ <> tʹh ; # THAI CHARACTER THO PHUTHAO +ถ <> t̄h ; # THAI CHARACTER THO THUNG +ธ <> ṭh ; # THAI CHARACTER THO THONG +ท < th } $notHAccent1 ; # THAI CHARACTER THO THAHAN +ท <> th } $notHAccent0 ; # THAI CHARACTER THO THAHAN +#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambigous. So it uses vertical tick. +ฏ <> t̩ ; # THAI CHARACTER TO PATAK +ต <> t ; # THAI CHARACTER TO TAO + +# since there is no singleton g (generated), don't worry about that. +ง <> ng ; # THAI CHARACTER NGO NGU +ณ <> ṇ ; # THAI CHARACTER NO NEN +น <> n ; # THAI CHARACTER NO NU + +ญ <> ỵ ; # THAI CHARACTER YO YING +ฎ <> ḍ ; # THAI CHARACTER DO CHADA +ด <> d ; # THAI CHARACTER DO DEK + +บ <> b ; # THAI CHARACTER BO BAIMAI +ฝ <> f̄ ; # THAI CHARACTER FO FA + ฝ | $1 < f ($notAbove*) ̄; # backward case, account for reordering + +ม <> m ; # THAI CHARACTER MO MA +ย <> y ; # THAI CHARACTER YO YAK +ร <> r ; # THAI CHARACTER RO RUA +ฤ <> v ; # THAI CHARACTER RU +ฦ <> ł ; # THAI CHARACTER LU +ว <> w ; # THAI CHARACTER WO WAEN + +ศ <> ṣ̄ ; # THAI CHARACTER SO SALA*** + ศ | $1 < s ̣ ($notAbove*) ̄; # backward case, account for reordering +ษ <> s̄ʹ ; # THAI CHARACTER SO RUSI +ส > s̄ ; # THAI CHARACTER SO SUA*** + ส | $1 < s ($notAbove*) ̄; # backward case, account for reordering + +ฬ <> ḷ ; # THAI CHARACTER LO CHULA +ล <> l ; # THAI CHARACTER LO LING +ฟ <> f ; # THAI CHARACTER FO FAN + +อ <> x ; # THAI CHARACTER O ANG +ซ <> s ; # THAI CHARACTER SO SO + +# vowels + + ั <> ạ ; # THAI CHARACTER MAI HAN-AKAT + +า > ā ; # THAI CHARACTER SARA AA + า | $1 < a ($notAbove*) ̄; # backward case, account for reordering + +# We deviate from ISO for SARA AM for disambiguation +ำ > a ̉; # THAI CHARACTER SARA AM + ำ | $1 < a ($notAbove*) ̉ ; # backward case, account for reordering + +ะ <> a ; # THAI CHARACTER SARA A + ี <> ī ; # THAI CHARACTER SARA II + ี | $1 < i ($notAbove*) ̄ ; # backward case, account for reordering + + ื <> ụ̄ ; # THAI CHARACTER SARA UEE + ื | $1 < u ̣ ($notAbove*) ̄ ; 
# backward case, account for reordering + + ึ <> ụ ; # THAI CHARACTER SARA UE + ู <> ū ; # THAI CHARACTER SARA UU + ู | $1 < u ($notAbove*) ̄ ; # backward case, account for reordering + + ุ <> u ; # THAI CHARACTER SARA U + +ฯ <> ‡ ; # THAI CHARACTER PAIYANNOI + +# ฿ <> XXX ; # THAI CURRENCY SYMBOL BAHT + +เ <> e ; # THAI CHARACTER SARA E +แ <> æ ; # THAI CHARACTER SARA AE +โ <> o ; # THAI CHARACTER SARA O +ใ <> ı ; # THAI CHARACTER SARA AI MAIMUAN +ไ <> ị ; # THAI CHARACTER SARA AI MAIMALAI +ๅ <> ɨ ; # THAI CHARACTER LAKKHANGYAO + ็ <> ̆ ; # THAI CHARACTER MAITAIKHU + ่ <> ̀ ; # THAI CHARACTER MAI EK + ้ <> ̂ ; # THAI CHARACTER MAI THO + ๊ <> ́ ; # THAI CHARACTER MAI TRI + ๋ <> ̌ ; # THAI CHARACTER MAI CHATTAWA + ์ <> ̒ ; # THAI CHARACTER THANTHAKHAT + ๎ <> '~' ; # THAI CHARACTER YAMAKKAN + +# We deviate from ISO for disambiguation + ํ <> ̊ ; # THAI CHARACTER NIKHAHIT + +๏ <> § ; # THAI CHARACTER FONGMAN + +๐ <> 0 ; # THAI DIGIT ZERO +๑ <> 1 ; # THAI DIGIT ONE +๒ <> 2 ; # THAI DIGIT TWO +๓ <> 3 ; # THAI DIGIT THREE +๔ <> 4 ; # THAI DIGIT FOUR +๕ <> 5 ; # THAI DIGIT FIVE +๖ <> 6 ; # THAI DIGIT SIX +๗ <> 7 ; # THAI DIGIT SEVEN +๘ <> 8 ; # THAI DIGIT EIGHT +๙ <> 9 ; # THAI DIGIT NINE + +๚ <> '||' ; # THAI CHARACTER ANGKHANKHU + +๛ <> » ; # THAI CHARACTER KHOMUT +ๆ <> « ; # THAI CHARACTER MAIYAMOK + +# moved down to make shorter first +#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below. 
+ ฺ <> ˌ ; # THAI CHARACTER PHINTHU + ิ <> i ; # THAI CHARACTER SARA I + +# fallbacks + +| k < g ; +| k < h ; +| c < j ; +| k < q ; +| s < z ; + +:: (lower); diff --git a/icu4c/source/data/translit/Thai_ThaiLogical.txt b/icu4c/source/data/translit/Thai_ThaiLogical.txt new file mode 100644 index 00000000000..52764c55598 --- /dev/null +++ b/icu4c/source/data/translit/Thai_ThaiLogical.txt @@ -0,0 +1,26 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# This reverses the Thai LogicalOrderException vowels, and does (part of) spaces +# The rules that convert space into semicolon are in another file; +# since they have to come BEFORE the break iterator + +$thai = [[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B] ; + +# First convert the semicolon back + +' ' < $thai { '; ' } $thai; + +# Remove any other spaces between thai letters + +< $thai { ' ' } $thai; + +# Now vowels +$thai_reversing = [[:Logical_Order_Exception:] & $thai]; +$thai_non_reversing = [$thai - $thai_reversing ]; + +( $thai_reversing ) ( $thai_non_reversing ) > $2 $1; +# other direction +$2 $1 < ( $thai_non_reversing ) ( $thai_reversing ) ; \ No newline at end of file diff --git a/icu4c/source/data/translit/Thai_ThaiSemi.txt b/icu4c/source/data/translit/Thai_ThaiSemi.txt new file mode 100644 index 00000000000..ce1f3ff0ecb --- /dev/null +++ b/icu4c/source/data/translit/Thai_ThaiSemi.txt @@ -0,0 +1,11 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# The rules that convert space into semicolon are in this file; +# since they have to come BEFORE the break iterator. 
+ +$thai = [[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B] ; + +$thai { ' ' } $thai > '; ' ; \ No newline at end of file diff --git a/icu4c/source/data/translit/Tone_Digit.txt b/icu4c/source/data/translit/Tone_Digit.txt new file mode 100644 index 00000000000..489d79f1948 --- /dev/null +++ b/icu4c/source/data/translit/Tone_Digit.txt @@ -0,0 +1,11 @@ +#-------------------------------------------------------------------- +# Copyright (c) 1999-2004, International Business Machines +# Corporation and others. All Rights Reserved. +#-------------------------------------------------------------------- + +# Only intended for internal use +\u0304 <> 1; +\u0301 <> 2; +\u030C <> 3; +\u0300 <> 4; + < 5; \ No newline at end of file diff --git a/icu4c/source/data/translit/el.txt b/icu4c/source/data/translit/el.txt new file mode 100644 index 00000000000..5f166fabf00 --- /dev/null +++ b/icu4c/source/data/translit/el.txt @@ -0,0 +1,8 @@ +el{ + + TransliterateLATIN { + "UNGEGN", + "::Greek-Latin/UNGEGN;" + } +} + \ No newline at end of file diff --git a/icu4c/source/data/translit/en.txt b/icu4c/source/data/translit/en.txt new file mode 100644 index 00000000000..6c6ccd63e19 --- /dev/null +++ b/icu4c/source/data/translit/en.txt @@ -0,0 +1,22 @@ +// *************************************************************************** +// * +// * Copyright (C) 2004, International Business Machines +// * Corporation and others. All Rights Reserved. +// * +// *************************************************************************** +// + +en{ + + // Format for the display name of a Transliterator. + // This is the English form of this resource. + TransliteratorNamePattern { "{0,choice,0#|1#{1}|2#{1} to {2}}" } + + // Transliterator display names + // This is the English form of this resource. + // This list is currently incomplete, and care should be taken to translate these identifiers. + // TODO: Reorganize this data like Country, Currencies and Language tables. 
+ "%Translit%Hex" { "Hex Escape" } + "%Translit%UnicodeName" { "Unicode Name" } + "%Translit%UnicodeChar" { "Unicode Character" } +} \ No newline at end of file diff --git a/icu4c/source/data/translit/root.txt b/icu4c/source/data/translit/root.txt new file mode 100644 index 00000000000..ed159e922d5 --- /dev/null +++ b/icu4c/source/data/translit/root.txt @@ -0,0 +1,752 @@ +// *************************************************************************** +// * +// * Copyright (C) 2004, International Business Machines +// * Corporation and others. All Rights Reserved. +// * +// *************************************************************************** +// + +root{ + + RuleBasedTransliteratorIDs{ +//-------------------------------------------------------------------- +//-------------------------------------------------------------------- +// +// TRANSLITERATOR INDEX FILE. This file lists the non-algorithmic +// system transliterators. It allows arbitrary mappings between +// transliterator IDs and file names, and also allows the system to +// define aliases for transliterators, so that "Latin-Hangul", for +// example, can be implemented transparently as the compound +// "Latin-Jamo;Jamo-Hangul". Internal IDs may also be defined; these +// are invisible to the user, but can be composed together by the +// system to create visible transliterators. +// +// Blank lines and lines beginning with '#' are ignored. +// +// Lines in this file have one of the following forms (text not +// enclosed by <> is literal): +// +// <id>:file:<resource>:<encoding>:<direction> +// <id>:internal:<resource>:<encoding>:<direction> +// <id>:alias:<getInstanceArg> +// +// <id> is the ID of the system transliterator being defined. These +// are public IDs enumerated by Transliterator.getAvailableIDs(), +// unless the second field is "internal". +// +// <resource> is a ResourceReader resource name. Currently these refer +// to file names under com/ibm/text/resources. 
This string is passed +// directly to ResourceReader, together with <encoding>. 
+// <encoding> is the character encoding to use when reading <resource>; +// passed directly to ResourceReader. E.g., "UTF8". +// +// <direction> is either "FORWARD" or "REVERSE". +// +// <getInstanceArg> is a string to be passed directly to +// Transliterator.getInstance(). The returned Transliterator object +// then has its ID changed to <id> and is returned. + + +// Bidirectional rule files + + Fullwidth-Halfwidth { + file { + resource:include{"Fullwidth_Halfwidth.txt"} + direction{"FORWARD"} + } + } + Halfwidth-Fullwidth { + file { + resource:include{"Fullwidth_Halfwidth.txt"} + direction{"REVERSE"} + } + } + + Latin-Cyrillic { + file { + resource:include{"Cyrillic_Latin.txt"} + direction{"REVERSE"} + } + } + Cyrillic-Latin { + file { + resource:include{"Cyrillic_Latin.txt"} + direction{"FORWARD"} + } + } + + Latin-Hebrew { + file { + resource:include{"Hebrew_Latin.txt"} + direction{"REVERSE"} + } + } + Hebrew-Latin { + file { + resource:include{"Hebrew_Latin.txt"} + direction{"FORWARD"} + } + } + + Latin-Arabic { + file { + resource:include{"Arabic_Latin.txt"} + direction{"REVERSE"} + } + } + Arabic-Latin { + file { + resource:include{"Arabic_Latin.txt"} + direction{"FORWARD"} + } + } + + Tone-Digit { + internal { + resource:include{"Tone_Digit.txt"} + direction{"FORWARD"} + } + } + Digit-Tone { + internal { + resource:include{"Tone_Digit.txt"} + direction{"REVERSE"} + } + } + + Latin-NumericPinyin { + file { + resource:include{"Latin_NumericPinyin.txt"} + direction{"FORWARD"} + } + } + NumericPinyin-Latin { + file { + resource:include{"Latin_NumericPinyin.txt"} + direction{"REVERSE"} + } + } + + Han-Spacedhan { + internal { + resource:include{"Han_Spacedhan.txt"} + direction{"FORWARD"} + } + } + Spacedhan-Han { + alias {"null"} + } + + Han-Latin { + file { + resource:include{"Han_Latin.txt"} + direction{"FORWARD"} + } + } +//Latin-Han:file:Transliterator_Han_Latin.txt:UTF8:REVERSE # no round trip! 
+ Latin-Han { + alias {"null"} + } + +// Comment these out; they are only for testing +// Latin-Han/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:REVERSE +// Han-Latin/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:FORWARD + +//Latin-Han/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:REVERSE +//Han-Latin/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:FORWARD + + Latin-Greek { + file { + resource:include{"Greek_Latin.txt"} + direction{"REVERSE"} + } + } + Greek-Latin { + file { + resource:include{"Greek_Latin.txt"} + direction{"FORWARD"} + } + } + + Latin-Greek/UNGEGN { + file { + resource:include{"Greek_Latin_UNGEGN.txt"} + direction{"REVERSE"} + } + } + Greek-Latin/UNGEGN { + file { + resource:include{"Greek_Latin_UNGEGN.txt"} + direction{"FORWARD"} + } + } + + Latin-Katakana { + file { + resource:include{"Latin_Katakana.txt"} + direction{"FORWARD"} + } + } + Katakana-Latin { + file { + resource:include{"Latin_Katakana.txt"} + direction{"REVERSE"} + } + } + + Latin-Hiragana { + file { + resource:include{"Hiragana_Latin.txt"} + direction{"REVERSE"} + } + } + Hiragana-Latin { + file { + resource:include{"Hiragana_Latin.txt"} + direction{"FORWARD"} + } + } + +//Thai Stuff: will change if we get \b into Transliterator + + Thai-ThaiSemi { + internal { + resource:include{"Thai_ThaiSemi.txt"} + direction{"FORWARD"} + } + } + + Thai-ThaiLogical { + internal { + resource:include{"Thai_ThaiLogical.txt"} + direction{"FORWARD"} + } + } + ThaiLogical-Thai { + internal { + resource:include{"Thai_ThaiLogical.txt"} + direction{"REVERSE"} + } + } + + ThaiLogical-Latin { + internal { + resource:include{"ThaiLogical_Latin.txt"} + direction{"FORWARD"} + } + } + Latin-ThaiLogical { + internal { + resource:include{"ThaiLogical_Latin.txt"} + direction{"REVERSE"} + } + } + +// Must use the order below! +// We need two separate passes because of the Thai vowel reversal +// Thai-Logical also converts spaces to semicolons. 
That has to be done before we insert latin spaces + + Thai-Latin { + alias {"[[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B];NFD;Thai-ThaiSemi;Any-BreakInternal;Thai-ThaiLogical;ThaiLogical-Latin;NFC"} + } + Latin-Thai { + alias {"[[:Latin:][:Mn:][:Me:] \ \;0-9|~\u00A7\u00AB\u00BB\u02B9\u0374\u2021\u02CC];NFD;Latin-ThaiLogical;ThaiLogical-Thai;NFC"} + } + +// end of Thai Stuff + + Hiragana-Katakana { + file { + resource:include{"Hiragana_Katakana.txt"} + direction{"FORWARD"} + } + } + Katakana-Hiragana { + file { + resource:include{"Hiragana_Katakana.txt"} + direction{"REVERSE"} + } + } + + Any-Accents { + file { + resource:include{"Any_Accents.txt"} + direction{"FORWARD"} + } + } + Accents-Any { + file { + resource:include{"Any_Accents.txt"} + direction{"REVERSE"} + } + } + + Any-Publishing { + file { + resource:include{"Any_Publishing.txt"} + direction{"FORWARD"} + } + } + Publishing-Any { + file { + resource:include{"Any_Publishing.txt"} + direction{"REVERSE"} + } + } + +// Korean +// N.B. Don't end Latin-Jamo with NFC; that produces Hangul. For +// Hangul output use Latin-Hangul. 
+ + LowerLatin-Jamo { + internal { + resource:include{"Latin_Jamo.txt"} + direction{"FORWARD"} + } + } + Jamo-LowerLatin { + internal { + resource:include{"Latin_Jamo.txt"} + direction{"REVERSE"} + } + } + Latin-Jamo { + alias {"['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo"} + } + Jamo-Latin { + alias {"['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\uAC00-\uD7A3];NFD;Jamo-LowerLatin;NFC"} + } + Latin-Hangul { + alias {"['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo;NFC"} + } + Hangul-Latin { + alias {"['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\u3131-\u313F\u3141-\u3143\u3145-\u3163\u3200-\u321C\u3260-\u327B\uAC00-\uD7A3\uFF07\uFFA1-\uFFAF\uFFB1-\uFFB3\uFFB5-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC];NFKD;Jamo-LowerLatin;NFC"} + } + +// Inter-Indic composed rules + Latin-InterIndic { + internal { + resource:include{"Latin_InterIndic.txt"} + direction{"FORWARD"} + } + } + Devanagari-InterIndic { + internal { + resource:include{"Devanagari_InterIndic.txt"} + direction{"FORWARD"} + } + } + Bengali-InterIndic { + internal { + resource:include{"Bengali_InterIndic.txt"} + direction{"FORWARD"} + } + } + Gurmukhi-InterIndic { + internal { + resource:include{"Gurmukhi_InterIndic.txt"} + direction{"FORWARD"} 
+ } + } + Gujarati-InterIndic { + internal { + resource:include{"Gujarati_InterIndic.txt"} + direction{"FORWARD"} + } + } + Oriya-InterIndic { + internal { + resource:include{"Oriya_InterIndic.txt"} + direction{"FORWARD"} + } + } + Tamil-InterIndic { + internal { + resource:include{"Tamil_InterIndic.txt"} + direction{"FORWARD"} + } + } + Telugu-InterIndic { + internal { + resource:include{"Telugu_InterIndic.txt"} + direction{"FORWARD"} + } + } + Kannada-InterIndic { + internal { + resource:include{"Kannada_InterIndic.txt"} + direction{"FORWARD"} + } + } + Malayalam-InterIndic { + internal { + resource:include{"Malayalam_InterIndic.txt"} + direction{"FORWARD"} + } + } + + InterIndic-Latin { + internal { + resource:include{"InterIndic_Latin.txt"} + direction{"FORWARD"} + } + } + InterIndic-Devanagari { + internal { + resource:include{"InterIndic_Devanagari.txt"} + direction{"FORWARD"} + } + } + InterIndic-Bengali { + internal { + resource:include{"InterIndic_Bengali.txt"} + direction{"FORWARD"} + } + } + InterIndic-Gurmukhi { + internal { + resource:include{"InterIndic_Gurmukhi.txt"} + direction{"FORWARD"} + } + } + InterIndic-Gujarati { + internal { + resource:include{"InterIndic_Gujarati.txt"} + direction{"FORWARD"} + } + } + InterIndic-Oriya { + internal { + resource:include{"InterIndic_Oriya.txt"} + direction{"FORWARD"} + } + } + InterIndic-Tamil { + internal { + resource:include{"InterIndic_Tamil.txt"} + direction{"FORWARD"} + } + } + InterIndic-Telugu { + internal { + resource:include{"InterIndic_Telugu.txt"} + direction{"FORWARD"} + } + } + InterIndic-Kannada { + internal { + resource:include{"InterIndic_Kannada.txt"} + direction{"FORWARD"} + } + } + InterIndic-Malayalam { + internal { + resource:include{"InterIndic_Malayalam.txt"} + direction{"FORWARD"} + } + } + +//Latin-Indic transliterators + Latin-Devanagari { + alias 
{"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Devanagari;NFC"} + } + Latin-Bengali { + alias 
{"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Bengali;NFC"} + } + Latin-Gurmukhi { + alias 
{"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gurmukhi;NFC"} + } + Latin-Gujarati { + alias 
{"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gujarati;NFC"} + } + Latin-Oriya { + alias 
{"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Oriya;NFC"} + } + Latin-Tamil { + alias 
{"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Tamil;NFC"} + } + Latin-Telugu { + alias 
{"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Telugu;NFC"} + } + Latin-Kannada { + alias 
{"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Kannada;NFC"} + } + Latin-Malayalam { + alias 
{"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Malayalam;NFC"} + } + +//Indic-Latin transliterators + Devanagari-Latin { + alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Latin;NFC"} + } + Bengali-Latin { + alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Latin;NFC"} + } + Gurmukhi-Latin { + alias 
{"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Latin;NFC"} + } + Gujarati-Latin { + alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Latin;NFC"} + } + Oriya-Latin { + alias {"[\u0964-\u0965\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35\u0B36-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B70\u0B71];NFD;Oriya-InterIndic;InterIndic-Latin;NFC"} + } + Tamil-Latin { + alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Latin;NFC"} + } + Telugu-Latin { + alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Latin;NFC"} + } + Kannada-Latin { + alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Latin;NFC"} + } + Malayalam-Latin { + alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Latin;NFC"} + } + + Devanagari-Bengali { + alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Bengali;NFC"} + } + Devanagari-Gurmukhi { + alias 
{"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gurmukhi;NFC"} + } + Devanagari-Gujarati { + alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gujarati;NFC"} + } + Devanagari-Oriya { + alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Oriya;NFC"} + } + Devanagari-Tamil { + alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Tamil;NFC"} + } + Devanagari-Telugu { + alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Telugu;NFC"} + } + Devanagari-Kannada { + alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Kannada;NFC"} + } + Devanagari-Malayalam { + alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Malayalam;NFC"} + } + + Bengali-Devanagari { + alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Devanagari;NFC"} + } + Bengali-Gurmukhi { + alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gurmukhi;NFC"} + } + Bengali-Gujarati { + alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gujarati;NFC"} + } + Bengali-Oriya { + alias 
{"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Oriya;NFC"} + } + Bengali-Tamil { + alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Tamil;NFC"} + } + Bengali-Telugu { + alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Telugu;NFC"} + } + Bengali-Kannada { + alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Kannada;NFC"} + } + Bengali-Malayalam { + alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Malayalam;NFC"} + } + + Gurmukhi-Devanagari { + alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Devanagari;NFC"} + } + Gurmukhi-Bengali { + alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Bengali;NFC"} + } + Gurmukhi-Gujarati { + alias 
{"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Gujarati;NFC"} + } + Gurmukhi-Oriya { + alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Oriya;NFC"} + } + Gurmukhi-Tamil { + alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Tamil;NFC"} + } + Gurmukhi-Telugu { + alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Telugu;NFC"} + } + Gurmukhi-Kannada { + alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Kannada;NFC"} + } + Gurmukhi-Malayalam { + alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Malayalam;NFC"} + } + + Gujarati-Devanagari { + alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Devanagari;NFC"} + } + Gujarati-Bengali { + alias 
{"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Bengali;NFC"} + } + Gujarati-Gurmukhi { + alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Gurmukhi;NFC"} + } + Gujarati-Oriya { + alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Oriya;NFC"} + } + Gujarati-Tamil { + alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Tamil;NFC"} + } + Gujarati-Telugu { + alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Telugu;NFC"} + } + Gujarati-Kannada { + alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Kannada;NFC"} + } + Gujarati-Malayalam { + alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Malayalam;NFC"} + } + + Oriya-Devanagari { + alias 
{"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Devanagari;NFC"} + } + Oriya-Bengali { + alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Bengali;NFC"} + } + Oriya-Gurmukhi { + alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gurmukhi;NFC"} + } + Oriya-Gujarati { + alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gujarati;NFC"} + } + Oriya-Tamil { + alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Tamil;NFC"} + } + Oriya-Telugu { + alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Telugu;NFC"} + } + Oriya-Kannada { + alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Kannada;NFC"} + } + Oriya-Malayalam { + alias 
{"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Malayalam;NFC"} + } + + Tamil-Devanagari { + alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Devanagari;NFC"} + } + Tamil-Bengali { + alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Bengali;NFC"} + } + Tamil-Gurmukhi { + alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gurmukhi;NFC"} + } + Tamil-Gujarati { + alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gujarati;NFC"} + } + Tamil-Oriya { + alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Oriya;NFC"} + } + Tamil-Telugu { + alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Telugu;NFC"} + } + Tamil-Kannada { + alias 
{"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Kannada;NFC"} + } + Tamil-Malayalam { + alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Malayalam;NFC"} + } + + Telugu-Devanagari { + alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Devanagari;NFC"} + } + Telugu-Bengali { + alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Bengali;NFC"} + } + Telugu-Gurmukhi { + alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gurmukhi;NFC"} + } + Telugu-Gujarati { + alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gujarati;NFC"} + } + Telugu-Oriya { + alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Oriya;NFC"} + } + Telugu-Tamil { + alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Tamil;NFC"} + } + Telugu-Kannada { + alias 
{"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Kannada;NFC"} + } + Telugu-Malayalam { + alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Malayalam;NFC"} + } + + Kannada-Devanagari { + alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Devanagari;NFC"} + } + Kannada-Bengali { + alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Bengali;NFC"} + } + Kannada-Gurmukhi { + alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gurmukhi;NFC"} + } + Kannada-Gujarati { + alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gujarati;NFC"} + } + Kannada-Oriya { + alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Oriya;NFC"} + } + Kannada-Tamil { + alias 
{"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Tamil;NFC"} + } + Kannada-Telugu { + alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Telugu;NFC"} + } + Kannada-Malayalam { + alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Malayalam;NFC"} + } + + Malayalam-Devanagari { + alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Devanagari;NFC"} + } + Malayalam-Bengali { + alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Bengali;NFC"} + } + Malayalam-Gurmukhi { + alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gurmukhi;NFC"} + } + Malayalam-Gujarati { + alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gujarati;NFC"} + } + Malayalam-Oriya { + alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Oriya;NFC"} + } + Malayalam-Tamil { + alias 
{"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Tamil;NFC"} + } + Malayalam-Telugu { + alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Telugu;NFC"} + } + Malayalam-Kannada { + alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Kannada;NFC"} + } + +// eof + } + TransliteratorNamePattern { + // Format for the display name of a Transliterator. + // This is the language-neutral form of this resource. + "{0,choice,0#|1#{1}|2#{1}-{2}}" // Display name + } + + // Transliterator display names + // This is the English form of this resource. + "%Translit%Hex" { "%Translit%Hex" } + "%Translit%UnicodeName" { "%Translit%UnicodeName" } + "%Translit%UnicodeChar" { "%Translit%UnicodeChar" } + + TransliterateLATIN{ + "", + "" + } + +} diff --git a/icu4c/source/data/translit/t_Any_Accents.txt b/icu4c/source/data/translit/t_Any_Accents.txt deleted file mode 100644 index 1445b032a15..00000000000 --- a/icu4c/source/data/translit/t_Any_Accents.txt +++ /dev/null @@ -1,306 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Any_Accents.txt -// Date: Tue May 18 17:24:48 2004 -//-------------------------------------------------------------------- - -// Any_Accents - -t_Any_Accents { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -":: NFD (NFC) ;" - -// to do: make reversible - -// define special conversion characters. -// varients of this could use different characters, or set one or the other to null. - -"$pre = \\\< ;" -"$post = \\\> ;" - -// Provide keyboard equivalents for common diacritics used in transliteration - -"$pre \\\` $post <> \u0300 ;" // COMBINING GRAVE ACCENT -"$pre \\\' $post <> \u0301 ;" // COMBINING ACUTE ACCENT -"$pre \\\^ $post <> \u0302 ;" // COMBINING CIRCUMFLEX ACCENT -"$pre \\\~ $post <> \u0303 ;" // COMBINING TILDE -"$pre \\\- $post <> \u0304 ;" // COMBINING MACRON -"$pre \\\" $post <> \u0308 ;" // COMBINING DIAERESIS -"$pre \\\* $post <> \u030A ;" // COMBINING RING ABOVE -"$pre \\\, $post <> \u0327 ;" // COMBINING CEDILLA -"$pre '/' $post <> \u0338 ;" // COMBINING LONG SOLIDUS OVERLAY -"$pre \\\. 
$post <> \u0323 ;" // COMBINING DOT BELOW - -// Combine common characters - -"$pre AE $post <> \u00C6 ;" // LATIN CAPITAL LETTER AE -"$pre ae $post <> \u00E6 ;" // LATIN SMALL LETTER AE -"$pre D $post <> \u00D0 ;" // LATIN CAPITAL LETTER ETH -"$pre d $post <> \u00F0 ;" // LATIN SMALL LETTER ETH -"$pre O'/' $post <> \u00D8 ;" // LATIN CAPITAL LETTER O WITH STROKE -"$pre o'/' $post <> \u00F8 ;" // LATIN SMALL LETTER O WITH STROKE -"$pre TH $post <> \u00DE ;" // LATIN CAPITAL LETTER THORN -"$pre th $post <> \u00FE ;" // LATIN SMALL LETTER THORN -"$pre OE $post <> \u0152 ;" // LATIN CAPITAL LIGATURE OE -"$pre oe $post <> \u0153 ;" // LATIN SMALL LIGATURE OE - -"$pre ss $post <> \u00DF ;" // LATIN SMALL LETTER SHARP S - -"$pre NG $post <> \u014A ;" // LATIN CAPITAL LETTER ENG -"$pre ng $post <> \u014B ;" // LATIN SMALL LETTER ENG - -"$pre T $post <> \u0398 ;" // THETA -"$pre t $post <> \u03B8 ;" // THETA -"$pre SH $post <> \u01A9 ;" // LATIN CAPITAL LETTER ESH -"$pre sh $post <> \u0283 ;" // LATIN SMALL LETTER ESH -"$pre ZH $post <> \u01B7 ;" // LATIN CAPITAL LETTER EZH -"$pre zh $post <> \u0292 ;" // LATIN SMALL LETTER EZH - -"$pre U $post <> \u01B1 ;" // LATIN CAPITAL LETTER UPSILON -"$pre u $post <> \u028A ;" // LATIN SMALL LETTER UPSILON -"$pre A $post <> \u018F ;" // LATIN CAPITAL LETTER SCHWA -"$pre a $post <> \u0259 ;" // LATIN SMALL LETTER SCHWA -"$pre O $post <> \u0186 ;" // LATIN CAPITAL LETTER OPEN O -"$pre o $post <> \u0254 ;" // LATIN SMALL LETTER OPEN O -"$pre E $post <> \u0190 ;" // LATIN CAPITAL LETTER OPEN E -"$pre e $post <> \u025B ;" // LATIN SMALL LETTER OPEN E - -// three that don't have uppercases - -"$pre '?' 
$post <> \u0294 ;" // LATIN LETTER GLOTTAL STOP -"$pre i $post <> \u026A ;" // LATIN LETTER SMALL CAPITAL I -"$pre v $post <> \u028C ;" // LATIN SMALL LETTER TURNED V - -// Additional Characters that may be added in the future - -// $pre XXX $post <> \u0306 ; # COMBINING BREVE -// $pre XXX $post <> \u0307 ; # COMBINING DOT ABOVE -// $pre XXX $post <> \u0309 ; # COMBINING HOOK ABOVE -// $pre XXX $post <> \u030B ; # COMBINING DOUBLE ACUTE ACCENT -// $pre XXX $post <> \u030C ; # COMBINING CARON -// $pre XXX $post <> \u030F ; # COMBINING DOUBLE GRAVE ACCENT -// $pre XXX $post <> \u0311 ; # COMBINING INVERTED BREVE -// $pre XXX $post <> \u0313 ; # COMBINING COMMA ABOVE -// $pre XXX $post <> \u0314 ; # COMBINING REVERSED COMMA ABOVE -// $pre XXX $post <> \u031B ; # COMBINING HORN -// $pre XXX $post <> \u0324 ; # COMBINING DIAERESIS BELOW -// $pre XXX $post <> \u0325 ; # COMBINING RING BELOW -// $pre XXX $post <> \u0326 ; # COMBINING COMMA BELOW -// $pre XXX $post <> \u0328 ; # COMBINING OGONEK -// $pre XXX $post <> \u032D ; # COMBINING CIRCUMFLEX ACCENT BELOW -// $pre XXX $post <> \u032E ; # COMBINING BREVE BELOW -// $pre XXX $post <> \u0330 ; # COMBINING TILDE BELOW -// $pre XXX $post <> \u0331 ; # COMBINING MACRON BELOW - -// $pre YYY $post <> \u00AA ; # FEMININE ORDINAL INDICATOR -// $pre YYY $post <> \u00BA ; # MASCULINE ORDINAL INDICATOR -// $pre YYY $post <> \u0110 ; # LATIN CAPITAL LETTER D WITH STROKE -// $pre YYY $post <> \u0111 ; # LATIN SMALL LETTER D WITH STROKE -// $pre YYY $post <> \u0126 ; # LATIN CAPITAL LETTER H WITH STROKE -// $pre YYY $post <> \u0127 ; # LATIN SMALL LETTER H WITH STROKE -// $pre YYY $post <> \u0131 ; # LATIN SMALL LETTER DOTLESS I -// $pre YYY $post <> \u0138 ; # LATIN SMALL LETTER KRA -// $pre YYY $post <> \u013F ; # LATIN CAPITAL LETTER L WITH MIDDLE DOT -// $pre YYY $post <> \u0140 ; # LATIN SMALL LETTER L WITH MIDDLE DOT -// $pre YYY $post <> \u0141 ; # LATIN CAPITAL LETTER L WITH STROKE -// $pre YYY $post <> \u0142 ; # LATIN SMALL 
LETTER L WITH STROKE -// $pre YYY $post <> \u0149 ; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE -// $pre YYY $post <> \u0166 ; # LATIN CAPITAL LETTER T WITH STROKE -// $pre YYY $post <> \u0167 ; # LATIN SMALL LETTER T WITH STROKE -// $pre YYY $post <> \u017F ; # LATIN SMALL LETTER LONG S -// $pre YYY $post <> \u0180 ; # LATIN SMALL LETTER B WITH STROKE -// $pre YYY $post <> \u0181 ; # LATIN CAPITAL LETTER B WITH HOOK -// $pre YYY $post <> \u0182 ; # LATIN CAPITAL LETTER B WITH TOPBAR -// $pre YYY $post <> \u0183 ; # LATIN SMALL LETTER B WITH TOPBAR -// $pre YYY $post <> \u0184 ; # LATIN CAPITAL LETTER TONE SIX -// $pre YYY $post <> \u0185 ; # LATIN SMALL LETTER TONE SIX -// $pre YYY $post <> \u0187 ; # LATIN CAPITAL LETTER C WITH HOOK -// $pre YYY $post <> \u0188 ; # LATIN SMALL LETTER C WITH HOOK -// $pre YYY $post <> \u0189 ; # LATIN CAPITAL LETTER AFRICAN D -// $pre YYY $post <> \u018A ; # LATIN CAPITAL LETTER D WITH HOOK -// $pre YYY $post <> \u018B ; # LATIN CAPITAL LETTER D WITH TOPBAR -// $pre YYY $post <> \u018C ; # LATIN SMALL LETTER D WITH TOPBAR -// $pre YYY $post <> \u018D ; # LATIN SMALL LETTER TURNED DELTA -// $pre YYY $post <> \u018E ; # LATIN CAPITAL LETTER REVERSED E -// $pre YYY $post <> \u0191 ; # LATIN CAPITAL LETTER F WITH HOOK -// $pre YYY $post <> \u0192 ; # LATIN SMALL LETTER F WITH HOOK -// $pre YYY $post <> \u0193 ; # LATIN CAPITAL LETTER G WITH HOOK -// $pre YYY $post <> \u0194 ; # LATIN CAPITAL LETTER GAMMA -// $pre YYY $post <> \u0195 ; # LATIN SMALL LETTER HV -// $pre YYY $post <> \u0196 ; # LATIN CAPITAL LETTER IOTA -// $pre YYY $post <> \u0197 ; # LATIN CAPITAL LETTER I WITH STROKE -// $pre YYY $post <> \u0198 ; # LATIN CAPITAL LETTER K WITH HOOK -// $pre YYY $post <> \u0199 ; # LATIN SMALL LETTER K WITH HOOK -// $pre YYY $post <> \u019A ; # LATIN SMALL LETTER L WITH BAR -// $pre YYY $post <> \u019B ; # LATIN SMALL LETTER LAMBDA WITH STROKE -// $pre YYY $post <> \u019C ; # LATIN CAPITAL LETTER TURNED M -// $pre YYY $post <> \u019D 
; # LATIN CAPITAL LETTER N WITH LEFT HOOK -// $pre YYY $post <> \u019E ; # LATIN SMALL LETTER N WITH LONG RIGHT LEG -// $pre YYY $post <> \u019F ; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE -// $pre YYY $post <> \u01A2 ; # LATIN CAPITAL LETTER OI -// $pre YYY $post <> \u01A3 ; # LATIN SMALL LETTER OI -// $pre YYY $post <> \u01A4 ; # LATIN CAPITAL LETTER P WITH HOOK -// $pre YYY $post <> \u01A5 ; # LATIN SMALL LETTER P WITH HOOK -// $pre YYY $post <> \u01A6 ; # LATIN LETTER YR -// $pre YYY $post <> \u01A7 ; # LATIN CAPITAL LETTER TONE TWO -// $pre YYY $post <> \u01A8 ; # LATIN SMALL LETTER TONE TWO -// $pre YYY $post <> \u01AA ; # LATIN LETTER REVERSED ESH LOOP -// $pre YYY $post <> \u01AB ; # LATIN SMALL LETTER T WITH PALATAL HOOK -// $pre YYY $post <> \u01AC ; # LATIN CAPITAL LETTER T WITH HOOK -// $pre YYY $post <> \u01AD ; # LATIN SMALL LETTER T WITH HOOK -// $pre YYY $post <> \u01AE ; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK -// $pre YYY $post <> \u01B2 ; # LATIN CAPITAL LETTER V WITH HOOK -// $pre YYY $post <> \u01B3 ; # LATIN CAPITAL LETTER Y WITH HOOK -// $pre YYY $post <> \u01B4 ; # LATIN SMALL LETTER Y WITH HOOK -// $pre YYY $post <> \u01B5 ; # LATIN CAPITAL LETTER Z WITH STROKE -// $pre YYY $post <> \u01B6 ; # LATIN SMALL LETTER Z WITH STROKE -// $pre YYY $post <> \u01B8 ; # LATIN CAPITAL LETTER EZH REVERSED -// $pre YYY $post <> \u01B9 ; # LATIN SMALL LETTER EZH REVERSED -// $pre YYY $post <> \u01BA ; # LATIN SMALL LETTER EZH WITH TAIL -// $pre YYY $post <> \u01BB ; # LATIN LETTER TWO WITH STROKE -// $pre YYY $post <> \u01BC ; # LATIN CAPITAL LETTER TONE FIVE -// $pre YYY $post <> \u01BD ; # LATIN SMALL LETTER TONE FIVE -// $pre YYY $post <> \u01BE ; # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE -// $pre YYY $post <> \u01BF ; # LATIN LETTER WYNN -// $pre YYY $post <> \u01C0 ; # LATIN LETTER DENTAL CLICK -// $pre YYY $post <> \u01C1 ; # LATIN LETTER LATERAL CLICK -// $pre YYY $post <> \u01C2 ; # LATIN LETTER ALVEOLAR CLICK -// $pre YYY $post <> 
\u01C3 ; # LATIN LETTER RETROFLEX CLICK -// $pre YYY $post <> \u01C4 ; # LATIN CAPITAL LETTER DZ WITH CARON -// $pre YYY $post <> \u01C5 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON -// $pre YYY $post <> \u01C6 ; # LATIN SMALL LETTER DZ WITH CARON -// $pre YYY $post <> \u01C7 ; # LATIN CAPITAL LETTER LJ -// $pre YYY $post <> \u01C8 ; # LATIN CAPITAL LETTER L WITH SMALL LETTER J -// $pre YYY $post <> \u01C9 ; # LATIN SMALL LETTER LJ -// $pre YYY $post <> \u01CA ; # LATIN CAPITAL LETTER NJ -// $pre YYY $post <> \u01CB ; # LATIN CAPITAL LETTER N WITH SMALL LETTER J -// $pre YYY $post <> \u01CC ; # LATIN SMALL LETTER NJ -// $pre YYY $post <> \u01DD ; # LATIN SMALL LETTER TURNED E -// $pre YYY $post <> \u01E4 ; # LATIN CAPITAL LETTER G WITH STROKE -// $pre YYY $post <> \u01E5 ; # LATIN SMALL LETTER G WITH STROKE -// $pre YYY $post <> \u01F1 ; # LATIN CAPITAL LETTER DZ -// $pre YYY $post <> \u01F2 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z -// $pre YYY $post <> \u01F3 ; # LATIN SMALL LETTER DZ -// $pre YYY $post <> \u01F6 ; # LATIN CAPITAL LETTER HWAIR -// $pre YYY $post <> \u01F7 ; # LATIN CAPITAL LETTER WYNN -// $pre YYY $post <> \u021C ; # LATIN CAPITAL LETTER YOGH -// $pre YYY $post <> \u021D ; # LATIN SMALL LETTER YOGH -// $pre YYY $post <> \u0222 ; # LATIN CAPITAL LETTER OU -// $pre YYY $post <> \u0223 ; # LATIN SMALL LETTER OU -// $pre YYY $post <> \u0224 ; # LATIN CAPITAL LETTER Z WITH HOOK -// $pre YYY $post <> \u0225 ; # LATIN SMALL LETTER Z WITH HOOK -// $pre YYY $post <> \u0250 ; # LATIN SMALL LETTER TURNED A -// $pre YYY $post <> \u0251 ; # LATIN SMALL LETTER ALPHA -// $pre YYY $post <> \u0252 ; # LATIN SMALL LETTER TURNED ALPHA -// $pre YYY $post <> \u0253 ; # LATIN SMALL LETTER B WITH HOOK -// $pre YYY $post <> \u0255 ; # LATIN SMALL LETTER C WITH CURL -// $pre YYY $post <> \u0256 ; # LATIN SMALL LETTER D WITH TAIL -// $pre YYY $post <> \u0257 ; # LATIN SMALL LETTER D WITH HOOK -// $pre YYY $post <> \u0258 ; # LATIN SMALL LETTER REVERSED E 
-// $pre YYY $post <> \u025A ; # LATIN SMALL LETTER SCHWA WITH HOOK -// $pre YYY $post <> \u025C ; # LATIN SMALL LETTER REVERSED OPEN E -// $pre YYY $post <> \u025D ; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK -// $pre YYY $post <> \u025E ; # LATIN SMALL LETTER CLOSED REVERSED OPEN E -// $pre YYY $post <> \u025F ; # LATIN SMALL LETTER DOTLESS J WITH STROKE -// $pre YYY $post <> \u0260 ; # LATIN SMALL LETTER G WITH HOOK -// $pre YYY $post <> \u0261 ; # LATIN SMALL LETTER SCRIPT G -// $pre YYY $post <> \u0262 ; # LATIN LETTER SMALL CAPITAL G -// $pre YYY $post <> \u0263 ; # LATIN SMALL LETTER GAMMA -// $pre YYY $post <> \u0264 ; # LATIN SMALL LETTER RAMS HORN -// $pre YYY $post <> \u0265 ; # LATIN SMALL LETTER TURNED H -// $pre YYY $post <> \u0266 ; # LATIN SMALL LETTER H WITH HOOK -// $pre YYY $post <> \u0267 ; # LATIN SMALL LETTER HENG WITH HOOK -// $pre YYY $post <> \u0268 ; # LATIN SMALL LETTER I WITH STROKE -// $pre YYY $post <> \u0269 ; # LATIN SMALL LETTER IOTA -// $pre YYY $post <> \u026B ; # LATIN SMALL LETTER L WITH MIDDLE TILDE -// $pre YYY $post <> \u026C ; # LATIN SMALL LETTER L WITH BELT -// $pre YYY $post <> \u026D ; # LATIN SMALL LETTER L WITH RETROFLEX HOOK -// $pre YYY $post <> \u026E ; # LATIN SMALL LETTER LEZH -// $pre YYY $post <> \u026F ; # LATIN SMALL LETTER TURNED M -// $pre YYY $post <> \u0270 ; # LATIN SMALL LETTER TURNED M WITH LONG LEG -// $pre YYY $post <> \u0271 ; # LATIN SMALL LETTER M WITH HOOK -// $pre YYY $post <> \u0272 ; # LATIN SMALL LETTER N WITH LEFT HOOK -// $pre YYY $post <> \u0273 ; # LATIN SMALL LETTER N WITH RETROFLEX HOOK -// $pre YYY $post <> \u0274 ; # LATIN LETTER SMALL CAPITAL N -// $pre YYY $post <> \u0275 ; # LATIN SMALL LETTER BARRED O -// $pre YYY $post <> \u0276 ; # LATIN LETTER SMALL CAPITAL OE -// $pre YYY $post <> \u0277 ; # LATIN SMALL LETTER CLOSED OMEGA -// $pre YYY $post <> \u0278 ; # LATIN SMALL LETTER PHI -// $pre YYY $post <> \u0279 ; # LATIN SMALL LETTER TURNED R -// $pre YYY $post <> \u027A ; # 
LATIN SMALL LETTER TURNED R WITH LONG LEG -// $pre YYY $post <> \u027B ; # LATIN SMALL LETTER TURNED R WITH HOOK -// $pre YYY $post <> \u027C ; # LATIN SMALL LETTER R WITH LONG LEG -// $pre YYY $post <> \u027D ; # LATIN SMALL LETTER R WITH TAIL -// $pre YYY $post <> \u027E ; # LATIN SMALL LETTER R WITH FISHHOOK -// $pre YYY $post <> \u027F ; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK -// $pre YYY $post <> \u0280 ; # LATIN LETTER SMALL CAPITAL R -// $pre YYY $post <> \u0281 ; # LATIN LETTER SMALL CAPITAL INVERTED R -// $pre YYY $post <> \u0282 ; # LATIN SMALL LETTER S WITH HOOK -// $pre YYY $post <> \u0284 ; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK -// $pre YYY $post <> \u0285 ; # LATIN SMALL LETTER SQUAT REVERSED ESH -// $pre YYY $post <> \u0286 ; # LATIN SMALL LETTER ESH WITH CURL -// $pre YYY $post <> \u0287 ; # LATIN SMALL LETTER TURNED T -// $pre YYY $post <> \u0288 ; # LATIN SMALL LETTER T WITH RETROFLEX HOOK -// $pre YYY $post <> \u0289 ; # LATIN SMALL LETTER U BAR -// $pre YYY $post <> \u028B ; # LATIN SMALL LETTER V WITH HOOK -// $pre YYY $post <> \u028D ; # LATIN SMALL LETTER TURNED W -// $pre YYY $post <> \u028E ; # LATIN SMALL LETTER TURNED Y -// $pre YYY $post <> \u028F ; # LATIN LETTER SMALL CAPITAL Y -// $pre YYY $post <> \u0290 ; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK -// $pre YYY $post <> \u0291 ; # LATIN SMALL LETTER Z WITH CURL -// $pre YYY $post <> \u0293 ; # LATIN SMALL LETTER EZH WITH CURL -// $pre YYY $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP -// $pre YYY $post <> \u0295 ; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE -// $pre YYY $post <> \u0296 ; # LATIN LETTER INVERTED GLOTTAL STOP -// $pre YYY $post <> \u0297 ; # LATIN LETTER STRETCHED C -// $pre YYY $post <> \u0298 ; # LATIN LETTER BILABIAL CLICK -// $pre YYY $post <> \u0299 ; # LATIN LETTER SMALL CAPITAL B -// $pre YYY $post <> \u029A ; # LATIN SMALL LETTER CLOSED OPEN E -// $pre YYY $post <> \u029B ; # LATIN LETTER SMALL CAPITAL G WITH HOOK -// $pre YYY $post <> 
\u029C ; # LATIN LETTER SMALL CAPITAL H -// $pre YYY $post <> \u029D ; # LATIN SMALL LETTER J WITH CROSSED-TAIL -// $pre YYY $post <> \u029E ; # LATIN SMALL LETTER TURNED K -// $pre YYY $post <> \u029F ; # LATIN LETTER SMALL CAPITAL L -// $pre YYY $post <> \u02A0 ; # LATIN SMALL LETTER Q WITH HOOK -// $pre YYY $post <> \u02A1 ; # LATIN LETTER GLOTTAL STOP WITH STROKE -// $pre YYY $post <> \u02A2 ; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE -// $pre YYY $post <> \u02A3 ; # LATIN SMALL LETTER DZ DIGRAPH -// $pre YYY $post <> \u02A4 ; # LATIN SMALL LETTER DEZH DIGRAPH -// $pre YYY $post <> \u02A5 ; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL -// $pre YYY $post <> \u02A6 ; # LATIN SMALL LETTER TS DIGRAPH -// $pre YYY $post <> \u02A7 ; # LATIN SMALL LETTER TESH DIGRAPH -// $pre YYY $post <> \u02A8 ; # LATIN SMALL LETTER TC DIGRAPH WITH CURL -// $pre YYY $post <> \u02A9 ; # LATIN SMALL LETTER FENG DIGRAPH -// $pre YYY $post <> \u02AA ; # LATIN SMALL LETTER LS DIGRAPH -// $pre YYY $post <> \u02AB ; # LATIN SMALL LETTER LZ DIGRAPH -// $pre YYY $post <> \u02AC ; # LATIN LETTER BILABIAL PERCUSSIVE -// $pre YYY $post <> \u02AD ; # LATIN LETTER BIDENTAL PERCUSSIVE -// $pre YYY $post <> \u02B0 ; # MODIFIER LETTER SMALL H -// $pre YYY $post <> \u02B1 ; # MODIFIER LETTER SMALL H WITH HOOK -// $pre YYY $post <> \u02B2 ; # MODIFIER LETTER SMALL J -// $pre YYY $post <> \u02B3 ; # MODIFIER LETTER SMALL R -// $pre YYY $post <> \u02B4 ; # MODIFIER LETTER SMALL TURNED R -// $pre YYY $post <> \u02B5 ; # MODIFIER LETTER SMALL TURNED R WITH HOOK -// $pre YYY $post <> \u02B6 ; # MODIFIER LETTER SMALL CAPITAL INVERTED R -// $pre YYY $post <> \u02B7 ; # MODIFIER LETTER SMALL W -// $pre YYY $post <> \u02B8 ; # MODIFIER LETTER SMALL Y -// $pre YYY $post <> \u02E0 ; # MODIFIER LETTER SMALL GAMMA -// $pre YYY $post <> \u02E1 ; # MODIFIER LETTER SMALL L -// $pre YYY $post <> \u02E2 ; # MODIFIER LETTER SMALL S -// $pre YYY $post <> \u02E3 ; # MODIFIER LETTER SMALL X -// $pre YYY $post <> \u02E4 
; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP -// $pre YYY $post <> \u1E9A ; # LATIN SMALL LETTER A WITH RIGHT HALF RING -// $pre YYY $post <> \u207F ; # SUPERSCRIPT LATIN SMALL LETTER N - -":: NFC (NFD) ;" - } -} diff --git a/icu4c/source/data/translit/t_Any_Publishing.txt b/icu4c/source/data/translit/t_Any_Publishing.txt deleted file mode 100644 index 7dd39703111..00000000000 --- a/icu4c/source/data/translit/t_Any_Publishing.txt +++ /dev/null @@ -1,50 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Any_Publishing.txt -// Date: Tue May 18 17:24:48 2004 -//-------------------------------------------------------------------- - -// Any_Publishing - -t_Any_Publishing { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Test case -// "The" "(quick)" ('brown') `fox' ` jumped -- "over?" 
- -// Variables - -"$single = \\\' ;" -"$space = ' ' ;" -"$double = \\\" ;" -"$back = \\\` ;" -"$tab = '\u0008' ;" -"$makeRight = [[:Z:][:Ps:][:Pi:]$] ;" - -// fix UNIX quotes - -"$back $back > “ ;" -"$back > ‘ ;" - -// fix typewriter quotes, by context - -"$makeRight {$double} <> “ ;" -"$double <> ” ;" - -"$makeRight {$single} <> ‘ ;" -"$single <> ’;" - -// fix multiple spaces and hyphens - -"$space {$space} > ;" -"'--' <> — ;" - } -} diff --git a/icu4c/source/data/translit/t_Arab_Latn.txt b/icu4c/source/data/translit/t_Arab_Latn.txt deleted file mode 100644 index 16d5256a8ac..00000000000 --- a/icu4c/source/data/translit/t_Arab_Latn.txt +++ /dev/null @@ -1,162 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Arabic_Latin.txt -// Date: Tue May 18 17:24:48 2004 -//-------------------------------------------------------------------- - -// Arabic_Latin - -t_Arab_Latn { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Generally follows UNGEGN -// Occasionally deviates in the direction of ISO 233 -// a) where required for disambiguation. -// b) with underdot instead of cedilla for letter like SAD, since -// those are explicitly in Unicode for transliteration. -// c) with extra non-Arabic-language letters, like PEH - -// Does *not* do assimilation of "al", nor hyphenation. -// While it could be done, we need to determine whether a prefix "al" could -// occur other than as the definite article (since no space is used). 
- -":: [[:Arabic:] [‎ⁿ\u060C\u061B\u061F\u0640\u064B-\u0655\u0660-\u066C\u06F0-\u06F9\uFDFC]] ;" -":: NFKD (NFC);" -"$disambig = ̱ ;" -"$disambig2 = ̰ ;" -"$under = ̣ ;" - -"$notAbove = [[:^ccc=0:]&[:^ccc=230:]];" - -// non-letters - - "٫ <> '.' $disambig ;" // ARABIC DECIMAL SEPARATOR - "٬ <> ',' $disambig ;" // ARABIC THOUSANDS SEPARATOR -// ٭ <> ; # ARABIC FIVE POINTED STAR // no need to transliterate - -"، <> ',' ;" // ARABIC COMMA - "؛ <> ';' ;" // ARABIC SEMICOLON - "؟ <> '?' ;" // ARABIC QUESTION MARK - "٪ <> '%' ;" // ARABIC PERCENT SIGN - - "۰ <> 0 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT ZERO - "۱ <> 1 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT ONE - "۲ <> 2 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT TWO - "۳ <> 3 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT THREE - "۴ <> 4 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT FOUR - "۵ <> 5 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT FIVE - "۶ <> 6 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT SIX - "۷ <> 7 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT SEVEN - "۸ <> 8 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT EIGHT - "۹ <> 9 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT NINE - - "٠ <> 0 ;" // ARABIC-INDIC DIGIT ZERO - "١ <> 1 ;" // ARABIC-INDIC DIGIT ONE - "٢ <> 2 ;" // ARABIC-INDIC DIGIT TWO - "٣ <> 3 ;" // ARABIC-INDIC DIGIT THREE - "٤ <> 4 ;" // ARABIC-INDIC DIGIT FOUR - "٥ <> 5 ;" // ARABIC-INDIC DIGIT FIVE - "٦ <> 6 ;" // ARABIC-INDIC DIGIT SIX - "٧ <> 7 ;" // ARABIC-INDIC DIGIT SEVEN - "٨ <> 8 ;" // ARABIC-INDIC DIGIT EIGHT - "٩ <> 9 ;" // ARABIC-INDIC DIGIT NINE - -// letters - -// long vowels - "َا<> ā ;" // ARABIC FATHA, ARABIC LETTER ALEF - "ُو <> ū ;" // ARABIC DAMMA, ARABIC LETTER WAW - "ِي <> ī ;" // ARABIC KASRA, ARABIC LETTER YEH - -// longer items moved here to prevent masking - "ث <> t h $disambig ;" // ARABIC LETTER THEH - "ذ <> d h $disambig ;" // ARABIC LETTER THAL - "ش <> s h $disambig ;" // ARABIC LETTER SHEEN - "ص <> s $under ;" // ARABIC LETTER SAD - "ض <> d $under ;" // ARABIC LETTER DAD - 
"ط <> t $under ;" // ARABIC LETTER TAH - "ظ <> z $under ;" // ARABIC LETTER ZAH - "غ <> g h $disambig ;" // ARABIC LETTER GHAIN - -// WARNING: special case -// will be canonically ordered as -// so on the return, we have to skip over (but preserve) the half-ring below (or others like it) -// ةٕ < ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS - - "ة <> t \u0308 ;" // ARABIC LETTER TEH MARBUTA - "ة | $1 < t ($notAbove+) \u0308 ;" // ARABIC LETTER TEH MARBUTA - -// non-Arabic language - "ژ <> z h $disambig ;" // ARABIC LETTER JEH - "ڭ <> n $disambig g ;" // ARABIC LETTER NG - "ۋ <> v $disambig ;" // ARABIC LETTER VE - "ی <> y $disambig2 ;" // ARABIC LETTER FARSI YEH - -// Arabic language - - "ء <> ʾ ;" // ARABIC LETTER HAMZA - "ا <> a $under;" // ARABIC LETTER ALEF - "ب <> b ;" // ARABIC LETTER BEH - "ت <> t ;" // ARABIC LETTER TEH - "ج <> j ;" // ARABIC LETTER JEEM - "ح <> h $under ;" // ARABIC LETTER HAH - "خ <> k h $disambig ;" // ARABIC LETTER KHAH - "د <> d ;" // ARABIC LETTER DAL - "ر <> r ;" // ARABIC LETTER REH - "ز <> z ;" // ARABIC LETTER ZAIN - "س <> s ;" // ARABIC LETTER SEEN - "ع <> ʿ ;" // ARABIC LETTER AIN - "ـ > ;" // ARABIC TATWEEL - "ف <> f ;" // ARABIC LETTER FEH - "ق <> q ;" // ARABIC LETTER QAF - "ك <> k ;" // ARABIC LETTER KAF - "ل <> l ;" // ARABIC LETTER LAM - "م <> m ;" // ARABIC LETTER MEEM - "ن <> n ;" // ARABIC LETTER NOON - "ه <> h ;" // ARABIC LETTER HEH - "و <> w ;" // ARABIC LETTER WAW - "ى <> y $disambig ;" // ARABIC LETTER ALEF MAKSURA - "ي <> y ;" // ARABIC LETTER YEH - "ً <> aⁿ ;" // ARABIC FATHATAN - "ٌ <> uⁿ ;" // ARABIC DAMMATAN - "ٍ <> iⁿ ;" // ARABIC KASRATAN - "َ <> a ;" // ARABIC FATHA - "ُ <> u ;" // ARABIC DAMMA - "ِ <> i ;" // ARABIC KASRA - "ّ <> ̃ ;" // ARABIC SHADDA - "ْ <> ̊ ;" // ARABIC SUKUN - -// special combining marks - "ٓ <> ̂ ;" // ARABIC MADDAH ABOVE - "ٔ <> ̉ ;" // ARABIC HAMZA ABOVE - "ٕ <> ̹ ;" // ARABIC HAMZA BELOW - -// Some non-Arabic language (not in UNGEGN) - "پ <> p 
;" // ARABIC LETTER PEH - "چ <> c h $disambig ;" // ARABIC LETTER TCHEH - "ڤ <> v ;" // ARABIC LETTER VEH -// ڥ <> v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW -// ڢ <> f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW - "گ <> g ;" // ARABIC LETTER GAF - -// fallbacks -"| s < c } [eiy];" -"| k < c ;" -"| i < e ;" -"| u < o ;" -"| ks < x ;" -"| n < ‎ⁿ;" - -":: (lower) ;" -"::NFC (NFD);" -":: ( [[:Latin:] [%,.0-9;?\u02BE-\u02BF\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339\u037E]] );" - } -} diff --git a/icu4c/source/data/translit/t_Beng_InterIndic.txt b/icu4c/source/data/translit/t_Beng_InterIndic.txt deleted file mode 100644 index 6eb2a5c350d..00000000000 --- a/icu4c/source/data/translit/t_Beng_InterIndic.txt +++ /dev/null @@ -1,119 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Bengali_InterIndic.txt -// Date: Tue May 18 17:24:48 2004 -//-------------------------------------------------------------------- - -// Bengali_InterIndic - -t_Beng_InterIndic { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Bengali-InterIndic - -"\u09C7\u09BE>\uE04B;" // VOWEL SIGN O -"\u09C7\u09D7>\uE04C;" // VOWEL SIGN AU -"\u0981>\uE001;" // SIGN CANDRABINDU -"\u0982>\uE002;" // SIGN ANUSVARA -"\u0983>\uE003;" // SIGN VISARGA -"\u0985>\uE005;" // LETTER A -"\u0986>\uE006;" // LETTER AA -"\u0987>\uE007;" // LETTER I -"\u0988>\uE008;" // LETTER II -"\u0989>\uE009;" // LETTER U -"\u098A>\uE00A;" // LETTER UU -"\u098B>\uE00B;" // LETTER 
VOCALIC R -"\u098C>\uE00C;" // LETTER VOCALIC L -"\u098F>\uE00F;" // LETTER E -"\u0990>\uE010;" // LETTER AI -"\u0993>\uE013;" // LETTER O -"\u0994>\uE014;" // LETTER AU -"\u0995>\uE015;" // LETTER KA -"\u0996>\uE016;" // LETTER KHA -"\u0997>\uE017;" // LETTER GA -"\u0998>\uE018;" // LETTER GHA -"\u0999>\uE019;" // LETTER NGA -"\u099A>\uE01A;" // LETTER CA -"\u099B>\uE01B;" // LETTER CHA -"\u099C>\uE01C;" // LETTER JA -"\u099D>\uE01D;" // LETTER JHA -"\u099E>\uE01E;" // LETTER NYA -"\u099F>\uE01F;" // LETTER TTA -"\u09A0>\uE020;" // LETTER TTHA -"\u09A1>\uE021;" // LETTER DDA -"\u09A2>\uE022;" // LETTER DDHA -"\u09A3>\uE023;" // LETTER NNA -"\u09A4>\uE024;" // LETTER TA -"\u09A5>\uE025;" // LETTER THA -"\u09A6>\uE026;" // LETTER DA -"\u09A7>\uE027;" // LETTER DHA -"\u09A8>\uE028;" // LETTER NA -"\u09AA>\uE02A;" // LETTER PA -"\u09AB>\uE02B;" // LETTER PHA -"\u09AC>\uE02C;" // LETTER BA -"\u09AD>\uE02D;" // LETTER BHA -"\u09AE>\uE02E;" // LETTER MA -"\u09AF>\uE02F;" // LETTER YA -"\u09B0>\uE030;" // LETTER RA -"\u09B2>\uE032;" // LETTER LA -"\u09B6>\uE036;" // LETTER SHA -"\u09B7>\uE037;" // LETTER SSA -"\u09B8>\uE038;" // LETTER SA -"\u09B9>\uE039;" // LETTER HA -"\u09BC>\uE03C;" // SIGN NUKTA -"\u09BD>\uE03D;" // SIGN AVAGRAHA -"\u09BE>\uE03E;" // VOWEL SIGN AA -"\u09BF>\uE03F;" // VOWEL SIGN I -"\u09C0>\uE040;" // VOWEL SIGN II -"\u09C1>\uE041;" // VOWEL SIGN U -"\u09C2>\uE042;" // VOWEL SIGN UU -"\u09C3>\uE043;" // VOWEL SIGN VOCALIC R -"\u09C4>\uE044;" // VOWEL SIGN VOCALIC RR -"\u09C7>\uE047;" // VOWEL SIGN E -"\u09C8>\uE048;" // VOWEL SIGN AI -"\u09CB>\uE04B;" -"\u09CC>\uE04C;" -// -"\u09CD>\uE04D;" // SIGN VIRAMA -"\u09D7>\uE057;" // AU LENGTH MARK -// -"\u09E0>\uE060;" // LETTER VOCALIC RR -"\u09E1>\uE061;" // LETTER VOCALIC LL -"\u09E2>\uE062;" // VOWEL SIGN VOCALIC L -"\u09E3>\uE063;" // VOWEL SIGN VOCALIC LL -"\u09E6>\uE066;" // DIGIT ZERO -"\u09E7>\uE067;" // DIGIT ONE -"\u09E8>\uE068;" // DIGIT TWO -"\u09E9>\uE069;" // DIGIT THREE -"\u09EA>\uE06A;" // 
DIGIT FOUR -"\u09EB>\uE06B;" // DIGIT FIVE -"\u09EC>\uE06C;" // DIGIT SIX -"\u09ED>\uE06D;" // DIGIT SEVEN -"\u09EE>\uE06E;" // DIGIT EIGHT -"\u09EF>\uE06F;" // DIGIT NINE -"\u09F0>\ue071;" // Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL -"\u09F1>\ue072;" // Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL -"\u09F2>\ue073;" // Bengali-InterIndic: RUPEE MARK -"\u09F3>\ue074;" // Bengali-InterIndic: RUPEE SIGN -"\u09F4>\ue075;" // Bengali-InterIndic: CURRENCY NUMERATOR ONE -"\u09F5>\ue076;" // Bengali-InterIndic: CURRENCY NUMERATOR TWO -"\u09F6>\ue077;" // Bengali-InterIndic: CURRENCY NUMERATOR THREE -"\u09F7>\ue078;" // Bengali-InterIndic: CURRENCY NUMERATOR FOUR -"\u09F8>\ue079;" // Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR -"\u09F9>\ue07A;" // Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN -"\u09FA>\ue07B;" // ISSHAR - -"\u0964>\ue064;" // DANDA -"\u0965>\ue065;" // DOUBLE DANDA -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_Cyrl_Latn.txt b/icu4c/source/data/translit/t_Cyrl_Latn.txt deleted file mode 100644 index a742fc8bdee..00000000000 --- a/icu4c/source/data/translit/t_Cyrl_Latn.txt +++ /dev/null @@ -1,322 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Cyrillic_Latin.txt -// Date: Tue May 18 17:24:48 2004 -//-------------------------------------------------------------------- - -// Cyrillic_Latin - -t_Cyrl_Latn { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -// TODO: add remaining characters -// Should add variants for Russian-English, Russian-German -// Those can use this as a base, and then remap cases -// like a $hat to ya or ja. - -// :: [\\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ; -//## WARNING, \u0308 must be added to the generated filters, in both directions ### -// MINIMAL FILTER -":: [\u0308\u0102-\u0103\u0114-\u0115\u011E-\u011F\u012C-\u012D\u014E-\u014F\u016C-\u016D\u0306\u0400-\u045F\u0490-\u0495\u0498-\u0499\u04C1-\u04C2\u04D0-\u04DF\u04E2-\u04E7\u04EC-\u04F5\u04F8-\u04F9\u1E1C-\u1E1D\u1EAE-\u1EB7\u1FB0\u1FB8\u1FD0\u1FD8\u1FE0\u1FE8] ;" -":: NFD (NFC) ;" - -"$modprime = \u02B9;" -"$modprime2 = \u02BA;" - -"$grave = \u0300;" -"$acute = \u0301;" -"$hat = \u0302;" -"$breve = \u0306 ;" -"$dot = \u0307 ;" -"$caron = \u030C ;" -"$comma = \u0326 ;" -"$under = \u0331 ;" - -// move up so not masked - -"я <> a $hat ;" // CYRILLIC SMALL LETTER YA -"Я <> A $hat ;" // CYRILLIC CAPITAL LETTER YA - -"ч <> c $caron ;" // CYRILLIC SMALL LETTER CHE -"Ч <> C $caron;" // CYRILLIC CAPITAL LETTER CHE -// ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER -// Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER -// ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE -// Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE -// ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE -// Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH 
VERTICAL STROKE - -"э <> e $acute;" // CYRILLIC SMALL LETTER E -"Э <> E $acute;" // CYRILLIC CAPITAL LETTER E -"є <> e $hat;" // CYRILLIC SMALL LETTER UKRAINIAN IE -"Є <> E $hat;" // CYRILLIC CAPITAL LETTER UKRAINIAN IE - -"ш <> s $caron ;" // CYRILLIC SMALL LETTER SHA -"Ш <> S $caron ;" // CYRILLIC CAPITAL LETTER SHA -"щ <> s $hat ;" // CYRILLIC SMALL LETTER SHCHA -"Щ <> S $hat;" // CYRILLIC CAPITAL LETTER SHCHA - -"ѕ <> z $hat ;" // CYRILLIC SMALL LETTER DZE -"Ѕ <> Z $hat;" // CYRILLIC CAPITAL LETTER DZE -// ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE -// Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE - -"ю <> u $hat ;" // CYRILLIC SMALL LETTER YU -"Ю <> U $hat ;" // CYRILLIC CAPITAL LETTER YU - -"і <> i $acute;" // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I -"І <> I $acute;" // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I -"ј <> j $caron;" // CYRILLIC SMALL LETTER JE -"Ј <> J $caron;" // CYRILLIC CAPITAL LETTER JE - -"љ <> l $hat ;" // CYRILLIC SMALL LETTER LJE -"Љ <> L $hat ;" // CYRILLIC CAPITAL LETTER LJE -"њ <> n $hat ;" // CYRILLIC SMALL LETTER NJE -"Њ <> N $hat ;" // CYRILLIC CAPITAL LETTER NJE - -"ћ <> c $acute ;" // CYRILLIC SMALL LETTER TSHE -"Ћ <> C $acute ;" // CYRILLIC CAPITAL LETTER TSHE - -"џ <> d $hat ;" // CYRILLIC SMALL LETTER DZHE -"Џ <> D $hat ;" // CYRILLIC CAPITAL LETTER DZHE - -// Normal order - -"а <> a ;" // CYRILLIC SMALL LETTER A -"А <> A ;" // CYRILLIC CAPITAL LETTER A -"ә <> \u0259 ;" // CYRILLIC SMALL LETTER SCHWA -"Ә <> \u018F ;" // CYRILLIC CAPITAL LETTER SCHWA -"ӕ <> \u00E6 ;" // CYRILLIC SMALL LIGATURE A IE -"Ӕ <> \u00C6 ;" // CYRILLIC CAPITAL LIGATURE A IE -"б <> b ;" // CYRILLIC SMALL LETTER BE -"Б <> B ;" // CYRILLIC CAPITAL LETTER BE -"в <> v ;" // CYRILLIC SMALL LETTER VE -"В <> V ;" // CYRILLIC CAPITAL LETTER VE - -"ґ <> g $grave ;" // CYRILLIC SMALL LETTER GHE WITH UPTURN -"Ґ <> G $grave ;" // CYRILLIC CAPITAL LETTER GHE WITH UPTURN -"ғ <> g $dot ;" // CYRILLIC SMALL LETTER GHE WITH STROKE -"Ғ <> G $dot;" 
// CYRILLIC CAPITAL LETTER GHE WITH STROKE -"ҕ <> g $breve;" // CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK -"Ҕ <> G $breve;" // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK -"г <> g ;" // CYRILLIC SMALL LETTER GHE -"Г <> G ;" // CYRILLIC CAPITAL LETTER GHE - -"д <> d;" // CYRILLIC SMALL LETTER DE -"Д <> D;" // CYRILLIC CAPITAL LETTER DE -"ђ <> đ ;" // CYRILLIC SMALL LETTER DJE -"Ђ <> Đ ;" // CYRILLIC CAPITAL LETTER DJE -"ҙ <> z $comma ;" // CYRILLIC SMALL LETTER ZE WITH DESCENDER -"Ҙ <> Z $comma ;" // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER -"е <> e ;" // CYRILLIC SMALL LETTER IE -"Е <> E;" // CYRILLIC CAPITAL LETTER IE - -"ж <> z $caron;" // CYRILLIC SMALL LETTER ZHE -"Ж <> Z $caron;" // CYRILLIC CAPITAL LETTER ZHE - -// җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER -// Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER - -"з <> z ;" // CYRILLIC SMALL LETTER ZE -"З <> Z;" // CYRILLIC CAPITAL LETTER ZE - -"й <> j ;" // CYRILLIC SMALL LETTER I -"Й <> J ;" // CYRILLIC CAPITAL LETTER I -"и <> i ;" // CYRILLIC SMALL LETTER I -"И <> I ;" // CYRILLIC CAPITAL LETTER I - -"к <> k ;" // CYRILLIC SMALL LETTER KA -"К <> K;" // CYRILLIC CAPITAL LETTER KA - -// қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER -// Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER -// ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK -// Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK -// ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA -// Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA -// ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE -// Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE -// ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE -// Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE -"л <> l ;" // CYRILLIC SMALL LETTER EL -"Л <> L;" // CYRILLIC CAPITAL LETTER EL - -"м <> m ;" // CYRILLIC SMALL LETTER EM -"М <> M ;" // CYRILLIC CAPITAL LETTER EM -"н <> n ;" // CYRILLIC SMALL LETTER EN -"Н <> N;" // CYRILLIC CAPITAL LETTER EN -// ң <> XXX 
; # CYRILLIC SMALL LETTER EN WITH DESCENDER -// Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER -// ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK -// Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK -// ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE -// Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE - -"о <> o ;" // CYRILLIC SMALL LETTER O -"О <> O ;" // CYRILLIC CAPITAL LETTER O -// ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O -// Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O -"п <> p ;" // CYRILLIC SMALL LETTER PE -"П <> P ;" // CYRILLIC CAPITAL LETTER PE -// ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK -// Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK -// ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA -// Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA -"р <> r ;" // CYRILLIC SMALL LETTER ER -"Р <> R ;" // CYRILLIC CAPITAL LETTER ER -// ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK -// Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK -"с <> s ;" // CYRILLIC SMALL LETTER ES -"С <> S ;" // CYRILLIC CAPITAL LETTER ES -// ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER -// Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER -"т <> t ;" // CYRILLIC SMALL LETTER TE -"Т <> T ;" // CYRILLIC CAPITAL LETTER TE -// ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER -// Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER - -"у <> u ;" // CYRILLIC SMALL LETTER U -"У <> U ;" // CYRILLIC CAPITAL LETTER U -// ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U -// Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U -// ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE -// Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE -// ѹ <> XXX ; # CYRILLIC SMALL LETTER UK -// Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK -"ф <> f ;" // CYRILLIC SMALL LETTER EF -"Ф <> F ;" // CYRILLIC CAPITAL LETTER EF -"х <> h ;" // CYRILLIC SMALL LETTER HA -"Х <> H;" // CYRILLIC CAPITAL LETTER HA -// ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER -// Ҳ <> XXX ; # 
CYRILLIC CAPITAL LETTER HA WITH DESCENDER -// һ <> XXX ; # CYRILLIC SMALL LETTER SHHA -// Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA -// ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA -// Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA -// ѿ <> XXX ; # CYRILLIC SMALL LETTER OT -// Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT -// ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO -// Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO -// ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA -// Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA -"ц <> c ;" // CYRILLIC SMALL LETTER TSE -"Ц <> C;" // CYRILLIC CAPITAL LETTER TSE -// ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE -// Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE - -// ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE -// Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE -// ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER -// Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER - - -"Ъ <> $modprime2 $under ;" // CYRILLIC CAPITAL LETTER HARD SIGN -"ъ <> $modprime2 ;" // CYRILLIC SMALL LETTER HARD SIGN -"Ь <> $modprime $under ;" // CYRILLIC CAPITAL LETTER SOFT SIGN -"ь <> $modprime ;" // CYRILLIC SMALL LETTER SOFT SIGN - -"ы <> y ;" // CYRILLIC SMALL LETTER YERU -"Ы <> Y ;" // CYRILLIC CAPITAL LETTER YERU - -// ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN -// Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN -// ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT -// Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT - -// ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E -// Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E -// ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS -// Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS -// ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS -// Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS -// ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS -// Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS -// ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS -// Ѭ <> XXX ; # CYRILLIC CAPITAL 
LETTER IOTIFIED BIG YUS -// ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI -// Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI -// ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI -// Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI -// ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA -// Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA -// ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA -// Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA -// ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA -// Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA -// Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA -//## ӑ <> XXX ; # CYRILLIC SMALL LETTER A -//## Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A -//## ӓ <> XXX ; # CYRILLIC SMALL LETTER A -//## Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A -//## ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA -//## Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA -//## ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE -//## Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE -//## ѐ <> XXX ; # CYRILLIC SMALL LETTER IE -//## Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE -//## ё <> XXX ; # CYRILLIC SMALL LETTER IE -//## Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE -//## ӗ <> XXX ; # CYRILLIC SMALL LETTER IE -//## Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE -//## ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE -//## Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE -//## ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE -//## Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE -//## ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE -//## Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE -//## ѝ <> XXX ; # CYRILLIC SMALL LETTER I -//## Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I -//## ӣ <> XXX ; # CYRILLIC SMALL LETTER I -//## Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I -//## ӥ <> XXX ; # CYRILLIC SMALL LETTER I -//## Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I -//## ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I -//## Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I -//## ӧ <> XXX ; # CYRILLIC SMALL LETTER O -//## Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O -//## ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O -//## Ӫ <> XXX ; # 
CYRILLIC CAPITAL LETTER BARRED O -//## ќ <> XXX ; # CYRILLIC SMALL LETTER KA -//## Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA -//## ӯ <> XXX ; # CYRILLIC SMALL LETTER U -//## Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U -//## ў <> XXX ; # CYRILLIC SMALL LETTER U -//## Ў <> XXX ; # CYRILLIC CAPITAL LETTER U -//## ӱ <> XXX ; # CYRILLIC SMALL LETTER U -//## Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U -//## ӳ <> XXX ; # CYRILLIC SMALL LETTER U -//## Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U -//## ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE -//## Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE -//## ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU -//## Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU -//## ӭ <> XXX ; # CYRILLIC SMALL LETTER E -//## Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E -//## ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA -//## Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA - -// Completeness -"$ignore = [[:Mark:]''] * ;" -"| k < q ;" -"| K < Q ;" -"| u < w ;" -"| U < W ;" -"| KS < X } $ignore [:UppercaseLetter:] ;" -"| KS < [:UppercaseLetter:] $ignore { X ;" -"| Ks < X ;" -"| ks < x ;" - -":: NFC (NFD) ;" -// note: a global filter is more efficient, but MUST include all source chars!! 
-// :: ([\\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]]); -// MINIMAL FILTER: Latin-Cyrillic -":: ( [\u0308A-Za-z\u00C0-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u018F\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0259\u02B9-\u02BA\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344\u0374\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0400\u0403\u040C-\u040E\u0419\u0439\u0450\u0453\u045C-\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F02-\u1F05\u1F0A-\u1F0D\u1F12-\u1F15\u1F1A-\u1F1D\u1F22-\u1F25\u1F2A-\u1F2D\u1F32-\u1F35\u1F3A-\u1F3D\u1F42-\u1F45\u1F4A-\u1F4D\u1F52-\u1F55\u1F5B\u1F5D\u1F62-\u1F65\u1F6A-\u1F6D\u1F70-\u1F7D\u1F82-\u1F85\u1F8A-\u1F8D\u1F92-\u1F95\u1F9A-\u1F9D\u1FA2-\u1FA5\u1FAA-\u1FAD\u1FB0\u1FB2\u1FB4\u1FB8\u1FBA-\u1FBB\u1FC2\u1FC4\u1FC8-\u1FCB\u1FCD-\u1FCE\u1FD0\u1FD2-\u1FD3\u1FD8\u1FDA-\u1FDB\u1FDD-\u1FDE\u1FE0\u1FE2-\u1FE3\u1FE8\u1FEA-\u1FEB\u1FED-\u1FEE\u1FF2\u1FF4\u1FF8-\u1FFB\u212A-\u212B] ) ;" - } -} diff --git a/icu4c/source/data/translit/t_Deva_InterIndic.txt b/icu4c/source/data/translit/t_Deva_InterIndic.txt deleted file mode 100644 index 0c51f4adc5d..00000000000 --- a/icu4c/source/data/translit/t_Deva_InterIndic.txt +++ /dev/null @@ -1,133 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Devanagari_InterIndic.txt -// Date: Tue May 18 17:24:48 2004 -//-------------------------------------------------------------------- - -// Devanagari_InterIndic - -t_Deva_InterIndic { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Devanagari-InterIndic -// :: NFD; -//Rules for Decomposed characters - - - "\u0901>\uE001;" // SIGN CANDRABINDU - "\u0902>\uE002;" // SIGN ANUSVARA - "\u0903>\uE003;" // SIGN VISARGA - "\u0904>\uE004;" // SIGN SHORT A - "\u0905>\uE005;" // LETTER A - "\u0906>\uE006;" // LETTER AA - "\u0907>\uE007;" // LETTER I - "\u0908>\uE008;" // LETTER II - "\u0909>\uE009;" // LETTER U - "\u090A>\uE00A;" // LETTER UU - "\u090B>\uE00B;" // LETTER VOCALIC R - "\u090C>\uE00C;" // LETTER VOCALIC L - "\u090D>\uE00D;" // LETTER CANDRA E (For representing English sounds) - "\u090E>\uE00E;" // UNMAPPED LETTER SHORT E(For Southern Scripts) - "\u090F>\uE00F;" // LETTER E - "\u0910>\uE010;" // LETTER AI - "\u0911>\uE011;" // LETTER CANDRA O (For representing English sounds) - "\u0912>\uE012;" // UNMAPPED LETTER SHORT O (For Southern Scripts) - "\u0913>\uE013;" // LETTER O - "\u0914>\uE014;" // LETTER AU - "\u0915>\uE015;" // LETTER KA - "\u0916>\uE016;" // LETTER KHA - "\u0917>\uE017;" // LETTER GA - "\u0918>\uE018;" // LETTER GHA - "\u0919>\uE019;" // LETTER NGA - "\u091A>\uE01A;" // LETTER CA - "\u091B>\uE01B;" // LETTER CHA - "\u091C>\uE01C;" // LETTER JA - "\u091D>\uE01D;" // LETTER JHA - "\u091E>\uE01E;" // LETTER NYA - "\u091F>\uE01F;" // LETTER TTA - "\u0920>\uE020;" // LETTER TTHA - "\u0921>\uE021;" // LETTER DDA - "\u0922>\uE022;" // LETTER DDHA - "\u0923>\uE023;" // LETTER NNA - 
"\u0924>\uE024;" // LETTER TA - "\u0925>\uE025;" // LETTER THA - "\u0926>\uE026;" // LETTER DA - "\u0927>\uE027;" // LETTER DHA - "\u0928>\uE028;" // LETTER NA - "\u0929>\uE029;" - "\u092A>\uE02A;" // LETTER PA - "\u092B>\uE02B;" // LETTER PHA - "\u092C>\uE02C;" // LETTER BA - "\u092D>\uE02D;" // LETTER BHA - "\u092E>\uE02E;" // LETTER MA - "\u092F>\uE02F;" // LETTER YA - "\u0930>\uE030;" // LETTER RA - "\u0931>\uE031;" - "\u0932>\uE032;" // LETTER LA - "\u0933>\uE033;" // LETTER LLA - "\u0934>\uE034;" - - "\u0935>\uE035;" // LETTER VA - "\u0936>\uE036;" // LETTER SHA - "\u0937>\uE037;" // LETTER SSA - "\u0938>\uE038;" // LETTER SA - "\u0939>\uE039;" // LETTER HA - "\u093C>\uE03C;" // SIGN NUKTA - "\u093D>\uE03D;" // SIGN AVAGRAHA - "\u093E>\uE03E;" // VOWEL SIGN AA - "\u093F>\uE03F;" // VOWEL SIGN I - "\u0940>\uE040;" // VOWEL SIGN II - "\u0941>\uE041;" // VOWEL SIGN U - "\u0942>\uE042;" // VOWEL SIGN UU - "\u0943>\uE043;" // VOWEL SIGN VOCALIC R - "\u0944>\uE044;" // VOWEL SIGN VOCALIC RR - "\u0945>\uE045;" // VOWEL SIGN CANDRA E - "\u0946>\uE046;" // UNMAPPED VOWEL SIGN SHORT E - "\u0947>\uE047;" // VOWEL SIGN E - "\u0948>\uE048;" // VOWEL SIGN AI - "\u0949>\uE049;" // VOWEL SIGN CANDRA O - "\u094A>\uE04A;" // UNMAPPED VOWEL SIGN SHORT O - "\u094B>\uE04B;" // VOWEL SIGN O - "\u094C>\uE04C;" // VOWEL SIGN AU - "\u094D>\uE04D;" // SIGN VIRAMA - "\u0950>\uE050;" // OM - "\u0951>\uE051;" // UNMAPPED STRESS SIGN UDATTA - "\u0952>\uE052;" // UNMAPPED STRESS SIGN ANUDATTA - "\u0953>\uE053;" // UNMAPPED GRAVE ACCENT - "\u0954>\uE054;" // UNMAPPED ACUTE ACCENT - "\u0958>\uE058;" - "\u0959>\uE059;" - "\u095A>\uE05a;" - "\u095B>\uE05b;" - "\u095C>\uE05c;" - "\u095D>\uE05d;" - "\u095E>\uE05e;" - "\u095F>\uE05f;" - "\u0960>\uE060;" // LETTER VOCALIC RR - "\u0961>\uE061;" // LETTER VOCALIC LL - "\u0962>\uE062;" // VOWEL SIGN VOCALIC L - "\u0963>\uE063;" // VOWEL SIGN VOCALIC LL - "\u0964>\ue064;" // DANDA - "\u0965>\ue065;" // DOUBLE DANDA - "\u0966>\uE066;" // DIGIT ZERO - 
"\u0967>\uE067;" // DIGIT ONE - "\u0968>\uE068;" // DIGIT TWO - "\u0969>\uE069;" // DIGIT THREE - "\u096A>\uE06A;" // DIGIT FOUR - "\u096B>\uE06B;" // DIGIT FIVE - "\u096C>\uE06C;" // DIGIT SIX - "\u096D>\uE06D;" // DIGIT SEVEN - "\u096E>\uE06E;" // DIGIT EIGHT - "\u096F>\uE06F;" // DIGIT NINE - "\u0970>\uE070;" // Devanagari-InterIndic: ABBREVIATION SIGN -// :: NFC (NFD) ; - } -} diff --git a/icu4c/source/data/translit/t_FWidth_HWidth.txt b/icu4c/source/data/translit/t_FWidth_HWidth.txt deleted file mode 100644 index ae11485fe20..00000000000 --- a/icu4c/source/data/translit/t_FWidth_HWidth.txt +++ /dev/null @@ -1,287 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Fullwidth_Halfwidth.txt -// Date: Tue May 18 17:24:48 2004 -//-------------------------------------------------------------------- - -// Fullwidth_Halfwidth - -t_FWidth_HWidth { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Fullwidth-Halfwidth - -// Mechanically generated from Unicode Character Database -// IDEOGRAPHIC SPACE then added, and -// FULLWIDTH MACRON changed to map to MACRON, not SPACE + COMBINING MACRON - -// multicharacter - -"ガ<>ガ;" // to KATAKANA LETTER GA -"ギ<>ギ;" // to KATAKANA LETTER GI -"グ<>グ;" // to KATAKANA LETTER GU -"ゲ<>ゲ;" // to KATAKANA LETTER GE -"ゴ<>ゴ;" // to KATAKANA LETTER GO -"ザ<>ザ;" // to KATAKANA LETTER ZA -"ジ<>ジ;" // to KATAKANA LETTER ZI -"ズ<>ズ;" // to KATAKANA LETTER ZU -"ゼ<>ゼ;" // to KATAKANA LETTER ZE -"ゾ<>ゾ;" // to KATAKANA LETTER ZO 
-"ダ<>ダ;" // to KATAKANA LETTER DA -"ヂ<>ヂ;" // to KATAKANA LETTER DI -"ヅ<>ヅ;" // to KATAKANA LETTER DU -"デ<>デ;" // to KATAKANA LETTER DE -"ド<>ド;" // to KATAKANA LETTER DO -"バ<>バ;" // to KATAKANA LETTER BA -"パ<>パ;" // to KATAKANA LETTER PA -"ビ<>ビ;" // to KATAKANA LETTER BI -"ピ<>ピ;" // to KATAKANA LETTER PI -"ブ<>ブ;" // to KATAKANA LETTER BU -"プ<>プ;" // to KATAKANA LETTER PU -"ベ<>ベ;" // to KATAKANA LETTER BE -"ペ<>ペ;" // to KATAKANA LETTER PE -"ボ<>ボ;" // to KATAKANA LETTER BO -"ポ<>ポ;" // to KATAKANA LETTER PO -"ヴ<>ヴ;" // to KATAKANA LETTER VU -"ヷ<>ヷ;" // to KATAKANA LETTER VA -"ヺ<>ヺ;" // to KATAKANA LETTER VO - -// single character - -"!<>'!';" // from FULLWIDTH EXCLAMATION MARK -""<>'\\\"';" // from FULLWIDTH QUOTATION MARK -"#<>'#';" // from FULLWIDTH NUMBER SIGN -"$<>'$';" // from FULLWIDTH DOLLAR SIGN -"%<>'%';" // from FULLWIDTH PERCENT SIGN -"&<>'&';" // from FULLWIDTH AMPERSAND -"'<>'';" // from FULLWIDTH APOSTROPHE -"(<>'(';" // from FULLWIDTH LEFT PARENTHESIS -")<>')';" // from FULLWIDTH RIGHT PARENTHESIS -"*<>'*';" // from FULLWIDTH ASTERISK -"+<>'+';" // from FULLWIDTH PLUS SIGN -",<>',';" // from FULLWIDTH COMMA -"-<>'-';" // from FULLWIDTH HYPHEN-MINUS -".<>'.';" // from FULLWIDTH FULL STOP -"/<>'/';" // from FULLWIDTH SOLIDUS -"0<>'0';" // from FULLWIDTH DIGIT ZERO -"1<>'1';" // from FULLWIDTH DIGIT ONE -"2<>'2';" // from FULLWIDTH DIGIT TWO -"3<>'3';" // from FULLWIDTH DIGIT THREE -"4<>'4';" // from FULLWIDTH DIGIT FOUR -"5<>'5';" // from FULLWIDTH DIGIT FIVE -"6<>'6';" // from FULLWIDTH DIGIT SIX -"7<>'7';" // from FULLWIDTH DIGIT SEVEN -"8<>'8';" // from FULLWIDTH DIGIT EIGHT -"9<>'9';" // from FULLWIDTH DIGIT NINE -":<>':';" // from FULLWIDTH COLON -";<>';';" // from FULLWIDTH SEMICOLON -"<<>'<';" // from FULLWIDTH LESS-THAN SIGN -"=<>'=';" // from FULLWIDTH EQUALS SIGN -"><>'>';" // from FULLWIDTH GREATER-THAN SIGN -"?<>'?';" // from FULLWIDTH QUESTION MARK -"@<>'@';" // from FULLWIDTH COMMERCIAL AT -"A<>A;" // from FULLWIDTH LATIN CAPITAL LETTER A 
-"B<>B;" // from FULLWIDTH LATIN CAPITAL LETTER B -"C<>C;" // from FULLWIDTH LATIN CAPITAL LETTER C -"D<>D;" // from FULLWIDTH LATIN CAPITAL LETTER D -"E<>E;" // from FULLWIDTH LATIN CAPITAL LETTER E -"F<>F;" // from FULLWIDTH LATIN CAPITAL LETTER F -"G<>G;" // from FULLWIDTH LATIN CAPITAL LETTER G -"H<>H;" // from FULLWIDTH LATIN CAPITAL LETTER H -"I<>I;" // from FULLWIDTH LATIN CAPITAL LETTER I -"J<>J;" // from FULLWIDTH LATIN CAPITAL LETTER J -"K<>K;" // from FULLWIDTH LATIN CAPITAL LETTER K -"L<>L;" // from FULLWIDTH LATIN CAPITAL LETTER L -"M<>M;" // from FULLWIDTH LATIN CAPITAL LETTER M -"N<>N;" // from FULLWIDTH LATIN CAPITAL LETTER N -"O<>O;" // from FULLWIDTH LATIN CAPITAL LETTER O -"P<>P;" // from FULLWIDTH LATIN CAPITAL LETTER P -"Q<>Q;" // from FULLWIDTH LATIN CAPITAL LETTER Q -"R<>R;" // from FULLWIDTH LATIN CAPITAL LETTER R -"S<>S;" // from FULLWIDTH LATIN CAPITAL LETTER S -"T<>T;" // from FULLWIDTH LATIN CAPITAL LETTER T -"U<>U;" // from FULLWIDTH LATIN CAPITAL LETTER U -"V<>V;" // from FULLWIDTH LATIN CAPITAL LETTER V -"W<>W;" // from FULLWIDTH LATIN CAPITAL LETTER W -"X<>X;" // from FULLWIDTH LATIN CAPITAL LETTER X -"Y<>Y;" // from FULLWIDTH LATIN CAPITAL LETTER Y -"Z<>Z;" // from FULLWIDTH LATIN CAPITAL LETTER Z -"[<>'[';" // from FULLWIDTH LEFT SQUARE BRACKET -"\<>'\\';" // from FULLWIDTH REVERSE SOLIDUS {double escape - aliu} -"]<>']';" // from FULLWIDTH RIGHT SQUARE BRACKET -"^<>'^';" // from FULLWIDTH CIRCUMFLEX ACCENT -"_<>'_';" // from FULLWIDTH LOW LINE -"`<>'`';" // from FULLWIDTH GRAVE ACCENT -"a<>a;" // from FULLWIDTH LATIN SMALL LETTER A -"b<>b;" // from FULLWIDTH LATIN SMALL LETTER B -"c<>c;" // from FULLWIDTH LATIN SMALL LETTER C -"d<>d;" // from FULLWIDTH LATIN SMALL LETTER D -"e<>e;" // from FULLWIDTH LATIN SMALL LETTER E -"f<>f;" // from FULLWIDTH LATIN SMALL LETTER F -"g<>g;" // from FULLWIDTH LATIN SMALL LETTER G -"h<>h;" // from FULLWIDTH LATIN SMALL LETTER H -"i<>i;" // from FULLWIDTH LATIN SMALL LETTER I -"j<>j;" // from 
FULLWIDTH LATIN SMALL LETTER J -"k<>k;" // from FULLWIDTH LATIN SMALL LETTER K -"l<>l;" // from FULLWIDTH LATIN SMALL LETTER L -"m<>m;" // from FULLWIDTH LATIN SMALL LETTER M -"n<>n;" // from FULLWIDTH LATIN SMALL LETTER N -"o<>o;" // from FULLWIDTH LATIN SMALL LETTER O -"p<>p;" // from FULLWIDTH LATIN SMALL LETTER P -"q<>q;" // from FULLWIDTH LATIN SMALL LETTER Q -"r<>r;" // from FULLWIDTH LATIN SMALL LETTER R -"s<>s;" // from FULLWIDTH LATIN SMALL LETTER S -"t<>t;" // from FULLWIDTH LATIN SMALL LETTER T -"u<>u;" // from FULLWIDTH LATIN SMALL LETTER U -"v<>v;" // from FULLWIDTH LATIN SMALL LETTER V -"w<>w;" // from FULLWIDTH LATIN SMALL LETTER W -"x<>x;" // from FULLWIDTH LATIN SMALL LETTER X -"y<>y;" // from FULLWIDTH LATIN SMALL LETTER Y -"z<>z;" // from FULLWIDTH LATIN SMALL LETTER Z -"{<>'{';" // from FULLWIDTH LEFT CURLY BRACKET -"|<>'|';" // from FULLWIDTH VERTICAL LINE -"}<>'}';" // from FULLWIDTH RIGHT CURLY BRACKET -"~<>'~';" // from FULLWIDTH TILDE -"。<>。;" // to HALFWIDTH IDEOGRAPHIC FULL STOP -"「<>「;" // to HALFWIDTH LEFT CORNER BRACKET -"」<>」;" // to HALFWIDTH RIGHT CORNER BRACKET -"、<>、;" // to HALFWIDTH IDEOGRAPHIC COMMA -"・<>・;" // to HALFWIDTH KATAKANA MIDDLE DOT -"ヲ<>ヲ;" // to HALFWIDTH KATAKANA LETTER WO -"ァ<>ァ;" // to HALFWIDTH KATAKANA LETTER SMALL A -"ィ<>ィ;" // to HALFWIDTH KATAKANA LETTER SMALL I -"ゥ<>ゥ;" // to HALFWIDTH KATAKANA LETTER SMALL U -"ェ<>ェ;" // to HALFWIDTH KATAKANA LETTER SMALL E -"ォ<>ォ;" // to HALFWIDTH KATAKANA LETTER SMALL O -"ャ<>ャ;" // to HALFWIDTH KATAKANA LETTER SMALL YA -"ュ<>ュ;" // to HALFWIDTH KATAKANA LETTER SMALL YU -"ョ<>ョ;" // to HALFWIDTH KATAKANA LETTER SMALL YO -"ッ<>ッ;" // to HALFWIDTH KATAKANA LETTER SMALL TU -"ー<>ー;" // to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK -"ア<>ア;" // to HALFWIDTH KATAKANA LETTER A -"イ<>イ;" // to HALFWIDTH KATAKANA LETTER I -"ウ<>ウ;" // to HALFWIDTH KATAKANA LETTER U -"エ<>エ;" // to HALFWIDTH KATAKANA LETTER E -"オ<>オ;" // to HALFWIDTH KATAKANA LETTER O -"カ<>カ;" // to HALFWIDTH 
KATAKANA LETTER KA -"キ<>キ;" // to HALFWIDTH KATAKANA LETTER KI -"ク<>ク;" // to HALFWIDTH KATAKANA LETTER KU -"ケ<>ケ;" // to HALFWIDTH KATAKANA LETTER KE -"コ<>コ;" // to HALFWIDTH KATAKANA LETTER KO -"サ<>サ;" // to HALFWIDTH KATAKANA LETTER SA -"シ<>シ;" // to HALFWIDTH KATAKANA LETTER SI -"ス<>ス;" // to HALFWIDTH KATAKANA LETTER SU -"セ<>セ;" // to HALFWIDTH KATAKANA LETTER SE -"ソ<>ソ;" // to HALFWIDTH KATAKANA LETTER SO -"タ<>タ;" // to HALFWIDTH KATAKANA LETTER TA -"チ<>チ;" // to HALFWIDTH KATAKANA LETTER TI -"ツ<>ツ;" // to HALFWIDTH KATAKANA LETTER TU -"テ<>テ;" // to HALFWIDTH KATAKANA LETTER TE -"ト<>ト;" // to HALFWIDTH KATAKANA LETTER TO -"ナ<>ナ;" // to HALFWIDTH KATAKANA LETTER NA -"ニ<>ニ;" // to HALFWIDTH KATAKANA LETTER NI -"ヌ<>ヌ;" // to HALFWIDTH KATAKANA LETTER NU -"ネ<>ネ;" // to HALFWIDTH KATAKANA LETTER NE -"ノ<>ノ;" // to HALFWIDTH KATAKANA LETTER NO -"ハ<>ハ;" // to HALFWIDTH KATAKANA LETTER HA -"ヒ<>ヒ;" // to HALFWIDTH KATAKANA LETTER HI -"フ<>フ;" // to HALFWIDTH KATAKANA LETTER HU -"ヘ<>ヘ;" // to HALFWIDTH KATAKANA LETTER HE -"ホ<>ホ;" // to HALFWIDTH KATAKANA LETTER HO -"マ<>マ;" // to HALFWIDTH KATAKANA LETTER MA -"ミ<>ミ;" // to HALFWIDTH KATAKANA LETTER MI -"ム<>ム;" // to HALFWIDTH KATAKANA LETTER MU -"メ<>メ;" // to HALFWIDTH KATAKANA LETTER ME -"モ<>モ;" // to HALFWIDTH KATAKANA LETTER MO -"ヤ<>ヤ;" // to HALFWIDTH KATAKANA LETTER YA -"ユ<>ユ;" // to HALFWIDTH KATAKANA LETTER YU -"ヨ<>ヨ;" // to HALFWIDTH KATAKANA LETTER YO -"ラ<>ラ;" // to HALFWIDTH KATAKANA LETTER RA -"リ<>リ;" // to HALFWIDTH KATAKANA LETTER RI -"ル<>ル;" // to HALFWIDTH KATAKANA LETTER RU -"レ<>レ;" // to HALFWIDTH KATAKANA LETTER RE -"ロ<>ロ;" // to HALFWIDTH KATAKANA LETTER RO -"ワ<>ワ;" // to HALFWIDTH KATAKANA LETTER WA -"ン<>ン;" // to HALFWIDTH KATAKANA LETTER N -"゙<>゙;" // to HALFWIDTH KATAKANA VOICED SOUND MARK -"゚<>゚;" // to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK -"ᅠ<>ᅠ;" // to HALFWIDTH HANGUL FILLER -"ᄀ<>ᄀ;" // to HALFWIDTH HANGUL LETTER KIYEOK -"ᄁ<>ᄁ;" // to HALFWIDTH HANGUL LETTER SSANGKIYEOK -"ᆪ<>ᆪ;" // to 
HALFWIDTH HANGUL LETTER KIYEOK-SIOS -"ᄂ<>ᄂ;" // to HALFWIDTH HANGUL LETTER NIEUN -"ᆬ<>ᆬ;" // to HALFWIDTH HANGUL LETTER NIEUN-CIEUC -"ᆭ<>ᆭ;" // to HALFWIDTH HANGUL LETTER NIEUN-HIEUH -"ᄃ<>ᄃ;" // to HALFWIDTH HANGUL LETTER TIKEUT -"ᄄ<>ᄄ;" // to HALFWIDTH HANGUL LETTER SSANGTIKEUT -"ᄅ<>ᄅ;" // to HALFWIDTH HANGUL LETTER RIEUL -"ᆰ<>ᆰ;" // to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK -"ᆱ<>ᆱ;" // to HALFWIDTH HANGUL LETTER RIEUL-MIEUM -"ᆲ<>ᆲ;" // to HALFWIDTH HANGUL LETTER RIEUL-PIEUP -"ᆳ<>ᆳ;" // to HALFWIDTH HANGUL LETTER RIEUL-SIOS -"ᆴ<>ᆴ;" // to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH -"ᆵ<>ᆵ;" // to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH -"ᄚ<>ᄚ;" // to HALFWIDTH HANGUL LETTER RIEUL-HIEUH -"ᄆ<>ᄆ;" // to HALFWIDTH HANGUL LETTER MIEUM -"ᄇ<>ᄇ;" // to HALFWIDTH HANGUL LETTER PIEUP -"ᄈ<>ᄈ;" // to HALFWIDTH HANGUL LETTER SSANGPIEUP -"ᄡ<>ᄡ;" // to HALFWIDTH HANGUL LETTER PIEUP-SIOS -"ᄉ<>ᄉ;" // to HALFWIDTH HANGUL LETTER SIOS -"ᄊ<>ᄊ;" // to HALFWIDTH HANGUL LETTER SSANGSIOS -"ᄋ<>ᄋ;" // to HALFWIDTH HANGUL LETTER IEUNG -"ᄌ<>ᄌ;" // to HALFWIDTH HANGUL LETTER CIEUC -"ᄍ<>ᄍ;" // to HALFWIDTH HANGUL LETTER SSANGCIEUC -"ᄎ<>ᄎ;" // to HALFWIDTH HANGUL LETTER CHIEUCH -"ᄏ<>ᄏ;" // to HALFWIDTH HANGUL LETTER KHIEUKH -"ᄐ<>ᄐ;" // to HALFWIDTH HANGUL LETTER THIEUTH -"ᄑ<>ᄑ;" // to HALFWIDTH HANGUL LETTER PHIEUPH -"ᄒ<>ᄒ;" // to HALFWIDTH HANGUL LETTER HIEUH -"ᅡ<>ᅡ;" // to HALFWIDTH HANGUL LETTER A -"ᅢ<>ᅢ;" // to HALFWIDTH HANGUL LETTER AE -"ᅣ<>ᅣ;" // to HALFWIDTH HANGUL LETTER YA -"ᅤ<>ᅤ;" // to HALFWIDTH HANGUL LETTER YAE -"ᅥ<>ᅥ;" // to HALFWIDTH HANGUL LETTER EO -"ᅦ<>ᅦ;" // to HALFWIDTH HANGUL LETTER E -"ᅧ<>ᅧ;" // to HALFWIDTH HANGUL LETTER YEO -"ᅨ<>ᅨ;" // to HALFWIDTH HANGUL LETTER YE -"ᅩ<>ᅩ;" // to HALFWIDTH HANGUL LETTER O -"ᅪ<>ᅪ;" // to HALFWIDTH HANGUL LETTER WA -"ᅫ<>ᅫ;" // to HALFWIDTH HANGUL LETTER WAE -"ᅬ<>ᅬ;" // to HALFWIDTH HANGUL LETTER OE -"ᅭ<>ᅭ;" // to HALFWIDTH HANGUL LETTER YO -"ᅮ<>ᅮ;" // to HALFWIDTH HANGUL LETTER U -"ᅯ<>ᅯ;" // to HALFWIDTH HANGUL LETTER WEO -"ᅰ<>ᅰ;" // to HALFWIDTH 
HANGUL LETTER WE -"ᅱ<>ᅱ;" // to HALFWIDTH HANGUL LETTER WI -"ᅲ<>ᅲ;" // to HALFWIDTH HANGUL LETTER YU -"ᅳ<>ᅳ;" // to HALFWIDTH HANGUL LETTER EU -"ᅴ<>ᅴ;" // to HALFWIDTH HANGUL LETTER YI -"ᅵ<>ᅵ;" // to HALFWIDTH HANGUL LETTER I -"¢<>'¢';" // from FULLWIDTH CENT SIGN -"£<>'£';" // from FULLWIDTH POUND SIGN -"¬<>'¬';" // from FULLWIDTH NOT SIGN -" ̄<>'¯';" // from FULLWIDTH MACRON -"' '<>' ';" // ideographic space (place this after MACRON) -"¦<>'¦';" // from FULLWIDTH BROKEN BAR -"¥<>'¥';" // from FULLWIDTH YEN SIGN -"₩<>₩;" // from FULLWIDTH WON SIGN -"│<>│;" // to HALFWIDTH FORMS LIGHT VERTICAL -"'←'<>'←';" // to HALFWIDTH LEFTWARDS ARROW -"↑<>↑;" // to HALFWIDTH UPWARDS ARROW -"'→'<>'→';" // to HALFWIDTH RIGHTWARDS ARROW -"↓<>↓;" // to HALFWIDTH DOWNWARDS ARROW -"■<>■;" // to HALFWIDTH BLACK SQUARE -"○<>○;" // to HALFWIDTH WHITE CIRCLE - -// eof - - } -} diff --git a/icu4c/source/data/translit/t_Grek_Latn.txt b/icu4c/source/data/translit/t_Grek_Latn.txt deleted file mode 100644 index 2c3f3c2b083..00000000000 --- a/icu4c/source/data/translit/t_Grek_Latn.txt +++ /dev/null @@ -1,361 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Greek_Latin.txt -// Date: Tue May 18 17:24:48 2004 -//-------------------------------------------------------------------- - -// Greek_Latin - -t_Grek_Latn { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Rules are predicated on running NFD first, and NFC afterwards -// :: [\\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ; -// MINIMAL FILTER GENERATED FOR: Greek-Latin -":: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u03F7-\u07FB\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u03F9] ;" - -":: NFD (NFC) ;" - -// TEST CASES - -// Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος -// ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ -// ᾳ ῃ ῳ ὃ ὄ -// ὠς ὡς ὢς ὣς -// Ὠς Ὡς Ὢς Ὣς -// ὨΣ ὩΣ ὪΣ ὫΣ -// Ạ, ạ, Ẹ, ẹ, Ọ, ọ - -// Useful variables - -"$lower = [[:latin:][:greek:] & [:Ll:]];" -"$glower = [[:greek:] & [:Ll:]];" -"$upper = [[:latin:][:greek:] & [:Lu:]] ;" -"$accent = [:M:] ;" - 
-// NOTE: restrict to just the Greek & Latin accents that we care about -// TODO: broaden out once interation is fixed -"$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;" - -"$macron = \u0304 ;" -"$ddot = \u0308 ;" -"$ddotmac = [$ddot$macron];" - -"$lcgvowel = [αεηιουω] ;" -"$ucgvowel = [ΑΕΗΙΟΥΩ] ;" -"$gvowel = [$lcgvowel $ucgvowel] ;" -"$lcgvowelC = [$lcgvowel $accent] ;" - -"$evowel = [aeiouyAEIOUY];" -"$evowel2 = [iuyIUY];" -"$vowel = [ $evowel $gvowel] ;" - -"$gammaLike = [ΓΚΞΧγκξχϰ] ;" -"$egammaLike = [GKXCgkxc] ;" -"$smooth = ̓ ;" -"$rough = ̔ ;" -"$iotasub = ͅ ;" - -"$evowel_i = [$evowel-[iI]] ;" -"$evowel2_i = [uyUY];" - -"$underbar = \u0331;" - -"$afterLetter = [:L:] [[:M:]\\\']* ;" -"$beforeLetter = [[:M:]\\\']* [:L:] ;" -"$beforeLower = $accent * $lower ;" - -"$notLetter = [^[:L:][:M:]] ;" -"$under = ̱;" - -// Fix punctuation -// preserve original -"\\\: <> \\\: $under ;" -"\\\? <> \\\? $under ;" - -"\\\; <> \\\? ;" -"· <> \\\: ;" - -// CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve - -"\u0342 <> \u0302 ;" - -// IOTA: convert iota subscript to iota -// first make previous alpha long! - -"$accent_minus = [[$accent]-[$iotasub$macron]];" - -"Α } $accent_minus * $iotasub > | Α $macron ;" -"α } $accent_minus * $iotasub > | α $macron ;" - -// now convert to uppercase if after uppercase, ow to lowercase - -"$upper $accent * { $iotasub > I ;" -"$iotasub > i ;" - -"| $1 $iotasub < ($evowel $macron $accentMinus *) i ;" -"| $1 $iotasub < ($evowel $macron $accentMinus *) I ;" - -// BREATHING - -// Convert rough breathing to h, and move before letters. - -// Make A ` x = > H a x - - "Α ($macron?) $rough } $beforeLower > H | α $1;" - "Ε $rough } $beforeLower > H | ε;" - "Η $rough } $beforeLower > H | η ;" - "Ι ($ddot?) $rough } $beforeLower > H | ι $1;" - "Ο $rough } $beforeLower > H | ο ;" - "Υ $rough } $beforeLower > H | υ ;" - "Ω ($ddot?) 
$rough } $beforeLower > H | ω $1;" - -// Make A x ` = > H a x - -"Α ($glower $macron?) $rough > H | α $1 ;" -"Ε ($glower) $rough > H | ε $1 ;" -"Η ($glower) $rough > H | η $1 ;" -"Ι ($glower $ddot?) $rough > H | ι $1 ;" -"Ο ($glower) $rough > H | ο $1 ;" -"Υ ($glower) $rough > H | υ $1 ;" -"Ω ($glower $ddot?) $rough > H | ω $1 ;" - -//Otherwise, make x ` into h x and X ` into H X - -"($lcgvowel + $ddotmac? ) $rough > h | $1 ;" -"($gvowel + $ddotmac? ) $rough > H | $1 ;" - -// Go backwards with H - -"| $1 $rough < h ($evowel $macron $ddot? $evowel2_i $macron?) ;" -"| $1 $rough < h ($evowel $ddot? $evowel2 $macron?) ;" -"| $1 $rough < h ($evowel $macron? $ddot?) ;" - -"| $1 $rough < H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;" -"| $1 $rough < H ([AEIOUY] $ddot? $evowel2 $macron?) ;" -"| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;" - -// titlecase, have to fix individually -// in the future, we should add &uppercase() to make this easier - -"| A $1 $rough < H a ($macron $ddot? $evowel2_i $macron?) ;" -"| E $1 $rough < H e ($macron $ddot? $evowel2_i $macron?) ;" -"| I $1 $rough < H i ($macron $ddot? $evowel2_i $macron?) ;" -"| O $1 $rough < H o ($macron $ddot? $evowel2_i $macron?) ;" -"| U $1 $rough < H u ($macron $ddot? $evowel2_i $macron?) ;" -"| Y $1 $rough < H y ($macron $ddot? $evowel2_i $macron?) ;" - -"| A $1 $rough < H a ($ddot? $evowel2 $macron?) ;" -"| E $1 $rough < H e ($ddot? $evowel2 $macron?) ;" -"| I $1 $rough < H i ($ddot? $evowel2 $macron?) ;" -"| O $1 $rough < H o ($ddot? $evowel2 $macron?) ;" -"| U $1 $rough < H u ($ddot? $evowel2 $macron?) ;" -"| Y $1 $rough < H y ($ddot? $evowel2 $macron?) ;" - -"| A $1 $rough < H a ($macron? $ddot? ) ;" -"| E $1 $rough < H e ($macron? $ddot? ) ;" -"| I $1 $rough < H i ($macron? $ddot? ) ;" -"| O $1 $rough < H o ($macron? $ddot? ) ;" -"| U $1 $rough < H u ($macron? $ddot? ) ;" -"| Y $1 $rough < H y ($macron? $ddot? 
) ;" - -// Now do smooth - -//delete smooth breathing for Latin -"$smooth > ;" - -// insert in Greek -// the assumption is that all Marks are on letters. - - "| $1 $smooth < $notLetter { ([rR]) } [^hH$smooth$rough] ;" - "| $1 $smooth < $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;" - "| $1 $smooth < $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;" - -// TODO: preserve smooth/rough breathing if not -// on initial vowel sequence - -// need to have these up here so the rules don't mask - -// remove now superfluous macron when returning - -"Α < A $macron ;" -"α < a $macron ;" - -"η <> e $macron ;" -"Η <> E $macron ;" - -"φ <> ph ;" -"Ψ } $beforeLower <> Ps ;" -"Ψ <> PS ;" - -"Φ } $beforeLower <> Ph ;" -"Φ <> PH ;" -"ψ <> ps ;" - -"ω <> o $macron ;" -"Ω <> O $macron;" - -// NORMAL - -"α <> a ;" -"Α <> A ;" - -"β <> b ;" -"Β <> B ;" - -"γ } $gammaLike <> n } $egammaLike ;" -"γ <> g ;" -"Γ } $gammaLike <> N } $egammaLike ;" -"Γ <> G ;" - -"δ <> d ;" -"Δ <> D ;" - -"ε <> e ;" -"Ε <> E ;" - -"ζ <> z ;" -"Ζ <> Z ;" - -"θ <> th ;" -"Θ } $beforeLower <> Th ;" -"Θ <> TH ;" - -"ι <> i ;" -"Ι <> I ;" - -"κ <> k ;" -"Κ <> K ;" - -"λ <> l ;" -"Λ <> L ;" - -"μ <> m ;" -"Μ <> M ;" - -"ν } $gammaLike > n\\\' ;" -"ν <> n ;" -"Ν } $gammaLike <> N\\\' ;" -"Ν <> N ;" - -"ξ <> x ;" -"Ξ <> X ;" - -"ο <> o ;" -"Ο <> O ;" - -"π <> p ;" -"Π <> P ;" - -"ρ $rough <> rh;" -"Ρ $rough } $beforeLower <> Rh ;" -"Ρ $rough <> RH ;" -"ρ <> r ;" -"Ρ <> R ;" - -// insert separator before things that turn into s - -"[Pp] { } [ςσΣϷϸϺϻ] > \\\' ;" - -// special S variants - -"Ϸ <> Š ;" // Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L -"ϸ <> š ;" //ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L -"Ϻ <> Ŝ ;" // Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L -"ϻ <> ŝ ;" // ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L - -// underbar means exception - -// before a letter, initial -"ς } $beforeLetter <> s $underbar } $beforeLetter;" -"σ } $beforeLetter <> 
s } $beforeLetter;" - -// otherwise, after a letter = final -"$afterLetter { σ <> $afterLetter { s $underbar;" -"$afterLetter { ς <> $afterLetter { s ;" - -// otherwise (isolated) = initial -"ς <> s $underbar;" -"σ <> s ;" - -// [Pp] { Σ <> \\\'S ; -"Σ <> S ;" - -"τ <> t ;" -"Τ <> T ;" - -"$vowel {υ } <> u ;" -"υ <> y ;" -"$vowel { Υ <> U ;" -"Υ <> Y ;" - -"χ <> ch ;" -"Χ } $beforeLower <> Ch ;" -"Χ <> CH ;" - -// Completeness for ASCII - -"$ignore = [[:Mark:]''] * ;" - -"| k < c ;" -"| ph < f ;" -"| i < j ;" -"| k < q ;" -"| b < v } $vowel ;" -"| b < w } $vowel;" -"| u < v ;" -"| u < w;" -"| K < C ;" -"| Ph < F ;" -"| I < J ;" -"| K < Q ;" -"| B < V } $vowel ;" -"| B < W } $vowel ;" -"| U < V ;" -"| U < W ;" - -"$rough } $ignore [:UppercaseLetter:] > H ;" -"$ignore [:UppercaseLetter:] { $rough > H ;" -"$rough < H ;" -"$rough <> h ;" - -// Completeness for Greek - -"ϐ > | β ;" -"ϑ > | θ ;" -"ϒ > | Υ ;" -"ϕ > | φ ;" -"ϖ > | π ;" - -"ϰ > | κ ;" -"ϱ > | ρ ;" -"ϲ > | σ ;" -"Ϲ > | Σ;" //U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL -"ϳ > j ;" -"ϴ > | Θ ;" -"ϵ > | ε ;" - -"µ > | μ ;" - - "ͺ > i;" - -// delete any trailing ' marks used for roundtripping - - "< [Ππ] { \\\' } [Ss] ;" - "< [Νν] { \\\' } $egammaLike ;" - -"::NFC (NFD) ;" -// ([\\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ; -// ([\\u0000-\u007F \u00B7 [:Latin:] [:nonspacing mark:]]) ; -// MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD -":: ( 
[':?A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0300-\u0337\u0339-\u0345\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEE\u1FF2-\u1FF4\u1FF6-\u1FFC\u212A-\u212B] ) ;" - } -} diff --git a/icu4c/source/data/translit/t_Grek_Latn_UNGEGN.txt b/icu4c/source/data/translit/t_Grek_Latn_UNGEGN.txt deleted file mode 100644 index 1a4b1ad2418..00000000000 --- a/icu4c/source/data/translit/t_Grek_Latn_UNGEGN.txt +++ /dev/null @@ -1,268 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Greek_Latin_UNGEGN.txt -// Date: Tue May 18 17:24:48 2004 -//-------------------------------------------------------------------- - -// Greek_Latin_UNGEGN - -t_Grek_Latn_UNGEGN { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -// For modern Greek, based on UNGEGN rules. - -// Rules are predicated on running NFD first, and NFC afterwards -// MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN -// WARNING: need to add accents to both filters ### -// :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ; - -":: [[[:Greek:][:Mn:][:Me:]] [\\\:-;?\u00B7\u037E\u0387]] ;" -"::NFD (NFC) ;" - -// Useful variables - -"$lower = [[:latin:][:greek:] & [:Ll:]] ;" -"$upper = [[:latin:][:greek:] & [:Lu:]] ;" -"$accent = [[:Mn:][:Me:]] ;" - -"$macron = ̄ ;" -"$ddot = ̈ ;" - -"$lcgvowel = [αεηιουω] ;" -"$ucgvowel = [ΑΕΗΙΟΥΩ] ;" -"$gvowel = [$lcgvowel $ucgvowel] ;" -"$lcgvowelC = [$lcgvowel $accent] ;" - -"$evowel = [aeiouyAEIOUY];" -"$vowel = [ $evowel $gvowel] ;" - -"$beforeLower = $accent * $lower ;" - -"$gammaLike = [ΓΚΞΧγκξχϰ] ;" -"$egammaLike = [GKXCgkxc] ;" -"$smooth = ̓ ;" -"$rough = ̔ ;" -"$iotasub = ͅ ;" - -"$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;" - -"$under = ̱;" - -"$caron = ̌;" - -"$afterLetter = [:L:] [\\\'$accent]* ;" -"$beforeLetter = [\\\'$accent]* [:L:] ;" - -// Fix punctuation - -// preserve orginal -"\\\: <> \\\: $under ;" -"\\\? <> \\\? $under ;" - -"\\\; <> \\\? 
;" -"· <> \\\: ;" - -// Fix any ancient characters that creep in - -"͂ > ́ ;" -"̂ > ́ ;" -"̀ > ́ ;" -"$smooth > ;" -"$rough > ;" -"$iotasub > ;" -"ͺ > ;" - -// need to have these up here so the rules don't mask - -"η <> i $under ;" -"Η <> I $under ;" - -"Ψ } $beforeLower <> Ps ;" -"Ψ <> PS ;" -"ψ <> ps ;" - -"ω <> o $under ;" -"Ω <> O $under;" - -// at begining or end of word, convert mp to b - -"[^[:L:]$accent] { μπ > b ;" -"μπ } [^[:L:]$accent] > b ;" -"[^[:L:]$accent] { [Μμ][Ππ] > B ;" -"[Μμ][Ππ] } [^[:L:]$accent] > B ;" - -"μπ < b ;" -"Μπ < B } $beforeLower ;" -"ΜΠ < B ;" - -// handle diphthongs ending with upsilon - -"ου <> ou ;" -"ΟΥ <> OU ;" -"Ου <> Ou ;" -"οΥ <> oU ;" - -"$fmaker = [aeiAEI] $under ? ;" -"$shiftForwardVowels = [[:Mn:]-[\u0308]];" // note: a diaeresis keeps the items separate - -"$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;" -"υ $1 < ( $shiftForwardVowels )* v $under ;" - -"$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;" -"υ $1 < ( $shiftForwardVowels )* f $under ;" - -"$fmaker { Υ } $softener <> V $under ;" -"$fmaker { Υ <> U $under ;" - -"υ <> y ;" -"Υ <> Y ;" - -// NORMAL - -"α <> a ;" -"Α <> A ;" - -"β <> v ;" -"Β <> V ;" - -"γ } $gammaLike <> n } $egammaLike ;" -"γ <> g ;" -"Γ } $gammaLike <> N } $egammaLike ;" -"Γ <> G ;" - -"δ <> d ;" -"Δ <> D ;" - -"ε <> e ;" -"Ε <> E ;" - -"ζ <> z ;" -"Ζ <> Z ;" - -"θ <> th ;" -"Θ } $beforeLower <> Th ;" -"Θ <> TH ;" - -"ι <> i ;" -"Ι <> I ;" - -"κ <> k ;" -"Κ <> K ;" - -"λ <> l ;" -"Λ <> L ;" - -"μ <> m ;" -"Μ <> M ;" - -"ν } $gammaLike > n\\\' ;" -"ν <> n ;" -"Ν } $gammaLike <> N\\\' ;" -"Ν <> N ;" - -"ξ <> x ;" -"Ξ <> X ;" - -"ο <> o ;" -"Ο <> O ;" - -"π <> p ;" -"Π <> P ;" - -"ρ <> r ;" -"Ρ <> R ;" - -// insert separator before things that turn into s -"[Pp] { } [ςσΣϷϸϺϻ] > \\\' ;" - -// special S variants - -"Ϸ <> Š ;" // Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L -"ϸ <> š ;" //ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L -"Ϻ <> Ŝ ;" // Ϻ GREEK CAPITAL 
LETTER SAN Uppercase_Letter Grek - L -"ϻ <> ŝ ;" // ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L - -// Caron means exception - -// before a letter, initial -"ς } $beforeLetter <> s $under } $beforeLetter;" -"σ } $beforeLetter <> s } $beforeLetter;" - -// otherwise, after a letter = final -"$afterLetter { σ <> $afterLetter { s $under;" -"$afterLetter { ς <> $afterLetter { s ;" - -// otherwise (isolated) = initial -"ς <> s $under;" -"σ <> s ;" - -// [Pp] { Σ <> \\\'S ; -"Σ <> S ;" - -"τ <> t ;" -"Τ <> T ;" - -"φ <> f ;" -"Φ <> F ;" - -"χ <> ch ;" -"Χ } $beforeLower <> Ch ;" -"Χ <> CH ;" - -// Completeness for ASCII - -// $ignore = [[:Mark:]''] * ; - -"| ch < h ;" -"| k < c ;" -"| i < j ;" -"| k < q ;" -"| b < u } $vowel ;" -"| b < w } $vowel ;" -"| y < u ;" -"| y < w ;" - -"| Ch < H ;" -"| K < C ;" -"| I < J ;" -"| K < Q ;" -"| B < W } $vowel ;" -"| B < U } $vowel ;" -"| Y < W ;" -"| Y < U ;" - -// Completeness for Greek - -"ϐ > | β ;" -"ϑ > | θ ;" -"ϒ > | Υ ;" -"ϕ > | φ ;" -"ϖ > | π ;" - -"ϰ > | κ ;" -"ϱ > | ρ ;" -"ϲ > | σ ;" -"Ϲ > | Σ;" //U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL -"ϳ > j ;" -"ϴ > | Θ ;" -"ϵ > | ε ;" -"µ > | μ ;" - -// delete any trailing ' marks used for roundtripping - - "< [Ππ] { \\\' } [Ss] ;" - "< [Νν] { \\\' } $egammaLike ;" - -"::NFC (NFD) ;" - -// MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD -":: ([[[:Latin:][:Mn:][:Me:]] ['\\\:?]]) ;" - } -} diff --git a/icu4c/source/data/translit/t_Gujr_InterIndic.txt b/icu4c/source/data/translit/t_Gujr_InterIndic.txt deleted file mode 100644 index a663ab1d034..00000000000 --- a/icu4c/source/data/translit/t_Gujr_InterIndic.txt +++ /dev/null @@ -1,107 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Gujarati_InterIndic.txt -// Date: Tue May 18 17:24:48 2004 -//-------------------------------------------------------------------- - -// Gujarati_InterIndic - -t_Gujr_InterIndic { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Gujarati-InterIndic -//:: NFD (NFC) ; -"\u0a81>\ue001;" // SIGN CANDRABINDU -"\u0a82>\ue002;" // SIGN ANUSVARA -"\u0a83>\ue003;" // SIGN VISARGA -"\u0a85>\ue005;" // LETTER A -"\u0a86>\ue006;" // LETTER AA -"\u0a87>\ue007;" // LETTER I -"\u0a88>\ue008;" // LETTER II -"\u0a89>\ue009;" // LETTER U -"\u0a8a>\ue00a;" // LETTER UU -"\u0a8b>\ue00b;" // LETTER VOCALIC R -"\u0a8c>\ue00c;" // LETTER VOCALLIC L -"\u0a8d>\ue00d;" // VOWEL CANDRA E -"\u0a8f>\ue00f;" // LETTER E -"\u0a90>\ue010;" // LETTER AI -"\u0a91>\ue011;" // VOWEL CANDRA O -"\u0a93>\ue013;" // LETTER O -"\u0a94>\ue014;" // LETTER AU -"\u0a95>\ue015;" // LETTER KA -"\u0a96>\ue016;" // LETTER KHA -"\u0a97>\ue017;" // LETTER GA -"\u0a98>\ue018;" // LETTER GHA -"\u0a99>\ue019;" // LETTER NGA -"\u0a9a>\ue01a;" // LETTER CA -"\u0a9b>\ue01b;" // LETTER CHA -"\u0a9c>\ue01c;" // LETTER JA -"\u0a9d>\ue01d;" // LETTER JHA -"\u0a9e>\ue01e;" // LETTER NYA -"\u0a9f>\ue01f;" // LETTER TTA -"\u0aa0>\ue020;" // LETTER TTHA -"\u0aa1>\ue021;" // LETTER DDA -"\u0aa2>\ue022;" // LETTER DDHA -"\u0aa3>\ue023;" // LETTER NNA -"\u0aa4>\ue024;" // LETTER TA -"\u0aa5>\ue025;" // LETTER THA -"\u0aa6>\ue026;" // LETTER DA -"\u0aa7>\ue027;" // LETTER DHA -"\u0aa8>\ue028;" // LETTER NA -"\u0aaa>\ue02a;" // LETTER PA -"\u0aab>\ue02b;" // LETTER PHA -"\u0aac>\ue02c;" // LETTER BA -"\u0aad>\ue02d;" // LETTER BHA -"\u0aae>\ue02e;" // LETTER MA 
-"\u0aaf>\ue02f;" // LETTER YA -"\u0ab0>\ue030;" // LETTER RA -"\u0ab2>\ue032;" // LETTER LA -"\u0ab3>\ue033;" // LETTER LLA -"\u0ab5>\ue035;" // LETTER VA -"\u0ab6>\ue036;" // LETTER SHA -"\u0ab7>\ue037;" // LETTER SSA -"\u0ab8>\ue038;" // LETTER SA -"\u0ab9>\ue039;" // LETTER HA -"\u0abc>\ue03c;" // SIGN NUKTA -"\u0abd>\ue03d;" // SIGN AVAGRAHA -"\u0abe>\ue03e;" // VOWEL SIGN AA -"\u0abf>\ue03f;" // VOWEL SIGN I -"\u0ac0>\ue040;" // VOWEL SIGN II -"\u0ac1>\ue041;" // VOWEL SIGN U -"\u0ac2>\ue042;" // VOWEL SIGN UU -"\u0ac3>\ue043;" // VOWEL SIGN VOCALIC R -"\u0ac4>\ue044;" // VOWEL SIGN VOCALIC RR -"\u0ac5>\ue045;" // VOWEL SIGN CANDRA E -"\u0ac7>\ue047;" // VOWEL SIGN E -"\u0ac8>\ue048;" // VOWEL SIGN AI -"\u0ac9>\ue049;" // VOWEL SIGN CANDRA O -"\u0acb>\ue04b;" // VOWEL SIGN O -"\u0acc>\ue04c;" // VOWEL SIGN AU -"\u0acd>\ue04d;" // SIGN VIRAMA -"\u0ad0>\ue050;" // OM -"\u0ae0>\ue060;" // LETTER VOCALIC RR -"\u0ae1>\ue061;" // LETTER VOCALIC LL -"\u0ae6>\ue066;" // DIGIT ZERO -"\u0ae7>\ue067;" // DIGIT ONE -"\u0ae8>\ue068;" // DIGIT TWO -"\u0ae9>\ue069;" // DIGIT THREE -"\u0aea>\ue06a;" // DIGIT FOUR -"\u0aeb>\ue06b;" // DIGIT FIVE -"\u0aec>\ue06c;" // DIGIT SIX -"\u0aed>\ue06d;" // DIGIT SEVEN -"\u0aee>\ue06e;" // DIGIT EIGHT -"\u0aef>\ue06f;" // DIGIT NINE -"\u0964>\ue064;" // DANDA -"\u0965>\ue065;" // DOUBLE DANDA -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_Guru_InterIndic.txt b/icu4c/source/data/translit/t_Guru_InterIndic.txt deleted file mode 100644 index 0c11a0a5257..00000000000 --- a/icu4c/source/data/translit/t_Guru_InterIndic.txt +++ /dev/null @@ -1,111 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Gurmukhi_InterIndic.txt -// Date: Tue May 18 17:24:48 2004 -//-------------------------------------------------------------------- - -// Gurmukhi_InterIndic - -t_Guru_InterIndic { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Gurmukhi-InterIndic -//:: NFD (NFC) ; - -//\u0A16\u0A3C>\uE059; # LETTER KHHA -//\u0A17\u0A3C>\uE05A; # LETTER GHHA -//\u0A1C\u0A3C>\uE05B; # LETTER ZA -//\u0A38\u0A3C>\uE036; # LETTER SHA -//\u0A32\u0A3C>\uE033; # LETTER LLA -//\u0A2B\u0A3C>\uE05E; # LETTER FA -"\u0A01>\uE001;" // SIGN CHANDRABINDU -"\u0A02>\uE002;" // SIGN BINDI -"\u0A05>\uE005;" // LETTER A -"\u0A06>\uE006;" // LETTER AA -"\u0A07>\uE007;" // LETTER I -"\u0A08>\uE008;" // LETTER II -"\u0A09>\uE009;" // LETTER U -"\u0A0A>\uE00A;" // LETTER UU -"\u0A0C>\uE032;" // FALLBACK : VOCALLIC LA -"\u0A0F>\uE00F;" // LETTER EE -"\u0A10>\uE010;" // LETTER AI -"\u0A13>\uE013;" // LETTER OO -"\u0A14>\uE014;" // LETTER AU -"\u0A15>\uE015;" // LETTER KA -"\u0A16>\uE016;" // LETTER KHA -"\u0A17>\uE017;" // LETTER GA -"\u0A18>\uE018;" // LETTER GHA -"\u0A19>\uE019;" // LETTER NGA -"\u0A1A>\uE01A;" // LETTER CA -"\u0A1B>\uE01B;" // LETTER CHA -"\u0A1C>\uE01C;" // LETTER JA -"\u0A1D>\uE01D;" // LETTER JHA -"\u0A1E>\uE01E;" // LETTER NYA -"\u0A1F>\uE01F;" // LETTER TTA -"\u0A20>\uE020;" // LETTER TTHA -"\u0A21>\uE021;" // LETTER DDA -"\u0A22>\uE022;" // LETTER DDHA -"\u0A23>\uE023;" // LETTER NNA -"\u0A24>\uE024;" // LETTER TA -"\u0A25>\uE025;" // LETTER THA -"\u0A26>\uE026;" // LETTER DA -"\u0A27>\uE027;" // LETTER DHA -"\u0A28>\uE028;" // LETTER NA -"\u0A2A>\uE02A;" // LETTER PA -"\u0A2B>\uE02B;" // LETTER PHA -"\u0A2C>\uE02C;" // 
LETTER BA -"\u0A2D>\uE02D;" // LETTER BHA -"\u0A2E>\uE02E;" // LETTER MA -"\u0A2F>\uE02F;" // LETTER YA -"\u0A30>\uE030;" // LETTER RA -"\u0A32>\uE032;" // LETTER LA -"\u0a33>\uE033;" // FALLBACK -"\u0A35>\uE035;" // LETTER VA -"\u0a36>\ue036;" -"\u0A38\\\0a3c>\ue036;" // FALLBACK -"\u0A38>\uE038;" // LETTER SA -"\u0A39>\uE039;" // LETTER HA -"\u0A3C>\uE03C;" // SIGN NUKTA -"\u0A3E>\uE03E;" // VOWEL SIGN AA -"\u0A3F>\uE03F;" // VOWEL SIGN I -"\u0A40>\uE040;" // VOWEL SIGN II -"\u0A41>\uE041;" // VOWEL SIGN U -"\u0A42>\uE042;" // VOWEL SIGN UU -"\u0A47>\uE047;" // VOWEL SIGN EE -"\u0A48>\uE048;" // VOWEL SIGN AI -"\u0A4B>\uE04B;" // VOWEL SIGN OO -"\u0A4C>\uE04C;" // VOWEL SIGN AU -"\u0A4D>\uE04D;" // SIGN VIRAMA - -"\u0A5C>\uE05C;" // LETTER RRA - -"\u0A66>\uE066;" // DIGIT ZERO -"\u0A67>\uE067;" // DIGIT ONE -"\u0A68>\uE068;" // DIGIT TWO -"\u0A69>\uE069;" // DIGIT THREE -"\u0A6A>\uE06A;" // DIGIT FOUR -"\u0A6B>\uE06B;" // DIGIT FIVE -"\u0A6C>\uE06C;" // DIGIT SIX -"\u0A6D>\uE06D;" // DIGIT SEVEN -"\u0A6E>\uE06E;" // DIGIT EIGHT -"\u0A6F>\uE06F;" // DIGIT NINE -"\u0A70>\uE07C;" // TIPPI -"\u0A71>\uE07D;" // ADDAK -"\u0A72>\uE07E;" // IRI -"\u0A73>\uE07F;" // URA -"\u0A74>\uE080;" // EK ONKAR -"\u0964>\ue064;" // DANDA -"\u0965>\ue065;" // DOUBLE DANDA -// :: NFC (NFD) ; -// eof - - } -} diff --git a/icu4c/source/data/translit/t_Hani_Latn.txt b/icu4c/source/data/translit/t_Hani_Latn.txt deleted file mode 100644 index 8c506eda70d..00000000000 --- a/icu4c/source/data/translit/t_Hani_Latn.txt +++ /dev/null @@ -1,1455 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat -// Source: \icu4j\src\com\ibm\icu\impl\data/Transliterator_Han_Latin.txt -// Date: Fri May 28 17:07:31 2004 -//-------------------------------------------------------------------- - -// Han_Latin - -t_Hani_Latn { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Warning: does not do round-trip mapping!! - -// Convert CJK characters -"::Han-Spacedhan();" - -// Start RAW data for converting CJK characters -"[吖呵錒锕阿]>ā;" -"嗄>á;" -"啊>a;" -"[哀哎唉埃挨銰鎄锿]>āi;" -"[㱯䠹䶣啀嘊嵦捱敱敳癌皑皚騃]>ái;" -"[㑸㗨㢊䑂䨠佁娾昹欸毐矮蔼藹躷霭靄馤]>ǎi;" -"[㕌㗒㘷㝶㤅㿄䀳䅬䔽䝽䬵伌僾叆嗳噯塧壒嫒嬡愛懓懝暧曖濭爱瑷璦皧瞹砨硋碍礙艾薆譪賹鑀隘靉鴱]>ài;" -"[侒媕安峎峖庵氨痷盦盫腌腤菴萻葊蓭誝諳谙銨铵鞌鞍馣鵪鶕鹌]>ān;" -"[䜙啽玵雸]>án;" -"[㜝㽢䁆䅖俺唵垵埯揞晻罯隌]>ǎn;" -"[㟁㱘㸩䅁䎏䎨䬓䮗䯥儑匎堓岸按暗案桉洝犴荌錌闇鮟黬黯]>àn;" -"骯>āng;" -"[㭿䀚䒢䩕䭹䭺卬岇昂昻]>áng;" -"[䍩軮雵]>ǎng;" -"[㦹㼜枊盎醠]>àng;" -"[凹柪梎軪]>āo;" -"[㟼㠂㿰䐿䚫䥝䦋䵅厫嗷嗸嶅廒摮敖滶熬爊獒獓璈磝翱翺聱蔜螯謷謸遨鏊鏖隞驁骜鰲鳌鷔鼇𦪈]>áo;" -"[㑃㤇䞝䯠䴈媪媼抝拗狕芺袄襖镺𥜌]>ǎo;" -"[㕭㘬㘭㜜㜩㠗㥿䁱䜒䫨䮯傲坳垇奡奥奧嫯岙嶴慠懊扷擙澳詏𩼈]>ào;" -"[仈八哵岜扒捌朳玐疤粑羓芭豝釟鲃]>bā;" -"[㔜㧊䟦䳁䳊叐坺墢妭抜拔炦犮秡胈茇菝詙跋軷鈸钹颰馛魃鼥]>bá;" -"[㞎把鈀钯靶]>bǎ;" -"[㖠㶚䃻䆉䇑䎬䎱䥯䩗䩻䰾䱝坝垻壩弝欛灞爸猈覇霸]>bà;" -"[叭吧巴笆罢罷]>ba;" -"掰>bāi;" -"[㼟㿟䳆白]>bái;" -"[㗗㼣䙓佰捭摆擺柏百矲粨絔襬]>bǎi;" -"[㗑㠔䒔䢙䴽庍拜拝敗稗粺蛽贁败]>bài;" -"[扳搬攽斑斒朌班瘢癍般虨螌褩辬頒颁]>bān;" -"[㩯㸞㺜䉽䬳坂岅昄板版粄舨蝂鈑钣闆阪]>bǎn;" -"[㚘㪵䕰伴办半姅怑拌湴瓣秚絆绊辦鉡靽]>bàn;" -"扮>ban;" -"[垹帮幇幚幫捠梆浜縍邦邫鞤]>bāng;" -"[㔙㮄䟺榜牓綁绑膀]>bǎng;" -"[㭋㯁㾦䂜䎧䖫䧛䰷傍塝棒磅稖艕蚌蜯謗谤鎊镑]>bàng;" -"[勹包孢笣胞苞蕔褒襃闁骲]>bāo;" -"[㵡㿺䈏䥤䨌䨔䪨嫑瓟窇薄雹]>báo;" -"[㙅㲏㻄䎂䭋䳈䳰䴐保堡堢宝宲寚寳寶怉珤緥葆褓褴賲靌飽饱駂鳵鴇鸨]>bǎo;" -"[㙸㫧㲒䤖儤勽報忁报抱暴曓煲爆犦菢虣袌豹趵鉋鑤铇鮑鲍鸔]>bào;" -"[伓卑岥庳悲揹杯桮椑盃碑禆綼萆藣錃鵯鹎]>bēi;" -"[㤳北鉳]>běi;" -"[㓈㔨㛝㣁㰆㶔㷶㸢㸬㸽㻗㼎㾱䁅䋳䔒䠙䡶䩀䰽俻倍偝偹備僃备孛悖惫愂憊昁梖焙牬犕狈狽珼琲糒紴背蓓被褙貝贝軰輩辈邶郥鄁鋇鐾钡鞴韛]>bèi;" -"[呗唄]>bei;" -"[奔泍渀犇錛锛]>bēn;" -"[㡷㮺夲奙本楍畚苯]>běn;" -"[㤓㨧㱵䬱倴坌捹撪桳笨輽逩]>bèn;" -"[伻嘣崩嵭祊綳閍]>bēng;" -"甭>béng;" -"[㑟䋽䙀䩬䭰䳞埲玤琫繃绷菶誁鞛]>běng;" -"[㱶㷯䨻塴搒槰泵蠯跰蹦迸逬鏰镚]>bèng;" -"[偪屄毴皀皕稫芘蓖螕豍逼颷]>bī;" -"[㮰䨆䵄荸魮鼻]>bí;" -"[㚰㠲㪏㻶䃾䇷䏢䘡䠋䣥佊俾匕吡啚夶妣彼朼柀比沘滗潷疕秕笔筆粃蚍貏鄙]>bǐ;" 
-"[㓖㗉㘠㘩㙄㡀㡙㢰㢶㢸㧙㪤㮿㯇㱸㳼㵥㵨㹃㻫㿫䀣䁹䄶䊧䋔䌟䎵䏶䕗䖩䟆䟤䦘䧗䩛䪐䫁䫾䬛䭮䮡䯗佖咇哔嗶坒堛壁奰妼婢嬖币幣庇廦弊弻弼彃必怭愊愎敝斃枈柲梐楅檘毕毖毙湢滭煏熚狴獘獙珌璧畀畢疪痹痺睤睥碧筚箄-箆篦篳粊縪罼聛腷臂苾荜蓽蔽薜蜌袐裨襞襣觱詖诐貱賁贔贲赑跸蹕躃躄避邲鄨鄪鉍鎞鏎鐴铋閇閉閟闭陛鞸韠飶饆馝駜驆骳髀魓鮅鰏鲾鵖鷝鷩鼊]>bì;" -"[煸牑猵甂砭稨笾箯籩糄編编蝙边辺邉邊鍽鞭鯾鯿鳊]>biān;" -"[㦚䁵匾惼扁碥窆藊褊貶贬]>biǎn;" -"[㝸㣐㭓㲢㳎㳒㴜㵷㺹㻞䉸䒪䛒䡢䪻便匥卞变変弁徧忭抃揙昪汳汴玣緶缏艑苄覍變辡辧-辩辫辮辯遍釆閞鴘]>biàn;" -"[儦墂幖彪标標滮瀌灬熛爂猋瘭磦穮脿臕謤贆鏢鑣镖镳颩颮飆飇飍飑飙飚驫骉髟麃麅]>biāo;" -"[㟽㠒㯱㯹䔸婊檦表裱褾諘錶飈]>biǎo;" -"[㧼䞄俵覅鰾鳔]>biào;" -"[憋虌鱉鳖鼈龞]>biē;" -"[䠥䭱別别莂蟞襒蹩]>bié;" -"[㿜瘪癟蛂]>biě;" -"[㢼䉲䋢䏟彆徶]>biè;" -"[宾彬斌椕槟檳汃滨濒濱瀕瑸璸矉繽缤蠙豩豳賓賔邠鑌镔霦顮馪驞]>bīn;" -"[䐔傧儐摈擯殡殯膑臏髌髕髩鬂鬓鬢]>bìn;" -"[仌兵冫冰掤栟梹氷絣]>bīng;" -"[㨀䋑䓑䴵丙偋怲抦昞昺柄炳眪禀秉稟窉苪蛃邴鉼陃鞞餅餠饼]>bǐng;" -"[䈂䗒並併倂傡寎并幷摒栤棅病竝鈵靐鮩]>bìng;" -"[剝剥哱嶓拨撥播柭波玻癶盋砵碆缽菠袚蹳鉢钵驋髉鮁鱍鲅]>bō;" -"[㗘㝿㟑㧳㩧㩭㪍㬍㬧㱟㴾㶿㹀䂍䊿䍨䍸䑈䒄䗚䙏䞳䟛䢌䢪䥬䪇䪬䫊䬪䭦䭯䮀䮂䯋䰊䶈亳伯侼僰勃博嚗壆嶏帛愽懪挬搏敀栢桲欂泺浡渤煿牔犻猼瓝礡礴箔簙簿糪胉脖舶艊苩葧蔔袯襏襮誖謈豰踄踣郣鉑鋍鎛鑮铂镈餑餺饽馎馞駁駮驳髆鮊鲌鵓鹁]>bó;" -"[箥簸蚾跛駊]>bǒ;" -"[㖕孹挀擘檗疈繴蘗譒]>bò;" -"[卜啵膊]>bo;" -"[峬庯晡逋餔鵏]>bū;" -"[㙛㨐䀯䋠䒈䪁䪔卟哺捕补補鳪]>bǔ;" -"[㘵㚴㳍㻉㾟䊇䍌䏽䑰䝵䬏䳝䴝䴺不佈吥咘埔埗埠布怖悑捗步歨歩篰荹蔀部郶鈽钚钸餢鮬]>bù;" -"[嚓擦]>cā;" -"礤>cǎ;" -"[䟃䵽囃]>cà;" -"猜>cāi;" -"[㒲䴭才材溨犲纔裁財财]>cái;" -"[㥒䌽䐆䣋倸婇彩採棌睬綵跴踩采]>cǎi;" -"[䌨䰂埰寀縩菜蔡]>cài;" -"[傪参-叅喰湌蓡謲飡餐驂骖]>cān;" -"[㥇㨻㱚㺑䍼䏼䑶䗝䗞䘉䙁䝳䣟䫮䳻嬠嬱惭慙慚残殘蚕蝅蠶蠺]>cán;" -"[㘔㜗㦧㿊䅟䬫惨慘憯朁黪黲]>cǎn;" -"[㛑㣓㻮㽩䛹澯灿燦璨粲薒]>càn;" -"[仓仺伧倉傖凔嵢沧滄舱艙苍蒼螥鶬鸧]>cāng;" -"[㵴㶓藏鑶]>cáng;" -"[䅮䢢賶]>càng;" -"[撡操糙鄵]>cāo;" -"[㜖㯥䄚䏆䐬嘈嶆曹曺槽漕艚蓸螬褿鏪]>cáo;" -"[䒑愺懆艸草騲]>cǎo;" -"[䒃肏襙]>cào;" -"[㥽㨲㩍䇲䈟䊂䔴䜺侧側冊册厕厠嫧廁恻惻憡拺敇测測畟矠笧策筞筴箣粣茦萗蓛遪頙]>cè;" -"[嵾梫]>cēn;" -"[㞥㻸䃡䅾䤁䨙䯔䲋埁岑梣橬涔笒]>cén;" -"曽>cēng;" -"[㬝䁬䉕层層嶒曾碀竲鄫]>céng;" -"[㣒蹭]>cèng;" -"[偛嗏嫅扠挿插揷杈槎疀肞臿艖銟靫餷馇]>chā;" -"[㢉㢒㪯㫅䁟䆛䑘䕓䤩䲦䶪垞察嵖搽查査檫痄碴秅茶詧鍤锸𦉆]>chá;" -"[䰈蹅鑔镲]>chǎ;" -"[㛳㢎㣾㤞䊬䒲䓭䟕䡨侘奼姹岔差汊紁衩訍詫诧]>chà;" -"叉>cha;" -"[拆芆釵钗]>chāi;" -"[㑪㾹䓱侪儕喍柴祡豺]>chái;" -"茝>chǎi;" -"[㳗䘍囆瘥虿蠆袃]>chài;" -"[幨惉搀攙梴袩裧襜辿鋓]>chān;" -"[㔆㙻㢆㶣㸥㺥䂁䜛䡪䡲䣑䤫䧯僝儃劖嚵婵嬋孱巉廛棎欃毚湹潹潺澶瀍瀺煘獑磛禪緾繵纏纒缠艬蝉蟬蟾誗讒谗躔鄽酁鋋鑱镵饞馋]>chán;" -"[㢟㦃㯆㹌㹽䊲䐮䑎䤘䥀䩶䴼䵐丳产冁刬剗剷囅嵼幝摌旵浐滻灛燀產産簅繟蒇蕆諂譂讇谄鏟铲閳闡阐驏骣]>chǎn;" -"[㙴㬄㵌䀡䠨䪜䱿儳忏懴懺硟羼韂顫颤]>chàn;" -"[伥倀娼昌晿淐猖琩菖裮錩锠閶阊鯧鲳鼚]>chāng;" -"[㙊㦂䗅䠆䯴仧偿償兏嘗嚐塲嫦尝常徜瑺瓺甞肠腸膓苌萇鋿鏛镸鱨鲿]>cháng;" -"[㫤䕋䠀僘厂厰场場廠昶氅鋹]>chǎng;" -"[䩨倡唱怅悵暢焻畅畼誯韔鬯]>chàng;" -"敞>chang;" -"[弨怊抄欩罺訬超鈔钞]>chāo;" -"[䄻䬤䰫嘲巢巣晁朝樔潮窲謿轈鄛鼂鼌]>cháo;" -"[㶤㷅䎐䏚吵巐炒煼眧麨]>chǎo;" -"[仦仯耖觘]>chào;" -"[伡俥唓砗硨莗蛼車车]>chē;" -"[㨋㵔䋲䞣䰩偖扯撦]>chě;" 
-"[㔭㥉㬚㯙㱌㵃㾝㿭䁤䑲䒆䚢䛸䜠䧪䨁勶坼屮彻徹掣撤澈烲爡瞮硩聅轍辙迠]>chè;" -"[棽琛瞋諃謓賝郴]>chēn;" -"[㕴㫳㲀㴴㽸䆣䒞䚘䜟䟢䢅䢈䢻䣅䤟塵宸尘屒忱愖敐曟樄沈沉煁臣茞莀莐蔯薼螴訦諶谌軙辰迧鈂陈陳霃鷐麎]>chén;" -"[䫖墋捵硶碜磣祳贂趻踸鍖]>chěn;" -"[㧱䞋儬儭嚫夦榇櫬疢藽衬襯讖谶趁趂齓齔龀]>chèn;" -"晨>chen;" -"[䞓偁噌埥憆撐撑棦橕檉泟浾爯琤瞠称稱穪竀緽蛏蟶赪赬鏿阷頳饓]>chēng;" -"[㞼㨃㲂㼩䁎䄇䆑䆵䆸䇸䔲䗊䧕䫆䮪丞乗乘呈城埕堘塍塖娍宬峸惩憕懲成承挰掁揨晟枨棖椉橙洆浈湞澂澄瀓珵珹程窚筬絾脀脭荿虰裎誠诚郕酲鋮铖騬]>chéng;" -"[侱塣庱徎悜睈逞騁骋]>chěng;" -"[㐼䀕牚秤靗]>chèng;" -"[吃哧喫嗤噄媸彨彲摛欫瓻痴癡眵瞝笞絺蚩螭訵誺郗骴魑黐齝]>chī;" -"[㓾㙜㞴㢮㮛䈕䐤䔟䙙䛂䜄䞾䪧䮈䶔䶵倁坻墀岻弛彽徥徲持歭池汦泜竾筂箈箎篪耛茌茬荎蚳謘貾赿趍踟迟遅遟遲馳驰]>chí;" -"[㘜㟂㢁㢋㱀㳏㶴䊼䑛䜵䜻侈叺呎垑姼尺恀恥拸搋欼歯粎耻蚇袳裭褫誃鉹齒齿]>chǐ;" -"[㒆㓼㔑㞿㡿㽚䇼䗖䟷䠠䤲䮻䰡䳵乿侙傺勅叱啻彳恜慗憏懘抶敕斥杘栻淔灻炽烾熾痓痸瘛眙翄翅翤觢赤趩跮踅遫鉓銐飭饎饬鶒鷘]>chì;" -"[充冲嘃徸忡憃憧摏沖浺珫舂茺衝蹖𢥞]>chōng;" -"[㓽㹐䌬䖝䳯崇崈漴痋翀虫蝩蟲褈隀]>chóng;" -"[埫宠寵]>chǒng;" -"[㧤揰銃铳𣀒]>chòng;" -"[婤怞抽搊犨犫瘳篘霌]>chōu;" -"[㐜㛶㤽㦞㨶㵞㿧䇺䊭䌧䌷䓓䛬䥒䲖仇俦儔嚋嬦帱幬惆愁懤栦椆焘燽燾畴疇皗稠筹籌紬絒綢绸菗薵裯詶讎讐踌躊酧醻雔雠]>chóu;" -"[䪮丑丒侴偢吜杻杽瞅矁醜魗]>chǒu;" -"[䔏殠簉臭臰霔]>chòu;" -"酬>chou;" -"[出初岀貙齣]>chū;" -"[㕏㕑㡡㶆㼥䅳䎝䎤䟞䠂䠧刍厨幮廚曯橱櫉櫥滁犓篨耝耡芻蒢蒭蕏藸蜍趎蹰躇躕鉏鋤锄除雏雛鶵𦷝]>chú;" -"[㹼䊰䖏䙘储儲処憷杵椘楮檚濋础礎褚齭齼]>chǔ;" -"[㔘㗰㙇㤕㤘䙕䜴䟣䦌䧁䮞亍俶傗儊处怵拀搐敊斣斶欪歜泏滀琡畜矗竌竐臅荲處触觸豖踀遚鄐閦黜]>chù;" -"楚>chu;" -"䫄>chuà;" -"揣>chuāi;" -"[㪓膗]>chuái;" -"㪜>chuǎi;" -"[䦟䦤䦷踹]>chuài;" -"[巛川氚瑏穿]>chuān;" -"[㯌㼷䁣伝传傳圌暷椽歂舩船諯輲遄]>chuán;" -"[㱛僢喘堾腨舛]>chuǎn;" -"[串汌賗釧钏]>chuàn;" -"[䆫刅戧摐牎牕疮瘡窓窗窻]>chuāng;" -"[㡖䃥䚒䡴䭚噇幢床橦牀疒]>chuáng;" -"[㵂䇬摤漺闖闯]>chuǎng;" -"[䎫创刱剏剙創怆愴獊]>chuàng;" -"[吹炊]>chuī;" -"[㓃㝽㥨㩾䄲䍋䳠倕垂埀捶搥棰槌湷箠篅腄菙錘鎚锤陲顀]>chuí;" -"䞼>chuǐ;" -"龡>chuì;" -"[媋旾春暙椿櫄膥萅鶞]>chūn;" -"[㝄㝇㵮㸪䏝䐇䓐䔚䣨䣩䥎䫃唇憌浱淳湻滣純纯脣莼蒓蓴醇醕陙鯙]>chún;" -"[㖺㿤䄝䏛䐏䞐䦮偆惷睶箺萶蠢踳]>chǔn;" -"[鶉鹑]>chun;" -"[戳鎈齹]>chuō;" -"犳>chuó;" -"[㚟㲋䂐䃗䄪䆯䇍䋘䍳䓎䮕啜嚽娖婥惙擉歠涰珿畷磭綽繛绰腏諁趠輟辍辵辶逴酫醊鑡齪龊]>chuò;" -"[偨庛疵薋蠀赼趀趑髊]>cī;" -"[㓨㘂㘹㤵䂣䆅䈘䖪䛐䧳䨏䭣䲿䳄垐嬨慈柌濨珁瓷甆磁礠祠糍茨詞词辝辞辤辭雌飺餈]>cí;" -"[㠿佌此泚玼皉紪跐]>cǐ;" -"[㞖㡹㢀㩞㹂䓧䗹䦻䯸䰍䳐伺佽刺刾朿次絘莿蚝蛓螆]>cì;" -"[匆囪囱忩怱悤憁暰枞棇樅樬樷漗焧燪瑽璁瞛篵緫繱聡聦聪聰苁葱蓯蔥蟌鍐鍯鏓鏦騘驄骢]>cōng;" -"[㼻䉘䕺䳷丛从叢婃孮従徖從悰慒淙漎潀灇爜琮誴賨賩錝]>cóng;" -"[欉藂謥]>còng;" -"㫶>cǒu;" -"[傶凑楱湊腠輳辏]>còu;" -"[粗觕麁麄麤]>cū;" -"[䓚䢐徂殂豠]>cú;" -"[㗤㰗䃚䎌䙯䛤䟟䠓䠞䥄䥘䬨促噈塶憱梀槭殧猝瘄瘯簇縬脨蔟趗踧蹙蹴醋鼀]>cù;" -"[撺攛汆蹿躥鋑]>cuān;" -"[㠝㭫䆘䰖劗巑櫕]>cuán;" -"[㵀㸑殩熶爨窜竄篡簒鑹镩]>cuàn;" -"[催嗺墔崔摧榱槯獕磪竴鏙]>cuī;" -"[凗慛]>cuí;" -"[㵏㷃䊫䧽漼璀皠趡]>cuǐ;" -"[㝮㥞㧘㯔㯜㱖㳃㷪䂱䃀䄟䆊伜倅啐啛忰悴毳淬焠琗疩瘁竁粋粹紣綷翆翠脃脆脺膬膵臎萃顇]>cuì;" -"[村澊皴邨]>cūn;" -"[存拵袸]>cún;" -"[刌忖]>cǔn;" -"[䍎吋寸籿]>cùn;" -"[搓撮睉磋蒫蹉遳]>cuō;" 
-"[㟇㽨䠡䣜䴾嵯嵳痤矬蔖虘醝鹺鹾]>cuó;" -"[䂳瑳縒脞]>cuǒ;" -"[䐣䟶䱜剉剒厝挫措歵莝莡蓌逪銼錯锉错齚齰]>cuò;" -"[哒噠墶搭撘耷荅褡]>dā;" -"[㜓㯚㾑㿯䃮䐊䑽䩢䳴䵣匒呾妲怛溚炟畗畣笪答繨荙薘蟽褟詚达迖逹達鐽靼鞑韃]>dá;" -"打>dǎ;" -"[大眔]>dà;" -"[㟷瘩]>da;" -"[呆呔懛獃]>dāi;" -"[䚞䚟傣歹歺逮]>dǎi;" -"[㐲㞭㫹㯂㶡㻖㿃䈆䒫代叇埭岱帒带帯帶廗待怠戴柋殆汏瀻玳瑇甙簤紿緿绐艜袋襶貸贷跢蹛軚軩迨霴靆黛黱]>dài;" -"[丹儋勯匰单単單噡妉媅担擔殚殫甔眈砃箪簞耼耽聃聸襌躭郸鄲酖頕黕]>dān;" -"[㔊㕪㽎䃫䉞䮰䱋亶伔刐掸撢撣澸玬瓭疸紞胆膽衴黵]>dǎn;" -"[㗖㡺㫜㱽㲷㵅㺗䄷䉷䨢䨵䩥䭛䳉但僤啖啗啿噉嚪帎弹弾彈惮憚憺旦暺柦氮沊淡潬澹狚疍瘅癉癚窞腅舕萏蛋蜑觛誕诞鉭钽霮饏馾駳髧鴠]>dàn;" -"[儅噹嵣当澢珰璫當筜簹艡蟷裆襠]>dāng;" -"[䣊䣣党挡擋攩欓灙譡讜谠黨]>dǎng;" -"[䑗䦒凼圵垱壋婸宕愓档檔氹潒璗瓽盪瞊砀碭礑簜荡菪蕩蘯趤逿闣雼]>dàng;" -"[鐺铛]>dang;" -"[刀刂幍忉朷氘舠釖魛鱽]>dāo;" -"[㠀㿒䆃䌦䲽壔导導岛島嶋嶌嶹捣捯搗擣祷禂禱蹈隝隯]>dǎo;" -"[䧂倒到噵悼檤瓙盗盜稲稻纛翿菿衜衟軇道]>dào;" -"[㝵㤫㥀㥁㯖䙷䙸嘚徳德恴惪淂鍀锝]>dé;" -"[地得的]>de;" -"[噔嬁灯燈璒登竳簦覴豋蹬鐙镫]>dēng;" -"[䒭戥朩等]>děng;" -"[䠬䮴僜凳墱嶝櫈瞪磴邓鄧隥霯]>dèng;" -"[仾低堤墑滴眡磾羝菂袛趆鍉鞮]>dī;" -"[㣙㰅㹍䊮䨀䨤䮤䯼䴞䵠仢唙啇嘀嚁嫡廸敌敵梑涤滌潪狄笛篴籴糴翟荻蔋蔐藡覿觌豴蹢迪鏑镝靮頔鸐]>dí;" -"[㡳㪆㭽䂡䍕䢑䣌䱃呧坘埞底弤抵拞掋柢氐牴砥聜苖茋菧觝詆诋軧邸阺骶]>dǐ;" -"[㢩㦅㼵䀸䀿䏑䑭䑯䞶䟡䧝䩘䩚䱱䶍偙僀哋啲坔埊墆墬奃娣嶳帝弟怟慸摕旳杕梊棣楴樀渧焍玓甋睇碲祶禘第締缔肑腣蒂蔕虳蝃螮諦谛踶軑轪递逓遞遰釱鉪馰]>dì;" -"嗲>diǎ;" -"[傎厧嵮巅巓巔掂攧敁槇槙滇甸瘨癫癲蹎顚顛颠齻]>diān;" -"䟍>dián;" -"[㚲㸃䍄䓦典嚸奌婰敟点琠痶碘蕇踮點]>diǎn;" -"[㓠㝪㞟㥆㵤㶘㼭䧃佃坫垫墊壂奠婝店惦扂橂殿淀澱玷琔电痁癜磹簟蜔鈿钿阽電靛]>diàn;" -"[凋刁叼奝彫扚殦汈琱瞗碉虭蛁貂雕鮉鯛鲷鳭鵰鼦]>diāo;" -"[㹿䉆屌釕钌]>diǎo;" -"[㒛㪕䂪䂽䔙䠼䵲伄吊弔掉瘹窎窵竨莜蓧藋訋調调釣銱鋽鑃钓铞魡]>diào;" -"[爹跌]>diē;" -"[㑙㥈㦶㩸㩹㫼㬪㭯㲲㲳㷸㻡䏲䘭䞇䞕䠟䪥䮢䲀䳀䴑叠喋垤堞峌嵽恎戜挕昳曡氎牃牒瓞畳疉疊眣眰碟絰绖聑胅臷艓苵蜨蝶褋褺詄諜谍趃跕蹀迭镻鰈鲽]>dié;" -"[惵耊耋]>diè;" -"[丁仃叮奵帄庰玎疔盯釘钉靪]>dīng;" -"婈>díng;" -"[㫀㴿㼗嵿檙濎薡酊鐤頂顶鼎鼑]>dǐng;" -"[㝎啶娗定忊椗矴碇碠磸腚訂订錠锭顁飣饤]>dìng;" -"[丟丢乣銩铥]>diū;" -"[东倲冬咚埬娻岽崠崬昸東氡氭涷笗苳菄蝀鯟鶇鸫鼕]>dōng;" -"[㖦㨂䂢䵔墥嬞懂箽董]>dǒng;" -"[㑈㓊㗢㜱㢥㼯䅍䍶䞒働冻凍动動垌戙挏栋棟洞眮胨胴腖霘駧]>dòng;" -"[兜兠吺唗橷篼蔸郖都]>dōu;" -"[㞳㪷䕱唞抖敨枓枡蚪阧陡]>dǒu;" -"[㛒㢄㷆䄈䕆䛠䬦斗梪毭浢痘窦竇脰荳豆逗鋀閗闘餖饾鬥鬦鬪鬬鬭]>dòu;" -"[厾嘟督醏闍阇都]>dū;" -"[㱩㸿㾄䓯䙱䢱䪅䫳䮷儥凟匵嬻椟櫝殰毒涜渎瀆牍牘犊犢独獨瓄皾碡蝳読讀讟读豄贕鑟韇韣韥騳髑黩黷]>dú;" -"[䀾䈞䐗堵帾暏琽睹笃篤裻覩賭赌錖]>dǔ;" -"[䄍䅊䟻䲧喥妒妬度杜殬渡秺簬簵肚荰螙蠧蠹鍍镀靯]>dù;" -"[偳剬媏端褍鍴]>duān;" -"短>duǎn;" -"[㫁㱭䠪塅断斷椴段毈煅瑖碫簖籪緞缎腶葮躖鍛锻]>duàn;" -"[垖堆塠嵟痽磓頧]>duī;" -"陮>duǐ;" -"[㙂㟋㠚㬣㳔㵽䇏䇤䔪䨴䨺䬈䬽䯟兊兌兑对対對怼憝憞懟濧瀩碓祋綐薱譈轛鐓鐜镦队隊]>duì;" -"[吨噸墩-墫弴惇撉撴敦犜礅蜳蹲蹾驐]>dūn;" -"[趸躉𣎴]>dǔn;" -"[㬿䤜伅囤崸庉扽沌潡燉盹盾砘踲逇遁遯鈍钝頓顿鶨]>dùn;" -"[剟咄哆嚉多夛崜]>duō;" -"[㣞䐾凙剫夺奪悳掇敓敚敠敪椯毲痥莌裰襗踱鈬鐸铎鮵]>duó;" -"[㔍㖼㙐㛆㛊㥩㻔䒳䙤䠤䤪䩣䫂䯬亸哚嚲垛垜埵奲憜挅挆朶痑綞缍趓躱躲鍺锗鬌]>duǒ;" -"[㧷㻧䅜䍴䑨䙃䙟䤻䩔刴剁堕墮墯尮嶞惰柁柮桗炧炨舵跥跺陊陏飿饳]>duò;" -"朵>duo;" -"[妸妿娿婀婐屙峉痾鈳钶]>ē;" 
-"[㼂㼰䄉䕏䖸䩹䱮䳗䳘俄吪娥峨峩枙涐珴皒睋硪磀莪蛾訛誐譌讹迗鈋隲頟額额騀魤鵝鵞鹅]>é;" -"[㼢噁]>ě;" -"[㓵㔩㕎㖾㗁㟧㠋㡋㦍㧖㩵㮙㱦㷈䆓䑥䑪䓊䔾䙳䛖䝈䞩䣞䫷䳬偔僫卾厄呃呝咢咹噩圔垩堊堨堮岋崿嶭恶悪惡愕戹扼掠搤搹櫮湂琧略砐硆胺腭苊萼蕚蚅蝁覨詻諤讍谔豟貖軛軶轭遌遏鄂鍔鑩锷閼阏阨阸頞顎颚餓餩饿魥鰐鱷鳄鶚鹗齶]>è;" -"[誒诶]>éi;" -"[奀恩煾蒽]>ēn;" -"䅰>ěn;" -"[䊐䬶䭓䭡摁]>èn;" -"[㖇㜨㧫㮕䋩䎟䎠䮘侕児唲栭檽洏而耏聏胹荋袻輀轜陑隭髵鮞鲕鴯鸸]>ér;" -"[㚷㢽䋙䌺尒-尔峏栮洱爾珥耳薾迩邇餌饵駬]>ěr;" -"[㒃㛅䎶䏪䣵二佴刵咡弍弐樲毦眲衈誀貮貳贰鉺铒]>èr;" -"[傠发彂橃沷発發瞂]>fā;" -"[㕹㘺䇅䣹乏伐垡姂栰疺砝筏罚罰罸茷藅閥阀]>fá;" -"[䂲佱法灋髮]>fǎ;" -"[㛲䒥珐琺蕟髪]>fà;" -"[僠噃嬏帆幡旙旛杋番笲籓繙翻轓飜鱕]>fān;" -"[㠶㸋㺕䀀䀟䉒䊩䋣䋦䌓䡊䪛䪤䫶䭵䮳凡-凣勫墦忛憣柉棥樊橎瀪瀿烦煩燔璠矾礬籵緐繁羳膰舧蕃薠藩蘩蠜襎蹯釩鐇钒颿鷭]>fán;" -"[䒠䛀反瓪軡返魬]>fǎn;" -"[㕨㝃㤆㴀㶗㼝㽹䉊䐪䒦䣲奿嬎梵氾汎泛滼犯畈盕笵範范訉販贩軓軬鄤飯飰饭飯]>fàn;" -"[匚方枋汸淓牥芳蚄邡鈁钫鴋]>fāng;" -"[㤃埅妨房肪防魴鰟鲂鳑]>fáng;" -"[㑂㕫㧍㯐䢍䦈䲱仿倣旊昉昘瓬眆紡纺舫訪访髣鶭]>fǎng;" -"[放趽]>fàng;" -"坊>fang;" -"[啡妃婓扉渄猆緋绯菲蜚裶霏非靟飛飝飞馡騑騛鯡鲱]>fēi;" -"[䈈淝肥腓萉蜰]>féi;" -"[㥱䕁䨽䨾匪奜悱斐朏棐榧篚翡蕜誹诽餥]>fěi;" -"[㔗㩌㭭㵒䆏䉬䑔䕠䚨䛍䠊䤵䰁俷剕厞吠屝废廃廢昲曊杮櫠沸濷狒疿痱癈砩肺胇芾蟦費费鐨镄靅鼣]>fèi;" -"[兝分吩岎帉昐朆氛燓砏紛纷翂芬衯訜酚鈖隫雰餴饙鳻𦐈]>fēn;" -"[㷊㸮䩿䯨䴅坆坟墳妢幩弅枌梤棻棼橨汾濆炃焚燌獖玢秎羒肦蒶蕡蚠蚡豮豶轒鐼馚馩魵黂鼖鼢]>fén;" -"[㥹粉羵黺]>fěn;" -"[㖹㮥㿎份偾僨坋奋奮忿愤憤瀵粪糞膹鱝鲼]>fèn;" -"[丰仹偑僼凨凬凮堼夆妦寷封峯峰崶捀枫桻楓檒沣灃烽熢犎猦疯瘋盽砜碸篈莑葑蘴蜂蠭豐鄷酆鋒鏠锋靊風飌风麷]>fēng;" -"[㦀㵯䏎䙜䩼冯堸摓沨浲渢漨綘艂逢馮]>féng;" -"[䟪唪覂諷讽]>fěng;" -"[㡝俸凤奉湗焨煈甮縫缝賵赗鳯鳳鴌]>fèng;" -"[仏坲]>fó;" -"[䳕剻哹紑裦]>fóu;" -"[否殕缶缹缻芣雬鴀]>fǒu;" -"[椱竎]>fòu;" -"[伕呋妋姇孵尃怤懯敷旉枹柎泭玞砆稃筟糐綒罦肤膚荴衭豧趺跗邞鄜鈇鳺麩麬麱麸]>fū;" -"[㚕㜑㟊㠅㤔㪄㫙䃽䋹䌿䍖䑧䒀䔰䕎䘠䞞䟮䡍䨗䪙䵗䵾乀伏佛俘冹凫刜匐咈嚩垘孚岪巿帗幅幞弗彿怫払扶拂服枎柫栿桴棴氟洑浮涪澓炥烰玸琈甶畉畐癁祓福符笰箙粰紱紼絥綍绂绋罘翇艀艴芙苻茀茯莩菔葍虙蚨蜉蝠諨踾輻辐郛鉘鉜韍韨颫髴鮄鳧鳬鴔鵩鶝福]>fú;" -"[㓡㕮䋨䌗䓛䗄䩉䫍䫝䯽乶俌俛俯呒嘸府弣抚拊捬撫斧暊滏焤盙秿簠腐腑莆蚥蜅輔辅郙釜釡頫鬴鯆黼]>fǔ;" -"[㙏㚆㤱㬼㳇㵗㽬㾈䂤䎅䒇䘀䘄䝾䞜䞯䞸䟔䠵䦣䧞䨱䭸䮛䯱付偩冨副圑坿复妇婏婦媍嬔富峊復榑父祔稪紨緮縛缚胕腹萯蕧蚹蛗蝜蝮袝複褔覆訃詂讣負賦賻负赋赙赴輹鍑鍢阜阝附陚馥駙驸鮒鰒鲋鳆黻𦱖]>fù;" -"[傅咐夫甫袱]>fu;" -"[嘎嘠]>gā;" -"[尜錷]>gá;" -"[尕玍𠁥]>gǎ;" -"尬>gà;" -"[侅垓姟峐晐畡祴胲荄該该豥賅赅郂陔隑]>gāi;" -"[䪱忋改絠]>gǎi;" -"[㕢㧉㮣䏗丐乢匃匄戤摡概槩槪溉漑瓂盖葢蓋賌鈣钙𩕭]>gài;" -"[乹乾亁凲坩尲尴尶尷嵅忓攼杆柑泔玕甘疳矸竿筸粓肝芉苷虷蜬迀鳱]>gān;" -"仠>gán;" -"[㺂䃭䇞䔈䤗䵟感擀敢桿橄澉灨皯盰秆稈衦贑赶趕鱤鳡]>gǎn;" -"[䯎䲺倝凎干幹旰榦檊汵涻淦簳紺绀詌贛赣骭]>gàn;" -"[冈冮刚剛堈堽岡掆杠棡牨犅碙笐綱纲缸罁罓罡肛舡鋼鎠钢]>gāng;" -"[㟠㟵㽘䴚岗崗港]>gǎng;" -"[戅-戇槓焵筻]>gàng;" -"[槔槹橰櫜滜皋皐睾篙糕羔羙膏臯餻高髙鷎鼛𦤎]>gāo;" -"[㚏㚖㾸䗣夰搞暠杲槁檺稾稿筶縞缟菒藁藳]>gǎo;" -"[叝吿告煰祮祰禞誥诰郜鋯锆鯌]>gào;" -"[割咯哥圪戈戓戨搁擱歌渮滒牁牫牱疙肐胳謌鎶鴚鴿鸽麧𪃿]>gē;" -"[㗆㝓㠷㦴㨰㪾㵧㷴䆟䈓䐙䕻䗘䘁䛋䛿䢔䨣䩐䪂䪺䫦佮匌呄嗝噶塥愅挌搿敋格槅獦膈臵茖蛒裓觡諽輵轕郃鎘镉閣閤阁阖隔革鞷韐韚骼鮯鰪齃]>gé;" -"[哿擖笴舸葛]>gě;" -"[䧄个個各嗰箇虼鉻铬]>gè;" -"[給给]>gěi;" -"[刯剆根跟]>gēn;" 
-"哏>gén;" -"䫀>gěn;" -"[㫔㮓亘亙艮茛]>gèn;" -"[庚揯搄浭畊秔稉粳絙絚緪縆羮羹耕菮賡赓鶊鹒]>gēng;" -"[㾘䋁䌄哽埂峺挭梗綆绠耿莄郠骾鯁鲠]>gěng;" -"[䱍䱎䱭䱴堩更]>gèng;" -"[供公功匑厷塨宫宮工幊弓恭愩攻杛玜疘碽篢糼肱觥觵躬躳釭魟龏龔龚]>gōng;" -"[㤨㧬㫒㭟㺬㼦䂬䡗巩拱拲栱汞珙穬蛬銾鋛鞏]>gǒng;" -"[㓋㔶㯯䇨䢚共廾羾貢贡]>gòng;" -"蚣>gong;" -"[勾沟溝篝緱缑耩芶褠鈎鉤钩鞲韝]>gōu;" -"[㺃岣枸狗玽笱耇-耉苟茩蚼豿]>gǒu;" -"[㗕㜌㝅㝤㨌㳶䃓䝭䞀傋冓坸垢够夠姤媾彀搆撀构構煹覯觏訽詬诟購购遘雊骺]>gòu;" -"[估呱夃姑嫴孤柧橭沽泒痼笟箍箛罛苽菰蛄觚軱辜酤鈲鮕鴣鸪]>gū;" -"[䜼䮩鶻鹘]>gú;" -"[㒴㚉㯏㼋㾶䀇䀜䀦䀰䅽䊺䍍䐨䡩䵻古唂唃嘏尳愲扢杚榖毂淈濲瀔焸牯狜皷皼盬瞽穀糓縎罟羖股脵臌蓇薣蛊蛌蠱詁诂谷轂鈷钴餶馉骨鼓鼔]>gǔ;" -"[㧽㽽䍛䓢䶜僱凅固堌崓崮怘故梏棝榾牿祻稒錮锢雇頋顧顾鯝鲴鶮]>gù;" -"[咕菇]>gu;" -"[刮劀懖栝桰煱瓜瘑筈緺聒胍脶腡葀趏踻銽頢颳騧鴰鸹]>guā;" -"[㒷䈑冎剐剮叧寡]>guǎ;" -"[卦啩坬挂掛絓罣罫褂詿诖髺]>guà;" -"乖>guāi;" -"[拐枴柺箉]>guǎi;" -"[㧔㷇㽇䂯䊽叏夬怪恠旝癐]>guài;" -"[倌关冠官棺瘝癏蒄覌観觀观関闗關鱞]>guān;" -"[䏓䗆䘾䦎䩪䪀䲘琯痯筦管脘舘輨錧館馆鳤館]>guǎn;" -"[㮡㴦䌯䎚䗰䙛䙮䝺丱悹悺惯慣掼摜樌毌泴涫潅灌爟瓘盥矔礶祼罆罐謴貫贯遦鏆鑵雚鱹鸛鹳]>guàn;" -"[侊僙光咣垙姯洸灮炗炛烡珖胱茪輄銧黆]>guāng;" -"[广広廣犷獷]>guǎng;" -"[㫛桄櫎臦臩逛]>guàng;" -"[亀傀圭妫媯嫢嬀帰归摫椝槻槼歸溈珪瑰璝瓌皈硅窐膭袿規规邽郌閨闺鬶鬹鮭鲑龜龟]>guī;" -"潙>guí;" -"[㔳㧪㨳㩻㲹㸵䁛䍯䞈䞨䣀䤥佹匦匭厬垝姽宄庋庪恑攱晷氿癸祪簋蛫蟡觤詭诡軌轨陒鬼]>guǐ;" -"[㙺㪈䇈䌆䍷䐴䖯䙆䝿䠩䯣䰎䳏刽刿劊劌匮匱嶡巜攰昋暩柜桂櫃溎炔筀蓕襘貴贵跪鞼鱥]>guì;" -"裩>gūn;" -"[㙥㫎㯻䃂䎾䜇丨掍滚滾磙緄绲蓘蔉衮袞袬輥辊鮌鯀鲧]>gǔn;" -"[䵪棍璭睴]>gùn;" -"[埚堝墎崞濄蝈蟈郭鈛鍋锅]>guō;" -"[㕵㖪㚍㶁䂸䆐䐸䤋䬎囯囶囻国圀國帼幗慖掴摑漍簂聝腘膕虢馘]>guó;" -"[㞅䙨䴹惈果椁槨淉猓粿綶菓蜾裹輠鐹餜馃]>guǒ;" -"[㳀腂过過]>guò;" -"[啯嘓]>guo;" -"[哈𠀀]>hā;" -"蛤>há;" -"[咍咳嗨]>hāi;" -"[㜾㨟䠽䯐䱺孩还還頦颏骸]>hái;" -"[海烸酼醢]>hǎi;" -"[㤥㦟㧡㺔䇋亥嗐害氦餀饚駭骇𠀅𥩲]>hài;" -"[唅嫨憨炶甝蚶谽酣頇顸馠魽鼾]>hān;" -"[㖤㙈㙔㟏㟔㮀㶰㼨䈄䗙䤴䥁䨡䮧䶃佄函凾含圅娢寒崡晗梒涵焓琀筨肣邗邯鋡韓韩]>hán;" -"[㘎㘕㘚㙳㵎㸁㺖㽉䍐䍑䓍䓿䛞厈喊浫罕蔊豃鬫]>hǎn;" -"[㑵㒈㜦㢨㨔㪋㲦㵄㽳䁔䌍䎯䏷䐄䕿䖔䘶䧲䫲傼哻垾娨屽悍憾扞捍撼攌旱晘晥暵汉汗浛涆淊漢澏瀚焊熯猂皔睅翰莟菡蛿蜭螒譀豻貋釬銲鋎閈闬雗頷顄颌颔馯駻鶾]>hàn;" -"夯>hāng;" -"[㤚䀪䘕䲳斻杭桁沆肮航苀蚢貥迒頏颃魧]>háng;" -"[汻酐]>hǎng;" -"[䟘䣈䦳䴂]>hàng;" -"[侾嚆蒿薅]>hāo;" -"[㕺㠙㩝㬔䝥䧫儫勂嗥嘷噑嚎壕椃毫濠獆獋獔籇蠔諕譹豪]>háo;" -"[好恏郝]>hǎo;" -"[㘪㙱㚪㝀㞻㬶㵆䒵䚽䝞䧚䪽䬉䯫傐号哠峼悎昊昦晧暤暭曍浩淏滈澔瀥灏灝皓皜皞皡皥秏耗聕薃號鄗鎬镐顥颢鰝]>hào;" -"[喝嗬峆抲訶诃]>hē;" -"[㓭㔠㕡㥺㪉㭘㭱㮝㮫㹇㿣䃒䅂䒩䕣䞦䢗䫘䳚䶅何劾厒合咊和哬啝姀廅惒敆曷柇核楁毼河涸滆澕熆狢盇盉盍盒礉禾秴篕紇纥翮耠荷菏萂蒚蚵蝎螛覈訸詥貈貉釛鉌閡闔阂鞨頜餲魺鲄鶡鹖齕龁龢]>hé;" -"[㕰㦦㬞㵑㷎㷤䎋䓼䚂䪚䳽䴳䵱佫俰嗃壑暍焃煂熇爀癋皬碋翯蠚袔褐賀贺赫隺靎靏鶴鸖鹤鶴]>hè;" -"[嘿潶黑黒]>hēi;" -"[㯊拫痕鞎]>hén;" -"[䓳佷很狠詪]>hěn;" -"恨>hèn;" -"[亨哼悙脝諻]>hēng;" -"[㔰㶇䄓䒛䬖䬝䯒姮恆恒揘楻横橫珩蘅衡誙鐄鑅韹鴴鸻黉黌]>héng;" -"[啈撔澋絎绗]>hèng;" -"[叿呍哄揈渹烘焢薨訇谾軣輷轟轰鍧顭鬨]>hōng;" -"[㖓㢬䂫䃔䆖䉺䍔䜫䞑䡌䡏䧆䨎䩑䪦䫹䫺䲨仜吰垬妅娂宏宖峵嵤弘彋汯泓洪浤渱潂灴玒硔硡竑竤篊粠紅紘紭綋红纮翃翝耾舼苰荭葒葓虹訌讧谹谼鈜鉷鋐閎闳霟鞃鴻鸿]>hóng;" -"[㬴䀧唝嗊晎]>hǒng;" 
-"[㶹澒蕻鍙閧]>hòng;" -"[㗋㤧㬋㮢㺅䂉䗔䙈䫛䳧侯喉帿猴瘊睺矦篌糇翭葔銗鍭餱鯸齁]>hóu;" -"[㖃㸸吼犼]>hǒu;" -"[㕈㫗䞧䪷厚后垕堠後洉缿豞逅郈鄇鮜鱟鲎鲘]>hòu;" -"候>hou;" -"[匢匫呼唿啒嘑垀寣峘幠忽惚昒曶欻歑泘滹烀烼苸虍虖軤轷雐]>hū;" -"[㗅㪶㯛㹱㾰㿥䁫䈸䉉䉿䊀䎁䔯䚛䞱䠒䧼䩴䭅䭌䭍䮸喖嘝囫壶壷壺媩弧抇搰斛楜槲湖瀫焀煳狐猢瑚瓳箶絗縠胡葫蔛蝴螜衚觳醐鍸頶餬鬍魱鰗鵠鶘鶦鹄鹕]>hú;" -"[䗂乕浒滸琥萀虎虝]>hǔ;" -"[㕆㦿㨭㸦㺉䇘䍓䕶䛎䨥䨼䪝䲵互冱冴嚛婟嫭嫮岵帍弖怙戶-戸戽扈护摢擭昈枑楛槴沍沪滬熩瓠祜笏簄綔蔰謼護鄠頀鳸]>hù;" -"[乎唬糊]>hu;" -"[哗嘩花芲錵鷨𢄶]>huā;" -"[㕲㟆㠏㦊㭉㮯䅿䏦䔢䱻䶤华嬅崋滑狯猾磆華蕐螖譁釫鏵铧驊骅]>huá;" -"蘳>huǎ;" -"[㓰㕦㕷㚌㠢㦎㩇䛡䠉划劃化婳嫿摦杹桦槬樺澅画畫畵繣舙觟話諣譮话]>huà;" -"竵>huāi;" -"[㜳䃶䈭䴜佪徊怀懐懷槐櫰淮瀤耲褢褱踝]>huái;" -"[咶坏壊壞孬蘹蘾諙]>huài;" -"[嚾懽歓犿獾讙貛酄驩鴅鵍]>huān;" -"[㡲㦥㵹㶎㿪䍺䝠䥧䦡䭴䮝䴟嬛寏寰懁捖桓梡洹澴狟环環瓛糫繯缳羦肒荁萈萑豲貆鍰鐶锾镮闤阛雈鬟]>huán;" -"[㣪㬊䈠唍嵈睆緩缓藧輐]>huǎn;" -"[㓉㕕㪱㬇㹖㼫䀓䀨䆠䯘唤喚喛垸奂奐宦幻患愌换換擐梙槵涣渙漶烉焕煥瑍痪瘓豢轘逭鰀]>huàn;" -"[欢歡]>huan;" -"[嚝塃巟慌朚肓荒衁]>huāng;" -"[㞷㾠㾮䅣䊗䊣䍿䐵䑟䞹䪄䮲䳨偟兤凰喤堭墴媓崲徨惶撗湟潢煌熿獚瑝璜癀皇磺穔篁簧艎葟蝗蟥趪遑鍠锽隍餭騜鰉鱑鳇鷬黃黄𪏙]>huáng;" -"[㤺㬻䁜䌙䐠宺幌怳恍愰晄榥滉炾熀皝詤謊谎鎤𣄙]>huǎng;" -"[㨪曂皩軦]>huàng;" -"晃>huang;" -"[咴噅噕婎幑徽恢悝拻挥揮撝晖暉洃瀈灰烣煇禈翚翬袆褘詼诙豗輝辉隓隳顪鰴麾]>huī;" -"[囘回囬廻廽恛洄痐茴蚘蛔蛕蜖迴逥鮰]>huí;" -"[㩓㷐䃣䏨䛼悔檓毀毁毇烠燬芔虺蝰譭𠧩]>huǐ;" -"[㑰㑹㒑㜇㞧㤬㥣㨤㨹㩨㬩㰥㱱㷄㻅䂕䅏䇻䌇䕇䙌䙡䛛䜋䤧䧥䩈䫭会僡儶匯卉喙嘒噧嚖圚嬇寭廆彗彙彚恚恵惠慧憓晦暳會槥橞櫘殨汇泋湏滙潓濊烩燴獩璯瞺秽穢篲絵繐繢繪绘缋翙翽荟蔧蕙薈藱螝蟪詯誨諱譓譿讳诲賄贿鏸鐬闠阓靧韢頮颒餯饖]>huì;" -"[婚惛惽昏昬棔殙涽睧睯荤葷閽阍]>hūn;" -"[㑮㨡䛰䫟䰟䴷堚忶棞楎浑渾珲琿繉轋顐餛餫馄魂鼲]>hún;" -"[睔鯶]>hǔn;" -"[㥵䅙䅱䚠䧰俒倱圂慁混溷焝觨諢诨]>hùn;" -"[劐豁鍃锪]>huō;" -"[䄆䄑䣶䯏佸活秮秳萿鈥钬]>huó;" -"[伙夥火邩]>huǒ;" -"[㗲㘞㦜㦯㨯㯉㸌䁨䂄䄀䉟䋭䦚䰥剨咟嗀嚄嚯嚿奯彟彠惑或捇掝攉旤曤楇檴沎湱濩瀖獲瓁癨眓矆矐矱砉礊祸禍穫耯臒臛艧获蓃藿蠖謋讗貨货鑊镬閄雘霍霩靃韄騞鱯鳠鸌鹱嗀]>huò;" -"硧>iǒng;" -"[䤠丌乩僟击刉刏剞勣叽咭唧喞嗘嘰圾基墼姫姬尐屐峜嵆嵇擊朞机枅樍機櫅毄激犄玑璣畸畿矶磯禨积稘稽積笄筓箕簊績绩羁羇羈耭聻肌虀虮蛣襀覉覊觭諅譏譤讥賫賷赍跡跻蹟躋躸迹銈鐖鑇鑙隮雞鞿韲飢饑饥鰿鳮鶏鷄鸡麡齎齏齑𠼻]>jī;" -"[㔕㖢㗊㗱㘍㙫㞃㠍㠎㡇㡮㤂㥛㧀㭲㮟㮨㱞㲺㴕㻷㽺㾊䁒䐕䐚䚐䞘䟌䣢䩯䯂䲯䳭亟亼伋佶偮卙即卽及吉堲塉姞嫉岌嵴嶯庴彶忣急愱戢揤撃擮极棘楫極槉橶檝殛汲湒漃潗濈焏狤疾瘠皍礏禝笈箿籍級级耤脊膌芨莋蒺蓻蕀蕺蝍螏襋觙踖蹐轚郆鈒銡鍓鏶钑集雦雧霵鞊鴶鶺鸄鹡]>jí;" -"[㚡㞆㞛㞦㦸㨈㴉䍤䢳䤒丮几妀己幾戟挤掎撠擠泲犱玘穖蟣踦鈘魕魢鱾麂]>jǐ;" -"[㑧㒫㙨㠖㠱㡭㡶㥍㭰㰟㲅㳵㸄㹄㻑㾒㾵䋟䐀䒁䓫䓽䗁䜞䝸䠏䢋䦇䨖䮺䰏䶓䶩伎兾冀剂剤劑哜嚌坖垍塈妓季寂寄彐彑忌悸惎懻技旡-旣暨曁梞檕檵洎济済漈濟瀱猤璾痵瘈癠瞡祭稩稷穄穊穧紀継繼纪继罽臮芰茍茤葪蓟蔇薊蘎蘮蘻裚褀覬觊計記誋计记跽际際霁霽驥骥髻鬾魝鮆鯚鯽鰶鱀鱭鲚鲫鵋齌𡜱]>jì;" -"[輯辑]>ji;" -"[乫伽佳傢加嘉夹夾家幏拁枷毠泇犌猳珈痂笳耞腵茄葭袈豭貑跏迦鉫鎵镓鴐麚]>jiā;" -"[㪴㮖㼪㿓䀫䀹䕛䛟䩡唊圿埉恝戛戞扴梜浃浹硈舺荚莢蛱蛺袷裌跲郏郟鉿鋏铗铪鞂鞈頬頰颊餄饸鵊]>jiá;" -"[㕅䑝仮假叚婽岬徦斚斝椵榎槚檟玾甲瘕胛賈贾鉀钾]>jiǎ;" -"[䁍价価價嫁架榢駕驾]>jià;" -"稼>jia;" -"[偂兼冿囏坚堅奸姦姧尖惤戋戔揃搛椷樫櫼歼殲湔瀐瀸煎熞熸牋犍猏玪监監睷碊礛笺箋篯籛緘縑缄缣肩艰艱菅菺葌蒹蕑蕳虃豜豣鑯間间靬鞬鞯韀韉顅餰馢騝鬋魐鰜鰹鲣鳒鳽鵳鶼鹣麉]>jiān;" 
-"[㔓㨵㳨㶕䄯䅐䉍䛳䟰䩆䭠䮿䯛䯡䵡䵤䶠俭倹儉减剪囝堿寋弿戩戬拣挸捡揀撿暕柬检検檢減湕瀽瑐睑瞼硷碱笕筧简簡絸繭翦茧藆蠒裥襇襺謇謭譾谫趼蹇鐗鐧锏鰔鹸鹻鹼]>jiǎn;" -"[㓺㔋㣤㦗㨴㯺㰄㺝䇟䟅䤔䥜䧖䬻䭈䭕䵖䵛件侟俴俿健僣僭剑剣剱劍劎劒劔建徤擶旔枧栫梘楗榗毽洊涧渐溅漸澗濺瀳牮珔磵箭糋繝腱臶舰艦荐蔪薦螹襉見覵覸见諌諓諫譼谏賎賤贱趝践踐踺釼鉴鋻鍳鍵鏩鑑鑒鑬鑳键餞饯鰎]>jiàn;" -"[僵壃姜将將摪橿殭江浆漿瓨畕畺疅疆礓繮缰翞茳葁薑螀螿豇韁鱂鳉𤕭]>jiāng;" -"[㢡㯍䁰䉃䋌䒂䙹奖奨奬桨槳滰獎膙蒋蔣襁講讲顜]>jiǎng;" -"[䞪匞夅嵹弜弶摾洚犟糡糨絳绛蔃袶謽酱醤醬降𢘸]>jiàng;" -"匠>jiang;" -"[䴔交僬嘄姣娇嬌峧嶕憍憿椒浇澆焦燋礁穚胶膠膲芁茭茮蕉蛟蟂蟭詨跤郊鐎驕骄鱎鴵鵁鷍鷦鷮鹪𨺹]>jiāo;" -"嫶>jiáo;" -"[㩰㭂㳅㽱㽲䀊䁶䘨䚩䠛䥞䴛佼侥僥儌剿劋勦孂徺挢捁搅摷撟撹攪敽敿晈暞曒湬漅灚烄煍狡璬皎皦矫矯筊絞繳绞缴脚腳臫虠蟜角譑賋踋鉸铰餃饺鮫鲛]>jiǎo;" -"[㠐㬭㰾䂃䆗䣤䪒叫呌嘂嘦噍噭嬓峤嶠徼挍敎教敫斠滘漖潐珓皭窌窖訆譥較轎轿较酵醮醶釂]>jiào;" -"[喈嗟堦巀接掲揭擑椄湝瑎皆秸稭腉菨薢蝔街謯阶階鶛]>jiē;" -"[㓗㓤㔾㘶㛃㝌㞯㦢㨗㨩㮞㮮㸅䀷䂒䂝䂶䅥䌖䕙䗻䣠䥛䲙倢偈偼傑刦刧刼劫劼卩卪喼婕孑岊崨嵥幯截拮捷搩擳昅杰栉栨桀楬楶榤櫛櫭洁滐潔瀄犵疖癤睫碣竭節結结羯脻节莭葜蓵蜐蠘蠞蠽衱袺訐詰誱讦诘趌踕迼鉣鍻鐑頡颉騔鮚鲒]>jié;" -"[媎檞解觧飷]>jiě;" -"[㑘㝏㠹㾏㿍䁓䇒䔿䛺䯰䰺䱄䲸丯介借吤唶堺屆届岕庎徣悈戒楐犗玠琾界畍疥砎紒繲艥芥藉蚧褯誡诫躤鎅魀魪𡽱]>jiè;" -"姐>jie;" -"[今埐嶜巾惍斤津珒瑧矜祲筋紟荕菳衿襟觔金钅鹶黅]>jīn;" -"[㝻㬐㯸㹏䐶䒺䤐䥆䭙仅侭僅儘卺堇尽巹慬槿殣漌瑾盡紧緊菫覲觐謹谨錦锦饉馑]>jǐn;" -"[㨷㬜㯲㰹㱈㴆㶦㶳䀆䆮䋮䌝䑤䖐䗯䝲䫴䶖伒僸凚劤劲勁噤嚍墐妗嫤嬧搢晉晋暜枃歏浕浸溍濅濜烬燼琎瑨璡璶瘽禁縉缙肵荩蓳藎賮贐赆近进進靳齽]>jìn;" -"[䴖京亰兢坕坙婛巠惊旌旍晶泾涇猄箐精経經经聙腈茎荆荊莖菁葏驚鯨鲸鵛鶁鶄麖麠黥鼱精]>jīng;" -"[㘫䜘丼井儆刭剄宑憬憼景暻汬烃烴燛璟璥穽肼蟼警阱頚頸颈]>jǐng;" -"[㕋㢣㣏㬌㵾㹵䔔䡖䵞俓倞傹净凈境妌婙婧弪弳径徑擏敬曔桱梷浄淨濪瀞獍痉痙竞竟竧竫競竸胫脛葝誩踁迳逕鏡镜靓靖静靚靜靖]>jìng;" -"睛>jing;" -"[冂冋坰垧埛扃蘏蘔駉駫]>jiōng;" -"[㓏㖥㢠㤯㷗㷡䌹䐃䢛侰僒冏囧幜泂炅炯烱煚煛熲皛窘絅綗褧迥逈顈颎]>jiǒng;" -"[㑋澃]>jiòng;" -"[丩勼啾揂揪揫朻樛湫牞究糺糾纠萛觓轇醔阄鬏鬮鳩鸠]>jiū;" -"㺵>jiú;" -"[㡱久乆九奺灸玖紤舏赳酒镹韭韮]>jiǔ;" -"[㠇㧕㩆㲃㶭㺩䅢䆒䊆䊘䓘䛮䡂䳎䳔僦匓匛匶厩咎媨就廄廏廐慦捄救旧柩柾桕疚臼舅舊鯦鷲鹫麔齨]>jiù;" -"[㞐凥刟娵居岨崌抅拘椐檋沮涺狙琚疽眗罝腒艍苴菹葅蜛裾趄跔踙陱雎鞠駒驹鮈鴡鶋]>jū;" -"[㘲㥌㩴㮂㽤䋰䏱䕮䗇䜯䡞䤎䪕䰬䱡䴗侷匊婅局巈挶掬桔梮椈橘毩毱泦淗焗犑狊箤粷菊蘜諊趜跼踘蹫躹輂郹鄓鋦锔閰鞫駶驧鵙鵴鶪鼳]>jú;" -"[䃊䄔䅓䈮䢹䶥举咀弆挙擧椇榉榘櫸欅竘筥舉莒蒟蝺袓跙踽齟龃]>jǔ;" -"[㘌㜘㞫㠪㨿㩀㬬㳥䆽䛯䣰䱟䵕䶙俱倨倶具冣剧劇勮句埧埾壉姖寠屦屨岠巨怇怚惧愳懼拒拠据據昛歫泃洰澽炬焣犋秬窭窶簴粔絇耟聚苣虡蚷詎讵貗距踞躆遽邭醵鉅鋸鐻钜锯颶飓駏鮔]>jù;" -"矩>ju;" -"[剶姢娟捐朘涓蠲裐鎸鐫镌鵑鹃]>juān;" -"[㷷卷埢捲臇菤𩜇]>juǎn;" -"[㢧㢾㪻㯞䄅䌸䖭䚈䡓䳪倦劵勌勬巻帣慻桊淃狷獧瓹眷睊睠絹绢罥羂鄄錈锩韏飬鬳]>juàn;" -"[噘屩撅]>juē;" -"[㓸㔃㔢㟲㤜㩱㭈㭾㰐㵐㷾㸕㹟㻕䀗䁷䆕䆢䇶䋉䍊䏐䏣䐘䖼䘿䙠䝌䞷䠇䡈䦆䦼亅倔傕僪决刔劂勪厥噱嚼孒孓屫崛崫嵑嶥弡彏憠憰戄抉挗捔掘撧攫斍柽桷橛橜欔欮殌氒決潏焆焳熦爑爝爴爵獗玃玦玨珏瑴疦瘚矍矡砄絕絶绝臄芵蕝蕨蚗蟨蟩蠼覐覚覺觉觖觼訣譎诀谲谻貜赽趹蹶蹷躩逫鈌鐍鐝钁镢鱊鱖鳜鴃鷢龣𧽸𩪗]>jué;" -"[䞵䟾]>juě;" -"[鴂𠢤]>juè;" -"[军君均姰桾汮皲皸皹碅莙菌蚐袀覠軍鈞銁銞钧頵鮶鲪麇麏麕]>jūn;" -"蜠>jǔn;" -"[㑺㒞㓴㕙㝦㴫㻒㽙䇹䕑䜭䝍俊儁呁寯峻懏捃攈攟晙殾浚濬焌珺畯睃竣箘箟葰蔨郡隽雋餕馂駿骏鵘]>jùn;" -"咖>kā;" -"[佧卡咔胩鉲]>kǎ;" -"髂>kà;" -"[奒开揩痎鐦锎開]>kāi;" -"[㡁䁗䐩䒓凯凱剀剴嘅垲塏恺愷慨暟楷蒈輆鍇鎧铠锴闓闿颽]>kǎi;" -"[㲉䡷勓忾愒愾欬炌炏烗礚]>kài;" 
-"[刊勘堪嵁戡栞龕龛]>kān;" -"[㸝䶫侃偘冚坎埳塪崁惂槛檻欿歁歞砍莰輡轗顑]>kǎn;" -"[䀍䘓墈看瞰矙磡竷衎闞阚]>kàn;" -"[嫝康忼慷槺漮穅粇糠躿鏮鱇]>kāng;" -"[扛摃]>káng;" -"䡉>kǎng;" -"[㰠亢伉匟囥抗炕犺砊邟鈧钪閌闶]>kàng;" -"[尻髛鷱]>kāo;" -"攷>káo;" -"[䯪丂拷栲槀洘烤燺稁考薧鮳鲓𥬯]>kǎo;" -"[䐧犒銬铐靠]>kào;" -"[峇柯棵樖犐珂疴瞌磕科稞窠簻胢苛萪薖蝌趷軻轲顆颗髁]>kē;" -"[壳殼]>ké;" -"[㞹㪃㪙㪡㪼㰤㵣可坷岢嶱敤渇渴炣礍]>kě;" -"[㕉㤩㾧䙐䶗克刻剋勀勊喀嗑垎堁娔客尅恪愘愙揢搕榼氪溘碦緙缂衉課课醘騍骒]>kè;" -"[啃垦墾恳懇肎肯肻豤錹齗齦龂龈]>kěn;" -"[㸧掯珢硍裉褃]>kèn;" -"[劥吭坈坑奟妔挳摼牼硁硜硻銵鍞鏗铿阬]>kēng;" -"䡰>kěng;" -"[倥埪崆悾涳硿空箜錓鵼]>kōng;" -"[㤟孔恐]>kǒng;" -"[㸜控鞚]>kòng;" -"[剾彄抠摳芤袧]>kōu;" -"[㔚劶口]>kǒu;" -"[㓂㰯㲄㽛䳟䳹佝冦叩宼寇怐扣敂滱瞉窛筘簆蔲蔻釦鷇𦶲]>kòu;" -"[刳哭圐堀扝枯桍窟胐跍軲轱郀顝骷]>kū;" -"[䇢苦]>kǔ;" -"[㒂㠸俈喾嚳库庫焅瘔矻秙絝绔袴裤褲趶酷]>kù;" -"[侉咵夸姱荂誇]>kuā;" -"[垮恗銙錁锞]>kuǎ;" -"[㐄䋀挎胯跨骻]>kuà;" -"呙>kuāi;" -"[㧟䓒擓蒯]>kuǎi;" -"[㔞㙕㙗㟴㬮㱮䈛䭝䯤䶐侩儈凷哙噲块塊墤廥快桧檜欳浍澮獪禬筷糩脍膾郐鄶駃鬠鱠鲙]>kuài;" -"[宽寛寬臗髋髖]>kuān;" -"[㯘䕀䥗䲌欵款歀窾]>kuǎn;" -"䤭>kuàn;" -"[劻匡匩哐恇框洭硄筐誆诓軭邼]>kuāng;" -"[㤮抂狂誑诳鵟]>kuáng;" -"[俇夼黋]>kuǎng;" -"[䊯䵃儣况卝圹壙岲懬懭旷昿曠況爌眖眶矌矿礦絖纊纩貺贶躀邝鄺鉱鑛]>kuàng;" -"[亏刲岿巋盔窥窺茥藈蘬虧鍷闚]>kuī;" -"[㙓㚝㨒䕫䟸䤆䧶䯓䳫喹夔奎戣揆晆暌楏楑櫆湀犪睽聧葵蘷虁躨逵鄈鍨頯馗騤骙魁]>kuí;" -"[㛻䠑䦱䫥煃跬蹞頍]>kuǐ;" -"[㕟䈐䍪䕚喟嘳媿尯愦愧憒撌槶樻溃潰瞆瞶篑簣籄聩聭聵腃蒉蔮蕢鐀鑎餽饋馈騩]>kuì;" -"[坤堃婫崐崑惃昆晜焜猑琨瑻菎蜫裈褌貇醌錕锟騉髠髡髨鯤鲲鵾鹍]>kūn;" -"[㩲䠅壸壼悃捆梱硱祵稇稛綑裍閫閸阃齫]>kǔn;" -"[㫻困涃睏]>kùn;" -"擃>kuǒ;" -"[㗥䟯䦢䯺姡廓彉彍扩拡括挄擴漷濶蛞闊阔鞟鞹]>kuò;" -"[嚹垃拉柆磖翋菈]>lā;" -"[㕇揦邋]>lá;" -"[䟑喇藞]>lǎ;" -"[㸊㻋㻝䂰䃳䏀䓥䗶䝓䪉䱫䶛剌揧攋楋爉瓎瘌腊臈臘蜡蝋蝲蠟辢辣鑞镴鬎鯻癩]>là;" -"[啦鞡]>la;" -"[㚓㥎䅘䋱䚅䠭䧒來俫倈婡孻崃崍庲徕徠来梾棶涞淶猍琜筙箂莱萊逨郲錸铼騋鯠鶆麳]>lái;" -"[䂾唻]>lǎi;" -"[㠣㾢䄤䓶䲚勑娕櫴濑瀨瀬癞癩睐睞籁籟藾襰賚賴赉赖頼顂鵣]>lài;" -"[㑣㘓㞩㦨㳕䆾䍀䑌䦨䪍䰐䳿儖兰厱啉囒婪岚嵐幱懢拦攔斓斕栏欄欗澜瀾灆灡燣燷璼礷篮籃籣糷繿葻蓝藍蘫蘭襕襤襴譋讕谰躝钄闌阑韊]>lán;" -"[㛦㨫㩜㰖䊖䌫壈嬾孄孏懒懶揽擥攬榄欖浨漤灠覧覽览醂顲𡒄]>lǎn;" -"[㜮㱫䃹嚂壏滥濫烂燗爁爛爤瓓纜缆鑭镧]>làn;" -"啷>lāng;" -"[㝗㟍㢃㱢㾿䆡䡙䯖䱶俍勆嫏廊桹榔樃欴狼琅瑯稂筤艆莨蓈蓢蜋螂踉躴郎郞鋃锒]>láng;" -"[㓪㙟㫰㮾㾗䀶䁁塱朖朗朤烺硠誏閬阆]>lǎng;" -"[䍚䕞埌崀浪蒗]>làng;" -"[捞撈]>lāo;" -"[㗦㞠㟉㟹㨓䃕䜎䝁䲏僗劳労勞哰唠嘮崂嶗憥浶牢痨癆磱窂簩蟧醪鐒铹]>láo;" -"[㟙㧯䇭䕩䝤䳓䵏佬咾恅栳橑狫老荖轑銠铑]>lǎo;" -"[嫪憦橯涝澇耢耮軂髝]>lào;" -"[㔹㖀㦡乐仂叻哷忇扐捋楽樂氻泐玏砳竻簕肋艻阞韷頱鰳鳓]>lè;" -"[了餎饹]>le;" -"勒>lēi;" -"[㒍㔣㵢㹎䉓䍣䐯䨓壨嫘擂檑欙瓃畾縲纍纝缧罍羸蘲虆蠝鐳鑘镭雷靁鼺]>léi;" -"[㑍㒦㙼㡞㶟㼍㿔䉂䛶䣂䴎傫儡儽厽垒壘樏櫐櫑洡漯灅瘣癗磊磥礨絫耒蕌蕾藟蘽誄讄诔轠鑸頛鸓]>lěi;" -"[㭩㲕㴃䉪䍥䒹䢮䣦䮑埒攂泪涙淚礌礧禷类累纇蘱酹銇錑頪類颣𩔗]>lèi;" -"嘞>lei;" -"[䉄䬋倰塄崚棱楞稜薐]>léng;" -"冷>lěng;" -"[䚏䮚堎愣踜]>lèng;" -"唎>lī;" 
-"[㒿㓯㠟㦒㰀㰚㴝㷰㹈㿛䄜䅻䉫䊍䋥䍠䍦䔆䔣䔧䖥䖽䖿䙰䣓䣫䱘䴻䵓䵩刕剓剺劙厘喱嚟囄嫠孋孷廲悡攡梨梩梸棃樆氂漓漦灕犁犂璃瓈睝离穲篱籬粍粚糎縭缡罹艃菞蓠蔾藜蘺蜊蟍褵謧貍邌醨釐鋫錅鏫鑗離騹驪骊鯬鱺鲡鵹鸝鹂黎黧]>lí;" -"[㸚㾖䗍䤚䧉俚兣娌峛峢峲欐欚浬澧理盠礼禮粴蟸蠡裏豊逦邐醴里鋰锂鯉鱧鲤鳢礼]>lǐ;" -"[㑦㒧㔏㕸㗚㘑㟳㡂㤡㤦㧰㬏㮚㯤㱹㺡㻎㻺㼖㽁㽝㾐㿨䁻䃯䅄䇐䊪䍽䓞䔁䔉䘈䚕䟏䟐䡃䤙䥶䬅䬆䮋䮥䰛䰜䲞䴄䴡䶘丽例俐俪傈儮儷凓利力励勵历厉厤厯厲吏呖唳嚦囇坜塛壢婯屴岦巁悧悷慄戻戾搮攊攦攭斄暦曆曞朸枥栎栗栛栵棙櫔櫟櫪歴歷沥沴涖溧濿瀝爄爏犡猁珕琍瑮瓅瓑瓥疠疬痢癘癧皪盭矋砅砬砺砾磿礪礫礰禲秝立笠筣篥粒粝糲綟纅脷苈苙茘荔莅莉蒞藶蚸蛎蛠蜧蝷蠇蠣蠫詈讈赲跞躒轢轣轹郦酈鉝隶-隸雳雴靂靋鬁鬲鱱鱳鳨鴗鷅鷑麗麜𥝢]>lì;" -"[哩李狸裡]>li;" -"[俩倆]>liǎ;" -"[㓎㜕㝺㟀㡘㢘㥕㦁㶌㺦㼓㾾䁠䃛䆂䏈䙺䥥䨬䭑亷僆劆匲匳嗹噒奁奩奱嫾帘廉怜慩憐梿槤櫣涟溓漣濂濓熑燫琏璉磏稴簾籢籨縺翴联聫聮聯臁莲蓮薕螊蠊裢褳覝謰譧蹥连連鄻鎌鐮镰鬑鰱鲢𢅏]>lián;" -"[㦑㪘㯬㰈㰸䇜䌞嬚摙羷脸膦臉莶薟]>liǎn;" -"[㜃㜻㪝㱨㶑㼑堜媡恋戀敛斂楝歛殓殮浰湅潋澰瀲炼煉瑓練纞练萰蔹蘝蘞裣襝錬鍊鏈链鰊]>liàn;" -"[㹁䝶䣼䭪凉墚梁椋樑涼粮粱糧綡良輬辌駺]>liáng;" -"[㒳㔝䓣䠃䩫両两兩唡啢掚緉脼蜽裲魉魎]>liǎng;" -"[亮哴喨悢晾湸諒谅輌輛辆量鍄]>liàng;" -"[撩蹽]>liāo;" -"[㙩㝋㵳䜍䜮䝀䨅僚嘹嫽寥寮屪嵺嶚嶛廫憀摎敹暸漻潦熮獠璙疗療窷簝繚缭聊膋膫蟟豂賿蹘辽遼鐐镣顟飂飉髎鷚鷯鹨鹩]>liáo;" -"[㶫䄦䑠䩍憭瞭蓼鄝镽]>liǎo;" -"[㡻㺒䉼䍡䎆䢧尞尥尦廖撂料炓燎爒]>liào;" -"巤>liē;" -"䟩>liě;" -"[㤠㧜㬯㭞㯿㲱㸹㼲㽟䁽䅀䉭䓟䜲䟹䴕儠冽列劣劽埓姴挒捩擸洌浖烈煭犣猎獵睙聗脟茢蛚蛶裂趔躐迾颲鬛鬣鮤鱲鴷]>liè;" -"咧>lie;" -"[㔂㝝㷠䚬䢯䫐䮼临亃厸壣嶙惏斴晽暽林淋潾瀶燐獜琳璘甐疄痳瞵矝碄磷箖粼綝繗罧翷臨轔辚遴邻鄰鏻隣霖驎鱗鳞麐麟]>lín;" -"[㐭㨆䕲僯凛凜廩廪懍懔撛檁檩澟癛癝菻]>lǐn;" -"[㖁䉮䗲䫰吝恡悋橉焛粦蔺藺蹸躏躙躪轥閵]>lìn;" -"[拎昤]>līng;" -"[㖫㡵㥄㦭㪮㬡㯪㱥㲆㸳㻏㾉䄥䈊䉁䉖䉹䌢䍅䔖䕘䖅䙥䚖䠲䡼䡿䧙䨩䯍䰱䴇䴒䴫凌刢囹坽夌姈孁岺朎柃棂櫺欞泠淩澪灵燯爧狑玲琌瓴皊睖砱碐祾秢竛笭紷綾绫羚翎聆舲苓菱蔆蕶蘦蛉衑裬詅跉軨輘酃醽鈴錂铃閝陵零霊霛霝靈駖魿鯪鲮鴒鸰鹷麢齡齢龄龗]>líng;" -"[呤岭嶺彾袊阾領领]>lǐng;" -"[令另掕炩]>lìng;" -"伶>ling;" -"㶈>liǒng;" -"[溜熘蹓]>liū;" -"[㐬㽞䉧䋷䗜䚧䬟䭷䰘䱖䱞䶉刘劉嚠媹嵧懰旈旒榴橊沠流浏瀏琉瑠瑬璢畄留畱疁瘤癅硫蒥蓅藰蟉裗遛鎏鎦鏐镏镠飀飅飗駠駵騮驑骝鰡鶹鹠麍]>liú;" -"[嬼柳栁桺橮珋綹绺罶羀鉚鋶铆锍飹]>liǔ;" -"[㙀㨨㶯㽌䄂六坴塯廇澑畂磂翏鐂雡霤餾馏鬸]>liù;" -"[㚅㝫㡣㦕㰍䃧䆍䏊䙪䥢䪊咙嚨屸嶐巃巄昽曨朧栊櫳泷湰滝漋瀧爖珑瓏癃眬矓砻礱礲竜笼篭簼籠聋聾胧茏蕯蘢蠪蠬襱豅躘鏧鑨隆霳靇鸗龍龒龓龙]>lóng;" -"[㙙㴳䡁儱垄垅壟壠拢攏竉陇隴]>lǒng;" -"[㑝㛞㟖㢅哢徿挵梇硦衖贚]>lòng;" -"窿>long;" -"[䁖瞜]>lōu;" -"[㟺㥪㲎㺏䄛䅹䝏䣚䫫䮫䱾剅娄婁廔慺楼樓熡耧耬艛蒌蔞蝼螻謱軁遱鞻髅髏鷜]>lóu;" -"[㪹塿嵝嶁搂摟甊篓簍]>lǒu;" -"[㔷屚漏瘺鏤镂陋]>lòu;" -"[喽嘍]>lou;" -"[噜嚕]>lū;" -"[㠠㢳㪭㭔㱺㿖䡎䮉䰕卢嚧垆壚庐廬攎曥栌櫨泸瀘炉爐獹玈璷瓐盧矑籚纑罏胪臚艫芦蘆蠦轤轳鑪顱颅髗魲鱸鲈鸕鸬黸]>lú;" -"[㔪㢚㯭䕡䲐卤塷掳撸擄擼樐橹櫓滷瀂硵磠舻艣艪蓾虏虜鏀鐪鑥镥魯鲁鹵]>lǔ;" -"[㓐㖨㛬㜙㟤㦇㪐㪖㫽㯝㯟㼾䃙䌒䎑䎼䐂䘵䚄䟿䡜䩮䱚䴪侓僇剹勎勠圥垏娽峍廘彔录戮摝椂樚淕淥渌漉潞熝琭璐甪盝睩硉碌磟祿禄稑穋箓簏簶籙粶膔菉蔍蕗虂螰觮觻賂赂趢路踛蹗輅轆辂辘逯醁錄録錴鏕鏴陆陸露騄騼鯥鵦鵱鷺鹭鹿麓]>lù;" -"[榈櫖櫚氀爈瘘瘻膢藘閭闾馿驢驴]>lǘ;" -"[㭚㻲㾔侣侶偻僂儢吕呂屡屢履挔捛旅梠溇漊祣稆穞穭絽縷缕膂膐褛褸郘鋁铝]>lǚ;" -"[㔧㠥㲶䔞䢖䥨勴寽嵂律慮氯滤濾率箻綠緑绿膟葎虑鑢]>lǜ;" -"[㝈㡩㱍䖂䜌圝圞娈孌孪孿峦巒挛曫栾欒滦灓灤癵羉脔臠虊銮鑾鵉鸞鸾]>luán;" -"卵>luǎn;" -"[乱亂薍釠]>luàn;" -"[攣癴]>lüán;" -"㨼>luè;" -"[㑼㔀䂮䌎䛚䤣圙擽畧稤鋝鋢锊]>lüè;" 
-"[抡掄]>lūn;" -"[㖮㷍䈁䑳仑伦侖倫囵圇婨崘崙惀沦淪溣綸纶腀菕蜦踚輪轮錀陯鯩]>lún;" -"[埨稐耣]>lǔn;" -"[碖論论]>lùn;" -"絯>lǜn;" -"[啰囉罗]>luō;" -"[㑩㼈㽋䊨䯁儸摞椤欏氇氌猡玀箩籮羅萝蔂蘿螺覙覶覼逻邏鏍鑼锣镙饠騾驘骡鸁]>luó;" -"[㒩㦬㩡㰁㱻倮攞曪瘰癳砢臝蓏蠃裸躶鎯]>luǒ;" -"[㓢㴖㿚䀩䇔䈷䌱䌴嗠峈洛濼烙犖珞硌笿絡纙络荦落袼酪雒駱骆鮥鴼鵅]>luò;" -"[妈媽嬤嬷]>mā;" -"[㦄䗫䳸犘痲蔴蟇麻]>má;" -"[㐷䣕䣖溤玛瑪码碼蚂螞鎷馬马鰢鷌]>mǎ;" -"[㑻㜫㨸㾺䯦傌唛嘜帓杩榪犸獁睰祃禡罵閁駡骂]>mà;" -"[吗嗎嘛蟆]>ma;" -"[㜥㼮䁲䚑䨪埋薶霾]>mái;" -"[买嘪荬蕒買鷶]>mǎi;" -"[䈿䘑䜕䨫䮮佅劢勱卖売眿脈脉蝐賣迈邁霢麥麦]>mài;" -"[㒼㗄㙢䅼䊡䐽䑱䛲䟂䯶䰋姏悗慲摱槾璊瞒瞞蛮蠻謾谩蹒蹣顢颟饅馒鬘鰻鳗]>mán;" -"[㛧䜱屘満满滿矕螨蟎襔鏋鮸]>mǎn;" -"[㗈㡢㬅㵘䕕䝡䝢䡬僈墁嫚幔慢曼漫澫澷熳獌縵缦蔄蔓鏝镘鬗]>màn;" -"[㝑㟌㟿㡛㻊䀮䅒䈍䟥䵨吂哤娏尨忙恾杗杧氓汒浝牤牻狵痝盲盳硭笀芒茫蘉蛖邙釯鋩铓駹鼆]>máng;" -"[㙁㟐㬒䁳䒎䖟壾漭硥茻莽莾蟒蠎]>mǎng;" -"[猫貓]>māo;" -"[㝟㲠䅦兞堥媌嫹旄枆毛渵牦犛矛罞茅蝥蟊覒軞酕錨锚髦髳鶜]>máo;" -"[㚹㧇冇卯夘峁戼昴泖笷茆]>mǎo;" -"[㒵㒻㡌㧌㪞㫯㮘㴘㺺㿞䀤䋃䓮䡚䫉冐冒媢帽懋暓柕楙毷瑁皃眊瞐耄艒芼茂萺蓩袤貌貿贸鄮]>mào;" -"[么麼]>me;" -"[㶬㺳䊈䍙䒽䤂呅堳塺媒嵋徾攗枚栂梅楣楳槑沒没湄湈煤猸玫珻瑂眉睂禖篃脄脢腜苺莓葿郿酶鋂鎇镅霉鶥鹛黴𪃏]>méi;" -"[䆀䓺䜸凂媄媺嬍嵄挴毎每浼渼燘美鎂镁黣]>měi;" -"[㭑䀛䉋䊊䰨䰪䵢妹媚寐抺昧沬煝痗眛睸祙蝞袂跊韎鬽魅]>mèi;" -"[㡈㨺䊟䝧䫒扪捫樠穈菛虋鍆钔門閅门]>mén;" -"暪>měn;" -"[㥃㦖㱪㵍悶懑懣焖燜闷]>mèn;" -"[们們]>men;" -"[㙹㠓㩚䀄䇇䉚䑃䑅䒐䓝䗈䙦䙩䤓䰒䲛䴌䴿䵆儚冡幪懞懵曚朦橗檬氋濛獴甍甿盟瞢矇矒礞艨苎莔萌萠蒙蕄虻蝱鄳鄸雺靀饛鯍鸏鹲]>méng;" -"[㚞䏵勐猛瓾艋蜢蠓錳锰鯭]>měng;" -"[㜴㝱䠢䥂䥰夢夣孟懜梦溕霥霿]>mèng;" -"[咪嘧眯瞇]>mī;" -"[㜷㟜㠧㣆㩢㸏䊳䋛䌕䌘䍘䕳䕷䛧䤍䥸䪾䴢冞弥彌戂擟攠檷瀰爢猕獼瓕祢籋糜縻罙蒾蘪蘼詸謎谜迷醚醾醿釄镾鸍麊麋麛𨢥]>mí;" -"[㝥㥝㰽㳽䭧䱊侎孊弭敉沵洣渳濔灖眫米脒葞蔝銤靡]>mǐ;" -"[㜆㨠㫘㳴㴵㵋㸓䁇䉾䌏䌐䌩䖑䛉䛑䣾䤉䭩䮭冖冪塓宓宻密峚幂幎幦榓樒櫁汨泌淧淿滵漞濗熐祕秘簚糸羃蔤藌蜜蠠覓覔覛觅謐谧鼏]>mì;" -"[㒙㝰㬆㮌㰃䃇䏃䫵䰓婂媔嬵宀棉檰櫋眠矈矊矏綿緜绵臱芇蝒醎]>mián;" -"[㛯㤁㻰䀎䤄丏偭免冕勉勔喕娩愐汅沔湎眄絻緬缅腼葂麪麫]>miǎn;" -"[㴐糆面靣麵麺]>miàn;" -"喵>miāo;" -"[㑤㠺䁧䖢描瞄緢苗鱙鶓鹋]>miáo;" -"[㦝䅺劰杪淼渺眇秒篎緲缈藐邈]>miǎo;" -"[妙庙庿廟玅竗]>miào;" -"[乜吀咩哶孭羋芈]>miē;" -"[㒝䁾䈼䘊䩏幭懱搣櫗滅灭礣篾蔑薎蠛衊覕鑖鱴鴓]>miè;" -"[㟩㟭㢯䁕䂥䃉䋋䟨䡑䡻䪸䲄姄岷崏忞忟怋捪敯旻旼民珉琘瑉痻盿砇碈緍緡缗罠苠鈱錉鍲閺]>mín;" -"[㞶㥸㨉䡅僶冺刡勄悯愍慜憫抿敃敏暋泯渑湣潣澠皿笢簢閔閩闵闽鰵鳘黽黾]>mǐn;" -"榠>mīng;" -"[㝠䄙䆨䆩䊅䫤佲冥名嫇明暝朙洺溟猽眀瞑蓂螟覭鄍酩銘铭鳴鸣]>míng;" -"[㟰㫥凕姳慏眳茗]>mǐng;" -"[䒌命詺]>mìng;" -"[謬谬]>miù;" -"摸>mō;" -"[䃺䉑䯢劘嚤嚰嫫摩摹擵模橅磨糢膜蘑謨谟饃饝馍髍魔]>mó;" -"[䩋懡抹]>mǒ;" -"[㱄㱳㷬㷵㹮䁼䁿䏞䒬䘃䜆䬴䮬䱅䳮䴲嗼嚜圽塻墨妺嫼寞帞慔昩暯末枺歾歿殁沫洦湐漠瀎爅瘼皌眜眽瞙砞礳秣粖絈縸纆耱茉莈莫蓦藦蛨蟔衇袹謩貃貊貘鄚銆鏌镆陌霡靺驀鬕魩默黙]>mò;" -"[庅麽]>mo;" -"哞>mōu;" -"[㭌䏬䗋䥐䱕侔劺洠牟眸瞴繆缪蛑蟱謀谋鉾鍪鴾麰]>móu;" -"[䍒某踇]>mǒu;" -"[愗瞀]>mòu;" -"[䱯恈毪氁獏譕]>mú;" -"[䥈亩姆姥峔母牡牳畆畒畝畞畮砪胟鉧]>mǔ;" -"[㒇㜈㣎㧅㾇䀲䊾䑵䧔仫募坶墓幕幙慕暮木楘毣沐炑牧狇目睦穆苜莯蚞鉬钼雮霂鞪鶩鹜]>mù;" -"拇>mu;" -"嗯>ń;" -"㐻>ň;" -"[䏧䛔䫱嗱拏拿誽鎿镎]>ná;" -"[乸哪雫]>nǎ;" 
-"[㗙㨥㴸䀑䅞䇣䇱䈫䎎䖓䖧䟜䪏䱹妠捺笝納纳肭蒳衲袦豽貀軜那鈉钠靹魶]>nà;" -"[㾍䍲䘅䯮摨熋釢]>nái;" -"[乃奶妳嬭廼氖疓艿迺]>nǎi;" -"[㮈㮏㲡倷奈柰榒渿耐萘螚褦錼鼐]>nài;" -"囡>nān;" -"[㓓㽖䔜䕼䛁䶲侽南喃奻娚暔枏枬柟楠男畘莮萳諵难難]>nán;" -"[㫱䁪䈒䔳戁揇湳罱腩蝻赧]>nǎn;" -"婻>nàn;" -"[乪囔]>nāng;" -"[䂇嚢囊欜]>náng;" -"[㶞攮曩灢饢馕]>nǎng;" -"[㒄儾齉]>nàng;" -"[㞪㺀䃩䄩䑋䛝䫸䴃呶夒峱嶩巎巙怓憹挠撓桡橈猱獶獿硇繷詉譊鐃铙髐𥑪]>náo;" -"[㑎㛴㺁䜀䜧匘垴堖嫐恼悩惱瑙碯脑腦]>nǎo;" -"[淖閙闹鬧]>nào;" -"[䎪䭆訥讷]>nè;" -"[吶呐呢]>ne;" -"[㼏䲎娞脮腇餒馁鮾鯘]>něi;" -"[㕯㖏㘨㨅䡾䳖內内氝]>nèi;" -"黁>nēn;" -"齳>něn;" -"[㜛㯎㶧嫩嫰]>nèn;" -"[㴰䏻嬣能薴]>néng;" -"㲌>nèng;" -"㕶>ng̀;" -"妮>nī;" -"[㞾㪒㹸䘦䘽䛏䝚倪坭埿婗尼屔怩泥淣狋猊秜籾臡蚭蜺觬貎跜輗郳霓鯓鯢鲵麑齯]>ní;" -"[㣇㵫䕥䦵䧇䭲䰯伱伲你儗儞孴抳拟擬旎柅狔禰苨薿鈮鉨铌隬馜]>nǐ;" -"[㠜㥾㦐㲻䁥䘌䵑䵒匿堄嫟嬺屰嶷惄愵昵暱氼溺痆睨糑縌胒腻膩迡逆鷁鷊鹝鹢]>nì;" -"[䄭䄹䩞䬯年拈秊秥鮎鯰鲇鲶黏]>nián;" -"[㘝㞋䚓捻撚撵攆碾簐跈蹨躎輦辇]>niǎn;" -"[㲽卄唸埝姩廿念涊淰艌鼰齞]>niàn;" -"[嬢孃]>niáng;" -"[䖆酿醸釀]>niàng;" -"娘>niang;" -"[㒟㜵㠡㭤䃵䐁䙚䦊䮍嫋嬝嬲樢茑蔦袅裊褭鳥鸟𢶑𢸣]>niǎo;" -"[㞙㳮尿脲]>niào;" -"[惗捏揑踗鈢鉩錜鑈]>niē;" -"[㡪苶]>nié;" -"[㖖㘿㙞㚔㜸㩶㮆㴪㸎䂼䄒䌜䜓䯀䯅䯵啮喦嗫噛嚙囁囓圼孼孽嵲帇摰敜枿棿槸櫱涅湼疌篞糱糵聂聶臬臲菍蘖蠥踂蹑躡鎳鑷钀镊镍闑陧隉顳颞齧]>niè;" -"[㤛䋻䚾䛘囜您]>nín;" -"拰>nǐn;" -"[㝕㲰䗿䭢儜凝咛嚀宁寍寕寗寜寧拧擰柠檸狞獰甯聍聹鑏鬡鸋]>níng;" -"[橣矃]>nǐng;" -"[㣷㿦䔭佞侫泞濘]>nìng;" -"妞>niū;" -"[䀔䒜牛]>niú;" -"[㺲䏔忸扭炄狃紐纽莥鈕钮靵]>niǔ;" -"[䋴衂]>niù;" -"[㶶䁸䢉侬儂农哝噥檂欁浓濃癑禯秾穠脓膿蕽襛農辳醲鬞齈]>nóng;" -"䵜>nǒng;" -"[弄挊]>nòng;" -"[㝹䨲羺譨]>nóu;" -"[䅶䘫䰭啂槈獳耨譳鎒鐞]>nòu;" -"[㚢伮奴孥笯蒘駑驽鴑]>nú;" -"[努弩砮胬]>nǔ;" -"[䢞怒抐搙]>nù;" -"[女籹釹钕]>nǚ;" -"[㵖䖡䘐䚼䶊恧朒衄]>nǜ;" -"[㬉䎡䙇暖渜煖煗餪]>nuǎn;" -"偄>nuàn;" -"[䖈䖋硸]>nuè;" -"[䨋疟瘧虐謔谑]>nüè;" -"[㑚㔮㰙傩儺娜挪捼梛郍]>nuó;" -"[㛂㡅橠砈砹]>nuǒ;" -"[㐡䚥愞懦懧挼掿搦搻稬穤糥糯諾诺蹃逽]>nuò;" -"[喔噢]>ō;" -"哦>ó;" -"[呕嘔櫙欧歐殴毆熰瓯甌眍瞘謳讴鏂鴎鷗鸥]>ōu;" -"[吽齵]>óu;" -"[㒖㼴䚆䯚偶吘塸耦腢蕅藕]>ǒu;" -"[㛏䌂䌔怄慪沤漚]>òu;" -"[啪夿妑皅舥葩蚆趴]>pā;" -"[䯲䶕杷潖爬琶筢耙跁]>pá;" -"[帊帕怕袙]>pà;" -"拍>pāi;" -"[㵺俳徘排棑牌猅篺簰簲輫]>pái;" -"俖>pǎi;" -"[㭛䖰哌汖派湃蒎鎃]>pài;" -"[㐴攀潘畨眅砙]>pān;" -"[䃑䃲䈲䰉䰔媻幋搫柈槃瀊盘盤磐磻縏蒰蟠跘鎜鞶]>pán;" -"[坢奤]>pǎn;" -"[㳪冸判叛拚沜泮溿炍牉畔盼袢襻詊鋬鑻頄頖]>pàn;" -"[乓滂膖]>pāng;" -"[㤶㥬㫄䅭䨦䮾厐厖嫎庞庬彷徬旁篣胮舽蒡螃逄雱霶騯髈龎龐]>páng;" -"[䒍嗙耪覫]>pǎng;" -"[㜊炐肨胖]>pàng;" -"[抛拋脬]>pāo;" -"[㚿䛌䩝刨匏咆垉庖炰爮狍袍軳鞄齙龅]>páo;" -"跑>pǎo;" -"[㘐㯡䶌奅泡炮疱皰砲礟礮靤髱麭]>pào;" -"[呸垺妚娝岯怌柸肧胚衃醅]>pēi;" -"[㟝䣙䪹䫠䲹培毰碚裴裵賠赔邳阫陪陫]>péi;" -"[昢琣]>pěi;" -"[㤄㧩㫲㳈䊃伂佩姵帔斾旆沛浿珮笩苝轡辔配霈馷]>pèi;" -"[呠喯喷噴]>pēn;" -"[湓瓫盆葐]>pén;" -"[翉翸]>pěn;" -"歕>pèn;" -"[匉嘭怦恲抨梈漰澎烹砰硑磞軯閛駍]>pēng;" 
-"[㛔㥊䄘䡫䰃䴶倗傰埄堋塜塳弸彭憉挷朋棚椖樥淜痭硼稝竼纄膨芃蓬蟚蟛輣錋鑝韸韼驡髼鬅鬔鵬鹏]>péng;" -"[捧淎皏]>pěng;" -"[㼞掽椪甏碰踫]>pèng;" -"篷>peng;" -"[丕伾劈噼坯怶悂憵批披抷旇炋狉狓砒磇礕秛秠翍耚豾釽鈈鈹鉟銔铍霹髬髲鮍鲏鴄]>pī;" -"[㔥㯅啤埤壀朇枇毗毘焷犤玭琵疲皮笓紕纰罴羆肶脾腗膍蚽蜱螷豼貔郫鈚錍阰陴駓魾鼙]>pí;" -"[㨽䏘䚰䚹䤏䫌䰦仳匹噽嚭圮崥庀疋痞癖脴苉苤諀銢隦]>pǐ;" -"[㿙䑀䑄䠘䡟䤨䴙僻媲嫓屁揊擗淠渒潎澼濞甓礔譬辟闢鷿鸊]>pì;" -"[偏囨媥犏篇翩萹頨鶣]>piān;" -"[㛹㼐䮁楄楩胼腁諚諞谝賆蹁輧駢騈骈骿]>pián;" -"[覑貵]>piǎn;" -"[㓲㸤䏒片騗騙骗]>piàn;" -"[嘌彯旚翲薸螵飃飄飘魒]>piāo;" -"[㼼䕯䴩淲瓢竂蚫闝]>piáo;" -"[㩠㵱㹾摽殍犥皫瞟縹缥膘蔈藨醥]>piǎo;" -"[㬓㺓䏇僄剽勡嫖徱慓漂票篻顠驃骠]>piào;" -"[撆暼氕瞥]>piē;" -"[䥕丿撇鐅]>piě;" -"嫳>piè;" -"[姘拼涄礗]>pīn;" -"[㰋嚬娦嫔嬪獱琕薲貧贫頻顰频颦]>pín;" -"[品榀]>pǐn;" -"[䀻牝聘]>pìn;" -"[乒俜娉焩砯頩]>pīng;" -"[㺸㻂䍈䶄凭凴呯坪屏屛帡帲幈平慿憑枰泙洴玶瓶甁甹竮箳簈缾聠胓艵苹荓萍蓱蘋蚲蛢評评軿郱鮃鲆鵧𦚓]>píng;" -"[坡泊溌癹鉕鏺钷陂頗颇]>pō;" -"[㨇㰴嘙婆櫇皤蔢鄱]>pó;" -"[叵尀笸]>pǒ;" -"[㛘䄸䎊䞟䣪䣮䨰䪖䯙岶廹炇烞狛珀破砶粕蒪迫酦醗醱釙钋魄]>pò;" -"[泼潑]>po;" -"剖>pōu;" -"[㧵抔抙捊掊裒錇锫]>póu;" -"[㕻勏哣婄廍棓犃瓿]>pǒu;" -"咅>pòu;" -"[仆噗墣扑抪撲擈攴濮痡醭陠]>pū;" -"[㒒㯷㲫㺪䈬䈻䑑䔕䗱䧤䴆僕匍樸毞獛璞瞨穙箁纀脯菐菩葡蒱蒲襆襥蹼轐酺鏷镤]>pú;" -"[㹒䲕圃圤普朴檏氆浦溥潽烳誧諩譜谱鐠镨]>pǔ;" -"[㬥曝瀑舖舗鋪铺]>pù;" -"[七倛僛凄唭嘁墄妻娸悽慼慽攲期柒栖桤桼棲榿欺沏淒漆磎磩粞緀萋諆郪霋顣魌鶈鸂𠀁]>qī;" -"[㜎㟓㟚㟢㩽㯦䄢䅲䉻䐡䑴䓅䞚䟚䡋䧘䧵䩓䭶䭼䰇䱈䲬䳢䶒䶞亓亝其剘圻埼墘奇岐岓崎帺忯愭懠掑斉斊旂旗棊棋檱櫀歧淇濝猉玂琦琪璂畁畦疷碁碕祁祈祺禥竒粸綦綨纃翗耆脐臍艩芪荠萁萕蕲薺藄蘄蚑蚔蚚蛴蜝蜞蠐衹跂踑軝迉鄿釮錡錤锜頎颀騎騏骐骑鬐鬿鮨鯕鰭鲯鳍鵸鶀麒齊齐]>qí;" -"[㒅㞓㥓㩩㫓㾨䄎䄫䉝䋯䎢䏿䒻䔇䛴䡔䭫䭬乞企启呇唘啓啔啟婍屺岂敧晵杞棨槣盀綮綺绮芑芞裿諬豈起邔闙]>qǐ;" -"[㓞㞚㣬㮑㼤䀈䀙䁈䁉䅤䌌䏅䏌䏠䒗䙄䚉䚍䟄䢀䫔䬣䰴呮咠噐器夡契弃徛忔憇憩摖暣栔棄气気氣汔汽泣湆湇炁甈盵矵砌碛碶磜磧礘綥緝缉罊葺蟿訖諿讫迄鏚鼜]>qì;" -"戚>qi;" -"掐>qiā;" -"䠍>qiá;" -"[跒酠]>qiǎ;" -"[㓣㡊㤉䜑䨐䯊䶝匼圶帢恰殎洽]>qià;" -"[㡨仟佥僉兛千圱圲奷婜孯岍幵悭愆慳扦拪掔搴撁攐攑攓杄檶櫏欦汘汧牵牽签簽籤粁縴羟羥肷膁臤芊蚈褰諐謙譣谦谸迁遷釺鉛鋟鐱钎铅锓阡韆顩騫骞鬜鬝鵮鹐]>qiān;" -"[㦮㨜㩮㸫䁮䈤䍉䕭䖍䨿仱前媊岒忴扲拑掮揵榩歬漧潛潜濳灊燂箝葥虔鈐鉗銭錢钤钱钳雂騚鬵鰬黔黚]>qián;" -"[㧄䪈䭤嵰撖槏浅淺繾缱蜸譴谴遣]>qiǎn;" -"[㐸㜞㟻㪠㯠䈴䊴䥅䦲䫡倩傔儙刋堑塹壍嵌悓慊棈椠槧欠歉皘篏篟綪芡茜蒨蔳輤]>qiàn;" -"[呛嗆嶈戕戗斨枪椌槍溬牄猐玱瑲矼篬羌羗羫腔蜣謒跄蹌蹡錆鎗鏘鏹锖锵镪]>qiāng;" -"[㩖丬墙墻嫱嬙廧強强彊樯檣漒爿牆蔷薔蘠軖]>qiáng;" -"[㛨䅚傸勥墏抢搶磢繈繦]>qiǎng;" -"[䵁唴炝熗羻]>qiàng;" -"[劁墝墽嵪幧悄敲橇硗磽繑跷踍蹺蹻郻鄡鄥鍫鍬鏒鐰锹鞒頝骹髜]>qiāo;" -"[㚁㝯䀉䎗䩌䱁乔侨僑喬嘺嶣憔桥樵橋燆癄瞧硚礄簥荞蕎藮譙谯趫趬鐈鞽顦]>qiáo;" -"[㚽㡑㤍䲾巧愀釥]>qiǎo;" -"[㢗㪣㴥䃝䆻䇌俏僺峭帩撬撽窍竅翘翹誚诮躈陗鞘韒髚]>qiào;" -"[㚗䦧癿]>qié;" -"且>qiě;" -"[㓶㗫㛍㛙㤲㥦㫸㰰㰼㹤㾀㾜䟙䤿䬊切匧妾怯悏惬愜挈朅洯淁穕窃竊笡箧篋緁藒蛪踥鍥锲鯜]>qiè;" -"[亲侵媇嵚嶔欽瀙綅衾親誛钦顉駸骎鮼]>qīn;" -"[㘦㢙㩒㪁㮗䔷䖌䦦勤厪嗪噙嫀庈廑懃懄捦擒斳檎澿珡琴琹禽秦耹芩芹菦蚙螓蠄赺鈙靲鳹鵭]>qín;" -"[㝲㾛䠴坅寑寝寢昑曋螼赾]>qǐn;" -"[㞬㤈䈜吢吣唚寖寴抋揿搇撳沁菣]>qìn;" -"[倾傾卿啨圊寈氢氫氰淸清狅蜻軽輕轻郬靑青鯖鲭]>qīng;" -"[㯳䝼䞍䲔剠勍夝情擎晴暒樈檠殑甠晴]>qíng;" 
-"[㷫䔛䯧庼廎檾請请頃顷]>qǐng;" -"[䋜䌠凊庆慶掅殸汫漀碃磘磬罄謦鑋靘]>qìng;" -"[匔焪穹芎銎]>qiōng;" -"[㒌㧭㮪㷀㼇䅃䆳䊄䓖䛪䠻儝卭宆惸桏棾橩焭煢琼璚瓊睘瞏穷窮竆笻筇茕藑藭蛩赹跫輁邛]>qióng;" -"苘>qiǒng;" -"[㐀䆋丘丠坵媝恘楸秋秌穐篍緧萩蓲蚯蝵蟗蠤趥邱鞦鞧鰍鱃鳅鶖鹙龝𠀉𩝠]>qiū;" -"[㐤㕤㞗㟈㤹㥢㧨㭝㷕㺫䊵䎿䜪䟵䣇䤛俅厹叴唒囚崷巯巰扏朹梂毬求汓泅浗渞煪犰玌球璆皳盚紌絿肍脙艽苬莍虬虯蛷蝤裘觩訄訅賕赇逎逑遒酋釓釚銶钆鮂鯄鰌鰽鼽]>qiú;" -"[㼒䞭搝糗]>qiǔ;" -"[䟬䠗殏螑]>qiù;" -"[㠊伹佉凵匤区區呿坥屈岖岴嶇憈抾敺曲瞿砠祛胠蛆蛐袪誳趋趨躯軀镼阹駆駈驅驱髷魼鰸鱋鶌黢]>qū;" -"[㖆㜹㣄㯫㲘䀠䂂䋧䝣䞤䟊䵶佢劬匷忂懅戵斪朐欋氍淭渠灈璖璩痀癯磲籧翑翵胊臞菃葋蕖蘧螶蟝蠷衢豦躣軥鑺鴝鸜鸲麯麴麹鼩𪍸]>qú;" -"[䶚取娶浀竬筁紶詓齲龋]>qǔ;" -"[㧁㫢㰦䁦䒧䠐刞厺去湨覰覷覻觑詘诎趣閴闃阒麮鼁]>qù;" -"[圈圏姾弮悛棬箞絟鐉]>quān;" -"[㒰㟨㟫䀬䑏䟒䠰佺全啳婘孉峑巏恮惓拳搼权権權泉洤湶牷犈瑔痊硂筌荃葲蜷蠸觠詮诠踡輇辁醛銓铨顴颧駩騡鬈鰁鳈鷤齤]>quán;" -"[䊎呟奆汱犬琄甽畎綣縓绻虇詃]>quǎn;" -"[䄐券劝勧勸烇牶玔絭荈]>quàn;" -"[缺蒛𩨭]>quē;" -"瘸>qué;" -"[㕁㩁㰌㱋㱿㴶㾡䇎䦬䧿却卻咑埆塙墧寉崅悫愨慤搉榷殻毃灍燩琷皵硞确碏確碻礐礭舭趞闋闕阕阙雀]>què;" -"[鵲鹊]>que;" -"[囷夋峮杶踆輴逡鰆]>qūn;" -"[㪊㿏䭽宭帬漘羣群裙裠錞]>qún;" -"蝽>qǔn;" -"[儿兒]>r;" -"[㜣㲯㸐㾆䑙䖄䫇嘫然燃繎肰蚦蚺蛅衻袇袡髥髯𤡮]>rán;" -"[㚩㯗㿵䎃䒣䣸䤡冄冉呥姌媣染橪珃苒]>rǎn;" -"[㚂䉴儴勷攘瀼獽瓤禳穣穰蘘躟鬤]>ráng;" -"[嚷壌壤爙]>rǎng;" -"[懹譲讓让]>ràng;" -"[㹛䫞荛蕘蛲蟯襓饒饶]>ráo;" -"[㑱娆嬈扰擾]>rǎo;" -"[繞绕遶隢顤]>rào;" -"[喏惹]>rě;" -"[渃热熱]>rè;" -"[䌾䴦人亻仁壬忈忎朲秂紝絍纴芢鈓銋魜鴹]>rén;" -"[㣼䏕䏰䭃忍栠栣棯秹稔腍荏荵]>rěn;" -"[㠴㶵㸾䀼䇮䋕仞仭任刃刄妊姙屻恁扨杒梕牣紉纫肕衽袵訒認认讱賃赁軔軠轫靭靱韌韧飪餁饪鵀]>rèn;" -"扔>rēng;" -"[㭁㺱䄧䚮仍礽辸陾]>réng;" -"芿>rèng;" -"[䒤囸日氜衵釰鈤馹驲]>rì;" -"茸>rōng;" -"[㘇㝐㣑㭜㲓㲨㺎㼸䇀䇯䈶䘬䠜䡆䡥䤊䩸媶容嵘嶸戎曧栄榕榮榵毧溶烿熔爃狨瑢穁絨縙绒羢肜茙荣蓉蝾融螎蠑褣鎔镕駥髶鰫鷛]>róng;" -"[㲝䢇傇冗宂搑氄軵]>rǒng;" -"鴧>ròng;" -"[㖻㽥䐓䧷䰆媃揉柔楺沑渘瑈瓇禸脜腬葇蝚蹂輮鍒鍕鞣韖騥鰇鶔]>róu;" -"[煣粈糅]>rǒu;" -"[䄾宍肉]>ròu;" -"[㨎㹘䋈䰰儒嚅如孺帤挐曘桇渪濡燸筎臑茹蕠薷蠕袽襦邚醹顬颥鱬鴽]>rú;" -"[㦺乳侞擩汝肗辱銣铷]>rǔ;" -"[傉入嗕媷洳溽縟缛蓐褥鄏]>rù;" -"[䓴堧壖撋]>ruán;" -"[㼱㽭䞂䪭媆朊瑌瓀盶碝礝緛耎腝蝡軟輭软阮]>ruǎn;" -"[㮃䅑甤緌蕤]>ruí;" -"[惢桵橤繠蕊蕋蘂蘃]>ruǐ;" -"[㓹㛱㪫㲊䌼䓲叡壡枘汭瑞睿芮蚋蜹銳鋭锐]>ruì;" -"[犉瞤]>rún;" -"[㠈䦞橍润潤膶閏閠闰]>rùn;" -"[䐞偌叒婼弱楉焫爇箬篛若蒻鄀鰙鰯鶸]>ruò;" -"[仨撒]>sā;" -"[㒎䊛洒灑靸]>sǎ;" -"[㪪㳐㽂䘮䙣䬃卅摋攃櫒脎萨薩趿鎝颯飒馺]>sà;" -"[塞愢揌毢腮顋鰓鳃]>sāi;" -"[㗷䈢]>sǎi;" -"[僿嗮簺賽赛]>sài;" -"噻>sai;" -"[三叁嘇弎攕毵毿犙糁糝鬖]>sān;" -"[㧲䀐䉈䊉䫩仐伞傘糂糣糤繖鏾饊馓]>sǎn;" -"[㤾㪔㪚䫅俕散潵閐]>sàn;" -"[桑桒]>sāng;" -"[䡦䫙嗓搡磉褬鎟顙颡]>sǎng;" -"[丧喪]>sàng;" -"[慅掻搔溞瘙繅缫臊颾騒騷骚鰠鰺鱢鳋]>sāo;" -"[㛮䕅嫂扫掃]>sǎo;" -"[㿋䐹䖣喿埽氉矂]>sào;" -"[㒊㥶㮦㱇㴔㻭䉢䔼䨛啬嗇懎歮歰涩澀澁濇瀒瑟璱穑穡繬翜色譅趇轖銫铯雭飋]>sè;" -"[曑森椮槮甧穼篸蔘襂]>sēn;" -"[僧鬙]>sēng;" -"[乷刹剎唦帴杀桬榝樧殺沙煞猀痧砂硰粆紗纱莎蔱裟鎩铩髿魦鮻鯊鯋鲨]>shā;" -"[傻儍訯]>shǎ;" -"[㚫㛼㰱䈉䝊䮜䵘䶎倽厦唼啑啥喢帹廈歃箑翣萐閯霎]>shà;" -"[筛篩]>shāi;" 
-"繺>shǎi;" -"[㬠晒曬閷]>shài;" -"[删刪剼姍姗山幓彡挻搧杉檆潸澘烻煽狦珊穇笘縿羴羶脠舢芟苫衫跚軕邖]>shān;" -"[㚒㣣㨛㪎㶒䠾晱睒覢醦閃闪陕陝陿]>shǎn;" -"[㣌㪨䄠䆄䚲䥇䦂䦅䱇䱉䴮傓僐善墠墡嬗扇掞摲擅汕灗熌疝磰禅繕缮膳蟮蟺訕謆譱讪贍赡赸鄯釤銏鐥钐饍騸骟鱓鱔鳝]>shàn;" -"[伤傷商墒慯殇殤滳漡熵蔏螪觞觴謪鬺]>shāng;" -"[䬕扄晌賞赏]>shǎng;" -"[丄上姠尙尚蠰銄鑜]>shàng;" -"裳>shang;" -"[弰捎旓梢烧焼燒稍筲艄莦蛸輎髾]>shāo;" -"[㲈㸛勺杓牊玿竰韶]>sháo;" -"[㪢䒚䔠少]>shǎo;" -"[㷹䏴䙼䬰佋劭卲哨娋潲睄紹綤绍袑邵颵]>shào;" -"[奢檨譇賒賖赊輋]>shē;" -"[㭙㰒䁋䂠䞌佘揲舌虵蛇蛥鉈鍦铊]>shé;" -"[䬷捨舍騇]>shě;" -"[㒤㢵㴇䀅䁯䄕䌰䠶䤮䵥厍厙射弽慑慴懾摂摄攝欇涉渉滠灄猞社蔎蠂設设赦韘麝]>shè;" -"[伸侁兟呻妽姺娠屾峷扟柛氠深燊珅申砷籶籸紳绅胂葠薓裑訷詵诜身鉮駪鯵鲹鵢]>shēn;" -"[䰠什甚神神]>shén;" -"[㔤㜤㰂㵊㾕吲哂婶嬸审宷審弞渖瀋瞫矤矧覾訠諗讅谂谉邥頣頥魫]>shěn;" -"[㥲㰮㵕䆦侺愼慎抻昚椹涁渗滲瘆瘎瘮眒眘肾脤腎葚蜃鋠]>shèn;" -"[升呏声斘昇殅泩湦焺牲狌珄生甡甥笙聲苼鉎阩陞鵿鼪]>shēng;" -"[䱆憴晠溗畻繩绳譝鱦]>shéng;" -"[㗂㼳㾪䁞䚇䪿偗省眚箵]>shěng;" -"[䎴䞉剩剰勝圣墭嵊榺琞盛聖胜蕂貹賸]>shèng;" -"[䴓呞失尸屍师師施浉湤湿溮溼濕狮獅箷絁葹蒒蓍虱蝨褷襹詩诗邿釶鈟鉇鯴鰤鲺鳲鳾鶳鸤]>shī;" -"[㖷㫑㵓䂖䖨䦹䶡乭十埘塒实実寔實峕时旹時榯湁湜溡炻石祏莳蒔蚀蝕遈鉐食飠饣鰣鲥鼫鼭]>shí;" -"[㕜㹬㹷䒨䦠乨使兘史始宩屎矢笶豕鉂駛驶]>shǐ;" -"[㒾㔺㮶㱁㸷㹝䁺䊓䏡䛈䟗䤱䩃䭄䰄世丗亊事仕似侍冟势勢卋卶叓呩嗜噬士奭嬕室崼市式弑弒忕恃戠戺拭揓是昰枾柹柿氏澨烒眂眎睗示筮簭翨舐舓襫視视試誓諟諡謚试谥豉貰贳軾轼适逝適遾釈释釋鈰鉃鉽銴铈飾餙餝饰鯷鳀齛𠀍]>shì;" -"[匙拾識识]>shi;" -"[収收荍]>shōu;" -"[㝊䭭垨守手掱艏首]>shǒu;" -"[㖟㥅䛵兽受售壽夀寿授涭狩獸璹痩瘦綬绶鏉]>shòu;" -"[书倏倐儵叔姝抒摅摴攄書杸枢梳樗樞橾殊殳疎疏紓綀纾舒蔬踈軗輸输陎鵨]>shū;" -"[䃞䝪䴰埱塾孰尗掓淑焂熟秫菽虪襡贖赎跾鸀]>shú;" -"[㒔㟬㯮㳆㻿䑕䞖䠱䩳婌属屬暑潻癙盨署薥薯藷蜀襩钃黍鼠鼡]>shǔ;" -"[㛸㜐㣽㶖㷂㽰㾁䆝䉀䎉䘤䜹䝂䢤䩱侸凁墅尌庶庻恕戍数數曙朮术束树樜樹沭漱潄澍濖竖竪絉荗蒁術袕裋豎述鉥錰鏣鮛鶐]>shù;" -"[刷唰]>shuā;" -"耍>shuǎ;" -"誜>shuà;" -"[孈摔縗缞衰]>shuāi;" -"甩>shuǎi;" -"[䢦卛咰帅帥繂蟀]>shuài;" -"[拴栓閂闩]>shuān;" -"[䧠涮]>shuàn;" -"[双孀孇欆礵艭雙霜驦骦鷞鸘鹴]>shuāng;" -"[㼽䗮䫪塽慡樉爽縔騻]>shuǎng;" -"[㦼䡯灀]>shuàng;" -"[誰谁]>shuí;" -"水>shuǐ;" -"[㽷䭨娷帨涗涚睡瞓祱稅税蛻蜕裞]>shuì;" -"[吮揗楯賰]>shǔn;" -"[㥧䀢䀵䑞橓瞚瞬舜蕣順顺鬊]>shùn;" -"[哾說説说]>shuō;" -"[䀥䈾䌃妁揱搠朔槊欶洬烁爍獡矟硕碩芍蒴鎙鑠铄]>shuò;" -"[丝偲凘厮厶司咝嘶噝媤廝思撕斯楒榹泀澌燍磃禗禠私簛籭糹絲緦纟缌罳蕬虒蛳蜤螄蟖蟴覗鉰鋖鐁颸飔騦鷥鸶]>sī;" -"𥐘>sí;" -"死>sǐ;" -"[㐌㕽㚶㣈㭒㸻㹑㾅䇃䎣䏤䦙亖佀俟儩兕嗣四姒娰孠寺巳杫柶汜泗泤洍涘瀃牭祀禩竢笥耜肂肆蕼貄賜赐釲鈻飤飼饲駟驷飼]>sì;" -"[倯凇娀崧嵩庺憽松枀柗梥檧淞濍硹菘蜙鍶锶鬆]>sōng;" -"[㧐㨦㩳䉥䜬傱嵷怂悚愯慫捒楤竦耸聳駷]>sǒng;" -"[㕬㮸䛦䢠宋訟誦讼诵送頌颂餸]>sòng;" -"[嗖廀廋捜搜摉溲獀艘蒐螋鄋醙鎪锼颼飕餿馊騪]>sōu;" -"[㖩㛐䈹䉤䏂䮟傁叜叟嗾擞擻櫢滫瞍籔薮藪]>sǒu;" -"[㵻瘶膄]>sòu;" -"嗽>sou;" -"[囌櫯甦稣穌苏蘇蘓酥]>sū;" -"俗>sú;" -"[㑉㑛㓘㔄㕖㜚㝛㨞㩋㪩㬘㯈㴋㴑㴼䃤䅇䌚䎘䏋䑿䔎䘻䛾䥔傃僳嗉塐塑夙嫊宿愫愬憟栜榡樕橚殐泝涑溯溸溹潥玊珟璛碿窣簌粛粟素縤肃肅膆蓿蔌藗蜶觫誎謖谡趚速遡遬鋉餗驌骕鱐鷫鹔]>sù;" -"[訴诉]>su;" -"[狻痠酸]>suān;" -"[㔯匴]>suǎn;" -"[祘笇筭算蒜]>suàn;" 
-"[倠哸夊攵毸浽滖濉熣眭睢綏绥芕荽荾虽雖鞖]>suī;" -"[㵦㻟䜔䢫遀隋随隨]>suí;" -"[䭉䯝巂瀡膸髄髓]>suǐ;" -"[㒸㞸㴚㻪㻽䅗䉌䍁䔹䠔䡵䥙亗埣嬘岁嵗旞檅檖歲歳澻煫燧璲瓍睟砕碎祟禭穂穗穟繀繸襚誶譢谇賥遂邃鐆鐩隧𡑞]>suì;" -"[孙孫搎槂狲猻荪蓀蕵薞飧飱]>sūn;" -"[㔼㡄㦏䁚损損榫笋筍箰簨鎨]>sǔn;" -"愻>sùn;" -"[傞唆嗍娑挱挲摍桫梭樎簑簔縮缩莏蓑趖蹜]>suō;" -"[㪽䂹䅴䈗䐝䖛䗢䞆䞽䣔䵀唢嗩所摵擌暛洓溑琐瑣璅瘷索褨鎍鎖鎻鏁鏼锁]>suǒ;" -"逤>suò;" -"嗦>suo;" -"[他嚃塌她它祂禢]>tā;" -"[㗳㺚塔墖榙]>tǎ;" -"[㒓㛥㣛㣵㧺㭼㯓㳠㳫㹺㿹䂿䈋䈳䌈䍇䍝䎓䑜䓠䜚䵬䶀䶁亣嗒嚺崉挞搨撻榻橽毾沓涾溻澾濌狧獭獺羍誻譶跶踏蹋蹹躂躢遝遢錔鎉鑉闒闥闼阘鞜鞳鮙鰨鳎龖龘]>tà;" -"[囼孡胎]>tāi;" -"[㒗㘆㙵㣍㬃㷘㸀䈚䑓䢰儓冭台坮嬯抬擡旲檯炱炲籉臺苔菭薹跆邰颱駘骀鮐鲐]>tái;" -"[㑷㥭䣭太夳忲态態曃汰泰溙燤肽舦酞鈦钛]>tài;" -"[啴嘽坍怹抩摊擹攤橝滩灘瘫癱緂舑舚譠貪贪]>tān;" -"[㲜㷋㽑䃪䉡䊤䕊倓嘾坛墰墵壇壜婒惔憛昙曇榃檀潭痰罈罎艢藫覃談譚谈谭貚郯醈醰錟鐔锬镡餤驔]>tán;" -"[㲭䆱䏙䞡䦔嗿坦忐憳憻毯璮禫膻菼袒襢贉醓黮]>tǎn;" -"[䐺䜖傝僋叹嘆埮探歎湠炭碳賧赕]>tàn;" -"[劏嘡坣汤湯羰蝪蹚鏜镗鼞]>tāng;" -"[㑽㙶㜍㭻㲥㼺䅯䉎䌅䣘䧜傏唐啺堂塘搪摚棠榶樘橖溏漟煻瑭磄禟篖糃糖糛膅膛蓎薚螗螳赯踼鄌醣鎕闛隚餹饄鶶]>táng;" -"[㒉㿩伖倘偒傥儻帑惝戃曭淌爣矘耥躺鎲钂镋𢠵]>tǎng;" -"[䟖摥烫燙趟鐋铴]>tàng;" -"[叨嫍弢慆掏搯槄洮涛滔濤瑫絛縚縧绦翢蜪詜謟轁鞱韜韬飸饕]>tāo;" -"[㹗䬞匋咷啕桃梼檮淘祹綯绹萄裪迯逃醄鋾錭陶鞀鞉饀駣騊鼗]>táo;" -"[䚯䚵䵚討讨]>tǎo;" -"[㚐套]>tào;" -"[㥂㧹忑忒慝棏特脦蚮蟘貣鋱铽鴏]>tè;" -"[膯鼟]>tēng;" -"[䒅䕨䠮䲍䲢儯幐滕漛疼籐籘縢腾蕛藤螣誊謄邆駦騰驣鰧]>téng;" -"[䴘剔梯踢鷈鷉]>tī;" -"[㖒㡗㣢䅠䔶䚣䛱䝰䣡䨑䬾偍厗啼嗁媂媞崹惿提漽珶瑅碮禔禵稊綈緹绨缇罤荑蝭褆謕趧蹄蹏遆醍鍗隄題题騠鮷鵜鶗鶙鹈鼶]>tí;" -"[䌡䣽䪆䶏体挮躰軆骵體]>tǐ;" -"[㗣㬱㯩䎮䙗䧅䯜䶑俤倜剃嚏嚔悌悐惕惖掦揥替歒殢涕睼籊薙褅趯迏逖逷銻鐟锑髢髰鬀鬄]>tì;" -"[屉屜]>ti;" -"[倎兲天婖沗添酟靔靝黇]>tiān;" -"[㧂䑚䟧䡒䡘䥖嗔塡填屇恬搷沺湉璳甛甜田畋畑盷窴胋菾闐阗鷆鷏]>tián;" -"[㐁㖭㙉㥏䄼䄽䐌䠄䣯䩄唺忝悿晪殄淟睓腆舔覥觍賟錪鍩锘靦餂]>tiǎn;" -"[㮇㶺掭煔]>tiàn;" -"[頲颋]>tian;" -"[佻庣恌挑旫祧聎蓨鮡]>tiāo;" -"[㑿㟘䎄䒒䖺䟭䩦䯾䱔䳂岧岹条條樤祒笤芀苕萔蜩趒迢鋚鎥鞗髫鰷鲦齠龆]>tiáo;" -"[㸠䠷嬥宨晀窕誂]>tiǎo;" -"[朓眺窱粜糶絩脁覜跳]>tiào;" -"[呫帖怗萜貼贴]>tiē;" -"[䥫鉄銕鋨鐡鐵铁锇驖鴩]>tiě;" -"[䴴䵿蛈飻餮]>tiè;" -"[厅厛听圢庁廰廳桯汀綎耓耵聴聼聽艼鞓]>tīng;" -"[㹶䗴䱓亭停婷嵉庭廷朾楟榳渟筳聤莛葶蜓蝏諪邒閮霆鯅鼮]>tíng;" -"[䋼䦐䵺侹挺梃涏烶珽町甼脡艇誔鋌铤]>tǐng;" -"[嗵恫樋炵熥狪痌蓪通]>tōng;" -"[㠉㠽㣚㣠㤏㮔㸗㼧㼿䂈䆚䮵䳋䴀䶱仝佟侗僮勭同哃峂峒峝庝彤晍曈朣桐氃浵潼烔燑爞犝獞瞳砼硐秱穜童筩粡絧罿膧艟茼蕫蚒詷迵酮鉖鉵銅铜餇鮦鲖鼨]>tóng;" -"[㛚㪌䆹姛捅桶筒統綂统]>tǒng;" -"[恸慟憅痛蘣衕]>tòng;" -"[偷偸婾媮鍮]>tōu;" -"[㓱㡏㢏䵉亠匬坄头投牏酘頭骰]>tóu;" -"[㪗䱏妵斢紏鈄钭黈]>tǒu;" -"[㖣䞬䟝透]>tòu;" -"[凸堗嶀廜捸涋痜禿秃突鋵]>tū;" -"[㭸㻌㻠㻬㻯䅷䖘䠈䣄䣝䤅䳜凃図图圖圗塗宊屠峹嵞庩徒怢捈揬梌湥潳瑹瘏稌筡腯荼葖蒤跿途酴鈯鍎馟駼鵚鶟鷋鷵鼵]>tú;" -"[吐唋土圡芏釷钍]>tǔ;" -"[兎兔堍菟鵵]>tù;" -"涂>tu;" -"[湍煓猯貒]>tuān;" -"[㩛䊜剸团団團慱抟摶槫漙糰鏄鷻]>tuán;" -"[䜝䵯畽疃黗]>tuǎn;" -"[彖褖]>tuàn;" -"[推蓷藬]>tuī;" -"[㢈㢑㾯㾽㿉㿗䀃䅪䍾䫋尵弚橔穨蘈蹪隤頹頺頽颓魋]>tuí;" -"[㞂㱣㾼俀僓腿蹆骽]>tuǐ;" -"[㦌㷟娧煺退駾]>tuì;" -"[吞呑啍噋旽暾朜涒炖焞]>tūn;" 
-"[㞘㩔㹠㼊坉屯忳臀臋芚訰豘豚軘霕飩饨魨鲀]>tún;" -"[㖔氽]>tǔn;" -"褪>tùn;" -"[仛佗侂侻咃托扥拕拖挩捝杔汑沰涶牠矺脫脱託讬飥饦馲驝]>tuō;" -"[㸰㸱㼠㾃䍫䡐䪑䭾䰿䴱坨堶岮沱沲狏砣砤碢紽袉詑跎迱酡阤陀陁鞁馱駄駞騨驒驮魠鮀鴕鸵鼉鼍鼧]>tuó;" -"[㟎䓕䲊妥媠嫷庹彵撱椭楕橢軃鰖鵎]>tuǒ;" -"[唾拓柝槖橐毤毻箨籜萚蘀跅]>tuò;" -"[駝驼]>tuo;" -"[劸嗗娲媧徍挖搲攨洼溛漥畖穵窊窪聉蛙鼃]>wā;" -"[㰪娃]>wá;" -"[㧚㼘佤咓瓦邷]>wǎ;" -"[䎳䚴䠚嗢婠淴腽膃袜襪韈韤]>wà;" -"哇>wa;" -"[㖞咼喎歪]>wāi;" -"[䠿外懀]>wài;" -"[剜帵弯彎湾潫灣蜿豌]>wān;" -"[㝴䯈丸刓完抏汍烷玩琓紈纨芄頑顽]>wán;" -"[㜶㽜㿸䂺䅋䖤䗕䘼䛷䝹䩊䳃倇埦婉宛惋挽晚晩晼梚椀浣澣琬畹皖盌睌睕碗綩綰绾脕莞菀萖踠輓鋄鋔錽鞔鯇鲩]>wǎn;" -"[㸘䥑万仴卍卐忨捥綄翫腕萬蟃貦贎鎫]>wàn;" -"[㑌尢尣尩-尫汪]>wāng;" -"[亡亾仼兦彺王莣蚟]>wáng;" -"[㓁㲿㳹㴏䋄䋞䰣往徃忹惘暀棢瀇網网罒罔菵蛧蝄誷輞辋迬魍]>wǎng;" -"[䛃䤑妄忘旺望朢迋]>wàng;" -"枉>wang;" -"[倭偎危喴威媁媙崴巍微愄揋椳楲渨溾烓煨燰碨萎葨葳蝛覣詴逶隇隈鰃鰄鳂]>wēi;" -"[㕒㣲㧑䉠䑊䔺䜅䝐䥩䧦唯囗囲围圍圩壝峗峞嵬帏帷幃惟桅欈沩洈涠湋溦潍潿濰瀢琟癓矀維维蒍蔿薇覹违違鄬醀鍏闈闱霺韋韦鮠]>wéi;" -"[㖐㞇㞑㟪㠕㢻㨊㬙㭏㱬䃬䈧䞔䪘䬐䬿䵋亹伟伪偉偽僞儰喡委娓寪尾崣嵔徫愇斖暐梶椲洧浘濻炜煒猥玮瑋痏痿硊磈緯纬腲艉芛苇荱葦蓶薳蘤蜲諉诿踓鍡隗韑韙韡韪頠颹骩-骫鮪鲔]>wěi;" -"[㥜㦣㷉䗽䘙䙿䜜䡺䪋䬑䭳䮹䲁䵳为位卫叞味喂媦嬒尉徻慰未渭為煟熭爲犚犩畏硙磑緭罻胃苿菋蔚薉藯蘶蜼螱衛衞褽謂讆讏谓贀躗躛軎轊錗鏏霨餧餵魏鮇鳚]>wèi;" -"[猬蝟]>wei;" -"[塭昷殟温溫瑥瘟瞃豱輼轀辒馧鰛鰮鳁]>wēn;" -"[䎹䎽䘇䰚彣文炆玟珳琝瘒紋纹聞芠蚉蚊螡蟁閿闅闦闻阌雯馼魰鳼鴍鴖鼤]>wén;" -"[㒚㖧㗃㝧㳷刎吻呡桽稳穏穩肳脗]>wěn;" -"[伆問妏抆揾搵汶渂熓璺紊莬问]>wèn;" -"[嗡嵡翁螉鎓霐鶲鹟]>wēng;" -"[㘢㜲䐥䤰勜塕奣暡浻滃瞈聬蓊]>wěng;" -"[瓮甕罋齆𦧅]>wèng;" -"[唩涡涹渦猧窝窩莴萵蜗蝸踒]>wō;" -"[㦱㧴䰀婑我捰]>wǒ;" -"[㠛㱧䁊䠎偓卧幄捾握斡楃沃涴渥濣焥肟臥蒦齷龌]>wò;" -"[乌剭呜嗚圬媉屋巫弙杇歍汙汚污洿烏瑦窏箼腛螐誣诬邬鄔鰞鴮鼿]>wū;" -"[㷻㹳㻍䍢䦜䫓䮏吳吴吾呉唔娪无梧毋洖浯無牾珸璑祦禑膴芜茣莁蕪蜈誈郚鋘鋙铻鯃鵐鷡鹀麌鼯]>wú;" -"[㐅㑄㬳㵲䒉䟼䡧䳇五仵侮倵儛午啎墲妩娒娬嫵嵨庑廡忤怃憮捂摀旿橆武潕玝珷甒碔舞躌鵡鹉]>wǔ;" -"[㐳㡔㽾䃖䎸䑁䛩䦍䳱俉兀勿卼坞垭塢奦婺寤屼岉嵍忢悞悟悮戊扤敄晤杌沕溩焐熃物痦矹窹粅芴蘁誤误迕逜遻鋈鎢钨阢隖雾霚霧靰騖骛齀兀]>wù;" -"[伍务務]>wu;" -"[䂀俙傒僁僖兮凞卥厀吸唏唽嘻噏夕奚娭媐嬆嬉屖嵠嶲巇希徆徯忚怷怸恓悉悕惁惜扱扸捿擕晞晰曦析桸榽樨橀欷歖氥浠淅溪潝烯焈煕熄熈熙熹-熻燨爔牺犀犧琋瓗皙睎睳瞦硒禧稀窸糦縘繥羲肹膝舾莃菥蒵蜥螅螇蟋蠵覀觹觽觿譆谿豀豨豯貕蹊酅醯錫鏭鑴锡隵雟騱驨鵗黊鼷凞𥋟]>xī;" -"[㔒㠄㤴㦻㩗㳧㵿㽯㿇䀘䏮䫣习媳嶍席昔棤椺槢檄欯漝焟焬獥瘜習蒠蓆薂袭裼襲覡觋謵趘郋鎴钖隰霫飁騽鰼鳛]>xí;" -"[䢄匸喜囍壐屣徙憘憙敼暿枲洗漇狶玺璽矖簁縰纚葈葸蓰蟢諰蹝躧酾釃霼鱚]>xǐ;" -"[㑶㙾㚛㞒㣟㤸㥡㭡㸍㹫䈪䊠䐼䓇䙽䚷䛥䜁䧍䨳䩤䮎䲪係卌呬咥喺嚊嚱墍屃屓屭忥怬恄戏戯戱戲晳椞歙汐滊潟澙熂犔盻矽磶禊稧穸系細綌繋繫细绤翕翖肸舃舄蕮虩衋覤謑赥赩郄郤鄎釳釸鎎闟阋隙隟餼饩鬩黖齂]>xì;" -"[息西]>xi;" -"[傄呷煆煵疨瞎虾蝦谺鍜閕颬]>xiā;" -"[㗇㘡㰺㽠䖎䖖䘥䛅䦖䪗䫗侠俠冾匣峡峽搳敮暇柙炠烚狎狭狹珨瑕硖硤碬磍祫笚筪縀縖翈舝蕸赮轄辖遐鎋陜霞騢魻鰕黠]>xiá;" -"閜>xiǎ;" -"[㗿㙤丅下吓嚇夏夓懗欱疜睱罅芐鏬鶷]>xià;" -"[仚僊先嘕奾嬐孅屳廯忺憸掀掺摻暹杴枮氙澖珗祅祆秈籼纎纖纤苮蓒蘐褼襳訮跹蹮躚酰銛鍁铦锨韯韱馦鮮鱻鲜]>xiān;" -"[㘅㘋㛾㡉㢺㭹㮭㳄㳭㵪䒸䕔䝨䦥䲗䶢咸唌啣嗛娴娹婱嫌嫺嫻弦憪涎燅甉痃痫癇癎瞯瞷礥絃羬胘舷葴藖蚿蛝衔衘諴賢贒贤輱銜閑閒闲鷳鷴鷼鹇鹹]>xián;" -"[㜪㧋㧥㫫㬎㭠㯀㶍㿅䉳䗾䘆䚚䜢䢾䥪䧋䧮冼尟尠崄嶮幰搟攇显櫶毨灦烍燹狝猃獫獮玁癣癬礆禒筅箲藓蘚赻跣銑鍌铣险険險韅顕顯鼸齴]>xiǎn;" 
-"[㔵㡾㦓㩈㪇㬗㺌䀏䁂䃱䃸䉯䏹䞁䤼䧟䨘䨷䱤䵇䶟伣俔僩僴县咞哯垷姭娊宪岘峴憲撊晛橌涀瀗献獻现現県睍粯糮絤綫線縣线缐羡羨腺臔臽苋莧蚬蜆袨誢豏轞鋧錎限陥陷霰餡馅麲𠜎]>xiàn;" -"仙>xian;" -"[㐮乡厢啌廂忀欀湘瓖相箱緗纕缃膷芗葙薌襄郷鄉鄊鄕鑲镶香驤骧麘]>xiāng;" -"[㟄䔗䜶佭庠栙祥絴翔詳详跭祥]>xiáng;" -"[㗽䊑䖮享亯响奛嶑想晑響飨餉饗饟饷鮝鯗鱶鲞]>xiǎng;" -"[㟟䐟䢽像向嚮塂巷恦曏橡珦萫蚃蟓蠁襐象鐌闀闂項项鱌]>xiàng;" -"[呺哓哮啋嘋嘐嘵嚣嚻囂婋宯宵庨彇憢撨枭枵梟櫹歊毊消潇潚瀟灱灲烋焇獢痚痟硝硣穘窙箫箾簘簫綃绡翛膮萧萷蕭藃虈虓蟏蟰蠨踃逍銷销霄驍骁髇魈鮹鴞鸮]>xiāo;" -"[㑾㚣㬵䒝洨笅訤誵郩]>xiáo;" -"[䒕䥵小晓暁曉皢筱筿篠謏]>xiǎo;" -"[㔅㗛㤊㹲䊥䕧俲傚効咲啸嘨嘯娎孝恔效敩斅斆校歗涍熽笑肖藠誟鞩]>xiào;" -"[些歇猲薛蠍褉]>xiē;" -"[㖿㙝㥟㨙㩉㩦㩪㭨䔑䕵䙎䡡䭎偕劦勰协協嗋垥奊恊愶慀拹挟挾搚携撷擷攜斜旪熁燲籺絜綊緳纈缬翓胁脅脇脥膎蝢衺襭諧谐邪鋣鞋鞵龤𩋘𩋧]>xié;" -"[㕐㝍䥱䥾写冩寫藛躠]>xiě;" -"[㒠㓔㔎㖑㙰㞕㣯㣰㦪㨝㰔㰡㳦㳿㴬㴮㴽㸉㽊䉏䉣䊝䕈䙊䙝䚳䚸䢡䦏䦑䩧䲒䵦亵伳偰卨卸塮妎媟屑屟屧嶰廨徢懈揳斺暬械楔榍榭泄泻洩渫澥瀉瀣灺焎燮爕獬疶祄禼糏紲絏絬緤绁缷薤蟹蠏衸褻謝谢躞邂鞢韰駴骱齘齥]>xiè;" -"[䜣俽兓噷妡廞心忻惞新昕杺欣歆炘盺芯莘薪訢辛鈊鋅鑫锌馨騂骍]>xīn;" -"[㚯䰼攳樳襑鄩]>xín;" -"伈>xǐn;" -"[㐰㛛㭄䒖䚱䛨䜗伩信卂囟孞焮煡脪舋衅訫軐釁阠顖馸]>xìn;" -"[垶惺星曐煋猩瑆皨篂腥蛵觪觲鍟馫鮏鯹]>xīng;" -"[㐩㓝㣜㼛䣆䤯侀刑坓型娙形洐濴烆硎筕胻行邢郉鈃鉶銒鋞钘铏陉陘雽餳饧]>xíng;" -"[㝭㨘㮐䳙擤渻睲醒]>xǐng;" -"[㓑㼬䁄䂔䓷䛭䰢倖兴姓婞嬹幸性悻杏涬緈臖興荇莕]>xìng;" -"[㐫兄兇凶匈哅忷恟汹洶胷胸訩詾讻賯]>xiōng;" -"[䧺熊赨雄]>xióng;" -"[夐敻詗诇]>xiòng;" -"[休俢修咻庥樇烌羞脩臹茠蓚貅銝鎀鏅饈馐髤髹鵂鸺]>xiū;" -"[㱙朽糔綇]>xiǔ;" -"[㗜㾋嗅嘼岫峀溴珛琇秀綉繍繡绣袖褎褏銹鏥鏽锈齅]>xiù;" -"[偦吁呴嘘噓墟媭嬃嬬幁戌揟旴晇楈欨欰歔歘疞盱稰籲縃繻胥蕦虗虚虛蝑訏譃鑐需須须驉鬚魖]>xū;" -"[䍱俆徐蒣]>xú;" -"[㑔㑯㞰㥠䅡䔓冔喣姁昫栩湑煦珝糈許詡諝许诩谞鄦醑]>xǔ;" -"[㐨㕛㖅㗵㘧㚜㜅㜿㞊㤢㦽㰲㵰㷦㺷㾥䂆䋶䘏䙒䛙䜡䢕䣱䣴䦗䦽䬔䱛䳳伵侐勖勗卹叙垿壻婿序怴恤慉敍敘旭-旯朂槒殈汿沀洫溆漵潊烅珬盢瞁瞲稸窢絮緒緖續绪续聟芧蓄藇藚訹賉酗銊頊顼魆魣鱮]>xù;" -"[佡儇吅咺塇媗宣弲愃愋揎昍晅暄梋煊瑄睻矎禤翧翾萱蕿藼蝖蠉諠諼譞谖軒轩鋗鍹鶱]>xuān;" -"[㘣㳬㹡䁢䗠䮄䲂䲻䴉䴋伭妶嫙悬懸旋暶檈漩玄玹琁璇璿蜁誸鹮]>xuán;" -"[㾌䍻䠣喧暅烜选選]>xuǎn;" -"[㧦㳙䍗䘩䝮䧎䩙䩰埍怰昡楥楦泫渲潠炫眩眴碹絢縼繏绚蔙衒讂贙鉉鏇铉镟鞙颴駽]>xuàn;" -"[削吙屵蒆辥靴鞾]>xuē;" -"[㖸㧒㶅㿱䫻䱑乴学學峃嶨斈泶澩燢茓觷雤鷽鸴𥄴]>xué;" -"[㡜䨮雪鱈鳕]>xuě;" -"[㞽䎀䤕䫼䬂䭥吷坹岤桖泬烕穴血謞趐]>xuè;" -"[勋勛勲勳坃埙塤壎壦曛焄熏燻獯矄纁臐蔒薫薰蘍醺𤑕]>xūn;" -"[㜄㝁㨚㰊㰬㽦䋸䖲䙉偱噚寻尋峋巡廵循恂挦撏旬杊枔栒桪槆橁毥洵浔潃潯灥燖珣璕畃紃荀荨蕁蟳詢询郇馴駨驯鱏鱘鲟]>xún;" -"[㢲䛜䞊䭀伨侚噀埈奞巺巽徇殉汛爋狥蕈訊訙讯賐迅迿逊遜鑂陖韗顨鵔鵕]>xùn;" -"[訓训]>xun;" -"[丫劜压圧壓孲庘押椏鴉鴨鵶鸦鸭]>yā;" -"[㧎䄰䊦伢厓堐岈崕崖枒桠涯漄牙犽猚笌芽蚜衙齖]>yá;" -"[㿿䪵厊哑唖啞庌痖瘂蕥雃雅]>yǎ;" -"[㝞㰳䅉䝟䢝䦪䯉䰲䵝亚亜亞俹嚈圠埡娅婭挜掗揠氩氬猰玡砑稏窫聐襾訝讶軋轧迓錏鐚铔齾]>yà;" -"呀>ya;" -"睚>yái;" -"[偣剦啱嫣嬮崦懕懨淹漹烟焉煙猒珚篶胭臙菸蔫鄢酀醃閹阉黭]>yān;" -"[㗴㘖㘙㫟㳂㶄㿕㿼䀋䀽䂴䇾䊙䌪䓂䕾䖗䗡䢥䤷䱲䶮严厳嚴埏塩壛壧妍姸娫娮孍岩嵒嵓巌巖巗延揅昖楌檐櫩沿湺炎狿琂盐研硏碞礹筵簷綖莚葕蔅虤蜒言詽讠郔鈆閻阎顃顏顔颜鹽麙麣]>yán;" -"[㓧㕣㚧㢂㫃㭺䁙䄋䊻䎦䗺䣍䲓乵俨偃儼兖兗剡匽厣厴噞夵奄姶嵃嶖巘巚弇愝戭扊抁掩揜曮棪椼檿沇渰渷演琰甗眼硽罨萒蝘衍裺褗躽遃郾酓隒験魇魘鰋鶠黡黤黶鼴鼹龑]>yǎn;" 
-"[㛪㢛㦔㬫㷔㷳㷼䂩䅧䑍䜩䢭䨄䭘䳛䳺䴏偐傿厌厭咽唁喭嚥堰墕妟姲婩嬊嬿宴彥彦恹敥晏暥曣椻滟灎灔灧灩焔焰焱燄燕爓牪砚硯艳艶艷覎觃觾諺讌讞谚谳豓豔贋贗赝酽醼釅閆闫隁雁餍饜騐騴驗驠验鳫鴈鴳鷃鷰]>yàn;" -"[佒咉央姎柍殃泱眏秧紻胦鉠鴦鸯]>yāng;" -"[㟅㬕䁑䖹䬗佯劷垟崵徉扬揚敭旸昜暘杨楊洋炀烊煬玚珜瑒疡瘍眻禓羊羏蛘諹輰鍚鐊阦阳陽霷颺飏鰑鸉]>yáng;" -"[㔦䇦䑆䒋䬬仰傟养勨坱岟慃懩抰攁氧氱炴痒癢蝆鞅養餋駚]>yǎng;" -"[㨾㺊㿮䬺䭐䵮怏恙样様樣漾瀁羕詇]>yàng;" -"[吆喓妖幺枖楆腰葽訞邀]>yāo;" -"[㨱㮁䂚䆙䉰䋂䌊䌛䍃䔄䖴䚺䚻䢣䬙倄傜嗂垚堯姚媱尧尭峣崤嶢嶤徭愮揺搖摇暚榣殽淆烑爻猇猺珧瑤瑶窑窯窰肴蘨謠謡谣軺轺遙遥邎銚铫颻飖餆餚鰩鳐𨍳]>yáo;" -"[㟱㢓㫏㫐㴭䁏䁘䆞䴠䶧仸偠咬夭婹宎岆崾抭杳柼榚殀溔眑窅窈窔舀苭蓔闄騕鷕鼼齩]>yǎo;" -"[㔽㝔㞁㵸㿑㿢䋤䑬䙅曜熎燿獟矅穾筄耀艞药葯薬藥袎要覞讑趭鑰钥靿鷂鹞]>yào;" -"[倻噎晔蠮]>yē;" -"[䓉䥺峫捓揶擨椰琊瑘耶釾鎁铘]>yé;" -"[㙒也冶吔嘢埜壄漜野]>yě;" -"[㐖㖡㖶㗼㙪㝣㥷㩎㪑㱉㸣䈎䤳䤶䥟䥡䧨䭟䲜业亱偞僷叶啘墷夜嶪嶫忦擛擪擫曄曅曗曵枼枽楪業殗殜液澲烨煠燁爗皣瞱瞸葉謁譺谒邺鄴鍱鎑鐷靥靨頁页餣饁馌驜鵺鸈]>yè;" -"[爷爺]>ye;" -"[㘈一伊依医吚咿噫壱壹夁嫛弌悘揖檹欹毉泆洢溰漪燚猗瑿祎禕稦繄蛜衣譩郼醫陭餏饻鷖鹥黟黳𣘦]>yī;" -"[㚦㝖㞔㥴㦾㰘㺿䄬䇵䐅䐖䖊䞅䩟䬁䬮䮊䱌䲑䴊乁仪侇儀冝凒匜咦圯夷姨宐宧寲峓嶬巸弬彛-彞怡恞扅暆杝枱柂桋椸歋沂沶洟熪珆瓵疑痍眤眱移簃羠胰苐萓蛦螔袘袲觺訑詒謻讉诒貤貽贻跠迻遗遺酏鈶鏔頉頤顊颐飴饴鮧鴺鸃]>yí;" -"[㕥㠯㩘㫊㰝㰻䝝䧧䰙乙以倚偯崺已庡扆扡掜攺旑旖晲栘椅檥矣礒笖肔胣舣艤苡苢蚁螘蟻衪輢轙迆迤逘釔鉯銥钇铱顗鳦齮]>yǐ;" -"[㐹㑊㑜㑥㓷㔴㖂㘁㘊㙠㙯㚤㛕㜋㜒㡫㡼㢞㣂㣻㦉㦤㱅㱲㲼㳑㴁㴒㵝㵩㶠㹓㹭㽈䄁䄿䆿䇩䉨䋚䋵䌻䎈䓃䓈䓹䔬䕍䖁䗑䗟䗷䘝䘸䝘䝯䢃䣧䦴䬥䭂䭇䭞䭿䯆䱒䴬乂义亄亦亿仡伇伿佚佾俋億兿刈劓劮勚勩呓呭呹唈嗌囈圛垼埶埸墿奕嫕嬑嬟寱屹峄嶧帟帠幆廙异弈弋役忆怈怿悒悥意憶懌懿抑抴挹捙掖撎敡斁易晹曀曎曳杙枍枻栧棭榏槷檍欭歝殔殪殹毅洂浂浥浳湙溢潩澺瀷炈焲熠熤熼燡燱獈玴異疫痬瘗瘞瘱癔益睪瞖秇穓竩緆縊繶繹绎缢羛義羿翊翌翳翼肄肊腋膉臆艗艺芅苅蓺薏藙藝蘙虉蛡蜴螠袣裔裛褹襼訲訳詍詣誼譯議讛议译诣谊豙豛豷跇軼轶逸邑醳醷釴鈠鎰鐿镒镱阣隿霬靾鞥顡饐駅驛驿骮鯣鶂鶃鷧鷾黓齸益逸𥜥]>yì;" -"宜>yi;" -"曕>yiàn;" -"鴁>yiāo;" -"[侌凐喑噾囙因垔堙姻婣峾廕愔慇摿歅殷氤洇洕湮溵瘖禋秵筃絪緸茵蒑裀諲銦铟闉阥阴陰陻隂霒霠鞇音韽韾駰骃黫]>yīn;" -"[㕂㖗㙬㝙㞤㸒㹜㹞䓄䖜䪩冘吟唫噖嚚圁垠夤婬寅崟崯斦檭殥泿淫滛烎犾狺璌碒苂荶蔩蟫訔訚訡誾鄞釿鈝銀银霪鰥鳏鷣]>yín;" -"[㐆㡥㥯㥼㦩㧈㱃㾙䇙䌥䒡䤺䨸乚尹嶾廴引檃櫽淾濦瘾癮磤紖縯纼蘟蚓螾讔趛鈏隐隠隱靷飮飲饮馻]>yǐn;" -"[㣧㪦㴈㼉䕃䚿䡛䤃䲟印垽堷慭憖憗懚朄檼湚濥猌癊窨胤茚荫蔭酳鮣]>yìn;" -"[偀嘤嚶婴媖嫈嬰孆孾应応應撄攖朠桜楧樱櫻渶煐瑛璎瓔甇甖碤礯緓纓绬缨罂罃罌膺英莺蘡蝧蠳褮譻鍈鑍锳霙韺鴬鶧鶯鷪鷹鸎鸚鹦鹰]>yīng;" -"[㢍㨕㴄㵬㹙㹚㿘䁝䃷䑉䕦䪯僌営塋嬴巆廮攍楹櫿溁滎潆濙濚瀛瀠瀯熒營瑩盁盈籝籯縈茔荥荧莹萤-萦萾藀蛍蝿螢覮謍贏赢迎]>yíng;" -"[㯋㲟䀴䨍䭊䭗巊影梬浧潁瀴璄瘿癭矨穎郢頴颍颕颖]>yǐng;" -"[㑞䁐䙬䤝噟媵摬映暎滢瀅硬膡蓥譍賏鎣鐛鞕]>yìng;" -"[蝇蠅]>ying;" -"[哟唷喲]>yō;" -"[㐯傭嗈噰墉壅庸廱慵拥擁澭瀜灉痈癕癰蕹邕郺鄘鏞镛雍雝饔𠆌]>yōng;" -"[㝘䗤喁嫆嫞槦滽牅顒颙鱅鳙]>yóng;" -"[㙲㦷㷏㽫䞻俑傛勇勈咏埇塎嵱彮恿悀惥愑愹慂搈柡栐永泳涌湧甬禜臃蛹詠踊踴鯒鲬]>yǒng;" -"[㞲㶲佣用苚醟]>yòng;" -"[优優呦嚘幽忧怮悠憂攸櫌瀀纋耰鄾麀]>yōu;" -"[㒡㕱㘥㚭㛜㫍㳺㻀㽕䑻䖻䚃䢊䢟囮尤峳怣斿楢櫾沋油浟游滺犹猶猷由疣秞肬莤莸蕕蚰蝣訧輏輶逌逰遊邮郵鈾铀駀魷鮋鯈鱿鲉]>yóu;" -"[㮋㰶㾞䅎䒴䬀䱂䳑丣偤卣唀岰庮懮有栯梄槱泑湵牖禉羐羑聈苃莠蚴蜏酉銪铕黝]>yǒu;" -"[㓜㕗㤑㹨㺠䀁䆜䛻䞥亴佑侑又右囿姷宥峟幼柚牰狖祐糿誘诱貁迶酭釉鴢鼬]>yòu;" -"友>you;" -"[唹扜毹毺淤瘀盓穻箊紆纡虶迂迃陓]>yū;" 
-"[㒜㚥㤤㥔㥚㥥㦛㪀㬂㬰㳛㶛㷒㺞㺮㼶䁩䂛䃋䄏䄨䍂䏸䐳䔡䗨䜽䢓䩒䰻䱷䲣于亐伃余俞兪堣堬妤娛娯娱媀嬩崳嵎嵛愉愚扵揄於旟杅桙楡楰榆欤歈歟歶渔渝湡漁澞狳玗玙瑜璵畬畭畲盂睮硢禺窬竽籅緰羭腧腴臾舁舆艅茰萸蕍蘛虞蝓螸衧褕覦觎諛謣谀踰輿轝逾邘鄃釪鍝隃隅雓雩餘馀骬髃魚鮽鰅鱼鵌鷠鸆鸒]>yú;" -"[㑨㒁㔱㙑㝢㠘㡰㣃㲾㺄㼌䣁䥏䨞与予伛俁俣偊傴噳圄圉宇寙屿峿嶼庾懙敔斔斞楀瑀瘐祤禹窳篽羽聥與萭蓹蘌螤語语貐鄅酑雨齬龉羽]>yǔ;" -"[㠨㳚㽣䁌䂊䆷䈅䉛䋖䍞䖇䘘䘱䛕䢩䨒䬄䮇䮙䴁䵫俼哊喅喐喩喻噊圫域堉墺妪嫗寓峪嶎庽彧御忬悆悇惐愈慾戫昱棛棜棫櫲欎欝欥欲毓汩浴淢淯滪澦灪焴煜燏燠爩狱獄獝玉琙瘉癒矞砡硲礇礖礜禦秗稢稶穥籞緎繘罭聿肀育芋芌茟萮蒮蓣蕷薁蜟蜮裕誉諭譽谕豫軉輍逳遇遹郁醧鈺銉鋊錥鐭钰閾阈隩霱預预飫饇饫馭騟驈驭鬰鬱鬻魊鳿鴥鴪鵒鷸鹆鹬黦龥]>yù;" -"[冤剈噮囦嬽寃悁惌棩淵渁渆渊渕灁眢箢肙葾蒬蜎蜵裷鋺駌鳶鴛鵷鸢鸳鹓鼘鼝]>yuān;" -"[㟶㥳㹉䖠䬧䲮䳒䳣元原厡厵员員园圆圎園圓圜垣塬妧媴嫄岏援杬榞榬橼櫞沅湲源溒爰猨猭猿獂笎緣縁缘羱芫萲蒝薗蚖蝝蝯螈袁貟贠轅辕邍邧鎱騵魭鶢鶰黿鼋]>yuán;" -"[䛄䛇䩩妴远逺遠]>yuǎn;" -"[㤪㥐㭇䅈䏍䬇䬼傆夗媛怨愿掾瑗禐苑衏裫褑褤謜院願]>yuàn;" -"[曰曱箹約约]>yuē;" -"[哕噦]>yuě;" -"[㜧㜰㬦㰛㹊䋐䖃䟠䠯䡇䢁䢲䤦䥃䶳刖妜岄岳嶽恱悅悦戉抈捳月枂樾泧瀹爚狘玥礿禴篗籆籥籰粤粵蘥蚎蚏越跀跃躍軏鈅鉞钺閱閲阅鸑鸙龠]>yuè;" -"[奫晕暈氲氳煴緼縕缊蒀蒕蝹贇赟]>yūn;" -"[㚃㜏䉙䢵云勻匀妘愪昀榅榲橒沄涢溳澐熅熉畇眃秐筠筼篔紜縜纭耘耺芸荺蒷蕓郧鄖鋆雲饂]>yún;" -"[䆬䇖䞫䡝䤞䦾䨶䪳傊允喗抎殒殞狁玧磒褞賱輑鈗阭陨隕霣]>yǔn;" -"[㚺㞌㟦䚋䩵䲰䵴囩夽孕恽惲愠慍枟熨緷腪蕰蕴薀藴蘊运運郓鄆酝醖醞韞韫韵韻鶤]>yùn;" -"[匝咂帀抸沞迊鉔]>zā;" -"[䕹䞙䪞偺囐嶻杂砸磼襍雑雜雥韴魳]>zá;" -"[咋鮺鲝]>zǎ;" -"[哉栽渽災灾烖賳𢦏]>zāi;" -"[㞨㱰㴓䏁䣬䮨宰崽縡]>zǎi;" -"[䵧侢傤儎再在扗載载酨]>zài;" -"[兂簪簮鐕]>zān;" -"[倃咱喒糌]>zán;" -"[㤰儧儹噆寁拶撍攅攒攢昝桚沯礸禶趱趲]>zǎn;" -"[㜺㟛㣅囋暂暫欑濽灒瓉瓒瓚穳襸讃讚賛贊赞鄼酂酇錾鏨]>zàn;" -"[匨牂羘臜臢蔵賍賘贓贜赃髒]>zāng;" -"臧>záng;" -"[駔驵]>zǎng;" -"[㘸塟奘弉脏臓臟葬銺]>zàng;" -"[傮糟蹧遭]>zāo;" -"[䥣凿醩鑿]>záo;" -"[䲃早枣棗澡璪繰缲薻藻蚤]>zǎo;" -"[㲧㿷䜊唕唣噪慥梍灶燥皁皂竃竈艁譟趮躁造髞]>zào;" -"[㖽㣱㳻䃎䇥䕉䕪䰹䶦则則啧嘖崱帻幘択择捑擇沢泽溭澤皟瞔笮箦簀耫舴荝萴蠈蠌諎謮責賾责赜迮鰂鲗]>zé;" -"[㳁仄夨庂昃昗汄稄]>zè;" -"[戝賊贼鱡]>zéi;" -"怎>zěn;" -"[䫈譖譛谮]>zèn;" -"[増增憎橧熷璔矰磳繒缯罾譄驓]>zēng;" -"㽪>zěng;" -"[䙢䰝甑贈赠]>zèng;" -"[偧剳哳喳扎抯挓揸摣柤楂樝渣猹皶皻觰齄齇]>zhā;" -"[㱜㴙䥷䵵劄札牐甴箚紥紮蚻蠿譗鍘铡閘闸霅]>zhá;" -"[㒀㡸㷢䋾䕢䛽䵙厏眨苲鮓鲊]>zhǎ;" -"[䖳䞢乍吒咤奓宱搾柵栅榨溠灹炸砟簎膪蚱詐诈醡]>zhà;" -"[捚摘斋斎榸齋]>zhāi;" -"[㡯宅礋]>zhái;" -"[窄鉙飵]>zhǎi;" -"[㩟债債寨瘵砦]>zhài;" -"[厃嶦旃旜栴毡氈氊沾瞻粘覘觇詀詹譫讝谵趈邅鉆霑飦饘驙鱣鳣鸇鹯]>zhān;" -"薝>zhán;" -"[㞡㠭䁴䎒䟋䡀䩅䱼嫸展崭嶃嶄拃搌斩斬椫榐樿橏琖皽盏盞蹍輾辗醆颭飐魙]>zhǎn;" -"[㟞㺘㻵䋎䗃䘺䪌䱠佔偡占嶘战戦戰栈桟棧椾湛站綻绽菚蘸虥虦覱蹔輚轏]>zhàn;" -"[傽墇嫜张張彰慞暲樟漳獐璋章粻蔁蟑遧鄣餦騿鱆麞]>zhāng;" -"[仉掌涨漲礃绱長长鞝]>zhǎng;" -"[㕩㙣㽴丈仗嶂帐帳幛扙杖涱痮瘬瘴瞕緔胀脹賬账障]>zhàng;" -"[妱巶招昭柖盄窼釗鉊鍣钊駋]>zhāo;" -"[㕚㺐䈃䝖找沼爪瑵菬]>zhǎo;" -"[㡽㨄㷖䃍䈇䍜䍮䮓兆召垗旐曌枛櫂炤照燳狣瞾笊箌罩羄肁肇肈詔诏赵趙雿鵫]>zhào;" -"[嗻嫬晢晣螫遮]>zhē;" -"[㞏㪿㯰䊞䎲䐑䐲䓆䝃䝕乇厇哲啠喆嚞埑悊折摺歽瓋砓磔籷耴虴蜇褶襵詟謫謺讁讋讘谪輒輙辄銸鮿鸅]>zhé;" -"[啫禇者赭踷]>zhě;" -"[䂞䏳䗪䠦䩾䵭柘檡浙烢蟅这這鷓鹧]>zhè;" -"[着著蔗]>zhe;" 
-"[侦偵堻媜嫃寊帪揁搸斟栕桢桭楨榛樼溱潧澵獉珍珎瑊甄眞真砧碪磌祯禎禛箴籈縝缜胗臻蒖蒧蓁薽貞贞轃遉酙針鉁錱鍼针靕駗鱵]>zhēn;" -"[㐱㪛䂦䂧䑐䪴䫬弫抌抮昣晸枕畛疹眕稹笉紾絼縥聄袗裖診诊軫轸辴鬒黰]>zhěn;" -"[㓄㣀㮳㯢㴨䊶䏖䝩䟴䨯䲴䳲侲圳塦挋振揕敒敶朕栚瑱眹蜄誫賑赈鎭鎮镇阵陣震鴆鸩]>zhèn;" -"[争佂埩姃峥崝崢征徰徴徵怔挣掙炡烝爭狰猙癥眐睁睜筝箏篜聇蒸諍诤踭鉦錚鏳钲铮鬇]>zhēng;" -"[䡕愸抍拯掟撜整氶糽]>zhěng;" -"[㡠㡧㱏㽀䂻䈣䛫䥌䥭䦛䦶帧幀政正症証證证郑鄭鋥锃鴊𠔻]>zhèng;" -"[之卮吱坧墌嬂巵搘支枝枳栀梔椥榰汁汥疧知祇祗祬秓秖秪綕織织肢胑胝脂芝蜘衼跖隻馶鳷鴟鴲鵄鸱鼅]>zhī;" -"[㙷㜼㨁䐈䟈䱥䵂侄値值儨劕埴執妷姪慹懫执摭擿柣桎植樴殖漐犆瓆瓡直禃秷稙絷縶聀职職膱蘵蛰蟄蟙褁貭踯蹠躑軄釞馽]>zhí;" -"[㧻㮹㲛䅩䇛䌤䎺䛗䳅劧厎只咫址坁夂帋恉扺抧指旨晊栺止沚洔淽滍疻砋祉紙纸芷藢襧訨趾軹轵酯阯黹]>zhǐ;" -"[㕄㗌㗧㘉㛿㝂㣥㨖㴛䄺䆈䇽䉅䉜䏄䏯䐭䑇䓌䕌䚦䝷䞃䡹䥍䦯䫕䬹䭁䱨偫傂制厔垁娡寘峙崻帙帜幟庢庤廌彘徏徝志忮憄懥挃挚掷搱摯擲旘智梽楖櫍治洷淛滞滯潌炙熫狾猘璏畤疐痔痣礩祑秩秲稚稺穉窒筫紩緻置翐胵腟膣至致臸芖蛭螲袟袠製覟觗觯觶誌豑豒豸質贄质贽跱踬躓輊轾迣郅銍鋕鑕铚锧陟雉駤騭騺驇骘鯯鴙鷙鸷]>zhì;" -"[中伀刣妐幒彸忠忪柊汷泈潨炂煄盅籦終终舯蔠螽衳衷蹱鈡銿鍾鐘钟锺鴤]>zhōng;" -"[㣫冢喠塚尰歱瘇种種肿腫踵塚]>zhǒng;" -"[㐺㲴䱰乑仲众偅堹妕媑湩狆眾祌筗緟茽蚛衆衶諥重]>zhòng;" -"[侜周啁喌州徟洀洲淍烐珘盩矪粥脽舟譸诪賙赒輈輖辀週銂騆鵃鸼]>zhōu;" -"[㛩妯軸轴]>zhóu;" -"[䎻䖞晭疛睭箒肘菷鯞]>zhǒu;" -"[㑇㑳㔌㥮㼙㾭䇠䈙䋓䐍䛆䩜䶇伷僽冑呪咒咮噣宙昼晝甃皱皺籀籒籕粙紂縐繇纣绉胄荮葤詋酎駎驟骤𤏲]>zhòu;" -"帚>zhou;" -"[侏劯朱株槠橥櫧櫫洙潴瀦猪珠硃祩秼絑茱蛛蝫蠩袾誅諸诛诸豬跦邾銖铢鮢鯺鴸鼄猪諸]>zhū;" -"[㔉䌵䕽䘚䟉䥮䮱劚灟炢烛燭爥窋竹竺笁笜築篫舳茿蓫蠋蠾趉躅逐鱁]>zhú;" -"[㵭䘢䰞丶主嘱囑壴孎宔拄斸欘渚濐煑煮瘃瞩矚砫罜詝貯贮陼麈]>zhǔ;" -"[㑏㝉㤖㧣㫂㹥㺛㾻㿾䇡䇧䍆䎷䐢䝒䝬䬡䭖伫佇住助坾嵀杼柱柷樦殶注炷疰眝祝竚筑筯箸紵紸纻羜翥苧莇蛀註諔跓軴鉒鋳鑄铸飳馵駐驻麆]>zhù;" -"[抓挝撾檛膼髽]>zhuā;" -"拽>zhuāi;" -"[跩𨋯]>zhuǎi;" -"[专叀塼嫥専專瑼甎砖磚篿耑膞蟤跧鄟顓颛鱄鷒]>zhuān;" -"[䡱孨竱転轉转]>zhuǎn;" -"[䉵僎啭囀堟撰灷瑑篆篹籑縳腞蒃襈譔賺贃赚饌馔]>zhuàn;" -"[妆妝娤庄桩梉樁粧糚荘莊装裝]>zhuāng;" -"[壮壯壵撞焋状狀]>zhuàng;" -"[娺椎追錐锥隹騅骓鴭鵻]>zhuī;" -"沝>zhuǐ;" -"[䄌坠墜惴桘甀硾礈窡笍縋缒膇諈譵贅赘鑆餟鵽]>zhuì;" -"[綴缀]>zhui;" -"[宒棆窀肫衠諄谆迍]>zhūn;" -"[准埻準綧隼鶽]>zhǔn;" -"稕>zhùn;" -"[㑁倬卓捉桌棹穛穱𠭴]>zhuō;" -"[㣿㪬㭬㺟䅵䕴䶂剢叕啄啅圴妰彴拙撯擆擢斀斫斮斱斲斵晫梲棁棳椓槕櫡汋泎浊浞涿濁濯灂灼炪烵焯琸硺禚篧籗籱絀绌罬茁蠗諑謶诼踔酌鋜錣鐯鐲镯鷟]>zhuó;" -"丵>zhuǒ;" -"劅>zhuò;" -"[兹呲咨嗞姕姿孖孜孳孶嵫栥椔淄湽滋澬玆甾禌秶稵粢紎緇缁茊茲菑葘諮谘貲資赀资趦輜輺辎鄑鈭錙鍿鎡锱镃頾頿髭鯔鲻鴜鶅鶿鷀鹚鼒齍齜龇]>zī;" -"[㜽㧗㺭䔂䘣仔吇呰啙姉姊杍梓榟滓矷秄秭笫籽紫耔胏芓茈虸訾訿釨]>zǐ;" -"[㰣㰷㱴䅆䐉倳剚字恣扻渍漬牸眥眦胔胾自茡]>zì;" -"子>zi;" -"[倧堫宗嵏嵕嵸惾朡棕椶熧猣磫稯綜緃緵综翪腙艐葼蝬豵踨踪蹤鑁騌騣骔鬃鬉鬷鯮鯼]>zōng;" -"[㢔㷓㹅䰌偬傯总惣愡捴揔搃摠熜総縂總蓗]>zǒng;" -"[䍟䝋倊昮猔疭瘲碂粽糉糭縦縱纵]>zòng;" -"[媰掫棷棸箃緅菆諏謅诌诹邹郰鄒鄹陬騶驺鯫鲰黀齱齺]>zōu;" -"[走赱]>zǒu;" -"[㔿㵵䠫奏揍]>zòu;" -"[租蒩]>zū;" -"[㞺㰵㲞䅸䚝䯿䱣卆卒哫崒崪捽族稡足踤踿蹵鎐鏃镞]>zú;" -"[䔃䖕俎爼珇祖組组詛诅阻靻]>zǔ;" -"[躜躦鑽钻]>zuān;" -"[䂎䌣籫繤纂纉纘缵]>zuǎn;" -"[䤸揝攥鑚饡]>zuàn;" -"[厜嶉樶纗羧脧蟕]>zuī;" -"[䮔嘴噿嶊嶵洅濢璻觜]>zuǐ;" -"[㝡㠑㰎䘹晬最栬槜檇檌祽絊罪蕞襊辠酔酻醉鋷錊]>zuì;" -"[尊嶟樽繜罇遵鐏鷷]>zūn;" 
-"[僔噂壿撙譐]>zǔn;" -"[捘燇銌鱒鳟]>zùn;" -"嘬>zuō;" -"[㸲䎰䝫䞰昨椊琢秨稓筰葃鈼]>zuó;" -"[㝾佐咗唨左毑繓]>zuǒ;" -"[㑅㘀㘴㛗㭮䋏䔘作侳做唑坐夎岝岞座怍柞祚糳胙葄袏酢阼]>zuò;" -// End RAW data for converting CJK characters - -// fallbacks - -//# | yi < i; -//# | wu < u; -//# | bi < b; -//# | ci < c; -//# | di < d; -//# | fu < f; -//# | gu < g; -//# | he < h; -//# | ji < j; -//# | ku < k; -//# | li < l; -//# | mi < m; -//# | pi < p; -//# | qi < q; -//# | l < r; -//# | si < s; -//# | ti < t; -//# | f < v; -//# | wa < w; -//# | xi < x; -//# | yi < y; -//# | zi < z; - -// filter out the half-width hangul -// :: [^\uFFBE-\uFFEE] fullwidth-halfwidth (); -//# :: (lower) ; - } -} diff --git a/icu4c/source/data/translit/t_Hani_SpHan.txt b/icu4c/source/data/translit/t_Hani_SpHan.txt deleted file mode 100644 index 14140cc4655..00000000000 --- a/icu4c/source/data/translit/t_Hani_SpHan.txt +++ /dev/null @@ -1,39 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat -// Source: \icu4j\src\com\ibm\icu\impl\data/Transliterator_Han_Spacedhan.txt -// Date: Fri May 28 17:07:31 2004 -//-------------------------------------------------------------------- - -// Han_Spacedhan - -t_Hani_SpHan { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Only intended for internal use -":: fullwidth-halfwidth;" - -"。 > '.';" - -"$terminalPunct = [\\\.\\\,\\\:\\\;\\\?\\\!.,:?!。、;[:Pe:][:Pf:]];" -"$initialPunct = [:Ps:][:Pi:];" - -// add space between any Han or terminal punctuation and letters, and -// between letters and Han or initial punct - -"[[:Ideographic:] $terminalPunct] {} [:Letter:] > ' ' ;" -"[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] > ' ' ;" - -// remove spacing between ideographs and other letters - - "< [:Ideographic:] { ' ' } [:Letter:] ;" - "< [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;" - - } -} diff --git a/icu4c/source/data/translit/t_Hebr_Latn.txt b/icu4c/source/data/translit/t_Hebr_Latn.txt deleted file mode 100644 index bf845386fbd..00000000000 --- a/icu4c/source/data/translit/t_Hebr_Latn.txt +++ /dev/null @@ -1,124 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat -// Source: \icu4j\src\com\ibm\icu\impl\data/Transliterator_Hebrew_Latin.txt -// Date: Fri May 28 17:07:31 2004 -//-------------------------------------------------------------------- - -// Hebrew_Latin - -t_Hebr_Latn { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Transliteration table for Hebrew -// Based on the UNGEGN table at: -// http://www.eki.ee/wgrs/rom1_he.pdf -// -// Exceptions: -// - Accents are added to disambiguate letters -// - Combinations of dagesh, shin/sin dot that produce different -// letters are not yet encoded. -// -// To test, open: -// http://oss.software.ibm.com/cgi-bin/icu/tr -// Click Edit, paste in this file, Save As hebrew-latin/XXX -// (where XXX is a username) -// Now go back to the main window, and try it out. -// Use hebrew-latin/XXX for Output 1, and (Inverse) for Output 2 -// Paste in hebrew text in Input, and hit Transliterate. 
-// -// For more information, see" -// http://oss.software.ibm.com/icu/userguide/Transliteration.html - -":: [[:Hebrew:] [:^ccc=0:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138̄\u05BF] - [\u05BD]] ;" -":: nfkd (nfc) ;" -"$letterAfter = [:M:]* [:L:] ;" - -// move longer items here to avoid masking - -"ח <> ẖ ;" -"צ <> ẕ } $letterAfter;" -"ץ <> ẕ ;" -"ש <> ş ;" -"ת <> ţ ;" - -"א <> ʼ ;" -"ב <> b ;" -"ג <> g ;" -"ד <> d ;" -"ה <> h ;" -"ו <> w ;" -"ז <> z ;" -"ט <> t ;" -"י <> y ;" -"כ <> k } $letterAfter;" -"ך <> k ;" -"ל <> l ;" -"מ <> m } $letterAfter;" -"ם <> m ;" -"נ <> n } $letterAfter;" -"ן <> n ;" -"ס <> s ;" -"ע <> ʻ ;" -"פ <> p } $letterAfter;" -"ף <> p ;" -"ק <> q ;" -"ר <> r ;" - - "װ > | וו;" // HEBREW LIGATURE YIDDISH DOUBLE VAV - "ױ > | וי;" // HEBREW LIGATURE YIDDISH VAV YOD - "ײ > | יי ;" // HEBREW LIGATURE YIDDISH DOUBLE YOD - - -"ּ <> ̇ ;" // dagesh just goes to overdot for now -"ׁ <> ̌ ;" // shin dot -> sh -"ׂ <> ̂ ;" // sin dot -> s - -// points -"$above = [^[:ccc=0:][:ccc=230:]]*;" - -"‎ֲ‎ > à ;" -"‎ֲ‎ $1< a ($above) ̀;" - -"‎ָ‎ > á ;" -"‎ָ‎ $1 < a ($above) ́;" - -"‎ֱ‎ > è ;" -"‎ֱ‎ $1 < e ($above) ̀;" - -"‎ֵ‎ > é ;" -"‎ֵ‎ $1 < e ($above) ́;" - -"‎ְ‎ > e ̆ ;" -"‎ְ‎ $1 < e ($above) ̆;" - -"‎ֹ‎ > ò ;" -"‎ֹ‎ $1 < o ($above) ̀;" - -"ִ <> i ;" -"ֻ <> u ;" -"ַ <> a ;" -"ֶ <> e ;" -"ֳ <> o ;" - -"\u05BF <> ̄ ;" - -// fallbacks -"ק < c ;" -"פ < f } $letterAfter;" -"ף < f ;" -"ז < j ;" -"ו < v ;" -"כס < x ;" - -":: (lower);" -":: nfc (nfd) ;" -":: ([[:Latin:] [:^ccc=0:] [\u02BB-\u02BC\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 ̄ ]]);" - } -} diff --git a/icu4c/source/data/translit/t_Hira_Kana.txt b/icu4c/source/data/translit/t_Hira_Kana.txt deleted file mode 100644 index 46dc7e52b02..00000000000 --- a/icu4c/source/data/translit/t_Hira_Kana.txt +++ /dev/null @@ -1,223 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// 
Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Hiragana_Katakana.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// Hiragana_Katakana - -t_Hira_Kana { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// note: a global filter is more efficient, but MUST include all source chars -":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;" -":: NFKC ();" - -// Hiragana-Katakana - -// This is largely a one-to-one mapping, but it has a -// few kinks: - -// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no -// Hiragana equivalents. We use Hiragana wa/wi/we/wo -// (308F-3092) with a voicing mark (3099), which is -// semantically equivalent. However, this is a non- -// roundtripping transformation. - -// 2. The Katakana small ka/ke (30F5,30F6) have no -// Hiragana equiavlents. We convert them to normal -// Hiragana ka/ke (304B,3051). This is a one-way -// information-losing transformation and precludes -// round-tripping of 30F5 and 30F6. - -// 3. The combining marks 3099-309C are in the Hiragana -// block, but they apply to Katakana as well, so we -// leave them untouched. - -// 4. The Katakana prolonged sound mark 30FC doubles the -// preceding vowel. This is a one-way information- -// losing transformation from Katakana to Hiragana. - -// 5. The Katakana middle dot separates words in foreign -// expressions; we leave this unmodified. - -// The above points preclude successful round-trip -// transformations of arbitrary input text. 
However, -// they provide naturalistic results that should conform -// to user expectations. - - -// Combining equivalents va/vi/ve/vo -"わ゙ <> ヷ;" -"ゐ゙ <> ヸ;" -"ゑ゙ <> ヹ;" -"を゙ <> ヺ;" - -// One-to-one mappings, main block -// 3041:3094 <> 30A1:30F4 -// 309D,E <> 30FD,E -"ぁ <> ァ;" -"あ <> ア;" -"ぃ <> ィ;" -"い <> イ;" -"ぅ <> ゥ;" -"う <> ウ;" -"ぇ <> ェ;" -"え <> エ;" -"ぉ <> ォ;" -"お <> オ;" -"か <> カ;" -"が <> ガ;" -"き <> キ;" -"ぎ <> ギ;" -"く <> ク;" -"ぐ <> グ;" -"け <> ケ;" -"げ <> ゲ;" -"こ <> コ;" -"ご <> ゴ;" -"さ <> サ;" -"ざ <> ザ;" -"し <> シ;" -"じ <> ジ;" -"す <> ス;" -"ず <> ズ;" -"せ <> セ;" -"ぜ <> ゼ;" -"そ <> ソ;" -"ぞ <> ゾ;" -"た <> タ;" -"だ <> ダ;" -"ち <> チ;" -"ぢ <> ヂ;" -"っ <> ッ;" -"つ <> ツ;" -"づ <> ヅ;" -"て <> テ;" -"で <> デ;" -"と <> ト;" -"ど <> ド;" -"な <> ナ;" -"に <> ニ;" -"ぬ <> ヌ;" -"ね <> ネ;" -"の <> ノ;" -"は <> ハ;" -"ば <> バ;" -"ぱ <> パ;" -"ひ <> ヒ;" -"び <> ビ;" -"ぴ <> ピ;" -"ふ <> フ;" -"ぶ <> ブ;" -"ぷ <> プ;" -"へ <> ヘ;" -"べ <> ベ;" -"ぺ <> ペ;" -"ほ <> ホ;" -"ぼ <> ボ;" -"ぽ <> ポ;" -"ま <> マ;" -"み <> ミ;" -"む <> ム;" -"め <> メ;" -"も <> モ;" -"ゃ <> ャ;" -"や <> ヤ;" -"ゅ <> ュ;" -"ゆ <> ユ;" -"ょ <> ョ;" -"よ <> ヨ;" -"ら <> ラ;" -"り <> リ;" -"る <> ル;" -"れ <> レ;" -"ろ <> ロ;" -"ゎ <> ヮ;" -"わ <> ワ;" -"ゐ <> ヰ;" -"ゑ <> ヱ;" -"を <> ヲ;" -"ん <> ン;" -"ゔ <> ヴ;" -"ゝ <> ヽ;" -"ゞ <> ヾ;" - -// One-way Katakana-Hiragana xform of small K ka/ke to -// normal H ka/ke. -"か < ヵ;" -"け < ヶ;" - -// Katakana followed by a prolonged sound mark 30FC has -// its final vowel doubled. This is a Katakana-Hiragana -// one-way information-losing transformation. We -// include the small Katakana (e.g., small A 3041) and -// do not distinguish them from their large -// counterparts. It doesn't make sense to double a -// small counterpart vowel as a small Hiragana vowel, so -// we don't do so. In natural text this should never -// occur anyway. If a 30FC is seen without a preceding -// vowel sound (e.g., after n 30F3) we do not change it. 
- -//## $long = ー; - -// The following categories are Hiragana, not Katakana -// as might be expected, since by the time we get to the -// 30FC, the preceding character will have already been -// transformed to Hiragana. - -// {The following mechanically generated from the -// Unicode 3.0 data:} - -"$xa = [" -"ぁ あ か が さ ざ" -"た だ な は ば ぱ" -"ま ゃ や ら ゎ わ" -"];" - -"$xi = [" -"ぃ い き ぎ し じ" -"ち ぢ に ひ び ぴ" -"み り ゐ" -"];" - -"$xu = [" -"ぅ う く ぐ す ず" -"っ つ づ ぬ ふ ぶ" -"ぷ む ゅ ゆ る ゔ" -"];" - -"$xe = [" -"ぇ え け げ せ ぜ" -"て で ね へ べ ぺ" -"め れ ゑ" -"];" - -"$xo = [" -"ぉ お こ ご そ ぞ" -"と ど の ほ ぼ ぽ" -"も ょ よ ろ を" -"];" - -"あ < $xa {ー};" -"い < $xi {ー};" -"う < $xu {ー};" -"え < $xe {ー};" -"お < $xo {ー};" - -":: (NFKC) ;" - -// note: a global filter is more efficient, but MUST include all source chars!! -":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);" - -// eof - } -} diff --git a/icu4c/source/data/translit/t_Hira_Latn.txt b/icu4c/source/data/translit/t_Hira_Latn.txt deleted file mode 100644 index 68219de3ba8..00000000000 --- a/icu4c/source/data/translit/t_Hira_Latn.txt +++ /dev/null @@ -1,30 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Hiragana_Latin.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// Hiragana_Latin - -t_Hira_Latn { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -":: [\u3041-\u3094\u3099\u309D-\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0\u30D3\u30D6\u30D9\u30DC\u30F4\u30F7-\u30FA\u30FE] ;" -":: NFD ;" - -":: Hiragana-Katakana;" -":: Katakana-Latin;" - -":: NFC ;" -":: (Lower) ;" -":: ([',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]) ;" - } -} diff --git a/icu4c/source/data/translit/t_InterIndic_Beng.txt b/icu4c/source/data/translit/t_InterIndic_Beng.txt deleted file mode 100644 index 87a14ebcc57..00000000000 --- a/icu4c/source/data/translit/t_InterIndic_Beng.txt +++ /dev/null @@ -1,163 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_InterIndic_Bengali.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// InterIndic_Bengali - -t_InterIndic_Beng { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// InterIndic-Bengali -//:: NFD (NFC) ; -"\uE001>\u0981;" // SIGN CANDRABINDU -"\uE002>\u0982;" // SIGN ANUSVARA -"\uE003>\u0983;" // SIGN VISARGA -"\uE004>\u0985;" // FALLBACK TO LETTER A -"\uE005>\u0985;" // LETTER A -"\uE006>\u0986;" // LETTER AA -"\uE007>\u0987;" // LETTER I -"\uE008>\u0988;" // LETTER II -"\uE009>\u0989;" // LETTER U -"\uE00A>\u098A;" // LETTER UU -"\uE00B>\u098B;" // LETTER VOCALIC R -"\uE00C>\u098C;" // LETTER VOCALIC L -"\uE00D>\u098F;" // FALLBACK -"\uE00E>\u098F;" // FALLBACK -"\uE00F>\u098F;" // LETTER E -"\uE010>\u0990;" // LETTER AI -"\uE011>\u0993;" // FALLBACK -"\uE012>\u0993;" // FALLBACK -"\uE013>\u0993;" // LETTER O -"\uE014>\u0994;" // LETTER AU -"\uE015>\u0995;" // LETTER KA -"\uE016>\u0996;" // LETTER KHA -"\uE017>\u0997;" // LETTER GA -"\uE018>\u0998;" // LETTER GHA -"\uE019>\u0999;" // LETTER NGA -"\uE01A>\u099A;" // LETTER CA -"\uE01B>\u099B;" // LETTER CHA -"\uE01C>\u099C;" // LETTER JA -"\uE01D>\u099D;" // LETTER JHA -"\uE01E>\u099E;" // LETTER NYA -"\uE01F>\u099F;" // LETTER TTA -"\uE020>\u09A0;" // LETTER TTHA -"\uE021>\u09A1;" // LETTER DDA -"\uE022>\u09A2;" // LETTER DDHA -"\uE023>\u09A3;" // LETTER NNA -"\uE024>\u09A4;" // LETTER TA -"\uE025>\u09A5;" // LETTER THA -"\uE026>\u09A6;" // LETTER DA -"\uE027>\u09A7;" // LETTER DHA -"\uE028>\u09A8;" // LETTER NA -"\uE029>\u09A8\u09BC;" // REMAP (indicExceptions.txt): \u09a9>\u09a8 = LETTER 
NNNA>LETTER NA -"\uE02A>\u09AA;" // LETTER PA -"\uE02B>\u09AB;" // LETTER PHA -"\uE02C>\u09AC;" // LETTER BA -"\uE02D>\u09AD;" // LETTER BHA -"\uE02E>\u09AE;" // LETTER MA -"\uE02F>\u09AF;" // LETTER YA -"\uE030>\u09B0;" // LETTER RA -"\uE031>\u09B0\u09BC;" // FALLBACK to RA -"\uE032>\u09B2;" // LETTER LA -"\uE033>\u09B2;" // REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA -"\uE034>\u09B2;" // REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA -"\uE035>\u09AC;" // REMAP (indicExceptions.txt): \u09b5>\u09ac = LETTER VA>LETTER BA -"\uE036>\u09B6;" // LETTER SHA -"\uE037>\u09B7;" // LETTER SSA -"\uE038>\u09B8;" // LETTER SA -"\uE039>\u09B9;" // LETTER HA -"\uE03C>\u09BC;" // SIGN NUKTA -"\uE03D>\u09bd;" // SIGN AVAGRAHA -"\uE03E>\u09BE;" // VOWEL SIGN AA -"\uE03F>\u09BF;" // VOWEL SIGN I -"\uE040>\u09C0;" // VOWEL SIGN II -"\uE041>\u09C1;" // VOWEL SIGN U -"\uE042>\u09C2;" // VOWEL SIGN UU -"\uE043>\u09C3;" // VOWEL SIGN VOCALIC R -"\uE044>\u09C4;" // VOWEL SIGN VOCALIC RR -"\uE045>\u09C7;" // REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E -"\uE046>\u09C7;" // FALLBACK -"\uE047>\u09C7;" // VOWEL SIGN E -"\uE048>\u09C8;" // VOWEL SIGN AI -"\uE049>\u09C7\u09BE;" // REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O -"\uE04A>\u09C7\u09BE;" // FALLBACK -"\uE04B>\u09C7\u09BE;" // VOWEL SIGN O -"\uE04C>\u09C7\u09D7;" // VOWEL SIGN AU -"\uE04D>\u09CD;" // SIGN VIRAMA -"\uE050>\u0993\u0982;" // InterIndic-Bengali: OM -"\ue051>;" -"\ue052>;" -"\ue053>;" -"\ue054>;" -"\uE055>;" // LENGTH MARK -"\uE056>\u09C8;" // REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI -"\uE057>\u09D7;" // AU LENGTH MARK -"\uE058>\u0995\u09BC;" // FALLBACK -"\uE059>\u0996\u09BC;" // REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA -"\uE05A>\u0997\u09BC;" // REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA -"\uE05B>\u099C\u09BC;" // REMAP 
(indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA -"\uE05C>\u09A1\u09BC;" // FALLBACK -"\uE05D>\u09A2\u09BC;" // LETTER RHA -"\uE05E>\u09AB\u09BC;" // REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA -"\uE05F>\u09AF\u09BC;" // LETTER YYA -"\uE060>\u09E0;" // LETTER VOCALIC RR -"\uE061>\u09E1;" // LETTER VOCALIC LL -"\uE062>\u09E2;" // VOWEL SIGN VOCALIC L -"\uE063>\u09E3;" // VOWEL SIGN VOCALIC LL -"\uE064>\u0964;" // DANDA -"\uE065>\u0965;" // DOUBLE DANDA -"\uE066>\u09E6;" // DIGIT ZERO -"\uE067>\u09E7;" // DIGIT ONE -"\uE068>\u09E8;" // DIGIT TWO -"\uE069>\u09E9;" // DIGIT THREE -"\uE06A>\u09EA;" // DIGIT FOUR -"\uE06B>\u09EB;" // DIGIT FIVE -"\uE06C>\u09EC;" // DIGIT SIX -"\uE06D>\u09ED;" // DIGIT SEVEN -"\uE06E>\u09EE;" // DIGIT EIGHT -"\uE06F>\u09EF;" // DIGIT NINE -"\ue070>;" // ABBREVIATION SIGN -"\ue071>\u09F0;" // LETTER RA WITH MIDDLE DIAGONAL -"\ue072>\u09F1;" // LETTER RA WITH LOWER DIAGONAL -"\ue073>\u09F2;" // RUPEE MARK -"\ue074>\u09F3;" // RUPEE SIGN -"\ue075>\u09F4;" // CURRENCY NUMERATOR ONE -"\ue076>\u09F5;" // CURRENCY NUMERATOR TWO -"\ue077>\u09F6;" // CURRENCY NUMERATOR THREE -"\ue078>\u09F7;" // CURRENCY NUMERATOR FOUR -"\ue079>\u09F8;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR -"\ue07A>\u09F9;" // CURRENCY DENOMINATOR SIXTEEN -"\ue07B>\u09FA;" // ISSHAR -"\uE07C>;" // TIPPI -"\uE07D>;" // ADDAK -"\uE07E>;" // IRI -"\uE07F>;" // URA -"\uE080>;" // EK ONKAR -"\uE081>\u09AC;" // FALLBACK FOR ORIYA LETTER WA -"0 > \u09E6;" // FALLBACK FOR TAMIL -"1 > \u09E7;" - - - - - - - - - - - -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_InterIndic_Deva.txt b/icu4c/source/data/translit/t_InterIndic_Deva.txt deleted file mode 100644 index 7c962e33acf..00000000000 --- a/icu4c/source/data/translit/t_InterIndic_Deva.txt +++ /dev/null @@ -1,174 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International 
Business Machines -// Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_InterIndic_Devanagari.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// InterIndic_Devanagari - -t_InterIndic_Deva { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// InterIndic-Devanagari -//:: NFD (NFC) ; -//Rules for Decomposed characters - "\ue028\ue03c > \u0929;" //\ue029 - "\ue030\ue03c > \u0931;" //\ue031 - "\ue033\ue03c > \u0934;" //\ue034 - "\ue015\ue03c > \u0958;" //\ue058 LETTER QA (For Urdu) - "\ue016\ue03c > \u0959;" //\ue059 LETTER KHHA (For Urdu) - "\ue017\ue03c > \u095a;" //\ue05a LETTER GHHA (For Urdu) - "\ue01c\ue03c > \u095b;" //\ue05b LETTER ZA (For Urdu) - "\ue021\ue03c > \u095c;" //\ue05c LETTER DDDHA (pronounced RRA) - "\ue022\ue03c > \u095d;" //\ue05d LETTER RHA (pronounced RRHA) - "\ue02b\ue03c > \u095e;" //\ue05e LETTER FA - "\ue02f\ue03c > \u095f;" //\ue05f LETTER YYA - - //Decomposed compatibility transliterations - "\ue012\ue057>\u0914;" // FALLBACK FOR TAMIL AU - "0 > \u0966;" // FALLBACK FOR TAMIL - "1 > \u0967;" - - "\ue055>;" // FALLBACK BLOW AWAY KANNADA AND TELUGU LENGTH MARK - "\ue056>;" // FALLBACK BLOW AWAY KANNADA AND TELUGU AI LENGTH MARK - "\ue057>;" // FALLBACK BLOW AWAY TAMIL AU LENGTH MARK - - "\ue001 > \u0901;" // SIGN CANDRABINDU - "\ue002 > \u0902;" // SIGN ANUSVARA - "\ue003 > \u0903;" // SIGN VISARGA - "\ue004 > \u0904;" // SIGN SHORT A - "\ue005 > \u0905;" // LETTER A - "\ue006 > \u0906;" // LETTER AA - "\ue007 > \u0907;" // LETTER I - "\ue008 > \u0908;" // LETTER II - "\ue009 > \u0909;" // LETTER U - "\ue00a > \u090a;" // 
LETTER UU - "\ue00b > \u090b;" // LETTER VOCALIC R - "\ue00c > \u090c;" // LETTER VOCALIC L - "\ue00d > \u090d;" // LETTER CANDRA E (For representing English sounds) - "\ue00e > \u090e;" // LETTER SHORT E(For Southern Scripts) - "\ue00f > \u090f;" // LETTER E - "\ue010 > \u0910;" // LETTER AI - "\ue011 > \u0911;" // LETTER CANDRA O (For representing English sounds) - "\ue012 > \u0912;" // LETTER SHORT O (For Southern Scripts) - "\ue013 > \u0913;" // LETTER O - "\ue014 > \u0914;" // LETTER AU - "\ue015 > \u0915;" // LETTER KA - "\ue016 > \u0916;" // LETTER KHA - "\ue017 > \u0917;" // LETTER GA - "\ue018 > \u0918;" // LETTER GHA - "\ue019 > \u0919;" // LETTER NGA - "\ue01a > \u091a;" // LETTER CA - "\ue01b > \u091b;" // LETTER CHA - "\ue01c > \u091c;" // LETTER JA - "\ue01d > \u091d;" // LETTER JHA - "\ue01e > \u091e;" // LETTER NYA - "\ue01f > \u091f;" // LETTER TTA - "\ue020 > \u0920;" // LETTER TTHA - "\ue021 > \u0921;" // LETTER DDA - "\ue022 > \u0922;" // LETTER DDHA - "\ue023 > \u0923;" // LETTER NNA - "\ue024 > \u0924;" // LETTER TA - "\ue025 > \u0925;" // LETTER THA - "\ue026 > \u0926;" // LETTER DA - "\ue027 > \u0927;" // LETTER DHA - "\ue028 > \u0928;" // LETTER NA - "\ue029 > \u0929;" // LETTER NNNA - "\ue02a > \u092a;" // LETTER PA - "\ue02b > \u092b;" // LETTER PHA - "\ue02c > \u092c;" // LETTER BA - "\ue02d > \u092d;" // LETTER BHA - "\ue02e > \u092e;" // LETTER MA - "\ue02f > \u092f;" // LETTER YA - "\ue030 > \u0930;" // LETTER RA - "\ue031 > \u0931;" // LETTER RRA (Eyelash RA for Southern scripts) - //\ue031 > \u0930; - "\ue032 > \u0932;" // LETTER LA - "\ue033 > \u0933;" // LETTER LLA - "\ue034 > \u0934;" // LETTER LLLA (LLLA for Southern scripts) - //\ue034 > \u0933; - "\ue035 > \u0935;" // LETTER VA - "\ue036 > \u0936;" // LETTER SHA - "\ue037 > \u0937;" // LETTER SSA - "\ue038 > \u0938;" // LETTER SA - "\ue039 > \u0939;" // LETTER HA - "\ue03c > \u093c;" // SIGN NUKTA - "\ue03d > \u093d;" // SIGN AVAGRAHA - "\ue03e > \u093e;" // VOWEL SIGN AA - 
"\ue03f > \u093f;" // VOWEL SIGN I - "\ue040 > \u0940;" // VOWEL SIGN II - "\ue041 > \u0941;" // VOWEL SIGN U - "\ue042 > \u0942;" // VOWEL SIGN UU - "\ue043 > \u0943;" // VOWEL SIGN VOCALIC R - "\ue044 > \u0944;" // VOWEL SIGN VOCALIC RR - "\ue045 > \u0945;" // VOWEL SIGN CANDRA E - "\ue046 > \u0946;" // VOWEL SIGN SHORT E - "\ue047 > \u0947;" // VOWEL SIGN E - "\ue048 > \u0948;" // VOWEL SIGN AI - "\ue049 > \u0949;" // VOWEL SIGN CANDRA O - "\ue04a > \u094a;" // VOWEL SIGN SHORT O - "\ue04b > \u094b;" // VOWEL SIGN O - "\ue04c > \u094c;" // VOWEL SIGN AU - "\ue04d > \u094d;" // SIGN VIRAMA - "\ue050 > \u0950;" // OM - "\ue051 > \u0951;" // STRESS SIGN UDATTA - "\ue052 > \u0952;" // STRESS SIGN ANUDATTA - "\ue053 > \u0953;" // GRAVE ACCENT - "\ue054 > \u0954;" // ACUTE ACCENT - "\ue058 > \u0958;" // LETTER QA (For Urdu) - "\ue059 > \u0959;" // LETTER KHHA (For Urdu) - "\ue05a > \u095a;" // LETTER GHHA (For Urdu) - "\ue05b > \u095b;" // LETTER ZA (For Urdu) - "\ue05c > \u095c;" // LETTER DDDHA (pronounced RRA) - "\ue05d > \u095d;" // LETTER RHA (pronounced RRHA) - "\ue05e > \u095e;" // LETTER FA - "\ue05f > \u095f;" // LETTER YYA - "\ue060 > \u0960;" // LETTER VOCALIC RR - "\ue061 > \u0961;" // LETTER VOCALIC LL - "\ue062 > \u0962;" // VOWEL SIGN VOCALIC L - "\ue063 > \u0963;" // VOWEL SIGN VOCALIC LL - "\ue064 > \u0964;" // DANDA - "\ue065 > \u0965;" // DOUBLE DANDA - "\ue066 > \u0966;" // DIGIT ZERO - "\ue067 > \u0967;" // DIGIT ONE - "\ue068 > \u0968;" // DIGIT TWO - "\ue069 > \u0969;" // DIGIT THREE - "\ue06a > \u096a;" // DIGIT FOUR - "\ue06b > \u096b;" // DIGIT FIVE - "\ue06c > \u096c;" // DIGIT SIX - "\ue06d > \u096d;" // DIGIT SEVEN - "\ue06e > \u096e;" // DIGIT EIGHT - "\ue06f > \u096f;" // DIGIT NINE - - "\ue070>\u0970;" // ABBREVIATION SIGN - "\ue071>\u0930;" // LETTER RA WITH MIDDLE DIAGONAL - "\ue072>\u0930;" // LETTER RA WITH LOWER DIAGONAL - "\ue073>;" // RUPEE MARK - "\ue074>\u0930\u0942;" // RUPEE SIGN - "\ue075>;" // CURRENCY NUMERATOR ONE - 
"\ue076>;" // CURRENCY NUMERATOR TWO - "\ue077>;" // CURRENCY NUMERATOR THREE - "\ue078>;" // CURRENCY NUMERATOR FOUR - "\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR - "\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN - "\ue07B>;" // ISSHAR - "\uE07C>;" // TIPPI - "\uE07D>;" // ADDAK - "\uE07E>;" // IRI - "\uE07F>;" // URA - "\uE080>;" // EK ONKAR - "\uE081>\u0935;" // FALLBACK FOR ORIYA LETTER WA - -// \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN -// :: NFC; -// eof - } -} diff --git a/icu4c/source/data/translit/t_InterIndic_Gujr.txt b/icu4c/source/data/translit/t_InterIndic_Gujr.txt deleted file mode 100644 index fc008e82edd..00000000000 --- a/icu4c/source/data/translit/t_InterIndic_Gujr.txt +++ /dev/null @@ -1,154 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_InterIndic_Gujarati.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// InterIndic_Gujarati - -t_InterIndic_Gujr { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// InterIndic-Gujarati -//:: NFD (NFC) ; -"\ue001>\u0a81;" // SIGN CANDRABINDU -"\ue002>\u0a82;" // SIGN ANUSVARA -"\ue003>\u0a83;" // SIGN VISARGA -"\uE004>\u0a85;" // FALLBACK TO LETTER A -"\ue005>\u0a85;" // LETTER A -"\ue006>\u0a86;" // LETTER AA -"\ue007>\u0a87;" // LETTER I -"\ue008>\u0a88;" // LETTER II -"\ue009>\u0a89;" // LETTER U -"\ue00a>\u0a8a;" // LETTER UU -"\ue00b>\u0a8b;" // LETTER VOCALIC R -"\ue00c>\u0a8c;" // LETTER 
VOCALIC L -"\ue00d>\u0a8d;" // GUJARATI VOWEL CANDRA E -"\ue00e>\u0a8f;" // FALLBACK -"\ue00f>\u0a8f;" // InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E) -"\ue010>\u0a90;" // LETTER AI -"\ue011>\u0a91;" // FALLBACK -"\ue012>\u0a93;" // FALLBACK -"\ue013>\u0a93;" // UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O) -"\ue014>\u0a94;" // LETTER AU -"\ue015>\u0a95;" // LETTER KA -"\ue016>\u0a96;" // LETTER KHA -"\ue017>\u0a97;" // LETTER GA -"\ue018>\u0a98;" // LETTER GHA -"\ue019>\u0a99;" // LETTER NGA -"\ue01a>\u0a9a;" // LETTER CA -"\ue01b>\u0a9b;" // LETTER CHA -"\ue01c>\u0a9c;" // LETTER JA -"\ue01d>\u0a9d;" // LETTER JHA -"\ue01e>\u0a9e;" // LETTER NYA -"\ue01f>\u0a9f;" // LETTER TTA -"\ue020>\u0aa0;" // LETTER TTHA -"\ue021>\u0aa1;" // LETTER DDA -"\ue022>\u0aa2;" // LETTER DDHA -"\ue023>\u0aa3;" // LETTER NNA -"\ue024>\u0aa4;" // LETTER TA -"\ue025>\u0aa5;" // LETTER THA -"\ue026>\u0aa6;" // LETTER DA -"\ue027>\u0aa7;" // LETTER DHA -"\ue028>\u0aa8;" // LETTER NA -"\ue029>\u0aa8\u0abc;" // FALLBACK to NA+NUKTA -"\ue02a>\u0aaa;" // LETTER PA -"\ue02b>\u0aab;" // LETTER PHA -"\ue02c>\u0aac;" // LETTER BA -"\ue02d>\u0aad;" // LETTER BHA -"\ue02e>\u0aae;" // LETTER MA -"\ue02f>\u0aaf;" // LETTER YA -"\ue030>\u0ab0;" // LETTER RA -"\ue031>\u0ab0\u0abc;" // FALLBACK -"\ue032>\u0ab2;" // LETTER LA -"\ue033>\u0ab3;" // LETTER LLA -"\ue034>\u0ab3\u0abc;" // LETTER LLLA>LETTER LLA+NUKTA -"\ue035>\u0ab5;" // LETTER VA -"\ue036>\u0ab6;" // LETTER SHA -"\ue037>\u0ab7;" // LETTER SSA -"\ue038>\u0ab8;" // LETTER SA -"\ue039>\u0ab9;" // LETTER HA -"\ue03c>\u0abc;" // SIGN NUKTA -"\ue03d>\u0abd;" // SIGN AVAGRAHA -"\ue03e>\u0abe;" // VOWEL SIGN AA -"\ue03f>\u0abf;" // VOWEL SIGN I -"\ue040>\u0ac0;" // VOWEL SIGN II -"\ue041>\u0ac1;" // VOWEL SIGN U -"\ue042>\u0ac2;" // VOWEL SIGN UU -"\ue043>\u0ac3;" // VOWEL SIGN VOCALIC R -"\ue044>\u0ac4;" // VOWEL SIGN VOCALIC RR -"\ue045>\u0ac5;" // VOWEL SIGN CANDRA E -"\ue046>\u0ac7;" // FALLBACK -"\ue047>\u0ac7;" // 
InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E) -"\ue048>\u0ac8;" // VOWEL SIGN AI -"\ue049>\u0ac9;" // VOWEL SIGN CANDRA O -"\ue04a>\u0acb;" // FALLBACK -"\ue04b>\u0acb;" // UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O) -"\ue04c>\u0acc;" // VOWEL SIGN AU -"\ue04d>\u0acd;" // SIGN VIRAMA -"\ue050>\u0ad0;" // OM -"\ue051>;" -"\ue052>;" -"\ue053>;" -"\ue054>;" -"\ue055>;" // UNMAPPED InterIndic-Gujarati: LENGTH MARK -"\ue056>\u0ac8;" // REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI -"\ue057>\u0acc;" // REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU -"\ue058>\u0a95\u0abc;" // FALLBACK -"\ue059>\u0a96\u0abc;" // REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA -"\ue05a>\u0a97\u0abc;" // REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA -"\ue05b>\u0a9c\u0abc;" // REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA -"\ue05c>\u0aa1\u0abc;" // FALLBACK -"\ue05d>\u0aa2\u0abc;" // REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA -"\ue05e>\u0aab\u0abc;" // REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER FA>LETTER PHA.SIGN NUKTA -"\ue05f>\u0aaf\u0abc;" // REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA -"\ue060>\u0ae0;" // LETTER VOCALIC RR -"\ue061>\u0ae1;" // LETTER VOCALIC LL -"\ue062>\u0abf\u0abc;" // REMAP (indicExceptions.txt): \u0ae2>\u0abf\u0abc = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA -"\ue063>\u0ac0\u0abc;" // REMAP (indicExceptions.txt): \u0ae3>\u0ac0\u0abc = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA -"\uE064>\u0964;" // DANDA -"\uE065>\u0965;" // DOUBLE DANDA -"\ue066>\u0ae6;" // DIGIT ZERO -"\ue067>\u0ae7;" // DIGIT ONE -"\ue068>\u0ae8;" // DIGIT TWO -"\ue069>\u0ae9;" // DIGIT THREE -"\ue06a>\u0aea;" // DIGIT FOUR -"\ue06b>\u0aeb;" // DIGIT FIVE -"\ue06c>\u0aec;" // DIGIT SIX 
-"\ue06d>\u0aed;" // DIGIT SEVEN -"\ue06e>\u0aee;" // DIGIT EIGHT -"\ue06f>\u0aef;" // DIGIT NINE -"\ue070>;" // ABBREVIATION SIGN -"\ue071>\u0ab0;" // LETTER RA WITH MIDDLE DIAGONAL -"\ue072>\u0ab0;" // LETTER RA WITH LOWER DIAGONAL -"\ue073>;" // RUPEE MARK -"\ue074>;" // RUPEE SIGN -"\ue075>;" // CURRENCY NUMERATOR ONE -"\ue076>;" // CURRENCY NUMERATOR TWO -"\ue077>;" // CURRENCY NUMERATOR THREE -"\ue078>;" // CURRENCY NUMERATOR FOUR -"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR -"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN -"\ue07B>;" // ISSHAR -"\uE07C>;" // TIPPI -"\uE07D>;" // ADDAK -"\uE07E>;" // IRI -"\uE07F>;" // URA -"\uE080>;" // EK ONKAR -"\uE081>\u0ab5;" // FALLBACK FOR ORIYA LETTER WA -"0 > \u0ae6;" // FALLBACK FOR TAMIL -"1 > \u0ae7;" - -//\ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_InterIndic_Guru.txt b/icu4c/source/data/translit/t_InterIndic_Guru.txt deleted file mode 100644 index 0b5f0cf22a1..00000000000 --- a/icu4c/source/data/translit/t_InterIndic_Guru.txt +++ /dev/null @@ -1,163 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_InterIndic_Gurmukhi.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// InterIndic_Gurmukhi - -t_InterIndic_Guru { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// InterIndic-Gurmukhi -//:: NFD (NFC) ; -"$vowel = [\u0A05-\u0A14 \u0A3e-\u0A4D];" -"$consonant = [\u0A15-\u0A39];" - -"\ue001>\u0A01;" // SIGN CHANDRABINDU -//rules for BINDI - -// Anusvara is equivalent to BINDI when preceeded by a vowel -"$vowel{\ue002>\u0a02;" // SIGN ANUSVARA (\u0a02 = SIGN BINDI) -// else is equivalent to TIPPI -"$consonant{\ue002>\u0a70;" // SIGN TIPPI -"\ue002>\u0a02;" - -"\ue003>;" // FALLBACK BLOW AWAY SIGN VISARGA -"\uE004>\u0a05;" // FALLBACK TO LETTER A -"\ue005>\u0a05;" // LETTER A -"\ue006>\u0a06;" // LETTER AA -"\ue007>\u0a07;" // LETTER I -"\ue008>\u0a08;" // LETTER II -"\ue009>\u0a09;" // LETTER U -"\ue00a>\u0a0a;" // LETTER UU -"\ue00b>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I -"\ue00c>\u0a33;" // FALLBACK -"\ue00d>\u0a0f;" // FALLBACK -"\ue00e>\u0a0f;" // FALLBACK -"\ue00f>\u0a0f;" // LETTER EE -"\ue010>\u0a10;" // LETTER AI -"\ue011>\u0a13;" // FALLBACK -"\ue012>\u0a13;" // FALLBACK -"\ue013>\u0a13;" // LETTER OO -"\ue014>\u0a14;" // LETTER AU -"\ue015>\u0a15;" // LETTER KA -"\ue016>\u0a16;" // LETTER KHA -"\ue017>\u0a17;" // LETTER GA -"\ue018>\u0a18;" // LETTER GHA -"\ue019>\u0a19;" // LETTER NGA -"\ue01a>\u0a1a;" // LETTER CA -"\ue01b>\u0a1b;" // LETTER CHA -"\ue01c>\u0a1c;" // LETTER JA -"\ue01d>\u0a1d;" // LETTER JHA -"\ue01e>\u0a1e;" // LETTER NYA -"\ue01f>\u0a1f;" // LETTER 
TTA -"\ue020>\u0a20;" // LETTER TTHA -"\ue021>\u0a21;" // LETTER DDA -"\ue022>\u0a22;" // LETTER DDHA -"\ue023>\u0a23;" // LETTER NNA -"\ue024>\u0a24;" // LETTER TA -"\ue025>\u0a25;" // LETTER THA -"\ue026>\u0a26;" // LETTER DA -"\ue027>\u0a27;" // LETTER DHA -"\ue028>\u0a28;" // LETTER NA -"\ue029>\u0a28\u0a3c;" // REMAP (indicExceptions.txt): \u0a29>\u0a28 = LETTER NNNA>LETTER NA -"\ue02a>\u0a2a;" // LETTER PA -"\ue02b>\u0a2b;" // LETTER PHA -"\ue02c>\u0a2c;" // LETTER BA -"\ue02d>\u0a2d;" // LETTER BHA -"\ue02e>\u0a2e;" // LETTER MA -"\ue02f>\u0a2f;" // LETTER YA -"\ue030>\u0a30;" // LETTER RA -"\ue031>\u0a30\u0a3c;" // FALLBACK LETTER RA+NUKTA -"\ue032>\u0a32;" // LETTER LA -"\ue033>\u0a33;" // LETTER LLA -"\ue034>\u0a33;" // REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA -"\ue035>\u0a35;" // LETTER VA -"\ue036>\u0a36;" // LETTER SHA -"\ue037>\u0a36;" // REMAP (indicExceptions.txt): \u0a37>\u0a36 = LETTER SSA>LETTER SHA -"\ue038>\u0a38;" // LETTER SA -"\ue039>\u0a39;" // LETTER HA -"\ue03c>\u0a3c;" // SIGN NUKTA -"\ue03d>;" // FALLBACK BLOW AWAY SIGN AVAGRAHA -"\ue03e>\u0a3e;" // VOWEL SIGN AA -"\ue03f>\u0a3f;" // VOWEL SIGN I -"\ue040>\u0a40;" // VOWEL SIGN II -"\ue041>\u0a41;" // VOWEL SIGN U -"\ue042>\u0a42;" // VOWEL SIGN UU -"\ue043>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC R -"\ue044>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR -"\ue045>\u0a48;" // REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI -"\ue046>\u0a47;" // FALLABCK -"\ue047>\u0a47;" // VOWEL SIGN EE -"\ue048>\u0a48;" // VOWEL SIGN AI -"\ue049>\u0a4c;" // REMAP (indicExceptions.txt): \u0a49>\u0a4c = VOWEL SIGN CANDRA O>VOWEL SIGN AU -"\ue04a>\u0a4b;" // FALLBACK -"\ue04b>\u0a4b;" // VOWEL SIGN OO -"\ue04c>\u0a4c;" // VOWEL SIGN AU -"\ue04d>\u0a4d;" // SIGN VIRAMA -"\ue050>\u0a0f\u0a02;" // FALLBACK to OO+BINDI : OM -"\ue051>;" -"\ue052>;" -"\ue053>;" -"\ue054>;" -"\ue055>;" // FALLBACK BLOW AWAY LENGTH MARK -"\ue056>\u0a48;" // REMAP 
(indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI -"\ue057>\u0a4c;" // REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU -"\ue058>\u0a15\u0a3c;" // FALLBACK RA+ NUKTA -"\ue059>\u0a59;" // LETTER KHHA -"\ue05a>\u0a5a;" // LETTER GHHA -"\ue05b>\u0a5b;" // LETTER ZA -"\ue05c>\u0a5c;" // LETTER RRA -"\ue05d>\u0a22\u0a3c;" // REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA -"\ue05e>\u0a5e;" // LETTER FA -"\ue05f>\u0a2f\u0a3c;" // REMAP (indicExceptions.txt): \u0a5f>\u0a2f = LETTER YYA>LETTER YA -"\ue060>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I -"\ue061>\u0a32\u0a3c;" // -"\ue062>\u0a3f\u0a3c;" // REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA -"\ue063>\u0a40\u0a3c;" // REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA -"\uE064>\u0964;" // DANDA -"\uE065>\u0965;" // DOUBLE DANDA -"\ue066>\u0a66;" // DIGIT ZERO -"\ue067>\u0a67;" // DIGIT ONE -"\ue068>\u0a68;" // DIGIT TWO -"\ue069>\u0a69;" // DIGIT THREE -"\ue06a>\u0a6a;" // DIGIT FOUR -"\ue06b>\u0a6b;" // DIGIT FIVE -"\ue06c>\u0a6c;" // DIGIT SIX -"\ue06d>\u0a6d;" // DIGIT SEVEN -"\ue06e>\u0a6e;" // DIGIT EIGHT -"\ue06f>\u0a6f;" // DIGIT NINE -"\ue070>;" // ABBREVIATION SIGN -"\ue071>\u0a30;" // LETTER RA WITH MIDDLE DIAGONAL -"\ue072>\u0a30;" // LETTER RA WITH LOWER DIAGONAL -"\ue073>;" // RUPEE MARK -"\ue074>;" // RUPEE SIGN -"\ue075>;" // CURRENCY NUMERATOR ONE -"\ue076>;" // CURRENCY NUMERATOR TWO -"\ue077>;" // CURRENCY NUMERATOR THREE -"\ue078>;" // CURRENCY NUMERATOR FOUR -"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR -"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN -"\ue07B>;" // ISSHAR -"\uE07C>\u0a70;" // TIPPI -"\uE07D>\u0a71;" // ADDAK -"\uE07E>\u0a72;" // IRI -"\uE07F>\u0a73;" // URA -"\uE080>\u0a74;" // EK ONKAR -"\uE081>\u0a35;" // FALLBACK FOR ORIYA 
LETTER WA - -"0 > \u0a66;" // FALLBACK FOR TAMIL -"1 > \u0a67;" -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_InterIndic_Knda.txt b/icu4c/source/data/translit/t_InterIndic_Knda.txt deleted file mode 100644 index ee5d745e9b4..00000000000 --- a/icu4c/source/data/translit/t_InterIndic_Knda.txt +++ /dev/null @@ -1,157 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_InterIndic_Kannada.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// InterIndic_Kannada - -t_InterIndic_Knda { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// InterIndic-Kannada -//:: NFD (NFC) ; -"\ue033\ue03c>\u0cde;" // LETTER FA -"\ue001>\u0c82;" // REMAP (indicExceptions.txt): \u0c81>\u0c82 = SIGN CANDRABINDU>SIGN ANUSVARA -"\ue002>\u0c82;" // SIGN ANUSVARA -"\ue003>\u0c83;" // SIGN VISARGA -"\uE004>\u0c85;" // FALLBACK TO LETTER A -"\ue005>\u0c85;" // LETTER A -"\ue006>\u0c86;" // LETTER AA -"\ue007>\u0c87;" // LETTER I -"\ue008>\u0c88;" // LETTER II -"\ue009>\u0c89;" // LETTER U -"\ue00a>\u0c8a;" // LETTER UU -"\ue00b>\u0c8b;" // LETTER VOCALIC R -"\ue00c>\u0c8c;" // LETTER VOCALIC L -"\ue00d>\u0c8e;" // LETTER E -"\ue00e>\u0c8e;" // FALLBACK -"\ue00f>\u0c8f;" // LETTER EE -"\ue010>\u0c90;" // LETTER AI -"\ue011>\u0c92;" // FALLBACK -"\ue012>\u0c92;" // LETTER O -"\ue013>\u0c93;" // LETTER OO -"\ue014>\u0c94;" // LETTER AU -"\ue015>\u0c95;" // LETTER KA -"\ue016>\u0c96;" // LETTER 
KHA -"\ue017>\u0c97;" // LETTER GA -"\ue018>\u0c98;" // LETTER GHA -"\ue019>\u0c99;" // LETTER NGA -"\ue01a>\u0c9a;" // LETTER CA -"\ue01b>\u0c9b;" // LETTER CHA -"\ue01c>\u0c9c;" // LETTER JA -"\ue01d>\u0c9d;" // LETTER JHA -"\ue01e>\u0c9e;" // LETTER NYA -"\ue01f>\u0c9f;" // LETTER TTA -"\ue020>\u0ca0;" // LETTER TTHA -"\ue021>\u0ca1;" // LETTER DDA -"\ue022>\u0ca2;" // LETTER DDHA -"\ue023>\u0ca3;" // LETTER NNA -"\ue024>\u0ca4;" // LETTER TA -"\ue025>\u0ca5;" // LETTER THA -"\ue026>\u0ca6;" // LETTER DA -"\ue027>\u0ca7;" // LETTER DHA -"\ue028>\u0ca8;" // LETTER NA -"\ue029>\u0ca8;" // REMAP (indicExceptions.txt): \u0ca9>\u0ca8 = LETTER NNNA>LETTER NA -"\ue02a>\u0caa;" // LETTER PA -"\ue02b>\u0cab;" // LETTER PHA -"\ue02c>\u0cac;" // LETTER BA -"\ue02d>\u0cad;" // LETTER BHA -"\ue02e>\u0cae;" // LETTER MA -"\ue02f>\u0caf;" // LETTER YA -"\ue030\ue03c>\u0cb1;" -"\ue030>\u0cb0;" // LETTER RA -"\ue031>\u0cb1;" // LETTER RRA -"\ue032>\u0cb2;" // LETTER LA -"\ue033>\u0cb3;" // LETTER LLA -"\ue034>\u0cde;" // REMAP (indicExceptions.txt): \u0cb4>\u0cb3 = LETTER LLLA>LETTER LLA -"\ue035>\u0cb5;" // LETTER VA -"\ue036>\u0cb6;" // LETTER SHA -"\ue037>\u0cb7;" // LETTER SSA -"\ue038>\u0cb8;" // LETTER SA -"\ue039>\u0cb9;" // LETTER HA - -"\ue03c>\u0cbc;" // NUKTA -"\ue03d>\u0cbd;" // AVAGRAHA - -"\ue03e>\u0cbe;" // VOWEL SIGN AA -"\ue03f>\u0cbf;" // VOWEL SIGN I -"\ue040>\u0cc0;" // VOWEL SIGN II -"\ue041>\u0cc1;" // VOWEL SIGN U -"\ue042>\u0cc2;" // VOWEL SIGN UU -"\ue043>\u0cc3;" // VOWEL SIGN VOCALIC R -"\ue044>\u0cc4;" // VOWEL SIGN VOCALIC RR -"\ue045>\u0cc6;" // REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E -"\ue046>\u0cc6;" // VOWEL SIGN E -"\ue047>\u0cc7;" // VOWEL SIGN EE -"\ue048>\u0cc8;" // VOWEL SIGN AI -"\ue049>\u0cca;" // REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O -"\ue04a>\u0cca;" // VOWEL SIGN O -"\ue04b>\u0ccb;" // VOWEL SIGN OO -"\ue04c>\u0ccc;" // VOWEL SIGN AU -"\ue04d>\u0ccd;" // SIGN 
VIRAMA -"\ue050>\u0c93\u0c82;" // REMAP (indicExceptions.txt): \u0cd0>\u0c93\u0c82 = OM>LETTER OO.SIGN ANUSVARA -"\ue051>;" -"\ue052>;" -"\ue053>;" -"\ue054>;" -"\ue055>\u0cd5;" // LENGTH MARK -"\ue056>\u0cd6;" // AI LENGTH MARK -"\ue057>\u0ccc;" // REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU -"\ue058>\u0c95;" // FALLBACK -"\ue059>\u0c96;" // REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA -"\ue05a>\u0c97;" // REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA -"\ue05b>\u0c9c;" // REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA -"\ue05c>\u0ca2;" // FALLBACK -"\ue05d>\u0ca2;" // REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA -"\ue05e>\u0cde;" // LETTER FA -"\ue05f>\u0caf;" // REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA -"\ue060>\u0ce0;" // LETTER VOCALIC RR -"\ue061>\u0ce1;" // LETTER VOCALIC LL -"\ue062>\u0cbf;" // REMAP (indicExceptions.txt): \u0ce2>\u0cbf = VOWEL SIGN VOCALIC L>VOWEL SIGN I -"\ue063>\u0cc0;" // REMAP (indicExceptions.txt): \u0ce3>\u0cc0 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II -"\ue064>'.' ;" // FALLBACK FOR DANDA -"\ue065>'.' 
;" // FALLBACK FOR DOUBLE DANDA -"\ue066>\u0ce6;" // DIGIT ZERO -"\ue067>\u0ce7;" // DIGIT ONE -"\ue068>\u0ce8;" // DIGIT TWO -"\ue069>\u0ce9;" // DIGIT THREE -"\ue06a>\u0cea;" // DIGIT FOUR -"\ue06b>\u0ceb;" // DIGIT FIVE -"\ue06c>\u0cec;" // DIGIT SIX -"\ue06d>\u0ced;" // DIGIT SEVEN -"\ue06e>\u0cee;" // DIGIT EIGHT -"\ue06f>\u0cef;" // DIGIT NINE -"\ue070>;" // ABBREVIATION SIGN -"\ue071>\u0cb0;" // LETTER RA WITH MIDDLE DIAGONAL -"\ue072>\u0cb0;" // LETTER RA WITH LOWER DIAGONAL -"\ue073>;" // RUPEE MARK -"\ue074>;" // RUPEE SIGN -"\ue075>;" // CURRENCY NUMERATOR ONE -"\ue076>;" // CURRENCY NUMERATOR TWO -"\ue077>;" // CURRENCY NUMERATOR THREE -"\ue078>;" // CURRENCY NUMERATOR FOUR -"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR -"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN -"\ue07B>;" // ISSHAR -"\uE07C>;" // TIPPI -"\uE07D>;" // ADDAK -"\uE07E>;" // IRI -"\uE07F>;" // URA -"\uE080>;" // EK ONKAR -"\uE081>\u0cb5;" // FALLBACK FOR ORIYA LETTER WA -"0 > \u0ce6;" // FALLBACK FOR TAMIL -"1 > \u0ce7;" - -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_InterIndic_Latn.txt b/icu4c/source/data/translit/t_InterIndic_Latn.txt deleted file mode 100644 index cc703a36225..00000000000 --- a/icu4c/source/data/translit/t_InterIndic_Latn.txt +++ /dev/null @@ -1,545 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_InterIndic_Latin.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// InterIndic_Latin - -t_InterIndic_Latn { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// InterIndic-Latin - //\u0e00 reserved - //consonants - "$chandrabindu=\ue001;" - "$anusvara=\ue002;" - "$visarga=\ue003;" - //\u0e004 reserved - // w represents the stand-alone form - "$wa=\ue005;" - "$waa=\ue006;" - "$wi=\ue007;" - "$wii=\ue008;" - "$wu=\ue009;" - "$wuu=\ue00a;" - "$wr=\ue00b;" - "$wl=\ue00c;" - "$wce=\ue00d;" // LETTER CANDRA E - "$wse=\ue00e;" // LETTER SHORT E - "$we=\ue00f;" // \u090f LETTER E - "$wai=\ue010;" - "$wco=\ue011;" // LETTER CANDRA O - "$wso=\ue012;" // LETTER SHORT O - "$wo=\ue013;" // \u0913 LETTER O - "$wau=\ue014;" - "$ka=\ue015;" - "$kha=\ue016;" - "$ga=\ue017;" - "$gha=\ue018;" - "$nga=\ue019;" - "$ca=\ue01a;" - "$cha=\ue01b;" - "$ja=\ue01c;" - "$jha=\ue01d;" - "$nya=\ue01e;" - "$tta=\ue01f;" - "$ttha=\ue020;" - "$dda=\ue021;" - "$ddha=\ue022;" - "$nna=\ue023;" - "$ta=\ue024;" - "$tha=\ue025;" - "$da=\ue026;" - "$dha=\ue027;" - "$na=\ue028;" - "$ena=\ue029;" //compatibility - "$pa=\ue02a;" - "$pha=\ue02b;" - "$ba=\ue02c;" - "$bha=\ue02d;" - "$ma=\ue02e;" - "$ya=\ue02f;" - "$ra=\ue030;" - "$vva=\ue081;" - "$rra=\ue031;" - "$la=\ue032;" - "$lla=\ue033;" - "$ela=\ue034;" //compatibility - "$va=\ue035;" - "$sha=\ue036;" - "$ssa=\ue037;" - "$sa=\ue038;" - "$ha=\ue039;" -//\u093a Reserved -//\u093b Reserved - "$nukta=\ue03c;" - "$avagraha=\ue03d;" // SIGN AVAGRAHA - // represents the dependent form - "$aa=\ue03e;" - "$i=\ue03f;" - "$ii=\ue040;" - 
"$u=\ue041;" - "$uu=\ue042;" - "$rh=\ue043;" - "$lh=\ue044;" - "$ce=\ue045;" //VOWEL SIGN CANDRA E - "$se=\ue046;" //VOWEL SIGN SHORT E - "$e=\ue047;" - "$ai=\ue048;" - "$co=\ue049;" // VOWEL SIGN CANDRA O - "$so=\ue04a;" // VOWEL SIGN SHORT O - "$o=\ue04b;" // \u094b - "$au=\ue04c;" - "$virama=\ue04d;" -// \u094e Reserved -// \u094f Reserved - "$om=\ue050;" // OM - "\ue051>;" // UNMAPPED STRESS SIGN UDATTA - "\ue052>;" // UNMAPPED STRESS SIGN ANUDATTA - "\ue053>;" // UNMAPPED GRAVE ACCENT - "\ue054>;" // UNMAPPED ACUTE ACCENT - "$lm = \ue055;"// Telugu Length Mark - "$ailm=\ue056;"// AI Length Mark - "$aulm=\ue057;"// AU Length Mark - //urdu compatibity forms - "$uka=\ue058;" - "$ukha=\ue059;" - "$ugha=\ue05a;" - "$ujha=\ue05b;" - "$uddha=\ue05c;" - "$udha=\ue05d;" - "$ufa=\ue05e;" - "$uya=\ue05f;" - "$wrr=\ue060;" - "$wll=\ue061;" - "$rrh=\ue062;" - "$llh=\ue063;" - "$danda=\ue064;" - "$doubleDanda=\ue065;" - "$zero=\ue066;" // DIGIT ZERO - "$one=\ue067;" // DIGIT ONE - "$two=\ue068;" // DIGIT TWO - "$three=\ue069;" // DIGIT THREE - "$four=\ue06a;" // DIGIT FOUR - "$five=\ue06b;" // DIGIT FIVE - "$six=\ue06c;" // DIGIT SIX - "$seven=\ue06d;" // DIGIT SEVEN - "$eight=\ue06e;" // DIGIT EIGHT - "$nine=\ue06f;" // DIGIT NINE - -// \u0970>; # UNMAPPED ABBREVIATION SIGN - "$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];" - "$depVowelBelow=[\ue041-\ue044];" - // $x was originally called '&'; $z was '%' - "$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];" - "$z=[bcdfghjklmnpqrstvwxyz];" - "$vowels=[aeiour\u0304\u0325\u0306];" - "$forceIndependentMatra = [^[[:L:][\u0300-\u034c]]];" - //##################################################################### - // convert from Native letters to Latin letters - //##################################################################### - //transliterations for anusvara - "$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;" - "$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;" - "$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;" - "$anusvara} 
[$ta$tha$da$dha$na] > n ;" - "$anusvara} [$pa$pha$ba$bha$ma] > m ;" - "$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;" - "$anusvara> m\u0307;" - - // Urdu compatibility - "$ya$nukta}$x > y\u0307 ;" - "$ya$nukta$virama > y\u0307 ;" - "$ya$nukta > y\u0307a ;" - - "$la$nukta }$x > l\u0331 ;" - "$la$nukta$virama > l\u0331 ;" - "$la$nukta > l\u0331a ;" - - "$na$nukta }$x > n\u0331 ;" - "$na$nukta$virama > n\u0331 ;" - "$na$nukta > n\u0331a ;" - - "$ena }$x > n\u0331 ;" - "$ena$virama > n\u0331 ;" - "$ena > n\u0331a ;" - "$uka > qa ;" - "$ka$nukta }$x > q ;" - "$ka$nukta$virama > q ;" - "$ka$nukta > qa ;" - "$kha$nukta }$x > k\u0331h\u0331 ;" - "$kha$nukta$virama > k\u0331h\u0331 ;" - "$kha$nukta > k\u0331h\u0331a ;" - "$ukha$virama > k\u0331h\u0331;" - "$ukha > k\u0331h\u0331a;" - "$ugha > g\u0307a ;" - "$ga$nukta }$x > g\u0307 ;" - "$ga$nukta$virama > g\u0307 ;" - "$ga$nukta > g\u0307a ;" - - "$ujha > za ;" - "$ja$nukta }$x > z ;" - "$ja$nukta$virama > z ;" - "$ja$nukta > za ;" - "$ddha$nukta}$x > r\u0323h ;" - "$ddha$nukta$virama > r\u0323h ;" - "$ddha$nukta > r\u0323ha;" - - "$uddha}$x > r\u0323 ;" - "$uddha$virama > r\u0323 ;" - "$uddha > r\u0323a;" - - "$udha > r\u0323a ;" - "$dda$nukta}$x > r\u0323 ;" - "$dda$nukta$virama > r\u0323 ;" - "$dda$nukta > r\u0323a ;" - "$pha$nukta }$x > f ;" - "$pha$nukta$virama > f ;" - "$pha$nukta > fa ;" - "$ufa }$x > f ;" - "$ufa$virama > f ;" - "$ufa > fa ;" - - "$ra$nukta}$x > r\u0331;" - "$ra$nukta$virama > r\u0331;" - "$ra$nukta > r\u0331a;" - "$lla$nukta}$x > l\u0331;" - "$lla$nukta$virama > l\u0331;" - "$lla$nukta > l\u0331a;" - - "$ela}$x > l\u0331;" - "$ela$virama > l\u0331;" - "$ela > l\u0331a;" - - "$uya}$x > y\u0307;" - "$uya$virama > y\u0307;" - "$uya > y\u0307a;" - - - // normal consonants - "$ka$virama}$ha>k'';" - "$ka}$x>k;" - "$ka$virama>k;" - "$ka>ka;" - "$kha}$x>kh;" - "$kha$virama>kh;" - "$kha>kha;" - "$ga$virama}$ha>g'';" - "$ga}$x>g;" - "$ga$virama>g;" - "$ga>ga;" - - "$gha}$x>gh;" - "$gha$virama>gh;" - 
"$gha>gha;" - - "$nga}$x>n\u0307;" - "$nga$virama>n\u0307;" - "$nga>n\u0307a ;" - "$ca$virama}$ha>c'';" - "$ca}$x>c;" - "$ca$virama>c;" - "$ca>ca;" - - "$cha}$x>ch;" - "$cha$virama>ch;" - "$cha>cha;" - "$ja$virama}$ha>j'';" - "$ja}$x>j;" - "$ja$virama>j;" - "$ja>ja;" - - "$jha}$x>jh;" - "$jha$virama>jh;" - "$jha>jha;" - - "$nya }$x>n\u0303 ;" - "$nya$virama>n\u0303;" - "$nya > n\u0303a ;" - - - "$tta$virama}$ha>t\u0323'';" - "$tta}$x>t\u0323;" - "$tta$virama>t\u0323;" - "$tta>t\u0323a;" - - "$ttha}$x>t\u0323h;" - "$ttha$virama>t\u0323h;" - "$ttha>t\u0323ha;" - "$dda}$x$ha>d\u0323'';" - "$dda}$x>d\u0323;" - "$dda$virama>d\u0323;" - "$dda>d\u0323a;" - - "$ddha}$x>d\u0323h;" - "$ddha$virama>d\u0323h;" - "$ddha>d\u0323ha;" - - "$nna}$x>n\u0323 ;" - "$nna$virama>n\u0323;" - "$nna>n\u0323a ;" - - - "$ta$virama}$ha>t'';" - "$ta$virama}$ttha>t'';" - "$ta$virama}$tta>t'';" - "$ta$virama}$tha>t'';" - "$ta}$x>t;" - "$ta$virama>t;" - "$ta>ta;" - "$tha}$x>th;" - "$tha$virama>th;" - "$tha>tha;" - - "$da$virama}$ha>d'';" - "$da$virama}$ddha>d'';" - "$da$virama}$dda>d'';" - "$da$virama}$dha>d'';" - "$da}$x>d;" - "$da$virama>d;" - "$da>da;" - "$dha}$x>dh;" - "$dha$virama>dh;" - "$dha>dha;" - "$na$virama}$ga>n'';" - "$na$virama}$ya>n'';" - "$na}$x>n;" - "$na$virama>n;" - "$na>na;" - - - "$pa$virama}$ha>p'';" - "$pa}$x>p;" - "$pa$virama>p;" - "$pa>pa;" - "$pha}$x>ph;" - "$pha$virama>ph;" - "$pha>pha;" - "$ba$virama}$ha>b'';" - "$ba}$x>b;" - "$ba$virama>b;" - "$ba>ba;" - - "$bha}$x>bh;" - "$bha$virama>bh;" - "$bha>bha;" - - "$ma$virama}$ma>m'';" - "$ma}$x>m;" - "$ma$virama>m;" - "$ma>ma;" - - "$ya}$x>y;" - "$ya$virama>y;" - "$ya>ya;" - "$ra$virama}$ha>r'';" - "$ra}$x>r;" - "$ra$virama>r;" - "$ra>ra;" - "$vva$virama}$ha>w\u0307'';" - "$vva}$x>w\u0307;" - "$vva$virama>w\u0307;" - "$vva>w\u0307a;" - "$rra$virama}$ha>r\u0331'';" - "$rra}$x>r\u0331;" - "$rra$virama>r\u0331;" - "$rra>r\u0331a;" - "$la$virama}$ha>l'';" - "$la}$x>l;" - "$la$virama>l;" - "$la>la;" - 
"$lla$virama}$ha>l\u0323'';" - "$lla}$x>l\u0323;" - "$lla$virama>l\u0323;" - "$lla>l\u0323a;" - "$va}$x>v;" - "$va$virama>v;" - "$va>va;" - "$sa$virama}$ha>s'';" - "$sa$virama}$sha>s'';" - "$sa$virama}$ssa>s'';" - "$sa$virama}$sa>s'';" - "$sa}$x>s;" - "$sa$virama>s;" - - //for gurmukhi - "$sa$nukta}$x>s\u0301;" - "$sa$nukta$virama>s\u0301;" - "$sa$nukta>s\u0301a;" - "$sa>sa;" - - "$sha}$x>s\u0301;" - "$sha$virama>s\u0301;" - "$sha>s\u0301a;" - - "$ssa}$x>s\u0323;" - "$ssa$virama>s\u0323;" - "$ssa>s\u0323a;" - "$ha}$x>h;" - "$ha$virama>h;" - "$ha>ha;" - - // dependent vowels (should never occur except following consonants) - "$forceIndependentMatra{$aa > \u0314a\u0304 ;" - "$forceIndependentMatra{$ai > \u0314ai ;" - "$forceIndependentMatra{$au > \u0314au ;" - "$forceIndependentMatra{$ii > \u0314i\u0304 ;" - "$forceIndependentMatra{$i > \u0314i ;" - "$forceIndependentMatra{$uu > \u0314u\u0304 ;" - "$forceIndependentMatra{$u > \u0314u ;" - "$forceIndependentMatra{$rrh > \u0314r\u0325\u0304 ;" - "$forceIndependentMatra{$rh > \u0314r\u0325 ;" - "$forceIndependentMatra{$llh > \u0314l\u0325\u0304 ;" - "$forceIndependentMatra{$lh > \u0314l\u0325 ;" - "$forceIndependentMatra{$e > \u0314e\u0304 ;" - "$forceIndependentMatra{$o > \u0314o\u0304 ;" - //extra vowels - "$forceIndependentMatra{$ce > \u0314e\u0306 ;" - "$forceIndependentMatra{$co > \u0314o\u0306 ;" - "$forceIndependentMatra{$se > \u0314e ;" - "$forceIndependentMatra{$so > \u0314o ;" - "$forceIndependentMatra{$nukta >;" // Nukta cannot appear independently or as first character - "$forceIndependentMatra{$virama >;" // Virama cannot appear independently or as first character - "$aa > a\u0304 ;" - "$ai > ai ;" - "$au > au ;" - "$ii > i\u0304 ;" - "$i > i ;" - "$uu > u\u0304 ;" - "$u > u ;" - "$rrh > r\u0325\u0304 ;" - "$rh > r\u0325 ;" - "$llh > l\u0325\u0304 ;" - "$lh > l\u0325 ;" - "$e > e\u0304 ;" - "$o > o\u0304 ;" - //extra vowels - "$ce > e\u0306 ;" - "$co > o\u0306 ;" - "$se > e ;" - "$so > o ;" - //dependent 
vowels when following independent vowels. Generally Illegal only for roundtripping - "$waa} $x > a\u0304\u0314 ;" - "$wai} $x > ai\u0314 ;" - "$wau} $x > au\u0314 ;" - "$wii} $x > i\u0304\u0314 ;" - "$wi } $x > i\u0314 ;" - "$wuu} $x > u\u0304\u0314 ;" - "$wu } $x > u\u0314 ;" - "$wrr} $x > r\u0325\u0304\u0314 ;" - "$wr } $x > r\u0325\u0314 ;" - "$wll} $x > l\u0325\u0304\u0314 ;" - "$wl } $x > l\u0325\u0314 ;" - "$we } $x > e\u0304\u0314 ;" - "$wo } $x > o\u0304\u0314 ;" - "$wa } $x > a\u0314 ;" - //extra vowels - "$wce} $x > e\u0306\u0314 ;" - "$wco} $x > o\u0306\u0314 ;" - "$wse} $x > e\u0314 ;" - "$wso} $x > o\u0314 ;" - "$om} $x > ''om\u0314 ;" - - // independent vowels when preceeded by vowels - "$vowels{$waa > ''a\u0304 ;" - "$vowels{$wai > ''ai ;" - "$vowels{$wau > ''au ;" - "$vowels{$wii > ''i\u0304 ;" - "$vowels{$wi > ''i ;" - "$vowels{$wuu > ''u\u0304 ;" - "$vowels{$wu > ''u ;" - "$vowels{$wrr > ''r\u0325\u0304 ;" - "$vowels{$wr > ''r\u0325 ;" - "$vowels{$wll > ''l\u0325\u0304 ;" - "$vowels{$wl > ''l\u0325 ;" - "$vowels{$we > ''e\u0304 ;" - "$vowels{$wo > ''o\u0304 ;" - "$vowels{$wa > ''a ;" - //extra vowels - "$vowels{$wce > ''e\u0306 ;" - "$vowels{$wco > ''o\u0306 ;" - "$vowels{$wse > ''e ;" - "$vowels{$wso > ''o ;" - - // independent vowels (otherwise) - "$waa > a\u0304 ;" - "$wai > ai ;" - "$wau > au ;" - "$wii > i\u0304 ;" - "$wi > i ;" - "$wuu > u\u0304 ;" - "$wu > u ;" - "$wrr > r\u0325\u0304 ;" - "$wr > r\u0325 ;" - "$wll > l\u0325\u0304 ;" - "$wl > l\u0325 ;" - "$we > e\u0304 ;" - "$wo > o\u0304 ;" - "$wa > a ;" - //extra vowels - "$wce > e\u0306 ;" - "$wco > o\u0306 ;" - "$wse > e ;" - "$wso > o ;" - "$om > ''om ;" - - //stress marks - "$avagraha > \u0315;" - "$chandrabindu$anusvara>\u0303;" - "$chandrabindu > m\u0310;" - "$visarga>h\u0323;" - //numbers - "$zero > 0;" - "$one > 1;" - "$two > 2;" - "$three > 3;" - "$four > 4;" - "$five > 5;" - "$six > 6;" - "$seven > 7;" - "$eight > 8;" - "$nine > 9;" - "$lm >;" - "$ailm >;" - "$aulm >;" - - 
"$danda>'.';" - "$doubleDanda>'.';" - - "\ue070>;" // ABBREVIATION SIGN - // LETTER RA WITH MIDDLE DIAGONAL - "\ue071}$x>ra;" - "\ue071$virama>r;" - "\ue071>ra;" - // LETTER RA WITH LOWER DIAGONAL - "\ue072}$x>ra;" - "\ue072$virama>r;" - "\ue072>ra;" - - "\ue073>;" // RUPEE MARK - "\ue074>;" // RUPEE SIGN - "\ue075>;" // CURRENCY NUMERATOR ONE - "\ue076>;" // CURRENCY NUMERATOR TWO - "\ue077>;" // CURRENCY NUMERATOR THREE - "\ue078>;" // CURRENCY NUMERATOR FOUR - "\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR - "\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN - "\ue07B>;" // ISSHAR - "\uE07C>;" // TIPPI - "\uE07D>;" // ADDAK - "\uE07E>;" // IRI - "\uE07F>;" // URA - "\uE080>;" // EK ONKAR - "\uE004>;" // DEVANAGARI VOWEL SIGN SHORT A - - } -} diff --git a/icu4c/source/data/translit/t_InterIndic_Mlym.txt b/icu4c/source/data/translit/t_InterIndic_Mlym.txt deleted file mode 100644 index 061da0d4d6a..00000000000 --- a/icu4c/source/data/translit/t_InterIndic_Mlym.txt +++ /dev/null @@ -1,157 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_InterIndic_Malayalam.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// InterIndic_Malayalam - -t_InterIndic_Mlym { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// InterIndic-Malayalam -//:: NFD (NFC) ; -"\ue001>\u0d02;" // REMAP (indicExceptions.txt): \u0d01>\u0d02 = SIGN CANDRABINDU>SIGN ANUSVARA -"\ue002>\u0d02;" // SIGN ANUSVARA -"\ue003>\u0d03;" // SIGN VISARGA -"\uE004>\u0d05;" // FALLBACK TO LETTER A -"\ue005>\u0d05;" // LETTER A -"\ue006>\u0d06;" // LETTER AA -"\ue007>\u0d07;" // LETTER I -"\ue008>\u0d08;" // LETTER II -"\ue009>\u0d09;" // LETTER U -"\ue00a>\u0d0a;" // LETTER UU -"\ue00b>\u0d0b;" // LETTER VOCALIC R -"\ue00c>\u0d0c;" // LETTER VOCALIC L -"\ue00d>\u0d0e;" // FALLLBACK LETTER E -"\ue00e>\u0d0e;" // LETTER E -"\ue00f>\u0d0f;" // LETTER EE -"\ue010>\u0d10;" // LETTER AI -"\ue011>\u0d12;" // FALLBACK TO O -"\ue012>\u0d12;" // LETTER O -"\ue013>\u0d13;" // LETTER OO -"\ue014>\u0d14;" // LETTER AU -"\ue015>\u0d15;" // LETTER KA -"\ue016>\u0d16;" // LETTER KHA -"\ue017>\u0d17;" // LETTER GA -"\ue018>\u0d18;" // LETTER GHA -"\ue019>\u0d19;" // LETTER NGA -"\ue01a>\u0d1a;" // LETTER CA -"\ue01b>\u0d1b;" // LETTER CHA -"\ue01c>\u0d1c;" // LETTER JA -"\ue01d>\u0d1d;" // LETTER JHA -"\ue01e>\u0d1e;" // LETTER NYA -"\ue01f>\u0d1f;" // LETTER TTA -"\ue020>\u0d20;" // LETTER TTHA -"\ue021>\u0d21;" // LETTER DDA -"\ue022>\u0d22;" // LETTER DDHA -"\ue023>\u0d23;" // LETTER NNA -"\ue024>\u0d24;" // LETTER TA -"\ue025>\u0d25;" // LETTER THA -"\ue026>\u0d26;" // LETTER DA -"\ue027>\u0d27;" // LETTER DHA -"\ue028>\u0d28;" // LETTER NA 
-"\ue029>\u0d28;" // REMAP (indicExceptions.txt): \u0d29>\u0d28 = LETTER NNNA>LETTER NA -"\ue02a>\u0d2a;" // LETTER PA -"\ue02b>\u0d2b;" // LETTER PHA -"\ue02c>\u0d2c;" // LETTER BA -"\ue02d>\u0d2d;" // LETTER BHA -"\ue02e>\u0d2e;" // LETTER MA -"\ue02f>\u0d2f;" // LETTER YA -"\ue030\ue03c>\u0d31;" -"\ue030>\u0d30;" // LETTER RA -"\ue031>\u0d31;" // LETTER RRA -"\ue032>\u0d32;" // LETTER LA -"\ue033\ue03c>\u0d34;" -"\ue033>\u0d33;" // LETTER LLA -"\ue034>\u0d34;" // LETTER LLLA -"\ue035>\u0d35;" // LETTER VA -"\ue036>\u0d36;" // LETTER SHA -"\ue037>\u0d37;" // LETTER SSA -"\ue038>\u0d38;" // LETTER SA -"\ue039>\u0d39;" // LETTER HA - -"\ue03c>;" // FALLBACK BLOW AWAY NUKTA -"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA - -"\ue03e>\u0d3e;" // VOWEL SIGN AA -"\ue03f>\u0d3f;" // VOWEL SIGN I -"\ue040>\u0d40;" // VOWEL SIGN II -"\ue041>\u0d41;" // VOWEL SIGN U -"\ue042>\u0d42;" // VOWEL SIGN UU -"\ue043>\u0d43;" // VOWEL SIGN VOCALIC R -"\ue044>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR -"\ue045>\u0d3e;" // REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA -"\ue046>\u0d46;" // VOWEL SIGN E -"\ue047>\u0d47;" // VOWEL SIGN EE -"\ue048>\u0d48;" // VOWEL SIGN AI -"\ue049>\u0d4b;" // REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO -"\ue04a>\u0d4a;" // VOWEL SIGN O -"\ue04b>\u0d4b;" // VOWEL SIGN OO -"\ue04c>\u0d4c;" // VOWEL SIGN AU -"\ue04d>\u0d4d;" // SIGN VIRAMA -"\ue050>\u0d13\u0d02;" // UNMAPPED InterIndic-Malayalam: OM -"\ue051>;" -"\ue052>;" -"\ue053>;" -"\ue054>;" -"\ue055>;" // FALLBACK BLOW AWAY LENGTH MARK -"\ue056>\u0d48;" // REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI -"\ue057>\u0d57;" // AU LENGTH MARK -"\ue058>\u0d15;" // FALLBACK -"\ue059>\u0d16;" // REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA -"\ue05a>\u0d17;" // REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA -"\ue05b>\u0d1c;" // REMAP (indicExceptions.txt): \u0d5b>\u0d1c = 
LETTER ZA>LETTER JA -"\ue05d>\u0d22;" // REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA -"\ue05c>\u0d21;" // FALLBACK -"\ue05e>\u0d2b;" // REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA -"\ue05f>\u0d2f;" // REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA -"\ue060>\u0d60;" // LETTER VOCALIC RR -"\ue061>\u0d61;" // LETTER VOCALIC LL -"\ue062>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC L -"\ue063>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC LL -"\ue064>'.' ;" // FALLBACK FOR DANDA -"\ue065>'.' ;" // FALLBACK FOR DOUBLE DANDA -"\ue066>\u0d66;" // DIGIT ZERO -"\ue067>\u0d67;" // DIGIT ONE -"\ue068>\u0d68;" // DIGIT TWO -"\ue069>\u0d69;" // DIGIT THREE -"\ue06a>\u0d6a;" // DIGIT FOUR -"\ue06b>\u0d6b;" // DIGIT FIVE -"\ue06c>\u0d6c;" // DIGIT SIX -"\ue06d>\u0d6d;" // DIGIT SEVEN -"\ue06e>\u0d6e;" // DIGIT EIGHT -"\ue06f>\u0d6f;" // DIGIT NINE -"\ue070>;" // ABBREVIATION SIGN -"\ue071>\u0d30;" // LETTER RA WITH MIDDLE DIAGONAL -"\ue072>\u0d30;" // LETTER RA WITH LOWER DIAGONAL -"\ue073>;" // RUPEE MARK -"\ue074>;" // RUPEE SIGN -"\ue075>;" // CURRENCY NUMERATOR ONE -"\ue076>;" // CURRENCY NUMERATOR TWO -"\ue077>;" // CURRENCY NUMERATOR THREE -"\ue078>;" // CURRENCY NUMERATOR FOUR -"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR -"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN -"\ue07B>;" // ISSHAR -"\uE07C>;" // TIPPI -"\uE07D>;" // ADDAK -"\uE07E>;" // IRI -"\uE07F>;" // URA -"\uE080>;" // EK ONKAR -"\uE081>\u0d35;" // FALLBACK FOR ORIYA LETTER WA -"0 > \u0d66;" // FALLBACK FOR TAMIL -"1 > \u0d67;" - -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_InterIndic_Orya.txt b/icu4c/source/data/translit/t_InterIndic_Orya.txt deleted file mode 100644 index ee6438dc251..00000000000 --- a/icu4c/source/data/translit/t_InterIndic_Orya.txt +++ /dev/null @@ -1,153 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 
1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_InterIndic_Oriya.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// InterIndic_Oriya - -t_InterIndic_Orya { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// InterIndic-Oriya -//:: NFD (NFC) ; -"\ue001>\u0b01;" // SIGN CANDRABINDU -"\ue002>\u0b02;" // SIGN ANUSVARA -"\ue003>\u0b03;" // SIGN VISARGA -"\uE004>\u0b05;" // FALLBACK TO LETTER A -"\ue005>\u0b05;" // LETTER A -"\ue006>\u0b06;" // LETTER AA -"\ue007>\u0b07;" // LETTER I -"\ue008>\u0b08;" // LETTER II -"\ue009>\u0b09;" // LETTER U -"\ue00a>\u0b0a;" // LETTER UU -"\ue00b>\u0b0b;" // LETTER VOCALIC R -"\ue00c>\u0b0c;" // LETTER VOCALIC L -"\ue00d>\u0b0f;" // FALLBACK -"\ue00e>\u0b0f;" // FALLBACK -"\ue00f>\u0b0f;" // LETTER E -"\ue010>\u0b10;" // LETTER AI -"\ue011>\u0b13;" // FALLBACK -"\ue012>\u0b13;" // FALLBACK -"\ue013>\u0b13;" // FALLBACK LETTER OO (\u0b13 = LETTER O) -"\ue014>\u0b14;" // LETTER AU -"\ue015>\u0b15;" // LETTER KA -"\ue016>\u0b16;" // LETTER KHA -"\ue017>\u0b17;" // LETTER GA -"\ue018>\u0b18;" // LETTER GHA -"\ue019>\u0b19;" // LETTER NGA -"\ue01a>\u0b1a;" // LETTER CA -"\ue01b>\u0b1b;" // LETTER CHA -"\ue01c>\u0b1c;" // LETTER JA -"\ue01d>\u0b1d;" // LETTER JHA -"\ue01e>\u0b1e;" // LETTER NYA -"\ue01f>\u0b1f;" // LETTER TTA -"\ue020>\u0b20;" // LETTER TTHA -"\ue021>\u0b21;" // LETTER DDA -"\ue022>\u0b22;" // LETTER DDHA -"\ue023>\u0b23;" // LETTER NNA -"\ue024>\u0b24;" // LETTER TA -"\ue025>\u0b25;" // LETTER THA -"\ue026>\u0b26;" // LETTER DA -"\ue027>\u0b27;" // LETTER DHA 
-"\ue028>\u0b28;" // LETTER NA -"\ue029>\u0b28\u0b3c;" // FALLBACK \u0b29>\u0b28 = LETTER NNNA>LETTER NA -"\ue02a>\u0b2a;" // LETTER PA -"\ue02b>\u0b2b;" // LETTER PHA -"\ue02c>\u0b2c;" // LETTER BA -"\ue02d>\u0b2d;" // LETTER BHA -"\ue02e>\u0b2e;" // LETTER MA -"\ue02f>\u0b2f;" // LETTER YA -"\ue030>\u0b30;" // LETTER RA -"\ue031>\u0b5c;" // LETTER RRA -"\ue032>\u0b32;" // LETTER LA -"\ue033>\u0b33;" // LETTER LLA -"\ue034>\u0b33\u0b3c;" // FALLBACK LETTER LLLA>LETTER LLA -"\ue035>\u0b35;" // LETTER VA -"\ue036>\u0b36;" // LETTER SHA -"\ue037>\u0b37;" // LETTER SSA -"\ue038>\u0b38;" // LETTER SA -"\ue039>\u0b39;" // LETTER HA -"\ue03c>\u0b3c;" // SIGN NUKTA -"\ue03d>\u0b3d;" // SIGN AVAGRAHA -"\ue03e>\u0b3e;" // VOWEL SIGN AA -"\ue03f>\u0b3f;" // VOWEL SIGN I -"\ue040>\u0b40;" // VOWEL SIGN II -"\ue041>\u0b41;" // VOWEL SIGN U -"\ue042>\u0b42;" // VOWEL SIGN UU -"\ue043>\u0b43;" // VOWEL SIGN VOCALIC R -"\ue044>\u0b43\u0b3c;" // FALLBACK \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA -"\ue045>\u0b47;" // FALLBACK -"\ue046>\u0b47;" // FALLBACK -"\ue047>\u0b47;" // VOWEL SIGN E -"\ue048>\u0b48;" // VOWEL SIGN AI -"\ue049>\u0b4b;" // FALLBACK -"\ue04a>\u0b4b;" // FALLBACK -"\ue04b>\u0b4b;" // VOWEL SIGN E -"\ue04c>\u0b4c;" // VOWEL SIGN AU -"\ue04d>\u0b4d;" // SIGN VIRAMA -"\ue050>\u0b13\u0b01;" // FALLBACK \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU -"\ue051>;" -"\ue052>;" -"\ue053>;" -"\ue054>;" -"\ue055>;" // UNMAPPED InterIndic-Oriya: LENGTH MARK -"\ue056>\u0b56;" // AI LENGTH MARK -"\ue057>\u0b57;" // AU LENGTH MARK -"\ue059>\u0b16\u0b3c;" // FALLBACK \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA -"\ue058>\u0b15\u0b3c;" // FALLBACK -"\ue05a>\u0b17\u0b3c;" // FALLBACK \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA -"\ue05b>\u0b1c\u0b3c;" // FALLBACK \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA -"\ue05c>\u0b21\u0b3c;" // FALLBACK -"\ue05d>\u0b5d;" // LETTER RHA -"\ue05e>\u0b2b\u0b3c;" // FALLBACK 
\u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA -"\ue05f>\u0b5f;" // LETTER YYA -"\ue060>\u0b60;" // LETTER VOCALIC RR -"\ue061>\u0b61;" // LETTER VOCALIC LL -"\ue062>\u0b56\u0b3c;" // FALLBACK \u0b62>\u0b56\u0b3c = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA -"\ue063>\u0b57\u0b3c;" // FALLBACK \u0b63>\u0b57\u0b3c = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA -"\uE064>\u0964;" // DANDA -"\uE065>\u0965;" // DOUBLE DANDA -"\ue066>\u0b66;" // DIGIT ZERO -"\ue067>\u0b67;" // DIGIT ONE -"\ue068>\u0b68;" // DIGIT TWO -"\ue069>\u0b69;" // DIGIT THREE -"\ue06a>\u0b6a;" // DIGIT FOUR -"\ue06b>\u0b6b;" // DIGIT FIVE -"\ue06c>\u0b6c;" // DIGIT SIX -"\ue06d>\u0b6d;" // DIGIT SEVEN -"\ue06e>\u0b6e;" // DIGIT EIGHT -"\ue06f>\u0b6f;" // DIGIT NINE -"\ue070>;" // ABBREVIATION SIGN -"\ue071>\u0b30;" // LETTER RA WITH MIDDLE DIAGONAL -"\ue072>\u0b30;" // LETTER RA WITH LOWER DIAGONAL -"\ue073>;" // RUPEE MARK -"\ue074>;" // RUPEE SIGN -"\ue075>;" // CURRENCY NUMERATOR ONE -"\ue076>;" // CURRENCY NUMERATOR TWO -"\ue077>;" // CURRENCY NUMERATOR THREE -"\ue078>;" // CURRENCY NUMERATOR FOUR -"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR -"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN -"\ue07B>\u0B70;" // ISSHAR -"\uE07C>;" // TIPPI -"\uE07D>;" // ADDAK -"\uE07E>;" // IRI -"\uE07F>;" // URA -"\uE080>;" // EK ONKAR -"\uE081>\u0B71;" // LETTER WA -"0 > \u0b66;" // FALLBACK FOR TAMIL -"1 > \u0b67;" - -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_InterIndic_Taml.txt b/icu4c/source/data/translit/t_InterIndic_Taml.txt deleted file mode 100644 index 3a5617e5139..00000000000 --- a/icu4c/source/data/translit/t_InterIndic_Taml.txt +++ /dev/null @@ -1,167 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_InterIndic_Tamil.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// InterIndic_Tamil - -t_InterIndic_Taml { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// InterIndic-Tamil -//:: NFD (NFC) ; -"\ue001>\u0b82;" // FALLBACK SIGN CANDRABINDU -"\ue002>\u0b82;" // SIGN ANUSVARA -"\ue003>\u0b83;" // SIGN VISARGA -"\uE004>\u0b85;" // FALLBACK TO LETTER A -"\ue005>\u0b85;" // LETTER A -"\ue006>\u0b86;" // LETTER AA -"\ue007>\u0b87;" // LETTER I -"\ue008>\u0b88;" // LETTER II -"\ue009>\u0b89;" // LETTER U -"\ue00a>\u0b8a;" // LETTER UU -"\ue00b>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I -"\ue00c>\u0bb2;" // FALLBACK LETTER LA -"\ue00d>\u0b8f;" // FALLBACK -"\ue00e>\u0b8e;" // LETTER E -"\ue00f>\u0b8f;" // LETTER EE -"\ue010>\u0b90;" // LETTER AI -"\ue011>\u0b92;" // FALLBACK -"\ue012>\u0b92;" // LETTER O -"\ue013>\u0b93;" // LETTER OO -"\ue014>\u0b94;" // LETTER AU -"\ue015>\u0b95;" // LETTER KA -"\ue016>\u0b95;" // REMAP (indicExceptions.txt): \u0b96>\u0b95 = LETTER KHA>LETTER KA -"\ue017>\u0b95;" // REMAP (indicExceptions.txt): \u0b97>\u0b95 = LETTER GA>LETTER KA -"\ue018>\u0b95;" // REMAP (indicExceptions.txt): \u0b98>\u0b95 = LETTER GHA>LETTER KA -"\ue019>\u0b99;" // LETTER NGA -"\ue01a>\u0b9a;" // LETTER CA -"\ue01b>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9b>\u0b9a = LETTER CHA>LETTER CA -"\ue01c>\u0b9c;" // LETTER JA -"\ue01d>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9d>\u0b9a = LETTER JHA>LETTER CA -"\ue01e>\u0b9e;" // LETTER NYA -"\ue01f>\u0b9f;" // LETTER TTA 
-"\ue020>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba0>\u0b9f = LETTER TTHA>LETTER TTA -"\ue021>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba1>\u0b9f = LETTER DDA>LETTER TTA -"\ue022>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba2>\u0b9f = LETTER DDHA>LETTER TTA -"\ue023>\u0ba3;" // LETTER NNA -"\ue024>\u0ba4;" // LETTER TA -"\ue025>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba5>\u0ba4 = LETTER THA>LETTER TA -"\ue026>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba6>\u0ba4 = LETTER DA>LETTER TA -"\ue027>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba7>\u0ba4 = LETTER DHA>LETTER TA -"\ue028\ue03c>\u0ba9;" -"\ue028>\u0ba8;" // LETTER NA -"\ue029>\u0ba9;" // LETTER NNNA -"\ue02a>\u0baa;" // LETTER PA -"\ue02b>\u0baa;" // REMAP (indicExceptions.txt): \u0bab>\u0baa = LETTER PHA>LETTER PA -"\ue02c>\u0baa;" // REMAP (indicExceptions.txt): \u0bac>\u0baa = LETTER BA>LETTER PA -"\ue02d>\u0baa;" // REMAP (indicExceptions.txt): \u0bad>\u0baa = LETTER BHA>LETTER PA -"\ue02e>\u0bae;" // LETTER MA -"\ue02f>\u0baf;" // LETTER YA -"\ue030\ue03c>\u0bb1;" -"\ue030>\u0bb0;" // LETTER RA -"\ue031>\u0bb1;" // LETTER RRA -"\ue032>\u0bb2;" // LETTER LA -"\ue033\ue03c>\u0bb4;" -"\ue033>\u0bb3;" // LETTER LLA -"\ue034>\u0bb4;" // LETTER LLLA -"\ue035>\u0bb5;" // LETTER VA -"\ue036>\u0bb7;" // REMAP (indicExceptions.txt): \u0bb6>\u0bb7 = LETTER SHA>LETTER SSA -"\ue037>\u0bb7;" // LETTER SSA -"\ue038>\u0bb8;" // LETTER SA -"\ue039>\u0bb9;" // LETTER HA - -"\ue03c>;" // FALLBACK BLOW AWAY NUKTA -"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA - -"\ue03e>\u0bbe;" // VOWEL SIGN AA -"\ue03f>\u0bbf;" // VOWEL SIGN I -"\ue040>\u0bc0;" // VOWEL SIGN II -"\ue041>\u0bc1;" // VOWEL SIGN U -"\ue042>\u0bc2;" // VOWEL SIGN UU -"\ue043>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I -"\ue044>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL 
SIGN I -"\ue045>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA -"\ue046>\u0bc6;" // VOWEL SIGN E -"\ue047>\u0bc7;" // VOWEL SIGN EE -"\ue048>\u0bc8;" // VOWEL SIGN AI -"\ue049>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA O>VOWEL SIGN AA -"\ue04a>\u0bca;" // VOWEL SIGN O -"\ue04b>\u0bcb;" // VOWEL SIGN OO -"\ue04c>\u0bcc;" // VOWEL SIGN AU -"\ue04d>\u0bcd;" // SIGN VIRAMA -"\ue050>\u0b93\u0bae\u0bcd;" // REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA -"\ue051>;" -"\ue052>;" -"\ue053>;" -"\ue054>;" -"\ue055>;" // UNMAPPED InterIndic-Tamil: LENGTH MARK -"\ue056>\u0bc8;" // REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI -"\ue057>\u0bd7;" // AU LENGTH MARK -"\ue058>\u0b95;" // FALLBACK -"\ue059>\u0b95;" // REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA -"\ue05a>\u0b95;" // REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA -"\ue05b>\u0b9c;" // REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA -"\ue05c>\u0ba4;" // FALLBACK -"\ue05d>\u0b9f;" // REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA -"\ue05e>\u0baa;" // REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA -"\ue05f>\u0baf;" // REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA -"\ue060>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I -"\ue061>\u0bb3;" // FALLBACK LETTER LLA -"\ue062>\u0bbf;" // FALLBACK VOWEL SIGN VOCALIC L -"\ue063>\u0bc0;" // FALLBACK VOWEL SIGN VOCALIC LL -"\ue064>'.' ;" // FALLBACK FOR DANDA -"\ue065>'.' 
;" // FALLBACK FOR DOUBLE DANDA - -"\ue066>\u0030;" // FALLBACK DIGIT ZERO - -"\ue067\ue066\ue066\ue066>\u0bF2;" -"\ue067\ue066\ue066>\u0bf1;" -"\ue067\ue066>\u0bF0;" - -"\ue067>\u0be7;" // DIGIT ONE -"\ue068>\u0be8;" // DIGIT TWO -"\ue069>\u0be9;" // DIGIT THREE -"\ue06a>\u0bea;" // DIGIT FOUR -"\ue06b>\u0beb;" // DIGIT FIVE -"\ue06c>\u0bec;" // DIGIT SIX -"\ue06d>\u0bed;" // DIGIT SEVEN -"\ue06e>\u0bee;" // DIGIT EIGHT -"\ue06f>\u0bef;" // DIGIT NINE - -"\ue070>;" // ABBREVIATION SIGN -"\ue071>\u0bc0;" // LETTER RA WITH MIDDLE DIAGONAL -"\ue072>\u0bc0;" // LETTER RA WITH LOWER DIAGONAL -"\ue073>;" // RUPEE MARK -"\ue074>;" // RUPEE SIGN -"\ue075>;" // CURRENCY NUMERATOR ONE -"\ue076>;" // CURRENCY NUMERATOR TWO -"\ue077>;" // CURRENCY NUMERATOR THREE -"\ue078>;" // CURRENCY NUMERATOR FOUR -"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR -"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN -"\ue07B>;" // ISSHAR -"\uE07C>;" // TIPPI -"\uE07D>;" // ADDAK -"\uE07E>;" // IRI -"\uE07F>;" // URA -"\uE080>;" // EK ONKAR -"\uE081>\u0bb5;" // FALLBACK FOR ORIYA LETTER WA - -"1000 >\u0BF2;" // NUMBER ONE THOUSAND -"100 >\u0BF1;" // NUMBER ONE HUNDRED -"10 >\u0BF0;" // NUMBER TEN - -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_InterIndic_Telu.txt b/icu4c/source/data/translit/t_InterIndic_Telu.txt deleted file mode 100644 index f9ee4218927..00000000000 --- a/icu4c/source/data/translit/t_InterIndic_Telu.txt +++ /dev/null @@ -1,157 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_InterIndic_Telugu.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// InterIndic_Telugu - -t_InterIndic_Telu { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// InterIndic-Telugu -//:: NFD (NFC) ; -"\ue001>\u0c01;" // SIGN CANDRABINDU -"\ue002>\u0c02;" // SIGN ANUSVARA -"\ue003>\u0c03;" // SIGN VISARGA -"\uE004>\u0c05;" // FALLBACK TO LETTER A -"\ue005>\u0c05;" // LETTER A -"\ue006>\u0c06;" // LETTER AA -"\ue007>\u0c07;" // LETTER I -"\ue008>\u0c08;" // LETTER II -"\ue009>\u0c09;" // LETTER U -"\ue00a>\u0c0a;" // LETTER UU -"\ue00b>\u0c0b;" // LETTER VOCALIC R -"\ue00c>\u0c0c;" // LETTER VOCALIC L -"\ue00d>\u0c0E;" // FALLBACK MAPPING -"\ue00e>\u0c0E;" // LETTER E -"\ue00f>\u0c0f;" // LETTER EE -"\ue010>\u0c10;" // LETTER AI -"\ue011>\u0c12;" // FALBACK MAPPING -"\ue012>\u0c12;" // LETTER O -"\ue013>\u0c13;" // LETTER OO -"\ue014>\u0c14;" // LETTER AU -"\ue015>\u0c15;" // LETTER KA -"\ue016>\u0c16;" // LETTER KHA -"\ue017>\u0c17;" // LETTER GA -"\ue018>\u0c18;" // LETTER GHA -"\ue019>\u0c19;" // LETTER NGA -"\ue01a>\u0c1a;" // LETTER CA -"\ue01b>\u0c1b;" // LETTER CHA -"\ue01c>\u0c1c;" // LETTER JA -"\ue01d>\u0c1d;" // LETTER JHA -"\ue01e>\u0c1e;" // LETTER NYA -"\ue01f>\u0c1f;" // LETTER TTA -"\ue020>\u0c20;" // LETTER TTHA -"\ue021>\u0c21;" // LETTER DDA -"\ue022>\u0c22;" // LETTER DDHA -"\ue023>\u0c23;" // LETTER NNA -"\ue024>\u0c24;" // LETTER TA -"\ue025>\u0c25;" // LETTER THA -"\ue026>\u0c26;" // LETTER DA -"\ue027>\u0c27;" // LETTER DHA -"\ue028>\u0c28;" // LETTER NA -"\ue029>\u0c28;" // REMAP (indicExceptions.txt): \u0c29>\u0c28 = LETTER 
NNNA>LETTER NA -"\ue02a>\u0c2a;" // LETTER PA -"\ue02b>\u0c2b;" // LETTER PHA -"\ue02c>\u0c2c;" // LETTER BA -"\ue02d>\u0c2d;" // LETTER BHA -"\ue02e>\u0c2e;" // LETTER MA -"\ue02f>\u0c2f;" // LETTER YA -"\ue030\ue03c>\u0c31;" -"\ue030>\u0c30;" // LETTER RA -"\ue031>\u0c31;" // LETTER RRA -"\ue032>\u0c32;" // LETTER LA -"\ue033>\u0c33;" // LETTER LLA -"\ue034>\u0c33;" // REMAP (indicExceptions.txt): \u0c34>\u0c33 = LETTER LLLA>LETTER LLA -"\ue035>\u0c35;" // LETTER VA -"\ue036>\u0c36;" // LETTER SHA -"\ue037>\u0c37;" // LETTER SSA -"\ue038>\u0c38;" // LETTER SA -"\ue039>\u0c39;" // LETTER HA - -"\ue03c>;" // FALLBACK BLOW AWAY NUKTA -"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA - -"\ue03e>\u0c3e;" // VOWEL SIGN AA -"\ue03f>\u0c3f;" // VOWEL SIGN I -"\ue040>\u0c40;" // VOWEL SIGN II -"\ue041>\u0c41;" // VOWEL SIGN U -"\ue042>\u0c42;" // VOWEL SIGN UU -"\ue043>\u0c43;" // VOWEL SIGN VOCALIC R -"\ue044>\u0c44;" // VOWEL SIGN VOCALIC RR -"\ue045>\u0c46;" // VOWEL SIGN CANDRA E>VOWEL SIGN E -"\ue046>\u0c46;" // VOWEL SIGN E -"\ue047>\u0c47;" // VOWEL SIGN EE -"\ue048>\u0c48;" // VOWEL SIGN AI -"\ue049>\u0c4a;" // REMAP (indicExceptions.txt): \u0c49>\u0c4a = VOWEL SIGN CANDRA O>VOWEL SIGN O -"\ue04a>\u0c4a;" // VOWEL SIGN O -"\ue04b>\u0c4b;" // VOWEL SIGN OO -"\ue04c>\u0c4c;" // VOWEL SIGN AU -"\ue04d>\u0c4d;" // SIGN VIRAMA -"\ue050>\u0c13\u0c02;" // REMAP (indicExceptions.txt): \u0c50>\u0c13\u0c02 = OM>LETTER OO.SIGN ANUSVARA -"\ue051>;" -"\ue052>;" -"\ue053>;" -"\ue054>;" -"\ue055>\u0c55;" // LENGTH MARK -"\ue056>\u0c56;" // AI LENGTH MARK -"\ue057>\u0c4c;" // REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU -"\ue058>\u0c15;" // REMAP -"\ue059>\u0c16;" // REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA -"\ue05a>\u0c17;" // REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA -"\ue05b>\u0c1c;" // REMAP (indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA -"\ue05c>\u0c22;" // REMAP -"\ue05d>\u0c22;" // REMAP 
(indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA -"\ue05e>\u0c2b;" // REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA -"\ue05f>\u0c2f;" // REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA -"\ue060>\u0c60;" // LETTER VOCALIC RR -"\ue061>\u0c61;" // LETTER VOCALIC LL -"\ue062>\u0c3f;" // REMAP (indicExceptions.txt): \u0c62>\u0c3f = VOWEL SIGN VOCALIC L>VOWEL SIGN I -"\ue063>\u0c40;" // REMAP (indicExceptions.txt): \u0c63>\u0c40 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II -"\ue064>'.' ;" // FALLBACK FOR DANDA -"\ue065>'.' ;" // FALLBACK FOR DOUBLE DANDA -"\ue066>\u0c66;" // DIGIT ZERO -"\ue067>\u0c67;" // DIGIT ONE -"\ue068>\u0c68;" // DIGIT TWO -"\ue069>\u0c69;" // DIGIT THREE -"\ue06a>\u0c6a;" // DIGIT FOUR -"\ue06b>\u0c6b;" // DIGIT FIVE -"\ue06c>\u0c6c;" // DIGIT SIX -"\ue06d>\u0c6d;" // DIGIT SEVEN -"\ue06e>\u0c6e;" // DIGIT EIGHT -"\ue06f>\u0c6f;" // DIGIT NINE - -"\ue070>;" // ABBREVIATION SIGN -"\ue071>\u0c30;" // LETTER RA WITH MIDDLE DIAGONAL -"\ue072>\u0c30;" // LETTER RA WITH LOWER DIAGONAL -"\ue073>;" // RUPEE MARK -"\ue074>;" // RUPEE SIGN -"\ue075>;" // CURRENCY NUMERATOR ONE -"\ue076>;" // CURRENCY NUMERATOR TWO -"\ue077>;" // CURRENCY NUMERATOR THREE -"\ue078>;" // CURRENCY NUMERATOR FOUR -"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR -"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN -"\ue07B>;" // ISSHAR -"\uE07C>;" // TIPPI -"\uE07D>;" // ADDAK -"\uE07E>;" // IRI -"\uE07F>;" // URA -"\uE080>;" // EK ONKAR -"\uE081>\u0c35;" // FALLBACK FOR ORIYA LETTER WA -"0 > \u0c66;" // FALLBACK FOR TAMIL -"1 > \u0c67;" - -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_Knda_InterIndic.txt b/icu4c/source/data/translit/t_Knda_InterIndic.txt deleted file mode 100644 index 05a3c6c0373..00000000000 --- a/icu4c/source/data/translit/t_Knda_InterIndic.txt +++ /dev/null @@ -1,108 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// 
Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Kannada_InterIndic.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// Kannada_InterIndic - -t_Knda_InterIndic { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Kannada-InterIndic -"\u0CC6\u0CD5>\uE047;" // VOWEL SIGN EE -"\u0CC6\u0CCD\u0CD6>\uE048\ue04d;" // VOWEL SIGN AI -"\u0CC6\u0CD6>\uE048;" // VOWEL SIGN AI -"\u0CC6\u0CC2\u0CD5>\uE04B;" // VOWEL SIGN OO -"\u0CC6\u0CC2>\uE04A;" // VOWEL SIGN O -"\u0CBF\u0CD5>\uE040;" // VOWEL SIGN II - -"\u0C82>\uE002;" // SIGN ANUSVARA -"\u0C83>\uE003;" // SIGN VISARGA -"\u0C85>\uE005;" // LETTER A -"\u0C86>\uE006;" // LETTER AA -"\u0C87>\uE007;" // LETTER I -"\u0C88>\uE008;" // LETTER II -"\u0C89>\uE009;" // LETTER U -"\u0C8A>\uE00A;" // LETTER UU -"\u0C8B>\uE00B;" // LETTER VOCALIC R -"\u0C8C>\uE00C;" // LETTER VOCALIC L -"\u0C8E>\uE00E;" // LETTER E -"\u0C8F>\uE00F;" // LETTER EE -"\u0C90>\uE010;" // LETTER AI -"\u0C92>\uE012;" // LETTER O -"\u0C93>\uE013;" // LETTER OO -"\u0C94>\uE014;" // LETTER AU -"\u0C95>\uE015;" // LETTER KA -"\u0C96>\uE016;" // LETTER KHA -"\u0C97>\uE017;" // LETTER GA -"\u0C98>\uE018;" // LETTER GHA -"\u0C99>\uE019;" // LETTER NGA -"\u0C9A>\uE01A;" // LETTER CA -"\u0C9B>\uE01B;" // LETTER CHA -"\u0C9C>\uE01C;" // LETTER JA -"\u0C9D>\uE01D;" // LETTER JHA -"\u0C9E>\uE01E;" // LETTER NYA -"\u0C9F>\uE01F;" // LETTER TTA -"\u0CA0>\uE020;" // LETTER TTHA -"\u0CA1>\uE021;" // LETTER DDA -"\u0CA2>\uE022;" // LETTER DDHA -"\u0CA3>\uE023;" // LETTER NNA -"\u0CA4>\uE024;" // LETTER 
TA -"\u0CA5>\uE025;" // LETTER THA -"\u0CA6>\uE026;" // LETTER DA -"\u0CA7>\uE027;" // LETTER DHA -"\u0CA8>\uE028;" // LETTER NA -"\u0CAA>\uE02A;" // LETTER PA -"\u0CAB>\uE02B;" // LETTER PHA -"\u0CAC>\uE02C;" // LETTER BA -"\u0CAD>\uE02D;" // LETTER BHA -"\u0CAE>\uE02E;" // LETTER MA -"\u0CAF>\uE02F;" // LETTER YA -"\u0CB0>\uE030;" // LETTER RA -"\u0CB1>\uE031;" // LETTER RRA -"\u0CB2>\uE032;" // LETTER LA -"\u0CB3>\uE033;" // LETTER LLA -"\u0CB5>\uE035;" // LETTER VA -"\u0CB6>\uE036;" // LETTER SHA -"\u0CB7>\uE037;" // LETTER SSA -"\u0CB8>\uE038;" // LETTER SA -"\u0CB9>\uE039;" // LETTER HA -"\u0CBC>\uE03C;" // SIGN NUKTA -"\u0CBD>\uE03D;" // AVAGRAHA -"\u0CBE>\uE03E;" // VOWEL SIGN AA -"\u0CBF>\uE03F;" // VOWEL SIGN I -"\u0CC1>\uE041;" // VOWEL SIGN U -"\u0CC2>\uE042;" // VOWEL SIGN UU -"\u0CC3>\uE043;" // VOWEL SIGN VOCALIC R -"\u0CC4>\uE044;" // VOWEL SIGN VOCALIC RR -"\u0CC6>\uE046;" // VOWEL SIGN E -"\u0CCC>\uE04C;" // VOWEL SIGN AU -"\u0CCD>\uE04D;" // SIGN VIRAMA -"\u0CD5>\uE055;" // LENGTH MARK -"\u0CD6>\uE056;" // AI LENGTH MARK -"\u0CDE>\uE034;" // LETTER LLLA -"\u0CE0>\uE060;" // LETTER VOCALIC RR -"\u0CE1>\uE061;" // LETTER VOCALIC LL -"\u0CE6>\uE066;" // DIGIT ZERO -"\u0CE7>\uE067;" // DIGIT ONE -"\u0CE8>\uE068;" // DIGIT TWO -"\u0CE9>\uE069;" // DIGIT THREE -"\u0CEA>\uE06A;" // DIGIT FOUR -"\u0CEB>\uE06B;" // DIGIT FIVE -"\u0CEC>\uE06C;" // DIGIT SIX -"\u0CED>\uE06D;" // DIGIT SEVEN -"\u0CEE>\uE06E;" // DIGIT EIGHT -"\u0CEF>\uE06F;" // DIGIT NINE - -// eof - } -} diff --git a/icu4c/source/data/translit/t_Latn_InterIndic.txt b/icu4c/source/data/translit/t_Latn_InterIndic.txt deleted file mode 100644 index 374555b97fa..00000000000 --- a/icu4c/source/data/translit/t_Latn_InterIndic.txt +++ /dev/null @@ -1,399 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Latin_InterIndic.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// Latin_InterIndic - -t_Latn_InterIndic { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Latin-InterIndic - //:: NFD; - //\u0e00 reserved - //consonants - "$chandrabindu=\ue001;" - "$anusvara=\ue002;" - "$visarga=\ue003;" - //\u0e004 reserved - // w represents the stand-alone form - "$wa=\ue005;" - "$waa=\ue006;" - "$wi=\ue007;" - "$wii=\ue008;" - "$wu=\ue009;" - "$wuu=\ue00a;" - "$wr=\ue00b;" - "$wl=\ue00c;" - "$wce=\ue00d;" // LETTER CANDRA E - "$wse=\ue00e;" // LETTER SHORT E - "$we=\ue00f;" // \u090f LETTER E - "$wai=\ue010;" - "$wco=\ue011;" // LETTER CANDRA O - "$wso=\ue012;" // LETTER SHORT O - "$wo=\ue013;" // \u0913 LETTER O - "$wau=\ue014;" - "$ka=\ue015;" - "$kha=\ue016;" - "$ga=\ue017;" - "$gha=\ue018;" - "$nga=\ue019;" - "$ca=\ue01a;" - "$cha=\ue01b;" - "$ja=\ue01c;" - "$jha=\ue01d;" - "$nya=\ue01e;" - "$tta=\ue01f;" - "$ttha=\ue020;" - "$dda=\ue021;" - "$ddha=\ue022;" - "$nna=\ue023;" - "$ta=\ue024;" - "$tha=\ue025;" - "$da=\ue026;" - "$dha=\ue027;" - "$na=\ue028;" - "$ena=\ue029;" //compatibility - "$pa=\ue02a;" - "$pha=\ue02b;" - "$ba=\ue02c;" - "$bha=\ue02d;" - "$ma=\ue02e;" - "$ya=\ue02f;" - "$ra=\ue030;" - "$rra=\ue031;" - "$la=\ue032;" - "$lla=\ue033;" - "$ela=\ue034;" //compatibility - "$va=\ue035;" - "$vva=\ue081;" - "$sha=\ue036;" - "$ssa=\ue037;" - "$sa=\ue038;" - "$ha=\ue039;" -//\u093a Reserved -//\u093b Reserved - "$nukta=\ue03c;" - "$avagraha=\ue03d;" // SIGN AVAGRAHA - // represents the dependent form - "$aa=\ue03e;" - "$i=\ue03f;" - "$ii=\ue040;" 
- "$u=\ue041;" - "$uu=\ue042;" - "$rh=\ue043;" - "$lh=\ue044;" - "$ce=\ue045;" //VOWEL SIGN CANDRA E - "$se=\ue046;" //VOWEL SIGN SHORT E - "$e=\ue047;" - "$ai=\ue048;" - "$co=\ue049;" // VOWEL SIGN CANDRA O - "$so=\ue04a;" // VOWEL SIGN SHORT O - "$o=\ue04b;" // \u094b - "$au=\ue04c;" - "$virama=\ue04d;" -// \u094e Reserved -// \u094f Reserved - "$om = \ue050;" // OM -// \u0951>; # UNMAPPED STRESS SIGN UDATTA -// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA -// \u0953>; # UNMAPPED GRAVE ACCENT -// \u0954>; # UNMAPPED ACUTE ACCENT - "$lm = \ue055;"// Telugu Length Mark - "$ailm=\ue056;"// AI Length Mark - "$aulm=\ue057;"// AU Length Mark - //urdu compatibity forms - "$uka=\ue058;" - "$ukha=\ue059;" - "$ugha=\ue05a;" - "$ujha=\ue05b;" - "$uddha=\ue05c;" - "$udha=\ue05d;" - "$ufa=\ue05e;" - "$uya=\ue05f;" - "$wrr=\ue060;" - "$wll=\ue061;" - "$rrh=\ue062;" - "$llh=\ue063;" - "$danda=\ue064;" - "$doubleDanda=\ue065;" - "$zero=\ue066;" // DIGIT ZERO - "$one=\ue067;" // DIGIT ONE - "$two=\ue068;" // DIGIT TWO - "$three=\ue069;" // DIGIT THREE - "$four=\ue06a;" // DIGIT FOUR - "$five=\ue06b;" // DIGIT FIVE - "$six=\ue06c;" // DIGIT SIX - "$seven=\ue06d;" // DIGIT SEVEN - "$eight=\ue06e;" // DIGIT EIGHT - "$nine=\ue06f;" // DIGIT NINE - // For all other scripts - "$ecp0=\ue070;" - "$ecp1=\ue071;" - "$ecp2=\ue072;" - "$ecp3=\ue073;" - "$ecp4=\ue074;" - "$ecp5=\ue075;" - "$ecp6=\ue076;" - "$ecp7=\ue077;" - "$ecp8=\ue078;" - "$ecp9=\ue079;" - "$ecpA=\ue07a;" - "$ecpB=\ue07b;" - "$ecpC=\ue07c;" - "$ecpD=\ue07d;" - "$ecpE=\ue07e;" - "$ecpF=\ue07f;" -// \u0970>; # UNMAPPED ABBREVIATION SIGN - "$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];" - "$depVowelBelow=[\ue041-\ue044];" - "$endThing=[$danda$doubleDanda];" - // $x was originally called '&'; $z was '%' - "$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];" - "$z=[bcdfghjklmnpqrstvwxyz];" - 
"$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];" - "\u0315 > $avagraha;" - "\u0303>$chandrabindu$anusvara;" - "m\u0310>$chandrabindu;" - "h\u0323>$visarga;" - "x>$ka$virama$sa;" -// convert to independent forms at start of word or syllable: -// dependent forms for roundtrip - "\u0314a\u0304>$aa;" - "\u0314ai>$ai;" - "\u0314au>$au;" - "\u0314ii>$ii;" - "\u0314i\u0304>$ii;" - "\u0314i>$i;" - "\u0314u\u0304>$uu;" - "\u0314u>$u;" - "\u0314r\u0325\u0304>$rrh;" - "\u0314r\u0325>$rh;" - "\u0314l\u0325\u0304>$llh;" - "\u0314lh>$lh;" - "\u0314l\u0325>$lh;" - "\u0314e\u0304>$e;" - "\u0314o\u0304>$o;" - "\u0314a>;" - "\u0314e\u0306>$ce;" - "\u0314o\u0306>$co;" - "\u0314e>$se;" - "\u0314o>$so;" - -// preceeded by consonants - "$consonants{ a\u0304>$aa;" - "$consonants{ ai>$ai;" - "$consonants{ au>$au;" - "$consonants{ ii>$ii;" - "$consonants{ i\u0304>$ii;" - "$consonants{ i>$i;" - "$consonants{ u\u0304>$uu;" - "$consonants{ u>$u;" - "$consonants{ r\u0325\u0304>$rrh;" - "$consonants{ r\u0325a>$rh;" - "$consonants{ r\u0325>$rh;" - "$consonants{ l\u0325\u0304>$llh;" - "$consonants{ lh>$lh;" - "$consonants{ l\u0325>$lh;" - "$consonants{ e\u0304>$e;" - "$consonants{ o\u0304>$o;" - "$consonants{ e\u0306>$ce;" - "$consonants{ o\u0306>$co;" - "$consonants{ e>$se;" - "$consonants{ o>$so;" - -// e.g. 
keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai}) - "a\u0304>$waa;" - "ai>$wai;" - "au>$wau;" - "i\u0304>$wii;" - "i>$wi;" - "u\u0304>$wuu;" - "u>$wu;" - "r\u0325\u0304>$wrr;" - "r\u0325>$wr;" - "l\u0325\u0304>$wll;" - "lh>$wl;" - "l\u0325>$wl;" - "e\u0304>$we;" - "o\u0304>$wo;" - "a>$wa;" - "e\u0306>$wce;" - "o\u0306>$wco;" - "e>$wse;" - "''om>$om;" - "o>$wso;" - - // rules for anusvara - "n}r\u0325 > $na|$virama;" - "n}l\u0325 > $na|$virama;" - "n}na > $na|$virama;" - "n\u0307}[kg] > $anusvara;" - "n\u0307}n\u0307 > $anusvara;" - "n\u0304}[cj] > $anusvara;" - "n\u0304}n\u0303 > $anusvara;" - "n\u0323}[tdn]\u0323 > $anusvara;" - "n}[tdn] > $anusvara;" - "m}[pbm] > $anusvara;" - "n}[ylvshr] > $anusvara;" - "m\u0307 > $anusvara;" - - //urdu compatibility - "q>$uka|$virama;" - "k\u0331h\u0331>$ukha |$virama;" - "g\u0307> $ugha | $virama;" - "z > $ujha |$virama;" - "f > $ufa|$virama;" - - // dev - "y\u0307>$uya|$virama;" - "l\u0331>$ela|$virama;" - "n\u0331>$ena|$virama;" - "n\u0307>$nga|$virama;" - "n\u0303>$nya|$virama;" - "n\u0323>$nna|$virama;" - "t\u0323h>$ttha|$virama;" - "t\u0323>$tta|$virama;" - "r\u0323h>$udha|$virama;" - "r\u0323>$uddha|$virama;" - "d\u0323h>$ddha|$virama;" - "d\u0323>$dda|$virama;" - "kh>$kha|$virama;" - "k>$ka|$virama;" - "gh>$gha|$virama;" - "g>$ga|$virama;" - "ch>$cha|$virama;" - "c>$ca|$virama;" - "jh>$jha|$virama;" - "j>$ja|$virama;" - "ny>$nya|$virama;" - "tth>$ttha|$virama;" - "ddh>$ddha|$virama;" - "th>$tha|$virama;" - "t>$ta|$virama;" - "dh>$dha|$virama;" - "d>$da|$virama;" - "n>$na|$virama;" - "ph>$pha|$virama;" - "p>$pa|$virama;" - "bh>$bha|$virama;" - "b>$ba|$virama;" - "m>$ma|$virama;" - "y>$ya|$virama;" - "r\u0331>$rra|$virama;" - "r>$ra|$virama;" - "l\u0323>$lla|$virama;" - "l>$la|$virama;" - "v>$va|$virama;" - "w\u0307>$vva|$virama;" - "w>$va|$virama;" - "sh>$sha|$virama;" - "ss>$ssa|$virama;" - "s\u0323>$ssa|$virama;" - "s\u0301>$sha|$virama;" - "s>$sa|$virama;" - "h>$ha|$virama;" - "'.'>$danda;" - 
"$danda'.'>$doubleDanda;" - "$depVowelAbove{'~'>$anusvara;" - "$depVowelBelow{'~'>$chandrabindu;" -// convert to dependent forms after consonant with no vowel: -// e.g. kai -> {ka}{virama}ai -> {ka}{ai} - //$virama aa>$aa; - "$virama a\u0304>$aa;" - "$virama ai>$ai;" - "$virama au>$au;" - "$virama ii>$ii;" - "$virama i\u0304>$ii;" - "$virama i>$i;" - //$virama uu>$uu; - "$virama u\u0304>$uu;" - "$virama u>$u;" - //$virama rrh>$rrh; - "$virama r\u0325\u0304>$rrh;" - //$virama rh>$rh; - "$virama r\u0325a>$rh;" - "$virama r\u0325>$rh;" - "$virama l\u0325\u0304>$llh;" - "$virama lh>$lh;" - "$virama l\u0325>$lh;" - "$virama e\u0304>$e;" - "$virama o\u0304>$o;" - "$virama a>;" - "$virama e\u0306>$ce;" - "$virama o\u0306>$co;" - "$virama e>$se;" - "$virama o>$so;" - - -// otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai} - //$virama''aa>$waa; - "$virama''a\u0304>$waa;" - "$virama''ai>$wai;" - "$virama''au>$wau;" - //$virama''ii>$wii; - "$virama''i\u0304>$wii;" - "$virama''i>$wi;" - //$virama''uu>$wuu; - "$virama''u\u0304>$wuu;" - "$virama''u>$wu;" - //$virama''rrh>$wrr; - "$virama''r\u0325\u0304>$wrr;" - //$virama''rh>$wr; - "$virama''r\u0325>$wr;" - "$virama''l\u0325\u0304>$wll;" - //$virama''lh>$wl; - "$virama''l\u0325>$wl;" - "$virama''e\u0304>$we;" - "$virama''o\u0304>$wo;" - "$virama''a>$wa;" - "$virama''e\u0306>$wce;" - "$virama''o\u0306>$wco;" - "$virama''e>$wse;" - "$virama''o>$wso;" -// no virama - "''a\u0304>$waa;" - "''ai>$wai;" - "''au>$wau;" - "''i\u0304>$wii;" - "''i>$wi;" - "''u\u0304>$wuu;" - "''u>$wu;" - "''r\u0325\u0304>$wrr;" - "''r\u0325>$wr;" - "''l\u0325\u0304>$wll;" - "''l\u0325>$wl;" - "''e\u0304>$we;" - "''o\u0304>$wo;" - "''a>$wa;" - "''e\u0306>$wce;" - "''o\u0306>$wco;" - "''e>$wse;" - "''o>$wso;" - - "$virama } [$z] > $virama;" - "$virama } ' ' > $virama ;" - "$virama}$endThing>;" - "0>$zero;" - "1>$one;" - "2>$two;" - "3>$three;" - "4>$four;" - "5>$five;" - "6>$six;" - "7>$seven;" - "8>$eight;" - "9>$nine;" - 
"''>;" - //:: NFC (NFD) ; - } -} diff --git a/icu4c/source/data/translit/t_Latn_Jamo.txt b/icu4c/source/data/translit/t_Latn_Jamo.txt deleted file mode 100644 index feab406d030..00000000000 --- a/icu4c/source/data/translit/t_Latn_Jamo.txt +++ /dev/null @@ -1,538 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Latin_Jamo.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// Latin_Jamo - -t_Latn_Jamo { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -//- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in -//- the INDEX file. This transliterator is, by itself, not -//- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or -//- inverses thereof. - -// Transliteration from Latin characters to Korean script is done in -// two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul -// transliteration is done algorithmically following Unicode 3.0 -// section 3.11. This file implements the Latin to Jamo -// transliteration using rules. - -// Jamo occupy the block 1100-11FF. Within this block there are three -// groups of characters: initial consonants or choseong (I), medial -// vowels or jungseong (M), and trailing consonants or jongseong (F). -// Standard Korean syllables are of the form I+M+F*. 
- -// Section 3.11 describes the use of 'filler' jamo to convert -// nonstandard syllables to standard form: the choseong filler 115F and -// the junseong filler 1160. In this transliterator, we will not use -// 115F or 1160. - -// We will, however, insert two 'null' jamo to make foreign words -// conform to Korean syllable structure. These are the null initial -// consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text, -// we will use the separator in order to disambiguate strings, -// e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G). - -// We will not use all of the characters in the jamo block. We will -// only use the 19 initials, 21 medials, and 27 finals possessing a -// jamo short name as defined in section 4.4 of the Unicode book. - -// Rules of thumb. These guidelines provide the basic framework -// for the rules. They are phrased in terms of Latin-Jamo transliteration. -// The Jamo-Latin rules derive from these, since the Jamo-Latin rules are -// just context-free transliteration of jamo to corresponding short names, -// with the addition of separators to maintain round-trip integrity -// in the context of the Latin-Jamo rules. - -// A sequence of vowels: -// - Take the longest sequence you can. If there are too many, or you don't -// have a starting consonant, introduce a 110B necessary. - -// A sequence of consonants. -// - First join the double consonants: G + G -> GG -// - In the remaining list, -// -- If there is no preceding vowel, take the first consonant, and insert EU -// after it. Continue with the rest of the consonants. -// -- If there is one consonant, attach to the following vowel -// -- If there are two consonants and a following vowel, attach one to the -// preceeding vowel, and one to the following vowel. 
-// -- If there are more than two consonants, join the first two together if you -// can: L + G => LG -// -- If you still end up with more than 2 consonants, insert EU after the -// first one, and continue with the rest of the consonants. - -//---------------------------------------------------------------------- -// Variables - -// Some latin consonants or consonant pairs only occur as initials, and -// some only as finals, but some occur as both. This makes some jamo -// consonants ambiguous when transliterated into latin. -// Initial only: IEUNG BB DD JJ R -// Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ -// Initial and Final: B C D G GG H J K M N P S SS T - - "$Gi = \u1100;" - "$GGi = \u1101;" - "$Ni = \u1102;" - "$Di = \u1103;" - "$DD = \u1104;" - "$R = \u1105;" - "$Mi = \u1106;" - "$Bi = \u1107;" - "$BB = \u1108;" - "$Si = \u1109;" - "$SSi = \u110A;" - "$IEUNG = \u110B;" // null initial, inserted during Latin-Jamo - "$Ji = \u110C;" - "$JJ = \u110D;" - "$Ci = \u110E;" - "$Ki = \u110F;" - "$Ti = \u1110;" - "$Pi = \u1111;" - "$Hi = \u1112;" - - "$A = \u1161;" - "$AE = \u1162;" - "$YA = \u1163;" - "$YAE = \u1164;" - "$EO = \u1165;" - "$E = \u1166;" - "$YEO = \u1167;" - "$YE = \u1168;" - "$O = \u1169;" - "$WA = \u116A;" - "$WAE = \u116B;" - "$OE = \u116C;" - "$YO = \u116D;" - "$U = \u116E;" - "$WEO = \u116F;" - "$WE = \u1170;" - "$WI = \u1171;" - "$YU = \u1172;" - "$EU = \u1173;" // null medial, inserted during Latin-Jamo - "$YI = \u1174;" - "$I = \u1175;" - - "$Gf = \u11A8;" - "$GGf = \u11A9;" - "$GS = \u11AA;" - "$Nf = \u11AB;" - "$NJ = \u11AC;" - "$NH = \u11AD;" - "$Df = \u11AE;" - "$L = \u11AF;" - "$LG = \u11B0;" - "$LM = \u11B1;" - "$LB = \u11B2;" - "$LS = \u11B3;" - "$LT = \u11B4;" - "$LP = \u11B5;" - "$LH = \u11B6;" - "$Mf = \u11B7;" - "$Bf = \u11B8;" - "$BS = \u11B9;" - "$Sf = \u11BA;" - "$SSf = \u11BB;" - "$NG = \u11BC;" - "$Jf = \u11BD;" - "$Cf = \u11BE;" - "$Kf = \u11BF;" - "$Tf = \u11C0;" - "$Pf = \u11C1;" - "$Hf = \u11C2;" - - "$jamoInitial = 
[\u1100-\u1112];" - - "$jamoMedial = [\u1161-\u1175];" - - "$latinInitial = [bcdghjkmnprst];" - - // Any character in the latin transliteration of a medial - "$latinMedial = [aeiouwy];" - - // The last character of the latin transliteration of a medial - "$latinMedialEnd = [aeiou];" - - // Disambiguation separator - "$sep = \\\';" - -//---------------------------------------------------------------------- -// Jamo-Latin - -// Jamo to latin is relatively simple, since it is the latin that is -// ambiguous. Most rules are straightforward, and we encode them below -// as simple add-on back rule, e.g.: - -// $jamoMedial {bs} > $BS; - -// becomes - -// $jamoMedial {bs} <> $BS; - -// Furthermore, we don't care about the ordering for Jamo-Latin because -// we are going from single characters, so we can very easily piggyback -// on the Latin-Jamo. - -// The main issue with Jamo-Latin is when to insert separators. -// Separators are inserted to obtain correct round trip behavior. For -// example, the sequence Ki A Gf Gi E, if transliterated to "kagge", -// would then round trip to Ki A GGi E. To prevent this, we insert a -// separator: "kag-ge". IMPORTANT: The need for separators depends -// very specifically on the behavior of the Latin-Jamo rules. A change -// in the Latin-Jamo behavior can completely change the way the -// separator insertion must be done. - -// First try to preserve actual separators in the jamo text by doubling -// them. This fixes problems like: -// (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol -// => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional -// -- if we don't care about losing separators in the jamo, we can delete -// this rule. - - "$sep $sep <> $sep;" - -// Triple consonants. For three consonants "axxx" we insert a -// separator between the first and second "x" if XXf, Xf, and Xi all -// exist, and we have A Xf XXi. This prevents the reverse -// transliteration to A XXf Xi. 
- - "$sep < $latinMedialEnd g {} $GGi;" - "$sep < $latinMedialEnd s {} $SSi;" - -// For vowels the rule is similar. If there is a vowel "ae" such that -// "a" by itself and "e" by itself are vowels, then we want to map A E -// to "a-e" so as not to round trip to AE. However, in the text Ki EO -// IEUNG E we don't need to map to "keo-e". "keoe" suffices. For -// vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be -// tested. NOTE: These rules used to have a left context of -// $latinInitial instead of [^$latinMedial]. The problem with this is -// sequences where an initial IEUNG is transliterated away: -// (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O) - - "$sep < [^$latinMedial] [y w] e {} [$O $OE];" - "$sep < [^$latinMedial] e {} [$O $OE $U];" - "$sep < [^$latinMedial] [o a] {} [$E $EO $EU];" - "$sep < [^$latinMedial] [w y] a {} [$E $EO $EU];" - -// Similar to the above, but with an intervening $IEUNG. - - "$sep < [^$latinMedial] [y w] e {} $IEUNG [$O $OE];" - "$sep < [^$latinMedial] e {} $IEUNG [$O $OE $U];" - "$sep < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];" - "$sep < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];" - -// Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E, -// where Xi also exists, must be transliterated as "ax-e" to prevent -// the round trip conversion to A Xi E. 
- - "$sep < $latinMedialEnd b {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd c {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd d {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd g {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd h {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd j {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd k {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd m {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd n {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd p {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd s {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd t {} $IEUNG $jamoMedial;" - -// Double finals followed by IEUNG. Similar to the single finals -// followed by IEUNG. Any latin consonant pair X Y, between medials, -// that we would split by Latin-Jamo, we must handle when it occurs as -// part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi -// E. - - "$sep < $latinMedialEnd b s {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd g g {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd g s {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd l b {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd l g {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd l h {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd l m {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd l p {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd l s {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd l t {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd n g {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd n h {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd n j {} $IEUNG $jamoMedial;" - "$sep < $latinMedialEnd s s {} $IEUNG $jamoMedial;" - -// Split doubles. Text of the form A Xi Xf E, where XXi also occurs, -// we transliterate as "ax-xe" to prevent round trip transliteration as -// A XXi E. 
- - "$sep < $latinMedialEnd b {} $Bi $jamoMedial;" - "$sep < $latinMedialEnd d {} $Di $jamoMedial;" - "$sep < $latinMedialEnd j {} $Ji $jamoMedial;" - "$sep < $latinMedialEnd g {} $Gi $jamoMedial;" - "$sep < $latinMedialEnd s {} $Si $jamoMedial;" - -// XYY. This corresponds to the XYY rule in Latin-Jamo. By default -// Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result, -// "xyy" forms that correspond to XYf Yi must be transliterated as -// "xy-y". - - "$sep < $latinMedialEnd b s {} [$Si $SSi];" - "$sep < $latinMedialEnd g s {} [$Si $SSi];" - "$sep < $latinMedialEnd l b {} [$Bi $BB];" - "$sep < $latinMedialEnd l g {} [$Gi $GGi];" - "$sep < $latinMedialEnd l s {} [$Si $SSi];" - "$sep < $latinMedialEnd n g {} [$Gi $GGi];" - "$sep < $latinMedialEnd n j {} [$Ji $JJ];" - -// Deletion of IEUNG is handled below. - -//---------------------------------------------------------------------- -// Latin-Jamo - -// [Basic, context-free Jamo-Latin rules are embedded here too. See -// above.] - -// Split digraphs: Text of the form 'axye', where 'xy' is a final -// digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and -// 'e' are medials, we want to transliterate this as A Xf Yi E rather -// than A XYf IEUNG E. We do NOT include text of the form "axxe", -// since that is handled differently below. These rules are generated -// programmatically from the jamo data. 
- - "$jamoMedial {b s} $latinMedial > $Bf $Si;" - "$jamoMedial {g s} $latinMedial > $Gf $Si;" - "$jamoMedial {l b} $latinMedial > $L $Bi;" - "$jamoMedial {l g} $latinMedial > $L $Gi;" - "$jamoMedial {l h} $latinMedial > $L $Hi;" - "$jamoMedial {l m} $latinMedial > $L $Mi;" - "$jamoMedial {l p} $latinMedial > $L $Pi;" - "$jamoMedial {l s} $latinMedial > $L $Si;" - "$jamoMedial {l t} $latinMedial > $L $Ti;" - "$jamoMedial {n g} $latinMedial > $Nf $Gi;" - "$jamoMedial {n h} $latinMedial > $Nf $Hi;" - "$jamoMedial {n j} $latinMedial > $Nf $Ji;" - -// Single consonants are initials: Text of the form 'axe', where 'x' -// can be an initial or a final, and 'a' and 'e' are medials, we want -// to transliterate as A Xi E rather than A Xf IEUNG E. - - "$jamoMedial {b} $latinMedial > $Bi;" - "$jamoMedial {c} $latinMedial > $Ci;" - "$jamoMedial {d} $latinMedial > $Di;" - "$jamoMedial {g} $latinMedial > $Gi;" - "$jamoMedial {h} $latinMedial > $Hi;" - "$jamoMedial {j} $latinMedial > $Ji;" - "$jamoMedial {k} $latinMedial > $Ki;" - "$jamoMedial {m} $latinMedial > $Mi;" - "$jamoMedial {n} $latinMedial > $Ni;" - "$jamoMedial {p} $latinMedial > $Pi;" - "$jamoMedial {s} $latinMedial > $Si;" - "$jamoMedial {t} $latinMedial > $Ti;" - -// Doubled initials. The sequence "axxe", where XX exists as an initial -// (XXi), and also Xi and Xf exist (true of all digraphs XX), we want -// to transliterate as A XXi E, rather than split to A Xf Xi E. - - "$jamoMedial {b b} $latinMedial > $BB;" - "$jamoMedial {d d} $latinMedial > $DD;" - "$jamoMedial {j j} $latinMedial > $JJ;" - "$jamoMedial {g g} $latinMedial > $GGi;" - "$jamoMedial {s s} $latinMedial > $SSi;" - -// XYY. Because doubled consonants bind more strongly than XY -// consonants, we must handle the sequence "axyy" specially. Here XYf -// and YYi must exist. In these cases, we map to Xf YYi rather than -// XYf. 
- - "$jamoMedial {b} s s > $Bf;" - "$jamoMedial {g} s s > $Gf;" - "$jamoMedial {l} b b > $L;" - "$jamoMedial {l} g g > $L;" - "$jamoMedial {l} s s > $L;" - "$jamoMedial {n} g g > $Nf;" - "$jamoMedial {n} j j > $Nf;" - -// Finals: Attach consonant with preceding medial to preceding medial. -// Do this BEFORE mapping consonants to initials. Longer keys must -// precede shorter keys that they start with, e.g., the rule for 'bs' -// must precede 'b'. - -// [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this -// block for Jamo-Latin.] - - "$jamoMedial {bs} <> $BS;" - "$jamoMedial {b} <> $Bf;" - "$jamoMedial {c} <> $Cf;" - "$jamoMedial {d} <> $Df;" - "$jamoMedial {gg} <> $GGf;" - "$jamoMedial {gs} <> $GS;" - "$jamoMedial {g} <> $Gf;" - "$jamoMedial {h} <> $Hf;" - "$jamoMedial {j} <> $Jf;" - "$jamoMedial {k} <> $Kf;" - "$jamoMedial {lb} <> $LB; $jamoMedial {lg} <> $LG;" - "$jamoMedial {lh} <> $LH;" - "$jamoMedial {lm} <> $LM;" - "$jamoMedial {lp} <> $LP;" - "$jamoMedial {ls} <> $LS;" - "$jamoMedial {lt} <> $LT;" - "$jamoMedial {l} <> $L;" - "$jamoMedial {m} <> $Mf;" - "$jamoMedial {ng} <> $NG;" - "$jamoMedial {nh} <> $NH;" - "$jamoMedial {nj} <> $NJ;" - "$jamoMedial {n} <> $Nf;" - "$jamoMedial {p} <> $Pf;" - "$jamoMedial {ss} <> $SSf;" - "$jamoMedial {s} <> $Sf;" - "$jamoMedial {t} <> $Tf;" - -// Initials: Attach single consonant to following medial. Do this -// AFTER mapping finals. Longer keys must precede shorter keys that -// they start with, e.g., the rule for 'gg' must precede 'g'. - -// [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within -// this block for Jamo-Latin.] 
- - "{gg} $latinMedial <> $GGi;" - "{g} $latinMedial <> $Gi;" - "{n} $latinMedial <> $Ni;" - "{dd} $latinMedial <> $DD;" - "{d} $latinMedial <> $Di;" - "{r} $latinMedial <> $R;" - "{m} $latinMedial <> $Mi;" - "{bb} $latinMedial <> $BB;" - "{b} $latinMedial <> $Bi;" - "{ss} $latinMedial <> $SSi;" - "{s} $latinMedial <> $Si;" - "{jj} $latinMedial <> $JJ;" - "{j} $latinMedial <> $Ji;" - "{c} $latinMedial <> $Ci;" - "{k} $latinMedial <> $Ki;" - "{t} $latinMedial <> $Ti;" - "{p} $latinMedial <> $Pi;" - "{h} $latinMedial <> $Hi;" - -// 'r' in final position. Because of the equivalency of the 'l' and -// 'r' jamo (the glyphs are the same), we try to provide the same -// equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled -// below. If we see an 'r' in an apparent final position, treat it -// like 'l'. For example, "karka" => Ki A R EU Ki A without this rule. -// Instead, we want Ki A L Ki A. - - "$jamoMedial {r} $latinInitial > | l;" - -// Initial + Final: If we match the next rule, we have initial then -// final consonant with no intervening medial. We insert the null -// vowel BEFORE it to create a well-formed syllable. (In the next rule -// we insert a null vowel AFTER an anomalous initial.) - - "$jamoInitial {} [bcdghjklmnpst] > $EU;" - -// Initial + X: This block matches an initial consonant not followed by -// a medial. We insert the null vowel after it. We handle double -// initials explicitly here; for single initial consonants we insert EU -// (as Latin) after them and let standard rules do the rest. - -// BREAKS ROUND TRIP INTEGRITY - - "gg > $GGi $EU;" - "dd > $DD $EU;" - "bb > $BB $EU;" - "ss > $SSi $EU;" - "jj > $JJ $EU;" - - "([bcdghjkmnprst]) > | $1 eu;" - -// X + Final: Finally we have to deal with a consonant that can only be -// interpreted as a final (not an initial) and which is preceded -// neither by an initial nor a medial. It is the start of the -// syllable, but cannot be. Most of these will already be handled by -// the above rules. 
'bs' splits into Bi EU Sf. Similar for 'gs' 'ng' -// 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'. -// For this isolated case, we could add a null initial and medial, -// which would give "la" => IEUNG EU L IEUNG A, for example. A more -// economical solution is to transliterate isolated "l" (that is, -// initial "l") to "r". (Other similar conversions of consonants that -// occur neither as initials nor as finals are handled below.) - - "l > | r;" - -// Medials. If a medial is preceded by an initial, then we proceed -// normally. As usual, longer keys must precede shorter ones. - -// [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within -// this block for Jamo-Latin.] - - "$jamoInitial {ae} <> $AE;" - "$jamoInitial {a} <> $A;" - "$jamoInitial {eo} <> $EO;" - "$jamoInitial {eu} <> $EU;" - "$jamoInitial {e} <> $E;" - "$jamoInitial {i} <> $I;" - "$jamoInitial {oe} <> $OE;" - "$jamoInitial {o} <> $O;" - "$jamoInitial {u} <> $U;" - "$jamoInitial {wae} <> $WAE;" - "$jamoInitial {wa} <> $WA;" - "$jamoInitial {weo} <> $WEO;" - "$jamoInitial {we} <> $WE;" - "$jamoInitial {wi} <> $WI;" - "$jamoInitial {yae} <> $YAE;" - "$jamoInitial {ya} <> $YA;" - "$jamoInitial {yeo} <> $YEO;" - "$jamoInitial {ye} <> $YE;" - "$jamoInitial {yi} <> $YI;" - "$jamoInitial {yo} <> $YO;" - "$jamoInitial {yu} <> $YU;" - -// We may see an anomalous isolated 'w' or 'y'. In that case, we -// interpret it as 'wi' and 'yu', respectively. - -// BREAKS ROUND TRIP INTEGRITY - - "$jamoInitial {w} > | wi;" - "$jamoInitial {y} > | yu;" - -// Otherwise, insert a null consonant IEUNG before the medial (which is -// still an untransliterated latin vowel). - - "($latinMedial) > $IEUNG | $1;" - -// Convert non-jamo latin consonants to equivalents. These occur as -// neither initials nor finals in jamo. 'l' occurs as a final, but not -// an initial; it is handled above. The following letters (left hand -// side) will never be output by Jamo-Latin. 
- - "f > | p;" - "q > | k;" - "v > | b;" - "x > | ks;" - "z > | s;" - -// Delete separators (Latin-Jamo). - - "$sep > ;" - -// Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels, -// since these may also occur in text. - - "< $IEUNG;" - -//- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in -//- the INDEX file. This transliterator is, by itself, not -//- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or -//- inverses thereof. - -// eof - } -} diff --git a/icu4c/source/data/translit/t_Latn_Kana.txt b/icu4c/source/data/translit/t_Latn_Kana.txt deleted file mode 100644 index 623d9097a62..00000000000 --- a/icu4c/source/data/translit/t_Latn_Kana.txt +++ /dev/null @@ -1,511 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Latin_Katakana.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// Latin_Katakana - -t_Latn_Kana { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// note: a global filter is more efficient, but MUST include all source chars -//:: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]] ; -// MINIMAL FILTER GENERATED FOR: Latin-Katakana -//## WARNING -- must add width filter, both here and below!!! 
### -":: [[\u1100-\u1112\u111A\u1121\u1160-\u1175\u11AA\u11AC-\u11AD\u11B0-\u11B5\u2190-\u2193\u2502\u25A0\u25CB\u3000-\u3002\u300C-\u300D\u3099-\u309A\u30A1-\u30ED\u30EF\u30F2-\u30F4\u30F7\u30FA-\u30FC\uFF01-\uFF5E\uFFE0-\uFFE6][',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]] ;" - -":: [:Latin:] fullwidth-halfwidth ();" -":: NFD (NFC);" -":: Lower ();" // whenever transliterating from cased to uncased script, include this -// :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese - -// Uses modified Hepburn. Small changes to make unambiguous. - -// | Kunrei-shiki: Hepburn/MHepburn -// | ------------------------------ -// | si: shi -// | si ~ya: sha -// | si ~yu: shu -// | si ~yo: sho -// | zi: ji -// | zi ~ya: ja -// | zi ~yu: ju -// | zi ~yo: jo -// | ti: chi -// | ti ~ya: cha -// | ti ~yu: chu -// | ti ~yu: cho -// | tu: tsu -// | di: ji/dji -// | du: zu/dzu -// | hu: fu - -// | For foreign words: -// | ----------------- -// | se ~i si -// | si ~e she -// | -// | ze ~i zi -// | zi ~e je -// | -// | te ~i ti -// | ti ~e che -// | te ~u tu -// | -// | de ~i di -// | de ~u du -// | de ~i di -// | -// | he ~u: hu -// | hu ~a fa -// | hu ~i fi -// | hu ~e he -// | hu ~o ho - -// Most small forms are generated, but if necessary -// explicit small forms are given with ~a, ~ya, etc. 
- -//------------------------------------------------------ -// Variables - -"$vowel = [aeiou] ;" -"$consonant = [bcdfghjklmnpqrstvwxyz] ;" -"$macron = \u0304 ;" - -// Variables used for doubled-consonants with tsu - -"$kana = [\u3041-\u3094] ;" - -"$voice = [\u3099\u309B];" -"$semivoice = [\u309A\u309C];" - -"$k_start = [カキクケコかきくけこ] ;" - -"$s_start = [サシスセソさしすせそ] ;" - -"$j_start = [シし] $voice ;" - -"$t_start = [タチツテトたちつてと] ;" - -"$n_start = [ナニヌネノンなにぬねの] ;" - -"$h_start = [ハヒヘホはひへほ] ;" -"$f_start = [フふ] ;" - -"$m_start = [マミムメモまみむめも] ;" - -"$y_start = [ヤユヨやゆよ] ;" - -"$r_start = [ラリルレロらりるれろ] ;" - -"$w_start = [ワヰヱヲわゐゑを] ;" - -"$v_start = [ワヰヱヲ]゙ ;" - -// if ン is followed by $n_quoter, then it needs an -// apostrophe after its romaji form to disambiguate it. -// e.g., ン ア ! = ナ, so represent as "n'a", not "na". - -"$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;" - -"$small_y = [ャィュェョ] ;" - -"$iteration = \u309D ;" - -//------------------------------------------------------ -// katakana rules - -// Punctuation - -"'.' <> 。;" -"',' <> 、;" -// ' ' } [a-z] > ; # delete spaces before latin -// ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before hiragana - -// Iteration Mark -// Copy previous letter & marks - -// TODO -// | $1 $1 < ($kana [[:M:]$voice$semivoice]?) 
$iteration - -// Specials for katakana -- not shared with hiragana - -"va <> ヷ ;" -"vi <> ヸ ;" -"ve <> ヹ ;" -"vo <> ヺ ;" -"'~ka' <> ヵ ;" -"'~ke' <> ヶ ;" - -// ~~~ begin shared rules ~~~ - -//special - -"ya < '~'ャ;" -"yi < '~'ィ ;" -"yu < '~'ュ;" -"ye < '~'ェ;" -"yo < '~'ョ;" - -//normal - -"a <> ア ;" - -"b | '~' < ヒ ゙} $small_y ;" -"by } $vowel > ビ | '~y' ;" - -"ba <> バ ;" -"bi <> ビ ;" -"bu <> ブ ;" -"be <> ベ ;" -"bo <> ボ ;" - -"c } i > | s ;" -"c } e > | s ;" - -"da <> ダ ;" -"di <> ディ ;" -"du <> デゥ ;" -"de <> デ ;" -"do <> ド ;" -"dzu <> ヅ ;" -"dja < ヂャ ;" -"dji'~i' < ヂィ ;" // liu -"dju < ヂュ ;" -"dje < ヂェ ;" -"djo < ヂョ ;" -"dji <> ヂ ;" -"dj } $vowel > ヂ | '~y' ;" - -// TODO: QUESTION: use ĵĴżŻ instead of dj, dz - -"cha < チャ ;" -"chi'~i' < チィ ;" // liu -"chu < チュ ;" -"che < チェ ;" -"cho < チョ ;" -"chi <> チ ;" -"ch } $vowel > チ | '~y' ;" - -"e <> エ ;" - -"g | '~' < ギ} $small_y ;" -"gy } $vowel > ギ | '~y' ;" - -"ga <> ガ ;" -"gi <> ギ ;" -"gu <> グ ;" -"ge <> ゲ ;" -"go <> ゴ ;" - -"i <> イ ;" - -// j } $vowel > ジ | '~y' ; - -"ja <> ジャ ;" -"ji'~i' < ジィ ;" // liu -"ju <> ジュ ;" -"je <> ジェ ;" -"jo <> ジョ ;" -"ji <> ジ ;" - -"k | '~' < キ} $small_y ;" -"ky } $vowel > キ | '~y' ;" - -"ka <> カ ;" -"ki <> キ ;" -"ku <> ク ;" -"ke <> ケ ;" -"ko <> コ ;" - -"m | '~' < ミ} $small_y ;" -"my } $vowel > ミ | '~y' ;" - -"ma <> マ ;" -"mi <> ミ ;" -"mu <> ム ;" -"me <> メ ;" -"mo <> モ ;" - -"m } [pbfv] > ン ;" - -"n | '~' < ニ } $small_y ;" -"ny } $vowel > ニ | '~y' ;" - -"na <> ナ ;" -"ni <> ニ ;" -"nu <> ヌ ;" -"ne <> ネ ;" -"no <> ノ ;" - -"o <> オ ;" - -"p | '~' < ピ } $small_y ;" -"py } $vowel > ピ | '~y' ;" - -"pa <> パ ;" -"pi <> ピ ;" -"pu <> プ ;" -"pe <> ペ ;" -"po <> ポ ;" - -"h | '~' < ヒ } $small_y ;" -"hy } $vowel > ヒ | '~y' ;" - -"ha <> ハ ;" -"hi <> ヒ ;" -"hu <> ヘゥ ;" -"he <> ヘ ;" -"ho <> ホ ;" - -// f | '~' < フ } $small_y ; -// f } $vowel > フ | '~' ; - -"fa <> ファ ;" -"fi <> フィ ;" -"fe <> フェ ;" -"fo <> フォ ;" -"fu <> フ ;" - -"r | '~' < リ } $small_y ;" -"ry } $vowel > リ | '~y' ;" - -"ra <> ラ ;" -"ri <> リ ;" -"ru <> 
ル ;" -"re <> レ ;" -"ro <> ロ ;" - -"za <> ザ ;" -"zi <> ゼィ ;" -"zu <> ズ ;" -"ze <> ゼ ;" -"zo <> ゾ ;" - -"sa <> サ ;" -"si <> セィ ;" -"su <> ス ;" -"se <> セ ;" -"so <> ソ ;" - -"sha < シャ ;" -"shi'~i' < シィ ;" // liu -"shu < シュ ;" -"she < シェ ;" -"sho < ショ ;" -"shi <> シ ;" -"sh } $vowel > シ | '~y' ;" - -"ta <> タ ;" -"ti <> ティ ;" -"tu <> テゥ ;" -"te <> テ ;" -"to <> ト ;" - -"tsu <> ツ ;" - -// v } $vowel > ヴ | '~' ; - -//'v~a' < ヴァ ; # liu -//'v~i' < ヴィ ; # liu -//'v~e' < ヴェ ; # liu -//'v~o' < ヴォ ; # liu -"vu <> ヴ ;" - -"u <> ウ ;" - -// w } $vowel > ウ | '~' ; - -"wa <> ワ ;" -"wi <> ヰ ;" -"wu > ウ ;" -"we <> ヱ ;" -"wo <> ヲ ;" - -"ya <> ヤ ;" -"yi > イ ;" -"yu <> ユ ;" -"ye > エ ;" -"yo <> ヨ ;" - -// double consonants - -//specials -"s } sh > ッ ;" -"t } ch > ッ ;" - -//voiced - -"j } j <> ッ } $j_start ;" -"b } b <> ッ } [$h_start$f_start] $voice;" -"d } d <> ッ } $t_start $voice;" -"g } g <> ッ } $k_start $voice;" -"p } p <> ッ } [$h_start$f_start] $semivoice;" -// v } v <> ッ } [ワヰウヱヲう] $voice ; -"z } z <> ッ } $s_start $voice;" -"v } v <> ッ } $v_start;" - -// normal - -"k } k <> ッ } $k_start ;" -"m } m <> ッ } $m_start ;" -"n } n <> ッ } $n_start ;" -"h } h <> ッ } $h_start ;" -"f } f <> ッ } $f_start ;" -"r } r <> ッ } $r_start ;" -"t } t <> ッ } $t_start ;" -"s } s <> ッ } $s_start ;" - -"w } w <> ッ } $w_start;" -"y } y <> ッ } $y_start;" - -// completeness -"x } x > ッ ;" -"c } k > ッ ;" -"c } c > ッ ;" -"c } q > ッ ;" -"l } l > ッ ;" -"q } q > ッ ;" -// y } y > ッ ; -// w } w > ッ ; - -// prolonged vowel mark. 
this indicates a doubling of -// the preceding vowel sound - -//a < a { ー ; # liu -//e < e { ー ; # liu -//i < i { ー ; # liu -//o < o { ー ; # liu -//u < u { ー ; # liu - -"$macron <> ー ;" - -// small forms - -"'~a' <> ァ ;" -"'~i' <> ィ ;" -"'~u' <> ゥ ;" -"'~e' <> ェ ;" -"'~o' <> ォ ;" -"'~tsu' <> ッ ;" -"'~wa' <> ヮ ;" -"'~ya' <> ャ ;" -"'~yi' > ィ ;" -"'~yu' <> ュ ;" -"'~ye' > ェ ;" -"'~yo' <> ョ ;" - -// iteration marks -// TODO: make more accurate - -"j $1 < sh (y* $vowel) {ヽ$voice ;" -"dj $1 < ch (y* $vowel) {ヽ$voice ;" -"dz $1 < ts (y* $vowel) {ヽ$voice ;" - -"g $1 < k (y* $vowel) {ヽ$voice ;" -"z $1 < s (y* $vowel) {ヽ$voice ;" -"d $1 < t (y* $vowel) {ヽ$voice ;" -"h $1 < b (y* $vowel) {ヽ$voice ;" -"v $1 < w (y* $vowel) {ヽ$voice ;" - -"sh $1 < sh (y* $vowel) {ヽ$voice ;" -"j $1 < j (y* $vowel) {ヽ$voice ;" -"ch $1 < ch (y* $vowel) {ヽ$voice ;" -"dj $1 < dj(y* $vowel) {ヽ$voice ;" -"ts $1 < ts (y* $vowel) {ヽ$voice ;" -"dz $1 < dz (y* $vowel) {ヽ$voice ;" - -"$1 < ($consonant y* $vowel) {ヽ$voice? ;" -"$1 < (.) {ヽ $voice? ;" // otherwise repeat last character - "< ヽ $voice? ;" // delete if no characters found - -// h- rule: lengthens vowel if not followed by a vowel - -"[aeiou] } h > ー ;" - -// one-way latin- > kana rules. these do not occur in -// well-formed romaji representing actual japanese text. -// their purpose is to make all romaji map to kana of -// some sort. - -// the following are not really necessary, but produce -// slightly more natural results. - -"cy > セィ ;" -"dy > ディ ;" -"hy > ヒ ;" -"sy > セィ ;" -"ty > ティ ;" -"zy > ゼィ ;" - -"h > ヘ ;" - -// isolated consonants listed here so as not to mask -// longer rules above. 
- -"ch > チ;" -"sh > シ ;" -"dz > ヅ ;" -"dj > ヂ;" - -"b > ブ ;" -"d > デ ;" -"g > グ ;" -"k > ク ;" -"m > ム ;" -"n'' < ン } $n_quoter ;" -"n <> ン ;" -"p > プ ;" -"r > ル ;" -"s > ス ;" -"t > テ ;" -"y > イ ;" -"z > ズ ;" -"v > ヴ ;" - -"f > フ;" -"j > ジ;" -"w > ウ;" - -"ß > | ss ;" -"æ > | e ;" -"ð > | d ;" -"ø > | u ;" -"þ > | th ;" - -// simple substitutions using backup - -"c > | k ;" -"l > | r ;" -"q > | k ;" -"x > | ks ;" - -// ~~~ END shared rules ~~~ - -//------------------------------------------------------ -// Final cleanup - -"'~' > ;" // delete stray tildes between letters -"[:Katakana:] { '' } [:Latin:] > ;" // delete stray quotes between letters -// [\u02BE[:Nonspacing Mark:]-[\u3099-\u309C]] > ; # delete any non-spacing marks that we didn't use - -":: NFC (NFD) ;" -":: ([:Katakana:] halfwidth-fullwidth);" - -// note: a global filter is more efficient, but MUST include all source chars!! -//:: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]]); -// MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD -":: ( [[\\\ -~\u00A2-\u00A3\u00A5-\u00A6\u00AC\u0304\u20A9\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE8-\uFFEE][~\u3001-\u3002\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u3099-\u309B\u309E\u30A1-\u30FA\u30FC-\u30FE]] ) ;" - -// eof - } -} diff --git a/icu4c/source/data/translit/t_Latn_NPinyn.txt b/icu4c/source/data/translit/t_Latn_NPinyn.txt deleted file mode 100644 index ab8df116f88..00000000000 --- a/icu4c/source/data/translit/t_Latn_NPinyn.txt +++ /dev/null @@ -1,56 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat -// Source: \icu4j\src\com\ibm\icu\impl\data/Transliterator_Latin_NumericPinyin.txt -// Date: Fri May 28 17:07:31 2004 -//-------------------------------------------------------------------- - -// Latin_NumericPinyin - -t_Latn_NPinyn { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// According to the pinyin definitions I've been able to find: -// 'a', 'e' are the preferred bases -// otherwise 'o' -// otherwise last vowel - -// The trailing form of syllables are the following: -// "a", "ai", "ao", "an", "ang", -// "o", "ou", "ong", -// "e", "ei", "er", "en", "eng", -// "i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong", -// "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng", -// "ü", "üe", "üan", "ün" -// so the letters the tone will 'hop' are: - -"::NFD (NFC);" -"$tone = [\u0304\u0301\u030C\u0300\u0306] ;" - -// Move the tone to the end of a syllable, and convert to number -"e {($tone) r} > r &tone-digit($1);" -"($tone) ( [i o n u {o n} {n g}]) > $2 &tone-digit($1);" -"($tone) > &tone-digit($1);" - -// The following backs up until it finds the right vowel, then deposits the tone - -"$vowel = [aAeEiIoOuUüÜ];" -"$consonant = [[a-z A-Z] - [$vowel]];" -"$digit = [1-5];" -"$1 &digit-tone($3) $2 < ([aAeE]) ($vowel* $consonant*) ($digit);" -"$1 &digit-tone($3) $2 < ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit);" -"$1 &digit-tone($3) $2 < ($vowel) ($consonant*) ($digit);" -"&digit-tone($1) < [:letter:] {($digit)};" - -"::NFC (NFD);" - - - - } -} diff --git a/icu4c/source/data/translit/t_Mlym_InterIndic.txt b/icu4c/source/data/translit/t_Mlym_InterIndic.txt deleted file mode 100644 index 6420f76eb44..00000000000 --- a/icu4c/source/data/translit/t_Mlym_InterIndic.txt +++ 
/dev/null @@ -1,101 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Malayalam_InterIndic.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// Malayalam_InterIndic - -t_Mlym_InterIndic { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Malayalam-InterIndic -//:: NFD (NFC) ; - -"\u0D02>\uE002;" // SIGN ANUSVARA -"\u0D03>\uE003;" // SIGN VISARGA -"\u0D05>\uE005;" // LETTER A -"\u0D06>\uE006;" // LETTER AA -"\u0D07>\uE007;" // LETTER I -"\u0D08>\uE008;" // LETTER II -"\u0D09>\uE009;" // LETTER U -"\u0D0A>\uE00A;" // LETTER UU -"\u0D0B>\uE00B;" // LETTER VOCALIC R -"\u0D0C>\uE00C;" // LETTER VOCALIC L -"\u0D0E>\uE00E;" // LETTER E -"\u0D0F>\uE00F;" // LETTER EE -"\u0D10>\uE010;" // LETTER AI -"\u0D12>\uE012;" // LETTER O -"\u0D13>\uE013;" // LETTER OO -"\u0D14>\uE014;" // LETTER AU -"\u0D15>\uE015;" // LETTER KA -"\u0D16>\uE016;" // LETTER KHA -"\u0D17>\uE017;" // LETTER GA -"\u0D18>\uE018;" // LETTER GHA -"\u0D19>\uE019;" // LETTER NGA -"\u0D1A>\uE01A;" // LETTER CA -"\u0D1B>\uE01B;" // LETTER CHA -"\u0D1C>\uE01C;" // LETTER JA -"\u0D1D>\uE01D;" // LETTER JHA -"\u0D1E>\uE01E;" // LETTER NYA -"\u0D1F>\uE01F;" // LETTER TTA -"\u0D20>\uE020;" // LETTER TTHA -"\u0D21>\uE021;" // LETTER DDA -"\u0D22>\uE022;" // LETTER DDHA -"\u0D23>\uE023;" // LETTER NNA -"\u0D24>\uE024;" // LETTER TA -"\u0D25>\uE025;" // LETTER THA -"\u0D26>\uE026;" // LETTER DA -"\u0D27>\uE027;" // LETTER DHA 
-"\u0D28>\uE028;" // LETTER NA -"\u0D2A>\uE02A;" // LETTER PA -"\u0D2B>\uE02B;" // LETTER PHA -"\u0D2C>\uE02C;" // LETTER BA -"\u0D2D>\uE02D;" // LETTER BHA -"\u0D2E>\uE02E;" // LETTER MA -"\u0D2F>\uE02F;" // LETTER YA -"\u0D30>\uE030;" // LETTER RA -"\u0D31>\uE031;" // LETTER RRA -"\u0D32>\uE032;" // LETTER LA -"\u0D33>\uE033;" // LETTER LLA -"\u0D34>\uE034;" // LETTER LLLA -"\u0D35>\uE035;" // LETTER VA -"\u0D36>\uE036;" // LETTER SHA -"\u0D37>\uE037;" // LETTER SSA -"\u0D38>\uE038;" // LETTER SA -"\u0D39>\uE039;" // LETTER HA -"\u0D3E>\uE03E;" // VOWEL SIGN AA -"\u0D3F>\uE03F;" // VOWEL SIGN I -"\u0D40>\uE040;" // VOWEL SIGN II -"\u0D41>\uE041;" // VOWEL SIGN U -"\u0D42>\uE042;" // VOWEL SIGN UU -"\u0D43>\uE043;" // VOWEL SIGN VOCALIC R -"\u0D46>\uE046;" // VOWEL SIGN E -"\u0D47>\uE047;" // VOWEL SIGN EE -"\u0D48>\uE048;" // VOWEL SIGN AI -"\u0D4D>\uE04D;" // SIGN VIRAMA -"\u0D57>\uE057;" // AU LENGTH MARK -"\u0D60>\uE060;" // LETTER VOCALIC RR -"\u0D61>\uE061;" // LETTER VOCALIC LL -"\u0D66>\uE066;" // DIGIT ZERO -"\u0D67>\uE067;" // DIGIT ONE -"\u0D68>\uE068;" // DIGIT TWO -"\u0D69>\uE069;" // DIGIT THREE -"\u0D6A>\uE06A;" // DIGIT FOUR -"\u0D6B>\uE06B;" // DIGIT FIVE -"\u0D6C>\uE06C;" // DIGIT SIX -"\u0D6D>\uE06D;" // DIGIT SEVEN -"\u0D6E>\uE06E;" // DIGIT EIGHT -"\u0D6F>\uE06F;" // DIGIT NINE -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_Orya_InterIndic.txt b/icu4c/source/data/translit/t_Orya_InterIndic.txt deleted file mode 100644 index 14309ad307f..00000000000 --- a/icu4c/source/data/translit/t_Orya_InterIndic.txt +++ /dev/null @@ -1,111 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Oriya_InterIndic.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// Oriya_InterIndic - -t_Orya_InterIndic { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Oriya-InterIndic -//:: NFD (NFC) ; -//\u0B21\u0B3C>\uE05C;# LETTER RRA -//\u0B22\u0B3C>\uE05D;# LETTER RHA -"\u0B47\u0B56>\uE048;"// VOWEL SIGN AI -"\u0B47\u0B3E>\uE04B;"// VOWEL SIGN O -"\u0B47\u0B57>\uE04C;"// VOWEL SIGN AU - -"\u0B01>\uE001;" // SIGN CANDRABINDU -"\u0B02>\uE002;" // SIGN ANUSVARA -"\u0B03>\uE003;" // SIGN VISARGA -"\u0B05>\uE005;" // LETTER A -"\u0B06>\uE006;" // LETTER AA -"\u0B07>\uE007;" // LETTER I -"\u0B08>\uE008;" // LETTER II -"\u0B09>\uE009;" // LETTER U -"\u0B0A>\uE00A;" // LETTER UU -"\u0B0B>\uE00B;" // LETTER VOCALIC R -"\u0B0C>\uE00C;" // LETTER VOCALIC L -"\u0B0F>\uE00F;" // LETTER E -"\u0B10>\uE010;" // LETTER AI -"\u0B13>\uE013;" // LETTER O -"\u0B14>\uE014;" // LETTER AU -"\u0B15>\uE015;" // LETTER KA -"\u0B16>\uE016;" // LETTER KHA -"\u0B17>\uE017;" // LETTER GA -"\u0B18>\uE018;" // LETTER GHA -"\u0B19>\uE019;" // LETTER NGA -"\u0B1A>\uE01A;" // LETTER CA -"\u0B1B>\uE01B;" // LETTER CHA -"\u0B1C>\uE01C;" // LETTER JA -"\u0B1D>\uE01D;" // LETTER JHA -"\u0B1E>\uE01E;" // LETTER NYA -"\u0B1F>\uE01F;" // LETTER TTA -"\u0B20>\uE020;" // LETTER TTHA -"\u0B21>\uE021;" // LETTER DDA -"\u0B22>\uE022;" // LETTER DDHA -"\u0B23>\uE023;" // LETTER NNA -"\u0B24>\uE024;" // LETTER TA -"\u0B25>\uE025;" // LETTER THA -"\u0B26>\uE026;" // LETTER DA -"\u0B27>\uE027;" // LETTER DHA -"\u0B28>\uE028;" // LETTER NA -"\u0B2A>\uE02A;" // LETTER PA -"\u0B2B>\uE02B;" // LETTER 
PHA -"\u0B2C>\uE02C;" // LETTER BA -"\u0B2D>\uE02D;" // LETTER BHA -"\u0B2E>\uE02E;" // LETTER MA -"\u0B2F>\uE02F;" // LETTER YA -"\u0B30>\uE030;" // LETTER RA -"\u0B32>\uE032;" // LETTER LA -"\u0B33>\uE033;" // LETTER LLA -"\u0B35>\uE035;" // LETTER VA -"\u0B36>\uE036;" // LETTER SHA -"\u0B37>\uE037;" // LETTER SSA -"\u0B38>\uE038;" // LETTER SA -"\u0B39>\uE039;" // LETTER HA -"\u0B3C>\uE03C;" // SIGN NUKTA -"\u0B3D>\uE03D;" // SIGN AVAGRAHA -"\u0B3E>\uE03E;" // VOWEL SIGN AA -"\u0B3F>\uE03F;" // VOWEL SIGN I -"\u0B40>\uE040;" // VOWEL SIGN II -"\u0B41>\uE041;" // VOWEL SIGN U -"\u0B42>\uE042;" // VOWEL SIGN UU -"\u0B43>\uE043;" // VOWEL SIGN VOCALIC R -"\u0B47>\uE047;" // VOWEL SIGN E -// -"\u0B4D>\uE04D;" // SIGN VIRAMA -"\u0B56>\uE056;" // AI LENGTH MARK -"\u0B57>\uE057;" // AU LENGTH MARK -"\u0964>\ue064;" // DANDA -"\u0965>\ue065;" // DOUBLE DANDA -// -"\u0B5F>\uE05F;" // LETTER YYA -"\u0B60>\uE060;" // LETTER VOCALIC RR -"\u0B61>\uE061;" // LETTER VOCALIC LL -"\u0B66>\uE066;" // DIGIT ZERO -"\u0B67>\uE067;" // DIGIT ONE -"\u0B68>\uE068;" // DIGIT TWO -"\u0B69>\uE069;" // DIGIT THREE -"\u0B6A>\uE06A;" // DIGIT FOUR -"\u0B6B>\uE06B;" // DIGIT FIVE -"\u0B6C>\uE06C;" // DIGIT SIX -"\u0B6D>\uE06D;" // DIGIT SEVEN -"\u0B6E>\uE06E;" // DIGIT EIGHT -"\u0B6F>\uE06F;" // DIGIT NINE -"\u0B70>\ue07B;" // ISSHAR -"\u0B71>\ue081;" // LETTER WA -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_Taml_InterIndic.txt b/icu4c/source/data/translit/t_Taml_InterIndic.txt deleted file mode 100644 index 8a71eb16338..00000000000 --- a/icu4c/source/data/translit/t_Taml_InterIndic.txt +++ /dev/null @@ -1,92 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Tamil_InterIndic.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// Tamil_InterIndic - -t_Taml_InterIndic { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Tamil-InterIndic -//:: NFD (NFC) ; - -"\u0BC6\u0BBE>\uE04A;"// VOWEL SIGN O -"\u0BC7\u0BBE>\uE04B;"// VOWEL SIGN OO -"\u0BC6\u0BD7>\uE04C;"// VOWEL SIGN AU -"\u0B92\u0BD7>\uE014;"// LETTER AU - -"\u0B82>\uE002;" // SIGN ANUSVARA -"\u0B83>\uE003;" // SIGN VISARGA -"\u0B85>\uE005;" // LETTER A -"\u0B86>\uE006;" // LETTER AA -"\u0B87>\uE007;" // LETTER I -"\u0B88>\uE008;" // LETTER II -"\u0B89>\uE009;" // LETTER U -"\u0B8A>\uE00A;" // LETTER UU -"\u0B8E>\uE00E;" // LETTER E -"\u0B8F>\uE00F;" // LETTER EE -"\u0B90>\uE010;" // LETTER AI -"\u0B92>\uE012;" // LETTER O -"\u0B93>\uE013;" // LETTER OO -"\u0B94>\uE014;" // LETTER AU -"\u0B95>\uE015;" // LETTER KA -"\u0B99>\uE019;" // LETTER NGA -"\u0B9A>\uE01A;" // LETTER CA -"\u0B9C>\uE01C;" // LETTER JA -"\u0B9E>\uE01E;" // LETTER NYA -"\u0B9F>\uE01F;" // LETTER TTA -"\u0BA3>\uE023;" // LETTER NNA -"\u0BA4>\uE024;" // LETTER TA -"\u0BA8>\uE028;" // LETTER NA -"\u0BA9>\uE029;" // LETTER NNNA -"\u0BAA>\uE02A;" // LETTER PA -"\u0BAE>\uE02E;" // LETTER MA -"\u0BAF>\uE02F;" // LETTER YA -"\u0BB0>\uE030;" // LETTER RA -"\u0BB1>\uE031;" // LETTER RRA -"\u0BB2>\uE032;" // LETTER LA -"\u0BB3>\uE033;" // LETTER LLA -"\u0BB4>\uE034;" // LETTER LLLA -"\u0BB5>\uE035;" // LETTER VA -"\u0BB7>\uE037;" // LETTER SSA -"\u0BB8>\uE038;" // LETTER SA -"\u0BB9>\uE039;" // LETTER HA -"\u0BBE>\uE03E;" // VOWEL SIGN AA -"\u0BBF>\uE03F;" // VOWEL SIGN I -"\u0BC0>\uE040;" // 
VOWEL SIGN II -"\u0BC1>\uE041;" // VOWEL SIGN U -"\u0BC2>\uE042;" // VOWEL SIGN UU -"\u0BC6>\uE046;" // VOWEL SIGN E -"\u0BC7>\uE047;" // VOWEL SIGN EE -"\u0BC8>\uE048;" // VOWEL SIGN AI - -"\u0BCD>\uE04D;" // SIGN VIRAMA -"\u0BD7>\uE057;" // AU LENGTH MARK -"\u0BE7>\uE067;" // DIGIT ONE -"\u0BE8>\uE068;" // DIGIT TWO -"\u0BE9>\uE069;" // DIGIT THREE -"\u0BEA>\uE06A;" // DIGIT FOUR -"\u0BEB>\uE06B;" // DIGIT FIVE -"\u0BEC>\uE06C;" // DIGIT SIX -"\u0BED>\uE06D;" // DIGIT SEVEN -"\u0BEE>\uE06E;" // DIGIT EIGHT -"\u0BEF>\uE06F;" // DIGIT NINE -"\u0BF0>\uE067\uE066;" // UNMAPPED Tamil-InterIndic: NUMBER TEN -"\u0BF1>\uE067\uE066\uE066;" // UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED -"\u0BF2>\uE067\uE066\uE066\uE066;"// UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND -"0>\ue066;" - -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_Telu_InterIndic.txt b/icu4c/source/data/translit/t_Telu_InterIndic.txt deleted file mode 100644 index d1160bb10af..00000000000 --- a/icu4c/source/data/translit/t_Telu_InterIndic.txt +++ /dev/null @@ -1,106 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: dumpICUrules.bat -// Source: ../../../impl/data/Transliterator_Telugu_InterIndic.txt -// Date: Tue May 18 17:24:49 2004 -//-------------------------------------------------------------------- - -// Telugu_InterIndic - -t_Telu_InterIndic { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Telugu-InterIndic -//:: NFD (NFC) ; -"\u0c46\u0c4d\u0c56>\ue048\ue04d;" -"\u0C46\u0C56>\uE048;"// VOWEL SIGN AI -"\u0C01>\uE001;" // SIGN CANDRABINDU -"\u0C02>\uE002;" // SIGN ANUSVARA -"\u0C03>\uE003;" // SIGN VISARGA -"\u0C05>\uE005;" // LETTER A -"\u0C06>\uE006;" // LETTER AA -"\u0C07>\uE007;" // LETTER I -"\u0C08>\uE008;" // LETTER II -"\u0C09>\uE009;" // LETTER U -"\u0C0A>\uE00A;" // LETTER UU -"\u0C0B>\uE00B;" // LETTER VOCALIC R -"\u0C0C>\uE00C;" // LETTER VOCALIC L -"\u0C0E>\uE00E;" // LETTER E -"\u0C0F>\uE00F;" // LETTER EE -"\u0C10>\uE010;" // LETTER AI -"\u0C12>\uE012;" // LETTER O -"\u0C13>\uE013;" // LETTER OO -"\u0C14>\uE014;" // LETTER AU -"\u0C15>\uE015;" // LETTER KA -"\u0C16>\uE016;" // LETTER KHA -"\u0C17>\uE017;" // LETTER GA -"\u0C18>\uE018;" // LETTER GHA -"\u0C19>\uE019;" // LETTER NGA -"\u0C1A>\uE01A;" // LETTER CA -"\u0C1B>\uE01B;" // LETTER CHA -"\u0C1C>\uE01C;" // LETTER JA -"\u0C1D>\uE01D;" // LETTER JHA -"\u0C1E>\uE01E;" // LETTER NYA -"\u0C1F>\uE01F;" // LETTER TTA -"\u0C20>\uE020;" // LETTER TTHA -"\u0C21>\uE021;" // LETTER DDA -"\u0C22>\uE022;" // LETTER DDHA -"\u0C23>\uE023;" // LETTER NNA -"\u0C24>\uE024;" // LETTER TA -"\u0C25>\uE025;" // LETTER THA -"\u0C26>\uE026;" // LETTER DA -"\u0C27>\uE027;" // LETTER DHA -"\u0C28>\uE028;" // LETTER NA -"\u0C2A>\uE02A;" // LETTER PA -"\u0C2B>\uE02B;" // LETTER PHA -"\u0C2C>\uE02C;" // LETTER BA -"\u0C2D>\uE02D;" 
// LETTER BHA -"\u0C2E>\uE02E;" // LETTER MA -"\u0C2F>\uE02F;" // LETTER YA -"\u0C30>\uE030;" // LETTER RA -"\u0C31>\uE031;" // LETTER RRA -"\u0C32>\uE032;" // LETTER LA -"\u0C33>\uE033;" // LETTER LLA -"\u0C35>\uE035;" // LETTER VA -"\u0C36>\uE036;" // LETTER SHA -"\u0C37>\uE037;" // LETTER SSA -"\u0C38>\uE038;" // LETTER SA -"\u0C39>\uE039;" // LETTER HA -"\u0C3E>\uE03E;" // VOWEL SIGN AA -"\u0C3F>\uE03F;" // VOWEL SIGN I -"\u0C40>\uE040;" // VOWEL SIGN II -"\u0C41>\uE041;" // VOWEL SIGN U -"\u0C42>\uE042;" // VOWEL SIGN UU -"\u0C43>\uE043;" // VOWEL SIGN VOCALIC R -"\u0C44>\uE044;" // VOWEL SIGN VOCALIC RR -"\u0C46>\uE046;" // VOWEL SIGN E -"\u0C47>\uE047;" // VOWEL SIGN EE -"\u0C4A>\uE04A;" // VOWEL SIGN O -"\u0C4B>\uE04B;" // VOWEL SIGN OO -"\u0C4C>\uE04C;" // VOWEL SIGN AU -"\u0C4D>\uE04D;" // SIGN VIRAMA -"\u0C55>\uE055;" // LENGTH MARK -"\u0C56>\uE056;" // AI LENGTH MARK -"\u0C60>\uE060;" // LETTER VOCALIC RR -"\u0C61>\uE061;" // LETTER VOCALIC LL -"\u0C66>\uE066;" // DIGIT ZERO -"\u0C67>\uE067;" // DIGIT ONE -"\u0C68>\uE068;" // DIGIT TWO -"\u0C69>\uE069;" // DIGIT THREE -"\u0C6A>\uE06A;" // DIGIT FOUR -"\u0C6B>\uE06B;" // DIGIT FIVE -"\u0C6C>\uE06C;" // DIGIT SIX -"\u0C6D>\uE06D;" // DIGIT SEVEN -"\u0C6E>\uE06E;" // DIGIT EIGHT -"\u0C6F>\uE06F;" // DIGIT NINE -// :: NFC (NFD) ; -// eof - } -} diff --git a/icu4c/source/data/translit/t_Tone_Digit.txt b/icu4c/source/data/translit/t_Tone_Digit.txt deleted file mode 100644 index 1edf4d563c6..00000000000 --- a/icu4c/source/data/translit/t_Tone_Digit.txt +++ /dev/null @@ -1,26 +0,0 @@ - // -*- Coding: utf-8; -*- -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. 
-//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat -// Source: \icu4j\src\com\ibm\icu\impl\data/Transliterator_Tone_Digit.txt -// Date: Fri May 28 17:07:31 2004 -//-------------------------------------------------------------------- - -// Tone_Digit - -t_Tone_Digit { - Rule { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- - -// Only intended for internal use -"\u0304 <> 1;" -"\u0301 <> 2;" -"\u030C <> 3;" -"\u0300 <> 4;" - "< 5;" - } -} diff --git a/icu4c/source/data/translit/translit_index.txt b/icu4c/source/data/translit/translit_index.txt deleted file mode 100644 index 416f53581f0..00000000000 --- a/icu4c/source/data/translit/translit_index.txt +++ /dev/null @@ -1,275 +0,0 @@ -//-------------------------------------------------------------------- -// Copyright (c) 1999-2004, International Business Machines -// Corporation and others. All Rights Reserved. -//-------------------------------------------------------------------- -// THIS IS A MACHINE-GENERATED FILE -// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat -// Source: Transliterator_index.txt -// Date: Fri May 28 17:07:30 2004 -//-------------------------------------------------------------------- - -//-------------------------------------------------------------------- -// N.B.: This file has been generated mechanically from the -// corresponding ICU4J file, which is the master file that receives -// primary updates. The colon-delimited fields have been split into -// separate strings. For 'file' and 'internal' lines, the encoding -// field has been deleted, since the encoding is processed at build -// time in ICU4C. Certain large rule sets not intended for general -// use have been commented out with the notation "Java only". 
-//-------------------------------------------------------------------- - -translit_index { - RuleBasedTransliteratorIDs { -//-------------------------------------------------------------------- -//-------------------------------------------------------------------- -// -// TRANSLITERATOR INDEX FILE. This file lists the non-algorithmic -// system transliterators. It allows arbitrary mappings between -// transliterator IDs and file names, and also allows the system to -// define aliases for transliterators, so that "Latin-Hangul", for -// example, can be implemented transparently as the compound -// "Latin-Jamo;Jamo-Hangul". Internal IDs may also be defined; these -// are invisible to the user, but can be composed together by the -// system to create visible transliterators. -// -// Blank lines and lines beginning with '#' are ignored. -// -// Lines in this file have one of the following forms (text not -// enclosed by <> is literal): -// -// :file::: -// :internal::: -// :alias: -// -// is the ID of the system transliterator being defined. These -// are public IDs enumerated by Transliterator.getAvailableIDs(), -// unless the second field is "internal". -// -// is a ResourceReader resource name. Currently these refer -// to file names under com/ibm/text/resources. This string is passed -// directly to ResourceReader, together with . -// -// is the character encoding to use when reading ; -// passed directly to ResourceReader. E.g., "UTF8". -// -// is either "FORWARD" or "REVERSE". -// -// is a string to be passed directly to -// Transliterator.getInstance(). The returned Transliterator object -// then has its ID changed to and is returned. 
- - -// Bidirectional rule files - -{ "Fullwidth-Halfwidth", "file", "t_FWidth_HWidth", "FORWARD" }, -{ "Halfwidth-Fullwidth", "file", "t_FWidth_HWidth", "REVERSE" }, - -{ "Latin-Cyrillic", "file", "t_Cyrl_Latn", "REVERSE" }, -{ "Cyrillic-Latin", "file", "t_Cyrl_Latn", "FORWARD" }, - -{ "Latin-Hebrew", "file", "t_Hebr_Latn", "REVERSE" }, -{ "Hebrew-Latin", "file", "t_Hebr_Latn", "FORWARD" }, - -{ "Latin-Arabic", "file", "t_Arab_Latn", "REVERSE" }, -{ "Arabic-Latin", "file", "t_Arab_Latn", "FORWARD" }, - -{ "Tone-Digit", "internal", "t_Tone_Digit", "FORWARD" }, -{ "Digit-Tone", "internal", "t_Tone_Digit", "REVERSE" }, - -{ "Latin-NumericPinyin", "file", "t_Latn_NPinyn", "FORWARD" }, -{ "NumericPinyin-Latin", "file", "t_Latn_NPinyn", "REVERSE" }, - -{ "Han-Spacedhan", "internal", "t_Hani_SpHan", "FORWARD" }, -{ "Spacedhan-Han", "alias", "null", "" }, - -{ "Han-Latin", "file", "t_Hani_Latn", "FORWARD" }, -//Latin-Han:file:Transliterator_Han_Latin.txt:UTF8:REVERSE # no round trip! -{ "Latin-Han", "alias", "null", "" }, - -// Comment these out; they are only for testing -// Latin-Han/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:REVERSE -// Han-Latin/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:FORWARD - -//Latin-Han/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:REVERSE -//Han-Latin/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:FORWARD - -{ "Latin-Greek", "file", "t_Grek_Latn", "REVERSE" }, -{ "Greek-Latin", "file", "t_Grek_Latn", "FORWARD" }, - -{ "Latin-Greek/UNGEGN", "file", "t_Grek_Latn_UNGEGN", "REVERSE" }, -{ "Greek-Latin/UNGEGN", "file", "t_Grek_Latn_UNGEGN", "FORWARD" }, - -{ "Latin-Katakana", "file", "t_Latn_Kana", "FORWARD" }, -{ "Katakana-Latin", "file", "t_Latn_Kana", "REVERSE" }, - -{ "Latin-Hiragana", "file", "t_Hira_Latn", "REVERSE" }, -{ "Hiragana-Latin", "file", "t_Hira_Latn", "FORWARD" }, - -//Thai Stuff: will change if we get \b into Transliterator - -// Java only: { "Thai-ThaiSemi", "internal", "-", "FORWARD" 
}, - -// Java only: { "Thai-ThaiLogical", "internal", "-", "FORWARD" }, -// Java only: { "ThaiLogical-Thai", "internal", "-", "REVERSE" }, - -// Java only: { "ThaiLogical-Latin", "internal", "-", "FORWARD" }, -// Java only: { "Latin-ThaiLogical", "internal", "-", "REVERSE" }, - -// Must use the order below! -// We need two separate passes because of the Thai vowel reversal -// Thai-Logical also converts spaces to semicolons. That has to be done before we insert latin spaces - -{ "Thai-Latin", "alias", "[[", "thai", "] \u0E01-\u0E3A\u0E40-\u0E5B];NFD;Thai-ThaiSemi;Any-BreakInternal;Thai-ThaiLogical;ThaiLogical-Latin;NFC", "" }, -{ "Latin-Thai", "alias", "[[", "Latin", "][", "Mn", "][", "Me", "] \ \;0-9|~\u00A7\u00AB\u00BB\u02B9\u0374\u2021\u02CC];NFD;Latin-ThaiLogical;ThaiLogical-Thai;NFC", "" }, - -// end of Thai Stuff - -{ "Hiragana-Katakana", "file", "t_Hira_Kana", "FORWARD" }, -{ "Katakana-Hiragana", "file", "t_Hira_Kana", "REVERSE" }, - -{ "Any-Accents", "file", "t_Any_Accents", "FORWARD" }, -{ "Accents-Any", "file", "t_Any_Accents", "REVERSE" }, - -{ "Any-Publishing", "file", "t_Any_Publishing", "FORWARD" }, -{ "Publishing-Any", "file", "t_Any_Publishing", "REVERSE" }, - -// Korean -// N.B. Don't end Latin-Jamo with NFC; that produces Hangul. For -// Hangul output use Latin-Hangul. 
- -{ "LowerLatin-Jamo", "internal", "t_Latn_Jamo", "FORWARD" }, -{ "Jamo-LowerLatin", "internal", "t_Latn_Jamo", "REVERSE" }, -{ "Latin-Jamo", "alias", "['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo", "" }, -{ "Jamo-Latin", "alias", "['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\uAC00-\uD7A3];NFD;Jamo-LowerLatin;NFC", "" }, -{ "Latin-Hangul", "alias", "['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo;NFC", "" }, -{ "Hangul-Latin", "alias", "['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\u3131-\u313F\u3141-\u3143\u3145-\u3163\u3200-\u321C\u3260-\u327B\uAC00-\uD7A3\uFF07\uFFA1-\uFFAF\uFFB1-\uFFB3\uFFB5-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC];NFKD;Jamo-LowerLatin;NFC", "" }, - -// Inter-Indic composed rules -{ "Latin-InterIndic", "internal", "t_Latn_InterIndic", "FORWARD" }, -{ "Devanagari-InterIndic", "internal", "t_Deva_InterIndic", "FORWARD" }, -{ "Bengali-InterIndic", "internal", "t_Beng_InterIndic", "FORWARD" }, -{ "Gurmukhi-InterIndic", "internal", "t_Guru_InterIndic", "FORWARD" }, -{ "Gujarati-InterIndic", "internal", "t_Gujr_InterIndic", "FORWARD" }, -{ "Oriya-InterIndic", "internal", "t_Orya_InterIndic", "FORWARD" }, -{ "Tamil-InterIndic", "internal", "t_Taml_InterIndic", "FORWARD" }, -{ 
"Telugu-InterIndic", "internal", "t_Telu_InterIndic", "FORWARD" }, -{ "Kannada-InterIndic", "internal", "t_Knda_InterIndic", "FORWARD" }, -{ "Malayalam-InterIndic", "internal", "t_Mlym_InterIndic", "FORWARD" }, - -{ "InterIndic-Latin", "internal", "t_InterIndic_Latn", "FORWARD" }, -{ "InterIndic-Devanagari", "internal", "t_InterIndic_Deva", "FORWARD" }, -{ "InterIndic-Bengali", "internal", "t_InterIndic_Beng", "FORWARD" }, -{ "InterIndic-Gurmukhi", "internal", "t_InterIndic_Guru", "FORWARD" }, -{ "InterIndic-Gujarati", "internal", "t_InterIndic_Gujr", "FORWARD" }, -{ "InterIndic-Oriya", "internal", "t_InterIndic_Orya", "FORWARD" }, -{ "InterIndic-Tamil", "internal", "t_InterIndic_Taml", "FORWARD" }, -{ "InterIndic-Telugu", "internal", "t_InterIndic_Telu", "FORWARD" }, -{ "InterIndic-Kannada", "internal", "t_InterIndic_Knda", "FORWARD" }, -{ "InterIndic-Malayalam", "internal", "t_InterIndic_Mlym", "FORWARD" }, - -//Latin-Indic transliterators -{ "Latin-Devanagari", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u
1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Devanagari;NFC", "" }, -{ "Latin-Bengali", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Bengali;NFC", "" }, -{ "Latin-Gurmukhi", "alias", 
"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gurmukhi;NFC", "" }, -{ "Latin-Gujarati", "alias", 
"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gujarati;NFC", "" }, -{ "Latin-Oriya", "alias", 
"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Oriya;NFC", "" }, -{ "Latin-Tamil", "alias", 
"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Tamil;NFC", "" }, -{ "Latin-Telugu", "alias", 
"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Telugu;NFC", "" }, -{ "Latin-Kannada", "alias", 
"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Kannada;NFC", "" }, -{ "Latin-Malayalam", "alias", 
"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Malayalam;NFC", "" }, - -//Indic-Latin transliterators -{ "Devanagari-Latin", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Latin;NFC", "" }, -{ "Bengali-Latin", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Latin;NFC", "" }, -{ "Gurmukhi-Latin", "alias", 
"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Latin;NFC", "" }, -{ "Gujarati-Latin", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Latin;NFC", "" }, -{ "Oriya-Latin", "alias", "[\u0964-\u0965\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35\u0B36-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B70\u0B71];NFD;Oriya-InterIndic;InterIndic-Latin;NFC", "" }, -{ "Tamil-Latin", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Latin;NFC", "" }, -{ "Telugu-Latin", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Latin;NFC", "" }, -{ "Kannada-Latin", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Latin;NFC", "" }, -{ "Malayalam-Latin", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Latin;NFC", "" }, - -{ "Devanagari-Bengali", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Bengali;NFC", "" }, -{ "Devanagari-Gurmukhi", "alias", 
"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gurmukhi;NFC", "" }, -{ "Devanagari-Gujarati", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gujarati;NFC", "" }, -{ "Devanagari-Oriya", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Oriya;NFC", "" }, -{ "Devanagari-Tamil", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Tamil;NFC", "" }, -{ "Devanagari-Telugu", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Telugu;NFC", "" }, -{ "Devanagari-Kannada", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Kannada;NFC", "" }, -{ "Devanagari-Malayalam", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Malayalam;NFC", "" }, - -{ "Bengali-Devanagari", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Devanagari;NFC", "" }, -{ "Bengali-Gurmukhi", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gurmukhi;NFC", "" }, -{ "Bengali-Gujarati", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gujarati;NFC", "" }, -{ "Bengali-Oriya", "alias", 
"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Oriya;NFC", "" }, -{ "Bengali-Tamil", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Tamil;NFC", "" }, -{ "Bengali-Telugu", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Telugu;NFC", "" }, -{ "Bengali-Kannada", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Kannada;NFC", "" }, -{ "Bengali-Malayalam", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Malayalam;NFC", "" }, - -{ "Gurmukhi-Devanagari", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Devanagari;NFC", "" }, -{ "Gurmukhi-Bengali", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Bengali;NFC", "" }, -{ "Gurmukhi-Gujarati", "alias", 
"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Gujarati;NFC", "" }, -{ "Gurmukhi-Oriya", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Oriya;NFC", "" }, -{ "Gurmukhi-Tamil", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Tamil;NFC", "" }, -{ "Gurmukhi-Telugu", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Telugu;NFC", "" }, -{ "Gurmukhi-Kannada", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Kannada;NFC", "" }, -{ "Gurmukhi-Malayalam", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Malayalam;NFC", "" }, - -{ "Gujarati-Devanagari", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Devanagari;NFC", "" }, -{ "Gujarati-Bengali", "alias", 
"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Bengali;NFC", "" }, -{ "Gujarati-Gurmukhi", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Gurmukhi;NFC", "" }, -{ "Gujarati-Oriya", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Oriya;NFC", "" }, -{ "Gujarati-Tamil", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Tamil;NFC", "" }, -{ "Gujarati-Telugu", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Telugu;NFC", "" }, -{ "Gujarati-Kannada", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Kannada;NFC", "" }, -{ "Gujarati-Malayalam", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Malayalam;NFC", "" }, - -{ "Oriya-Devanagari", "alias", 
"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Devanagari;NFC", "" }, -{ "Oriya-Bengali", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Bengali;NFC", "" }, -{ "Oriya-Gurmukhi", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gurmukhi;NFC", "" }, -{ "Oriya-Gujarati", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gujarati;NFC", "" }, -{ "Oriya-Tamil", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Tamil;NFC", "" }, -{ "Oriya-Telugu", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Telugu;NFC", "" }, -{ "Oriya-Kannada", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Kannada;NFC", "" }, -{ "Oriya-Malayalam", "alias", 
"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Malayalam;NFC", "" }, - -{ "Tamil-Devanagari", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Devanagari;NFC", "" }, -{ "Tamil-Bengali", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Bengali;NFC", "" }, -{ "Tamil-Gurmukhi", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gurmukhi;NFC", "" }, -{ "Tamil-Gujarati", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gujarati;NFC", "" }, -{ "Tamil-Oriya", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Oriya;NFC", "" }, -{ "Tamil-Telugu", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Telugu;NFC", "" }, -{ "Tamil-Kannada", "alias", 
"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Kannada;NFC", "" }, -{ "Tamil-Malayalam", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Malayalam;NFC", "" }, - -{ "Telugu-Devanagari", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Devanagari;NFC", "" }, -{ "Telugu-Bengali", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Bengali;NFC", "" }, -{ "Telugu-Gurmukhi", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gurmukhi;NFC", "" }, -{ "Telugu-Gujarati", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gujarati;NFC", "" }, -{ "Telugu-Oriya", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Oriya;NFC", "" }, -{ "Telugu-Tamil", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Tamil;NFC", "" }, -{ 
"Telugu-Kannada", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Kannada;NFC", "" }, -{ "Telugu-Malayalam", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Malayalam;NFC", "" }, - -{ "Kannada-Devanagari", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Devanagari;NFC", "" }, -{ "Kannada-Bengali", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Bengali;NFC", "" }, -{ "Kannada-Gurmukhi", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gurmukhi;NFC", "" }, -{ "Kannada-Gujarati", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gujarati;NFC", "" }, -{ "Kannada-Oriya", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Oriya;NFC", "" }, -{ "Kannada-Tamil", "alias", 
"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Tamil;NFC", "" }, -{ "Kannada-Telugu", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Telugu;NFC", "" }, -{ "Kannada-Malayalam", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Malayalam;NFC", "" }, - -{ "Malayalam-Devanagari", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Devanagari;NFC", "" }, -{ "Malayalam-Bengali", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Bengali;NFC", "" }, -{ "Malayalam-Gurmukhi", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gurmukhi;NFC", "" }, -{ "Malayalam-Gujarati", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gujarati;NFC", "" }, -{ "Malayalam-Oriya", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Oriya;NFC", "" }, -{ "Malayalam-Tamil", "alias", 
"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Tamil;NFC", "" }, -{ "Malayalam-Telugu", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Telugu;NFC", "" }, -{ "Malayalam-Kannada", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Kannada;NFC", "" }, - -// eof - } -} diff --git a/icu4c/source/data/translit/trnsfiles.mk b/icu4c/source/data/translit/trnsfiles.mk index 8b4a868b2fa..19ceeffac12 100644 --- a/icu4c/source/data/translit/trnsfiles.mk +++ b/icu4c/source/data/translit/trnsfiles.mk @@ -19,44 +19,8 @@ # * To REPLACE the default list and only build with a few # transliterators: # _____________________________________________________ -# | TRANLIST_SOURCE = translit_index.txt translit_Any_Publishing.txt +# | TRANLIST_SOURCE = el.txt th.txt # # -TRANSLIT_SOURCE=t_Any_Accents.txt\ -t_Any_Publishing.txt\ -t_Arab_Latn.txt\ -t_Beng_InterIndic.txt\ -t_Cyrl_Latn.txt\ -t_Deva_InterIndic.txt\ -t_FWidth_HWidth.txt\ -t_Grek_Latn.txt\ -t_Grek_Latn_UNGEGN.txt\ -t_Gujr_InterIndic.txt\ -t_Guru_InterIndic.txt\ -t_Hani_Latn.txt\ -t_Hebr_Latn.txt\ -t_Hira_Kana.txt\ -t_Hira_Latn.txt\ -t_InterIndic_Beng.txt\ -t_InterIndic_Deva.txt\ -t_InterIndic_Gujr.txt\ -t_InterIndic_Guru.txt\ -t_InterIndic_Knda.txt\ -t_InterIndic_Latn.txt\ -t_InterIndic_Mlym.txt\ -t_InterIndic_Orya.txt\ -t_InterIndic_Taml.txt\ -t_InterIndic_Telu.txt\ -t_Knda_InterIndic.txt\ -t_Latn_InterIndic.txt\ -t_Latn_Jamo.txt\ -t_Latn_Kana.txt\ -t_Mlym_InterIndic.txt\ -t_Orya_InterIndic.txt\ -t_Taml_InterIndic.txt\ -t_Telu_InterIndic.txt\ -t_Latn_NPinyn.txt\ -t_Tone_Digit.txt\ -t_Hani_SpHan.txt\ -translit_index.txt 
+TRANSLIT_SOURCE=el.txt en.txt diff --git a/icu4c/source/i18n/rbt.h b/icu4c/source/i18n/rbt.h index 989433b8628..355d9943899 100644 --- a/icu4c/source/i18n/rbt.h +++ b/icu4c/source/i18n/rbt.h @@ -17,6 +17,9 @@ #include "unicode/translit.h" #include "unicode/utypes.h" #include "unicode/parseerr.h" +#include "unicode/udata.h" + +#define U_ICUDATA_TRANSLIT U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "translit" U_NAMESPACE_BEGIN diff --git a/icu4c/source/i18n/translit.cpp b/icu4c/source/i18n/translit.cpp index b48d37cf776..06ea5748c78 100644 --- a/icu4c/source/i18n/translit.cpp +++ b/icu4c/source/i18n/translit.cpp @@ -725,7 +725,7 @@ UnicodeString& Transliterator::getDisplayName(const UnicodeString& id, UnicodeString& result) { UErrorCode status = U_ZERO_ERROR; - ResourceBundle bundle(u_getDataDirectory(), inLocale, status); + ResourceBundle bundle(U_ICUDATA_TRANSLIT, inLocale, status); // Suspend checking status until later... @@ -1411,13 +1411,23 @@ UBool Transliterator::initializeRegistry() { } /* The following code parses the index table located in - * icu/data/translit_index.txt. The index is an n x 4 table + * icu/data/translit/root.txt. The index is an n x 4 table * that follows this format: - * - * <id>:file:<resource>:<direction> - * <id>:internal:<resource>:<direction> - * <id>:alias:<getInstanceArg> - * + * <id>{ + * file{ + * resource{"<resource>"} + * direction{"<direction>"} + * } + * } + * <id>{ + * internal{ + * resource{"<resource>"} + * direction{"<direction>"} + * } + * } + * <id>{ + * alias{"<getInstanceArg>"} + * } * <id> is the ID of the system transliterator being defined. These * are public IDs enumerated by Transliterator.getAvailableIDs(), * unless the second field is "internal". @@ -1434,10 +1444,10 @@ UBool Transliterator::initializeRegistry() { * * The extra blank field on "alias" lines is to make the array square. 
*/ - static const char translit_index[] = "translit_index"; + //static const char translit_index[] = "translit_index"; UResourceBundle *bundle, *transIDs, *colBund; - bundle = ures_openDirect(0, translit_index, &status); + bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open root bundle*/, &status); transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status); int32_t row, maxRows; @@ -1445,11 +1455,11 @@ UBool Transliterator::initializeRegistry() { maxRows = ures_getSize(transIDs); for (row = 0; row < maxRows; row++) { colBund = ures_getByIndex(transIDs, row, 0, &status); - - if (U_SUCCESS(status) && ures_getSize(colBund) == 4) { - UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status); - UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0); - UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status); + if (U_SUCCESS(status)) { + UnicodeString id = ures_getKey(colBund); + UResourceBundle* res = ures_getNextResource(colBund, NULL, &status); + const char* typeStr = ures_getKey(res); + UChar type = (UChar)*typeStr; if (U_SUCCESS(status)) { switch (type) { @@ -1458,9 +1468,11 @@ UBool Transliterator::initializeRegistry() { // 'file' or 'internal'; // row[2]=resource, row[3]=direction { + + UnicodeString resString = ures_getUnicodeStringByKey(res, "resource", &status); UBool visible = (type == 0x0066 /*f*/); UTransDirection dir = - (ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) == + (ures_getUnicodeStringByKey(res, "direction", &status).charAt(0) == 0x0046 /*F*/) ? 
UTRANS_FORWARD : UTRANS_REVERSE; registry->put(id, resString, dir, visible); @@ -1468,12 +1480,13 @@ UBool Transliterator::initializeRegistry() { break; case 0x61: // 'a' // 'alias'; row[2]=createInstance argument + UnicodeString resString = ures_getUnicodeString(res, &status); registry->put(id, resString, TRUE); break; } } + ures_close(res); } - ures_close(colBund); } } diff --git a/icu4c/source/i18n/transreg.cpp b/icu4c/source/i18n/transreg.cpp index 839dd11eb6f..11a378ba46c 100644 --- a/icu4c/source/i18n/transreg.cpp +++ b/icu4c/source/i18n/transreg.cpp @@ -202,7 +202,7 @@ Spec::Spec(const UnicodeString& theSpec) : top(theSpec) { UErrorCode status = U_ZERO_ERROR; CharString topch(top); Locale toploc(topch); - res = new ResourceBundle(u_getDataDirectory(), toploc, status); + res = new ResourceBundle(U_ICUDATA_TRANSLIT, toploc, status); /* test for NULL */ if (res == 0) { return; @@ -1208,10 +1208,10 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID // 2-d array at static init time, as a locale language. We're // just using the locale mechanism to map through to a file // name; this in no way represents an actual locale. - CharString ch(entry->stringArg); - UResourceBundle *bundle = ures_openDirect(0, ch, &status); - UnicodeString rules = ures_getUnicodeStringByKey(bundle, RB_RULE, &status); - ures_close(bundle); + //CharString ch(entry->stringArg); + //UResourceBundle *bundle = ures_openDirect(0, ch, &status); + UnicodeString rules = entry->stringArg; + //ures_close(bundle); if (U_FAILURE(status)) { // We have a failure of some kind. 
Remove the ID from the diff --git a/icu4c/source/test/intltest/intltest.cpp b/icu4c/source/test/intltest/intltest.cpp index 222dd0b18f1..8e7ee5c85ad 100644 --- a/icu4c/source/test/intltest/intltest.cpp +++ b/icu4c/source/test/intltest/intltest.cpp @@ -31,6 +31,7 @@ #include "cstring.h" #include "umutex.h" #include "uassert.h" +#include "cmemory.h" #ifdef XP_MAC_CONSOLE #include @@ -1491,6 +1492,16 @@ UBool IntlTest::assertEquals(const UnicodeString& message, const char* actual) { return assertEquals(extractToAssertBuf(message), expected, actual); } +//-------------------------------------------------------------------- +// Time bomb - allows temporary behavior that expires at a given +// release +//-------------------------------------------------------------------- + +UBool IntlTest::isICUVersionAtLeast(const UVersionInfo x) { + UVersionInfo v; + u_getVersion(v); + return (uprv_memcmp(v, x, U_MAX_VERSION_LENGTH) >= 0); +} #if !UCONFIG_NO_FORMATTING UBool IntlTest::assertEquals(const UnicodeString& message, diff --git a/icu4c/source/test/intltest/intltest.h b/icu4c/source/test/intltest/intltest.h index 4a12879d03d..b1bbe8239dc 100644 --- a/icu4c/source/test/intltest/intltest.h +++ b/icu4c/source/test/intltest/intltest.h @@ -131,6 +131,12 @@ public: */ static float random(); + /** + * Ascertain the version of ICU. Useful for + * time bomb testing + */ + UBool isICUVersionAtLeast(const UVersionInfo x); + protected: /* JUnit-like assertions. Each returns TRUE if it succeeds. 
*/ UBool assertTrue(const char* message, UBool condition, UBool quiet=FALSE); diff --git a/icu4c/source/test/intltest/intltest.vcproj b/icu4c/source/test/intltest/intltest.vcproj index 7214089d051..15c4c714b35 100644 --- a/icu4c/source/test/intltest/intltest.vcproj +++ b/icu4c/source/test/intltest/intltest.vcproj @@ -345,16 +345,6 @@ RelativePath=".\tsputil.h"> - - - - - - @@ -816,6 +806,16 @@ RelativePath=".\trnserr.h"> + + + + + + diff --git a/icu4c/source/test/intltest/transapi.cpp b/icu4c/source/test/intltest/transapi.cpp index 5bd632f4935..79495d163c8 100644 --- a/icu4c/source/test/intltest/transapi.cpp +++ b/icu4c/source/test/intltest/transapi.cpp @@ -26,6 +26,9 @@ #include "unicode/rep.h" #include "unicode/locid.h" #include "unicode/uniset.h" + +static const UVersionInfo ICU_31 = {3,1,0,0}; + int32_t getInt(UnicodeString str) { char buffer[20]; @@ -84,6 +87,9 @@ void TransliteratorAPITest::TestgetID() { for (i=0; i-1 && isICUVersionAtLeast(ICU_31)){ + continue; + } t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status); if(t == 0){ errln("FAIL: " + ID); diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp index 42d71f0cc50..435eb0a4eec 100644 --- a/icu4c/source/test/intltest/transtst.cpp +++ b/icu4c/source/test/intltest/transtst.cpp @@ -37,6 +37,7 @@ #include "unesctrn.h" #include "uni2name.h" #include "cstring.h" +#include "cmemory.h" #include /*********************************************************************** @@ -186,6 +187,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec, } } +static const UVersionInfo ICU_31 = {3,1,0,0}; /** * Make sure every system transliterator can be instantiated. 
* @@ -220,6 +222,9 @@ void TransliteratorTest::TestInstantiation() { i + ") != getAvailableIDs().snext()"); continue; } + if(id2.indexOf("Thai")>-1 && isICUVersionAtLeast(ICU_31)){ + continue; + } UParseError parseError; UErrorCode status = U_ZERO_ERROR; Transliterator* t = Transliterator::createInstance(id, @@ -3472,7 +3477,10 @@ void TransliteratorTest::TestIncrementalProgress(void) { Transliterator::getAvailableVariant(k, source, target, variant); UnicodeString id = source + "-" + target + "/" + variant; - + + if(id.indexOf("Thai")>-1 && isICUVersionAtLeast(ICU_31)){ + continue; + } Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status); if (U_FAILURE(status)) { errln((UnicodeString)"FAIL: Could not create " + id);