mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-3925 separate Transliterator data to its own tree
X-SVN-Rev: 16095
This commit is contained in:
parent
bf8a7e1793
commit
123132b8e6
89 changed files with 8683 additions and 8503 deletions
|
@ -73,7 +73,7 @@ ICUCOL=coll
|
|||
#
|
||||
ICURBNF=rbnf
|
||||
|
||||
# ICUTRANSLIT
|
||||
# ICUTRNS
|
||||
# The directory that contains trnsfiles.mk files along with *.txt transliterator files
|
||||
#
|
||||
ICUTRNS=translit
|
||||
|
@ -248,7 +248,10 @@ TRANLIT_SOURCE=$(TRANSLIT_SOURCE) $(TRANSLIT_SOURCE_LOCAL)
|
|||
!MESSAGE Warning: cannot find "trnsfiles.mk"
|
||||
!ENDIF
|
||||
|
||||
TRANSLIT_FILES = $(TRANSLIT_SOURCE:.txt=.res)
|
||||
TRANSLIT_FILES = $(ICUTRNS)\root.txt $(TRANSLIT_ALIAS_SOURCE) $(TRANSLIT_SOURCE)
|
||||
TRANSLIT_RES_FILES = $(TRANSLIT_FILES:.txt =.res translit\)
|
||||
TRANSLIT_RES_FILES = $(TRANSLIT_RES_FILES:.txt=.res)
|
||||
TRANSLIT_RES_FILES = $(TRANSLIT_RES_FILES:translit\ =translit\)
|
||||
|
||||
# Read list of miscellaneous resource bundle files
|
||||
!IF EXISTS("$(ICUSRCDATA)\$(ICUMISC2)\miscfiles.mk")
|
||||
|
@ -268,6 +271,7 @@ MISC_FILES = $(MISC_SOURCE:.txt=.res)
|
|||
INDEX_RES_FILES = res_index.res
|
||||
INDEX_COL_FILES = $(ICUCOL)\res_index.res
|
||||
INDEX_RBNF_FILES = $(ICURBNF)\res_index.res
|
||||
#INDEX_TRANSLIT_FILES = $(ICUTRNS)\res_index.res
|
||||
|
||||
#
|
||||
# Break iterator data files.
|
||||
|
@ -276,7 +280,7 @@ BRK_SOURCE_FILES = sent.txt char.txt line.txt word.txt title.txt line_th.txt wor
|
|||
BRK_FILES=$(BRK_SOURCE_FILES:.txt=.brk)
|
||||
|
||||
# don't include COL_FILES
|
||||
ALL_RES = $(INDEX_RES_FILES) $(RB_FILES) $(TRANSLIT_FILES) $(MISC_FILES)
|
||||
ALL_RES = $(INDEX_RES_FILES) $(RB_FILES) $(MISC_FILES)
|
||||
|
||||
#############################################################################
|
||||
#
|
||||
|
@ -293,7 +297,7 @@ ALL : GODATA "$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" "$(TESTDATAOUT)\testdata.dat"
|
|||
#
|
||||
# testdata - nmake will invoke pkgdata, which will create testdata.dat
|
||||
#
|
||||
"$(TESTDATAOUT)\testdata.dat": "$(ICUBLD)\ucadata.icu" $(TRANSLIT_FILES) $(MISC_FILES) $(RB_FILES) {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe
|
||||
"$(TESTDATAOUT)\testdata.dat": "$(ICUBLD)\ucadata.icu" $(TRANSLIT_RES_FILES) $(MISC_FILES) $(RB_FILES) {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe
|
||||
@cd "$(TESTDATA)"
|
||||
@echo building testdata...
|
||||
nmake /nologo /f "$(TESTDATA)\testdata.mak" TESTDATA=. ICUTOOLS="$(ICUTOOLS)" ICUP="$(ICUP)" CFG=$(CFG) TESTDATAOUT="$(TESTDATAOUT)" ICUDATA="$(ICUDATA)" TESTDATABLD="$(TESTDATABLD)"
|
||||
|
@ -303,7 +307,7 @@ ALL : GODATA "$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" "$(TESTDATAOUT)\testdata.dat"
|
|||
# move the .dll and .lib files to their final destination afterwards.
|
||||
# The $(U_ICUDATA_NAME).lib and $(U_ICUDATA_NAME).exp should already be in the right place due to stubdata.
|
||||
#
|
||||
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUP)\bin\pkgdata.exe" $(CNV_FILES) "$(ICUBLD)\uprops.icu" "$(ICUBLD)\unames.icu" "$(ICUBLD)\pnames.icu" "$(ICUBLD)\unorm.icu" "$(ICUBLD)\cnvalias.icu" "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\invuca.icu" "$(ICUBLD)\uidna.spp" $(BRK_FILES) $(INDEX_COL_FILES) $(COL_COL_FILES) $(INDEX_RBNF_FILES) $(RBNF_RES_FILES) $(ALL_RES) "$(ICUTMP)\icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
|
||||
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUP)\bin\pkgdata.exe" $(CNV_FILES) "$(ICUBLD)\uprops.icu" "$(ICUBLD)\unames.icu" "$(ICUBLD)\pnames.icu" "$(ICUBLD)\unorm.icu" "$(ICUBLD)\cnvalias.icu" "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\invuca.icu" "$(ICUBLD)\uidna.spp" $(BRK_FILES) $(INDEX_COL_FILES) $(COL_COL_FILES) $(INDEX_RBNF_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(ALL_RES) "$(ICUTMP)\icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
|
||||
@echo Building icu data
|
||||
cd "$(ICUBLD)"
|
||||
@"$(ICUP)\bin\pkgdata" -f -e $(U_ICUDATA_NAME) -v $(ICU_PACKAGE_MODE) -c -p $(ICUPKG) -T "$(ICUTMP)" -L $(U_ICUDATA_NAME) -d "$(ICUBLD)" -s . <<"$(ICUTMP)\pkgdatain.txt"
|
||||
|
@ -325,6 +329,8 @@ $(ICUCOL)\res_index.res
|
|||
$(RBNF_RES_FILES:.res =.res
|
||||
)
|
||||
$(ICURBNF)\res_index.res
|
||||
$(TRANSLIT_RES_FILES:.res =.res
|
||||
)
|
||||
$(BRK_FILES:.brk =.brk
|
||||
)
|
||||
<<KEEP
|
||||
|
@ -342,6 +348,7 @@ GODATA :
|
|||
@if not exist "$(ICUBLD)\$(NULL)" mkdir "$(ICUBLD)"
|
||||
@if not exist "$(ICUBLD)\$(ICUCOL)\$(NULL)" mkdir "$(ICUBLD)\$(ICUCOL)"
|
||||
@if not exist "$(ICUBLD)\$(ICURBNF)\$(NULL)" mkdir "$(ICUBLD)\$(ICURBNF)"
|
||||
@if not exist "$(ICUBLD)\$(ICUTRNS)\$(NULL)" mkdir "$(ICUBLD)\$(ICUTRNS)"
|
||||
@if not exist "$(TESTDATAOUT)\$(NULL)" mkdir "$(TESTDATAOUT)"
|
||||
@if not exist "$(TESTDATABLD)\$(NULL)" mkdir "$(TESTDATABLD)"
|
||||
@cd "$(ICUBLD)"
|
||||
|
@ -364,6 +371,9 @@ CLEAN : GODATA
|
|||
@cd "$(ICUBLD)\$(ICURBNF)"
|
||||
-@erase "*.res"
|
||||
-@erase "*.txt"
|
||||
@cd "$(ICUBLD)\$(ICUTRNS)"
|
||||
-@erase "*.res"
|
||||
-@erase "*.txt"
|
||||
@cd "$(ICUOUT)"
|
||||
-@erase "*.dat"
|
||||
@cd "$(ICUTMP)"
|
||||
|
@ -393,11 +403,6 @@ CLEAN : GODATA
|
|||
@echo Generating converters
|
||||
@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" -c -d"$(ICUBLD)" $<
|
||||
|
||||
# Batch inference rule for creating transliterator resource files
|
||||
{$(ICUSRCDATA_RELATIVE_PATH)\$(ICUTRNS)}.txt.res::
|
||||
@echo Making Transliterator Resource Bundle files
|
||||
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -d"$(ICUBLD)" $<
|
||||
|
||||
# Batch inference rule for creating miscellaneous resource files
|
||||
# TODO: -q option is specified to squelch the 120+ warnings about
|
||||
# empty intvectors and binary elements. Unfortunately, this may
|
||||
|
@ -412,6 +417,18 @@ CLEAN : GODATA
|
|||
@echo Making Locale Resource Bundle files
|
||||
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -d"$(ICUBLD)" $<
|
||||
|
||||
$(INDEX_RES_FILES):
|
||||
@echo Generating <<res_index.txt
|
||||
// Warning this file is automatically generated
|
||||
res_index {
|
||||
InstalledLocales {
|
||||
$(GENRB_SOURCE:.txt= {""}
|
||||
)
|
||||
}
|
||||
}
|
||||
<<KEEP
|
||||
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -d"$(ICUBLD)" .\res_index.txt
|
||||
|
||||
{$(ICUSRCDATA_RELATIVE_PATH)\$(ICUCOL)}.txt{$(ICUCOL)}.res::
|
||||
@echo Making Collation files
|
||||
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -i "$(ICUBLD)" -d"$(ICUBLD)\$(ICUCOL)" $<
|
||||
|
@ -444,18 +461,10 @@ res_index {
|
|||
<<KEEP
|
||||
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -d"$(ICUBLD)\$(ICURBNF)" .\$(ICURBNF)\res_index.txt
|
||||
|
||||
{$(ICUSRCDATA_RELATIVE_PATH)\$(ICUTRNS)}.txt{$(ICUTRNS)}.res::
|
||||
@echo Making Transliterator files
|
||||
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -i "$(ICUBLD)" -d"$(ICUBLD)\$(ICUTRNS)" $<
|
||||
|
||||
$(INDEX_RES_FILES):
|
||||
@echo Generating <<res_index.txt
|
||||
// Warning this file is automatically generated
|
||||
res_index {
|
||||
InstalledLocales {
|
||||
$(GENRB_SOURCE:.txt= {""}
|
||||
)
|
||||
}
|
||||
}
|
||||
<<KEEP
|
||||
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -d"$(ICUBLD)" .\res_index.txt
|
||||
|
||||
# DLL version information
|
||||
# If you modify this, modify winmode.c in pkgdata.
|
||||
|
@ -477,7 +486,7 @@ res_index {
|
|||
# Targets for uprops.icu
|
||||
"$(ICUBLD)\uprops.icu": "$(ICUUNIDATA)\*.txt" "$(ICUTOOLS)\genprops\$(CFG)\genprops.exe" "$(ICUBLD)\pnames.icu"
|
||||
@echo Creating data file for Unicode Character Properties
|
||||
@"$(ICUTOOLS)\genprops\$(CFG)\genprops" -u $(UNICODE_VERSION) -i "$(ICUBLD)" -d "$(ICUBLD)" -s "$(ICUUNIDATA)"
|
||||
@"$(ICUTOOLS)\genprops\$(CFG)\genprops" -u $(UNICODE_VERSION) -i "$(ICUBLD)" -s "$(ICUUNIDATA)" -d "$(ICUBLD)"
|
||||
|
||||
# Targets for unorm.icu
|
||||
"$(ICUBLD)\unorm.icu": "$(ICUUNIDATA)\*.txt" "$(ICUTOOLS)\gennorm\$(CFG)\gennorm.exe"
|
||||
|
@ -502,6 +511,6 @@ res_index {
|
|||
|
||||
$(UCM_SOURCE) : {"$(ICUTOOLS)\makeconv\$(CFG)"}makeconv.exe
|
||||
|
||||
$(TRANSLIT_SOURCE) $(MISC_SOURCE) $(RB_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES): {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\uprops.icu" "$(ICUBLD)\unorm.icu"
|
||||
$(MISC_SOURCE) $(RB_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES): {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\uprops.icu" "$(ICUBLD)\unorm.icu"
|
||||
|
||||
$(BRK_SOURCE_FILES) : "$(ICUBLD)\uprops.icu" "$(ICUBLD)\unames.icu" "$(ICUBLD)\pnames.icu" "$(ICUBLD)\unorm.icu"
|
||||
|
|
290
icu4c/source/data/translit/Any_Accents.txt
Normal file
290
icu4c/source/data/translit/Any_Accents.txt
Normal file
|
@ -0,0 +1,290 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
:: NFD (NFC) ;
|
||||
|
||||
# to do: make reversible
|
||||
|
||||
# define special conversion characters.
|
||||
# variants of this could use different characters, or set one or the other to null.
|
||||
|
||||
$pre = \< ;
|
||||
$post = \> ;
|
||||
|
||||
# Provide keyboard equivalents for common diacritics used in transliteration
|
||||
|
||||
$pre \` $post <> \u0300 ; # COMBINING GRAVE ACCENT
|
||||
$pre \' $post <> \u0301 ; # COMBINING ACUTE ACCENT
|
||||
$pre \^ $post <> \u0302 ; # COMBINING CIRCUMFLEX ACCENT
|
||||
$pre \~ $post <> \u0303 ; # COMBINING TILDE
|
||||
$pre \- $post <> \u0304 ; # COMBINING MACRON
|
||||
$pre \" $post <> \u0308 ; # COMBINING DIAERESIS
|
||||
$pre \* $post <> \u030A ; # COMBINING RING ABOVE
|
||||
$pre \, $post <> \u0327 ; # COMBINING CEDILLA
|
||||
$pre '/' $post <> \u0338 ; # COMBINING LONG SOLIDUS OVERLAY
|
||||
$pre \. $post <> \u0323 ; # COMBINING DOT BELOW
|
||||
|
||||
# Combine common characters
|
||||
|
||||
$pre AE $post <> \u00C6 ; # LATIN CAPITAL LETTER AE
|
||||
$pre ae $post <> \u00E6 ; # LATIN SMALL LETTER AE
|
||||
$pre D $post <> \u00D0 ; # LATIN CAPITAL LETTER ETH
|
||||
$pre d $post <> \u00F0 ; # LATIN SMALL LETTER ETH
|
||||
$pre O'/' $post <> \u00D8 ; # LATIN CAPITAL LETTER O WITH STROKE
|
||||
$pre o'/' $post <> \u00F8 ; # LATIN SMALL LETTER O WITH STROKE
|
||||
$pre TH $post <> \u00DE ; # LATIN CAPITAL LETTER THORN
|
||||
$pre th $post <> \u00FE ; # LATIN SMALL LETTER THORN
|
||||
$pre OE $post <> \u0152 ; # LATIN CAPITAL LIGATURE OE
|
||||
$pre oe $post <> \u0153 ; # LATIN SMALL LIGATURE OE
|
||||
|
||||
$pre ss $post <> \u00DF ; # LATIN SMALL LETTER SHARP S
|
||||
|
||||
$pre NG $post <> \u014A ; # LATIN CAPITAL LETTER ENG
|
||||
$pre ng $post <> \u014B ; # LATIN SMALL LETTER ENG
|
||||
|
||||
$pre T $post <> \u0398 ; # GREEK CAPITAL LETTER THETA
|
||||
$pre t $post <> \u03B8 ; # GREEK SMALL LETTER THETA
|
||||
$pre SH $post <> \u01A9 ; # LATIN CAPITAL LETTER ESH
|
||||
$pre sh $post <> \u0283 ; # LATIN SMALL LETTER ESH
|
||||
$pre ZH $post <> \u01B7 ; # LATIN CAPITAL LETTER EZH
|
||||
$pre zh $post <> \u0292 ; # LATIN SMALL LETTER EZH
|
||||
|
||||
$pre U $post <> \u01B1 ; # LATIN CAPITAL LETTER UPSILON
|
||||
$pre u $post <> \u028A ; # LATIN SMALL LETTER UPSILON
|
||||
$pre A $post <> \u018F ; # LATIN CAPITAL LETTER SCHWA
|
||||
$pre a $post <> \u0259 ; # LATIN SMALL LETTER SCHWA
|
||||
$pre O $post <> \u0186 ; # LATIN CAPITAL LETTER OPEN O
|
||||
$pre o $post <> \u0254 ; # LATIN SMALL LETTER OPEN O
|
||||
$pre E $post <> \u0190 ; # LATIN CAPITAL LETTER OPEN E
|
||||
$pre e $post <> \u025B ; # LATIN SMALL LETTER OPEN E
|
||||
|
||||
# three that don't have uppercases
|
||||
|
||||
$pre '?' $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP
|
||||
$pre i $post <> \u026A ; # LATIN LETTER SMALL CAPITAL I
|
||||
$pre v $post <> \u028C ; # LATIN SMALL LETTER TURNED V
|
||||
|
||||
# Additional Characters that may be added in the future
|
||||
|
||||
# $pre XXX $post <> \u0306 ; # COMBINING BREVE
|
||||
# $pre XXX $post <> \u0307 ; # COMBINING DOT ABOVE
|
||||
# $pre XXX $post <> \u0309 ; # COMBINING HOOK ABOVE
|
||||
# $pre XXX $post <> \u030B ; # COMBINING DOUBLE ACUTE ACCENT
|
||||
# $pre XXX $post <> \u030C ; # COMBINING CARON
|
||||
# $pre XXX $post <> \u030F ; # COMBINING DOUBLE GRAVE ACCENT
|
||||
# $pre XXX $post <> \u0311 ; # COMBINING INVERTED BREVE
|
||||
# $pre XXX $post <> \u0313 ; # COMBINING COMMA ABOVE
|
||||
# $pre XXX $post <> \u0314 ; # COMBINING REVERSED COMMA ABOVE
|
||||
# $pre XXX $post <> \u031B ; # COMBINING HORN
|
||||
# $pre XXX $post <> \u0324 ; # COMBINING DIAERESIS BELOW
|
||||
# $pre XXX $post <> \u0325 ; # COMBINING RING BELOW
|
||||
# $pre XXX $post <> \u0326 ; # COMBINING COMMA BELOW
|
||||
# $pre XXX $post <> \u0328 ; # COMBINING OGONEK
|
||||
# $pre XXX $post <> \u032D ; # COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
# $pre XXX $post <> \u032E ; # COMBINING BREVE BELOW
|
||||
# $pre XXX $post <> \u0330 ; # COMBINING TILDE BELOW
|
||||
# $pre XXX $post <> \u0331 ; # COMBINING MACRON BELOW
|
||||
|
||||
# $pre YYY $post <> \u00AA ; # FEMININE ORDINAL INDICATOR
|
||||
# $pre YYY $post <> \u00BA ; # MASCULINE ORDINAL INDICATOR
|
||||
# $pre YYY $post <> \u0110 ; # LATIN CAPITAL LETTER D WITH STROKE
|
||||
# $pre YYY $post <> \u0111 ; # LATIN SMALL LETTER D WITH STROKE
|
||||
# $pre YYY $post <> \u0126 ; # LATIN CAPITAL LETTER H WITH STROKE
|
||||
# $pre YYY $post <> \u0127 ; # LATIN SMALL LETTER H WITH STROKE
|
||||
# $pre YYY $post <> \u0131 ; # LATIN SMALL LETTER DOTLESS I
|
||||
# $pre YYY $post <> \u0138 ; # LATIN SMALL LETTER KRA
|
||||
# $pre YYY $post <> \u013F ; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
|
||||
# $pre YYY $post <> \u0140 ; # LATIN SMALL LETTER L WITH MIDDLE DOT
|
||||
# $pre YYY $post <> \u0141 ; # LATIN CAPITAL LETTER L WITH STROKE
|
||||
# $pre YYY $post <> \u0142 ; # LATIN SMALL LETTER L WITH STROKE
|
||||
# $pre YYY $post <> \u0149 ; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||
# $pre YYY $post <> \u0166 ; # LATIN CAPITAL LETTER T WITH STROKE
|
||||
# $pre YYY $post <> \u0167 ; # LATIN SMALL LETTER T WITH STROKE
|
||||
# $pre YYY $post <> \u017F ; # LATIN SMALL LETTER LONG S
|
||||
# $pre YYY $post <> \u0180 ; # LATIN SMALL LETTER B WITH STROKE
|
||||
# $pre YYY $post <> \u0181 ; # LATIN CAPITAL LETTER B WITH HOOK
|
||||
# $pre YYY $post <> \u0182 ; # LATIN CAPITAL LETTER B WITH TOPBAR
|
||||
# $pre YYY $post <> \u0183 ; # LATIN SMALL LETTER B WITH TOPBAR
|
||||
# $pre YYY $post <> \u0184 ; # LATIN CAPITAL LETTER TONE SIX
|
||||
# $pre YYY $post <> \u0185 ; # LATIN SMALL LETTER TONE SIX
|
||||
# $pre YYY $post <> \u0187 ; # LATIN CAPITAL LETTER C WITH HOOK
|
||||
# $pre YYY $post <> \u0188 ; # LATIN SMALL LETTER C WITH HOOK
|
||||
# $pre YYY $post <> \u0189 ; # LATIN CAPITAL LETTER AFRICAN D
|
||||
# $pre YYY $post <> \u018A ; # LATIN CAPITAL LETTER D WITH HOOK
|
||||
# $pre YYY $post <> \u018B ; # LATIN CAPITAL LETTER D WITH TOPBAR
|
||||
# $pre YYY $post <> \u018C ; # LATIN SMALL LETTER D WITH TOPBAR
|
||||
# $pre YYY $post <> \u018D ; # LATIN SMALL LETTER TURNED DELTA
|
||||
# $pre YYY $post <> \u018E ; # LATIN CAPITAL LETTER REVERSED E
|
||||
# $pre YYY $post <> \u0191 ; # LATIN CAPITAL LETTER F WITH HOOK
|
||||
# $pre YYY $post <> \u0192 ; # LATIN SMALL LETTER F WITH HOOK
|
||||
# $pre YYY $post <> \u0193 ; # LATIN CAPITAL LETTER G WITH HOOK
|
||||
# $pre YYY $post <> \u0194 ; # LATIN CAPITAL LETTER GAMMA
|
||||
# $pre YYY $post <> \u0195 ; # LATIN SMALL LETTER HV
|
||||
# $pre YYY $post <> \u0196 ; # LATIN CAPITAL LETTER IOTA
|
||||
# $pre YYY $post <> \u0197 ; # LATIN CAPITAL LETTER I WITH STROKE
|
||||
# $pre YYY $post <> \u0198 ; # LATIN CAPITAL LETTER K WITH HOOK
|
||||
# $pre YYY $post <> \u0199 ; # LATIN SMALL LETTER K WITH HOOK
|
||||
# $pre YYY $post <> \u019A ; # LATIN SMALL LETTER L WITH BAR
|
||||
# $pre YYY $post <> \u019B ; # LATIN SMALL LETTER LAMBDA WITH STROKE
|
||||
# $pre YYY $post <> \u019C ; # LATIN CAPITAL LETTER TURNED M
|
||||
# $pre YYY $post <> \u019D ; # LATIN CAPITAL LETTER N WITH LEFT HOOK
|
||||
# $pre YYY $post <> \u019E ; # LATIN SMALL LETTER N WITH LONG RIGHT LEG
|
||||
# $pre YYY $post <> \u019F ; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
|
||||
# $pre YYY $post <> \u01A2 ; # LATIN CAPITAL LETTER OI
|
||||
# $pre YYY $post <> \u01A3 ; # LATIN SMALL LETTER OI
|
||||
# $pre YYY $post <> \u01A4 ; # LATIN CAPITAL LETTER P WITH HOOK
|
||||
# $pre YYY $post <> \u01A5 ; # LATIN SMALL LETTER P WITH HOOK
|
||||
# $pre YYY $post <> \u01A6 ; # LATIN LETTER YR
|
||||
# $pre YYY $post <> \u01A7 ; # LATIN CAPITAL LETTER TONE TWO
|
||||
# $pre YYY $post <> \u01A8 ; # LATIN SMALL LETTER TONE TWO
|
||||
# $pre YYY $post <> \u01AA ; # LATIN LETTER REVERSED ESH LOOP
|
||||
# $pre YYY $post <> \u01AB ; # LATIN SMALL LETTER T WITH PALATAL HOOK
|
||||
# $pre YYY $post <> \u01AC ; # LATIN CAPITAL LETTER T WITH HOOK
|
||||
# $pre YYY $post <> \u01AD ; # LATIN SMALL LETTER T WITH HOOK
|
||||
# $pre YYY $post <> \u01AE ; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
|
||||
# $pre YYY $post <> \u01B2 ; # LATIN CAPITAL LETTER V WITH HOOK
|
||||
# $pre YYY $post <> \u01B3 ; # LATIN CAPITAL LETTER Y WITH HOOK
|
||||
# $pre YYY $post <> \u01B4 ; # LATIN SMALL LETTER Y WITH HOOK
|
||||
# $pre YYY $post <> \u01B5 ; # LATIN CAPITAL LETTER Z WITH STROKE
|
||||
# $pre YYY $post <> \u01B6 ; # LATIN SMALL LETTER Z WITH STROKE
|
||||
# $pre YYY $post <> \u01B8 ; # LATIN CAPITAL LETTER EZH REVERSED
|
||||
# $pre YYY $post <> \u01B9 ; # LATIN SMALL LETTER EZH REVERSED
|
||||
# $pre YYY $post <> \u01BA ; # LATIN SMALL LETTER EZH WITH TAIL
|
||||
# $pre YYY $post <> \u01BB ; # LATIN LETTER TWO WITH STROKE
|
||||
# $pre YYY $post <> \u01BC ; # LATIN CAPITAL LETTER TONE FIVE
|
||||
# $pre YYY $post <> \u01BD ; # LATIN SMALL LETTER TONE FIVE
|
||||
# $pre YYY $post <> \u01BE ; # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE
|
||||
# $pre YYY $post <> \u01BF ; # LATIN LETTER WYNN
|
||||
# $pre YYY $post <> \u01C0 ; # LATIN LETTER DENTAL CLICK
|
||||
# $pre YYY $post <> \u01C1 ; # LATIN LETTER LATERAL CLICK
|
||||
# $pre YYY $post <> \u01C2 ; # LATIN LETTER ALVEOLAR CLICK
|
||||
# $pre YYY $post <> \u01C3 ; # LATIN LETTER RETROFLEX CLICK
|
||||
# $pre YYY $post <> \u01C4 ; # LATIN CAPITAL LETTER DZ WITH CARON
|
||||
# $pre YYY $post <> \u01C5 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
|
||||
# $pre YYY $post <> \u01C6 ; # LATIN SMALL LETTER DZ WITH CARON
|
||||
# $pre YYY $post <> \u01C7 ; # LATIN CAPITAL LETTER LJ
|
||||
# $pre YYY $post <> \u01C8 ; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
|
||||
# $pre YYY $post <> \u01C9 ; # LATIN SMALL LETTER LJ
|
||||
# $pre YYY $post <> \u01CA ; # LATIN CAPITAL LETTER NJ
|
||||
# $pre YYY $post <> \u01CB ; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
|
||||
# $pre YYY $post <> \u01CC ; # LATIN SMALL LETTER NJ
|
||||
# $pre YYY $post <> \u01DD ; # LATIN SMALL LETTER TURNED E
|
||||
# $pre YYY $post <> \u01E4 ; # LATIN CAPITAL LETTER G WITH STROKE
|
||||
# $pre YYY $post <> \u01E5 ; # LATIN SMALL LETTER G WITH STROKE
|
||||
# $pre YYY $post <> \u01F1 ; # LATIN CAPITAL LETTER DZ
|
||||
# $pre YYY $post <> \u01F2 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
|
||||
# $pre YYY $post <> \u01F3 ; # LATIN SMALL LETTER DZ
|
||||
# $pre YYY $post <> \u01F6 ; # LATIN CAPITAL LETTER HWAIR
|
||||
# $pre YYY $post <> \u01F7 ; # LATIN CAPITAL LETTER WYNN
|
||||
# $pre YYY $post <> \u021C ; # LATIN CAPITAL LETTER YOGH
|
||||
# $pre YYY $post <> \u021D ; # LATIN SMALL LETTER YOGH
|
||||
# $pre YYY $post <> \u0222 ; # LATIN CAPITAL LETTER OU
|
||||
# $pre YYY $post <> \u0223 ; # LATIN SMALL LETTER OU
|
||||
# $pre YYY $post <> \u0224 ; # LATIN CAPITAL LETTER Z WITH HOOK
|
||||
# $pre YYY $post <> \u0225 ; # LATIN SMALL LETTER Z WITH HOOK
|
||||
# $pre YYY $post <> \u0250 ; # LATIN SMALL LETTER TURNED A
|
||||
# $pre YYY $post <> \u0251 ; # LATIN SMALL LETTER ALPHA
|
||||
# $pre YYY $post <> \u0252 ; # LATIN SMALL LETTER TURNED ALPHA
|
||||
# $pre YYY $post <> \u0253 ; # LATIN SMALL LETTER B WITH HOOK
|
||||
# $pre YYY $post <> \u0255 ; # LATIN SMALL LETTER C WITH CURL
|
||||
# $pre YYY $post <> \u0256 ; # LATIN SMALL LETTER D WITH TAIL
|
||||
# $pre YYY $post <> \u0257 ; # LATIN SMALL LETTER D WITH HOOK
|
||||
# $pre YYY $post <> \u0258 ; # LATIN SMALL LETTER REVERSED E
|
||||
# $pre YYY $post <> \u025A ; # LATIN SMALL LETTER SCHWA WITH HOOK
|
||||
# $pre YYY $post <> \u025C ; # LATIN SMALL LETTER REVERSED OPEN E
|
||||
# $pre YYY $post <> \u025D ; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
|
||||
# $pre YYY $post <> \u025E ; # LATIN SMALL LETTER CLOSED REVERSED OPEN E
|
||||
# $pre YYY $post <> \u025F ; # LATIN SMALL LETTER DOTLESS J WITH STROKE
|
||||
# $pre YYY $post <> \u0260 ; # LATIN SMALL LETTER G WITH HOOK
|
||||
# $pre YYY $post <> \u0261 ; # LATIN SMALL LETTER SCRIPT G
|
||||
# $pre YYY $post <> \u0262 ; # LATIN LETTER SMALL CAPITAL G
|
||||
# $pre YYY $post <> \u0263 ; # LATIN SMALL LETTER GAMMA
|
||||
# $pre YYY $post <> \u0264 ; # LATIN SMALL LETTER RAMS HORN
|
||||
# $pre YYY $post <> \u0265 ; # LATIN SMALL LETTER TURNED H
|
||||
# $pre YYY $post <> \u0266 ; # LATIN SMALL LETTER H WITH HOOK
|
||||
# $pre YYY $post <> \u0267 ; # LATIN SMALL LETTER HENG WITH HOOK
|
||||
# $pre YYY $post <> \u0268 ; # LATIN SMALL LETTER I WITH STROKE
|
||||
# $pre YYY $post <> \u0269 ; # LATIN SMALL LETTER IOTA
|
||||
# $pre YYY $post <> \u026B ; # LATIN SMALL LETTER L WITH MIDDLE TILDE
|
||||
# $pre YYY $post <> \u026C ; # LATIN SMALL LETTER L WITH BELT
|
||||
# $pre YYY $post <> \u026D ; # LATIN SMALL LETTER L WITH RETROFLEX HOOK
|
||||
# $pre YYY $post <> \u026E ; # LATIN SMALL LETTER LEZH
|
||||
# $pre YYY $post <> \u026F ; # LATIN SMALL LETTER TURNED M
|
||||
# $pre YYY $post <> \u0270 ; # LATIN SMALL LETTER TURNED M WITH LONG LEG
|
||||
# $pre YYY $post <> \u0271 ; # LATIN SMALL LETTER M WITH HOOK
|
||||
# $pre YYY $post <> \u0272 ; # LATIN SMALL LETTER N WITH LEFT HOOK
|
||||
# $pre YYY $post <> \u0273 ; # LATIN SMALL LETTER N WITH RETROFLEX HOOK
|
||||
# $pre YYY $post <> \u0274 ; # LATIN LETTER SMALL CAPITAL N
|
||||
# $pre YYY $post <> \u0275 ; # LATIN SMALL LETTER BARRED O
|
||||
# $pre YYY $post <> \u0276 ; # LATIN LETTER SMALL CAPITAL OE
|
||||
# $pre YYY $post <> \u0277 ; # LATIN SMALL LETTER CLOSED OMEGA
|
||||
# $pre YYY $post <> \u0278 ; # LATIN SMALL LETTER PHI
|
||||
# $pre YYY $post <> \u0279 ; # LATIN SMALL LETTER TURNED R
|
||||
# $pre YYY $post <> \u027A ; # LATIN SMALL LETTER TURNED R WITH LONG LEG
|
||||
# $pre YYY $post <> \u027B ; # LATIN SMALL LETTER TURNED R WITH HOOK
|
||||
# $pre YYY $post <> \u027C ; # LATIN SMALL LETTER R WITH LONG LEG
|
||||
# $pre YYY $post <> \u027D ; # LATIN SMALL LETTER R WITH TAIL
|
||||
# $pre YYY $post <> \u027E ; # LATIN SMALL LETTER R WITH FISHHOOK
|
||||
# $pre YYY $post <> \u027F ; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK
|
||||
# $pre YYY $post <> \u0280 ; # LATIN LETTER SMALL CAPITAL R
|
||||
# $pre YYY $post <> \u0281 ; # LATIN LETTER SMALL CAPITAL INVERTED R
|
||||
# $pre YYY $post <> \u0282 ; # LATIN SMALL LETTER S WITH HOOK
|
||||
# $pre YYY $post <> \u0284 ; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
|
||||
# $pre YYY $post <> \u0285 ; # LATIN SMALL LETTER SQUAT REVERSED ESH
|
||||
# $pre YYY $post <> \u0286 ; # LATIN SMALL LETTER ESH WITH CURL
|
||||
# $pre YYY $post <> \u0287 ; # LATIN SMALL LETTER TURNED T
|
||||
# $pre YYY $post <> \u0288 ; # LATIN SMALL LETTER T WITH RETROFLEX HOOK
|
||||
# $pre YYY $post <> \u0289 ; # LATIN SMALL LETTER U BAR
|
||||
# $pre YYY $post <> \u028B ; # LATIN SMALL LETTER V WITH HOOK
|
||||
# $pre YYY $post <> \u028D ; # LATIN SMALL LETTER TURNED W
|
||||
# $pre YYY $post <> \u028E ; # LATIN SMALL LETTER TURNED Y
|
||||
# $pre YYY $post <> \u028F ; # LATIN LETTER SMALL CAPITAL Y
|
||||
# $pre YYY $post <> \u0290 ; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK
|
||||
# $pre YYY $post <> \u0291 ; # LATIN SMALL LETTER Z WITH CURL
|
||||
# $pre YYY $post <> \u0293 ; # LATIN SMALL LETTER EZH WITH CURL
|
||||
# $pre YYY $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP
|
||||
# $pre YYY $post <> \u0295 ; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE
|
||||
# $pre YYY $post <> \u0296 ; # LATIN LETTER INVERTED GLOTTAL STOP
|
||||
# $pre YYY $post <> \u0297 ; # LATIN LETTER STRETCHED C
|
||||
# $pre YYY $post <> \u0298 ; # LATIN LETTER BILABIAL CLICK
|
||||
# $pre YYY $post <> \u0299 ; # LATIN LETTER SMALL CAPITAL B
|
||||
# $pre YYY $post <> \u029A ; # LATIN SMALL LETTER CLOSED OPEN E
|
||||
# $pre YYY $post <> \u029B ; # LATIN LETTER SMALL CAPITAL G WITH HOOK
|
||||
# $pre YYY $post <> \u029C ; # LATIN LETTER SMALL CAPITAL H
|
||||
# $pre YYY $post <> \u029D ; # LATIN SMALL LETTER J WITH CROSSED-TAIL
|
||||
# $pre YYY $post <> \u029E ; # LATIN SMALL LETTER TURNED K
|
||||
# $pre YYY $post <> \u029F ; # LATIN LETTER SMALL CAPITAL L
|
||||
# $pre YYY $post <> \u02A0 ; # LATIN SMALL LETTER Q WITH HOOK
|
||||
# $pre YYY $post <> \u02A1 ; # LATIN LETTER GLOTTAL STOP WITH STROKE
|
||||
# $pre YYY $post <> \u02A2 ; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE
|
||||
# $pre YYY $post <> \u02A3 ; # LATIN SMALL LETTER DZ DIGRAPH
|
||||
# $pre YYY $post <> \u02A4 ; # LATIN SMALL LETTER DEZH DIGRAPH
|
||||
# $pre YYY $post <> \u02A5 ; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL
|
||||
# $pre YYY $post <> \u02A6 ; # LATIN SMALL LETTER TS DIGRAPH
|
||||
# $pre YYY $post <> \u02A7 ; # LATIN SMALL LETTER TESH DIGRAPH
|
||||
# $pre YYY $post <> \u02A8 ; # LATIN SMALL LETTER TC DIGRAPH WITH CURL
|
||||
# $pre YYY $post <> \u02A9 ; # LATIN SMALL LETTER FENG DIGRAPH
|
||||
# $pre YYY $post <> \u02AA ; # LATIN SMALL LETTER LS DIGRAPH
|
||||
# $pre YYY $post <> \u02AB ; # LATIN SMALL LETTER LZ DIGRAPH
|
||||
# $pre YYY $post <> \u02AC ; # LATIN LETTER BILABIAL PERCUSSIVE
|
||||
# $pre YYY $post <> \u02AD ; # LATIN LETTER BIDENTAL PERCUSSIVE
|
||||
# $pre YYY $post <> \u02B0 ; # MODIFIER LETTER SMALL H
|
||||
# $pre YYY $post <> \u02B1 ; # MODIFIER LETTER SMALL H WITH HOOK
|
||||
# $pre YYY $post <> \u02B2 ; # MODIFIER LETTER SMALL J
|
||||
# $pre YYY $post <> \u02B3 ; # MODIFIER LETTER SMALL R
|
||||
# $pre YYY $post <> \u02B4 ; # MODIFIER LETTER SMALL TURNED R
|
||||
# $pre YYY $post <> \u02B5 ; # MODIFIER LETTER SMALL TURNED R WITH HOOK
|
||||
# $pre YYY $post <> \u02B6 ; # MODIFIER LETTER SMALL CAPITAL INVERTED R
|
||||
# $pre YYY $post <> \u02B7 ; # MODIFIER LETTER SMALL W
|
||||
# $pre YYY $post <> \u02B8 ; # MODIFIER LETTER SMALL Y
|
||||
# $pre YYY $post <> \u02E0 ; # MODIFIER LETTER SMALL GAMMA
|
||||
# $pre YYY $post <> \u02E1 ; # MODIFIER LETTER SMALL L
|
||||
# $pre YYY $post <> \u02E2 ; # MODIFIER LETTER SMALL S
|
||||
# $pre YYY $post <> \u02E3 ; # MODIFIER LETTER SMALL X
|
||||
# $pre YYY $post <> \u02E4 ; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
|
||||
# $pre YYY $post <> \u1E9A ; # LATIN SMALL LETTER A WITH RIGHT HALF RING
|
||||
# $pre YYY $post <> \u207F ; # SUPERSCRIPT LATIN SMALL LETTER N
|
||||
|
||||
:: NFC (NFD) ;
|
34
icu4c/source/data/translit/Any_Publishing.txt
Normal file
34
icu4c/source/data/translit/Any_Publishing.txt
Normal file
|
@ -0,0 +1,34 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Test case
|
||||
# "The" "(quick)" ('brown') `fox' ` jumped -- "over?"
|
||||
|
||||
# Variables
|
||||
|
||||
$single = \' ;
|
||||
$space = ' ' ;
|
||||
$double = \" ;
|
||||
$back = \` ;
|
||||
$tab = '\u0008' ; # NOTE(review): U+0008 is BACKSPACE; CHARACTER TABULATION is U+0009 - confirm intended value
|
||||
$makeRight = [[:Z:][:Ps:][:Pi:]$] ;
|
||||
|
||||
# fix UNIX quotes
|
||||
|
||||
$back $back > “ ;
|
||||
$back > ‘ ;
|
||||
|
||||
# fix typewriter quotes, by context
|
||||
|
||||
$makeRight {$double} <> “ ;
|
||||
$double <> ” ;
|
||||
|
||||
$makeRight {$single} <> ‘ ;
|
||||
$single <> ’;
|
||||
|
||||
# fix multiple spaces and hyphens
|
||||
|
||||
$space {$space} > ;
|
||||
'--' <> — ;
|
146
icu4c/source/data/translit/Arabic_Latin.txt
Normal file
146
icu4c/source/data/translit/Arabic_Latin.txt
Normal file
|
@ -0,0 +1,146 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Generally follows UNGEGN <http://www.eki.ee/wgrs/rom1_ar.pdf>
|
||||
# Occasionally deviates in the direction of ISO 233 <http://homepage.mac.com/sirbinks/pdf/Arabic.pdf>
|
||||
# a) where required for disambiguation.
|
||||
# b) with underdot instead of cedilla for letters like SAD, since
|
||||
# those are explicitly in Unicode for transliteration.
|
||||
# c) with extra non-Arabic-language letters, like PEH
|
||||
|
||||
# Does *not* do assimilation of "al", nor hyphenation.
|
||||
# While it could be done, we need to determine whether a prefix "al" could
|
||||
# occur other than as the definite article (since no space is used).
|
||||
|
||||
:: [[:Arabic:] [ⁿ\u060C\u061B\u061F\u0640\u064B-\u0655\u0660-\u066C\u06F0-\u06F9\uFDFC]] ;
|
||||
:: NFKD (NFC);
|
||||
$disambig = ̱ ;
|
||||
$disambig2 = ̰ ;
|
||||
$under = ̣ ;
|
||||
|
||||
$notAbove = [[:^ccc=0:]&[:^ccc=230:]];
|
||||
|
||||
# non-letters
|
||||
|
||||
٫ <> '.' $disambig ; # ARABIC DECIMAL SEPARATOR
|
||||
٬ <> ',' $disambig ; # ARABIC THOUSANDS SEPARATOR
|
||||
# ٭ <> ; # ARABIC FIVE POINTED STAR // no need to transliterate
|
||||
|
||||
، <> ',' ; # ARABIC COMMA
|
||||
؛ <> ';' ; # ARABIC SEMICOLON
|
||||
؟ <> '?' ; # ARABIC QUESTION MARK
|
||||
٪ <> '%' ; # ARABIC PERCENT SIGN
|
||||
|
||||
۰ <> 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
|
||||
۱ <> 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
|
||||
۲ <> 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
|
||||
۳ <> 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
|
||||
۴ <> 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
|
||||
۵ <> 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
|
||||
۶ <> 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
|
||||
۷ <> 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
|
||||
۸ <> 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
|
||||
۹ <> 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
|
||||
|
||||
٠ <> 0 ; # ARABIC-INDIC DIGIT ZERO
|
||||
١ <> 1 ; # ARABIC-INDIC DIGIT ONE
|
||||
٢ <> 2 ; # ARABIC-INDIC DIGIT TWO
|
||||
٣ <> 3 ; # ARABIC-INDIC DIGIT THREE
|
||||
٤ <> 4 ; # ARABIC-INDIC DIGIT FOUR
|
||||
٥ <> 5 ; # ARABIC-INDIC DIGIT FIVE
|
||||
٦ <> 6 ; # ARABIC-INDIC DIGIT SIX
|
||||
٧ <> 7 ; # ARABIC-INDIC DIGIT SEVEN
|
||||
٨ <> 8 ; # ARABIC-INDIC DIGIT EIGHT
|
||||
٩ <> 9 ; # ARABIC-INDIC DIGIT NINE
|
||||
|
||||
# letters
|
||||
|
||||
# long vowels
|
||||
َا<> ā ; # ARABIC FATHA, ARABIC LETTER ALEF
|
||||
ُو <> ū ; # ARABIC DAMMA, ARABIC LETTER WAW
|
||||
ِي <> ī ; # ARABIC KASRA, ARABIC LETTER YEH
|
||||
|
||||
# longer items moved here to prevent masking
|
||||
ث <> t h $disambig ; # ARABIC LETTER THEH
|
||||
ذ <> d h $disambig ; # ARABIC LETTER THAL
|
||||
ش <> s h $disambig ; # ARABIC LETTER SHEEN
|
||||
ص <> s $under ; # ARABIC LETTER SAD
|
||||
ض <> d $under ; # ARABIC LETTER DAD
|
||||
ط <> t $under ; # ARABIC LETTER TAH
|
||||
ظ <> z $under ; # ARABIC LETTER ZAH
|
||||
غ <> g h $disambig ; # ARABIC LETTER GHAIN
|
||||
|
||||
# WARNING: special case
|
||||
# <t, umlaut, half-ring below> will be canonically ordered as <t, half-ring below, umlaut>
|
||||
# so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
|
||||
# ةٕ < ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
|
||||
|
||||
ة <> t \u0308 ; # ARABIC LETTER TEH MARBUTA
|
||||
ة | $1 < t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA
|
||||
|
||||
# non-Arabic language
|
||||
ژ <> z h $disambig ; # ARABIC LETTER JEH
|
||||
ڭ <> n $disambig g ; # ARABIC LETTER NG
|
||||
ۋ <> v $disambig ; # ARABIC LETTER VE
|
||||
ی <> y $disambig2 ; # ARABIC LETTER FARSI YEH
|
||||
|
||||
# Arabic language
|
||||
|
||||
ء <> ʾ ; # ARABIC LETTER HAMZA
|
||||
ا <> a $under; # ARABIC LETTER ALEF
|
||||
ب <> b ; # ARABIC LETTER BEH
|
||||
ت <> t ; # ARABIC LETTER TEH
|
||||
ج <> j ; # ARABIC LETTER JEEM
|
||||
ح <> h $under ; # ARABIC LETTER HAH
|
||||
خ <> k h $disambig ; # ARABIC LETTER KHAH
|
||||
د <> d ; # ARABIC LETTER DAL
|
||||
ر <> r ; # ARABIC LETTER REH
|
||||
ز <> z ; # ARABIC LETTER ZAIN
|
||||
س <> s ; # ARABIC LETTER SEEN
|
||||
ع <> ʿ ; # ARABIC LETTER AIN
|
||||
ـ > ; # ARABIC TATWEEL
|
||||
ف <> f ; # ARABIC LETTER FEH
|
||||
ق <> q ; # ARABIC LETTER QAF
|
||||
ك <> k ; # ARABIC LETTER KAF
|
||||
ل <> l ; # ARABIC LETTER LAM
|
||||
م <> m ; # ARABIC LETTER MEEM
|
||||
ن <> n ; # ARABIC LETTER NOON
|
||||
ه <> h ; # ARABIC LETTER HEH
|
||||
و <> w ; # ARABIC LETTER WAW
|
||||
ى <> y $disambig ; # ARABIC LETTER ALEF MAKSURA
|
||||
ي <> y ; # ARABIC LETTER YEH
|
||||
ً <> aⁿ ; # ARABIC FATHATAN
|
||||
ٌ <> uⁿ ; # ARABIC DAMMATAN
|
||||
ٍ <> iⁿ ; # ARABIC KASRATAN
|
||||
َ <> a ; # ARABIC FATHA
|
||||
ُ <> u ; # ARABIC DAMMA
|
||||
ِ <> i ; # ARABIC KASRA
|
||||
ّ <> ̃ ; # ARABIC SHADDA
|
||||
ْ <> ̊ ; # ARABIC SUKUN
|
||||
|
||||
# special combining marks
|
||||
ٓ <> ̂ ; # ARABIC MADDAH ABOVE
|
||||
ٔ <> ̉ ; # ARABIC HAMZA ABOVE
|
||||
ٕ <> ̹ ; # ARABIC HAMZA BELOW
|
||||
|
||||
# Some non-Arabic language (not in UNGEGN)
|
||||
پ <> p ; # ARABIC LETTER PEH
|
||||
چ <> c h $disambig ; # ARABIC LETTER TCHEH
|
||||
ڤ <> v ; # ARABIC LETTER VEH
|
||||
# ڥ <> v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
|
||||
# ڢ <> f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
|
||||
گ <> g ; # ARABIC LETTER GAF
|
||||
|
||||
# fallbacks
|
||||
| s < c } [eiy];
|
||||
| k < c ;
|
||||
| i < e ;
|
||||
| u < o ;
|
||||
| ks < x ;
|
||||
| n < ⁿ;
|
||||
|
||||
:: (lower) ;
|
||||
::NFC (NFD);
|
||||
:: ( [[:Latin:] [%,.0-9;?\u02BE-\u02BF\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339\u037E]] );
|
103
icu4c/source/data/translit/Bengali_InterIndic.txt
Normal file
103
icu4c/source/data/translit/Bengali_InterIndic.txt
Normal file
|
@ -0,0 +1,103 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Bengali-InterIndic
|
||||
|
||||
\u09C7\u09BE>\uE04B; # VOWEL SIGN O
|
||||
\u09C7\u09D7>\uE04C; # VOWEL SIGN AU
|
||||
\u0981>\uE001; # SIGN CANDRABINDU
|
||||
\u0982>\uE002; # SIGN ANUSVARA
|
||||
\u0983>\uE003; # SIGN VISARGA
|
||||
\u0985>\uE005; # LETTER A
|
||||
\u0986>\uE006; # LETTER AA
|
||||
\u0987>\uE007; # LETTER I
|
||||
\u0988>\uE008; # LETTER II
|
||||
\u0989>\uE009; # LETTER U
|
||||
\u098A>\uE00A; # LETTER UU
|
||||
\u098B>\uE00B; # LETTER VOCALIC R
|
||||
\u098C>\uE00C; # LETTER VOCALIC L
|
||||
\u098F>\uE00F; # LETTER E
|
||||
\u0990>\uE010; # LETTER AI
|
||||
\u0993>\uE013; # LETTER O
|
||||
\u0994>\uE014; # LETTER AU
|
||||
\u0995>\uE015; # LETTER KA
|
||||
\u0996>\uE016; # LETTER KHA
|
||||
\u0997>\uE017; # LETTER GA
|
||||
\u0998>\uE018; # LETTER GHA
|
||||
\u0999>\uE019; # LETTER NGA
|
||||
\u099A>\uE01A; # LETTER CA
|
||||
\u099B>\uE01B; # LETTER CHA
|
||||
\u099C>\uE01C; # LETTER JA
|
||||
\u099D>\uE01D; # LETTER JHA
|
||||
\u099E>\uE01E; # LETTER NYA
|
||||
\u099F>\uE01F; # LETTER TTA
|
||||
\u09A0>\uE020; # LETTER TTHA
|
||||
\u09A1>\uE021; # LETTER DDA
|
||||
\u09A2>\uE022; # LETTER DDHA
|
||||
\u09A3>\uE023; # LETTER NNA
|
||||
\u09A4>\uE024; # LETTER TA
|
||||
\u09A5>\uE025; # LETTER THA
|
||||
\u09A6>\uE026; # LETTER DA
|
||||
\u09A7>\uE027; # LETTER DHA
|
||||
\u09A8>\uE028; # LETTER NA
|
||||
\u09AA>\uE02A; # LETTER PA
|
||||
\u09AB>\uE02B; # LETTER PHA
|
||||
\u09AC>\uE02C; # LETTER BA
|
||||
\u09AD>\uE02D; # LETTER BHA
|
||||
\u09AE>\uE02E; # LETTER MA
|
||||
\u09AF>\uE02F; # LETTER YA
|
||||
\u09B0>\uE030; # LETTER RA
|
||||
\u09B2>\uE032; # LETTER LA
|
||||
\u09B6>\uE036; # LETTER SHA
|
||||
\u09B7>\uE037; # LETTER SSA
|
||||
\u09B8>\uE038; # LETTER SA
|
||||
\u09B9>\uE039; # LETTER HA
|
||||
\u09BC>\uE03C; # SIGN NUKTA
|
||||
\u09BD>\uE03D; # SIGN AVAGRAHA
|
||||
\u09BE>\uE03E; # VOWEL SIGN AA
|
||||
\u09BF>\uE03F; # VOWEL SIGN I
|
||||
\u09C0>\uE040; # VOWEL SIGN II
|
||||
\u09C1>\uE041; # VOWEL SIGN U
|
||||
\u09C2>\uE042; # VOWEL SIGN UU
|
||||
\u09C3>\uE043; # VOWEL SIGN VOCALIC R
|
||||
\u09C4>\uE044; # VOWEL SIGN VOCALIC RR
|
||||
\u09C7>\uE047; # VOWEL SIGN E
|
||||
\u09C8>\uE048; # VOWEL SIGN AI
|
||||
\u09CB>\uE04B; # VOWEL SIGN O
|
||||
\u09CC>\uE04C; # VOWEL SIGN AU
|
||||
#
|
||||
\u09CD>\uE04D; # SIGN VIRAMA
|
||||
\u09D7>\uE057; # AU LENGTH MARK
|
||||
#
|
||||
\u09E0>\uE060; # LETTER VOCALIC RR
|
||||
\u09E1>\uE061; # LETTER VOCALIC LL
|
||||
\u09E2>\uE062; # VOWEL SIGN VOCALIC L
|
||||
\u09E3>\uE063; # VOWEL SIGN VOCALIC LL
|
||||
\u09E6>\uE066; # DIGIT ZERO
|
||||
\u09E7>\uE067; # DIGIT ONE
|
||||
\u09E8>\uE068; # DIGIT TWO
|
||||
\u09E9>\uE069; # DIGIT THREE
|
||||
\u09EA>\uE06A; # DIGIT FOUR
|
||||
\u09EB>\uE06B; # DIGIT FIVE
|
||||
\u09EC>\uE06C; # DIGIT SIX
|
||||
\u09ED>\uE06D; # DIGIT SEVEN
|
||||
\u09EE>\uE06E; # DIGIT EIGHT
|
||||
\u09EF>\uE06F; # DIGIT NINE
|
||||
\u09F0>\ue071; # Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
|
||||
\u09F1>\ue072; # Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
|
||||
\u09F2>\ue073; # Bengali-InterIndic: RUPEE MARK
|
||||
\u09F3>\ue074; # Bengali-InterIndic: RUPEE SIGN
|
||||
\u09F4>\ue075; # Bengali-InterIndic: CURRENCY NUMERATOR ONE
|
||||
\u09F5>\ue076; # Bengali-InterIndic: CURRENCY NUMERATOR TWO
|
||||
\u09F6>\ue077; # Bengali-InterIndic: CURRENCY NUMERATOR THREE
|
||||
\u09F7>\ue078; # Bengali-InterIndic: CURRENCY NUMERATOR FOUR
|
||||
\u09F8>\ue079; # Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\u09F9>\ue07A; # Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN
|
||||
\u09FA>\ue07B; # ISSHAR
|
||||
|
||||
\u0964>\ue064; # DANDA
|
||||
\u0965>\ue065; # DOUBLE DANDA
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
306
icu4c/source/data/translit/Cyrillic_Latin.txt
Normal file
306
icu4c/source/data/translit/Cyrillic_Latin.txt
Normal file
|
@ -0,0 +1,306 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# TODO: add remaining characters
|
||||
# Should add variants for Russian-English, Russian-German
|
||||
# Those can use this as a base, and then remap cases
|
||||
# like a $hat to ya or ja.
|
||||
|
||||
# :: [\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ;
|
||||
### WARNING, \u0308 must be added to the generated filters, in both directions ###
|
||||
# MINIMAL FILTER
|
||||
:: [\u0308\u0102-\u0103\u0114-\u0115\u011E-\u011F\u012C-\u012D\u014E-\u014F\u016C-\u016D\u0306\u0400-\u045F\u0490-\u0495\u0498-\u0499\u04C1-\u04C2\u04D0-\u04DF\u04E2-\u04E7\u04EC-\u04F5\u04F8-\u04F9\u1E1C-\u1E1D\u1EAE-\u1EB7\u1FB0\u1FB8\u1FD0\u1FD8\u1FE0\u1FE8] ;
|
||||
:: NFD (NFC) ;
|
||||
|
||||
$modprime = \u02B9;
|
||||
$modprime2 = \u02BA;
|
||||
|
||||
$grave = \u0300;
|
||||
$acute = \u0301;
|
||||
$hat = \u0302;
|
||||
$breve = \u0306 ;
|
||||
$dot = \u0307 ;
|
||||
$caron = \u030C ;
|
||||
$comma = \u0326 ;
|
||||
$under = \u0331 ;
|
||||
|
||||
# move up so not masked
|
||||
|
||||
я <> a $hat ; # CYRILLIC SMALL LETTER YA
|
||||
Я <> A $hat ; # CYRILLIC CAPITAL LETTER YA
|
||||
|
||||
ч <> c $caron ; # CYRILLIC SMALL LETTER CHE
|
||||
Ч <> C $caron; # CYRILLIC CAPITAL LETTER CHE
|
||||
# ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
|
||||
# Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
|
||||
# ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
|
||||
# Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
|
||||
# ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
|
||||
# Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
|
||||
|
||||
э <> e $acute; # CYRILLIC SMALL LETTER E
|
||||
Э <> E $acute; # CYRILLIC CAPITAL LETTER E
|
||||
є <> e $hat; # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
Є <> E $hat; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
|
||||
ш <> s $caron ; # CYRILLIC SMALL LETTER SHA
|
||||
Ш <> S $caron ; # CYRILLIC CAPITAL LETTER SHA
|
||||
щ <> s $hat ; # CYRILLIC SMALL LETTER SHCHA
|
||||
Щ <> S $hat; # CYRILLIC CAPITAL LETTER SHCHA
|
||||
|
||||
ѕ <> z $hat ; # CYRILLIC SMALL LETTER DZE
|
||||
Ѕ <> Z $hat; # CYRILLIC CAPITAL LETTER DZE
|
||||
# ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
|
||||
# Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
|
||||
|
||||
ю <> u $hat ; # CYRILLIC SMALL LETTER YU
|
||||
Ю <> U $hat ; # CYRILLIC CAPITAL LETTER YU
|
||||
|
||||
і <> i $acute; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
І <> I $acute; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
ј <> j $caron; # CYRILLIC SMALL LETTER JE
|
||||
Ј <> J $caron; # CYRILLIC CAPITAL LETTER JE
|
||||
|
||||
љ <> l $hat ; # CYRILLIC SMALL LETTER LJE
|
||||
Љ <> L $hat ; # CYRILLIC CAPITAL LETTER LJE
|
||||
њ <> n $hat ; # CYRILLIC SMALL LETTER NJE
|
||||
Њ <> N $hat ; # CYRILLIC CAPITAL LETTER NJE
|
||||
|
||||
ћ <> c $acute ; # CYRILLIC SMALL LETTER TSHE
|
||||
Ћ <> C $acute ; # CYRILLIC CAPITAL LETTER TSHE
|
||||
|
||||
џ <> d $hat ; # CYRILLIC SMALL LETTER DZHE
|
||||
Џ <> D $hat ; # CYRILLIC CAPITAL LETTER DZHE
|
||||
|
||||
# Normal order
|
||||
|
||||
а <> a ; # CYRILLIC SMALL LETTER A
|
||||
А <> A ; # CYRILLIC CAPITAL LETTER A
|
||||
ә <> \u0259 ; # CYRILLIC SMALL LETTER SCHWA
|
||||
Ә <> \u018F ; # CYRILLIC CAPITAL LETTER SCHWA
|
||||
ӕ <> \u00E6 ; # CYRILLIC SMALL LIGATURE A IE
|
||||
Ӕ <> \u00C6 ; # CYRILLIC CAPITAL LIGATURE A IE
|
||||
б <> b ; # CYRILLIC SMALL LETTER BE
|
||||
Б <> B ; # CYRILLIC CAPITAL LETTER BE
|
||||
в <> v ; # CYRILLIC SMALL LETTER VE
|
||||
В <> V ; # CYRILLIC CAPITAL LETTER VE
|
||||
|
||||
ґ <> g $grave ; # CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
Ґ <> G $grave ; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
ғ <> g $dot ; # CYRILLIC SMALL LETTER GHE WITH STROKE
|
||||
Ғ <> G $dot; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
|
||||
ҕ <> g $breve; # CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
|
||||
Ҕ <> G $breve; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
|
||||
г <> g ; # CYRILLIC SMALL LETTER GHE
|
||||
Г <> G ; # CYRILLIC CAPITAL LETTER GHE
|
||||
|
||||
д <> d; # CYRILLIC SMALL LETTER DE
|
||||
Д <> D; # CYRILLIC CAPITAL LETTER DE
|
||||
ђ <> đ ; # CYRILLIC SMALL LETTER DJE
|
||||
Ђ <> Đ ; # CYRILLIC CAPITAL LETTER DJE
|
||||
ҙ <> z $comma ; # CYRILLIC SMALL LETTER ZE WITH DESCENDER
|
||||
Ҙ <> Z $comma ; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
|
||||
е <> e ; # CYRILLIC SMALL LETTER IE
|
||||
Е <> E; # CYRILLIC CAPITAL LETTER IE
|
||||
|
||||
ж <> z $caron; # CYRILLIC SMALL LETTER ZHE
|
||||
Ж <> Z $caron; # CYRILLIC CAPITAL LETTER ZHE
|
||||
|
||||
# җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
|
||||
# Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
|
||||
|
||||
з <> z ; # CYRILLIC SMALL LETTER ZE
|
||||
З <> Z; # CYRILLIC CAPITAL LETTER ZE
|
||||
|
||||
й <> j ; # CYRILLIC SMALL LETTER SHORT I
|
||||
Й <> J ; # CYRILLIC CAPITAL LETTER SHORT I
|
||||
и <> i ; # CYRILLIC SMALL LETTER I
|
||||
И <> I ; # CYRILLIC CAPITAL LETTER I
|
||||
|
||||
к <> k ; # CYRILLIC SMALL LETTER KA
|
||||
К <> K; # CYRILLIC CAPITAL LETTER KA
|
||||
|
||||
# қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
|
||||
# Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
|
||||
# ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
|
||||
# Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
|
||||
# ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
|
||||
# Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
|
||||
# ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
|
||||
# Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
|
||||
# ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
|
||||
# Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
|
||||
л <> l ; # CYRILLIC SMALL LETTER EL
|
||||
Л <> L; # CYRILLIC CAPITAL LETTER EL
|
||||
|
||||
м <> m ; # CYRILLIC SMALL LETTER EM
|
||||
М <> M ; # CYRILLIC CAPITAL LETTER EM
|
||||
н <> n ; # CYRILLIC SMALL LETTER EN
|
||||
Н <> N; # CYRILLIC CAPITAL LETTER EN
|
||||
# ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
|
||||
# Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
|
||||
# ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
|
||||
# Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
|
||||
# ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
|
||||
# Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
|
||||
|
||||
о <> o ; # CYRILLIC SMALL LETTER O
|
||||
О <> O ; # CYRILLIC CAPITAL LETTER O
|
||||
# ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
|
||||
# Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
п <> p ; # CYRILLIC SMALL LETTER PE
|
||||
П <> P ; # CYRILLIC CAPITAL LETTER PE
|
||||
# ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
|
||||
# Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
|
||||
# ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
|
||||
# Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
|
||||
р <> r ; # CYRILLIC SMALL LETTER ER
|
||||
Р <> R ; # CYRILLIC CAPITAL LETTER ER
|
||||
# ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
|
||||
# Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
|
||||
с <> s ; # CYRILLIC SMALL LETTER ES
|
||||
С <> S ; # CYRILLIC CAPITAL LETTER ES
|
||||
# ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
|
||||
# Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
|
||||
т <> t ; # CYRILLIC SMALL LETTER TE
|
||||
Т <> T ; # CYRILLIC CAPITAL LETTER TE
|
||||
# ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
|
||||
# Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
|
||||
|
||||
у <> u ; # CYRILLIC SMALL LETTER U
|
||||
У <> U ; # CYRILLIC CAPITAL LETTER U
|
||||
# ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
|
||||
# Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
|
||||
# ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
|
||||
# Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
|
||||
# ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
|
||||
# Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
|
||||
ф <> f ; # CYRILLIC SMALL LETTER EF
|
||||
Ф <> F ; # CYRILLIC CAPITAL LETTER EF
|
||||
х <> h ; # CYRILLIC SMALL LETTER HA
|
||||
Х <> H; # CYRILLIC CAPITAL LETTER HA
|
||||
# ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
|
||||
# Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
|
||||
# һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
|
||||
# Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
|
||||
# ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
|
||||
# Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
|
||||
# ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
|
||||
# Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
|
||||
# ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
|
||||
# Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
|
||||
# ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
|
||||
# Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
|
||||
ц <> c ; # CYRILLIC SMALL LETTER TSE
|
||||
Ц <> C; # CYRILLIC CAPITAL LETTER TSE
|
||||
# ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
|
||||
# Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
|
||||
|
||||
# ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
|
||||
# Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
|
||||
# ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||
# Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||
|
||||
|
||||
Ъ <> $modprime2 $under ; # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
ъ <> $modprime2 ; # CYRILLIC SMALL LETTER HARD SIGN
|
||||
Ь <> $modprime $under ; # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
ь <> $modprime ; # CYRILLIC SMALL LETTER SOFT SIGN
|
||||
|
||||
ы <> y ; # CYRILLIC SMALL LETTER YERU
|
||||
Ы <> Y ; # CYRILLIC CAPITAL LETTER YERU
|
||||
|
||||
# ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN
|
||||
# Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
|
||||
# ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
|
||||
# Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
|
||||
|
||||
# ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
|
||||
# Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
|
||||
# ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
|
||||
# Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS
|
||||
# ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS
|
||||
# Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS
|
||||
# ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
|
||||
# Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
|
||||
# ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
|
||||
# Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
|
||||
# ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
|
||||
# Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
|
||||
# ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
|
||||
# Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
|
||||
# ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
|
||||
# Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
|
||||
# ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
|
||||
# Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
|
||||
# ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
|
||||
# Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
|
||||
# Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA
|
||||
### ӑ <> XXX ; # CYRILLIC SMALL LETTER A
|
||||
### Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
|
||||
### ӓ <> XXX ; # CYRILLIC SMALL LETTER A
|
||||
### Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A
|
||||
### ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA
|
||||
### Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA
|
||||
### ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE
|
||||
### Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE
|
||||
### ѐ <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
### Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
### ё <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
### Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
### ӗ <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
### Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
### ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE
|
||||
### Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
|
||||
### ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE
|
||||
### Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
|
||||
### ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE
|
||||
### Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE
|
||||
### ѝ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
### Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
### ӣ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
### Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
### ӥ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
### Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
### ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
### Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
### ӧ <> XXX ; # CYRILLIC SMALL LETTER O
|
||||
### Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O
|
||||
### ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O
|
||||
### Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
### ќ <> XXX ; # CYRILLIC SMALL LETTER KA
|
||||
### Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA
|
||||
### ӯ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
### Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
### ў <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
### Ў <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
### ӱ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
### Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
### ӳ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
### Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
### ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE
|
||||
### Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE
|
||||
### ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU
|
||||
### Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU
|
||||
### ӭ <> XXX ; # CYRILLIC SMALL LETTER E
|
||||
### Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E
|
||||
### ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
|
||||
### Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
|
||||
|
||||
# Completeness
|
||||
$ignore = [[:Mark:]''] * ;
|
||||
| k < q ;
|
||||
| K < Q ;
|
||||
| u < w ;
|
||||
| U < W ;
|
||||
| KS < X } $ignore [:UppercaseLetter:] ;
|
||||
| KS < [:UppercaseLetter:] $ignore { X ;
|
||||
| Ks < X ;
|
||||
| ks < x ;
|
||||
|
||||
:: NFC (NFD) ;
|
||||
# note: a global filter is more efficient, but MUST include all source chars!!
|
||||
# :: ([\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]]);
|
||||
# MINIMAL FILTER: Latin-Cyrillic
|
||||
:: ( [\u0308A-Za-z\u00C0-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u018F\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0259\u02B9-\u02BA\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344\u0374\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0400\u0403\u040C-\u040E\u0419\u0439\u0450\u0453\u045C-\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F02-\u1F05\u1F0A-\u1F0D\u1F12-\u1F15\u1F1A-\u1F1D\u1F22-\u1F25\u1F2A-\u1F2D\u1F32-\u1F35\u1F3A-\u1F3D\u1F42-\u1F45\u1F4A-\u1F4D\u1F52-\u1F55\u1F5B\u1F5D\u1F62-\u1F65\u1F6A-\u1F6D\u1F70-\u1F7D\u1F82-\u1F85\u1F8A-\u1F8D\u1F92-\u1F95\u1F9A-\u1F9D\u1FA2-\u1FA5\u1FAA-\u1FAD\u1FB0\u1FB2\u1FB4\u1FB8\u1FBA-\u1FBB\u1FC2\u1FC4\u1FC8-\u1FCB\u1FCD-\u1FCE\u1FD0\u1FD2-\u1FD3\u1FD8\u1FDA-\u1FDB\u1FDD-\u1FDE\u1FE0\u1FE2-\u1FE3\u1FE8\u1FEA-\u1FEB\u1FED-\u1FEE\u1FF2\u1FF4\u1FF8-\u1FFB\u212A-\u212B] ) ;
|
117
icu4c/source/data/translit/Devanagari_InterIndic.txt
Normal file
117
icu4c/source/data/translit/Devanagari_InterIndic.txt
Normal file
|
@ -0,0 +1,117 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Devanagari-InterIndic
|
||||
# :: NFD;
|
||||
#Rules for Decomposed characters
|
||||
|
||||
|
||||
\u0901>\uE001; # SIGN CANDRABINDU
|
||||
\u0902>\uE002; # SIGN ANUSVARA
|
||||
\u0903>\uE003; # SIGN VISARGA
|
||||
\u0904>\uE004; # LETTER SHORT A
|
||||
\u0905>\uE005; # LETTER A
|
||||
\u0906>\uE006; # LETTER AA
|
||||
\u0907>\uE007; # LETTER I
|
||||
\u0908>\uE008; # LETTER II
|
||||
\u0909>\uE009; # LETTER U
|
||||
\u090A>\uE00A; # LETTER UU
|
||||
\u090B>\uE00B; # LETTER VOCALIC R
|
||||
\u090C>\uE00C; # LETTER VOCALIC L
|
||||
\u090D>\uE00D; # LETTER CANDRA E (For representing English sounds)
|
||||
\u090E>\uE00E; # UNMAPPED LETTER SHORT E (For Southern Scripts)
|
||||
\u090F>\uE00F; # LETTER E
|
||||
\u0910>\uE010; # LETTER AI
|
||||
\u0911>\uE011; # LETTER CANDRA O (For representing English sounds)
|
||||
\u0912>\uE012; # UNMAPPED LETTER SHORT O (For Southern Scripts)
|
||||
\u0913>\uE013; # LETTER O
|
||||
\u0914>\uE014; # LETTER AU
|
||||
\u0915>\uE015; # LETTER KA
|
||||
\u0916>\uE016; # LETTER KHA
|
||||
\u0917>\uE017; # LETTER GA
|
||||
\u0918>\uE018; # LETTER GHA
|
||||
\u0919>\uE019; # LETTER NGA
|
||||
\u091A>\uE01A; # LETTER CA
|
||||
\u091B>\uE01B; # LETTER CHA
|
||||
\u091C>\uE01C; # LETTER JA
|
||||
\u091D>\uE01D; # LETTER JHA
|
||||
\u091E>\uE01E; # LETTER NYA
|
||||
\u091F>\uE01F; # LETTER TTA
|
||||
\u0920>\uE020; # LETTER TTHA
|
||||
\u0921>\uE021; # LETTER DDA
|
||||
\u0922>\uE022; # LETTER DDHA
|
||||
\u0923>\uE023; # LETTER NNA
|
||||
\u0924>\uE024; # LETTER TA
|
||||
\u0925>\uE025; # LETTER THA
|
||||
\u0926>\uE026; # LETTER DA
|
||||
\u0927>\uE027; # LETTER DHA
|
||||
\u0928>\uE028; # LETTER NA
|
||||
\u0929>\uE029; # LETTER NNNA
|
||||
\u092A>\uE02A; # LETTER PA
|
||||
\u092B>\uE02B; # LETTER PHA
|
||||
\u092C>\uE02C; # LETTER BA
|
||||
\u092D>\uE02D; # LETTER BHA
|
||||
\u092E>\uE02E; # LETTER MA
|
||||
\u092F>\uE02F; # LETTER YA
|
||||
\u0930>\uE030; # LETTER RA
|
||||
\u0931>\uE031; # LETTER RRA
|
||||
\u0932>\uE032; # LETTER LA
|
||||
\u0933>\uE033; # LETTER LLA
|
||||
\u0934>\uE034; # LETTER LLLA
|
||||
|
||||
\u0935>\uE035; # LETTER VA
|
||||
\u0936>\uE036; # LETTER SHA
|
||||
\u0937>\uE037; # LETTER SSA
|
||||
\u0938>\uE038; # LETTER SA
|
||||
\u0939>\uE039; # LETTER HA
|
||||
\u093C>\uE03C; # SIGN NUKTA
|
||||
\u093D>\uE03D; # SIGN AVAGRAHA
|
||||
\u093E>\uE03E; # VOWEL SIGN AA
|
||||
\u093F>\uE03F; # VOWEL SIGN I
|
||||
\u0940>\uE040; # VOWEL SIGN II
|
||||
\u0941>\uE041; # VOWEL SIGN U
|
||||
\u0942>\uE042; # VOWEL SIGN UU
|
||||
\u0943>\uE043; # VOWEL SIGN VOCALIC R
|
||||
\u0944>\uE044; # VOWEL SIGN VOCALIC RR
|
||||
\u0945>\uE045; # VOWEL SIGN CANDRA E
|
||||
\u0946>\uE046; # UNMAPPED VOWEL SIGN SHORT E
|
||||
\u0947>\uE047; # VOWEL SIGN E
|
||||
\u0948>\uE048; # VOWEL SIGN AI
|
||||
\u0949>\uE049; # VOWEL SIGN CANDRA O
|
||||
\u094A>\uE04A; # UNMAPPED VOWEL SIGN SHORT O
|
||||
\u094B>\uE04B; # VOWEL SIGN O
|
||||
\u094C>\uE04C; # VOWEL SIGN AU
|
||||
\u094D>\uE04D; # SIGN VIRAMA
|
||||
\u0950>\uE050; # OM
|
||||
\u0951>\uE051; # UNMAPPED STRESS SIGN UDATTA
|
||||
\u0952>\uE052; # UNMAPPED STRESS SIGN ANUDATTA
|
||||
\u0953>\uE053; # UNMAPPED GRAVE ACCENT
|
||||
\u0954>\uE054; # UNMAPPED ACUTE ACCENT
|
||||
\u0958>\uE058; # LETTER QA
|
||||
\u0959>\uE059; # LETTER KHHA
|
||||
\u095A>\uE05a; # LETTER GHHA
|
||||
\u095B>\uE05b; # LETTER ZA
|
||||
\u095C>\uE05c; # LETTER DDDHA
|
||||
\u095D>\uE05d; # LETTER RHA
|
||||
\u095E>\uE05e; # LETTER FA
|
||||
\u095F>\uE05f; # LETTER YYA
|
||||
\u0960>\uE060; # LETTER VOCALIC RR
|
||||
\u0961>\uE061; # LETTER VOCALIC LL
|
||||
\u0962>\uE062; # VOWEL SIGN VOCALIC L
|
||||
\u0963>\uE063; # VOWEL SIGN VOCALIC LL
|
||||
\u0964>\ue064; # DANDA
|
||||
\u0965>\ue065; # DOUBLE DANDA
|
||||
\u0966>\uE066; # DIGIT ZERO
|
||||
\u0967>\uE067; # DIGIT ONE
|
||||
\u0968>\uE068; # DIGIT TWO
|
||||
\u0969>\uE069; # DIGIT THREE
|
||||
\u096A>\uE06A; # DIGIT FOUR
|
||||
\u096B>\uE06B; # DIGIT FIVE
|
||||
\u096C>\uE06C; # DIGIT SIX
|
||||
\u096D>\uE06D; # DIGIT SEVEN
|
||||
\u096E>\uE06E; # DIGIT EIGHT
|
||||
\u096F>\uE06F; # DIGIT NINE
|
||||
\u0970>\uE070; # Devanagari-InterIndic: ABBREVIATION SIGN
|
||||
# :: NFC (NFD) ;
|
271
icu4c/source/data/translit/Fullwidth_Halfwidth.txt
Normal file
271
icu4c/source/data/translit/Fullwidth_Halfwidth.txt
Normal file
|
@ -0,0 +1,271 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Fullwidth-Halfwidth
|
||||
|
||||
# Mechanically generated from Unicode Character Database
|
||||
# IDEOGRAPHIC SPACE then added, and
|
||||
# FULLWIDTH MACRON changed to map to MACRON, not SPACE + COMBINING MACRON
|
||||
|
||||
# multicharacter
|
||||
|
||||
ガ<>ガ; # to KATAKANA LETTER GA
|
||||
ギ<>ギ; # to KATAKANA LETTER GI
|
||||
グ<>グ; # to KATAKANA LETTER GU
|
||||
ゲ<>ゲ; # to KATAKANA LETTER GE
|
||||
ゴ<>ゴ; # to KATAKANA LETTER GO
|
||||
ザ<>ザ; # to KATAKANA LETTER ZA
|
||||
ジ<>ジ; # to KATAKANA LETTER ZI
|
||||
ズ<>ズ; # to KATAKANA LETTER ZU
|
||||
ゼ<>ゼ; # to KATAKANA LETTER ZE
|
||||
ゾ<>ゾ; # to KATAKANA LETTER ZO
|
||||
ダ<>ダ; # to KATAKANA LETTER DA
|
||||
ヂ<>ヂ; # to KATAKANA LETTER DI
|
||||
ヅ<>ヅ; # to KATAKANA LETTER DU
|
||||
デ<>デ; # to KATAKANA LETTER DE
|
||||
ド<>ド; # to KATAKANA LETTER DO
|
||||
バ<>バ; # to KATAKANA LETTER BA
|
||||
パ<>パ; # to KATAKANA LETTER PA
|
||||
ビ<>ビ; # to KATAKANA LETTER BI
|
||||
ピ<>ピ; # to KATAKANA LETTER PI
|
||||
ブ<>ブ; # to KATAKANA LETTER BU
|
||||
プ<>プ; # to KATAKANA LETTER PU
|
||||
ベ<>ベ; # to KATAKANA LETTER BE
|
||||
ペ<>ペ; # to KATAKANA LETTER PE
|
||||
ボ<>ボ; # to KATAKANA LETTER BO
|
||||
ポ<>ポ; # to KATAKANA LETTER PO
|
||||
ヴ<>ヴ; # to KATAKANA LETTER VU
|
||||
ヷ<>ヷ; # to KATAKANA LETTER VA
|
||||
ヺ<>ヺ; # to KATAKANA LETTER VO
|
||||
|
||||
# single character
|
||||
|
||||
!<>'!'; # from FULLWIDTH EXCLAMATION MARK
|
||||
"<>'\"'; # from FULLWIDTH QUOTATION MARK
|
||||
#<>'#'; # from FULLWIDTH NUMBER SIGN
|
||||
$<>'$'; # from FULLWIDTH DOLLAR SIGN
|
||||
%<>'%'; # from FULLWIDTH PERCENT SIGN
|
||||
&<>'&'; # from FULLWIDTH AMPERSAND
|
||||
'<>''; # from FULLWIDTH APOSTROPHE
|
||||
(<>'('; # from FULLWIDTH LEFT PARENTHESIS
|
||||
)<>')'; # from FULLWIDTH RIGHT PARENTHESIS
|
||||
*<>'*'; # from FULLWIDTH ASTERISK
|
||||
+<>'+'; # from FULLWIDTH PLUS SIGN
|
||||
,<>','; # from FULLWIDTH COMMA
|
||||
-<>'-'; # from FULLWIDTH HYPHEN-MINUS
|
||||
.<>'.'; # from FULLWIDTH FULL STOP
|
||||
/<>'/'; # from FULLWIDTH SOLIDUS
|
||||
0<>'0'; # from FULLWIDTH DIGIT ZERO
|
||||
1<>'1'; # from FULLWIDTH DIGIT ONE
|
||||
2<>'2'; # from FULLWIDTH DIGIT TWO
|
||||
3<>'3'; # from FULLWIDTH DIGIT THREE
|
||||
4<>'4'; # from FULLWIDTH DIGIT FOUR
|
||||
5<>'5'; # from FULLWIDTH DIGIT FIVE
|
||||
6<>'6'; # from FULLWIDTH DIGIT SIX
|
||||
7<>'7'; # from FULLWIDTH DIGIT SEVEN
|
||||
8<>'8'; # from FULLWIDTH DIGIT EIGHT
|
||||
9<>'9'; # from FULLWIDTH DIGIT NINE
|
||||
:<>':'; # from FULLWIDTH COLON
|
||||
;<>';'; # from FULLWIDTH SEMICOLON
|
||||
<<>'<'; # from FULLWIDTH LESS-THAN SIGN
|
||||
=<>'='; # from FULLWIDTH EQUALS SIGN
|
||||
><>'>'; # from FULLWIDTH GREATER-THAN SIGN
|
||||
?<>'?'; # from FULLWIDTH QUESTION MARK
|
||||
@<>'@'; # from FULLWIDTH COMMERCIAL AT
|
||||
A<>A; # from FULLWIDTH LATIN CAPITAL LETTER A
|
||||
B<>B; # from FULLWIDTH LATIN CAPITAL LETTER B
|
||||
C<>C; # from FULLWIDTH LATIN CAPITAL LETTER C
|
||||
D<>D; # from FULLWIDTH LATIN CAPITAL LETTER D
|
||||
E<>E; # from FULLWIDTH LATIN CAPITAL LETTER E
|
||||
F<>F; # from FULLWIDTH LATIN CAPITAL LETTER F
|
||||
G<>G; # from FULLWIDTH LATIN CAPITAL LETTER G
|
||||
H<>H; # from FULLWIDTH LATIN CAPITAL LETTER H
|
||||
I<>I; # from FULLWIDTH LATIN CAPITAL LETTER I
|
||||
J<>J; # from FULLWIDTH LATIN CAPITAL LETTER J
|
||||
K<>K; # from FULLWIDTH LATIN CAPITAL LETTER K
|
||||
L<>L; # from FULLWIDTH LATIN CAPITAL LETTER L
|
||||
M<>M; # from FULLWIDTH LATIN CAPITAL LETTER M
|
||||
N<>N; # from FULLWIDTH LATIN CAPITAL LETTER N
|
||||
O<>O; # from FULLWIDTH LATIN CAPITAL LETTER O
|
||||
P<>P; # from FULLWIDTH LATIN CAPITAL LETTER P
|
||||
Q<>Q; # from FULLWIDTH LATIN CAPITAL LETTER Q
|
||||
R<>R; # from FULLWIDTH LATIN CAPITAL LETTER R
|
||||
S<>S; # from FULLWIDTH LATIN CAPITAL LETTER S
|
||||
T<>T; # from FULLWIDTH LATIN CAPITAL LETTER T
|
||||
U<>U; # from FULLWIDTH LATIN CAPITAL LETTER U
|
||||
V<>V; # from FULLWIDTH LATIN CAPITAL LETTER V
|
||||
W<>W; # from FULLWIDTH LATIN CAPITAL LETTER W
|
||||
X<>X; # from FULLWIDTH LATIN CAPITAL LETTER X
|
||||
Y<>Y; # from FULLWIDTH LATIN CAPITAL LETTER Y
|
||||
Z<>Z; # from FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
[<>'['; # from FULLWIDTH LEFT SQUARE BRACKET
|
||||
\<>'\\'; # from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
|
||||
]<>']'; # from FULLWIDTH RIGHT SQUARE BRACKET
|
||||
^<>'^'; # from FULLWIDTH CIRCUMFLEX ACCENT
|
||||
_<>'_'; # from FULLWIDTH LOW LINE
|
||||
`<>'`'; # from FULLWIDTH GRAVE ACCENT
|
||||
a<>a; # from FULLWIDTH LATIN SMALL LETTER A
|
||||
b<>b; # from FULLWIDTH LATIN SMALL LETTER B
|
||||
c<>c; # from FULLWIDTH LATIN SMALL LETTER C
|
||||
d<>d; # from FULLWIDTH LATIN SMALL LETTER D
|
||||
e<>e; # from FULLWIDTH LATIN SMALL LETTER E
|
||||
f<>f; # from FULLWIDTH LATIN SMALL LETTER F
|
||||
g<>g; # from FULLWIDTH LATIN SMALL LETTER G
|
||||
h<>h; # from FULLWIDTH LATIN SMALL LETTER H
|
||||
i<>i; # from FULLWIDTH LATIN SMALL LETTER I
|
||||
j<>j; # from FULLWIDTH LATIN SMALL LETTER J
|
||||
k<>k; # from FULLWIDTH LATIN SMALL LETTER K
|
||||
l<>l; # from FULLWIDTH LATIN SMALL LETTER L
|
||||
m<>m; # from FULLWIDTH LATIN SMALL LETTER M
|
||||
n<>n; # from FULLWIDTH LATIN SMALL LETTER N
|
||||
o<>o; # from FULLWIDTH LATIN SMALL LETTER O
|
||||
p<>p; # from FULLWIDTH LATIN SMALL LETTER P
|
||||
q<>q; # from FULLWIDTH LATIN SMALL LETTER Q
|
||||
r<>r; # from FULLWIDTH LATIN SMALL LETTER R
|
||||
s<>s; # from FULLWIDTH LATIN SMALL LETTER S
|
||||
t<>t; # from FULLWIDTH LATIN SMALL LETTER T
|
||||
u<>u; # from FULLWIDTH LATIN SMALL LETTER U
|
||||
v<>v; # from FULLWIDTH LATIN SMALL LETTER V
|
||||
w<>w; # from FULLWIDTH LATIN SMALL LETTER W
|
||||
x<>x; # from FULLWIDTH LATIN SMALL LETTER X
|
||||
y<>y; # from FULLWIDTH LATIN SMALL LETTER Y
|
||||
z<>z; # from FULLWIDTH LATIN SMALL LETTER Z
|
||||
{<>'{'; # from FULLWIDTH LEFT CURLY BRACKET
|
||||
|<>'|'; # from FULLWIDTH VERTICAL LINE
|
||||
}<>'}'; # from FULLWIDTH RIGHT CURLY BRACKET
|
||||
~<>'~'; # from FULLWIDTH TILDE
|
||||
。<>。; # to HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
「<>「; # to HALFWIDTH LEFT CORNER BRACKET
|
||||
」<>」; # to HALFWIDTH RIGHT CORNER BRACKET
|
||||
、<>、; # to HALFWIDTH IDEOGRAPHIC COMMA
|
||||
・<>・; # to HALFWIDTH KATAKANA MIDDLE DOT
|
||||
ヲ<>ヲ; # to HALFWIDTH KATAKANA LETTER WO
|
||||
ァ<>ァ; # to HALFWIDTH KATAKANA LETTER SMALL A
|
||||
ィ<>ィ; # to HALFWIDTH KATAKANA LETTER SMALL I
|
||||
ゥ<>ゥ; # to HALFWIDTH KATAKANA LETTER SMALL U
|
||||
ェ<>ェ; # to HALFWIDTH KATAKANA LETTER SMALL E
|
||||
ォ<>ォ; # to HALFWIDTH KATAKANA LETTER SMALL O
|
||||
ャ<>ャ; # to HALFWIDTH KATAKANA LETTER SMALL YA
|
||||
ュ<>ュ; # to HALFWIDTH KATAKANA LETTER SMALL YU
|
||||
ョ<>ョ; # to HALFWIDTH KATAKANA LETTER SMALL YO
|
||||
ッ<>ッ; # to HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
ー<>ー; # to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
ア<>ア; # to HALFWIDTH KATAKANA LETTER A
|
||||
イ<>イ; # to HALFWIDTH KATAKANA LETTER I
|
||||
ウ<>ウ; # to HALFWIDTH KATAKANA LETTER U
|
||||
エ<>エ; # to HALFWIDTH KATAKANA LETTER E
|
||||
オ<>オ; # to HALFWIDTH KATAKANA LETTER O
|
||||
カ<>カ; # to HALFWIDTH KATAKANA LETTER KA
|
||||
キ<>キ; # to HALFWIDTH KATAKANA LETTER KI
|
||||
ク<>ク; # to HALFWIDTH KATAKANA LETTER KU
|
||||
ケ<>ケ; # to HALFWIDTH KATAKANA LETTER KE
|
||||
コ<>コ; # to HALFWIDTH KATAKANA LETTER KO
|
||||
サ<>サ; # to HALFWIDTH KATAKANA LETTER SA
|
||||
シ<>シ; # to HALFWIDTH KATAKANA LETTER SI
|
||||
ス<>ス; # to HALFWIDTH KATAKANA LETTER SU
|
||||
セ<>セ; # to HALFWIDTH KATAKANA LETTER SE
|
||||
ソ<>ソ; # to HALFWIDTH KATAKANA LETTER SO
|
||||
タ<>タ; # to HALFWIDTH KATAKANA LETTER TA
|
||||
チ<>チ; # to HALFWIDTH KATAKANA LETTER TI
|
||||
ツ<>ツ; # to HALFWIDTH KATAKANA LETTER TU
|
||||
テ<>テ; # to HALFWIDTH KATAKANA LETTER TE
|
||||
ト<>ト; # to HALFWIDTH KATAKANA LETTER TO
|
||||
ナ<>ナ; # to HALFWIDTH KATAKANA LETTER NA
|
||||
ニ<>ニ; # to HALFWIDTH KATAKANA LETTER NI
|
||||
ヌ<>ヌ; # to HALFWIDTH KATAKANA LETTER NU
|
||||
ネ<>ネ; # to HALFWIDTH KATAKANA LETTER NE
|
||||
ノ<>ノ; # to HALFWIDTH KATAKANA LETTER NO
|
||||
ハ<>ハ; # to HALFWIDTH KATAKANA LETTER HA
|
||||
ヒ<>ヒ; # to HALFWIDTH KATAKANA LETTER HI
|
||||
フ<>フ; # to HALFWIDTH KATAKANA LETTER HU
|
||||
ヘ<>ヘ; # to HALFWIDTH KATAKANA LETTER HE
|
||||
ホ<>ホ; # to HALFWIDTH KATAKANA LETTER HO
|
||||
マ<>マ; # to HALFWIDTH KATAKANA LETTER MA
|
||||
ミ<>ミ; # to HALFWIDTH KATAKANA LETTER MI
|
||||
ム<>ム; # to HALFWIDTH KATAKANA LETTER MU
|
||||
メ<>メ; # to HALFWIDTH KATAKANA LETTER ME
|
||||
モ<>モ; # to HALFWIDTH KATAKANA LETTER MO
|
||||
ヤ<>ヤ; # to HALFWIDTH KATAKANA LETTER YA
|
||||
ユ<>ユ; # to HALFWIDTH KATAKANA LETTER YU
|
||||
ヨ<>ヨ; # to HALFWIDTH KATAKANA LETTER YO
|
||||
ラ<>ラ; # to HALFWIDTH KATAKANA LETTER RA
|
||||
リ<>リ; # to HALFWIDTH KATAKANA LETTER RI
|
||||
ル<>ル; # to HALFWIDTH KATAKANA LETTER RU
|
||||
レ<>レ; # to HALFWIDTH KATAKANA LETTER RE
|
||||
ロ<>ロ; # to HALFWIDTH KATAKANA LETTER RO
|
||||
ワ<>ワ; # to HALFWIDTH KATAKANA LETTER WA
|
||||
ン<>ン; # to HALFWIDTH KATAKANA LETTER N
|
||||
゙<>゙; # to HALFWIDTH KATAKANA VOICED SOUND MARK
|
||||
゚<>゚; # to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
ᅠ<>ᅠ; # to HALFWIDTH HANGUL FILLER
|
||||
ᄀ<>ᄀ; # to HALFWIDTH HANGUL LETTER KIYEOK
|
||||
ᄁ<>ᄁ; # to HALFWIDTH HANGUL LETTER SSANGKIYEOK
|
||||
ᆪ<>ᆪ; # to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
|
||||
ᄂ<>ᄂ; # to HALFWIDTH HANGUL LETTER NIEUN
|
||||
ᆬ<>ᆬ; # to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
|
||||
ᆭ<>ᆭ; # to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
|
||||
ᄃ<>ᄃ; # to HALFWIDTH HANGUL LETTER TIKEUT
|
||||
ᄄ<>ᄄ; # to HALFWIDTH HANGUL LETTER SSANGTIKEUT
|
||||
ᄅ<>ᄅ; # to HALFWIDTH HANGUL LETTER RIEUL
|
||||
ᆰ<>ᆰ; # to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
|
||||
ᆱ<>ᆱ; # to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
|
||||
ᆲ<>ᆲ; # to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
|
||||
ᆳ<>ᆳ; # to HALFWIDTH HANGUL LETTER RIEUL-SIOS
|
||||
ᆴ<>ᆴ; # to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
|
||||
ᆵ<>ᆵ; # to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
|
||||
ᄚ<>ᄚ; # to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
|
||||
ᄆ<>ᄆ; # to HALFWIDTH HANGUL LETTER MIEUM
|
||||
ᄇ<>ᄇ; # to HALFWIDTH HANGUL LETTER PIEUP
|
||||
ᄈ<>ᄈ; # to HALFWIDTH HANGUL LETTER SSANGPIEUP
|
||||
ᄡ<>ᄡ; # to HALFWIDTH HANGUL LETTER PIEUP-SIOS
|
||||
ᄉ<>ᄉ; # to HALFWIDTH HANGUL LETTER SIOS
|
||||
ᄊ<>ᄊ; # to HALFWIDTH HANGUL LETTER SSANGSIOS
|
||||
ᄋ<>ᄋ; # to HALFWIDTH HANGUL LETTER IEUNG
|
||||
ᄌ<>ᄌ; # to HALFWIDTH HANGUL LETTER CIEUC
|
||||
ᄍ<>ᄍ; # to HALFWIDTH HANGUL LETTER SSANGCIEUC
|
||||
ᄎ<>ᄎ; # to HALFWIDTH HANGUL LETTER CHIEUCH
|
||||
ᄏ<>ᄏ; # to HALFWIDTH HANGUL LETTER KHIEUKH
|
||||
ᄐ<>ᄐ; # to HALFWIDTH HANGUL LETTER THIEUTH
|
||||
ᄑ<>ᄑ; # to HALFWIDTH HANGUL LETTER PHIEUPH
|
||||
ᄒ<>ᄒ; # to HALFWIDTH HANGUL LETTER HIEUH
|
||||
ᅡ<>ᅡ; # to HALFWIDTH HANGUL LETTER A
|
||||
ᅢ<>ᅢ; # to HALFWIDTH HANGUL LETTER AE
|
||||
ᅣ<>ᅣ; # to HALFWIDTH HANGUL LETTER YA
|
||||
ᅤ<>ᅤ; # to HALFWIDTH HANGUL LETTER YAE
|
||||
ᅥ<>ᅥ; # to HALFWIDTH HANGUL LETTER EO
|
||||
ᅦ<>ᅦ; # to HALFWIDTH HANGUL LETTER E
|
||||
ᅧ<>ᅧ; # to HALFWIDTH HANGUL LETTER YEO
|
||||
ᅨ<>ᅨ; # to HALFWIDTH HANGUL LETTER YE
|
||||
ᅩ<>ᅩ; # to HALFWIDTH HANGUL LETTER O
|
||||
ᅪ<>ᅪ; # to HALFWIDTH HANGUL LETTER WA
|
||||
ᅫ<>ᅫ; # to HALFWIDTH HANGUL LETTER WAE
|
||||
ᅬ<>ᅬ; # to HALFWIDTH HANGUL LETTER OE
|
||||
ᅭ<>ᅭ; # to HALFWIDTH HANGUL LETTER YO
|
||||
ᅮ<>ᅮ; # to HALFWIDTH HANGUL LETTER U
|
||||
ᅯ<>ᅯ; # to HALFWIDTH HANGUL LETTER WEO
|
||||
ᅰ<>ᅰ; # to HALFWIDTH HANGUL LETTER WE
|
||||
ᅱ<>ᅱ; # to HALFWIDTH HANGUL LETTER WI
|
||||
ᅲ<>ᅲ; # to HALFWIDTH HANGUL LETTER YU
|
||||
ᅳ<>ᅳ; # to HALFWIDTH HANGUL LETTER EU
|
||||
ᅴ<>ᅴ; # to HALFWIDTH HANGUL LETTER YI
|
||||
ᅵ<>ᅵ; # to HALFWIDTH HANGUL LETTER I
|
||||
¢<>'¢'; # from FULLWIDTH CENT SIGN
|
||||
£<>'£'; # from FULLWIDTH POUND SIGN
|
||||
¬<>'¬'; # from FULLWIDTH NOT SIGN
|
||||
 ̄<>'¯'; # from FULLWIDTH MACRON
|
||||
' '<>' '; # ideographic space (place this after MACRON)
|
||||
¦<>'¦'; # from FULLWIDTH BROKEN BAR
|
||||
¥<>'¥'; # from FULLWIDTH YEN SIGN
|
||||
₩<>₩; # from FULLWIDTH WON SIGN
|
||||
│<>│; # to HALFWIDTH FORMS LIGHT VERTICAL
|
||||
'←'<>'←'; # to HALFWIDTH LEFTWARDS ARROW
|
||||
↑<>↑; # to HALFWIDTH UPWARDS ARROW
|
||||
'→'<>'→'; # to HALFWIDTH RIGHTWARDS ARROW
|
||||
↓<>↓; # to HALFWIDTH DOWNWARDS ARROW
|
||||
■<>■; # to HALFWIDTH BLACK SQUARE
|
||||
○<>○; # to HALFWIDTH WHITE CIRCLE
|
||||
|
||||
# eof
|
||||
|
345
icu4c/source/data/translit/Greek_Latin.txt
Normal file
345
icu4c/source/data/translit/Greek_Latin.txt
Normal file
|
@ -0,0 +1,345 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Rules are predicated on running NFD first, and NFC afterwards
|
||||
# :: [\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ;
|
||||
# MINIMAL FILTER GENERATED FOR: Greek-Latin
|
||||
:: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u03F7-\u07FB\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u03F9] ;
|
||||
|
||||
:: NFD (NFC) ;
|
||||
|
||||
# TEST CASES
|
||||
|
||||
# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
|
||||
# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
|
||||
# ᾳ ῃ ῳ ὃ ὄ
|
||||
# ὠς ὡς ὢς ὣς
|
||||
# Ὠς Ὡς Ὢς Ὣς
|
||||
# ὨΣ ὩΣ ὪΣ ὫΣ
|
||||
# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
|
||||
|
||||
# Useful variables
|
||||
|
||||
$lower = [[:latin:][:greek:] & [:Ll:]];
|
||||
$glower = [[:greek:] & [:Ll:]];
|
||||
$upper = [[:latin:][:greek:] & [:Lu:]] ;
|
||||
$accent = [:M:] ;
|
||||
|
||||
# NOTE: restrict to just the Greek & Latin accents that we care about
|
||||
# TODO: broaden out once iteration is fixed
|
||||
$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;
|
||||
|
||||
$macron = \u0304 ;
|
||||
$ddot = \u0308 ;
|
||||
$ddotmac = [$ddot$macron];
|
||||
|
||||
$lcgvowel = [αεηιουω] ;
|
||||
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
|
||||
$gvowel = [$lcgvowel $ucgvowel] ;
|
||||
$lcgvowelC = [$lcgvowel $accent] ;
|
||||
|
||||
$evowel = [aeiouyAEIOUY];
|
||||
$evowel2 = [iuyIUY];
|
||||
$vowel = [ $evowel $gvowel] ;
|
||||
|
||||
$gammaLike = [ΓΚΞΧγκξχϰ] ;
|
||||
$egammaLike = [GKXCgkxc] ;
|
||||
$smooth = ̓ ;
|
||||
$rough = ̔ ;
|
||||
$iotasub = ͅ ;
|
||||
|
||||
$evowel_i = [$evowel-[iI]] ;
|
||||
$evowel2_i = [uyUY];
|
||||
|
||||
$underbar = \u0331;
|
||||
|
||||
$afterLetter = [:L:] [[:M:]\']* ;
|
||||
$beforeLetter = [[:M:]\']* [:L:] ;
|
||||
$beforeLower = $accent * $lower ;
|
||||
|
||||
$notLetter = [^[:L:][:M:]] ;
|
||||
$under = ̱;
|
||||
|
||||
# Fix punctuation
|
||||
# preserve original
|
||||
\: <> \: $under ;
|
||||
\? <> \? $under ;
|
||||
|
||||
\; <> \? ;
|
||||
· <> \: ;
|
||||
|
||||
# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
|
||||
|
||||
\u0342 <> \u0302 ;
|
||||
|
||||
# IOTA: convert iota subscript to iota
|
||||
# first make previous alpha long!
|
||||
|
||||
$accent_minus = [[$accent]-[$iotasub$macron]];
|
||||
|
||||
Α } $accent_minus * $iotasub > | Α $macron ;
|
||||
α } $accent_minus * $iotasub > | α $macron ;
|
||||
|
||||
# now convert to uppercase if after uppercase, otherwise to lowercase
|
||||
|
||||
$upper $accent * { $iotasub > I ;
|
||||
$iotasub > i ;
|
||||
|
||||
| $1 $iotasub < ($evowel $macron $accentMinus *) i ;
|
||||
| $1 $iotasub < ($evowel $macron $accentMinus *) I ;
|
||||
|
||||
# BREATHING
|
||||
|
||||
# Convert rough breathing to h, and move before letters.
|
||||
|
||||
# Make A ` x = > H a x
|
||||
|
||||
Α ($macron?) $rough } $beforeLower > H | α $1;
|
||||
Ε $rough } $beforeLower > H | ε;
|
||||
Η $rough } $beforeLower > H | η ;
|
||||
Ι ($ddot?) $rough } $beforeLower > H | ι $1;
|
||||
Ο $rough } $beforeLower > H | ο ;
|
||||
Υ $rough } $beforeLower > H | υ ;
|
||||
Ω ($ddot?) $rough } $beforeLower > H | ω $1;
|
||||
|
||||
# Make A x ` = > H a x
|
||||
|
||||
Α ($glower $macron?) $rough > H | α $1 ;
|
||||
Ε ($glower) $rough > H | ε $1 ;
|
||||
Η ($glower) $rough > H | η $1 ;
|
||||
Ι ($glower $ddot?) $rough > H | ι $1 ;
|
||||
Ο ($glower) $rough > H | ο $1 ;
|
||||
Υ ($glower) $rough > H | υ $1 ;
|
||||
Ω ($glower $ddot?) $rough > H | ω $1 ;
|
||||
|
||||
#Otherwise, make x ` into h x and X ` into H X
|
||||
|
||||
($lcgvowel + $ddotmac? ) $rough > h | $1 ;
|
||||
($gvowel + $ddotmac? ) $rough > H | $1 ;
|
||||
|
||||
# Go backwards with H
|
||||
|
||||
| $1 $rough < h ($evowel $macron $ddot? $evowel2_i $macron?) ;
|
||||
| $1 $rough < h ($evowel $ddot? $evowel2 $macron?) ;
|
||||
| $1 $rough < h ($evowel $macron? $ddot?) ;
|
||||
|
||||
| $1 $rough < H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
|
||||
| $1 $rough < H ([AEIOUY] $ddot? $evowel2 $macron?) ;
|
||||
| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;
|
||||
|
||||
# titlecase, have to fix individually
|
||||
# in the future, we should add &uppercase() to make this easier
|
||||
|
||||
| A $1 $rough < H a ($macron $ddot? $evowel2_i $macron?) ;
|
||||
| E $1 $rough < H e ($macron $ddot? $evowel2_i $macron?) ;
|
||||
| I $1 $rough < H i ($macron $ddot? $evowel2_i $macron?) ;
|
||||
| O $1 $rough < H o ($macron $ddot? $evowel2_i $macron?) ;
|
||||
| U $1 $rough < H u ($macron $ddot? $evowel2_i $macron?) ;
|
||||
| Y $1 $rough < H y ($macron $ddot? $evowel2_i $macron?) ;
|
||||
|
||||
| A $1 $rough < H a ($ddot? $evowel2 $macron?) ;
|
||||
| E $1 $rough < H e ($ddot? $evowel2 $macron?) ;
|
||||
| I $1 $rough < H i ($ddot? $evowel2 $macron?) ;
|
||||
| O $1 $rough < H o ($ddot? $evowel2 $macron?) ;
|
||||
| U $1 $rough < H u ($ddot? $evowel2 $macron?) ;
|
||||
| Y $1 $rough < H y ($ddot? $evowel2 $macron?) ;
|
||||
|
||||
| A $1 $rough < H a ($macron? $ddot? ) ;
|
||||
| E $1 $rough < H e ($macron? $ddot? ) ;
|
||||
| I $1 $rough < H i ($macron? $ddot? ) ;
|
||||
| O $1 $rough < H o ($macron? $ddot? ) ;
|
||||
| U $1 $rough < H u ($macron? $ddot? ) ;
|
||||
| Y $1 $rough < H y ($macron? $ddot? ) ;
|
||||
|
||||
# Now do smooth
|
||||
|
||||
#delete smooth breathing for Latin
|
||||
$smooth > ;
|
||||
|
||||
# insert in Greek
|
||||
# the assumption is that all Marks are on letters.
|
||||
|
||||
| $1 $smooth < $notLetter { ([rR]) } [^hH$smooth$rough] ;
|
||||
| $1 $smooth < $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
|
||||
| $1 $smooth < $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
|
||||
|
||||
# TODO: preserve smooth/rough breathing if not
|
||||
# on initial vowel sequence
|
||||
|
||||
# need to have these up here so the rules don't mask
|
||||
|
||||
# remove now superfluous macron when returning
|
||||
|
||||
Α < A $macron ;
|
||||
α < a $macron ;
|
||||
|
||||
η <> e $macron ;
|
||||
Η <> E $macron ;
|
||||
|
||||
φ <> ph ;
|
||||
Ψ } $beforeLower <> Ps ;
|
||||
Ψ <> PS ;
|
||||
|
||||
Φ } $beforeLower <> Ph ;
|
||||
Φ <> PH ;
|
||||
ψ <> ps ;
|
||||
|
||||
ω <> o $macron ;
|
||||
Ω <> O $macron;
|
||||
|
||||
# NORMAL
|
||||
|
||||
α <> a ;
|
||||
Α <> A ;
|
||||
|
||||
β <> b ;
|
||||
Β <> B ;
|
||||
|
||||
γ } $gammaLike <> n } $egammaLike ;
|
||||
γ <> g ;
|
||||
Γ } $gammaLike <> N } $egammaLike ;
|
||||
Γ <> G ;
|
||||
|
||||
δ <> d ;
|
||||
Δ <> D ;
|
||||
|
||||
ε <> e ;
|
||||
Ε <> E ;
|
||||
|
||||
ζ <> z ;
|
||||
Ζ <> Z ;
|
||||
|
||||
θ <> th ;
|
||||
Θ } $beforeLower <> Th ;
|
||||
Θ <> TH ;
|
||||
|
||||
ι <> i ;
|
||||
Ι <> I ;
|
||||
|
||||
κ <> k ;
|
||||
Κ <> K ;
|
||||
|
||||
λ <> l ;
|
||||
Λ <> L ;
|
||||
|
||||
μ <> m ;
|
||||
Μ <> M ;
|
||||
|
||||
ν } $gammaLike > n\' ;
|
||||
ν <> n ;
|
||||
Ν } $gammaLike <> N\' ;
|
||||
Ν <> N ;
|
||||
|
||||
ξ <> x ;
|
||||
Ξ <> X ;
|
||||
|
||||
ο <> o ;
|
||||
Ο <> O ;
|
||||
|
||||
π <> p ;
|
||||
Π <> P ;
|
||||
|
||||
ρ $rough <> rh;
|
||||
Ρ $rough } $beforeLower <> Rh ;
|
||||
Ρ $rough <> RH ;
|
||||
ρ <> r ;
|
||||
Ρ <> R ;
|
||||
|
||||
# insert separator before things that turn into s
|
||||
|
||||
[Pp] { } [ςσΣϷϸϺϻ] > \' ;
|
||||
|
||||
# special S variants
|
||||
|
||||
Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
||||
ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
||||
Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
||||
ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
||||
|
||||
# underbar means exception
|
||||
|
||||
# before a letter, initial
|
||||
ς } $beforeLetter <> s $underbar } $beforeLetter;
|
||||
σ } $beforeLetter <> s } $beforeLetter;
|
||||
|
||||
# otherwise, after a letter = final
|
||||
$afterLetter { σ <> $afterLetter { s $underbar;
|
||||
$afterLetter { ς <> $afterLetter { s ;
|
||||
|
||||
# otherwise (isolated) = initial
|
||||
ς <> s $underbar;
|
||||
σ <> s ;
|
||||
|
||||
# [Pp] { Σ <> \'S ;
|
||||
Σ <> S ;
|
||||
|
||||
τ <> t ;
|
||||
Τ <> T ;
|
||||
|
||||
$vowel {υ } <> u ;
|
||||
υ <> y ;
|
||||
$vowel { Υ <> U ;
|
||||
Υ <> Y ;
|
||||
|
||||
χ <> ch ;
|
||||
Χ } $beforeLower <> Ch ;
|
||||
Χ <> CH ;
|
||||
|
||||
# Completeness for ASCII
|
||||
|
||||
$ignore = [[:Mark:]''] * ;
|
||||
|
||||
| k < c ;
|
||||
| ph < f ;
|
||||
| i < j ;
|
||||
| k < q ;
|
||||
| b < v } $vowel ;
|
||||
| b < w } $vowel;
|
||||
| u < v ;
|
||||
| u < w;
|
||||
| K < C ;
|
||||
| Ph < F ;
|
||||
| I < J ;
|
||||
| K < Q ;
|
||||
| B < V } $vowel ;
|
||||
| B < W } $vowel ;
|
||||
| U < V ;
|
||||
| U < W ;
|
||||
|
||||
$rough } $ignore [:UppercaseLetter:] > H ;
|
||||
$ignore [:UppercaseLetter:] { $rough > H ;
|
||||
$rough < H ;
|
||||
$rough <> h ;
|
||||
|
||||
# Completeness for Greek
|
||||
|
||||
ϐ > | β ;
|
||||
ϑ > | θ ;
|
||||
ϒ > | Υ ;
|
||||
ϕ > | φ ;
|
||||
ϖ > | π ;
|
||||
|
||||
ϰ > | κ ;
|
||||
ϱ > | ρ ;
|
||||
ϲ > | σ ;
|
||||
Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
|
||||
ϳ > j ;
|
||||
ϴ > | Θ ;
|
||||
ϵ > | ε ;
|
||||
|
||||
µ > | μ ;
|
||||
|
||||
ͺ > i;
|
||||
|
||||
# delete any trailing ' marks used for roundtripping
|
||||
|
||||
< [Ππ] { \' } [Ss] ;
|
||||
< [Νν] { \' } $egammaLike ;
|
||||
|
||||
::NFC (NFD) ;
|
||||
# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
|
||||
# ([\u0000-\u007F \u00B7 [:Latin:] [:nonspacing mark:]]) ;
|
||||
# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
|
||||
:: ( [':?A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0300-\u0337\u0339-\u0345\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEE\u1FF2-\u1FF4\u1FF6-\u1FFC\u212A-\u212B] ) ;
|
252
icu4c/source/data/translit/Greek_Latin_UNGEGN.txt
Normal file
252
icu4c/source/data/translit/Greek_Latin_UNGEGN.txt
Normal file
|
@ -0,0 +1,252 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# For modern Greek, based on UNGEGN rules.
|
||||
|
||||
# Rules are predicated on running NFD first, and NFC afterwards
|
||||
# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
|
||||
# WARNING: need to add accents to both filters ###
|
||||
# :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;
|
||||
|
||||
:: [[[:Greek:][:Mn:][:Me:]] [\:-;?\u00B7\u037E\u0387]] ;
|
||||
::NFD (NFC) ;
|
||||
|
||||
# Useful variables
|
||||
|
||||
$lower = [[:latin:][:greek:] & [:Ll:]] ;
|
||||
$upper = [[:latin:][:greek:] & [:Lu:]] ;
|
||||
$accent = [[:Mn:][:Me:]] ;
|
||||
|
||||
$macron = ̄ ;
|
||||
$ddot = ̈ ;
|
||||
|
||||
$lcgvowel = [αεηιουω] ;
|
||||
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
|
||||
$gvowel = [$lcgvowel $ucgvowel] ;
|
||||
$lcgvowelC = [$lcgvowel $accent] ;
|
||||
|
||||
$evowel = [aeiouyAEIOUY];
|
||||
$vowel = [ $evowel $gvowel] ;
|
||||
|
||||
$beforeLower = $accent * $lower ;
|
||||
|
||||
$gammaLike = [ΓΚΞΧγκξχϰ] ;
|
||||
$egammaLike = [GKXCgkxc] ;
|
||||
$smooth = ̓ ;
|
||||
$rough = ̔ ;
|
||||
$iotasub = ͅ ;
|
||||
|
||||
$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;
|
||||
|
||||
$under = ̱;
|
||||
|
||||
$caron = ̌;
|
||||
|
||||
$afterLetter = [:L:] [\'$accent]* ;
|
||||
$beforeLetter = [\'$accent]* [:L:] ;
|
||||
|
||||
# Fix punctuation
|
||||
|
||||
# preserve original
|
||||
\: <> \: $under ;
|
||||
\? <> \? $under ;
|
||||
|
||||
\; <> \? ;
|
||||
· <> \: ;
|
||||
|
||||
# Fix any ancient characters that creep in
|
||||
|
||||
͂ > ́ ;
|
||||
̂ > ́ ;
|
||||
̀ > ́ ;
|
||||
$smooth > ;
|
||||
$rough > ;
|
||||
$iotasub > ;
|
||||
ͺ > ;
|
||||
|
||||
# need to have these up here so the rules don't mask
|
||||
|
||||
η <> i $under ;
|
||||
Η <> I $under ;
|
||||
|
||||
Ψ } $beforeLower <> Ps ;
|
||||
Ψ <> PS ;
|
||||
ψ <> ps ;
|
||||
|
||||
ω <> o $under ;
|
||||
Ω <> O $under;
|
||||
|
||||
# at beginning or end of word, convert mp to b
|
||||
|
||||
[^[:L:]$accent] { μπ > b ;
|
||||
μπ } [^[:L:]$accent] > b ;
|
||||
[^[:L:]$accent] { [Μμ][Ππ] > B ;
|
||||
[Μμ][Ππ] } [^[:L:]$accent] > B ;
|
||||
|
||||
μπ < b ;
|
||||
Μπ < B } $beforeLower ;
|
||||
ΜΠ < B ;
|
||||
|
||||
# handle diphthongs ending with upsilon
|
||||
|
||||
ου <> ou ;
|
||||
ΟΥ <> OU ;
|
||||
Ου <> Ou ;
|
||||
οΥ <> oU ;
|
||||
|
||||
$fmaker = [aeiAEI] $under ? ;
|
||||
$shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate
|
||||
|
||||
$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;
|
||||
υ $1 < ( $shiftForwardVowels )* v $under ;
|
||||
|
||||
$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;
|
||||
υ $1 < ( $shiftForwardVowels )* f $under ;
|
||||
|
||||
$fmaker { Υ } $softener <> V $under ;
|
||||
$fmaker { Υ <> U $under ;
|
||||
|
||||
υ <> y ;
|
||||
Υ <> Y ;
|
||||
|
||||
# NORMAL
|
||||
|
||||
α <> a ;
|
||||
Α <> A ;
|
||||
|
||||
β <> v ;
|
||||
Β <> V ;
|
||||
|
||||
γ } $gammaLike <> n } $egammaLike ;
|
||||
γ <> g ;
|
||||
Γ } $gammaLike <> N } $egammaLike ;
|
||||
Γ <> G ;
|
||||
|
||||
δ <> d ;
|
||||
Δ <> D ;
|
||||
|
||||
ε <> e ;
|
||||
Ε <> E ;
|
||||
|
||||
ζ <> z ;
|
||||
Ζ <> Z ;
|
||||
|
||||
θ <> th ;
|
||||
Θ } $beforeLower <> Th ;
|
||||
Θ <> TH ;
|
||||
|
||||
ι <> i ;
|
||||
Ι <> I ;
|
||||
|
||||
κ <> k ;
|
||||
Κ <> K ;
|
||||
|
||||
λ <> l ;
|
||||
Λ <> L ;
|
||||
|
||||
μ <> m ;
|
||||
Μ <> M ;
|
||||
|
||||
ν } $gammaLike > n\' ;
|
||||
ν <> n ;
|
||||
Ν } $gammaLike <> N\' ;
|
||||
Ν <> N ;
|
||||
|
||||
ξ <> x ;
|
||||
Ξ <> X ;
|
||||
|
||||
ο <> o ;
|
||||
Ο <> O ;
|
||||
|
||||
π <> p ;
|
||||
Π <> P ;
|
||||
|
||||
ρ <> r ;
|
||||
Ρ <> R ;
|
||||
|
||||
# insert separator before things that turn into s
|
||||
[Pp] { } [ςσΣϷϸϺϻ] > \' ;
|
||||
|
||||
# special S variants
|
||||
|
||||
Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
||||
ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
||||
Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
||||
ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
||||
|
||||
# Underbar ($under) means exception
|
||||
|
||||
# before a letter, initial
|
||||
ς } $beforeLetter <> s $under } $beforeLetter;
|
||||
σ } $beforeLetter <> s } $beforeLetter;
|
||||
|
||||
# otherwise, after a letter = final
|
||||
$afterLetter { σ <> $afterLetter { s $under;
|
||||
$afterLetter { ς <> $afterLetter { s ;
|
||||
|
||||
# otherwise (isolated) = initial
|
||||
ς <> s $under;
|
||||
σ <> s ;
|
||||
|
||||
# [Pp] { Σ <> \'S ;
|
||||
Σ <> S ;
|
||||
|
||||
τ <> t ;
|
||||
Τ <> T ;
|
||||
|
||||
φ <> f ;
|
||||
Φ <> F ;
|
||||
|
||||
χ <> ch ;
|
||||
Χ } $beforeLower <> Ch ;
|
||||
Χ <> CH ;
|
||||
|
||||
# Completeness for ASCII
|
||||
|
||||
# $ignore = [[:Mark:]''] * ;
|
||||
|
||||
| ch < h ;
|
||||
| k < c ;
|
||||
| i < j ;
|
||||
| k < q ;
|
||||
| b < u } $vowel ;
|
||||
| b < w } $vowel ;
|
||||
| y < u ;
|
||||
| y < w ;
|
||||
|
||||
| Ch < H ;
|
||||
| K < C ;
|
||||
| I < J ;
|
||||
| K < Q ;
|
||||
| B < W } $vowel ;
|
||||
| B < U } $vowel ;
|
||||
| Y < W ;
|
||||
| Y < U ;
|
||||
|
||||
# Completeness for Greek
|
||||
|
||||
ϐ > | β ;
|
||||
ϑ > | θ ;
|
||||
ϒ > | Υ ;
|
||||
ϕ > | φ ;
|
||||
ϖ > | π ;
|
||||
|
||||
ϰ > | κ ;
|
||||
ϱ > | ρ ;
|
||||
ϲ > | σ ;
|
||||
Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
|
||||
ϳ > j ;
|
||||
ϴ > | Θ ;
|
||||
ϵ > | ε ;
|
||||
µ > | μ ;
|
||||
|
||||
# delete any trailing ' marks used for roundtripping
|
||||
|
||||
< [Ππ] { \' } [Ss] ;
|
||||
< [Νν] { \' } $egammaLike ;
|
||||
|
||||
::NFC (NFD) ;
|
||||
|
||||
# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
|
||||
:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;
|
91
icu4c/source/data/translit/Gujarati_InterIndic.txt
Normal file
91
icu4c/source/data/translit/Gujarati_InterIndic.txt
Normal file
|
@ -0,0 +1,91 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Gujarati-InterIndic
|
||||
#:: NFD (NFC) ;
|
||||
\u0a81>\ue001; # SIGN CANDRABINDU
|
||||
\u0a82>\ue002; # SIGN ANUSVARA
|
||||
\u0a83>\ue003; # SIGN VISARGA
|
||||
\u0a85>\ue005; # LETTER A
|
||||
\u0a86>\ue006; # LETTER AA
|
||||
\u0a87>\ue007; # LETTER I
|
||||
\u0a88>\ue008; # LETTER II
|
||||
\u0a89>\ue009; # LETTER U
|
||||
\u0a8a>\ue00a; # LETTER UU
|
||||
\u0a8b>\ue00b; # LETTER VOCALIC R
|
||||
\u0a8c>\ue00c; # LETTER VOCALIC L
|
||||
\u0a8d>\ue00d; # VOWEL CANDRA E
|
||||
\u0a8f>\ue00f; # LETTER E
|
||||
\u0a90>\ue010; # LETTER AI
|
||||
\u0a91>\ue011; # VOWEL CANDRA O
|
||||
\u0a93>\ue013; # LETTER O
|
||||
\u0a94>\ue014; # LETTER AU
|
||||
\u0a95>\ue015; # LETTER KA
|
||||
\u0a96>\ue016; # LETTER KHA
|
||||
\u0a97>\ue017; # LETTER GA
|
||||
\u0a98>\ue018; # LETTER GHA
|
||||
\u0a99>\ue019; # LETTER NGA
|
||||
\u0a9a>\ue01a; # LETTER CA
|
||||
\u0a9b>\ue01b; # LETTER CHA
|
||||
\u0a9c>\ue01c; # LETTER JA
|
||||
\u0a9d>\ue01d; # LETTER JHA
|
||||
\u0a9e>\ue01e; # LETTER NYA
|
||||
\u0a9f>\ue01f; # LETTER TTA
|
||||
\u0aa0>\ue020; # LETTER TTHA
|
||||
\u0aa1>\ue021; # LETTER DDA
|
||||
\u0aa2>\ue022; # LETTER DDHA
|
||||
\u0aa3>\ue023; # LETTER NNA
|
||||
\u0aa4>\ue024; # LETTER TA
|
||||
\u0aa5>\ue025; # LETTER THA
|
||||
\u0aa6>\ue026; # LETTER DA
|
||||
\u0aa7>\ue027; # LETTER DHA
|
||||
\u0aa8>\ue028; # LETTER NA
|
||||
\u0aaa>\ue02a; # LETTER PA
|
||||
\u0aab>\ue02b; # LETTER PHA
|
||||
\u0aac>\ue02c; # LETTER BA
|
||||
\u0aad>\ue02d; # LETTER BHA
|
||||
\u0aae>\ue02e; # LETTER MA
|
||||
\u0aaf>\ue02f; # LETTER YA
|
||||
\u0ab0>\ue030; # LETTER RA
|
||||
\u0ab2>\ue032; # LETTER LA
|
||||
\u0ab3>\ue033; # LETTER LLA
|
||||
\u0ab5>\ue035; # LETTER VA
|
||||
\u0ab6>\ue036; # LETTER SHA
|
||||
\u0ab7>\ue037; # LETTER SSA
|
||||
\u0ab8>\ue038; # LETTER SA
|
||||
\u0ab9>\ue039; # LETTER HA
|
||||
\u0abc>\ue03c; # SIGN NUKTA
|
||||
\u0abd>\ue03d; # SIGN AVAGRAHA
|
||||
\u0abe>\ue03e; # VOWEL SIGN AA
|
||||
\u0abf>\ue03f; # VOWEL SIGN I
|
||||
\u0ac0>\ue040; # VOWEL SIGN II
|
||||
\u0ac1>\ue041; # VOWEL SIGN U
|
||||
\u0ac2>\ue042; # VOWEL SIGN UU
|
||||
\u0ac3>\ue043; # VOWEL SIGN VOCALIC R
|
||||
\u0ac4>\ue044; # VOWEL SIGN VOCALIC RR
|
||||
\u0ac5>\ue045; # VOWEL SIGN CANDRA E
|
||||
\u0ac7>\ue047; # VOWEL SIGN E
|
||||
\u0ac8>\ue048; # VOWEL SIGN AI
|
||||
\u0ac9>\ue049; # VOWEL SIGN CANDRA O
|
||||
\u0acb>\ue04b; # VOWEL SIGN O
|
||||
\u0acc>\ue04c; # VOWEL SIGN AU
|
||||
\u0acd>\ue04d; # SIGN VIRAMA
|
||||
\u0ad0>\ue050; # OM
|
||||
\u0ae0>\ue060; # LETTER VOCALIC RR
|
||||
\u0ae1>\ue061; # LETTER VOCALIC LL
|
||||
\u0ae6>\ue066; # DIGIT ZERO
|
||||
\u0ae7>\ue067; # DIGIT ONE
|
||||
\u0ae8>\ue068; # DIGIT TWO
|
||||
\u0ae9>\ue069; # DIGIT THREE
|
||||
\u0aea>\ue06a; # DIGIT FOUR
|
||||
\u0aeb>\ue06b; # DIGIT FIVE
|
||||
\u0aec>\ue06c; # DIGIT SIX
|
||||
\u0aed>\ue06d; # DIGIT SEVEN
|
||||
\u0aee>\ue06e; # DIGIT EIGHT
|
||||
\u0aef>\ue06f; # DIGIT NINE
|
||||
\u0964>\ue064; # DANDA
|
||||
\u0965>\ue065; # DOUBLE DANDA
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
95
icu4c/source/data/translit/Gurmukhi_InterIndic.txt
Normal file
95
icu4c/source/data/translit/Gurmukhi_InterIndic.txt
Normal file
|
@ -0,0 +1,95 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Gurmukhi-InterIndic
|
||||
#:: NFD (NFC) ;
|
||||
|
||||
#\u0A16\u0A3C>\uE059; # LETTER KHHA
|
||||
#\u0A17\u0A3C>\uE05A; # LETTER GHHA
|
||||
#\u0A1C\u0A3C>\uE05B; # LETTER ZA
|
||||
#\u0A38\u0A3C>\uE036; # LETTER SHA
|
||||
#\u0A32\u0A3C>\uE033; # LETTER LLA
|
||||
#\u0A2B\u0A3C>\uE05E; # LETTER FA
|
||||
\u0A01>\ue001; # SIGN CHANDRABINDU
|
||||
\u0A02>\uE002; # SIGN BINDI
|
||||
\u0A05>\uE005; # LETTER A
|
||||
\u0A06>\uE006; # LETTER AA
|
||||
\u0A07>\uE007; # LETTER I
|
||||
\u0A08>\uE008; # LETTER II
|
||||
\u0A09>\uE009; # LETTER U
|
||||
\u0A0A>\uE00A; # LETTER UU
|
||||
\u0A0C>\uE032; # FALLBACK : VOCALIC LA
|
||||
\u0A0F>\uE00F; # LETTER EE
|
||||
\u0A10>\uE010; # LETTER AI
|
||||
\u0A13>\uE013; # LETTER OO
|
||||
\u0A14>\uE014; # LETTER AU
|
||||
\u0A15>\uE015; # LETTER KA
|
||||
\u0A16>\uE016; # LETTER KHA
|
||||
\u0A17>\uE017; # LETTER GA
|
||||
\u0A18>\uE018; # LETTER GHA
|
||||
\u0A19>\uE019; # LETTER NGA
|
||||
\u0A1A>\uE01A; # LETTER CA
|
||||
\u0A1B>\uE01B; # LETTER CHA
|
||||
\u0A1C>\uE01C; # LETTER JA
|
||||
\u0A1D>\uE01D; # LETTER JHA
|
||||
\u0A1E>\uE01E; # LETTER NYA
|
||||
\u0A1F>\uE01F; # LETTER TTA
|
||||
\u0A20>\uE020; # LETTER TTHA
|
||||
\u0A21>\uE021; # LETTER DDA
|
||||
\u0A22>\uE022; # LETTER DDHA
|
||||
\u0A23>\uE023; # LETTER NNA
|
||||
\u0A24>\uE024; # LETTER TA
|
||||
\u0A25>\uE025; # LETTER THA
|
||||
\u0A26>\uE026; # LETTER DA
|
||||
\u0A27>\uE027; # LETTER DHA
|
||||
\u0A28>\uE028; # LETTER NA
|
||||
\u0A2A>\uE02A; # LETTER PA
|
||||
\u0A2B>\uE02B; # LETTER PHA
|
||||
\u0A2C>\uE02C; # LETTER BA
|
||||
\u0A2D>\uE02D; # LETTER BHA
|
||||
\u0A2E>\uE02E; # LETTER MA
|
||||
\u0A2F>\uE02F; # LETTER YA
|
||||
\u0A30>\uE030; # LETTER RA
|
||||
\u0A32>\uE032; # LETTER LA
|
||||
\u0a33>\uE033; # FALLBACK
|
||||
\u0A35>\uE035; # LETTER VA
|
||||
\u0a36>\ue036; # LETTER SHA
|
||||
\u0A38\u0A3C>\ue036; # FALLBACK: LETTER SA + SIGN NUKTA > LETTER SHA (must precede the plain \u0A38 rule)
|
||||
\u0A38>\uE038; # LETTER SA
|
||||
\u0A39>\uE039; # LETTER HA
|
||||
\u0A3C>\uE03C; # SIGN NUKTA
|
||||
\u0A3E>\uE03E; # VOWEL SIGN AA
|
||||
\u0A3F>\uE03F; # VOWEL SIGN I
|
||||
\u0A40>\uE040; # VOWEL SIGN II
|
||||
\u0A41>\uE041; # VOWEL SIGN U
|
||||
\u0A42>\uE042; # VOWEL SIGN UU
|
||||
\u0A47>\uE047; # VOWEL SIGN EE
|
||||
\u0A48>\uE048; # VOWEL SIGN AI
|
||||
\u0A4B>\uE04B; # VOWEL SIGN OO
|
||||
\u0A4C>\uE04C; # VOWEL SIGN AU
|
||||
\u0A4D>\uE04D; # SIGN VIRAMA
|
||||
|
||||
\u0A5C>\uE05C; # LETTER RRA
|
||||
|
||||
\u0A66>\uE066; # DIGIT ZERO
|
||||
\u0A67>\uE067; # DIGIT ONE
|
||||
\u0A68>\uE068; # DIGIT TWO
|
||||
\u0A69>\uE069; # DIGIT THREE
|
||||
\u0A6A>\uE06A; # DIGIT FOUR
|
||||
\u0A6B>\uE06B; # DIGIT FIVE
|
||||
\u0A6C>\uE06C; # DIGIT SIX
|
||||
\u0A6D>\uE06D; # DIGIT SEVEN
|
||||
\u0A6E>\uE06E; # DIGIT EIGHT
|
||||
\u0A6F>\uE06F; # DIGIT NINE
|
||||
\u0A70>\uE07C; # TIPPI
|
||||
\u0A71>\uE07D; # ADDAK
|
||||
\u0A72>\uE07E; # IRI
|
||||
\u0A73>\uE07F; # URA
|
||||
\u0A74>\uE080; # EK ONKAR
|
||||
\u0964>\ue064; # DANDA
|
||||
\u0965>\ue065; # DOUBLE DANDA
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
||||
|
1440
icu4c/source/data/translit/Han_Latin.txt
Normal file
1440
icu4c/source/data/translit/Han_Latin.txt
Normal file
File diff suppressed because it is too large
Load diff
24
icu4c/source/data/translit/Han_Spacedhan.txt
Normal file
24
icu4c/source/data/translit/Han_Spacedhan.txt
Normal file
|
@ -0,0 +1,24 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Only intended for internal use
|
||||
:: fullwidth-halfwidth;
|
||||
|
||||
。 > '.';
|
||||
|
||||
$terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]];
|
||||
$initialPunct = [:Ps:][:Pi:];
|
||||
|
||||
# add space between any Han or terminal punctuation and letters, and
|
||||
# between letters and Han or initial punct
|
||||
|
||||
[[:Ideographic:] $terminalPunct] {} [:Letter:] > ' ' ;
|
||||
[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] > ' ' ;
|
||||
|
||||
# remove spacing between ideographs and other letters
|
||||
|
||||
< [:Ideographic:] { ' ' } [:Letter:] ;
|
||||
< [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;
|
||||
|
109
icu4c/source/data/translit/Hebrew_Latin.txt
Normal file
109
icu4c/source/data/translit/Hebrew_Latin.txt
Normal file
|
@ -0,0 +1,109 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Transliteration table for Hebrew
|
||||
# Based on the UNGEGN table at:
|
||||
# http://www.eki.ee/wgrs/rom1_he.pdf
|
||||
#
|
||||
# Exceptions:
|
||||
# - Accents are added to disambiguate letters
|
||||
# - Combinations of dagesh, shin/sin dot that produce different
|
||||
# letters are not yet encoded.
|
||||
#
|
||||
# To test, open:
|
||||
# http://oss.software.ibm.com/cgi-bin/icu/tr
|
||||
# Click Edit, paste in this file, Save As hebrew-latin/XXX
|
||||
# (where XXX is a username)
|
||||
# Now go back to the main window, and try it out.
|
||||
# Use hebrew-latin/XXX for Output 1, and (Inverse) for Output 2
|
||||
# Paste in hebrew text in Input, and hit Transliterate.
|
||||
#
|
||||
# For more information, see:
|
||||
# http://oss.software.ibm.com/icu/userguide/Transliteration.html
|
||||
|
||||
:: [[:Hebrew:] [:^ccc=0:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138̄\u05BF] - [\u05BD]] ;
|
||||
:: nfkd (nfc) ;
|
||||
$letterAfter = [:M:]* [:L:] ;
|
||||
|
||||
# move longer items here to avoid masking
|
||||
|
||||
ח <> ẖ ;
|
||||
צ <> ẕ } $letterAfter;
|
||||
ץ <> ẕ ;
|
||||
ש <> ş ;
|
||||
ת <> ţ ;
|
||||
|
||||
א <> ʼ ;
|
||||
ב <> b ;
|
||||
ג <> g ;
|
||||
ד <> d ;
|
||||
ה <> h ;
|
||||
ו <> w ;
|
||||
ז <> z ;
|
||||
ט <> t ;
|
||||
י <> y ;
|
||||
כ <> k } $letterAfter;
|
||||
ך <> k ;
|
||||
ל <> l ;
|
||||
מ <> m } $letterAfter;
|
||||
ם <> m ;
|
||||
נ <> n } $letterAfter;
|
||||
ן <> n ;
|
||||
ס <> s ;
|
||||
ע <> ʻ ;
|
||||
פ <> p } $letterAfter;
|
||||
ף <> p ;
|
||||
ק <> q ;
|
||||
ר <> r ;
|
||||
|
||||
װ > | וו; # HEBREW LIGATURE YIDDISH DOUBLE VAV
|
||||
ױ > | וי; # HEBREW LIGATURE YIDDISH VAV YOD
|
||||
ײ > | יי ; # HEBREW LIGATURE YIDDISH DOUBLE YOD
|
||||
|
||||
|
||||
ּ <> ̇ ; # dagesh just goes to overdot for now
|
||||
ׁ <> ̌ ; # shin dot -> sh
|
||||
ׂ <> ̂ ; # sin dot -> s
|
||||
|
||||
# points
|
||||
$above = [^[:ccc=0:][:ccc=230:]]*;
|
||||
|
||||
ֲ > à ;
|
||||
ֲ $1< a ($above) ̀;
|
||||
|
||||
ָ > á ;
|
||||
ָ $1 < a ($above) ́;
|
||||
|
||||
ֱ > è ;
|
||||
ֱ $1 < e ($above) ̀;
|
||||
|
||||
ֵ > é ;
|
||||
ֵ $1 < e ($above) ́;
|
||||
|
||||
ְ > e ̆ ;
|
||||
ְ $1 < e ($above) ̆;
|
||||
|
||||
ֹ > ò ;
|
||||
ֹ $1 < o ($above) ̀;
|
||||
|
||||
ִ <> i ;
|
||||
ֻ <> u ;
|
||||
ַ <> a ;
|
||||
ֶ <> e ;
|
||||
ֳ <> o ;
|
||||
|
||||
\u05BF <> ̄ ;
|
||||
|
||||
# fallbacks
|
||||
ק < c ;
|
||||
פ < f } $letterAfter;
|
||||
ף < f ;
|
||||
ז < j ;
|
||||
ו < v ;
|
||||
כס < x ;
|
||||
|
||||
:: (lower);
|
||||
:: nfc (nfd) ;
|
||||
:: ([[:Latin:] [:^ccc=0:] [\u02BB-\u02BC\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 ̄ ]]);
|
207
icu4c/source/data/translit/Hiragana_Katakana.txt
Normal file
207
icu4c/source/data/translit/Hiragana_Katakana.txt
Normal file
|
@ -0,0 +1,207 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# note: a global filter is more efficient, but MUST include all source chars
|
||||
:: [\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;
|
||||
:: NFKC ();
|
||||
|
||||
# Hiragana-Katakana
|
||||
|
||||
# This is largely a one-to-one mapping, but it has a
|
||||
# few kinks:
|
||||
|
||||
# 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
|
||||
# Hiragana equivalents. We use Hiragana wa/wi/we/wo
|
||||
# (308F-3092) with a voicing mark (3099), which is
|
||||
# semantically equivalent. However, this is a non-
|
||||
# roundtripping transformation.
|
||||
|
||||
# 2. The Katakana small ka/ke (30F5,30F6) have no
|
||||
# Hiragana equivalents. We convert them to normal
|
||||
# Hiragana ka/ke (304B,3051). This is a one-way
|
||||
# information-losing transformation and precludes
|
||||
# round-tripping of 30F5 and 30F6.
|
||||
|
||||
# 3. The combining marks 3099-309C are in the Hiragana
|
||||
# block, but they apply to Katakana as well, so we
|
||||
# leave them untouched.
|
||||
|
||||
# 4. The Katakana prolonged sound mark 30FC doubles the
|
||||
# preceding vowel. This is a one-way information-
|
||||
# losing transformation from Katakana to Hiragana.
|
||||
|
||||
# 5. The Katakana middle dot separates words in foreign
|
||||
# expressions; we leave this unmodified.
|
||||
|
||||
# The above points preclude successful round-trip
|
||||
# transformations of arbitrary input text. However,
|
||||
# they provide naturalistic results that should conform
|
||||
# to user expectations.
|
||||
|
||||
|
||||
# Combining equivalents va/vi/ve/vo
|
||||
わ゙ <> ヷ;
|
||||
ゐ゙ <> ヸ;
|
||||
ゑ゙ <> ヹ;
|
||||
を゙ <> ヺ;
|
||||
|
||||
# One-to-one mappings, main block
|
||||
# 3041:3094 <> 30A1:30F4
|
||||
# 309D,E <> 30FD,E
|
||||
ぁ <> ァ;
|
||||
あ <> ア;
|
||||
ぃ <> ィ;
|
||||
い <> イ;
|
||||
ぅ <> ゥ;
|
||||
う <> ウ;
|
||||
ぇ <> ェ;
|
||||
え <> エ;
|
||||
ぉ <> ォ;
|
||||
お <> オ;
|
||||
か <> カ;
|
||||
が <> ガ;
|
||||
き <> キ;
|
||||
ぎ <> ギ;
|
||||
く <> ク;
|
||||
ぐ <> グ;
|
||||
け <> ケ;
|
||||
げ <> ゲ;
|
||||
こ <> コ;
|
||||
ご <> ゴ;
|
||||
さ <> サ;
|
||||
ざ <> ザ;
|
||||
し <> シ;
|
||||
じ <> ジ;
|
||||
す <> ス;
|
||||
ず <> ズ;
|
||||
せ <> セ;
|
||||
ぜ <> ゼ;
|
||||
そ <> ソ;
|
||||
ぞ <> ゾ;
|
||||
た <> タ;
|
||||
だ <> ダ;
|
||||
ち <> チ;
|
||||
ぢ <> ヂ;
|
||||
っ <> ッ;
|
||||
つ <> ツ;
|
||||
づ <> ヅ;
|
||||
て <> テ;
|
||||
で <> デ;
|
||||
と <> ト;
|
||||
ど <> ド;
|
||||
な <> ナ;
|
||||
に <> ニ;
|
||||
ぬ <> ヌ;
|
||||
ね <> ネ;
|
||||
の <> ノ;
|
||||
は <> ハ;
|
||||
ば <> バ;
|
||||
ぱ <> パ;
|
||||
ひ <> ヒ;
|
||||
び <> ビ;
|
||||
ぴ <> ピ;
|
||||
ふ <> フ;
|
||||
ぶ <> ブ;
|
||||
ぷ <> プ;
|
||||
へ <> ヘ;
|
||||
べ <> ベ;
|
||||
ぺ <> ペ;
|
||||
ほ <> ホ;
|
||||
ぼ <> ボ;
|
||||
ぽ <> ポ;
|
||||
ま <> マ;
|
||||
み <> ミ;
|
||||
む <> ム;
|
||||
め <> メ;
|
||||
も <> モ;
|
||||
ゃ <> ャ;
|
||||
や <> ヤ;
|
||||
ゅ <> ュ;
|
||||
ゆ <> ユ;
|
||||
ょ <> ョ;
|
||||
よ <> ヨ;
|
||||
ら <> ラ;
|
||||
り <> リ;
|
||||
る <> ル;
|
||||
れ <> レ;
|
||||
ろ <> ロ;
|
||||
ゎ <> ヮ;
|
||||
わ <> ワ;
|
||||
ゐ <> ヰ;
|
||||
ゑ <> ヱ;
|
||||
を <> ヲ;
|
||||
ん <> ン;
|
||||
ゔ <> ヴ;
|
||||
ゝ <> ヽ;
|
||||
ゞ <> ヾ;
|
||||
|
||||
# One-way Katakana-Hiragana xform of small K ka/ke to
|
||||
# normal H ka/ke.
|
||||
か < ヵ;
|
||||
け < ヶ;
|
||||
|
||||
# Katakana followed by a prolonged sound mark 30FC has
|
||||
# its final vowel doubled. This is a Katakana-Hiragana
|
||||
# one-way information-losing transformation. We
|
||||
# include the small Katakana (e.g., small A 3041) and
|
||||
# do not distinguish them from their large
|
||||
# counterparts. It doesn't make sense to double a
|
||||
# small counterpart vowel as a small Hiragana vowel, so
|
||||
# we don't do so. In natural text this should never
|
||||
# occur anyway. If a 30FC is seen without a preceding
|
||||
# vowel sound (e.g., after n 30F3) we do not change it.
|
||||
|
||||
### $long = ー;
|
||||
|
||||
# The following categories are Hiragana, not Katakana
|
||||
# as might be expected, since by the time we get to the
|
||||
# 30FC, the preceding character will have already been
|
||||
# transformed to Hiragana.
|
||||
|
||||
# {The following mechanically generated from the
|
||||
# Unicode 3.0 data:}
|
||||
|
||||
$xa = [ \
|
||||
ぁ あ か が さ ざ \
|
||||
た だ な は ば ぱ \
|
||||
ま ゃ や ら ゎ わ \
|
||||
];
|
||||
|
||||
$xi = [ \
|
||||
ぃ い き ぎ し じ \
|
||||
ち ぢ に ひ び ぴ \
|
||||
み り ゐ \
|
||||
];
|
||||
|
||||
$xu = [ \
|
||||
ぅ う く ぐ す ず \
|
||||
っ つ づ ぬ ふ ぶ \
|
||||
ぷ む ゅ ゆ る ゔ \
|
||||
];
|
||||
|
||||
$xe = [ \
|
||||
ぇ え け げ せ ぜ \
|
||||
て で ね へ べ ぺ \
|
||||
め れ ゑ \
|
||||
];
|
||||
|
||||
$xo = [ \
|
||||
ぉ お こ ご そ ぞ \
|
||||
と ど の ほ ぼ ぽ \
|
||||
も ょ よ ろ を \
|
||||
];
|
||||
|
||||
あ < $xa {ー};
|
||||
い < $xi {ー};
|
||||
う < $xu {ー};
|
||||
え < $xe {ー};
|
||||
お < $xo {ー};
|
||||
|
||||
:: (NFKC) ;
|
||||
|
||||
# note: a global filter is more efficient, but MUST include all source chars!!
|
||||
:: ([\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);
|
||||
|
||||
# eof
|
14
icu4c/source/data/translit/Hiragana_Latin.txt
Normal file
14
icu4c/source/data/translit/Hiragana_Latin.txt
Normal file
|
@ -0,0 +1,14 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
:: [\u3041-\u3094\u3099\u309D-\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0\u30D3\u30D6\u30D9\u30DC\u30F4\u30F7-\u30FA\u30FE] ;
|
||||
:: NFD ;
|
||||
|
||||
:: Hiragana-Katakana;
|
||||
:: Katakana-Latin;
|
||||
|
||||
:: NFC ;
|
||||
:: (Lower) ;
|
||||
:: ([',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]) ;
|
147
icu4c/source/data/translit/InterIndic_Bengali.txt
Normal file
147
icu4c/source/data/translit/InterIndic_Bengali.txt
Normal file
|
@ -0,0 +1,147 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Bengali
|
||||
#:: NFD (NFC) ;
|
||||
\uE001>\u0981; # SIGN CANDRABINDU
|
||||
\uE002>\u0982; # SIGN ANUSVARA
|
||||
\uE003>\u0983; # SIGN VISARGA
|
||||
\uE004>\u0985; # FALLBACK TO LETTER A
|
||||
\uE005>\u0985; # LETTER A
|
||||
\uE006>\u0986; # LETTER AA
|
||||
\uE007>\u0987; # LETTER I
|
||||
\uE008>\u0988; # LETTER II
|
||||
\uE009>\u0989; # LETTER U
|
||||
\uE00A>\u098A; # LETTER UU
|
||||
\uE00B>\u098B; # LETTER VOCALIC R
|
||||
\uE00C>\u098C; # LETTER VOCALIC L
|
||||
\uE00D>\u098F; # FALLBACK
|
||||
\uE00E>\u098F; # FALLBACK
|
||||
\uE00F>\u098F; # LETTER E
|
||||
\uE010>\u0990; # LETTER AI
|
||||
\uE011>\u0993; # FALLBACK
|
||||
\uE012>\u0993; # FALLBACK
|
||||
\uE013>\u0993; # LETTER O
|
||||
\uE014>\u0994; # LETTER AU
|
||||
\uE015>\u0995; # LETTER KA
|
||||
\uE016>\u0996; # LETTER KHA
|
||||
\uE017>\u0997; # LETTER GA
|
||||
\uE018>\u0998; # LETTER GHA
|
||||
\uE019>\u0999; # LETTER NGA
|
||||
\uE01A>\u099A; # LETTER CA
|
||||
\uE01B>\u099B; # LETTER CHA
|
||||
\uE01C>\u099C; # LETTER JA
|
||||
\uE01D>\u099D; # LETTER JHA
|
||||
\uE01E>\u099E; # LETTER NYA
|
||||
\uE01F>\u099F; # LETTER TTA
|
||||
\uE020>\u09A0; # LETTER TTHA
|
||||
\uE021>\u09A1; # LETTER DDA
|
||||
\uE022>\u09A2; # LETTER DDHA
|
||||
\uE023>\u09A3; # LETTER NNA
|
||||
\uE024>\u09A4; # LETTER TA
|
||||
\uE025>\u09A5; # LETTER THA
|
||||
\uE026>\u09A6; # LETTER DA
|
||||
\uE027>\u09A7; # LETTER DHA
|
||||
\uE028>\u09A8; # LETTER NA
|
||||
\uE029>\u09A8\u09BC; # REMAP (indicExceptions.txt): \u09a9>\u09a8 = LETTER NNNA>LETTER NA
|
||||
\uE02A>\u09AA; # LETTER PA
|
||||
\uE02B>\u09AB; # LETTER PHA
|
||||
\uE02C>\u09AC; # LETTER BA
|
||||
\uE02D>\u09AD; # LETTER BHA
|
||||
\uE02E>\u09AE; # LETTER MA
|
||||
\uE02F>\u09AF; # LETTER YA
|
||||
\uE030>\u09B0; # LETTER RA
|
||||
\uE031>\u09B0\u09BC; # FALLBACK to RA
|
||||
\uE032>\u09B2; # LETTER LA
|
||||
\uE033>\u09B2; # REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA
|
||||
\uE034>\u09B2; # REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA
|
||||
\uE035>\u09AC; # REMAP (indicExceptions.txt): \u09b5>\u09ac = LETTER VA>LETTER BA
|
||||
\uE036>\u09B6; # LETTER SHA
|
||||
\uE037>\u09B7; # LETTER SSA
|
||||
\uE038>\u09B8; # LETTER SA
|
||||
\uE039>\u09B9; # LETTER HA
|
||||
\uE03C>\u09BC; # SIGN NUKTA
|
||||
\uE03D>\u09bd; # SIGN AVAGRAHA
|
||||
\uE03E>\u09BE; # VOWEL SIGN AA
|
||||
\uE03F>\u09BF; # VOWEL SIGN I
|
||||
\uE040>\u09C0; # VOWEL SIGN II
|
||||
\uE041>\u09C1; # VOWEL SIGN U
|
||||
\uE042>\u09C2; # VOWEL SIGN UU
|
||||
\uE043>\u09C3; # VOWEL SIGN VOCALIC R
|
||||
\uE044>\u09C4; # VOWEL SIGN VOCALIC RR
|
||||
\uE045>\u09C7; # REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
\uE046>\u09C7; # FALLBACK
|
||||
\uE047>\u09C7; # VOWEL SIGN E
|
||||
\uE048>\u09C8; # VOWEL SIGN AI
|
||||
\uE049>\u09C7\u09BE; # REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
\uE04A>\u09C7\u09BE; # FALLBACK
|
||||
\uE04B>\u09C7\u09BE; # VOWEL SIGN O
|
||||
\uE04C>\u09C7\u09D7; # VOWEL SIGN AU
|
||||
\uE04D>\u09CD; # SIGN VIRAMA
|
||||
\uE050>\u0993\u0982; # InterIndic-Bengali: OM
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\uE055>; # LENGTH MARK
|
||||
\uE056>\u09C8; # REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\uE057>\u09D7; # AU LENGTH MARK
|
||||
\uE058>\u0995\u09BC; # FALLBACK
|
||||
\uE059>\u0996\u09BC; # REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA
|
||||
\uE05A>\u0997\u09BC; # REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA
|
||||
\uE05B>\u099C\u09BC; # REMAP (indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA
|
||||
\uE05C>\u09A1\u09BC; # FALLBACK
|
||||
\uE05D>\u09A2\u09BC; # LETTER RHA
|
||||
\uE05E>\u09AB\u09BC; # REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA
|
||||
\uE05F>\u09AF\u09BC; # LETTER YYA
|
||||
\uE060>\u09E0; # LETTER VOCALIC RR
|
||||
\uE061>\u09E1; # LETTER VOCALIC LL
|
||||
\uE062>\u09E2; # VOWEL SIGN VOCALIC L
|
||||
\uE063>\u09E3; # VOWEL SIGN VOCALIC LL
|
||||
\uE064>\u0964; # DANDA
|
||||
\uE065>\u0965; # DOUBLE DANDA
|
||||
\uE066>\u09E6; # DIGIT ZERO
|
||||
\uE067>\u09E7; # DIGIT ONE
|
||||
\uE068>\u09E8; # DIGIT TWO
|
||||
\uE069>\u09E9; # DIGIT THREE
|
||||
\uE06A>\u09EA; # DIGIT FOUR
|
||||
\uE06B>\u09EB; # DIGIT FIVE
|
||||
\uE06C>\u09EC; # DIGIT SIX
|
||||
\uE06D>\u09ED; # DIGIT SEVEN
|
||||
\uE06E>\u09EE; # DIGIT EIGHT
|
||||
\uE06F>\u09EF; # DIGIT NINE
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u09F0; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u09F1; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>\u09F2; # RUPEE MARK
|
||||
\ue074>\u09F3; # RUPEE SIGN
|
||||
\ue075>\u09F4; # CURRENCY NUMERATOR ONE
|
||||
\ue076>\u09F5; # CURRENCY NUMERATOR TWO
|
||||
\ue077>\u09F6; # CURRENCY NUMERATOR THREE
|
||||
\ue078>\u09F7; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>\u09F8; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>\u09F9; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>\u09FA; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u09AC; # FALLBACK FOR ORIYA LETTER WA
|
||||
0 > \u09E6; # FALLBACK FOR TAMIL
|
||||
1 > \u09E7;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
158
icu4c/source/data/translit/InterIndic_Devanagari.txt
Normal file
158
icu4c/source/data/translit/InterIndic_Devanagari.txt
Normal file
|
@ -0,0 +1,158 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Devanagari
|
||||
#:: NFD (NFC) ;
|
||||
#Rules for Decomposed characters
|
||||
\ue028\ue03c > \u0929; #\ue029
|
||||
\ue030\ue03c > \u0931; #\ue031
|
||||
\ue033\ue03c > \u0934; #\ue034
|
||||
\ue015\ue03c > \u0958; #\ue058 LETTER QA (For Urdu)
|
||||
\ue016\ue03c > \u0959; #\ue059 LETTER KHHA (For Urdu)
|
||||
\ue017\ue03c > \u095a; #\ue05a LETTER GHHA (For Urdu)
|
||||
\ue01c\ue03c > \u095b; #\ue05b LETTER ZA (For Urdu)
|
||||
\ue021\ue03c > \u095c; #\ue05c LETTER DDDHA (pronounced RRA)
|
||||
\ue022\ue03c > \u095d; #\ue05d LETTER RHA (pronounced RRHA)
|
||||
\ue02b\ue03c > \u095e; #\ue05e LETTER FA
|
||||
\ue02f\ue03c > \u095f; #\ue05f LETTER YYA
|
||||
|
||||
#Decomposed compatibility transliterations
|
||||
\ue012\ue057>\u0914; # FALLBACK FOR TAMIL AU
|
||||
0 > \u0966; # FALLBACK FOR TAMIL
|
||||
1 > \u0967;
|
||||
|
||||
\ue055>; # FALLBACK BLOW AWAY KANNADA AND TELUGU LENGTH MARK
|
||||
\ue056>; # FALLBACK BLOW AWAY KANNADA AND TELUGU AI LENGTH MARK
|
||||
\ue057>; # FALLBACK BLOW AWAY TAMIL AU LENGTH MARK
|
||||
|
||||
\ue001 > \u0901; # SIGN CANDRABINDU
|
||||
\ue002 > \u0902; # SIGN ANUSVARA
|
||||
\ue003 > \u0903; # SIGN VISARGA
|
||||
\ue004 > \u0904; # SIGN SHORT A
|
||||
\ue005 > \u0905; # LETTER A
|
||||
\ue006 > \u0906; # LETTER AA
|
||||
\ue007 > \u0907; # LETTER I
|
||||
\ue008 > \u0908; # LETTER II
|
||||
\ue009 > \u0909; # LETTER U
|
||||
\ue00a > \u090a; # LETTER UU
|
||||
\ue00b > \u090b; # LETTER VOCALIC R
|
||||
\ue00c > \u090c; # LETTER VOCALIC L
|
||||
\ue00d > \u090d; # LETTER CANDRA E (For representing English sounds)
|
||||
\ue00e > \u090e; # LETTER SHORT E(For Southern Scripts)
|
||||
\ue00f > \u090f; # LETTER E
|
||||
\ue010 > \u0910; # LETTER AI
|
||||
\ue011 > \u0911; # LETTER CANDRA O (For representing English sounds)
|
||||
\ue012 > \u0912; # LETTER SHORT O (For Southern Scripts)
|
||||
\ue013 > \u0913; # LETTER O
|
||||
\ue014 > \u0914; # LETTER AU
|
||||
\ue015 > \u0915; # LETTER KA
|
||||
\ue016 > \u0916; # LETTER KHA
|
||||
\ue017 > \u0917; # LETTER GA
|
||||
\ue018 > \u0918; # LETTER GHA
|
||||
\ue019 > \u0919; # LETTER NGA
|
||||
\ue01a > \u091a; # LETTER CA
|
||||
\ue01b > \u091b; # LETTER CHA
|
||||
\ue01c > \u091c; # LETTER JA
|
||||
\ue01d > \u091d; # LETTER JHA
|
||||
\ue01e > \u091e; # LETTER NYA
|
||||
\ue01f > \u091f; # LETTER TTA
|
||||
\ue020 > \u0920; # LETTER TTHA
|
||||
\ue021 > \u0921; # LETTER DDA
|
||||
\ue022 > \u0922; # LETTER DDHA
|
||||
\ue023 > \u0923; # LETTER NNA
|
||||
\ue024 > \u0924; # LETTER TA
|
||||
\ue025 > \u0925; # LETTER THA
|
||||
\ue026 > \u0926; # LETTER DA
|
||||
\ue027 > \u0927; # LETTER DHA
|
||||
\ue028 > \u0928; # LETTER NA
|
||||
\ue029 > \u0929; # LETTER NNNA
|
||||
\ue02a > \u092a; # LETTER PA
|
||||
\ue02b > \u092b; # LETTER PHA
|
||||
\ue02c > \u092c; # LETTER BA
|
||||
\ue02d > \u092d; # LETTER BHA
|
||||
\ue02e > \u092e; # LETTER MA
|
||||
\ue02f > \u092f; # LETTER YA
|
||||
\ue030 > \u0930; # LETTER RA
|
||||
\ue031 > \u0931; # LETTER RRA (Eyelash RA for Southern scripts)
|
||||
#\ue031 > \u0930;
|
||||
\ue032 > \u0932; # LETTER LA
|
||||
\ue033 > \u0933; # LETTER LLA
|
||||
\ue034 > \u0934; # LETTER LLLA (LLLA for Southern scripts)
|
||||
#\ue034 > \u0933;
|
||||
\ue035 > \u0935; # LETTER VA
|
||||
\ue036 > \u0936; # LETTER SHA
|
||||
\ue037 > \u0937; # LETTER SSA
|
||||
\ue038 > \u0938; # LETTER SA
|
||||
\ue039 > \u0939; # LETTER HA
|
||||
\ue03c > \u093c; # SIGN NUKTA
|
||||
\ue03d > \u093d; # SIGN AVAGRAHA
|
||||
\ue03e > \u093e; # VOWEL SIGN AA
|
||||
\ue03f > \u093f; # VOWEL SIGN I
|
||||
\ue040 > \u0940; # VOWEL SIGN II
|
||||
\ue041 > \u0941; # VOWEL SIGN U
|
||||
\ue042 > \u0942; # VOWEL SIGN UU
|
||||
\ue043 > \u0943; # VOWEL SIGN VOCALIC R
|
||||
\ue044 > \u0944; # VOWEL SIGN VOCALIC RR
|
||||
\ue045 > \u0945; # VOWEL SIGN CANDRA E
|
||||
\ue046 > \u0946; # VOWEL SIGN SHORT E
|
||||
\ue047 > \u0947; # VOWEL SIGN E
|
||||
\ue048 > \u0948; # VOWEL SIGN AI
|
||||
\ue049 > \u0949; # VOWEL SIGN CANDRA O
|
||||
\ue04a > \u094a; # VOWEL SIGN SHORT O
|
||||
\ue04b > \u094b; # VOWEL SIGN O
|
||||
\ue04c > \u094c; # VOWEL SIGN AU
|
||||
\ue04d > \u094d; # SIGN VIRAMA
|
||||
\ue050 > \u0950; # OM
|
||||
\ue051 > \u0951; # STRESS SIGN UDATTA
|
||||
\ue052 > \u0952; # STRESS SIGN ANUDATTA
|
||||
\ue053 > \u0953; # GRAVE ACCENT
|
||||
\ue054 > \u0954; # ACUTE ACCENT
|
||||
\ue058 > \u0958; # LETTER QA (For Urdu)
|
||||
\ue059 > \u0959; # LETTER KHHA (For Urdu)
|
||||
\ue05a > \u095a; # LETTER GHHA (For Urdu)
|
||||
\ue05b > \u095b; # LETTER ZA (For Urdu)
|
||||
\ue05c > \u095c; # LETTER DDDHA (pronounced RRA)
|
||||
\ue05d > \u095d; # LETTER RHA (pronounced RRHA)
|
||||
\ue05e > \u095e; # LETTER FA
|
||||
\ue05f > \u095f; # LETTER YYA
|
||||
\ue060 > \u0960; # LETTER VOCALIC RR
|
||||
\ue061 > \u0961; # LETTER VOCALIC LL
|
||||
\ue062 > \u0962; # VOWEL SIGN VOCALIC L
|
||||
\ue063 > \u0963; # VOWEL SIGN VOCALIC LL
|
||||
\ue064 > \u0964; # DANDA
|
||||
\ue065 > \u0965; # DOUBLE DANDA
|
||||
\ue066 > \u0966; # DIGIT ZERO
|
||||
\ue067 > \u0967; # DIGIT ONE
|
||||
\ue068 > \u0968; # DIGIT TWO
|
||||
\ue069 > \u0969; # DIGIT THREE
|
||||
\ue06a > \u096a; # DIGIT FOUR
|
||||
\ue06b > \u096b; # DIGIT FIVE
|
||||
\ue06c > \u096c; # DIGIT SIX
|
||||
\ue06d > \u096d; # DIGIT SEVEN
|
||||
\ue06e > \u096e; # DIGIT EIGHT
|
||||
\ue06f > \u096f; # DIGIT NINE
|
||||
|
||||
\ue070>\u0970; # ABBREVIATION SIGN
|
||||
\ue071>\u0930; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0930; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>\u0930\u0942; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0935; # FALLBACK FOR ORIYA LETTER WA
|
||||
|
||||
# \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
|
||||
# :: NFC;
|
||||
# eof
|
138
icu4c/source/data/translit/InterIndic_Gujarati.txt
Normal file
138
icu4c/source/data/translit/InterIndic_Gujarati.txt
Normal file
|
@ -0,0 +1,138 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Gujarati
|
||||
#:: NFD (NFC) ;
|
||||
\ue001>\u0a81; # SIGN CANDRABINDU
|
||||
\ue002>\u0a82; # SIGN ANUSVARA
|
||||
\ue003>\u0a83; # SIGN VISARGA
|
||||
\uE004>\u0a85; # FALLBACK TO LETTER A
|
||||
\ue005>\u0a85; # LETTER A
|
||||
\ue006>\u0a86; # LETTER AA
|
||||
\ue007>\u0a87; # LETTER I
|
||||
\ue008>\u0a88; # LETTER II
|
||||
\ue009>\u0a89; # LETTER U
|
||||
\ue00a>\u0a8a; # LETTER UU
|
||||
\ue00b>\u0a8b; # LETTER VOCALIC R
|
||||
\ue00c>\u0a8c; # LETTER VOCALIC L
|
||||
\ue00d>\u0a8d; # GUJARATI VOWEL CANDRA E
|
||||
\ue00e>\u0a8f; # FALLBACK
|
||||
\ue00f>\u0a8f; # InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E)
|
||||
\ue010>\u0a90; # LETTER AI
|
||||
\ue011>\u0a91; # FALLBACK
|
||||
\ue012>\u0a93; # FALLBACK
|
||||
\ue013>\u0a93; # UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O)
|
||||
\ue014>\u0a94; # LETTER AU
|
||||
\ue015>\u0a95; # LETTER KA
|
||||
\ue016>\u0a96; # LETTER KHA
|
||||
\ue017>\u0a97; # LETTER GA
|
||||
\ue018>\u0a98; # LETTER GHA
|
||||
\ue019>\u0a99; # LETTER NGA
|
||||
\ue01a>\u0a9a; # LETTER CA
|
||||
\ue01b>\u0a9b; # LETTER CHA
|
||||
\ue01c>\u0a9c; # LETTER JA
|
||||
\ue01d>\u0a9d; # LETTER JHA
|
||||
\ue01e>\u0a9e; # LETTER NYA
|
||||
\ue01f>\u0a9f; # LETTER TTA
|
||||
\ue020>\u0aa0; # LETTER TTHA
|
||||
\ue021>\u0aa1; # LETTER DDA
|
||||
\ue022>\u0aa2; # LETTER DDHA
|
||||
\ue023>\u0aa3; # LETTER NNA
|
||||
\ue024>\u0aa4; # LETTER TA
|
||||
\ue025>\u0aa5; # LETTER THA
|
||||
\ue026>\u0aa6; # LETTER DA
|
||||
\ue027>\u0aa7; # LETTER DHA
|
||||
\ue028>\u0aa8; # LETTER NA
|
||||
\ue029>\u0aa8\u0abc; # FALLBACK to NA+NUKTA
|
||||
\ue02a>\u0aaa; # LETTER PA
|
||||
\ue02b>\u0aab; # LETTER PHA
|
||||
\ue02c>\u0aac; # LETTER BA
|
||||
\ue02d>\u0aad; # LETTER BHA
|
||||
\ue02e>\u0aae; # LETTER MA
|
||||
\ue02f>\u0aaf; # LETTER YA
|
||||
\ue030>\u0ab0; # LETTER RA
|
||||
\ue031>\u0ab0\u0abc; # FALLBACK
|
||||
\ue032>\u0ab2; # LETTER LA
|
||||
\ue033>\u0ab3; # LETTER LLA
|
||||
\ue034>\u0ab3\u0abc; # LETTER LLLA>LETTER LLA+NUKTA
|
||||
\ue035>\u0ab5; # LETTER VA
|
||||
\ue036>\u0ab6; # LETTER SHA
|
||||
\ue037>\u0ab7; # LETTER SSA
|
||||
\ue038>\u0ab8; # LETTER SA
|
||||
\ue039>\u0ab9; # LETTER HA
|
||||
\ue03c>\u0abc; # SIGN NUKTA
|
||||
\ue03d>\u0abd; # SIGN AVAGRAHA
|
||||
\ue03e>\u0abe; # VOWEL SIGN AA
|
||||
\ue03f>\u0abf; # VOWEL SIGN I
|
||||
\ue040>\u0ac0; # VOWEL SIGN II
|
||||
\ue041>\u0ac1; # VOWEL SIGN U
|
||||
\ue042>\u0ac2; # VOWEL SIGN UU
|
||||
\ue043>\u0ac3; # VOWEL SIGN VOCALIC R
|
||||
\ue044>\u0ac4; # VOWEL SIGN VOCALIC RR
|
||||
\ue045>\u0ac5; # VOWEL SIGN CANDRA E
|
||||
\ue046>\u0ac7; # FALLBACK
|
||||
\ue047>\u0ac7; # InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E)
|
||||
\ue048>\u0ac8; # VOWEL SIGN AI
|
||||
\ue049>\u0ac9; # VOWEL SIGN CANDRA O
|
||||
\ue04a>\u0acb; # FALLBACK
|
||||
\ue04b>\u0acb; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O)
|
||||
\ue04c>\u0acc; # VOWEL SIGN AU
|
||||
\ue04d>\u0acd; # SIGN VIRAMA
|
||||
\ue050>\u0ad0; # OM
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>; # UNMAPPED InterIndic-Gujarati: LENGTH MARK
|
||||
\ue056>\u0ac8; # REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\ue057>\u0acc; # REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU
|
||||
\ue058>\u0a95\u0abc; # FALLBACK
|
||||
\ue059>\u0a96\u0abc; # REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA
|
||||
\ue05a>\u0a97\u0abc; # REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA
|
||||
\ue05b>\u0a9c\u0abc; # REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA
|
||||
\ue05c>\u0aa1\u0abc; # FALLBACK
|
||||
\ue05d>\u0aa2\u0abc; # REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA
|
||||
\ue05e>\u0aab\u0abc; # REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER FA>LETTER PHA.SIGN NUKTA
|
||||
\ue05f>\u0aaf\u0abc; # REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA
|
||||
\ue060>\u0ae0; # LETTER VOCALIC RR
|
||||
\ue061>\u0ae1; # LETTER VOCALIC LL
|
||||
\ue062>\u0abf\u0abc; # REMAP (indicExceptions.txt): \u0ae2>\u0abf\u0abc = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
|
||||
\ue063>\u0ac0\u0abc; # REMAP (indicExceptions.txt): \u0ae3>\u0ac0\u0abc = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
|
||||
\uE064>\u0964; # DANDA
|
||||
\uE065>\u0965; # DOUBLE DANDA
|
||||
\ue066>\u0ae6; # DIGIT ZERO
|
||||
\ue067>\u0ae7; # DIGIT ONE
|
||||
\ue068>\u0ae8; # DIGIT TWO
|
||||
\ue069>\u0ae9; # DIGIT THREE
|
||||
\ue06a>\u0aea; # DIGIT FOUR
|
||||
\ue06b>\u0aeb; # DIGIT FIVE
|
||||
\ue06c>\u0aec; # DIGIT SIX
|
||||
\ue06d>\u0aed; # DIGIT SEVEN
|
||||
\ue06e>\u0aee; # DIGIT EIGHT
|
||||
\ue06f>\u0aef; # DIGIT NINE
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u0ab0; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0ab0; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0ab5; # FALLBACK FOR ORIYA LETTER WA
|
||||
0 > \u0ae6; # FALLBACK FOR TAMIL
|
||||
1 > \u0ae7;
|
||||
|
||||
#\ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
147
icu4c/source/data/translit/InterIndic_Gurmukhi.txt
Normal file
147
icu4c/source/data/translit/InterIndic_Gurmukhi.txt
Normal file
|
@ -0,0 +1,147 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Gurmukhi
|
||||
#:: NFD (NFC) ;
|
||||
$vowel = [\u0A05-\u0A14 \u0A3e-\u0A4D];
|
||||
$consonant = [\u0A15-\u0A39];
|
||||
|
||||
\ue001>\u0a01; # SIGN CHANDRABINDU
|
||||
#rules for BINDI
|
||||
|
||||
# Anusvara is equivalent to BINDI when preceded by a vowel
|
||||
$vowel{\ue002>\u0a02; # SIGN ANUSVARA (\u0a02 = SIGN BINDI)
|
||||
# else is equivalent to TIPPI
|
||||
$consonant{\ue002>\u0a70; # SIGN TIPPI
|
||||
\ue002>\u0a02;
|
||||
|
||||
\ue003>; # FALLBACK BLOW AWAY SIGN VISARGA
|
||||
\uE004>\u0a05; # FALLBACK TO LETTER A
|
||||
\ue005>\u0a05; # LETTER A
|
||||
\ue006>\u0a06; # LETTER AA
|
||||
\ue007>\u0a07; # LETTER I
|
||||
\ue008>\u0a08; # LETTER II
|
||||
\ue009>\u0a09; # LETTER U
|
||||
\ue00a>\u0a0a; # LETTER UU
|
||||
\ue00b>\u0a30\u0a3f; # REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
|
||||
\ue00c>\u0a33; # FALLBACK
|
||||
\ue00d>\u0a0f; # FALLBACK
|
||||
\ue00e>\u0a0f; # FALLBACK
|
||||
\ue00f>\u0a0f; # LETTER EE
|
||||
\ue010>\u0a10; # LETTER AI
|
||||
\ue011>\u0a13; # FALLBACK
|
||||
\ue012>\u0a13; # FALLBACK
|
||||
\ue013>\u0a13; # LETTER OO
|
||||
\ue014>\u0a14; # LETTER AU
|
||||
\ue015>\u0a15; # LETTER KA
|
||||
\ue016>\u0a16; # LETTER KHA
|
||||
\ue017>\u0a17; # LETTER GA
|
||||
\ue018>\u0a18; # LETTER GHA
|
||||
\ue019>\u0a19; # LETTER NGA
|
||||
\ue01a>\u0a1a; # LETTER CA
|
||||
\ue01b>\u0a1b; # LETTER CHA
|
||||
\ue01c>\u0a1c; # LETTER JA
|
||||
\ue01d>\u0a1d; # LETTER JHA
|
||||
\ue01e>\u0a1e; # LETTER NYA
|
||||
\ue01f>\u0a1f; # LETTER TTA
|
||||
\ue020>\u0a20; # LETTER TTHA
|
||||
\ue021>\u0a21; # LETTER DDA
|
||||
\ue022>\u0a22; # LETTER DDHA
|
||||
\ue023>\u0a23; # LETTER NNA
|
||||
\ue024>\u0a24; # LETTER TA
|
||||
\ue025>\u0a25; # LETTER THA
|
||||
\ue026>\u0a26; # LETTER DA
|
||||
\ue027>\u0a27; # LETTER DHA
|
||||
\ue028>\u0a28; # LETTER NA
|
||||
\ue029>\u0a28\u0a3c; # REMAP (indicExceptions.txt): \u0a29>\u0a28\u0a3c = LETTER NNNA>LETTER NA.SIGN NUKTA
|
||||
\ue02a>\u0a2a; # LETTER PA
|
||||
\ue02b>\u0a2b; # LETTER PHA
|
||||
\ue02c>\u0a2c; # LETTER BA
|
||||
\ue02d>\u0a2d; # LETTER BHA
|
||||
\ue02e>\u0a2e; # LETTER MA
|
||||
\ue02f>\u0a2f; # LETTER YA
|
||||
\ue030>\u0a30; # LETTER RA
|
||||
\ue031>\u0a30\u0a3c; # FALLBACK LETTER RA+NUKTA
|
||||
\ue032>\u0a32; # LETTER LA
|
||||
\ue033>\u0a33; # LETTER LLA
|
||||
\ue034>\u0a33; # REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA
|
||||
\ue035>\u0a35; # LETTER VA
|
||||
\ue036>\u0a36; # LETTER SHA
|
||||
\ue037>\u0a36; # REMAP (indicExceptions.txt): \u0a37>\u0a36 = LETTER SSA>LETTER SHA
|
||||
\ue038>\u0a38; # LETTER SA
|
||||
\ue039>\u0a39; # LETTER HA
|
||||
\ue03c>\u0a3c; # SIGN NUKTA
|
||||
\ue03d>; # FALLBACK BLOW AWAY SIGN AVAGRAHA
|
||||
\ue03e>\u0a3e; # VOWEL SIGN AA
|
||||
\ue03f>\u0a3f; # VOWEL SIGN I
|
||||
\ue040>\u0a40; # VOWEL SIGN II
|
||||
\ue041>\u0a41; # VOWEL SIGN U
|
||||
\ue042>\u0a42; # VOWEL SIGN UU
|
||||
\ue043>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC R
|
||||
\ue044>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR
|
||||
\ue045>\u0a48; # REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI
|
||||
\ue046>\u0a47; # FALLBACK
|
||||
\ue047>\u0a47; # VOWEL SIGN EE
|
||||
\ue048>\u0a48; # VOWEL SIGN AI
|
||||
\ue049>\u0a4c; # REMAP (indicExceptions.txt): \u0a49>\u0a4c = VOWEL SIGN CANDRA O>VOWEL SIGN AU
|
||||
\ue04a>\u0a4b; # FALLBACK
|
||||
\ue04b>\u0a4b; # VOWEL SIGN OO
|
||||
\ue04c>\u0a4c; # VOWEL SIGN AU
|
||||
\ue04d>\u0a4d; # SIGN VIRAMA
|
||||
\ue050>\u0a13\u0a02; # FALLBACK to OO+BINDI : OM (\u0a13 = LETTER OO, \u0a02 = SIGN BINDI)
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>; # FALLBACK BLOW AWAY LENGTH MARK
|
||||
\ue056>\u0a48; # REMAP (indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\ue057>\u0a4c; # REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU
|
||||
\ue058>\u0a15\u0a3c; # FALLBACK KA + NUKTA
|
||||
\ue059>\u0a59; # LETTER KHHA
|
||||
\ue05a>\u0a5a; # LETTER GHHA
|
||||
\ue05b>\u0a5b; # LETTER ZA
|
||||
\ue05c>\u0a5c; # LETTER RRA
|
||||
\ue05d>\u0a22\u0a3c; # REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA
|
||||
\ue05e>\u0a5e; # LETTER FA
|
||||
\ue05f>\u0a2f\u0a3c; # REMAP (indicExceptions.txt): \u0a5f>\u0a2f\u0a3c = LETTER YYA>LETTER YA.SIGN NUKTA
|
||||
\ue060>\u0a30\u0a3f; # REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
|
||||
\ue061>\u0a32\u0a3c; # FALLBACK: LETTER VOCALIC LL>LETTER LA.SIGN NUKTA
|
||||
\ue062>\u0a3f\u0a3c; # REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
|
||||
\ue063>\u0a40\u0a3c; # REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
|
||||
\uE064>\u0964; # DANDA
|
||||
\uE065>\u0965; # DOUBLE DANDA
|
||||
\ue066>\u0a66; # DIGIT ZERO
|
||||
\ue067>\u0a67; # DIGIT ONE
|
||||
\ue068>\u0a68; # DIGIT TWO
|
||||
\ue069>\u0a69; # DIGIT THREE
|
||||
\ue06a>\u0a6a; # DIGIT FOUR
|
||||
\ue06b>\u0a6b; # DIGIT FIVE
|
||||
\ue06c>\u0a6c; # DIGIT SIX
|
||||
\ue06d>\u0a6d; # DIGIT SEVEN
|
||||
\ue06e>\u0a6e; # DIGIT EIGHT
|
||||
\ue06f>\u0a6f; # DIGIT NINE
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u0a30; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0a30; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>\u0a70; # TIPPI
|
||||
\uE07D>\u0a71; # ADDAK
|
||||
\uE07E>\u0a72; # IRI
|
||||
\uE07F>\u0a73; # URA
|
||||
\uE080>\u0a74; # EK ONKAR
|
||||
\uE081>\u0a35; # FALLBACK FOR ORIYA LETTER WA
|
||||
|
||||
0 > \u0a66; # FALLBACK FOR TAMIL
|
||||
1 > \u0a67;
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
141
icu4c/source/data/translit/InterIndic_Kannada.txt
Normal file
141
icu4c/source/data/translit/InterIndic_Kannada.txt
Normal file
|
@ -0,0 +1,141 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Kannada
|
||||
#:: NFD (NFC) ;
|
||||
\ue033\ue03c>\u0cde; # LETTER FA
|
||||
\ue001>\u0c82; # REMAP (indicExceptions.txt): \u0c81>\u0c82 = SIGN CANDRABINDU>SIGN ANUSVARA
|
||||
\ue002>\u0c82; # SIGN ANUSVARA
|
||||
\ue003>\u0c83; # SIGN VISARGA
|
||||
\uE004>\u0c85; # FALLBACK TO LETTER A
|
||||
\ue005>\u0c85; # LETTER A
|
||||
\ue006>\u0c86; # LETTER AA
|
||||
\ue007>\u0c87; # LETTER I
|
||||
\ue008>\u0c88; # LETTER II
|
||||
\ue009>\u0c89; # LETTER U
|
||||
\ue00a>\u0c8a; # LETTER UU
|
||||
\ue00b>\u0c8b; # LETTER VOCALIC R
|
||||
\ue00c>\u0c8c; # LETTER VOCALIC L
|
||||
\ue00d>\u0c8e; # LETTER E
|
||||
\ue00e>\u0c8e; # FALLBACK
|
||||
\ue00f>\u0c8f; # LETTER EE
|
||||
\ue010>\u0c90; # LETTER AI
|
||||
\ue011>\u0c92; # FALLBACK
|
||||
\ue012>\u0c92; # LETTER O
|
||||
\ue013>\u0c93; # LETTER OO
|
||||
\ue014>\u0c94; # LETTER AU
|
||||
\ue015>\u0c95; # LETTER KA
|
||||
\ue016>\u0c96; # LETTER KHA
|
||||
\ue017>\u0c97; # LETTER GA
|
||||
\ue018>\u0c98; # LETTER GHA
|
||||
\ue019>\u0c99; # LETTER NGA
|
||||
\ue01a>\u0c9a; # LETTER CA
|
||||
\ue01b>\u0c9b; # LETTER CHA
|
||||
\ue01c>\u0c9c; # LETTER JA
|
||||
\ue01d>\u0c9d; # LETTER JHA
|
||||
\ue01e>\u0c9e; # LETTER NYA
|
||||
\ue01f>\u0c9f; # LETTER TTA
|
||||
\ue020>\u0ca0; # LETTER TTHA
|
||||
\ue021>\u0ca1; # LETTER DDA
|
||||
\ue022>\u0ca2; # LETTER DDHA
|
||||
\ue023>\u0ca3; # LETTER NNA
|
||||
\ue024>\u0ca4; # LETTER TA
|
||||
\ue025>\u0ca5; # LETTER THA
|
||||
\ue026>\u0ca6; # LETTER DA
|
||||
\ue027>\u0ca7; # LETTER DHA
|
||||
\ue028>\u0ca8; # LETTER NA
|
||||
\ue029>\u0ca8; # REMAP (indicExceptions.txt): \u0ca9>\u0ca8 = LETTER NNNA>LETTER NA
|
||||
\ue02a>\u0caa; # LETTER PA
|
||||
\ue02b>\u0cab; # LETTER PHA
|
||||
\ue02c>\u0cac; # LETTER BA
|
||||
\ue02d>\u0cad; # LETTER BHA
|
||||
\ue02e>\u0cae; # LETTER MA
|
||||
\ue02f>\u0caf; # LETTER YA
|
||||
\ue030\ue03c>\u0cb1;
|
||||
\ue030>\u0cb0; # LETTER RA
|
||||
\ue031>\u0cb1; # LETTER RRA
|
||||
\ue032>\u0cb2; # LETTER LA
|
||||
\ue033>\u0cb3; # LETTER LLA
|
||||
\ue034>\u0cde; # REMAP (indicExceptions.txt): \u0cb4>\u0cde = LETTER LLLA>LETTER FA
|
||||
\ue035>\u0cb5; # LETTER VA
|
||||
\ue036>\u0cb6; # LETTER SHA
|
||||
\ue037>\u0cb7; # LETTER SSA
|
||||
\ue038>\u0cb8; # LETTER SA
|
||||
\ue039>\u0cb9; # LETTER HA
|
||||
|
||||
\ue03c>\u0cbc; # NUKTA
|
||||
\ue03d>\u0cbd; # AVAGRAHA
|
||||
|
||||
\ue03e>\u0cbe; # VOWEL SIGN AA
|
||||
\ue03f>\u0cbf; # VOWEL SIGN I
|
||||
\ue040>\u0cc0; # VOWEL SIGN II
|
||||
\ue041>\u0cc1; # VOWEL SIGN U
|
||||
\ue042>\u0cc2; # VOWEL SIGN UU
|
||||
\ue043>\u0cc3; # VOWEL SIGN VOCALIC R
|
||||
\ue044>\u0cc4; # VOWEL SIGN VOCALIC RR
|
||||
\ue045>\u0cc6; # REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
\ue046>\u0cc6; # VOWEL SIGN E
|
||||
\ue047>\u0cc7; # VOWEL SIGN EE
|
||||
\ue048>\u0cc8; # VOWEL SIGN AI
|
||||
\ue049>\u0cca; # REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
\ue04a>\u0cca; # VOWEL SIGN O
|
||||
\ue04b>\u0ccb; # VOWEL SIGN OO
|
||||
\ue04c>\u0ccc; # VOWEL SIGN AU
|
||||
\ue04d>\u0ccd; # SIGN VIRAMA
|
||||
\ue050>\u0c93\u0c82; # REMAP (indicExceptions.txt): \u0cd0>\u0c93\u0c82 = OM>LETTER OO.SIGN ANUSVARA
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>\u0cd5; # LENGTH MARK
|
||||
\ue056>\u0cd6; # AI LENGTH MARK
|
||||
\ue057>\u0ccc; # REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU
|
||||
\ue058>\u0c95; # FALLBACK
|
||||
\ue059>\u0c96; # REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA
|
||||
\ue05a>\u0c97; # REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA
|
||||
\ue05b>\u0c9c; # REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA
|
||||
\ue05c>\u0ca2; # FALLBACK
|
||||
\ue05d>\u0ca2; # REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA
|
||||
\ue05e>\u0cde; # LETTER FA
|
||||
\ue05f>\u0caf; # REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA
|
||||
\ue060>\u0ce0; # LETTER VOCALIC RR
|
||||
\ue061>\u0ce1; # LETTER VOCALIC LL
|
||||
\ue062>\u0cbf; # REMAP (indicExceptions.txt): \u0ce2>\u0cbf = VOWEL SIGN VOCALIC L>VOWEL SIGN I
|
||||
\ue063>\u0cc0; # REMAP (indicExceptions.txt): \u0ce3>\u0cc0 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
|
||||
\ue064>'.' ; # FALLBACK FOR DANDA
|
||||
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
|
||||
\ue066>\u0ce6; # DIGIT ZERO
|
||||
\ue067>\u0ce7; # DIGIT ONE
|
||||
\ue068>\u0ce8; # DIGIT TWO
|
||||
\ue069>\u0ce9; # DIGIT THREE
|
||||
\ue06a>\u0cea; # DIGIT FOUR
|
||||
\ue06b>\u0ceb; # DIGIT FIVE
|
||||
\ue06c>\u0cec; # DIGIT SIX
|
||||
\ue06d>\u0ced; # DIGIT SEVEN
|
||||
\ue06e>\u0cee; # DIGIT EIGHT
|
||||
\ue06f>\u0cef; # DIGIT NINE
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u0cb0; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0cb0; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0cb5; # FALLBACK FOR ORIYA LETTER WA
|
||||
0 > \u0ce6; # FALLBACK FOR TAMIL
|
||||
1 > \u0ce7;
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
529
icu4c/source/data/translit/InterIndic_Latin.txt
Normal file
529
icu4c/source/data/translit/InterIndic_Latin.txt
Normal file
|
@ -0,0 +1,529 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Latin
|
||||
#\ue000 reserved
|
||||
#consonants
|
||||
$chandrabindu=\ue001;
|
||||
$anusvara=\ue002;
|
||||
$visarga=\ue003;
|
||||
#\ue004 reserved
|
||||
# w<vowel> represents the stand-alone form
|
||||
$wa=\ue005;
|
||||
$waa=\ue006;
|
||||
$wi=\ue007;
|
||||
$wii=\ue008;
|
||||
$wu=\ue009;
|
||||
$wuu=\ue00a;
|
||||
$wr=\ue00b;
|
||||
$wl=\ue00c;
|
||||
$wce=\ue00d; # LETTER CANDRA E
|
||||
$wse=\ue00e; # LETTER SHORT E
|
||||
$we=\ue00f; # \u090f LETTER E
|
||||
$wai=\ue010;
|
||||
$wco=\ue011; # LETTER CANDRA O
|
||||
$wso=\ue012; # LETTER SHORT O
|
||||
$wo=\ue013; # \u0913 LETTER O
|
||||
$wau=\ue014;
|
||||
$ka=\ue015;
|
||||
$kha=\ue016;
|
||||
$ga=\ue017;
|
||||
$gha=\ue018;
|
||||
$nga=\ue019;
|
||||
$ca=\ue01a;
|
||||
$cha=\ue01b;
|
||||
$ja=\ue01c;
|
||||
$jha=\ue01d;
|
||||
$nya=\ue01e;
|
||||
$tta=\ue01f;
|
||||
$ttha=\ue020;
|
||||
$dda=\ue021;
|
||||
$ddha=\ue022;
|
||||
$nna=\ue023;
|
||||
$ta=\ue024;
|
||||
$tha=\ue025;
|
||||
$da=\ue026;
|
||||
$dha=\ue027;
|
||||
$na=\ue028;
|
||||
$ena=\ue029; #compatibility
|
||||
$pa=\ue02a;
|
||||
$pha=\ue02b;
|
||||
$ba=\ue02c;
|
||||
$bha=\ue02d;
|
||||
$ma=\ue02e;
|
||||
$ya=\ue02f;
|
||||
$ra=\ue030;
|
||||
$vva=\ue081;
|
||||
$rra=\ue031;
|
||||
$la=\ue032;
|
||||
$lla=\ue033;
|
||||
$ela=\ue034; #compatibility
|
||||
$va=\ue035;
|
||||
$sha=\ue036;
|
||||
$ssa=\ue037;
|
||||
$sa=\ue038;
|
||||
$ha=\ue039;
|
||||
#\u093a Reserved
|
||||
#\u093b Reserved
|
||||
$nukta=\ue03c;
|
||||
$avagraha=\ue03d; # SIGN AVAGRAHA
|
||||
# <vowel> represents the dependent form
|
||||
$aa=\ue03e;
|
||||
$i=\ue03f;
|
||||
$ii=\ue040;
|
||||
$u=\ue041;
|
||||
$uu=\ue042;
|
||||
$rh=\ue043;
|
||||
$lh=\ue044;
|
||||
$ce=\ue045; #VOWEL SIGN CANDRA E
|
||||
$se=\ue046; #VOWEL SIGN SHORT E
|
||||
$e=\ue047;
|
||||
$ai=\ue048;
|
||||
$co=\ue049; # VOWEL SIGN CANDRA O
|
||||
$so=\ue04a; # VOWEL SIGN SHORT O
|
||||
$o=\ue04b; # \u094b
|
||||
$au=\ue04c;
|
||||
$virama=\ue04d;
|
||||
# \u094e Reserved
|
||||
# \u094f Reserved
|
||||
$om=\ue050; # OM
|
||||
\ue051>; # UNMAPPED STRESS SIGN UDATTA
|
||||
\ue052>; # UNMAPPED STRESS SIGN ANUDATTA
|
||||
\ue053>; # UNMAPPED GRAVE ACCENT
|
||||
\ue054>; # UNMAPPED ACUTE ACCENT
|
||||
$lm = \ue055;# Telugu Length Mark
|
||||
$ailm=\ue056;# AI Length Mark
|
||||
$aulm=\ue057;# AU Length Mark
|
||||
#urdu compatibility forms
|
||||
$uka=\ue058;
|
||||
$ukha=\ue059;
|
||||
$ugha=\ue05a;
|
||||
$ujha=\ue05b;
|
||||
$uddha=\ue05c;
|
||||
$udha=\ue05d;
|
||||
$ufa=\ue05e;
|
||||
$uya=\ue05f;
|
||||
$wrr=\ue060;
|
||||
$wll=\ue061;
|
||||
$rrh=\ue062;
|
||||
$llh=\ue063;
|
||||
$danda=\ue064;
|
||||
$doubleDanda=\ue065;
|
||||
$zero=\ue066; # DIGIT ZERO
|
||||
$one=\ue067; # DIGIT ONE
|
||||
$two=\ue068; # DIGIT TWO
|
||||
$three=\ue069; # DIGIT THREE
|
||||
$four=\ue06a; # DIGIT FOUR
|
||||
$five=\ue06b; # DIGIT FIVE
|
||||
$six=\ue06c; # DIGIT SIX
|
||||
$seven=\ue06d; # DIGIT SEVEN
|
||||
$eight=\ue06e; # DIGIT EIGHT
|
||||
$nine=\ue06f; # DIGIT NINE
|
||||
|
||||
# \u0970>; # UNMAPPED ABBREVIATION SIGN
|
||||
$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
|
||||
$depVowelBelow=[\ue041-\ue044];
|
||||
# $x was originally called '&'; $z was '%'
|
||||
$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];
|
||||
$z=[bcdfghjklmnpqrstvwxyz];
|
||||
$vowels=[aeiour\u0304\u0325\u0306];
|
||||
$forceIndependentMatra = [^[[:L:][\u0300-\u034c]]];
|
||||
######################################################################
|
||||
# convert from Native letters to Latin letters
|
||||
######################################################################
|
||||
#transliterations for anusvara
|
||||
$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;
|
||||
$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;
|
||||
$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;
|
||||
$anusvara} [$ta$tha$da$dha$na] > n ;
|
||||
$anusvara} [$pa$pha$ba$bha$ma] > m ;
|
||||
$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;
|
||||
$anusvara> m\u0307;
|
||||
|
||||
# Urdu compatibility
|
||||
$ya$nukta}$x > y\u0307 ;
|
||||
$ya$nukta$virama > y\u0307 ;
|
||||
$ya$nukta > y\u0307a ;
|
||||
|
||||
$la$nukta }$x > l\u0331 ;
|
||||
$la$nukta$virama > l\u0331 ;
|
||||
$la$nukta > l\u0331a ;
|
||||
|
||||
$na$nukta }$x > n\u0331 ;
|
||||
$na$nukta$virama > n\u0331 ;
|
||||
$na$nukta > n\u0331a ;
|
||||
|
||||
$ena }$x > n\u0331 ;
|
||||
$ena$virama > n\u0331 ;
|
||||
$ena > n\u0331a ;
|
||||
$uka > qa ;
|
||||
$ka$nukta }$x > q ;
|
||||
$ka$nukta$virama > q ;
|
||||
$ka$nukta > qa ;
|
||||
$kha$nukta }$x > k\u0331h\u0331 ;
|
||||
$kha$nukta$virama > k\u0331h\u0331 ;
|
||||
$kha$nukta > k\u0331h\u0331a ;
|
||||
$ukha$virama > k\u0331h\u0331;
|
||||
$ukha > k\u0331h\u0331a;
|
||||
$ugha > g\u0307a ;
|
||||
$ga$nukta }$x > g\u0307 ;
|
||||
$ga$nukta$virama > g\u0307 ;
|
||||
$ga$nukta > g\u0307a ;
|
||||
|
||||
$ujha > za ;
|
||||
$ja$nukta }$x > z ;
|
||||
$ja$nukta$virama > z ;
|
||||
$ja$nukta > za ;
|
||||
$ddha$nukta}$x > r\u0323h ;
|
||||
$ddha$nukta$virama > r\u0323h ;
|
||||
$ddha$nukta > r\u0323ha;
|
||||
|
||||
$uddha}$x > r\u0323 ;
|
||||
$uddha$virama > r\u0323 ;
|
||||
$uddha > r\u0323a;
|
||||
|
||||
$udha > r\u0323a ;
|
||||
$dda$nukta}$x > r\u0323 ;
|
||||
$dda$nukta$virama > r\u0323 ;
|
||||
$dda$nukta > r\u0323a ;
|
||||
$pha$nukta }$x > f ;
|
||||
$pha$nukta$virama > f ;
|
||||
$pha$nukta > fa ;
|
||||
$ufa }$x > f ;
|
||||
$ufa$virama > f ;
|
||||
$ufa > fa ;
|
||||
|
||||
$ra$nukta}$x > r\u0331;
|
||||
$ra$nukta$virama > r\u0331;
|
||||
$ra$nukta > r\u0331a;
|
||||
$lla$nukta}$x > l\u0331;
|
||||
$lla$nukta$virama > l\u0331;
|
||||
$lla$nukta > l\u0331a;
|
||||
|
||||
$ela}$x > l\u0331;
|
||||
$ela$virama > l\u0331;
|
||||
$ela > l\u0331a;
|
||||
|
||||
$uya}$x > y\u0307;
|
||||
$uya$virama > y\u0307;
|
||||
$uya > y\u0307a;
|
||||
|
||||
|
||||
# normal consonants
|
||||
$ka$virama}$ha>k'';
|
||||
$ka}$x>k;
|
||||
$ka$virama>k;
|
||||
$ka>ka;
|
||||
$kha}$x>kh;
|
||||
$kha$virama>kh;
|
||||
$kha>kha;
|
||||
$ga$virama}$ha>g'';
|
||||
$ga}$x>g;
|
||||
$ga$virama>g;
|
||||
$ga>ga;
|
||||
|
||||
$gha}$x>gh;
|
||||
$gha$virama>gh;
|
||||
$gha>gha;
|
||||
|
||||
$nga}$x>n\u0307;
|
||||
$nga$virama>n\u0307;
|
||||
$nga>n\u0307a ;
|
||||
$ca$virama}$ha>c'';
|
||||
$ca}$x>c;
|
||||
$ca$virama>c;
|
||||
$ca>ca;
|
||||
|
||||
$cha}$x>ch;
|
||||
$cha$virama>ch;
|
||||
$cha>cha;
|
||||
$ja$virama}$ha>j'';
|
||||
$ja}$x>j;
|
||||
$ja$virama>j;
|
||||
$ja>ja;
|
||||
|
||||
$jha}$x>jh;
|
||||
$jha$virama>jh;
|
||||
$jha>jha;
|
||||
|
||||
$nya }$x>n\u0303 ;
|
||||
$nya$virama>n\u0303;
|
||||
$nya > n\u0303a ;
|
||||
|
||||
|
||||
$tta$virama}$ha>t\u0323'';
|
||||
$tta}$x>t\u0323;
|
||||
$tta$virama>t\u0323;
|
||||
$tta>t\u0323a;
|
||||
|
||||
$ttha}$x>t\u0323h;
|
||||
$ttha$virama>t\u0323h;
|
||||
$ttha>t\u0323ha;
|
||||
$dda$virama}$ha>d\u0323''; # dda + virama before ha: emit a separator so the result is not read as $ddha (d\u0323h); same shape as the $tta rule
|
||||
$dda}$x>d\u0323;
|
||||
$dda$virama>d\u0323;
|
||||
$dda>d\u0323a;
|
||||
|
||||
$ddha}$x>d\u0323h;
|
||||
$ddha$virama>d\u0323h;
|
||||
$ddha>d\u0323ha;
|
||||
|
||||
$nna}$x>n\u0323 ;
|
||||
$nna$virama>n\u0323;
|
||||
$nna>n\u0323a ;
|
||||
|
||||
|
||||
$ta$virama}$ha>t'';
|
||||
$ta$virama}$ttha>t'';
|
||||
$ta$virama}$tta>t'';
|
||||
$ta$virama}$tha>t'';
|
||||
$ta}$x>t;
|
||||
$ta$virama>t;
|
||||
$ta>ta;
|
||||
$tha}$x>th;
|
||||
$tha$virama>th;
|
||||
$tha>tha;
|
||||
|
||||
$da$virama}$ha>d'';
|
||||
$da$virama}$ddha>d'';
|
||||
$da$virama}$dda>d'';
|
||||
$da$virama}$dha>d'';
|
||||
$da}$x>d;
|
||||
$da$virama>d;
|
||||
$da>da;
|
||||
$dha}$x>dh;
|
||||
$dha$virama>dh;
|
||||
$dha>dha;
|
||||
$na$virama}$ga>n'';
|
||||
$na$virama}$ya>n'';
|
||||
$na}$x>n;
|
||||
$na$virama>n;
|
||||
$na>na;
|
||||
|
||||
|
||||
$pa$virama}$ha>p'';
|
||||
$pa}$x>p;
|
||||
$pa$virama>p;
|
||||
$pa>pa;
|
||||
$pha}$x>ph;
|
||||
$pha$virama>ph;
|
||||
$pha>pha;
|
||||
$ba$virama}$ha>b'';
|
||||
$ba}$x>b;
|
||||
$ba$virama>b;
|
||||
$ba>ba;
|
||||
|
||||
$bha}$x>bh;
|
||||
$bha$virama>bh;
|
||||
$bha>bha;
|
||||
|
||||
$ma$virama}$ma>m'';
|
||||
$ma}$x>m;
|
||||
$ma$virama>m;
|
||||
$ma>ma;
|
||||
|
||||
$ya}$x>y;
|
||||
$ya$virama>y;
|
||||
$ya>ya;
|
||||
$ra$virama}$ha>r'';
|
||||
$ra}$x>r;
|
||||
$ra$virama>r;
|
||||
$ra>ra;
|
||||
$vva$virama}$ha>w\u0307'';
|
||||
$vva}$x>w\u0307;
|
||||
$vva$virama>w\u0307;
|
||||
$vva>w\u0307a;
|
||||
$rra$virama}$ha>r\u0331'';
|
||||
$rra}$x>r\u0331;
|
||||
$rra$virama>r\u0331;
|
||||
$rra>r\u0331a;
|
||||
$la$virama}$ha>l'';
|
||||
$la}$x>l;
|
||||
$la$virama>l;
|
||||
$la>la;
|
||||
$lla$virama}$ha>l\u0323'';
|
||||
$lla}$x>l\u0323;
|
||||
$lla$virama>l\u0323;
|
||||
$lla>l\u0323a;
|
||||
$va}$x>v;
|
||||
$va$virama>v;
|
||||
$va>va;
|
||||
$sa$virama}$ha>s'';
|
||||
$sa$virama}$sha>s'';
|
||||
$sa$virama}$ssa>s'';
|
||||
$sa$virama}$sa>s'';
|
||||
$sa}$x>s;
|
||||
$sa$virama>s;
|
||||
|
||||
#for gurmukhi
|
||||
$sa$nukta}$x>s\u0301;
|
||||
$sa$nukta$virama>s\u0301;
|
||||
$sa$nukta>s\u0301a;
|
||||
$sa>sa;
|
||||
|
||||
$sha}$x>s\u0301;
|
||||
$sha$virama>s\u0301;
|
||||
$sha>s\u0301a;
|
||||
|
||||
$ssa}$x>s\u0323;
|
||||
$ssa$virama>s\u0323;
|
||||
$ssa>s\u0323a;
|
||||
$ha}$x>h;
|
||||
$ha$virama>h;
|
||||
$ha>ha;
|
||||
|
||||
# dependent vowels (should never occur except following consonants)
|
||||
$forceIndependentMatra{$aa > \u0314a\u0304 ;
|
||||
$forceIndependentMatra{$ai > \u0314ai ;
|
||||
$forceIndependentMatra{$au > \u0314au ;
|
||||
$forceIndependentMatra{$ii > \u0314i\u0304 ;
|
||||
$forceIndependentMatra{$i > \u0314i ;
|
||||
$forceIndependentMatra{$uu > \u0314u\u0304 ;
|
||||
$forceIndependentMatra{$u > \u0314u ;
|
||||
$forceIndependentMatra{$rrh > \u0314r\u0325\u0304 ;
|
||||
$forceIndependentMatra{$rh > \u0314r\u0325 ;
|
||||
$forceIndependentMatra{$llh > \u0314l\u0325\u0304 ;
|
||||
$forceIndependentMatra{$lh > \u0314l\u0325 ;
|
||||
$forceIndependentMatra{$e > \u0314e\u0304 ;
|
||||
$forceIndependentMatra{$o > \u0314o\u0304 ;
|
||||
#extra vowels
|
||||
$forceIndependentMatra{$ce > \u0314e\u0306 ;
|
||||
$forceIndependentMatra{$co > \u0314o\u0306 ;
|
||||
$forceIndependentMatra{$se > \u0314e ;
|
||||
$forceIndependentMatra{$so > \u0314o ;
|
||||
$forceIndependentMatra{$nukta >; # Nukta cannot appear independently or as first character
|
||||
$forceIndependentMatra{$virama >; # Virama cannot appear independently or as first character
|
||||
$aa > a\u0304 ;
|
||||
$ai > ai ;
|
||||
$au > au ;
|
||||
$ii > i\u0304 ;
|
||||
$i > i ;
|
||||
$uu > u\u0304 ;
|
||||
$u > u ;
|
||||
$rrh > r\u0325\u0304 ;
|
||||
$rh > r\u0325 ;
|
||||
$llh > l\u0325\u0304 ;
|
||||
$lh > l\u0325 ;
|
||||
$e > e\u0304 ;
|
||||
$o > o\u0304 ;
|
||||
#extra vowels
|
||||
$ce > e\u0306 ;
|
||||
$co > o\u0306 ;
|
||||
$se > e ;
|
||||
$so > o ;
|
||||
#dependent vowels when following independent vowels. Generally illegal; handled only for roundtripping
|
||||
$waa} $x > a\u0304\u0314 ;
|
||||
$wai} $x > ai\u0314 ;
|
||||
$wau} $x > au\u0314 ;
|
||||
$wii} $x > i\u0304\u0314 ;
|
||||
$wi } $x > i\u0314 ;
|
||||
$wuu} $x > u\u0304\u0314 ;
|
||||
$wu } $x > u\u0314 ;
|
||||
$wrr} $x > r\u0325\u0304\u0314 ;
|
||||
$wr } $x > r\u0325\u0314 ;
|
||||
$wll} $x > l\u0325\u0304\u0314 ;
|
||||
$wl } $x > l\u0325\u0314 ;
|
||||
$we } $x > e\u0304\u0314 ;
|
||||
$wo } $x > o\u0304\u0314 ;
|
||||
$wa } $x > a\u0314 ;
|
||||
#extra vowels
|
||||
$wce} $x > e\u0306\u0314 ;
|
||||
$wco} $x > o\u0306\u0314 ;
|
||||
$wse} $x > e\u0314 ;
|
||||
$wso} $x > o\u0314 ;
|
||||
$om} $x > ''om\u0314 ;
|
||||
|
||||
# independent vowels when preceded by vowels
|
||||
$vowels{$waa > ''a\u0304 ;
|
||||
$vowels{$wai > ''ai ;
|
||||
$vowels{$wau > ''au ;
|
||||
$vowels{$wii > ''i\u0304 ;
|
||||
$vowels{$wi > ''i ;
|
||||
$vowels{$wuu > ''u\u0304 ;
|
||||
$vowels{$wu > ''u ;
|
||||
$vowels{$wrr > ''r\u0325\u0304 ;
|
||||
$vowels{$wr > ''r\u0325 ;
|
||||
$vowels{$wll > ''l\u0325\u0304 ;
|
||||
$vowels{$wl > ''l\u0325 ;
|
||||
$vowels{$we > ''e\u0304 ;
|
||||
$vowels{$wo > ''o\u0304 ;
|
||||
$vowels{$wa > ''a ;
|
||||
#extra vowels
|
||||
$vowels{$wce > ''e\u0306 ;
|
||||
$vowels{$wco > ''o\u0306 ;
|
||||
$vowels{$wse > ''e ;
|
||||
$vowels{$wso > ''o ;
|
||||
|
||||
# independent vowels (otherwise)
|
||||
$waa > a\u0304 ;
|
||||
$wai > ai ;
|
||||
$wau > au ;
|
||||
$wii > i\u0304 ;
|
||||
$wi > i ;
|
||||
$wuu > u\u0304 ;
|
||||
$wu > u ;
|
||||
$wrr > r\u0325\u0304 ;
|
||||
$wr > r\u0325 ;
|
||||
$wll > l\u0325\u0304 ;
|
||||
$wl > l\u0325 ;
|
||||
$we > e\u0304 ;
|
||||
$wo > o\u0304 ;
|
||||
$wa > a ;
|
||||
#extra vowels
|
||||
$wce > e\u0306 ;
|
||||
$wco > o\u0306 ;
|
||||
$wse > e ;
|
||||
$wso > o ;
|
||||
$om > ''om ;
|
||||
|
||||
#stress marks
|
||||
$avagraha > \u0315;
|
||||
$chandrabindu$anusvara>\u0303;
|
||||
$chandrabindu > m\u0310;
|
||||
$visarga>h\u0323;
|
||||
#numbers
|
||||
$zero > 0;
|
||||
$one > 1;
|
||||
$two > 2;
|
||||
$three > 3;
|
||||
$four > 4;
|
||||
$five > 5;
|
||||
$six > 6;
|
||||
$seven > 7;
|
||||
$eight > 8;
|
||||
$nine > 9;
|
||||
$lm >;
|
||||
$ailm >;
|
||||
$aulm >;
|
||||
|
||||
$danda>'.';
|
||||
$doubleDanda>'.';
|
||||
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
# LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue071}$x>ra; # NOTE(review): emits the same text as the bare \ue071>ra rule; sibling consonant}$x rules drop the inherent a (e.g. $ra}$x>r) - confirm ra is intended before a dependent vowel
|
||||
\ue071$virama>r;
|
||||
\ue071>ra;
|
||||
# LETTER RA WITH LOWER DIAGONAL
|
||||
\ue072}$x>ra; # NOTE(review): emits the same text as the bare \ue072>ra rule; sibling consonant}$x rules drop the inherent a (e.g. $ra}$x>r) - confirm ra is intended before a dependent vowel
|
||||
\ue072$virama>r;
|
||||
\ue072>ra;
|
||||
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE004>; # DEVANAGARI LETTER SHORT A
|
||||
|
141
icu4c/source/data/translit/InterIndic_Malayalam.txt
Normal file
141
icu4c/source/data/translit/InterIndic_Malayalam.txt
Normal file
|
@ -0,0 +1,141 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Malayalam
|
||||
#:: NFD (NFC) ;
|
||||
\ue001>\u0d02; # REMAP (indicExceptions.txt): \u0d01>\u0d02 = SIGN CANDRABINDU>SIGN ANUSVARA
|
||||
\ue002>\u0d02; # SIGN ANUSVARA
|
||||
\ue003>\u0d03; # SIGN VISARGA
|
||||
\uE004>\u0d05; # FALLBACK TO LETTER A
|
||||
\ue005>\u0d05; # LETTER A
|
||||
\ue006>\u0d06; # LETTER AA
|
||||
\ue007>\u0d07; # LETTER I
|
||||
\ue008>\u0d08; # LETTER II
|
||||
\ue009>\u0d09; # LETTER U
|
||||
\ue00a>\u0d0a; # LETTER UU
|
||||
\ue00b>\u0d0b; # LETTER VOCALIC R
|
||||
\ue00c>\u0d0c; # LETTER VOCALIC L
|
||||
\ue00d>\u0d0e; # FALLBACK LETTER E
|
||||
\ue00e>\u0d0e; # LETTER E
|
||||
\ue00f>\u0d0f; # LETTER EE
|
||||
\ue010>\u0d10; # LETTER AI
|
||||
\ue011>\u0d12; # FALLBACK TO O
|
||||
\ue012>\u0d12; # LETTER O
|
||||
\ue013>\u0d13; # LETTER OO
|
||||
\ue014>\u0d14; # LETTER AU
|
||||
\ue015>\u0d15; # LETTER KA
|
||||
\ue016>\u0d16; # LETTER KHA
|
||||
\ue017>\u0d17; # LETTER GA
|
||||
\ue018>\u0d18; # LETTER GHA
|
||||
\ue019>\u0d19; # LETTER NGA
|
||||
\ue01a>\u0d1a; # LETTER CA
|
||||
\ue01b>\u0d1b; # LETTER CHA
|
||||
\ue01c>\u0d1c; # LETTER JA
|
||||
\ue01d>\u0d1d; # LETTER JHA
|
||||
\ue01e>\u0d1e; # LETTER NYA
|
||||
\ue01f>\u0d1f; # LETTER TTA
|
||||
\ue020>\u0d20; # LETTER TTHA
|
||||
\ue021>\u0d21; # LETTER DDA
|
||||
\ue022>\u0d22; # LETTER DDHA
|
||||
\ue023>\u0d23; # LETTER NNA
|
||||
\ue024>\u0d24; # LETTER TA
|
||||
\ue025>\u0d25; # LETTER THA
|
||||
\ue026>\u0d26; # LETTER DA
|
||||
\ue027>\u0d27; # LETTER DHA
|
||||
\ue028>\u0d28; # LETTER NA
|
||||
\ue029>\u0d28; # REMAP (indicExceptions.txt): \u0d29>\u0d28 = LETTER NNNA>LETTER NA
|
||||
\ue02a>\u0d2a; # LETTER PA
|
||||
\ue02b>\u0d2b; # LETTER PHA
|
||||
\ue02c>\u0d2c; # LETTER BA
|
||||
\ue02d>\u0d2d; # LETTER BHA
|
||||
\ue02e>\u0d2e; # LETTER MA
|
||||
\ue02f>\u0d2f; # LETTER YA
|
||||
\ue030\ue03c>\u0d31;
|
||||
\ue030>\u0d30; # LETTER RA
|
||||
\ue031>\u0d31; # LETTER RRA
|
||||
\ue032>\u0d32; # LETTER LA
|
||||
\ue033\ue03c>\u0d34;
|
||||
\ue033>\u0d33; # LETTER LLA
|
||||
\ue034>\u0d34; # LETTER LLLA
|
||||
\ue035>\u0d35; # LETTER VA
|
||||
\ue036>\u0d36; # LETTER SHA
|
||||
\ue037>\u0d37; # LETTER SSA
|
||||
\ue038>\u0d38; # LETTER SA
|
||||
\ue039>\u0d39; # LETTER HA
|
||||
|
||||
\ue03c>; # FALLBACK BLOW AWAY NUKTA
|
||||
\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA
|
||||
|
||||
\ue03e>\u0d3e; # VOWEL SIGN AA
|
||||
\ue03f>\u0d3f; # VOWEL SIGN I
|
||||
\ue040>\u0d40; # VOWEL SIGN II
|
||||
\ue041>\u0d41; # VOWEL SIGN U
|
||||
\ue042>\u0d42; # VOWEL SIGN UU
|
||||
\ue043>\u0d43; # VOWEL SIGN VOCALIC R
|
||||
\ue044>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR
|
||||
\ue045>\u0d3e; # REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA
|
||||
\ue046>\u0d46; # VOWEL SIGN E
|
||||
\ue047>\u0d47; # VOWEL SIGN EE
|
||||
\ue048>\u0d48; # VOWEL SIGN AI
|
||||
\ue049>\u0d4b; # REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO
|
||||
\ue04a>\u0d4a; # VOWEL SIGN O
|
||||
\ue04b>\u0d4b; # VOWEL SIGN OO
|
||||
\ue04c>\u0d4c; # VOWEL SIGN AU
|
||||
\ue04d>\u0d4d; # SIGN VIRAMA
|
||||
\ue050>\u0d13\u0d02; # FALLBACK: OM>LETTER OO.SIGN ANUSVARA
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>; # FALLBACK BLOW AWAY LENGTH MARK
|
||||
\ue056>\u0d48; # REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\ue057>\u0d57; # AU LENGTH MARK
|
||||
\ue058>\u0d15; # FALLBACK
|
||||
\ue059>\u0d16; # REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA
|
||||
\ue05a>\u0d17; # REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA
|
||||
\ue05b>\u0d1c; # REMAP (indicExceptions.txt): \u0d5b>\u0d1c = LETTER ZA>LETTER JA
|
||||
\ue05d>\u0d22; # REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA
|
||||
\ue05c>\u0d21; # FALLBACK
|
||||
\ue05e>\u0d2b; # REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA
|
||||
\ue05f>\u0d2f; # REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA
|
||||
\ue060>\u0d60; # LETTER VOCALIC RR
|
||||
\ue061>\u0d61; # LETTER VOCALIC LL
|
||||
\ue062>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC L
|
||||
\ue063>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC LL
|
||||
\ue064>'.' ; # FALLBACK FOR DANDA
|
||||
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
|
||||
\ue066>\u0d66; # DIGIT ZERO
|
||||
\ue067>\u0d67; # DIGIT ONE
|
||||
\ue068>\u0d68; # DIGIT TWO
|
||||
\ue069>\u0d69; # DIGIT THREE
|
||||
\ue06a>\u0d6a; # DIGIT FOUR
|
||||
\ue06b>\u0d6b; # DIGIT FIVE
|
||||
\ue06c>\u0d6c; # DIGIT SIX
|
||||
\ue06d>\u0d6d; # DIGIT SEVEN
|
||||
\ue06e>\u0d6e; # DIGIT EIGHT
|
||||
\ue06f>\u0d6f; # DIGIT NINE
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u0d30; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0d30; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0d35; # FALLBACK FOR ORIYA LETTER WA
|
||||
0 > \u0d66; # FALLBACK FOR TAMIL
|
||||
1 > \u0d67;
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
137
icu4c/source/data/translit/InterIndic_Oriya.txt
Normal file
137
icu4c/source/data/translit/InterIndic_Oriya.txt
Normal file
|
@ -0,0 +1,137 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Oriya
|
||||
#:: NFD (NFC) ;
|
||||
\ue001>\u0b01; # SIGN CANDRABINDU
|
||||
\ue002>\u0b02; # SIGN ANUSVARA
|
||||
\ue003>\u0b03; # SIGN VISARGA
|
||||
\uE004>\u0b05; # FALLBACK TO LETTER A
|
||||
\ue005>\u0b05; # LETTER A
|
||||
\ue006>\u0b06; # LETTER AA
|
||||
\ue007>\u0b07; # LETTER I
|
||||
\ue008>\u0b08; # LETTER II
|
||||
\ue009>\u0b09; # LETTER U
|
||||
\ue00a>\u0b0a; # LETTER UU
|
||||
\ue00b>\u0b0b; # LETTER VOCALIC R
|
||||
\ue00c>\u0b0c; # LETTER VOCALIC L
|
||||
\ue00d>\u0b0f; # FALLBACK
|
||||
\ue00e>\u0b0f; # FALLBACK
|
||||
\ue00f>\u0b0f; # LETTER E
|
||||
\ue010>\u0b10; # LETTER AI
|
||||
\ue011>\u0b13; # FALLBACK
|
||||
\ue012>\u0b13; # FALLBACK
|
||||
\ue013>\u0b13; # FALLBACK LETTER OO (\u0b13 = LETTER O)
|
||||
\ue014>\u0b14; # LETTER AU
|
||||
\ue015>\u0b15; # LETTER KA
|
||||
\ue016>\u0b16; # LETTER KHA
|
||||
\ue017>\u0b17; # LETTER GA
|
||||
\ue018>\u0b18; # LETTER GHA
|
||||
\ue019>\u0b19; # LETTER NGA
|
||||
\ue01a>\u0b1a; # LETTER CA
|
||||
\ue01b>\u0b1b; # LETTER CHA
|
||||
\ue01c>\u0b1c; # LETTER JA
|
||||
\ue01d>\u0b1d; # LETTER JHA
|
||||
\ue01e>\u0b1e; # LETTER NYA
|
||||
\ue01f>\u0b1f; # LETTER TTA
|
||||
\ue020>\u0b20; # LETTER TTHA
|
||||
\ue021>\u0b21; # LETTER DDA
|
||||
\ue022>\u0b22; # LETTER DDHA
|
||||
\ue023>\u0b23; # LETTER NNA
|
||||
\ue024>\u0b24; # LETTER TA
|
||||
\ue025>\u0b25; # LETTER THA
|
||||
\ue026>\u0b26; # LETTER DA
|
||||
\ue027>\u0b27; # LETTER DHA
|
||||
\ue028>\u0b28; # LETTER NA
|
||||
\ue029>\u0b28\u0b3c; # FALLBACK \u0b29>\u0b28\u0b3c = LETTER NNNA>LETTER NA.SIGN NUKTA
|
||||
\ue02a>\u0b2a; # LETTER PA
|
||||
\ue02b>\u0b2b; # LETTER PHA
|
||||
\ue02c>\u0b2c; # LETTER BA
|
||||
\ue02d>\u0b2d; # LETTER BHA
|
||||
\ue02e>\u0b2e; # LETTER MA
|
||||
\ue02f>\u0b2f; # LETTER YA
|
||||
\ue030>\u0b30; # LETTER RA
|
||||
\ue031>\u0b5c; # LETTER RRA
|
||||
\ue032>\u0b32; # LETTER LA
|
||||
\ue033>\u0b33; # LETTER LLA
|
||||
\ue034>\u0b33\u0b3c; # FALLBACK LETTER LLLA>LETTER LLA.SIGN NUKTA
|
||||
\ue035>\u0b35; # LETTER VA
|
||||
\ue036>\u0b36; # LETTER SHA
|
||||
\ue037>\u0b37; # LETTER SSA
|
||||
\ue038>\u0b38; # LETTER SA
|
||||
\ue039>\u0b39; # LETTER HA
|
||||
\ue03c>\u0b3c; # SIGN NUKTA
|
||||
\ue03d>\u0b3d; # SIGN AVAGRAHA
|
||||
\ue03e>\u0b3e; # VOWEL SIGN AA
|
||||
\ue03f>\u0b3f; # VOWEL SIGN I
|
||||
\ue040>\u0b40; # VOWEL SIGN II
|
||||
\ue041>\u0b41; # VOWEL SIGN U
|
||||
\ue042>\u0b42; # VOWEL SIGN UU
|
||||
\ue043>\u0b43; # VOWEL SIGN VOCALIC R
|
||||
\ue044>\u0b43\u0b3c; # FALLBACK \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA
|
||||
\ue045>\u0b47; # FALLBACK
|
||||
\ue046>\u0b47; # FALLBACK
|
||||
\ue047>\u0b47; # VOWEL SIGN E
|
||||
\ue048>\u0b48; # VOWEL SIGN AI
|
||||
\ue049>\u0b4b; # FALLBACK
|
||||
\ue04a>\u0b4b; # FALLBACK
|
||||
\ue04b>\u0b4b; # VOWEL SIGN O
|
||||
\ue04c>\u0b4c; # VOWEL SIGN AU
|
||||
\ue04d>\u0b4d; # SIGN VIRAMA
|
||||
\ue050>\u0b13\u0b01; # FALLBACK \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>; # UNMAPPED InterIndic-Oriya: LENGTH MARK
|
||||
\ue056>\u0b56; # AI LENGTH MARK
|
||||
\ue057>\u0b57; # AU LENGTH MARK
|
||||
\ue059>\u0b16\u0b3c; # FALLBACK \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA
|
||||
\ue058>\u0b15\u0b3c; # FALLBACK
|
||||
\ue05a>\u0b17\u0b3c; # FALLBACK \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA
|
||||
\ue05b>\u0b1c\u0b3c; # FALLBACK \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA
|
||||
\ue05c>\u0b21\u0b3c; # FALLBACK
|
||||
\ue05d>\u0b5d; # LETTER RHA
|
||||
\ue05e>\u0b2b\u0b3c; # FALLBACK \u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA
|
||||
\ue05f>\u0b5f; # LETTER YYA
|
||||
\ue060>\u0b60; # LETTER VOCALIC RR
|
||||
\ue061>\u0b61; # LETTER VOCALIC LL
|
||||
\ue062>\u0b56\u0b3c; # FALLBACK \u0b62>\u0b56\u0b3c = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA
|
||||
\ue063>\u0b57\u0b3c; # FALLBACK \u0b63>\u0b57\u0b3c = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA
|
||||
\uE064>\u0964; # DANDA
|
||||
\uE065>\u0965; # DOUBLE DANDA
|
||||
\ue066>\u0b66; # DIGIT ZERO
|
||||
\ue067>\u0b67; # DIGIT ONE
|
||||
\ue068>\u0b68; # DIGIT TWO
|
||||
\ue069>\u0b69; # DIGIT THREE
|
||||
\ue06a>\u0b6a; # DIGIT FOUR
|
||||
\ue06b>\u0b6b; # DIGIT FIVE
|
||||
\ue06c>\u0b6c; # DIGIT SIX
|
||||
\ue06d>\u0b6d; # DIGIT SEVEN
|
||||
\ue06e>\u0b6e; # DIGIT EIGHT
|
||||
\ue06f>\u0b6f; # DIGIT NINE
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u0b30; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0b30; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>\u0B70; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0B71; # LETTER WA
|
||||
0 > \u0b66; # FALLBACK FOR TAMIL
|
||||
1 > \u0b67;
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
151
icu4c/source/data/translit/InterIndic_Tamil.txt
Normal file
151
icu4c/source/data/translit/InterIndic_Tamil.txt
Normal file
|
@ -0,0 +1,151 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Tamil
|
||||
#:: NFD (NFC) ;
|
||||
\ue001>\u0b82; # FALLBACK SIGN CANDRABINDU
|
||||
\ue002>\u0b82; # SIGN ANUSVARA
|
||||
\ue003>\u0b83; # SIGN VISARGA
|
||||
\uE004>\u0b85; # FALLBACK TO LETTER A
|
||||
\ue005>\u0b85; # LETTER A
|
||||
\ue006>\u0b86; # LETTER AA
|
||||
\ue007>\u0b87; # LETTER I
|
||||
\ue008>\u0b88; # LETTER II
|
||||
\ue009>\u0b89; # LETTER U
|
||||
\ue00a>\u0b8a; # LETTER UU
|
||||
\ue00b>\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
|
||||
\ue00c>\u0bb2; # FALLBACK LETTER LA
|
||||
\ue00d>\u0b8f; # FALLBACK
|
||||
\ue00e>\u0b8e; # LETTER E
|
||||
\ue00f>\u0b8f; # LETTER EE
|
||||
\ue010>\u0b90; # LETTER AI
|
||||
\ue011>\u0b92; # FALLBACK
|
||||
\ue012>\u0b92; # LETTER O
|
||||
\ue013>\u0b93; # LETTER OO
|
||||
\ue014>\u0b94; # LETTER AU
|
||||
\ue015>\u0b95; # LETTER KA
|
||||
\ue016>\u0b95; # REMAP (indicExceptions.txt): \u0b96>\u0b95 = LETTER KHA>LETTER KA
|
||||
\ue017>\u0b95; # REMAP (indicExceptions.txt): \u0b97>\u0b95 = LETTER GA>LETTER KA
|
||||
\ue018>\u0b95; # REMAP (indicExceptions.txt): \u0b98>\u0b95 = LETTER GHA>LETTER KA
|
||||
\ue019>\u0b99; # LETTER NGA
|
||||
\ue01a>\u0b9a; # LETTER CA
|
||||
\ue01b>\u0b9a; # REMAP (indicExceptions.txt): \u0b9b>\u0b9a = LETTER CHA>LETTER CA
|
||||
\ue01c>\u0b9c; # LETTER JA
|
||||
\ue01d>\u0b9a; # REMAP (indicExceptions.txt): \u0b9d>\u0b9a = LETTER JHA>LETTER CA
|
||||
\ue01e>\u0b9e; # LETTER NYA
|
||||
\ue01f>\u0b9f; # LETTER TTA
|
||||
\ue020>\u0b9f; # REMAP (indicExceptions.txt): \u0ba0>\u0b9f = LETTER TTHA>LETTER TTA
|
||||
\ue021>\u0b9f; # REMAP (indicExceptions.txt): \u0ba1>\u0b9f = LETTER DDA>LETTER TTA
|
||||
\ue022>\u0b9f; # REMAP (indicExceptions.txt): \u0ba2>\u0b9f = LETTER DDHA>LETTER TTA
|
||||
\ue023>\u0ba3; # LETTER NNA
|
||||
\ue024>\u0ba4; # LETTER TA
|
||||
\ue025>\u0ba4; # REMAP (indicExceptions.txt): \u0ba5>\u0ba4 = LETTER THA>LETTER TA
|
||||
\ue026>\u0ba4; # REMAP (indicExceptions.txt): \u0ba6>\u0ba4 = LETTER DA>LETTER TA
|
||||
\ue027>\u0ba4; # REMAP (indicExceptions.txt): \u0ba7>\u0ba4 = LETTER DHA>LETTER TA
|
||||
\ue028\ue03c>\u0ba9;
|
||||
\ue028>\u0ba8; # LETTER NA
|
||||
\ue029>\u0ba9; # LETTER NNNA
|
||||
\ue02a>\u0baa; # LETTER PA
|
||||
\ue02b>\u0baa; # REMAP (indicExceptions.txt): \u0bab>\u0baa = LETTER PHA>LETTER PA
|
||||
\ue02c>\u0baa; # REMAP (indicExceptions.txt): \u0bac>\u0baa = LETTER BA>LETTER PA
|
||||
\ue02d>\u0baa; # REMAP (indicExceptions.txt): \u0bad>\u0baa = LETTER BHA>LETTER PA
|
||||
\ue02e>\u0bae; # LETTER MA
|
||||
\ue02f>\u0baf; # LETTER YA
|
||||
\ue030\ue03c>\u0bb1;
|
||||
\ue030>\u0bb0; # LETTER RA
|
||||
\ue031>\u0bb1; # LETTER RRA
|
||||
\ue032>\u0bb2; # LETTER LA
|
||||
\ue033\ue03c>\u0bb4;
|
||||
\ue033>\u0bb3; # LETTER LLA
|
||||
\ue034>\u0bb4; # LETTER LLLA
|
||||
\ue035>\u0bb5; # LETTER VA
|
||||
\ue036>\u0bb7; # REMAP (indicExceptions.txt): \u0bb6>\u0bb7 = LETTER SHA>LETTER SSA
|
||||
\ue037>\u0bb7; # LETTER SSA
|
||||
\ue038>\u0bb8; # LETTER SA
|
||||
\ue039>\u0bb9; # LETTER HA
|
||||
|
||||
\ue03c>; # FALLBACK BLOW AWAY NUKTA
|
||||
\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA
|
||||
|
||||
\ue03e>\u0bbe; # VOWEL SIGN AA
|
||||
\ue03f>\u0bbf; # VOWEL SIGN I
|
||||
\ue040>\u0bc0; # VOWEL SIGN II
|
||||
\ue041>\u0bc1; # VOWEL SIGN U
|
||||
\ue042>\u0bc2; # VOWEL SIGN UU
|
||||
\ue043>\u0bcd\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
|
||||
\ue044>\u0bcd\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
|
||||
\ue045>\u0bbe; # REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA
|
||||
\ue046>\u0bc6; # VOWEL SIGN E
|
||||
\ue047>\u0bc7; # VOWEL SIGN EE
|
||||
\ue048>\u0bc8; # VOWEL SIGN AI
|
||||
\ue049>\u0bbe; # REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA O>VOWEL SIGN AA
|
||||
\ue04a>\u0bca; # VOWEL SIGN O
|
||||
\ue04b>\u0bcb; # VOWEL SIGN OO
|
||||
\ue04c>\u0bcc; # VOWEL SIGN AU
|
||||
\ue04d>\u0bcd; # SIGN VIRAMA
|
||||
\ue050>\u0b93\u0bae\u0bcd; # REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>; # UNMAPPED InterIndic-Tamil: LENGTH MARK
|
||||
\ue056>\u0bc8; # REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\ue057>\u0bd7; # AU LENGTH MARK
|
||||
\ue058>\u0b95; # FALLBACK
|
||||
\ue059>\u0b95; # REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA
|
||||
\ue05a>\u0b95; # REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA
|
||||
\ue05b>\u0b9c; # REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA
|
||||
\ue05c>\u0ba4; # FALLBACK
|
||||
\ue05d>\u0b9f; # REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA
|
||||
\ue05e>\u0baa; # REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA
|
||||
\ue05f>\u0baf; # REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA
|
||||
\ue060>\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
|
||||
\ue061>\u0bb3; # FALLBACK LETTER LLA
|
||||
\ue062>\u0bbf; # FALLBACK VOWEL SIGN VOCALIC L
|
||||
\ue063>\u0bc0; # FALLBACK VOWEL SIGN VOCALIC LL
|
||||
\ue064>'.' ; # FALLBACK FOR DANDA
|
||||
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
|
||||
|
||||
\ue066>\u0030; # FALLBACK DIGIT ZERO
|
||||
|
||||
\ue067\ue066\ue066\ue066>\u0bF2;
|
||||
\ue067\ue066\ue066>\u0bf1;
|
||||
\ue067\ue066>\u0bF0;
|
||||
|
||||
\ue067>\u0be7; # DIGIT ONE
|
||||
\ue068>\u0be8; # DIGIT TWO
|
||||
\ue069>\u0be9; # DIGIT THREE
|
||||
\ue06a>\u0bea; # DIGIT FOUR
|
||||
\ue06b>\u0beb; # DIGIT FIVE
|
||||
\ue06c>\u0bec; # DIGIT SIX
|
||||
\ue06d>\u0bed; # DIGIT SEVEN
|
||||
\ue06e>\u0bee; # DIGIT EIGHT
|
||||
\ue06f>\u0bef; # DIGIT NINE
|
||||
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
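\ue071>\u0bc0; # LETTER RA WITH MIDDLE DIAGONAL -- NOTE(review): \u0bc0 is VOWEL SIGN II, the other InterIndic targets map this to LETTER RA (\u0bb0 for Tamil) - confirm intended target
|
||||
\ue072>\u0bc0; # LETTER RA WITH LOWER DIAGONAL -- NOTE(review): \u0bc0 is VOWEL SIGN II, the other InterIndic targets map this to LETTER RA (\u0bb0 for Tamil) - confirm intended target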
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0bb5; # FALLBACK FOR ORIYA LETTER WA
|
||||
|
||||
1000 >\u0BF2; # NUMBER ONE THOUSAND
|
||||
100 >\u0BF1; # NUMBER ONE HUNDRED
|
||||
10 >\u0BF0; # NUMBER TEN
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
141
icu4c/source/data/translit/InterIndic_Telugu.txt
Normal file
141
icu4c/source/data/translit/InterIndic_Telugu.txt
Normal file
|
@ -0,0 +1,141 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Telugu
|
||||
#:: NFD (NFC) ;
|
||||
\ue001>\u0c01; # SIGN CANDRABINDU
|
||||
\ue002>\u0c02; # SIGN ANUSVARA
|
||||
\ue003>\u0c03; # SIGN VISARGA
|
||||
\uE004>\u0c05; # FALLBACK TO LETTER A
|
||||
\ue005>\u0c05; # LETTER A
|
||||
\ue006>\u0c06; # LETTER AA
|
||||
\ue007>\u0c07; # LETTER I
|
||||
\ue008>\u0c08; # LETTER II
|
||||
\ue009>\u0c09; # LETTER U
|
||||
\ue00a>\u0c0a; # LETTER UU
|
||||
\ue00b>\u0c0b; # LETTER VOCALIC R
|
||||
\ue00c>\u0c0c; # LETTER VOCALIC L
|
||||
\ue00d>\u0c0E; # FALLBACK MAPPING
|
||||
\ue00e>\u0c0E; # LETTER E
|
||||
\ue00f>\u0c0f; # LETTER EE
|
||||
\ue010>\u0c10; # LETTER AI
|
||||
\ue011>\u0c12; # FALLBACK MAPPING
|
||||
\ue012>\u0c12; # LETTER O
|
||||
\ue013>\u0c13; # LETTER OO
|
||||
\ue014>\u0c14; # LETTER AU
|
||||
\ue015>\u0c15; # LETTER KA
|
||||
\ue016>\u0c16; # LETTER KHA
|
||||
\ue017>\u0c17; # LETTER GA
|
||||
\ue018>\u0c18; # LETTER GHA
|
||||
\ue019>\u0c19; # LETTER NGA
|
||||
\ue01a>\u0c1a; # LETTER CA
|
||||
\ue01b>\u0c1b; # LETTER CHA
|
||||
\ue01c>\u0c1c; # LETTER JA
|
||||
\ue01d>\u0c1d; # LETTER JHA
|
||||
\ue01e>\u0c1e; # LETTER NYA
|
||||
\ue01f>\u0c1f; # LETTER TTA
|
||||
\ue020>\u0c20; # LETTER TTHA
|
||||
\ue021>\u0c21; # LETTER DDA
|
||||
\ue022>\u0c22; # LETTER DDHA
|
||||
\ue023>\u0c23; # LETTER NNA
|
||||
\ue024>\u0c24; # LETTER TA
|
||||
\ue025>\u0c25; # LETTER THA
|
||||
\ue026>\u0c26; # LETTER DA
|
||||
\ue027>\u0c27; # LETTER DHA
|
||||
\ue028>\u0c28; # LETTER NA
|
||||
\ue029>\u0c28; # REMAP (indicExceptions.txt): \u0c29>\u0c28 = LETTER NNNA>LETTER NA
|
||||
\ue02a>\u0c2a; # LETTER PA
|
||||
\ue02b>\u0c2b; # LETTER PHA
|
||||
\ue02c>\u0c2c; # LETTER BA
|
||||
\ue02d>\u0c2d; # LETTER BHA
|
||||
\ue02e>\u0c2e; # LETTER MA
|
||||
\ue02f>\u0c2f; # LETTER YA
|
||||
\ue030\ue03c>\u0c31;
|
||||
\ue030>\u0c30; # LETTER RA
|
||||
\ue031>\u0c31; # LETTER RRA
|
||||
\ue032>\u0c32; # LETTER LA
|
||||
\ue033>\u0c33; # LETTER LLA
|
||||
\ue034>\u0c33; # REMAP (indicExceptions.txt): \u0c34>\u0c33 = LETTER LLLA>LETTER LLA
|
||||
\ue035>\u0c35; # LETTER VA
|
||||
\ue036>\u0c36; # LETTER SHA
|
||||
\ue037>\u0c37; # LETTER SSA
|
||||
\ue038>\u0c38; # LETTER SA
|
||||
\ue039>\u0c39; # LETTER HA
|
||||
|
||||
\ue03c>; # FALLBACK BLOW AWAY NUKTA
|
||||
\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA
|
||||
|
||||
\ue03e>\u0c3e; # VOWEL SIGN AA
|
||||
\ue03f>\u0c3f; # VOWEL SIGN I
|
||||
\ue040>\u0c40; # VOWEL SIGN II
|
||||
\ue041>\u0c41; # VOWEL SIGN U
|
||||
\ue042>\u0c42; # VOWEL SIGN UU
|
||||
\ue043>\u0c43; # VOWEL SIGN VOCALIC R
|
||||
\ue044>\u0c44; # VOWEL SIGN VOCALIC RR
|
||||
\ue045>\u0c46; # VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
\ue046>\u0c46; # VOWEL SIGN E
|
||||
\ue047>\u0c47; # VOWEL SIGN EE
|
||||
\ue048>\u0c48; # VOWEL SIGN AI
|
||||
\ue049>\u0c4a; # REMAP (indicExceptions.txt): \u0c49>\u0c4a = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
\ue04a>\u0c4a; # VOWEL SIGN O
|
||||
\ue04b>\u0c4b; # VOWEL SIGN OO
|
||||
\ue04c>\u0c4c; # VOWEL SIGN AU
|
||||
\ue04d>\u0c4d; # SIGN VIRAMA
|
||||
\ue050>\u0c13\u0c02; # REMAP (indicExceptions.txt): \u0c50>\u0c13\u0c02 = OM>LETTER OO.SIGN ANUSVARA
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>\u0c55; # LENGTH MARK
|
||||
\ue056>\u0c56; # AI LENGTH MARK
|
||||
\ue057>\u0c4c; # REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU
|
||||
\ue058>\u0c15; # REMAP
|
||||
\ue059>\u0c16; # REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA
|
||||
\ue05a>\u0c17; # REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA
|
||||
\ue05b>\u0c1c; # REMAP (indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA
|
||||
\ue05c>\u0c22; # REMAP
|
||||
\ue05d>\u0c22; # REMAP (indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA
|
||||
\ue05e>\u0c2b; # REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA
|
||||
\ue05f>\u0c2f; # REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA
|
||||
\ue060>\u0c60; # LETTER VOCALIC RR
|
||||
\ue061>\u0c61; # LETTER VOCALIC LL
|
||||
\ue062>\u0c3f; # REMAP (indicExceptions.txt): \u0c62>\u0c3f = VOWEL SIGN VOCALIC L>VOWEL SIGN I
|
||||
\ue063>\u0c40; # REMAP (indicExceptions.txt): \u0c63>\u0c40 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
|
||||
\ue064>'.' ; # FALLBACK FOR DANDA
|
||||
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
|
||||
\ue066>\u0c66; # DIGIT ZERO
|
||||
\ue067>\u0c67; # DIGIT ONE
|
||||
\ue068>\u0c68; # DIGIT TWO
|
||||
\ue069>\u0c69; # DIGIT THREE
|
||||
\ue06a>\u0c6a; # DIGIT FOUR
|
||||
\ue06b>\u0c6b; # DIGIT FIVE
|
||||
\ue06c>\u0c6c; # DIGIT SIX
|
||||
\ue06d>\u0c6d; # DIGIT SEVEN
|
||||
\ue06e>\u0c6e; # DIGIT EIGHT
|
||||
\ue06f>\u0c6f; # DIGIT NINE
|
||||
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u0c30; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0c30; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0c35; # FALLBACK FOR ORIYA LETTER WA
|
||||
0 > \u0c66; # FALLBACK FOR TAMIL
|
||||
1 > \u0c67;
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
92
icu4c/source/data/translit/Kannada_InterIndic.txt
Normal file
92
icu4c/source/data/translit/Kannada_InterIndic.txt
Normal file
|
@ -0,0 +1,92 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Kannada-InterIndic
|
||||
\u0CC6\u0CD5>\uE047; # VOWEL SIGN EE
|
||||
\u0CC6\u0CCD\u0CD6>\uE048\ue04d; # VOWEL SIGN AI
|
||||
\u0CC6\u0CD6>\uE048; # VOWEL SIGN AI
|
||||
\u0CC6\u0CC2\u0CD5>\uE04B; # VOWEL SIGN OO
|
||||
\u0CC6\u0CC2>\uE04A; # VOWEL SIGN O
|
||||
\u0CBF\u0CD5>\uE040; # VOWEL SIGN II
|
||||
|
||||
\u0C82>\uE002; # SIGN ANUSVARA
|
||||
\u0C83>\uE003; # SIGN VISARGA
|
||||
\u0C85>\uE005; # LETTER A
|
||||
\u0C86>\uE006; # LETTER AA
|
||||
\u0C87>\uE007; # LETTER I
|
||||
\u0C88>\uE008; # LETTER II
|
||||
\u0C89>\uE009; # LETTER U
|
||||
\u0C8A>\uE00A; # LETTER UU
|
||||
\u0C8B>\uE00B; # LETTER VOCALIC R
|
||||
\u0C8C>\uE00C; # LETTER VOCALIC L
|
||||
\u0C8E>\uE00E; # LETTER E
|
||||
\u0C8F>\uE00F; # LETTER EE
|
||||
\u0C90>\uE010; # LETTER AI
|
||||
\u0C92>\uE012; # LETTER O
|
||||
\u0C93>\uE013; # LETTER OO
|
||||
\u0C94>\uE014; # LETTER AU
|
||||
\u0C95>\uE015; # LETTER KA
|
||||
\u0C96>\uE016; # LETTER KHA
|
||||
\u0C97>\uE017; # LETTER GA
|
||||
\u0C98>\uE018; # LETTER GHA
|
||||
\u0C99>\uE019; # LETTER NGA
|
||||
\u0C9A>\uE01A; # LETTER CA
|
||||
\u0C9B>\uE01B; # LETTER CHA
|
||||
\u0C9C>\uE01C; # LETTER JA
|
||||
\u0C9D>\uE01D; # LETTER JHA
|
||||
\u0C9E>\uE01E; # LETTER NYA
|
||||
\u0C9F>\uE01F; # LETTER TTA
|
||||
\u0CA0>\uE020; # LETTER TTHA
|
||||
\u0CA1>\uE021; # LETTER DDA
|
||||
\u0CA2>\uE022; # LETTER DDHA
|
||||
\u0CA3>\uE023; # LETTER NNA
|
||||
\u0CA4>\uE024; # LETTER TA
|
||||
\u0CA5>\uE025; # LETTER THA
|
||||
\u0CA6>\uE026; # LETTER DA
|
||||
\u0CA7>\uE027; # LETTER DHA
|
||||
\u0CA8>\uE028; # LETTER NA
|
||||
\u0CAA>\uE02A; # LETTER PA
|
||||
\u0CAB>\uE02B; # LETTER PHA
|
||||
\u0CAC>\uE02C; # LETTER BA
|
||||
\u0CAD>\uE02D; # LETTER BHA
|
||||
\u0CAE>\uE02E; # LETTER MA
|
||||
\u0CAF>\uE02F; # LETTER YA
|
||||
\u0CB0>\uE030; # LETTER RA
|
||||
\u0CB1>\uE031; # LETTER RRA
|
||||
\u0CB2>\uE032; # LETTER LA
|
||||
\u0CB3>\uE033; # LETTER LLA
|
||||
\u0CB5>\uE035; # LETTER VA
|
||||
\u0CB6>\uE036; # LETTER SHA
|
||||
\u0CB7>\uE037; # LETTER SSA
|
||||
\u0CB8>\uE038; # LETTER SA
|
||||
\u0CB9>\uE039; # LETTER HA
|
||||
\u0CBC>\uE03C; # SIGN NUKTA
|
||||
\u0CBD>\uE03D; # AVAGRAHA
|
||||
\u0CBE>\uE03E; # VOWEL SIGN AA
|
||||
\u0CBF>\uE03F; # VOWEL SIGN I
|
||||
\u0CC1>\uE041; # VOWEL SIGN U
|
||||
\u0CC2>\uE042; # VOWEL SIGN UU
|
||||
\u0CC3>\uE043; # VOWEL SIGN VOCALIC R
|
||||
\u0CC4>\uE044; # VOWEL SIGN VOCALIC RR
|
||||
\u0CC6>\uE046; # VOWEL SIGN E
|
||||
\u0CCC>\uE04C; # VOWEL SIGN AU
|
||||
\u0CCD>\uE04D; # SIGN VIRAMA
|
||||
\u0CD5>\uE055; # LENGTH MARK
|
||||
\u0CD6>\uE056; # AI LENGTH MARK
|
||||
\u0CDE>\uE034; # LETTER LLLA
|
||||
\u0CE0>\uE060; # LETTER VOCALIC RR
|
||||
\u0CE1>\uE061; # LETTER VOCALIC LL
|
||||
\u0CE6>\uE066; # DIGIT ZERO
|
||||
\u0CE7>\uE067; # DIGIT ONE
|
||||
\u0CE8>\uE068; # DIGIT TWO
|
||||
\u0CE9>\uE069; # DIGIT THREE
|
||||
\u0CEA>\uE06A; # DIGIT FOUR
|
||||
\u0CEB>\uE06B; # DIGIT FIVE
|
||||
\u0CEC>\uE06C; # DIGIT SIX
|
||||
\u0CED>\uE06D; # DIGIT SEVEN
|
||||
\u0CEE>\uE06E; # DIGIT EIGHT
|
||||
\u0CEF>\uE06F; # DIGIT NINE
|
||||
|
||||
# eof
|
383
icu4c/source/data/translit/Latin_InterIndic.txt
Normal file
383
icu4c/source/data/translit/Latin_InterIndic.txt
Normal file
|
@ -0,0 +1,383 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Latin-InterIndic
|
||||
#:: NFD;
|
||||
#\ue000 reserved
|
||||
#consonants
|
||||
$chandrabindu=\ue001;
|
||||
$anusvara=\ue002;
|
||||
$visarga=\ue003;
|
||||
#\ue004 reserved
|
||||
# w<vowel> represents the stand-alone form
|
||||
$wa=\ue005;
|
||||
$waa=\ue006;
|
||||
$wi=\ue007;
|
||||
$wii=\ue008;
|
||||
$wu=\ue009;
|
||||
$wuu=\ue00a;
|
||||
$wr=\ue00b;
|
||||
$wl=\ue00c;
|
||||
$wce=\ue00d; # LETTER CANDRA E
|
||||
$wse=\ue00e; # LETTER SHORT E
|
||||
$we=\ue00f; # \u090f LETTER E
|
||||
$wai=\ue010;
|
||||
$wco=\ue011; # LETTER CANDRA O
|
||||
$wso=\ue012; # LETTER SHORT O
|
||||
$wo=\ue013; # \u0913 LETTER O
|
||||
$wau=\ue014;
|
||||
$ka=\ue015;
|
||||
$kha=\ue016;
|
||||
$ga=\ue017;
|
||||
$gha=\ue018;
|
||||
$nga=\ue019;
|
||||
$ca=\ue01a;
|
||||
$cha=\ue01b;
|
||||
$ja=\ue01c;
|
||||
$jha=\ue01d;
|
||||
$nya=\ue01e;
|
||||
$tta=\ue01f;
|
||||
$ttha=\ue020;
|
||||
$dda=\ue021;
|
||||
$ddha=\ue022;
|
||||
$nna=\ue023;
|
||||
$ta=\ue024;
|
||||
$tha=\ue025;
|
||||
$da=\ue026;
|
||||
$dha=\ue027;
|
||||
$na=\ue028;
|
||||
$ena=\ue029; #compatibility
|
||||
$pa=\ue02a;
|
||||
$pha=\ue02b;
|
||||
$ba=\ue02c;
|
||||
$bha=\ue02d;
|
||||
$ma=\ue02e;
|
||||
$ya=\ue02f;
|
||||
$ra=\ue030;
|
||||
$rra=\ue031;
|
||||
$la=\ue032;
|
||||
$lla=\ue033;
|
||||
$ela=\ue034; #compatibility
|
||||
$va=\ue035;
|
||||
$vva=\ue081;
|
||||
$sha=\ue036;
|
||||
$ssa=\ue037;
|
||||
$sa=\ue038;
|
||||
$ha=\ue039;
|
||||
#\u093a Reserved
|
||||
#\u093b Reserved
|
||||
$nukta=\ue03c;
|
||||
$avagraha=\ue03d; # SIGN AVAGRAHA
|
||||
# <vowel> represents the dependent form
|
||||
$aa=\ue03e;
|
||||
$i=\ue03f;
|
||||
$ii=\ue040;
|
||||
$u=\ue041;
|
||||
$uu=\ue042;
|
||||
$rh=\ue043;
|
||||
$lh=\ue044;
|
||||
$ce=\ue045; #VOWEL SIGN CANDRA E
|
||||
$se=\ue046; #VOWEL SIGN SHORT E
|
||||
$e=\ue047;
|
||||
$ai=\ue048;
|
||||
$co=\ue049; # VOWEL SIGN CANDRA O
|
||||
$so=\ue04a; # VOWEL SIGN SHORT O
|
||||
$o=\ue04b; # \u094b
|
||||
$au=\ue04c;
|
||||
$virama=\ue04d;
|
||||
# \u094e Reserved
|
||||
# \u094f Reserved
|
||||
$om = \ue050; # OM
|
||||
# \u0951>; # UNMAPPED STRESS SIGN UDATTA
|
||||
# \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
|
||||
# \u0953>; # UNMAPPED GRAVE ACCENT
|
||||
# \u0954>; # UNMAPPED ACUTE ACCENT
|
||||
$lm = \ue055;# Telugu Length Mark
|
||||
$ailm=\ue056;# AI Length Mark
|
||||
$aulm=\ue057;# AU Length Mark
|
||||
#urdu compatibility forms
|
||||
$uka=\ue058;
|
||||
$ukha=\ue059;
|
||||
$ugha=\ue05a;
|
||||
$ujha=\ue05b;
|
||||
$uddha=\ue05c;
|
||||
$udha=\ue05d;
|
||||
$ufa=\ue05e;
|
||||
$uya=\ue05f;
|
||||
$wrr=\ue060;
|
||||
$wll=\ue061;
|
||||
$rrh=\ue062;
|
||||
$llh=\ue063;
|
||||
$danda=\ue064;
|
||||
$doubleDanda=\ue065;
|
||||
$zero=\ue066; # DIGIT ZERO
|
||||
$one=\ue067; # DIGIT ONE
|
||||
$two=\ue068; # DIGIT TWO
|
||||
$three=\ue069; # DIGIT THREE
|
||||
$four=\ue06a; # DIGIT FOUR
|
||||
$five=\ue06b; # DIGIT FIVE
|
||||
$six=\ue06c; # DIGIT SIX
|
||||
$seven=\ue06d; # DIGIT SEVEN
|
||||
$eight=\ue06e; # DIGIT EIGHT
|
||||
$nine=\ue06f; # DIGIT NINE
|
||||
# For all other scripts
|
||||
$ecp0=\ue070;
|
||||
$ecp1=\ue071;
|
||||
$ecp2=\ue072;
|
||||
$ecp3=\ue073;
|
||||
$ecp4=\ue074;
|
||||
$ecp5=\ue075;
|
||||
$ecp6=\ue076;
|
||||
$ecp7=\ue077;
|
||||
$ecp8=\ue078;
|
||||
$ecp9=\ue079;
|
||||
$ecpA=\ue07a;
|
||||
$ecpB=\ue07b;
|
||||
$ecpC=\ue07c;
|
||||
$ecpD=\ue07d;
|
||||
$ecpE=\ue07e;
|
||||
$ecpF=\ue07f;
|
||||
# \u0970>; # UNMAPPED ABBREVIATION SIGN
|
||||
$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
|
||||
$depVowelBelow=[\ue041-\ue044];
|
||||
$endThing=[$danda$doubleDanda];
|
||||
# $x was originally called '&'; $z was '%'
|
||||
$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
|
||||
$z=[bcdfghjklmnpqrstvwxyz];
|
||||
$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];
|
||||
\u0315 > $avagraha;
|
||||
\u0303>$chandrabindu$anusvara;
|
||||
m\u0310>$chandrabindu;
|
||||
h\u0323>$visarga;
|
||||
x>$ka$virama$sa;
|
||||
# convert to independent forms at start of word or syllable:
|
||||
# dependent forms for roundtrip
|
||||
\u0314a\u0304>$aa;
|
||||
\u0314ai>$ai;
|
||||
\u0314au>$au;
|
||||
\u0314ii>$ii;
|
||||
\u0314i\u0304>$ii;
|
||||
\u0314i>$i;
|
||||
\u0314u\u0304>$uu;
|
||||
\u0314u>$u;
|
||||
\u0314r\u0325\u0304>$rrh;
|
||||
\u0314r\u0325>$rh;
|
||||
\u0314l\u0325\u0304>$llh;
|
||||
\u0314lh>$lh;
|
||||
\u0314l\u0325>$lh;
|
||||
\u0314e\u0304>$e;
|
||||
\u0314o\u0304>$o;
|
||||
\u0314a>;
|
||||
\u0314e\u0306>$ce;
|
||||
\u0314o\u0306>$co;
|
||||
\u0314e>$se;
|
||||
\u0314o>$so;
|
||||
|
||||
# preceded by consonants
|
||||
$consonants{ a\u0304>$aa;
|
||||
$consonants{ ai>$ai;
|
||||
$consonants{ au>$au;
|
||||
$consonants{ ii>$ii;
|
||||
$consonants{ i\u0304>$ii;
|
||||
$consonants{ i>$i;
|
||||
$consonants{ u\u0304>$uu;
|
||||
$consonants{ u>$u;
|
||||
$consonants{ r\u0325\u0304>$rrh;
|
||||
$consonants{ r\u0325a>$rh;
|
||||
$consonants{ r\u0325>$rh;
|
||||
$consonants{ l\u0325\u0304>$llh;
|
||||
$consonants{ lh>$lh;
|
||||
$consonants{ l\u0325>$lh;
|
||||
$consonants{ e\u0304>$e;
|
||||
$consonants{ o\u0304>$o;
|
||||
$consonants{ e\u0306>$ce;
|
||||
$consonants{ o\u0306>$co;
|
||||
$consonants{ e>$se;
|
||||
$consonants{ o>$so;
|
||||
|
||||
# e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
|
||||
a\u0304>$waa;
|
||||
ai>$wai;
|
||||
au>$wau;
|
||||
i\u0304>$wii;
|
||||
i>$wi;
|
||||
u\u0304>$wuu;
|
||||
u>$wu;
|
||||
r\u0325\u0304>$wrr;
|
||||
r\u0325>$wr;
|
||||
l\u0325\u0304>$wll;
|
||||
lh>$wl;
|
||||
l\u0325>$wl;
|
||||
e\u0304>$we;
|
||||
o\u0304>$wo;
|
||||
a>$wa;
|
||||
e\u0306>$wce;
|
||||
o\u0306>$wco;
|
||||
e>$wse;
|
||||
''om>$om;
|
||||
o>$wso;
|
||||
|
||||
# rules for anusvara
|
||||
n}r\u0325 > $na|$virama;
|
||||
n}l\u0325 > $na|$virama;
|
||||
n}na > $na|$virama;
|
||||
n\u0307}[kg] > $anusvara;
|
||||
n\u0307}n\u0307 > $anusvara;
|
||||
n\u0304}[cj] > $anusvara;
|
||||
n\u0304}n\u0303 > $anusvara;
|
||||
n\u0323}[tdn]\u0323 > $anusvara;
|
||||
n}[tdn] > $anusvara;
|
||||
m}[pbm] > $anusvara;
|
||||
n}[ylvshr] > $anusvara;
|
||||
m\u0307 > $anusvara;
|
||||
|
||||
#urdu compatibility
|
||||
q>$uka|$virama;
|
||||
k\u0331h\u0331>$ukha |$virama;
|
||||
g\u0307> $ugha | $virama;
|
||||
z > $ujha |$virama;
|
||||
f > $ufa|$virama;
|
||||
|
||||
# dev
|
||||
y\u0307>$uya|$virama;
|
||||
l\u0331>$ela|$virama;
|
||||
n\u0331>$ena|$virama;
|
||||
n\u0307>$nga|$virama;
|
||||
n\u0303>$nya|$virama;
|
||||
n\u0323>$nna|$virama;
|
||||
t\u0323h>$ttha|$virama;
|
||||
t\u0323>$tta|$virama;
|
||||
r\u0323h>$udha|$virama;
|
||||
r\u0323>$uddha|$virama;
|
||||
d\u0323h>$ddha|$virama;
|
||||
d\u0323>$dda|$virama;
|
||||
kh>$kha|$virama;
|
||||
k>$ka|$virama;
|
||||
gh>$gha|$virama;
|
||||
g>$ga|$virama;
|
||||
ch>$cha|$virama;
|
||||
c>$ca|$virama;
|
||||
jh>$jha|$virama;
|
||||
j>$ja|$virama;
|
||||
ny>$nya|$virama;
|
||||
tth>$ttha|$virama;
|
||||
ddh>$ddha|$virama;
|
||||
th>$tha|$virama;
|
||||
t>$ta|$virama;
|
||||
dh>$dha|$virama;
|
||||
d>$da|$virama;
|
||||
n>$na|$virama;
|
||||
ph>$pha|$virama;
|
||||
p>$pa|$virama;
|
||||
bh>$bha|$virama;
|
||||
b>$ba|$virama;
|
||||
m>$ma|$virama;
|
||||
y>$ya|$virama;
|
||||
r\u0331>$rra|$virama;
|
||||
r>$ra|$virama;
|
||||
l\u0323>$lla|$virama;
|
||||
l>$la|$virama;
|
||||
v>$va|$virama;
|
||||
w\u0307>$vva|$virama;
|
||||
w>$va|$virama;
|
||||
sh>$sha|$virama;
|
||||
ss>$ssa|$virama;
|
||||
s\u0323>$ssa|$virama;
|
||||
s\u0301>$sha|$virama;
|
||||
s>$sa|$virama;
|
||||
h>$ha|$virama;
|
||||
'.'>$danda;
|
||||
$danda'.'>$doubleDanda;
|
||||
$depVowelAbove{'~'>$anusvara;
|
||||
$depVowelBelow{'~'>$chandrabindu;
|
||||
# convert to dependent forms after consonant with no vowel:
|
||||
# e.g. kai -> {ka}{virama}ai -> {ka}{ai}
|
||||
#$virama aa>$aa;
|
||||
$virama a\u0304>$aa;
|
||||
$virama ai>$ai;
|
||||
$virama au>$au;
|
||||
$virama ii>$ii;
|
||||
$virama i\u0304>$ii;
|
||||
$virama i>$i;
|
||||
#$virama uu>$uu;
|
||||
$virama u\u0304>$uu;
|
||||
$virama u>$u;
|
||||
#$virama rrh>$rrh;
|
||||
$virama r\u0325\u0304>$rrh;
|
||||
#$virama rh>$rh;
|
||||
$virama r\u0325a>$rh;
|
||||
$virama r\u0325>$rh;
|
||||
$virama l\u0325\u0304>$llh;
|
||||
$virama lh>$lh;
|
||||
$virama l\u0325>$lh;
|
||||
$virama e\u0304>$e;
|
||||
$virama o\u0304>$o;
|
||||
$virama a>;
|
||||
$virama e\u0306>$ce;
|
||||
$virama o\u0306>$co;
|
||||
$virama e>$se;
|
||||
$virama o>$so;
|
||||
|
||||
|
||||
# otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
|
||||
#$virama''aa>$waa;
|
||||
$virama''a\u0304>$waa;
|
||||
$virama''ai>$wai;
|
||||
$virama''au>$wau;
|
||||
#$virama''ii>$wii;
|
||||
$virama''i\u0304>$wii;
|
||||
$virama''i>$wi;
|
||||
#$virama''uu>$wuu;
|
||||
$virama''u\u0304>$wuu;
|
||||
$virama''u>$wu;
|
||||
#$virama''rrh>$wrr;
|
||||
$virama''r\u0325\u0304>$wrr;
|
||||
#$virama''rh>$wr;
|
||||
$virama''r\u0325>$wr;
|
||||
$virama''l\u0325\u0304>$wll;
|
||||
#$virama''lh>$wl;
|
||||
$virama''l\u0325>$wl;
|
||||
$virama''e\u0304>$we;
|
||||
$virama''o\u0304>$wo;
|
||||
$virama''a>$wa;
|
||||
$virama''e\u0306>$wce;
|
||||
$virama''o\u0306>$wco;
|
||||
$virama''e>$wse;
|
||||
$virama''o>$wso;
|
||||
# no virama
|
||||
''a\u0304>$waa;
|
||||
''ai>$wai;
|
||||
''au>$wau;
|
||||
''i\u0304>$wii;
|
||||
''i>$wi;
|
||||
''u\u0304>$wuu;
|
||||
''u>$wu;
|
||||
''r\u0325\u0304>$wrr;
|
||||
''r\u0325>$wr;
|
||||
''l\u0325\u0304>$wll;
|
||||
''l\u0325>$wl;
|
||||
''e\u0304>$we;
|
||||
''o\u0304>$wo;
|
||||
''a>$wa;
|
||||
''e\u0306>$wce;
|
||||
''o\u0306>$wco;
|
||||
''e>$wse;
|
||||
''o>$wso;
|
||||
|
||||
$virama } [$z] > $virama;
|
||||
$virama } ' ' > $virama ;
|
||||
$virama}$endThing>;
|
||||
0>$zero;
|
||||
1>$one;
|
||||
2>$two;
|
||||
3>$three;
|
||||
4>$four;
|
||||
5>$five;
|
||||
6>$six;
|
||||
7>$seven;
|
||||
8>$eight;
|
||||
9>$nine;
|
||||
''>;
|
||||
#:: NFC (NFD) ;
|
522
icu4c/source/data/translit/Latin_Jamo.txt
Normal file
522
icu4c/source/data/translit/Latin_Jamo.txt
Normal file
|
@ -0,0 +1,522 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
|
||||
#- the INDEX file. This transliterator is, by itself, not
|
||||
#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
|
||||
#- inverses thereof.
|
||||
|
||||
# Transliteration from Latin characters to Korean script is done in
|
||||
# two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul
|
||||
# transliteration is done algorithmically following Unicode 3.0
|
||||
# section 3.11. This file implements the Latin to Jamo
|
||||
# transliteration using rules.
|
||||
|
||||
# Jamo occupy the block 1100-11FF. Within this block there are three
|
||||
# groups of characters: initial consonants or choseong (I), medial
|
||||
# vowels or jungseong (M), and trailing consonants or jongseong (F).
|
||||
# Standard Korean syllables are of the form I+M+F*.
|
||||
|
||||
# Section 3.11 describes the use of 'filler' jamo to convert
|
||||
# nonstandard syllables to standard form: the choseong filler 115F and
|
||||
# the jungseong filler 1160. In this transliterator, we will not use
|
||||
# 115F or 1160.
|
||||
|
||||
# We will, however, insert two 'null' jamo to make foreign words
|
||||
# conform to Korean syllable structure. These are the null initial
|
||||
# consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,
|
||||
# we will use the separator in order to disambiguate strings,
|
||||
# e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).
|
||||
|
||||
# We will not use all of the characters in the jamo block. We will
|
||||
# only use the 19 initials, 21 medials, and 27 finals possessing a
|
||||
# jamo short name as defined in section 4.4 of the Unicode book.
|
||||
|
||||
# Rules of thumb. These guidelines provide the basic framework
|
||||
# for the rules. They are phrased in terms of Latin-Jamo transliteration.
|
||||
# The Jamo-Latin rules derive from these, since the Jamo-Latin rules are
|
||||
# just context-free transliteration of jamo to corresponding short names,
|
||||
# with the addition of separators to maintain round-trip integrity
|
||||
# in the context of the Latin-Jamo rules.
|
||||
|
||||
# A sequence of vowels:
|
||||
# - Take the longest sequence you can. If there are too many, or you don't
|
||||
# have a starting consonant, introduce a 110B as necessary.
|
||||
|
||||
# A sequence of consonants.
|
||||
# - First join the double consonants: G + G -> GG
|
||||
# - In the remaining list,
|
||||
# -- If there is no preceding vowel, take the first consonant, and insert EU
|
||||
# after it. Continue with the rest of the consonants.
|
||||
# -- If there is one consonant, attach to the following vowel
|
||||
# -- If there are two consonants and a following vowel, attach one to the
|
||||
# preceding vowel, and one to the following vowel.
|
||||
# -- If there are more than two consonants, join the first two together if you
|
||||
# can: L + G => LG
|
||||
# -- If you still end up with more than 2 consonants, insert EU after the
|
||||
# first one, and continue with the rest of the consonants.
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Variables
|
||||
|
||||
# Some latin consonants or consonant pairs only occur as initials, and
|
||||
# some only as finals, but some occur as both. This makes some jamo
|
||||
# consonants ambiguous when transliterated into latin.
|
||||
# Initial only: IEUNG BB DD JJ R
|
||||
# Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
|
||||
# Initial and Final: B C D G GG H J K M N P S SS T
|
||||
|
||||
$Gi = \u1100;
|
||||
$GGi = \u1101;
|
||||
$Ni = \u1102;
|
||||
$Di = \u1103;
|
||||
$DD = \u1104;
|
||||
$R = \u1105;
|
||||
$Mi = \u1106;
|
||||
$Bi = \u1107;
|
||||
$BB = \u1108;
|
||||
$Si = \u1109;
|
||||
$SSi = \u110A;
|
||||
$IEUNG = \u110B; # null initial, inserted during Latin-Jamo
|
||||
$Ji = \u110C;
|
||||
$JJ = \u110D;
|
||||
$Ci = \u110E;
|
||||
$Ki = \u110F;
|
||||
$Ti = \u1110;
|
||||
$Pi = \u1111;
|
||||
$Hi = \u1112;
|
||||
|
||||
$A = \u1161;
|
||||
$AE = \u1162;
|
||||
$YA = \u1163;
|
||||
$YAE = \u1164;
|
||||
$EO = \u1165;
|
||||
$E = \u1166;
|
||||
$YEO = \u1167;
|
||||
$YE = \u1168;
|
||||
$O = \u1169;
|
||||
$WA = \u116A;
|
||||
$WAE = \u116B;
|
||||
$OE = \u116C;
|
||||
$YO = \u116D;
|
||||
$U = \u116E;
|
||||
$WEO = \u116F;
|
||||
$WE = \u1170;
|
||||
$WI = \u1171;
|
||||
$YU = \u1172;
|
||||
$EU = \u1173; # null medial, inserted during Latin-Jamo
|
||||
$YI = \u1174;
|
||||
$I = \u1175;
|
||||
|
||||
$Gf = \u11A8;
|
||||
$GGf = \u11A9;
|
||||
$GS = \u11AA;
|
||||
$Nf = \u11AB;
|
||||
$NJ = \u11AC;
|
||||
$NH = \u11AD;
|
||||
$Df = \u11AE;
|
||||
$L = \u11AF;
|
||||
$LG = \u11B0;
|
||||
$LM = \u11B1;
|
||||
$LB = \u11B2;
|
||||
$LS = \u11B3;
|
||||
$LT = \u11B4;
|
||||
$LP = \u11B5;
|
||||
$LH = \u11B6;
|
||||
$Mf = \u11B7;
|
||||
$Bf = \u11B8;
|
||||
$BS = \u11B9;
|
||||
$Sf = \u11BA;
|
||||
$SSf = \u11BB;
|
||||
$NG = \u11BC;
|
||||
$Jf = \u11BD;
|
||||
$Cf = \u11BE;
|
||||
$Kf = \u11BF;
|
||||
$Tf = \u11C0;
|
||||
$Pf = \u11C1;
|
||||
$Hf = \u11C2;
|
||||
|
||||
$jamoInitial = [\u1100-\u1112];
|
||||
|
||||
$jamoMedial = [\u1161-\u1175];
|
||||
|
||||
$latinInitial = [bcdghjkmnprst];
|
||||
|
||||
# Any character in the latin transliteration of a medial
|
||||
$latinMedial = [aeiouwy];
|
||||
|
||||
# The last character of the latin transliteration of a medial
|
||||
$latinMedialEnd = [aeiou];
|
||||
|
||||
# Disambiguation separator
|
||||
$sep = \';
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Jamo-Latin
|
||||
|
||||
# Jamo to latin is relatively simple, since it is the latin that is
|
||||
# ambiguous. Most rules are straightforward, and we encode them below
|
||||
# as simple add-on back rules, e.g.:
|
||||
|
||||
# $jamoMedial {bs} > $BS;
|
||||
|
||||
# becomes
|
||||
|
||||
# $jamoMedial {bs} <> $BS;
|
||||
|
||||
# Furthermore, we don't care about the ordering for Jamo-Latin because
|
||||
# we are going from single characters, so we can very easily piggyback
|
||||
# on the Latin-Jamo.
|
||||
|
||||
# The main issue with Jamo-Latin is when to insert separators.
|
||||
# Separators are inserted to obtain correct round trip behavior. For
|
||||
# example, the sequence Ki A Gf Gi E, if transliterated to "kagge",
|
||||
# would then round trip to Ki A GGi E. To prevent this, we insert a
|
||||
# separator: "kag-ge". IMPORTANT: The need for separators depends
|
||||
# very specifically on the behavior of the Latin-Jamo rules. A change
|
||||
# in the Latin-Jamo behavior can completely change the way the
|
||||
# separator insertion must be done.
|
||||
|
||||
# First try to preserve actual separators in the jamo text by doubling
|
||||
# them. This fixes problems like:
|
||||
# (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol
|
||||
# => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional
|
||||
# -- if we don't care about losing separators in the jamo, we can delete
|
||||
# this rule.
|
||||
|
||||
$sep $sep <> $sep;
|
||||
|
||||
# Triple consonants. For three consonants "axxx" we insert a
|
||||
# separator between the first and second "x" if XXf, Xf, and Xi all
|
||||
# exist, and we have A Xf XXi. This prevents the reverse
|
||||
# transliteration to A XXf Xi.
|
||||
|
||||
$sep < $latinMedialEnd g {} $GGi;
|
||||
$sep < $latinMedialEnd s {} $SSi;
|
||||
|
||||
# For vowels the rule is similar. If there is a vowel "ae" such that
|
||||
# "a" by itself and "e" by itself are vowels, then we want to map A E
|
||||
# to "a-e" so as not to round trip to AE. However, in the text Ki EO
|
||||
# IEUNG E we don't need to map to "keo-e". "keoe" suffices. For
|
||||
# vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be
|
||||
# tested. NOTE: These rules used to have a left context of
|
||||
# $latinInitial instead of [^$latinMedial]. The problem with this is
|
||||
# sequences where an initial IEUNG is transliterated away:
|
||||
# (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O)
|
||||
|
||||
$sep < [^$latinMedial] [y w] e {} [$O $OE];
|
||||
$sep < [^$latinMedial] e {} [$O $OE $U];
|
||||
$sep < [^$latinMedial] [o a] {} [$E $EO $EU];
|
||||
$sep < [^$latinMedial] [w y] a {} [$E $EO $EU];
|
||||
|
||||
# Similar to the above, but with an intervening $IEUNG.
|
||||
|
||||
$sep < [^$latinMedial] [y w] e {} $IEUNG [$O $OE];
|
||||
$sep < [^$latinMedial] e {} $IEUNG [$O $OE $U];
|
||||
$sep < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];
|
||||
$sep < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];
|
||||
|
||||
# Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E,
|
||||
# where Xi also exists, must be transliterated as "ax-e" to prevent
|
||||
# the round trip conversion to A Xi E.
|
||||
|
||||
$sep < $latinMedialEnd b {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd c {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd d {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd g {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd h {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd j {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd k {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd m {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd n {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd p {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd s {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd t {} $IEUNG $jamoMedial;
|
||||
|
||||
# Double finals followed by IEUNG. Similar to the single finals
|
||||
# followed by IEUNG. Any latin consonant pair X Y, between medials,
|
||||
# that we would split by Latin-Jamo, we must handle when it occurs as
|
||||
# part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi
|
||||
# E.
|
||||
|
||||
$sep < $latinMedialEnd b s {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd g g {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd g s {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l b {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l g {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l h {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l m {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l p {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l s {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l t {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd n g {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd n h {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd n j {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd s s {} $IEUNG $jamoMedial;
|
||||
|
||||
# Split doubles. Text of the form A Xf Xi E, where XXi also occurs,
|
||||
# we transliterate as "ax-xe" to prevent round trip transliteration as
|
||||
# A XXi E.
|
||||
|
||||
$sep < $latinMedialEnd b {} $Bi $jamoMedial;
|
||||
$sep < $latinMedialEnd d {} $Di $jamoMedial;
|
||||
$sep < $latinMedialEnd j {} $Ji $jamoMedial;
|
||||
$sep < $latinMedialEnd g {} $Gi $jamoMedial;
|
||||
$sep < $latinMedialEnd s {} $Si $jamoMedial;
|
||||
|
||||
# XYY. This corresponds to the XYY rule in Latin-Jamo. By default
|
||||
# Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result,
|
||||
# "xyy" forms that correspond to XYf Yi must be transliterated as
|
||||
# "xy-y".
|
||||
|
||||
$sep < $latinMedialEnd b s {} [$Si $SSi];
|
||||
$sep < $latinMedialEnd g s {} [$Si $SSi];
|
||||
$sep < $latinMedialEnd l b {} [$Bi $BB];
|
||||
$sep < $latinMedialEnd l g {} [$Gi $GGi];
|
||||
$sep < $latinMedialEnd l s {} [$Si $SSi];
|
||||
$sep < $latinMedialEnd n g {} [$Gi $GGi];
|
||||
$sep < $latinMedialEnd n j {} [$Ji $JJ];
|
||||
|
||||
# Deletion of IEUNG is handled below.
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Latin-Jamo
|
||||
|
||||
# [Basic, context-free Jamo-Latin rules are embedded here too. See
|
||||
# above.]
|
||||
|
||||
# Split digraphs: Text of the form 'axye', where 'xy' is a final
|
||||
# digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
|
||||
# 'e' are medials, we want to transliterate this as A Xf Yi E rather
|
||||
# than A XYf IEUNG E. We do NOT include text of the form "axxe",
|
||||
# since that is handled differently below. These rules are generated
|
||||
# programmatically from the jamo data.
|
||||
|
||||
$jamoMedial {b s} $latinMedial > $Bf $Si;
|
||||
$jamoMedial {g s} $latinMedial > $Gf $Si;
|
||||
$jamoMedial {l b} $latinMedial > $L $Bi;
|
||||
$jamoMedial {l g} $latinMedial > $L $Gi;
|
||||
$jamoMedial {l h} $latinMedial > $L $Hi;
|
||||
$jamoMedial {l m} $latinMedial > $L $Mi;
|
||||
$jamoMedial {l p} $latinMedial > $L $Pi;
|
||||
$jamoMedial {l s} $latinMedial > $L $Si;
|
||||
$jamoMedial {l t} $latinMedial > $L $Ti;
|
||||
$jamoMedial {n g} $latinMedial > $Nf $Gi;
|
||||
$jamoMedial {n h} $latinMedial > $Nf $Hi;
|
||||
$jamoMedial {n j} $latinMedial > $Nf $Ji;
|
||||
|
||||
# Single consonants are initials: Text of the form 'axe', where 'x'
|
||||
# can be an initial or a final, and 'a' and 'e' are medials, we want
|
||||
# to transliterate as A Xi E rather than A Xf IEUNG E.
|
||||
|
||||
$jamoMedial {b} $latinMedial > $Bi;
|
||||
$jamoMedial {c} $latinMedial > $Ci;
|
||||
$jamoMedial {d} $latinMedial > $Di;
|
||||
$jamoMedial {g} $latinMedial > $Gi;
|
||||
$jamoMedial {h} $latinMedial > $Hi;
|
||||
$jamoMedial {j} $latinMedial > $Ji;
|
||||
$jamoMedial {k} $latinMedial > $Ki;
|
||||
$jamoMedial {m} $latinMedial > $Mi;
|
||||
$jamoMedial {n} $latinMedial > $Ni;
|
||||
$jamoMedial {p} $latinMedial > $Pi;
|
||||
$jamoMedial {s} $latinMedial > $Si;
|
||||
$jamoMedial {t} $latinMedial > $Ti;
|
||||
|
||||
# Doubled initials. The sequence "axxe", where XX exists as an initial
|
||||
# (XXi), and also Xi and Xf exist (true of all digraphs XX), we want
|
||||
# to transliterate as A XXi E, rather than split to A Xf Xi E.
|
||||
|
||||
$jamoMedial {b b} $latinMedial > $BB;
|
||||
$jamoMedial {d d} $latinMedial > $DD;
|
||||
$jamoMedial {j j} $latinMedial > $JJ;
|
||||
$jamoMedial {g g} $latinMedial > $GGi;
|
||||
$jamoMedial {s s} $latinMedial > $SSi;
|
||||
|
||||
# XYY. Because doubled consonants bind more strongly than XY
|
||||
# consonants, we must handle the sequence "axyy" specially. Here XYf
|
||||
# and YYi must exist. In these cases, we map to Xf YYi rather than
|
||||
# XYf.
|
||||
|
||||
$jamoMedial {b} s s > $Bf;
|
||||
$jamoMedial {g} s s > $Gf;
|
||||
$jamoMedial {l} b b > $L;
|
||||
$jamoMedial {l} g g > $L;
|
||||
$jamoMedial {l} s s > $L;
|
||||
$jamoMedial {n} g g > $Nf;
|
||||
$jamoMedial {n} j j > $Nf;
|
||||
|
||||
# Finals: Attach consonant with preceding medial to preceding medial.
|
||||
# Do this BEFORE mapping consonants to initials. Longer keys must
|
||||
# precede shorter keys that they start with, e.g., the rule for 'bs'
|
||||
# must precede 'b'.
|
||||
|
||||
# [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this
|
||||
# block for Jamo-Latin.]
|
||||
|
||||
$jamoMedial {bs} <> $BS;
|
||||
$jamoMedial {b} <> $Bf;
|
||||
$jamoMedial {c} <> $Cf;
|
||||
$jamoMedial {d} <> $Df;
|
||||
$jamoMedial {gg} <> $GGf;
|
||||
$jamoMedial {gs} <> $GS;
|
||||
$jamoMedial {g} <> $Gf;
|
||||
$jamoMedial {h} <> $Hf;
|
||||
$jamoMedial {j} <> $Jf;
|
||||
$jamoMedial {k} <> $Kf;
|
||||
$jamoMedial {lb} <> $LB; $jamoMedial {lg} <> $LG;
|
||||
$jamoMedial {lh} <> $LH;
|
||||
$jamoMedial {lm} <> $LM;
|
||||
$jamoMedial {lp} <> $LP;
|
||||
$jamoMedial {ls} <> $LS;
|
||||
$jamoMedial {lt} <> $LT;
|
||||
$jamoMedial {l} <> $L;
|
||||
$jamoMedial {m} <> $Mf;
|
||||
$jamoMedial {ng} <> $NG;
|
||||
$jamoMedial {nh} <> $NH;
|
||||
$jamoMedial {nj} <> $NJ;
|
||||
$jamoMedial {n} <> $Nf;
|
||||
$jamoMedial {p} <> $Pf;
|
||||
$jamoMedial {ss} <> $SSf;
|
||||
$jamoMedial {s} <> $Sf;
|
||||
$jamoMedial {t} <> $Tf;
|
||||
|
||||
# Initials: Attach single consonant to following medial. Do this
|
||||
# AFTER mapping finals. Longer keys must precede shorter keys that
|
||||
# they start with, e.g., the rule for 'gg' must precede 'g'.
|
||||
|
||||
# [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within
|
||||
# this block for Jamo-Latin.]
|
||||
|
||||
{gg} $latinMedial <> $GGi;
|
||||
{g} $latinMedial <> $Gi;
|
||||
{n} $latinMedial <> $Ni;
|
||||
{dd} $latinMedial <> $DD;
|
||||
{d} $latinMedial <> $Di;
|
||||
{r} $latinMedial <> $R;
|
||||
{m} $latinMedial <> $Mi;
|
||||
{bb} $latinMedial <> $BB;
|
||||
{b} $latinMedial <> $Bi;
|
||||
{ss} $latinMedial <> $SSi;
|
||||
{s} $latinMedial <> $Si;
|
||||
{jj} $latinMedial <> $JJ;
|
||||
{j} $latinMedial <> $Ji;
|
||||
{c} $latinMedial <> $Ci;
|
||||
{k} $latinMedial <> $Ki;
|
||||
{t} $latinMedial <> $Ti;
|
||||
{p} $latinMedial <> $Pi;
|
||||
{h} $latinMedial <> $Hi;
|
||||
|
||||
# 'r' in final position. Because of the equivalency of the 'l' and
|
||||
# 'r' jamo (the glyphs are the same), we try to provide the same
|
||||
# equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled
|
||||
# below. If we see an 'r' in an apparent final position, treat it
|
||||
# like 'l'. For example, "karka" => Ki A R EU Ki A without this rule.
|
||||
# Instead, we want Ki A L Ki A.
|
||||
|
||||
$jamoMedial {r} $latinInitial > | l;
|
||||
|
||||
# Initial + Final: If we match the next rule, we have initial then
|
||||
# final consonant with no intervening medial. We insert the null
|
||||
# vowel BEFORE it to create a well-formed syllable. (In the next rule
|
||||
# we insert a null vowel AFTER an anomalous initial.)
|
||||
|
||||
$jamoInitial {} [bcdghjklmnpst] > $EU;
|
||||
|
||||
# Initial + X: This block matches an initial consonant not followed by
|
||||
# a medial. We insert the null vowel after it. We handle double
|
||||
# initials explicitly here; for single initial consonants we insert EU
|
||||
# (as Latin) after them and let standard rules do the rest.
|
||||
|
||||
# BREAKS ROUND TRIP INTEGRITY
|
||||
|
||||
gg > $GGi $EU;
|
||||
dd > $DD $EU;
|
||||
bb > $BB $EU;
|
||||
ss > $SSi $EU;
|
||||
jj > $JJ $EU;
|
||||
|
||||
([bcdghjkmnprst]) > | $1 eu;
|
||||
|
||||
# X + Final: Finally we have to deal with a consonant that can only be
|
||||
# interpreted as a final (not an initial) and which is preceded
|
||||
# neither by an initial nor a medial. It is the start of the
|
||||
# syllable, but cannot be. Most of these will already be handled by
|
||||
# the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng'
|
||||
# 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'.
|
||||
# For this isolated case, we could add a null initial and medial,
|
||||
# which would give "la" => IEUNG EU L IEUNG A, for example. A more
|
||||
# economical solution is to transliterate isolated "l" (that is,
|
||||
# initial "l") to "r". (Other similar conversions of consonants that
|
||||
# occur neither as initials nor as finals are handled below.)
|
||||
|
||||
l > | r;
|
||||
|
||||
# Medials. If a medial is preceded by an initial, then we proceed
|
||||
# normally. As usual, longer keys must precede shorter ones.
|
||||
|
||||
# [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within
|
||||
# this block for Jamo-Latin.]
|
||||
|
||||
$jamoInitial {ae} <> $AE;
|
||||
$jamoInitial {a} <> $A;
|
||||
$jamoInitial {eo} <> $EO;
|
||||
$jamoInitial {eu} <> $EU;
|
||||
$jamoInitial {e} <> $E;
|
||||
$jamoInitial {i} <> $I;
|
||||
$jamoInitial {oe} <> $OE;
|
||||
$jamoInitial {o} <> $O;
|
||||
$jamoInitial {u} <> $U;
|
||||
$jamoInitial {wae} <> $WAE;
|
||||
$jamoInitial {wa} <> $WA;
|
||||
$jamoInitial {weo} <> $WEO;
|
||||
$jamoInitial {we} <> $WE;
|
||||
$jamoInitial {wi} <> $WI;
|
||||
$jamoInitial {yae} <> $YAE;
|
||||
$jamoInitial {ya} <> $YA;
|
||||
$jamoInitial {yeo} <> $YEO;
|
||||
$jamoInitial {ye} <> $YE;
|
||||
$jamoInitial {yi} <> $YI;
|
||||
$jamoInitial {yo} <> $YO;
|
||||
$jamoInitial {yu} <> $YU;
|
||||
|
||||
# We may see an anomalous isolated 'w' or 'y'. In that case, we
|
||||
# interpret it as 'wi' and 'yu', respectively.
|
||||
|
||||
# BREAKS ROUND TRIP INTEGRITY
|
||||
|
||||
$jamoInitial {w} > | wi;
|
||||
$jamoInitial {y} > | yu;
|
||||
|
||||
# Otherwise, insert a null consonant IEUNG before the medial (which is
|
||||
# still an untransliterated latin vowel).
|
||||
|
||||
($latinMedial) > $IEUNG | $1;
|
||||
|
||||
# Convert non-jamo latin consonants to equivalents. These occur as
|
||||
# neither initials nor finals in jamo. 'l' occurs as a final, but not
|
||||
# an initial; it is handled above. The following letters (left hand
|
||||
# side) will never be output by Jamo-Latin.
|
||||
|
||||
f > | p;
|
||||
q > | k;
|
||||
v > | b;
|
||||
x > | ks;
|
||||
z > | s;
|
||||
|
||||
# Delete separators (Latin-Jamo).
|
||||
|
||||
$sep > ;
|
||||
|
||||
# Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,
|
||||
# since these may also occur in text.
|
||||
|
||||
< $IEUNG;
|
||||
|
||||
#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
|
||||
#- the INDEX file. This transliterator is, by itself, not
|
||||
#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
|
||||
#- inverses thereof.
|
||||
|
||||
# eof
|
495
icu4c/source/data/translit/Latin_Katakana.txt
Normal file
495
icu4c/source/data/translit/Latin_Katakana.txt
Normal file
|
@ -0,0 +1,495 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# note: a global filter is more efficient, but MUST include all source chars
|
||||
#:: [\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]] ;
|
||||
# MINIMAL FILTER GENERATED FOR: Latin-Katakana
|
||||
### WARNING -- must add width filter, both here and below!!! ###
|
||||
:: [[\u1100-\u1112\u111A\u1121\u1160-\u1175\u11AA\u11AC-\u11AD\u11B0-\u11B5\u2190-\u2193\u2502\u25A0\u25CB\u3000-\u3002\u300C-\u300D\u3099-\u309A\u30A1-\u30ED\u30EF\u30F2-\u30F4\u30F7\u30FA-\u30FC\uFF01-\uFF5E\uFFE0-\uFFE6][',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]] ;
|
||||
|
||||
:: [:Latin:] fullwidth-halfwidth ();
|
||||
:: NFD (NFC);
|
||||
:: Lower (); # whenever transliterating from cased to uncased script, include this
|
||||
# :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese
|
||||
|
||||
# Uses modified Hepburn. Small changes to make unambiguous.
|
||||
|
||||
# | Kunrei-shiki: Hepburn/MHepburn
|
||||
# | ------------------------------
|
||||
# | si: shi
|
||||
# | si ~ya: sha
|
||||
# | si ~yu: shu
|
||||
# | si ~yo: sho
|
||||
# | zi: ji
|
||||
# | zi ~ya: ja
|
||||
# | zi ~yu: ju
|
||||
# | zi ~yo: jo
|
||||
# | ti: chi
|
||||
# | ti ~ya: cha
|
||||
# | ti ~yu: chu
|
||||
# | ti ~yo: cho
|
||||
# | tu: tsu
|
||||
# | di: ji/dji
|
||||
# | du: zu/dzu
|
||||
# | hu: fu
|
||||
|
||||
# | For foreign words:
|
||||
# | -----------------
|
||||
# | se ~i si
|
||||
# | si ~e she
|
||||
# |
|
||||
# | ze ~i zi
|
||||
# | zi ~e je
|
||||
# |
|
||||
# | te ~i ti
|
||||
# | ti ~e che
|
||||
# | te ~u tu
|
||||
# |
|
||||
# | de ~i di
|
||||
# | de ~u du
|
||||
# | de ~i di
|
||||
# |
|
||||
# | he ~u: hu
|
||||
# | hu ~a fa
|
||||
# | hu ~i fi
|
||||
# | hu ~e fe
|
||||
# | hu ~o fo
|
||||
|
||||
# Most small forms are generated, but if necessary
|
||||
# explicit small forms are given with ~a, ~ya, etc.
|
||||
|
||||
#------------------------------------------------------
|
||||
# Variables
|
||||
|
||||
$vowel = [aeiou] ;
|
||||
$consonant = [bcdfghjklmnpqrstvwxyz] ;
|
||||
$macron = \u0304 ;
|
||||
|
||||
# Variables used for doubled-consonants with tsu
|
||||
|
||||
$kana = [\u3041-\u3094] ;
|
||||
|
||||
$voice = [\u3099\u309B];
|
||||
$semivoice = [\u309A\u309C];
|
||||
|
||||
$k_start = [カキクケコかきくけこ] ;
|
||||
|
||||
$s_start = [サシスセソさしすせそ] ;
|
||||
|
||||
$j_start = [シし] $voice ;
|
||||
|
||||
$t_start = [タチツテトたちつてと] ;
|
||||
|
||||
$n_start = [ナニヌネノンなにぬねの] ;
|
||||
|
||||
$h_start = [ハヒヘホはひへほ] ;
|
||||
$f_start = [フふ] ;
|
||||
|
||||
$m_start = [マミムメモまみむめも] ;
|
||||
|
||||
$y_start = [ヤユヨやゆよ] ;
|
||||
|
||||
$r_start = [ラリルレロらりるれろ] ;
|
||||
|
||||
$w_start = [ワヰヱヲわゐゑを] ;
|
||||
|
||||
$v_start = [ワヰヱヲ]゙ ;
|
||||
|
||||
# if ン is followed by $n_quoter, then it needs an
|
||||
# apostrophe after its romaji form to disambiguate it.
|
||||
# e.g., ン ア != ナ, so represent as "n'a", not "na".
|
||||
|
||||
$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;
|
||||
|
||||
$small_y = [ャィュェョ] ;
|
||||
|
||||
$iteration = \u309D ;
|
||||
|
||||
#------------------------------------------------------
|
||||
# katakana rules
|
||||
|
||||
# Punctuation
|
||||
|
||||
'.' <> 。;
|
||||
',' <> 、;
|
||||
# ' ' } [a-z] > ; # delete spaces before latin
|
||||
# ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before katakana
|
||||
|
||||
# Iteration Mark
|
||||
# Copy previous letter & marks
|
||||
|
||||
# TODO
|
||||
# | $1 $1 < ($kana [[:M:]$voice$semivoice]?) $iteration
|
||||
|
||||
# Specials for katakana -- not shared with hiragana
|
||||
|
||||
va <> ヷ ;
|
||||
vi <> ヸ ;
|
||||
ve <> ヹ ;
|
||||
vo <> ヺ ;
|
||||
'~ka' <> ヵ ;
|
||||
'~ke' <> ヶ ;
|
||||
|
||||
# ~~~ begin shared rules ~~~
|
||||
|
||||
#special
|
||||
|
||||
ya < '~'ャ;
|
||||
yi < '~'ィ ;
|
||||
yu < '~'ュ;
|
||||
ye < '~'ェ;
|
||||
yo < '~'ョ;
|
||||
|
||||
#normal
|
||||
|
||||
a <> ア ;
|
||||
|
||||
b | '~' < ヒ ゙} $small_y ;
|
||||
by } $vowel > ビ | '~y' ;
|
||||
|
||||
ba <> バ ;
|
||||
bi <> ビ ;
|
||||
bu <> ブ ;
|
||||
be <> ベ ;
|
||||
bo <> ボ ;
|
||||
|
||||
c } i > | s ;
|
||||
c } e > | s ;
|
||||
|
||||
da <> ダ ;
|
||||
di <> ディ ;
|
||||
du <> デゥ ;
|
||||
de <> デ ;
|
||||
do <> ド ;
|
||||
dzu <> ヅ ;
|
||||
dja < ヂャ ;
|
||||
dji'~i' < ヂィ ; # liu
|
||||
dju < ヂュ ;
|
||||
dje < ヂェ ;
|
||||
djo < ヂョ ;
|
||||
dji <> ヂ ;
|
||||
dj } $vowel > ヂ | '~y' ;
|
||||
|
||||
# TODO: QUESTION: use ĵĴżŻ instead of dj, dz
|
||||
|
||||
cha < チャ ;
|
||||
chi'~i' < チィ ; # liu
|
||||
chu < チュ ;
|
||||
che < チェ ;
|
||||
cho < チョ ;
|
||||
chi <> チ ;
|
||||
ch } $vowel > チ | '~y' ;
|
||||
|
||||
e <> エ ;
|
||||
|
||||
g | '~' < ギ} $small_y ;
|
||||
gy } $vowel > ギ | '~y' ;
|
||||
|
||||
ga <> ガ ;
|
||||
gi <> ギ ;
|
||||
gu <> グ ;
|
||||
ge <> ゲ ;
|
||||
go <> ゴ ;
|
||||
|
||||
i <> イ ;
|
||||
|
||||
# j } $vowel > ジ | '~y' ;
|
||||
|
||||
ja <> ジャ ;
|
||||
ji'~i' < ジィ ; # liu
|
||||
ju <> ジュ ;
|
||||
je <> ジェ ;
|
||||
jo <> ジョ ;
|
||||
ji <> ジ ;
|
||||
|
||||
k | '~' < キ} $small_y ;
|
||||
ky } $vowel > キ | '~y' ;
|
||||
|
||||
ka <> カ ;
|
||||
ki <> キ ;
|
||||
ku <> ク ;
|
||||
ke <> ケ ;
|
||||
ko <> コ ;
|
||||
|
||||
m | '~' < ミ} $small_y ;
|
||||
my } $vowel > ミ | '~y' ;
|
||||
|
||||
ma <> マ ;
|
||||
mi <> ミ ;
|
||||
mu <> ム ;
|
||||
me <> メ ;
|
||||
mo <> モ ;
|
||||
|
||||
m } [pbfv] > ン ;
|
||||
|
||||
n | '~' < ニ } $small_y ;
|
||||
ny } $vowel > ニ | '~y' ;
|
||||
|
||||
na <> ナ ;
|
||||
ni <> ニ ;
|
||||
nu <> ヌ ;
|
||||
ne <> ネ ;
|
||||
no <> ノ ;
|
||||
|
||||
o <> オ ;
|
||||
|
||||
p | '~' < ピ } $small_y ;
|
||||
py } $vowel > ピ | '~y' ;
|
||||
|
||||
pa <> パ ;
|
||||
pi <> ピ ;
|
||||
pu <> プ ;
|
||||
pe <> ペ ;
|
||||
po <> ポ ;
|
||||
|
||||
h | '~' < ヒ } $small_y ;
|
||||
hy } $vowel > ヒ | '~y' ;
|
||||
|
||||
ha <> ハ ;
|
||||
hi <> ヒ ;
|
||||
hu <> ヘゥ ;
|
||||
he <> ヘ ;
|
||||
ho <> ホ ;
|
||||
|
||||
# f | '~' < フ } $small_y ;
|
||||
# f } $vowel > フ | '~' ;
|
||||
|
||||
fa <> ファ ;
|
||||
fi <> フィ ;
|
||||
fe <> フェ ;
|
||||
fo <> フォ ;
|
||||
fu <> フ ;
|
||||
|
||||
r | '~' < リ } $small_y ;
|
||||
ry } $vowel > リ | '~y' ;
|
||||
|
||||
ra <> ラ ;
|
||||
ri <> リ ;
|
||||
ru <> ル ;
|
||||
re <> レ ;
|
||||
ro <> ロ ;
|
||||
|
||||
za <> ザ ;
|
||||
zi <> ゼィ ;
|
||||
zu <> ズ ;
|
||||
ze <> ゼ ;
|
||||
zo <> ゾ ;
|
||||
|
||||
sa <> サ ;
|
||||
si <> セィ ;
|
||||
su <> ス ;
|
||||
se <> セ ;
|
||||
so <> ソ ;
|
||||
|
||||
sha < シャ ;
|
||||
shi'~i' < シィ ; # liu
|
||||
shu < シュ ;
|
||||
she < シェ ;
|
||||
sho < ショ ;
|
||||
shi <> シ ;
|
||||
sh } $vowel > シ | '~y' ;
|
||||
|
||||
ta <> タ ;
|
||||
ti <> ティ ;
|
||||
tu <> テゥ ;
|
||||
te <> テ ;
|
||||
to <> ト ;
|
||||
|
||||
tsu <> ツ ;
|
||||
|
||||
# v } $vowel > ヴ | '~' ;
|
||||
|
||||
#'v~a' < ヴァ ; # liu
|
||||
#'v~i' < ヴィ ; # liu
|
||||
#'v~e' < ヴェ ; # liu
|
||||
#'v~o' < ヴォ ; # liu
|
||||
vu <> ヴ ;
|
||||
|
||||
u <> ウ ;
|
||||
|
||||
# w } $vowel > ウ | '~' ;
|
||||
|
||||
wa <> ワ ;
|
||||
wi <> ヰ ;
|
||||
wu > ウ ;
|
||||
we <> ヱ ;
|
||||
wo <> ヲ ;
|
||||
|
||||
ya <> ヤ ;
|
||||
yi > イ ;
|
||||
yu <> ユ ;
|
||||
ye > エ ;
|
||||
yo <> ヨ ;
|
||||
|
||||
# double consonants
|
||||
|
||||
#specials
|
||||
s } sh > ッ ;
|
||||
t } ch > ッ ;
|
||||
|
||||
#voiced
|
||||
|
||||
j } j <> ッ } $j_start ;
|
||||
b } b <> ッ } [$h_start$f_start] $voice;
|
||||
d } d <> ッ } $t_start $voice;
|
||||
g } g <> ッ } $k_start $voice;
|
||||
p } p <> ッ } [$h_start$f_start] $semivoice;
|
||||
# v } v <> ッ } [ワヰウヱヲう] $voice ;
|
||||
z } z <> ッ } $s_start $voice;
|
||||
v } v <> ッ } $v_start;
|
||||
|
||||
# normal
|
||||
|
||||
k } k <> ッ } $k_start ;
|
||||
m } m <> ッ } $m_start ;
|
||||
n } n <> ッ } $n_start ;
|
||||
h } h <> ッ } $h_start ;
|
||||
f } f <> ッ } $f_start ;
|
||||
r } r <> ッ } $r_start ;
|
||||
t } t <> ッ } $t_start ;
|
||||
s } s <> ッ } $s_start ;
|
||||
|
||||
w } w <> ッ } $w_start;
|
||||
y } y <> ッ } $y_start;
|
||||
|
||||
# completeness
|
||||
x } x > ッ ;
|
||||
c } k > ッ ;
|
||||
c } c > ッ ;
|
||||
c } q > ッ ;
|
||||
l } l > ッ ;
|
||||
q } q > ッ ;
|
||||
# y } y > ッ ;
|
||||
# w } w > ッ ;
|
||||
|
||||
# prolonged vowel mark. this indicates a doubling of
|
||||
# the preceding vowel sound
|
||||
|
||||
#a < a { ー ; # liu
|
||||
#e < e { ー ; # liu
|
||||
#i < i { ー ; # liu
|
||||
#o < o { ー ; # liu
|
||||
#u < u { ー ; # liu
|
||||
|
||||
$macron <> ー ;
|
||||
|
||||
# small forms
|
||||
|
||||
'~a' <> ァ ;
|
||||
'~i' <> ィ ;
|
||||
'~u' <> ゥ ;
|
||||
'~e' <> ェ ;
|
||||
'~o' <> ォ ;
|
||||
'~tsu' <> ッ ;
|
||||
'~wa' <> ヮ ;
|
||||
'~ya' <> ャ ;
|
||||
'~yi' > ィ ;
|
||||
'~yu' <> ュ ;
|
||||
'~ye' > ェ ;
|
||||
'~yo' <> ョ ;
|
||||
|
||||
# iteration marks
|
||||
# TODO: make more accurate
|
||||
|
||||
j $1 < sh (y* $vowel) {ヽ$voice ;
|
||||
dj $1 < ch (y* $vowel) {ヽ$voice ;
|
||||
dz $1 < ts (y* $vowel) {ヽ$voice ;
|
||||
|
||||
g $1 < k (y* $vowel) {ヽ$voice ;
|
||||
z $1 < s (y* $vowel) {ヽ$voice ;
|
||||
d $1 < t (y* $vowel) {ヽ$voice ;
|
||||
h $1 < b (y* $vowel) {ヽ$voice ;
|
||||
v $1 < w (y* $vowel) {ヽ$voice ;
|
||||
|
||||
sh $1 < sh (y* $vowel) {ヽ$voice ;
|
||||
j $1 < j (y* $vowel) {ヽ$voice ;
|
||||
ch $1 < ch (y* $vowel) {ヽ$voice ;
|
||||
dj $1 < dj(y* $vowel) {ヽ$voice ;
|
||||
ts $1 < ts (y* $vowel) {ヽ$voice ;
|
||||
dz $1 < dz (y* $vowel) {ヽ$voice ;
|
||||
|
||||
$1 < ($consonant y* $vowel) {ヽ$voice? ;
|
||||
$1 < (.) {ヽ $voice? ; # otherwise repeat last character
|
||||
< ヽ $voice? ; # delete if no characters found
|
||||
|
||||
# h- rule: lengthens vowel if not followed by a vowel
|
||||
|
||||
[aeiou] } h > ー ;
|
||||
|
||||
# one-way latin -> kana rules. these do not occur in
|
||||
# well-formed romaji representing actual japanese text.
|
||||
# their purpose is to make all romaji map to kana of
|
||||
# some sort.
|
||||
|
||||
# the following are not really necessary, but produce
|
||||
# slightly more natural results.
|
||||
|
||||
cy > セィ ;
|
||||
dy > ディ ;
|
||||
hy > ヒ ;
|
||||
sy > セィ ;
|
||||
ty > ティ ;
|
||||
zy > ゼィ ;
|
||||
|
||||
h > ヘ ;
|
||||
|
||||
# isolated consonants listed here so as not to mask
|
||||
# longer rules above.
|
||||
|
||||
ch > チ;
|
||||
sh > シ ;
|
||||
dz > ヅ ;
|
||||
dj > ヂ;
|
||||
|
||||
b > ブ ;
|
||||
d > デ ;
|
||||
g > グ ;
|
||||
k > ク ;
|
||||
m > ム ;
|
||||
n'' < ン } $n_quoter ;
|
||||
n <> ン ;
|
||||
p > プ ;
|
||||
r > ル ;
|
||||
s > ス ;
|
||||
t > テ ;
|
||||
y > イ ;
|
||||
z > ズ ;
|
||||
v > ヴ ;
|
||||
|
||||
f > フ;
|
||||
j > ジ;
|
||||
w > ウ;
|
||||
|
||||
ß > | ss ;
|
||||
æ > | e ;
|
||||
ð > | d ;
|
||||
ø > | u ;
|
||||
þ > | th ;
|
||||
|
||||
# simple substitutions using backup
|
||||
|
||||
c > | k ;
|
||||
l > | r ;
|
||||
q > | k ;
|
||||
x > | ks ;
|
||||
|
||||
# ~~~ END shared rules ~~~
|
||||
|
||||
#------------------------------------------------------
|
||||
# Final cleanup
|
||||
|
||||
'~' > ; # delete stray tildes between letters
|
||||
[:Katakana:] { '' } [:Latin:] > ; # delete stray quotes between letters
|
||||
# [\u02BE[:Nonspacing Mark:]-[\u3099-\u309C]] > ; # delete any non-spacing marks that we didn't use
|
||||
|
||||
:: NFC (NFD) ;
|
||||
:: ([:Katakana:] halfwidth-fullwidth);
|
||||
|
||||
# note: a global filter is more efficient, but MUST include all source chars!!
|
||||
#:: ([\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]]);
|
||||
# MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
|
||||
:: ( [[\ -~\u00A2-\u00A3\u00A5-\u00A6\u00AC\u0304\u20A9\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE8-\uFFEE][~\u3001-\u3002\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u3099-\u309B\u309E\u30A1-\u30FA\u30FC-\u30FE]] ) ;
|
||||
|
||||
# eof
|
41
icu4c/source/data/translit/Latin_NumericPinyin.txt
Normal file
41
icu4c/source/data/translit/Latin_NumericPinyin.txt
Normal file
|
@ -0,0 +1,41 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# According to the pinyin definitions I've been able to find:
|
||||
# 'a', 'e' are the preferred bases
|
||||
# otherwise 'o'
|
||||
# otherwise last vowel
|
||||
|
||||
# The trailing form of syllables are the following:
|
||||
# "a", "ai", "ao", "an", "ang",
|
||||
# "o", "ou", "ong",
|
||||
# "e", "ei", "er", "en", "eng",
|
||||
# "i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong",
|
||||
# "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng",
|
||||
# "ü", "üe", "üan", "ün"
|
||||
# so the letters the tone will 'hop' are:
|
||||
|
||||
::NFD (NFC);
|
||||
$tone = [\u0304\u0301\u030C\u0300\u0306] ;
|
||||
|
||||
# Move the tone to the end of a syllable, and convert to number
|
||||
e {($tone) r} > r &tone-digit($1);
|
||||
($tone) ( [i o n u {o n} {n g}]) > $2 &tone-digit($1);
|
||||
($tone) > &tone-digit($1);
|
||||
|
||||
# The following backs up until it finds the right vowel, then deposits the tone
|
||||
|
||||
$vowel = [aAeEiIoOuUüÜ];
|
||||
$consonant = [[a-z A-Z] - [$vowel]];
|
||||
$digit = [1-5];
|
||||
$1 &digit-tone($3) $2 < ([aAeE]) ($vowel* $consonant*) ($digit);
|
||||
$1 &digit-tone($3) $2 < ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit);
|
||||
$1 &digit-tone($3) $2 < ($vowel) ($consonant*) ($digit);
|
||||
&digit-tone($1) < [:letter:] {($digit)};
|
||||
|
||||
::NFC (NFD);
|
||||
|
||||
|
||||
|
85
icu4c/source/data/translit/Malayalam_InterIndic.txt
Normal file
85
icu4c/source/data/translit/Malayalam_InterIndic.txt
Normal file
|
@ -0,0 +1,85 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Malayalam-InterIndic
|
||||
#:: NFD (NFC) ;
|
||||
|
||||
\u0D02>\uE002; # SIGN ANUSVARA
|
||||
\u0D03>\uE003; # SIGN VISARGA
|
||||
\u0D05>\uE005; # LETTER A
|
||||
\u0D06>\uE006; # LETTER AA
|
||||
\u0D07>\uE007; # LETTER I
|
||||
\u0D08>\uE008; # LETTER II
|
||||
\u0D09>\uE009; # LETTER U
|
||||
\u0D0A>\uE00A; # LETTER UU
|
||||
\u0D0B>\uE00B; # LETTER VOCALIC R
|
||||
\u0D0C>\uE00C; # LETTER VOCALIC L
|
||||
\u0D0E>\uE00E; # LETTER E
|
||||
\u0D0F>\uE00F; # LETTER EE
|
||||
\u0D10>\uE010; # LETTER AI
|
||||
\u0D12>\uE012; # LETTER O
|
||||
\u0D13>\uE013; # LETTER OO
|
||||
\u0D14>\uE014; # LETTER AU
|
||||
\u0D15>\uE015; # LETTER KA
|
||||
\u0D16>\uE016; # LETTER KHA
|
||||
\u0D17>\uE017; # LETTER GA
|
||||
\u0D18>\uE018; # LETTER GHA
|
||||
\u0D19>\uE019; # LETTER NGA
|
||||
\u0D1A>\uE01A; # LETTER CA
|
||||
\u0D1B>\uE01B; # LETTER CHA
|
||||
\u0D1C>\uE01C; # LETTER JA
|
||||
\u0D1D>\uE01D; # LETTER JHA
|
||||
\u0D1E>\uE01E; # LETTER NYA
|
||||
\u0D1F>\uE01F; # LETTER TTA
|
||||
\u0D20>\uE020; # LETTER TTHA
|
||||
\u0D21>\uE021; # LETTER DDA
|
||||
\u0D22>\uE022; # LETTER DDHA
|
||||
\u0D23>\uE023; # LETTER NNA
|
||||
\u0D24>\uE024; # LETTER TA
|
||||
\u0D25>\uE025; # LETTER THA
|
||||
\u0D26>\uE026; # LETTER DA
|
||||
\u0D27>\uE027; # LETTER DHA
|
||||
\u0D28>\uE028; # LETTER NA
|
||||
\u0D2A>\uE02A; # LETTER PA
|
||||
\u0D2B>\uE02B; # LETTER PHA
|
||||
\u0D2C>\uE02C; # LETTER BA
|
||||
\u0D2D>\uE02D; # LETTER BHA
|
||||
\u0D2E>\uE02E; # LETTER MA
|
||||
\u0D2F>\uE02F; # LETTER YA
|
||||
\u0D30>\uE030; # LETTER RA
|
||||
\u0D31>\uE031; # LETTER RRA
|
||||
\u0D32>\uE032; # LETTER LA
|
||||
\u0D33>\uE033; # LETTER LLA
|
||||
\u0D34>\uE034; # LETTER LLLA
|
||||
\u0D35>\uE035; # LETTER VA
|
||||
\u0D36>\uE036; # LETTER SHA
|
||||
\u0D37>\uE037; # LETTER SSA
|
||||
\u0D38>\uE038; # LETTER SA
|
||||
\u0D39>\uE039; # LETTER HA
|
||||
\u0D3E>\uE03E; # VOWEL SIGN AA
|
||||
\u0D3F>\uE03F; # VOWEL SIGN I
|
||||
\u0D40>\uE040; # VOWEL SIGN II
|
||||
\u0D41>\uE041; # VOWEL SIGN U
|
||||
\u0D42>\uE042; # VOWEL SIGN UU
|
||||
\u0D43>\uE043; # VOWEL SIGN VOCALIC R
|
||||
\u0D46>\uE046; # VOWEL SIGN E
|
||||
\u0D47>\uE047; # VOWEL SIGN EE
|
||||
\u0D48>\uE048; # VOWEL SIGN AI
|
||||
\u0D4D>\uE04D; # SIGN VIRAMA
|
||||
\u0D57>\uE057; # AU LENGTH MARK
|
||||
\u0D60>\uE060; # LETTER VOCALIC RR
|
||||
\u0D61>\uE061; # LETTER VOCALIC LL
|
||||
\u0D66>\uE066; # DIGIT ZERO
|
||||
\u0D67>\uE067; # DIGIT ONE
|
||||
\u0D68>\uE068; # DIGIT TWO
|
||||
\u0D69>\uE069; # DIGIT THREE
|
||||
\u0D6A>\uE06A; # DIGIT FOUR
|
||||
\u0D6B>\uE06B; # DIGIT FIVE
|
||||
\u0D6C>\uE06C; # DIGIT SIX
|
||||
\u0D6D>\uE06D; # DIGIT SEVEN
|
||||
\u0D6E>\uE06E; # DIGIT EIGHT
|
||||
\u0D6F>\uE06F; # DIGIT NINE
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
95
icu4c/source/data/translit/Oriya_InterIndic.txt
Normal file
95
icu4c/source/data/translit/Oriya_InterIndic.txt
Normal file
|
@ -0,0 +1,95 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Oriya-InterIndic
|
||||
#:: NFD (NFC) ;
|
||||
#\u0B21\u0B3C>\uE05C;# LETTER RRA
|
||||
#\u0B22\u0B3C>\uE05D;# LETTER RHA
|
||||
\u0B47\u0B56>\uE048;# VOWEL SIGN AI
|
||||
\u0B47\u0B3E>\uE04B;# VOWEL SIGN O
|
||||
\u0B47\u0B57>\uE04C;# VOWEL SIGN AU
|
||||
|
||||
\u0B01>\uE001; # SIGN CANDRABINDU
|
||||
\u0B02>\uE002; # SIGN ANUSVARA
|
||||
\u0B03>\uE003; # SIGN VISARGA
|
||||
\u0B05>\uE005; # LETTER A
|
||||
\u0B06>\uE006; # LETTER AA
|
||||
\u0B07>\uE007; # LETTER I
|
||||
\u0B08>\uE008; # LETTER II
|
||||
\u0B09>\uE009; # LETTER U
|
||||
\u0B0A>\uE00A; # LETTER UU
|
||||
\u0B0B>\uE00B; # LETTER VOCALIC R
|
||||
\u0B0C>\uE00C; # LETTER VOCALIC L
|
||||
\u0B0F>\uE00F; # LETTER E
|
||||
\u0B10>\uE010; # LETTER AI
|
||||
\u0B13>\uE013; # LETTER O
|
||||
\u0B14>\uE014; # LETTER AU
|
||||
\u0B15>\uE015; # LETTER KA
|
||||
\u0B16>\uE016; # LETTER KHA
|
||||
\u0B17>\uE017; # LETTER GA
|
||||
\u0B18>\uE018; # LETTER GHA
|
||||
\u0B19>\uE019; # LETTER NGA
|
||||
\u0B1A>\uE01A; # LETTER CA
|
||||
\u0B1B>\uE01B; # LETTER CHA
|
||||
\u0B1C>\uE01C; # LETTER JA
|
||||
\u0B1D>\uE01D; # LETTER JHA
|
||||
\u0B1E>\uE01E; # LETTER NYA
|
||||
\u0B1F>\uE01F; # LETTER TTA
|
||||
\u0B20>\uE020; # LETTER TTHA
|
||||
\u0B21>\uE021; # LETTER DDA
|
||||
\u0B22>\uE022; # LETTER DDHA
|
||||
\u0B23>\uE023; # LETTER NNA
|
||||
\u0B24>\uE024; # LETTER TA
|
||||
\u0B25>\uE025; # LETTER THA
|
||||
\u0B26>\uE026; # LETTER DA
|
||||
\u0B27>\uE027; # LETTER DHA
|
||||
\u0B28>\uE028; # LETTER NA
|
||||
\u0B2A>\uE02A; # LETTER PA
|
||||
\u0B2B>\uE02B; # LETTER PHA
|
||||
\u0B2C>\uE02C; # LETTER BA
|
||||
\u0B2D>\uE02D; # LETTER BHA
|
||||
\u0B2E>\uE02E; # LETTER MA
|
||||
\u0B2F>\uE02F; # LETTER YA
|
||||
\u0B30>\uE030; # LETTER RA
|
||||
\u0B32>\uE032; # LETTER LA
|
||||
\u0B33>\uE033; # LETTER LLA
|
||||
\u0B35>\uE035; # LETTER VA
|
||||
\u0B36>\uE036; # LETTER SHA
|
||||
\u0B37>\uE037; # LETTER SSA
|
||||
\u0B38>\uE038; # LETTER SA
|
||||
\u0B39>\uE039; # LETTER HA
|
||||
\u0B3C>\uE03C; # SIGN NUKTA
|
||||
\u0B3D>\uE03D; # SIGN AVAGRAHA
|
||||
\u0B3E>\uE03E; # VOWEL SIGN AA
|
||||
\u0B3F>\uE03F; # VOWEL SIGN I
|
||||
\u0B40>\uE040; # VOWEL SIGN II
|
||||
\u0B41>\uE041; # VOWEL SIGN U
|
||||
\u0B42>\uE042; # VOWEL SIGN UU
|
||||
\u0B43>\uE043; # VOWEL SIGN VOCALIC R
|
||||
\u0B47>\uE047; # VOWEL SIGN E
|
||||
#
|
||||
\u0B4D>\uE04D; # SIGN VIRAMA
|
||||
\u0B56>\uE056; # AI LENGTH MARK
|
||||
\u0B57>\uE057; # AU LENGTH MARK
|
||||
\u0964>\ue064; # DANDA
|
||||
\u0965>\ue065; # DOUBLE DANDA
|
||||
#
|
||||
\u0B5F>\uE05F; # LETTER YYA
|
||||
\u0B60>\uE060; # LETTER VOCALIC RR
|
||||
\u0B61>\uE061; # LETTER VOCALIC LL
|
||||
\u0B66>\uE066; # DIGIT ZERO
|
||||
\u0B67>\uE067; # DIGIT ONE
|
||||
\u0B68>\uE068; # DIGIT TWO
|
||||
\u0B69>\uE069; # DIGIT THREE
|
||||
\u0B6A>\uE06A; # DIGIT FOUR
|
||||
\u0B6B>\uE06B; # DIGIT FIVE
|
||||
\u0B6C>\uE06C; # DIGIT SIX
|
||||
\u0B6D>\uE06D; # DIGIT SEVEN
|
||||
\u0B6E>\uE06E; # DIGIT EIGHT
|
||||
\u0B6F>\uE06F; # DIGIT NINE
|
||||
\u0B70>\ue07B; # ISSHAR
|
||||
\u0B71>\ue081; # LETTER WA
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
76
icu4c/source/data/translit/Tamil_InterIndic.txt
Normal file
76
icu4c/source/data/translit/Tamil_InterIndic.txt
Normal file
|
@ -0,0 +1,76 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Tamil-InterIndic
|
||||
#:: NFD (NFC) ;
|
||||
|
||||
\u0BC6\u0BBE>\uE04A;# VOWEL SIGN O
|
||||
\u0BC7\u0BBE>\uE04B;# VOWEL SIGN OO
|
||||
\u0BC6\u0BD7>\uE04C;# VOWEL SIGN AU
|
||||
\u0B92\u0BD7>\uE014;# LETTER AU
|
||||
|
||||
\u0B82>\uE002; # SIGN ANUSVARA
|
||||
\u0B83>\uE003; # SIGN VISARGA
|
||||
\u0B85>\uE005; # LETTER A
|
||||
\u0B86>\uE006; # LETTER AA
|
||||
\u0B87>\uE007; # LETTER I
|
||||
\u0B88>\uE008; # LETTER II
|
||||
\u0B89>\uE009; # LETTER U
|
||||
\u0B8A>\uE00A; # LETTER UU
|
||||
\u0B8E>\uE00E; # LETTER E
|
||||
\u0B8F>\uE00F; # LETTER EE
|
||||
\u0B90>\uE010; # LETTER AI
|
||||
\u0B92>\uE012; # LETTER O
|
||||
\u0B93>\uE013; # LETTER OO
|
||||
\u0B94>\uE014; # LETTER AU
|
||||
\u0B95>\uE015; # LETTER KA
|
||||
\u0B99>\uE019; # LETTER NGA
|
||||
\u0B9A>\uE01A; # LETTER CA
|
||||
\u0B9C>\uE01C; # LETTER JA
|
||||
\u0B9E>\uE01E; # LETTER NYA
|
||||
\u0B9F>\uE01F; # LETTER TTA
|
||||
\u0BA3>\uE023; # LETTER NNA
|
||||
\u0BA4>\uE024; # LETTER TA
|
||||
\u0BA8>\uE028; # LETTER NA
|
||||
\u0BA9>\uE029; # LETTER NNNA
|
||||
\u0BAA>\uE02A; # LETTER PA
|
||||
\u0BAE>\uE02E; # LETTER MA
|
||||
\u0BAF>\uE02F; # LETTER YA
|
||||
\u0BB0>\uE030; # LETTER RA
|
||||
\u0BB1>\uE031; # LETTER RRA
|
||||
\u0BB2>\uE032; # LETTER LA
|
||||
\u0BB3>\uE033; # LETTER LLA
|
||||
\u0BB4>\uE034; # LETTER LLLA
|
||||
\u0BB5>\uE035; # LETTER VA
|
||||
\u0BB7>\uE037; # LETTER SSA
|
||||
\u0BB8>\uE038; # LETTER SA
|
||||
\u0BB9>\uE039; # LETTER HA
|
||||
\u0BBE>\uE03E; # VOWEL SIGN AA
|
||||
\u0BBF>\uE03F; # VOWEL SIGN I
|
||||
\u0BC0>\uE040; # VOWEL SIGN II
|
||||
\u0BC1>\uE041; # VOWEL SIGN U
|
||||
\u0BC2>\uE042; # VOWEL SIGN UU
|
||||
\u0BC6>\uE046; # VOWEL SIGN E
|
||||
\u0BC7>\uE047; # VOWEL SIGN EE
|
||||
\u0BC8>\uE048; # VOWEL SIGN AI
|
||||
|
||||
\u0BCD>\uE04D; # SIGN VIRAMA
|
||||
\u0BD7>\uE057; # AU LENGTH MARK
|
||||
\u0BE7>\uE067; # DIGIT ONE
|
||||
\u0BE8>\uE068; # DIGIT TWO
|
||||
\u0BE9>\uE069; # DIGIT THREE
|
||||
\u0BEA>\uE06A; # DIGIT FOUR
|
||||
\u0BEB>\uE06B; # DIGIT FIVE
|
||||
\u0BEC>\uE06C; # DIGIT SIX
|
||||
\u0BED>\uE06D; # DIGIT SEVEN
|
||||
\u0BEE>\uE06E; # DIGIT EIGHT
|
||||
\u0BEF>\uE06F; # DIGIT NINE
|
||||
\u0BF0>\uE067\uE066; # UNMAPPED Tamil-InterIndic: NUMBER TEN
|
||||
\u0BF1>\uE067\uE066\uE066; # UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED
|
||||
\u0BF2>\uE067\uE066\uE066\uE066;# UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND
|
||||
0>\ue066;
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
90
icu4c/source/data/translit/Telugu_InterIndic.txt
Normal file
90
icu4c/source/data/translit/Telugu_InterIndic.txt
Normal file
|
@ -0,0 +1,90 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Telugu-InterIndic
|
||||
#:: NFD (NFC) ;
|
||||
\u0c46\u0c4d\u0c56>\ue048\ue04d;
|
||||
\u0C46\u0C56>\uE048;# VOWEL SIGN AI
|
||||
\u0C01>\uE001; # SIGN CANDRABINDU
|
||||
\u0C02>\uE002; # SIGN ANUSVARA
|
||||
\u0C03>\uE003; # SIGN VISARGA
|
||||
\u0C05>\uE005; # LETTER A
|
||||
\u0C06>\uE006; # LETTER AA
|
||||
\u0C07>\uE007; # LETTER I
|
||||
\u0C08>\uE008; # LETTER II
|
||||
\u0C09>\uE009; # LETTER U
|
||||
\u0C0A>\uE00A; # LETTER UU
|
||||
\u0C0B>\uE00B; # LETTER VOCALIC R
|
||||
\u0C0C>\uE00C; # LETTER VOCALIC L
|
||||
\u0C0E>\uE00E; # LETTER E
|
||||
\u0C0F>\uE00F; # LETTER EE
|
||||
\u0C10>\uE010; # LETTER AI
|
||||
\u0C12>\uE012; # LETTER O
|
||||
\u0C13>\uE013; # LETTER OO
|
||||
\u0C14>\uE014; # LETTER AU
|
||||
\u0C15>\uE015; # LETTER KA
|
||||
\u0C16>\uE016; # LETTER KHA
|
||||
\u0C17>\uE017; # LETTER GA
|
||||
\u0C18>\uE018; # LETTER GHA
|
||||
\u0C19>\uE019; # LETTER NGA
|
||||
\u0C1A>\uE01A; # LETTER CA
|
||||
\u0C1B>\uE01B; # LETTER CHA
|
||||
\u0C1C>\uE01C; # LETTER JA
|
||||
\u0C1D>\uE01D; # LETTER JHA
|
||||
\u0C1E>\uE01E; # LETTER NYA
|
||||
\u0C1F>\uE01F; # LETTER TTA
|
||||
\u0C20>\uE020; # LETTER TTHA
|
||||
\u0C21>\uE021; # LETTER DDA
|
||||
\u0C22>\uE022; # LETTER DDHA
|
||||
\u0C23>\uE023; # LETTER NNA
|
||||
\u0C24>\uE024; # LETTER TA
|
||||
\u0C25>\uE025; # LETTER THA
|
||||
\u0C26>\uE026; # LETTER DA
|
||||
\u0C27>\uE027; # LETTER DHA
|
||||
\u0C28>\uE028; # LETTER NA
|
||||
\u0C2A>\uE02A; # LETTER PA
|
||||
\u0C2B>\uE02B; # LETTER PHA
|
||||
\u0C2C>\uE02C; # LETTER BA
|
||||
\u0C2D>\uE02D; # LETTER BHA
|
||||
\u0C2E>\uE02E; # LETTER MA
|
||||
\u0C2F>\uE02F; # LETTER YA
|
||||
\u0C30>\uE030; # LETTER RA
|
||||
\u0C31>\uE031; # LETTER RRA
|
||||
\u0C32>\uE032; # LETTER LA
|
||||
\u0C33>\uE033; # LETTER LLA
|
||||
\u0C35>\uE035; # LETTER VA
|
||||
\u0C36>\uE036; # LETTER SHA
|
||||
\u0C37>\uE037; # LETTER SSA
|
||||
\u0C38>\uE038; # LETTER SA
|
||||
\u0C39>\uE039; # LETTER HA
|
||||
\u0C3E>\uE03E; # VOWEL SIGN AA
|
||||
\u0C3F>\uE03F; # VOWEL SIGN I
|
||||
\u0C40>\uE040; # VOWEL SIGN II
|
||||
\u0C41>\uE041; # VOWEL SIGN U
|
||||
\u0C42>\uE042; # VOWEL SIGN UU
|
||||
\u0C43>\uE043; # VOWEL SIGN VOCALIC R
|
||||
\u0C44>\uE044; # VOWEL SIGN VOCALIC RR
|
||||
\u0C46>\uE046; # VOWEL SIGN E
|
||||
\u0C47>\uE047; # VOWEL SIGN EE
|
||||
\u0C4A>\uE04A; # VOWEL SIGN O
|
||||
\u0C4B>\uE04B; # VOWEL SIGN OO
|
||||
\u0C4C>\uE04C; # VOWEL SIGN AU
|
||||
\u0C4D>\uE04D; # SIGN VIRAMA
|
||||
\u0C55>\uE055; # LENGTH MARK
|
||||
\u0C56>\uE056; # AI LENGTH MARK
|
||||
\u0C60>\uE060; # LETTER VOCALIC RR
|
||||
\u0C61>\uE061; # LETTER VOCALIC LL
|
||||
\u0C66>\uE066; # DIGIT ZERO
|
||||
\u0C67>\uE067; # DIGIT ONE
|
||||
\u0C68>\uE068; # DIGIT TWO
|
||||
\u0C69>\uE069; # DIGIT THREE
|
||||
\u0C6A>\uE06A; # DIGIT FOUR
|
||||
\u0C6B>\uE06B; # DIGIT FIVE
|
||||
\u0C6C>\uE06C; # DIGIT SIX
|
||||
\u0C6D>\uE06D; # DIGIT SEVEN
|
||||
\u0C6E>\uE06E; # DIGIT EIGHT
|
||||
\u0C6F>\uE06F; # DIGIT NINE
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
187
icu4c/source/data/translit/ThaiLogical_Latin.txt
Normal file
187
icu4c/source/data/translit/ThaiLogical_Latin.txt
Normal file
|
@ -0,0 +1,187 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Thai-Latin
|
||||
# This set of rules follows ISO 11940
|
||||
# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
|
||||
# except that it does not mention an implicit vowel, so we use ọ
|
||||
#
|
||||
# The transcription is fairly ugly, so we ought to also do the UNGEGN version
|
||||
# see: http://www.eki.ee/wgrs/rom1_th.pdf
|
||||
# and probably make that the main variant.
|
||||
|
||||
# Note: this is an internal file. The NFD/NFC is handled externally, in the index
|
||||
# The insertion of spaces between words, the reversal of the vowels
|
||||
# and the conversion of space to semicolon are done *outside* of these rules.
|
||||
# So as far as these rules are concerned, the vowels are in logical order!
|
||||
|
||||
# insert implicit vowel (and remove it going the other way)
|
||||
# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
|
||||
#$consonant = [ก-ฮ];
|
||||
#$vowel = [ะ-ฺเ-ไ็];
|
||||
|
||||
#{ ( $consonant ) } [^$vowel ] > | $1 ;
|
||||
# > ọ ;
|
||||
# < ọ ;
|
||||
|
||||
$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
|
||||
$notBelow = [^\p{ccc=0}\p{ccc=below}] ;
|
||||
|
||||
# Consonants
|
||||
# Warning: the 'h's need to be handled carefully!
|
||||
# What we really want to say is the following, but we can't
|
||||
# $notHAccent = !($notAbove* ̄ | $notBelow* ̣) ;
|
||||
|
||||
# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
|
||||
$freeStandingBelow = [\u0325 ];
|
||||
$hAccent = [ ̄ ̣]; # accents that may follow an 'h'; the terminating ';' is required so this definition does not run into the next one
|
||||
$notHAccent0 = [^$freeStandingBelow$hAccent];
|
||||
$notHAccent1 = $freeStandingBelow [^$hAccent];
|
||||
|
||||
ห > h̄ ; # THAI CHARACTER HO HIP
|
||||
ห | $1 < h ($notAbove*) ̄; # backward case, account for reordering
|
||||
ฮ <> ḥ ; # THAI CHARACTER HO NOKHUK
|
||||
|
||||
ข <> k̄h ; # THAI CHARACTER KHO KHAI
|
||||
ฃ <> ḳ̄h ; # THAI CHARACTER KHO KHUAT
|
||||
ฅ <> kʹh ; # THAI CHARACTER KHO KHON
|
||||
ฆ <> ḳh ; # THAI CHARACTER KHO RAKHANG
|
||||
ค < kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
|
||||
ค <> kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
|
||||
ก <> k ; # THAI CHARACTER KO KAI
|
||||
|
||||
ภ <> p̣h ; # THAI CHARACTER PHO SAMPHAO
|
||||
ผ <> p̄h ; # THAI CHARACTER PHO PHUNG
|
||||
พ < ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
|
||||
พ <> ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
|
||||
ป <> p ; # THAI CHARACTER PO PLA
|
||||
|
||||
ฉ <> c̄h ; # THAI CHARACTER CHO CHING
|
||||
ฌ <> c̣h ; # THAI CHARACTER CHO CHOE
|
||||
ช < ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
|
||||
ช <> ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
|
||||
จ <> c ; # THAI CHARACTER CHO CHAN
|
||||
|
||||
ฐ <> ṭ̄h ; # THAI CHARACTER THO THAN
|
||||
ฑ <> ṯh ; # THAI CHARACTER THO NANGMONTHO
|
||||
ฒ <> tʹh ; # THAI CHARACTER THO PHUTHAO
|
||||
ถ <> t̄h ; # THAI CHARACTER THO THUNG
|
||||
ธ <> ṭh ; # THAI CHARACTER THO THONG
|
||||
ท < th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
|
||||
ท <> th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
|
||||
#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.
|
||||
ฏ <> t̩ ; # THAI CHARACTER TO PATAK
|
||||
ต <> t ; # THAI CHARACTER TO TAO
|
||||
|
||||
# since there is no singleton g (generated), don't worry about that.
|
||||
ง <> ng ; # THAI CHARACTER NGO NGU
|
||||
ณ <> ṇ ; # THAI CHARACTER NO NEN
|
||||
น <> n ; # THAI CHARACTER NO NU
|
||||
|
||||
ญ <> ỵ ; # THAI CHARACTER YO YING
|
||||
ฎ <> ḍ ; # THAI CHARACTER DO CHADA
|
||||
ด <> d ; # THAI CHARACTER DO DEK
|
||||
|
||||
บ <> b ; # THAI CHARACTER BO BAIMAI
|
||||
ฝ <> f̄ ; # THAI CHARACTER FO FA
|
||||
ฝ | $1 < f ($notAbove*) ̄; # backward case, account for reordering
|
||||
|
||||
ม <> m ; # THAI CHARACTER MO MA
|
||||
ย <> y ; # THAI CHARACTER YO YAK
|
||||
ร <> r ; # THAI CHARACTER RO RUA
|
||||
ฤ <> v ; # THAI CHARACTER RU
|
||||
ฦ <> ł ; # THAI CHARACTER LU
|
||||
ว <> w ; # THAI CHARACTER WO WAEN
|
||||
|
||||
ศ <> ṣ̄ ; # THAI CHARACTER SO SALA***
|
||||
ศ | $1 < s ̣ ($notAbove*) ̄; # backward case, account for reordering
|
||||
ษ <> s̄ʹ ; # THAI CHARACTER SO RUSI
|
||||
ส > s̄ ; # THAI CHARACTER SO SUA***
|
||||
ส | $1 < s ($notAbove*) ̄; # backward case, account for reordering
|
||||
|
||||
ฬ <> ḷ ; # THAI CHARACTER LO CHULA
|
||||
ล <> l ; # THAI CHARACTER LO LING
|
||||
ฟ <> f ; # THAI CHARACTER FO FAN
|
||||
|
||||
อ <> x ; # THAI CHARACTER O ANG
|
||||
ซ <> s ; # THAI CHARACTER SO SO
|
||||
|
||||
# vowels
|
||||
|
||||
ั <> ạ ; # THAI CHARACTER MAI HAN-AKAT
|
||||
|
||||
า > ā ; # THAI CHARACTER SARA AA
|
||||
า | $1 < a ($notAbove*) ̄; # backward case, account for reordering
|
||||
|
||||
# We deviate from ISO for SARA AM for disambiguation
|
||||
ำ > a ̉; # THAI CHARACTER SARA AM
|
||||
ำ | $1 < a ($notAbove*) ̉ ; # backward case, account for reordering
|
||||
|
||||
ะ <> a ; # THAI CHARACTER SARA A
|
||||
ี <> ī ; # THAI CHARACTER SARA II
|
||||
ี | $1 < i ($notAbove*) ̄ ; # backward case, account for reordering
|
||||
|
||||
ื <> ụ̄ ; # THAI CHARACTER SARA UEE
|
||||
ื | $1 < u ̣ ($notAbove*) ̄ ; # backward case, account for reordering
|
||||
|
||||
ึ <> ụ ; # THAI CHARACTER SARA UE
|
||||
ู <> ū ; # THAI CHARACTER SARA UU
|
||||
ู | $1 < u ($notAbove*) ̄ ; # backward case, account for reordering
|
||||
|
||||
ุ <> u ; # THAI CHARACTER SARA U
|
||||
|
||||
ฯ <> ‡ ; # THAI CHARACTER PAIYANNOI
|
||||
|
||||
# ฿ <> XXX ; # THAI CURRENCY SYMBOL BAHT
|
||||
|
||||
เ <> e ; # THAI CHARACTER SARA E
|
||||
แ <> æ ; # THAI CHARACTER SARA AE
|
||||
โ <> o ; # THAI CHARACTER SARA O
|
||||
ใ <> ı ; # THAI CHARACTER SARA AI MAIMUAN
|
||||
ไ <> ị ; # THAI CHARACTER SARA AI MAIMALAI
|
||||
ๅ <> ɨ ; # THAI CHARACTER LAKKHANGYAO
|
||||
็ <> ̆ ; # THAI CHARACTER MAITAIKHU
|
||||
่ <> ̀ ; # THAI CHARACTER MAI EK
|
||||
้ <> ̂ ; # THAI CHARACTER MAI THO
|
||||
๊ <> ́ ; # THAI CHARACTER MAI TRI
|
||||
๋ <> ̌ ; # THAI CHARACTER MAI CHATTAWA
|
||||
์ <> ̒ ; # THAI CHARACTER THANTHAKHAT
|
||||
๎ <> '~' ; # THAI CHARACTER YAMAKKAN
|
||||
|
||||
# We deviate from ISO for disambiguation
|
||||
ํ <> ̊ ; # THAI CHARACTER NIKHAHIT
|
||||
|
||||
๏ <> § ; # THAI CHARACTER FONGMAN
|
||||
|
||||
๐ <> 0 ; # THAI DIGIT ZERO
|
||||
๑ <> 1 ; # THAI DIGIT ONE
|
||||
๒ <> 2 ; # THAI DIGIT TWO
|
||||
๓ <> 3 ; # THAI DIGIT THREE
|
||||
๔ <> 4 ; # THAI DIGIT FOUR
|
||||
๕ <> 5 ; # THAI DIGIT FIVE
|
||||
๖ <> 6 ; # THAI DIGIT SIX
|
||||
๗ <> 7 ; # THAI DIGIT SEVEN
|
||||
๘ <> 8 ; # THAI DIGIT EIGHT
|
||||
๙ <> 9 ; # THAI DIGIT NINE
|
||||
|
||||
๚ <> '||' ; # THAI CHARACTER ANGKHANKHU
|
||||
|
||||
๛ <> » ; # THAI CHARACTER KHOMUT
|
||||
ๆ <> « ; # THAI CHARACTER MAIYAMOK
|
||||
|
||||
# moved down to make shorter first
|
||||
#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
|
||||
ฺ <> ˌ ; # THAI CHARACTER PHINTHU
|
||||
ิ <> i ; # THAI CHARACTER SARA I
|
||||
|
||||
# fallbacks
|
||||
|
||||
| k < g ;
|
||||
| k < h ;
|
||||
| c < j ;
|
||||
| k < q ;
|
||||
| s < z ;
|
||||
|
||||
:: (lower);
|
26
icu4c/source/data/translit/Thai_ThaiLogical.txt
Normal file
26
icu4c/source/data/translit/Thai_ThaiLogical.txt
Normal file
|
@ -0,0 +1,26 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# This reverses the Thai LogicalOrderException vowels, and does (part of) spaces
|
||||
# The rules that convert space into semicolon are in another file;
|
||||
# since they have to come BEFORE the break iterator
|
||||
|
||||
$thai = [[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B] ;
|
||||
|
||||
# First convert the semicolon back
|
||||
|
||||
' ' < $thai { '; ' } $thai;
|
||||
|
||||
# Remove any other spaces between thai letters
|
||||
|
||||
< $thai { ' ' } $thai;
|
||||
|
||||
# Now vowels
|
||||
$thai_reversing = [[:Logical_Order_Exception:] & $thai];
|
||||
$thai_non_reversing = [$thai - $thai_reversing ];
|
||||
|
||||
( $thai_reversing ) ( $thai_non_reversing ) > $2 $1;
|
||||
# other direction
|
||||
$2 $1 < ( $thai_non_reversing ) ( $thai_reversing ) ;
|
11
icu4c/source/data/translit/Thai_ThaiSemi.txt
Normal file
11
icu4c/source/data/translit/Thai_ThaiSemi.txt
Normal file
|
@ -0,0 +1,11 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# The rules that convert space into semicolon are in this file;
|
||||
# since they have to come BEFORE the break iterator.
|
||||
|
||||
$thai = [[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B] ;
|
||||
|
||||
$thai { ' ' } $thai > '; ' ;
|
11
icu4c/source/data/translit/Tone_Digit.txt
Normal file
11
icu4c/source/data/translit/Tone_Digit.txt
Normal file
|
@ -0,0 +1,11 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Only intended for internal use
|
||||
\u0304 <> 1;
|
||||
\u0301 <> 2;
|
||||
\u030C <> 3;
|
||||
\u0300 <> 4;
|
||||
< 5;
|
8
icu4c/source/data/translit/el.txt
Normal file
8
icu4c/source/data/translit/el.txt
Normal file
|
@ -0,0 +1,8 @@
|
|||
el{
|
||||
|
||||
TransliterateLATIN {
|
||||
"UNGEGN",
|
||||
"::Greek-Latin/UNGEGN;"
|
||||
}
|
||||
}
|
||||
|
22
icu4c/source/data/translit/en.txt
Normal file
22
icu4c/source/data/translit/en.txt
Normal file
|
@ -0,0 +1,22 @@
|
|||
// ***************************************************************************
|
||||
// *
|
||||
// * Copyright (C) 2004, International Business Machines
|
||||
// * Corporation and others. All Rights Reserved.
|
||||
// *
|
||||
// ***************************************************************************
|
||||
//
|
||||
|
||||
en{
|
||||
|
||||
// Format for the display name of a Transliterator.
|
||||
// This is the English form of this resource.
|
||||
TransliteratorNamePattern { "{0,choice,0#|1#{1}|2#{1} to {2}}" }
|
||||
|
||||
// Transliterator display names
|
||||
// This is the English form of this resource.
|
||||
// This list is currently incomplete, and care should be taken to translate these identifiers.
|
||||
// TODO: Reorganize this data like Country, Currencies and Language tables.
|
||||
"%Translit%Hex" { "Hex Escape" }
|
||||
"%Translit%UnicodeName" { "Unicode Name" }
|
||||
"%Translit%UnicodeChar" { "Unicode Character" }
|
||||
}
|
752
icu4c/source/data/translit/root.txt
Normal file
752
icu4c/source/data/translit/root.txt
Normal file
|
@ -0,0 +1,752 @@
|
|||
// ***************************************************************************
|
||||
// *
|
||||
// * Copyright (C) 2004, International Business Machines
|
||||
// * Corporation and others. All Rights Reserved.
|
||||
// *
|
||||
// ***************************************************************************
|
||||
//
|
||||
|
||||
root{
|
||||
|
||||
RuleBasedTransliteratorIDs{
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//
|
||||
// TRANSLITERATOR INDEX FILE. This file lists the non-algorithmic
|
||||
// system transliterators. It allows arbitrary mappings between
|
||||
// transliterator IDs and file names, and also allows the system to
|
||||
// define aliases for transliterators, so that "Latin-Hangul", for
|
||||
// example, can be implemented transparently as the compound
|
||||
// "Latin-Jamo;Jamo-Hangul". Internal IDs may also be defined; these
|
||||
// are invisible to the user, but can be composed together by the
|
||||
// system to create visible transliterators.
|
||||
//
|
||||
// Blank lines and lines beginning with '#' are ignored.
|
||||
//
|
||||
// Lines in this file have one of the following forms (text not
|
||||
// enclosed by <> is literal):
|
||||
//
|
||||
// <id>:file:<resource>:<encoding>:<direction>
|
||||
// <id>:internal:<resource>:<encoding>:<direction>
|
||||
// <id>:alias:<getInstanceArg>
|
||||
//
|
||||
// <id> is the ID of the system transliterator being defined. These
|
||||
// are public IDs enumerated by Transliterator.getAvailableIDs(),
|
||||
// unless the second field is "internal".
|
||||
//
|
||||
// <resource> is a ResourceReader resource name. Currently these refer
|
||||
// to file names under com/ibm/text/resources. This string is passed
|
||||
// directly to ResourceReader, together with <encoding>.
|
||||
//
|
||||
// <encoding> is the character encoding to use when reading <resource>;
|
||||
// passed directly to ResourceReader. E.g., "UTF8".
|
||||
//
|
||||
// <direction> is either "FORWARD" or "REVERSE".
|
||||
//
|
||||
// <getInstanceArg> is a string to be passed directly to
|
||||
// Transliterator.getInstance(). The returned Transliterator object
|
||||
// then has its ID changed to <id> and is returned.
|
||||
|
||||
|
||||
// Bidirectional rule files
|
||||
|
||||
Fullwidth-Halfwidth {
|
||||
file {
|
||||
resource:include{"Fullwidth_Halfwidth.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Halfwidth-Fullwidth {
|
||||
file {
|
||||
resource:include{"Fullwidth_Halfwidth.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
|
||||
Latin-Cyrillic {
|
||||
file {
|
||||
resource:include{"Cyrillic_Latin.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
Cyrillic-Latin {
|
||||
file {
|
||||
resource:include{"Cyrillic_Latin.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
|
||||
Latin-Hebrew {
|
||||
file {
|
||||
resource:include{"Hebrew_Latin.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
Hebrew-Latin {
|
||||
file {
|
||||
resource:include{"Hebrew_Latin.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
|
||||
Latin-Arabic {
|
||||
file {
|
||||
resource:include{"Arabic_Latin.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
Arabic-Latin {
|
||||
file {
|
||||
resource:include{"Arabic_Latin.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
|
||||
Tone-Digit {
|
||||
internal {
|
||||
resource:include{"Tone_Digit.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Digit-Tone {
|
||||
internal {
|
||||
resource:include{"Tone_Digit.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
|
||||
Latin-NumericPinyin {
|
||||
file {
|
||||
resource:include{"Latin_NumericPinyin.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
NumericPinyin-Latin {
|
||||
file {
|
||||
resource:include{"Latin_NumericPinyin.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
|
||||
Han-Spacedhan {
|
||||
internal {
|
||||
resource:include{"Han_Spacedhan.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Spacedhan-Han {
|
||||
alias {"null"}
|
||||
}
|
||||
|
||||
Han-Latin {
|
||||
file {
|
||||
resource:include{"Han_Latin.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
//Latin-Han:file:Transliterator_Han_Latin.txt:UTF8:REVERSE # no round trip!
|
||||
Latin-Han {
|
||||
alias {"null"}
|
||||
}
|
||||
|
||||
// These entries are commented out; they are only for testing
|
||||
// Latin-Han/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:REVERSE
|
||||
// Han-Latin/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:FORWARD
|
||||
|
||||
//Latin-Han/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:REVERSE
|
||||
//Han-Latin/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:FORWARD
|
||||
|
||||
Latin-Greek {
|
||||
file {
|
||||
resource:include{"Greek_Latin.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
Greek-Latin {
|
||||
file {
|
||||
resource:include{"Greek_Latin.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
|
||||
Latin-Greek/UNGEGN {
|
||||
file {
|
||||
resource:include{"Greek_Latin_UNGEGN.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
Greek-Latin/UNGEGN {
|
||||
file {
|
||||
resource:include{"Greek_Latin_UNGEGN.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
|
||||
Latin-Katakana {
|
||||
file {
|
||||
resource:include{"Latin_Katakana.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Katakana-Latin {
|
||||
file {
|
||||
resource:include{"Latin_Katakana.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
|
||||
Latin-Hiragana {
|
||||
file {
|
||||
resource:include{"Hiragana_Latin.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
Hiragana-Latin {
|
||||
file {
|
||||
resource:include{"Hiragana_Latin.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
|
||||
//Thai Stuff: will change if we get \b into Transliterator
|
||||
|
||||
Thai-ThaiSemi {
|
||||
internal {
|
||||
resource:include{"Thai_ThaiSemi.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
|
||||
Thai-ThaiLogical {
|
||||
internal {
|
||||
resource:include{"Thai_ThaiLogical.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
ThaiLogical-Thai {
|
||||
internal {
|
||||
resource:include{"Thai_ThaiLogical.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
|
||||
ThaiLogical-Latin {
|
||||
internal {
|
||||
resource:include{"ThaiLogical_Latin.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Latin-ThaiLogical {
|
||||
internal {
|
||||
resource:include{"ThaiLogical_Latin.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
|
||||
// Must use the order below!
|
||||
// We need two separate passes because of the Thai vowel reversal
|
||||
// Thai-Logical also converts spaces to semicolons. That has to be done before we insert Latin spaces.
|
||||
|
||||
Thai-Latin {
|
||||
alias {"[[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B];NFD;Thai-ThaiSemi;Any-BreakInternal;Thai-ThaiLogical;ThaiLogical-Latin;NFC"}
|
||||
}
|
||||
Latin-Thai {
|
||||
alias {"[[:Latin:][:Mn:][:Me:] \ \;0-9|~\u00A7\u00AB\u00BB\u02B9\u0374\u2021\u02CC];NFD;Latin-ThaiLogical;ThaiLogical-Thai;NFC"}
|
||||
}
|
||||
|
||||
// end of Thai Stuff
|
||||
|
||||
Hiragana-Katakana {
|
||||
file {
|
||||
resource:include{"Hiragana_Katakana.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Katakana-Hiragana {
|
||||
file {
|
||||
resource:include{"Hiragana_Katakana.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
|
||||
Any-Accents {
|
||||
file {
|
||||
resource:include{"Any_Accents.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Accents-Any {
|
||||
file {
|
||||
resource:include{"Any_Accents.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
|
||||
Any-Publishing {
|
||||
file {
|
||||
resource:include{"Any_Publishing.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Publishing-Any {
|
||||
file {
|
||||
resource:include{"Any_Publishing.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
|
||||
// Korean
|
||||
// N.B. Don't end Latin-Jamo with NFC; that produces Hangul. For
|
||||
// Hangul output use Latin-Hangul.
|
||||
|
||||
LowerLatin-Jamo {
|
||||
internal {
|
||||
resource:include{"Latin_Jamo.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Jamo-LowerLatin {
|
||||
internal {
|
||||
resource:include{"Latin_Jamo.txt"}
|
||||
direction{"REVERSE"}
|
||||
}
|
||||
}
|
||||
Latin-Jamo {
|
||||
alias {"['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo"}
|
||||
}
|
||||
Jamo-Latin {
|
||||
alias {"['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\uAC00-\uD7A3];NFD;Jamo-LowerLatin;NFC"}
|
||||
}
|
||||
Latin-Hangul {
|
||||
alias {"['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo;NFC"}
|
||||
}
|
||||
Hangul-Latin {
|
||||
alias {"['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\u3131-\u313F\u3141-\u3143\u3145-\u3163\u3200-\u321C\u3260-\u327B\uAC00-\uD7A3\uFF07\uFFA1-\uFFAF\uFFB1-\uFFB3\uFFB5-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC];NFKD;Jamo-LowerLatin;NFC"}
|
||||
}
|
||||
|
||||
// Inter-Indic composed rules
|
||||
Latin-InterIndic {
|
||||
internal {
|
||||
resource:include{"Latin_InterIndic.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Devanagari-InterIndic {
|
||||
internal {
|
||||
resource:include{"Devanagari_InterIndic.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Bengali-InterIndic {
|
||||
internal {
|
||||
resource:include{"Bengali_InterIndic.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Gurmukhi-InterIndic {
|
||||
internal {
|
||||
resource:include{"Gurmukhi_InterIndic.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Gujarati-InterIndic {
|
||||
internal {
|
||||
resource:include{"Gujarati_InterIndic.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Oriya-InterIndic {
|
||||
internal {
|
||||
resource:include{"Oriya_InterIndic.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Tamil-InterIndic {
|
||||
internal {
|
||||
resource:include{"Tamil_InterIndic.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Telugu-InterIndic {
|
||||
internal {
|
||||
resource:include{"Telugu_InterIndic.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Kannada-InterIndic {
|
||||
internal {
|
||||
resource:include{"Kannada_InterIndic.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
Malayalam-InterIndic {
|
||||
internal {
|
||||
resource:include{"Malayalam_InterIndic.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
|
||||
InterIndic-Latin {
|
||||
internal {
|
||||
resource:include{"InterIndic_Latin.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
InterIndic-Devanagari {
|
||||
internal {
|
||||
resource:include{"InterIndic_Devanagari.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
InterIndic-Bengali {
|
||||
internal {
|
||||
resource:include{"InterIndic_Bengali.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
InterIndic-Gurmukhi {
|
||||
internal {
|
||||
resource:include{"InterIndic_Gurmukhi.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
InterIndic-Gujarati {
|
||||
internal {
|
||||
resource:include{"InterIndic_Gujarati.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
InterIndic-Oriya {
|
||||
internal {
|
||||
resource:include{"InterIndic_Oriya.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
InterIndic-Tamil {
|
||||
internal {
|
||||
resource:include{"InterIndic_Tamil.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
InterIndic-Telugu {
|
||||
internal {
|
||||
resource:include{"InterIndic_Telugu.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
InterIndic-Kannada {
|
||||
internal {
|
||||
resource:include{"InterIndic_Kannada.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
InterIndic-Malayalam {
|
||||
internal {
|
||||
resource:include{"InterIndic_Malayalam.txt"}
|
||||
direction{"FORWARD"}
|
||||
}
|
||||
}
|
||||
|
||||
//Latin-Indic transliterators
|
||||
Latin-Devanagari {
|
||||
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Devanagari;NFC"}
|
||||
}
|
||||
Latin-Bengali {
|
||||
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Bengali;NFC"}
|
||||
}
|
||||
Latin-Gurmukhi {
|
||||
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gurmukhi;NFC"}
|
||||
}
|
||||
Latin-Gujarati {
|
||||
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gujarati;NFC"}
|
||||
}
|
||||
Latin-Oriya {
|
||||
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Oriya;NFC"}
|
||||
}
|
||||
Latin-Tamil {
|
||||
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Tamil;NFC"}
|
||||
}
|
||||
Latin-Telugu {
|
||||
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Telugu;NFC"}
|
||||
}
|
||||
Latin-Kannada {
|
||||
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Kannada;NFC"}
|
||||
}
|
||||
Latin-Malayalam {
|
||||
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Malayalam;NFC"}
|
||||
}
|
||||
|
||||
// Indic-Latin transliterators
|
||||
Devanagari-Latin {
|
||||
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Latin;NFC"}
|
||||
}
|
||||
Bengali-Latin {
|
||||
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Latin;NFC"}
|
||||
}
|
||||
Gurmukhi-Latin {
|
||||
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Latin;NFC"}
|
||||
}
|
||||
Gujarati-Latin {
|
||||
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Latin;NFC"}
|
||||
}
|
||||
Oriya-Latin {
|
||||
alias {"[\u0964-\u0965\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35\u0B36-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B70\u0B71];NFD;Oriya-InterIndic;InterIndic-Latin;NFC"}
|
||||
}
|
||||
Tamil-Latin {
|
||||
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Latin;NFC"}
|
||||
}
|
||||
Telugu-Latin {
|
||||
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Latin;NFC"}
|
||||
}
|
||||
Kannada-Latin {
|
||||
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Latin;NFC"}
|
||||
}
|
||||
Malayalam-Latin {
|
||||
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Latin;NFC"}
|
||||
}
|
||||
|
||||
Devanagari-Bengali {
|
||||
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Bengali;NFC"}
|
||||
}
|
||||
Devanagari-Gurmukhi {
|
||||
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gurmukhi;NFC"}
|
||||
}
|
||||
Devanagari-Gujarati {
|
||||
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gujarati;NFC"}
|
||||
}
|
||||
Devanagari-Oriya {
|
||||
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Oriya;NFC"}
|
||||
}
|
||||
Devanagari-Tamil {
|
||||
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Tamil;NFC"}
|
||||
}
|
||||
Devanagari-Telugu {
|
||||
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Telugu;NFC"}
|
||||
}
|
||||
Devanagari-Kannada {
|
||||
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Kannada;NFC"}
|
||||
}
|
||||
Devanagari-Malayalam {
|
||||
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Malayalam;NFC"}
|
||||
}
|
||||
|
||||
Bengali-Devanagari {
|
||||
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Devanagari;NFC"}
|
||||
}
|
||||
Bengali-Gurmukhi {
|
||||
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gurmukhi;NFC"}
|
||||
}
|
||||
Bengali-Gujarati {
|
||||
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gujarati;NFC"}
|
||||
}
|
||||
Bengali-Oriya {
|
||||
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Oriya;NFC"}
|
||||
}
|
||||
Bengali-Tamil {
|
||||
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Tamil;NFC"}
|
||||
}
|
||||
Bengali-Telugu {
|
||||
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Telugu;NFC"}
|
||||
}
|
||||
Bengali-Kannada {
|
||||
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Kannada;NFC"}
|
||||
}
|
||||
Bengali-Malayalam {
|
||||
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Malayalam;NFC"}
|
||||
}
|
||||
|
||||
Gurmukhi-Devanagari {
|
||||
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Devanagari;NFC"}
|
||||
}
|
||||
Gurmukhi-Bengali {
|
||||
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Bengali;NFC"}
|
||||
}
|
||||
Gurmukhi-Gujarati {
|
||||
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Gujarati;NFC"}
|
||||
}
|
||||
Gurmukhi-Oriya {
|
||||
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Oriya;NFC"}
|
||||
}
|
||||
Gurmukhi-Tamil {
|
||||
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Tamil;NFC"}
|
||||
}
|
||||
Gurmukhi-Telugu {
|
||||
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Telugu;NFC"}
|
||||
}
|
||||
Gurmukhi-Kannada {
|
||||
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Kannada;NFC"}
|
||||
}
|
||||
Gurmukhi-Malayalam {
|
||||
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Malayalam;NFC"}
|
||||
}
|
||||
|
||||
Gujarati-Devanagari {
|
||||
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Devanagari;NFC"}
|
||||
}
|
||||
Gujarati-Bengali {
|
||||
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Bengali;NFC"}
|
||||
}
|
||||
Gujarati-Gurmukhi {
|
||||
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Gurmukhi;NFC"}
|
||||
}
|
||||
Gujarati-Oriya {
|
||||
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Oriya;NFC"}
|
||||
}
|
||||
Gujarati-Tamil {
|
||||
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Tamil;NFC"}
|
||||
}
|
||||
Gujarati-Telugu {
|
||||
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Telugu;NFC"}
|
||||
}
|
||||
Gujarati-Kannada {
|
||||
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Kannada;NFC"}
|
||||
}
|
||||
Gujarati-Malayalam {
|
||||
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Malayalam;NFC"}
|
||||
}
|
||||
|
||||
Oriya-Devanagari {
|
||||
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Devanagari;NFC"}
|
||||
}
|
||||
Oriya-Bengali {
|
||||
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Bengali;NFC"}
|
||||
}
|
||||
Oriya-Gurmukhi {
|
||||
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gurmukhi;NFC"}
|
||||
}
|
||||
Oriya-Gujarati {
|
||||
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gujarati;NFC"}
|
||||
}
|
||||
Oriya-Tamil {
|
||||
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Tamil;NFC"}
|
||||
}
|
||||
Oriya-Telugu {
|
||||
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Telugu;NFC"}
|
||||
}
|
||||
Oriya-Kannada {
|
||||
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Kannada;NFC"}
|
||||
}
|
||||
Oriya-Malayalam {
|
||||
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Malayalam;NFC"}
|
||||
}
|
||||
|
||||
Tamil-Devanagari {
|
||||
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Devanagari;NFC"}
|
||||
}
|
||||
Tamil-Bengali {
|
||||
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Bengali;NFC"}
|
||||
}
|
||||
Tamil-Gurmukhi {
|
||||
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gurmukhi;NFC"}
|
||||
}
|
||||
Tamil-Gujarati {
|
||||
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gujarati;NFC"}
|
||||
}
|
||||
Tamil-Oriya {
|
||||
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Oriya;NFC"}
|
||||
}
|
||||
Tamil-Telugu {
|
||||
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Telugu;NFC"}
|
||||
}
|
||||
Tamil-Kannada {
|
||||
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Kannada;NFC"}
|
||||
}
|
||||
Tamil-Malayalam {
|
||||
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Malayalam;NFC"}
|
||||
}
|
||||
|
||||
Telugu-Devanagari {
|
||||
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Devanagari;NFC"}
|
||||
}
|
||||
Telugu-Bengali {
|
||||
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Bengali;NFC"}
|
||||
}
|
||||
Telugu-Gurmukhi {
|
||||
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gurmukhi;NFC"}
|
||||
}
|
||||
Telugu-Gujarati {
|
||||
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gujarati;NFC"}
|
||||
}
|
||||
Telugu-Oriya {
|
||||
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Oriya;NFC"}
|
||||
}
|
||||
Telugu-Tamil {
|
||||
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Tamil;NFC"}
|
||||
}
|
||||
Telugu-Kannada {
|
||||
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Kannada;NFC"}
|
||||
}
|
||||
Telugu-Malayalam {
|
||||
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Malayalam;NFC"}
|
||||
}
|
||||
|
||||
Kannada-Devanagari {
|
||||
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Devanagari;NFC"}
|
||||
}
|
||||
Kannada-Bengali {
|
||||
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Bengali;NFC"}
|
||||
}
|
||||
Kannada-Gurmukhi {
|
||||
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gurmukhi;NFC"}
|
||||
}
|
||||
Kannada-Gujarati {
|
||||
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gujarati;NFC"}
|
||||
}
|
||||
Kannada-Oriya {
|
||||
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Oriya;NFC"}
|
||||
}
|
||||
Kannada-Tamil {
|
||||
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Tamil;NFC"}
|
||||
}
|
||||
Kannada-Telugu {
|
||||
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Telugu;NFC"}
|
||||
}
|
||||
Kannada-Malayalam {
|
||||
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Malayalam;NFC"}
|
||||
}
|
||||
|
||||
Malayalam-Devanagari {
|
||||
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Devanagari;NFC"}
|
||||
}
|
||||
Malayalam-Bengali {
|
||||
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Bengali;NFC"}
|
||||
}
|
||||
Malayalam-Gurmukhi {
|
||||
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gurmukhi;NFC"}
|
||||
}
|
||||
Malayalam-Gujarati {
|
||||
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gujarati;NFC"}
|
||||
}
|
||||
Malayalam-Oriya {
|
||||
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Oriya;NFC"}
|
||||
}
|
||||
Malayalam-Tamil {
|
||||
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Tamil;NFC"}
|
||||
}
|
||||
Malayalam-Telugu {
|
||||
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Telugu;NFC"}
|
||||
}
|
||||
Malayalam-Kannada {
|
||||
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Kannada;NFC"}
|
||||
}
|
||||
|
||||
// eof
|
||||
}
|
||||
TransliteratorNamePattern {
|
||||
// Format for the display name of a Transliterator.
|
||||
// This is the language-neutral form of this resource.
|
||||
"{0,choice,0#|1#{1}|2#{1}-{2}}" // Display name
|
||||
}
|
||||
|
||||
// Transliterator display names
|
||||
// This is the English form of this resource.
|
||||
"%Translit%Hex" { "%Translit%Hex" }
|
||||
"%Translit%UnicodeName" { "%Translit%UnicodeName" }
|
||||
"%Translit%UnicodeChar" { "%Translit%UnicodeChar" }
|
||||
|
||||
TransliterateLATIN{
|
||||
"",
|
||||
""
|
||||
}
|
||||
|
||||
}
|
|
@ -1,306 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Any_Accents.txt
|
||||
// Date: Tue May 18 17:24:48 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Any_Accents
|
||||
|
||||
t_Any_Accents {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
":: NFD (NFC) ;"
|
||||
|
||||
// to do: make reversible
|
||||
|
||||
// define special conversion characters.
|
||||
// Variants of this could use different characters, or set one or the other to null.
|
||||
|
||||
"$pre = \\\< ;"
|
||||
"$post = \\\> ;"
|
||||
|
||||
// Provide keyboard equivalents for common diacritics used in transliteration
|
||||
|
||||
"$pre \\\` $post <> \u0300 ;" // COMBINING GRAVE ACCENT
|
||||
"$pre \\\' $post <> \u0301 ;" // COMBINING ACUTE ACCENT
|
||||
"$pre \\\^ $post <> \u0302 ;" // COMBINING CIRCUMFLEX ACCENT
|
||||
"$pre \\\~ $post <> \u0303 ;" // COMBINING TILDE
|
||||
"$pre \\\- $post <> \u0304 ;" // COMBINING MACRON
|
||||
"$pre \\\" $post <> \u0308 ;" // COMBINING DIAERESIS
|
||||
"$pre \\\* $post <> \u030A ;" // COMBINING RING ABOVE
|
||||
"$pre \\\, $post <> \u0327 ;" // COMBINING CEDILLA
|
||||
"$pre '/' $post <> \u0338 ;" // COMBINING LONG SOLIDUS OVERLAY
|
||||
"$pre \\\. $post <> \u0323 ;" // COMBINING DOT BELOW
|
||||
|
||||
// Combine common characters
|
||||
|
||||
"$pre AE $post <> \u00C6 ;" // LATIN CAPITAL LETTER AE
|
||||
"$pre ae $post <> \u00E6 ;" // LATIN SMALL LETTER AE
|
||||
"$pre D $post <> \u00D0 ;" // LATIN CAPITAL LETTER ETH
|
||||
"$pre d $post <> \u00F0 ;" // LATIN SMALL LETTER ETH
|
||||
"$pre O'/' $post <> \u00D8 ;" // LATIN CAPITAL LETTER O WITH STROKE
|
||||
"$pre o'/' $post <> \u00F8 ;" // LATIN SMALL LETTER O WITH STROKE
|
||||
"$pre TH $post <> \u00DE ;" // LATIN CAPITAL LETTER THORN
|
||||
"$pre th $post <> \u00FE ;" // LATIN SMALL LETTER THORN
|
||||
"$pre OE $post <> \u0152 ;" // LATIN CAPITAL LIGATURE OE
|
||||
"$pre oe $post <> \u0153 ;" // LATIN SMALL LIGATURE OE
|
||||
|
||||
"$pre ss $post <> \u00DF ;" // LATIN SMALL LETTER SHARP S
|
||||
|
||||
"$pre NG $post <> \u014A ;" // LATIN CAPITAL LETTER ENG
|
||||
"$pre ng $post <> \u014B ;" // LATIN SMALL LETTER ENG
|
||||
|
||||
"$pre T $post <> \u0398 ;" // GREEK CAPITAL LETTER THETA
|
||||
"$pre t $post <> \u03B8 ;" // GREEK SMALL LETTER THETA
|
||||
"$pre SH $post <> \u01A9 ;" // LATIN CAPITAL LETTER ESH
|
||||
"$pre sh $post <> \u0283 ;" // LATIN SMALL LETTER ESH
|
||||
"$pre ZH $post <> \u01B7 ;" // LATIN CAPITAL LETTER EZH
|
||||
"$pre zh $post <> \u0292 ;" // LATIN SMALL LETTER EZH
|
||||
|
||||
"$pre U $post <> \u01B1 ;" // LATIN CAPITAL LETTER UPSILON
|
||||
"$pre u $post <> \u028A ;" // LATIN SMALL LETTER UPSILON
|
||||
"$pre A $post <> \u018F ;" // LATIN CAPITAL LETTER SCHWA
|
||||
"$pre a $post <> \u0259 ;" // LATIN SMALL LETTER SCHWA
|
||||
"$pre O $post <> \u0186 ;" // LATIN CAPITAL LETTER OPEN O
|
||||
"$pre o $post <> \u0254 ;" // LATIN SMALL LETTER OPEN O
|
||||
"$pre E $post <> \u0190 ;" // LATIN CAPITAL LETTER OPEN E
|
||||
"$pre e $post <> \u025B ;" // LATIN SMALL LETTER OPEN E
|
||||
|
||||
// Three characters that have no uppercase counterpart
|
||||
|
||||
"$pre '?' $post <> \u0294 ;" // LATIN LETTER GLOTTAL STOP
|
||||
"$pre i $post <> \u026A ;" // LATIN LETTER SMALL CAPITAL I
|
||||
"$pre v $post <> \u028C ;" // LATIN SMALL LETTER TURNED V
|
||||
|
||||
// Additional Characters that may be added in the future
|
||||
|
||||
// $pre XXX $post <> \u0306 ; # COMBINING BREVE
|
||||
// $pre XXX $post <> \u0307 ; # COMBINING DOT ABOVE
|
||||
// $pre XXX $post <> \u0309 ; # COMBINING HOOK ABOVE
|
||||
// $pre XXX $post <> \u030B ; # COMBINING DOUBLE ACUTE ACCENT
|
||||
// $pre XXX $post <> \u030C ; # COMBINING CARON
|
||||
// $pre XXX $post <> \u030F ; # COMBINING DOUBLE GRAVE ACCENT
|
||||
// $pre XXX $post <> \u0311 ; # COMBINING INVERTED BREVE
|
||||
// $pre XXX $post <> \u0313 ; # COMBINING COMMA ABOVE
|
||||
// $pre XXX $post <> \u0314 ; # COMBINING REVERSED COMMA ABOVE
|
||||
// $pre XXX $post <> \u031B ; # COMBINING HORN
|
||||
// $pre XXX $post <> \u0324 ; # COMBINING DIAERESIS BELOW
|
||||
// $pre XXX $post <> \u0325 ; # COMBINING RING BELOW
|
||||
// $pre XXX $post <> \u0326 ; # COMBINING COMMA BELOW
|
||||
// $pre XXX $post <> \u0328 ; # COMBINING OGONEK
|
||||
// $pre XXX $post <> \u032D ; # COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
// $pre XXX $post <> \u032E ; # COMBINING BREVE BELOW
|
||||
// $pre XXX $post <> \u0330 ; # COMBINING TILDE BELOW
|
||||
// $pre XXX $post <> \u0331 ; # COMBINING MACRON BELOW
|
||||
|
||||
// $pre YYY $post <> \u00AA ; # FEMININE ORDINAL INDICATOR
|
||||
// $pre YYY $post <> \u00BA ; # MASCULINE ORDINAL INDICATOR
|
||||
// $pre YYY $post <> \u0110 ; # LATIN CAPITAL LETTER D WITH STROKE
|
||||
// $pre YYY $post <> \u0111 ; # LATIN SMALL LETTER D WITH STROKE
|
||||
// $pre YYY $post <> \u0126 ; # LATIN CAPITAL LETTER H WITH STROKE
|
||||
// $pre YYY $post <> \u0127 ; # LATIN SMALL LETTER H WITH STROKE
|
||||
// $pre YYY $post <> \u0131 ; # LATIN SMALL LETTER DOTLESS I
|
||||
// $pre YYY $post <> \u0138 ; # LATIN SMALL LETTER KRA
|
||||
// $pre YYY $post <> \u013F ; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
|
||||
// $pre YYY $post <> \u0140 ; # LATIN SMALL LETTER L WITH MIDDLE DOT
|
||||
// $pre YYY $post <> \u0141 ; # LATIN CAPITAL LETTER L WITH STROKE
|
||||
// $pre YYY $post <> \u0142 ; # LATIN SMALL LETTER L WITH STROKE
|
||||
// $pre YYY $post <> \u0149 ; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||
// $pre YYY $post <> \u0166 ; # LATIN CAPITAL LETTER T WITH STROKE
|
||||
// $pre YYY $post <> \u0167 ; # LATIN SMALL LETTER T WITH STROKE
|
||||
// $pre YYY $post <> \u017F ; # LATIN SMALL LETTER LONG S
|
||||
// $pre YYY $post <> \u0180 ; # LATIN SMALL LETTER B WITH STROKE
|
||||
// $pre YYY $post <> \u0181 ; # LATIN CAPITAL LETTER B WITH HOOK
|
||||
// $pre YYY $post <> \u0182 ; # LATIN CAPITAL LETTER B WITH TOPBAR
|
||||
// $pre YYY $post <> \u0183 ; # LATIN SMALL LETTER B WITH TOPBAR
|
||||
// $pre YYY $post <> \u0184 ; # LATIN CAPITAL LETTER TONE SIX
|
||||
// $pre YYY $post <> \u0185 ; # LATIN SMALL LETTER TONE SIX
|
||||
// $pre YYY $post <> \u0187 ; # LATIN CAPITAL LETTER C WITH HOOK
|
||||
// $pre YYY $post <> \u0188 ; # LATIN SMALL LETTER C WITH HOOK
|
||||
// $pre YYY $post <> \u0189 ; # LATIN CAPITAL LETTER AFRICAN D
|
||||
// $pre YYY $post <> \u018A ; # LATIN CAPITAL LETTER D WITH HOOK
|
||||
// $pre YYY $post <> \u018B ; # LATIN CAPITAL LETTER D WITH TOPBAR
|
||||
// $pre YYY $post <> \u018C ; # LATIN SMALL LETTER D WITH TOPBAR
|
||||
// $pre YYY $post <> \u018D ; # LATIN SMALL LETTER TURNED DELTA
|
||||
// $pre YYY $post <> \u018E ; # LATIN CAPITAL LETTER REVERSED E
|
||||
// $pre YYY $post <> \u0191 ; # LATIN CAPITAL LETTER F WITH HOOK
|
||||
// $pre YYY $post <> \u0192 ; # LATIN SMALL LETTER F WITH HOOK
|
||||
// $pre YYY $post <> \u0193 ; # LATIN CAPITAL LETTER G WITH HOOK
|
||||
// $pre YYY $post <> \u0194 ; # LATIN CAPITAL LETTER GAMMA
|
||||
// $pre YYY $post <> \u0195 ; # LATIN SMALL LETTER HV
|
||||
// $pre YYY $post <> \u0196 ; # LATIN CAPITAL LETTER IOTA
|
||||
// $pre YYY $post <> \u0197 ; # LATIN CAPITAL LETTER I WITH STROKE
|
||||
// $pre YYY $post <> \u0198 ; # LATIN CAPITAL LETTER K WITH HOOK
|
||||
// $pre YYY $post <> \u0199 ; # LATIN SMALL LETTER K WITH HOOK
|
||||
// $pre YYY $post <> \u019A ; # LATIN SMALL LETTER L WITH BAR
|
||||
// $pre YYY $post <> \u019B ; # LATIN SMALL LETTER LAMBDA WITH STROKE
|
||||
// $pre YYY $post <> \u019C ; # LATIN CAPITAL LETTER TURNED M
|
||||
// $pre YYY $post <> \u019D ; # LATIN CAPITAL LETTER N WITH LEFT HOOK
|
||||
// $pre YYY $post <> \u019E ; # LATIN SMALL LETTER N WITH LONG RIGHT LEG
|
||||
// $pre YYY $post <> \u019F ; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
|
||||
// $pre YYY $post <> \u01A2 ; # LATIN CAPITAL LETTER OI
|
||||
// $pre YYY $post <> \u01A3 ; # LATIN SMALL LETTER OI
|
||||
// $pre YYY $post <> \u01A4 ; # LATIN CAPITAL LETTER P WITH HOOK
|
||||
// $pre YYY $post <> \u01A5 ; # LATIN SMALL LETTER P WITH HOOK
|
||||
// $pre YYY $post <> \u01A6 ; # LATIN LETTER YR
|
||||
// $pre YYY $post <> \u01A7 ; # LATIN CAPITAL LETTER TONE TWO
|
||||
// $pre YYY $post <> \u01A8 ; # LATIN SMALL LETTER TONE TWO
|
||||
// $pre YYY $post <> \u01AA ; # LATIN LETTER REVERSED ESH LOOP
|
||||
// $pre YYY $post <> \u01AB ; # LATIN SMALL LETTER T WITH PALATAL HOOK
|
||||
// $pre YYY $post <> \u01AC ; # LATIN CAPITAL LETTER T WITH HOOK
|
||||
// $pre YYY $post <> \u01AD ; # LATIN SMALL LETTER T WITH HOOK
|
||||
// $pre YYY $post <> \u01AE ; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u01B2 ; # LATIN CAPITAL LETTER V WITH HOOK
|
||||
// $pre YYY $post <> \u01B3 ; # LATIN CAPITAL LETTER Y WITH HOOK
|
||||
// $pre YYY $post <> \u01B4 ; # LATIN SMALL LETTER Y WITH HOOK
|
||||
// $pre YYY $post <> \u01B5 ; # LATIN CAPITAL LETTER Z WITH STROKE
|
||||
// $pre YYY $post <> \u01B6 ; # LATIN SMALL LETTER Z WITH STROKE
|
||||
// $pre YYY $post <> \u01B8 ; # LATIN CAPITAL LETTER EZH REVERSED
|
||||
// $pre YYY $post <> \u01B9 ; # LATIN SMALL LETTER EZH REVERSED
|
||||
// $pre YYY $post <> \u01BA ; # LATIN SMALL LETTER EZH WITH TAIL
|
||||
// $pre YYY $post <> \u01BB ; # LATIN LETTER TWO WITH STROKE
|
||||
// $pre YYY $post <> \u01BC ; # LATIN CAPITAL LETTER TONE FIVE
|
||||
// $pre YYY $post <> \u01BD ; # LATIN SMALL LETTER TONE FIVE
|
||||
// $pre YYY $post <> \u01BE ; # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE
|
||||
// $pre YYY $post <> \u01BF ; # LATIN LETTER WYNN
|
||||
// $pre YYY $post <> \u01C0 ; # LATIN LETTER DENTAL CLICK
|
||||
// $pre YYY $post <> \u01C1 ; # LATIN LETTER LATERAL CLICK
|
||||
// $pre YYY $post <> \u01C2 ; # LATIN LETTER ALVEOLAR CLICK
|
||||
// $pre YYY $post <> \u01C3 ; # LATIN LETTER RETROFLEX CLICK
|
||||
// $pre YYY $post <> \u01C4 ; # LATIN CAPITAL LETTER DZ WITH CARON
|
||||
// $pre YYY $post <> \u01C5 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
|
||||
// $pre YYY $post <> \u01C6 ; # LATIN SMALL LETTER DZ WITH CARON
|
||||
// $pre YYY $post <> \u01C7 ; # LATIN CAPITAL LETTER LJ
|
||||
// $pre YYY $post <> \u01C8 ; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
|
||||
// $pre YYY $post <> \u01C9 ; # LATIN SMALL LETTER LJ
|
||||
// $pre YYY $post <> \u01CA ; # LATIN CAPITAL LETTER NJ
|
||||
// $pre YYY $post <> \u01CB ; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
|
||||
// $pre YYY $post <> \u01CC ; # LATIN SMALL LETTER NJ
|
||||
// $pre YYY $post <> \u01DD ; # LATIN SMALL LETTER TURNED E
|
||||
// $pre YYY $post <> \u01E4 ; # LATIN CAPITAL LETTER G WITH STROKE
|
||||
// $pre YYY $post <> \u01E5 ; # LATIN SMALL LETTER G WITH STROKE
|
||||
// $pre YYY $post <> \u01F1 ; # LATIN CAPITAL LETTER DZ
|
||||
// $pre YYY $post <> \u01F2 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
|
||||
// $pre YYY $post <> \u01F3 ; # LATIN SMALL LETTER DZ
|
||||
// $pre YYY $post <> \u01F6 ; # LATIN CAPITAL LETTER HWAIR
|
||||
// $pre YYY $post <> \u01F7 ; # LATIN CAPITAL LETTER WYNN
|
||||
// $pre YYY $post <> \u021C ; # LATIN CAPITAL LETTER YOGH
|
||||
// $pre YYY $post <> \u021D ; # LATIN SMALL LETTER YOGH
|
||||
// $pre YYY $post <> \u0222 ; # LATIN CAPITAL LETTER OU
|
||||
// $pre YYY $post <> \u0223 ; # LATIN SMALL LETTER OU
|
||||
// $pre YYY $post <> \u0224 ; # LATIN CAPITAL LETTER Z WITH HOOK
|
||||
// $pre YYY $post <> \u0225 ; # LATIN SMALL LETTER Z WITH HOOK
|
||||
// $pre YYY $post <> \u0250 ; # LATIN SMALL LETTER TURNED A
|
||||
// $pre YYY $post <> \u0251 ; # LATIN SMALL LETTER ALPHA
|
||||
// $pre YYY $post <> \u0252 ; # LATIN SMALL LETTER TURNED ALPHA
|
||||
// $pre YYY $post <> \u0253 ; # LATIN SMALL LETTER B WITH HOOK
|
||||
// $pre YYY $post <> \u0255 ; # LATIN SMALL LETTER C WITH CURL
|
||||
// $pre YYY $post <> \u0256 ; # LATIN SMALL LETTER D WITH TAIL
|
||||
// $pre YYY $post <> \u0257 ; # LATIN SMALL LETTER D WITH HOOK
|
||||
// $pre YYY $post <> \u0258 ; # LATIN SMALL LETTER REVERSED E
|
||||
// $pre YYY $post <> \u025A ; # LATIN SMALL LETTER SCHWA WITH HOOK
|
||||
// $pre YYY $post <> \u025C ; # LATIN SMALL LETTER REVERSED OPEN E
|
||||
// $pre YYY $post <> \u025D ; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
|
||||
// $pre YYY $post <> \u025E ; # LATIN SMALL LETTER CLOSED REVERSED OPEN E
|
||||
// $pre YYY $post <> \u025F ; # LATIN SMALL LETTER DOTLESS J WITH STROKE
|
||||
// $pre YYY $post <> \u0260 ; # LATIN SMALL LETTER G WITH HOOK
|
||||
// $pre YYY $post <> \u0261 ; # LATIN SMALL LETTER SCRIPT G
|
||||
// $pre YYY $post <> \u0262 ; # LATIN LETTER SMALL CAPITAL G
|
||||
// $pre YYY $post <> \u0263 ; # LATIN SMALL LETTER GAMMA
|
||||
// $pre YYY $post <> \u0264 ; # LATIN SMALL LETTER RAMS HORN
|
||||
// $pre YYY $post <> \u0265 ; # LATIN SMALL LETTER TURNED H
|
||||
// $pre YYY $post <> \u0266 ; # LATIN SMALL LETTER H WITH HOOK
|
||||
// $pre YYY $post <> \u0267 ; # LATIN SMALL LETTER HENG WITH HOOK
|
||||
// $pre YYY $post <> \u0268 ; # LATIN SMALL LETTER I WITH STROKE
|
||||
// $pre YYY $post <> \u0269 ; # LATIN SMALL LETTER IOTA
|
||||
// $pre YYY $post <> \u026B ; # LATIN SMALL LETTER L WITH MIDDLE TILDE
|
||||
// $pre YYY $post <> \u026C ; # LATIN SMALL LETTER L WITH BELT
|
||||
// $pre YYY $post <> \u026D ; # LATIN SMALL LETTER L WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u026E ; # LATIN SMALL LETTER LEZH
|
||||
// $pre YYY $post <> \u026F ; # LATIN SMALL LETTER TURNED M
|
||||
// $pre YYY $post <> \u0270 ; # LATIN SMALL LETTER TURNED M WITH LONG LEG
|
||||
// $pre YYY $post <> \u0271 ; # LATIN SMALL LETTER M WITH HOOK
|
||||
// $pre YYY $post <> \u0272 ; # LATIN SMALL LETTER N WITH LEFT HOOK
|
||||
// $pre YYY $post <> \u0273 ; # LATIN SMALL LETTER N WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u0274 ; # LATIN LETTER SMALL CAPITAL N
|
||||
// $pre YYY $post <> \u0275 ; # LATIN SMALL LETTER BARRED O
|
||||
// $pre YYY $post <> \u0276 ; # LATIN LETTER SMALL CAPITAL OE
|
||||
// $pre YYY $post <> \u0277 ; # LATIN SMALL LETTER CLOSED OMEGA
|
||||
// $pre YYY $post <> \u0278 ; # LATIN SMALL LETTER PHI
|
||||
// $pre YYY $post <> \u0279 ; # LATIN SMALL LETTER TURNED R
|
||||
// $pre YYY $post <> \u027A ; # LATIN SMALL LETTER TURNED R WITH LONG LEG
|
||||
// $pre YYY $post <> \u027B ; # LATIN SMALL LETTER TURNED R WITH HOOK
|
||||
// $pre YYY $post <> \u027C ; # LATIN SMALL LETTER R WITH LONG LEG
|
||||
// $pre YYY $post <> \u027D ; # LATIN SMALL LETTER R WITH TAIL
|
||||
// $pre YYY $post <> \u027E ; # LATIN SMALL LETTER R WITH FISHHOOK
|
||||
// $pre YYY $post <> \u027F ; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK
|
||||
// $pre YYY $post <> \u0280 ; # LATIN LETTER SMALL CAPITAL R
|
||||
// $pre YYY $post <> \u0281 ; # LATIN LETTER SMALL CAPITAL INVERTED R
|
||||
// $pre YYY $post <> \u0282 ; # LATIN SMALL LETTER S WITH HOOK
|
||||
// $pre YYY $post <> \u0284 ; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
|
||||
// $pre YYY $post <> \u0285 ; # LATIN SMALL LETTER SQUAT REVERSED ESH
|
||||
// $pre YYY $post <> \u0286 ; # LATIN SMALL LETTER ESH WITH CURL
|
||||
// $pre YYY $post <> \u0287 ; # LATIN SMALL LETTER TURNED T
|
||||
// $pre YYY $post <> \u0288 ; # LATIN SMALL LETTER T WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u0289 ; # LATIN SMALL LETTER U BAR
|
||||
// $pre YYY $post <> \u028B ; # LATIN SMALL LETTER V WITH HOOK
|
||||
// $pre YYY $post <> \u028D ; # LATIN SMALL LETTER TURNED W
|
||||
// $pre YYY $post <> \u028E ; # LATIN SMALL LETTER TURNED Y
|
||||
// $pre YYY $post <> \u028F ; # LATIN LETTER SMALL CAPITAL Y
|
||||
// $pre YYY $post <> \u0290 ; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK
|
||||
// $pre YYY $post <> \u0291 ; # LATIN SMALL LETTER Z WITH CURL
|
||||
// $pre YYY $post <> \u0293 ; # LATIN SMALL LETTER EZH WITH CURL
|
||||
// $pre YYY $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP
|
||||
// $pre YYY $post <> \u0295 ; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE
|
||||
// $pre YYY $post <> \u0296 ; # LATIN LETTER INVERTED GLOTTAL STOP
|
||||
// $pre YYY $post <> \u0297 ; # LATIN LETTER STRETCHED C
|
||||
// $pre YYY $post <> \u0298 ; # LATIN LETTER BILABIAL CLICK
|
||||
// $pre YYY $post <> \u0299 ; # LATIN LETTER SMALL CAPITAL B
|
||||
// $pre YYY $post <> \u029A ; # LATIN SMALL LETTER CLOSED OPEN E
|
||||
// $pre YYY $post <> \u029B ; # LATIN LETTER SMALL CAPITAL G WITH HOOK
|
||||
// $pre YYY $post <> \u029C ; # LATIN LETTER SMALL CAPITAL H
|
||||
// $pre YYY $post <> \u029D ; # LATIN SMALL LETTER J WITH CROSSED-TAIL
|
||||
// $pre YYY $post <> \u029E ; # LATIN SMALL LETTER TURNED K
|
||||
// $pre YYY $post <> \u029F ; # LATIN LETTER SMALL CAPITAL L
|
||||
// $pre YYY $post <> \u02A0 ; # LATIN SMALL LETTER Q WITH HOOK
|
||||
// $pre YYY $post <> \u02A1 ; # LATIN LETTER GLOTTAL STOP WITH STROKE
|
||||
// $pre YYY $post <> \u02A2 ; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE
|
||||
// $pre YYY $post <> \u02A3 ; # LATIN SMALL LETTER DZ DIGRAPH
|
||||
// $pre YYY $post <> \u02A4 ; # LATIN SMALL LETTER DEZH DIGRAPH
|
||||
// $pre YYY $post <> \u02A5 ; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL
|
||||
// $pre YYY $post <> \u02A6 ; # LATIN SMALL LETTER TS DIGRAPH
|
||||
// $pre YYY $post <> \u02A7 ; # LATIN SMALL LETTER TESH DIGRAPH
|
||||
// $pre YYY $post <> \u02A8 ; # LATIN SMALL LETTER TC DIGRAPH WITH CURL
|
||||
// $pre YYY $post <> \u02A9 ; # LATIN SMALL LETTER FENG DIGRAPH
|
||||
// $pre YYY $post <> \u02AA ; # LATIN SMALL LETTER LS DIGRAPH
|
||||
// $pre YYY $post <> \u02AB ; # LATIN SMALL LETTER LZ DIGRAPH
|
||||
// $pre YYY $post <> \u02AC ; # LATIN LETTER BILABIAL PERCUSSIVE
|
||||
// $pre YYY $post <> \u02AD ; # LATIN LETTER BIDENTAL PERCUSSIVE
|
||||
// $pre YYY $post <> \u02B0 ; # MODIFIER LETTER SMALL H
|
||||
// $pre YYY $post <> \u02B1 ; # MODIFIER LETTER SMALL H WITH HOOK
|
||||
// $pre YYY $post <> \u02B2 ; # MODIFIER LETTER SMALL J
|
||||
// $pre YYY $post <> \u02B3 ; # MODIFIER LETTER SMALL R
|
||||
// $pre YYY $post <> \u02B4 ; # MODIFIER LETTER SMALL TURNED R
|
||||
// $pre YYY $post <> \u02B5 ; # MODIFIER LETTER SMALL TURNED R WITH HOOK
|
||||
// $pre YYY $post <> \u02B6 ; # MODIFIER LETTER SMALL CAPITAL INVERTED R
|
||||
// $pre YYY $post <> \u02B7 ; # MODIFIER LETTER SMALL W
|
||||
// $pre YYY $post <> \u02B8 ; # MODIFIER LETTER SMALL Y
|
||||
// $pre YYY $post <> \u02E0 ; # MODIFIER LETTER SMALL GAMMA
|
||||
// $pre YYY $post <> \u02E1 ; # MODIFIER LETTER SMALL L
|
||||
// $pre YYY $post <> \u02E2 ; # MODIFIER LETTER SMALL S
|
||||
// $pre YYY $post <> \u02E3 ; # MODIFIER LETTER SMALL X
|
||||
// $pre YYY $post <> \u02E4 ; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
|
||||
// $pre YYY $post <> \u1E9A ; # LATIN SMALL LETTER A WITH RIGHT HALF RING
|
||||
// $pre YYY $post <> \u207F ; # SUPERSCRIPT LATIN SMALL LETTER N
|
||||
|
||||
":: NFC (NFD) ;"
|
||||
}
|
||||
}
|
|
@ -1,50 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Any_Publishing.txt
|
||||
// Date: Tue May 18 17:24:48 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Any_Publishing
|
||||
|
||||
// Any-Publishing: rewrites typewriter-style ASCII punctuation (straight quotes, backquotes, "--", repeated spaces) as publishing punctuation.
|
||||
t_Any_Publishing {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Test case
|
||||
// "The" "(quick)" ('brown') `fox' ` jumped -- "over?"
|
||||
|
||||
// Variables
|
||||
|
||||
"$single = \\\' ;"
|
||||
"$space = ' ' ;"
|
||||
"$double = \\\" ;"
|
||||
"$back = \\\` ;"
|
||||
"$tab = '\u0009' ;" // U+0009 CHARACTER TABULATION (previously U+0008, which is BACKSPACE); not referenced by any rule below
|
||||
// $makeRight: context after which a straight quote is rewritten as an opening quote (see the two "$makeRight {...}" rules).
|
||||
// NOTE(review): the trailing $ inside the set is presumably the start-of-text anchor - confirm against the ICU rule syntax.
|
||||
"$makeRight = [[:Z:][:Ps:][:Pi:]$] ;"
|
||||
|
||||
// fix UNIX quotes (forward direction only): a doubled backquote becomes an opening double quote, a single backquote an opening single quote
|
||||
|
||||
"$back $back > “ ;"
|
||||
"$back > ‘ ;"
|
||||
|
||||
// fix typewriter quotes, by context: opening quote after $makeRight, closing quote everywhere else
|
||||
|
||||
"$makeRight {$double} <> “ ;"
|
||||
"$double <> ” ;"
|
||||
|
||||
"$makeRight {$single} <> ‘ ;"
|
||||
"$single <> ’;"
|
||||
|
||||
// fix multiple spaces and hyphens: a space that follows a space is deleted; "--" becomes an em dash
|
||||
|
||||
"$space {$space} > ;"
|
||||
"'--' <> — ;"
|
||||
}
|
||||
}
|
|
@ -1,162 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Arabic_Latin.txt
|
||||
// Date: Tue May 18 17:24:48 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Arabic_Latin
|
||||
|
||||
// Arabic-Latin transliterator rules; most rules are bidirectional (<>), so the same table also drives the reverse direction.
|
||||
t_Arab_Latn {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Generally follows UNGEGN <http://www.eki.ee/wgrs/rom1_ar.pdf>
|
||||
// Occasionally deviates in the direction of ISO 233 <http://homepage.mac.com/sirbinks/pdf/Arabic.pdf>
|
||||
// a) where required for disambiguation.
|
||||
// b) with underdot instead of cedilla for letters like SAD, since
|
||||
// those are explicitly in Unicode for transliteration.
|
||||
// c) with extra non-Arabic-language letters, like PEH
|
||||
|
||||
// Does *not* do assimilation of "al", nor hyphenation.
|
||||
// While it could be done, we need to determine whether a prefix "al" could
|
||||
// occur other than as the definite article (since no space is used).
|
||||
|
||||
":: [[:Arabic:] [ⁿ\u060C\u061B\u061F\u0640\u064B-\u0655\u0660-\u066C\u06F0-\u06F9\uFDFC]] ;"
|
||||
":: NFKD (NFC);"
|
||||
// Combining marks appended on the Latin side so that distinct Arabic characters map to distinct Latin sequences
|
||||
// (for example ALEF MAKSURA is "y $disambig" while YEH is plain "y").
|
||||
"$disambig = ̱ ;"
|
||||
|
||||
"$disambig2 = ̰ ;"
|
||||
|
||||
"$under = ̣ ;"
|
||||
|
||||
|
||||
"$notAbove = [[:^ccc=0:]&[:^ccc=230:]];"
|
||||
|
||||
// non-letters
|
||||
|
||||
"٫ <> '.' $disambig ;" // ARABIC DECIMAL SEPARATOR
|
||||
"٬ <> ',' $disambig ;" // ARABIC THOUSANDS SEPARATOR
|
||||
// ٭ <> ; # ARABIC FIVE POINTED STAR // no need to transliterate
|
||||
|
||||
"، <> ',' ;" // ARABIC COMMA
|
||||
"؛ <> ';' ;" // ARABIC SEMICOLON
|
||||
"؟ <> '?' ;" // ARABIC QUESTION MARK
|
||||
"٪ <> '%' ;" // ARABIC PERCENT SIGN
|
||||
|
||||
"۰ <> 0 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT ZERO
|
||||
"۱ <> 1 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT ONE
|
||||
"۲ <> 2 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT TWO
|
||||
"۳ <> 3 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT THREE
|
||||
"۴ <> 4 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT FOUR
|
||||
"۵ <> 5 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT FIVE
|
||||
"۶ <> 6 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT SIX
|
||||
"۷ <> 7 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT SEVEN
|
||||
"۸ <> 8 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT EIGHT
|
||||
"۹ <> 9 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT NINE
|
||||
|
||||
"٠ <> 0 ;" // ARABIC-INDIC DIGIT ZERO
|
||||
"١ <> 1 ;" // ARABIC-INDIC DIGIT ONE
|
||||
"٢ <> 2 ;" // ARABIC-INDIC DIGIT TWO
|
||||
"٣ <> 3 ;" // ARABIC-INDIC DIGIT THREE
|
||||
"٤ <> 4 ;" // ARABIC-INDIC DIGIT FOUR
|
||||
"٥ <> 5 ;" // ARABIC-INDIC DIGIT FIVE
|
||||
"٦ <> 6 ;" // ARABIC-INDIC DIGIT SIX
|
||||
"٧ <> 7 ;" // ARABIC-INDIC DIGIT SEVEN
|
||||
"٨ <> 8 ;" // ARABIC-INDIC DIGIT EIGHT
|
||||
"٩ <> 9 ;" // ARABIC-INDIC DIGIT NINE
|
||||
|
||||
// letters
|
||||
|
||||
// long vowels
|
||||
"َا<> ā ;" // ARABIC FATHA, ARABIC LETTER ALEF
|
||||
"ُو <> ū ;" // ARABIC DAMMA, ARABIC LETTER WAW
|
||||
"ِي <> ī ;" // ARABIC KASRA, ARABIC LETTER YEH
|
||||
|
||||
// longer items moved here to prevent masking
|
||||
"ث <> t h $disambig ;" // ARABIC LETTER THEH
|
||||
"ذ <> d h $disambig ;" // ARABIC LETTER THAL
|
||||
"ش <> s h $disambig ;" // ARABIC LETTER SHEEN
|
||||
"ص <> s $under ;" // ARABIC LETTER SAD
|
||||
"ض <> d $under ;" // ARABIC LETTER DAD
|
||||
"ط <> t $under ;" // ARABIC LETTER TAH
|
||||
"ظ <> z $under ;" // ARABIC LETTER ZAH
|
||||
"غ <> g h $disambig ;" // ARABIC LETTER GHAIN
|
||||
|
||||
// WARNING: special case
|
||||
// <t, umlaut, half-ring below> will be canonically ordered as <t, half-ring below, umlaut>
|
||||
// so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
|
||||
// ةٕ < ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
|
||||
|
||||
"ة <> t \u0308 ;" // ARABIC LETTER TEH MARBUTA
|
||||
"ة | $1 < t ($notAbove+) \u0308 ;" // ARABIC LETTER TEH MARBUTA
|
||||
|
||||
// non-Arabic language
|
||||
"ژ <> z h $disambig ;" // ARABIC LETTER JEH
|
||||
"ڭ <> n $disambig g ;" // ARABIC LETTER NG
|
||||
"ۋ <> v $disambig ;" // ARABIC LETTER VE
|
||||
"ی <> y $disambig2 ;" // ARABIC LETTER FARSI YEH
|
||||
|
||||
// Arabic language
|
||||
|
||||
"ء <> ʾ ;" // ARABIC LETTER HAMZA
|
||||
"ا <> a $under;" // ARABIC LETTER ALEF
|
||||
"ب <> b ;" // ARABIC LETTER BEH
|
||||
"ت <> t ;" // ARABIC LETTER TEH
|
||||
"ج <> j ;" // ARABIC LETTER JEEM
|
||||
"ح <> h $under ;" // ARABIC LETTER HAH
|
||||
"خ <> k h $disambig ;" // ARABIC LETTER KHAH
|
||||
"د <> d ;" // ARABIC LETTER DAL
|
||||
"ر <> r ;" // ARABIC LETTER REH
|
||||
"ز <> z ;" // ARABIC LETTER ZAIN
|
||||
"س <> s ;" // ARABIC LETTER SEEN
|
||||
"ع <> ʿ ;" // ARABIC LETTER AIN
|
||||
"ـ > ;" // ARABIC TATWEEL
|
||||
"ف <> f ;" // ARABIC LETTER FEH
|
||||
"ق <> q ;" // ARABIC LETTER QAF
|
||||
"ك <> k ;" // ARABIC LETTER KAF
|
||||
"ل <> l ;" // ARABIC LETTER LAM
|
||||
"م <> m ;" // ARABIC LETTER MEEM
|
||||
"ن <> n ;" // ARABIC LETTER NOON
|
||||
"ه <> h ;" // ARABIC LETTER HEH
|
||||
"و <> w ;" // ARABIC LETTER WAW
|
||||
"ى <> y $disambig ;" // ARABIC LETTER ALEF MAKSURA
|
||||
"ي <> y ;" // ARABIC LETTER YEH
|
||||
"ً <> aⁿ ;" // ARABIC FATHATAN
|
||||
"ٌ <> uⁿ ;" // ARABIC DAMMATAN
|
||||
"ٍ <> iⁿ ;" // ARABIC KASRATAN
|
||||
"َ <> a ;" // ARABIC FATHA
|
||||
"ُ <> u ;" // ARABIC DAMMA
|
||||
"ِ <> i ;" // ARABIC KASRA
|
||||
"ّ <> ̃ ;" // ARABIC SHADDA
|
||||
"ْ <> ̊ ;" // ARABIC SUKUN
|
||||
|
||||
// special combining marks
|
||||
"ٓ <> ̂ ;" // ARABIC MADDAH ABOVE
|
||||
"ٔ <> ̉ ;" // ARABIC HAMZA ABOVE
|
||||
"ٕ <> ̹ ;" // ARABIC HAMZA BELOW
|
||||
|
||||
// Some non-Arabic language (not in UNGEGN)
|
||||
"پ <> p ;" // ARABIC LETTER PEH
|
||||
"چ <> c h $disambig ;" // ARABIC LETTER TCHEH
|
||||
"ڤ <> v ;" // ARABIC LETTER VEH
|
||||
// ڥ <> v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
|
||||
// ڢ <> f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
|
||||
"گ <> g ;" // ARABIC LETTER GAF
|
||||
|
||||
// fallbacks: reverse-direction-only rules (<) that rewrite Latin c, e, o, x and ⁿ as letters that have rules above
|
||||
// NOTE(review): the leading | is presumably the cursor marker, so the rewritten text is processed again - confirm against the ICU rule syntax
|
||||
"| s < c } [eiy];"
|
||||
"| k < c ;"
|
||||
"| i < e ;"
|
||||
"| u < o ;"
|
||||
"| ks < x ;"
|
||||
"| n < ⁿ;"
|
||||
|
||||
":: (lower) ;"
|
||||
"::NFC (NFD);"
|
||||
":: ( [[:Latin:] [%,.0-9;?\u02BE-\u02BF\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339\u037E]] );"
|
||||
}
|
||||
}
|
|
@ -1,119 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Bengali_InterIndic.txt
|
||||
// Date: Tue May 18 17:24:48 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Bengali_InterIndic
|
||||
|
||||
// Bengali-InterIndic: one-way (>) rules mapping Bengali code points (U+09xx, plus the shared dandas U+0964/U+0965) to InterIndic private-use code points (U+E0xx).
|
||||
t_Beng_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Bengali-InterIndic
|
||||
|
||||
// Two-code-point sequences come first so the single-code-point \u09C7 rule further down does not mask them.
|
||||
"\u09C7\u09BE>\uE04B;" // VOWEL SIGN O
|
||||
"\u09C7\u09D7>\uE04C;" // VOWEL SIGN AU
|
||||
"\u0981>\uE001;" // SIGN CANDRABINDU
|
||||
"\u0982>\uE002;" // SIGN ANUSVARA
|
||||
"\u0983>\uE003;" // SIGN VISARGA
|
||||
"\u0985>\uE005;" // LETTER A
|
||||
"\u0986>\uE006;" // LETTER AA
|
||||
"\u0987>\uE007;" // LETTER I
|
||||
"\u0988>\uE008;" // LETTER II
|
||||
"\u0989>\uE009;" // LETTER U
|
||||
"\u098A>\uE00A;" // LETTER UU
|
||||
"\u098B>\uE00B;" // LETTER VOCALIC R
|
||||
"\u098C>\uE00C;" // LETTER VOCALIC L
|
||||
"\u098F>\uE00F;" // LETTER E
|
||||
"\u0990>\uE010;" // LETTER AI
|
||||
"\u0993>\uE013;" // LETTER O
|
||||
"\u0994>\uE014;" // LETTER AU
|
||||
"\u0995>\uE015;" // LETTER KA
|
||||
"\u0996>\uE016;" // LETTER KHA
|
||||
"\u0997>\uE017;" // LETTER GA
|
||||
"\u0998>\uE018;" // LETTER GHA
|
||||
"\u0999>\uE019;" // LETTER NGA
|
||||
"\u099A>\uE01A;" // LETTER CA
|
||||
"\u099B>\uE01B;" // LETTER CHA
|
||||
"\u099C>\uE01C;" // LETTER JA
|
||||
"\u099D>\uE01D;" // LETTER JHA
|
||||
"\u099E>\uE01E;" // LETTER NYA
|
||||
"\u099F>\uE01F;" // LETTER TTA
|
||||
"\u09A0>\uE020;" // LETTER TTHA
|
||||
"\u09A1>\uE021;" // LETTER DDA
|
||||
"\u09A2>\uE022;" // LETTER DDHA
|
||||
"\u09A3>\uE023;" // LETTER NNA
|
||||
"\u09A4>\uE024;" // LETTER TA
|
||||
"\u09A5>\uE025;" // LETTER THA
|
||||
"\u09A6>\uE026;" // LETTER DA
|
||||
"\u09A7>\uE027;" // LETTER DHA
|
||||
"\u09A8>\uE028;" // LETTER NA
|
||||
"\u09AA>\uE02A;" // LETTER PA
|
||||
"\u09AB>\uE02B;" // LETTER PHA
|
||||
"\u09AC>\uE02C;" // LETTER BA
|
||||
"\u09AD>\uE02D;" // LETTER BHA
|
||||
"\u09AE>\uE02E;" // LETTER MA
|
||||
"\u09AF>\uE02F;" // LETTER YA
|
||||
"\u09B0>\uE030;" // LETTER RA
|
||||
"\u09B2>\uE032;" // LETTER LA
|
||||
"\u09B6>\uE036;" // LETTER SHA
|
||||
"\u09B7>\uE037;" // LETTER SSA
|
||||
"\u09B8>\uE038;" // LETTER SA
|
||||
"\u09B9>\uE039;" // LETTER HA
|
||||
"\u09BC>\uE03C;" // SIGN NUKTA
|
||||
"\u09BD>\uE03D;" // SIGN AVAGRAHA
|
||||
"\u09BE>\uE03E;" // VOWEL SIGN AA
|
||||
"\u09BF>\uE03F;" // VOWEL SIGN I
|
||||
"\u09C0>\uE040;" // VOWEL SIGN II
|
||||
"\u09C1>\uE041;" // VOWEL SIGN U
|
||||
"\u09C2>\uE042;" // VOWEL SIGN UU
|
||||
"\u09C3>\uE043;" // VOWEL SIGN VOCALIC R
|
||||
"\u09C4>\uE044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u09C7>\uE047;" // VOWEL SIGN E
|
||||
"\u09C8>\uE048;" // VOWEL SIGN AI
|
||||
"\u09CB>\uE04B;" // VOWEL SIGN O (single code point; same target as the \u09C7\u09BE sequence above)
|
||||
"\u09CC>\uE04C;" // VOWEL SIGN AU (single code point; same target as the \u09C7\u09D7 sequence above)
|
||||
//
|
||||
"\u09CD>\uE04D;" // SIGN VIRAMA
|
||||
"\u09D7>\uE057;" // AU LENGTH MARK
|
||||
//
|
||||
"\u09E0>\uE060;" // LETTER VOCALIC RR
|
||||
"\u09E1>\uE061;" // LETTER VOCALIC LL
|
||||
"\u09E2>\uE062;" // VOWEL SIGN VOCALIC L
|
||||
"\u09E3>\uE063;" // VOWEL SIGN VOCALIC LL
|
||||
"\u09E6>\uE066;" // DIGIT ZERO
|
||||
"\u09E7>\uE067;" // DIGIT ONE
|
||||
"\u09E8>\uE068;" // DIGIT TWO
|
||||
"\u09E9>\uE069;" // DIGIT THREE
|
||||
"\u09EA>\uE06A;" // DIGIT FOUR
|
||||
"\u09EB>\uE06B;" // DIGIT FIVE
|
||||
"\u09EC>\uE06C;" // DIGIT SIX
|
||||
"\u09ED>\uE06D;" // DIGIT SEVEN
|
||||
"\u09EE>\uE06E;" // DIGIT EIGHT
|
||||
"\u09EF>\uE06F;" // DIGIT NINE
|
||||
"\u09F0>\ue071;" // Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
|
||||
"\u09F1>\ue072;" // Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
|
||||
"\u09F2>\ue073;" // Bengali-InterIndic: RUPEE MARK
|
||||
"\u09F3>\ue074;" // Bengali-InterIndic: RUPEE SIGN
|
||||
"\u09F4>\ue075;" // Bengali-InterIndic: CURRENCY NUMERATOR ONE
|
||||
"\u09F5>\ue076;" // Bengali-InterIndic: CURRENCY NUMERATOR TWO
|
||||
"\u09F6>\ue077;" // Bengali-InterIndic: CURRENCY NUMERATOR THREE
|
||||
"\u09F7>\ue078;" // Bengali-InterIndic: CURRENCY NUMERATOR FOUR
|
||||
"\u09F8>\ue079;" // Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
"\u09F9>\ue07A;" // Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN
|
||||
"\u09FA>\ue07B;" // ISSHAR
|
||||
|
||||
"\u0964>\ue064;" // DANDA
|
||||
"\u0965>\ue065;" // DOUBLE DANDA
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,322 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Cyrillic_Latin.txt
|
||||
// Date: Tue May 18 17:24:48 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Cyrillic_Latin
|
||||
|
||||
t_Cyrl_Latn {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
// TODO: add remaining characters
|
||||
// Should add variants for Russian-English, Russian-German
|
||||
// Those can use this as a base, and then remap cases
|
||||
// like a $hat to ya or ja.
|
||||
|
||||
// :: [\\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ;
|
||||
//## WARNING, \u0308 must be added to the generated filters, in both directions ###
|
||||
// MINIMAL FILTER
|
||||
":: [\u0308\u0102-\u0103\u0114-\u0115\u011E-\u011F\u012C-\u012D\u014E-\u014F\u016C-\u016D\u0306\u0400-\u045F\u0490-\u0495\u0498-\u0499\u04C1-\u04C2\u04D0-\u04DF\u04E2-\u04E7\u04EC-\u04F5\u04F8-\u04F9\u1E1C-\u1E1D\u1EAE-\u1EB7\u1FB0\u1FB8\u1FD0\u1FD8\u1FE0\u1FE8] ;"
|
||||
":: NFD (NFC) ;"
|
||||
|
||||
"$modprime = \u02B9;"
|
||||
"$modprime2 = \u02BA;"
|
||||
|
||||
"$grave = \u0300;"
|
||||
"$acute = \u0301;"
|
||||
"$hat = \u0302;"
|
||||
"$breve = \u0306 ;"
|
||||
"$dot = \u0307 ;"
|
||||
"$caron = \u030C ;"
|
||||
"$comma = \u0326 ;"
|
||||
"$under = \u0331 ;"
|
||||
|
||||
// move up so not masked
|
||||
|
||||
"я <> a $hat ;" // CYRILLIC SMALL LETTER YA
|
||||
"Я <> A $hat ;" // CYRILLIC CAPITAL LETTER YA
|
||||
|
||||
"ч <> c $caron ;" // CYRILLIC SMALL LETTER CHE
|
||||
"Ч <> C $caron;" // CYRILLIC CAPITAL LETTER CHE
|
||||
// ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
|
||||
// Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
|
||||
// ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
|
||||
// Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
|
||||
// ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
|
||||
// Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
|
||||
|
||||
"э <> e $acute;" // CYRILLIC SMALL LETTER E
|
||||
"Э <> E $acute;" // CYRILLIC CAPITAL LETTER E
|
||||
"є <> e $hat;" // CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
"Є <> E $hat;" // CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
|
||||
"ш <> s $caron ;" // CYRILLIC SMALL LETTER SHA
|
||||
"Ш <> S $caron ;" // CYRILLIC CAPITAL LETTER SHA
|
||||
"щ <> s $hat ;" // CYRILLIC SMALL LETTER SHCHA
|
||||
"Щ <> S $hat;" // CYRILLIC CAPITAL LETTER SHCHA
|
||||
|
||||
"ѕ <> z $hat ;" // CYRILLIC SMALL LETTER DZE
|
||||
"Ѕ <> Z $hat;" // CYRILLIC CAPITAL LETTER DZE
|
||||
// ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
|
||||
// Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
|
||||
|
||||
"ю <> u $hat ;" // CYRILLIC SMALL LETTER YU
|
||||
"Ю <> U $hat ;" // CYRILLIC CAPITAL LETTER YU
|
||||
|
||||
"і <> i $acute;" // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
"І <> I $acute;" // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
"ј <> j $caron;" // CYRILLIC SMALL LETTER JE
|
||||
"Ј <> J $caron;" // CYRILLIC CAPITAL LETTER JE
|
||||
|
||||
"љ <> l $hat ;" // CYRILLIC SMALL LETTER LJE
|
||||
"Љ <> L $hat ;" // CYRILLIC CAPITAL LETTER LJE
|
||||
"њ <> n $hat ;" // CYRILLIC SMALL LETTER NJE
|
||||
"Њ <> N $hat ;" // CYRILLIC CAPITAL LETTER NJE
|
||||
|
||||
"ћ <> c $acute ;" // CYRILLIC SMALL LETTER TSHE
|
||||
"Ћ <> C $acute ;" // CYRILLIC CAPITAL LETTER TSHE
|
||||
|
||||
"џ <> d $hat ;" // CYRILLIC SMALL LETTER DZHE
|
||||
"Џ <> D $hat ;" // CYRILLIC CAPITAL LETTER DZHE
|
||||
|
||||
// Normal order
|
||||
|
||||
"а <> a ;" // CYRILLIC SMALL LETTER A
|
||||
"А <> A ;" // CYRILLIC CAPITAL LETTER A
|
||||
"ә <> \u0259 ;" // CYRILLIC SMALL LETTER SCHWA
|
||||
"Ә <> \u018F ;" // CYRILLIC CAPITAL LETTER SCHWA
|
||||
"ӕ <> \u00E6 ;" // CYRILLIC SMALL LIGATURE A IE
|
||||
"Ӕ <> \u00C6 ;" // CYRILLIC CAPITAL LIGATURE A IE
|
||||
"б <> b ;" // CYRILLIC SMALL LETTER BE
|
||||
"Б <> B ;" // CYRILLIC CAPITAL LETTER BE
|
||||
"в <> v ;" // CYRILLIC SMALL LETTER VE
|
||||
"В <> V ;" // CYRILLIC CAPITAL LETTER VE
|
||||
|
||||
"ґ <> g $grave ;" // CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
"Ґ <> G $grave ;" // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
"ғ <> g $dot ;" // CYRILLIC SMALL LETTER GHE WITH STROKE
|
||||
"Ғ <> G $dot;" // CYRILLIC CAPITAL LETTER GHE WITH STROKE
|
||||
"ҕ <> g $breve;" // CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
|
||||
"Ҕ <> G $breve;" // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
|
||||
"г <> g ;" // CYRILLIC SMALL LETTER GHE
|
||||
"Г <> G ;" // CYRILLIC CAPITAL LETTER GHE
|
||||
|
||||
"д <> d;" // CYRILLIC SMALL LETTER DE
|
||||
"Д <> D;" // CYRILLIC CAPITAL LETTER DE
|
||||
"ђ <> đ ;" // CYRILLIC SMALL LETTER DJE
|
||||
"Ђ <> Đ ;" // CYRILLIC CAPITAL LETTER DJE
|
||||
"ҙ <> z $comma ;" // CYRILLIC SMALL LETTER ZE WITH DESCENDER
|
||||
"Ҙ <> Z $comma ;" // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
|
||||
"е <> e ;" // CYRILLIC SMALL LETTER IE
|
||||
"Е <> E;" // CYRILLIC CAPITAL LETTER IE
|
||||
|
||||
"ж <> z $caron;" // CYRILLIC SMALL LETTER ZHE
|
||||
"Ж <> Z $caron;" // CYRILLIC CAPITAL LETTER ZHE
|
||||
|
||||
// җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
|
||||
// Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
|
||||
|
||||
"з <> z ;" // CYRILLIC SMALL LETTER ZE
|
||||
"З <> Z;" // CYRILLIC CAPITAL LETTER ZE
|
||||
|
||||
"й <> j ;" // CYRILLIC SMALL LETTER SHORT I
|
||||
"Й <> J ;" // CYRILLIC CAPITAL LETTER SHORT I
|
||||
"и <> i ;" // CYRILLIC SMALL LETTER I
|
||||
"И <> I ;" // CYRILLIC CAPITAL LETTER I
|
||||
|
||||
"к <> k ;" // CYRILLIC SMALL LETTER KA
|
||||
"К <> K;" // CYRILLIC CAPITAL LETTER KA
|
||||
|
||||
// қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
|
||||
// Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
|
||||
// ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
|
||||
// Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
|
||||
// ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
|
||||
// Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
|
||||
// ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
|
||||
// Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
|
||||
// ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
|
||||
// Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
|
||||
"л <> l ;" // CYRILLIC SMALL LETTER EL
|
||||
"Л <> L;" // CYRILLIC CAPITAL LETTER EL
|
||||
|
||||
"м <> m ;" // CYRILLIC SMALL LETTER EM
|
||||
"М <> M ;" // CYRILLIC CAPITAL LETTER EM
|
||||
"н <> n ;" // CYRILLIC SMALL LETTER EN
|
||||
"Н <> N;" // CYRILLIC CAPITAL LETTER EN
|
||||
// ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
|
||||
// Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
|
||||
// ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
|
||||
// Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
|
||||
// ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
|
||||
// Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
|
||||
|
||||
"о <> o ;" // CYRILLIC SMALL LETTER O
|
||||
"О <> O ;" // CYRILLIC CAPITAL LETTER O
|
||||
// ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
|
||||
// Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
"п <> p ;" // CYRILLIC SMALL LETTER PE
|
||||
"П <> P ;" // CYRILLIC CAPITAL LETTER PE
|
||||
// ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
|
||||
// Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
|
||||
// ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
|
||||
// Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
|
||||
"р <> r ;" // CYRILLIC SMALL LETTER ER
|
||||
"Р <> R ;" // CYRILLIC CAPITAL LETTER ER
|
||||
// ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
|
||||
// Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
|
||||
"с <> s ;" // CYRILLIC SMALL LETTER ES
|
||||
"С <> S ;" // CYRILLIC CAPITAL LETTER ES
|
||||
// ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
|
||||
// Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
|
||||
"т <> t ;" // CYRILLIC SMALL LETTER TE
|
||||
"Т <> T ;" // CYRILLIC CAPITAL LETTER TE
|
||||
// ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
|
||||
// Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
|
||||
|
||||
"у <> u ;" // CYRILLIC SMALL LETTER U
|
||||
"У <> U ;" // CYRILLIC CAPITAL LETTER U
|
||||
// ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
|
||||
// Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
|
||||
// ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
|
||||
// Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
|
||||
// ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
|
||||
// Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
|
||||
"ф <> f ;" // CYRILLIC SMALL LETTER EF
|
||||
"Ф <> F ;" // CYRILLIC CAPITAL LETTER EF
|
||||
"х <> h ;" // CYRILLIC SMALL LETTER HA
|
||||
"Х <> H;" // CYRILLIC CAPITAL LETTER HA
|
||||
// ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
|
||||
// Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
|
||||
// һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
|
||||
// Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
|
||||
// ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
|
||||
// Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
|
||||
// ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
|
||||
// Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
|
||||
// ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
|
||||
// Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
|
||||
// ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
|
||||
// Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
|
||||
"ц <> c ;" // CYRILLIC SMALL LETTER TSE
|
||||
"Ц <> C;" // CYRILLIC CAPITAL LETTER TSE
|
||||
// ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
|
||||
// Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
|
||||
|
||||
// ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
|
||||
// Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
|
||||
// ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||
// Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||
|
||||
|
||||
"Ъ <> $modprime2 $under ;" // CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
"ъ <> $modprime2 ;" // CYRILLIC SMALL LETTER HARD SIGN
|
||||
"Ь <> $modprime $under ;" // CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
"ь <> $modprime ;" // CYRILLIC SMALL LETTER SOFT SIGN
|
||||
|
||||
"ы <> y ;" // CYRILLIC SMALL LETTER YERU
|
||||
"Ы <> Y ;" // CYRILLIC CAPITAL LETTER YERU
|
||||
|
||||
// ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN
|
||||
// Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
|
||||
// ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
|
||||
// Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
|
||||
|
||||
// ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
|
||||
// Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
|
||||
// ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
|
||||
// Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS
|
||||
// ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS
|
||||
// Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS
|
||||
// ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
|
||||
// Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
|
||||
// ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
|
||||
// Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
|
||||
// ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
|
||||
// Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
|
||||
// ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
|
||||
// Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
|
||||
// ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
|
||||
// Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
|
||||
// ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
|
||||
// Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
|
||||
// ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
|
||||
// Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
|
||||
// Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA
|
||||
//## ӑ <> XXX ; # CYRILLIC SMALL LETTER A
|
||||
//## Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
|
||||
//## ӓ <> XXX ; # CYRILLIC SMALL LETTER A
|
||||
//## Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A
|
||||
//## ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA
|
||||
//## Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA
|
||||
//## ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE
|
||||
//## Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE
|
||||
//## ѐ <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
//## Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
//## ё <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
//## Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
//## ӗ <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
//## Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
//## ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE
|
||||
//## Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
|
||||
//## ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE
|
||||
//## Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
|
||||
//## ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE
|
||||
//## Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE
|
||||
//## ѝ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
//## Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
//## ӣ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
//## Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
//## ӥ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
//## Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
//## ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
//## Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
//## ӧ <> XXX ; # CYRILLIC SMALL LETTER O
|
||||
//## Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O
|
||||
//## ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O
|
||||
//## Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
//## ќ <> XXX ; # CYRILLIC SMALL LETTER KA
|
||||
//## Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA
|
||||
//## ӯ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
//## Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
//## ў <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
//## Ў <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
//## ӱ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
//## Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
//## ӳ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
//## Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
//## ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE
|
||||
//## Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE
|
||||
//## ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU
|
||||
//## Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU
|
||||
//## ӭ <> XXX ; # CYRILLIC SMALL LETTER E
|
||||
//## Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E
|
||||
//## ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
|
||||
//## Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
|
||||
|
||||
// Completeness
|
||||
"$ignore = [[:Mark:]''] * ;"
|
||||
"| k < q ;"
|
||||
"| K < Q ;"
|
||||
"| u < w ;"
|
||||
"| U < W ;"
|
||||
"| KS < X } $ignore [:UppercaseLetter:] ;"
|
||||
"| KS < [:UppercaseLetter:] $ignore { X ;"
|
||||
"| Ks < X ;"
|
||||
"| ks < x ;"
|
||||
|
||||
":: NFC (NFD) ;"
|
||||
// note: a global filter is more efficient, but MUST include all source chars!!
|
||||
// :: ([\\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]]);
|
||||
// MINIMAL FILTER: Latin-Cyrillic
|
||||
":: ( [\u0308A-Za-z\u00C0-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u018F\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0259\u02B9-\u02BA\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344\u0374\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0400\u0403\u040C-\u040E\u0419\u0439\u0450\u0453\u045C-\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F02-\u1F05\u1F0A-\u1F0D\u1F12-\u1F15\u1F1A-\u1F1D\u1F22-\u1F25\u1F2A-\u1F2D\u1F32-\u1F35\u1F3A-\u1F3D\u1F42-\u1F45\u1F4A-\u1F4D\u1F52-\u1F55\u1F5B\u1F5D\u1F62-\u1F65\u1F6A-\u1F6D\u1F70-\u1F7D\u1F82-\u1F85\u1F8A-\u1F8D\u1F92-\u1F95\u1F9A-\u1F9D\u1FA2-\u1FA5\u1FAA-\u1FAD\u1FB0\u1FB2\u1FB4\u1FB8\u1FBA-\u1FBB\u1FC2\u1FC4\u1FC8-\u1FCB\u1FCD-\u1FCE\u1FD0\u1FD2-\u1FD3\u1FD8\u1FDA-\u1FDB\u1FDD-\u1FDE\u1FE0\u1FE2-\u1FE3\u1FE8\u1FEA-\u1FEB\u1FED-\u1FEE\u1FF2\u1FF4\u1FF8-\u1FFB\u212A-\u212B] ) ;"
|
||||
}
|
||||
}
|
|
@ -1,133 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Devanagari_InterIndic.txt
|
||||
// Date: Tue May 18 17:24:48 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Devanagari_InterIndic
|
||||
|
||||
// t_Deva_InterIndic: one-way (">") transliteration rules from Devanagari to
// the "InterIndic" private-use range. Every rule below maps a Devanagari code
// point U+09xx to the private-use code point U+E0xx with the same low byte.
// NOTE(review): InterIndic is presumably a pivot encoding shared by the Indic
// script transliterators - confirm against the other *_InterIndic tables.
t_Deva_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Devanagari-InterIndic
|
||||
// :: NFD;   (normalization step is commented out, i.e. currently disabled)
|
||||
// Rules for decomposed characters.
// No rule exists here for U+093A-U+093B, U+094E-U+094F or U+0955-U+0957.
|
||||
|
||||
|
||||
"\u0901>\uE001;" // SIGN CANDRABINDU
|
||||
"\u0902>\uE002;" // SIGN ANUSVARA
|
||||
"\u0903>\uE003;" // SIGN VISARGA
|
||||
"\u0904>\uE004;" // LETTER SHORT A
|
||||
"\u0905>\uE005;" // LETTER A
|
||||
"\u0906>\uE006;" // LETTER AA
|
||||
"\u0907>\uE007;" // LETTER I
|
||||
"\u0908>\uE008;" // LETTER II
|
||||
"\u0909>\uE009;" // LETTER U
|
||||
"\u090A>\uE00A;" // LETTER UU
|
||||
"\u090B>\uE00B;" // LETTER VOCALIC R
|
||||
"\u090C>\uE00C;" // LETTER VOCALIC L
|
||||
"\u090D>\uE00D;" // LETTER CANDRA E (For representing English sounds)
|
||||
"\u090E>\uE00E;" // UNMAPPED LETTER SHORT E (For Southern Scripts)
|
||||
"\u090F>\uE00F;" // LETTER E
|
||||
"\u0910>\uE010;" // LETTER AI
|
||||
"\u0911>\uE011;" // LETTER CANDRA O (For representing English sounds)
|
||||
"\u0912>\uE012;" // UNMAPPED LETTER SHORT O (For Southern Scripts)
|
||||
"\u0913>\uE013;" // LETTER O
|
||||
"\u0914>\uE014;" // LETTER AU
|
||||
"\u0915>\uE015;" // LETTER KA
|
||||
"\u0916>\uE016;" // LETTER KHA
|
||||
"\u0917>\uE017;" // LETTER GA
|
||||
"\u0918>\uE018;" // LETTER GHA
|
||||
"\u0919>\uE019;" // LETTER NGA
|
||||
"\u091A>\uE01A;" // LETTER CA
|
||||
"\u091B>\uE01B;" // LETTER CHA
|
||||
"\u091C>\uE01C;" // LETTER JA
|
||||
"\u091D>\uE01D;" // LETTER JHA
|
||||
"\u091E>\uE01E;" // LETTER NYA
|
||||
"\u091F>\uE01F;" // LETTER TTA
|
||||
"\u0920>\uE020;" // LETTER TTHA
|
||||
"\u0921>\uE021;" // LETTER DDA
|
||||
"\u0922>\uE022;" // LETTER DDHA
|
||||
"\u0923>\uE023;" // LETTER NNA
|
||||
"\u0924>\uE024;" // LETTER TA
|
||||
"\u0925>\uE025;" // LETTER THA
|
||||
"\u0926>\uE026;" // LETTER DA
|
||||
"\u0927>\uE027;" // LETTER DHA
|
||||
"\u0928>\uE028;" // LETTER NA
|
||||
"\u0929>\uE029;" // LETTER NNNA
|
||||
"\u092A>\uE02A;" // LETTER PA
|
||||
"\u092B>\uE02B;" // LETTER PHA
|
||||
"\u092C>\uE02C;" // LETTER BA
|
||||
"\u092D>\uE02D;" // LETTER BHA
|
||||
"\u092E>\uE02E;" // LETTER MA
|
||||
"\u092F>\uE02F;" // LETTER YA
|
||||
"\u0930>\uE030;" // LETTER RA
|
||||
"\u0931>\uE031;" // LETTER RRA
|
||||
"\u0932>\uE032;" // LETTER LA
|
||||
"\u0933>\uE033;" // LETTER LLA
|
||||
"\u0934>\uE034;" // LETTER LLLA
|
||||
|
||||
"\u0935>\uE035;" // LETTER VA
|
||||
"\u0936>\uE036;" // LETTER SHA
|
||||
"\u0937>\uE037;" // LETTER SSA
|
||||
"\u0938>\uE038;" // LETTER SA
|
||||
"\u0939>\uE039;" // LETTER HA
|
||||
"\u093C>\uE03C;" // SIGN NUKTA
|
||||
"\u093D>\uE03D;" // SIGN AVAGRAHA
|
||||
"\u093E>\uE03E;" // VOWEL SIGN AA
|
||||
"\u093F>\uE03F;" // VOWEL SIGN I
|
||||
"\u0940>\uE040;" // VOWEL SIGN II
|
||||
"\u0941>\uE041;" // VOWEL SIGN U
|
||||
"\u0942>\uE042;" // VOWEL SIGN UU
|
||||
"\u0943>\uE043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0944>\uE044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u0945>\uE045;" // VOWEL SIGN CANDRA E
|
||||
"\u0946>\uE046;" // UNMAPPED VOWEL SIGN SHORT E
|
||||
"\u0947>\uE047;" // VOWEL SIGN E
|
||||
"\u0948>\uE048;" // VOWEL SIGN AI
|
||||
"\u0949>\uE049;" // VOWEL SIGN CANDRA O
|
||||
"\u094A>\uE04A;" // UNMAPPED VOWEL SIGN SHORT O
|
||||
"\u094B>\uE04B;" // VOWEL SIGN O
|
||||
"\u094C>\uE04C;" // VOWEL SIGN AU
|
||||
"\u094D>\uE04D;" // SIGN VIRAMA
|
||||
"\u0950>\uE050;" // OM
|
||||
"\u0951>\uE051;" // UNMAPPED STRESS SIGN UDATTA
|
||||
"\u0952>\uE052;" // UNMAPPED STRESS SIGN ANUDATTA
|
||||
"\u0953>\uE053;" // UNMAPPED GRAVE ACCENT
|
||||
"\u0954>\uE054;" // UNMAPPED ACUTE ACCENT
|
||||
"\u0958>\uE058;" // LETTER QA
|
||||
"\u0959>\uE059;" // LETTER KHHA
|
||||
"\u095A>\uE05a;" // LETTER GHHA
|
||||
"\u095B>\uE05b;" // LETTER ZA
|
||||
"\u095C>\uE05c;" // LETTER DDDHA
|
||||
"\u095D>\uE05d;" // LETTER RHA
|
||||
"\u095E>\uE05e;" // LETTER FA
|
||||
"\u095F>\uE05f;" // LETTER YYA
|
||||
"\u0960>\uE060;" // LETTER VOCALIC RR
|
||||
"\u0961>\uE061;" // LETTER VOCALIC LL
|
||||
"\u0962>\uE062;" // VOWEL SIGN VOCALIC L
|
||||
"\u0963>\uE063;" // VOWEL SIGN VOCALIC LL
|
||||
"\u0964>\ue064;" // DANDA
|
||||
"\u0965>\ue065;" // DOUBLE DANDA
|
||||
"\u0966>\uE066;" // DIGIT ZERO
|
||||
"\u0967>\uE067;" // DIGIT ONE
|
||||
"\u0968>\uE068;" // DIGIT TWO
|
||||
"\u0969>\uE069;" // DIGIT THREE
|
||||
"\u096A>\uE06A;" // DIGIT FOUR
|
||||
"\u096B>\uE06B;" // DIGIT FIVE
|
||||
"\u096C>\uE06C;" // DIGIT SIX
|
||||
"\u096D>\uE06D;" // DIGIT SEVEN
|
||||
"\u096E>\uE06E;" // DIGIT EIGHT
|
||||
"\u096F>\uE06F;" // DIGIT NINE
|
||||
"\u0970>\uE070;" // ABBREVIATION SIGN
|
||||
// :: NFC (NFD) ;   (normalization step is commented out, i.e. currently disabled)
|
||||
}
|
||||
}
|
|
@ -1,287 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Fullwidth_Halfwidth.txt
|
||||
// Date: Tue May 18 17:24:48 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Fullwidth_Halfwidth
|
||||
|
||||
t_FWidth_HWidth {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Fullwidth-Halfwidth
|
||||
|
||||
// Mechanically generated from Unicode Character Database
|
||||
// IDEOGRAPHIC SPACE then added, and
|
||||
// FULLWIDTH MACRON changed to map to MACRON, not SPACE + COMBINING MACRON
|
||||
|
||||
// multicharacter
|
||||
|
||||
"ガ<>ガ;" // to KATAKANA LETTER GA
|
||||
"ギ<>ギ;" // to KATAKANA LETTER GI
|
||||
"グ<>グ;" // to KATAKANA LETTER GU
|
||||
"ゲ<>ゲ;" // to KATAKANA LETTER GE
|
||||
"ゴ<>ゴ;" // to KATAKANA LETTER GO
|
||||
"ザ<>ザ;" // to KATAKANA LETTER ZA
|
||||
"ジ<>ジ;" // to KATAKANA LETTER ZI
|
||||
"ズ<>ズ;" // to KATAKANA LETTER ZU
|
||||
"ゼ<>ゼ;" // to KATAKANA LETTER ZE
|
||||
"ゾ<>ゾ;" // to KATAKANA LETTER ZO
|
||||
"ダ<>ダ;" // to KATAKANA LETTER DA
|
||||
"ヂ<>ヂ;" // to KATAKANA LETTER DI
|
||||
"ヅ<>ヅ;" // to KATAKANA LETTER DU
|
||||
"デ<>デ;" // to KATAKANA LETTER DE
|
||||
"ド<>ド;" // to KATAKANA LETTER DO
|
||||
"バ<>バ;" // to KATAKANA LETTER BA
|
||||
"パ<>パ;" // to KATAKANA LETTER PA
|
||||
"ビ<>ビ;" // to KATAKANA LETTER BI
|
||||
"ピ<>ピ;" // to KATAKANA LETTER PI
|
||||
"ブ<>ブ;" // to KATAKANA LETTER BU
|
||||
"プ<>プ;" // to KATAKANA LETTER PU
|
||||
"ベ<>ベ;" // to KATAKANA LETTER BE
|
||||
"ペ<>ペ;" // to KATAKANA LETTER PE
|
||||
"ボ<>ボ;" // to KATAKANA LETTER BO
|
||||
"ポ<>ポ;" // to KATAKANA LETTER PO
|
||||
"ヴ<>ヴ;" // to KATAKANA LETTER VU
|
||||
"ヷ<>ヷ;" // to KATAKANA LETTER VA
|
||||
"ヺ<>ヺ;" // to KATAKANA LETTER VO
|
||||
|
||||
// single character
|
||||
|
||||
"!<>'!';" // from FULLWIDTH EXCLAMATION MARK
|
||||
""<>'\\\"';" // from FULLWIDTH QUOTATION MARK
|
||||
"#<>'#';" // from FULLWIDTH NUMBER SIGN
|
||||
"$<>'$';" // from FULLWIDTH DOLLAR SIGN
|
||||
"%<>'%';" // from FULLWIDTH PERCENT SIGN
|
||||
"&<>'&';" // from FULLWIDTH AMPERSAND
|
||||
"'<>'';" // from FULLWIDTH APOSTROPHE
|
||||
"(<>'(';" // from FULLWIDTH LEFT PARENTHESIS
|
||||
")<>')';" // from FULLWIDTH RIGHT PARENTHESIS
|
||||
"*<>'*';" // from FULLWIDTH ASTERISK
|
||||
"+<>'+';" // from FULLWIDTH PLUS SIGN
|
||||
",<>',';" // from FULLWIDTH COMMA
|
||||
"-<>'-';" // from FULLWIDTH HYPHEN-MINUS
|
||||
".<>'.';" // from FULLWIDTH FULL STOP
|
||||
"/<>'/';" // from FULLWIDTH SOLIDUS
|
||||
"0<>'0';" // from FULLWIDTH DIGIT ZERO
|
||||
"1<>'1';" // from FULLWIDTH DIGIT ONE
|
||||
"2<>'2';" // from FULLWIDTH DIGIT TWO
|
||||
"3<>'3';" // from FULLWIDTH DIGIT THREE
|
||||
"4<>'4';" // from FULLWIDTH DIGIT FOUR
|
||||
"5<>'5';" // from FULLWIDTH DIGIT FIVE
|
||||
"6<>'6';" // from FULLWIDTH DIGIT SIX
|
||||
"7<>'7';" // from FULLWIDTH DIGIT SEVEN
|
||||
"8<>'8';" // from FULLWIDTH DIGIT EIGHT
|
||||
"9<>'9';" // from FULLWIDTH DIGIT NINE
|
||||
":<>':';" // from FULLWIDTH COLON
|
||||
";<>';';" // from FULLWIDTH SEMICOLON
|
||||
"<<>'<';" // from FULLWIDTH LESS-THAN SIGN
|
||||
"=<>'=';" // from FULLWIDTH EQUALS SIGN
|
||||
"><>'>';" // from FULLWIDTH GREATER-THAN SIGN
|
||||
"?<>'?';" // from FULLWIDTH QUESTION MARK
|
||||
"@<>'@';" // from FULLWIDTH COMMERCIAL AT
|
||||
"A<>A;" // from FULLWIDTH LATIN CAPITAL LETTER A
|
||||
"B<>B;" // from FULLWIDTH LATIN CAPITAL LETTER B
|
||||
"C<>C;" // from FULLWIDTH LATIN CAPITAL LETTER C
|
||||
"D<>D;" // from FULLWIDTH LATIN CAPITAL LETTER D
|
||||
"E<>E;" // from FULLWIDTH LATIN CAPITAL LETTER E
|
||||
"F<>F;" // from FULLWIDTH LATIN CAPITAL LETTER F
|
||||
"G<>G;" // from FULLWIDTH LATIN CAPITAL LETTER G
|
||||
"H<>H;" // from FULLWIDTH LATIN CAPITAL LETTER H
|
||||
"I<>I;" // from FULLWIDTH LATIN CAPITAL LETTER I
|
||||
"J<>J;" // from FULLWIDTH LATIN CAPITAL LETTER J
|
||||
"K<>K;" // from FULLWIDTH LATIN CAPITAL LETTER K
|
||||
"L<>L;" // from FULLWIDTH LATIN CAPITAL LETTER L
|
||||
"M<>M;" // from FULLWIDTH LATIN CAPITAL LETTER M
|
||||
"N<>N;" // from FULLWIDTH LATIN CAPITAL LETTER N
|
||||
"O<>O;" // from FULLWIDTH LATIN CAPITAL LETTER O
|
||||
"P<>P;" // from FULLWIDTH LATIN CAPITAL LETTER P
|
||||
"Q<>Q;" // from FULLWIDTH LATIN CAPITAL LETTER Q
|
||||
"R<>R;" // from FULLWIDTH LATIN CAPITAL LETTER R
|
||||
"S<>S;" // from FULLWIDTH LATIN CAPITAL LETTER S
|
||||
"T<>T;" // from FULLWIDTH LATIN CAPITAL LETTER T
|
||||
"U<>U;" // from FULLWIDTH LATIN CAPITAL LETTER U
|
||||
"V<>V;" // from FULLWIDTH LATIN CAPITAL LETTER V
|
||||
"W<>W;" // from FULLWIDTH LATIN CAPITAL LETTER W
|
||||
"X<>X;" // from FULLWIDTH LATIN CAPITAL LETTER X
|
||||
"Y<>Y;" // from FULLWIDTH LATIN CAPITAL LETTER Y
|
||||
"Z<>Z;" // from FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
"[<>'[';" // from FULLWIDTH LEFT SQUARE BRACKET
|
||||
"\<>'\\';" // from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
|
||||
"]<>']';" // from FULLWIDTH RIGHT SQUARE BRACKET
|
||||
"^<>'^';" // from FULLWIDTH CIRCUMFLEX ACCENT
|
||||
"_<>'_';" // from FULLWIDTH LOW LINE
|
||||
"`<>'`';" // from FULLWIDTH GRAVE ACCENT
|
||||
"a<>a;" // from FULLWIDTH LATIN SMALL LETTER A
|
||||
"b<>b;" // from FULLWIDTH LATIN SMALL LETTER B
|
||||
"c<>c;" // from FULLWIDTH LATIN SMALL LETTER C
|
||||
"d<>d;" // from FULLWIDTH LATIN SMALL LETTER D
|
||||
"e<>e;" // from FULLWIDTH LATIN SMALL LETTER E
|
||||
"f<>f;" // from FULLWIDTH LATIN SMALL LETTER F
|
||||
"g<>g;" // from FULLWIDTH LATIN SMALL LETTER G
|
||||
"h<>h;" // from FULLWIDTH LATIN SMALL LETTER H
|
||||
"i<>i;" // from FULLWIDTH LATIN SMALL LETTER I
|
||||
"j<>j;" // from FULLWIDTH LATIN SMALL LETTER J
|
||||
"k<>k;" // from FULLWIDTH LATIN SMALL LETTER K
|
||||
"l<>l;" // from FULLWIDTH LATIN SMALL LETTER L
|
||||
"m<>m;" // from FULLWIDTH LATIN SMALL LETTER M
|
||||
"n<>n;" // from FULLWIDTH LATIN SMALL LETTER N
|
||||
"o<>o;" // from FULLWIDTH LATIN SMALL LETTER O
|
||||
"p<>p;" // from FULLWIDTH LATIN SMALL LETTER P
|
||||
"q<>q;" // from FULLWIDTH LATIN SMALL LETTER Q
|
||||
"r<>r;" // from FULLWIDTH LATIN SMALL LETTER R
|
||||
"s<>s;" // from FULLWIDTH LATIN SMALL LETTER S
|
||||
"t<>t;" // from FULLWIDTH LATIN SMALL LETTER T
|
||||
"u<>u;" // from FULLWIDTH LATIN SMALL LETTER U
|
||||
"v<>v;" // from FULLWIDTH LATIN SMALL LETTER V
|
||||
"w<>w;" // from FULLWIDTH LATIN SMALL LETTER W
|
||||
"x<>x;" // from FULLWIDTH LATIN SMALL LETTER X
|
||||
"y<>y;" // from FULLWIDTH LATIN SMALL LETTER Y
|
||||
"z<>z;" // from FULLWIDTH LATIN SMALL LETTER Z
|
||||
"{<>'{';" // from FULLWIDTH LEFT CURLY BRACKET
|
||||
"|<>'|';" // from FULLWIDTH VERTICAL LINE
|
||||
"}<>'}';" // from FULLWIDTH RIGHT CURLY BRACKET
|
||||
"~<>'~';" // from FULLWIDTH TILDE
|
||||
"。<>。;" // to HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
"「<>「;" // to HALFWIDTH LEFT CORNER BRACKET
|
||||
"」<>」;" // to HALFWIDTH RIGHT CORNER BRACKET
|
||||
"、<>、;" // to HALFWIDTH IDEOGRAPHIC COMMA
|
||||
"・<>・;" // to HALFWIDTH KATAKANA MIDDLE DOT
|
||||
"ヲ<>ヲ;" // to HALFWIDTH KATAKANA LETTER WO
|
||||
"ァ<>ァ;" // to HALFWIDTH KATAKANA LETTER SMALL A
|
||||
"ィ<>ィ;" // to HALFWIDTH KATAKANA LETTER SMALL I
|
||||
"ゥ<>ゥ;" // to HALFWIDTH KATAKANA LETTER SMALL U
|
||||
"ェ<>ェ;" // to HALFWIDTH KATAKANA LETTER SMALL E
|
||||
"ォ<>ォ;" // to HALFWIDTH KATAKANA LETTER SMALL O
|
||||
"ャ<>ャ;" // to HALFWIDTH KATAKANA LETTER SMALL YA
|
||||
"ュ<>ュ;" // to HALFWIDTH KATAKANA LETTER SMALL YU
|
||||
"ョ<>ョ;" // to HALFWIDTH KATAKANA LETTER SMALL YO
|
||||
"ッ<>ッ;" // to HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
"ー<>ー;" // to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
"ア<>ア;" // to HALFWIDTH KATAKANA LETTER A
|
||||
"イ<>イ;" // to HALFWIDTH KATAKANA LETTER I
|
||||
"ウ<>ウ;" // to HALFWIDTH KATAKANA LETTER U
|
||||
"エ<>エ;" // to HALFWIDTH KATAKANA LETTER E
|
||||
"オ<>オ;" // to HALFWIDTH KATAKANA LETTER O
|
||||
"カ<>カ;" // to HALFWIDTH KATAKANA LETTER KA
|
||||
"キ<>キ;" // to HALFWIDTH KATAKANA LETTER KI
|
||||
"ク<>ク;" // to HALFWIDTH KATAKANA LETTER KU
|
||||
"ケ<>ケ;" // to HALFWIDTH KATAKANA LETTER KE
|
||||
"コ<>コ;" // to HALFWIDTH KATAKANA LETTER KO
|
||||
"サ<>サ;" // to HALFWIDTH KATAKANA LETTER SA
|
||||
"シ<>シ;" // to HALFWIDTH KATAKANA LETTER SI
|
||||
"ス<>ス;" // to HALFWIDTH KATAKANA LETTER SU
|
||||
"セ<>セ;" // to HALFWIDTH KATAKANA LETTER SE
|
||||
"ソ<>ソ;" // to HALFWIDTH KATAKANA LETTER SO
|
||||
"タ<>タ;" // to HALFWIDTH KATAKANA LETTER TA
|
||||
"チ<>チ;" // to HALFWIDTH KATAKANA LETTER TI
|
||||
"ツ<>ツ;" // to HALFWIDTH KATAKANA LETTER TU
|
||||
"テ<>テ;" // to HALFWIDTH KATAKANA LETTER TE
|
||||
"ト<>ト;" // to HALFWIDTH KATAKANA LETTER TO
|
||||
"ナ<>ナ;" // to HALFWIDTH KATAKANA LETTER NA
|
||||
"ニ<>ニ;" // to HALFWIDTH KATAKANA LETTER NI
|
||||
"ヌ<>ヌ;" // to HALFWIDTH KATAKANA LETTER NU
|
||||
"ネ<>ネ;" // to HALFWIDTH KATAKANA LETTER NE
|
||||
"ノ<>ノ;" // to HALFWIDTH KATAKANA LETTER NO
|
||||
"ハ<>ハ;" // to HALFWIDTH KATAKANA LETTER HA
|
||||
"ヒ<>ヒ;" // to HALFWIDTH KATAKANA LETTER HI
|
||||
"フ<>フ;" // to HALFWIDTH KATAKANA LETTER HU
|
||||
"ヘ<>ヘ;" // to HALFWIDTH KATAKANA LETTER HE
|
||||
"ホ<>ホ;" // to HALFWIDTH KATAKANA LETTER HO
|
||||
"マ<>マ;" // to HALFWIDTH KATAKANA LETTER MA
|
||||
"ミ<>ミ;" // to HALFWIDTH KATAKANA LETTER MI
|
||||
"ム<>ム;" // to HALFWIDTH KATAKANA LETTER MU
|
||||
"メ<>メ;" // to HALFWIDTH KATAKANA LETTER ME
|
||||
"モ<>モ;" // to HALFWIDTH KATAKANA LETTER MO
|
||||
"ヤ<>ヤ;" // to HALFWIDTH KATAKANA LETTER YA
|
||||
"ユ<>ユ;" // to HALFWIDTH KATAKANA LETTER YU
|
||||
"ヨ<>ヨ;" // to HALFWIDTH KATAKANA LETTER YO
|
||||
"ラ<>ラ;" // to HALFWIDTH KATAKANA LETTER RA
|
||||
"リ<>リ;" // to HALFWIDTH KATAKANA LETTER RI
|
||||
"ル<>ル;" // to HALFWIDTH KATAKANA LETTER RU
|
||||
"レ<>レ;" // to HALFWIDTH KATAKANA LETTER RE
|
||||
"ロ<>ロ;" // to HALFWIDTH KATAKANA LETTER RO
|
||||
"ワ<>ワ;" // to HALFWIDTH KATAKANA LETTER WA
|
||||
"ン<>ン;" // to HALFWIDTH KATAKANA LETTER N
|
||||
"゙<>゙;" // to HALFWIDTH KATAKANA VOICED SOUND MARK
|
||||
"゚<>゚;" // to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
"ᅠ<>ᅠ;" // to HALFWIDTH HANGUL FILLER
|
||||
"ᄀ<>ᄀ;" // to HALFWIDTH HANGUL LETTER KIYEOK
|
||||
"ᄁ<>ᄁ;" // to HALFWIDTH HANGUL LETTER SSANGKIYEOK
|
||||
"ᆪ<>ᆪ;" // to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
|
||||
"ᄂ<>ᄂ;" // to HALFWIDTH HANGUL LETTER NIEUN
|
||||
"ᆬ<>ᆬ;" // to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
|
||||
"ᆭ<>ᆭ;" // to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
|
||||
"ᄃ<>ᄃ;" // to HALFWIDTH HANGUL LETTER TIKEUT
|
||||
"ᄄ<>ᄄ;" // to HALFWIDTH HANGUL LETTER SSANGTIKEUT
|
||||
"ᄅ<>ᄅ;" // to HALFWIDTH HANGUL LETTER RIEUL
|
||||
"ᆰ<>ᆰ;" // to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
|
||||
"ᆱ<>ᆱ;" // to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
|
||||
"ᆲ<>ᆲ;" // to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
|
||||
"ᆳ<>ᆳ;" // to HALFWIDTH HANGUL LETTER RIEUL-SIOS
|
||||
"ᆴ<>ᆴ;" // to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
|
||||
"ᆵ<>ᆵ;" // to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
|
||||
"ᄚ<>ᄚ;" // to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
|
||||
"ᄆ<>ᄆ;" // to HALFWIDTH HANGUL LETTER MIEUM
|
||||
"ᄇ<>ᄇ;" // to HALFWIDTH HANGUL LETTER PIEUP
|
||||
"ᄈ<>ᄈ;" // to HALFWIDTH HANGUL LETTER SSANGPIEUP
|
||||
"ᄡ<>ᄡ;" // to HALFWIDTH HANGUL LETTER PIEUP-SIOS
|
||||
"ᄉ<>ᄉ;" // to HALFWIDTH HANGUL LETTER SIOS
|
||||
"ᄊ<>ᄊ;" // to HALFWIDTH HANGUL LETTER SSANGSIOS
|
||||
"ᄋ<>ᄋ;" // to HALFWIDTH HANGUL LETTER IEUNG
|
||||
"ᄌ<>ᄌ;" // to HALFWIDTH HANGUL LETTER CIEUC
|
||||
"ᄍ<>ᄍ;" // to HALFWIDTH HANGUL LETTER SSANGCIEUC
|
||||
"ᄎ<>ᄎ;" // to HALFWIDTH HANGUL LETTER CHIEUCH
|
||||
"ᄏ<>ᄏ;" // to HALFWIDTH HANGUL LETTER KHIEUKH
|
||||
"ᄐ<>ᄐ;" // to HALFWIDTH HANGUL LETTER THIEUTH
|
||||
"ᄑ<>ᄑ;" // to HALFWIDTH HANGUL LETTER PHIEUPH
|
||||
"ᄒ<>ᄒ;" // to HALFWIDTH HANGUL LETTER HIEUH
|
||||
"ᅡ<>ᅡ;" // to HALFWIDTH HANGUL LETTER A
|
||||
"ᅢ<>ᅢ;" // to HALFWIDTH HANGUL LETTER AE
|
||||
"ᅣ<>ᅣ;" // to HALFWIDTH HANGUL LETTER YA
|
||||
"ᅤ<>ᅤ;" // to HALFWIDTH HANGUL LETTER YAE
|
||||
"ᅥ<>ᅥ;" // to HALFWIDTH HANGUL LETTER EO
|
||||
"ᅦ<>ᅦ;" // to HALFWIDTH HANGUL LETTER E
|
||||
"ᅧ<>ᅧ;" // to HALFWIDTH HANGUL LETTER YEO
|
||||
"ᅨ<>ᅨ;" // to HALFWIDTH HANGUL LETTER YE
|
||||
"ᅩ<>ᅩ;" // to HALFWIDTH HANGUL LETTER O
|
||||
"ᅪ<>ᅪ;" // to HALFWIDTH HANGUL LETTER WA
|
||||
"ᅫ<>ᅫ;" // to HALFWIDTH HANGUL LETTER WAE
|
||||
"ᅬ<>ᅬ;" // to HALFWIDTH HANGUL LETTER OE
|
||||
"ᅭ<>ᅭ;" // to HALFWIDTH HANGUL LETTER YO
|
||||
"ᅮ<>ᅮ;" // to HALFWIDTH HANGUL LETTER U
|
||||
"ᅯ<>ᅯ;" // to HALFWIDTH HANGUL LETTER WEO
|
||||
"ᅰ<>ᅰ;" // to HALFWIDTH HANGUL LETTER WE
|
||||
"ᅱ<>ᅱ;" // to HALFWIDTH HANGUL LETTER WI
|
||||
"ᅲ<>ᅲ;" // to HALFWIDTH HANGUL LETTER YU
|
||||
"ᅳ<>ᅳ;" // to HALFWIDTH HANGUL LETTER EU
|
||||
"ᅴ<>ᅴ;" // to HALFWIDTH HANGUL LETTER YI
|
||||
"ᅵ<>ᅵ;" // to HALFWIDTH HANGUL LETTER I
|
||||
"¢<>'¢';" // from FULLWIDTH CENT SIGN
|
||||
"£<>'£';" // from FULLWIDTH POUND SIGN
|
||||
"¬<>'¬';" // from FULLWIDTH NOT SIGN
|
||||
" ̄<>'¯';" // from FULLWIDTH MACRON
|
||||
"' '<>' ';" // ideographic space (place this after MACRON)
|
||||
"¦<>'¦';" // from FULLWIDTH BROKEN BAR
|
||||
"¥<>'¥';" // from FULLWIDTH YEN SIGN
|
||||
"₩<>₩;" // from FULLWIDTH WON SIGN
|
||||
"│<>│;" // to HALFWIDTH FORMS LIGHT VERTICAL
|
||||
"'←'<>'←';" // to HALFWIDTH LEFTWARDS ARROW
|
||||
"↑<>↑;" // to HALFWIDTH UPWARDS ARROW
|
||||
"'→'<>'→';" // to HALFWIDTH RIGHTWARDS ARROW
|
||||
"↓<>↓;" // to HALFWIDTH DOWNWARDS ARROW
|
||||
"■<>■;" // to HALFWIDTH BLACK SQUARE
|
||||
"○<>○;" // to HALFWIDTH WHITE CIRCLE
|
||||
|
||||
// eof
|
||||
|
||||
}
|
||||
}
|
|
@ -1,361 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Greek_Latin.txt
|
||||
// Date: Tue May 18 17:24:48 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Greek_Latin
|
||||
|
||||
t_Grek_Latn {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Rules are predicated on running NFD first, and NFC afterwards
|
||||
// :: [\\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ;
|
||||
// MINIMAL FILTER GENERATED FOR: Greek-Latin
|
||||
":: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u03F7-\u07FB\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u03F9] ;"
|
||||
|
||||
":: NFD (NFC) ;"
|
||||
|
||||
// TEST CASES
|
||||
|
||||
// Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
|
||||
// ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
|
||||
// ᾳ ῃ ῳ ὃ ὄ
|
||||
// ὠς ὡς ὢς ὣς
|
||||
// Ὠς Ὡς Ὢς Ὣς
|
||||
// ὨΣ ὩΣ ὪΣ ὫΣ
|
||||
// Ạ, ạ, Ẹ, ẹ, Ọ, ọ
|
||||
|
||||
// Useful variables
|
||||
|
||||
"$lower = [[:latin:][:greek:] & [:Ll:]];"
|
||||
"$glower = [[:greek:] & [:Ll:]];"
|
||||
"$upper = [[:latin:][:greek:] & [:Lu:]] ;"
|
||||
"$accent = [:M:] ;"
|
||||
|
||||
// NOTE: restrict to just the Greek & Latin accents that we care about
|
||||
// TODO: broaden out once iteration is fixed
|
||||
"$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;"
|
||||
|
||||
"$macron = \u0304 ;"
|
||||
"$ddot = \u0308 ;"
|
||||
"$ddotmac = [$ddot$macron];"
|
||||
|
||||
"$lcgvowel = [αεηιουω] ;"
|
||||
"$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
|
||||
"$gvowel = [$lcgvowel $ucgvowel] ;"
|
||||
"$lcgvowelC = [$lcgvowel $accent] ;"
|
||||
|
||||
"$evowel = [aeiouyAEIOUY];"
|
||||
"$evowel2 = [iuyIUY];"
|
||||
"$vowel = [ $evowel $gvowel] ;"
|
||||
|
||||
"$gammaLike = [ΓΚΞΧγκξχϰ] ;"
|
||||
"$egammaLike = [GKXCgkxc] ;"
|
||||
"$smooth = ̓ ;"
|
||||
"$rough = ̔ ;"
|
||||
"$iotasub = ͅ ;"
|
||||
|
||||
"$evowel_i = [$evowel-[iI]] ;"
|
||||
"$evowel2_i = [uyUY];"
|
||||
|
||||
"$underbar = \u0331;"
|
||||
|
||||
"$afterLetter = [:L:] [[:M:]\\\']* ;"
|
||||
"$beforeLetter = [[:M:]\\\']* [:L:] ;"
|
||||
"$beforeLower = $accent * $lower ;"
|
||||
|
||||
"$notLetter = [^[:L:][:M:]] ;"
|
||||
"$under = ̱;"
|
||||
|
||||
// Fix punctuation
|
||||
// preserve original
|
||||
"\\\: <> \\\: $under ;"
|
||||
"\\\? <> \\\? $under ;"
|
||||
|
||||
"\\\; <> \\\? ;"
|
||||
"· <> \\\: ;"
|
||||
|
||||
// CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
|
||||
|
||||
"\u0342 <> \u0302 ;"
|
||||
|
||||
// IOTA: convert iota subscript to iota
|
||||
// first make previous alpha long!
|
||||
|
||||
"$accent_minus = [[$accent]-[$iotasub$macron]];"
|
||||
|
||||
"Α } $accent_minus * $iotasub > | Α $macron ;"
|
||||
"α } $accent_minus * $iotasub > | α $macron ;"
|
||||
|
||||
// now convert to uppercase if after uppercase, otherwise to lowercase
|
||||
|
||||
"$upper $accent * { $iotasub > I ;"
|
||||
"$iotasub > i ;"
|
||||
|
||||
"| $1 $iotasub < ($evowel $macron $accentMinus *) i ;"
|
||||
"| $1 $iotasub < ($evowel $macron $accentMinus *) I ;"
|
||||
|
||||
// BREATHING
|
||||
|
||||
// Convert rough breathing to h, and move before letters.
|
||||
|
||||
// Make A ` x => H a x   (` stands for the rough-breathing mark, $rough)
|
||||
|
||||
"Α ($macron?) $rough } $beforeLower > H | α $1;"
|
||||
"Ε $rough } $beforeLower > H | ε;"
|
||||
"Η $rough } $beforeLower > H | η ;"
|
||||
"Ι ($ddot?) $rough } $beforeLower > H | ι $1;"
|
||||
"Ο $rough } $beforeLower > H | ο ;"
|
||||
"Υ $rough } $beforeLower > H | υ ;"
|
||||
"Ω ($ddot?) $rough } $beforeLower > H | ω $1;"
|
||||
|
||||
// Make A x ` => H a x   (` stands for the rough-breathing mark, $rough)
|
||||
|
||||
"Α ($glower $macron?) $rough > H | α $1 ;"
|
||||
"Ε ($glower) $rough > H | ε $1 ;"
|
||||
"Η ($glower) $rough > H | η $1 ;"
|
||||
"Ι ($glower $ddot?) $rough > H | ι $1 ;"
|
||||
"Ο ($glower) $rough > H | ο $1 ;"
|
||||
"Υ ($glower) $rough > H | υ $1 ;"
|
||||
"Ω ($glower $ddot?) $rough > H | ω $1 ;"
|
||||
|
||||
//Otherwise, make x ` into h x and X ` into H X
|
||||
|
||||
"($lcgvowel + $ddotmac? ) $rough > h | $1 ;"
|
||||
"($gvowel + $ddotmac? ) $rough > H | $1 ;"
|
||||
|
||||
// Go backwards with H
|
||||
|
||||
"| $1 $rough < h ($evowel $macron $ddot? $evowel2_i $macron?) ;"
|
||||
"| $1 $rough < h ($evowel $ddot? $evowel2 $macron?) ;"
|
||||
"| $1 $rough < h ($evowel $macron? $ddot?) ;"
|
||||
|
||||
"| $1 $rough < H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;"
|
||||
"| $1 $rough < H ([AEIOUY] $ddot? $evowel2 $macron?) ;"
|
||||
"| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;"
|
||||
|
||||
// titlecase, have to fix individually
|
||||
// in the future, we should add &uppercase() to make this easier
|
||||
|
||||
"| A $1 $rough < H a ($macron $ddot? $evowel2_i $macron?) ;"
|
||||
"| E $1 $rough < H e ($macron $ddot? $evowel2_i $macron?) ;"
|
||||
"| I $1 $rough < H i ($macron $ddot? $evowel2_i $macron?) ;"
|
||||
"| O $1 $rough < H o ($macron $ddot? $evowel2_i $macron?) ;"
|
||||
"| U $1 $rough < H u ($macron $ddot? $evowel2_i $macron?) ;"
|
||||
"| Y $1 $rough < H y ($macron $ddot? $evowel2_i $macron?) ;"
|
||||
|
||||
"| A $1 $rough < H a ($ddot? $evowel2 $macron?) ;"
|
||||
"| E $1 $rough < H e ($ddot? $evowel2 $macron?) ;"
|
||||
"| I $1 $rough < H i ($ddot? $evowel2 $macron?) ;"
|
||||
"| O $1 $rough < H o ($ddot? $evowel2 $macron?) ;"
|
||||
"| U $1 $rough < H u ($ddot? $evowel2 $macron?) ;"
|
||||
"| Y $1 $rough < H y ($ddot? $evowel2 $macron?) ;"
|
||||
|
||||
"| A $1 $rough < H a ($macron? $ddot? ) ;"
|
||||
"| E $1 $rough < H e ($macron? $ddot? ) ;"
|
||||
"| I $1 $rough < H i ($macron? $ddot? ) ;"
|
||||
"| O $1 $rough < H o ($macron? $ddot? ) ;"
|
||||
"| U $1 $rough < H u ($macron? $ddot? ) ;"
|
||||
"| Y $1 $rough < H y ($macron? $ddot? ) ;"
|
||||
|
||||
// Now do smooth
|
||||
|
||||
//delete smooth breathing for Latin
|
||||
"$smooth > ;"
|
||||
|
||||
// insert in Greek
|
||||
// the assumption is that all Marks are on letters.
|
||||
|
||||
"| $1 $smooth < $notLetter { ([rR]) } [^hH$smooth$rough] ;"
|
||||
"| $1 $smooth < $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;"
|
||||
"| $1 $smooth < $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;"
|
||||
|
||||
// TODO: preserve smooth/rough breathing if not
|
||||
// on initial vowel sequence
|
||||
|
||||
// need to have these up here so the rules don't mask
|
||||
|
||||
// remove now superfluous macron when returning
|
||||
|
||||
"Α < A $macron ;"
|
||||
"α < a $macron ;"
|
||||
|
||||
"η <> e $macron ;"
|
||||
"Η <> E $macron ;"
|
||||
|
||||
"φ <> ph ;"
|
||||
"Ψ } $beforeLower <> Ps ;"
|
||||
"Ψ <> PS ;"
|
||||
|
||||
"Φ } $beforeLower <> Ph ;"
|
||||
"Φ <> PH ;"
|
||||
"ψ <> ps ;"
|
||||
|
||||
"ω <> o $macron ;"
|
||||
"Ω <> O $macron;"
|
||||
|
||||
// NORMAL
|
||||
|
||||
"α <> a ;"
|
||||
"Α <> A ;"
|
||||
|
||||
"β <> b ;"
|
||||
"Β <> B ;"
|
||||
|
||||
"γ } $gammaLike <> n } $egammaLike ;"
|
||||
"γ <> g ;"
|
||||
"Γ } $gammaLike <> N } $egammaLike ;"
|
||||
"Γ <> G ;"
|
||||
|
||||
"δ <> d ;"
|
||||
"Δ <> D ;"
|
||||
|
||||
"ε <> e ;"
|
||||
"Ε <> E ;"
|
||||
|
||||
"ζ <> z ;"
|
||||
"Ζ <> Z ;"
|
||||
|
||||
"θ <> th ;"
|
||||
"Θ } $beforeLower <> Th ;"
|
||||
"Θ <> TH ;"
|
||||
|
||||
"ι <> i ;"
|
||||
"Ι <> I ;"
|
||||
|
||||
"κ <> k ;"
|
||||
"Κ <> K ;"
|
||||
|
||||
"λ <> l ;"
|
||||
"Λ <> L ;"
|
||||
|
||||
"μ <> m ;"
|
||||
"Μ <> M ;"
|
||||
|
||||
"ν } $gammaLike > n\\\' ;"
|
||||
"ν <> n ;"
|
||||
"Ν } $gammaLike <> N\\\' ;"
|
||||
"Ν <> N ;"
|
||||
|
||||
"ξ <> x ;"
|
||||
"Ξ <> X ;"
|
||||
|
||||
"ο <> o ;"
|
||||
"Ο <> O ;"
|
||||
|
||||
"π <> p ;"
|
||||
"Π <> P ;"
|
||||
|
||||
"ρ $rough <> rh;"
|
||||
"Ρ $rough } $beforeLower <> Rh ;"
|
||||
"Ρ $rough <> RH ;"
|
||||
"ρ <> r ;"
|
||||
"Ρ <> R ;"
|
||||
|
||||
// insert separator before things that turn into s
|
||||
|
||||
"[Pp] { } [ςσΣϷϸϺϻ] > \\\' ;"
|
||||
|
||||
// special S variants
|
||||
|
||||
"Ϸ <> Š ;" // Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
||||
"ϸ <> š ;" //ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
||||
"Ϻ <> Ŝ ;" // Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
||||
"ϻ <> ŝ ;" // ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
||||
|
||||
// underbar means exception
|
||||
|
||||
// before a letter, initial
|
||||
"ς } $beforeLetter <> s $underbar } $beforeLetter;"
|
||||
"σ } $beforeLetter <> s } $beforeLetter;"
|
||||
|
||||
// otherwise, after a letter = final
|
||||
"$afterLetter { σ <> $afterLetter { s $underbar;"
|
||||
"$afterLetter { ς <> $afterLetter { s ;"
|
||||
|
||||
// otherwise (isolated) = initial
|
||||
"ς <> s $underbar;"
|
||||
"σ <> s ;"
|
||||
|
||||
// [Pp] { Σ <> \\\'S ;
|
||||
"Σ <> S ;"
|
||||
|
||||
"τ <> t ;"
|
||||
"Τ <> T ;"
|
||||
|
||||
"$vowel {υ } <> u ;"
|
||||
"υ <> y ;"
|
||||
"$vowel { Υ <> U ;"
|
||||
"Υ <> Y ;"
|
||||
|
||||
"χ <> ch ;"
|
||||
"Χ } $beforeLower <> Ch ;"
|
||||
"Χ <> CH ;"
|
||||
|
||||
// Completeness for ASCII
|
||||
|
||||
"$ignore = [[:Mark:]''] * ;"
|
||||
|
||||
"| k < c ;"
|
||||
"| ph < f ;"
|
||||
"| i < j ;"
|
||||
"| k < q ;"
|
||||
"| b < v } $vowel ;"
|
||||
"| b < w } $vowel;"
|
||||
"| u < v ;"
|
||||
"| u < w;"
|
||||
"| K < C ;"
|
||||
"| Ph < F ;"
|
||||
"| I < J ;"
|
||||
"| K < Q ;"
|
||||
"| B < V } $vowel ;"
|
||||
"| B < W } $vowel ;"
|
||||
"| U < V ;"
|
||||
"| U < W ;"
|
||||
|
||||
"$rough } $ignore [:UppercaseLetter:] > H ;"
|
||||
"$ignore [:UppercaseLetter:] { $rough > H ;"
|
||||
"$rough < H ;"
|
||||
"$rough <> h ;"
|
||||
|
||||
// Completeness for Greek
|
||||
|
||||
"ϐ > | β ;"
|
||||
"ϑ > | θ ;"
|
||||
"ϒ > | Υ ;"
|
||||
"ϕ > | φ ;"
|
||||
"ϖ > | π ;"
|
||||
|
||||
"ϰ > | κ ;"
|
||||
"ϱ > | ρ ;"
|
||||
"ϲ > | σ ;"
|
||||
"Ϲ > | Σ;" //U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
|
||||
"ϳ > j ;"
|
||||
"ϴ > | Θ ;"
|
||||
"ϵ > | ε ;"
|
||||
|
||||
"µ > | μ ;"
|
||||
|
||||
"ͺ > i;"
|
||||
|
||||
// delete any trailing ' marks used for roundtripping
|
||||
|
||||
"< [Ππ] { \\\' } [Ss] ;"
|
||||
"< [Νν] { \\\' } $egammaLike ;"
|
||||
|
||||
"::NFC (NFD) ;"
|
||||
// ([\\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
|
||||
// ([\\u0000-\u007F \u00B7 [:Latin:] [:nonspacing mark:]]) ;
|
||||
// MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
|
||||
":: ( [':?A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0300-\u0337\u0339-\u0345\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEE\u1FF2-\u1FF4\u1FF6-\u1FFC\u212A-\u212B] ) ;"
|
||||
}
|
||||
}
|
|
@ -1,268 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Greek_Latin_UNGEGN.txt
|
||||
// Date: Tue May 18 17:24:48 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Greek_Latin_UNGEGN
|
||||
|
||||
t_Grek_Latn_UNGEGN {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
// For modern Greek, based on UNGEGN rules.
|
||||
|
||||
// Rules are predicated on running NFD first, and NFC afterwards
|
||||
// MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
|
||||
// WARNING: need to add accents to both filters ###
|
||||
// :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;
|
||||
|
||||
":: [[[:Greek:][:Mn:][:Me:]] [\\\:-;?\u00B7\u037E\u0387]] ;"
|
||||
"::NFD (NFC) ;"
|
||||
|
||||
// Useful variables
|
||||
|
||||
"$lower = [[:latin:][:greek:] & [:Ll:]] ;"
|
||||
"$upper = [[:latin:][:greek:] & [:Lu:]] ;"
|
||||
"$accent = [[:Mn:][:Me:]] ;"
|
||||
|
||||
"$macron = ̄ ;"
|
||||
"$ddot = ̈ ;"
|
||||
|
||||
"$lcgvowel = [αεηιουω] ;"
|
||||
"$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
|
||||
"$gvowel = [$lcgvowel $ucgvowel] ;"
|
||||
"$lcgvowelC = [$lcgvowel $accent] ;"
|
||||
|
||||
"$evowel = [aeiouyAEIOUY];"
|
||||
"$vowel = [ $evowel $gvowel] ;"
|
||||
|
||||
"$beforeLower = $accent * $lower ;"
|
||||
|
||||
"$gammaLike = [ΓΚΞΧγκξχϰ] ;"
|
||||
"$egammaLike = [GKXCgkxc] ;"
|
||||
"$smooth = ̓ ;"
|
||||
"$rough = ̔ ;"
|
||||
"$iotasub = ͅ ;"
|
||||
|
||||
"$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;"
|
||||
|
||||
"$under = ̱;"
|
||||
|
||||
"$caron = ̌;"
|
||||
|
||||
"$afterLetter = [:L:] [\\\'$accent]* ;"
|
||||
"$beforeLetter = [\\\'$accent]* [:L:] ;"
|
||||
|
||||
// Fix punctuation
|
||||
|
||||
// preserve original
|
||||
"\\\: <> \\\: $under ;"
|
||||
"\\\? <> \\\? $under ;"
|
||||
|
||||
"\\\; <> \\\? ;"
|
||||
"· <> \\\: ;"
|
||||
|
||||
// Fix any ancient characters that creep in
|
||||
|
||||
"͂ > ́ ;"
|
||||
"̂ > ́ ;"
|
||||
"̀ > ́ ;"
|
||||
"$smooth > ;"
|
||||
"$rough > ;"
|
||||
"$iotasub > ;"
|
||||
"ͺ > ;"
|
||||
|
||||
// need to have these up here so the rules don't mask
|
||||
|
||||
"η <> i $under ;"
|
||||
"Η <> I $under ;"
|
||||
|
||||
"Ψ } $beforeLower <> Ps ;"
|
||||
"Ψ <> PS ;"
|
||||
"ψ <> ps ;"
|
||||
|
||||
"ω <> o $under ;"
|
||||
"Ω <> O $under;"
|
||||
|
||||
// at beginning or end of word, convert mp to b
|
||||
|
||||
"[^[:L:]$accent] { μπ > b ;"
|
||||
"μπ } [^[:L:]$accent] > b ;"
|
||||
"[^[:L:]$accent] { [Μμ][Ππ] > B ;"
|
||||
"[Μμ][Ππ] } [^[:L:]$accent] > B ;"
|
||||
|
||||
"μπ < b ;"
|
||||
"Μπ < B } $beforeLower ;"
|
||||
"ΜΠ < B ;"
|
||||
|
||||
// handle diphthongs ending with upsilon
|
||||
|
||||
"ου <> ou ;"
|
||||
"ΟΥ <> OU ;"
|
||||
"Ου <> Ou ;"
|
||||
"οΥ <> oU ;"
|
||||
|
||||
"$fmaker = [aeiAEI] $under ? ;"
|
||||
"$shiftForwardVowels = [[:Mn:]-[\u0308]];" // note: a diaeresis keeps the items separate
|
||||
|
||||
"$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;"
|
||||
"υ $1 < ( $shiftForwardVowels )* v $under ;"
|
||||
|
||||
"$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;"
|
||||
"υ $1 < ( $shiftForwardVowels )* f $under ;"
|
||||
|
||||
"$fmaker { Υ } $softener <> V $under ;"
|
||||
"$fmaker { Υ <> U $under ;"
|
||||
|
||||
"υ <> y ;"
|
||||
"Υ <> Y ;"
|
||||
|
||||
// NORMAL
|
||||
|
||||
"α <> a ;"
|
||||
"Α <> A ;"
|
||||
|
||||
"β <> v ;"
|
||||
"Β <> V ;"
|
||||
|
||||
"γ } $gammaLike <> n } $egammaLike ;"
|
||||
"γ <> g ;"
|
||||
"Γ } $gammaLike <> N } $egammaLike ;"
|
||||
"Γ <> G ;"
|
||||
|
||||
"δ <> d ;"
|
||||
"Δ <> D ;"
|
||||
|
||||
"ε <> e ;"
|
||||
"Ε <> E ;"
|
||||
|
||||
"ζ <> z ;"
|
||||
"Ζ <> Z ;"
|
||||
|
||||
"θ <> th ;"
|
||||
"Θ } $beforeLower <> Th ;"
|
||||
"Θ <> TH ;"
|
||||
|
||||
"ι <> i ;"
|
||||
"Ι <> I ;"
|
||||
|
||||
"κ <> k ;"
|
||||
"Κ <> K ;"
|
||||
|
||||
"λ <> l ;"
|
||||
"Λ <> L ;"
|
||||
|
||||
"μ <> m ;"
|
||||
"Μ <> M ;"
|
||||
|
||||
"ν } $gammaLike > n\\\' ;"
|
||||
"ν <> n ;"
|
||||
"Ν } $gammaLike <> N\\\' ;"
|
||||
"Ν <> N ;"
|
||||
|
||||
"ξ <> x ;"
|
||||
"Ξ <> X ;"
|
||||
|
||||
"ο <> o ;"
|
||||
"Ο <> O ;"
|
||||
|
||||
"π <> p ;"
|
||||
"Π <> P ;"
|
||||
|
||||
"ρ <> r ;"
|
||||
"Ρ <> R ;"
|
||||
|
||||
// insert separator before things that turn into s
|
||||
"[Pp] { } [ςσΣϷϸϺϻ] > \\\' ;"
|
||||
|
||||
// special S variants
|
||||
|
||||
"Ϸ <> Š ;" // Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
||||
"ϸ <> š ;" //ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
||||
"Ϻ <> Ŝ ;" // Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
||||
"ϻ <> ŝ ;" // ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
||||
|
||||
// Underbar ($under) means exception
|
||||
|
||||
// before a letter, initial
|
||||
"ς } $beforeLetter <> s $under } $beforeLetter;"
|
||||
"σ } $beforeLetter <> s } $beforeLetter;"
|
||||
|
||||
// otherwise, after a letter = final
|
||||
"$afterLetter { σ <> $afterLetter { s $under;"
|
||||
"$afterLetter { ς <> $afterLetter { s ;"
|
||||
|
||||
// otherwise (isolated) = initial
|
||||
"ς <> s $under;"
|
||||
"σ <> s ;"
|
||||
|
||||
// [Pp] { Σ <> \\\'S ;
|
||||
"Σ <> S ;"
|
||||
|
||||
"τ <> t ;"
|
||||
"Τ <> T ;"
|
||||
|
||||
"φ <> f ;"
|
||||
"Φ <> F ;"
|
||||
|
||||
"χ <> ch ;"
|
||||
"Χ } $beforeLower <> Ch ;"
|
||||
"Χ <> CH ;"
|
||||
|
||||
// Completeness for ASCII
|
||||
|
||||
// $ignore = [[:Mark:]''] * ;
|
||||
|
||||
"| ch < h ;"
|
||||
"| k < c ;"
|
||||
"| i < j ;"
|
||||
"| k < q ;"
|
||||
"| b < u } $vowel ;"
|
||||
"| b < w } $vowel ;"
|
||||
"| y < u ;"
|
||||
"| y < w ;"
|
||||
|
||||
"| Ch < H ;"
|
||||
"| K < C ;"
|
||||
"| I < J ;"
|
||||
"| K < Q ;"
|
||||
"| B < W } $vowel ;"
|
||||
"| B < U } $vowel ;"
|
||||
"| Y < W ;"
|
||||
"| Y < U ;"
|
||||
|
||||
// Completeness for Greek
|
||||
|
||||
"ϐ > | β ;"
|
||||
"ϑ > | θ ;"
|
||||
"ϒ > | Υ ;"
|
||||
"ϕ > | φ ;"
|
||||
"ϖ > | π ;"
|
||||
|
||||
"ϰ > | κ ;"
|
||||
"ϱ > | ρ ;"
|
||||
"ϲ > | σ ;"
|
||||
"Ϲ > | Σ;" //U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
|
||||
"ϳ > j ;"
|
||||
"ϴ > | Θ ;"
|
||||
"ϵ > | ε ;"
|
||||
"µ > | μ ;"
|
||||
|
||||
// delete any trailing ' marks used for roundtripping
|
||||
|
||||
"< [Ππ] { \\\' } [Ss] ;"
|
||||
"< [Νν] { \\\' } $egammaLike ;"
|
||||
|
||||
"::NFC (NFD) ;"
|
||||
|
||||
// MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
|
||||
":: ([[[:Latin:][:Mn:][:Me:]] ['\\\:?]]) ;"
|
||||
}
|
||||
}
|
|
@ -1,107 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Gujarati_InterIndic.txt
|
||||
// Date: Tue May 18 17:24:48 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Gujarati_InterIndic
|
||||
|
||||
t_Gujr_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Gujarati-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
"\u0a81>\ue001;" // SIGN CANDRABINDU
|
||||
"\u0a82>\ue002;" // SIGN ANUSVARA
|
||||
"\u0a83>\ue003;" // SIGN VISARGA
|
||||
"\u0a85>\ue005;" // LETTER A
|
||||
"\u0a86>\ue006;" // LETTER AA
|
||||
"\u0a87>\ue007;" // LETTER I
|
||||
"\u0a88>\ue008;" // LETTER II
|
||||
"\u0a89>\ue009;" // LETTER U
|
||||
"\u0a8a>\ue00a;" // LETTER UU
|
||||
"\u0a8b>\ue00b;" // LETTER VOCALIC R
|
||||
"\u0a8c>\ue00c;" // LETTER VOCALLIC L
|
||||
"\u0a8d>\ue00d;" // VOWEL CANDRA E
|
||||
"\u0a8f>\ue00f;" // LETTER E
|
||||
"\u0a90>\ue010;" // LETTER AI
|
||||
"\u0a91>\ue011;" // VOWEL CANDRA O
|
||||
"\u0a93>\ue013;" // LETTER O
|
||||
"\u0a94>\ue014;" // LETTER AU
|
||||
"\u0a95>\ue015;" // LETTER KA
|
||||
"\u0a96>\ue016;" // LETTER KHA
|
||||
"\u0a97>\ue017;" // LETTER GA
|
||||
"\u0a98>\ue018;" // LETTER GHA
|
||||
"\u0a99>\ue019;" // LETTER NGA
|
||||
"\u0a9a>\ue01a;" // LETTER CA
|
||||
"\u0a9b>\ue01b;" // LETTER CHA
|
||||
"\u0a9c>\ue01c;" // LETTER JA
|
||||
"\u0a9d>\ue01d;" // LETTER JHA
|
||||
"\u0a9e>\ue01e;" // LETTER NYA
|
||||
"\u0a9f>\ue01f;" // LETTER TTA
|
||||
"\u0aa0>\ue020;" // LETTER TTHA
|
||||
"\u0aa1>\ue021;" // LETTER DDA
|
||||
"\u0aa2>\ue022;" // LETTER DDHA
|
||||
"\u0aa3>\ue023;" // LETTER NNA
|
||||
"\u0aa4>\ue024;" // LETTER TA
|
||||
"\u0aa5>\ue025;" // LETTER THA
|
||||
"\u0aa6>\ue026;" // LETTER DA
|
||||
"\u0aa7>\ue027;" // LETTER DHA
|
||||
"\u0aa8>\ue028;" // LETTER NA
|
||||
"\u0aaa>\ue02a;" // LETTER PA
|
||||
"\u0aab>\ue02b;" // LETTER PHA
|
||||
"\u0aac>\ue02c;" // LETTER BA
|
||||
"\u0aad>\ue02d;" // LETTER BHA
|
||||
"\u0aae>\ue02e;" // LETTER MA
|
||||
"\u0aaf>\ue02f;" // LETTER YA
|
||||
"\u0ab0>\ue030;" // LETTER RA
|
||||
"\u0ab2>\ue032;" // LETTER LA
|
||||
"\u0ab3>\ue033;" // LETTER LLA
|
||||
"\u0ab5>\ue035;" // LETTER VA
|
||||
"\u0ab6>\ue036;" // LETTER SHA
|
||||
"\u0ab7>\ue037;" // LETTER SSA
|
||||
"\u0ab8>\ue038;" // LETTER SA
|
||||
"\u0ab9>\ue039;" // LETTER HA
|
||||
"\u0abc>\ue03c;" // SIGN NUKTA
|
||||
"\u0abd>\ue03d;" // SIGN AVAGRAHA
|
||||
"\u0abe>\ue03e;" // VOWEL SIGN AA
|
||||
"\u0abf>\ue03f;" // VOWEL SIGN I
|
||||
"\u0ac0>\ue040;" // VOWEL SIGN II
|
||||
"\u0ac1>\ue041;" // VOWEL SIGN U
|
||||
"\u0ac2>\ue042;" // VOWEL SIGN UU
|
||||
"\u0ac3>\ue043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0ac4>\ue044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u0ac5>\ue045;" // VOWEL SIGN CANDRA E
|
||||
"\u0ac7>\ue047;" // VOWEL SIGN E
|
||||
"\u0ac8>\ue048;" // VOWEL SIGN AI
|
||||
"\u0ac9>\ue049;" // VOWEL SIGN CANDRA O
|
||||
"\u0acb>\ue04b;" // VOWEL SIGN O
|
||||
"\u0acc>\ue04c;" // VOWEL SIGN AU
|
||||
"\u0acd>\ue04d;" // SIGN VIRAMA
|
||||
"\u0ad0>\ue050;" // OM
|
||||
"\u0ae0>\ue060;" // LETTER VOCALIC RR
|
||||
"\u0ae1>\ue061;" // LETTER VOCALIC LL
|
||||
"\u0ae6>\ue066;" // DIGIT ZERO
|
||||
"\u0ae7>\ue067;" // DIGIT ONE
|
||||
"\u0ae8>\ue068;" // DIGIT TWO
|
||||
"\u0ae9>\ue069;" // DIGIT THREE
|
||||
"\u0aea>\ue06a;" // DIGIT FOUR
|
||||
"\u0aeb>\ue06b;" // DIGIT FIVE
|
||||
"\u0aec>\ue06c;" // DIGIT SIX
|
||||
"\u0aed>\ue06d;" // DIGIT SEVEN
|
||||
"\u0aee>\ue06e;" // DIGIT EIGHT
|
||||
"\u0aef>\ue06f;" // DIGIT NINE
|
||||
"\u0964>\ue064;" // DANDA
|
||||
"\u0965>\ue065;" // DOUBLE DANDA
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,111 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Gurmukhi_InterIndic.txt
|
||||
// Date: Tue May 18 17:24:48 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Gurmukhi_InterIndic
|
||||
|
||||
t_Guru_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Gurmukhi-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
|
||||
//\u0A16\u0A3C>\uE059; # LETTER KHHA
|
||||
//\u0A17\u0A3C>\uE05A; # LETTER GHHA
|
||||
//\u0A1C\u0A3C>\uE05B; # LETTER ZA
|
||||
//\u0A38\u0A3C>\uE036; # LETTER SHA
|
||||
//\u0A32\u0A3C>\uE033; # LETTER LLA
|
||||
//\u0A2B\u0A3C>\uE05E; # LETTER FA
|
||||
"\u0A01>\uE001;" // SIGN CHANDRABINDU
|
||||
"\u0A02>\uE002;" // SIGN BINDI
|
||||
"\u0A05>\uE005;" // LETTER A
|
||||
"\u0A06>\uE006;" // LETTER AA
|
||||
"\u0A07>\uE007;" // LETTER I
|
||||
"\u0A08>\uE008;" // LETTER II
|
||||
"\u0A09>\uE009;" // LETTER U
|
||||
"\u0A0A>\uE00A;" // LETTER UU
|
||||
"\u0A0C>\uE032;" // FALLBACK : VOCALLIC LA
|
||||
"\u0A0F>\uE00F;" // LETTER EE
|
||||
"\u0A10>\uE010;" // LETTER AI
|
||||
"\u0A13>\uE013;" // LETTER OO
|
||||
"\u0A14>\uE014;" // LETTER AU
|
||||
"\u0A15>\uE015;" // LETTER KA
|
||||
"\u0A16>\uE016;" // LETTER KHA
|
||||
"\u0A17>\uE017;" // LETTER GA
|
||||
"\u0A18>\uE018;" // LETTER GHA
|
||||
"\u0A19>\uE019;" // LETTER NGA
|
||||
"\u0A1A>\uE01A;" // LETTER CA
|
||||
"\u0A1B>\uE01B;" // LETTER CHA
|
||||
"\u0A1C>\uE01C;" // LETTER JA
|
||||
"\u0A1D>\uE01D;" // LETTER JHA
|
||||
"\u0A1E>\uE01E;" // LETTER NYA
|
||||
"\u0A1F>\uE01F;" // LETTER TTA
|
||||
"\u0A20>\uE020;" // LETTER TTHA
|
||||
"\u0A21>\uE021;" // LETTER DDA
|
||||
"\u0A22>\uE022;" // LETTER DDHA
|
||||
"\u0A23>\uE023;" // LETTER NNA
|
||||
"\u0A24>\uE024;" // LETTER TA
|
||||
"\u0A25>\uE025;" // LETTER THA
|
||||
"\u0A26>\uE026;" // LETTER DA
|
||||
"\u0A27>\uE027;" // LETTER DHA
|
||||
"\u0A28>\uE028;" // LETTER NA
|
||||
"\u0A2A>\uE02A;" // LETTER PA
|
||||
"\u0A2B>\uE02B;" // LETTER PHA
|
||||
"\u0A2C>\uE02C;" // LETTER BA
|
||||
"\u0A2D>\uE02D;" // LETTER BHA
|
||||
"\u0A2E>\uE02E;" // LETTER MA
|
||||
"\u0A2F>\uE02F;" // LETTER YA
|
||||
"\u0A30>\uE030;" // LETTER RA
|
||||
"\u0A32>\uE032;" // LETTER LA
|
||||
"\u0a33>\uE033;" // FALLBACK
|
||||
"\u0A35>\uE035;" // LETTER VA
|
||||
"\u0a36>\ue036;"
|
||||
"\u0A38\\\0a3c>\ue036;" // FALLBACK
|
||||
"\u0A38>\uE038;" // LETTER SA
|
||||
"\u0A39>\uE039;" // LETTER HA
|
||||
"\u0A3C>\uE03C;" // SIGN NUKTA
|
||||
"\u0A3E>\uE03E;" // VOWEL SIGN AA
|
||||
"\u0A3F>\uE03F;" // VOWEL SIGN I
|
||||
"\u0A40>\uE040;" // VOWEL SIGN II
|
||||
"\u0A41>\uE041;" // VOWEL SIGN U
|
||||
"\u0A42>\uE042;" // VOWEL SIGN UU
|
||||
"\u0A47>\uE047;" // VOWEL SIGN EE
|
||||
"\u0A48>\uE048;" // VOWEL SIGN AI
|
||||
"\u0A4B>\uE04B;" // VOWEL SIGN OO
|
||||
"\u0A4C>\uE04C;" // VOWEL SIGN AU
|
||||
"\u0A4D>\uE04D;" // SIGN VIRAMA
|
||||
|
||||
"\u0A5C>\uE05C;" // LETTER RRA
|
||||
|
||||
"\u0A66>\uE066;" // DIGIT ZERO
|
||||
"\u0A67>\uE067;" // DIGIT ONE
|
||||
"\u0A68>\uE068;" // DIGIT TWO
|
||||
"\u0A69>\uE069;" // DIGIT THREE
|
||||
"\u0A6A>\uE06A;" // DIGIT FOUR
|
||||
"\u0A6B>\uE06B;" // DIGIT FIVE
|
||||
"\u0A6C>\uE06C;" // DIGIT SIX
|
||||
"\u0A6D>\uE06D;" // DIGIT SEVEN
|
||||
"\u0A6E>\uE06E;" // DIGIT EIGHT
|
||||
"\u0A6F>\uE06F;" // DIGIT NINE
|
||||
"\u0A70>\uE07C;" // TIPPI
|
||||
"\u0A71>\uE07D;" // ADDAK
|
||||
"\u0A72>\uE07E;" // IRI
|
||||
"\u0A73>\uE07F;" // URA
|
||||
"\u0A74>\uE080;" // EK ONKAR
|
||||
"\u0964>\ue064;" // DANDA
|
||||
"\u0965>\ue065;" // DOUBLE DANDA
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -1,39 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat
|
||||
// Source: \icu4j\src\com\ibm\icu\impl\data/Transliterator_Han_Spacedhan.txt
|
||||
// Date: Fri May 28 17:07:31 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Han_Spacedhan
|
||||
|
||||
t_Hani_SpHan {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Only intended for internal use
|
||||
":: fullwidth-halfwidth;"
|
||||
|
||||
"。 > '.';"
|
||||
|
||||
"$terminalPunct = [\\\.\\\,\\\:\\\;\\\?\\\!.,:?!。、;[:Pe:][:Pf:]];"
|
||||
"$initialPunct = [:Ps:][:Pi:];"
|
||||
|
||||
// add space between any Han or terminal punctuation and letters, and
|
||||
// between letters and Han or initial punct
|
||||
|
||||
"[[:Ideographic:] $terminalPunct] {} [:Letter:] > ' ' ;"
|
||||
"[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] > ' ' ;"
|
||||
|
||||
// remove spacing between ideographs and other letters
|
||||
|
||||
"< [:Ideographic:] { ' ' } [:Letter:] ;"
|
||||
"< [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;"
|
||||
|
||||
}
|
||||
}
|
|
@ -1,124 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat
|
||||
// Source: \icu4j\src\com\ibm\icu\impl\data/Transliterator_Hebrew_Latin.txt
|
||||
// Date: Fri May 28 17:07:31 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Hebrew_Latin
|
||||
|
||||
t_Hebr_Latn {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Transliteration table for Hebrew
|
||||
// Based on the UNGEGN table at:
|
||||
// http://www.eki.ee/wgrs/rom1_he.pdf
|
||||
//
|
||||
// Exceptions:
|
||||
// - Accents are added to disambiguate letters
|
||||
// - Combinations of dagesh, shin/sin dot that produce different
|
||||
// letters are not yet encoded.
|
||||
//
|
||||
// To test, open:
|
||||
// http://oss.software.ibm.com/cgi-bin/icu/tr
|
||||
// Click Edit, paste in this file, Save As hebrew-latin/XXX
|
||||
// (where XXX is a username)
|
||||
// Now go back to the main window, and try it out.
|
||||
// Use hebrew-latin/XXX for Output 1, and (Inverse) for Output 2
|
||||
// Paste in hebrew text in Input, and hit Transliterate.
|
||||
//
|
||||
// For more information, see:
|
||||
// http://oss.software.ibm.com/icu/userguide/Transliteration.html
|
||||
|
||||
":: [[:Hebrew:] [:^ccc=0:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138̄\u05BF] - [\u05BD]] ;"
|
||||
":: nfkd (nfc) ;"
|
||||
"$letterAfter = [:M:]* [:L:] ;"
|
||||
|
||||
// move longer items here to avoid masking
|
||||
|
||||
"ח <> ẖ ;"
|
||||
"צ <> ẕ } $letterAfter;"
|
||||
"ץ <> ẕ ;"
|
||||
"ש <> ş ;"
|
||||
"ת <> ţ ;"
|
||||
|
||||
"א <> ʼ ;"
|
||||
"ב <> b ;"
|
||||
"ג <> g ;"
|
||||
"ד <> d ;"
|
||||
"ה <> h ;"
|
||||
"ו <> w ;"
|
||||
"ז <> z ;"
|
||||
"ט <> t ;"
|
||||
"י <> y ;"
|
||||
"כ <> k } $letterAfter;"
|
||||
"ך <> k ;"
|
||||
"ל <> l ;"
|
||||
"מ <> m } $letterAfter;"
|
||||
"ם <> m ;"
|
||||
"נ <> n } $letterAfter;"
|
||||
"ן <> n ;"
|
||||
"ס <> s ;"
|
||||
"ע <> ʻ ;"
|
||||
"פ <> p } $letterAfter;"
|
||||
"ף <> p ;"
|
||||
"ק <> q ;"
|
||||
"ר <> r ;"
|
||||
|
||||
"װ > | וו;" // HEBREW LIGATURE YIDDISH DOUBLE VAV
|
||||
"ױ > | וי;" // HEBREW LIGATURE YIDDISH VAV YOD
|
||||
"ײ > | יי ;" // HEBREW LIGATURE YIDDISH DOUBLE YOD
|
||||
|
||||
|
||||
"ּ <> ̇ ;" // dagesh just goes to overdot for now
|
||||
"ׁ <> ̌ ;" // shin dot -> sh
|
||||
"ׂ <> ̂ ;" // sin dot -> s
|
||||
|
||||
// points
|
||||
"$above = [^[:ccc=0:][:ccc=230:]]*;"
|
||||
|
||||
"ֲ > à ;"
|
||||
"ֲ $1< a ($above) ̀;"
|
||||
|
||||
"ָ > á ;"
|
||||
"ָ $1 < a ($above) ́;"
|
||||
|
||||
"ֱ > è ;"
|
||||
"ֱ $1 < e ($above) ̀;"
|
||||
|
||||
"ֵ > é ;"
|
||||
"ֵ $1 < e ($above) ́;"
|
||||
|
||||
"ְ > e ̆ ;"
|
||||
"ְ $1 < e ($above) ̆;"
|
||||
|
||||
"ֹ > ò ;"
|
||||
"ֹ $1 < o ($above) ̀;"
|
||||
|
||||
"ִ <> i ;"
|
||||
"ֻ <> u ;"
|
||||
"ַ <> a ;"
|
||||
"ֶ <> e ;"
|
||||
"ֳ <> o ;"
|
||||
|
||||
"\u05BF <> ̄ ;"
|
||||
|
||||
// fallbacks
|
||||
"ק < c ;"
|
||||
"פ < f } $letterAfter;"
|
||||
"ף < f ;"
|
||||
"ז < j ;"
|
||||
"ו < v ;"
|
||||
"כס < x ;"
|
||||
|
||||
":: (lower);"
|
||||
":: nfc (nfd) ;"
|
||||
":: ([[:Latin:] [:^ccc=0:] [\u02BB-\u02BC\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 ̄ ]]);"
|
||||
}
|
||||
}
|
|
@ -1,223 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Hiragana_Katakana.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Hiragana_Katakana
|
||||
|
||||
t_Hira_Kana {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// note: a global filter is more efficient, but MUST include all source chars
|
||||
":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;"
|
||||
":: NFKC ();"
|
||||
|
||||
// Hiragana-Katakana
|
||||
|
||||
// This is largely a one-to-one mapping, but it has a
|
||||
// few kinks:
|
||||
|
||||
// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
|
||||
// Hiragana equivalents. We use Hiragana wa/wi/we/wo
|
||||
// (308F-3092) with a voicing mark (3099), which is
|
||||
// semantically equivalent. However, this is a non-
|
||||
// roundtripping transformation.
|
||||
|
||||
// 2. The Katakana small ka/ke (30F5,30F6) have no
|
||||
// Hiragana equivalents. We convert them to normal
|
||||
// Hiragana ka/ke (304B,3051). This is a one-way
|
||||
// information-losing transformation and precludes
|
||||
// round-tripping of 30F5 and 30F6.
|
||||
|
||||
// 3. The combining marks 3099-309C are in the Hiragana
|
||||
// block, but they apply to Katakana as well, so we
|
||||
// leave them untouched.
|
||||
|
||||
// 4. The Katakana prolonged sound mark 30FC doubles the
|
||||
// preceding vowel. This is a one-way information-
|
||||
// losing transformation from Katakana to Hiragana.
|
||||
|
||||
// 5. The Katakana middle dot separates words in foreign
|
||||
// expressions; we leave this unmodified.
|
||||
|
||||
// The above points preclude successful round-trip
|
||||
// transformations of arbitrary input text. However,
|
||||
// they provide naturalistic results that should conform
|
||||
// to user expectations.
|
||||
|
||||
|
||||
// Combining equivalents va/vi/ve/vo
|
||||
"わ゙ <> ヷ;"
|
||||
"ゐ゙ <> ヸ;"
|
||||
"ゑ゙ <> ヹ;"
|
||||
"を゙ <> ヺ;"
|
||||
|
||||
// One-to-one mappings, main block
|
||||
// 3041:3094 <> 30A1:30F4
|
||||
// 309D,E <> 30FD,E
|
||||
"ぁ <> ァ;"
|
||||
"あ <> ア;"
|
||||
"ぃ <> ィ;"
|
||||
"い <> イ;"
|
||||
"ぅ <> ゥ;"
|
||||
"う <> ウ;"
|
||||
"ぇ <> ェ;"
|
||||
"え <> エ;"
|
||||
"ぉ <> ォ;"
|
||||
"お <> オ;"
|
||||
"か <> カ;"
|
||||
"が <> ガ;"
|
||||
"き <> キ;"
|
||||
"ぎ <> ギ;"
|
||||
"く <> ク;"
|
||||
"ぐ <> グ;"
|
||||
"け <> ケ;"
|
||||
"げ <> ゲ;"
|
||||
"こ <> コ;"
|
||||
"ご <> ゴ;"
|
||||
"さ <> サ;"
|
||||
"ざ <> ザ;"
|
||||
"し <> シ;"
|
||||
"じ <> ジ;"
|
||||
"す <> ス;"
|
||||
"ず <> ズ;"
|
||||
"せ <> セ;"
|
||||
"ぜ <> ゼ;"
|
||||
"そ <> ソ;"
|
||||
"ぞ <> ゾ;"
|
||||
"た <> タ;"
|
||||
"だ <> ダ;"
|
||||
"ち <> チ;"
|
||||
"ぢ <> ヂ;"
|
||||
"っ <> ッ;"
|
||||
"つ <> ツ;"
|
||||
"づ <> ヅ;"
|
||||
"て <> テ;"
|
||||
"で <> デ;"
|
||||
"と <> ト;"
|
||||
"ど <> ド;"
|
||||
"な <> ナ;"
|
||||
"に <> ニ;"
|
||||
"ぬ <> ヌ;"
|
||||
"ね <> ネ;"
|
||||
"の <> ノ;"
|
||||
"は <> ハ;"
|
||||
"ば <> バ;"
|
||||
"ぱ <> パ;"
|
||||
"ひ <> ヒ;"
|
||||
"び <> ビ;"
|
||||
"ぴ <> ピ;"
|
||||
"ふ <> フ;"
|
||||
"ぶ <> ブ;"
|
||||
"ぷ <> プ;"
|
||||
"へ <> ヘ;"
|
||||
"べ <> ベ;"
|
||||
"ぺ <> ペ;"
|
||||
"ほ <> ホ;"
|
||||
"ぼ <> ボ;"
|
||||
"ぽ <> ポ;"
|
||||
"ま <> マ;"
|
||||
"み <> ミ;"
|
||||
"む <> ム;"
|
||||
"め <> メ;"
|
||||
"も <> モ;"
|
||||
"ゃ <> ャ;"
|
||||
"や <> ヤ;"
|
||||
"ゅ <> ュ;"
|
||||
"ゆ <> ユ;"
|
||||
"ょ <> ョ;"
|
||||
"よ <> ヨ;"
|
||||
"ら <> ラ;"
|
||||
"り <> リ;"
|
||||
"る <> ル;"
|
||||
"れ <> レ;"
|
||||
"ろ <> ロ;"
|
||||
"ゎ <> ヮ;"
|
||||
"わ <> ワ;"
|
||||
"ゐ <> ヰ;"
|
||||
"ゑ <> ヱ;"
|
||||
"を <> ヲ;"
|
||||
"ん <> ン;"
|
||||
"ゔ <> ヴ;"
|
||||
"ゝ <> ヽ;"
|
||||
"ゞ <> ヾ;"
|
||||
|
||||
// One-way Katakana-Hiragana xform of small K ka/ke to
|
||||
// normal H ka/ke.
|
||||
"か < ヵ;"
|
||||
"け < ヶ;"
|
||||
|
||||
// Katakana followed by a prolonged sound mark 30FC has
|
||||
// its final vowel doubled. This is a Katakana-Hiragana
|
||||
// one-way information-losing transformation. We
|
||||
// include the small Katakana (e.g., small A 3041) and
|
||||
// do not distinguish them from their large
|
||||
// counterparts. It doesn't make sense to double a
|
||||
// small counterpart vowel as a small Hiragana vowel, so
|
||||
// we don't do so. In natural text this should never
|
||||
// occur anyway. If a 30FC is seen without a preceding
|
||||
// vowel sound (e.g., after n 30F3) we do not change it.
|
||||
|
||||
//## $long = ー;
|
||||
|
||||
// The following categories are Hiragana, not Katakana
|
||||
// as might be expected, since by the time we get to the
|
||||
// 30FC, the preceding character will have already been
|
||||
// transformed to Hiragana.
|
||||
|
||||
// {The following mechanically generated from the
|
||||
// Unicode 3.0 data:}
|
||||
|
||||
"$xa = ["
|
||||
"ぁ あ か が さ ざ"
|
||||
"た だ な は ば ぱ"
|
||||
"ま ゃ や ら ゎ わ"
|
||||
"];"
|
||||
|
||||
"$xi = ["
|
||||
"ぃ い き ぎ し じ"
|
||||
"ち ぢ に ひ び ぴ"
|
||||
"み り ゐ"
|
||||
"];"
|
||||
|
||||
"$xu = ["
|
||||
"ぅ う く ぐ す ず"
|
||||
"っ つ づ ぬ ふ ぶ"
|
||||
"ぷ む ゅ ゆ る ゔ"
|
||||
"];"
|
||||
|
||||
"$xe = ["
|
||||
"ぇ え け げ せ ぜ"
|
||||
"て で ね へ べ ぺ"
|
||||
"め れ ゑ"
|
||||
"];"
|
||||
|
||||
"$xo = ["
|
||||
"ぉ お こ ご そ ぞ"
|
||||
"と ど の ほ ぼ ぽ"
|
||||
"も ょ よ ろ を"
|
||||
"];"
|
||||
|
||||
"あ < $xa {ー};"
|
||||
"い < $xi {ー};"
|
||||
"う < $xu {ー};"
|
||||
"え < $xe {ー};"
|
||||
"お < $xo {ー};"
|
||||
|
||||
":: (NFKC) ;"
|
||||
|
||||
// note: a global filter is more efficient, but MUST include all source chars!!
|
||||
":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);"
|
||||
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,30 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Hiragana_Latin.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Hiragana_Latin
|
||||
|
||||
t_Hira_Latn {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
":: [\u3041-\u3094\u3099\u309D-\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0\u30D3\u30D6\u30D9\u30DC\u30F4\u30F7-\u30FA\u30FE] ;"
|
||||
":: NFD ;"
|
||||
|
||||
":: Hiragana-Katakana;"
|
||||
":: Katakana-Latin;"
|
||||
|
||||
":: NFC ;"
|
||||
":: (Lower) ;"
|
||||
":: ([',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]) ;"
|
||||
}
|
||||
}
|
|
@ -1,163 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_InterIndic_Bengali.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Bengali
|
||||
|
||||
t_InterIndic_Beng {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic-Bengali
|
||||
//:: NFD (NFC) ;
|
||||
"\uE001>\u0981;" // SIGN CANDRABINDU
|
||||
"\uE002>\u0982;" // SIGN ANUSVARA
|
||||
"\uE003>\u0983;" // SIGN VISARGA
|
||||
"\uE004>\u0985;" // FALLBACK TO LETTER A
|
||||
"\uE005>\u0985;" // LETTER A
|
||||
"\uE006>\u0986;" // LETTER AA
|
||||
"\uE007>\u0987;" // LETTER I
|
||||
"\uE008>\u0988;" // LETTER II
|
||||
"\uE009>\u0989;" // LETTER U
|
||||
"\uE00A>\u098A;" // LETTER UU
|
||||
"\uE00B>\u098B;" // LETTER VOCALIC R
|
||||
"\uE00C>\u098C;" // LETTER VOCALIC L
|
||||
"\uE00D>\u098F;" // FALLBACK
|
||||
"\uE00E>\u098F;" // FALLBACK
|
||||
"\uE00F>\u098F;" // LETTER E
|
||||
"\uE010>\u0990;" // LETTER AI
|
||||
"\uE011>\u0993;" // FALLBACK
|
||||
"\uE012>\u0993;" // FALLBACK
|
||||
"\uE013>\u0993;" // LETTER O
|
||||
"\uE014>\u0994;" // LETTER AU
|
||||
"\uE015>\u0995;" // LETTER KA
|
||||
"\uE016>\u0996;" // LETTER KHA
|
||||
"\uE017>\u0997;" // LETTER GA
|
||||
"\uE018>\u0998;" // LETTER GHA
|
||||
"\uE019>\u0999;" // LETTER NGA
|
||||
"\uE01A>\u099A;" // LETTER CA
|
||||
"\uE01B>\u099B;" // LETTER CHA
|
||||
"\uE01C>\u099C;" // LETTER JA
|
||||
"\uE01D>\u099D;" // LETTER JHA
|
||||
"\uE01E>\u099E;" // LETTER NYA
|
||||
"\uE01F>\u099F;" // LETTER TTA
|
||||
"\uE020>\u09A0;" // LETTER TTHA
|
||||
"\uE021>\u09A1;" // LETTER DDA
|
||||
"\uE022>\u09A2;" // LETTER DDHA
|
||||
"\uE023>\u09A3;" // LETTER NNA
|
||||
"\uE024>\u09A4;" // LETTER TA
|
||||
"\uE025>\u09A5;" // LETTER THA
|
||||
"\uE026>\u09A6;" // LETTER DA
|
||||
"\uE027>\u09A7;" // LETTER DHA
|
||||
"\uE028>\u09A8;" // LETTER NA
|
||||
"\uE029>\u09A8\u09BC;" // REMAP (indicExceptions.txt): \u09a9>\u09a8 = LETTER NNNA>LETTER NA
|
||||
"\uE02A>\u09AA;" // LETTER PA
|
||||
"\uE02B>\u09AB;" // LETTER PHA
|
||||
"\uE02C>\u09AC;" // LETTER BA
|
||||
"\uE02D>\u09AD;" // LETTER BHA
|
||||
"\uE02E>\u09AE;" // LETTER MA
|
||||
"\uE02F>\u09AF;" // LETTER YA
|
||||
"\uE030>\u09B0;" // LETTER RA
|
||||
"\uE031>\u09B0\u09BC;" // FALLBACK to RA
|
||||
"\uE032>\u09B2;" // LETTER LA
|
||||
"\uE033>\u09B2;" // REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA
|
||||
"\uE034>\u09B2;" // REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA
|
||||
"\uE035>\u09AC;" // REMAP (indicExceptions.txt): \u09b5>\u09ac = LETTER VA>LETTER BA
|
||||
"\uE036>\u09B6;" // LETTER SHA
|
||||
"\uE037>\u09B7;" // LETTER SSA
|
||||
"\uE038>\u09B8;" // LETTER SA
|
||||
"\uE039>\u09B9;" // LETTER HA
|
||||
"\uE03C>\u09BC;" // SIGN NUKTA
|
||||
"\uE03D>\u09bd;" // SIGN AVAGRAHA
|
||||
"\uE03E>\u09BE;" // VOWEL SIGN AA
|
||||
"\uE03F>\u09BF;" // VOWEL SIGN I
|
||||
"\uE040>\u09C0;" // VOWEL SIGN II
|
||||
"\uE041>\u09C1;" // VOWEL SIGN U
|
||||
"\uE042>\u09C2;" // VOWEL SIGN UU
|
||||
"\uE043>\u09C3;" // VOWEL SIGN VOCALIC R
|
||||
"\uE044>\u09C4;" // VOWEL SIGN VOCALIC RR
|
||||
"\uE045>\u09C7;" // REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
"\uE046>\u09C7;" // FALLBACK
|
||||
"\uE047>\u09C7;" // VOWEL SIGN E
|
||||
"\uE048>\u09C8;" // VOWEL SIGN AI
|
||||
"\uE049>\u09C7\u09BE;" // REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
"\uE04A>\u09C7\u09BE;" // FALLBACK
|
||||
"\uE04B>\u09C7\u09BE;" // VOWEL SIGN O
|
||||
"\uE04C>\u09C7\u09D7;" // VOWEL SIGN AU
|
||||
"\uE04D>\u09CD;" // SIGN VIRAMA
|
||||
"\uE050>\u0993\u0982;" // InterIndic-Bengali: OM
|
||||
"\ue051>;"
|
||||
"\ue052>;"
|
||||
"\ue053>;"
|
||||
"\ue054>;"
|
||||
"\uE055>;" // LENGTH MARK
|
||||
"\uE056>\u09C8;" // REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\uE057>\u09D7;" // AU LENGTH MARK
|
||||
"\uE058>\u0995\u09BC;" // FALLBACK
|
||||
"\uE059>\u0996\u09BC;" // REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA
|
||||
"\uE05A>\u0997\u09BC;" // REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA
|
||||
"\uE05B>\u099C\u09BC;" // REMAP (indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA
|
||||
"\uE05C>\u09A1\u09BC;" // FALLBACK
|
||||
"\uE05D>\u09A2\u09BC;" // LETTER RHA
|
||||
"\uE05E>\u09AB\u09BC;" // REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA
|
||||
"\uE05F>\u09AF\u09BC;" // LETTER YYA
|
||||
"\uE060>\u09E0;" // LETTER VOCALIC RR
|
||||
"\uE061>\u09E1;" // LETTER VOCALIC LL
|
||||
"\uE062>\u09E2;" // VOWEL SIGN VOCALIC L
|
||||
"\uE063>\u09E3;" // VOWEL SIGN VOCALIC LL
|
||||
"\uE064>\u0964;" // DANDA
|
||||
"\uE065>\u0965;" // DOUBLE DANDA
|
||||
"\uE066>\u09E6;" // DIGIT ZERO
|
||||
"\uE067>\u09E7;" // DIGIT ONE
|
||||
"\uE068>\u09E8;" // DIGIT TWO
|
||||
"\uE069>\u09E9;" // DIGIT THREE
|
||||
"\uE06A>\u09EA;" // DIGIT FOUR
|
||||
"\uE06B>\u09EB;" // DIGIT FIVE
|
||||
"\uE06C>\u09EC;" // DIGIT SIX
|
||||
"\uE06D>\u09ED;" // DIGIT SEVEN
|
||||
"\uE06E>\u09EE;" // DIGIT EIGHT
|
||||
"\uE06F>\u09EF;" // DIGIT NINE
|
||||
"\ue070>;" // ABBREVIATION SIGN
|
||||
"\ue071>\u09F0;" // LETTER RA WITH MIDDLE DIAGONAL
|
||||
"\ue072>\u09F1;" // LETTER RA WITH LOWER DIAGONAL
|
||||
"\ue073>\u09F2;" // RUPEE MARK
|
||||
"\ue074>\u09F3;" // RUPEE SIGN
|
||||
"\ue075>\u09F4;" // CURRENCY NUMERATOR ONE
|
||||
"\ue076>\u09F5;" // CURRENCY NUMERATOR TWO
|
||||
"\ue077>\u09F6;" // CURRENCY NUMERATOR THREE
|
||||
"\ue078>\u09F7;" // CURRENCY NUMERATOR FOUR
|
||||
"\ue079>\u09F8;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
"\ue07A>\u09F9;" // CURRENCY DENOMINATOR SIXTEEN
|
||||
"\ue07B>\u09FA;" // ISSHAR
|
||||
"\uE07C>;" // TIPPI
|
||||
"\uE07D>;" // ADDAK
|
||||
"\uE07E>;" // IRI
|
||||
"\uE07F>;" // URA
|
||||
"\uE080>;" // EK ONKAR
|
||||
"\uE081>\u09AC;" // FALLBACK FOR ORIYA LETTER WA
|
||||
"0 > \u09E6;" // FALLBACK FOR TAMIL
|
||||
"1 > \u09E7;"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,174 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_InterIndic_Devanagari.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Devanagari
|
||||
|
||||
t_InterIndic_Deva {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic-Devanagari
|
||||
//:: NFD (NFC) ;
|
||||
//Rules for Decomposed characters
|
||||
"\ue028\ue03c > \u0929;" //\ue029
|
||||
"\ue030\ue03c > \u0931;" //\ue031
|
||||
"\ue033\ue03c > \u0934;" //\ue034
|
||||
"\ue015\ue03c > \u0958;" //\ue058 LETTER QA (For Urdu)
|
||||
"\ue016\ue03c > \u0959;" //\ue059 LETTER KHHA (For Urdu)
|
||||
"\ue017\ue03c > \u095a;" //\ue05a LETTER GHHA (For Urdu)
|
||||
"\ue01c\ue03c > \u095b;" //\ue05b LETTER ZA (For Urdu)
|
||||
"\ue021\ue03c > \u095c;" //\ue05c LETTER DDDHA (pronounced RRA)
|
||||
"\ue022\ue03c > \u095d;" //\ue05d LETTER RHA (pronounced RRHA)
|
||||
"\ue02b\ue03c > \u095e;" //\ue05e LETTER FA
|
||||
"\ue02f\ue03c > \u095f;" //\ue05f LETTER YYA
|
||||
|
||||
//Decomposed compatibility transliterations
|
||||
"\ue012\ue057>\u0914;" // FALLBACK FOR TAMIL AU
|
||||
"0 > \u0966;" // FALLBACK FOR TAMIL
|
||||
"1 > \u0967;"
|
||||
|
||||
"\ue055>;" // FALLBACK BLOW AWAY KANNADA AND TELUGU LENGTH MARK
|
||||
"\ue056>;" // FALLBACK BLOW AWAY KANNADA AND TELUGU AI LENGTH MARK
|
||||
"\ue057>;" // FALLBACK BLOW AWAY TAMIL AU LENGTH MARK
|
||||
|
||||
"\ue001 > \u0901;" // SIGN CANDRABINDU
|
||||
"\ue002 > \u0902;" // SIGN ANUSVARA
|
||||
"\ue003 > \u0903;" // SIGN VISARGA
|
||||
"\ue004 > \u0904;" // SIGN SHORT A
|
||||
"\ue005 > \u0905;" // LETTER A
|
||||
"\ue006 > \u0906;" // LETTER AA
|
||||
"\ue007 > \u0907;" // LETTER I
|
||||
"\ue008 > \u0908;" // LETTER II
|
||||
"\ue009 > \u0909;" // LETTER U
|
||||
"\ue00a > \u090a;" // LETTER UU
|
||||
"\ue00b > \u090b;" // LETTER VOCALIC R
|
||||
"\ue00c > \u090c;" // LETTER VOCALIC L
|
||||
"\ue00d > \u090d;" // LETTER CANDRA E (For representing English sounds)
|
||||
"\ue00e > \u090e;" // LETTER SHORT E(For Southern Scripts)
|
||||
"\ue00f > \u090f;" // LETTER E
|
||||
"\ue010 > \u0910;" // LETTER AI
|
||||
"\ue011 > \u0911;" // LETTER CANDRA O (For representing English sounds)
|
||||
"\ue012 > \u0912;" // LETTER SHORT O (For Southern Scripts)
|
||||
"\ue013 > \u0913;" // LETTER O
|
||||
"\ue014 > \u0914;" // LETTER AU
|
||||
"\ue015 > \u0915;" // LETTER KA
|
||||
"\ue016 > \u0916;" // LETTER KHA
|
||||
"\ue017 > \u0917;" // LETTER GA
|
||||
"\ue018 > \u0918;" // LETTER GHA
|
||||
"\ue019 > \u0919;" // LETTER NGA
|
||||
"\ue01a > \u091a;" // LETTER CA
|
||||
"\ue01b > \u091b;" // LETTER CHA
|
||||
"\ue01c > \u091c;" // LETTER JA
|
||||
"\ue01d > \u091d;" // LETTER JHA
|
||||
"\ue01e > \u091e;" // LETTER NYA
|
||||
"\ue01f > \u091f;" // LETTER TTA
|
||||
"\ue020 > \u0920;" // LETTER TTHA
|
||||
"\ue021 > \u0921;" // LETTER DDA
|
||||
"\ue022 > \u0922;" // LETTER DDHA
|
||||
"\ue023 > \u0923;" // LETTER NNA
|
||||
"\ue024 > \u0924;" // LETTER TA
|
||||
"\ue025 > \u0925;" // LETTER THA
|
||||
"\ue026 > \u0926;" // LETTER DA
|
||||
"\ue027 > \u0927;" // LETTER DHA
|
||||
"\ue028 > \u0928;" // LETTER NA
|
||||
"\ue029 > \u0929;" // LETTER NNNA
|
||||
"\ue02a > \u092a;" // LETTER PA
|
||||
"\ue02b > \u092b;" // LETTER PHA
|
||||
"\ue02c > \u092c;" // LETTER BA
|
||||
"\ue02d > \u092d;" // LETTER BHA
|
||||
"\ue02e > \u092e;" // LETTER MA
|
||||
"\ue02f > \u092f;" // LETTER YA
|
||||
"\ue030 > \u0930;" // LETTER RA
|
||||
"\ue031 > \u0931;" // LETTER RRA (Eyelash RA for Southern scripts)
|
||||
//\ue031 > \u0930;
|
||||
"\ue032 > \u0932;" // LETTER LA
|
||||
"\ue033 > \u0933;" // LETTER LLA
|
||||
"\ue034 > \u0934;" // LETTER LLLA (LLLA for Southern scripts)
|
||||
//\ue034 > \u0933;
|
||||
"\ue035 > \u0935;" // LETTER VA
|
||||
"\ue036 > \u0936;" // LETTER SHA
|
||||
"\ue037 > \u0937;" // LETTER SSA
|
||||
"\ue038 > \u0938;" // LETTER SA
|
||||
"\ue039 > \u0939;" // LETTER HA
|
||||
"\ue03c > \u093c;" // SIGN NUKTA
|
||||
"\ue03d > \u093d;" // SIGN AVAGRAHA
|
||||
"\ue03e > \u093e;" // VOWEL SIGN AA
|
||||
"\ue03f > \u093f;" // VOWEL SIGN I
|
||||
"\ue040 > \u0940;" // VOWEL SIGN II
|
||||
"\ue041 > \u0941;" // VOWEL SIGN U
|
||||
"\ue042 > \u0942;" // VOWEL SIGN UU
|
||||
"\ue043 > \u0943;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044 > \u0944;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045 > \u0945;" // VOWEL SIGN CANDRA E
|
||||
"\ue046 > \u0946;" // VOWEL SIGN SHORT E
|
||||
"\ue047 > \u0947;" // VOWEL SIGN E
|
||||
"\ue048 > \u0948;" // VOWEL SIGN AI
|
||||
"\ue049 > \u0949;" // VOWEL SIGN CANDRA O
|
||||
"\ue04a > \u094a;" // VOWEL SIGN SHORT O
|
||||
"\ue04b > \u094b;" // VOWEL SIGN O
|
||||
"\ue04c > \u094c;" // VOWEL SIGN AU
|
||||
"\ue04d > \u094d;" // SIGN VIRAMA
|
||||
"\ue050 > \u0950;" // OM
|
||||
"\ue051 > \u0951;" // STRESS SIGN UDATTA
|
||||
"\ue052 > \u0952;" // STRESS SIGN ANUDATTA
|
||||
"\ue053 > \u0953;" // GRAVE ACCENT
|
||||
"\ue054 > \u0954;" // ACUTE ACCENT
|
||||
"\ue058 > \u0958;" // LETTER QA (For Urdu)
|
||||
"\ue059 > \u0959;" // LETTER KHHA (For Urdu)
|
||||
"\ue05a > \u095a;" // LETTER GHHA (For Urdu)
|
||||
"\ue05b > \u095b;" // LETTER ZA (For Urdu)
|
||||
"\ue05c > \u095c;" // LETTER DDDHA (pronounced RRA)
|
||||
"\ue05d > \u095d;" // LETTER RHA (pronounced RRHA)
|
||||
"\ue05e > \u095e;" // LETTER FA
|
||||
"\ue05f > \u095f;" // LETTER YYA
|
||||
"\ue060 > \u0960;" // LETTER VOCALIC RR
|
||||
"\ue061 > \u0961;" // LETTER VOCALIC LL
|
||||
"\ue062 > \u0962;" // VOWEL SIGN VOCALIC L
|
||||
"\ue063 > \u0963;" // VOWEL SIGN VOCALIC LL
|
||||
"\ue064 > \u0964;" // DANDA
|
||||
"\ue065 > \u0965;" // DOUBLE DANDA
|
||||
"\ue066 > \u0966;" // DIGIT ZERO
|
||||
"\ue067 > \u0967;" // DIGIT ONE
|
||||
"\ue068 > \u0968;" // DIGIT TWO
|
||||
"\ue069 > \u0969;" // DIGIT THREE
|
||||
"\ue06a > \u096a;" // DIGIT FOUR
|
||||
"\ue06b > \u096b;" // DIGIT FIVE
|
||||
"\ue06c > \u096c;" // DIGIT SIX
|
||||
"\ue06d > \u096d;" // DIGIT SEVEN
|
||||
"\ue06e > \u096e;" // DIGIT EIGHT
|
||||
"\ue06f > \u096f;" // DIGIT NINE
|
||||
|
||||
"\ue070>\u0970;" // ABBREVIATION SIGN
|
||||
"\ue071>\u0930;" // LETTER RA WITH MIDDLE DIAGONAL
|
||||
"\ue072>\u0930;" // LETTER RA WITH LOWER DIAGONAL
|
||||
"\ue073>;" // RUPEE MARK
|
||||
"\ue074>\u0930\u0942;" // RUPEE SIGN
|
||||
"\ue075>;" // CURRENCY NUMERATOR ONE
|
||||
"\ue076>;" // CURRENCY NUMERATOR TWO
|
||||
"\ue077>;" // CURRENCY NUMERATOR THREE
|
||||
"\ue078>;" // CURRENCY NUMERATOR FOUR
|
||||
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
|
||||
"\ue07B>;" // ISSHAR
|
||||
"\uE07C>;" // TIPPI
|
||||
"\uE07D>;" // ADDAK
|
||||
"\uE07E>;" // IRI
|
||||
"\uE07F>;" // URA
|
||||
"\uE080>;" // EK ONKAR
|
||||
"\uE081>\u0935;" // FALLBACK FOR ORIYA LETTER WA
|
||||
|
||||
// \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
|
||||
// :: NFC;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,154 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_InterIndic_Gujarati.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Gujarati
|
||||
|
||||
t_InterIndic_Gujr {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic-Gujarati
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0a81;" // SIGN CANDRABINDU
|
||||
"\ue002>\u0a82;" // SIGN ANUSVARA
|
||||
"\ue003>\u0a83;" // SIGN VISARGA
|
||||
"\uE004>\u0a85;" // FALLBACK TO LETTER A
|
||||
"\ue005>\u0a85;" // LETTER A
|
||||
"\ue006>\u0a86;" // LETTER AA
|
||||
"\ue007>\u0a87;" // LETTER I
|
||||
"\ue008>\u0a88;" // LETTER II
|
||||
"\ue009>\u0a89;" // LETTER U
|
||||
"\ue00a>\u0a8a;" // LETTER UU
|
||||
"\ue00b>\u0a8b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0a8c;" // LETTER VOCALIC L
|
||||
"\ue00d>\u0a8d;" // GUJARATI VOWEL CANDRA E
|
||||
"\ue00e>\u0a8f;" // FALLBACK
|
||||
"\ue00f>\u0a8f;" // InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E)
|
||||
"\ue010>\u0a90;" // LETTER AI
|
||||
"\ue011>\u0a91;" // FALLBACK
|
||||
"\ue012>\u0a93;" // FALLBACK
|
||||
"\ue013>\u0a93;" // UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O)
|
||||
"\ue014>\u0a94;" // LETTER AU
|
||||
"\ue015>\u0a95;" // LETTER KA
|
||||
"\ue016>\u0a96;" // LETTER KHA
|
||||
"\ue017>\u0a97;" // LETTER GA
|
||||
"\ue018>\u0a98;" // LETTER GHA
|
||||
"\ue019>\u0a99;" // LETTER NGA
|
||||
"\ue01a>\u0a9a;" // LETTER CA
|
||||
"\ue01b>\u0a9b;" // LETTER CHA
|
||||
"\ue01c>\u0a9c;" // LETTER JA
|
||||
"\ue01d>\u0a9d;" // LETTER JHA
|
||||
"\ue01e>\u0a9e;" // LETTER NYA
|
||||
"\ue01f>\u0a9f;" // LETTER TTA
|
||||
"\ue020>\u0aa0;" // LETTER TTHA
|
||||
"\ue021>\u0aa1;" // LETTER DDA
|
||||
"\ue022>\u0aa2;" // LETTER DDHA
|
||||
"\ue023>\u0aa3;" // LETTER NNA
|
||||
"\ue024>\u0aa4;" // LETTER TA
|
||||
"\ue025>\u0aa5;" // LETTER THA
|
||||
"\ue026>\u0aa6;" // LETTER DA
|
||||
"\ue027>\u0aa7;" // LETTER DHA
|
||||
"\ue028>\u0aa8;" // LETTER NA
|
||||
"\ue029>\u0aa8\u0abc;" // FALLBACK to NA+NUKTA
|
||||
"\ue02a>\u0aaa;" // LETTER PA
|
||||
"\ue02b>\u0aab;" // LETTER PHA
|
||||
"\ue02c>\u0aac;" // LETTER BA
|
||||
"\ue02d>\u0aad;" // LETTER BHA
|
||||
"\ue02e>\u0aae;" // LETTER MA
|
||||
"\ue02f>\u0aaf;" // LETTER YA
|
||||
"\ue030>\u0ab0;" // LETTER RA
|
||||
"\ue031>\u0ab0\u0abc;" // FALLBACK
|
||||
"\ue032>\u0ab2;" // LETTER LA
|
||||
"\ue033>\u0ab3;" // LETTER LLA
|
||||
"\ue034>\u0ab3\u0abc;" // LETTER LLLA>LETTER LLA+NUKTA
|
||||
"\ue035>\u0ab5;" // LETTER VA
|
||||
"\ue036>\u0ab6;" // LETTER SHA
|
||||
"\ue037>\u0ab7;" // LETTER SSA
|
||||
"\ue038>\u0ab8;" // LETTER SA
|
||||
"\ue039>\u0ab9;" // LETTER HA
|
||||
"\ue03c>\u0abc;" // SIGN NUKTA
|
||||
"\ue03d>\u0abd;" // SIGN AVAGRAHA
|
||||
"\ue03e>\u0abe;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0abf;" // VOWEL SIGN I
|
||||
"\ue040>\u0ac0;" // VOWEL SIGN II
|
||||
"\ue041>\u0ac1;" // VOWEL SIGN U
|
||||
"\ue042>\u0ac2;" // VOWEL SIGN UU
|
||||
"\ue043>\u0ac3;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u0ac4;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0ac5;" // VOWEL SIGN CANDRA E
|
||||
"\ue046>\u0ac7;" // FALLBACK
|
||||
"\ue047>\u0ac7;" // InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E)
|
||||
"\ue048>\u0ac8;" // VOWEL SIGN AI
|
||||
"\ue049>\u0ac9;" // VOWEL SIGN CANDRA O
|
||||
"\ue04a>\u0acb;" // FALLBACK
|
||||
"\ue04b>\u0acb;" // UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O)
|
||||
"\ue04c>\u0acc;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0acd;" // SIGN VIRAMA
|
||||
"\ue050>\u0ad0;" // OM
|
||||
"\ue051>;"
|
||||
"\ue052>;"
|
||||
"\ue053>;"
|
||||
"\ue054>;"
|
||||
"\ue055>;" // UNMAPPED InterIndic-Gujarati: LENGTH MARK
|
||||
"\ue056>\u0ac8;" // REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u0acc;" // REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU
|
||||
"\ue058>\u0a95\u0abc;" // FALLBACK
|
||||
"\ue059>\u0a96\u0abc;" // REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA
|
||||
"\ue05a>\u0a97\u0abc;" // REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA
|
||||
"\ue05b>\u0a9c\u0abc;" // REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA
|
||||
"\ue05c>\u0aa1\u0abc;" // FALLBACK
|
||||
"\ue05d>\u0aa2\u0abc;" // REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA
|
||||
"\ue05e>\u0aab\u0abc;" // REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER FA>LETTER PHA.SIGN NUKTA
|
||||
"\ue05f>\u0aaf\u0abc;" // REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA
|
||||
"\ue060>\u0ae0;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0ae1;" // LETTER VOCALIC LL
|
||||
"\ue062>\u0abf\u0abc;" // REMAP (indicExceptions.txt): \u0ae2>\u0abf\u0abc = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
|
||||
"\ue063>\u0ac0\u0abc;" // REMAP (indicExceptions.txt): \u0ae3>\u0ac0\u0abc = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
|
||||
"\uE064>\u0964;" // DANDA
|
||||
"\uE065>\u0965;" // DOUBLE DANDA
|
||||
"\ue066>\u0ae6;" // DIGIT ZERO
|
||||
"\ue067>\u0ae7;" // DIGIT ONE
|
||||
"\ue068>\u0ae8;" // DIGIT TWO
|
||||
"\ue069>\u0ae9;" // DIGIT THREE
|
||||
"\ue06a>\u0aea;" // DIGIT FOUR
|
||||
"\ue06b>\u0aeb;" // DIGIT FIVE
|
||||
"\ue06c>\u0aec;" // DIGIT SIX
|
||||
"\ue06d>\u0aed;" // DIGIT SEVEN
|
||||
"\ue06e>\u0aee;" // DIGIT EIGHT
|
||||
"\ue06f>\u0aef;" // DIGIT NINE
|
||||
"\ue070>;" // ABBREVIATION SIGN
|
||||
"\ue071>\u0ab0;" // LETTER RA WITH MIDDLE DIAGONAL
|
||||
"\ue072>\u0ab0;" // LETTER RA WITH LOWER DIAGONAL
|
||||
"\ue073>;" // RUPEE MARK
|
||||
"\ue074>;" // RUPEE SIGN
|
||||
"\ue075>;" // CURRENCY NUMERATOR ONE
|
||||
"\ue076>;" // CURRENCY NUMERATOR TWO
|
||||
"\ue077>;" // CURRENCY NUMERATOR THREE
|
||||
"\ue078>;" // CURRENCY NUMERATOR FOUR
|
||||
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
|
||||
"\ue07B>;" // ISSHAR
|
||||
"\uE07C>;" // TIPPI
|
||||
"\uE07D>;" // ADDAK
|
||||
"\uE07E>;" // IRI
|
||||
"\uE07F>;" // URA
|
||||
"\uE080>;" // EK ONKAR
|
||||
"\uE081>\u0ab5;" // FALLBACK FOR ORIYA LETTER WA
|
||||
"0 > \u0ae6;" // FALLBACK FOR TAMIL
|
||||
"1 > \u0ae7;"
|
||||
|
||||
//\ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,163 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_InterIndic_Gurmukhi.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Gurmukhi
|
||||
|
||||
t_InterIndic_Guru {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic-Gurmukhi
|
||||
//:: NFD (NFC) ;
|
||||
"$vowel = [\u0A05-\u0A14 \u0A3e-\u0A4D];"
|
||||
"$consonant = [\u0A15-\u0A39];"
|
||||
|
||||
"\ue001>\u0A01;" // SIGN CHANDRABINDU
|
||||
//rules for BINDI
|
||||
|
||||
// Anusvara is equivalent to BINDI when preceded by a vowel
|
||||
"$vowel{\ue002>\u0a02;" // SIGN ANUSVARA (\u0a02 = SIGN BINDI)
|
||||
// else is equivalent to TIPPI
|
||||
"$consonant{\ue002>\u0a70;" // SIGN TIPPI
|
||||
"\ue002>\u0a02;"
|
||||
|
||||
"\ue003>;" // FALLBACK BLOW AWAY SIGN VISARGA
|
||||
"\uE004>\u0a05;" // FALLBACK TO LETTER A
|
||||
"\ue005>\u0a05;" // LETTER A
|
||||
"\ue006>\u0a06;" // LETTER AA
|
||||
"\ue007>\u0a07;" // LETTER I
|
||||
"\ue008>\u0a08;" // LETTER II
|
||||
"\ue009>\u0a09;" // LETTER U
|
||||
"\ue00a>\u0a0a;" // LETTER UU
|
||||
"\ue00b>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
|
||||
"\ue00c>\u0a33;" // FALLBACK
|
||||
"\ue00d>\u0a0f;" // FALLBACK
|
||||
"\ue00e>\u0a0f;" // FALLBACK
|
||||
"\ue00f>\u0a0f;" // LETTER EE
|
||||
"\ue010>\u0a10;" // LETTER AI
|
||||
"\ue011>\u0a13;" // FALLBACK
|
||||
"\ue012>\u0a13;" // FALLBACK
|
||||
"\ue013>\u0a13;" // LETTER OO
|
||||
"\ue014>\u0a14;" // LETTER AU
|
||||
"\ue015>\u0a15;" // LETTER KA
|
||||
"\ue016>\u0a16;" // LETTER KHA
|
||||
"\ue017>\u0a17;" // LETTER GA
|
||||
"\ue018>\u0a18;" // LETTER GHA
|
||||
"\ue019>\u0a19;" // LETTER NGA
|
||||
"\ue01a>\u0a1a;" // LETTER CA
|
||||
"\ue01b>\u0a1b;" // LETTER CHA
|
||||
"\ue01c>\u0a1c;" // LETTER JA
|
||||
"\ue01d>\u0a1d;" // LETTER JHA
|
||||
"\ue01e>\u0a1e;" // LETTER NYA
|
||||
"\ue01f>\u0a1f;" // LETTER TTA
|
||||
"\ue020>\u0a20;" // LETTER TTHA
|
||||
"\ue021>\u0a21;" // LETTER DDA
|
||||
"\ue022>\u0a22;" // LETTER DDHA
|
||||
"\ue023>\u0a23;" // LETTER NNA
|
||||
"\ue024>\u0a24;" // LETTER TA
|
||||
"\ue025>\u0a25;" // LETTER THA
|
||||
"\ue026>\u0a26;" // LETTER DA
|
||||
"\ue027>\u0a27;" // LETTER DHA
|
||||
"\ue028>\u0a28;" // LETTER NA
|
||||
"\ue029>\u0a28\u0a3c;" // REMAP (indicExceptions.txt): \u0a29>\u0a28 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0a2a;" // LETTER PA
|
||||
"\ue02b>\u0a2b;" // LETTER PHA
|
||||
"\ue02c>\u0a2c;" // LETTER BA
|
||||
"\ue02d>\u0a2d;" // LETTER BHA
|
||||
"\ue02e>\u0a2e;" // LETTER MA
|
||||
"\ue02f>\u0a2f;" // LETTER YA
|
||||
"\ue030>\u0a30;" // LETTER RA
|
||||
"\ue031>\u0a30\u0a3c;" // FALLBACK LETTER RA+NUKTA
|
||||
"\ue032>\u0a32;" // LETTER LA
|
||||
"\ue033>\u0a33;" // LETTER LLA
|
||||
"\ue034>\u0a33;" // REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0a35;" // LETTER VA
|
||||
"\ue036>\u0a36;" // LETTER SHA
|
||||
"\ue037>\u0a36;" // REMAP (indicExceptions.txt): \u0a37>\u0a36 = LETTER SSA>LETTER SHA
|
||||
"\ue038>\u0a38;" // LETTER SA
|
||||
"\ue039>\u0a39;" // LETTER HA
|
||||
"\ue03c>\u0a3c;" // SIGN NUKTA
|
||||
"\ue03d>;" // FALLBACK BLOW AWAY SIGN AVAGRAHA
|
||||
"\ue03e>\u0a3e;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0a3f;" // VOWEL SIGN I
|
||||
"\ue040>\u0a40;" // VOWEL SIGN II
|
||||
"\ue041>\u0a41;" // VOWEL SIGN U
|
||||
"\ue042>\u0a42;" // VOWEL SIGN UU
|
||||
"\ue043>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC R
|
||||
"\ue044>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0a48;" // REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI
|
||||
"\ue046>\u0a47;" // FALLBACK
|
||||
"\ue047>\u0a47;" // VOWEL SIGN EE
|
||||
"\ue048>\u0a48;" // VOWEL SIGN AI
|
||||
"\ue049>\u0a4c;" // REMAP (indicExceptions.txt): \u0a49>\u0a4c = VOWEL SIGN CANDRA O>VOWEL SIGN AU
|
||||
"\ue04a>\u0a4b;" // FALLBACK
|
||||
"\ue04b>\u0a4b;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0a4c;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0a4d;" // SIGN VIRAMA
|
||||
"\ue050>\u0a0f\u0a02;" // FALLBACK to OO+BINDI : OM -- NOTE(review): \u0a0f is LETTER EE (LETTER OO is \u0a13); confirm intended target
|
||||
"\ue051>;"
|
||||
"\ue052>;"
|
||||
"\ue053>;"
|
||||
"\ue054>;"
|
||||
"\ue055>;" // FALLBACK BLOW AWAY LENGTH MARK
|
||||
"\ue056>\u0a48;" // REMAP (indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u0a4c;" // REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU
|
||||
"\ue058>\u0a15\u0a3c;" // FALLBACK KA + NUKTA
|
||||
"\ue059>\u0a59;" // LETTER KHHA
|
||||
"\ue05a>\u0a5a;" // LETTER GHHA
|
||||
"\ue05b>\u0a5b;" // LETTER ZA
|
||||
"\ue05c>\u0a5c;" // LETTER RRA
|
||||
"\ue05d>\u0a22\u0a3c;" // REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA
|
||||
"\ue05e>\u0a5e;" // LETTER FA
|
||||
"\ue05f>\u0a2f\u0a3c;" // REMAP (indicExceptions.txt): \u0a5f>\u0a2f = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
|
||||
"\ue061>\u0a32\u0a3c;" // FALLBACK: LETTER VOCALIC LL > LETTER LA.SIGN NUKTA
|
||||
"\ue062>\u0a3f\u0a3c;" // REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
|
||||
"\ue063>\u0a40\u0a3c;" // REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
|
||||
"\uE064>\u0964;" // DANDA
|
||||
"\uE065>\u0965;" // DOUBLE DANDA
|
||||
"\ue066>\u0a66;" // DIGIT ZERO
|
||||
"\ue067>\u0a67;" // DIGIT ONE
|
||||
"\ue068>\u0a68;" // DIGIT TWO
|
||||
"\ue069>\u0a69;" // DIGIT THREE
|
||||
"\ue06a>\u0a6a;" // DIGIT FOUR
|
||||
"\ue06b>\u0a6b;" // DIGIT FIVE
|
||||
"\ue06c>\u0a6c;" // DIGIT SIX
|
||||
"\ue06d>\u0a6d;" // DIGIT SEVEN
|
||||
"\ue06e>\u0a6e;" // DIGIT EIGHT
|
||||
"\ue06f>\u0a6f;" // DIGIT NINE
|
||||
"\ue070>;" // ABBREVIATION SIGN
|
||||
"\ue071>\u0a30;" // LETTER RA WITH MIDDLE DIAGONAL
|
||||
"\ue072>\u0a30;" // LETTER RA WITH LOWER DIAGONAL
|
||||
"\ue073>;" // RUPEE MARK
|
||||
"\ue074>;" // RUPEE SIGN
|
||||
"\ue075>;" // CURRENCY NUMERATOR ONE
|
||||
"\ue076>;" // CURRENCY NUMERATOR TWO
|
||||
"\ue077>;" // CURRENCY NUMERATOR THREE
|
||||
"\ue078>;" // CURRENCY NUMERATOR FOUR
|
||||
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
|
||||
"\ue07B>;" // ISSHAR
|
||||
"\uE07C>\u0a70;" // TIPPI
|
||||
"\uE07D>\u0a71;" // ADDAK
|
||||
"\uE07E>\u0a72;" // IRI
|
||||
"\uE07F>\u0a73;" // URA
|
||||
"\uE080>\u0a74;" // EK ONKAR
|
||||
"\uE081>\u0a35;" // FALLBACK FOR ORIYA LETTER WA
|
||||
|
||||
"0 > \u0a66;" // FALLBACK FOR TAMIL
|
||||
"1 > \u0a67;"
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,157 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_InterIndic_Kannada.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Kannada
|
||||
|
||||
t_InterIndic_Knda {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic-Kannada
|
||||
//:: NFD (NFC) ;
|
||||
"\ue033\ue03c>\u0cde;" // LETTER FA
|
||||
"\ue001>\u0c82;" // REMAP (indicExceptions.txt): \u0c81>\u0c82 = SIGN CANDRABINDU>SIGN ANUSVARA
|
||||
"\ue002>\u0c82;" // SIGN ANUSVARA
|
||||
"\ue003>\u0c83;" // SIGN VISARGA
|
||||
"\uE004>\u0c85;" // FALLBACK TO LETTER A
|
||||
"\ue005>\u0c85;" // LETTER A
|
||||
"\ue006>\u0c86;" // LETTER AA
|
||||
"\ue007>\u0c87;" // LETTER I
|
||||
"\ue008>\u0c88;" // LETTER II
|
||||
"\ue009>\u0c89;" // LETTER U
|
||||
"\ue00a>\u0c8a;" // LETTER UU
|
||||
"\ue00b>\u0c8b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0c8c;" // LETTER VOCALIC L
|
||||
"\ue00d>\u0c8e;" // FALLBACK (LETTER CANDRA E > LETTER E)
|
||||
"\ue00e>\u0c8e;" // LETTER E
|
||||
"\ue00f>\u0c8f;" // LETTER EE
|
||||
"\ue010>\u0c90;" // LETTER AI
|
||||
"\ue011>\u0c92;" // FALLBACK
|
||||
"\ue012>\u0c92;" // LETTER O
|
||||
"\ue013>\u0c93;" // LETTER OO
|
||||
"\ue014>\u0c94;" // LETTER AU
|
||||
"\ue015>\u0c95;" // LETTER KA
|
||||
"\ue016>\u0c96;" // LETTER KHA
|
||||
"\ue017>\u0c97;" // LETTER GA
|
||||
"\ue018>\u0c98;" // LETTER GHA
|
||||
"\ue019>\u0c99;" // LETTER NGA
|
||||
"\ue01a>\u0c9a;" // LETTER CA
|
||||
"\ue01b>\u0c9b;" // LETTER CHA
|
||||
"\ue01c>\u0c9c;" // LETTER JA
|
||||
"\ue01d>\u0c9d;" // LETTER JHA
|
||||
"\ue01e>\u0c9e;" // LETTER NYA
|
||||
"\ue01f>\u0c9f;" // LETTER TTA
|
||||
"\ue020>\u0ca0;" // LETTER TTHA
|
||||
"\ue021>\u0ca1;" // LETTER DDA
|
||||
"\ue022>\u0ca2;" // LETTER DDHA
|
||||
"\ue023>\u0ca3;" // LETTER NNA
|
||||
"\ue024>\u0ca4;" // LETTER TA
|
||||
"\ue025>\u0ca5;" // LETTER THA
|
||||
"\ue026>\u0ca6;" // LETTER DA
|
||||
"\ue027>\u0ca7;" // LETTER DHA
|
||||
"\ue028>\u0ca8;" // LETTER NA
|
||||
"\ue029>\u0ca8;" // REMAP (indicExceptions.txt): \u0ca9>\u0ca8 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0caa;" // LETTER PA
|
||||
"\ue02b>\u0cab;" // LETTER PHA
|
||||
"\ue02c>\u0cac;" // LETTER BA
|
||||
"\ue02d>\u0cad;" // LETTER BHA
|
||||
"\ue02e>\u0cae;" // LETTER MA
|
||||
"\ue02f>\u0caf;" // LETTER YA
|
||||
"\ue030\ue03c>\u0cb1;"
|
||||
"\ue030>\u0cb0;" // LETTER RA
|
||||
"\ue031>\u0cb1;" // LETTER RRA
|
||||
"\ue032>\u0cb2;" // LETTER LA
|
||||
"\ue033>\u0cb3;" // LETTER LLA
|
||||
"\ue034>\u0cde;" // LETTER LLLA; rule targets \u0cde (same as \ue033\ue03c), although the indicExceptions.txt REMAP note cited \u0cb4>\u0cb3 = LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0cb5;" // LETTER VA
|
||||
"\ue036>\u0cb6;" // LETTER SHA
|
||||
"\ue037>\u0cb7;" // LETTER SSA
|
||||
"\ue038>\u0cb8;" // LETTER SA
|
||||
"\ue039>\u0cb9;" // LETTER HA
|
||||
|
||||
"\ue03c>\u0cbc;" // NUKTA
|
||||
"\ue03d>\u0cbd;" // AVAGRAHA
|
||||
|
||||
"\ue03e>\u0cbe;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0cbf;" // VOWEL SIGN I
|
||||
"\ue040>\u0cc0;" // VOWEL SIGN II
|
||||
"\ue041>\u0cc1;" // VOWEL SIGN U
|
||||
"\ue042>\u0cc2;" // VOWEL SIGN UU
|
||||
"\ue043>\u0cc3;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u0cc4;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0cc6;" // REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
"\ue046>\u0cc6;" // VOWEL SIGN E
|
||||
"\ue047>\u0cc7;" // VOWEL SIGN EE
|
||||
"\ue048>\u0cc8;" // VOWEL SIGN AI
|
||||
"\ue049>\u0cca;" // REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
"\ue04a>\u0cca;" // VOWEL SIGN O
|
||||
"\ue04b>\u0ccb;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0ccc;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0ccd;" // SIGN VIRAMA
|
||||
"\ue050>\u0c93\u0c82;" // REMAP (indicExceptions.txt): \u0cd0>\u0c93\u0c82 = OM>LETTER OO.SIGN ANUSVARA
|
||||
"\ue051>;"
|
||||
"\ue052>;"
|
||||
"\ue053>;"
|
||||
"\ue054>;"
|
||||
"\ue055>\u0cd5;" // LENGTH MARK
|
||||
"\ue056>\u0cd6;" // AI LENGTH MARK
|
||||
"\ue057>\u0ccc;" // REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU
|
||||
"\ue058>\u0c95;" // FALLBACK
|
||||
"\ue059>\u0c96;" // REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA
|
||||
"\ue05a>\u0c97;" // REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA
|
||||
"\ue05b>\u0c9c;" // REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA
|
||||
"\ue05c>\u0ca2;" // FALLBACK
|
||||
"\ue05d>\u0ca2;" // REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA
|
||||
"\ue05e>\u0cde;" // LETTER FA
|
||||
"\ue05f>\u0caf;" // REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0ce0;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0ce1;" // LETTER VOCALIC LL
|
||||
"\ue062>\u0cbf;" // REMAP (indicExceptions.txt): \u0ce2>\u0cbf = VOWEL SIGN VOCALIC L>VOWEL SIGN I
|
||||
"\ue063>\u0cc0;" // REMAP (indicExceptions.txt): \u0ce3>\u0cc0 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
|
||||
"\ue064>'.' ;" // FALLBACK FOR DANDA
|
||||
"\ue065>'.' ;" // FALLBACK FOR DOUBLE DANDA
|
||||
"\ue066>\u0ce6;" // DIGIT ZERO
|
||||
"\ue067>\u0ce7;" // DIGIT ONE
|
||||
"\ue068>\u0ce8;" // DIGIT TWO
|
||||
"\ue069>\u0ce9;" // DIGIT THREE
|
||||
"\ue06a>\u0cea;" // DIGIT FOUR
|
||||
"\ue06b>\u0ceb;" // DIGIT FIVE
|
||||
"\ue06c>\u0cec;" // DIGIT SIX
|
||||
"\ue06d>\u0ced;" // DIGIT SEVEN
|
||||
"\ue06e>\u0cee;" // DIGIT EIGHT
|
||||
"\ue06f>\u0cef;" // DIGIT NINE
|
||||
"\ue070>;" // ABBREVIATION SIGN
|
||||
"\ue071>\u0cb0;" // LETTER RA WITH MIDDLE DIAGONAL
|
||||
"\ue072>\u0cb0;" // LETTER RA WITH LOWER DIAGONAL
|
||||
"\ue073>;" // RUPEE MARK
|
||||
"\ue074>;" // RUPEE SIGN
|
||||
"\ue075>;" // CURRENCY NUMERATOR ONE
|
||||
"\ue076>;" // CURRENCY NUMERATOR TWO
|
||||
"\ue077>;" // CURRENCY NUMERATOR THREE
|
||||
"\ue078>;" // CURRENCY NUMERATOR FOUR
|
||||
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
|
||||
"\ue07B>;" // ISSHAR
|
||||
"\uE07C>;" // TIPPI
|
||||
"\uE07D>;" // ADDAK
|
||||
"\uE07E>;" // IRI
|
||||
"\uE07F>;" // URA
|
||||
"\uE080>;" // EK ONKAR
|
||||
"\uE081>\u0cb5;" // FALLBACK FOR ORIYA LETTER WA
|
||||
"0 > \u0ce6;" // FALLBACK FOR TAMIL
|
||||
"1 > \u0ce7;"
|
||||
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,545 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_InterIndic_Latin.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Latin
|
||||
|
||||
t_InterIndic_Latn {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic-Latin
|
||||
//\ue000 reserved
|
||||
//consonants
|
||||
"$chandrabindu=\ue001;"
|
||||
"$anusvara=\ue002;"
|
||||
"$visarga=\ue003;"
|
||||
//\ue004 reserved
|
||||
// w<vowel> represents the stand-alone form
|
||||
"$wa=\ue005;"
|
||||
"$waa=\ue006;"
|
||||
"$wi=\ue007;"
|
||||
"$wii=\ue008;"
|
||||
"$wu=\ue009;"
|
||||
"$wuu=\ue00a;"
|
||||
"$wr=\ue00b;"
|
||||
"$wl=\ue00c;"
|
||||
"$wce=\ue00d;" // LETTER CANDRA E
|
||||
"$wse=\ue00e;" // LETTER SHORT E
|
||||
"$we=\ue00f;" // \u090f LETTER E
|
||||
"$wai=\ue010;"
|
||||
"$wco=\ue011;" // LETTER CANDRA O
|
||||
"$wso=\ue012;" // LETTER SHORT O
|
||||
"$wo=\ue013;" // \u0913 LETTER O
|
||||
"$wau=\ue014;"
|
||||
"$ka=\ue015;"
|
||||
"$kha=\ue016;"
|
||||
"$ga=\ue017;"
|
||||
"$gha=\ue018;"
|
||||
"$nga=\ue019;"
|
||||
"$ca=\ue01a;"
|
||||
"$cha=\ue01b;"
|
||||
"$ja=\ue01c;"
|
||||
"$jha=\ue01d;"
|
||||
"$nya=\ue01e;"
|
||||
"$tta=\ue01f;"
|
||||
"$ttha=\ue020;"
|
||||
"$dda=\ue021;"
|
||||
"$ddha=\ue022;"
|
||||
"$nna=\ue023;"
|
||||
"$ta=\ue024;"
|
||||
"$tha=\ue025;"
|
||||
"$da=\ue026;"
|
||||
"$dha=\ue027;"
|
||||
"$na=\ue028;"
|
||||
"$ena=\ue029;" //compatibility
|
||||
"$pa=\ue02a;"
|
||||
"$pha=\ue02b;"
|
||||
"$ba=\ue02c;"
|
||||
"$bha=\ue02d;"
|
||||
"$ma=\ue02e;"
|
||||
"$ya=\ue02f;"
|
||||
"$ra=\ue030;"
|
||||
"$vva=\ue081;"
|
||||
"$rra=\ue031;"
|
||||
"$la=\ue032;"
|
||||
"$lla=\ue033;"
|
||||
"$ela=\ue034;" //compatibility
|
||||
"$va=\ue035;"
|
||||
"$sha=\ue036;"
|
||||
"$ssa=\ue037;"
|
||||
"$sa=\ue038;"
|
||||
"$ha=\ue039;"
|
||||
//\u093a Reserved
|
||||
//\u093b Reserved
|
||||
"$nukta=\ue03c;"
|
||||
"$avagraha=\ue03d;" // SIGN AVAGRAHA
|
||||
// <vowel> represents the dependent form
|
||||
"$aa=\ue03e;"
|
||||
"$i=\ue03f;"
|
||||
"$ii=\ue040;"
|
||||
"$u=\ue041;"
|
||||
"$uu=\ue042;"
|
||||
"$rh=\ue043;"
|
||||
"$lh=\ue044;"
|
||||
"$ce=\ue045;" //VOWEL SIGN CANDRA E
|
||||
"$se=\ue046;" //VOWEL SIGN SHORT E
|
||||
"$e=\ue047;"
|
||||
"$ai=\ue048;"
|
||||
"$co=\ue049;" // VOWEL SIGN CANDRA O
|
||||
"$so=\ue04a;" // VOWEL SIGN SHORT O
|
||||
"$o=\ue04b;" // \u094b
|
||||
"$au=\ue04c;"
|
||||
"$virama=\ue04d;"
|
||||
// \u094e Reserved
|
||||
// \u094f Reserved
|
||||
"$om=\ue050;" // OM
|
||||
"\ue051>;" // UNMAPPED STRESS SIGN UDATTA
|
||||
"\ue052>;" // UNMAPPED STRESS SIGN ANUDATTA
|
||||
"\ue053>;" // UNMAPPED GRAVE ACCENT
|
||||
"\ue054>;" // UNMAPPED ACUTE ACCENT
|
||||
"$lm = \ue055;"// Telugu Length Mark
|
||||
"$ailm=\ue056;"// AI Length Mark
|
||||
"$aulm=\ue057;"// AU Length Mark
|
||||
//Urdu compatibility forms
|
||||
"$uka=\ue058;"
|
||||
"$ukha=\ue059;"
|
||||
"$ugha=\ue05a;"
|
||||
"$ujha=\ue05b;"
|
||||
"$uddha=\ue05c;"
|
||||
"$udha=\ue05d;"
|
||||
"$ufa=\ue05e;"
|
||||
"$uya=\ue05f;"
|
||||
"$wrr=\ue060;"
|
||||
"$wll=\ue061;"
|
||||
"$rrh=\ue062;"
|
||||
"$llh=\ue063;"
|
||||
"$danda=\ue064;"
|
||||
"$doubleDanda=\ue065;"
|
||||
"$zero=\ue066;" // DIGIT ZERO
|
||||
"$one=\ue067;" // DIGIT ONE
|
||||
"$two=\ue068;" // DIGIT TWO
|
||||
"$three=\ue069;" // DIGIT THREE
|
||||
"$four=\ue06a;" // DIGIT FOUR
|
||||
"$five=\ue06b;" // DIGIT FIVE
|
||||
"$six=\ue06c;" // DIGIT SIX
|
||||
"$seven=\ue06d;" // DIGIT SEVEN
|
||||
"$eight=\ue06e;" // DIGIT EIGHT
|
||||
"$nine=\ue06f;" // DIGIT NINE
|
||||
|
||||
// \u0970>; # UNMAPPED ABBREVIATION SIGN
|
||||
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
|
||||
"$depVowelBelow=[\ue041-\ue044];"
|
||||
// $x was originally called '&'; $z was '%'
|
||||
"$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];"
|
||||
"$z=[bcdfghjklmnpqrstvwxyz];"
|
||||
"$vowels=[aeiour\u0304\u0325\u0306];"
|
||||
"$forceIndependentMatra = [^[[:L:][\u0300-\u034c]]];"
|
||||
//#####################################################################
|
||||
// convert from Native letters to Latin letters
|
||||
//#####################################################################
|
||||
//transliterations for anusvara
|
||||
"$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;"
|
||||
"$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;"
|
||||
"$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;"
|
||||
"$anusvara} [$ta$tha$da$dha$na] > n ;"
|
||||
"$anusvara} [$pa$pha$ba$bha$ma] > m ;"
|
||||
"$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;"
|
||||
"$anusvara> m\u0307;"
|
||||
|
||||
// Urdu compatibility
|
||||
"$ya$nukta}$x > y\u0307 ;"
|
||||
"$ya$nukta$virama > y\u0307 ;"
|
||||
"$ya$nukta > y\u0307a ;"
|
||||
|
||||
"$la$nukta }$x > l\u0331 ;"
|
||||
"$la$nukta$virama > l\u0331 ;"
|
||||
"$la$nukta > l\u0331a ;"
|
||||
|
||||
"$na$nukta }$x > n\u0331 ;"
|
||||
"$na$nukta$virama > n\u0331 ;"
|
||||
"$na$nukta > n\u0331a ;"
|
||||
|
||||
"$ena }$x > n\u0331 ;"
|
||||
"$ena$virama > n\u0331 ;"
|
||||
"$ena > n\u0331a ;"
|
||||
"$uka > qa ;"
|
||||
"$ka$nukta }$x > q ;"
|
||||
"$ka$nukta$virama > q ;"
|
||||
"$ka$nukta > qa ;"
|
||||
"$kha$nukta }$x > k\u0331h\u0331 ;"
|
||||
"$kha$nukta$virama > k\u0331h\u0331 ;"
|
||||
"$kha$nukta > k\u0331h\u0331a ;"
|
||||
"$ukha$virama > k\u0331h\u0331;"
|
||||
"$ukha > k\u0331h\u0331a;"
|
||||
"$ugha > g\u0307a ;"
|
||||
"$ga$nukta }$x > g\u0307 ;"
|
||||
"$ga$nukta$virama > g\u0307 ;"
|
||||
"$ga$nukta > g\u0307a ;"
|
||||
|
||||
"$ujha > za ;"
|
||||
"$ja$nukta }$x > z ;"
|
||||
"$ja$nukta$virama > z ;"
|
||||
"$ja$nukta > za ;"
|
||||
"$ddha$nukta}$x > r\u0323h ;"
|
||||
"$ddha$nukta$virama > r\u0323h ;"
|
||||
"$ddha$nukta > r\u0323ha;"
|
||||
|
||||
"$uddha}$x > r\u0323 ;"
|
||||
"$uddha$virama > r\u0323 ;"
|
||||
"$uddha > r\u0323a;"
|
||||
|
||||
"$udha > r\u0323a ;"
|
||||
"$dda$nukta}$x > r\u0323 ;"
|
||||
"$dda$nukta$virama > r\u0323 ;"
|
||||
"$dda$nukta > r\u0323a ;"
|
||||
"$pha$nukta }$x > f ;"
|
||||
"$pha$nukta$virama > f ;"
|
||||
"$pha$nukta > fa ;"
|
||||
"$ufa }$x > f ;"
|
||||
"$ufa$virama > f ;"
|
||||
"$ufa > fa ;"
|
||||
|
||||
"$ra$nukta}$x > r\u0331;"
|
||||
"$ra$nukta$virama > r\u0331;"
|
||||
"$ra$nukta > r\u0331a;"
|
||||
"$lla$nukta}$x > l\u0331;"
|
||||
"$lla$nukta$virama > l\u0331;"
|
||||
"$lla$nukta > l\u0331a;"
|
||||
|
||||
"$ela}$x > l\u0331;"
|
||||
"$ela$virama > l\u0331;"
|
||||
"$ela > l\u0331a;"
|
||||
|
||||
"$uya}$x > y\u0307;"
|
||||
"$uya$virama > y\u0307;"
|
||||
"$uya > y\u0307a;"
|
||||
|
||||
|
||||
// normal consonants
|
||||
"$ka$virama}$ha>k'';"
|
||||
"$ka}$x>k;"
|
||||
"$ka$virama>k;"
|
||||
"$ka>ka;"
|
||||
"$kha}$x>kh;"
|
||||
"$kha$virama>kh;"
|
||||
"$kha>kha;"
|
||||
"$ga$virama}$ha>g'';"
|
||||
"$ga}$x>g;"
|
||||
"$ga$virama>g;"
|
||||
"$ga>ga;"
|
||||
|
||||
"$gha}$x>gh;"
|
||||
"$gha$virama>gh;"
|
||||
"$gha>gha;"
|
||||
|
||||
"$nga}$x>n\u0307;"
|
||||
"$nga$virama>n\u0307;"
|
||||
"$nga>n\u0307a ;"
|
||||
"$ca$virama}$ha>c'';"
|
||||
"$ca}$x>c;"
|
||||
"$ca$virama>c;"
|
||||
"$ca>ca;"
|
||||
|
||||
"$cha}$x>ch;"
|
||||
"$cha$virama>ch;"
|
||||
"$cha>cha;"
|
||||
"$ja$virama}$ha>j'';"
|
||||
"$ja}$x>j;"
|
||||
"$ja$virama>j;"
|
||||
"$ja>ja;"
|
||||
|
||||
"$jha}$x>jh;"
|
||||
"$jha$virama>jh;"
|
||||
"$jha>jha;"
|
||||
|
||||
"$nya }$x>n\u0303 ;"
|
||||
"$nya$virama>n\u0303;"
|
||||
"$nya > n\u0303a ;"
|
||||
|
||||
|
||||
"$tta$virama}$ha>t\u0323'';"
|
||||
"$tta}$x>t\u0323;"
|
||||
"$tta$virama>t\u0323;"
|
||||
"$tta>t\u0323a;"
|
||||
|
||||
"$ttha}$x>t\u0323h;"
|
||||
"$ttha$virama>t\u0323h;"
|
||||
"$ttha>t\u0323ha;"
|
||||
"$dda}$x$ha>d\u0323'';"
|
||||
"$dda}$x>d\u0323;"
|
||||
"$dda$virama>d\u0323;"
|
||||
"$dda>d\u0323a;"
|
||||
|
||||
"$ddha}$x>d\u0323h;"
|
||||
"$ddha$virama>d\u0323h;"
|
||||
"$ddha>d\u0323ha;"
|
||||
|
||||
"$nna}$x>n\u0323 ;"
|
||||
"$nna$virama>n\u0323;"
|
||||
"$nna>n\u0323a ;"
|
||||
|
||||
|
||||
"$ta$virama}$ha>t'';"
|
||||
"$ta$virama}$ttha>t'';"
|
||||
"$ta$virama}$tta>t'';"
|
||||
"$ta$virama}$tha>t'';"
|
||||
"$ta}$x>t;"
|
||||
"$ta$virama>t;"
|
||||
"$ta>ta;"
|
||||
"$tha}$x>th;"
|
||||
"$tha$virama>th;"
|
||||
"$tha>tha;"
|
||||
|
||||
"$da$virama}$ha>d'';"
|
||||
"$da$virama}$ddha>d'';"
|
||||
"$da$virama}$dda>d'';"
|
||||
"$da$virama}$dha>d'';"
|
||||
"$da}$x>d;"
|
||||
"$da$virama>d;"
|
||||
"$da>da;"
|
||||
"$dha}$x>dh;"
|
||||
"$dha$virama>dh;"
|
||||
"$dha>dha;"
|
||||
"$na$virama}$ga>n'';"
|
||||
"$na$virama}$ya>n'';"
|
||||
"$na}$x>n;"
|
||||
"$na$virama>n;"
|
||||
"$na>na;"
|
||||
|
||||
|
||||
"$pa$virama}$ha>p'';"
|
||||
"$pa}$x>p;"
|
||||
"$pa$virama>p;"
|
||||
"$pa>pa;"
|
||||
"$pha}$x>ph;"
|
||||
"$pha$virama>ph;"
|
||||
"$pha>pha;"
|
||||
"$ba$virama}$ha>b'';"
|
||||
"$ba}$x>b;"
|
||||
"$ba$virama>b;"
|
||||
"$ba>ba;"
|
||||
|
||||
"$bha}$x>bh;"
|
||||
"$bha$virama>bh;"
|
||||
"$bha>bha;"
|
||||
|
||||
"$ma$virama}$ma>m'';"
|
||||
"$ma}$x>m;"
|
||||
"$ma$virama>m;"
|
||||
"$ma>ma;"
|
||||
|
||||
"$ya}$x>y;"
|
||||
"$ya$virama>y;"
|
||||
"$ya>ya;"
|
||||
"$ra$virama}$ha>r'';"
|
||||
"$ra}$x>r;"
|
||||
"$ra$virama>r;"
|
||||
"$ra>ra;"
|
||||
"$vva$virama}$ha>w\u0307'';"
|
||||
"$vva}$x>w\u0307;"
|
||||
"$vva$virama>w\u0307;"
|
||||
"$vva>w\u0307a;"
|
||||
"$rra$virama}$ha>r\u0331'';"
|
||||
"$rra}$x>r\u0331;"
|
||||
"$rra$virama>r\u0331;"
|
||||
"$rra>r\u0331a;"
|
||||
"$la$virama}$ha>l'';"
|
||||
"$la}$x>l;"
|
||||
"$la$virama>l;"
|
||||
"$la>la;"
|
||||
"$lla$virama}$ha>l\u0323'';"
|
||||
"$lla}$x>l\u0323;"
|
||||
"$lla$virama>l\u0323;"
|
||||
"$lla>l\u0323a;"
|
||||
"$va}$x>v;"
|
||||
"$va$virama>v;"
|
||||
"$va>va;"
|
||||
"$sa$virama}$ha>s'';"
|
||||
"$sa$virama}$sha>s'';"
|
||||
"$sa$virama}$ssa>s'';"
|
||||
"$sa$virama}$sa>s'';"
|
||||
"$sa}$x>s;"
|
||||
"$sa$virama>s;"
|
||||
|
||||
//for gurmukhi
|
||||
"$sa$nukta}$x>s\u0301;"
|
||||
"$sa$nukta$virama>s\u0301;"
|
||||
"$sa$nukta>s\u0301a;"
|
||||
"$sa>sa;"
|
||||
|
||||
"$sha}$x>s\u0301;"
|
||||
"$sha$virama>s\u0301;"
|
||||
"$sha>s\u0301a;"
|
||||
|
||||
"$ssa}$x>s\u0323;"
|
||||
"$ssa$virama>s\u0323;"
|
||||
"$ssa>s\u0323a;"
|
||||
"$ha}$x>h;"
|
||||
"$ha$virama>h;"
|
||||
"$ha>ha;"
|
||||
|
||||
// dependent vowels (should never occur except following consonants)
|
||||
"$forceIndependentMatra{$aa > \u0314a\u0304 ;"
|
||||
"$forceIndependentMatra{$ai > \u0314ai ;"
|
||||
"$forceIndependentMatra{$au > \u0314au ;"
|
||||
"$forceIndependentMatra{$ii > \u0314i\u0304 ;"
|
||||
"$forceIndependentMatra{$i > \u0314i ;"
|
||||
"$forceIndependentMatra{$uu > \u0314u\u0304 ;"
|
||||
"$forceIndependentMatra{$u > \u0314u ;"
|
||||
"$forceIndependentMatra{$rrh > \u0314r\u0325\u0304 ;"
|
||||
"$forceIndependentMatra{$rh > \u0314r\u0325 ;"
|
||||
"$forceIndependentMatra{$llh > \u0314l\u0325\u0304 ;"
|
||||
"$forceIndependentMatra{$lh > \u0314l\u0325 ;"
|
||||
"$forceIndependentMatra{$e > \u0314e\u0304 ;"
|
||||
"$forceIndependentMatra{$o > \u0314o\u0304 ;"
|
||||
//extra vowels
|
||||
"$forceIndependentMatra{$ce > \u0314e\u0306 ;"
|
||||
"$forceIndependentMatra{$co > \u0314o\u0306 ;"
|
||||
"$forceIndependentMatra{$se > \u0314e ;"
|
||||
"$forceIndependentMatra{$so > \u0314o ;"
|
||||
"$forceIndependentMatra{$nukta >;" // Nukta cannot appear independently or as first character
|
||||
"$forceIndependentMatra{$virama >;" // Virama cannot appear independently or as first character
|
||||
"$aa > a\u0304 ;"
|
||||
"$ai > ai ;"
|
||||
"$au > au ;"
|
||||
"$ii > i\u0304 ;"
|
||||
"$i > i ;"
|
||||
"$uu > u\u0304 ;"
|
||||
"$u > u ;"
|
||||
"$rrh > r\u0325\u0304 ;"
|
||||
"$rh > r\u0325 ;"
|
||||
"$llh > l\u0325\u0304 ;"
|
||||
"$lh > l\u0325 ;"
|
||||
"$e > e\u0304 ;"
|
||||
"$o > o\u0304 ;"
|
||||
//extra vowels
|
||||
"$ce > e\u0306 ;"
|
||||
"$co > o\u0306 ;"
|
||||
"$se > e ;"
|
||||
"$so > o ;"
|
||||
//independent vowels when followed by dependent vowels. Generally illegal; handled only for round-tripping
|
||||
"$waa} $x > a\u0304\u0314 ;"
|
||||
"$wai} $x > ai\u0314 ;"
|
||||
"$wau} $x > au\u0314 ;"
|
||||
"$wii} $x > i\u0304\u0314 ;"
|
||||
"$wi } $x > i\u0314 ;"
|
||||
"$wuu} $x > u\u0304\u0314 ;"
|
||||
"$wu } $x > u\u0314 ;"
|
||||
"$wrr} $x > r\u0325\u0304\u0314 ;"
|
||||
"$wr } $x > r\u0325\u0314 ;"
|
||||
"$wll} $x > l\u0325\u0304\u0314 ;"
|
||||
"$wl } $x > l\u0325\u0314 ;"
|
||||
"$we } $x > e\u0304\u0314 ;"
|
||||
"$wo } $x > o\u0304\u0314 ;"
|
||||
"$wa } $x > a\u0314 ;"
|
||||
//extra vowels
|
||||
"$wce} $x > e\u0306\u0314 ;"
|
||||
"$wco} $x > o\u0306\u0314 ;"
|
||||
"$wse} $x > e\u0314 ;"
|
||||
"$wso} $x > o\u0314 ;"
|
||||
"$om} $x > ''om\u0314 ;"
|
||||
|
||||
// independent vowels when preceded by vowels
|
||||
"$vowels{$waa > ''a\u0304 ;"
|
||||
"$vowels{$wai > ''ai ;"
|
||||
"$vowels{$wau > ''au ;"
|
||||
"$vowels{$wii > ''i\u0304 ;"
|
||||
"$vowels{$wi > ''i ;"
|
||||
"$vowels{$wuu > ''u\u0304 ;"
|
||||
"$vowels{$wu > ''u ;"
|
||||
"$vowels{$wrr > ''r\u0325\u0304 ;"
|
||||
"$vowels{$wr > ''r\u0325 ;"
|
||||
"$vowels{$wll > ''l\u0325\u0304 ;"
|
||||
"$vowels{$wl > ''l\u0325 ;"
|
||||
"$vowels{$we > ''e\u0304 ;"
|
||||
"$vowels{$wo > ''o\u0304 ;"
|
||||
"$vowels{$wa > ''a ;"
|
||||
//extra vowels
|
||||
"$vowels{$wce > ''e\u0306 ;"
|
||||
"$vowels{$wco > ''o\u0306 ;"
|
||||
"$vowels{$wse > ''e ;"
|
||||
"$vowels{$wso > ''o ;"
|
||||
|
||||
// independent vowels (otherwise)
|
||||
"$waa > a\u0304 ;"
|
||||
"$wai > ai ;"
|
||||
"$wau > au ;"
|
||||
"$wii > i\u0304 ;"
|
||||
"$wi > i ;"
|
||||
"$wuu > u\u0304 ;"
|
||||
"$wu > u ;"
|
||||
"$wrr > r\u0325\u0304 ;"
|
||||
"$wr > r\u0325 ;"
|
||||
"$wll > l\u0325\u0304 ;"
|
||||
"$wl > l\u0325 ;"
|
||||
"$we > e\u0304 ;"
|
||||
"$wo > o\u0304 ;"
|
||||
"$wa > a ;"
|
||||
//extra vowels
|
||||
"$wce > e\u0306 ;"
|
||||
"$wco > o\u0306 ;"
|
||||
"$wse > e ;"
|
||||
"$wso > o ;"
|
||||
"$om > ''om ;"
|
||||
|
||||
//stress marks
|
||||
"$avagraha > \u0315;"
|
||||
"$chandrabindu$anusvara>\u0303;"
|
||||
"$chandrabindu > m\u0310;"
|
||||
"$visarga>h\u0323;"
|
||||
//numbers
|
||||
"$zero > 0;"
|
||||
"$one > 1;"
|
||||
"$two > 2;"
|
||||
"$three > 3;"
|
||||
"$four > 4;"
|
||||
"$five > 5;"
|
||||
"$six > 6;"
|
||||
"$seven > 7;"
|
||||
"$eight > 8;"
|
||||
"$nine > 9;"
|
||||
"$lm >;"
|
||||
"$ailm >;"
|
||||
"$aulm >;"
|
||||
|
||||
"$danda>'.';"
|
||||
"$doubleDanda>'.';"
|
||||
|
||||
"\ue070>;" // ABBREVIATION SIGN
|
||||
// LETTER RA WITH MIDDLE DIAGONAL
|
||||
"\ue071}$x>ra;"
|
||||
"\ue071$virama>r;"
|
||||
"\ue071>ra;"
|
||||
// LETTER RA WITH LOWER DIAGONAL
|
||||
"\ue072}$x>ra;"
|
||||
"\ue072$virama>r;"
|
||||
"\ue072>ra;"
|
||||
|
||||
"\ue073>;" // RUPEE MARK
|
||||
"\ue074>;" // RUPEE SIGN
|
||||
"\ue075>;" // CURRENCY NUMERATOR ONE
|
||||
"\ue076>;" // CURRENCY NUMERATOR TWO
|
||||
"\ue077>;" // CURRENCY NUMERATOR THREE
|
||||
"\ue078>;" // CURRENCY NUMERATOR FOUR
|
||||
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
|
||||
"\ue07B>;" // ISSHAR
|
||||
"\uE07C>;" // TIPPI
|
||||
"\uE07D>;" // ADDAK
|
||||
"\uE07E>;" // IRI
|
||||
"\uE07F>;" // URA
|
||||
"\uE080>;" // EK ONKAR
|
||||
"\uE004>;" // DEVANAGARI VOWEL SIGN SHORT A
|
||||
|
||||
}
|
||||
}
|
|
@ -1,157 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_InterIndic_Malayalam.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Malayalam
|
||||
|
||||
t_InterIndic_Mlym {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic-Malayalam
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0d02;" // REMAP (indicExceptions.txt): \u0d01>\u0d02 = SIGN CANDRABINDU>SIGN ANUSVARA
|
||||
"\ue002>\u0d02;" // SIGN ANUSVARA
|
||||
"\ue003>\u0d03;" // SIGN VISARGA
|
||||
"\uE004>\u0d05;" // FALLBACK TO LETTER A
|
||||
"\ue005>\u0d05;" // LETTER A
|
||||
"\ue006>\u0d06;" // LETTER AA
|
||||
"\ue007>\u0d07;" // LETTER I
|
||||
"\ue008>\u0d08;" // LETTER II
|
||||
"\ue009>\u0d09;" // LETTER U
|
||||
"\ue00a>\u0d0a;" // LETTER UU
|
||||
"\ue00b>\u0d0b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0d0c;" // LETTER VOCALIC L
|
||||
"\ue00d>\u0d0e;" // FALLBACK LETTER E
|
||||
"\ue00e>\u0d0e;" // LETTER E
|
||||
"\ue00f>\u0d0f;" // LETTER EE
|
||||
"\ue010>\u0d10;" // LETTER AI
|
||||
"\ue011>\u0d12;" // FALLBACK TO O
|
||||
"\ue012>\u0d12;" // LETTER O
|
||||
"\ue013>\u0d13;" // LETTER OO
|
||||
"\ue014>\u0d14;" // LETTER AU
|
||||
"\ue015>\u0d15;" // LETTER KA
|
||||
"\ue016>\u0d16;" // LETTER KHA
|
||||
"\ue017>\u0d17;" // LETTER GA
|
||||
"\ue018>\u0d18;" // LETTER GHA
|
||||
"\ue019>\u0d19;" // LETTER NGA
|
||||
"\ue01a>\u0d1a;" // LETTER CA
|
||||
"\ue01b>\u0d1b;" // LETTER CHA
|
||||
"\ue01c>\u0d1c;" // LETTER JA
|
||||
"\ue01d>\u0d1d;" // LETTER JHA
|
||||
"\ue01e>\u0d1e;" // LETTER NYA
|
||||
"\ue01f>\u0d1f;" // LETTER TTA
|
||||
"\ue020>\u0d20;" // LETTER TTHA
|
||||
"\ue021>\u0d21;" // LETTER DDA
|
||||
"\ue022>\u0d22;" // LETTER DDHA
|
||||
"\ue023>\u0d23;" // LETTER NNA
|
||||
"\ue024>\u0d24;" // LETTER TA
|
||||
"\ue025>\u0d25;" // LETTER THA
|
||||
"\ue026>\u0d26;" // LETTER DA
|
||||
"\ue027>\u0d27;" // LETTER DHA
|
||||
"\ue028>\u0d28;" // LETTER NA
|
||||
"\ue029>\u0d28;" // REMAP (indicExceptions.txt): \u0d29>\u0d28 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0d2a;" // LETTER PA
|
||||
"\ue02b>\u0d2b;" // LETTER PHA
|
||||
"\ue02c>\u0d2c;" // LETTER BA
|
||||
"\ue02d>\u0d2d;" // LETTER BHA
|
||||
"\ue02e>\u0d2e;" // LETTER MA
|
||||
"\ue02f>\u0d2f;" // LETTER YA
|
||||
"\ue030\ue03c>\u0d31;"
|
||||
"\ue030>\u0d30;" // LETTER RA
|
||||
"\ue031>\u0d31;" // LETTER RRA
|
||||
"\ue032>\u0d32;" // LETTER LA
|
||||
"\ue033\ue03c>\u0d34;"
|
||||
"\ue033>\u0d33;" // LETTER LLA
|
||||
"\ue034>\u0d34;" // LETTER LLLA
|
||||
"\ue035>\u0d35;" // LETTER VA
|
||||
"\ue036>\u0d36;" // LETTER SHA
|
||||
"\ue037>\u0d37;" // LETTER SSA
|
||||
"\ue038>\u0d38;" // LETTER SA
|
||||
"\ue039>\u0d39;" // LETTER HA
|
||||
|
||||
"\ue03c>;" // FALLBACK BLOW AWAY NUKTA
|
||||
"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA
|
||||
|
||||
"\ue03e>\u0d3e;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0d3f;" // VOWEL SIGN I
|
||||
"\ue040>\u0d40;" // VOWEL SIGN II
|
||||
"\ue041>\u0d41;" // VOWEL SIGN U
|
||||
"\ue042>\u0d42;" // VOWEL SIGN UU
|
||||
"\ue043>\u0d43;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0d3e;" // REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA
|
||||
"\ue046>\u0d46;" // VOWEL SIGN E
|
||||
"\ue047>\u0d47;" // VOWEL SIGN EE
|
||||
"\ue048>\u0d48;" // VOWEL SIGN AI
|
||||
"\ue049>\u0d4b;" // REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO
|
||||
"\ue04a>\u0d4a;" // VOWEL SIGN O
|
||||
"\ue04b>\u0d4b;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0d4c;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0d4d;" // SIGN VIRAMA
|
||||
"\ue050>\u0d13\u0d02;" // FALLBACK for OM: LETTER OO.SIGN ANUSVARA
|
||||
"\ue051>;"
|
||||
"\ue052>;"
|
||||
"\ue053>;"
|
||||
"\ue054>;"
|
||||
"\ue055>;" // FALLBACK BLOW AWAY LENGTH MARK
|
||||
"\ue056>\u0d48;" // REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u0d57;" // AU LENGTH MARK
|
||||
"\ue058>\u0d15;" // FALLBACK
|
||||
"\ue059>\u0d16;" // REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA
|
||||
"\ue05a>\u0d17;" // REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA
|
||||
"\ue05b>\u0d1c;" // REMAP (indicExceptions.txt): \u0d5b>\u0d1c = LETTER ZA>LETTER JA
|
||||
"\ue05d>\u0d22;" // REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA
|
||||
"\ue05c>\u0d21;" // FALLBACK
|
||||
"\ue05e>\u0d2b;" // REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA
|
||||
"\ue05f>\u0d2f;" // REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0d60;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0d61;" // LETTER VOCALIC LL
|
||||
"\ue062>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC L
|
||||
"\ue063>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC LL
|
||||
"\ue064>'.' ;" // FALLBACK FOR DANDA
|
||||
"\ue065>'.' ;" // FALLBACK FOR DOUBLE DANDA
|
||||
"\ue066>\u0d66;" // DIGIT ZERO
|
||||
"\ue067>\u0d67;" // DIGIT ONE
|
||||
"\ue068>\u0d68;" // DIGIT TWO
|
||||
"\ue069>\u0d69;" // DIGIT THREE
|
||||
"\ue06a>\u0d6a;" // DIGIT FOUR
|
||||
"\ue06b>\u0d6b;" // DIGIT FIVE
|
||||
"\ue06c>\u0d6c;" // DIGIT SIX
|
||||
"\ue06d>\u0d6d;" // DIGIT SEVEN
|
||||
"\ue06e>\u0d6e;" // DIGIT EIGHT
|
||||
"\ue06f>\u0d6f;" // DIGIT NINE
|
||||
"\ue070>;" // ABBREVIATION SIGN
|
||||
"\ue071>\u0d30;" // LETTER RA WITH MIDDLE DIAGONAL
|
||||
"\ue072>\u0d30;" // LETTER RA WITH LOWER DIAGONAL
|
||||
"\ue073>;" // RUPEE MARK
|
||||
"\ue074>;" // RUPEE SIGN
|
||||
"\ue075>;" // CURRENCY NUMERATOR ONE
|
||||
"\ue076>;" // CURRENCY NUMERATOR TWO
|
||||
"\ue077>;" // CURRENCY NUMERATOR THREE
|
||||
"\ue078>;" // CURRENCY NUMERATOR FOUR
|
||||
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
|
||||
"\ue07B>;" // ISSHAR
|
||||
"\uE07C>;" // TIPPI
|
||||
"\uE07D>;" // ADDAK
|
||||
"\uE07E>;" // IRI
|
||||
"\uE07F>;" // URA
|
||||
"\uE080>;" // EK ONKAR
|
||||
"\uE081>\u0d35;" // FALLBACK FOR ORIYA LETTER WA
|
||||
"0 > \u0d66;" // FALLBACK FOR TAMIL
|
||||
"1 > \u0d67;"
|
||||
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,153 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_InterIndic_Oriya.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Oriya
|
||||
|
||||
t_InterIndic_Orya {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic-Oriya
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0b01;" // SIGN CANDRABINDU
|
||||
"\ue002>\u0b02;" // SIGN ANUSVARA
|
||||
"\ue003>\u0b03;" // SIGN VISARGA
|
||||
"\uE004>\u0b05;" // FALLBACK TO LETTER A
|
||||
"\ue005>\u0b05;" // LETTER A
|
||||
"\ue006>\u0b06;" // LETTER AA
|
||||
"\ue007>\u0b07;" // LETTER I
|
||||
"\ue008>\u0b08;" // LETTER II
|
||||
"\ue009>\u0b09;" // LETTER U
|
||||
"\ue00a>\u0b0a;" // LETTER UU
|
||||
"\ue00b>\u0b0b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0b0c;" // LETTER VOCALIC L
|
||||
"\ue00d>\u0b0f;" // FALLBACK
|
||||
"\ue00e>\u0b0f;" // FALLBACK
|
||||
"\ue00f>\u0b0f;" // LETTER E
|
||||
"\ue010>\u0b10;" // LETTER AI
|
||||
"\ue011>\u0b13;" // FALLBACK
|
||||
"\ue012>\u0b13;" // FALLBACK
|
||||
"\ue013>\u0b13;" // FALLBACK LETTER OO (\u0b13 = LETTER O)
|
||||
"\ue014>\u0b14;" // LETTER AU
|
||||
"\ue015>\u0b15;" // LETTER KA
|
||||
"\ue016>\u0b16;" // LETTER KHA
|
||||
"\ue017>\u0b17;" // LETTER GA
|
||||
"\ue018>\u0b18;" // LETTER GHA
|
||||
"\ue019>\u0b19;" // LETTER NGA
|
||||
"\ue01a>\u0b1a;" // LETTER CA
|
||||
"\ue01b>\u0b1b;" // LETTER CHA
|
||||
"\ue01c>\u0b1c;" // LETTER JA
|
||||
"\ue01d>\u0b1d;" // LETTER JHA
|
||||
"\ue01e>\u0b1e;" // LETTER NYA
|
||||
"\ue01f>\u0b1f;" // LETTER TTA
|
||||
"\ue020>\u0b20;" // LETTER TTHA
|
||||
"\ue021>\u0b21;" // LETTER DDA
|
||||
"\ue022>\u0b22;" // LETTER DDHA
|
||||
"\ue023>\u0b23;" // LETTER NNA
|
||||
"\ue024>\u0b24;" // LETTER TA
|
||||
"\ue025>\u0b25;" // LETTER THA
|
||||
"\ue026>\u0b26;" // LETTER DA
|
||||
"\ue027>\u0b27;" // LETTER DHA
|
||||
"\ue028>\u0b28;" // LETTER NA
|
||||
"\ue029>\u0b28\u0b3c;" // FALLBACK \u0b29>\u0b28\u0b3c = LETTER NNNA>LETTER NA.SIGN NUKTA
|
||||
"\ue02a>\u0b2a;" // LETTER PA
|
||||
"\ue02b>\u0b2b;" // LETTER PHA
|
||||
"\ue02c>\u0b2c;" // LETTER BA
|
||||
"\ue02d>\u0b2d;" // LETTER BHA
|
||||
"\ue02e>\u0b2e;" // LETTER MA
|
||||
"\ue02f>\u0b2f;" // LETTER YA
|
||||
"\ue030>\u0b30;" // LETTER RA
|
||||
"\ue031>\u0b5c;" // LETTER RRA
|
||||
"\ue032>\u0b32;" // LETTER LA
|
||||
"\ue033>\u0b33;" // LETTER LLA
|
||||
"\ue034>\u0b33\u0b3c;" // FALLBACK LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0b35;" // LETTER VA
|
||||
"\ue036>\u0b36;" // LETTER SHA
|
||||
"\ue037>\u0b37;" // LETTER SSA
|
||||
"\ue038>\u0b38;" // LETTER SA
|
||||
"\ue039>\u0b39;" // LETTER HA
|
||||
"\ue03c>\u0b3c;" // SIGN NUKTA
|
||||
"\ue03d>\u0b3d;" // SIGN AVAGRAHA
|
||||
"\ue03e>\u0b3e;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0b3f;" // VOWEL SIGN I
|
||||
"\ue040>\u0b40;" // VOWEL SIGN II
|
||||
"\ue041>\u0b41;" // VOWEL SIGN U
|
||||
"\ue042>\u0b42;" // VOWEL SIGN UU
|
||||
"\ue043>\u0b43;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u0b43\u0b3c;" // FALLBACK \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA
|
||||
"\ue045>\u0b47;" // FALLBACK
|
||||
"\ue046>\u0b47;" // FALLBACK
|
||||
"\ue047>\u0b47;" // VOWEL SIGN E
|
||||
"\ue048>\u0b48;" // VOWEL SIGN AI
|
||||
"\ue049>\u0b4b;" // FALLBACK
|
||||
"\ue04a>\u0b4b;" // FALLBACK
|
||||
"\ue04b>\u0b4b;" // VOWEL SIGN O
|
||||
"\ue04c>\u0b4c;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0b4d;" // SIGN VIRAMA
|
||||
"\ue050>\u0b13\u0b01;" // FALLBACK \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU
|
||||
"\ue051>;"
|
||||
"\ue052>;"
|
||||
"\ue053>;"
|
||||
"\ue054>;"
|
||||
"\ue055>;" // UNMAPPED InterIndic-Oriya: LENGTH MARK
|
||||
"\ue056>\u0b56;" // AI LENGTH MARK
|
||||
"\ue057>\u0b57;" // AU LENGTH MARK
|
||||
"\ue059>\u0b16\u0b3c;" // FALLBACK \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA
|
||||
"\ue058>\u0b15\u0b3c;" // FALLBACK
|
||||
"\ue05a>\u0b17\u0b3c;" // FALLBACK \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA
|
||||
"\ue05b>\u0b1c\u0b3c;" // FALLBACK \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA
|
||||
"\ue05c>\u0b21\u0b3c;" // FALLBACK
|
||||
"\ue05d>\u0b5d;" // LETTER RHA
|
||||
"\ue05e>\u0b2b\u0b3c;" // FALLBACK \u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA
|
||||
"\ue05f>\u0b5f;" // LETTER YYA
|
||||
"\ue060>\u0b60;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0b61;" // LETTER VOCALIC LL
|
||||
"\ue062>\u0b56\u0b3c;" // FALLBACK \u0b62>\u0b56\u0b3c = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA
|
||||
"\ue063>\u0b57\u0b3c;" // FALLBACK \u0b63>\u0b57\u0b3c = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA
|
||||
"\uE064>\u0964;" // DANDA
|
||||
"\uE065>\u0965;" // DOUBLE DANDA
|
||||
"\ue066>\u0b66;" // DIGIT ZERO
|
||||
"\ue067>\u0b67;" // DIGIT ONE
|
||||
"\ue068>\u0b68;" // DIGIT TWO
|
||||
"\ue069>\u0b69;" // DIGIT THREE
|
||||
"\ue06a>\u0b6a;" // DIGIT FOUR
|
||||
"\ue06b>\u0b6b;" // DIGIT FIVE
|
||||
"\ue06c>\u0b6c;" // DIGIT SIX
|
||||
"\ue06d>\u0b6d;" // DIGIT SEVEN
|
||||
"\ue06e>\u0b6e;" // DIGIT EIGHT
|
||||
"\ue06f>\u0b6f;" // DIGIT NINE
|
||||
"\ue070>;" // ABBREVIATION SIGN
|
||||
"\ue071>\u0b30;" // LETTER RA WITH MIDDLE DIAGONAL
|
||||
"\ue072>\u0b30;" // LETTER RA WITH LOWER DIAGONAL
|
||||
"\ue073>;" // RUPEE MARK
|
||||
"\ue074>;" // RUPEE SIGN
|
||||
"\ue075>;" // CURRENCY NUMERATOR ONE
|
||||
"\ue076>;" // CURRENCY NUMERATOR TWO
|
||||
"\ue077>;" // CURRENCY NUMERATOR THREE
|
||||
"\ue078>;" // CURRENCY NUMERATOR FOUR
|
||||
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
|
||||
"\ue07B>\u0B70;" // ISSHAR
|
||||
"\uE07C>;" // TIPPI
|
||||
"\uE07D>;" // ADDAK
|
||||
"\uE07E>;" // IRI
|
||||
"\uE07F>;" // URA
|
||||
"\uE080>;" // EK ONKAR
|
||||
"\uE081>\u0B71;" // LETTER WA
|
||||
"0 > \u0b66;" // FALLBACK FOR TAMIL
|
||||
"1 > \u0b67;"
|
||||
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,167 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_InterIndic_Tamil.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Tamil
|
||||
|
||||
t_InterIndic_Taml {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic-Tamil
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0b82;" // FALLBACK SIGN CANDRABINDU
|
||||
"\ue002>\u0b82;" // SIGN ANUSVARA
|
||||
"\ue003>\u0b83;" // SIGN VISARGA
|
||||
"\uE004>\u0b85;" // FALLBACK TO LETTER A
|
||||
"\ue005>\u0b85;" // LETTER A
|
||||
"\ue006>\u0b86;" // LETTER AA
|
||||
"\ue007>\u0b87;" // LETTER I
|
||||
"\ue008>\u0b88;" // LETTER II
|
||||
"\ue009>\u0b89;" // LETTER U
|
||||
"\ue00a>\u0b8a;" // LETTER UU
|
||||
"\ue00b>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
|
||||
"\ue00c>\u0bb2;" // FALLBACK LETTER LA
|
||||
"\ue00d>\u0b8f;" // FALLBACK
|
||||
"\ue00e>\u0b8e;" // LETTER E
|
||||
"\ue00f>\u0b8f;" // LETTER EE
|
||||
"\ue010>\u0b90;" // LETTER AI
|
||||
"\ue011>\u0b92;" // FALLBACK
|
||||
"\ue012>\u0b92;" // LETTER O
|
||||
"\ue013>\u0b93;" // LETTER OO
|
||||
"\ue014>\u0b94;" // LETTER AU
|
||||
"\ue015>\u0b95;" // LETTER KA
|
||||
"\ue016>\u0b95;" // REMAP (indicExceptions.txt): \u0b96>\u0b95 = LETTER KHA>LETTER KA
|
||||
"\ue017>\u0b95;" // REMAP (indicExceptions.txt): \u0b97>\u0b95 = LETTER GA>LETTER KA
|
||||
"\ue018>\u0b95;" // REMAP (indicExceptions.txt): \u0b98>\u0b95 = LETTER GHA>LETTER KA
|
||||
"\ue019>\u0b99;" // LETTER NGA
|
||||
"\ue01a>\u0b9a;" // LETTER CA
|
||||
"\ue01b>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9b>\u0b9a = LETTER CHA>LETTER CA
|
||||
"\ue01c>\u0b9c;" // LETTER JA
|
||||
"\ue01d>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9d>\u0b9a = LETTER JHA>LETTER CA
|
||||
"\ue01e>\u0b9e;" // LETTER NYA
|
||||
"\ue01f>\u0b9f;" // LETTER TTA
|
||||
"\ue020>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba0>\u0b9f = LETTER TTHA>LETTER TTA
|
||||
"\ue021>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba1>\u0b9f = LETTER DDA>LETTER TTA
|
||||
"\ue022>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba2>\u0b9f = LETTER DDHA>LETTER TTA
|
||||
"\ue023>\u0ba3;" // LETTER NNA
|
||||
"\ue024>\u0ba4;" // LETTER TA
|
||||
"\ue025>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba5>\u0ba4 = LETTER THA>LETTER TA
|
||||
"\ue026>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba6>\u0ba4 = LETTER DA>LETTER TA
|
||||
"\ue027>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba7>\u0ba4 = LETTER DHA>LETTER TA
|
||||
"\ue028\ue03c>\u0ba9;"
|
||||
"\ue028>\u0ba8;" // LETTER NA
|
||||
"\ue029>\u0ba9;" // LETTER NNNA
|
||||
"\ue02a>\u0baa;" // LETTER PA
|
||||
"\ue02b>\u0baa;" // REMAP (indicExceptions.txt): \u0bab>\u0baa = LETTER PHA>LETTER PA
|
||||
"\ue02c>\u0baa;" // REMAP (indicExceptions.txt): \u0bac>\u0baa = LETTER BA>LETTER PA
|
||||
"\ue02d>\u0baa;" // REMAP (indicExceptions.txt): \u0bad>\u0baa = LETTER BHA>LETTER PA
|
||||
"\ue02e>\u0bae;" // LETTER MA
|
||||
"\ue02f>\u0baf;" // LETTER YA
|
||||
"\ue030\ue03c>\u0bb1;"
|
||||
"\ue030>\u0bb0;" // LETTER RA
|
||||
"\ue031>\u0bb1;" // LETTER RRA
|
||||
"\ue032>\u0bb2;" // LETTER LA
|
||||
"\ue033\ue03c>\u0bb4;"
|
||||
"\ue033>\u0bb3;" // LETTER LLA
|
||||
"\ue034>\u0bb4;" // LETTER LLLA
|
||||
"\ue035>\u0bb5;" // LETTER VA
|
||||
"\ue036>\u0bb7;" // REMAP (indicExceptions.txt): \u0bb6>\u0bb7 = LETTER SHA>LETTER SSA
|
||||
"\ue037>\u0bb7;" // LETTER SSA
|
||||
"\ue038>\u0bb8;" // LETTER SA
|
||||
"\ue039>\u0bb9;" // LETTER HA
|
||||
|
||||
"\ue03c>;" // FALLBACK BLOW AWAY NUKTA
|
||||
"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA
|
||||
|
||||
"\ue03e>\u0bbe;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0bbf;" // VOWEL SIGN I
|
||||
"\ue040>\u0bc0;" // VOWEL SIGN II
|
||||
"\ue041>\u0bc1;" // VOWEL SIGN U
|
||||
"\ue042>\u0bc2;" // VOWEL SIGN UU
|
||||
"\ue043>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
|
||||
"\ue044>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
|
||||
"\ue045>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA
|
||||
"\ue046>\u0bc6;" // VOWEL SIGN E
|
||||
"\ue047>\u0bc7;" // VOWEL SIGN EE
|
||||
"\ue048>\u0bc8;" // VOWEL SIGN AI
|
||||
"\ue049>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA O>VOWEL SIGN AA
|
||||
"\ue04a>\u0bca;" // VOWEL SIGN O
|
||||
"\ue04b>\u0bcb;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0bcc;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0bcd;" // SIGN VIRAMA
|
||||
"\ue050>\u0b93\u0bae\u0bcd;" // REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA
|
||||
"\ue051>;"
|
||||
"\ue052>;"
|
||||
"\ue053>;"
|
||||
"\ue054>;"
|
||||
"\ue055>;" // UNMAPPED InterIndic-Tamil: LENGTH MARK
|
||||
"\ue056>\u0bc8;" // REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
"\ue057>\u0bd7;" // AU LENGTH MARK
|
||||
"\ue058>\u0b95;" // FALLBACK
|
||||
"\ue059>\u0b95;" // REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA
|
||||
"\ue05a>\u0b95;" // REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA
|
||||
"\ue05b>\u0b9c;" // REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA
|
||||
"\ue05c>\u0ba4;" // FALLBACK
|
||||
"\ue05d>\u0b9f;" // REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA
|
||||
"\ue05e>\u0baa;" // REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA
|
||||
"\ue05f>\u0baf;" // REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
|
||||
"\ue061>\u0bb3;" // FALLBACK LETTER LLA
|
||||
"\ue062>\u0bbf;" // FALLBACK VOWEL SIGN VOCALIC L
|
||||
"\ue063>\u0bc0;" // FALLBACK VOWEL SIGN VOCALIC LL
|
||||
"\ue064>'.' ;" // FALLBACK FOR DANDA
|
||||
"\ue065>'.' ;" // FALLBACK FOR DOUBLE DANDA
|
||||
|
||||
"\ue066>\u0030;" // FALLBACK DIGIT ZERO
|
||||
|
||||
"\ue067\ue066\ue066\ue066>\u0bF2;"
|
||||
"\ue067\ue066\ue066>\u0bf1;"
|
||||
"\ue067\ue066>\u0bF0;"
|
||||
|
||||
"\ue067>\u0be7;" // DIGIT ONE
|
||||
"\ue068>\u0be8;" // DIGIT TWO
|
||||
"\ue069>\u0be9;" // DIGIT THREE
|
||||
"\ue06a>\u0bea;" // DIGIT FOUR
|
||||
"\ue06b>\u0beb;" // DIGIT FIVE
|
||||
"\ue06c>\u0bec;" // DIGIT SIX
|
||||
"\ue06d>\u0bed;" // DIGIT SEVEN
|
||||
"\ue06e>\u0bee;" // DIGIT EIGHT
|
||||
"\ue06f>\u0bef;" // DIGIT NINE
|
||||
|
||||
"\ue070>;" // ABBREVIATION SIGN
|
||||
"\ue071>\u0bc0;" // LETTER RA WITH MIDDLE DIAGONAL
|
||||
"\ue072>\u0bc0;" // LETTER RA WITH LOWER DIAGONAL
|
||||
"\ue073>;" // RUPEE MARK
|
||||
"\ue074>;" // RUPEE SIGN
|
||||
"\ue075>;" // CURRENCY NUMERATOR ONE
|
||||
"\ue076>;" // CURRENCY NUMERATOR TWO
|
||||
"\ue077>;" // CURRENCY NUMERATOR THREE
|
||||
"\ue078>;" // CURRENCY NUMERATOR FOUR
|
||||
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
|
||||
"\ue07B>;" // ISSHAR
|
||||
"\uE07C>;" // TIPPI
|
||||
"\uE07D>;" // ADDAK
|
||||
"\uE07E>;" // IRI
|
||||
"\uE07F>;" // URA
|
||||
"\uE080>;" // EK ONKAR
|
||||
"\uE081>\u0bb5;" // FALLBACK FOR ORIYA LETTER WA
|
||||
|
||||
"1000 >\u0BF2;" // NUMBER ONE THOUSAND
|
||||
"100 >\u0BF1;" // NUMBER ONE HUNDRED
|
||||
"10 >\u0BF0;" // NUMBER TEN
|
||||
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,157 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_InterIndic_Telugu.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic_Telugu
|
||||
|
||||
t_InterIndic_Telu {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// InterIndic-Telugu
|
||||
//:: NFD (NFC) ;
|
||||
"\ue001>\u0c01;" // SIGN CANDRABINDU
|
||||
"\ue002>\u0c02;" // SIGN ANUSVARA
|
||||
"\ue003>\u0c03;" // SIGN VISARGA
|
||||
"\uE004>\u0c05;" // FALLBACK TO LETTER A
|
||||
"\ue005>\u0c05;" // LETTER A
|
||||
"\ue006>\u0c06;" // LETTER AA
|
||||
"\ue007>\u0c07;" // LETTER I
|
||||
"\ue008>\u0c08;" // LETTER II
|
||||
"\ue009>\u0c09;" // LETTER U
|
||||
"\ue00a>\u0c0a;" // LETTER UU
|
||||
"\ue00b>\u0c0b;" // LETTER VOCALIC R
|
||||
"\ue00c>\u0c0c;" // LETTER VOCALIC L
|
||||
"\ue00d>\u0c0E;" // FALLBACK MAPPING
|
||||
"\ue00e>\u0c0E;" // LETTER E
|
||||
"\ue00f>\u0c0f;" // LETTER EE
|
||||
"\ue010>\u0c10;" // LETTER AI
|
||||
"\ue011>\u0c12;" // FALLBACK MAPPING
|
||||
"\ue012>\u0c12;" // LETTER O
|
||||
"\ue013>\u0c13;" // LETTER OO
|
||||
"\ue014>\u0c14;" // LETTER AU
|
||||
"\ue015>\u0c15;" // LETTER KA
|
||||
"\ue016>\u0c16;" // LETTER KHA
|
||||
"\ue017>\u0c17;" // LETTER GA
|
||||
"\ue018>\u0c18;" // LETTER GHA
|
||||
"\ue019>\u0c19;" // LETTER NGA
|
||||
"\ue01a>\u0c1a;" // LETTER CA
|
||||
"\ue01b>\u0c1b;" // LETTER CHA
|
||||
"\ue01c>\u0c1c;" // LETTER JA
|
||||
"\ue01d>\u0c1d;" // LETTER JHA
|
||||
"\ue01e>\u0c1e;" // LETTER NYA
|
||||
"\ue01f>\u0c1f;" // LETTER TTA
|
||||
"\ue020>\u0c20;" // LETTER TTHA
|
||||
"\ue021>\u0c21;" // LETTER DDA
|
||||
"\ue022>\u0c22;" // LETTER DDHA
|
||||
"\ue023>\u0c23;" // LETTER NNA
|
||||
"\ue024>\u0c24;" // LETTER TA
|
||||
"\ue025>\u0c25;" // LETTER THA
|
||||
"\ue026>\u0c26;" // LETTER DA
|
||||
"\ue027>\u0c27;" // LETTER DHA
|
||||
"\ue028>\u0c28;" // LETTER NA
|
||||
"\ue029>\u0c28;" // REMAP (indicExceptions.txt): \u0c29>\u0c28 = LETTER NNNA>LETTER NA
|
||||
"\ue02a>\u0c2a;" // LETTER PA
|
||||
"\ue02b>\u0c2b;" // LETTER PHA
|
||||
"\ue02c>\u0c2c;" // LETTER BA
|
||||
"\ue02d>\u0c2d;" // LETTER BHA
|
||||
"\ue02e>\u0c2e;" // LETTER MA
|
||||
"\ue02f>\u0c2f;" // LETTER YA
|
||||
"\ue030\ue03c>\u0c31;"
|
||||
"\ue030>\u0c30;" // LETTER RA
|
||||
"\ue031>\u0c31;" // LETTER RRA
|
||||
"\ue032>\u0c32;" // LETTER LA
|
||||
"\ue033>\u0c33;" // LETTER LLA
|
||||
"\ue034>\u0c33;" // REMAP (indicExceptions.txt): \u0c34>\u0c33 = LETTER LLLA>LETTER LLA
|
||||
"\ue035>\u0c35;" // LETTER VA
|
||||
"\ue036>\u0c36;" // LETTER SHA
|
||||
"\ue037>\u0c37;" // LETTER SSA
|
||||
"\ue038>\u0c38;" // LETTER SA
|
||||
"\ue039>\u0c39;" // LETTER HA
|
||||
|
||||
"\ue03c>;" // FALLBACK BLOW AWAY NUKTA
|
||||
"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA
|
||||
|
||||
"\ue03e>\u0c3e;" // VOWEL SIGN AA
|
||||
"\ue03f>\u0c3f;" // VOWEL SIGN I
|
||||
"\ue040>\u0c40;" // VOWEL SIGN II
|
||||
"\ue041>\u0c41;" // VOWEL SIGN U
|
||||
"\ue042>\u0c42;" // VOWEL SIGN UU
|
||||
"\ue043>\u0c43;" // VOWEL SIGN VOCALIC R
|
||||
"\ue044>\u0c44;" // VOWEL SIGN VOCALIC RR
|
||||
"\ue045>\u0c46;" // VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
"\ue046>\u0c46;" // VOWEL SIGN E
|
||||
"\ue047>\u0c47;" // VOWEL SIGN EE
|
||||
"\ue048>\u0c48;" // VOWEL SIGN AI
|
||||
"\ue049>\u0c4a;" // REMAP (indicExceptions.txt): \u0c49>\u0c4a = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
"\ue04a>\u0c4a;" // VOWEL SIGN O
|
||||
"\ue04b>\u0c4b;" // VOWEL SIGN OO
|
||||
"\ue04c>\u0c4c;" // VOWEL SIGN AU
|
||||
"\ue04d>\u0c4d;" // SIGN VIRAMA
|
||||
"\ue050>\u0c13\u0c02;" // REMAP (indicExceptions.txt): \u0c50>\u0c13\u0c02 = OM>LETTER OO.SIGN ANUSVARA
|
||||
"\ue051>;"
|
||||
"\ue052>;"
|
||||
"\ue053>;"
|
||||
"\ue054>;"
|
||||
"\ue055>\u0c55;" // LENGTH MARK
|
||||
"\ue056>\u0c56;" // AI LENGTH MARK
|
||||
"\ue057>\u0c4c;" // REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU
|
||||
"\ue058>\u0c15;" // REMAP
|
||||
"\ue059>\u0c16;" // REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA
|
||||
"\ue05a>\u0c17;" // REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA
|
||||
"\ue05b>\u0c1c;" // REMAP (indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA
|
||||
"\ue05c>\u0c22;" // REMAP
|
||||
"\ue05d>\u0c22;" // REMAP (indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA
|
||||
"\ue05e>\u0c2b;" // REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA
|
||||
"\ue05f>\u0c2f;" // REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA
|
||||
"\ue060>\u0c60;" // LETTER VOCALIC RR
|
||||
"\ue061>\u0c61;" // LETTER VOCALIC LL
|
||||
"\ue062>\u0c3f;" // REMAP (indicExceptions.txt): \u0c62>\u0c3f = VOWEL SIGN VOCALIC L>VOWEL SIGN I
|
||||
"\ue063>\u0c40;" // REMAP (indicExceptions.txt): \u0c63>\u0c40 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
|
||||
"\ue064>'.' ;" // FALLBACK FOR DANDA
|
||||
"\ue065>'.' ;" // FALLBACK FOR DOUBLE DANDA
|
||||
"\ue066>\u0c66;" // DIGIT ZERO
|
||||
"\ue067>\u0c67;" // DIGIT ONE
|
||||
"\ue068>\u0c68;" // DIGIT TWO
|
||||
"\ue069>\u0c69;" // DIGIT THREE
|
||||
"\ue06a>\u0c6a;" // DIGIT FOUR
|
||||
"\ue06b>\u0c6b;" // DIGIT FIVE
|
||||
"\ue06c>\u0c6c;" // DIGIT SIX
|
||||
"\ue06d>\u0c6d;" // DIGIT SEVEN
|
||||
"\ue06e>\u0c6e;" // DIGIT EIGHT
|
||||
"\ue06f>\u0c6f;" // DIGIT NINE
|
||||
|
||||
"\ue070>;" // ABBREVIATION SIGN
|
||||
"\ue071>\u0c30;" // LETTER RA WITH MIDDLE DIAGONAL
|
||||
"\ue072>\u0c30;" // LETTER RA WITH LOWER DIAGONAL
|
||||
"\ue073>;" // RUPEE MARK
|
||||
"\ue074>;" // RUPEE SIGN
|
||||
"\ue075>;" // CURRENCY NUMERATOR ONE
|
||||
"\ue076>;" // CURRENCY NUMERATOR TWO
|
||||
"\ue077>;" // CURRENCY NUMERATOR THREE
|
||||
"\ue078>;" // CURRENCY NUMERATOR FOUR
|
||||
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
|
||||
"\ue07B>;" // ISSHAR
|
||||
"\uE07C>;" // TIPPI
|
||||
"\uE07D>;" // ADDAK
|
||||
"\uE07E>;" // IRI
|
||||
"\uE07F>;" // URA
|
||||
"\uE080>;" // EK ONKAR
|
||||
"\uE081>\u0c35;" // FALLBACK FOR ORIYA LETTER WA
|
||||
"0 > \u0c66;" // FALLBACK FOR TAMIL
|
||||
"1 > \u0c67;"
|
||||
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,108 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Kannada_InterIndic.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Kannada_InterIndic
|
||||
|
||||
t_Knda_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Kannada-InterIndic
|
||||
"\u0CC6\u0CD5>\uE047;" // VOWEL SIGN EE
|
||||
"\u0CC6\u0CCD\u0CD6>\uE048\ue04d;" // VOWEL SIGN AI
|
||||
"\u0CC6\u0CD6>\uE048;" // VOWEL SIGN AI
|
||||
"\u0CC6\u0CC2\u0CD5>\uE04B;" // VOWEL SIGN OO
|
||||
"\u0CC6\u0CC2>\uE04A;" // VOWEL SIGN O
|
||||
"\u0CBF\u0CD5>\uE040;" // VOWEL SIGN II
|
||||
|
||||
"\u0C82>\uE002;" // SIGN ANUSVARA
|
||||
"\u0C83>\uE003;" // SIGN VISARGA
|
||||
"\u0C85>\uE005;" // LETTER A
|
||||
"\u0C86>\uE006;" // LETTER AA
|
||||
"\u0C87>\uE007;" // LETTER I
|
||||
"\u0C88>\uE008;" // LETTER II
|
||||
"\u0C89>\uE009;" // LETTER U
|
||||
"\u0C8A>\uE00A;" // LETTER UU
|
||||
"\u0C8B>\uE00B;" // LETTER VOCALIC R
|
||||
"\u0C8C>\uE00C;" // LETTER VOCALIC L
|
||||
"\u0C8E>\uE00E;" // LETTER E
|
||||
"\u0C8F>\uE00F;" // LETTER EE
|
||||
"\u0C90>\uE010;" // LETTER AI
|
||||
"\u0C92>\uE012;" // LETTER O
|
||||
"\u0C93>\uE013;" // LETTER OO
|
||||
"\u0C94>\uE014;" // LETTER AU
|
||||
"\u0C95>\uE015;" // LETTER KA
|
||||
"\u0C96>\uE016;" // LETTER KHA
|
||||
"\u0C97>\uE017;" // LETTER GA
|
||||
"\u0C98>\uE018;" // LETTER GHA
|
||||
"\u0C99>\uE019;" // LETTER NGA
|
||||
"\u0C9A>\uE01A;" // LETTER CA
|
||||
"\u0C9B>\uE01B;" // LETTER CHA
|
||||
"\u0C9C>\uE01C;" // LETTER JA
|
||||
"\u0C9D>\uE01D;" // LETTER JHA
|
||||
"\u0C9E>\uE01E;" // LETTER NYA
|
||||
"\u0C9F>\uE01F;" // LETTER TTA
|
||||
"\u0CA0>\uE020;" // LETTER TTHA
|
||||
"\u0CA1>\uE021;" // LETTER DDA
|
||||
"\u0CA2>\uE022;" // LETTER DDHA
|
||||
"\u0CA3>\uE023;" // LETTER NNA
|
||||
"\u0CA4>\uE024;" // LETTER TA
|
||||
"\u0CA5>\uE025;" // LETTER THA
|
||||
"\u0CA6>\uE026;" // LETTER DA
|
||||
"\u0CA7>\uE027;" // LETTER DHA
|
||||
"\u0CA8>\uE028;" // LETTER NA
|
||||
"\u0CAA>\uE02A;" // LETTER PA
|
||||
"\u0CAB>\uE02B;" // LETTER PHA
|
||||
"\u0CAC>\uE02C;" // LETTER BA
|
||||
"\u0CAD>\uE02D;" // LETTER BHA
|
||||
"\u0CAE>\uE02E;" // LETTER MA
|
||||
"\u0CAF>\uE02F;" // LETTER YA
|
||||
"\u0CB0>\uE030;" // LETTER RA
|
||||
"\u0CB1>\uE031;" // LETTER RRA
|
||||
"\u0CB2>\uE032;" // LETTER LA
|
||||
"\u0CB3>\uE033;" // LETTER LLA
|
||||
"\u0CB5>\uE035;" // LETTER VA
|
||||
"\u0CB6>\uE036;" // LETTER SHA
|
||||
"\u0CB7>\uE037;" // LETTER SSA
|
||||
"\u0CB8>\uE038;" // LETTER SA
|
||||
"\u0CB9>\uE039;" // LETTER HA
|
||||
"\u0CBC>\uE03C;" // SIGN NUKTA
|
||||
"\u0CBD>\uE03D;" // AVAGRAHA
|
||||
"\u0CBE>\uE03E;" // VOWEL SIGN AA
|
||||
"\u0CBF>\uE03F;" // VOWEL SIGN I
|
||||
"\u0CC1>\uE041;" // VOWEL SIGN U
|
||||
"\u0CC2>\uE042;" // VOWEL SIGN UU
|
||||
"\u0CC3>\uE043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0CC4>\uE044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u0CC6>\uE046;" // VOWEL SIGN E
|
||||
"\u0CCC>\uE04C;" // VOWEL SIGN AU
|
||||
"\u0CCD>\uE04D;" // SIGN VIRAMA
|
||||
"\u0CD5>\uE055;" // LENGTH MARK
|
||||
"\u0CD6>\uE056;" // AI LENGTH MARK
|
||||
"\u0CDE>\uE034;" // LETTER LLLA
|
||||
"\u0CE0>\uE060;" // LETTER VOCALIC RR
|
||||
"\u0CE1>\uE061;" // LETTER VOCALIC LL
|
||||
"\u0CE6>\uE066;" // DIGIT ZERO
|
||||
"\u0CE7>\uE067;" // DIGIT ONE
|
||||
"\u0CE8>\uE068;" // DIGIT TWO
|
||||
"\u0CE9>\uE069;" // DIGIT THREE
|
||||
"\u0CEA>\uE06A;" // DIGIT FOUR
|
||||
"\u0CEB>\uE06B;" // DIGIT FIVE
|
||||
"\u0CEC>\uE06C;" // DIGIT SIX
|
||||
"\u0CED>\uE06D;" // DIGIT SEVEN
|
||||
"\u0CEE>\uE06E;" // DIGIT EIGHT
|
||||
"\u0CEF>\uE06F;" // DIGIT NINE
|
||||
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,399 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Latin_InterIndic.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin_InterIndic
|
||||
|
||||
t_Latn_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin-InterIndic
|
||||
//:: NFD;
|
||||
//\u0e00 reserved
|
||||
//consonants
|
||||
"$chandrabindu=\ue001;"
|
||||
"$anusvara=\ue002;"
|
||||
"$visarga=\ue003;"
|
||||
//\u0e004 reserved
|
||||
// w<vowel> represents the stand-alone form
|
||||
"$wa=\ue005;"
|
||||
"$waa=\ue006;"
|
||||
"$wi=\ue007;"
|
||||
"$wii=\ue008;"
|
||||
"$wu=\ue009;"
|
||||
"$wuu=\ue00a;"
|
||||
"$wr=\ue00b;"
|
||||
"$wl=\ue00c;"
|
||||
"$wce=\ue00d;" // LETTER CANDRA E
|
||||
"$wse=\ue00e;" // LETTER SHORT E
|
||||
"$we=\ue00f;" // \u090f LETTER E
|
||||
"$wai=\ue010;"
|
||||
"$wco=\ue011;" // LETTER CANDRA O
|
||||
"$wso=\ue012;" // LETTER SHORT O
|
||||
"$wo=\ue013;" // \u0913 LETTER O
|
||||
"$wau=\ue014;"
|
||||
"$ka=\ue015;"
|
||||
"$kha=\ue016;"
|
||||
"$ga=\ue017;"
|
||||
"$gha=\ue018;"
|
||||
"$nga=\ue019;"
|
||||
"$ca=\ue01a;"
|
||||
"$cha=\ue01b;"
|
||||
"$ja=\ue01c;"
|
||||
"$jha=\ue01d;"
|
||||
"$nya=\ue01e;"
|
||||
"$tta=\ue01f;"
|
||||
"$ttha=\ue020;"
|
||||
"$dda=\ue021;"
|
||||
"$ddha=\ue022;"
|
||||
"$nna=\ue023;"
|
||||
"$ta=\ue024;"
|
||||
"$tha=\ue025;"
|
||||
"$da=\ue026;"
|
||||
"$dha=\ue027;"
|
||||
"$na=\ue028;"
|
||||
"$ena=\ue029;" //compatibility
|
||||
"$pa=\ue02a;"
|
||||
"$pha=\ue02b;"
|
||||
"$ba=\ue02c;"
|
||||
"$bha=\ue02d;"
|
||||
"$ma=\ue02e;"
|
||||
"$ya=\ue02f;"
|
||||
"$ra=\ue030;"
|
||||
"$rra=\ue031;"
|
||||
"$la=\ue032;"
|
||||
"$lla=\ue033;"
|
||||
"$ela=\ue034;" //compatibility
|
||||
"$va=\ue035;"
|
||||
"$vva=\ue081;"
|
||||
"$sha=\ue036;"
|
||||
"$ssa=\ue037;"
|
||||
"$sa=\ue038;"
|
||||
"$ha=\ue039;"
|
||||
//\u093a Reserved
|
||||
//\u093b Reserved
|
||||
"$nukta=\ue03c;"
|
||||
"$avagraha=\ue03d;" // SIGN AVAGRAHA
|
||||
// <vowel> represents the dependent form
|
||||
"$aa=\ue03e;"
|
||||
"$i=\ue03f;"
|
||||
"$ii=\ue040;"
|
||||
"$u=\ue041;"
|
||||
"$uu=\ue042;"
|
||||
"$rh=\ue043;"
|
||||
"$lh=\ue044;"
|
||||
"$ce=\ue045;" //VOWEL SIGN CANDRA E
|
||||
"$se=\ue046;" //VOWEL SIGN SHORT E
|
||||
"$e=\ue047;"
|
||||
"$ai=\ue048;"
|
||||
"$co=\ue049;" // VOWEL SIGN CANDRA O
|
||||
"$so=\ue04a;" // VOWEL SIGN SHORT O
|
||||
"$o=\ue04b;" // \u094b
|
||||
"$au=\ue04c;"
|
||||
"$virama=\ue04d;"
|
||||
// \u094e Reserved
|
||||
// \u094f Reserved
|
||||
"$om = \ue050;" // OM
|
||||
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
|
||||
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
|
||||
// \u0953>; # UNMAPPED GRAVE ACCENT
|
||||
// \u0954>; # UNMAPPED ACUTE ACCENT
|
||||
"$lm = \ue055;"// Telugu Length Mark
|
||||
"$ailm=\ue056;"// AI Length Mark
|
||||
"$aulm=\ue057;"// AU Length Mark
|
||||
//urdu compatibility forms
|
||||
"$uka=\ue058;"
|
||||
"$ukha=\ue059;"
|
||||
"$ugha=\ue05a;"
|
||||
"$ujha=\ue05b;"
|
||||
"$uddha=\ue05c;"
|
||||
"$udha=\ue05d;"
|
||||
"$ufa=\ue05e;"
|
||||
"$uya=\ue05f;"
|
||||
"$wrr=\ue060;"
|
||||
"$wll=\ue061;"
|
||||
"$rrh=\ue062;"
|
||||
"$llh=\ue063;"
|
||||
"$danda=\ue064;"
|
||||
"$doubleDanda=\ue065;"
|
||||
"$zero=\ue066;" // DIGIT ZERO
|
||||
"$one=\ue067;" // DIGIT ONE
|
||||
"$two=\ue068;" // DIGIT TWO
|
||||
"$three=\ue069;" // DIGIT THREE
|
||||
"$four=\ue06a;" // DIGIT FOUR
|
||||
"$five=\ue06b;" // DIGIT FIVE
|
||||
"$six=\ue06c;" // DIGIT SIX
|
||||
"$seven=\ue06d;" // DIGIT SEVEN
|
||||
"$eight=\ue06e;" // DIGIT EIGHT
|
||||
"$nine=\ue06f;" // DIGIT NINE
|
||||
// For all other scripts
|
||||
"$ecp0=\ue070;"
|
||||
"$ecp1=\ue071;"
|
||||
"$ecp2=\ue072;"
|
||||
"$ecp3=\ue073;"
|
||||
"$ecp4=\ue074;"
|
||||
"$ecp5=\ue075;"
|
||||
"$ecp6=\ue076;"
|
||||
"$ecp7=\ue077;"
|
||||
"$ecp8=\ue078;"
|
||||
"$ecp9=\ue079;"
|
||||
"$ecpA=\ue07a;"
|
||||
"$ecpB=\ue07b;"
|
||||
"$ecpC=\ue07c;"
|
||||
"$ecpD=\ue07d;"
|
||||
"$ecpE=\ue07e;"
|
||||
"$ecpF=\ue07f;"
|
||||
// \u0970>; # UNMAPPED ABBREVIATION SIGN
|
||||
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
|
||||
"$depVowelBelow=[\ue041-\ue044];"
|
||||
"$endThing=[$danda$doubleDanda];"
|
||||
// $x was originally called '&'; $z was '%'
|
||||
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
|
||||
"$z=[bcdfghjklmnpqrstvwxyz];"
|
||||
"$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];"
|
||||
"\u0315 > $avagraha;"
|
||||
"\u0303>$chandrabindu$anusvara;"
|
||||
"m\u0310>$chandrabindu;"
|
||||
"h\u0323>$visarga;"
|
||||
"x>$ka$virama$sa;"
|
||||
// convert to independent forms at start of word or syllable:
|
||||
// dependent forms for roundtrip
|
||||
"\u0314a\u0304>$aa;"
|
||||
"\u0314ai>$ai;"
|
||||
"\u0314au>$au;"
|
||||
"\u0314ii>$ii;"
|
||||
"\u0314i\u0304>$ii;"
|
||||
"\u0314i>$i;"
|
||||
"\u0314u\u0304>$uu;"
|
||||
"\u0314u>$u;"
|
||||
"\u0314r\u0325\u0304>$rrh;"
|
||||
"\u0314r\u0325>$rh;"
|
||||
"\u0314l\u0325\u0304>$llh;"
|
||||
"\u0314lh>$lh;"
|
||||
"\u0314l\u0325>$lh;"
|
||||
"\u0314e\u0304>$e;"
|
||||
"\u0314o\u0304>$o;"
|
||||
"\u0314a>;"
|
||||
"\u0314e\u0306>$ce;"
|
||||
"\u0314o\u0306>$co;"
|
||||
"\u0314e>$se;"
|
||||
"\u0314o>$so;"
|
||||
|
||||
// preceded by consonants
|
||||
"$consonants{ a\u0304>$aa;"
|
||||
"$consonants{ ai>$ai;"
|
||||
"$consonants{ au>$au;"
|
||||
"$consonants{ ii>$ii;"
|
||||
"$consonants{ i\u0304>$ii;"
|
||||
"$consonants{ i>$i;"
|
||||
"$consonants{ u\u0304>$uu;"
|
||||
"$consonants{ u>$u;"
|
||||
"$consonants{ r\u0325\u0304>$rrh;"
|
||||
"$consonants{ r\u0325a>$rh;"
|
||||
"$consonants{ r\u0325>$rh;"
|
||||
"$consonants{ l\u0325\u0304>$llh;"
|
||||
"$consonants{ lh>$lh;"
|
||||
"$consonants{ l\u0325>$lh;"
|
||||
"$consonants{ e\u0304>$e;"
|
||||
"$consonants{ o\u0304>$o;"
|
||||
"$consonants{ e\u0306>$ce;"
|
||||
"$consonants{ o\u0306>$co;"
|
||||
"$consonants{ e>$se;"
|
||||
"$consonants{ o>$so;"
|
||||
|
||||
// e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
|
||||
"a\u0304>$waa;"
|
||||
"ai>$wai;"
|
||||
"au>$wau;"
|
||||
"i\u0304>$wii;"
|
||||
"i>$wi;"
|
||||
"u\u0304>$wuu;"
|
||||
"u>$wu;"
|
||||
"r\u0325\u0304>$wrr;"
|
||||
"r\u0325>$wr;"
|
||||
"l\u0325\u0304>$wll;"
|
||||
"lh>$wl;"
|
||||
"l\u0325>$wl;"
|
||||
"e\u0304>$we;"
|
||||
"o\u0304>$wo;"
|
||||
"a>$wa;"
|
||||
"e\u0306>$wce;"
|
||||
"o\u0306>$wco;"
|
||||
"e>$wse;"
|
||||
"''om>$om;"
|
||||
"o>$wso;"
|
||||
|
||||
// rules for anusvara
|
||||
"n}r\u0325 > $na|$virama;"
|
||||
"n}l\u0325 > $na|$virama;"
|
||||
"n}na > $na|$virama;"
|
||||
"n\u0307}[kg] > $anusvara;"
|
||||
"n\u0307}n\u0307 > $anusvara;"
|
||||
"n\u0304}[cj] > $anusvara;"
|
||||
"n\u0304}n\u0303 > $anusvara;"
|
||||
"n\u0323}[tdn]\u0323 > $anusvara;"
|
||||
"n}[tdn] > $anusvara;"
|
||||
"m}[pbm] > $anusvara;"
|
||||
"n}[ylvshr] > $anusvara;"
|
||||
"m\u0307 > $anusvara;"
|
||||
|
||||
//urdu compatibility
|
||||
"q>$uka|$virama;"
|
||||
"k\u0331h\u0331>$ukha |$virama;"
|
||||
"g\u0307> $ugha | $virama;"
|
||||
"z > $ujha |$virama;"
|
||||
"f > $ufa|$virama;"
|
||||
|
||||
// dev
|
||||
"y\u0307>$uya|$virama;"
|
||||
"l\u0331>$ela|$virama;"
|
||||
"n\u0331>$ena|$virama;"
|
||||
"n\u0307>$nga|$virama;"
|
||||
"n\u0303>$nya|$virama;"
|
||||
"n\u0323>$nna|$virama;"
|
||||
"t\u0323h>$ttha|$virama;"
|
||||
"t\u0323>$tta|$virama;"
|
||||
"r\u0323h>$udha|$virama;"
|
||||
"r\u0323>$uddha|$virama;"
|
||||
"d\u0323h>$ddha|$virama;"
|
||||
"d\u0323>$dda|$virama;"
|
||||
"kh>$kha|$virama;"
|
||||
"k>$ka|$virama;"
|
||||
"gh>$gha|$virama;"
|
||||
"g>$ga|$virama;"
|
||||
"ch>$cha|$virama;"
|
||||
"c>$ca|$virama;"
|
||||
"jh>$jha|$virama;"
|
||||
"j>$ja|$virama;"
|
||||
"ny>$nya|$virama;"
|
||||
"tth>$ttha|$virama;"
|
||||
"ddh>$ddha|$virama;"
|
||||
"th>$tha|$virama;"
|
||||
"t>$ta|$virama;"
|
||||
"dh>$dha|$virama;"
|
||||
"d>$da|$virama;"
|
||||
"n>$na|$virama;"
|
||||
"ph>$pha|$virama;"
|
||||
"p>$pa|$virama;"
|
||||
"bh>$bha|$virama;"
|
||||
"b>$ba|$virama;"
|
||||
"m>$ma|$virama;"
|
||||
"y>$ya|$virama;"
|
||||
"r\u0331>$rra|$virama;"
|
||||
"r>$ra|$virama;"
|
||||
"l\u0323>$lla|$virama;"
|
||||
"l>$la|$virama;"
|
||||
"v>$va|$virama;"
|
||||
"w\u0307>$vva|$virama;"
|
||||
"w>$va|$virama;"
|
||||
"sh>$sha|$virama;"
|
||||
"ss>$ssa|$virama;"
|
||||
"s\u0323>$ssa|$virama;"
|
||||
"s\u0301>$sha|$virama;"
|
||||
"s>$sa|$virama;"
|
||||
"h>$ha|$virama;"
|
||||
"'.'>$danda;"
|
||||
"$danda'.'>$doubleDanda;"
|
||||
"$depVowelAbove{'~'>$anusvara;"
|
||||
"$depVowelBelow{'~'>$chandrabindu;"
|
||||
// convert to dependent forms after consonant with no vowel:
|
||||
// e.g. kai -> {ka}{virama}ai -> {ka}{ai}
|
||||
//$virama aa>$aa;
|
||||
"$virama a\u0304>$aa;"
|
||||
"$virama ai>$ai;"
|
||||
"$virama au>$au;"
|
||||
"$virama ii>$ii;"
|
||||
"$virama i\u0304>$ii;"
|
||||
"$virama i>$i;"
|
||||
//$virama uu>$uu;
|
||||
"$virama u\u0304>$uu;"
|
||||
"$virama u>$u;"
|
||||
//$virama rrh>$rrh;
|
||||
"$virama r\u0325\u0304>$rrh;"
|
||||
//$virama rh>$rh;
|
||||
"$virama r\u0325a>$rh;"
|
||||
"$virama r\u0325>$rh;"
|
||||
"$virama l\u0325\u0304>$llh;"
|
||||
"$virama lh>$lh;"
|
||||
"$virama l\u0325>$lh;"
|
||||
"$virama e\u0304>$e;"
|
||||
"$virama o\u0304>$o;"
|
||||
"$virama a>;"
|
||||
"$virama e\u0306>$ce;"
|
||||
"$virama o\u0306>$co;"
|
||||
"$virama e>$se;"
|
||||
"$virama o>$so;"
|
||||
|
||||
|
||||
// otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
|
||||
//$virama''aa>$waa;
|
||||
"$virama''a\u0304>$waa;"
|
||||
"$virama''ai>$wai;"
|
||||
"$virama''au>$wau;"
|
||||
//$virama''ii>$wii;
|
||||
"$virama''i\u0304>$wii;"
|
||||
"$virama''i>$wi;"
|
||||
//$virama''uu>$wuu;
|
||||
"$virama''u\u0304>$wuu;"
|
||||
"$virama''u>$wu;"
|
||||
//$virama''rrh>$wrr;
|
||||
"$virama''r\u0325\u0304>$wrr;"
|
||||
//$virama''rh>$wr;
|
||||
"$virama''r\u0325>$wr;"
|
||||
"$virama''l\u0325\u0304>$wll;"
|
||||
//$virama''lh>$wl;
|
||||
"$virama''l\u0325>$wl;"
|
||||
"$virama''e\u0304>$we;"
|
||||
"$virama''o\u0304>$wo;"
|
||||
"$virama''a>$wa;"
|
||||
"$virama''e\u0306>$wce;"
|
||||
"$virama''o\u0306>$wco;"
|
||||
"$virama''e>$wse;"
|
||||
"$virama''o>$wso;"
|
||||
// no virama
|
||||
"''a\u0304>$waa;"
|
||||
"''ai>$wai;"
|
||||
"''au>$wau;"
|
||||
"''i\u0304>$wii;"
|
||||
"''i>$wi;"
|
||||
"''u\u0304>$wuu;"
|
||||
"''u>$wu;"
|
||||
"''r\u0325\u0304>$wrr;"
|
||||
"''r\u0325>$wr;"
|
||||
"''l\u0325\u0304>$wll;"
|
||||
"''l\u0325>$wl;"
|
||||
"''e\u0304>$we;"
|
||||
"''o\u0304>$wo;"
|
||||
"''a>$wa;"
|
||||
"''e\u0306>$wce;"
|
||||
"''o\u0306>$wco;"
|
||||
"''e>$wse;"
|
||||
"''o>$wso;"
|
||||
|
||||
"$virama } [$z] > $virama;"
|
||||
"$virama } ' ' > $virama ;"
|
||||
"$virama}$endThing>;"
|
||||
"0>$zero;"
|
||||
"1>$one;"
|
||||
"2>$two;"
|
||||
"3>$three;"
|
||||
"4>$four;"
|
||||
"5>$five;"
|
||||
"6>$six;"
|
||||
"7>$seven;"
|
||||
"8>$eight;"
|
||||
"9>$nine;"
|
||||
"''>;"
|
||||
//:: NFC (NFD) ;
|
||||
}
|
||||
}
|
|
@ -1,538 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Latin_Jamo.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin_Jamo
|
||||
|
||||
t_Latn_Jamo {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
//- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
|
||||
//- the INDEX file. This transliterator is, by itself, not
|
||||
//- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
|
||||
//- inverses thereof.
|
||||
|
||||
// Transliteration from Latin characters to Korean script is done in
|
||||
// two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul
|
||||
// transliteration is done algorithmically following Unicode 3.0
|
||||
// section 3.11. This file implements the Latin to Jamo
|
||||
// transliteration using rules.
|
||||
|
||||
// Jamo occupy the block 1100-11FF. Within this block there are three
|
||||
// groups of characters: initial consonants or choseong (I), medial
|
||||
// vowels or jungseong (M), and trailing consonants or jongseong (F).
|
||||
// Standard Korean syllables are of the form I+M+F*.
|
||||
|
||||
// Section 3.11 describes the use of 'filler' jamo to convert
|
||||
// nonstandard syllables to standard form: the choseong filler 115F and
|
||||
// the jungseong filler 1160. In this transliterator, we will not use
|
||||
// 115F or 1160.
|
||||
|
||||
// We will, however, insert two 'null' jamo to make foreign words
|
||||
// conform to Korean syllable structure. These are the null initial
|
||||
// consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,
|
||||
// we will use the separator in order to disambiguate strings,
|
||||
// e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).
|
||||
|
||||
// We will not use all of the characters in the jamo block. We will
|
||||
// only use the 19 initials, 21 medials, and 27 finals possessing a
|
||||
// jamo short name as defined in section 4.4 of the Unicode book.
|
||||
|
||||
// Rules of thumb. These guidelines provide the basic framework
|
||||
// for the rules. They are phrased in terms of Latin-Jamo transliteration.
|
||||
// The Jamo-Latin rules derive from these, since the Jamo-Latin rules are
|
||||
// just context-free transliteration of jamo to corresponding short names,
|
||||
// with the addition of separators to maintain round-trip integrity
|
||||
// in the context of the Latin-Jamo rules.
|
||||
|
||||
// A sequence of vowels:
|
||||
// - Take the longest sequence you can. If there are too many, or you don't
|
||||
// have a starting consonant, introduce a 110B as necessary.
|
||||
|
||||
// A sequence of consonants.
|
||||
// - First join the double consonants: G + G -> GG
|
||||
// - In the remaining list,
|
||||
// -- If there is no preceding vowel, take the first consonant, and insert EU
|
||||
// after it. Continue with the rest of the consonants.
|
||||
// -- If there is one consonant, attach to the following vowel
|
||||
// -- If there are two consonants and a following vowel, attach one to the
|
||||
// preceding vowel, and one to the following vowel.
|
||||
// -- If there are more than two consonants, join the first two together if you
|
||||
// can: L + G => LG
|
||||
// -- If you still end up with more than 2 consonants, insert EU after the
|
||||
// first one, and continue with the rest of the consonants.
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Variables
|
||||
|
||||
// Some latin consonants or consonant pairs only occur as initials, and
|
||||
// some only as finals, but some occur as both. This makes some jamo
|
||||
// consonants ambiguous when transliterated into latin.
|
||||
// Initial only: IEUNG BB DD JJ R
|
||||
// Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
|
||||
// Initial and Final: B C D G GG H J K M N P S SS T
|
||||
|
||||
"$Gi = \u1100;"
|
||||
"$GGi = \u1101;"
|
||||
"$Ni = \u1102;"
|
||||
"$Di = \u1103;"
|
||||
"$DD = \u1104;"
|
||||
"$R = \u1105;"
|
||||
"$Mi = \u1106;"
|
||||
"$Bi = \u1107;"
|
||||
"$BB = \u1108;"
|
||||
"$Si = \u1109;"
|
||||
"$SSi = \u110A;"
|
||||
"$IEUNG = \u110B;" // null initial, inserted during Latin-Jamo
|
||||
"$Ji = \u110C;"
|
||||
"$JJ = \u110D;"
|
||||
"$Ci = \u110E;"
|
||||
"$Ki = \u110F;"
|
||||
"$Ti = \u1110;"
|
||||
"$Pi = \u1111;"
|
||||
"$Hi = \u1112;"
|
||||
|
||||
"$A = \u1161;"
|
||||
"$AE = \u1162;"
|
||||
"$YA = \u1163;"
|
||||
"$YAE = \u1164;"
|
||||
"$EO = \u1165;"
|
||||
"$E = \u1166;"
|
||||
"$YEO = \u1167;"
|
||||
"$YE = \u1168;"
|
||||
"$O = \u1169;"
|
||||
"$WA = \u116A;"
|
||||
"$WAE = \u116B;"
|
||||
"$OE = \u116C;"
|
||||
"$YO = \u116D;"
|
||||
"$U = \u116E;"
|
||||
"$WEO = \u116F;"
|
||||
"$WE = \u1170;"
|
||||
"$WI = \u1171;"
|
||||
"$YU = \u1172;"
|
||||
"$EU = \u1173;" // null medial, inserted during Latin-Jamo
|
||||
"$YI = \u1174;"
|
||||
"$I = \u1175;"
|
||||
|
||||
"$Gf = \u11A8;"
|
||||
"$GGf = \u11A9;"
|
||||
"$GS = \u11AA;"
|
||||
"$Nf = \u11AB;"
|
||||
"$NJ = \u11AC;"
|
||||
"$NH = \u11AD;"
|
||||
"$Df = \u11AE;"
|
||||
"$L = \u11AF;"
|
||||
"$LG = \u11B0;"
|
||||
"$LM = \u11B1;"
|
||||
"$LB = \u11B2;"
|
||||
"$LS = \u11B3;"
|
||||
"$LT = \u11B4;"
|
||||
"$LP = \u11B5;"
|
||||
"$LH = \u11B6;"
|
||||
"$Mf = \u11B7;"
|
||||
"$Bf = \u11B8;"
|
||||
"$BS = \u11B9;"
|
||||
"$Sf = \u11BA;"
|
||||
"$SSf = \u11BB;"
|
||||
"$NG = \u11BC;"
|
||||
"$Jf = \u11BD;"
|
||||
"$Cf = \u11BE;"
|
||||
"$Kf = \u11BF;"
|
||||
"$Tf = \u11C0;"
|
||||
"$Pf = \u11C1;"
|
||||
"$Hf = \u11C2;"
|
||||
|
||||
"$jamoInitial = [\u1100-\u1112];"
|
||||
|
||||
"$jamoMedial = [\u1161-\u1175];"
|
||||
|
||||
"$latinInitial = [bcdghjkmnprst];"
|
||||
|
||||
// Any character in the latin transliteration of a medial
|
||||
"$latinMedial = [aeiouwy];"
|
||||
|
||||
// The last character of the latin transliteration of a medial
|
||||
"$latinMedialEnd = [aeiou];"
|
||||
|
||||
// Disambiguation separator
|
||||
"$sep = \\\';"
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Jamo-Latin
|
||||
|
||||
// Jamo to latin is relatively simple, since it is the latin that is
|
||||
// ambiguous. Most rules are straightforward, and we encode them below
|
||||
// as simple add-on back rule, e.g.:
|
||||
|
||||
// $jamoMedial {bs} > $BS;
|
||||
|
||||
// becomes
|
||||
|
||||
// $jamoMedial {bs} <> $BS;
|
||||
|
||||
// Furthermore, we don't care about the ordering for Jamo-Latin because
|
||||
// we are going from single characters, so we can very easily piggyback
|
||||
// on the Latin-Jamo.
|
||||
|
||||
// The main issue with Jamo-Latin is when to insert separators.
|
||||
// Separators are inserted to obtain correct round trip behavior. For
|
||||
// example, the sequence Ki A Gf Gi E, if transliterated to "kagge",
|
||||
// would then round trip to Ki A GGi E. To prevent this, we insert a
|
||||
// separator: "kag-ge". IMPORTANT: The need for separators depends
|
||||
// very specifically on the behavior of the Latin-Jamo rules. A change
|
||||
// in the Latin-Jamo behavior can completely change the way the
|
||||
// separator insertion must be done.
|
||||
|
||||
// First try to preserve actual separators in the jamo text by doubling
|
||||
// them. This fixes problems like:
|
||||
// (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol
|
||||
// => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional
|
||||
// -- if we don't care about losing separators in the jamo, we can delete
|
||||
// this rule.
|
||||
|
||||
"$sep $sep <> $sep;"
|
||||
|
||||
// Triple consonants. For three consonants "axxx" we insert a
|
||||
// separator between the first and second "x" if XXf, Xf, and Xi all
|
||||
// exist, and we have A Xf XXi. This prevents the reverse
|
||||
// transliteration to A XXf Xi.
|
||||
|
||||
"$sep < $latinMedialEnd g {} $GGi;"
|
||||
"$sep < $latinMedialEnd s {} $SSi;"
|
||||
|
||||
// For vowels the rule is similar. If there is a vowel "ae" such that
|
||||
// "a" by itself and "e" by itself are vowels, then we want to map A E
|
||||
// to "a-e" so as not to round trip to AE. However, in the text Ki EO
|
||||
// IEUNG E we don't need to map to "keo-e". "keoe" suffices. For
|
||||
// vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be
|
||||
// tested. NOTE: These rules used to have a left context of
|
||||
// $latinInitial instead of [^$latinMedial]. The problem with this is
|
||||
// sequences where an initial IEUNG is transliterated away:
|
||||
// (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O)
|
||||
|
||||
"$sep < [^$latinMedial] [y w] e {} [$O $OE];"
|
||||
"$sep < [^$latinMedial] e {} [$O $OE $U];"
|
||||
"$sep < [^$latinMedial] [o a] {} [$E $EO $EU];"
|
||||
"$sep < [^$latinMedial] [w y] a {} [$E $EO $EU];"
|
||||
|
||||
// Similar to the above, but with an intervening $IEUNG.
|
||||
|
||||
"$sep < [^$latinMedial] [y w] e {} $IEUNG [$O $OE];"
|
||||
"$sep < [^$latinMedial] e {} $IEUNG [$O $OE $U];"
|
||||
"$sep < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];"
|
||||
"$sep < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];"
|
||||
|
||||
// Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E,
|
||||
// where Xi also exists, must be transliterated as "ax-e" to prevent
|
||||
// the round trip conversion to A Xi E.
|
||||
|
||||
"$sep < $latinMedialEnd b {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd c {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd d {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd g {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd h {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd j {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd k {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd m {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd n {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd p {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd s {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd t {} $IEUNG $jamoMedial;"
|
||||
|
||||
// Double finals followed by IEUNG. Similar to the single finals
|
||||
// followed by IEUNG. Any latin consonant pair X Y, between medials,
|
||||
// that we would split by Latin-Jamo, we must handle when it occurs as
|
||||
// part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi
|
||||
// E.
|
||||
|
||||
"$sep < $latinMedialEnd b s {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd g g {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd g s {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd l b {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd l g {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd l h {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd l m {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd l p {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd l s {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd l t {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd n g {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd n h {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd n j {} $IEUNG $jamoMedial;"
|
||||
"$sep < $latinMedialEnd s s {} $IEUNG $jamoMedial;"
|
||||
|
||||
// Split doubles. Text of the form A Xi Xf E, where XXi also occurs,
|
||||
// we transliterate as "ax-xe" to prevent round trip transliteration as
|
||||
// A XXi E.
|
||||
|
||||
"$sep < $latinMedialEnd b {} $Bi $jamoMedial;"
|
||||
"$sep < $latinMedialEnd d {} $Di $jamoMedial;"
|
||||
"$sep < $latinMedialEnd j {} $Ji $jamoMedial;"
|
||||
"$sep < $latinMedialEnd g {} $Gi $jamoMedial;"
|
||||
"$sep < $latinMedialEnd s {} $Si $jamoMedial;"
|
||||
|
||||
// XYY. This corresponds to the XYY rule in Latin-Jamo. By default
|
||||
// Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result,
|
||||
// "xyy" forms that correspond to XYf Yi must be transliterated as
|
||||
// "xy-y".
|
||||
|
||||
"$sep < $latinMedialEnd b s {} [$Si $SSi];"
|
||||
"$sep < $latinMedialEnd g s {} [$Si $SSi];"
|
||||
"$sep < $latinMedialEnd l b {} [$Bi $BB];"
|
||||
"$sep < $latinMedialEnd l g {} [$Gi $GGi];"
|
||||
"$sep < $latinMedialEnd l s {} [$Si $SSi];"
|
||||
"$sep < $latinMedialEnd n g {} [$Gi $GGi];"
|
||||
"$sep < $latinMedialEnd n j {} [$Ji $JJ];"
|
||||
|
||||
// Deletion of IEUNG is handled below.
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Latin-Jamo
|
||||
|
||||
// [Basic, context-free Jamo-Latin rules are embedded here too. See
|
||||
// above.]
|
||||
|
||||
// Split digraphs: Text of the form 'axye', where 'xy' is a final
|
||||
// digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
|
||||
// 'e' are medials, we want to transliterate this as A Xf Yi E rather
|
||||
// than A XYf IEUNG E. We do NOT include text of the form "axxe",
|
||||
// since that is handled differently below. These rules are generated
|
||||
// programmatically from the jamo data.
|
||||
|
||||
"$jamoMedial {b s} $latinMedial > $Bf $Si;"
|
||||
"$jamoMedial {g s} $latinMedial > $Gf $Si;"
|
||||
"$jamoMedial {l b} $latinMedial > $L $Bi;"
|
||||
"$jamoMedial {l g} $latinMedial > $L $Gi;"
|
||||
"$jamoMedial {l h} $latinMedial > $L $Hi;"
|
||||
"$jamoMedial {l m} $latinMedial > $L $Mi;"
|
||||
"$jamoMedial {l p} $latinMedial > $L $Pi;"
|
||||
"$jamoMedial {l s} $latinMedial > $L $Si;"
|
||||
"$jamoMedial {l t} $latinMedial > $L $Ti;"
|
||||
"$jamoMedial {n g} $latinMedial > $Nf $Gi;"
|
||||
"$jamoMedial {n h} $latinMedial > $Nf $Hi;"
|
||||
"$jamoMedial {n j} $latinMedial > $Nf $Ji;"
|
||||
|
||||
// Single consonants are initials: Text of the form 'axe', where 'x'
|
||||
// can be an initial or a final, and 'a' and 'e' are medials, we want
|
||||
// to transliterate as A Xi E rather than A Xf IEUNG E.
|
||||
|
||||
"$jamoMedial {b} $latinMedial > $Bi;"
|
||||
"$jamoMedial {c} $latinMedial > $Ci;"
|
||||
"$jamoMedial {d} $latinMedial > $Di;"
|
||||
"$jamoMedial {g} $latinMedial > $Gi;"
|
||||
"$jamoMedial {h} $latinMedial > $Hi;"
|
||||
"$jamoMedial {j} $latinMedial > $Ji;"
|
||||
"$jamoMedial {k} $latinMedial > $Ki;"
|
||||
"$jamoMedial {m} $latinMedial > $Mi;"
|
||||
"$jamoMedial {n} $latinMedial > $Ni;"
|
||||
"$jamoMedial {p} $latinMedial > $Pi;"
|
||||
"$jamoMedial {s} $latinMedial > $Si;"
|
||||
"$jamoMedial {t} $latinMedial > $Ti;"
|
||||
|
||||
// Doubled initials. The sequence "axxe", where XX exists as an initial
|
||||
// (XXi), and also Xi and Xf exist (true of all digraphs XX), we want
|
||||
// to transliterate as A XXi E, rather than split to A Xf Xi E.
|
||||
|
||||
"$jamoMedial {b b} $latinMedial > $BB;"
|
||||
"$jamoMedial {d d} $latinMedial > $DD;"
|
||||
"$jamoMedial {j j} $latinMedial > $JJ;"
|
||||
"$jamoMedial {g g} $latinMedial > $GGi;"
|
||||
"$jamoMedial {s s} $latinMedial > $SSi;"
|
||||
|
||||
// XYY. Because doubled consonants bind more strongly than XY
|
||||
// consonants, we must handle the sequence "axyy" specially. Here XYf
|
||||
// and YYi must exist. In these cases, we map to Xf YYi rather than
|
||||
// XYf.
|
||||
|
||||
"$jamoMedial {b} s s > $Bf;"
|
||||
"$jamoMedial {g} s s > $Gf;"
|
||||
"$jamoMedial {l} b b > $L;"
|
||||
"$jamoMedial {l} g g > $L;"
|
||||
"$jamoMedial {l} s s > $L;"
|
||||
"$jamoMedial {n} g g > $Nf;"
|
||||
"$jamoMedial {n} j j > $Nf;"
|
||||
|
||||
// Finals: Attach consonant with preceding medial to preceding medial.
|
||||
// Do this BEFORE mapping consonants to initials. Longer keys must
|
||||
// precede shorter keys that they start with, e.g., the rule for 'bs'
|
||||
// must precede 'b'.
|
||||
|
||||
// [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this
|
||||
// block for Jamo-Latin.]
|
||||
|
||||
"$jamoMedial {bs} <> $BS;"
|
||||
"$jamoMedial {b} <> $Bf;"
|
||||
"$jamoMedial {c} <> $Cf;"
|
||||
"$jamoMedial {d} <> $Df;"
|
||||
"$jamoMedial {gg} <> $GGf;"
|
||||
"$jamoMedial {gs} <> $GS;"
|
||||
"$jamoMedial {g} <> $Gf;"
|
||||
"$jamoMedial {h} <> $Hf;"
|
||||
"$jamoMedial {j} <> $Jf;"
|
||||
"$jamoMedial {k} <> $Kf;"
|
||||
"$jamoMedial {lb} <> $LB; $jamoMedial {lg} <> $LG;"
|
||||
"$jamoMedial {lh} <> $LH;"
|
||||
"$jamoMedial {lm} <> $LM;"
|
||||
"$jamoMedial {lp} <> $LP;"
|
||||
"$jamoMedial {ls} <> $LS;"
|
||||
"$jamoMedial {lt} <> $LT;"
|
||||
"$jamoMedial {l} <> $L;"
|
||||
"$jamoMedial {m} <> $Mf;"
|
||||
"$jamoMedial {ng} <> $NG;"
|
||||
"$jamoMedial {nh} <> $NH;"
|
||||
"$jamoMedial {nj} <> $NJ;"
|
||||
"$jamoMedial {n} <> $Nf;"
|
||||
"$jamoMedial {p} <> $Pf;"
|
||||
"$jamoMedial {ss} <> $SSf;"
|
||||
"$jamoMedial {s} <> $Sf;"
|
||||
"$jamoMedial {t} <> $Tf;"
|
||||
|
||||
// Initials: Attach single consonant to following medial. Do this
|
||||
// AFTER mapping finals. Longer keys must precede shorter keys that
|
||||
// they start with, e.g., the rule for 'gg' must precede 'g'.
|
||||
|
||||
// [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within
|
||||
// this block for Jamo-Latin.]
|
||||
|
||||
"{gg} $latinMedial <> $GGi;"
|
||||
"{g} $latinMedial <> $Gi;"
|
||||
"{n} $latinMedial <> $Ni;"
|
||||
"{dd} $latinMedial <> $DD;"
|
||||
"{d} $latinMedial <> $Di;"
|
||||
"{r} $latinMedial <> $R;"
|
||||
"{m} $latinMedial <> $Mi;"
|
||||
"{bb} $latinMedial <> $BB;"
|
||||
"{b} $latinMedial <> $Bi;"
|
||||
"{ss} $latinMedial <> $SSi;"
|
||||
"{s} $latinMedial <> $Si;"
|
||||
"{jj} $latinMedial <> $JJ;"
|
||||
"{j} $latinMedial <> $Ji;"
|
||||
"{c} $latinMedial <> $Ci;"
|
||||
"{k} $latinMedial <> $Ki;"
|
||||
"{t} $latinMedial <> $Ti;"
|
||||
"{p} $latinMedial <> $Pi;"
|
||||
"{h} $latinMedial <> $Hi;"
|
||||
|
||||
// 'r' in final position. Because of the equivalency of the 'l' and
|
||||
// 'r' jamo (the glyphs are the same), we try to provide the same
|
||||
// equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled
|
||||
// below. If we see an 'r' in an apparent final position, treat it
|
||||
// like 'l'. For example, "karka" => Ki A R EU Ki A without this rule.
|
||||
// Instead, we want Ki A L Ki A.
|
||||
|
||||
"$jamoMedial {r} $latinInitial > | l;"
|
||||
|
||||
// Initial + Final: If we match the next rule, we have initial then
|
||||
// final consonant with no intervening medial. We insert the null
|
||||
// vowel BEFORE it to create a well-formed syllable. (In the next rule
|
||||
// we insert a null vowel AFTER an anomalous initial.)
|
||||
|
||||
"$jamoInitial {} [bcdghjklmnpst] > $EU;"
|
||||
|
||||
// Initial + X: This block matches an initial consonant not followed by
|
||||
// a medial. We insert the null vowel after it. We handle double
|
||||
// initials explicitly here; for single initial consonants we insert EU
|
||||
// (as Latin) after them and let standard rules do the rest.
|
||||
|
||||
// BREAKS ROUND TRIP INTEGRITY
|
||||
|
||||
"gg > $GGi $EU;"
|
||||
"dd > $DD $EU;"
|
||||
"bb > $BB $EU;"
|
||||
"ss > $SSi $EU;"
|
||||
"jj > $JJ $EU;"
|
||||
|
||||
"([bcdghjkmnprst]) > | $1 eu;"
|
||||
|
||||
// X + Final: Finally we have to deal with a consonant that can only be
|
||||
// interpreted as a final (not an initial) and which is preceded
|
||||
// neither by an initial nor a medial. It is the start of the
|
||||
// syllable, but cannot be. Most of these will already be handled by
|
||||
// the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng'
|
||||
// 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'.
|
||||
// For this isolated case, we could add a null initial and medial,
|
||||
// which would give "la" => IEUNG EU L IEUNG A, for example. A more
|
||||
// economical solution is to transliterate isolated "l" (that is,
|
||||
// initial "l") to "r". (Other similar conversions of consonants that
|
||||
// occur neither as initials nor as finals are handled below.)
|
||||
|
||||
"l > | r;"
|
||||
|
||||
// Medials. If a medial is preceded by an initial, then we proceed
|
||||
// normally. As usual, longer keys must precede shorter ones.
|
||||
|
||||
// [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within
|
||||
// this block for Jamo-Latin.]
|
||||
|
||||
"$jamoInitial {ae} <> $AE;"
|
||||
"$jamoInitial {a} <> $A;"
|
||||
"$jamoInitial {eo} <> $EO;"
|
||||
"$jamoInitial {eu} <> $EU;"
|
||||
"$jamoInitial {e} <> $E;"
|
||||
"$jamoInitial {i} <> $I;"
|
||||
"$jamoInitial {oe} <> $OE;"
|
||||
"$jamoInitial {o} <> $O;"
|
||||
"$jamoInitial {u} <> $U;"
|
||||
"$jamoInitial {wae} <> $WAE;"
|
||||
"$jamoInitial {wa} <> $WA;"
|
||||
"$jamoInitial {weo} <> $WEO;"
|
||||
"$jamoInitial {we} <> $WE;"
|
||||
"$jamoInitial {wi} <> $WI;"
|
||||
"$jamoInitial {yae} <> $YAE;"
|
||||
"$jamoInitial {ya} <> $YA;"
|
||||
"$jamoInitial {yeo} <> $YEO;"
|
||||
"$jamoInitial {ye} <> $YE;"
|
||||
"$jamoInitial {yi} <> $YI;"
|
||||
"$jamoInitial {yo} <> $YO;"
|
||||
"$jamoInitial {yu} <> $YU;"
|
||||
|
||||
// We may see an anomalous isolated 'w' or 'y'. In that case, we
|
||||
// interpret it as 'wi' and 'yu', respectively.
|
||||
|
||||
// BREAKS ROUND TRIP INTEGRITY
|
||||
|
||||
"$jamoInitial {w} > | wi;"
|
||||
"$jamoInitial {y} > | yu;"
|
||||
|
||||
// Otherwise, insert a null consonant IEUNG before the medial (which is
|
||||
// still an untransliterated latin vowel).
|
||||
|
||||
"($latinMedial) > $IEUNG | $1;"
|
||||
|
||||
// Convert non-jamo latin consonants to equivalents. These occur as
|
||||
// neither initials nor finals in jamo. 'l' occurs as a final, but not
|
||||
// an initial; it is handled above. The following letters (left hand
|
||||
// side) will never be output by Jamo-Latin.
|
||||
|
||||
"f > | p;"
|
||||
"q > | k;"
|
||||
"v > | b;"
|
||||
"x > | ks;"
|
||||
"z > | s;"
|
||||
|
||||
// Delete separators (Latin-Jamo).
|
||||
|
||||
"$sep > ;"
|
||||
|
||||
// Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,
|
||||
// since these may also occur in text.
|
||||
|
||||
"< $IEUNG;"
|
||||
|
||||
//- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
|
||||
//- the INDEX file. This transliterator is, by itself, not
|
||||
//- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
|
||||
//- inverses thereof.
|
||||
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,511 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Latin_Katakana.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin_Katakana
|
||||
|
||||
t_Latn_Kana {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// note: a global filter is more efficient, but MUST include all source chars
|
||||
//:: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]] ;
|
||||
// MINIMAL FILTER GENERATED FOR: Latin-Katakana
|
||||
//## WARNING -- must add width filter, both here and below!!! ###
|
||||
":: [[\u1100-\u1112\u111A\u1121\u1160-\u1175\u11AA\u11AC-\u11AD\u11B0-\u11B5\u2190-\u2193\u2502\u25A0\u25CB\u3000-\u3002\u300C-\u300D\u3099-\u309A\u30A1-\u30ED\u30EF\u30F2-\u30F4\u30F7\u30FA-\u30FC\uFF01-\uFF5E\uFFE0-\uFFE6][',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]] ;"
|
||||
|
||||
":: [:Latin:] fullwidth-halfwidth ();"
|
||||
":: NFD (NFC);"
|
||||
":: Lower ();" // whenever transliterating from cased to uncased script, include this
|
||||
// :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese
|
||||
|
||||
// Uses modified Hepburn. Small changes to make unambiguous.
|
||||
|
||||
// | Kunrei-shiki: Hepburn/MHepburn
|
||||
// | ------------------------------
|
||||
// | si: shi
|
||||
// | si ~ya: sha
|
||||
// | si ~yu: shu
|
||||
// | si ~yo: sho
|
||||
// | zi: ji
|
||||
// | zi ~ya: ja
|
||||
// | zi ~yu: ju
|
||||
// | zi ~yo: jo
|
||||
// | ti: chi
|
||||
// | ti ~ya: cha
|
||||
// | ti ~yu: chu
|
||||
// | ti ~yo: cho
|
||||
// | tu: tsu
|
||||
// | di: ji/dji
|
||||
// | du: zu/dzu
|
||||
// | hu: fu
|
||||
|
||||
// | For foreign words:
|
||||
// | -----------------
|
||||
// | se ~i si
|
||||
// | si ~e she
|
||||
// |
|
||||
// | ze ~i zi
|
||||
// | zi ~e je
|
||||
// |
|
||||
// | te ~i ti
|
||||
// | ti ~e che
|
||||
// | te ~u tu
|
||||
// |
|
||||
// | de ~i di
|
||||
// | de ~u du
|
||||
// | de ~i di
|
||||
// |
|
||||
// | he ~u: hu
|
||||
// | hu ~a fa
|
||||
// | hu ~i fi
|
||||
// | hu ~e fe
|
||||
// | hu ~o fo
|
||||
|
||||
// Most small forms are generated, but if necessary
|
||||
// explicit small forms are given with ~a, ~ya, etc.
|
||||
|
||||
//------------------------------------------------------
|
||||
// Variables
|
||||
|
||||
"$vowel = [aeiou] ;"
|
||||
"$consonant = [bcdfghjklmnpqrstvwxyz] ;"
|
||||
"$macron = \u0304 ;"
|
||||
|
||||
// Variables used for doubled-consonants with tsu
|
||||
|
||||
"$kana = [\u3041-\u3094] ;"
|
||||
|
||||
"$voice = [\u3099\u309B];"
|
||||
"$semivoice = [\u309A\u309C];"
|
||||
|
||||
"$k_start = [カキクケコかきくけこ] ;"
|
||||
|
||||
"$s_start = [サシスセソさしすせそ] ;"
|
||||
|
||||
"$j_start = [シし] $voice ;"
|
||||
|
||||
"$t_start = [タチツテトたちつてと] ;"
|
||||
|
||||
"$n_start = [ナニヌネノンなにぬねの] ;"
|
||||
|
||||
"$h_start = [ハヒヘホはひへほ] ;"
|
||||
"$f_start = [フふ] ;"
|
||||
|
||||
"$m_start = [マミムメモまみむめも] ;"
|
||||
|
||||
"$y_start = [ヤユヨやゆよ] ;"
|
||||
|
||||
"$r_start = [ラリルレロらりるれろ] ;"
|
||||
|
||||
"$w_start = [ワヰヱヲわゐゑを] ;"
|
||||
|
||||
"$v_start = [ワヰヱヲ]゙ ;"
|
||||
|
||||
// if ン is followed by $n_quoter, then it needs an
|
||||
// apostrophe after its romaji form to disambiguate it.
|
||||
// e.g., ン ア ! = ナ, so represent as "n'a", not "na".
|
||||
|
||||
"$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;"
|
||||
|
||||
"$small_y = [ャィュェョ] ;"
|
||||
|
||||
"$iteration = \u309D ;"
|
||||
|
||||
//------------------------------------------------------
|
||||
// katakana rules
|
||||
|
||||
// Punctuation
|
||||
|
||||
"'.' <> 。;"
|
||||
"',' <> 、;"
|
||||
// ' ' } [a-z] > ; # delete spaces before latin
|
||||
// ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before hiragana
|
||||
|
||||
// Iteration Mark
|
||||
// Copy previous letter & marks
|
||||
|
||||
// TODO
|
||||
// | $1 $1 < ($kana [[:M:]$voice$semivoice]?) $iteration
|
||||
|
||||
// Specials for katakana -- not shared with hiragana
|
||||
|
||||
"va <> ヷ ;"
|
||||
"vi <> ヸ ;"
|
||||
"ve <> ヹ ;"
|
||||
"vo <> ヺ ;"
|
||||
"'~ka' <> ヵ ;"
|
||||
"'~ke' <> ヶ ;"
|
||||
|
||||
// ~~~ begin shared rules ~~~
|
||||
|
||||
//special
|
||||
|
||||
"ya < '~'ャ;"
|
||||
"yi < '~'ィ ;"
|
||||
"yu < '~'ュ;"
|
||||
"ye < '~'ェ;"
|
||||
"yo < '~'ョ;"
|
||||
|
||||
//normal
|
||||
|
||||
"a <> ア ;"
|
||||
|
||||
"b | '~' < ヒ ゙} $small_y ;"
|
||||
"by } $vowel > ビ | '~y' ;"
|
||||
|
||||
"ba <> バ ;"
|
||||
"bi <> ビ ;"
|
||||
"bu <> ブ ;"
|
||||
"be <> ベ ;"
|
||||
"bo <> ボ ;"
|
||||
|
||||
"c } i > | s ;"
|
||||
"c } e > | s ;"
|
||||
|
||||
"da <> ダ ;"
|
||||
"di <> ディ ;"
|
||||
"du <> デゥ ;"
|
||||
"de <> デ ;"
|
||||
"do <> ド ;"
|
||||
"dzu <> ヅ ;"
|
||||
"dja < ヂャ ;"
|
||||
"dji'~i' < ヂィ ;" // liu
|
||||
"dju < ヂュ ;"
|
||||
"dje < ヂェ ;"
|
||||
"djo < ヂョ ;"
|
||||
"dji <> ヂ ;"
|
||||
"dj } $vowel > ヂ | '~y' ;"
|
||||
|
||||
// TODO: QUESTION: use ĵĴżŻ instead of dj, dz
|
||||
|
||||
"cha < チャ ;"
|
||||
"chi'~i' < チィ ;" // liu
|
||||
"chu < チュ ;"
|
||||
"che < チェ ;"
|
||||
"cho < チョ ;"
|
||||
"chi <> チ ;"
|
||||
"ch } $vowel > チ | '~y' ;"
|
||||
|
||||
"e <> エ ;"
|
||||
|
||||
"g | '~' < ギ} $small_y ;"
|
||||
"gy } $vowel > ギ | '~y' ;"
|
||||
|
||||
"ga <> ガ ;"
|
||||
"gi <> ギ ;"
|
||||
"gu <> グ ;"
|
||||
"ge <> ゲ ;"
|
||||
"go <> ゴ ;"
|
||||
|
||||
"i <> イ ;"
|
||||
|
||||
// j } $vowel > ジ | '~y' ;
|
||||
|
||||
"ja <> ジャ ;"
|
||||
"ji'~i' < ジィ ;" // liu
|
||||
"ju <> ジュ ;"
|
||||
"je <> ジェ ;"
|
||||
"jo <> ジョ ;"
|
||||
"ji <> ジ ;"
|
||||
|
||||
"k | '~' < キ} $small_y ;"
|
||||
"ky } $vowel > キ | '~y' ;"
|
||||
|
||||
"ka <> カ ;"
|
||||
"ki <> キ ;"
|
||||
"ku <> ク ;"
|
||||
"ke <> ケ ;"
|
||||
"ko <> コ ;"
|
||||
|
||||
"m | '~' < ミ} $small_y ;"
|
||||
"my } $vowel > ミ | '~y' ;"
|
||||
|
||||
"ma <> マ ;"
|
||||
"mi <> ミ ;"
|
||||
"mu <> ム ;"
|
||||
"me <> メ ;"
|
||||
"mo <> モ ;"
|
||||
|
||||
"m } [pbfv] > ン ;"
|
||||
|
||||
"n | '~' < ニ } $small_y ;"
|
||||
"ny } $vowel > ニ | '~y' ;"
|
||||
|
||||
"na <> ナ ;"
|
||||
"ni <> ニ ;"
|
||||
"nu <> ヌ ;"
|
||||
"ne <> ネ ;"
|
||||
"no <> ノ ;"
|
||||
|
||||
"o <> オ ;"
|
||||
|
||||
"p | '~' < ピ } $small_y ;"
|
||||
"py } $vowel > ピ | '~y' ;"
|
||||
|
||||
"pa <> パ ;"
|
||||
"pi <> ピ ;"
|
||||
"pu <> プ ;"
|
||||
"pe <> ペ ;"
|
||||
"po <> ポ ;"
|
||||
|
||||
"h | '~' < ヒ } $small_y ;"
|
||||
"hy } $vowel > ヒ | '~y' ;"
|
||||
|
||||
"ha <> ハ ;"
|
||||
"hi <> ヒ ;"
|
||||
"hu <> ヘゥ ;"
|
||||
"he <> ヘ ;"
|
||||
"ho <> ホ ;"
|
||||
|
||||
// f | '~' < フ } $small_y ;
|
||||
// f } $vowel > フ | '~' ;
|
||||
|
||||
"fa <> ファ ;"
|
||||
"fi <> フィ ;"
|
||||
"fe <> フェ ;"
|
||||
"fo <> フォ ;"
|
||||
"fu <> フ ;"
|
||||
|
||||
"r | '~' < リ } $small_y ;"
|
||||
"ry } $vowel > リ | '~y' ;"
|
||||
|
||||
"ra <> ラ ;"
|
||||
"ri <> リ ;"
|
||||
"ru <> ル ;"
|
||||
"re <> レ ;"
|
||||
"ro <> ロ ;"
|
||||
|
||||
"za <> ザ ;"
|
||||
"zi <> ゼィ ;"
|
||||
"zu <> ズ ;"
|
||||
"ze <> ゼ ;"
|
||||
"zo <> ゾ ;"
|
||||
|
||||
"sa <> サ ;"
|
||||
"si <> セィ ;"
|
||||
"su <> ス ;"
|
||||
"se <> セ ;"
|
||||
"so <> ソ ;"
|
||||
|
||||
"sha < シャ ;"
|
||||
"shi'~i' < シィ ;" // liu
|
||||
"shu < シュ ;"
|
||||
"she < シェ ;"
|
||||
"sho < ショ ;"
|
||||
"shi <> シ ;"
|
||||
"sh } $vowel > シ | '~y' ;"
|
||||
|
||||
"ta <> タ ;"
|
||||
"ti <> ティ ;"
|
||||
"tu <> テゥ ;"
|
||||
"te <> テ ;"
|
||||
"to <> ト ;"
|
||||
|
||||
"tsu <> ツ ;"
|
||||
|
||||
// v } $vowel > ヴ | '~' ;
|
||||
|
||||
//'v~a' < ヴァ ; # liu
|
||||
//'v~i' < ヴィ ; # liu
|
||||
//'v~e' < ヴェ ; # liu
|
||||
//'v~o' < ヴォ ; # liu
|
||||
"vu <> ヴ ;"
|
||||
|
||||
"u <> ウ ;"
|
||||
|
||||
// w } $vowel > ウ | '~' ;
|
||||
|
||||
"wa <> ワ ;"
|
||||
"wi <> ヰ ;"
|
||||
"wu > ウ ;"
|
||||
"we <> ヱ ;"
|
||||
"wo <> ヲ ;"
|
||||
|
||||
"ya <> ヤ ;"
|
||||
"yi > イ ;"
|
||||
"yu <> ユ ;"
|
||||
"ye > エ ;"
|
||||
"yo <> ヨ ;"
|
||||
|
||||
// double consonants
|
||||
|
||||
//specials
|
||||
"s } sh > ッ ;"
|
||||
"t } ch > ッ ;"
|
||||
|
||||
//voiced
|
||||
|
||||
"j } j <> ッ } $j_start ;"
|
||||
"b } b <> ッ } [$h_start$f_start] $voice;"
|
||||
"d } d <> ッ } $t_start $voice;"
|
||||
"g } g <> ッ } $k_start $voice;"
|
||||
"p } p <> ッ } [$h_start$f_start] $semivoice;"
|
||||
// v } v <> ッ } [ワヰウヱヲう] $voice ;
|
||||
"z } z <> ッ } $s_start $voice;"
|
||||
"v } v <> ッ } $v_start;"
|
||||
|
||||
// normal
|
||||
|
||||
"k } k <> ッ } $k_start ;"
|
||||
"m } m <> ッ } $m_start ;"
|
||||
"n } n <> ッ } $n_start ;"
|
||||
"h } h <> ッ } $h_start ;"
|
||||
"f } f <> ッ } $f_start ;"
|
||||
"r } r <> ッ } $r_start ;"
|
||||
"t } t <> ッ } $t_start ;"
|
||||
"s } s <> ッ } $s_start ;"
|
||||
|
||||
"w } w <> ッ } $w_start;"
|
||||
"y } y <> ッ } $y_start;"
|
||||
|
||||
// completeness
|
||||
"x } x > ッ ;"
|
||||
"c } k > ッ ;"
|
||||
"c } c > ッ ;"
|
||||
"c } q > ッ ;"
|
||||
"l } l > ッ ;"
|
||||
"q } q > ッ ;"
|
||||
// y } y > ッ ;
|
||||
// w } w > ッ ;
|
||||
|
||||
// prolonged vowel mark. this indicates a doubling of
|
||||
// the preceding vowel sound
|
||||
|
||||
//a < a { ー ; # liu
|
||||
//e < e { ー ; # liu
|
||||
//i < i { ー ; # liu
|
||||
//o < o { ー ; # liu
|
||||
//u < u { ー ; # liu
|
||||
|
||||
"$macron <> ー ;"
|
||||
|
||||
// small forms
|
||||
|
||||
"'~a' <> ァ ;"
|
||||
"'~i' <> ィ ;"
|
||||
"'~u' <> ゥ ;"
|
||||
"'~e' <> ェ ;"
|
||||
"'~o' <> ォ ;"
|
||||
"'~tsu' <> ッ ;"
|
||||
"'~wa' <> ヮ ;"
|
||||
"'~ya' <> ャ ;"
|
||||
"'~yi' > ィ ;"
|
||||
"'~yu' <> ュ ;"
|
||||
"'~ye' > ェ ;"
|
||||
"'~yo' <> ョ ;"
|
||||
|
||||
// iteration marks
|
||||
// TODO: make more accurate
|
||||
|
||||
"j $1 < sh (y* $vowel) {ヽ$voice ;"
|
||||
"dj $1 < ch (y* $vowel) {ヽ$voice ;"
|
||||
"dz $1 < ts (y* $vowel) {ヽ$voice ;"
|
||||
|
||||
"g $1 < k (y* $vowel) {ヽ$voice ;"
|
||||
"z $1 < s (y* $vowel) {ヽ$voice ;"
|
||||
"d $1 < t (y* $vowel) {ヽ$voice ;"
|
||||
"h $1 < b (y* $vowel) {ヽ$voice ;"
|
||||
"v $1 < w (y* $vowel) {ヽ$voice ;"
|
||||
|
||||
"sh $1 < sh (y* $vowel) {ヽ$voice ;"
|
||||
"j $1 < j (y* $vowel) {ヽ$voice ;"
|
||||
"ch $1 < ch (y* $vowel) {ヽ$voice ;"
|
||||
"dj $1 < dj(y* $vowel) {ヽ$voice ;"
|
||||
"ts $1 < ts (y* $vowel) {ヽ$voice ;"
|
||||
"dz $1 < dz (y* $vowel) {ヽ$voice ;"
|
||||
|
||||
"$1 < ($consonant y* $vowel) {ヽ$voice? ;"
|
||||
"$1 < (.) {ヽ $voice? ;" // otherwise repeat last character
|
||||
"< ヽ $voice? ;" // delete if no characters found
|
||||
|
||||
// h- rule: lengthens vowel if not followed by a vowel
|
||||
|
||||
"[aeiou] } h > ー ;"
|
||||
|
||||
// one-way latin -> kana rules. these do not occur in
|
||||
// well-formed romaji representing actual japanese text.
|
||||
// their purpose is to make all romaji map to kana of
|
||||
// some sort.
|
||||
|
||||
// the following are not really necessary, but produce
|
||||
// slightly more natural results.
|
||||
|
||||
"cy > セィ ;"
|
||||
"dy > ディ ;"
|
||||
"hy > ヒ ;"
|
||||
"sy > セィ ;"
|
||||
"ty > ティ ;"
|
||||
"zy > ゼィ ;"
|
||||
|
||||
"h > ヘ ;"
|
||||
|
||||
// isolated consonants listed here so as not to mask
|
||||
// longer rules above.
|
||||
|
||||
"ch > チ;"
|
||||
"sh > シ ;"
|
||||
"dz > ヅ ;"
|
||||
"dj > ヂ;"
|
||||
|
||||
"b > ブ ;"
|
||||
"d > デ ;"
|
||||
"g > グ ;"
|
||||
"k > ク ;"
|
||||
"m > ム ;"
|
||||
"n'' < ン } $n_quoter ;"
|
||||
"n <> ン ;"
|
||||
"p > プ ;"
|
||||
"r > ル ;"
|
||||
"s > ス ;"
|
||||
"t > テ ;"
|
||||
"y > イ ;"
|
||||
"z > ズ ;"
|
||||
"v > ヴ ;"
|
||||
|
||||
"f > フ;"
|
||||
"j > ジ;"
|
||||
"w > ウ;"
|
||||
|
||||
"ß > | ss ;"
|
||||
"æ > | e ;"
|
||||
"ð > | d ;"
|
||||
"ø > | u ;"
|
||||
"þ > | th ;"
|
||||
|
||||
// simple substitutions using backup
|
||||
|
||||
"c > | k ;"
|
||||
"l > | r ;"
|
||||
"q > | k ;"
|
||||
"x > | ks ;"
|
||||
|
||||
// ~~~ END shared rules ~~~
|
||||
|
||||
//------------------------------------------------------
|
||||
// Final cleanup
|
||||
|
||||
"'~' > ;" // delete stray tildes between letters
|
||||
"[:Katakana:] { '' } [:Latin:] > ;" // delete stray quotes between letters
|
||||
// [\u02BE[:Nonspacing Mark:]-[\u3099-\u309C]] > ; # delete any non-spacing marks that we didn't use
|
||||
|
||||
":: NFC (NFD) ;"
|
||||
":: ([:Katakana:] halfwidth-fullwidth);"
|
||||
|
||||
// note: a global filter is more efficient, but MUST include all source chars!!
|
||||
//:: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]]);
|
||||
// MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
|
||||
":: ( [[\\\ -~\u00A2-\u00A3\u00A5-\u00A6\u00AC\u0304\u20A9\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE8-\uFFEE][~\u3001-\u3002\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u3099-\u309B\u309E\u30A1-\u30FA\u30FC-\u30FE]] ) ;"
|
||||
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,56 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat
|
||||
// Source: \icu4j\src\com\ibm\icu\impl\data/Transliterator_Latin_NumericPinyin.txt
|
||||
// Date: Fri May 28 17:07:31 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin_NumericPinyin
|
||||
|
||||
// Rule set listed in the transliterator index for Latin-NumericPinyin (FORWARD)
// and NumericPinyin-Latin (REVERSE).
t_Latn_NPinyn {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// According to the pinyin definitions I've been able to find:
|
||||
// 'a', 'e' are the preferred bases
|
||||
// otherwise 'o'
|
||||
// otherwise last vowel
|
||||
|
||||
// The trailing forms of syllables are the following:
|
||||
// "a", "ai", "ao", "an", "ang",
|
||||
// "o", "ou", "ong",
|
||||
// "e", "ei", "er", "en", "eng",
|
||||
// "i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong",
|
||||
// "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng",
|
||||
// "ü", "üe", "üan", "ün"
|
||||
// so the letters the tone will 'hop' are: i, o, n, u, "on", "ng" (and "r" after "e"),
// as listed in the $tone rules below.
|
||||
|
||||
"::NFD (NFC);"
|
||||
// $tone: the combining marks treated as tone marks.
// NOTE(review): the t_Tone_Digit table has rules for U+0304, U+0301, U+030C and
// U+0300 only; confirm how U+0306 is meant to be converted.
"$tone = [\u0304\u0301\u030C\u0300\u0306] ;"
|
||||
|
||||
// Move the tone to the end of a syllable, and convert to number
// (each rule re-emits the hopped letters first, then tone-digit applied to the captured tone $1)
|
||||
"e {($tone) r} > r &tone-digit($1);"
|
||||
"($tone) ( [i o n u {o n} {n g}]) > $2 &tone-digit($1);"
|
||||
"($tone) > &tone-digit($1);"
|
||||
|
||||
// The following backs up until it finds the right vowel, then deposits the tone
// Each '<' rule captures a base vowel ($1), the letters after it ($2) and the
// tone digit ($3), and emits $1, then digit-tone of the digit, then $2.
// Rule order follows the preference stated at the top: a/e first, then o, then the last vowel.
|
||||
|
||||
"$vowel = [aAeEiIoOuUüÜ];"
|
||||
"$consonant = [[a-z A-Z] - [$vowel]];"
|
||||
"$digit = [1-5];"
|
||||
"$1 &digit-tone($3) $2 < ([aAeE]) ($vowel* $consonant*) ($digit);"
|
||||
"$1 &digit-tone($3) $2 < ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit);"
|
||||
"$1 &digit-tone($3) $2 < ($vowel) ($consonant*) ($digit);"
|
||||
// Fallback: a digit directly after any letter is converted in place.
"&digit-tone($1) < [:letter:] {($digit)};"
|
||||
|
||||
"::NFC (NFD);"
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
|
@ -1,101 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Malayalam_InterIndic.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Malayalam_InterIndic
|
||||
|
||||
// One-way rules mapping Malayalam code points (U+0Dxx) to private-use code points
// U+E0xx; every rule in this table keeps the low byte unchanged.
t_Mlym_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Malayalam-InterIndic
// NOTE(review): this table has no rules for U+0D4A, U+0D4B or U+0D4C; the NFD step
// below is commented out, so confirm those code points are handled before this
// rule set runs.
|
||||
//:: NFD (NFC) ;
|
||||
|
||||
"\u0D02>\uE002;" // SIGN ANUSVARA
|
||||
"\u0D03>\uE003;" // SIGN VISARGA
|
||||
"\u0D05>\uE005;" // LETTER A
|
||||
"\u0D06>\uE006;" // LETTER AA
|
||||
"\u0D07>\uE007;" // LETTER I
|
||||
"\u0D08>\uE008;" // LETTER II
|
||||
"\u0D09>\uE009;" // LETTER U
|
||||
"\u0D0A>\uE00A;" // LETTER UU
|
||||
"\u0D0B>\uE00B;" // LETTER VOCALIC R
|
||||
"\u0D0C>\uE00C;" // LETTER VOCALIC L
|
||||
"\u0D0E>\uE00E;" // LETTER E
|
||||
"\u0D0F>\uE00F;" // LETTER EE
|
||||
"\u0D10>\uE010;" // LETTER AI
|
||||
"\u0D12>\uE012;" // LETTER O
|
||||
"\u0D13>\uE013;" // LETTER OO
|
||||
"\u0D14>\uE014;" // LETTER AU
|
||||
"\u0D15>\uE015;" // LETTER KA
|
||||
"\u0D16>\uE016;" // LETTER KHA
|
||||
"\u0D17>\uE017;" // LETTER GA
|
||||
"\u0D18>\uE018;" // LETTER GHA
|
||||
"\u0D19>\uE019;" // LETTER NGA
|
||||
"\u0D1A>\uE01A;" // LETTER CA
|
||||
"\u0D1B>\uE01B;" // LETTER CHA
|
||||
"\u0D1C>\uE01C;" // LETTER JA
|
||||
"\u0D1D>\uE01D;" // LETTER JHA
|
||||
"\u0D1E>\uE01E;" // LETTER NYA
|
||||
"\u0D1F>\uE01F;" // LETTER TTA
|
||||
"\u0D20>\uE020;" // LETTER TTHA
|
||||
"\u0D21>\uE021;" // LETTER DDA
|
||||
"\u0D22>\uE022;" // LETTER DDHA
|
||||
"\u0D23>\uE023;" // LETTER NNA
|
||||
"\u0D24>\uE024;" // LETTER TA
|
||||
"\u0D25>\uE025;" // LETTER THA
|
||||
"\u0D26>\uE026;" // LETTER DA
|
||||
"\u0D27>\uE027;" // LETTER DHA
|
||||
"\u0D28>\uE028;" // LETTER NA
|
||||
"\u0D2A>\uE02A;" // LETTER PA
|
||||
"\u0D2B>\uE02B;" // LETTER PHA
|
||||
"\u0D2C>\uE02C;" // LETTER BA
|
||||
"\u0D2D>\uE02D;" // LETTER BHA
|
||||
"\u0D2E>\uE02E;" // LETTER MA
|
||||
"\u0D2F>\uE02F;" // LETTER YA
|
||||
"\u0D30>\uE030;" // LETTER RA
|
||||
"\u0D31>\uE031;" // LETTER RRA
|
||||
"\u0D32>\uE032;" // LETTER LA
|
||||
"\u0D33>\uE033;" // LETTER LLA
|
||||
"\u0D34>\uE034;" // LETTER LLLA
|
||||
"\u0D35>\uE035;" // LETTER VA
|
||||
"\u0D36>\uE036;" // LETTER SHA
|
||||
"\u0D37>\uE037;" // LETTER SSA
|
||||
"\u0D38>\uE038;" // LETTER SA
|
||||
"\u0D39>\uE039;" // LETTER HA
|
||||
"\u0D3E>\uE03E;" // VOWEL SIGN AA
|
||||
"\u0D3F>\uE03F;" // VOWEL SIGN I
|
||||
"\u0D40>\uE040;" // VOWEL SIGN II
|
||||
"\u0D41>\uE041;" // VOWEL SIGN U
|
||||
"\u0D42>\uE042;" // VOWEL SIGN UU
|
||||
"\u0D43>\uE043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0D46>\uE046;" // VOWEL SIGN E
|
||||
"\u0D47>\uE047;" // VOWEL SIGN EE
|
||||
"\u0D48>\uE048;" // VOWEL SIGN AI
|
||||
"\u0D4D>\uE04D;" // SIGN VIRAMA
|
||||
"\u0D57>\uE057;" // AU LENGTH MARK
|
||||
"\u0D60>\uE060;" // LETTER VOCALIC RR
|
||||
"\u0D61>\uE061;" // LETTER VOCALIC LL
|
||||
"\u0D66>\uE066;" // DIGIT ZERO
|
||||
"\u0D67>\uE067;" // DIGIT ONE
|
||||
"\u0D68>\uE068;" // DIGIT TWO
|
||||
"\u0D69>\uE069;" // DIGIT THREE
|
||||
"\u0D6A>\uE06A;" // DIGIT FOUR
|
||||
"\u0D6B>\uE06B;" // DIGIT FIVE
|
||||
"\u0D6C>\uE06C;" // DIGIT SIX
|
||||
"\u0D6D>\uE06D;" // DIGIT SEVEN
|
||||
"\u0D6E>\uE06E;" // DIGIT EIGHT
|
||||
"\u0D6F>\uE06F;" // DIGIT NINE
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,111 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Oriya_InterIndic.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Oriya_InterIndic
|
||||
|
||||
// One-way rules mapping Oriya code points (U+0Bxx) to private-use code points U+E0xx.
// Most rules keep the low byte; the exceptions are the two-code-point vowel-sign
// rules at the top, DANDA / DOUBLE DANDA (sources U+0964 / U+0965), ISSHAR and LETTER WA.
t_Orya_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Oriya-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
//\u0B21\u0B3C>\uE05C;# LETTER RRA
|
||||
//\u0B22\u0B3C>\uE05D;# LETTER RHA
|
||||
// Two-code-point sources (VOWEL SIGN E followed by a length mark or VOWEL SIGN AA).
// Presumably listed first so the single-code-point U+0B47 rule further down does
// not pre-empt them -- confirm against ICU rule-ordering semantics.
"\u0B47\u0B56>\uE048;"// VOWEL SIGN AI
|
||||
"\u0B47\u0B3E>\uE04B;"// VOWEL SIGN O
|
||||
"\u0B47\u0B57>\uE04C;"// VOWEL SIGN AU
|
||||
|
||||
"\u0B01>\uE001;" // SIGN CANDRABINDU
|
||||
"\u0B02>\uE002;" // SIGN ANUSVARA
|
||||
"\u0B03>\uE003;" // SIGN VISARGA
|
||||
"\u0B05>\uE005;" // LETTER A
|
||||
"\u0B06>\uE006;" // LETTER AA
|
||||
"\u0B07>\uE007;" // LETTER I
|
||||
"\u0B08>\uE008;" // LETTER II
|
||||
"\u0B09>\uE009;" // LETTER U
|
||||
"\u0B0A>\uE00A;" // LETTER UU
|
||||
"\u0B0B>\uE00B;" // LETTER VOCALIC R
|
||||
"\u0B0C>\uE00C;" // LETTER VOCALIC L
|
||||
"\u0B0F>\uE00F;" // LETTER E
|
||||
"\u0B10>\uE010;" // LETTER AI
|
||||
"\u0B13>\uE013;" // LETTER O
|
||||
"\u0B14>\uE014;" // LETTER AU
|
||||
"\u0B15>\uE015;" // LETTER KA
|
||||
"\u0B16>\uE016;" // LETTER KHA
|
||||
"\u0B17>\uE017;" // LETTER GA
|
||||
"\u0B18>\uE018;" // LETTER GHA
|
||||
"\u0B19>\uE019;" // LETTER NGA
|
||||
"\u0B1A>\uE01A;" // LETTER CA
|
||||
"\u0B1B>\uE01B;" // LETTER CHA
|
||||
"\u0B1C>\uE01C;" // LETTER JA
|
||||
"\u0B1D>\uE01D;" // LETTER JHA
|
||||
"\u0B1E>\uE01E;" // LETTER NYA
|
||||
"\u0B1F>\uE01F;" // LETTER TTA
|
||||
"\u0B20>\uE020;" // LETTER TTHA
|
||||
"\u0B21>\uE021;" // LETTER DDA
|
||||
"\u0B22>\uE022;" // LETTER DDHA
|
||||
"\u0B23>\uE023;" // LETTER NNA
|
||||
"\u0B24>\uE024;" // LETTER TA
|
||||
"\u0B25>\uE025;" // LETTER THA
|
||||
"\u0B26>\uE026;" // LETTER DA
|
||||
"\u0B27>\uE027;" // LETTER DHA
|
||||
"\u0B28>\uE028;" // LETTER NA
|
||||
"\u0B2A>\uE02A;" // LETTER PA
|
||||
"\u0B2B>\uE02B;" // LETTER PHA
|
||||
"\u0B2C>\uE02C;" // LETTER BA
|
||||
"\u0B2D>\uE02D;" // LETTER BHA
|
||||
"\u0B2E>\uE02E;" // LETTER MA
|
||||
"\u0B2F>\uE02F;" // LETTER YA
|
||||
"\u0B30>\uE030;" // LETTER RA
|
||||
"\u0B32>\uE032;" // LETTER LA
|
||||
"\u0B33>\uE033;" // LETTER LLA
|
||||
"\u0B35>\uE035;" // LETTER VA
|
||||
"\u0B36>\uE036;" // LETTER SHA
|
||||
"\u0B37>\uE037;" // LETTER SSA
|
||||
"\u0B38>\uE038;" // LETTER SA
|
||||
"\u0B39>\uE039;" // LETTER HA
|
||||
"\u0B3C>\uE03C;" // SIGN NUKTA
|
||||
"\u0B3D>\uE03D;" // SIGN AVAGRAHA
|
||||
"\u0B3E>\uE03E;" // VOWEL SIGN AA
|
||||
"\u0B3F>\uE03F;" // VOWEL SIGN I
|
||||
"\u0B40>\uE040;" // VOWEL SIGN II
|
||||
"\u0B41>\uE041;" // VOWEL SIGN U
|
||||
"\u0B42>\uE042;" // VOWEL SIGN UU
|
||||
"\u0B43>\uE043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0B47>\uE047;" // VOWEL SIGN E
|
||||
//
|
||||
"\u0B4D>\uE04D;" // SIGN VIRAMA
|
||||
"\u0B56>\uE056;" // AI LENGTH MARK
|
||||
"\u0B57>\uE057;" // AU LENGTH MARK
|
||||
"\u0964>\ue064;" // DANDA
|
||||
"\u0965>\ue065;" // DOUBLE DANDA
|
||||
//
|
||||
"\u0B5F>\uE05F;" // LETTER YYA
|
||||
"\u0B60>\uE060;" // LETTER VOCALIC RR
|
||||
"\u0B61>\uE061;" // LETTER VOCALIC LL
|
||||
"\u0B66>\uE066;" // DIGIT ZERO
|
||||
"\u0B67>\uE067;" // DIGIT ONE
|
||||
"\u0B68>\uE068;" // DIGIT TWO
|
||||
"\u0B69>\uE069;" // DIGIT THREE
|
||||
"\u0B6A>\uE06A;" // DIGIT FOUR
|
||||
"\u0B6B>\uE06B;" // DIGIT FIVE
|
||||
"\u0B6C>\uE06C;" // DIGIT SIX
|
||||
"\u0B6D>\uE06D;" // DIGIT SEVEN
|
||||
"\u0B6E>\uE06E;" // DIGIT EIGHT
|
||||
"\u0B6F>\uE06F;" // DIGIT NINE
|
||||
"\u0B70>\ue07B;" // ISSHAR
|
||||
"\u0B71>\ue081;" // LETTER WA
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,92 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Tamil_InterIndic.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Tamil_InterIndic
|
||||
|
||||
// One-way rules mapping Tamil code points (U+0B8x-U+0BFx) to private-use code points
// U+E0xx. Single-code-point rules keep the low 7 bits (e.g. U+0B82 -> U+E002);
// the multi-code-point, number and ASCII-zero rules are the exceptions.
t_Taml_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Tamil-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
|
||||
// Two-code-point sources. Presumably listed first so the single-code-point rules
// for U+0BC6, U+0BC7 and U+0B92 further down do not pre-empt them -- confirm
// against ICU rule-ordering semantics.
"\u0BC6\u0BBE>\uE04A;"// VOWEL SIGN O
|
||||
"\u0BC7\u0BBE>\uE04B;"// VOWEL SIGN OO
|
||||
"\u0BC6\u0BD7>\uE04C;"// VOWEL SIGN AU
|
||||
"\u0B92\u0BD7>\uE014;"// LETTER AU
|
||||
|
||||
"\u0B82>\uE002;" // SIGN ANUSVARA
|
||||
"\u0B83>\uE003;" // SIGN VISARGA
|
||||
"\u0B85>\uE005;" // LETTER A
|
||||
"\u0B86>\uE006;" // LETTER AA
|
||||
"\u0B87>\uE007;" // LETTER I
|
||||
"\u0B88>\uE008;" // LETTER II
|
||||
"\u0B89>\uE009;" // LETTER U
|
||||
"\u0B8A>\uE00A;" // LETTER UU
|
||||
"\u0B8E>\uE00E;" // LETTER E
|
||||
"\u0B8F>\uE00F;" // LETTER EE
|
||||
"\u0B90>\uE010;" // LETTER AI
|
||||
"\u0B92>\uE012;" // LETTER O
|
||||
"\u0B93>\uE013;" // LETTER OO
|
||||
"\u0B94>\uE014;" // LETTER AU
|
||||
"\u0B95>\uE015;" // LETTER KA
|
||||
"\u0B99>\uE019;" // LETTER NGA
|
||||
"\u0B9A>\uE01A;" // LETTER CA
|
||||
"\u0B9C>\uE01C;" // LETTER JA
|
||||
"\u0B9E>\uE01E;" // LETTER NYA
|
||||
"\u0B9F>\uE01F;" // LETTER TTA
|
||||
"\u0BA3>\uE023;" // LETTER NNA
|
||||
"\u0BA4>\uE024;" // LETTER TA
|
||||
"\u0BA8>\uE028;" // LETTER NA
|
||||
"\u0BA9>\uE029;" // LETTER NNNA
|
||||
"\u0BAA>\uE02A;" // LETTER PA
|
||||
"\u0BAE>\uE02E;" // LETTER MA
|
||||
"\u0BAF>\uE02F;" // LETTER YA
|
||||
"\u0BB0>\uE030;" // LETTER RA
|
||||
"\u0BB1>\uE031;" // LETTER RRA
|
||||
"\u0BB2>\uE032;" // LETTER LA
|
||||
"\u0BB3>\uE033;" // LETTER LLA
|
||||
"\u0BB4>\uE034;" // LETTER LLLA
|
||||
"\u0BB5>\uE035;" // LETTER VA
|
||||
"\u0BB7>\uE037;" // LETTER SSA
|
||||
"\u0BB8>\uE038;" // LETTER SA
|
||||
"\u0BB9>\uE039;" // LETTER HA
|
||||
"\u0BBE>\uE03E;" // VOWEL SIGN AA
|
||||
"\u0BBF>\uE03F;" // VOWEL SIGN I
|
||||
"\u0BC0>\uE040;" // VOWEL SIGN II
|
||||
"\u0BC1>\uE041;" // VOWEL SIGN U
|
||||
"\u0BC2>\uE042;" // VOWEL SIGN UU
|
||||
"\u0BC6>\uE046;" // VOWEL SIGN E
|
||||
"\u0BC7>\uE047;" // VOWEL SIGN EE
|
||||
"\u0BC8>\uE048;" // VOWEL SIGN AI
|
||||
|
||||
"\u0BCD>\uE04D;" // SIGN VIRAMA
|
||||
"\u0BD7>\uE057;" // AU LENGTH MARK
|
||||
"\u0BE7>\uE067;" // DIGIT ONE
|
||||
"\u0BE8>\uE068;" // DIGIT TWO
|
||||
"\u0BE9>\uE069;" // DIGIT THREE
|
||||
"\u0BEA>\uE06A;" // DIGIT FOUR
|
||||
"\u0BEB>\uE06B;" // DIGIT FIVE
|
||||
"\u0BEC>\uE06C;" // DIGIT SIX
|
||||
"\u0BED>\uE06D;" // DIGIT SEVEN
|
||||
"\u0BEE>\uE06E;" // DIGIT EIGHT
|
||||
"\u0BEF>\uE06F;" // DIGIT NINE
|
||||
// The three number signs below are written out as the DIGIT ONE target (U+E067)
// followed by one, two or three copies of U+E066.
"\u0BF0>\uE067\uE066;" // UNMAPPED Tamil-InterIndic: NUMBER TEN
|
||||
"\u0BF1>\uE067\uE066\uE066;" // UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED
|
||||
"\u0BF2>\uE067\uE066\uE066\uE066;"// UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND
|
||||
// ASCII digit 0 -> U+E066. This table has no rule with U+0BE6 as its source, so
// this is the only rule here that produces U+E066 from a single character.
"0>\ue066;"
|
||||
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,106 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../../impl/data/Transliterator_Telugu_InterIndic.txt
|
||||
// Date: Tue May 18 17:24:49 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Telugu_InterIndic
|
||||
|
||||
// One-way rules mapping Telugu code points (U+0Cxx) to private-use code points
// U+E0xx; every single-code-point rule in this table keeps the low byte unchanged.
t_Telu_InterIndic {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Telugu-InterIndic
|
||||
//:: NFD (NFC) ;
|
||||
// VOWEL SIGN E + SIGN VIRAMA + AI LENGTH MARK -> the VOWEL SIGN AI target (U+E048)
// followed by the VIRAMA target (U+E04D), i.e. the virama is moved after the vowel sign.
"\u0c46\u0c4d\u0c56>\ue048\ue04d;"
|
||||
"\u0C46\u0C56>\uE048;"// VOWEL SIGN AI
|
||||
"\u0C01>\uE001;" // SIGN CANDRABINDU
|
||||
"\u0C02>\uE002;" // SIGN ANUSVARA
|
||||
"\u0C03>\uE003;" // SIGN VISARGA
|
||||
"\u0C05>\uE005;" // LETTER A
|
||||
"\u0C06>\uE006;" // LETTER AA
|
||||
"\u0C07>\uE007;" // LETTER I
|
||||
"\u0C08>\uE008;" // LETTER II
|
||||
"\u0C09>\uE009;" // LETTER U
|
||||
"\u0C0A>\uE00A;" // LETTER UU
|
||||
"\u0C0B>\uE00B;" // LETTER VOCALIC R
|
||||
"\u0C0C>\uE00C;" // LETTER VOCALIC L
|
||||
"\u0C0E>\uE00E;" // LETTER E
|
||||
"\u0C0F>\uE00F;" // LETTER EE
|
||||
"\u0C10>\uE010;" // LETTER AI
|
||||
"\u0C12>\uE012;" // LETTER O
|
||||
"\u0C13>\uE013;" // LETTER OO
|
||||
"\u0C14>\uE014;" // LETTER AU
|
||||
"\u0C15>\uE015;" // LETTER KA
|
||||
"\u0C16>\uE016;" // LETTER KHA
|
||||
"\u0C17>\uE017;" // LETTER GA
|
||||
"\u0C18>\uE018;" // LETTER GHA
|
||||
"\u0C19>\uE019;" // LETTER NGA
|
||||
"\u0C1A>\uE01A;" // LETTER CA
|
||||
"\u0C1B>\uE01B;" // LETTER CHA
|
||||
"\u0C1C>\uE01C;" // LETTER JA
|
||||
"\u0C1D>\uE01D;" // LETTER JHA
|
||||
"\u0C1E>\uE01E;" // LETTER NYA
|
||||
"\u0C1F>\uE01F;" // LETTER TTA
|
||||
"\u0C20>\uE020;" // LETTER TTHA
|
||||
"\u0C21>\uE021;" // LETTER DDA
|
||||
"\u0C22>\uE022;" // LETTER DDHA
|
||||
"\u0C23>\uE023;" // LETTER NNA
|
||||
"\u0C24>\uE024;" // LETTER TA
|
||||
"\u0C25>\uE025;" // LETTER THA
|
||||
"\u0C26>\uE026;" // LETTER DA
|
||||
"\u0C27>\uE027;" // LETTER DHA
|
||||
"\u0C28>\uE028;" // LETTER NA
|
||||
"\u0C2A>\uE02A;" // LETTER PA
|
||||
"\u0C2B>\uE02B;" // LETTER PHA
|
||||
"\u0C2C>\uE02C;" // LETTER BA
|
||||
"\u0C2D>\uE02D;" // LETTER BHA
|
||||
"\u0C2E>\uE02E;" // LETTER MA
|
||||
"\u0C2F>\uE02F;" // LETTER YA
|
||||
"\u0C30>\uE030;" // LETTER RA
|
||||
"\u0C31>\uE031;" // LETTER RRA
|
||||
"\u0C32>\uE032;" // LETTER LA
|
||||
"\u0C33>\uE033;" // LETTER LLA
|
||||
"\u0C35>\uE035;" // LETTER VA
|
||||
"\u0C36>\uE036;" // LETTER SHA
|
||||
"\u0C37>\uE037;" // LETTER SSA
|
||||
"\u0C38>\uE038;" // LETTER SA
|
||||
"\u0C39>\uE039;" // LETTER HA
|
||||
"\u0C3E>\uE03E;" // VOWEL SIGN AA
|
||||
"\u0C3F>\uE03F;" // VOWEL SIGN I
|
||||
"\u0C40>\uE040;" // VOWEL SIGN II
|
||||
"\u0C41>\uE041;" // VOWEL SIGN U
|
||||
"\u0C42>\uE042;" // VOWEL SIGN UU
|
||||
"\u0C43>\uE043;" // VOWEL SIGN VOCALIC R
|
||||
"\u0C44>\uE044;" // VOWEL SIGN VOCALIC RR
|
||||
"\u0C46>\uE046;" // VOWEL SIGN E
|
||||
"\u0C47>\uE047;" // VOWEL SIGN EE
|
||||
"\u0C4A>\uE04A;" // VOWEL SIGN O
|
||||
"\u0C4B>\uE04B;" // VOWEL SIGN OO
|
||||
"\u0C4C>\uE04C;" // VOWEL SIGN AU
|
||||
"\u0C4D>\uE04D;" // SIGN VIRAMA
|
||||
"\u0C55>\uE055;" // LENGTH MARK
|
||||
"\u0C56>\uE056;" // AI LENGTH MARK
|
||||
"\u0C60>\uE060;" // LETTER VOCALIC RR
|
||||
"\u0C61>\uE061;" // LETTER VOCALIC LL
|
||||
"\u0C66>\uE066;" // DIGIT ZERO
|
||||
"\u0C67>\uE067;" // DIGIT ONE
|
||||
"\u0C68>\uE068;" // DIGIT TWO
|
||||
"\u0C69>\uE069;" // DIGIT THREE
|
||||
"\u0C6A>\uE06A;" // DIGIT FOUR
|
||||
"\u0C6B>\uE06B;" // DIGIT FIVE
|
||||
"\u0C6C>\uE06C;" // DIGIT SIX
|
||||
"\u0C6D>\uE06D;" // DIGIT SEVEN
|
||||
"\u0C6E>\uE06E;" // DIGIT EIGHT
|
||||
"\u0C6F>\uE06F;" // DIGIT NINE
|
||||
// :: NFC (NFD) ;
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -1,26 +0,0 @@
|
|||
// -*- Coding: utf-8; -*-
|
||||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat
|
||||
// Source: \icu4j\src\com\ibm\icu\impl\data/Transliterator_Tone_Digit.txt
|
||||
// Date: Fri May 28 17:07:31 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Tone_Digit
|
||||
|
||||
// Pairs four combining tone marks with the digits 1-4. The transliterator index lists
// this table under the internal IDs Tone-Digit (FORWARD) and Digit-Tone (REVERSE).
t_Tone_Digit {
|
||||
Rule {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Only intended for internal use
|
||||
"\u0304 <> 1;" // COMBINING MACRON <-> 1
|
||||
"\u0301 <> 2;" // COMBINING ACUTE ACCENT <-> 2
|
||||
"\u030C <> 3;" // COMBINING CARON <-> 3
|
||||
"\u0300 <> 4;" // COMBINING GRAVE ACCENT <-> 4
|
||||
"< 5;" // one-way rule with an empty left side: presumably digit 5 yields no mark in the reverse direction -- confirm
|
||||
}
|
||||
}
|
|
@ -1,275 +0,0 @@
|
|||
//--------------------------------------------------------------------
|
||||
// Copyright (c) 1999-2004, International Business Machines
|
||||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat
|
||||
// Source: Transliterator_index.txt
|
||||
// Date: Fri May 28 17:07:30 2004
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
// N.B.: This file has been generated mechanically from the
|
||||
// corresponding ICU4J file, which is the master file that receives
|
||||
// primary updates. The colon-delimited fields have been split into
|
||||
// separate strings. For 'file' and 'internal' lines, the encoding
|
||||
// field has been deleted, since the encoding is processed at build
|
||||
// time in ICU4C. Certain large rule sets not intended for general
|
||||
// use have been commented out with the notation "Java only".
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
translit_index {
|
||||
RuleBasedTransliteratorIDs {
|
||||
//--------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------
|
||||
//
|
||||
// TRANSLITERATOR INDEX FILE. This file lists the non-algorithmic
|
||||
// system transliterators. It allows arbitrary mappings between
|
||||
// transliterator IDs and file names, and also allows the system to
|
||||
// define aliases for transliterators, so that "Latin-Hangul", for
|
||||
// example, can be implemented transparently as the compound
|
||||
// "Latin-Jamo;Jamo-Hangul". Internal IDs may also be defined; these
|
||||
// are invisible to the user, but can be composed together by the
|
||||
// system to create visible transliterators.
|
||||
//
|
||||
// Blank lines and lines beginning with '#' are ignored.
|
||||
//
|
||||
// Lines in this file have one of the following forms (text not
|
||||
// enclosed by <> is literal):
|
||||
//
|
||||
// <id>:file:<resource>:<encoding>:<direction>
|
||||
// <id>:internal:<resource>:<encoding>:<direction>
|
||||
// <id>:alias:<getInstanceArg>
|
||||
//
|
||||
// <id> is the ID of the system transliterator being defined. These
|
||||
// are public IDs enumerated by Transliterator.getAvailableIDs(),
|
||||
// unless the second field is "internal".
|
||||
//
|
||||
// <resource> is a ResourceReader resource name. Currently these refer
|
||||
// to file names under com/ibm/text/resources. This string is passed
|
||||
// directly to ResourceReader, together with <encoding>.
|
||||
//
|
||||
// <encoding> is the character encoding to use when reading <resource>;
|
||||
// passed directly to ResourceReader. E.g., "UTF8".
|
||||
//
|
||||
// <direction> is either "FORWARD" or "REVERSE".
|
||||
//
|
||||
// <getInstanceArg> is a string to be passed directly to
|
||||
// Transliterator.getInstance(). The returned Transliterator object
|
||||
// then has its ID changed to <id> and is returned.
|
||||
|
||||
|
||||
// Bidirectional rule files
|
||||
|
||||
{ "Fullwidth-Halfwidth", "file", "t_FWidth_HWidth", "FORWARD" },
|
||||
{ "Halfwidth-Fullwidth", "file", "t_FWidth_HWidth", "REVERSE" },
|
||||
|
||||
{ "Latin-Cyrillic", "file", "t_Cyrl_Latn", "REVERSE" },
|
||||
{ "Cyrillic-Latin", "file", "t_Cyrl_Latn", "FORWARD" },
|
||||
|
||||
{ "Latin-Hebrew", "file", "t_Hebr_Latn", "REVERSE" },
|
||||
{ "Hebrew-Latin", "file", "t_Hebr_Latn", "FORWARD" },
|
||||
|
||||
{ "Latin-Arabic", "file", "t_Arab_Latn", "REVERSE" },
|
||||
{ "Arabic-Latin", "file", "t_Arab_Latn", "FORWARD" },
|
||||
|
||||
{ "Tone-Digit", "internal", "t_Tone_Digit", "FORWARD" },
|
||||
{ "Digit-Tone", "internal", "t_Tone_Digit", "REVERSE" },
|
||||
|
||||
{ "Latin-NumericPinyin", "file", "t_Latn_NPinyn", "FORWARD" },
|
||||
{ "NumericPinyin-Latin", "file", "t_Latn_NPinyn", "REVERSE" },
|
||||
|
||||
{ "Han-Spacedhan", "internal", "t_Hani_SpHan", "FORWARD" },
|
||||
{ "Spacedhan-Han", "alias", "null", "" },
|
||||
|
||||
{ "Han-Latin", "file", "t_Hani_Latn", "FORWARD" },
|
||||
//Latin-Han:file:Transliterator_Han_Latin.txt:UTF8:REVERSE # no round trip!
|
||||
{ "Latin-Han", "alias", "null", "" },
|
||||
|
||||
// These are commented out; they are only for testing
|
||||
// Latin-Han/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:REVERSE
|
||||
// Han-Latin/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:FORWARD
|
||||
|
||||
//Latin-Han/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:REVERSE
|
||||
//Han-Latin/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:FORWARD
|
||||
|
||||
{ "Latin-Greek", "file", "t_Grek_Latn", "REVERSE" },
|
||||
{ "Greek-Latin", "file", "t_Grek_Latn", "FORWARD" },
|
||||
|
||||
{ "Latin-Greek/UNGEGN", "file", "t_Grek_Latn_UNGEGN", "REVERSE" },
|
||||
{ "Greek-Latin/UNGEGN", "file", "t_Grek_Latn_UNGEGN", "FORWARD" },
|
||||
|
||||
{ "Latin-Katakana", "file", "t_Latn_Kana", "FORWARD" },
|
||||
{ "Katakana-Latin", "file", "t_Latn_Kana", "REVERSE" },
|
||||
|
||||
{ "Latin-Hiragana", "file", "t_Hira_Latn", "REVERSE" },
|
||||
{ "Hiragana-Latin", "file", "t_Hira_Latn", "FORWARD" },
|
||||
|
||||
//Thai Stuff: will change if we get \b into Transliterator
|
||||
|
||||
// Java only: { "Thai-ThaiSemi", "internal", "-", "FORWARD" },
|
||||
|
||||
// Java only: { "Thai-ThaiLogical", "internal", "-", "FORWARD" },
|
||||
// Java only: { "ThaiLogical-Thai", "internal", "-", "REVERSE" },
|
||||
|
||||
// Java only: { "ThaiLogical-Latin", "internal", "-", "FORWARD" },
|
||||
// Java only: { "Latin-ThaiLogical", "internal", "-", "REVERSE" },
|
||||
|
||||
// Must use the order below!
|
||||
// We need two separate passes because of the Thai vowel reversal
|
||||
// Thai-Logical also converts spaces to semicolons. That has to be done before we insert Latin spaces.
|
||||
|
||||
// The alias argument must be ONE string, like every other alias entry in
// this table. The colons in [:thai:], [:Latin:], [:Mn:] and [:Me:] belong to
// the set pattern (POSIX-style property classes); they are not field
// separators, so the filter must not be split into several array elements.
{ "Thai-Latin", "alias", "[[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B];NFD;Thai-ThaiSemi;Any-BreakInternal;Thai-ThaiLogical;ThaiLogical-Latin;NFC", "" },
|
||||
{ "Latin-Thai", "alias", "[[:Latin:][:Mn:][:Me:] \ \;0-9|~\u00A7\u00AB\u00BB\u02B9\u0374\u2021\u02CC];NFD;Latin-ThaiLogical;ThaiLogical-Thai;NFC", "" },
|
||||
|
||||
// end of Thai Stuff
|
||||
|
||||
{ "Hiragana-Katakana", "file", "t_Hira_Kana", "FORWARD" },
|
||||
{ "Katakana-Hiragana", "file", "t_Hira_Kana", "REVERSE" },
|
||||
|
||||
{ "Any-Accents", "file", "t_Any_Accents", "FORWARD" },
|
||||
{ "Accents-Any", "file", "t_Any_Accents", "REVERSE" },
|
||||
|
||||
{ "Any-Publishing", "file", "t_Any_Publishing", "FORWARD" },
|
||||
{ "Publishing-Any", "file", "t_Any_Publishing", "REVERSE" },
|
||||
|
||||
// Korean
|
||||
// N.B. Don't end Latin-Jamo with NFC; that produces Hangul. For
|
||||
// Hangul output use Latin-Hangul.
|
||||
|
||||
{ "LowerLatin-Jamo", "internal", "t_Latn_Jamo", "FORWARD" },
|
||||
{ "Jamo-LowerLatin", "internal", "t_Latn_Jamo", "REVERSE" },
|
||||
{ "Latin-Jamo", "alias", "['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo", "" },
|
||||
{ "Jamo-Latin", "alias", "['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\uAC00-\uD7A3];NFD;Jamo-LowerLatin;NFC", "" },
|
||||
{ "Latin-Hangul", "alias", "['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo;NFC", "" },
|
||||
{ "Hangul-Latin", "alias", "['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\u3131-\u313F\u3141-\u3143\u3145-\u3163\u3200-\u321C\u3260-\u327B\uAC00-\uD7A3\uFF07\uFFA1-\uFFAF\uFFB1-\uFFB3\uFFB5-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC];NFKD;Jamo-LowerLatin;NFC", "" },
|
||||
|
||||
// Inter-Indic composed rules
|
||||
{ "Latin-InterIndic", "internal", "t_Latn_InterIndic", "FORWARD" },
|
||||
{ "Devanagari-InterIndic", "internal", "t_Deva_InterIndic", "FORWARD" },
|
||||
{ "Bengali-InterIndic", "internal", "t_Beng_InterIndic", "FORWARD" },
|
||||
{ "Gurmukhi-InterIndic", "internal", "t_Guru_InterIndic", "FORWARD" },
|
||||
{ "Gujarati-InterIndic", "internal", "t_Gujr_InterIndic", "FORWARD" },
|
||||
{ "Oriya-InterIndic", "internal", "t_Orya_InterIndic", "FORWARD" },
|
||||
{ "Tamil-InterIndic", "internal", "t_Taml_InterIndic", "FORWARD" },
|
||||
{ "Telugu-InterIndic", "internal", "t_Telu_InterIndic", "FORWARD" },
|
||||
{ "Kannada-InterIndic", "internal", "t_Knda_InterIndic", "FORWARD" },
|
||||
{ "Malayalam-InterIndic", "internal", "t_Mlym_InterIndic", "FORWARD" },
|
||||
|
||||
{ "InterIndic-Latin", "internal", "t_InterIndic_Latn", "FORWARD" },
|
||||
{ "InterIndic-Devanagari", "internal", "t_InterIndic_Deva", "FORWARD" },
|
||||
{ "InterIndic-Bengali", "internal", "t_InterIndic_Beng", "FORWARD" },
|
||||
{ "InterIndic-Gurmukhi", "internal", "t_InterIndic_Guru", "FORWARD" },
|
||||
{ "InterIndic-Gujarati", "internal", "t_InterIndic_Gujr", "FORWARD" },
|
||||
{ "InterIndic-Oriya", "internal", "t_InterIndic_Orya", "FORWARD" },
|
||||
{ "InterIndic-Tamil", "internal", "t_InterIndic_Taml", "FORWARD" },
|
||||
{ "InterIndic-Telugu", "internal", "t_InterIndic_Telu", "FORWARD" },
|
||||
{ "InterIndic-Kannada", "internal", "t_InterIndic_Knda", "FORWARD" },
|
||||
{ "InterIndic-Malayalam", "internal", "t_InterIndic_Mlym", "FORWARD" },
|
||||
|
||||
//Latin-Indic transliterators
|
||||
{ "Latin-Devanagari", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Latin-Bengali", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Latin-Gurmukhi", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Latin-Gujarati", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
{ "Latin-Oriya", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
{ "Latin-Tamil", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Tamil;NFC", "" },
|
||||
{ "Latin-Telugu", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Latin-Kannada", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Latin-Malayalam", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
|
||||
//Indic-Latin transliterators
|
||||
{ "Devanagari-Latin", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Bengali-Latin", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Gurmukhi-Latin", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Gujarati-Latin", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Oriya-Latin", "alias", "[\u0964-\u0965\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35\u0B36-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B70\u0B71];NFD;Oriya-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Tamil-Latin", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Telugu-Latin", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Kannada-Latin", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
{ "Malayalam-Latin", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
|
||||
{ "Devanagari-Bengali", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Devanagari-Gurmukhi", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Devanagari-Gujarati", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
{ "Devanagari-Oriya", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
{ "Devanagari-Tamil", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Tamil;NFC", "" },
|
||||
{ "Devanagari-Telugu", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Devanagari-Kannada", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Devanagari-Malayalam", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
|
||||
{ "Bengali-Devanagari", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Bengali-Gurmukhi", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Bengali-Gujarati", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
{ "Bengali-Oriya", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
{ "Bengali-Tamil", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Tamil;NFC", "" },
|
||||
{ "Bengali-Telugu", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Bengali-Kannada", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Bengali-Malayalam", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
|
||||
{ "Gurmukhi-Devanagari", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Gurmukhi-Bengali", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Gurmukhi-Gujarati", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
{ "Gurmukhi-Oriya", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
{ "Gurmukhi-Tamil", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Tamil;NFC", "" },
|
||||
{ "Gurmukhi-Telugu", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Gurmukhi-Kannada", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Gurmukhi-Malayalam", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
|
||||
{ "Gujarati-Devanagari", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Gujarati-Bengali", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Gujarati-Gurmukhi", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Gujarati-Oriya", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
{ "Gujarati-Tamil", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Tamil;NFC", "" },
|
||||
{ "Gujarati-Telugu", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Gujarati-Kannada", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Gujarati-Malayalam", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
|
||||
{ "Oriya-Devanagari", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Oriya-Bengali", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Oriya-Gurmukhi", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Oriya-Gujarati", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
{ "Oriya-Tamil", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Tamil;NFC", "" },
|
||||
{ "Oriya-Telugu", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Oriya-Kannada", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Oriya-Malayalam", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
|
||||
{ "Tamil-Devanagari", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Tamil-Bengali", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Tamil-Gurmukhi", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Tamil-Gujarati", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
{ "Tamil-Oriya", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
{ "Tamil-Telugu", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Tamil-Kannada", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Tamil-Malayalam", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
|
||||
{ "Telugu-Devanagari", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Telugu-Bengali", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Telugu-Gurmukhi", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Telugu-Gujarati", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
{ "Telugu-Oriya", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
{ "Telugu-Tamil", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Tamil;NFC", "" },
|
||||
{ "Telugu-Kannada", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Telugu-Malayalam", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
|
||||
{ "Kannada-Devanagari", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Kannada-Bengali", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Kannada-Gurmukhi", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Kannada-Gujarati", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
{ "Kannada-Oriya", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
{ "Kannada-Tamil", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Tamil;NFC", "" },
|
||||
{ "Kannada-Telugu", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Kannada-Malayalam", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
|
||||
{ "Malayalam-Devanagari", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Malayalam-Bengali", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Malayalam-Gurmukhi", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Malayalam-Gujarati", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
{ "Malayalam-Oriya", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
{ "Malayalam-Tamil", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Tamil;NFC", "" },
|
||||
{ "Malayalam-Telugu", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Malayalam-Kannada", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
|
||||
// eof
|
||||
}
|
||||
}
|
|
@ -19,44 +19,8 @@
|
|||
# * To REPLACE the default list and only build with a few
|
||||
# transliterators:
|
||||
# _____________________________________________________
|
||||
# | TRANSLIT_SOURCE = translit_index.txt translit_Any_Publishing.txt
|
||||
# | TRANSLIT_SOURCE = el.txt th.txt
|
||||
#
|
||||
#
|
||||
|
||||
TRANSLIT_SOURCE=t_Any_Accents.txt\
|
||||
t_Any_Publishing.txt\
|
||||
t_Arab_Latn.txt\
|
||||
t_Beng_InterIndic.txt\
|
||||
t_Cyrl_Latn.txt\
|
||||
t_Deva_InterIndic.txt\
|
||||
t_FWidth_HWidth.txt\
|
||||
t_Grek_Latn.txt\
|
||||
t_Grek_Latn_UNGEGN.txt\
|
||||
t_Gujr_InterIndic.txt\
|
||||
t_Guru_InterIndic.txt\
|
||||
t_Hani_Latn.txt\
|
||||
t_Hebr_Latn.txt\
|
||||
t_Hira_Kana.txt\
|
||||
t_Hira_Latn.txt\
|
||||
t_InterIndic_Beng.txt\
|
||||
t_InterIndic_Deva.txt\
|
||||
t_InterIndic_Gujr.txt\
|
||||
t_InterIndic_Guru.txt\
|
||||
t_InterIndic_Knda.txt\
|
||||
t_InterIndic_Latn.txt\
|
||||
t_InterIndic_Mlym.txt\
|
||||
t_InterIndic_Orya.txt\
|
||||
t_InterIndic_Taml.txt\
|
||||
t_InterIndic_Telu.txt\
|
||||
t_Knda_InterIndic.txt\
|
||||
t_Latn_InterIndic.txt\
|
||||
t_Latn_Jamo.txt\
|
||||
t_Latn_Kana.txt\
|
||||
t_Mlym_InterIndic.txt\
|
||||
t_Orya_InterIndic.txt\
|
||||
t_Taml_InterIndic.txt\
|
||||
t_Telu_InterIndic.txt\
|
||||
t_Latn_NPinyn.txt\
|
||||
t_Tone_Digit.txt\
|
||||
t_Hani_SpHan.txt\
|
||||
translit_index.txt
|
||||
TRANSLIT_SOURCE=el.txt en.txt
|
||||
|
|
|
@ -17,6 +17,9 @@
|
|||
#include "unicode/translit.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/udata.h"
|
||||
|
||||
#define U_ICUDATA_TRANSLIT U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "translit"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
|
|
@ -725,7 +725,7 @@ UnicodeString& Transliterator::getDisplayName(const UnicodeString& id,
|
|||
UnicodeString& result) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
ResourceBundle bundle(u_getDataDirectory(), inLocale, status);
|
||||
ResourceBundle bundle(U_ICUDATA_TRANSLIT, inLocale, status);
|
||||
|
||||
// Suspend checking status until later...
|
||||
|
||||
|
@ -1411,13 +1411,23 @@ UBool Transliterator::initializeRegistry() {
|
|||
}
|
||||
|
||||
/* The following code parses the index table located in
|
||||
* icu/data/translit_index.txt. The index is an n x 4 table
|
||||
* icu/data/translit/root.txt. The index is an n x 4 table
|
||||
* that follows this format:
|
||||
*
|
||||
* <id>:file:<resource>:<direction>
|
||||
* <id>:internal:<resource>:<direction>
|
||||
* <id>:alias:<getInstanceArg>:
|
||||
*
|
||||
* <id>{
|
||||
* file{
|
||||
* resource{"<resource>"}
|
||||
* direction{"<direction>"}
|
||||
* }
|
||||
* }
|
||||
* <id>{
|
||||
* internal{
|
||||
* resource{"<resource>"}
|
||||
* direction{"<direction>"}
|
||||
* }
|
||||
* }
|
||||
* <id>{
|
||||
* alias{"<getInstanceArg>"}
|
||||
* }
|
||||
* <id> is the ID of the system transliterator being defined. These
|
||||
* are public IDs enumerated by Transliterator.getAvailableIDs(),
|
||||
* unless the second field is "internal".
|
||||
|
@ -1434,10 +1444,10 @@ UBool Transliterator::initializeRegistry() {
|
|||
*
|
||||
* The extra blank field on "alias" lines is to make the array square.
|
||||
*/
|
||||
static const char translit_index[] = "translit_index";
|
||||
//static const char translit_index[] = "translit_index";
|
||||
|
||||
UResourceBundle *bundle, *transIDs, *colBund;
|
||||
bundle = ures_openDirect(0, translit_index, &status);
|
||||
bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open root bundle*/, &status);
|
||||
transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status);
|
||||
|
||||
int32_t row, maxRows;
|
||||
|
@ -1445,11 +1455,11 @@ UBool Transliterator::initializeRegistry() {
|
|||
maxRows = ures_getSize(transIDs);
|
||||
for (row = 0; row < maxRows; row++) {
|
||||
colBund = ures_getByIndex(transIDs, row, 0, &status);
|
||||
|
||||
if (U_SUCCESS(status) && ures_getSize(colBund) == 4) {
|
||||
UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status);
|
||||
UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0);
|
||||
UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status);
|
||||
if (U_SUCCESS(status)) {
|
||||
UnicodeString id = ures_getKey(colBund);
|
||||
UResourceBundle* res = ures_getNextResource(colBund, NULL, &status);
|
||||
const char* typeStr = ures_getKey(res);
|
||||
UChar type = (UChar)*typeStr;
|
||||
|
||||
if (U_SUCCESS(status)) {
|
||||
switch (type) {
|
||||
|
@ -1458,9 +1468,11 @@ UBool Transliterator::initializeRegistry() {
|
|||
// 'file' or 'internal';
|
||||
// row[2]=resource, row[3]=direction
|
||||
{
|
||||
|
||||
UnicodeString resString = ures_getUnicodeStringByKey(res, "resource", &status);
|
||||
UBool visible = (type == 0x0066 /*f*/);
|
||||
UTransDirection dir =
|
||||
(ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) ==
|
||||
(ures_getUnicodeStringByKey(res, "direction", &status).charAt(0) ==
|
||||
0x0046 /*F*/) ?
|
||||
UTRANS_FORWARD : UTRANS_REVERSE;
|
||||
registry->put(id, resString, dir, visible);
|
||||
|
@ -1468,12 +1480,13 @@ UBool Transliterator::initializeRegistry() {
|
|||
break;
|
||||
case 0x61: // 'a'
|
||||
// 'alias'; row[2]=createInstance argument
|
||||
UnicodeString resString = ures_getUnicodeString(res, &status);
|
||||
registry->put(id, resString, TRUE);
|
||||
break;
|
||||
}
|
||||
}
|
||||
ures_close(res);
|
||||
}
|
||||
|
||||
ures_close(colBund);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -202,7 +202,7 @@ Spec::Spec(const UnicodeString& theSpec) : top(theSpec) {
|
|||
UErrorCode status = U_ZERO_ERROR;
|
||||
CharString topch(top);
|
||||
Locale toploc(topch);
|
||||
res = new ResourceBundle(u_getDataDirectory(), toploc, status);
|
||||
res = new ResourceBundle(U_ICUDATA_TRANSLIT, toploc, status);
|
||||
/* test for NULL */
|
||||
if (res == 0) {
|
||||
return;
|
||||
|
@ -1208,10 +1208,10 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
|
|||
// 2-d array at static init time, as a locale language. We're
|
||||
// just using the locale mechanism to map through to a file
|
||||
// name; this in no way represents an actual locale.
|
||||
CharString ch(entry->stringArg);
|
||||
UResourceBundle *bundle = ures_openDirect(0, ch, &status);
|
||||
UnicodeString rules = ures_getUnicodeStringByKey(bundle, RB_RULE, &status);
|
||||
ures_close(bundle);
|
||||
//CharString ch(entry->stringArg);
|
||||
//UResourceBundle *bundle = ures_openDirect(0, ch, &status);
|
||||
UnicodeString rules = entry->stringArg;
|
||||
//ures_close(bundle);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
// We have a failure of some kind. Remove the ID from the
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "cstring.h"
|
||||
#include "umutex.h"
|
||||
#include "uassert.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
#ifdef XP_MAC_CONSOLE
|
||||
#include <console.h>
|
||||
|
@ -1491,6 +1492,16 @@ UBool IntlTest::assertEquals(const UnicodeString& message,
|
|||
const char* actual) {
|
||||
return assertEquals(extractToAssertBuf(message), expected, actual);
|
||||
}
|
||||
//--------------------------------------------------------------------
|
||||
// Time bomb - allows temporary behavior that expires at a given
|
||||
// release
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
UBool IntlTest::isICUVersionAtLeast(const UVersionInfo x) {
|
||||
UVersionInfo v;
|
||||
u_getVersion(v);
|
||||
return (uprv_memcmp(v, x, U_MAX_VERSION_LENGTH) >= 0);
|
||||
}
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
UBool IntlTest::assertEquals(const UnicodeString& message,
|
||||
|
|
|
@ -131,6 +131,12 @@ public:
|
|||
*/
|
||||
static float random();
|
||||
|
||||
/**
|
||||
* Ascertain the version of ICU. Useful for
|
||||
* time bomb testing
|
||||
*/
|
||||
UBool isICUVersionAtLeast(const UVersionInfo x);
|
||||
|
||||
protected:
|
||||
/* JUnit-like assertions. Each returns TRUE if it succeeds. */
|
||||
UBool assertTrue(const char* message, UBool condition, UBool quiet=FALSE);
|
||||
|
|
|
@ -345,16 +345,6 @@
|
|||
RelativePath=".\tsputil.h">
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="conversion"
|
||||
Filter="">
|
||||
<File
|
||||
RelativePath=".\convtest.cpp">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\convtest.h">
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="data & memory"
|
||||
Filter="">
|
||||
|
@ -816,6 +806,16 @@
|
|||
RelativePath=".\trnserr.h">
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="conversion"
|
||||
Filter="">
|
||||
<File
|
||||
RelativePath=".\convtest.cpp">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\convtest.h">
|
||||
</File>
|
||||
</Filter>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
|
|
|
@ -26,6 +26,9 @@
|
|||
#include "unicode/rep.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
static const UVersionInfo ICU_31 = {3,1,0,0};
|
||||
|
||||
int32_t getInt(UnicodeString str)
|
||||
{
|
||||
char buffer[20];
|
||||
|
@ -84,6 +87,9 @@ void TransliteratorAPITest::TestgetID() {
|
|||
for (i=0; i<Transliterator::countAvailableIDs(); i++){
|
||||
status = U_ZERO_ERROR;
|
||||
ID = (UnicodeString) Transliterator::getAvailableID(i);
|
||||
if(ID.indexOf("Thai")>-1 && isICUVersionAtLeast(ICU_31)){
|
||||
continue;
|
||||
}
|
||||
t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
|
||||
if(t == 0){
|
||||
errln("FAIL: " + ID);
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
#include "unesctrn.h"
|
||||
#include "uni2name.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
#include <stdio.h>
|
||||
|
||||
/***********************************************************************
|
||||
|
@ -186,6 +187,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
|
|||
}
|
||||
}
|
||||
|
||||
static const UVersionInfo ICU_31 = {3,1,0,0};
|
||||
/**
|
||||
* Make sure every system transliterator can be instantiated.
|
||||
*
|
||||
|
@ -220,6 +222,9 @@ void TransliteratorTest::TestInstantiation() {
|
|||
i + ") != getAvailableIDs().snext()");
|
||||
continue;
|
||||
}
|
||||
if(id2.indexOf("Thai")>-1 && isICUVersionAtLeast(ICU_31)){
|
||||
continue;
|
||||
}
|
||||
UParseError parseError;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
Transliterator* t = Transliterator::createInstance(id,
|
||||
|
@ -3472,7 +3477,10 @@ void TransliteratorTest::TestIncrementalProgress(void) {
|
|||
|
||||
Transliterator::getAvailableVariant(k, source, target, variant);
|
||||
UnicodeString id = source + "-" + target + "/" + variant;
|
||||
|
||||
|
||||
if(id.indexOf("Thai")>-1 && isICUVersionAtLeast(ICU_31)){
|
||||
continue;
|
||||
}
|
||||
Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln((UnicodeString)"FAIL: Could not create " + id);
|
||||
|
|
Loading…
Add table
Reference in a new issue