ICU-3925 separate Transliterator data to its own tree

X-SVN-Rev: 16095
This commit is contained in:
Ram Viswanadha 2004-08-02 20:06:55 +00:00
parent bf8a7e1793
commit 123132b8e6
89 changed files with 8683 additions and 8503 deletions

View file

@ -73,7 +73,7 @@ ICUCOL=coll
#
ICURBNF=rbnf
# ICUTRANSLIT
# ICUTRNS
# The directory that contains trnsfiles.mk files along with *.txt transliterator files
#
ICUTRNS=translit
@ -248,7 +248,10 @@ TRANLIT_SOURCE=$(TRANSLIT_SOURCE) $(TRANSLIT_SOURCE_LOCAL)
!MESSAGE Warning: cannot find "trnsfiles.mk"
!ENDIF
TRANSLIT_FILES = $(TRANSLIT_SOURCE:.txt=.res)
TRANSLIT_FILES = $(ICUTRNS)\root.txt $(TRANSLIT_ALIAS_SOURCE) $(TRANSLIT_SOURCE)
TRANSLIT_RES_FILES = $(TRANSLIT_FILES:.txt =.res translit\)
TRANSLIT_RES_FILES = $(TRANSLIT_RES_FILES:.txt=.res)
TRANSLIT_RES_FILES = $(TRANSLIT_RES_FILES:translit\ =translit\)
# Read list of miscellaneous resource bundle files
!IF EXISTS("$(ICUSRCDATA)\$(ICUMISC2)\miscfiles.mk")
@ -268,6 +271,7 @@ MISC_FILES = $(MISC_SOURCE:.txt=.res)
INDEX_RES_FILES = res_index.res
INDEX_COL_FILES = $(ICUCOL)\res_index.res
INDEX_RBNF_FILES = $(ICURBNF)\res_index.res
#INDEX_TRANSLIT_FILES = $(ICUTRNS)\res_index.res
#
# Break iterator data files.
@ -276,7 +280,7 @@ BRK_SOURCE_FILES = sent.txt char.txt line.txt word.txt title.txt line_th.txt wor
BRK_FILES=$(BRK_SOURCE_FILES:.txt=.brk)
# don't include COL_FILES
ALL_RES = $(INDEX_RES_FILES) $(RB_FILES) $(TRANSLIT_FILES) $(MISC_FILES)
ALL_RES = $(INDEX_RES_FILES) $(RB_FILES) $(MISC_FILES)
#############################################################################
#
@ -293,7 +297,7 @@ ALL : GODATA "$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" "$(TESTDATAOUT)\testdata.dat"
#
# testdata - nmake will invoke pkgdata, which will create testdata.dat
#
"$(TESTDATAOUT)\testdata.dat": "$(ICUBLD)\ucadata.icu" $(TRANSLIT_FILES) $(MISC_FILES) $(RB_FILES) {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe
"$(TESTDATAOUT)\testdata.dat": "$(ICUBLD)\ucadata.icu" $(TRANSLIT_RES_FILES) $(MISC_FILES) $(RB_FILES) {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe
@cd "$(TESTDATA)"
@echo building testdata...
nmake /nologo /f "$(TESTDATA)\testdata.mak" TESTDATA=. ICUTOOLS="$(ICUTOOLS)" ICUP="$(ICUP)" CFG=$(CFG) TESTDATAOUT="$(TESTDATAOUT)" ICUDATA="$(ICUDATA)" TESTDATABLD="$(TESTDATABLD)"
@ -303,7 +307,7 @@ ALL : GODATA "$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" "$(TESTDATAOUT)\testdata.dat"
# move the .dll and .lib files to their final destination afterwards.
# The $(U_ICUDATA_NAME).lib and $(U_ICUDATA_NAME).exp should already be in the right place due to stubdata.
#
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUP)\bin\pkgdata.exe" $(CNV_FILES) "$(ICUBLD)\uprops.icu" "$(ICUBLD)\unames.icu" "$(ICUBLD)\pnames.icu" "$(ICUBLD)\unorm.icu" "$(ICUBLD)\cnvalias.icu" "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\invuca.icu" "$(ICUBLD)\uidna.spp" $(BRK_FILES) $(INDEX_COL_FILES) $(COL_COL_FILES) $(INDEX_RBNF_FILES) $(RBNF_RES_FILES) $(ALL_RES) "$(ICUTMP)\icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
"$(DLL_OUTPUT)\$(U_ICUDATA_NAME).dll" : "$(ICUP)\bin\pkgdata.exe" $(CNV_FILES) "$(ICUBLD)\uprops.icu" "$(ICUBLD)\unames.icu" "$(ICUBLD)\pnames.icu" "$(ICUBLD)\unorm.icu" "$(ICUBLD)\cnvalias.icu" "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\invuca.icu" "$(ICUBLD)\uidna.spp" $(BRK_FILES) $(INDEX_COL_FILES) $(COL_COL_FILES) $(INDEX_RBNF_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(ALL_RES) "$(ICUTMP)\icudata.res" "$(ICUP)\source\stubdata\stubdatabuilt.txt"
@echo Building icu data
cd "$(ICUBLD)"
@"$(ICUP)\bin\pkgdata" -f -e $(U_ICUDATA_NAME) -v $(ICU_PACKAGE_MODE) -c -p $(ICUPKG) -T "$(ICUTMP)" -L $(U_ICUDATA_NAME) -d "$(ICUBLD)" -s . <<"$(ICUTMP)\pkgdatain.txt"
@ -325,6 +329,8 @@ $(ICUCOL)\res_index.res
$(RBNF_RES_FILES:.res =.res
)
$(ICURBNF)\res_index.res
$(TRANSLIT_RES_FILES:.res =.res
)
$(BRK_FILES:.brk =.brk
)
<<KEEP
@ -342,6 +348,7 @@ GODATA :
@if not exist "$(ICUBLD)\$(NULL)" mkdir "$(ICUBLD)"
@if not exist "$(ICUBLD)\$(ICUCOL)\$(NULL)" mkdir "$(ICUBLD)\$(ICUCOL)"
@if not exist "$(ICUBLD)\$(ICURBNF)\$(NULL)" mkdir "$(ICUBLD)\$(ICURBNF)"
@if not exist "$(ICUBLD)\$(ICUTRNS)\$(NULL)" mkdir "$(ICUBLD)\$(ICUTRNS)"
@if not exist "$(TESTDATAOUT)\$(NULL)" mkdir "$(TESTDATAOUT)"
@if not exist "$(TESTDATABLD)\$(NULL)" mkdir "$(TESTDATABLD)"
@cd "$(ICUBLD)"
@ -364,6 +371,9 @@ CLEAN : GODATA
@cd "$(ICUBLD)\$(ICURBNF)"
-@erase "*.res"
-@erase "*.txt"
@cd "$(ICUBLD)\$(ICUTRNS)"
-@erase "*.res"
-@erase "*.txt"
@cd "$(ICUOUT)"
-@erase "*.dat"
@cd "$(ICUTMP)"
@ -393,11 +403,6 @@ CLEAN : GODATA
@echo Generating converters
@"$(ICUTOOLS)\makeconv\$(CFG)\makeconv" -c -d"$(ICUBLD)" $<
# Batch inference rule for creating transliterator resource files
{$(ICUSRCDATA_RELATIVE_PATH)\$(ICUTRNS)}.txt.res::
@echo Making Transliterator Resource Bundle files
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -d"$(ICUBLD)" $<
# Batch inference rule for creating miscellaneous resource files
# TODO: -q option is specified to squelch the 120+ warnings about
# empty intvectors and binary elements. Unfortunately, this may
@ -412,6 +417,18 @@ CLEAN : GODATA
@echo Making Locale Resource Bundle files
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -d"$(ICUBLD)" $<
$(INDEX_RES_FILES):
@echo Generating <<res_index.txt
// Warning this file is automatically generated
res_index {
InstalledLocales {
$(GENRB_SOURCE:.txt= {""}
)
}
}
<<KEEP
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -d"$(ICUBLD)" .\res_index.txt
{$(ICUSRCDATA_RELATIVE_PATH)\$(ICUCOL)}.txt{$(ICUCOL)}.res::
@echo Making Collation files
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -i "$(ICUBLD)" -d"$(ICUBLD)\$(ICUCOL)" $<
@ -444,18 +461,10 @@ res_index {
<<KEEP
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -d"$(ICUBLD)\$(ICURBNF)" .\$(ICURBNF)\res_index.txt
{$(ICUSRCDATA_RELATIVE_PATH)\$(ICUTRNS)}.txt{$(ICUTRNS)}.res::
@echo Making Transliterator files
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -i "$(ICUBLD)" -d"$(ICUBLD)\$(ICUTRNS)" $<
$(INDEX_RES_FILES):
@echo Generating <<res_index.txt
// Warning this file is automatically generated
res_index {
InstalledLocales {
$(GENRB_SOURCE:.txt= {""}
)
}
}
<<KEEP
@"$(ICUTOOLS)\genrb\$(CFG)\genrb" -k -d"$(ICUBLD)" .\res_index.txt
# DLL version information
# If you modify this, modify winmode.c in pkgdata.
@ -477,7 +486,7 @@ res_index {
# Targets for uprops.icu
"$(ICUBLD)\uprops.icu": "$(ICUUNIDATA)\*.txt" "$(ICUTOOLS)\genprops\$(CFG)\genprops.exe" "$(ICUBLD)\pnames.icu"
@echo Creating data file for Unicode Character Properties
@"$(ICUTOOLS)\genprops\$(CFG)\genprops" -u $(UNICODE_VERSION) -i "$(ICUBLD)" -d "$(ICUBLD)" -s "$(ICUUNIDATA)"
@"$(ICUTOOLS)\genprops\$(CFG)\genprops" -u $(UNICODE_VERSION) -i "$(ICUBLD)" -s "$(ICUUNIDATA)" -d "$(ICUBLD)"
# Targets for unorm.icu
"$(ICUBLD)\unorm.icu": "$(ICUUNIDATA)\*.txt" "$(ICUTOOLS)\gennorm\$(CFG)\gennorm.exe"
@ -502,6 +511,6 @@ res_index {
$(UCM_SOURCE) : {"$(ICUTOOLS)\makeconv\$(CFG)"}makeconv.exe
$(TRANSLIT_SOURCE) $(MISC_SOURCE) $(RB_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES): {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\uprops.icu" "$(ICUBLD)\unorm.icu"
$(MISC_SOURCE) $(RB_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES): {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe "$(ICUBLD)\ucadata.icu" "$(ICUBLD)\uprops.icu" "$(ICUBLD)\unorm.icu"
$(BRK_SOURCE_FILES) : "$(ICUBLD)\uprops.icu" "$(ICUBLD)\unames.icu" "$(ICUBLD)\pnames.icu" "$(ICUBLD)\unorm.icu"

View file

@ -0,0 +1,290 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Keyboard-entry rules: an ASCII mnemonic wrapped in $pre ... $post (by default
# "<" and ">") is mapped to a diacritic or special letter, and back again.
# Normalization step: NFD is named for the forward direction, NFC (in parentheses)
# for the reverse direction.
:: NFD (NFC) ;
# to do: make reversible
# Define the special delimiter characters that bracket each mnemonic.
# Variants of this could use different characters, or set one or the other to null.
$pre = \< ;
$post = \> ;
# Provide keyboard equivalents for common diacritics used in transliteration
$pre \` $post <> \u0300 ; # COMBINING GRAVE ACCENT
$pre \' $post <> \u0301 ; # COMBINING ACUTE ACCENT
$pre \^ $post <> \u0302 ; # COMBINING CIRCUMFLEX ACCENT
$pre \~ $post <> \u0303 ; # COMBINING TILDE
$pre \- $post <> \u0304 ; # COMBINING MACRON
$pre \" $post <> \u0308 ; # COMBINING DIAERESIS
$pre \* $post <> \u030A ; # COMBINING RING ABOVE
$pre \, $post <> \u0327 ; # COMBINING CEDILLA
$pre '/' $post <> \u0338 ; # COMBINING LONG SOLIDUS OVERLAY
$pre \. $post <> \u0323 ; # COMBINING DOT BELOW
# Combine common characters
# (each mnemonic is closed by $post, so a short mnemonic such as "A" cannot
# match inside a longer one such as "AE")
$pre AE $post <> \u00C6 ; # LATIN CAPITAL LETTER AE
$pre ae $post <> \u00E6 ; # LATIN SMALL LETTER AE
$pre D $post <> \u00D0 ; # LATIN CAPITAL LETTER ETH
$pre d $post <> \u00F0 ; # LATIN SMALL LETTER ETH
$pre O'/' $post <> \u00D8 ; # LATIN CAPITAL LETTER O WITH STROKE
$pre o'/' $post <> \u00F8 ; # LATIN SMALL LETTER O WITH STROKE
$pre TH $post <> \u00DE ; # LATIN CAPITAL LETTER THORN
$pre th $post <> \u00FE ; # LATIN SMALL LETTER THORN
$pre OE $post <> \u0152 ; # LATIN CAPITAL LIGATURE OE
$pre oe $post <> \u0153 ; # LATIN SMALL LIGATURE OE
$pre ss $post <> \u00DF ; # LATIN SMALL LETTER SHARP S
$pre NG $post <> \u014A ; # LATIN CAPITAL LETTER ENG
$pre ng $post <> \u014B ; # LATIN SMALL LETTER ENG
$pre T $post <> \u0398 ; # GREEK CAPITAL LETTER THETA
$pre t $post <> \u03B8 ; # GREEK SMALL LETTER THETA
$pre SH $post <> \u01A9 ; # LATIN CAPITAL LETTER ESH
$pre sh $post <> \u0283 ; # LATIN SMALL LETTER ESH
$pre ZH $post <> \u01B7 ; # LATIN CAPITAL LETTER EZH
$pre zh $post <> \u0292 ; # LATIN SMALL LETTER EZH
$pre U $post <> \u01B1 ; # LATIN CAPITAL LETTER UPSILON
$pre u $post <> \u028A ; # LATIN SMALL LETTER UPSILON
$pre A $post <> \u018F ; # LATIN CAPITAL LETTER SCHWA
$pre a $post <> \u0259 ; # LATIN SMALL LETTER SCHWA
$pre O $post <> \u0186 ; # LATIN CAPITAL LETTER OPEN O
$pre o $post <> \u0254 ; # LATIN SMALL LETTER OPEN O
$pre E $post <> \u0190 ; # LATIN CAPITAL LETTER OPEN E
$pre e $post <> \u025B ; # LATIN SMALL LETTER OPEN E
# Three letters for which this file defines no uppercase counterpart
$pre '?' $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP
$pre i $post <> \u026A ; # LATIN LETTER SMALL CAPITAL I
$pre v $post <> \u028C ; # LATIN SMALL LETTER TURNED V
# Additional Characters that may be added in the future
# $pre XXX $post <> \u0306 ; # COMBINING BREVE
# $pre XXX $post <> \u0307 ; # COMBINING DOT ABOVE
# $pre XXX $post <> \u0309 ; # COMBINING HOOK ABOVE
# $pre XXX $post <> \u030B ; # COMBINING DOUBLE ACUTE ACCENT
# $pre XXX $post <> \u030C ; # COMBINING CARON
# $pre XXX $post <> \u030F ; # COMBINING DOUBLE GRAVE ACCENT
# $pre XXX $post <> \u0311 ; # COMBINING INVERTED BREVE
# $pre XXX $post <> \u0313 ; # COMBINING COMMA ABOVE
# $pre XXX $post <> \u0314 ; # COMBINING REVERSED COMMA ABOVE
# $pre XXX $post <> \u031B ; # COMBINING HORN
# $pre XXX $post <> \u0324 ; # COMBINING DIAERESIS BELOW
# $pre XXX $post <> \u0325 ; # COMBINING RING BELOW
# $pre XXX $post <> \u0326 ; # COMBINING COMMA BELOW
# $pre XXX $post <> \u0328 ; # COMBINING OGONEK
# $pre XXX $post <> \u032D ; # COMBINING CIRCUMFLEX ACCENT BELOW
# $pre XXX $post <> \u032E ; # COMBINING BREVE BELOW
# $pre XXX $post <> \u0330 ; # COMBINING TILDE BELOW
# $pre XXX $post <> \u0331 ; # COMBINING MACRON BELOW
# $pre YYY $post <> \u00AA ; # FEMININE ORDINAL INDICATOR
# $pre YYY $post <> \u00BA ; # MASCULINE ORDINAL INDICATOR
# $pre YYY $post <> \u0110 ; # LATIN CAPITAL LETTER D WITH STROKE
# $pre YYY $post <> \u0111 ; # LATIN SMALL LETTER D WITH STROKE
# $pre YYY $post <> \u0126 ; # LATIN CAPITAL LETTER H WITH STROKE
# $pre YYY $post <> \u0127 ; # LATIN SMALL LETTER H WITH STROKE
# $pre YYY $post <> \u0131 ; # LATIN SMALL LETTER DOTLESS I
# $pre YYY $post <> \u0138 ; # LATIN SMALL LETTER KRA
# $pre YYY $post <> \u013F ; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
# $pre YYY $post <> \u0140 ; # LATIN SMALL LETTER L WITH MIDDLE DOT
# $pre YYY $post <> \u0141 ; # LATIN CAPITAL LETTER L WITH STROKE
# $pre YYY $post <> \u0142 ; # LATIN SMALL LETTER L WITH STROKE
# $pre YYY $post <> \u0149 ; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
# $pre YYY $post <> \u0166 ; # LATIN CAPITAL LETTER T WITH STROKE
# $pre YYY $post <> \u0167 ; # LATIN SMALL LETTER T WITH STROKE
# $pre YYY $post <> \u017F ; # LATIN SMALL LETTER LONG S
# $pre YYY $post <> \u0180 ; # LATIN SMALL LETTER B WITH STROKE
# $pre YYY $post <> \u0181 ; # LATIN CAPITAL LETTER B WITH HOOK
# $pre YYY $post <> \u0182 ; # LATIN CAPITAL LETTER B WITH TOPBAR
# $pre YYY $post <> \u0183 ; # LATIN SMALL LETTER B WITH TOPBAR
# $pre YYY $post <> \u0184 ; # LATIN CAPITAL LETTER TONE SIX
# $pre YYY $post <> \u0185 ; # LATIN SMALL LETTER TONE SIX
# $pre YYY $post <> \u0187 ; # LATIN CAPITAL LETTER C WITH HOOK
# $pre YYY $post <> \u0188 ; # LATIN SMALL LETTER C WITH HOOK
# $pre YYY $post <> \u0189 ; # LATIN CAPITAL LETTER AFRICAN D
# $pre YYY $post <> \u018A ; # LATIN CAPITAL LETTER D WITH HOOK
# $pre YYY $post <> \u018B ; # LATIN CAPITAL LETTER D WITH TOPBAR
# $pre YYY $post <> \u018C ; # LATIN SMALL LETTER D WITH TOPBAR
# $pre YYY $post <> \u018D ; # LATIN SMALL LETTER TURNED DELTA
# $pre YYY $post <> \u018E ; # LATIN CAPITAL LETTER REVERSED E
# $pre YYY $post <> \u0191 ; # LATIN CAPITAL LETTER F WITH HOOK
# $pre YYY $post <> \u0192 ; # LATIN SMALL LETTER F WITH HOOK
# $pre YYY $post <> \u0193 ; # LATIN CAPITAL LETTER G WITH HOOK
# $pre YYY $post <> \u0194 ; # LATIN CAPITAL LETTER GAMMA
# $pre YYY $post <> \u0195 ; # LATIN SMALL LETTER HV
# $pre YYY $post <> \u0196 ; # LATIN CAPITAL LETTER IOTA
# $pre YYY $post <> \u0197 ; # LATIN CAPITAL LETTER I WITH STROKE
# $pre YYY $post <> \u0198 ; # LATIN CAPITAL LETTER K WITH HOOK
# $pre YYY $post <> \u0199 ; # LATIN SMALL LETTER K WITH HOOK
# $pre YYY $post <> \u019A ; # LATIN SMALL LETTER L WITH BAR
# $pre YYY $post <> \u019B ; # LATIN SMALL LETTER LAMBDA WITH STROKE
# $pre YYY $post <> \u019C ; # LATIN CAPITAL LETTER TURNED M
# $pre YYY $post <> \u019D ; # LATIN CAPITAL LETTER N WITH LEFT HOOK
# $pre YYY $post <> \u019E ; # LATIN SMALL LETTER N WITH LONG RIGHT LEG
# $pre YYY $post <> \u019F ; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
# $pre YYY $post <> \u01A2 ; # LATIN CAPITAL LETTER OI
# $pre YYY $post <> \u01A3 ; # LATIN SMALL LETTER OI
# $pre YYY $post <> \u01A4 ; # LATIN CAPITAL LETTER P WITH HOOK
# $pre YYY $post <> \u01A5 ; # LATIN SMALL LETTER P WITH HOOK
# $pre YYY $post <> \u01A6 ; # LATIN LETTER YR
# $pre YYY $post <> \u01A7 ; # LATIN CAPITAL LETTER TONE TWO
# $pre YYY $post <> \u01A8 ; # LATIN SMALL LETTER TONE TWO
# $pre YYY $post <> \u01AA ; # LATIN LETTER REVERSED ESH LOOP
# $pre YYY $post <> \u01AB ; # LATIN SMALL LETTER T WITH PALATAL HOOK
# $pre YYY $post <> \u01AC ; # LATIN CAPITAL LETTER T WITH HOOK
# $pre YYY $post <> \u01AD ; # LATIN SMALL LETTER T WITH HOOK
# $pre YYY $post <> \u01AE ; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
# $pre YYY $post <> \u01B2 ; # LATIN CAPITAL LETTER V WITH HOOK
# $pre YYY $post <> \u01B3 ; # LATIN CAPITAL LETTER Y WITH HOOK
# $pre YYY $post <> \u01B4 ; # LATIN SMALL LETTER Y WITH HOOK
# $pre YYY $post <> \u01B5 ; # LATIN CAPITAL LETTER Z WITH STROKE
# $pre YYY $post <> \u01B6 ; # LATIN SMALL LETTER Z WITH STROKE
# $pre YYY $post <> \u01B8 ; # LATIN CAPITAL LETTER EZH REVERSED
# $pre YYY $post <> \u01B9 ; # LATIN SMALL LETTER EZH REVERSED
# $pre YYY $post <> \u01BA ; # LATIN SMALL LETTER EZH WITH TAIL
# $pre YYY $post <> \u01BB ; # LATIN LETTER TWO WITH STROKE
# $pre YYY $post <> \u01BC ; # LATIN CAPITAL LETTER TONE FIVE
# $pre YYY $post <> \u01BD ; # LATIN SMALL LETTER TONE FIVE
# $pre YYY $post <> \u01BE ; # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE
# $pre YYY $post <> \u01BF ; # LATIN LETTER WYNN
# $pre YYY $post <> \u01C0 ; # LATIN LETTER DENTAL CLICK
# $pre YYY $post <> \u01C1 ; # LATIN LETTER LATERAL CLICK
# $pre YYY $post <> \u01C2 ; # LATIN LETTER ALVEOLAR CLICK
# $pre YYY $post <> \u01C3 ; # LATIN LETTER RETROFLEX CLICK
# $pre YYY $post <> \u01C4 ; # LATIN CAPITAL LETTER DZ WITH CARON
# $pre YYY $post <> \u01C5 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
# $pre YYY $post <> \u01C6 ; # LATIN SMALL LETTER DZ WITH CARON
# $pre YYY $post <> \u01C7 ; # LATIN CAPITAL LETTER LJ
# $pre YYY $post <> \u01C8 ; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
# $pre YYY $post <> \u01C9 ; # LATIN SMALL LETTER LJ
# $pre YYY $post <> \u01CA ; # LATIN CAPITAL LETTER NJ
# $pre YYY $post <> \u01CB ; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
# $pre YYY $post <> \u01CC ; # LATIN SMALL LETTER NJ
# $pre YYY $post <> \u01DD ; # LATIN SMALL LETTER TURNED E
# $pre YYY $post <> \u01E4 ; # LATIN CAPITAL LETTER G WITH STROKE
# $pre YYY $post <> \u01E5 ; # LATIN SMALL LETTER G WITH STROKE
# $pre YYY $post <> \u01F1 ; # LATIN CAPITAL LETTER DZ
# $pre YYY $post <> \u01F2 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
# $pre YYY $post <> \u01F3 ; # LATIN SMALL LETTER DZ
# $pre YYY $post <> \u01F6 ; # LATIN CAPITAL LETTER HWAIR
# $pre YYY $post <> \u01F7 ; # LATIN CAPITAL LETTER WYNN
# $pre YYY $post <> \u021C ; # LATIN CAPITAL LETTER YOGH
# $pre YYY $post <> \u021D ; # LATIN SMALL LETTER YOGH
# $pre YYY $post <> \u0222 ; # LATIN CAPITAL LETTER OU
# $pre YYY $post <> \u0223 ; # LATIN SMALL LETTER OU
# $pre YYY $post <> \u0224 ; # LATIN CAPITAL LETTER Z WITH HOOK
# $pre YYY $post <> \u0225 ; # LATIN SMALL LETTER Z WITH HOOK
# $pre YYY $post <> \u0250 ; # LATIN SMALL LETTER TURNED A
# $pre YYY $post <> \u0251 ; # LATIN SMALL LETTER ALPHA
# $pre YYY $post <> \u0252 ; # LATIN SMALL LETTER TURNED ALPHA
# $pre YYY $post <> \u0253 ; # LATIN SMALL LETTER B WITH HOOK
# $pre YYY $post <> \u0255 ; # LATIN SMALL LETTER C WITH CURL
# $pre YYY $post <> \u0256 ; # LATIN SMALL LETTER D WITH TAIL
# $pre YYY $post <> \u0257 ; # LATIN SMALL LETTER D WITH HOOK
# $pre YYY $post <> \u0258 ; # LATIN SMALL LETTER REVERSED E
# $pre YYY $post <> \u025A ; # LATIN SMALL LETTER SCHWA WITH HOOK
# $pre YYY $post <> \u025C ; # LATIN SMALL LETTER REVERSED OPEN E
# $pre YYY $post <> \u025D ; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
# $pre YYY $post <> \u025E ; # LATIN SMALL LETTER CLOSED REVERSED OPEN E
# $pre YYY $post <> \u025F ; # LATIN SMALL LETTER DOTLESS J WITH STROKE
# $pre YYY $post <> \u0260 ; # LATIN SMALL LETTER G WITH HOOK
# $pre YYY $post <> \u0261 ; # LATIN SMALL LETTER SCRIPT G
# $pre YYY $post <> \u0262 ; # LATIN LETTER SMALL CAPITAL G
# $pre YYY $post <> \u0263 ; # LATIN SMALL LETTER GAMMA
# $pre YYY $post <> \u0264 ; # LATIN SMALL LETTER RAMS HORN
# $pre YYY $post <> \u0265 ; # LATIN SMALL LETTER TURNED H
# $pre YYY $post <> \u0266 ; # LATIN SMALL LETTER H WITH HOOK
# $pre YYY $post <> \u0267 ; # LATIN SMALL LETTER HENG WITH HOOK
# $pre YYY $post <> \u0268 ; # LATIN SMALL LETTER I WITH STROKE
# $pre YYY $post <> \u0269 ; # LATIN SMALL LETTER IOTA
# $pre YYY $post <> \u026B ; # LATIN SMALL LETTER L WITH MIDDLE TILDE
# $pre YYY $post <> \u026C ; # LATIN SMALL LETTER L WITH BELT
# $pre YYY $post <> \u026D ; # LATIN SMALL LETTER L WITH RETROFLEX HOOK
# $pre YYY $post <> \u026E ; # LATIN SMALL LETTER LEZH
# $pre YYY $post <> \u026F ; # LATIN SMALL LETTER TURNED M
# $pre YYY $post <> \u0270 ; # LATIN SMALL LETTER TURNED M WITH LONG LEG
# $pre YYY $post <> \u0271 ; # LATIN SMALL LETTER M WITH HOOK
# $pre YYY $post <> \u0272 ; # LATIN SMALL LETTER N WITH LEFT HOOK
# $pre YYY $post <> \u0273 ; # LATIN SMALL LETTER N WITH RETROFLEX HOOK
# $pre YYY $post <> \u0274 ; # LATIN LETTER SMALL CAPITAL N
# $pre YYY $post <> \u0275 ; # LATIN SMALL LETTER BARRED O
# $pre YYY $post <> \u0276 ; # LATIN LETTER SMALL CAPITAL OE
# $pre YYY $post <> \u0277 ; # LATIN SMALL LETTER CLOSED OMEGA
# $pre YYY $post <> \u0278 ; # LATIN SMALL LETTER PHI
# $pre YYY $post <> \u0279 ; # LATIN SMALL LETTER TURNED R
# $pre YYY $post <> \u027A ; # LATIN SMALL LETTER TURNED R WITH LONG LEG
# $pre YYY $post <> \u027B ; # LATIN SMALL LETTER TURNED R WITH HOOK
# $pre YYY $post <> \u027C ; # LATIN SMALL LETTER R WITH LONG LEG
# $pre YYY $post <> \u027D ; # LATIN SMALL LETTER R WITH TAIL
# $pre YYY $post <> \u027E ; # LATIN SMALL LETTER R WITH FISHHOOK
# $pre YYY $post <> \u027F ; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK
# $pre YYY $post <> \u0280 ; # LATIN LETTER SMALL CAPITAL R
# $pre YYY $post <> \u0281 ; # LATIN LETTER SMALL CAPITAL INVERTED R
# $pre YYY $post <> \u0282 ; # LATIN SMALL LETTER S WITH HOOK
# $pre YYY $post <> \u0284 ; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
# $pre YYY $post <> \u0285 ; # LATIN SMALL LETTER SQUAT REVERSED ESH
# $pre YYY $post <> \u0286 ; # LATIN SMALL LETTER ESH WITH CURL
# $pre YYY $post <> \u0287 ; # LATIN SMALL LETTER TURNED T
# $pre YYY $post <> \u0288 ; # LATIN SMALL LETTER T WITH RETROFLEX HOOK
# $pre YYY $post <> \u0289 ; # LATIN SMALL LETTER U BAR
# $pre YYY $post <> \u028B ; # LATIN SMALL LETTER V WITH HOOK
# $pre YYY $post <> \u028D ; # LATIN SMALL LETTER TURNED W
# $pre YYY $post <> \u028E ; # LATIN SMALL LETTER TURNED Y
# $pre YYY $post <> \u028F ; # LATIN LETTER SMALL CAPITAL Y
# $pre YYY $post <> \u0290 ; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK
# $pre YYY $post <> \u0291 ; # LATIN SMALL LETTER Z WITH CURL
# $pre YYY $post <> \u0293 ; # LATIN SMALL LETTER EZH WITH CURL
# $pre YYY $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP
# $pre YYY $post <> \u0295 ; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE
# $pre YYY $post <> \u0296 ; # LATIN LETTER INVERTED GLOTTAL STOP
# $pre YYY $post <> \u0297 ; # LATIN LETTER STRETCHED C
# $pre YYY $post <> \u0298 ; # LATIN LETTER BILABIAL CLICK
# $pre YYY $post <> \u0299 ; # LATIN LETTER SMALL CAPITAL B
# $pre YYY $post <> \u029A ; # LATIN SMALL LETTER CLOSED OPEN E
# $pre YYY $post <> \u029B ; # LATIN LETTER SMALL CAPITAL G WITH HOOK
# $pre YYY $post <> \u029C ; # LATIN LETTER SMALL CAPITAL H
# $pre YYY $post <> \u029D ; # LATIN SMALL LETTER J WITH CROSSED-TAIL
# $pre YYY $post <> \u029E ; # LATIN SMALL LETTER TURNED K
# $pre YYY $post <> \u029F ; # LATIN LETTER SMALL CAPITAL L
# $pre YYY $post <> \u02A0 ; # LATIN SMALL LETTER Q WITH HOOK
# $pre YYY $post <> \u02A1 ; # LATIN LETTER GLOTTAL STOP WITH STROKE
# $pre YYY $post <> \u02A2 ; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE
# $pre YYY $post <> \u02A3 ; # LATIN SMALL LETTER DZ DIGRAPH
# $pre YYY $post <> \u02A4 ; # LATIN SMALL LETTER DEZH DIGRAPH
# $pre YYY $post <> \u02A5 ; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL
# $pre YYY $post <> \u02A6 ; # LATIN SMALL LETTER TS DIGRAPH
# $pre YYY $post <> \u02A7 ; # LATIN SMALL LETTER TESH DIGRAPH
# $pre YYY $post <> \u02A8 ; # LATIN SMALL LETTER TC DIGRAPH WITH CURL
# $pre YYY $post <> \u02A9 ; # LATIN SMALL LETTER FENG DIGRAPH
# $pre YYY $post <> \u02AA ; # LATIN SMALL LETTER LS DIGRAPH
# $pre YYY $post <> \u02AB ; # LATIN SMALL LETTER LZ DIGRAPH
# $pre YYY $post <> \u02AC ; # LATIN LETTER BILABIAL PERCUSSIVE
# $pre YYY $post <> \u02AD ; # LATIN LETTER BIDENTAL PERCUSSIVE
# $pre YYY $post <> \u02B0 ; # MODIFIER LETTER SMALL H
# $pre YYY $post <> \u02B1 ; # MODIFIER LETTER SMALL H WITH HOOK
# $pre YYY $post <> \u02B2 ; # MODIFIER LETTER SMALL J
# $pre YYY $post <> \u02B3 ; # MODIFIER LETTER SMALL R
# $pre YYY $post <> \u02B4 ; # MODIFIER LETTER SMALL TURNED R
# $pre YYY $post <> \u02B5 ; # MODIFIER LETTER SMALL TURNED R WITH HOOK
# $pre YYY $post <> \u02B6 ; # MODIFIER LETTER SMALL CAPITAL INVERTED R
# $pre YYY $post <> \u02B7 ; # MODIFIER LETTER SMALL W
# $pre YYY $post <> \u02B8 ; # MODIFIER LETTER SMALL Y
# $pre YYY $post <> \u02E0 ; # MODIFIER LETTER SMALL GAMMA
# $pre YYY $post <> \u02E1 ; # MODIFIER LETTER SMALL L
# $pre YYY $post <> \u02E2 ; # MODIFIER LETTER SMALL S
# $pre YYY $post <> \u02E3 ; # MODIFIER LETTER SMALL X
# $pre YYY $post <> \u02E4 ; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
# $pre YYY $post <> \u1E9A ; # LATIN SMALL LETTER A WITH RIGHT HALF RING
# $pre YYY $post <> \u207F ; # SUPERSCRIPT LATIN SMALL LETTER N
:: NFC (NFD) ;

View file

@ -0,0 +1,34 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Converts typewriter-style ASCII punctuation into publishing punctuation:
# straight and backtick quotes become curly quotes, a run of spaces collapses
# to one space, and a double hyphen becomes an em dash.
# Test case
# "The" "(quick)" ('brown') `fox' ` jumped -- "over?"
# Variables
$single = \' ;
$space = ' ' ;
$double = \" ;
$back = \` ;
# NOTE(review): U+0008 is BACKSPACE; a horizontal tab would be U+0009. $tab is
# not referenced by any rule in this file, so the value is left unchanged --
# confirm the intent before relying on it.
$tab = '\u0008' ;
# Preceding context that selects an opening quote: separators, opening
# punctuation and initial-quote punctuation. The trailing $ inside the set is
# presumably the text-boundary anchor (quote at start of text) -- confirm.
$makeRight = [[:Z:][:Ps:][:Pi:]$] ;
# fix UNIX quotes
$back $back > “ ;
# \u2018 = LEFT SINGLE QUOTATION MARK (written as an escape so that it
# survives tools that drop or normalize curly single quotes)
$back > \u2018 ;
# fix typewriter quotes, by context
$makeRight {$double} <> “ ;
$double <> ” ;
# Single quotes mirror the double-quote rules above:
# \u2018 = LEFT SINGLE QUOTATION MARK, \u2019 = RIGHT SINGLE QUOTATION MARK
$makeRight {$single} <> \u2018 ;
$single <> \u2019 ;
# fix multiple spaces and hyphens
# (a space that directly follows another space is deleted; the empty right-hand
# side is intentional)
$space {$space} > ;
'--' <> — ;

View file

@ -0,0 +1,146 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Generally follows UNGEGN <http://www.eki.ee/wgrs/rom1_ar.pdf>
# Occasionally deviates in the direction of ISO 233 <http://homepage.mac.com/sirbinks/pdf/Arabic.pdf>
# a) where required for disambiguation.
# b) with underdot instead of cedilla for letters like SAD, since
# those are explicitly in Unicode for transliteration.
# c) with extra non-Arabic-language letters, like PEH
# Does *not* do assimilation of "al", nor hyphenation.
# While it could be done, we need to determine whether a prefix "al" could
# occur other than as the definite article (since no space is used).
:: [[:Arabic:] [‎ⁿ\u060C\u061B\u061F\u0640\u064B-\u0655\u0660-\u066C\u06F0-\u06F9\uFDFC]] ;
:: NFKD (NFC);
$disambig = ̱ ;
$disambig2 = ̰ ;
$under = ̣ ;
$notAbove = [[:^ccc=0:]&[:^ccc=230:]];
# non-letters
٫ <> '.' $disambig ; # ARABIC DECIMAL SEPARATOR
٬ <> ',' $disambig ; # ARABIC THOUSANDS SEPARATOR
# ٭ <> ; # ARABIC FIVE POINTED STAR // no need to transliterate
، <> ',' ; # ARABIC COMMA
؛ <> ';' ; # ARABIC SEMICOLON
؟ <> '?' ; # ARABIC QUESTION MARK
٪ <> '%' ; # ARABIC PERCENT SIGN
۰ <> 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
۱ <> 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
۲ <> 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
۳ <> 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
۴ <> 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
۵ <> 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
۶ <> 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
۷ <> 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
۸ <> 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
۹ <> 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
٠ <> 0 ; # ARABIC-INDIC DIGIT ZERO
١ <> 1 ; # ARABIC-INDIC DIGIT ONE
٢ <> 2 ; # ARABIC-INDIC DIGIT TWO
٣ <> 3 ; # ARABIC-INDIC DIGIT THREE
٤ <> 4 ; # ARABIC-INDIC DIGIT FOUR
٥ <> 5 ; # ARABIC-INDIC DIGIT FIVE
٦ <> 6 ; # ARABIC-INDIC DIGIT SIX
٧ <> 7 ; # ARABIC-INDIC DIGIT SEVEN
٨ <> 8 ; # ARABIC-INDIC DIGIT EIGHT
٩ <> 9 ; # ARABIC-INDIC DIGIT NINE
# letters
# long vowels
َا<> ā ; # ARABIC FATHA, ARABIC LETTER ALEF
ُو <> ū ; # ARABIC DAMMA, ARABIC LETTER WAW
ِي <> ī ; # ARABIC KASRA, ARABIC LETTER YEH
# longer items moved here to prevent masking
ث <> t h $disambig ; # ARABIC LETTER THEH
ذ <> d h $disambig ; # ARABIC LETTER THAL
ش <> s h $disambig ; # ARABIC LETTER SHEEN
ص <> s $under ; # ARABIC LETTER SAD
ض <> d $under ; # ARABIC LETTER DAD
ط <> t $under ; # ARABIC LETTER TAH
ظ <> z $under ; # ARABIC LETTER ZAH
غ <> g h $disambig ; # ARABIC LETTER GHAIN
# WARNING: special case
# <t, umlaut, half-ring below> will be canonically ordered as <t, half-ring below, umlaut>
# so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
# ةٕ < ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
# Plain case: TEH MARBUTA <-> t followed directly by COMBINING DIAERESIS (U+0308).
ة <> t \u0308 ; # ARABIC LETTER TEH MARBUTA
# Reverse-only case: t, then one or more $notAbove marks (combining class
# neither 0 nor 230), then U+0308. The marks are captured by the parentheses
# and written back as $1 after the cursor marker "|" -- presumably so that the
# rules which follow can still convert them; confirm against ICU rule semantics.
ة | $1 < t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA
# non-Arabic language
ژ <> z h $disambig ; # ARABIC LETTER JEH
ڭ <> n $disambig g ; # ARABIC LETTER NG
ۋ <> v $disambig ; # ARABIC LETTER VE
ی <> y $disambig2 ; # ARABIC LETTER FARSI YEH
# Arabic language
ء <> ʾ ; # ARABIC LETTER HAMZA
ا <> a $under; # ARABIC LETTER ALEF
ب <> b ; # ARABIC LETTER BEH
ت <> t ; # ARABIC LETTER TEH
ج <> j ; # ARABIC LETTER JEEM
ح <> h $under ; # ARABIC LETTER HAH
خ <> k h $disambig ; # ARABIC LETTER KHAH
د <> d ; # ARABIC LETTER DAL
ر <> r ; # ARABIC LETTER REH
ز <> z ; # ARABIC LETTER ZAIN
س <> s ; # ARABIC LETTER SEEN
ع <> ʿ ; # ARABIC LETTER AIN
ـ > ; # ARABIC TATWEEL
ف <> f ; # ARABIC LETTER FEH
ق <> q ; # ARABIC LETTER QAF
ك <> k ; # ARABIC LETTER KAF
ل <> l ; # ARABIC LETTER LAM
م <> m ; # ARABIC LETTER MEEM
ن <> n ; # ARABIC LETTER NOON
ه <> h ; # ARABIC LETTER HEH
و <> w ; # ARABIC LETTER WAW
ى <> y $disambig ; # ARABIC LETTER ALEF MAKSURA
ي <> y ; # ARABIC LETTER YEH
ً <> aⁿ ; # ARABIC FATHATAN
ٌ <> uⁿ ; # ARABIC DAMMATAN
ٍ <> iⁿ ; # ARABIC KASRATAN
َ <> a ; # ARABIC FATHA
ُ <> u ; # ARABIC DAMMA
ِ <> i ; # ARABIC KASRA
ّ <> ̃ ; # ARABIC SHADDA
ْ <> ̊ ; # ARABIC SUKUN
# special combining marks
ٓ <> ̂ ; # ARABIC MADDAH ABOVE
ٔ <> ̉ ; # ARABIC HAMZA ABOVE
ٕ <> ̹ ; # ARABIC HAMZA BELOW
# Some non-Arabic language (not in UNGEGN)
پ <> p ; # ARABIC LETTER PEH
چ <> c h $disambig ; # ARABIC LETTER TCHEH
ڤ <> v ; # ARABIC LETTER VEH
# ڥ <> v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
# ڢ <> f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
گ <> g ; # ARABIC LETTER GAF
# fallbacks
| s < c } [eiy];
| k < c ;
| i < e ;
| u < o ;
| ks < x ;
| n < ‎ⁿ;
:: (lower) ;
::NFC (NFD);
:: ( [[:Latin:] [%,.0-9;?\u02BE-\u02BF\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339\u037E]] );

View file

@ -0,0 +1,103 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Bengali-InterIndic
# One-way (>) mapping from Bengali code points to the InterIndic private-use
# code points U+E0xx. Most entries keep the low byte (U+09xx -> U+E0xx); the
# additions U+09F0..U+09FA map to U+E071..U+E07B, and the shared DANDA
# characters U+0964/U+0965 map to U+E064/U+E065.
#
# The two-code-point vowel sequences come first so that the single-code-point
# rule for \u09C7 further down does not mask them.
\u09C7\u09BE>\uE04B; # VOWEL SIGN O
\u09C7\u09D7>\uE04C; # VOWEL SIGN AU
\u0981>\uE001; # SIGN CANDRABINDU
\u0982>\uE002; # SIGN ANUSVARA
\u0983>\uE003; # SIGN VISARGA
\u0985>\uE005; # LETTER A
\u0986>\uE006; # LETTER AA
\u0987>\uE007; # LETTER I
\u0988>\uE008; # LETTER II
\u0989>\uE009; # LETTER U
\u098A>\uE00A; # LETTER UU
\u098B>\uE00B; # LETTER VOCALIC R
\u098C>\uE00C; # LETTER VOCALIC L
\u098F>\uE00F; # LETTER E
\u0990>\uE010; # LETTER AI
\u0993>\uE013; # LETTER O
\u0994>\uE014; # LETTER AU
\u0995>\uE015; # LETTER KA
\u0996>\uE016; # LETTER KHA
\u0997>\uE017; # LETTER GA
\u0998>\uE018; # LETTER GHA
\u0999>\uE019; # LETTER NGA
\u099A>\uE01A; # LETTER CA
\u099B>\uE01B; # LETTER CHA
\u099C>\uE01C; # LETTER JA
\u099D>\uE01D; # LETTER JHA
\u099E>\uE01E; # LETTER NYA
\u099F>\uE01F; # LETTER TTA
\u09A0>\uE020; # LETTER TTHA
\u09A1>\uE021; # LETTER DDA
\u09A2>\uE022; # LETTER DDHA
\u09A3>\uE023; # LETTER NNA
\u09A4>\uE024; # LETTER TA
\u09A5>\uE025; # LETTER THA
\u09A6>\uE026; # LETTER DA
\u09A7>\uE027; # LETTER DHA
\u09A8>\uE028; # LETTER NA
\u09AA>\uE02A; # LETTER PA
\u09AB>\uE02B; # LETTER PHA
\u09AC>\uE02C; # LETTER BA
\u09AD>\uE02D; # LETTER BHA
\u09AE>\uE02E; # LETTER MA
\u09AF>\uE02F; # LETTER YA
\u09B0>\uE030; # LETTER RA
\u09B2>\uE032; # LETTER LA
\u09B6>\uE036; # LETTER SHA
\u09B7>\uE037; # LETTER SSA
\u09B8>\uE038; # LETTER SA
\u09B9>\uE039; # LETTER HA
\u09BC>\uE03C; # SIGN NUKTA
\u09BD>\uE03D; # SIGN AVAGRAHA
\u09BE>\uE03E; # VOWEL SIGN AA
\u09BF>\uE03F; # VOWEL SIGN I
\u09C0>\uE040; # VOWEL SIGN II
\u09C1>\uE041; # VOWEL SIGN U
\u09C2>\uE042; # VOWEL SIGN UU
\u09C3>\uE043; # VOWEL SIGN VOCALIC R
\u09C4>\uE044; # VOWEL SIGN VOCALIC RR
\u09C7>\uE047; # VOWEL SIGN E
\u09C8>\uE048; # VOWEL SIGN AI
# Precomposed forms of the two sequences handled at the top of the file
\u09CB>\uE04B; # VOWEL SIGN O
\u09CC>\uE04C; # VOWEL SIGN AU
#
\u09CD>\uE04D; # SIGN VIRAMA
\u09D7>\uE057; # AU LENGTH MARK
#
\u09E0>\uE060; # LETTER VOCALIC RR
\u09E1>\uE061; # LETTER VOCALIC LL
\u09E2>\uE062; # VOWEL SIGN VOCALIC L
\u09E3>\uE063; # VOWEL SIGN VOCALIC LL
\u09E6>\uE066; # DIGIT ZERO
\u09E7>\uE067; # DIGIT ONE
\u09E8>\uE068; # DIGIT TWO
\u09E9>\uE069; # DIGIT THREE
\u09EA>\uE06A; # DIGIT FOUR
\u09EB>\uE06B; # DIGIT FIVE
\u09EC>\uE06C; # DIGIT SIX
\u09ED>\uE06D; # DIGIT SEVEN
\u09EE>\uE06E; # DIGIT EIGHT
\u09EF>\uE06F; # DIGIT NINE
# Bengali additions: the InterIndic low byte is offset by one from the source
# (U+09F0 -> U+E071, ..., U+09FA -> U+E07B).
\u09F0>\ue071; # Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
\u09F1>\ue072; # Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
\u09F2>\ue073; # Bengali-InterIndic: RUPEE MARK
\u09F3>\ue074; # Bengali-InterIndic: RUPEE SIGN
\u09F4>\ue075; # Bengali-InterIndic: CURRENCY NUMERATOR ONE
\u09F5>\ue076; # Bengali-InterIndic: CURRENCY NUMERATOR TWO
\u09F6>\ue077; # Bengali-InterIndic: CURRENCY NUMERATOR THREE
\u09F7>\ue078; # Bengali-InterIndic: CURRENCY NUMERATOR FOUR
\u09F8>\ue079; # Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\u09F9>\ue07A; # Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN
\u09FA>\ue07B; # ISSHAR
# DANDA and DOUBLE DANDA are encoded in the Devanagari block and shared by Bengali
\u0964>\ue064; # DANDA
\u0965>\ue065; # DOUBLE DANDA
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,306 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# TODO: add remaining characters
# Should add variants for Russian-English, Russian-German
# Those can use this as a base, and then remap cases
# like a $hat to ya or ja.
# :: [\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ;
### WARNING, \u0308 must be added to the generated filters, in both directions ###
# MINIMAL FILTER
:: [\u0308\u0102-\u0103\u0114-\u0115\u011E-\u011F\u012C-\u012D\u014E-\u014F\u016C-\u016D\u0306\u0400-\u045F\u0490-\u0495\u0498-\u0499\u04C1-\u04C2\u04D0-\u04DF\u04E2-\u04E7\u04EC-\u04F5\u04F8-\u04F9\u1E1C-\u1E1D\u1EAE-\u1EB7\u1FB0\u1FB8\u1FD0\u1FD8\u1FE0\u1FE8] ;
:: NFD (NFC) ;
$modprime = \u02B9;
$modprime2 = \u02BA;
$grave = \u0300;
$acute = \u0301;
$hat = \u0302;
$breve = \u0306 ;
$dot = \u0307 ;
$caron = \u030C ;
$comma = \u0326 ;
$under = \u0331 ;
# move up so not masked
я <> a $hat ; # CYRILLIC SMALL LETTER YA
Я <> A $hat ; # CYRILLIC CAPITAL LETTER YA
ч <> c $caron ; # CYRILLIC SMALL LETTER CHE
Ч <> C $caron; # CYRILLIC CAPITAL LETTER CHE
# ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
# Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
# ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
# Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
# ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
# Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
э <> e $acute; # CYRILLIC SMALL LETTER E
Э <> E $acute; # CYRILLIC CAPITAL LETTER E
є <> e $hat; # CYRILLIC SMALL LETTER UKRAINIAN IE
Є <> E $hat; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
ш <> s $caron ; # CYRILLIC SMALL LETTER SHA
Ш <> S $caron ; # CYRILLIC CAPITAL LETTER SHA
щ <> s $hat ; # CYRILLIC SMALL LETTER SHCHA
Щ <> S $hat; # CYRILLIC CAPITAL LETTER SHCHA
ѕ <> z $hat ; # CYRILLIC SMALL LETTER DZE
Ѕ <> Z $hat; # CYRILLIC CAPITAL LETTER DZE
# ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
# Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
ю <> u $hat ; # CYRILLIC SMALL LETTER YU
Ю <> U $hat ; # CYRILLIC CAPITAL LETTER YU
і <> i $acute; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
І <> I $acute; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
ј <> j $caron; # CYRILLIC SMALL LETTER JE
Ј <> J $caron; # CYRILLIC CAPITAL LETTER JE
љ <> l $hat ; # CYRILLIC SMALL LETTER LJE
Љ <> L $hat ; # CYRILLIC CAPITAL LETTER LJE
њ <> n $hat ; # CYRILLIC SMALL LETTER NJE
Њ <> N $hat ; # CYRILLIC CAPITAL LETTER NJE
ћ <> c $acute ; # CYRILLIC SMALL LETTER TSHE
Ћ <> C $acute ; # CYRILLIC CAPITAL LETTER TSHE
џ <> d $hat ; # CYRILLIC SMALL LETTER DZHE
Џ <> D $hat ; # CYRILLIC CAPITAL LETTER DZHE
# Normal order
а <> a ; # CYRILLIC SMALL LETTER A
А <> A ; # CYRILLIC CAPITAL LETTER A
ә <> \u0259 ; # CYRILLIC SMALL LETTER SCHWA
Ә <> \u018F ; # CYRILLIC CAPITAL LETTER SCHWA
ӕ <> \u00E6 ; # CYRILLIC SMALL LIGATURE A IE
Ӕ <> \u00C6 ; # CYRILLIC CAPITAL LIGATURE A IE
б <> b ; # CYRILLIC SMALL LETTER BE
Б <> B ; # CYRILLIC CAPITAL LETTER BE
в <> v ; # CYRILLIC SMALL LETTER VE
В <> V ; # CYRILLIC CAPITAL LETTER VE
ґ <> g $grave ; # CYRILLIC SMALL LETTER GHE WITH UPTURN
Ґ <> G $grave ; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
ғ <> g $dot ; # CYRILLIC SMALL LETTER GHE WITH STROKE
Ғ <> G $dot; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
ҕ <> g $breve; # CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
Ҕ <> G $breve; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
г <> g ; # CYRILLIC SMALL LETTER GHE
Г <> G ; # CYRILLIC CAPITAL LETTER GHE
д <> d; # CYRILLIC SMALL LETTER DE
Д <> D; # CYRILLIC CAPITAL LETTER DE
ђ <> đ ; # CYRILLIC SMALL LETTER DJE
Ђ <> Đ ; # CYRILLIC CAPITAL LETTER DJE
ҙ <> z $comma ; # CYRILLIC SMALL LETTER ZE WITH DESCENDER
Ҙ <> Z $comma ; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
е <> e ; # CYRILLIC SMALL LETTER IE
Е <> E; # CYRILLIC CAPITAL LETTER IE
ж <> z $caron; # CYRILLIC SMALL LETTER ZHE
Ж <> Z $caron; # CYRILLIC CAPITAL LETTER ZHE
# җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
# Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
з <> z ; # CYRILLIC SMALL LETTER ZE
З <> Z; # CYRILLIC CAPITAL LETTER ZE
й <> j ; # CYRILLIC SMALL LETTER SHORT I
Й <> J ; # CYRILLIC CAPITAL LETTER SHORT I
и <> i ; # CYRILLIC SMALL LETTER I
И <> I ; # CYRILLIC CAPITAL LETTER I
к <> k ; # CYRILLIC SMALL LETTER KA
К <> K; # CYRILLIC CAPITAL LETTER KA
# қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
# Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
# ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
# Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
# ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
# Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
# ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
# Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
# ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
# Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
л <> l ; # CYRILLIC SMALL LETTER EL
Л <> L; # CYRILLIC CAPITAL LETTER EL
м <> m ; # CYRILLIC SMALL LETTER EM
М <> M ; # CYRILLIC CAPITAL LETTER EM
н <> n ; # CYRILLIC SMALL LETTER EN
Н <> N; # CYRILLIC CAPITAL LETTER EN
# ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
# Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
# ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
# Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
# ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
# Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
о <> o ; # CYRILLIC SMALL LETTER O
О <> O ; # CYRILLIC CAPITAL LETTER O
# ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
# Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
п <> p ; # CYRILLIC SMALL LETTER PE
П <> P ; # CYRILLIC CAPITAL LETTER PE
# ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
# Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
# ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
# Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
р <> r ; # CYRILLIC SMALL LETTER ER
Р <> R ; # CYRILLIC CAPITAL LETTER ER
# ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
# Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
с <> s ; # CYRILLIC SMALL LETTER ES
С <> S ; # CYRILLIC CAPITAL LETTER ES
# ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
# Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
т <> t ; # CYRILLIC SMALL LETTER TE
Т <> T ; # CYRILLIC CAPITAL LETTER TE
# ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
# Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
у <> u ; # CYRILLIC SMALL LETTER U
У <> U ; # CYRILLIC CAPITAL LETTER U
# ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
# Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
# ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
# Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
# ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
# Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
ф <> f ; # CYRILLIC SMALL LETTER EF
Ф <> F ; # CYRILLIC CAPITAL LETTER EF
х <> h ; # CYRILLIC SMALL LETTER HA
Х <> H; # CYRILLIC CAPITAL LETTER HA
# ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
# Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
# һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
# Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
# ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
# Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
# ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
# Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
# ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
# Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
# ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
# Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
ц <> c ; # CYRILLIC SMALL LETTER TSE
Ц <> C; # CYRILLIC CAPITAL LETTER TSE
# ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
# Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
# ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
# Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
# ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
# Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
Ъ <> $modprime2 $under ; # CYRILLIC CAPITAL LETTER HARD SIGN
ъ <> $modprime2 ; # CYRILLIC SMALL LETTER HARD SIGN
Ь <> $modprime $under ; # CYRILLIC CAPITAL LETTER SOFT SIGN
ь <> $modprime ; # CYRILLIC SMALL LETTER SOFT SIGN
ы <> y ; # CYRILLIC SMALL LETTER YERU
Ы <> Y ; # CYRILLIC CAPITAL LETTER YERU
# ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN
# Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
# ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
# Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
# ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
# Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
# ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
# Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS
# ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS
# Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS
# ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
# Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
# ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
# Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
# ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
# Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
# ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
# Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
# ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
# Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
# ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
# Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
# ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
# Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
# Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA
### ӑ <> XXX ; # CYRILLIC SMALL LETTER A
### Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
### ӓ <> XXX ; # CYRILLIC SMALL LETTER A
### Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A
### ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA
### Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA
### ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE
### Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE
### ѐ <> XXX ; # CYRILLIC SMALL LETTER IE
### Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE
### ё <> XXX ; # CYRILLIC SMALL LETTER IE
### Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE
### ӗ <> XXX ; # CYRILLIC SMALL LETTER IE
### Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE
### ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE
### Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
### ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE
### Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
### ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE
### Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE
### ѝ <> XXX ; # CYRILLIC SMALL LETTER I
### Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I
### ӣ <> XXX ; # CYRILLIC SMALL LETTER I
### Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I
### ӥ <> XXX ; # CYRILLIC SMALL LETTER I
### Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I
### ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
### Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
### ӧ <> XXX ; # CYRILLIC SMALL LETTER O
### Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O
### ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O
### Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
### ќ <> XXX ; # CYRILLIC SMALL LETTER KA
### Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA
### ӯ <> XXX ; # CYRILLIC SMALL LETTER U
### Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U
### ў <> XXX ; # CYRILLIC SMALL LETTER U
### Ў <> XXX ; # CYRILLIC CAPITAL LETTER U
### ӱ <> XXX ; # CYRILLIC SMALL LETTER U
### Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U
### ӳ <> XXX ; # CYRILLIC SMALL LETTER U
### Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U
### ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE
### Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE
### ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU
### Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU
### ӭ <> XXX ; # CYRILLIC SMALL LETTER E
### Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E
### ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
### Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
# Completeness
$ignore = [[:Mark:]''] * ;
| k < q ;
| K < Q ;
| u < w ;
| U < W ;
| KS < X } $ignore [:UppercaseLetter:] ;
| KS < [:UppercaseLetter:] $ignore { X ;
| Ks < X ;
| ks < x ;
:: NFC (NFD) ;
# note: a global filter is more efficient, but MUST include all source chars!!
# :: ([\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]]);
# MINIMAL FILTER: Latin-Cyrillic
:: ( [\u0308A-Za-z\u00C0-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u018F\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0259\u02B9-\u02BA\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344\u0374\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0400\u0403\u040C-\u040E\u0419\u0439\u0450\u0453\u045C-\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F02-\u1F05\u1F0A-\u1F0D\u1F12-\u1F15\u1F1A-\u1F1D\u1F22-\u1F25\u1F2A-\u1F2D\u1F32-\u1F35\u1F3A-\u1F3D\u1F42-\u1F45\u1F4A-\u1F4D\u1F52-\u1F55\u1F5B\u1F5D\u1F62-\u1F65\u1F6A-\u1F6D\u1F70-\u1F7D\u1F82-\u1F85\u1F8A-\u1F8D\u1F92-\u1F95\u1F9A-\u1F9D\u1FA2-\u1FA5\u1FAA-\u1FAD\u1FB0\u1FB2\u1FB4\u1FB8\u1FBA-\u1FBB\u1FC2\u1FC4\u1FC8-\u1FCB\u1FCD-\u1FCE\u1FD0\u1FD2-\u1FD3\u1FD8\u1FDA-\u1FDB\u1FDD-\u1FDE\u1FE0\u1FE2-\u1FE3\u1FE8\u1FEA-\u1FEB\u1FED-\u1FEE\u1FF2\u1FF4\u1FF8-\u1FFB\u212A-\u212B] ) ;

View file

@ -0,0 +1,117 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Devanagari-InterIndic
# :: NFD;
#Rules for Decomposed characters
\u0901>\uE001; # SIGN CANDRABINDU
\u0902>\uE002; # SIGN ANUSVARA
\u0903>\uE003; # SIGN VISARGA
\u0904>\uE004; # LETTER SHORT A
\u0905>\uE005; # LETTER A
\u0906>\uE006; # LETTER AA
\u0907>\uE007; # LETTER I
\u0908>\uE008; # LETTER II
\u0909>\uE009; # LETTER U
\u090A>\uE00A; # LETTER UU
\u090B>\uE00B; # LETTER VOCALIC R
\u090C>\uE00C; # LETTER VOCALIC L
\u090D>\uE00D; # LETTER CANDRA E (For representing English sounds)
\u090E>\uE00E; # UNMAPPED LETTER SHORT E(For Southern Scripts)
\u090F>\uE00F; # LETTER E
\u0910>\uE010; # LETTER AI
\u0911>\uE011; # LETTER CANDRA O (For representing English sounds)
\u0912>\uE012; # UNMAPPED LETTER SHORT O (For Southern Scripts)
\u0913>\uE013; # LETTER O
\u0914>\uE014; # LETTER AU
\u0915>\uE015; # LETTER KA
\u0916>\uE016; # LETTER KHA
\u0917>\uE017; # LETTER GA
\u0918>\uE018; # LETTER GHA
\u0919>\uE019; # LETTER NGA
\u091A>\uE01A; # LETTER CA
\u091B>\uE01B; # LETTER CHA
\u091C>\uE01C; # LETTER JA
\u091D>\uE01D; # LETTER JHA
\u091E>\uE01E; # LETTER NYA
\u091F>\uE01F; # LETTER TTA
\u0920>\uE020; # LETTER TTHA
\u0921>\uE021; # LETTER DDA
\u0922>\uE022; # LETTER DDHA
\u0923>\uE023; # LETTER NNA
\u0924>\uE024; # LETTER TA
\u0925>\uE025; # LETTER THA
\u0926>\uE026; # LETTER DA
\u0927>\uE027; # LETTER DHA
\u0928>\uE028; # LETTER NA
\u0929>\uE029; # LETTER NNNA
\u092A>\uE02A; # LETTER PA
\u092B>\uE02B; # LETTER PHA
\u092C>\uE02C; # LETTER BA
\u092D>\uE02D; # LETTER BHA
\u092E>\uE02E; # LETTER MA
\u092F>\uE02F; # LETTER YA
\u0930>\uE030; # LETTER RA
\u0931>\uE031; # LETTER RRA
\u0932>\uE032; # LETTER LA
\u0933>\uE033; # LETTER LLA
\u0934>\uE034; # LETTER LLLA
\u0935>\uE035; # LETTER VA
\u0936>\uE036; # LETTER SHA
\u0937>\uE037; # LETTER SSA
\u0938>\uE038; # LETTER SA
\u0939>\uE039; # LETTER HA
\u093C>\uE03C; # SIGN NUKTA
\u093D>\uE03D; # SIGN AVAGRAHA
\u093E>\uE03E; # VOWEL SIGN AA
\u093F>\uE03F; # VOWEL SIGN I
\u0940>\uE040; # VOWEL SIGN II
\u0941>\uE041; # VOWEL SIGN U
\u0942>\uE042; # VOWEL SIGN UU
\u0943>\uE043; # VOWEL SIGN VOCALIC R
\u0944>\uE044; # VOWEL SIGN VOCALIC RR
\u0945>\uE045; # VOWEL SIGN CANDRA E
\u0946>\uE046; # UNMAPPED VOWEL SIGN SHORT E
\u0947>\uE047; # VOWEL SIGN E
\u0948>\uE048; # VOWEL SIGN AI
\u0949>\uE049; # VOWEL SIGN CANDRA O
\u094A>\uE04A; # UNMAPPED VOWEL SIGN SHORT O
\u094B>\uE04B; # VOWEL SIGN O
\u094C>\uE04C; # VOWEL SIGN AU
\u094D>\uE04D; # SIGN VIRAMA
\u0950>\uE050; # OM
\u0951>\uE051; # UNMAPPED STRESS SIGN UDATTA
\u0952>\uE052; # UNMAPPED STRESS SIGN ANUDATTA
\u0953>\uE053; # UNMAPPED GRAVE ACCENT
\u0954>\uE054; # UNMAPPED ACUTE ACCENT
\u0958>\uE058; # LETTER QA
\u0959>\uE059; # LETTER KHHA
\u095A>\uE05a; # LETTER GHHA
\u095B>\uE05b; # LETTER ZA
\u095C>\uE05c; # LETTER DDDHA
\u095D>\uE05d; # LETTER RHA
\u095E>\uE05e; # LETTER FA
\u095F>\uE05f; # LETTER YYA
\u0960>\uE060; # LETTER VOCALIC RR
\u0961>\uE061; # LETTER VOCALIC LL
\u0962>\uE062; # VOWEL SIGN VOCALIC L
\u0963>\uE063; # VOWEL SIGN VOCALIC LL
\u0964>\ue064; # DANDA
\u0965>\ue065; # DOUBLE DANDA
\u0966>\uE066; # DIGIT ZERO
\u0967>\uE067; # DIGIT ONE
\u0968>\uE068; # DIGIT TWO
\u0969>\uE069; # DIGIT THREE
\u096A>\uE06A; # DIGIT FOUR
\u096B>\uE06B; # DIGIT FIVE
\u096C>\uE06C; # DIGIT SIX
\u096D>\uE06D; # DIGIT SEVEN
\u096E>\uE06E; # DIGIT EIGHT
\u096F>\uE06F; # DIGIT NINE
\u0970>\uE070; # Devanagari-InterIndic: ABBREVIATION SIGN
# :: NFC (NFD) ;

View file

@ -0,0 +1,271 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Fullwidth-Halfwidth
# Mechanically generated from Unicode Character Database
# IDEOGRAPHIC SPACE then added, and
# FULLWIDTH MACRON changed to map to MACRON, not SPACE + COMBINING MACRON
# multicharacter
# Fullwidth (semi-)voiced katakana <> halfwidth base letter + halfwidth sound mark
# (U+FF9E voiced, U+FF9F semi-voiced). Both sides are written as \u escapes so the
# fullwidth/halfwidth distinction cannot be lost to compatibility normalization.
\u30AC<>\uFF76\uFF9E; # to KATAKANA LETTER GA
\u30AE<>\uFF77\uFF9E; # to KATAKANA LETTER GI
\u30B0<>\uFF78\uFF9E; # to KATAKANA LETTER GU
\u30B2<>\uFF79\uFF9E; # to KATAKANA LETTER GE
\u30B4<>\uFF7A\uFF9E; # to KATAKANA LETTER GO
\u30B6<>\uFF7B\uFF9E; # to KATAKANA LETTER ZA
\u30B8<>\uFF7C\uFF9E; # to KATAKANA LETTER ZI
\u30BA<>\uFF7D\uFF9E; # to KATAKANA LETTER ZU
\u30BC<>\uFF7E\uFF9E; # to KATAKANA LETTER ZE
\u30BE<>\uFF7F\uFF9E; # to KATAKANA LETTER ZO
\u30C0<>\uFF80\uFF9E; # to KATAKANA LETTER DA
\u30C2<>\uFF81\uFF9E; # to KATAKANA LETTER DI
\u30C5<>\uFF82\uFF9E; # to KATAKANA LETTER DU
\u30C7<>\uFF83\uFF9E; # to KATAKANA LETTER DE
\u30C9<>\uFF84\uFF9E; # to KATAKANA LETTER DO
\u30D0<>\uFF8A\uFF9E; # to KATAKANA LETTER BA
\u30D1<>\uFF8A\uFF9F; # to KATAKANA LETTER PA
\u30D3<>\uFF8B\uFF9E; # to KATAKANA LETTER BI
\u30D4<>\uFF8B\uFF9F; # to KATAKANA LETTER PI
\u30D6<>\uFF8C\uFF9E; # to KATAKANA LETTER BU
\u30D7<>\uFF8C\uFF9F; # to KATAKANA LETTER PU
\u30D9<>\uFF8D\uFF9E; # to KATAKANA LETTER BE
\u30DA<>\uFF8D\uFF9F; # to KATAKANA LETTER PE
\u30DC<>\uFF8E\uFF9E; # to KATAKANA LETTER BO
\u30DD<>\uFF8E\uFF9F; # to KATAKANA LETTER PO
\u30F4<>\uFF73\uFF9E; # to KATAKANA LETTER VU
\u30F7<>\uFF9C\uFF9E; # to KATAKANA LETTER VA
\u30FA<>\uFF66\uFF9E; # to KATAKANA LETTER VO
# single character
# Fullwidth ASCII variants U+FF01..U+FF5E <> ASCII U+0021..U+007E.
# The fullwidth side is written as a \u escape (U+FF00 + (ASCII - 0x20)) so the rule
# keeps a non-empty source even if the file text is compatibility-normalized.
\uFF01<>'!'; # from FULLWIDTH EXCLAMATION MARK
\uFF02<>'\"'; # from FULLWIDTH QUOTATION MARK
\uFF03<>'#'; # from FULLWIDTH NUMBER SIGN
\uFF04<>'$'; # from FULLWIDTH DOLLAR SIGN
\uFF05<>'%'; # from FULLWIDTH PERCENT SIGN
\uFF06<>'&'; # from FULLWIDTH AMPERSAND
\uFF07<>''; # from FULLWIDTH APOSTROPHE
\uFF08<>'('; # from FULLWIDTH LEFT PARENTHESIS
\uFF09<>')'; # from FULLWIDTH RIGHT PARENTHESIS
\uFF0A<>'*'; # from FULLWIDTH ASTERISK
\uFF0B<>'+'; # from FULLWIDTH PLUS SIGN
\uFF0C<>','; # from FULLWIDTH COMMA
\uFF0D<>'-'; # from FULLWIDTH HYPHEN-MINUS
\uFF0E<>'.'; # from FULLWIDTH FULL STOP
\uFF0F<>'/'; # from FULLWIDTH SOLIDUS
\uFF10<>'0'; # from FULLWIDTH DIGIT ZERO
\uFF11<>'1'; # from FULLWIDTH DIGIT ONE
\uFF12<>'2'; # from FULLWIDTH DIGIT TWO
\uFF13<>'3'; # from FULLWIDTH DIGIT THREE
\uFF14<>'4'; # from FULLWIDTH DIGIT FOUR
\uFF15<>'5'; # from FULLWIDTH DIGIT FIVE
\uFF16<>'6'; # from FULLWIDTH DIGIT SIX
\uFF17<>'7'; # from FULLWIDTH DIGIT SEVEN
\uFF18<>'8'; # from FULLWIDTH DIGIT EIGHT
\uFF19<>'9'; # from FULLWIDTH DIGIT NINE
\uFF1A<>':'; # from FULLWIDTH COLON
\uFF1B<>';'; # from FULLWIDTH SEMICOLON
\uFF1C<>'<'; # from FULLWIDTH LESS-THAN SIGN
\uFF1D<>'='; # from FULLWIDTH EQUALS SIGN
\uFF1E<>'>'; # from FULLWIDTH GREATER-THAN SIGN
\uFF1F<>'?'; # from FULLWIDTH QUESTION MARK
\uFF20<>'@'; # from FULLWIDTH COMMERCIAL AT
\uFF21<>A; # from FULLWIDTH LATIN CAPITAL LETTER A
\uFF22<>B; # from FULLWIDTH LATIN CAPITAL LETTER B
\uFF23<>C; # from FULLWIDTH LATIN CAPITAL LETTER C
\uFF24<>D; # from FULLWIDTH LATIN CAPITAL LETTER D
\uFF25<>E; # from FULLWIDTH LATIN CAPITAL LETTER E
\uFF26<>F; # from FULLWIDTH LATIN CAPITAL LETTER F
\uFF27<>G; # from FULLWIDTH LATIN CAPITAL LETTER G
\uFF28<>H; # from FULLWIDTH LATIN CAPITAL LETTER H
\uFF29<>I; # from FULLWIDTH LATIN CAPITAL LETTER I
\uFF2A<>J; # from FULLWIDTH LATIN CAPITAL LETTER J
\uFF2B<>K; # from FULLWIDTH LATIN CAPITAL LETTER K
\uFF2C<>L; # from FULLWIDTH LATIN CAPITAL LETTER L
\uFF2D<>M; # from FULLWIDTH LATIN CAPITAL LETTER M
\uFF2E<>N; # from FULLWIDTH LATIN CAPITAL LETTER N
\uFF2F<>O; # from FULLWIDTH LATIN CAPITAL LETTER O
\uFF30<>P; # from FULLWIDTH LATIN CAPITAL LETTER P
\uFF31<>Q; # from FULLWIDTH LATIN CAPITAL LETTER Q
\uFF32<>R; # from FULLWIDTH LATIN CAPITAL LETTER R
\uFF33<>S; # from FULLWIDTH LATIN CAPITAL LETTER S
\uFF34<>T; # from FULLWIDTH LATIN CAPITAL LETTER T
\uFF35<>U; # from FULLWIDTH LATIN CAPITAL LETTER U
\uFF36<>V; # from FULLWIDTH LATIN CAPITAL LETTER V
\uFF37<>W; # from FULLWIDTH LATIN CAPITAL LETTER W
\uFF38<>X; # from FULLWIDTH LATIN CAPITAL LETTER X
\uFF39<>Y; # from FULLWIDTH LATIN CAPITAL LETTER Y
\uFF3A<>Z; # from FULLWIDTH LATIN CAPITAL LETTER Z
\uFF3B<>'['; # from FULLWIDTH LEFT SQUARE BRACKET
\uFF3C<>'\\'; # from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
\uFF3D<>']'; # from FULLWIDTH RIGHT SQUARE BRACKET
\uFF3E<>'^'; # from FULLWIDTH CIRCUMFLEX ACCENT
\uFF3F<>'_'; # from FULLWIDTH LOW LINE
\uFF40<>'`'; # from FULLWIDTH GRAVE ACCENT
\uFF41<>a; # from FULLWIDTH LATIN SMALL LETTER A
\uFF42<>b; # from FULLWIDTH LATIN SMALL LETTER B
\uFF43<>c; # from FULLWIDTH LATIN SMALL LETTER C
\uFF44<>d; # from FULLWIDTH LATIN SMALL LETTER D
\uFF45<>e; # from FULLWIDTH LATIN SMALL LETTER E
\uFF46<>f; # from FULLWIDTH LATIN SMALL LETTER F
\uFF47<>g; # from FULLWIDTH LATIN SMALL LETTER G
\uFF48<>h; # from FULLWIDTH LATIN SMALL LETTER H
\uFF49<>i; # from FULLWIDTH LATIN SMALL LETTER I
\uFF4A<>j; # from FULLWIDTH LATIN SMALL LETTER J
\uFF4B<>k; # from FULLWIDTH LATIN SMALL LETTER K
\uFF4C<>l; # from FULLWIDTH LATIN SMALL LETTER L
\uFF4D<>m; # from FULLWIDTH LATIN SMALL LETTER M
\uFF4E<>n; # from FULLWIDTH LATIN SMALL LETTER N
\uFF4F<>o; # from FULLWIDTH LATIN SMALL LETTER O
\uFF50<>p; # from FULLWIDTH LATIN SMALL LETTER P
\uFF51<>q; # from FULLWIDTH LATIN SMALL LETTER Q
\uFF52<>r; # from FULLWIDTH LATIN SMALL LETTER R
\uFF53<>s; # from FULLWIDTH LATIN SMALL LETTER S
\uFF54<>t; # from FULLWIDTH LATIN SMALL LETTER T
\uFF55<>u; # from FULLWIDTH LATIN SMALL LETTER U
\uFF56<>v; # from FULLWIDTH LATIN SMALL LETTER V
\uFF57<>w; # from FULLWIDTH LATIN SMALL LETTER W
\uFF58<>x; # from FULLWIDTH LATIN SMALL LETTER X
\uFF59<>y; # from FULLWIDTH LATIN SMALL LETTER Y
\uFF5A<>z; # from FULLWIDTH LATIN SMALL LETTER Z
\uFF5B<>'{'; # from FULLWIDTH LEFT CURLY BRACKET
\uFF5C<>'|'; # from FULLWIDTH VERTICAL LINE
\uFF5D<>'}'; # from FULLWIDTH RIGHT CURLY BRACKET
\uFF5E<>'~'; # from FULLWIDTH TILDE
# Fullwidth CJK punctuation and katakana <> halfwidth forms U+FF61..U+FF9F.
# The halfwidth side is written as a \u escape so it stays distinct from the
# fullwidth side if the file text is compatibility-normalized.
。<>\uFF61; # to HALFWIDTH IDEOGRAPHIC FULL STOP
「<>\uFF62; # to HALFWIDTH LEFT CORNER BRACKET
」<>\uFF63; # to HALFWIDTH RIGHT CORNER BRACKET
、<>\uFF64; # to HALFWIDTH IDEOGRAPHIC COMMA
・<>\uFF65; # to HALFWIDTH KATAKANA MIDDLE DOT
ヲ<>\uFF66; # to HALFWIDTH KATAKANA LETTER WO
ァ<>\uFF67; # to HALFWIDTH KATAKANA LETTER SMALL A
ィ<>\uFF68; # to HALFWIDTH KATAKANA LETTER SMALL I
ゥ<>\uFF69; # to HALFWIDTH KATAKANA LETTER SMALL U
ェ<>\uFF6A; # to HALFWIDTH KATAKANA LETTER SMALL E
ォ<>\uFF6B; # to HALFWIDTH KATAKANA LETTER SMALL O
ャ<>\uFF6C; # to HALFWIDTH KATAKANA LETTER SMALL YA
ュ<>\uFF6D; # to HALFWIDTH KATAKANA LETTER SMALL YU
ョ<>\uFF6E; # to HALFWIDTH KATAKANA LETTER SMALL YO
ッ<>\uFF6F; # to HALFWIDTH KATAKANA LETTER SMALL TU
ー<>\uFF70; # to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
ア<>\uFF71; # to HALFWIDTH KATAKANA LETTER A
イ<>\uFF72; # to HALFWIDTH KATAKANA LETTER I
ウ<>\uFF73; # to HALFWIDTH KATAKANA LETTER U
エ<>\uFF74; # to HALFWIDTH KATAKANA LETTER E
オ<>\uFF75; # to HALFWIDTH KATAKANA LETTER O
カ<>\uFF76; # to HALFWIDTH KATAKANA LETTER KA
キ<>\uFF77; # to HALFWIDTH KATAKANA LETTER KI
ク<>\uFF78; # to HALFWIDTH KATAKANA LETTER KU
ケ<>\uFF79; # to HALFWIDTH KATAKANA LETTER KE
コ<>\uFF7A; # to HALFWIDTH KATAKANA LETTER KO
サ<>\uFF7B; # to HALFWIDTH KATAKANA LETTER SA
シ<>\uFF7C; # to HALFWIDTH KATAKANA LETTER SI
ス<>\uFF7D; # to HALFWIDTH KATAKANA LETTER SU
セ<>\uFF7E; # to HALFWIDTH KATAKANA LETTER SE
ソ<>\uFF7F; # to HALFWIDTH KATAKANA LETTER SO
タ<>\uFF80; # to HALFWIDTH KATAKANA LETTER TA
チ<>\uFF81; # to HALFWIDTH KATAKANA LETTER TI
ツ<>\uFF82; # to HALFWIDTH KATAKANA LETTER TU
テ<>\uFF83; # to HALFWIDTH KATAKANA LETTER TE
ト<>\uFF84; # to HALFWIDTH KATAKANA LETTER TO
ナ<>\uFF85; # to HALFWIDTH KATAKANA LETTER NA
ニ<>\uFF86; # to HALFWIDTH KATAKANA LETTER NI
ヌ<>\uFF87; # to HALFWIDTH KATAKANA LETTER NU
ネ<>\uFF88; # to HALFWIDTH KATAKANA LETTER NE
\u30CE<>\uFF89; # to HALFWIDTH KATAKANA LETTER NO
ハ<>\uFF8A; # to HALFWIDTH KATAKANA LETTER HA
ヒ<>\uFF8B; # to HALFWIDTH KATAKANA LETTER HI
フ<>\uFF8C; # to HALFWIDTH KATAKANA LETTER HU
ヘ<>\uFF8D; # to HALFWIDTH KATAKANA LETTER HE
ホ<>\uFF8E; # to HALFWIDTH KATAKANA LETTER HO
マ<>\uFF8F; # to HALFWIDTH KATAKANA LETTER MA
ミ<>\uFF90; # to HALFWIDTH KATAKANA LETTER MI
ム<>\uFF91; # to HALFWIDTH KATAKANA LETTER MU
メ<>\uFF92; # to HALFWIDTH KATAKANA LETTER ME
モ<>\uFF93; # to HALFWIDTH KATAKANA LETTER MO
ヤ<>\uFF94; # to HALFWIDTH KATAKANA LETTER YA
ユ<>\uFF95; # to HALFWIDTH KATAKANA LETTER YU
ヨ<>\uFF96; # to HALFWIDTH KATAKANA LETTER YO
ラ<>\uFF97; # to HALFWIDTH KATAKANA LETTER RA
リ<>\uFF98; # to HALFWIDTH KATAKANA LETTER RI
ル<>\uFF99; # to HALFWIDTH KATAKANA LETTER RU
レ<>\uFF9A; # to HALFWIDTH KATAKANA LETTER RE
ロ<>\uFF9B; # to HALFWIDTH KATAKANA LETTER RO
ワ<>\uFF9C; # to HALFWIDTH KATAKANA LETTER WA
ン<>\uFF9D; # to HALFWIDTH KATAKANA LETTER N
\u3099<>\uFF9E; # to HALFWIDTH KATAKANA VOICED SOUND MARK
\u309A<>\uFF9F; # to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
# Hangul compatibility jamo U+3131..U+3164 <> halfwidth Hangul forms U+FFA0..U+FFDC.
# Both sides are written as \u escapes: the compatibility jamo and the halfwidth
# forms share the same compatibility decomposition, so literal characters collapse
# into identical (or empty) sides when the file text is normalized.
\u3164<>\uFFA0; # to HALFWIDTH HANGUL FILLER
\u3131<>\uFFA1; # to HALFWIDTH HANGUL LETTER KIYEOK
\u3132<>\uFFA2; # to HALFWIDTH HANGUL LETTER SSANGKIYEOK
\u3133<>\uFFA3; # to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
\u3134<>\uFFA4; # to HALFWIDTH HANGUL LETTER NIEUN
\u3135<>\uFFA5; # to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
\u3136<>\uFFA6; # to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
\u3137<>\uFFA7; # to HALFWIDTH HANGUL LETTER TIKEUT
\u3138<>\uFFA8; # to HALFWIDTH HANGUL LETTER SSANGTIKEUT
\u3139<>\uFFA9; # to HALFWIDTH HANGUL LETTER RIEUL
\u313A<>\uFFAA; # to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
\u313B<>\uFFAB; # to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
\u313C<>\uFFAC; # to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
\u313D<>\uFFAD; # to HALFWIDTH HANGUL LETTER RIEUL-SIOS
\u313E<>\uFFAE; # to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
\u313F<>\uFFAF; # to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
\u3140<>\uFFB0; # to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
\u3141<>\uFFB1; # to HALFWIDTH HANGUL LETTER MIEUM
\u3142<>\uFFB2; # to HALFWIDTH HANGUL LETTER PIEUP
\u3143<>\uFFB3; # to HALFWIDTH HANGUL LETTER SSANGPIEUP
\u3144<>\uFFB4; # to HALFWIDTH HANGUL LETTER PIEUP-SIOS
\u3145<>\uFFB5; # to HALFWIDTH HANGUL LETTER SIOS
\u3146<>\uFFB6; # to HALFWIDTH HANGUL LETTER SSANGSIOS
\u3147<>\uFFB7; # to HALFWIDTH HANGUL LETTER IEUNG
\u3148<>\uFFB8; # to HALFWIDTH HANGUL LETTER CIEUC
\u3149<>\uFFB9; # to HALFWIDTH HANGUL LETTER SSANGCIEUC
\u314A<>\uFFBA; # to HALFWIDTH HANGUL LETTER CHIEUCH
\u314B<>\uFFBB; # to HALFWIDTH HANGUL LETTER KHIEUKH
\u314C<>\uFFBC; # to HALFWIDTH HANGUL LETTER THIEUTH
\u314D<>\uFFBD; # to HALFWIDTH HANGUL LETTER PHIEUPH
\u314E<>\uFFBE; # to HALFWIDTH HANGUL LETTER HIEUH
\u314F<>\uFFC2; # to HALFWIDTH HANGUL LETTER A
\u3150<>\uFFC3; # to HALFWIDTH HANGUL LETTER AE
\u3151<>\uFFC4; # to HALFWIDTH HANGUL LETTER YA
\u3152<>\uFFC5; # to HALFWIDTH HANGUL LETTER YAE
\u3153<>\uFFC6; # to HALFWIDTH HANGUL LETTER EO
\u3154<>\uFFC7; # to HALFWIDTH HANGUL LETTER E
\u3155<>\uFFCA; # to HALFWIDTH HANGUL LETTER YEO
\u3156<>\uFFCB; # to HALFWIDTH HANGUL LETTER YE
\u3157<>\uFFCC; # to HALFWIDTH HANGUL LETTER O
\u3158<>\uFFCD; # to HALFWIDTH HANGUL LETTER WA
\u3159<>\uFFCE; # to HALFWIDTH HANGUL LETTER WAE
\u315A<>\uFFCF; # to HALFWIDTH HANGUL LETTER OE
\u315B<>\uFFD2; # to HALFWIDTH HANGUL LETTER YO
\u315C<>\uFFD3; # to HALFWIDTH HANGUL LETTER U
\u315D<>\uFFD4; # to HALFWIDTH HANGUL LETTER WEO
\u315E<>\uFFD5; # to HALFWIDTH HANGUL LETTER WE
\u315F<>\uFFD6; # to HALFWIDTH HANGUL LETTER WI
\u3160<>\uFFD7; # to HALFWIDTH HANGUL LETTER YU
\u3161<>\uFFDA; # to HALFWIDTH HANGUL LETTER EU
\u3162<>\uFFDB; # to HALFWIDTH HANGUL LETTER YI
\u3163<>\uFFDC; # to HALFWIDTH HANGUL LETTER I
# Fullwidth signs U+FFE0..U+FFE6 and IDEOGRAPHIC SPACE U+3000 map to their ordinary
# counterparts; ordinary box/arrow/shape symbols map to halfwidth forms U+FFE8..U+FFEE.
# The fullwidth/halfwidth side of each rule is a \u escape so it cannot collapse onto
# the other side under compatibility normalization.
\uFFE0<>'¢'; # from FULLWIDTH CENT SIGN
\uFFE1<>'£'; # from FULLWIDTH POUND SIGN
\uFFE2<>'¬'; # from FULLWIDTH NOT SIGN
\uFFE3<>'¯'; # from FULLWIDTH MACRON
\u3000<>' '; # ideographic space (place this after MACRON)
\uFFE4<>'¦'; # from FULLWIDTH BROKEN BAR
\uFFE5<>'¥'; # from FULLWIDTH YEN SIGN
\uFFE6<>₩; # from FULLWIDTH WON SIGN
│<>\uFFE8; # to HALFWIDTH FORMS LIGHT VERTICAL
'←'<>\uFFE9; # to HALFWIDTH LEFTWARDS ARROW
↑<>\uFFEA; # to HALFWIDTH UPWARDS ARROW
'→'<>\uFFEB; # to HALFWIDTH RIGHTWARDS ARROW
↓<>\uFFEC; # to HALFWIDTH DOWNWARDS ARROW
■<>\uFFED; # to HALFWIDTH BLACK SQUARE
○<>\uFFEE; # to HALFWIDTH WHITE CIRCLE
# eof

View file

@ -0,0 +1,345 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Rules are predicated on running NFD first, and NFC afterwards
# :: [\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ;
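# MINIMAL FILTER GENERATED FOR: Greek-Latin
# NOTE(review): the range \u03F7-\u07FB in the filter below extends far past the Greek
# block and already covers the \u0401 ... \u04F9 entries listed after it; it may be a
# typo for \u03F7-\u03FB. Confirm before regenerating the filter.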
:: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u03F7-\u07FB\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u03F9] ;
:: NFD (NFC) ;
# TEST CASES
# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
# ᾳ ῃ ῳ ὃ ὄ
# ὠς ὡς ὢς ὣς
# Ὠς Ὡς Ὢς Ὣς
# ὨΣ ὩΣ ὪΣ ὫΣ
# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
# Useful variables
$lower = [[:latin:][:greek:] & [:Ll:]];
$glower = [[:greek:] & [:Ll:]];
$upper = [[:latin:][:greek:] & [:Lu:]] ;
$accent = [:M:] ;
# NOTE: restrict to just the Greek & Latin accents that we care about
# TODO: broaden out once interation is fixed
$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;
$macron = \u0304 ;
$ddot = \u0308 ;
$ddotmac = [$ddot$macron];
$lcgvowel = [αεηιουω] ;
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
$gvowel = [$lcgvowel $ucgvowel] ;
$lcgvowelC = [$lcgvowel $accent] ;
$evowel = [aeiouyAEIOUY];
$evowel2 = [iuyIUY];
$vowel = [ $evowel $gvowel] ;
$gammaLike = [ΓΚΞΧγκξχϰ] ;
$egammaLike = [GKXCgkxc] ;
$smooth = ̓ ;
$rough = ̔ ;
$iotasub = ͅ ;
$evowel_i = [$evowel-[iI]] ;
$evowel2_i = [uyUY];
$underbar = \u0331;
$afterLetter = [:L:] [[:M:]\']* ;
$beforeLetter = [[:M:]\']* [:L:] ;
$beforeLower = $accent * $lower ;
$notLetter = [^[:L:][:M:]] ;
$under = ̱;
# Fix punctuation
# preserve original
\: <> \: $under ;
\? <> \? $under ;
\; <> \? ;
· <> \: ;
# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
\u0342 <> \u0302 ;
# IOTA: convert iota subscript to iota
# first make previous alpha long!
$accent_minus = [[$accent]-[$iotasub$macron]];
Α } $accent_minus * $iotasub > | Α $macron ;
α } $accent_minus * $iotasub > | α $macron ;
# now convert to uppercase if after uppercase, otherwise to lowercase
$upper $accent * { $iotasub > I ;
$iotasub > i ;
| $1 $iotasub < ($evowel $macron $accentMinus *) i ;
| $1 $iotasub < ($evowel $macron $accentMinus *) I ;
# BREATHING
# Convert rough breathing to h, and move before letters.
# Make A ` x = > H a x
Α ($macron?) $rough } $beforeLower > H | α $1;
Ε $rough } $beforeLower > H | ε;
Η $rough } $beforeLower > H | η ;
Ι ($ddot?) $rough } $beforeLower > H | ι $1;
Ο $rough } $beforeLower > H | ο ;
Υ $rough } $beforeLower > H | υ ;
Ω ($ddot?) $rough } $beforeLower > H | ω $1;
# Make A x ` = > H a x
Α ($glower $macron?) $rough > H | α $1 ;
Ε ($glower) $rough > H | ε $1 ;
Η ($glower) $rough > H | η $1 ;
Ι ($glower $ddot?) $rough > H | ι $1 ;
Ο ($glower) $rough > H | ο $1 ;
Υ ($glower) $rough > H | υ $1 ;
Ω ($glower $ddot?) $rough > H | ω $1 ;
#Otherwise, make x ` into h x and X ` into H X
($lcgvowel + $ddotmac? ) $rough > h | $1 ;
($gvowel + $ddotmac? ) $rough > H | $1 ;
# Go backwards with H
| $1 $rough < h ($evowel $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough < h ($evowel $ddot? $evowel2 $macron?) ;
| $1 $rough < h ($evowel $macron? $ddot?) ;
| $1 $rough < H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough < H ([AEIOUY] $ddot? $evowel2 $macron?) ;
| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;
# titlecase, have to fix individually
# in the future, we should add &uppercase() to make this easier
| A $1 $rough < H a ($macron $ddot? $evowel2_i $macron?) ;
| E $1 $rough < H e ($macron $ddot? $evowel2_i $macron?) ;
| I $1 $rough < H i ($macron $ddot? $evowel2_i $macron?) ;
| O $1 $rough < H o ($macron $ddot? $evowel2_i $macron?) ;
| U $1 $rough < H u ($macron $ddot? $evowel2_i $macron?) ;
| Y $1 $rough < H y ($macron $ddot? $evowel2_i $macron?) ;
| A $1 $rough < H a ($ddot? $evowel2 $macron?) ;
| E $1 $rough < H e ($ddot? $evowel2 $macron?) ;
| I $1 $rough < H i ($ddot? $evowel2 $macron?) ;
| O $1 $rough < H o ($ddot? $evowel2 $macron?) ;
| U $1 $rough < H u ($ddot? $evowel2 $macron?) ;
| Y $1 $rough < H y ($ddot? $evowel2 $macron?) ;
| A $1 $rough < H a ($macron? $ddot? ) ;
| E $1 $rough < H e ($macron? $ddot? ) ;
| I $1 $rough < H i ($macron? $ddot? ) ;
| O $1 $rough < H o ($macron? $ddot? ) ;
| U $1 $rough < H u ($macron? $ddot? ) ;
| Y $1 $rough < H y ($macron? $ddot? ) ;
# Now do smooth
#delete smooth breathing for Latin
$smooth > ;
# insert in Greek
# the assumption is that all Marks are on letters.
| $1 $smooth < $notLetter { ([rR]) } [^hH$smooth$rough] ;
| $1 $smooth < $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
| $1 $smooth < $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
# TODO: preserve smooth/rough breathing if not
# on initial vowel sequence
# need to have these up here so the rules don't mask
# remove now superfluous macron when returning
Α < A $macron ;
α < a $macron ;
η <> e $macron ;
Η <> E $macron ;
φ <> ph ;
Ψ } $beforeLower <> Ps ;
Ψ <> PS ;
Φ } $beforeLower <> Ph ;
Φ <> PH ;
ψ <> ps ;
ω <> o $macron ;
Ω <> O $macron;
# NORMAL
α <> a ;
Α <> A ;
β <> b ;
Β <> B ;
γ } $gammaLike <> n } $egammaLike ;
γ <> g ;
Γ } $gammaLike <> N } $egammaLike ;
Γ <> G ;
δ <> d ;
Δ <> D ;
ε <> e ;
Ε <> E ;
ζ <> z ;
Ζ <> Z ;
θ <> th ;
Θ } $beforeLower <> Th ;
Θ <> TH ;
ι <> i ;
Ι <> I ;
κ <> k ;
Κ <> K ;
λ <> l ;
Λ <> L ;
μ <> m ;
Μ <> M ;
ν } $gammaLike > n\' ;
ν <> n ;
Ν } $gammaLike <> N\' ;
Ν <> N ;
ξ <> x ;
Ξ <> X ;
ο <> o ;
Ο <> O ;
π <> p ;
Π <> P ;
ρ $rough <> rh;
Ρ $rough } $beforeLower <> Rh ;
Ρ $rough <> RH ;
ρ <> r ;
Ρ <> R ;
# insert separator before things that turn into s
[Pp] { } [ςσΣϷϸϺϻ] > \' ;
# special S variants
Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
# underbar means exception
# before a letter, initial
ς } $beforeLetter <> s $underbar } $beforeLetter;
σ } $beforeLetter <> s } $beforeLetter;
# otherwise, after a letter = final
$afterLetter { σ <> $afterLetter { s $underbar;
$afterLetter { ς <> $afterLetter { s ;
# otherwise (isolated) = initial
ς <> s $underbar;
σ <> s ;
# [Pp] { Σ <> \'S ;
Σ <> S ;
τ <> t ;
Τ <> T ;
$vowel {υ } <> u ;
υ <> y ;
$vowel { Υ <> U ;
Υ <> Y ;
χ <> ch ;
Χ } $beforeLower <> Ch ;
Χ <> CH ;
# Completeness for ASCII
$ignore = [[:Mark:]''] * ;
| k < c ;
| ph < f ;
| i < j ;
| k < q ;
| b < v } $vowel ;
| b < w } $vowel;
| u < v ;
| u < w;
| K < C ;
| Ph < F ;
| I < J ;
| K < Q ;
| B < V } $vowel ;
| B < W } $vowel ;
| U < V ;
| U < W ;
$rough } $ignore [:UppercaseLetter:] > H ;
$ignore [:UppercaseLetter:] { $rough > H ;
$rough < H ;
$rough <> h ;
# Completeness for Greek
ϐ > | β ;
ϑ > | θ ;
ϒ > | Υ ;
ϕ > | φ ;
ϖ > | π ;
ϰ > | κ ;
ϱ > | ρ ;
ϲ > | σ ;
Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
ϳ > j ;
ϴ > | Θ ;
ϵ > | ε ;
µ > | μ ;
ͺ > i;
# delete any trailing ' marks used for roundtripping
< [Ππ] { \' } [Ss] ;
< [Νν] { \' } $egammaLike ;
::NFC (NFD) ;
# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
# ([\u0000-\u007F \u00B7 [:Latin:] [:nonspacing mark:]]) ;
# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
:: ( [':?A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0300-\u0337\u0339-\u0345\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEE\u1FF2-\u1FF4\u1FF6-\u1FFC\u212A-\u212B] ) ;

View file

@ -0,0 +1,252 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# For modern Greek, based on UNGEGN rules.
# Rules are predicated on running NFD first, and NFC afterwards
# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
# WARNING: need to add accents to both filters ###
# :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;
:: [[[:Greek:][:Mn:][:Me:]] [\:-;?\u00B7\u037E\u0387]] ;
::NFD (NFC) ;
# Useful variables
$lower = [[:latin:][:greek:] & [:Ll:]] ;
$upper = [[:latin:][:greek:] & [:Lu:]] ;
$accent = [[:Mn:][:Me:]] ;
$macron = ̄ ;
$ddot = ̈ ;
$lcgvowel = [αεηιουω] ;
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
$gvowel = [$lcgvowel $ucgvowel] ;
$lcgvowelC = [$lcgvowel $accent] ;
$evowel = [aeiouyAEIOUY];
$vowel = [ $evowel $gvowel] ;
$beforeLower = $accent * $lower ;
$gammaLike = [ΓΚΞΧγκξχϰ] ;
$egammaLike = [GKXCgkxc] ;
$smooth = ̓ ;
$rough = ̔ ;
$iotasub = ͅ ;
$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;
$under = ̱;
$caron = ̌;
$afterLetter = [:L:] [\'$accent]* ;
$beforeLetter = [\'$accent]* [:L:] ;
# Fix punctuation
# preserve original
\: <> \: $under ;
\? <> \? $under ;
\; <> \? ;
· <> \: ;
# Fix any ancient characters that creep in
͂ > ́ ;
̂ > ́ ;
̀ > ́ ;
$smooth > ;
$rough > ;
$iotasub > ;
ͺ > ;
# need to have these up here so the rules don't mask
η <> i $under ;
Η <> I $under ;
Ψ } $beforeLower <> Ps ;
Ψ <> PS ;
ψ <> ps ;
ω <> o $under ;
Ω <> O $under;
# at beginning or end of word, convert mp to b
[^[:L:]$accent] { μπ > b ;
μπ } [^[:L:]$accent] > b ;
[^[:L:]$accent] { [Μμ][Ππ] > B ;
[Μμ][Ππ] } [^[:L:]$accent] > B ;
μπ < b ;
Μπ < B } $beforeLower ;
ΜΠ < B ;
# handle diphthongs ending with upsilon
ου <> ou ;
ΟΥ <> OU ;
Ου <> Ou ;
οΥ <> oU ;
$fmaker = [aeiAEI] $under ? ;
$shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate
$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;
υ $1 < ( $shiftForwardVowels )* v $under ;
$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;
υ $1 < ( $shiftForwardVowels )* f $under ;
$fmaker { Υ } $softener <> V $under ;
$fmaker { Υ <> U $under ;
υ <> y ;
Υ <> Y ;
# NORMAL
α <> a ;
Α <> A ;
β <> v ;
Β <> V ;
γ } $gammaLike <> n } $egammaLike ;
γ <> g ;
Γ } $gammaLike <> N } $egammaLike ;
Γ <> G ;
δ <> d ;
Δ <> D ;
ε <> e ;
Ε <> E ;
ζ <> z ;
Ζ <> Z ;
θ <> th ;
Θ } $beforeLower <> Th ;
Θ <> TH ;
ι <> i ;
Ι <> I ;
κ <> k ;
Κ <> K ;
λ <> l ;
Λ <> L ;
μ <> m ;
Μ <> M ;
ν } $gammaLike > n\' ;
ν <> n ;
Ν } $gammaLike <> N\' ;
Ν <> N ;
ξ <> x ;
Ξ <> X ;
ο <> o ;
Ο <> O ;
π <> p ;
Π <> P ;
ρ <> r ;
Ρ <> R ;
# insert separator before things that turn into s
[Pp] { } [ςσΣϷϸϺϻ] > \' ;
# special S variants
Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
# Underbar ($under) means exception
# before a letter, initial
ς } $beforeLetter <> s $under } $beforeLetter;
σ } $beforeLetter <> s } $beforeLetter;
# otherwise, after a letter = final
$afterLetter { σ <> $afterLetter { s $under;
$afterLetter { ς <> $afterLetter { s ;
# otherwise (isolated) = initial
ς <> s $under;
σ <> s ;
# [Pp] { Σ <> \'S ;
Σ <> S ;
τ <> t ;
Τ <> T ;
φ <> f ;
Φ <> F ;
χ <> ch ;
Χ } $beforeLower <> Ch ;
Χ <> CH ;
# Completeness for ASCII
# $ignore = [[:Mark:]''] * ;
| ch < h ;
| k < c ;
| i < j ;
| k < q ;
| b < u } $vowel ;
| b < w } $vowel ;
| y < u ;
| y < w ;
| Ch < H ;
| K < C ;
| I < J ;
| K < Q ;
| B < W } $vowel ;
| B < U } $vowel ;
| Y < W ;
| Y < U ;
# Completeness for Greek
ϐ > | β ;
ϑ > | θ ;
ϒ > | Υ ;
ϕ > | φ ;
ϖ > | π ;
ϰ > | κ ;
ϱ > | ρ ;
ϲ > | σ ;
Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
ϳ > j ;
ϴ > | Θ ;
ϵ > | ε ;
µ > | μ ;
# delete any trailing ' marks used for roundtripping
< [Ππ] { \' } [Ss] ;
< [Νν] { \' } $egammaLike ;
::NFC (NFD) ;
# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;

View file

@ -0,0 +1,91 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Gujarati-InterIndic
# One-way table: every rule rewrites a single Gujarati code point
# (U+0A81..U+0AEF) to the InterIndic private-use code point at the same
# offset within its block (U+0A80 + n > U+E000 + n).
# The two dandas come from the Devanagari block (U+0964, U+0965) and map
# to U+E064 and U+E065.
#:: NFD (NFC) ;
# Signs
\u0a81>\ue001; # SIGN CANDRABINDU
\u0a82>\ue002; # SIGN ANUSVARA
\u0a83>\ue003; # SIGN VISARGA
# Independent vowels
\u0a85>\ue005; # LETTER A
\u0a86>\ue006; # LETTER AA
\u0a87>\ue007; # LETTER I
\u0a88>\ue008; # LETTER II
\u0a89>\ue009; # LETTER U
\u0a8a>\ue00a; # LETTER UU
\u0a8b>\ue00b; # LETTER VOCALIC R
\u0a8c>\ue00c; # LETTER VOCALIC L
\u0a8d>\ue00d; # VOWEL CANDRA E
\u0a8f>\ue00f; # LETTER E
\u0a90>\ue010; # LETTER AI
\u0a91>\ue011; # VOWEL CANDRA O
\u0a93>\ue013; # LETTER O
\u0a94>\ue014; # LETTER AU
# Consonants
\u0a95>\ue015; # LETTER KA
\u0a96>\ue016; # LETTER KHA
\u0a97>\ue017; # LETTER GA
\u0a98>\ue018; # LETTER GHA
\u0a99>\ue019; # LETTER NGA
\u0a9a>\ue01a; # LETTER CA
\u0a9b>\ue01b; # LETTER CHA
\u0a9c>\ue01c; # LETTER JA
\u0a9d>\ue01d; # LETTER JHA
\u0a9e>\ue01e; # LETTER NYA
\u0a9f>\ue01f; # LETTER TTA
\u0aa0>\ue020; # LETTER TTHA
\u0aa1>\ue021; # LETTER DDA
\u0aa2>\ue022; # LETTER DDHA
\u0aa3>\ue023; # LETTER NNA
\u0aa4>\ue024; # LETTER TA
\u0aa5>\ue025; # LETTER THA
\u0aa6>\ue026; # LETTER DA
\u0aa7>\ue027; # LETTER DHA
\u0aa8>\ue028; # LETTER NA
\u0aaa>\ue02a; # LETTER PA
\u0aab>\ue02b; # LETTER PHA
\u0aac>\ue02c; # LETTER BA
\u0aad>\ue02d; # LETTER BHA
\u0aae>\ue02e; # LETTER MA
\u0aaf>\ue02f; # LETTER YA
\u0ab0>\ue030; # LETTER RA
\u0ab2>\ue032; # LETTER LA
\u0ab3>\ue033; # LETTER LLA
\u0ab5>\ue035; # LETTER VA
\u0ab6>\ue036; # LETTER SHA
\u0ab7>\ue037; # LETTER SSA
\u0ab8>\ue038; # LETTER SA
\u0ab9>\ue039; # LETTER HA
# Nukta, avagraha and dependent vowel signs
\u0abc>\ue03c; # SIGN NUKTA
\u0abd>\ue03d; # SIGN AVAGRAHA
\u0abe>\ue03e; # VOWEL SIGN AA
\u0abf>\ue03f; # VOWEL SIGN I
\u0ac0>\ue040; # VOWEL SIGN II
\u0ac1>\ue041; # VOWEL SIGN U
\u0ac2>\ue042; # VOWEL SIGN UU
\u0ac3>\ue043; # VOWEL SIGN VOCALIC R
\u0ac4>\ue044; # VOWEL SIGN VOCALIC RR
\u0ac5>\ue045; # VOWEL SIGN CANDRA E
\u0ac7>\ue047; # VOWEL SIGN E
\u0ac8>\ue048; # VOWEL SIGN AI
\u0ac9>\ue049; # VOWEL SIGN CANDRA O
\u0acb>\ue04b; # VOWEL SIGN O
\u0acc>\ue04c; # VOWEL SIGN AU
\u0acd>\ue04d; # SIGN VIRAMA
# Om and additional vocalic letters
\u0ad0>\ue050; # OM
\u0ae0>\ue060; # LETTER VOCALIC RR
\u0ae1>\ue061; # LETTER VOCALIC LL
# Digits
\u0ae6>\ue066; # DIGIT ZERO
\u0ae7>\ue067; # DIGIT ONE
\u0ae8>\ue068; # DIGIT TWO
\u0ae9>\ue069; # DIGIT THREE
\u0aea>\ue06a; # DIGIT FOUR
\u0aeb>\ue06b; # DIGIT FIVE
\u0aec>\ue06c; # DIGIT SIX
\u0aed>\ue06d; # DIGIT SEVEN
\u0aee>\ue06e; # DIGIT EIGHT
\u0aef>\ue06f; # DIGIT NINE
# Punctuation shared with Devanagari
\u0964>\ue064; # DANDA
\u0965>\ue065; # DOUBLE DANDA
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,95 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Gurmukhi-InterIndic
# One-way table from Gurmukhi code points (U+0A01..U+0A74) to InterIndic
# private-use code points.  Most rules keep the offset within the block
# (U+0A00 + n > U+E000 + n); the exceptions are marked FALLBACK or listed
# in the final group (TIPPI .. EK ONKAR), which map to U+E07C..U+E080.
# Order matters where a longer rule shares a prefix with a shorter one:
# the SA + NUKTA rule must stay ahead of the plain SA rule.
#:: NFD (NFC) ;
#\u0A16\u0A3C>\uE059; # LETTER KHHA
#\u0A17\u0A3C>\uE05A; # LETTER GHHA
#\u0A1C\u0A3C>\uE05B; # LETTER ZA
#\u0A38\u0A3C>\uE036; # LETTER SHA
#\u0A32\u0A3C>\uE033; # LETTER LLA
#\u0A2B\u0A3C>\uE05E; # LETTER FA
\u0A01>\ue001; # SIGN CHANDRABINDU
\u0A02>\uE002; # SIGN BINDI
\u0A05>\uE005; # LETTER A
\u0A06>\uE006; # LETTER AA
\u0A07>\uE007; # LETTER I
\u0A08>\uE008; # LETTER II
\u0A09>\uE009; # LETTER U
\u0A0A>\uE00A; # LETTER UU
\u0A0C>\uE032; # FALLBACK : VOCALIC LA
\u0A0F>\uE00F; # LETTER EE
\u0A10>\uE010; # LETTER AI
\u0A13>\uE013; # LETTER OO
\u0A14>\uE014; # LETTER AU
\u0A15>\uE015; # LETTER KA
\u0A16>\uE016; # LETTER KHA
\u0A17>\uE017; # LETTER GA
\u0A18>\uE018; # LETTER GHA
\u0A19>\uE019; # LETTER NGA
\u0A1A>\uE01A; # LETTER CA
\u0A1B>\uE01B; # LETTER CHA
\u0A1C>\uE01C; # LETTER JA
\u0A1D>\uE01D; # LETTER JHA
\u0A1E>\uE01E; # LETTER NYA
\u0A1F>\uE01F; # LETTER TTA
\u0A20>\uE020; # LETTER TTHA
\u0A21>\uE021; # LETTER DDA
\u0A22>\uE022; # LETTER DDHA
\u0A23>\uE023; # LETTER NNA
\u0A24>\uE024; # LETTER TA
\u0A25>\uE025; # LETTER THA
\u0A26>\uE026; # LETTER DA
\u0A27>\uE027; # LETTER DHA
\u0A28>\uE028; # LETTER NA
\u0A2A>\uE02A; # LETTER PA
\u0A2B>\uE02B; # LETTER PHA
\u0A2C>\uE02C; # LETTER BA
\u0A2D>\uE02D; # LETTER BHA
\u0A2E>\uE02E; # LETTER MA
\u0A2F>\uE02F; # LETTER YA
\u0A30>\uE030; # LETTER RA
\u0A32>\uE032; # LETTER LA
\u0a33>\uE033; # FALLBACK
\u0A35>\uE035; # LETTER VA
\u0a36>\ue036; # LETTER SHA
# Decomposed SHA (SA + NUKTA).  The second escape previously read "\0a3c"
# (missing "u"), so this rule could never match and the sequence fell
# through to the separate SA and NUKTA rules below.
\u0A38\u0a3c>\ue036; # FALLBACK
\u0A38>\uE038; # LETTER SA
\u0A39>\uE039; # LETTER HA
\u0A3C>\uE03C; # SIGN NUKTA
\u0A3E>\uE03E; # VOWEL SIGN AA
\u0A3F>\uE03F; # VOWEL SIGN I
\u0A40>\uE040; # VOWEL SIGN II
\u0A41>\uE041; # VOWEL SIGN U
\u0A42>\uE042; # VOWEL SIGN UU
\u0A47>\uE047; # VOWEL SIGN EE
\u0A48>\uE048; # VOWEL SIGN AI
\u0A4B>\uE04B; # VOWEL SIGN OO
\u0A4C>\uE04C; # VOWEL SIGN AU
\u0A4D>\uE04D; # SIGN VIRAMA
\u0A5C>\uE05C; # LETTER RRA
\u0A66>\uE066; # DIGIT ZERO
\u0A67>\uE067; # DIGIT ONE
\u0A68>\uE068; # DIGIT TWO
\u0A69>\uE069; # DIGIT THREE
\u0A6A>\uE06A; # DIGIT FOUR
\u0A6B>\uE06B; # DIGIT FIVE
\u0A6C>\uE06C; # DIGIT SIX
\u0A6D>\uE06D; # DIGIT SEVEN
\u0A6E>\uE06E; # DIGIT EIGHT
\u0A6F>\uE06F; # DIGIT NINE
\u0A70>\uE07C; # TIPPI
\u0A71>\uE07D; # ADDAK
\u0A72>\uE07E; # IRI
\u0A73>\uE07F; # URA
\u0A74>\uE080; # EK ONKAR
\u0964>\ue064; # DANDA
\u0965>\ue065; # DOUBLE DANDA
# :: NFC (NFD) ;
# eof

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,24 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Only intended for internal use
:: fullwidth-halfwidth;
。 > '.';
$terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]];
$initialPunct = [:Ps:][:Pi:];
# add space between any Han or terminal punctuation and letters, and
# between letters and Han or initial punct
[[:Ideographic:] $terminalPunct] {} [:Letter:] > ' ' ;
[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] > ' ' ;
# remove spacing between ideographs and other letters
< [:Ideographic:] { ' ' } [:Letter:] ;
< [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;

View file

@ -0,0 +1,109 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Transliteration table for Hebrew
# Based on the UNGEGN table at:
# http://www.eki.ee/wgrs/rom1_he.pdf
#
# Exceptions:
# - Accents are added to disambiguate letters
# - Combinations of dagesh, shin/sin dot that produce different
# letters are not yet encoded.
#
# To test, open:
# http://oss.software.ibm.com/cgi-bin/icu/tr
# Click Edit, paste in this file, Save As hebrew-latin/XXX
# (where XXX is a username)
# Now go back to the main window, and try it out.
# Use hebrew-latin/XXX for Output 1, and (Inverse) for Output 2
# Paste in hebrew text in Input, and hit Transliterate.
#
# For more information, see:
# http://oss.software.ibm.com/icu/userguide/Transliteration.html
:: [[:Hebrew:] [:^ccc=0:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138̄\u05BF] - [\u05BD]] ;
:: nfkd (nfc) ;
$letterAfter = [:M:]* [:L:] ;
# move longer items here to avoid masking
ח <> ẖ ;
צ <> ẕ } $letterAfter;
ץ <> ẕ ;
ש <> ş ;
ת <> ţ ;
א <> ʼ ;
ב <> b ;
ג <> g ;
ד <> d ;
ה <> h ;
ו <> w ;
ז <> z ;
ט <> t ;
י <> y ;
כ <> k } $letterAfter;
ך <> k ;
ל <> l ;
מ <> m } $letterAfter;
ם <> m ;
נ <> n } $letterAfter;
ן <> n ;
ס <> s ;
ע <> ʻ ;
פ <> p } $letterAfter;
ף <> p ;
ק <> q ;
ר <> r ;
װ > | וו; # HEBREW LIGATURE YIDDISH DOUBLE VAV
ױ > | וי; # HEBREW LIGATURE YIDDISH VAV YOD
ײ > | יי ; # HEBREW LIGATURE YIDDISH DOUBLE YOD
ּ <> ̇ ; # dagesh just goes to overdot for now
ׁ <> ̌ ; # shin dot -> sh
ׂ <> ̂ ; # sin dot -> s
# points
$above = [^[:ccc=0:][:ccc=230:]]*;
‎ֲ‎ > à ;
‎ֲ‎ $1< a ($above) ̀;
‎ָ‎ > á ;
‎ָ‎ $1 < a ($above) ́;
‎ֱ‎ > è ;
‎ֱ‎ $1 < e ($above) ̀;
‎ֵ‎ > é ;
‎ֵ‎ $1 < e ($above) ́;
‎ְ‎ > e ̆ ;
‎ְ‎ $1 < e ($above) ̆;
‎ֹ‎ > ò ;
‎ֹ‎ $1 < o ($above) ̀;
ִ <> i ;
ֻ <> u ;
ַ <> a ;
ֶ <> e ;
ֳ <> o ;
\u05BF <> ̄ ;
# fallbacks
ק < c ;
פ < f } $letterAfter;
ף < f ;
ז < j ;
ו < v ;
כס < x ;
:: (lower);
:: nfc (nfd) ;
:: ([[:Latin:] [:^ccc=0:] [\u02BB-\u02BC\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 ̄ ]]);

View file

@ -0,0 +1,207 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# note: a global filter is more efficient, but MUST include all source chars
:: [\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;
:: NFKC ();
# Hiragana-Katakana
# This is largely a one-to-one mapping, but it has a
# few kinks:
# 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
# Hiragana equivalents. We use Hiragana wa/wi/we/wo
# (308F-3092) with a voicing mark (3099), which is
# semantically equivalent. However, this is a non-
# roundtripping transformation.
# 2. The Katakana small ka/ke (30F5,30F6) have no
# Hiragana equivalents. We convert them to normal
# Hiragana ka/ke (304B,3051). This is a one-way
# information-losing transformation and precludes
# round-tripping of 30F5 and 30F6.
# 3. The combining marks 3099-309C are in the Hiragana
# block, but they apply to Katakana as well, so we
# leave them untouched.
# 4. The Katakana prolonged sound mark 30FC doubles the
# preceding vowel. This is a one-way information-
# losing transformation from Katakana to Hiragana.
# 5. The Katakana middle dot separates words in foreign
# expressions; we leave this unmodified.
# The above points preclude successful round-trip
# transformations of arbitrary input text. However,
# they provide naturalistic results that should conform
# to user expectations.
# Combining equivalents va/vi/ve/vo
わ゙ <> ヷ;
ゐ゙ <> ヸ;
ゑ゙ <> ヹ;
を゙ <> ヺ;
# One-to-one mappings, main block
# 3041:3094 <> 30A1:30F4
# 309D,E <> 30FD,E
ぁ <> ァ;
あ <> ア;
ぃ <> ィ;
い <> イ;
ぅ <> ゥ;
う <> ウ;
ぇ <> ェ;
え <> エ;
ぉ <> ォ;
お <> オ;
か <> カ;
が <> ガ;
き <> キ;
ぎ <> ギ;
く <> ク;
ぐ <> グ;
け <> ケ;
げ <> ゲ;
こ <> コ;
ご <> ゴ;
さ <> サ;
ざ <> ザ;
し <> シ;
じ <> ジ;
す <> ス;
ず <> ズ;
せ <> セ;
ぜ <> ゼ;
そ <> ソ;
ぞ <> ゾ;
た <> タ;
だ <> ダ;
ち <> チ;
ぢ <> ヂ;
っ <> ッ;
つ <> ツ;
づ <> ヅ;
て <> テ;
で <> デ;
と <> ト;
ど <> ド;
# na-row (U+306A..U+306E <> U+30CA..U+30CE)
な <> ナ;
に <> ニ;
ぬ <> ヌ;
ね <> ネ;
# The Katakana side of this rule was missing ("の <> ;"), which deleted
# の going forward and left the reverse rule with an empty key.
の <> ノ;
は <> ハ;
ば <> バ;
ぱ <> パ;
ひ <> ヒ;
び <> ビ;
ぴ <> ピ;
ふ <> フ;
ぶ <> ブ;
ぷ <> プ;
へ <> ヘ;
べ <> ベ;
ぺ <> ペ;
ほ <> ホ;
ぼ <> ボ;
ぽ <> ポ;
ま <> マ;
み <> ミ;
む <> ム;
め <> メ;
も <> モ;
ゃ <> ャ;
や <> ヤ;
ゅ <> ュ;
ゆ <> ユ;
ょ <> ョ;
よ <> ヨ;
ら <> ラ;
り <> リ;
る <> ル;
れ <> レ;
ろ <> ロ;
ゎ <> ヮ;
わ <> ワ;
ゐ <> ヰ;
ゑ <> ヱ;
を <> ヲ;
ん <> ン;
ゔ <> ヴ;
ゝ <> ヽ;
ゞ <> ヾ;
# One-way Katakana-Hiragana xform of small K ka/ke to
# normal H ka/ke.
か < ヵ;
け < ヶ;
# Katakana followed by a prolonged sound mark 30FC has
# its final vowel doubled. This is a Katakana-Hiragana
# one-way information-losing transformation. We
# include the small Katakana (e.g., small A 3041) and
# do not distinguish them from their large
# counterparts. It doesn't make sense to double a
# small counterpart vowel as a small Hiragana vowel, so
# we don't do so. In natural text this should never
# occur anyway. If a 30FC is seen without a preceding
# vowel sound (e.g., after n 30F3) we do not change it.
### $long = ー;
# The following categories are Hiragana, not Katakana
# as might be expected, since by the time we get to the
# 30FC, the preceding character will have already been
# transformed to Hiragana.
# {The following mechanically generated from the
# Unicode 3.0 data:}
$xa = [ \
ぁ あ か が さ ざ \
た だ な は ば ぱ \
ま ゃ や ら ゎ わ \
];
$xi = [ \
ぃ い き ぎ し じ \
ち ぢ に ひ び ぴ \
み り ゐ \
];
$xu = [ \
ぅ う く ぐ す ず \
っ つ づ ぬ ふ ぶ \
ぷ む ゅ ゆ る ゔ \
];
$xe = [ \
ぇ え け げ せ ぜ \
て で ね へ べ ぺ \
め れ ゑ \
];
$xo = [ \
ぉ お こ ご そ ぞ \
と ど の ほ ぼ ぽ \
も ょ よ ろ を \
];
あ < $xa {ー};
い < $xi {ー};
う < $xu {ー};
え < $xe {ー};
お < $xo {ー};
:: (NFKC) ;
# note: a global filter is more efficient, but MUST include all source chars!!
:: ([\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);
# eof

View file

@ -0,0 +1,14 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Hiragana-Latin: a compound transform made only of "::" steps.
# Source filter: the code points this transform is applied to
# (Hiragana U+3041..U+3094, the combining voicing mark U+3099, iteration
# marks, and a list of individual Katakana code points).
:: [\u3041-\u3094\u3099\u309D-\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0\u30D3\u30D6\u30D9\u30DC\u30F4\u30F7-\u30FA\u30FE] ;
# Pipeline: decompose, convert Hiragana to Katakana, romanize the
# Katakana, then recompose.
:: NFD ;
:: Hiragana-Katakana;
:: Katakana-Latin;
:: NFC ;
# Parenthesized steps: NOTE(review) presumably these apply only to the
# reverse (Latin to Hiragana) direction, as with the "BACKWARD" filters
# in the other rule files -- confirm against the ICU rule syntax.
:: (Lower) ;
:: ([',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]) ;

View file

@ -0,0 +1,147 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Bengali
#:: NFD (NFC) ;
\uE001>\u0981; # SIGN CANDRABINDU
\uE002>\u0982; # SIGN ANUSVARA
\uE003>\u0983; # SIGN VISARGA
\uE004>\u0985; # FALLBACK TO LETTER A
\uE005>\u0985; # LETTER A
\uE006>\u0986; # LETTER AA
\uE007>\u0987; # LETTER I
\uE008>\u0988; # LETTER II
\uE009>\u0989; # LETTER U
\uE00A>\u098A; # LETTER UU
\uE00B>\u098B; # LETTER VOCALIC R
\uE00C>\u098C; # LETTER VOCALIC L
\uE00D>\u098F; # FALLBACK
\uE00E>\u098F; # FALLBACK
\uE00F>\u098F; # LETTER E
\uE010>\u0990; # LETTER AI
\uE011>\u0993; # FALLBACK
\uE012>\u0993; # FALLBACK
\uE013>\u0993; # LETTER O
\uE014>\u0994; # LETTER AU
\uE015>\u0995; # LETTER KA
\uE016>\u0996; # LETTER KHA
\uE017>\u0997; # LETTER GA
\uE018>\u0998; # LETTER GHA
\uE019>\u0999; # LETTER NGA
\uE01A>\u099A; # LETTER CA
\uE01B>\u099B; # LETTER CHA
\uE01C>\u099C; # LETTER JA
\uE01D>\u099D; # LETTER JHA
\uE01E>\u099E; # LETTER NYA
\uE01F>\u099F; # LETTER TTA
\uE020>\u09A0; # LETTER TTHA
\uE021>\u09A1; # LETTER DDA
\uE022>\u09A2; # LETTER DDHA
\uE023>\u09A3; # LETTER NNA
\uE024>\u09A4; # LETTER TA
\uE025>\u09A5; # LETTER THA
\uE026>\u09A6; # LETTER DA
\uE027>\u09A7; # LETTER DHA
\uE028>\u09A8; # LETTER NA
\uE029>\u09A8\u09BC; # REMAP (indicExceptions.txt): \u09a9>\u09a8 = LETTER NNNA>LETTER NA
\uE02A>\u09AA; # LETTER PA
\uE02B>\u09AB; # LETTER PHA
\uE02C>\u09AC; # LETTER BA
\uE02D>\u09AD; # LETTER BHA
\uE02E>\u09AE; # LETTER MA
\uE02F>\u09AF; # LETTER YA
\uE030>\u09B0; # LETTER RA
\uE031>\u09B0\u09BC; # FALLBACK to RA
\uE032>\u09B2; # LETTER LA
\uE033>\u09B2; # REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA
\uE034>\u09B2; # REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA
\uE035>\u09AC; # REMAP (indicExceptions.txt): \u09b5>\u09ac = LETTER VA>LETTER BA
\uE036>\u09B6; # LETTER SHA
\uE037>\u09B7; # LETTER SSA
\uE038>\u09B8; # LETTER SA
\uE039>\u09B9; # LETTER HA
\uE03C>\u09BC; # SIGN NUKTA
\uE03D>\u09bd; # SIGN AVAGRAHA
\uE03E>\u09BE; # VOWEL SIGN AA
\uE03F>\u09BF; # VOWEL SIGN I
\uE040>\u09C0; # VOWEL SIGN II
\uE041>\u09C1; # VOWEL SIGN U
\uE042>\u09C2; # VOWEL SIGN UU
\uE043>\u09C3; # VOWEL SIGN VOCALIC R
\uE044>\u09C4; # VOWEL SIGN VOCALIC RR
\uE045>\u09C7; # REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E
\uE046>\u09C7; # FALLBACK
\uE047>\u09C7; # VOWEL SIGN E
\uE048>\u09C8; # VOWEL SIGN AI
\uE049>\u09C7\u09BE; # REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O
\uE04A>\u09C7\u09BE; # FALLBACK
\uE04B>\u09C7\u09BE; # VOWEL SIGN O
\uE04C>\u09C7\u09D7; # VOWEL SIGN AU
\uE04D>\u09CD; # SIGN VIRAMA
\uE050>\u0993\u0982; # InterIndic-Bengali: OM
# Empty right-hand side: these InterIndic code points produce no Bengali
# output.  In the InterIndic-Devanagari table, \ue051..\ue054 are
# STRESS SIGN UDATTA, STRESS SIGN ANUDATTA, GRAVE ACCENT and ACUTE ACCENT.
\ue051>; # STRESS SIGN UDATTA
\ue052>; # STRESS SIGN ANUDATTA
\ue053>; # GRAVE ACCENT
\ue054>; # ACUTE ACCENT
\uE055>; # LENGTH MARK
\uE056>\u09C8; # REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI
\uE057>\u09D7; # AU LENGTH MARK
\uE058>\u0995\u09BC; # FALLBACK
\uE059>\u0996\u09BC; # REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA
\uE05A>\u0997\u09BC; # REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA
\uE05B>\u099C\u09BC; # REMAP (indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA
\uE05C>\u09A1\u09BC; # FALLBACK
\uE05D>\u09A2\u09BC; # LETTER RHA
\uE05E>\u09AB\u09BC; # REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA
\uE05F>\u09AF\u09BC; # LETTER YYA
\uE060>\u09E0; # LETTER VOCALIC RR
\uE061>\u09E1; # LETTER VOCALIC LL
\uE062>\u09E2; # VOWEL SIGN VOCALIC L
\uE063>\u09E3; # VOWEL SIGN VOCALIC LL
\uE064>\u0964; # DANDA
\uE065>\u0965; # DOUBLE DANDA
\uE066>\u09E6; # DIGIT ZERO
\uE067>\u09E7; # DIGIT ONE
\uE068>\u09E8; # DIGIT TWO
\uE069>\u09E9; # DIGIT THREE
\uE06A>\u09EA; # DIGIT FOUR
\uE06B>\u09EB; # DIGIT FIVE
\uE06C>\u09EC; # DIGIT SIX
\uE06D>\u09ED; # DIGIT SEVEN
\uE06E>\u09EE; # DIGIT EIGHT
\uE06F>\u09EF; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
\ue071>\u09F0; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u09F1; # LETTER RA WITH LOWER DIAGONAL
\ue073>\u09F2; # RUPEE MARK
\ue074>\u09F3; # RUPEE SIGN
\ue075>\u09F4; # CURRENCY NUMERATOR ONE
\ue076>\u09F5; # CURRENCY NUMERATOR TWO
\ue077>\u09F6; # CURRENCY NUMERATOR THREE
\ue078>\u09F7; # CURRENCY NUMERATOR FOUR
\ue079>\u09F8; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>\u09F9; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>\u09FA; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u09AC; # FALLBACK FOR ORIYA LETTER WA
0 > \u09E6; # FALLBACK FOR TAMIL
1 > \u09E7;
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,158 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Devanagari
#:: NFD (NFC) ;
#Rules for Decomposed characters
# Each rule matches a base consonant followed by SIGN NUKTA (\ue03c) and
# emits the single Devanagari letter that the InterIndic code point named
# in the trailing comment also maps to further down in this table.
# NOTE(review): presumably these two-code-point rules are placed ahead of
# the single-code-point rules so the shorter rules do not mask them.
\ue028\ue03c > \u0929; #\ue029
\ue030\ue03c > \u0931; #\ue031
\ue033\ue03c > \u0934; #\ue034
\ue015\ue03c > \u0958; #\ue058 LETTER QA (For Urdu)
\ue016\ue03c > \u0959; #\ue059 LETTER KHHA (For Urdu)
\ue017\ue03c > \u095a; #\ue05a LETTER GHHA (For Urdu)
\ue01c\ue03c > \u095b; #\ue05b LETTER ZA (For Urdu)
\ue021\ue03c > \u095c; #\ue05c LETTER DDDHA (pronounced RRA)
\ue022\ue03c > \u095d; #\ue05d LETTER RHA (pronounced RRHA)
\ue02b\ue03c > \u095e; #\ue05e LETTER FA
\ue02f\ue03c > \u095f; #\ue05f LETTER YYA
#Decomposed compatibility transliterations
# SHORT O (\ue012) followed by the AU LENGTH MARK (\ue057) becomes LETTER AU;
# this rule sits above the rule that deletes a lone \ue057.
\ue012\ue057>\u0914; # FALLBACK FOR TAMIL AU
0 > \u0966; # FALLBACK FOR TAMIL
1 > \u0967;
# Length marks are removed (empty right-hand side).
\ue055>; # FALLBACK BLOW AWAY KANNADA AND TELUGU LENGTH MARK
\ue056>; # FALLBACK BLOW AWAY KANNADA AND TELUGU AI LENGTH MARK
\ue057>; # FALLBACK BLOW AWAY TAMIL AU LENGTH MARK
\ue001 > \u0901; # SIGN CANDRABINDU
\ue002 > \u0902; # SIGN ANUSVARA
\ue003 > \u0903; # SIGN VISARGA
\ue004 > \u0904; # SIGN SHORT A
\ue005 > \u0905; # LETTER A
\ue006 > \u0906; # LETTER AA
\ue007 > \u0907; # LETTER I
\ue008 > \u0908; # LETTER II
\ue009 > \u0909; # LETTER U
\ue00a > \u090a; # LETTER UU
\ue00b > \u090b; # LETTER VOCALIC R
\ue00c > \u090c; # LETTER VOCALIC L
\ue00d > \u090d; # LETTER CANDRA E (For representing English sounds)
\ue00e > \u090e; # LETTER SHORT E(For Southern Scripts)
\ue00f > \u090f; # LETTER E
\ue010 > \u0910; # LETTER AI
\ue011 > \u0911; # LETTER CANDRA O (For representing English sounds)
\ue012 > \u0912; # LETTER SHORT O (For Southern Scripts)
\ue013 > \u0913; # LETTER O
\ue014 > \u0914; # LETTER AU
\ue015 > \u0915; # LETTER KA
\ue016 > \u0916; # LETTER KHA
\ue017 > \u0917; # LETTER GA
\ue018 > \u0918; # LETTER GHA
\ue019 > \u0919; # LETTER NGA
\ue01a > \u091a; # LETTER CA
\ue01b > \u091b; # LETTER CHA
\ue01c > \u091c; # LETTER JA
\ue01d > \u091d; # LETTER JHA
\ue01e > \u091e; # LETTER NYA
\ue01f > \u091f; # LETTER TTA
\ue020 > \u0920; # LETTER TTHA
\ue021 > \u0921; # LETTER DDA
\ue022 > \u0922; # LETTER DDHA
\ue023 > \u0923; # LETTER NNA
\ue024 > \u0924; # LETTER TA
\ue025 > \u0925; # LETTER THA
\ue026 > \u0926; # LETTER DA
\ue027 > \u0927; # LETTER DHA
\ue028 > \u0928; # LETTER NA
\ue029 > \u0929; # LETTER NNNA
\ue02a > \u092a; # LETTER PA
\ue02b > \u092b; # LETTER PHA
\ue02c > \u092c; # LETTER BA
\ue02d > \u092d; # LETTER BHA
\ue02e > \u092e; # LETTER MA
\ue02f > \u092f; # LETTER YA
\ue030 > \u0930; # LETTER RA
\ue031 > \u0931; # LETTER RRA (Eyelash RA for Southern scripts)
#\ue031 > \u0930;
\ue032 > \u0932; # LETTER LA
\ue033 > \u0933; # LETTER LLA
\ue034 > \u0934; # LETTER LLLA (LLLA for Southern scripts)
#\ue034 > \u0933;
\ue035 > \u0935; # LETTER VA
\ue036 > \u0936; # LETTER SHA
\ue037 > \u0937; # LETTER SSA
\ue038 > \u0938; # LETTER SA
\ue039 > \u0939; # LETTER HA
\ue03c > \u093c; # SIGN NUKTA
\ue03d > \u093d; # SIGN AVAGRAHA
\ue03e > \u093e; # VOWEL SIGN AA
\ue03f > \u093f; # VOWEL SIGN I
\ue040 > \u0940; # VOWEL SIGN II
\ue041 > \u0941; # VOWEL SIGN U
\ue042 > \u0942; # VOWEL SIGN UU
\ue043 > \u0943; # VOWEL SIGN VOCALIC R
\ue044 > \u0944; # VOWEL SIGN VOCALIC RR
\ue045 > \u0945; # VOWEL SIGN CANDRA E
\ue046 > \u0946; # VOWEL SIGN SHORT E
\ue047 > \u0947; # VOWEL SIGN E
\ue048 > \u0948; # VOWEL SIGN AI
\ue049 > \u0949; # VOWEL SIGN CANDRA O
\ue04a > \u094a; # VOWEL SIGN SHORT O
\ue04b > \u094b; # VOWEL SIGN O
\ue04c > \u094c; # VOWEL SIGN AU
\ue04d > \u094d; # SIGN VIRAMA
\ue050 > \u0950; # OM
\ue051 > \u0951; # STRESS SIGN UDATTA
\ue052 > \u0952; # STRESS SIGN ANUDATTA
\ue053 > \u0953; # GRAVE ACCENT
\ue054 > \u0954; # ACUTE ACCENT
\ue058 > \u0958; # LETTER QA (For Urdu)
\ue059 > \u0959; # LETTER KHHA (For Urdu)
\ue05a > \u095a; # LETTER GHHA (For Urdu)
\ue05b > \u095b; # LETTER ZA (For Urdu)
\ue05c > \u095c; # LETTER DDDHA (pronounced RRA)
\ue05d > \u095d; # LETTER RHA (pronounced RRHA)
\ue05e > \u095e; # LETTER FA
\ue05f > \u095f; # LETTER YYA
\ue060 > \u0960; # LETTER VOCALIC RR
\ue061 > \u0961; # LETTER VOCALIC LL
\ue062 > \u0962; # VOWEL SIGN VOCALIC L
\ue063 > \u0963; # VOWEL SIGN VOCALIC LL
\ue064 > \u0964; # DANDA
\ue065 > \u0965; # DOUBLE DANDA
\ue066 > \u0966; # DIGIT ZERO
\ue067 > \u0967; # DIGIT ONE
\ue068 > \u0968; # DIGIT TWO
\ue069 > \u0969; # DIGIT THREE
\ue06a > \u096a; # DIGIT FOUR
\ue06b > \u096b; # DIGIT FIVE
\ue06c > \u096c; # DIGIT SIX
\ue06d > \u096d; # DIGIT SEVEN
\ue06e > \u096e; # DIGIT EIGHT
\ue06f > \u096f; # DIGIT NINE
\ue070>\u0970; # ABBREVIATION SIGN
\ue071>\u0930; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0930; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>\u0930\u0942; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0935; # FALLBACK FOR ORIYA LETTER WA
# \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
# :: NFC;
# eof

View file

@ -0,0 +1,138 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Gujarati
#:: NFD (NFC) ;
\ue001>\u0a81; # SIGN CANDRABINDU
\ue002>\u0a82; # SIGN ANUSVARA
\ue003>\u0a83; # SIGN VISARGA
\uE004>\u0a85; # FALLBACK TO LETTER A
\ue005>\u0a85; # LETTER A
\ue006>\u0a86; # LETTER AA
\ue007>\u0a87; # LETTER I
\ue008>\u0a88; # LETTER II
\ue009>\u0a89; # LETTER U
\ue00a>\u0a8a; # LETTER UU
\ue00b>\u0a8b; # LETTER VOCALIC R
\ue00c>\u0a8c; # LETTER VOCALIC L
\ue00d>\u0a8d; # GUJARATI VOWEL CANDRA E
\ue00e>\u0a8f; # FALLBACK
\ue00f>\u0a8f; # InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E)
\ue010>\u0a90; # LETTER AI
\ue011>\u0a91; # FALLBACK
\ue012>\u0a93; # FALLBACK
\ue013>\u0a93; # InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O)
\ue014>\u0a94; # LETTER AU
\ue015>\u0a95; # LETTER KA
\ue016>\u0a96; # LETTER KHA
\ue017>\u0a97; # LETTER GA
\ue018>\u0a98; # LETTER GHA
\ue019>\u0a99; # LETTER NGA
\ue01a>\u0a9a; # LETTER CA
\ue01b>\u0a9b; # LETTER CHA
\ue01c>\u0a9c; # LETTER JA
\ue01d>\u0a9d; # LETTER JHA
\ue01e>\u0a9e; # LETTER NYA
\ue01f>\u0a9f; # LETTER TTA
\ue020>\u0aa0; # LETTER TTHA
\ue021>\u0aa1; # LETTER DDA
\ue022>\u0aa2; # LETTER DDHA
\ue023>\u0aa3; # LETTER NNA
\ue024>\u0aa4; # LETTER TA
\ue025>\u0aa5; # LETTER THA
\ue026>\u0aa6; # LETTER DA
\ue027>\u0aa7; # LETTER DHA
\ue028>\u0aa8; # LETTER NA
\ue029>\u0aa8\u0abc; # FALLBACK to NA+NUKTA
\ue02a>\u0aaa; # LETTER PA
\ue02b>\u0aab; # LETTER PHA
\ue02c>\u0aac; # LETTER BA
\ue02d>\u0aad; # LETTER BHA
\ue02e>\u0aae; # LETTER MA
\ue02f>\u0aaf; # LETTER YA
\ue030>\u0ab0; # LETTER RA
\ue031>\u0ab0\u0abc; # FALLBACK
\ue032>\u0ab2; # LETTER LA
\ue033>\u0ab3; # LETTER LLA
\ue034>\u0ab3\u0abc; # LETTER LLLA>LETTER LLA+NUKTA
\ue035>\u0ab5; # LETTER VA
\ue036>\u0ab6; # LETTER SHA
\ue037>\u0ab7; # LETTER SSA
\ue038>\u0ab8; # LETTER SA
\ue039>\u0ab9; # LETTER HA
\ue03c>\u0abc; # SIGN NUKTA
\ue03d>\u0abd; # SIGN AVAGRAHA
\ue03e>\u0abe; # VOWEL SIGN AA
\ue03f>\u0abf; # VOWEL SIGN I
\ue040>\u0ac0; # VOWEL SIGN II
\ue041>\u0ac1; # VOWEL SIGN U
\ue042>\u0ac2; # VOWEL SIGN UU
\ue043>\u0ac3; # VOWEL SIGN VOCALIC R
\ue044>\u0ac4; # VOWEL SIGN VOCALIC RR
\ue045>\u0ac5; # VOWEL SIGN CANDRA E
\ue046>\u0ac7; # FALLBACK
\ue047>\u0ac7; # InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E)
\ue048>\u0ac8; # VOWEL SIGN AI
\ue049>\u0ac9; # VOWEL SIGN CANDRA O
\ue04a>\u0acb; # FALLBACK
\ue04b>\u0acb; # InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O)
\ue04c>\u0acc; # VOWEL SIGN AU
\ue04d>\u0acd; # SIGN VIRAMA
\ue050>\u0ad0; # OM
\ue051>;
\ue052>;
\ue053>;
\ue054>;
\ue055>; # UNMAPPED InterIndic-Gujarati: LENGTH MARK
\ue056>\u0ac8; # REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI
\ue057>\u0acc; # REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU
\ue058>\u0a95\u0abc; # FALLBACK
\ue059>\u0a96\u0abc; # REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA
\ue05a>\u0a97\u0abc; # REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA
\ue05b>\u0a9c\u0abc; # REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA
\ue05c>\u0aa1\u0abc; # FALLBACK
\ue05d>\u0aa2\u0abc; # REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA
\ue05e>\u0aab\u0abc; # REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER FA>LETTER PHA.SIGN NUKTA
\ue05f>\u0aaf\u0abc; # REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA
\ue060>\u0ae0; # LETTER VOCALIC RR
\ue061>\u0ae1; # LETTER VOCALIC LL
\ue062>\u0abf\u0abc; # REMAP (indicExceptions.txt): \u0ae2>\u0abf\u0abc = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
\ue063>\u0ac0\u0abc; # REMAP (indicExceptions.txt): \u0ae3>\u0ac0\u0abc = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
\uE064>\u0964; # DANDA
\uE065>\u0965; # DOUBLE DANDA
\ue066>\u0ae6; # DIGIT ZERO
\ue067>\u0ae7; # DIGIT ONE
\ue068>\u0ae8; # DIGIT TWO
\ue069>\u0ae9; # DIGIT THREE
\ue06a>\u0aea; # DIGIT FOUR
\ue06b>\u0aeb; # DIGIT FIVE
\ue06c>\u0aec; # DIGIT SIX
\ue06d>\u0aed; # DIGIT SEVEN
\ue06e>\u0aee; # DIGIT EIGHT
\ue06f>\u0aef; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
\ue071>\u0ab0; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0ab0; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0ab5; # FALLBACK FOR ORIYA LETTER WA
0 > \u0ae6; # FALLBACK FOR TAMIL
1 > \u0ae7;
#\ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,147 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Gurmukhi
#:: NFD (NFC) ;
$vowel = [\u0A05-\u0A14 \u0A3e-\u0A4D];
$consonant = [\u0A15-\u0A39];
\ue001>\u0a01; # SIGN CHANDRABINDU
# Rules for BINDI / TIPPI (both are produced from InterIndic ANUSVARA \ue002)
# Anusvara is equivalent to BINDI when preceded by a vowel
$vowel{\ue002>\u0a02; # SIGN ANUSVARA (\u0a02 = SIGN BINDI)
# when preceded by a consonant it is equivalent to TIPPI
$consonant{\ue002>\u0a70; # SIGN TIPPI
\ue002>\u0a02; # default when neither context matches: SIGN BINDI
\ue003>; # FALLBACK BLOW AWAY SIGN VISARGA
\uE004>\u0a05; # FALLBACK TO LETTER A
\ue005>\u0a05; # LETTER A
\ue006>\u0a06; # LETTER AA
\ue007>\u0a07; # LETTER I
\ue008>\u0a08; # LETTER II
\ue009>\u0a09; # LETTER U
\ue00a>\u0a0a; # LETTER UU
\ue00b>\u0a30\u0a3f; # REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
\ue00c>\u0a33; # FALLBACK
\ue00d>\u0a0f; # FALLBACK
\ue00e>\u0a0f; # FALLBACK
\ue00f>\u0a0f; # LETTER EE
\ue010>\u0a10; # LETTER AI
\ue011>\u0a13; # FALLBACK
\ue012>\u0a13; # FALLBACK
\ue013>\u0a13; # LETTER OO
\ue014>\u0a14; # LETTER AU
\ue015>\u0a15; # LETTER KA
\ue016>\u0a16; # LETTER KHA
\ue017>\u0a17; # LETTER GA
\ue018>\u0a18; # LETTER GHA
\ue019>\u0a19; # LETTER NGA
\ue01a>\u0a1a; # LETTER CA
\ue01b>\u0a1b; # LETTER CHA
\ue01c>\u0a1c; # LETTER JA
\ue01d>\u0a1d; # LETTER JHA
\ue01e>\u0a1e; # LETTER NYA
\ue01f>\u0a1f; # LETTER TTA
\ue020>\u0a20; # LETTER TTHA
\ue021>\u0a21; # LETTER DDA
\ue022>\u0a22; # LETTER DDHA
\ue023>\u0a23; # LETTER NNA
\ue024>\u0a24; # LETTER TA
\ue025>\u0a25; # LETTER THA
\ue026>\u0a26; # LETTER DA
\ue027>\u0a27; # LETTER DHA
\ue028>\u0a28; # LETTER NA
\ue029>\u0a28\u0a3c; # FALLBACK: LETTER NNNA>LETTER NA.SIGN NUKTA
\ue02a>\u0a2a; # LETTER PA
\ue02b>\u0a2b; # LETTER PHA
\ue02c>\u0a2c; # LETTER BA
\ue02d>\u0a2d; # LETTER BHA
\ue02e>\u0a2e; # LETTER MA
\ue02f>\u0a2f; # LETTER YA
\ue030>\u0a30; # LETTER RA
\ue031>\u0a30\u0a3c; # FALLBACK LETTER RA+NUKTA
\ue032>\u0a32; # LETTER LA
\ue033>\u0a33; # LETTER LLA
\ue034>\u0a33; # REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA
\ue035>\u0a35; # LETTER VA
\ue036>\u0a36; # LETTER SHA
\ue037>\u0a36; # REMAP (indicExceptions.txt): \u0a37>\u0a36 = LETTER SSA>LETTER SHA
\ue038>\u0a38; # LETTER SA
\ue039>\u0a39; # LETTER HA
\ue03c>\u0a3c; # SIGN NUKTA
\ue03d>; # FALLBACK BLOW AWAY SIGN AVAGRAHA
\ue03e>\u0a3e; # VOWEL SIGN AA
\ue03f>\u0a3f; # VOWEL SIGN I
\ue040>\u0a40; # VOWEL SIGN II
\ue041>\u0a41; # VOWEL SIGN U
\ue042>\u0a42; # VOWEL SIGN UU
\ue043>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC R
\ue044>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR
\ue045>\u0a48; # REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI
\ue046>\u0a47; # FALLBACK
\ue047>\u0a47; # VOWEL SIGN EE
\ue048>\u0a48; # VOWEL SIGN AI
\ue049>\u0a4c; # REMAP (indicExceptions.txt): \u0a49>\u0a4c = VOWEL SIGN CANDRA O>VOWEL SIGN AU
\ue04a>\u0a4b; # FALLBACK
\ue04b>\u0a4b; # VOWEL SIGN OO
\ue04c>\u0a4c; # VOWEL SIGN AU
\ue04d>\u0a4d; # SIGN VIRAMA
\ue050>\u0a13\u0a02; # FALLBACK to OO+BINDI : OM (\u0a13 = LETTER OO, \u0a02 = SIGN BINDI)
\ue051>;
\ue052>;
\ue053>;
\ue054>;
\ue055>; # FALLBACK BLOW AWAY LENGTH MARK
\ue056>\u0a48; # REMAP (indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI
\ue057>\u0a4c; # REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU
\ue058>\u0a15\u0a3c; # FALLBACK LETTER QA>LETTER KA.SIGN NUKTA
\ue059>\u0a59; # LETTER KHHA
\ue05a>\u0a5a; # LETTER GHHA
\ue05b>\u0a5b; # LETTER ZA
\ue05c>\u0a5c; # LETTER RRA
\ue05d>\u0a22\u0a3c; # REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA
\ue05e>\u0a5e; # LETTER FA
\ue05f>\u0a2f\u0a3c; # FALLBACK: LETTER YYA>LETTER YA.SIGN NUKTA
\ue060>\u0a30\u0a3f; # REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
\ue061>\u0a32\u0a3c; # FALLBACK: LETTER VOCALIC LL>LETTER LA.SIGN NUKTA
\ue062>\u0a3f\u0a3c; # REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
\ue063>\u0a40\u0a3c; # REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
\uE064>\u0964; # DANDA
\uE065>\u0965; # DOUBLE DANDA
\ue066>\u0a66; # DIGIT ZERO
\ue067>\u0a67; # DIGIT ONE
\ue068>\u0a68; # DIGIT TWO
\ue069>\u0a69; # DIGIT THREE
\ue06a>\u0a6a; # DIGIT FOUR
\ue06b>\u0a6b; # DIGIT FIVE
\ue06c>\u0a6c; # DIGIT SIX
\ue06d>\u0a6d; # DIGIT SEVEN
\ue06e>\u0a6e; # DIGIT EIGHT
\ue06f>\u0a6f; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
\ue071>\u0a30; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0a30; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>\u0a70; # TIPPI
\uE07D>\u0a71; # ADDAK
\uE07E>\u0a72; # IRI
\uE07F>\u0a73; # URA
\uE080>\u0a74; # EK ONKAR
\uE081>\u0a35; # FALLBACK FOR ORIYA LETTER WA
0 > \u0a66; # FALLBACK FOR TAMIL
1 > \u0a67;
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,141 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Kannada
#:: NFD (NFC) ;
\ue033\ue03c>\u0cde; # LETTER FA
\ue001>\u0c82; # REMAP (indicExceptions.txt): \u0c81>\u0c82 = SIGN CANDRABINDU>SIGN ANUSVARA
\ue002>\u0c82; # SIGN ANUSVARA
\ue003>\u0c83; # SIGN VISARGA
\uE004>\u0c85; # FALLBACK TO LETTER A
\ue005>\u0c85; # LETTER A
\ue006>\u0c86; # LETTER AA
\ue007>\u0c87; # LETTER I
\ue008>\u0c88; # LETTER II
\ue009>\u0c89; # LETTER U
\ue00a>\u0c8a; # LETTER UU
\ue00b>\u0c8b; # LETTER VOCALIC R
\ue00c>\u0c8c; # LETTER VOCALIC L
\ue00d>\u0c8e; # LETTER E
\ue00e>\u0c8e; # FALLBACK
\ue00f>\u0c8f; # LETTER EE
\ue010>\u0c90; # LETTER AI
\ue011>\u0c92; # FALLBACK
\ue012>\u0c92; # LETTER O
\ue013>\u0c93; # LETTER OO
\ue014>\u0c94; # LETTER AU
\ue015>\u0c95; # LETTER KA
\ue016>\u0c96; # LETTER KHA
\ue017>\u0c97; # LETTER GA
\ue018>\u0c98; # LETTER GHA
\ue019>\u0c99; # LETTER NGA
\ue01a>\u0c9a; # LETTER CA
\ue01b>\u0c9b; # LETTER CHA
\ue01c>\u0c9c; # LETTER JA
\ue01d>\u0c9d; # LETTER JHA
\ue01e>\u0c9e; # LETTER NYA
\ue01f>\u0c9f; # LETTER TTA
\ue020>\u0ca0; # LETTER TTHA
\ue021>\u0ca1; # LETTER DDA
\ue022>\u0ca2; # LETTER DDHA
\ue023>\u0ca3; # LETTER NNA
\ue024>\u0ca4; # LETTER TA
\ue025>\u0ca5; # LETTER THA
\ue026>\u0ca6; # LETTER DA
\ue027>\u0ca7; # LETTER DHA
\ue028>\u0ca8; # LETTER NA
\ue029>\u0ca8; # REMAP (indicExceptions.txt): \u0ca9>\u0ca8 = LETTER NNNA>LETTER NA
\ue02a>\u0caa; # LETTER PA
\ue02b>\u0cab; # LETTER PHA
\ue02c>\u0cac; # LETTER BA
\ue02d>\u0cad; # LETTER BHA
\ue02e>\u0cae; # LETTER MA
\ue02f>\u0caf; # LETTER YA
\ue030\ue03c>\u0cb1; # LETTER RA + NUKTA > LETTER RRA (listed before the plain RA rule so it is tried first)
\ue030>\u0cb0; # LETTER RA
\ue031>\u0cb1; # LETTER RRA
\ue032>\u0cb2; # LETTER LLA
\ue033>\u0cb3; # LETTER LLA
\ue034>\u0cde; # LETTER LLLA > \u0cde (same target as LETTER LLA + NUKTA and \ue05e); NOTE(review): U+0CDE is named LETTER FA in Unicode - confirm intended
\ue035>\u0cb5; # LETTER VA
\ue036>\u0cb6; # LETTER SHA
\ue037>\u0cb7; # LETTER SSA
\ue038>\u0cb8; # LETTER SA
\ue039>\u0cb9; # LETTER HA
\ue03c>\u0cbc; # NUKTA
\ue03d>\u0cbd; # AVAGRAHA
\ue03e>\u0cbe; # VOWEL SIGN AA
\ue03f>\u0cbf; # VOWEL SIGN I
\ue040>\u0cc0; # VOWEL SIGN II
\ue041>\u0cc1; # VOWEL SIGN U
\ue042>\u0cc2; # VOWEL SIGN UU
\ue043>\u0cc3; # VOWEL SIGN VOCALIC R
\ue044>\u0cc4; # VOWEL SIGN VOCALIC RR
\ue045>\u0cc6; # REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E
\ue046>\u0cc6; # VOWEL SIGN E
\ue047>\u0cc7; # VOWEL SIGN EE
\ue048>\u0cc8; # VOWEL SIGN AI
\ue049>\u0cca; # REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O
\ue04a>\u0cca; # VOWEL SIGN O
\ue04b>\u0ccb; # VOWEL SIGN OO
\ue04c>\u0ccc; # VOWEL SIGN AU
\ue04d>\u0ccd; # SIGN VIRAMA
\ue050>\u0c93\u0c82; # REMAP (indicExceptions.txt): \u0cd0>\u0c93\u0c82 = OM>LETTER OO.SIGN ANUSVARA
\ue051>;
\ue052>;
\ue053>;
\ue054>;
\ue055>\u0cd5; # LENGTH MARK
\ue056>\u0cd6; # AI LENGTH MARK
\ue057>\u0ccc; # REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU
\ue058>\u0c95; # FALLBACK
\ue059>\u0c96; # REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA
\ue05a>\u0c97; # REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA
\ue05b>\u0c9c; # REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA
\ue05c>\u0ca2; # FALLBACK
\ue05d>\u0ca2; # REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA
\ue05e>\u0cde; # LETTER FA
\ue05f>\u0caf; # REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA
\ue060>\u0ce0; # LETTER VOCALIC RR
\ue061>\u0ce1; # LETTER VOCALIC LL
\ue062>\u0cbf; # REMAP (indicExceptions.txt): \u0ce2>\u0cbf = VOWEL SIGN VOCALIC L>VOWEL SIGN I
\ue063>\u0cc0; # REMAP (indicExceptions.txt): \u0ce3>\u0cc0 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
\ue064>'.' ; # FALLBACK FOR DANDA
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
\ue066>\u0ce6; # DIGIT ZERO
\ue067>\u0ce7; # DIGIT ONE
\ue068>\u0ce8; # DIGIT TWO
\ue069>\u0ce9; # DIGIT THREE
\ue06a>\u0cea; # DIGIT FOUR
\ue06b>\u0ceb; # DIGIT FIVE
\ue06c>\u0cec; # DIGIT SIX
\ue06d>\u0ced; # DIGIT SEVEN
\ue06e>\u0cee; # DIGIT EIGHT
\ue06f>\u0cef; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
\ue071>\u0cb0; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0cb0; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0cb5; # FALLBACK FOR ORIYA LETTER WA
0 > \u0ce6; # FALLBACK FOR TAMIL
1 > \u0ce7;
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,529 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Latin
#\ue000 (\u0900) reserved
#consonants
$chandrabindu=\ue001;
$anusvara=\ue002;
$visarga=\ue003;
#\ue004 (\u0904) reserved
# w<vowel> represents the stand-alone form
$wa=\ue005;
$waa=\ue006;
$wi=\ue007;
$wii=\ue008;
$wu=\ue009;
$wuu=\ue00a;
$wr=\ue00b;
$wl=\ue00c;
$wce=\ue00d; # LETTER CANDRA E
$wse=\ue00e; # LETTER SHORT E
$we=\ue00f; # \u090f LETTER E
$wai=\ue010;
$wco=\ue011; # LETTER CANDRA O
$wso=\ue012; # LETTER SHORT O
$wo=\ue013; # \u0913 LETTER O
$wau=\ue014;
$ka=\ue015;
$kha=\ue016;
$ga=\ue017;
$gha=\ue018;
$nga=\ue019;
$ca=\ue01a;
$cha=\ue01b;
$ja=\ue01c;
$jha=\ue01d;
$nya=\ue01e;
$tta=\ue01f;
$ttha=\ue020;
$dda=\ue021;
$ddha=\ue022;
$nna=\ue023;
$ta=\ue024;
$tha=\ue025;
$da=\ue026;
$dha=\ue027;
$na=\ue028;
$ena=\ue029; #compatibility
$pa=\ue02a;
$pha=\ue02b;
$ba=\ue02c;
$bha=\ue02d;
$ma=\ue02e;
$ya=\ue02f;
$ra=\ue030;
$vva=\ue081;
$rra=\ue031;
$la=\ue032;
$lla=\ue033;
$ela=\ue034; #compatibility
$va=\ue035;
$sha=\ue036;
$ssa=\ue037;
$sa=\ue038;
$ha=\ue039;
#\u093a Reserved
#\u093b Reserved
$nukta=\ue03c;
$avagraha=\ue03d; # SIGN AVAGRAHA
# <vowel> represents the dependent form
$aa=\ue03e;
$i=\ue03f;
$ii=\ue040;
$u=\ue041;
$uu=\ue042;
$rh=\ue043;
$lh=\ue044;
$ce=\ue045; #VOWEL SIGN CANDRA E
$se=\ue046; #VOWEL SIGN SHORT E
$e=\ue047;
$ai=\ue048;
$co=\ue049; # VOWEL SIGN CANDRA O
$so=\ue04a; # VOWEL SIGN SHORT O
$o=\ue04b; # \u094b
$au=\ue04c;
$virama=\ue04d;
# \u094e Reserved
# \u094f Reserved
$om=\ue050; # OM
\ue051>; # UNMAPPED STRESS SIGN UDATTA
\ue052>; # UNMAPPED STRESS SIGN ANUDATTA
\ue053>; # UNMAPPED GRAVE ACCENT
\ue054>; # UNMAPPED ACUTE ACCENT
$lm = \ue055;# Telugu Length Mark
$ailm=\ue056;# AI Length Mark
$aulm=\ue057;# AU Length Mark
#urdu compatibility forms
$uka=\ue058;
$ukha=\ue059;
$ugha=\ue05a;
$ujha=\ue05b;
$uddha=\ue05c;
$udha=\ue05d;
$ufa=\ue05e;
$uya=\ue05f;
$wrr=\ue060;
$wll=\ue061;
$rrh=\ue062;
$llh=\ue063;
$danda=\ue064;
$doubleDanda=\ue065;
$zero=\ue066; # DIGIT ZERO
$one=\ue067; # DIGIT ONE
$two=\ue068; # DIGIT TWO
$three=\ue069; # DIGIT THREE
$four=\ue06a; # DIGIT FOUR
$five=\ue06b; # DIGIT FIVE
$six=\ue06c; # DIGIT SIX
$seven=\ue06d; # DIGIT SEVEN
$eight=\ue06e; # DIGIT EIGHT
$nine=\ue06f; # DIGIT NINE
# \u0970>; # UNMAPPED ABBREVIATION SIGN
$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
$depVowelBelow=[\ue041-\ue044];
# $x was originally called '&'; $z was '%'
$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];
$z=[bcdfghjklmnpqrstvwxyz];
$vowels=[aeiour\u0304\u0325\u0306];
$forceIndependentMatra = [^[[:L:][\u0300-\u034c]]];
######################################################################
# convert from Native letters to Latin letters
######################################################################
#transliterations for anusvara
$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;
$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;
$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;
$anusvara} [$ta$tha$da$dha$na] > n ;
$anusvara} [$pa$pha$ba$bha$ma] > m ;
$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;
$anusvara> m\u0307;
# Urdu compatibility
$ya$nukta}$x > y\u0307 ;
$ya$nukta$virama > y\u0307 ;
$ya$nukta > y\u0307a ;
$la$nukta }$x > l\u0331 ;
$la$nukta$virama > l\u0331 ;
$la$nukta > l\u0331a ;
$na$nukta }$x > n\u0331 ;
$na$nukta$virama > n\u0331 ;
$na$nukta > n\u0331a ;
$ena }$x > n\u0331 ;
$ena$virama > n\u0331 ;
$ena > n\u0331a ;
$uka > qa ;
$ka$nukta }$x > q ;
$ka$nukta$virama > q ;
$ka$nukta > qa ;
$kha$nukta }$x > k\u0331h\u0331 ;
$kha$nukta$virama > k\u0331h\u0331 ;
$kha$nukta > k\u0331h\u0331a ;
$ukha$virama > k\u0331h\u0331;
$ukha > k\u0331h\u0331a;
$ugha > g\u0307a ;
$ga$nukta }$x > g\u0307 ;
$ga$nukta$virama > g\u0307 ;
$ga$nukta > g\u0307a ;
$ujha > za ;
$ja$nukta }$x > z ;
$ja$nukta$virama > z ;
$ja$nukta > za ;
$ddha$nukta}$x > r\u0323h ;
$ddha$nukta$virama > r\u0323h ;
$ddha$nukta > r\u0323ha;
$uddha}$x > r\u0323 ;
$uddha$virama > r\u0323 ;
$uddha > r\u0323a;
$udha > r\u0323a ;
$dda$nukta}$x > r\u0323 ;
$dda$nukta$virama > r\u0323 ;
$dda$nukta > r\u0323a ;
$pha$nukta }$x > f ;
$pha$nukta$virama > f ;
$pha$nukta > fa ;
$ufa }$x > f ;
$ufa$virama > f ;
$ufa > fa ;
$ra$nukta}$x > r\u0331;
$ra$nukta$virama > r\u0331;
$ra$nukta > r\u0331a;
$lla$nukta}$x > l\u0331;
$lla$nukta$virama > l\u0331;
$lla$nukta > l\u0331a;
$ela}$x > l\u0331;
$ela$virama > l\u0331;
$ela > l\u0331a;
$uya}$x > y\u0307;
$uya$virama > y\u0307;
$uya > y\u0307a;
# normal consonants
$ka$virama}$ha>k'';
$ka}$x>k;
$ka$virama>k;
$ka>ka;
$kha}$x>kh;
$kha$virama>kh;
$kha>kha;
$ga$virama}$ha>g'';
$ga}$x>g;
$ga$virama>g;
$ga>ga;
$gha}$x>gh;
$gha$virama>gh;
$gha>gha;
$nga}$x>n\u0307;
$nga$virama>n\u0307;
$nga>n\u0307a ;
$ca$virama}$ha>c'';
$ca}$x>c;
$ca$virama>c;
$ca>ca;
$cha}$x>ch;
$cha$virama>ch;
$cha>cha;
$ja$virama}$ha>j'';
$ja}$x>j;
$ja$virama>j;
$ja>ja;
$jha}$x>jh;
$jha$virama>jh;
$jha>jha;
$nya }$x>n\u0303 ;
$nya$virama>n\u0303;
$nya > n\u0303a ;
$tta$virama}$ha>t\u0323'';
$tta}$x>t\u0323;
$tta$virama>t\u0323;
$tta>t\u0323a;
$ttha}$x>t\u0323h;
$ttha$virama>t\u0323h;
$ttha>t\u0323ha;
# DDA + VIRAMA before HA gets an apostrophe, following the parallel rules for
# $tta, $ta, $da, etc., so the output "d\u0323" + "h" stays distinct from the
# aspirate $ddha, which is written "d\u0323h".
$dda$virama}$ha>d\u0323'';
$dda}$x>d\u0323;
$dda$virama>d\u0323;
$dda>d\u0323a;
$ddha}$x>d\u0323h;
$ddha$virama>d\u0323h;
$ddha>d\u0323ha;
$nna}$x>n\u0323 ;
$nna$virama>n\u0323;
$nna>n\u0323a ;
$ta$virama}$ha>t'';
$ta$virama}$ttha>t'';
$ta$virama}$tta>t'';
$ta$virama}$tha>t'';
$ta}$x>t;
$ta$virama>t;
$ta>ta;
$tha}$x>th;
$tha$virama>th;
$tha>tha;
$da$virama}$ha>d'';
$da$virama}$ddha>d'';
$da$virama}$dda>d'';
$da$virama}$dha>d'';
$da}$x>d;
$da$virama>d;
$da>da;
$dha}$x>dh;
$dha$virama>dh;
$dha>dha;
$na$virama}$ga>n'';
$na$virama}$ya>n'';
$na}$x>n;
$na$virama>n;
$na>na;
$pa$virama}$ha>p'';
$pa}$x>p;
$pa$virama>p;
$pa>pa;
$pha}$x>ph;
$pha$virama>ph;
$pha>pha;
$ba$virama}$ha>b'';
$ba}$x>b;
$ba$virama>b;
$ba>ba;
$bha}$x>bh;
$bha$virama>bh;
$bha>bha;
$ma$virama}$ma>m'';
$ma}$x>m;
$ma$virama>m;
$ma>ma;
$ya}$x>y;
$ya$virama>y;
$ya>ya;
$ra$virama}$ha>r'';
$ra}$x>r;
$ra$virama>r;
$ra>ra;
$vva$virama}$ha>w\u0307'';
$vva}$x>w\u0307;
$vva$virama>w\u0307;
$vva>w\u0307a;
$rra$virama}$ha>r\u0331'';
$rra}$x>r\u0331;
$rra$virama>r\u0331;
$rra>r\u0331a;
$la$virama}$ha>l'';
$la}$x>l;
$la$virama>l;
$la>la;
$lla$virama}$ha>l\u0323'';
$lla}$x>l\u0323;
$lla$virama>l\u0323;
$lla>l\u0323a;
$va}$x>v;
$va$virama>v;
$va>va;
$sa$virama}$ha>s'';
$sa$virama}$sha>s'';
$sa$virama}$ssa>s'';
$sa$virama}$sa>s'';
$sa}$x>s;
$sa$virama>s;
#for gurmukhi
$sa$nukta}$x>s\u0301;
$sa$nukta$virama>s\u0301;
$sa$nukta>s\u0301a;
$sa>sa;
$sha}$x>s\u0301;
$sha$virama>s\u0301;
$sha>s\u0301a;
$ssa}$x>s\u0323;
$ssa$virama>s\u0323;
$ssa>s\u0323a;
$ha}$x>h;
$ha$virama>h;
$ha>ha;
# dependent vowels (should never occur except following consonants)
$forceIndependentMatra{$aa > \u0314a\u0304 ;
$forceIndependentMatra{$ai > \u0314ai ;
$forceIndependentMatra{$au > \u0314au ;
$forceIndependentMatra{$ii > \u0314i\u0304 ;
$forceIndependentMatra{$i > \u0314i ;
$forceIndependentMatra{$uu > \u0314u\u0304 ;
$forceIndependentMatra{$u > \u0314u ;
$forceIndependentMatra{$rrh > \u0314r\u0325\u0304 ;
$forceIndependentMatra{$rh > \u0314r\u0325 ;
$forceIndependentMatra{$llh > \u0314l\u0325\u0304 ;
$forceIndependentMatra{$lh > \u0314l\u0325 ;
$forceIndependentMatra{$e > \u0314e\u0304 ;
$forceIndependentMatra{$o > \u0314o\u0304 ;
#extra vowels
$forceIndependentMatra{$ce > \u0314e\u0306 ;
$forceIndependentMatra{$co > \u0314o\u0306 ;
$forceIndependentMatra{$se > \u0314e ;
$forceIndependentMatra{$so > \u0314o ;
$forceIndependentMatra{$nukta >; # Nukta cannot appear independently or as first character
$forceIndependentMatra{$virama >; # Virama cannot appear independently or as first character
$aa > a\u0304 ;
$ai > ai ;
$au > au ;
$ii > i\u0304 ;
$i > i ;
$uu > u\u0304 ;
$u > u ;
$rrh > r\u0325\u0304 ;
$rh > r\u0325 ;
$llh > l\u0325\u0304 ;
$lh > l\u0325 ;
$e > e\u0304 ;
$o > o\u0304 ;
#extra vowels
$ce > e\u0306 ;
$co > o\u0306 ;
$se > e ;
$so > o ;
# independent vowels when followed by dependent vowels. Generally illegal; handled only for round-tripping
$waa} $x > a\u0304\u0314 ;
$wai} $x > ai\u0314 ;
$wau} $x > au\u0314 ;
$wii} $x > i\u0304\u0314 ;
$wi } $x > i\u0314 ;
$wuu} $x > u\u0304\u0314 ;
$wu } $x > u\u0314 ;
$wrr} $x > r\u0325\u0304\u0314 ;
$wr } $x > r\u0325\u0314 ;
$wll} $x > l\u0325\u0304\u0314 ;
$wl } $x > l\u0325\u0314 ;
$we } $x > e\u0304\u0314 ;
$wo } $x > o\u0304\u0314 ;
$wa } $x > a\u0314 ;
#extra vowels
$wce} $x > e\u0306\u0314 ;
$wco} $x > o\u0306\u0314 ;
$wse} $x > e\u0314 ;
$wso} $x > o\u0314 ;
$om} $x > ''om\u0314 ;
# independent vowels when preceded by vowels
$vowels{$waa > ''a\u0304 ;
$vowels{$wai > ''ai ;
$vowels{$wau > ''au ;
$vowels{$wii > ''i\u0304 ;
$vowels{$wi > ''i ;
$vowels{$wuu > ''u\u0304 ;
$vowels{$wu > ''u ;
$vowels{$wrr > ''r\u0325\u0304 ;
$vowels{$wr > ''r\u0325 ;
$vowels{$wll > ''l\u0325\u0304 ;
$vowels{$wl > ''l\u0325 ;
$vowels{$we > ''e\u0304 ;
$vowels{$wo > ''o\u0304 ;
$vowels{$wa > ''a ;
#extra vowels
$vowels{$wce > ''e\u0306 ;
$vowels{$wco > ''o\u0306 ;
$vowels{$wse > ''e ;
$vowels{$wso > ''o ;
# independent vowels (otherwise)
$waa > a\u0304 ;
$wai > ai ;
$wau > au ;
$wii > i\u0304 ;
$wi > i ;
$wuu > u\u0304 ;
$wu > u ;
$wrr > r\u0325\u0304 ;
$wr > r\u0325 ;
$wll > l\u0325\u0304 ;
$wl > l\u0325 ;
$we > e\u0304 ;
$wo > o\u0304 ;
$wa > a ;
#extra vowels
$wce > e\u0306 ;
$wco > o\u0306 ;
$wse > e ;
$wso > o ;
$om > ''om ;
#stress marks
$avagraha > \u0315;
$chandrabindu$anusvara>\u0303;
$chandrabindu > m\u0310;
$visarga>h\u0323;
#numbers
$zero > 0;
$one > 1;
$two > 2;
$three > 3;
$four > 4;
$five > 5;
$six > 6;
$seven > 7;
$eight > 8;
$nine > 9;
$lm >;
$ailm >;
$aulm >;
$danda>'.';
$doubleDanda>'.';
\ue070>; # ABBREVIATION SIGN
# LETTER RA WITH MIDDLE DIAGONAL
\ue071}$x>ra;
\ue071$virama>r;
\ue071>ra;
# LETTER RA WITH LOWER DIAGONAL
\ue072}$x>ra;
\ue072$virama>r;
\ue072>ra;
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE004>; # DEVANAGARI LETTER SHORT A (removed from the output)

View file

@ -0,0 +1,141 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Malayalam
#:: NFD (NFC) ;
\ue001>\u0d02; # REMAP (indicExceptions.txt): \u0d01>\u0d02 = SIGN CANDRABINDU>SIGN ANUSVARA
\ue002>\u0d02; # SIGN ANUSVARA
\ue003>\u0d03; # SIGN VISARGA
\uE004>\u0d05; # FALLBACK TO LETTER A
\ue005>\u0d05; # LETTER A
\ue006>\u0d06; # LETTER AA
\ue007>\u0d07; # LETTER I
\ue008>\u0d08; # LETTER II
\ue009>\u0d09; # LETTER U
\ue00a>\u0d0a; # LETTER UU
\ue00b>\u0d0b; # LETTER VOCALIC R
\ue00c>\u0d0c; # LETTER VOCALIC L
\ue00d>\u0d0e; # FALLBACK LETTER E
\ue00e>\u0d0e; # LETTER E
\ue00f>\u0d0f; # LETTER EE
\ue010>\u0d10; # LETTER AI
\ue011>\u0d12; # FALLBACK TO O
\ue012>\u0d12; # LETTER O
\ue013>\u0d13; # LETTER OO
\ue014>\u0d14; # LETTER AU
\ue015>\u0d15; # LETTER KA
\ue016>\u0d16; # LETTER KHA
\ue017>\u0d17; # LETTER GA
\ue018>\u0d18; # LETTER GHA
\ue019>\u0d19; # LETTER NGA
\ue01a>\u0d1a; # LETTER CA
\ue01b>\u0d1b; # LETTER CHA
\ue01c>\u0d1c; # LETTER JA
\ue01d>\u0d1d; # LETTER JHA
\ue01e>\u0d1e; # LETTER NYA
\ue01f>\u0d1f; # LETTER TTA
\ue020>\u0d20; # LETTER TTHA
\ue021>\u0d21; # LETTER DDA
\ue022>\u0d22; # LETTER DDHA
\ue023>\u0d23; # LETTER NNA
\ue024>\u0d24; # LETTER TA
\ue025>\u0d25; # LETTER THA
\ue026>\u0d26; # LETTER DA
\ue027>\u0d27; # LETTER DHA
\ue028>\u0d28; # LETTER NA
\ue029>\u0d28; # REMAP (indicExceptions.txt): \u0d29>\u0d28 = LETTER NNNA>LETTER NA
\ue02a>\u0d2a; # LETTER PA
\ue02b>\u0d2b; # LETTER PHA
\ue02c>\u0d2c; # LETTER BA
\ue02d>\u0d2d; # LETTER BHA
\ue02e>\u0d2e; # LETTER MA
\ue02f>\u0d2f; # LETTER YA
\ue030\ue03c>\u0d31; # LETTER RA + NUKTA > LETTER RRA (listed before the plain RA rule so it is tried first)
\ue030>\u0d30; # LETTER RA
\ue031>\u0d31; # LETTER RRA
\ue032>\u0d32; # LETTER LA
\ue033\ue03c>\u0d34; # LETTER LLA + NUKTA > LETTER LLLA (listed before the plain LLA rule so it is tried first)
\ue033>\u0d33; # LETTER LLA
\ue034>\u0d34; # LETTER LLLA
\ue035>\u0d35; # LETTER VA
\ue036>\u0d36; # LETTER SHA
\ue037>\u0d37; # LETTER SSA
\ue038>\u0d38; # LETTER SA
\ue039>\u0d39; # LETTER HA
\ue03c>; # FALLBACK BLOW AWAY NUKTA
\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA
\ue03e>\u0d3e; # VOWEL SIGN AA
\ue03f>\u0d3f; # VOWEL SIGN I
\ue040>\u0d40; # VOWEL SIGN II
\ue041>\u0d41; # VOWEL SIGN U
\ue042>\u0d42; # VOWEL SIGN UU
\ue043>\u0d43; # VOWEL SIGN VOCALIC R
\ue044>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR
\ue045>\u0d3e; # REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA
\ue046>\u0d46; # VOWEL SIGN E
\ue047>\u0d47; # VOWEL SIGN EE
\ue048>\u0d48; # VOWEL SIGN AI
\ue049>\u0d4b; # REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO
\ue04a>\u0d4a; # VOWEL SIGN O
\ue04b>\u0d4b; # VOWEL SIGN OO
\ue04c>\u0d4c; # VOWEL SIGN AU
\ue04d>\u0d4d; # SIGN VIRAMA
\ue050>\u0d13\u0d02; # FALLBACK: OM>LETTER OO.SIGN ANUSVARA
\ue051>;
\ue052>;
\ue053>;
\ue054>;
\ue055>; # FALLBACK BLOW AWAY LENGTH MARK
\ue056>\u0d48; # REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI
\ue057>\u0d57; # AU LENGTH MARK
\ue058>\u0d15; # FALLBACK
\ue059>\u0d16; # REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA
\ue05a>\u0d17; # REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA
\ue05b>\u0d1c; # REMAP (indicExceptions.txt): \u0d5b>\u0d1c = LETTER ZA>LETTER JA
\ue05d>\u0d22; # REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA
\ue05c>\u0d21; # FALLBACK
\ue05e>\u0d2b; # REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA
\ue05f>\u0d2f; # REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA
\ue060>\u0d60; # LETTER VOCALIC RR
\ue061>\u0d61; # LETTER VOCALIC LL
\ue062>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC L
\ue063>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC LL
\ue064>'.' ; # FALLBACK FOR DANDA
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
\ue066>\u0d66; # DIGIT ZERO
\ue067>\u0d67; # DIGIT ONE
\ue068>\u0d68; # DIGIT TWO
\ue069>\u0d69; # DIGIT THREE
\ue06a>\u0d6a; # DIGIT FOUR
\ue06b>\u0d6b; # DIGIT FIVE
\ue06c>\u0d6c; # DIGIT SIX
\ue06d>\u0d6d; # DIGIT SEVEN
\ue06e>\u0d6e; # DIGIT EIGHT
\ue06f>\u0d6f; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
\ue071>\u0d30; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0d30; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0d35; # FALLBACK FOR ORIYA LETTER WA
0 > \u0d66; # FALLBACK FOR TAMIL
1 > \u0d67;
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,137 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Oriya
#:: NFD (NFC) ;
\ue001>\u0b01; # SIGN CANDRABINDU
\ue002>\u0b02; # SIGN ANUSVARA
\ue003>\u0b03; # SIGN VISARGA
\uE004>\u0b05; # FALLBACK TO LETTER A
\ue005>\u0b05; # LETTER A
\ue006>\u0b06; # LETTER AA
\ue007>\u0b07; # LETTER I
\ue008>\u0b08; # LETTER II
\ue009>\u0b09; # LETTER U
\ue00a>\u0b0a; # LETTER UU
\ue00b>\u0b0b; # LETTER VOCALIC R
\ue00c>\u0b0c; # LETTER VOCALIC L
\ue00d>\u0b0f; # FALLBACK
\ue00e>\u0b0f; # FALLBACK
\ue00f>\u0b0f; # LETTER E
\ue010>\u0b10; # LETTER AI
\ue011>\u0b13; # FALLBACK
\ue012>\u0b13; # FALLBACK
\ue013>\u0b13; # FALLBACK LETTER OO (\u0b13 = LETTER O)
\ue014>\u0b14; # LETTER AU
\ue015>\u0b15; # LETTER KA
\ue016>\u0b16; # LETTER KHA
\ue017>\u0b17; # LETTER GA
\ue018>\u0b18; # LETTER GHA
\ue019>\u0b19; # LETTER NGA
\ue01a>\u0b1a; # LETTER CA
\ue01b>\u0b1b; # LETTER CHA
\ue01c>\u0b1c; # LETTER JA
\ue01d>\u0b1d; # LETTER JHA
\ue01e>\u0b1e; # LETTER NYA
\ue01f>\u0b1f; # LETTER TTA
\ue020>\u0b20; # LETTER TTHA
\ue021>\u0b21; # LETTER DDA
\ue022>\u0b22; # LETTER DDHA
\ue023>\u0b23; # LETTER NNA
\ue024>\u0b24; # LETTER TA
\ue025>\u0b25; # LETTER THA
\ue026>\u0b26; # LETTER DA
\ue027>\u0b27; # LETTER DHA
\ue028>\u0b28; # LETTER NA
\ue029>\u0b28\u0b3c; # FALLBACK \u0b29>\u0b28\u0b3c = LETTER NNNA>LETTER NA.SIGN NUKTA
\ue02a>\u0b2a; # LETTER PA
\ue02b>\u0b2b; # LETTER PHA
\ue02c>\u0b2c; # LETTER BA
\ue02d>\u0b2d; # LETTER BHA
\ue02e>\u0b2e; # LETTER MA
\ue02f>\u0b2f; # LETTER YA
\ue030>\u0b30; # LETTER RA
\ue031>\u0b5c; # LETTER RRA
\ue032>\u0b32; # LETTER LA
\ue033>\u0b33; # LETTER LLA
\ue034>\u0b33\u0b3c; # FALLBACK \u0b34>\u0b33\u0b3c = LETTER LLLA>LETTER LLA.SIGN NUKTA
\ue035>\u0b35; # LETTER VA
\ue036>\u0b36; # LETTER SHA
\ue037>\u0b37; # LETTER SSA
\ue038>\u0b38; # LETTER SA
\ue039>\u0b39; # LETTER HA
\ue03c>\u0b3c; # SIGN NUKTA
\ue03d>\u0b3d; # SIGN AVAGRAHA
\ue03e>\u0b3e; # VOWEL SIGN AA
\ue03f>\u0b3f; # VOWEL SIGN I
\ue040>\u0b40; # VOWEL SIGN II
\ue041>\u0b41; # VOWEL SIGN U
\ue042>\u0b42; # VOWEL SIGN UU
\ue043>\u0b43; # VOWEL SIGN VOCALIC R
\ue044>\u0b43\u0b3c; # FALLBACK \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA
\ue045>\u0b47; # FALLBACK
\ue046>\u0b47; # FALLBACK
\ue047>\u0b47; # VOWEL SIGN E
\ue048>\u0b48; # VOWEL SIGN AI
# Oriya has a single VOWEL SIGN O (\u0b4b); the CANDRA O, short O and OO
# signs of InterIndic all collapse onto it, mirroring the independent
# letters \ue011-\ue013 > \u0b13.
\ue049>\u0b4b; # FALLBACK
\ue04a>\u0b4b; # FALLBACK
\ue04b>\u0b4b; # FALLBACK VOWEL SIGN OO (\u0b4b = VOWEL SIGN O)
\ue04c>\u0b4c; # VOWEL SIGN AU
\ue04d>\u0b4d; # SIGN VIRAMA
\ue050>\u0b13\u0b01; # FALLBACK \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU
# \ue051-\ue054 have an empty replacement, i.e. they are removed from the
# output. Presumably these are the InterIndic counterparts of the stress and
# accent marks \u0951-\u0954 (UDATTA, ANUDATTA, GRAVE, ACUTE) -- TODO confirm.
\ue051>;
\ue052>;
\ue053>;
\ue054>;
\ue055>; # UNMAPPED InterIndic-Oriya: LENGTH MARK
\ue056>\u0b56; # AI LENGTH MARK
\ue057>\u0b57; # AU LENGTH MARK
\ue059>\u0b16\u0b3c; # FALLBACK \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA
\ue058>\u0b15\u0b3c; # FALLBACK
\ue05a>\u0b17\u0b3c; # FALLBACK \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA
\ue05b>\u0b1c\u0b3c; # FALLBACK \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA
\ue05c>\u0b21\u0b3c; # FALLBACK
\ue05d>\u0b5d; # LETTER RHA
\ue05e>\u0b2b\u0b3c; # FALLBACK \u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA
\ue05f>\u0b5f; # LETTER YYA
\ue060>\u0b60; # LETTER VOCALIC RR
\ue061>\u0b61; # LETTER VOCALIC LL
\ue062>\u0b56\u0b3c; # FALLBACK \u0b62>\u0b56\u0b3c = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA
\ue063>\u0b57\u0b3c; # FALLBACK \u0b63>\u0b57\u0b3c = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA
\uE064>\u0964; # DANDA
\uE065>\u0965; # DOUBLE DANDA
\ue066>\u0b66; # DIGIT ZERO
\ue067>\u0b67; # DIGIT ONE
\ue068>\u0b68; # DIGIT TWO
\ue069>\u0b69; # DIGIT THREE
\ue06a>\u0b6a; # DIGIT FOUR
\ue06b>\u0b6b; # DIGIT FIVE
\ue06c>\u0b6c; # DIGIT SIX
\ue06d>\u0b6d; # DIGIT SEVEN
\ue06e>\u0b6e; # DIGIT EIGHT
\ue06f>\u0b6f; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
\ue071>\u0b30; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0b30; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>\u0B70; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0B71; # LETTER WA
# Literal ASCII digits 0 and 1 are converted to the native Oriya digits.
0 > \u0b66; # FALLBACK FOR TAMIL
1 > \u0b67; # ASCII DIGIT ONE > DIGIT ONE (same fallback as the rule above)
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,151 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Tamil
#:: NFD (NFC) ;
\ue001>\u0b82; # FALLBACK SIGN CANDRABINDU
\ue002>\u0b82; # SIGN ANUSVARA
\ue003>\u0b83; # SIGN VISARGA
\uE004>\u0b85; # FALLBACK TO LETTER A
\ue005>\u0b85; # LETTER A
\ue006>\u0b86; # LETTER AA
\ue007>\u0b87; # LETTER I
\ue008>\u0b88; # LETTER II
\ue009>\u0b89; # LETTER U
\ue00a>\u0b8a; # LETTER UU
\ue00b>\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
\ue00c>\u0bb2; # FALLBACK LETTER LA
\ue00d>\u0b8f; # FALLBACK
\ue00e>\u0b8e; # LETTER E
\ue00f>\u0b8f; # LETTER EE
\ue010>\u0b90; # LETTER AI
\ue011>\u0b92; # FALLBACK
\ue012>\u0b92; # LETTER O
\ue013>\u0b93; # LETTER OO
\ue014>\u0b94; # LETTER AU
\ue015>\u0b95; # LETTER KA
\ue016>\u0b95; # REMAP (indicExceptions.txt): \u0b96>\u0b95 = LETTER KHA>LETTER KA
\ue017>\u0b95; # REMAP (indicExceptions.txt): \u0b97>\u0b95 = LETTER GA>LETTER KA
\ue018>\u0b95; # REMAP (indicExceptions.txt): \u0b98>\u0b95 = LETTER GHA>LETTER KA
\ue019>\u0b99; # LETTER NGA
\ue01a>\u0b9a; # LETTER CA
\ue01b>\u0b9a; # REMAP (indicExceptions.txt): \u0b9b>\u0b9a = LETTER CHA>LETTER CA
\ue01c>\u0b9c; # LETTER JA
\ue01d>\u0b9a; # REMAP (indicExceptions.txt): \u0b9d>\u0b9a = LETTER JHA>LETTER CA
\ue01e>\u0b9e; # LETTER NYA
\ue01f>\u0b9f; # LETTER TTA
\ue020>\u0b9f; # REMAP (indicExceptions.txt): \u0ba0>\u0b9f = LETTER TTHA>LETTER TTA
\ue021>\u0b9f; # REMAP (indicExceptions.txt): \u0ba1>\u0b9f = LETTER DDA>LETTER TTA
\ue022>\u0b9f; # REMAP (indicExceptions.txt): \u0ba2>\u0b9f = LETTER DDHA>LETTER TTA
\ue023>\u0ba3; # LETTER NNA
\ue024>\u0ba4; # LETTER TA
\ue025>\u0ba4; # REMAP (indicExceptions.txt): \u0ba5>\u0ba4 = LETTER THA>LETTER TA
\ue026>\u0ba4; # REMAP (indicExceptions.txt): \u0ba6>\u0ba4 = LETTER DA>LETTER TA
\ue027>\u0ba4; # REMAP (indicExceptions.txt): \u0ba7>\u0ba4 = LETTER DHA>LETTER TA
# Each consonant+NUKTA rule must stay ahead of the matching single-consonant
# rule: rules are tried in order, so the longer key has to come first.
\ue028\ue03c>\u0ba9; # LETTER NA.SIGN NUKTA > LETTER NNNA
\ue028>\u0ba8; # LETTER NA
\ue029>\u0ba9; # LETTER NNNA
\ue02a>\u0baa; # LETTER PA
\ue02b>\u0baa; # REMAP (indicExceptions.txt): \u0bab>\u0baa = LETTER PHA>LETTER PA
\ue02c>\u0baa; # REMAP (indicExceptions.txt): \u0bac>\u0baa = LETTER BA>LETTER PA
\ue02d>\u0baa; # REMAP (indicExceptions.txt): \u0bad>\u0baa = LETTER BHA>LETTER PA
\ue02e>\u0bae; # LETTER MA
\ue02f>\u0baf; # LETTER YA
\ue030\ue03c>\u0bb1; # LETTER RA.SIGN NUKTA > LETTER RRA
\ue030>\u0bb0; # LETTER RA
\ue031>\u0bb1; # LETTER RRA
\ue032>\u0bb2; # LETTER LA
\ue033\ue03c>\u0bb4; # LETTER LLA.SIGN NUKTA > LETTER LLLA
\ue033>\u0bb3; # LETTER LLA
\ue034>\u0bb4; # LETTER LLLA
\ue035>\u0bb5; # LETTER VA
\ue036>\u0bb7; # REMAP (indicExceptions.txt): \u0bb6>\u0bb7 = LETTER SHA>LETTER SSA
\ue037>\u0bb7; # LETTER SSA
\ue038>\u0bb8; # LETTER SA
\ue039>\u0bb9; # LETTER HA
\ue03c>; # FALLBACK BLOW AWAY NUKTA
\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA
\ue03e>\u0bbe; # VOWEL SIGN AA
\ue03f>\u0bbf; # VOWEL SIGN I
\ue040>\u0bc0; # VOWEL SIGN II
\ue041>\u0bc1; # VOWEL SIGN U
\ue042>\u0bc2; # VOWEL SIGN UU
\ue043>\u0bcd\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
\ue044>\u0bcd\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
\ue045>\u0bbe; # REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA
\ue046>\u0bc6; # VOWEL SIGN E
\ue047>\u0bc7; # VOWEL SIGN EE
\ue048>\u0bc8; # VOWEL SIGN AI
\ue049>\u0bbe; # REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA O>VOWEL SIGN AA
\ue04a>\u0bca; # VOWEL SIGN O
\ue04b>\u0bcb; # VOWEL SIGN OO
\ue04c>\u0bcc; # VOWEL SIGN AU
\ue04d>\u0bcd; # SIGN VIRAMA
\ue050>\u0b93\u0bae\u0bcd; # REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA
# \ue051-\ue054 have an empty replacement, i.e. they are removed from the
# output. Presumably these are the InterIndic counterparts of the stress and
# accent marks \u0951-\u0954 (UDATTA, ANUDATTA, GRAVE, ACUTE) -- TODO confirm.
\ue051>;
\ue052>;
\ue053>;
\ue054>;
\ue055>; # UNMAPPED InterIndic-Tamil: LENGTH MARK
\ue056>\u0bc8; # REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI
\ue057>\u0bd7; # AU LENGTH MARK
\ue058>\u0b95; # FALLBACK
\ue059>\u0b95; # REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA
\ue05a>\u0b95; # REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA
\ue05b>\u0b9c; # REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA
\ue05c>\u0ba4; # FALLBACK
\ue05d>\u0b9f; # REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA
\ue05e>\u0baa; # REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA
\ue05f>\u0baf; # REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA
\ue060>\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
\ue061>\u0bb3; # FALLBACK LETTER LLA
\ue062>\u0bbf; # FALLBACK VOWEL SIGN VOCALIC L
\ue063>\u0bc0; # FALLBACK VOWEL SIGN VOCALIC LL
\ue064>'.' ; # FALLBACK FOR DANDA
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
# Tamil gets ASCII '0' for DIGIT ZERO, and dedicated number signs for
# 10 / 100 / 1000. The multi-digit keys are listed longest first so that
# they are tried before the plain DIGIT ONE rule.
\ue066>\u0030; # FALLBACK DIGIT ZERO
\ue067\ue066\ue066\ue066>\u0bF2; # DIGIT ONE.ZERO.ZERO.ZERO > NUMBER ONE THOUSAND
\ue067\ue066\ue066>\u0bf1; # DIGIT ONE.ZERO.ZERO > NUMBER ONE HUNDRED
\ue067\ue066>\u0bF0; # DIGIT ONE.ZERO > NUMBER TEN
\ue067>\u0be7; # DIGIT ONE
\ue068>\u0be8; # DIGIT TWO
\ue069>\u0be9; # DIGIT THREE
\ue06a>\u0bea; # DIGIT FOUR
\ue06b>\u0beb; # DIGIT FIVE
\ue06c>\u0bec; # DIGIT SIX
\ue06d>\u0bed; # DIGIT SEVEN
\ue06e>\u0bee; # DIGIT EIGHT
\ue06f>\u0bef; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
# Fall back to the plain Tamil LETTER RA (\u0bb0), as the other InterIndic-<script>
# tables do for their own LETTER RA. The target must not be \u0bc0, which is
# VOWEL SIGN II (see the \ue040 rule) and would attach a vowel sign in place of
# a consonant.
\ue071>\u0bb0; # FALLBACK LETTER RA WITH MIDDLE DIAGONAL > LETTER RA
\ue072>\u0bb0; # FALLBACK LETTER RA WITH LOWER DIAGONAL > LETTER RA
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0bb5; # FALLBACK FOR ORIYA LETTER WA
1000 >\u0BF2; # NUMBER ONE THOUSAND
100 >\u0BF1; # NUMBER ONE HUNDRED
10 >\u0BF0; # NUMBER TEN
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,141 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Telugu
#:: NFD (NFC) ;
\ue001>\u0c01; # SIGN CANDRABINDU
\ue002>\u0c02; # SIGN ANUSVARA
\ue003>\u0c03; # SIGN VISARGA
\uE004>\u0c05; # FALLBACK TO LETTER A
\ue005>\u0c05; # LETTER A
\ue006>\u0c06; # LETTER AA
\ue007>\u0c07; # LETTER I
\ue008>\u0c08; # LETTER II
\ue009>\u0c09; # LETTER U
\ue00a>\u0c0a; # LETTER UU
\ue00b>\u0c0b; # LETTER VOCALIC R
\ue00c>\u0c0c; # LETTER VOCALIC L
\ue00d>\u0c0E; # FALLBACK MAPPING
\ue00e>\u0c0E; # LETTER E
\ue00f>\u0c0f; # LETTER EE
\ue010>\u0c10; # LETTER AI
\ue011>\u0c12; # FALLBACK MAPPING
\ue012>\u0c12; # LETTER O
\ue013>\u0c13; # LETTER OO
\ue014>\u0c14; # LETTER AU
\ue015>\u0c15; # LETTER KA
\ue016>\u0c16; # LETTER KHA
\ue017>\u0c17; # LETTER GA
\ue018>\u0c18; # LETTER GHA
\ue019>\u0c19; # LETTER NGA
\ue01a>\u0c1a; # LETTER CA
\ue01b>\u0c1b; # LETTER CHA
\ue01c>\u0c1c; # LETTER JA
\ue01d>\u0c1d; # LETTER JHA
\ue01e>\u0c1e; # LETTER NYA
\ue01f>\u0c1f; # LETTER TTA
\ue020>\u0c20; # LETTER TTHA
\ue021>\u0c21; # LETTER DDA
\ue022>\u0c22; # LETTER DDHA
\ue023>\u0c23; # LETTER NNA
\ue024>\u0c24; # LETTER TA
\ue025>\u0c25; # LETTER THA
\ue026>\u0c26; # LETTER DA
\ue027>\u0c27; # LETTER DHA
\ue028>\u0c28; # LETTER NA
\ue029>\u0c28; # REMAP (indicExceptions.txt): \u0c29>\u0c28 = LETTER NNNA>LETTER NA
\ue02a>\u0c2a; # LETTER PA
\ue02b>\u0c2b; # LETTER PHA
\ue02c>\u0c2c; # LETTER BA
\ue02d>\u0c2d; # LETTER BHA
\ue02e>\u0c2e; # LETTER MA
\ue02f>\u0c2f; # LETTER YA
# The RA+NUKTA rule must stay ahead of the plain RA rule: rules are tried in
# order, so the longer key has to come first.
\ue030\ue03c>\u0c31; # LETTER RA.SIGN NUKTA > LETTER RRA
\ue030>\u0c30; # LETTER RA
\ue031>\u0c31; # LETTER RRA
\ue032>\u0c32; # LETTER LA
\ue033>\u0c33; # LETTER LLA
\ue034>\u0c33; # REMAP (indicExceptions.txt): \u0c34>\u0c33 = LETTER LLLA>LETTER LLA
\ue035>\u0c35; # LETTER VA
\ue036>\u0c36; # LETTER SHA
\ue037>\u0c37; # LETTER SSA
\ue038>\u0c38; # LETTER SA
\ue039>\u0c39; # LETTER HA
\ue03c>; # FALLBACK BLOW AWAY NUKTA
\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA
\ue03e>\u0c3e; # VOWEL SIGN AA
\ue03f>\u0c3f; # VOWEL SIGN I
\ue040>\u0c40; # VOWEL SIGN II
\ue041>\u0c41; # VOWEL SIGN U
\ue042>\u0c42; # VOWEL SIGN UU
\ue043>\u0c43; # VOWEL SIGN VOCALIC R
\ue044>\u0c44; # VOWEL SIGN VOCALIC RR
\ue045>\u0c46; # VOWEL SIGN CANDRA E>VOWEL SIGN E
\ue046>\u0c46; # VOWEL SIGN E
\ue047>\u0c47; # VOWEL SIGN EE
\ue048>\u0c48; # VOWEL SIGN AI
\ue049>\u0c4a; # REMAP (indicExceptions.txt): \u0c49>\u0c4a = VOWEL SIGN CANDRA O>VOWEL SIGN O
\ue04a>\u0c4a; # VOWEL SIGN O
\ue04b>\u0c4b; # VOWEL SIGN OO
\ue04c>\u0c4c; # VOWEL SIGN AU
\ue04d>\u0c4d; # SIGN VIRAMA
\ue050>\u0c13\u0c02; # REMAP (indicExceptions.txt): \u0c50>\u0c13\u0c02 = OM>LETTER OO.SIGN ANUSVARA
# \ue051-\ue054 have an empty replacement, i.e. they are removed from the
# output. Presumably these are the InterIndic counterparts of the stress and
# accent marks \u0951-\u0954 (UDATTA, ANUDATTA, GRAVE, ACUTE) -- TODO confirm.
\ue051>;
\ue052>;
\ue053>;
\ue054>;
\ue055>\u0c55; # LENGTH MARK
\ue056>\u0c56; # AI LENGTH MARK
\ue057>\u0c4c; # REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU
\ue058>\u0c15; # REMAP
\ue059>\u0c16; # REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA
\ue05a>\u0c17; # REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA
\ue05b>\u0c1c; # REMAP (indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA
\ue05c>\u0c22; # REMAP
\ue05d>\u0c22; # REMAP (indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA
\ue05e>\u0c2b; # REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA
\ue05f>\u0c2f; # REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA
\ue060>\u0c60; # LETTER VOCALIC RR
\ue061>\u0c61; # LETTER VOCALIC LL
\ue062>\u0c3f; # REMAP (indicExceptions.txt): \u0c62>\u0c3f = VOWEL SIGN VOCALIC L>VOWEL SIGN I
\ue063>\u0c40; # REMAP (indicExceptions.txt): \u0c63>\u0c40 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
\ue064>'.' ; # FALLBACK FOR DANDA
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
\ue066>\u0c66; # DIGIT ZERO
\ue067>\u0c67; # DIGIT ONE
\ue068>\u0c68; # DIGIT TWO
\ue069>\u0c69; # DIGIT THREE
\ue06a>\u0c6a; # DIGIT FOUR
\ue06b>\u0c6b; # DIGIT FIVE
\ue06c>\u0c6c; # DIGIT SIX
\ue06d>\u0c6d; # DIGIT SEVEN
\ue06e>\u0c6e; # DIGIT EIGHT
\ue06f>\u0c6f; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
\ue071>\u0c30; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0c30; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0c35; # FALLBACK FOR ORIYA LETTER WA
# Literal ASCII digits 0 and 1 are converted to the native Telugu digits.
0 > \u0c66; # FALLBACK FOR TAMIL
1 > \u0c67; # ASCII DIGIT ONE > DIGIT ONE (same fallback as the rule above)
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,92 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Kannada-InterIndic
# Multi-code-point sequences come first so that they are tried before the
# single-code-point rules for \u0CC6, \u0CBF, \u0CC2, \u0CCD, \u0CD5 and \u0CD6
# further down. They match the canonical decompositions of the precomposed
# Kannada vowel signs (e.g. \u0CC7 = \u0CC6\u0CD5), which have no rules of
# their own in this file -- presumably the input is expected to be
# decomposed (NFD) before these rules run; TODO confirm.
\u0CC6\u0CD5>\uE047; # VOWEL SIGN EE
\u0CC6\u0CCD\u0CD6>\uE048\ue04d; # VOWEL SIGN E.SIGN VIRAMA.AI LENGTH MARK > VOWEL SIGN AI.SIGN VIRAMA
\u0CC6\u0CD6>\uE048; # VOWEL SIGN AI
\u0CC6\u0CC2\u0CD5>\uE04B; # VOWEL SIGN OO
\u0CC6\u0CC2>\uE04A; # VOWEL SIGN O
\u0CBF\u0CD5>\uE040; # VOWEL SIGN II
\u0C82>\uE002; # SIGN ANUSVARA
\u0C83>\uE003; # SIGN VISARGA
\u0C85>\uE005; # LETTER A
\u0C86>\uE006; # LETTER AA
\u0C87>\uE007; # LETTER I
\u0C88>\uE008; # LETTER II
\u0C89>\uE009; # LETTER U
\u0C8A>\uE00A; # LETTER UU
\u0C8B>\uE00B; # LETTER VOCALIC R
\u0C8C>\uE00C; # LETTER VOCALIC L
\u0C8E>\uE00E; # LETTER E
\u0C8F>\uE00F; # LETTER EE
\u0C90>\uE010; # LETTER AI
\u0C92>\uE012; # LETTER O
\u0C93>\uE013; # LETTER OO
\u0C94>\uE014; # LETTER AU
\u0C95>\uE015; # LETTER KA
\u0C96>\uE016; # LETTER KHA
\u0C97>\uE017; # LETTER GA
\u0C98>\uE018; # LETTER GHA
\u0C99>\uE019; # LETTER NGA
\u0C9A>\uE01A; # LETTER CA
\u0C9B>\uE01B; # LETTER CHA
\u0C9C>\uE01C; # LETTER JA
\u0C9D>\uE01D; # LETTER JHA
\u0C9E>\uE01E; # LETTER NYA
\u0C9F>\uE01F; # LETTER TTA
\u0CA0>\uE020; # LETTER TTHA
\u0CA1>\uE021; # LETTER DDA
\u0CA2>\uE022; # LETTER DDHA
\u0CA3>\uE023; # LETTER NNA
\u0CA4>\uE024; # LETTER TA
\u0CA5>\uE025; # LETTER THA
\u0CA6>\uE026; # LETTER DA
\u0CA7>\uE027; # LETTER DHA
\u0CA8>\uE028; # LETTER NA
\u0CAA>\uE02A; # LETTER PA
\u0CAB>\uE02B; # LETTER PHA
\u0CAC>\uE02C; # LETTER BA
\u0CAD>\uE02D; # LETTER BHA
\u0CAE>\uE02E; # LETTER MA
\u0CAF>\uE02F; # LETTER YA
\u0CB0>\uE030; # LETTER RA
\u0CB1>\uE031; # LETTER RRA
\u0CB2>\uE032; # LETTER LA
\u0CB3>\uE033; # LETTER LLA
\u0CB5>\uE035; # LETTER VA
\u0CB6>\uE036; # LETTER SHA
\u0CB7>\uE037; # LETTER SSA
\u0CB8>\uE038; # LETTER SA
\u0CB9>\uE039; # LETTER HA
\u0CBC>\uE03C; # SIGN NUKTA
\u0CBD>\uE03D; # AVAGRAHA
\u0CBE>\uE03E; # VOWEL SIGN AA
\u0CBF>\uE03F; # VOWEL SIGN I
\u0CC1>\uE041; # VOWEL SIGN U
\u0CC2>\uE042; # VOWEL SIGN UU
\u0CC3>\uE043; # VOWEL SIGN VOCALIC R
\u0CC4>\uE044; # VOWEL SIGN VOCALIC RR
\u0CC6>\uE046; # VOWEL SIGN E
\u0CCC>\uE04C; # VOWEL SIGN AU
\u0CCD>\uE04D; # SIGN VIRAMA
\u0CD5>\uE055; # LENGTH MARK
\u0CD6>\uE056; # AI LENGTH MARK
\u0CDE>\uE034; # LETTER LLLA
\u0CE0>\uE060; # LETTER VOCALIC RR
\u0CE1>\uE061; # LETTER VOCALIC LL
\u0CE6>\uE066; # DIGIT ZERO
\u0CE7>\uE067; # DIGIT ONE
\u0CE8>\uE068; # DIGIT TWO
\u0CE9>\uE069; # DIGIT THREE
\u0CEA>\uE06A; # DIGIT FOUR
\u0CEB>\uE06B; # DIGIT FIVE
\u0CEC>\uE06C; # DIGIT SIX
\u0CED>\uE06D; # DIGIT SEVEN
\u0CEE>\uE06E; # DIGIT EIGHT
\u0CEF>\uE06F; # DIGIT NINE
# eof

View file

@ -0,0 +1,383 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Latin-InterIndic
#:: NFD;
#\ue000 reserved
#consonants
$chandrabindu=\ue001;
$anusvara=\ue002;
$visarga=\ue003;
#\ue004 reserved
# w<vowel> represents the stand-alone form
$wa=\ue005;
$waa=\ue006;
$wi=\ue007;
$wii=\ue008;
$wu=\ue009;
$wuu=\ue00a;
$wr=\ue00b;
$wl=\ue00c;
$wce=\ue00d; # LETTER CANDRA E
$wse=\ue00e; # LETTER SHORT E
$we=\ue00f; # \u090f LETTER E
$wai=\ue010;
$wco=\ue011; # LETTER CANDRA O
$wso=\ue012; # LETTER SHORT O
$wo=\ue013; # \u0913 LETTER O
$wau=\ue014;
$ka=\ue015;
$kha=\ue016;
$ga=\ue017;
$gha=\ue018;
$nga=\ue019;
$ca=\ue01a;
$cha=\ue01b;
$ja=\ue01c;
$jha=\ue01d;
$nya=\ue01e;
$tta=\ue01f;
$ttha=\ue020;
$dda=\ue021;
$ddha=\ue022;
$nna=\ue023;
$ta=\ue024;
$tha=\ue025;
$da=\ue026;
$dha=\ue027;
$na=\ue028;
$ena=\ue029; #compatibility
$pa=\ue02a;
$pha=\ue02b;
$ba=\ue02c;
$bha=\ue02d;
$ma=\ue02e;
$ya=\ue02f;
$ra=\ue030;
$rra=\ue031;
$la=\ue032;
$lla=\ue033;
$ela=\ue034; #compatibility
$va=\ue035;
$vva=\ue081;
$sha=\ue036;
$ssa=\ue037;
$sa=\ue038;
$ha=\ue039;
#\u093a Reserved
#\u093b Reserved
$nukta=\ue03c;
$avagraha=\ue03d; # SIGN AVAGRAHA
# <vowel> represents the dependent form
$aa=\ue03e;
$i=\ue03f;
$ii=\ue040;
$u=\ue041;
$uu=\ue042;
$rh=\ue043; # dependent form produced from r\u0325
# NOTE(review): $lh is produced from "lh" / l\u0325 (vocalic L), but the
# InterIndic-Oriya and InterIndic-Telugu tables label \ue044 as VOWEL SIGN
# VOCALIC RR. Confirm against InterIndic-Latin before changing anything.
$lh=\ue044; # dependent form produced from lh / l\u0325
$ce=\ue045; #VOWEL SIGN CANDRA E
$se=\ue046; #VOWEL SIGN SHORT E
$e=\ue047;
$ai=\ue048;
$co=\ue049; # VOWEL SIGN CANDRA O
$so=\ue04a; # VOWEL SIGN SHORT O
$o=\ue04b; # \u094b
$au=\ue04c;
$virama=\ue04d;
# \u094e Reserved
# \u094f Reserved
$om = \ue050; # OM
# \u0951>; # UNMAPPED STRESS SIGN UDATTA
# \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
# \u0953>; # UNMAPPED GRAVE ACCENT
# \u0954>; # UNMAPPED ACUTE ACCENT
$lm = \ue055;# Telugu Length Mark
$ailm=\ue056;# AI Length Mark
$aulm=\ue057;# AU Length Mark
#urdu compatibility forms
$uka=\ue058;
$ukha=\ue059;
$ugha=\ue05a;
$ujha=\ue05b;
$uddha=\ue05c;
$udha=\ue05d;
$ufa=\ue05e;
$uya=\ue05f;
$wrr=\ue060; # independent form produced from r\u0325\u0304
$wll=\ue061; # independent form produced from l\u0325\u0304
# NOTE(review): $rrh is produced from r\u0325\u0304 (vocalic RR), but the
# InterIndic-Oriya and InterIndic-Telugu tables label \ue062 as VOWEL SIGN
# VOCALIC L. Confirm against InterIndic-Latin before changing anything.
$rrh=\ue062; # dependent form produced from r\u0325\u0304
$llh=\ue063; # dependent form produced from l\u0325\u0304
$danda=\ue064;
$doubleDanda=\ue065;
$zero=\ue066; # DIGIT ZERO
$one=\ue067; # DIGIT ONE
$two=\ue068; # DIGIT TWO
$three=\ue069; # DIGIT THREE
$four=\ue06a; # DIGIT FOUR
$five=\ue06b; # DIGIT FIVE
$six=\ue06c; # DIGIT SIX
$seven=\ue06d; # DIGIT SEVEN
$eight=\ue06e; # DIGIT EIGHT
$nine=\ue06f; # DIGIT NINE
# For all other scripts
$ecp0=\ue070;
$ecp1=\ue071;
$ecp2=\ue072;
$ecp3=\ue073;
$ecp4=\ue074;
$ecp5=\ue075;
$ecp6=\ue076;
$ecp7=\ue077;
$ecp8=\ue078;
$ecp9=\ue079;
$ecpA=\ue07a;
$ecpB=\ue07b;
$ecpC=\ue07c;
$ecpD=\ue07d;
$ecpE=\ue07e;
$ecpF=\ue07f;
# \u0970>; # UNMAPPED ABBREVIATION SIGN
$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
$depVowelBelow=[\ue041-\ue044];
$endThing=[$danda$doubleDanda];
# $x was originally called '&'; $z was '%'
$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
$z=[bcdfghjklmnpqrstvwxyz];
$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];
\u0315 > $avagraha;
\u0303>$chandrabindu$anusvara;
m\u0310>$chandrabindu;
h\u0323>$visarga;
x>$ka$virama$sa;
# convert to independent forms at start of word or syllable:
# dependent forms for roundtrip
\u0314a\u0304>$aa;
\u0314ai>$ai;
\u0314au>$au;
\u0314ii>$ii;
\u0314i\u0304>$ii;
\u0314i>$i;
\u0314u\u0304>$uu;
\u0314u>$u;
\u0314r\u0325\u0304>$rrh;
\u0314r\u0325>$rh;
\u0314l\u0325\u0304>$llh;
\u0314lh>$lh;
\u0314l\u0325>$lh;
\u0314e\u0304>$e;
\u0314o\u0304>$o;
\u0314a>;
\u0314e\u0306>$ce;
\u0314o\u0306>$co;
\u0314e>$se;
\u0314o>$so;
# preceded by consonants
$consonants{ a\u0304>$aa;
$consonants{ ai>$ai;
$consonants{ au>$au;
$consonants{ ii>$ii;
$consonants{ i\u0304>$ii;
$consonants{ i>$i;
$consonants{ u\u0304>$uu;
$consonants{ u>$u;
$consonants{ r\u0325\u0304>$rrh;
$consonants{ r\u0325a>$rh;
$consonants{ r\u0325>$rh;
$consonants{ l\u0325\u0304>$llh;
$consonants{ lh>$lh;
$consonants{ l\u0325>$lh;
$consonants{ e\u0304>$e;
$consonants{ o\u0304>$o;
$consonants{ e\u0306>$ce;
$consonants{ o\u0306>$co;
$consonants{ e>$se;
$consonants{ o>$so;
# e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
a\u0304>$waa;
ai>$wai;
au>$wau;
i\u0304>$wii;
i>$wi;
u\u0304>$wuu;
u>$wu;
r\u0325\u0304>$wrr;
r\u0325>$wr;
l\u0325\u0304>$wll;
lh>$wl;
l\u0325>$wl;
e\u0304>$we;
o\u0304>$wo;
a>$wa;
e\u0306>$wce;
o\u0306>$wco;
e>$wse;
''om>$om;
o>$wso;
# rules for anusvara
n}r\u0325 > $na|$virama;
n}l\u0325 > $na|$virama;
n}na > $na|$virama;
n\u0307}[kg] > $anusvara;
n\u0307}n\u0307 > $anusvara;
n\u0304}[cj] > $anusvara;
n\u0304}n\u0303 > $anusvara;
n\u0323}[tdn]\u0323 > $anusvara;
n}[tdn] > $anusvara;
m}[pbm] > $anusvara;
n}[ylvshr] > $anusvara;
m\u0307 > $anusvara;
#urdu compatibility
q>$uka|$virama;
k\u0331h\u0331>$ukha |$virama;
g\u0307> $ugha | $virama;
z > $ujha |$virama;
f > $ufa|$virama;
# dev
y\u0307>$uya|$virama;
l\u0331>$ela|$virama;
n\u0331>$ena|$virama;
n\u0307>$nga|$virama;
n\u0303>$nya|$virama;
n\u0323>$nna|$virama;
t\u0323h>$ttha|$virama;
t\u0323>$tta|$virama;
r\u0323h>$udha|$virama;
r\u0323>$uddha|$virama;
d\u0323h>$ddha|$virama;
d\u0323>$dda|$virama;
kh>$kha|$virama;
k>$ka|$virama;
gh>$gha|$virama;
g>$ga|$virama;
ch>$cha|$virama;
c>$ca|$virama;
jh>$jha|$virama;
j>$ja|$virama;
ny>$nya|$virama;
tth>$ttha|$virama;
ddh>$ddha|$virama;
th>$tha|$virama;
t>$ta|$virama;
dh>$dha|$virama;
d>$da|$virama;
n>$na|$virama;
ph>$pha|$virama;
p>$pa|$virama;
bh>$bha|$virama;
b>$ba|$virama;
m>$ma|$virama;
y>$ya|$virama;
r\u0331>$rra|$virama;
r>$ra|$virama;
l\u0323>$lla|$virama;
l>$la|$virama;
v>$va|$virama;
w\u0307>$vva|$virama;
w>$va|$virama;
sh>$sha|$virama;
ss>$ssa|$virama;
s\u0323>$ssa|$virama;
s\u0301>$sha|$virama;
s>$sa|$virama;
h>$ha|$virama;
'.'>$danda;
$danda'.'>$doubleDanda;
$depVowelAbove{'~'>$anusvara;
$depVowelBelow{'~'>$chandrabindu;
# convert to dependent forms after consonant with no vowel:
# e.g. kai -> {ka}{virama}ai -> {ka}{ai}
#$virama aa>$aa;
$virama a\u0304>$aa;
$virama ai>$ai;
$virama au>$au;
$virama ii>$ii;
$virama i\u0304>$ii;
$virama i>$i;
#$virama uu>$uu;
$virama u\u0304>$uu;
$virama u>$u;
#$virama rrh>$rrh;
$virama r\u0325\u0304>$rrh;
#$virama rh>$rh;
$virama r\u0325a>$rh;
$virama r\u0325>$rh;
$virama l\u0325\u0304>$llh;
$virama lh>$lh;
$virama l\u0325>$lh;
$virama e\u0304>$e;
$virama o\u0304>$o;
$virama a>;
$virama e\u0306>$ce;
$virama o\u0306>$co;
$virama e>$se;
$virama o>$so;
# otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
#$virama''aa>$waa;
$virama''a\u0304>$waa;
$virama''ai>$wai;
$virama''au>$wau;
#$virama''ii>$wii;
$virama''i\u0304>$wii;
$virama''i>$wi;
#$virama''uu>$wuu;
$virama''u\u0304>$wuu;
$virama''u>$wu;
#$virama''rrh>$wrr;
$virama''r\u0325\u0304>$wrr;
#$virama''rh>$wr;
$virama''r\u0325>$wr;
$virama''l\u0325\u0304>$wll;
#$virama''lh>$wl;
$virama''l\u0325>$wl;
$virama''e\u0304>$we;
$virama''o\u0304>$wo;
$virama''a>$wa;
$virama''e\u0306>$wce;
$virama''o\u0306>$wco;
$virama''e>$wse;
$virama''o>$wso;
# no virama
''a\u0304>$waa;
''ai>$wai;
''au>$wau;
''i\u0304>$wii;
''i>$wi;
''u\u0304>$wuu;
''u>$wu;
''r\u0325\u0304>$wrr;
''r\u0325>$wr;
''l\u0325\u0304>$wll;
''l\u0325>$wl;
''e\u0304>$we;
''o\u0304>$wo;
''a>$wa;
''e\u0306>$wce;
''o\u0306>$wco;
''e>$wse;
''o>$wso;
$virama } [$z] > $virama;
$virama } ' ' > $virama ;
$virama}$endThing>;
0>$zero;
1>$one;
2>$two;
3>$three;
4>$four;
5>$five;
6>$six;
7>$seven;
8>$eight;
9>$nine;
''>;
#:: NFC (NFD) ;

View file

@ -0,0 +1,522 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
#- the INDEX file. This transliterator is, by itself, not
#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
#- inverses thereof.
# Transliteration from Latin characters to Korean script is done in
# two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul
# transliteration is done algorithmically following Unicode 3.0
# section 3.11. This file implements the Latin to Jamo
# transliteration using rules.
# Jamo occupy the block 1100-11FF. Within this block there are three
# groups of characters: initial consonants or choseong (I), medial
# vowels or jungseong (M), and trailing consonants or jongseong (F).
# Standard Korean syllables are of the form I+M+F*.
# Section 3.11 describes the use of 'filler' jamo to convert
# nonstandard syllables to standard form: the choseong filler 115F and
# the jungseong filler 1160. In this transliterator, we will not use
# 115F or 1160.
# We will, however, insert two 'null' jamo to make foreign words
# conform to Korean syllable structure. These are the null initial
# consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,
# we will use the separator in order to disambiguate strings,
# e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).
# We will not use all of the characters in the jamo block. We will
# only use the 19 initials, 21 medials, and 27 finals possessing a
# jamo short name as defined in section 4.4 of the Unicode book.
# Rules of thumb. These guidelines provide the basic framework
# for the rules. They are phrased in terms of Latin-Jamo transliteration.
# The Jamo-Latin rules derive from these, since the Jamo-Latin rules are
# just context-free transliteration of jamo to corresponding short names,
# with the addition of separators to maintain round-trip integrity
# in the context of the Latin-Jamo rules.
# A sequence of vowels:
# - Take the longest sequence you can. If there are too many, or you don't
# have a starting consonant, introduce a 110B (IEUNG) as necessary.
# A sequence of consonants.
# - First join the double consonants: G + G -> GG
# - In the remaining list,
# -- If there is no preceding vowel, take the first consonant, and insert EU
# after it. Continue with the rest of the consonants.
# -- If there is one consonant, attach to the following vowel
# -- If there are two consonants and a following vowel, attach one to the
# preceding vowel, and one to the following vowel.
# -- If there are more than two consonants, join the first two together if you
# can: L + G => LG
# -- If you still end up with more than 2 consonants, insert EU after the
# first one, and continue with the rest of the consonants.
#----------------------------------------------------------------------
# Variables
# Some latin consonants or consonant pairs only occur as initials, and
# some only as finals, but some occur as both. This makes some jamo
# consonants ambiguous when transliterated into latin.
# Initial only: IEUNG BB DD JJ R
# Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
# Initial and Final: B C D G GG H J K M N P S SS T
$Gi = \u1100;
$GGi = \u1101;
$Ni = \u1102;
$Di = \u1103;
$DD = \u1104;
$R = \u1105;
$Mi = \u1106;
$Bi = \u1107;
$BB = \u1108;
$Si = \u1109;
$SSi = \u110A;
$IEUNG = \u110B; # null initial, inserted during Latin-Jamo
$Ji = \u110C;
$JJ = \u110D;
$Ci = \u110E;
$Ki = \u110F;
$Ti = \u1110;
$Pi = \u1111;
$Hi = \u1112;
$A = \u1161;
$AE = \u1162;
$YA = \u1163;
$YAE = \u1164;
$EO = \u1165;
$E = \u1166;
$YEO = \u1167;
$YE = \u1168;
$O = \u1169;
$WA = \u116A;
$WAE = \u116B;
$OE = \u116C;
$YO = \u116D;
$U = \u116E;
$WEO = \u116F;
$WE = \u1170;
$WI = \u1171;
$YU = \u1172;
$EU = \u1173; # null medial, inserted during Latin-Jamo
$YI = \u1174;
$I = \u1175;
$Gf = \u11A8;
$GGf = \u11A9;
$GS = \u11AA;
$Nf = \u11AB;
$NJ = \u11AC;
$NH = \u11AD;
$Df = \u11AE;
$L = \u11AF;
$LG = \u11B0;
$LM = \u11B1;
$LB = \u11B2;
$LS = \u11B3;
$LT = \u11B4;
$LP = \u11B5;
$LH = \u11B6;
$Mf = \u11B7;
$Bf = \u11B8;
$BS = \u11B9;
$Sf = \u11BA;
$SSf = \u11BB;
$NG = \u11BC;
$Jf = \u11BD;
$Cf = \u11BE;
$Kf = \u11BF;
$Tf = \u11C0;
$Pf = \u11C1;
$Hf = \u11C2;
$jamoInitial = [\u1100-\u1112];
$jamoMedial = [\u1161-\u1175];
$latinInitial = [bcdghjkmnprst];
# Any character in the latin transliteration of a medial
$latinMedial = [aeiouwy];
# The last character of the latin transliteration of a medial
$latinMedialEnd = [aeiou];
# Disambiguation separator
$sep = \';
#----------------------------------------------------------------------
# Jamo-Latin
# Jamo to latin is relatively simple, since it is the latin that is
# ambiguous. Most rules are straightforward, and we encode them below
# as simple add-on back rule, e.g.:
# $jamoMedial {bs} > $BS;
# becomes
# $jamoMedial {bs} <> $BS;
# Furthermore, we don't care about the ordering for Jamo-Latin because
# we are going from single characters, so we can very easily piggyback
# on the Latin-Jamo.
# The main issue with Jamo-Latin is when to insert separators.
# Separators are inserted to obtain correct round trip behavior. For
# example, the sequence Ki A Gf Gi E, if transliterated to "kagge",
# would then round trip to Ki A GGi E. To prevent this, we insert a
# separator: "kag-ge". IMPORTANT: The need for separators depends
# very specifically on the behavior of the Latin-Jamo rules. A change
# in the Latin-Jamo behavior can completely change the way the
# separator insertion must be done.
# First try to preserve actual separators in the jamo text by doubling
# them. This fixes problems like:
# (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol
# => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional
# -- if we don't care about losing separators in the jamo, we can delete
# this rule.
$sep $sep <> $sep;
# Triple consonants. For three consonants "axxx" we insert a
# separator between the first and second "x" if XXf, Xf, and Xi all
# exist, and we have A Xf XXi. This prevents the reverse
# transliteration to A XXf Xi.
$sep < $latinMedialEnd g {} $GGi;
$sep < $latinMedialEnd s {} $SSi;
# For vowels the rule is similar. If there is a vowel "ae" such that
# "a" by itself and "e" by itself are vowels, then we want to map A E
# to "a-e" so as not to round trip to AE. However, in the text Ki EO
# IEUNG E we don't need to map to "keo-e". "keoe" suffices. For
# vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be
# tested. NOTE: These rules used to have a left context of
# $latinInitial instead of [^$latinMedial]. The problem with this is
# sequences where an initial IEUNG is transliterated away:
# (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O)
$sep < [^$latinMedial] [y w] e {} [$O $OE];
$sep < [^$latinMedial] e {} [$O $OE $U];
$sep < [^$latinMedial] [o a] {} [$E $EO $EU];
$sep < [^$latinMedial] [w y] a {} [$E $EO $EU];
# Similar to the above, but with an intervening $IEUNG.
$sep < [^$latinMedial] [y w] e {} $IEUNG [$O $OE];
$sep < [^$latinMedial] e {} $IEUNG [$O $OE $U];
$sep < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];
$sep < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];
# Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E,
# where Xi also exists, must be transliterated as "ax-e" to prevent
# the round trip conversion to A Xi E.
$sep < $latinMedialEnd b {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd c {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd d {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd g {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd h {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd j {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd k {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd m {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd n {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd p {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd s {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd t {} $IEUNG $jamoMedial;
# Double finals followed by IEUNG. Similar to the single finals
# followed by IEUNG. Any latin consonant pair X Y, between medials,
# that we would split by Latin-Jamo, we must handle when it occurs as
# part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi
# E.
$sep < $latinMedialEnd b s {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd g g {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd g s {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l b {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l g {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l h {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l m {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l p {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l s {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l t {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd n g {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd n h {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd n j {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd s s {} $IEUNG $jamoMedial;
# Split doubles. Text of the form A Xf Xi E, where XXi also occurs,
# we transliterate as "ax-xe" to prevent round trip transliteration as
# A XXi E.
$sep < $latinMedialEnd b {} $Bi $jamoMedial;
$sep < $latinMedialEnd d {} $Di $jamoMedial;
$sep < $latinMedialEnd j {} $Ji $jamoMedial;
$sep < $latinMedialEnd g {} $Gi $jamoMedial;
$sep < $latinMedialEnd s {} $Si $jamoMedial;
# XYY. This corresponds to the XYY rule in Latin-Jamo. By default
# Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result,
# "xyy" forms that correspond to XYf Yi must be transliterated as
# "xy-y".
$sep < $latinMedialEnd b s {} [$Si $SSi];
$sep < $latinMedialEnd g s {} [$Si $SSi];
$sep < $latinMedialEnd l b {} [$Bi $BB];
$sep < $latinMedialEnd l g {} [$Gi $GGi];
$sep < $latinMedialEnd l s {} [$Si $SSi];
$sep < $latinMedialEnd n g {} [$Gi $GGi];
$sep < $latinMedialEnd n j {} [$Ji $JJ];
# Deletion of IEUNG is handled below.
#----------------------------------------------------------------------
# Latin-Jamo
# [Basic, context-free Jamo-Latin rules are embedded here too. See
# above.]
# Split digraphs: Text of the form 'axye', where 'xy' is a final
# digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
# 'e' are medials, we want to transliterate this as A Xf Yi E rather
# than A XYf IEUNG E. We do NOT include text of the form "axxe",
# since that is handled differently below. These rules are generated
# programmatically from the jamo data.
$jamoMedial {b s} $latinMedial > $Bf $Si;
$jamoMedial {g s} $latinMedial > $Gf $Si;
$jamoMedial {l b} $latinMedial > $L $Bi;
$jamoMedial {l g} $latinMedial > $L $Gi;
$jamoMedial {l h} $latinMedial > $L $Hi;
$jamoMedial {l m} $latinMedial > $L $Mi;
$jamoMedial {l p} $latinMedial > $L $Pi;
$jamoMedial {l s} $latinMedial > $L $Si;
$jamoMedial {l t} $latinMedial > $L $Ti;
$jamoMedial {n g} $latinMedial > $Nf $Gi;
$jamoMedial {n h} $latinMedial > $Nf $Hi;
$jamoMedial {n j} $latinMedial > $Nf $Ji;
# Single consonants are initials: Text of the form 'axe', where 'x'
# can be an initial or a final, and 'a' and 'e' are medials, we want
# to transliterate as A Xi E rather than A Xf IEUNG E.
$jamoMedial {b} $latinMedial > $Bi;
$jamoMedial {c} $latinMedial > $Ci;
$jamoMedial {d} $latinMedial > $Di;
$jamoMedial {g} $latinMedial > $Gi;
$jamoMedial {h} $latinMedial > $Hi;
$jamoMedial {j} $latinMedial > $Ji;
$jamoMedial {k} $latinMedial > $Ki;
$jamoMedial {m} $latinMedial > $Mi;
$jamoMedial {n} $latinMedial > $Ni;
$jamoMedial {p} $latinMedial > $Pi;
$jamoMedial {s} $latinMedial > $Si;
$jamoMedial {t} $latinMedial > $Ti;
# Doubled initials. The sequence "axxe", where XX exists as an initial
# (XXi), and also Xi and Xf exist (true of all digraphs XX), we want
# to transliterate as A XXi E, rather than split to A Xf Xi E.
$jamoMedial {b b} $latinMedial > $BB;
$jamoMedial {d d} $latinMedial > $DD;
$jamoMedial {j j} $latinMedial > $JJ;
$jamoMedial {g g} $latinMedial > $GGi;
$jamoMedial {s s} $latinMedial > $SSi;
# XYY. Because doubled consonants bind more strongly than XY
# consonants, we must handle the sequence "axyy" specially. Here XYf
# and YYi must exist. In these cases, we map to Xf YYi rather than
# XYf.
$jamoMedial {b} s s > $Bf;
$jamoMedial {g} s s > $Gf;
$jamoMedial {l} b b > $L;
$jamoMedial {l} g g > $L;
$jamoMedial {l} s s > $L;
$jamoMedial {n} g g > $Nf;
$jamoMedial {n} j j > $Nf;
# Finals: Attach consonant with preceding medial to preceding medial.
# Do this BEFORE mapping consonants to initials. Longer keys must
# precede shorter keys that they start with, e.g., the rule for 'bs'
# must precede 'b'.
# [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this
# block for Jamo-Latin.]
$jamoMedial {bs} <> $BS;
$jamoMedial {b} <> $Bf;
$jamoMedial {c} <> $Cf;
$jamoMedial {d} <> $Df;
$jamoMedial {gg} <> $GGf;
$jamoMedial {gs} <> $GS;
$jamoMedial {g} <> $Gf;
$jamoMedial {h} <> $Hf;
$jamoMedial {j} <> $Jf;
$jamoMedial {k} <> $Kf;
# 'lb' and 'lg' finals: one rule per line, like the rest of this block.
$jamoMedial {lb} <> $LB;
$jamoMedial {lg} <> $LG;
$jamoMedial {lh} <> $LH;
$jamoMedial {lm} <> $LM;
$jamoMedial {lp} <> $LP;
$jamoMedial {ls} <> $LS;
$jamoMedial {lt} <> $LT;
$jamoMedial {l} <> $L;
$jamoMedial {m} <> $Mf;
$jamoMedial {ng} <> $NG;
$jamoMedial {nh} <> $NH;
$jamoMedial {nj} <> $NJ;
$jamoMedial {n} <> $Nf;
$jamoMedial {p} <> $Pf;
$jamoMedial {ss} <> $SSf;
$jamoMedial {s} <> $Sf;
$jamoMedial {t} <> $Tf;
# Initials: Attach single consonant to following medial. Do this
# AFTER mapping finals. Longer keys must precede shorter keys that
# they start with, e.g., the rule for 'gg' must precede 'g'.
# [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within
# this block for Jamo-Latin.]
{gg} $latinMedial <> $GGi;
{g} $latinMedial <> $Gi;
{n} $latinMedial <> $Ni;
{dd} $latinMedial <> $DD;
{d} $latinMedial <> $Di;
{r} $latinMedial <> $R;
{m} $latinMedial <> $Mi;
{bb} $latinMedial <> $BB;
{b} $latinMedial <> $Bi;
{ss} $latinMedial <> $SSi;
{s} $latinMedial <> $Si;
{jj} $latinMedial <> $JJ;
{j} $latinMedial <> $Ji;
{c} $latinMedial <> $Ci;
{k} $latinMedial <> $Ki;
{t} $latinMedial <> $Ti;
{p} $latinMedial <> $Pi;
{h} $latinMedial <> $Hi;
# 'r' in final position. Because of the equivalency of the 'l' and
# 'r' jamo (the glyphs are the same), we try to provide the same
# equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled
# below. If we see an 'r' in an apparent final position, treat it
# like 'l'. For example, "karka" => Ki A R EU Ki A without this rule.
# Instead, we want Ki A L Ki A.
$jamoMedial {r} $latinInitial > | l;
# Initial + Final: If we match the next rule, we have initial then
# final consonant with no intervening medial. We insert the null
# vowel BEFORE it to create a well-formed syllable. (In the next rule
# we insert a null vowel AFTER an anomalous initial.)
$jamoInitial {} [bcdghjklmnpst] > $EU;
# Initial + X: This block matches an initial consonant not followed by
# a medial. We insert the null vowel after it. We handle double
# initials explicitly here; for single initial consonants we insert EU
# (as Latin) after them and let standard rules do the rest.
# BREAKS ROUND TRIP INTEGRITY
gg > $GGi $EU;
dd > $DD $EU;
bb > $BB $EU;
ss > $SSi $EU;
jj > $JJ $EU;
([bcdghjkmnprst]) > | $1 eu;
# X + Final: Finally we have to deal with a consonant that can only be
# interpreted as a final (not an initial) and which is preceded
# neither by an initial nor a medial. It is the start of the
# syllable, but cannot be. Most of these will already be handled by
# the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng'
# 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'.
# For this isolated case, we could add a null initial and medial,
# which would give "la" => IEUNG EU L IEUNG A, for example. A more
# economical solution is to transliterate isolated "l" (that is,
# initial "l") to "r". (Other similar conversions of consonants that
# occur neither as initials nor as finals are handled below.)
l > | r;
# Medials. If a medial is preceded by an initial, then we proceed
# normally. As usual, longer keys must precede shorter ones.
# [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within
# this block for Jamo-Latin.]
$jamoInitial {ae} <> $AE;
$jamoInitial {a} <> $A;
$jamoInitial {eo} <> $EO;
$jamoInitial {eu} <> $EU;
$jamoInitial {e} <> $E;
$jamoInitial {i} <> $I;
$jamoInitial {oe} <> $OE;
$jamoInitial {o} <> $O;
$jamoInitial {u} <> $U;
$jamoInitial {wae} <> $WAE;
$jamoInitial {wa} <> $WA;
$jamoInitial {weo} <> $WEO;
$jamoInitial {we} <> $WE;
$jamoInitial {wi} <> $WI;
$jamoInitial {yae} <> $YAE;
$jamoInitial {ya} <> $YA;
$jamoInitial {yeo} <> $YEO;
$jamoInitial {ye} <> $YE;
$jamoInitial {yi} <> $YI;
$jamoInitial {yo} <> $YO;
$jamoInitial {yu} <> $YU;
# We may see an anomalous isolated 'w' or 'y'. In that case, we
# interpret it as 'wi' and 'yu', respectively.
# BREAKS ROUND TRIP INTEGRITY
$jamoInitial {w} > | wi;
$jamoInitial {y} > | yu;
# Otherwise, insert a null consonant IEUNG before the medial (which is
# still an untransliterated latin vowel).
($latinMedial) > $IEUNG | $1;
# Convert non-jamo latin consonants to equivalents. These occur as
# neither initials nor finals in jamo. 'l' occurs as a final, but not
# an initial; it is handled above. The following letters (left hand
# side) will never be output by Jamo-Latin.
f > | p;
q > | k;
v > | b;
x > | ks;
z > | s;
# Delete separators (Latin-Jamo).
$sep > ;
# Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,
# since these may also occur in text.
< $IEUNG;
#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
#- the INDEX file. This transliterator is, by itself, not
#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
#- inverses thereof.
# eof

View file

@ -0,0 +1,495 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# note: a global filter is more efficient, but MUST include all source chars
#:: [\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]] ;
# MINIMAL FILTER GENERATED FOR: Latin-Katakana
### WARNING -- must add width filter, both here and below!!! ###
:: [[\u1100-\u1112\u111A\u1121\u1160-\u1175\u11AA\u11AC-\u11AD\u11B0-\u11B5\u2190-\u2193\u2502\u25A0\u25CB\u3000-\u3002\u300C-\u300D\u3099-\u309A\u30A1-\u30ED\u30EF\u30F2-\u30F4\u30F7\u30FA-\u30FC\uFF01-\uFF5E\uFFE0-\uFFE6][',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]] ;
:: [:Latin:] fullwidth-halfwidth ();
:: NFD (NFC);
:: Lower (); # whenever transliterating from cased to uncased script, include this
# :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese
# Uses modified Hepburn. Small changes to make unambiguous.
# | Kunrei-shiki: Hepburn/MHepburn
# | ------------------------------
# | si: shi
# | si ~ya: sha
# | si ~yu: shu
# | si ~yo: sho
# | zi: ji
# | zi ~ya: ja
# | zi ~yu: ju
# | zi ~yo: jo
# | ti: chi
# | ti ~ya: cha
# | ti ~yu: chu
# | ti ~yo: cho
# | tu: tsu
# | di: ji/dji
# | du: zu/dzu
# | hu: fu
# | For foreign words:
# | -----------------
# | se ~i si
# | si ~e she
# |
# | ze ~i zi
# | zi ~e je
# |
# | te ~i ti
# | ti ~e che
# | te ~u tu
# |
# | de ~i di
# | de ~u du
# | de ~i di
# |
# | he ~u: hu
# | hu ~a fa
# | hu ~i fi
# | hu ~e fe
# | hu ~o fo
# Most small forms are generated, but if necessary
# explicit small forms are given with ~a, ~ya, etc.
#------------------------------------------------------
# Variables
$vowel = [aeiou] ;
$consonant = [bcdfghjklmnpqrstvwxyz] ;
$macron = \u0304 ;
# Variables used for doubled-consonants with tsu
$kana = [\u3041-\u3094] ;
$voice = [\u3099\u309B];
$semivoice = [\u309A\u309C];
$k_start = [カキクケコかきくけこ] ;
$s_start = [サシスセソさしすせそ] ;
$j_start = [シし] $voice ;
$t_start = [タチツテトたちつてと] ;
$n_start = [ナニヌネノンなにぬねの] ;
$h_start = [ハヒヘホはひへほ] ;
$f_start = [フふ] ;
$m_start = [マミムメモまみむめも] ;
$y_start = [ヤユヨやゆよ] ;
$r_start = [ラリルレロらりるれろ] ;
$w_start = [ワヰヱヲわゐゑを] ;
$v_start = [ワヰヱヲ]゙ ;
# if ン is followed by $n_quoter, then it needs an
# apostrophe after its romaji form to disambiguate it.
# e.g., ン ア != ナ, so represent as "n'a", not "na".
# The whole n-row (ナ ニ ヌ ネ ノ) is listed: without the apostrophe,
# ン + ノ would come out as "nno", which the doubled-consonant rule
# (n } n <> ッ) reads back as ッノ instead of ンノ.
$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;
$small_y = [ャィュェョ] ;
$iteration = \u309D ;
#------------------------------------------------------
# katakana rules
# Punctuation
'.' <> 。;
',' <> 、;
# ' ' } [a-z] > ; # delete spaces before latin
# ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before hiragana
# Iteration Mark
# Copy previous letter & marks
# TODO
# | $1 $1 < ($kana [[:M:]$voice$semivoice]?) $iteration
# Specials for katakana -- not shared with hiragana
va <> ヷ ;
vi <> ヸ ;
ve <> ヹ ;
vo <> ヺ ;
'~ka' <> ヵ ;
'~ke' <> ヶ ;
# ~~~ begin shared rules ~~~
#special
ya < '~'ャ;
yi < '~'ィ ;
yu < '~'ュ;
ye < '~'ェ;
yo < '~'ョ;
#normal
a <> ア ;
b | '~' < ヒ ゙} $small_y ;
by } $vowel > ビ | '~y' ;
ba <> バ ;
bi <> ビ ;
bu <> ブ ;
be <> ベ ;
bo <> ボ ;
c } i > | s ;
c } e > | s ;
da <> ダ ;
di <> ディ ;
du <> デゥ ;
de <> デ ;
do <> ド ;
dzu <> ヅ ;
dja < ヂャ ;
dji'~i' < ヂィ ; # liu
dju < ヂュ ;
dje < ヂェ ;
djo < ヂョ ;
dji <> ヂ ;
dj } $vowel > ヂ | '~y' ;
# TODO: QUESTION: use ĵĴżŻ instead of dj, dz
cha < チャ ;
chi'~i' < チィ ; # liu
chu < チュ ;
che < チェ ;
cho < チョ ;
chi <> チ ;
ch } $vowel > チ | '~y' ;
e <> エ ;
g | '~' < ギ} $small_y ;
gy } $vowel > ギ | '~y' ;
ga <> ガ ;
gi <> ギ ;
gu <> グ ;
ge <> ゲ ;
go <> ゴ ;
i <> イ ;
# j } $vowel > ジ | '~y' ;
ja <> ジャ ;
ji'~i' < ジィ ; # liu
ju <> ジュ ;
je <> ジェ ;
jo <> ジョ ;
ji <> ジ ;
k | '~' < キ} $small_y ;
ky } $vowel > キ | '~y' ;
ka <> カ ;
ki <> キ ;
ku <> ク ;
ke <> ケ ;
ko <> コ ;
m | '~' < ミ} $small_y ;
my } $vowel > ミ | '~y' ;
ma <> マ ;
mi <> ミ ;
mu <> ム ;
me <> メ ;
mo <> モ ;
m } [pbfv] > ン ;
n | '~' < ニ } $small_y ;
ny } $vowel > ニ | '~y' ;
# n-row syllables. Each romaji syllable pairs with exactly one katakana
# in both directions; the isolated "n" (ン) rules appear further down so
# that they do not mask these longer keys.
na <> ナ ;
ni <> ニ ;
nu <> ヌ ;
ne <> ネ ;
no <> ノ ;
o <> オ ;
p | '~' < ピ } $small_y ;
py } $vowel > ピ | '~y' ;
pa <> パ ;
pi <> ピ ;
pu <> プ ;
pe <> ペ ;
po <> ポ ;
h | '~' < ヒ } $small_y ;
hy } $vowel > ヒ | '~y' ;
ha <> ハ ;
hi <> ヒ ;
hu <> ヘゥ ;
he <> ヘ ;
ho <> ホ ;
# f | '~' < フ } $small_y ;
# f } $vowel > フ | '~' ;
fa <> ファ ;
fi <> フィ ;
fe <> フェ ;
fo <> フォ ;
fu <> フ ;
r | '~' < リ } $small_y ;
ry } $vowel > リ | '~y' ;
ra <> ラ ;
ri <> リ ;
ru <> ル ;
re <> レ ;
ro <> ロ ;
za <> ザ ;
zi <> ゼィ ;
zu <> ズ ;
ze <> ゼ ;
zo <> ゾ ;
sa <> サ ;
si <> セィ ;
su <> ス ;
se <> セ ;
so <> ソ ;
sha < シャ ;
shi'~i' < シィ ; # liu
shu < シュ ;
she < シェ ;
sho < ショ ;
shi <> シ ;
sh } $vowel > シ | '~y' ;
ta <> タ ;
ti <> ティ ;
tu <> テゥ ;
te <> テ ;
to <> ト ;
tsu <> ツ ;
# v } $vowel > ヴ | '~' ;
#'v~a' < ヴァ ; # liu
#'v~i' < ヴィ ; # liu
#'v~e' < ヴェ ; # liu
#'v~o' < ヴォ ; # liu
vu <> ヴ ;
u <> ウ ;
# w } $vowel > ウ | '~' ;
wa <> ワ ;
wi <> ヰ ;
wu > ウ ;
we <> ヱ ;
wo <> ヲ ;
ya <> ヤ ;
yi > イ ;
yu <> ユ ;
ye > エ ;
yo <> ヨ ;
# double consonants
#specials
s } sh > ッ ;
t } ch > ッ ;
#voiced
j } j <> ッ } $j_start ;
b } b <> ッ } [$h_start$f_start] $voice;
d } d <> ッ } $t_start $voice;
g } g <> ッ } $k_start $voice;
p } p <> ッ } [$h_start$f_start] $semivoice;
# v } v <> ッ } [ワヰウヱヲう] $voice ;
z } z <> ッ } $s_start $voice;
v } v <> ッ } $v_start;
# normal
k } k <> ッ } $k_start ;
m } m <> ッ } $m_start ;
n } n <> ッ } $n_start ;
h } h <> ッ } $h_start ;
f } f <> ッ } $f_start ;
r } r <> ッ } $r_start ;
t } t <> ッ } $t_start ;
s } s <> ッ } $s_start ;
w } w <> ッ } $w_start;
y } y <> ッ } $y_start;
# completeness
x } x > ッ ;
c } k > ッ ;
c } c > ッ ;
c } q > ッ ;
l } l > ッ ;
q } q > ッ ;
# y } y > ッ ;
# w } w > ッ ;
# prolonged vowel mark. this indicates a doubling of
# the preceding vowel sound
#a < a { ー ; # liu
#e < e { ー ; # liu
#i < i { ー ; # liu
#o < o { ー ; # liu
#u < u { ー ; # liu
$macron <> ー ;
# small forms
'~a' <> ァ ;
'~i' <> ィ ;
'~u' <> ゥ ;
'~e' <> ェ ;
'~o' <> ォ ;
'~tsu' <> ッ ;
'~wa' <> ヮ ;
'~ya' <> ャ ;
'~yi' > ィ ;
'~yu' <> ュ ;
'~ye' > ェ ;
'~yo' <> ョ ;
# iteration marks
# TODO: make more accurate
# Iteration mark carrying a voicing mark, after an unvoiced syllable:
# emit the voiced counterpart of the preceding consonant followed by the
# captured (y* vowel) tail, e.g. k -> g, s -> z, t -> d, h -> b.
# These must precede the generic "repeat previous syllable" rules.
j $1 < sh (y* $vowel) {ヽ$voice ;
dj $1 < ch (y* $vowel) {ヽ$voice ;
dz $1 < ts (y* $vowel) {ヽ$voice ;
g $1 < k (y* $vowel) {ヽ$voice ;
z $1 < s (y* $vowel) {ヽ$voice ;
d $1 < t (y* $vowel) {ヽ$voice ;
b $1 < h (y* $vowel) {ヽ$voice ;
v $1 < w (y* $vowel) {ヽ$voice ;
sh $1 < sh (y* $vowel) {ヽ$voice ;
j $1 < j (y* $vowel) {ヽ$voice ;
ch $1 < ch (y* $vowel) {ヽ$voice ;
dj $1 < dj(y* $vowel) {ヽ$voice ;
ts $1 < ts (y* $vowel) {ヽ$voice ;
dz $1 < dz (y* $vowel) {ヽ$voice ;
$1 < ($consonant y* $vowel) {ヽ$voice? ;
$1 < (.) {ヽ $voice? ; # otherwise repeat last character
< ヽ $voice? ; # delete if no characters found
# h- rule: lengthens vowel if not followed by a vowel
[aeiou] } h > ー ;
# one-way latin- > kana rules. these do not occur in
# well-formed romaji representing actual japanese text.
# their purpose is to make all romaji map to kana of
# some sort.
# the following are not really necessary, but produce
# slightly more natural results.
cy > セィ ;
dy > ディ ;
hy > ヒ ;
sy > セィ ;
ty > ティ ;
zy > ゼィ ;
h > ヘ ;
# isolated consonants listed here so as not to mask
# longer rules above.
ch > チ;
sh > シ ;
dz > ヅ ;
dj > ヂ;
b > ブ ;
d > デ ;
g > グ ;
k > ク ;
m > ム ;
n'' < ン } $n_quoter ;
n <> ン ;
p > プ ;
r > ル ;
s > ス ;
t > テ ;
y > イ ;
z > ズ ;
v > ヴ ;
f > フ;
j > ジ;
w > ウ;
ß > | ss ;
æ > | e ;
ð > | d ;
ø > | u ;
þ > | th ;
# simple substitutions using backup
c > | k ;
l > | r ;
q > | k ;
x > | ks ;
# ~~~ END shared rules ~~~
#------------------------------------------------------
# Final cleanup
'~' > ; # delete stray tildes between letters
[:Katakana:] { '' } [:Latin:] > ; # delete stray quotes between letters
# [\u02BE[:Nonspacing Mark:]-[\u3099-\u309C]] > ; # delete any non-spacing marks that we didn't use
:: NFC (NFD) ;
:: ([:Katakana:] halfwidth-fullwidth);
# note: a global filter is more efficient, but MUST include all source chars!!
#:: ([\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]]);
# MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
:: ( [[\ -~\u00A2-\u00A3\u00A5-\u00A6\u00AC\u0304\u20A9\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE8-\uFFEE][~\u3001-\u3002\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u3099-\u309B\u309E\u30A1-\u30FA\u30FC-\u30FE]] ) ;
# eof

View file

@ -0,0 +1,41 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# According to the pinyin definitions I've been able to find:
# 'a', 'e' are the preferred bases
# otherwise 'o'
# otherwise last vowel
# The trailing form of syllables are the following:
# "a", "ai", "ao", "an", "ang",
# "o", "ou", "ong",
# "e", "ei", "er", "en", "eng",
# "i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong",
# "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng",
# "ü", "üe", "üan", "ün"
# so the letters the tone will 'hop' over are the ones matched by the
# forward rules below: "r" after "e", and i, o, n, u, "on" or "ng".
::NFD (NFC);
# Combining marks that are treated as tone marks.
$tone = [\u0304\u0301\u030C\u0300\u0306] ;
# Move the tone to the end of a syllable, and convert to number
# NOTE(review): &tone-digit(...) / &digit-tone(...) presumably invoke
# separately registered transliterators of those names -- confirm.
# After "e", a tone mark followed by "r" becomes "r" plus the tone digit.
e {($tone) r} > r &tone-digit($1);
# A tone mark followed by one of the listed letters or pairs is emitted
# after that letter or pair, as a digit.
($tone) ( [i o n u {o n} {n g}]) > $2 &tone-digit($1);
# Any other tone mark is converted to its digit in place.
($tone) > &tone-digit($1);
# The following backs up until it finds the right vowel, then deposits the tone
$vowel = [aAeEiIoOuUüÜ];
# Latin letters a-z / A-Z that are not in $vowel.
$consonant = [[a-z A-Z] - [$vowel]];
$digit = [1-5];
# Reverse rules, tried in this order (matching the preference list above):
# 1. an a/e followed by any vowels, then consonants, then the digit;
# 2. an o followed by vowels other than a/e, then consonants, then the digit;
# 3. a vowel followed only by consonants and the digit (the last vowel);
# 4. otherwise a digit directly after a letter is converted where it stands.
$1 &digit-tone($3) $2 < ([aAeE]) ($vowel* $consonant*) ($digit);
$1 &digit-tone($3) $2 < ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit);
$1 &digit-tone($3) $2 < ($vowel) ($consonant*) ($digit);
&digit-tone($1) < [:letter:] {($digit)};
::NFC (NFD);

View file

@ -0,0 +1,85 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
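# Malayalam-InterIndic
# Each rule below maps one Malayalam code point U+0Dxx to the private-use
# code point U+E0xx that has the same low byte.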
#:: NFD (NFC) ;
\u0D02>\uE002; # SIGN ANUSVARA
\u0D03>\uE003; # SIGN VISARGA
\u0D05>\uE005; # LETTER A
\u0D06>\uE006; # LETTER AA
\u0D07>\uE007; # LETTER I
\u0D08>\uE008; # LETTER II
\u0D09>\uE009; # LETTER U
\u0D0A>\uE00A; # LETTER UU
\u0D0B>\uE00B; # LETTER VOCALIC R
\u0D0C>\uE00C; # LETTER VOCALIC L
\u0D0E>\uE00E; # LETTER E
\u0D0F>\uE00F; # LETTER EE
\u0D10>\uE010; # LETTER AI
\u0D12>\uE012; # LETTER O
\u0D13>\uE013; # LETTER OO
\u0D14>\uE014; # LETTER AU
\u0D15>\uE015; # LETTER KA
\u0D16>\uE016; # LETTER KHA
\u0D17>\uE017; # LETTER GA
\u0D18>\uE018; # LETTER GHA
\u0D19>\uE019; # LETTER NGA
\u0D1A>\uE01A; # LETTER CA
\u0D1B>\uE01B; # LETTER CHA
\u0D1C>\uE01C; # LETTER JA
\u0D1D>\uE01D; # LETTER JHA
\u0D1E>\uE01E; # LETTER NYA
\u0D1F>\uE01F; # LETTER TTA
\u0D20>\uE020; # LETTER TTHA
\u0D21>\uE021; # LETTER DDA
\u0D22>\uE022; # LETTER DDHA
\u0D23>\uE023; # LETTER NNA
\u0D24>\uE024; # LETTER TA
\u0D25>\uE025; # LETTER THA
\u0D26>\uE026; # LETTER DA
\u0D27>\uE027; # LETTER DHA
\u0D28>\uE028; # LETTER NA
\u0D2A>\uE02A; # LETTER PA
\u0D2B>\uE02B; # LETTER PHA
\u0D2C>\uE02C; # LETTER BA
\u0D2D>\uE02D; # LETTER BHA
\u0D2E>\uE02E; # LETTER MA
\u0D2F>\uE02F; # LETTER YA
\u0D30>\uE030; # LETTER RA
\u0D31>\uE031; # LETTER RRA
\u0D32>\uE032; # LETTER LA
\u0D33>\uE033; # LETTER LLA
\u0D34>\uE034; # LETTER LLLA
\u0D35>\uE035; # LETTER VA
\u0D36>\uE036; # LETTER SHA
\u0D37>\uE037; # LETTER SSA
\u0D38>\uE038; # LETTER SA
\u0D39>\uE039; # LETTER HA
\u0D3E>\uE03E; # VOWEL SIGN AA
\u0D3F>\uE03F; # VOWEL SIGN I
\u0D40>\uE040; # VOWEL SIGN II
\u0D41>\uE041; # VOWEL SIGN U
\u0D42>\uE042; # VOWEL SIGN UU
\u0D43>\uE043; # VOWEL SIGN VOCALIC R
\u0D46>\uE046; # VOWEL SIGN E
\u0D47>\uE047; # VOWEL SIGN EE
\u0D48>\uE048; # VOWEL SIGN AI
\u0D4D>\uE04D; # SIGN VIRAMA
\u0D57>\uE057; # AU LENGTH MARK
\u0D60>\uE060; # LETTER VOCALIC RR
\u0D61>\uE061; # LETTER VOCALIC LL
\u0D66>\uE066; # DIGIT ZERO
\u0D67>\uE067; # DIGIT ONE
\u0D68>\uE068; # DIGIT TWO
\u0D69>\uE069; # DIGIT THREE
\u0D6A>\uE06A; # DIGIT FOUR
\u0D6B>\uE06B; # DIGIT FIVE
\u0D6C>\uE06C; # DIGIT SIX
\u0D6D>\uE06D; # DIGIT SEVEN
\u0D6E>\uE06E; # DIGIT EIGHT
\u0D6F>\uE06F; # DIGIT NINE
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,95 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Oriya-InterIndic
# Maps Oriya code points to the InterIndic intermediate code points (U+E0xx).
#:: NFD (NFC) ;
#\u0B21\u0B3C>\uE05C;# LETTER RRA
#\u0B22\u0B3C>\uE05D;# LETTER RHA
# The two-code-point vowel signs that start with U+0B47 are listed ahead of the
# single-code-point U+0B47 rule further down, so the longer sequence is tried first.
\u0B47\u0B56>\uE048;# VOWEL SIGN AI
\u0B47\u0B3E>\uE04B;# VOWEL SIGN O
\u0B47\u0B57>\uE04C;# VOWEL SIGN AU
\u0B01>\uE001; # SIGN CANDRABINDU
\u0B02>\uE002; # SIGN ANUSVARA
\u0B03>\uE003; # SIGN VISARGA
\u0B05>\uE005; # LETTER A
\u0B06>\uE006; # LETTER AA
\u0B07>\uE007; # LETTER I
\u0B08>\uE008; # LETTER II
\u0B09>\uE009; # LETTER U
\u0B0A>\uE00A; # LETTER UU
\u0B0B>\uE00B; # LETTER VOCALIC R
\u0B0C>\uE00C; # LETTER VOCALIC L
\u0B0F>\uE00F; # LETTER E
\u0B10>\uE010; # LETTER AI
\u0B13>\uE013; # LETTER O
\u0B14>\uE014; # LETTER AU
\u0B15>\uE015; # LETTER KA
\u0B16>\uE016; # LETTER KHA
\u0B17>\uE017; # LETTER GA
\u0B18>\uE018; # LETTER GHA
\u0B19>\uE019; # LETTER NGA
\u0B1A>\uE01A; # LETTER CA
\u0B1B>\uE01B; # LETTER CHA
\u0B1C>\uE01C; # LETTER JA
\u0B1D>\uE01D; # LETTER JHA
\u0B1E>\uE01E; # LETTER NYA
\u0B1F>\uE01F; # LETTER TTA
\u0B20>\uE020; # LETTER TTHA
\u0B21>\uE021; # LETTER DDA
\u0B22>\uE022; # LETTER DDHA
\u0B23>\uE023; # LETTER NNA
\u0B24>\uE024; # LETTER TA
\u0B25>\uE025; # LETTER THA
\u0B26>\uE026; # LETTER DA
\u0B27>\uE027; # LETTER DHA
\u0B28>\uE028; # LETTER NA
\u0B2A>\uE02A; # LETTER PA
\u0B2B>\uE02B; # LETTER PHA
\u0B2C>\uE02C; # LETTER BA
\u0B2D>\uE02D; # LETTER BHA
\u0B2E>\uE02E; # LETTER MA
\u0B2F>\uE02F; # LETTER YA
\u0B30>\uE030; # LETTER RA
\u0B32>\uE032; # LETTER LA
\u0B33>\uE033; # LETTER LLA
\u0B35>\uE035; # LETTER VA
\u0B36>\uE036; # LETTER SHA
\u0B37>\uE037; # LETTER SSA
\u0B38>\uE038; # LETTER SA
\u0B39>\uE039; # LETTER HA
\u0B3C>\uE03C; # SIGN NUKTA
\u0B3D>\uE03D; # SIGN AVAGRAHA
\u0B3E>\uE03E; # VOWEL SIGN AA
\u0B3F>\uE03F; # VOWEL SIGN I
\u0B40>\uE040; # VOWEL SIGN II
\u0B41>\uE041; # VOWEL SIGN U
\u0B42>\uE042; # VOWEL SIGN UU
\u0B43>\uE043; # VOWEL SIGN VOCALIC R
\u0B47>\uE047; # VOWEL SIGN E
#
\u0B4D>\uE04D; # SIGN VIRAMA
\u0B56>\uE056; # AI LENGTH MARK
\u0B57>\uE057; # AU LENGTH MARK
\u0964>\ue064; # DANDA
\u0965>\ue065; # DOUBLE DANDA
#
\u0B5F>\uE05F; # LETTER YYA
\u0B60>\uE060; # LETTER VOCALIC RR
\u0B61>\uE061; # LETTER VOCALIC LL
\u0B66>\uE066; # DIGIT ZERO
\u0B67>\uE067; # DIGIT ONE
\u0B68>\uE068; # DIGIT TWO
\u0B69>\uE069; # DIGIT THREE
\u0B6A>\uE06A; # DIGIT FOUR
\u0B6B>\uE06B; # DIGIT FIVE
\u0B6C>\uE06C; # DIGIT SIX
\u0B6D>\uE06D; # DIGIT SEVEN
\u0B6E>\uE06E; # DIGIT EIGHT
\u0B6F>\uE06F; # DIGIT NINE
\u0B70>\ue07B; # ISSHAR
\u0B71>\ue081; # LETTER WA
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,76 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Tamil-InterIndic
# Maps Tamil code points to the InterIndic intermediate code points (U+E0xx).
#:: NFD (NFC) ;
# Two-code-point sequences come before the single-code-point rules for the same
# leading character (U+0BC6, U+0BC7, U+0B92), so the longer sequence is tried first.
\u0BC6\u0BBE>\uE04A;# VOWEL SIGN O
\u0BC7\u0BBE>\uE04B;# VOWEL SIGN OO
\u0BC6\u0BD7>\uE04C;# VOWEL SIGN AU
\u0B92\u0BD7>\uE014;# LETTER AU
\u0B82>\uE002; # SIGN ANUSVARA
\u0B83>\uE003; # SIGN VISARGA
\u0B85>\uE005; # LETTER A
\u0B86>\uE006; # LETTER AA
\u0B87>\uE007; # LETTER I
\u0B88>\uE008; # LETTER II
\u0B89>\uE009; # LETTER U
\u0B8A>\uE00A; # LETTER UU
\u0B8E>\uE00E; # LETTER E
\u0B8F>\uE00F; # LETTER EE
\u0B90>\uE010; # LETTER AI
\u0B92>\uE012; # LETTER O
\u0B93>\uE013; # LETTER OO
\u0B94>\uE014; # LETTER AU
\u0B95>\uE015; # LETTER KA
\u0B99>\uE019; # LETTER NGA
\u0B9A>\uE01A; # LETTER CA
\u0B9C>\uE01C; # LETTER JA
\u0B9E>\uE01E; # LETTER NYA
\u0B9F>\uE01F; # LETTER TTA
\u0BA3>\uE023; # LETTER NNA
\u0BA4>\uE024; # LETTER TA
\u0BA8>\uE028; # LETTER NA
\u0BA9>\uE029; # LETTER NNNA
\u0BAA>\uE02A; # LETTER PA
\u0BAE>\uE02E; # LETTER MA
\u0BAF>\uE02F; # LETTER YA
\u0BB0>\uE030; # LETTER RA
\u0BB1>\uE031; # LETTER RRA
\u0BB2>\uE032; # LETTER LA
\u0BB3>\uE033; # LETTER LLA
\u0BB4>\uE034; # LETTER LLLA
\u0BB5>\uE035; # LETTER VA
\u0BB7>\uE037; # LETTER SSA
\u0BB8>\uE038; # LETTER SA
\u0BB9>\uE039; # LETTER HA
\u0BBE>\uE03E; # VOWEL SIGN AA
\u0BBF>\uE03F; # VOWEL SIGN I
\u0BC0>\uE040; # VOWEL SIGN II
\u0BC1>\uE041; # VOWEL SIGN U
\u0BC2>\uE042; # VOWEL SIGN UU
\u0BC6>\uE046; # VOWEL SIGN E
\u0BC7>\uE047; # VOWEL SIGN EE
\u0BC8>\uE048; # VOWEL SIGN AI
\u0BCD>\uE04D; # SIGN VIRAMA
\u0BD7>\uE057; # AU LENGTH MARK
\u0BE7>\uE067; # DIGIT ONE
\u0BE8>\uE068; # DIGIT TWO
\u0BE9>\uE069; # DIGIT THREE
\u0BEA>\uE06A; # DIGIT FOUR
\u0BEB>\uE06B; # DIGIT FIVE
\u0BEC>\uE06C; # DIGIT SIX
\u0BED>\uE06D; # DIGIT SEVEN
\u0BEE>\uE06E; # DIGIT EIGHT
\u0BEF>\uE06F; # DIGIT NINE
# Each Tamil number sign is expanded to a sequence of InterIndic digits:
# DIGIT ONE (U+E067) followed by one, two or three DIGIT ZEROs (U+E066).
\u0BF0>\uE067\uE066; # UNMAPPED Tamil-InterIndic: NUMBER TEN
\u0BF1>\uE067\uE066\uE066; # UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED
\u0BF2>\uE067\uE066\uE066\uE066;# UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND
# ASCII '0' -> InterIndic DIGIT ZERO.
# NOTE(review): presumably present because no Tamil zero digit (U+0BE6) is mapped above - confirm.
0>\ue066;
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,90 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Telugu-InterIndic
# Maps Telugu code points to the InterIndic intermediate code points (U+E0xx).
#:: NFD (NFC) ;
# Multi-code-point sequences starting with U+0C46 come before the single-code-point
# U+0C46 rule further down, so the longer sequence is tried first.
\u0c46\u0c4d\u0c56>\ue048\ue04d; # VOWEL SIGN E + SIGN VIRAMA + AI LENGTH MARK -> VOWEL SIGN AI + SIGN VIRAMA
\u0C46\u0C56>\uE048;# VOWEL SIGN AI
\u0C01>\uE001; # SIGN CANDRABINDU
\u0C02>\uE002; # SIGN ANUSVARA
\u0C03>\uE003; # SIGN VISARGA
\u0C05>\uE005; # LETTER A
\u0C06>\uE006; # LETTER AA
\u0C07>\uE007; # LETTER I
\u0C08>\uE008; # LETTER II
\u0C09>\uE009; # LETTER U
\u0C0A>\uE00A; # LETTER UU
\u0C0B>\uE00B; # LETTER VOCALIC R
\u0C0C>\uE00C; # LETTER VOCALIC L
\u0C0E>\uE00E; # LETTER E
\u0C0F>\uE00F; # LETTER EE
\u0C10>\uE010; # LETTER AI
\u0C12>\uE012; # LETTER O
\u0C13>\uE013; # LETTER OO
\u0C14>\uE014; # LETTER AU
\u0C15>\uE015; # LETTER KA
\u0C16>\uE016; # LETTER KHA
\u0C17>\uE017; # LETTER GA
\u0C18>\uE018; # LETTER GHA
\u0C19>\uE019; # LETTER NGA
\u0C1A>\uE01A; # LETTER CA
\u0C1B>\uE01B; # LETTER CHA
\u0C1C>\uE01C; # LETTER JA
\u0C1D>\uE01D; # LETTER JHA
\u0C1E>\uE01E; # LETTER NYA
\u0C1F>\uE01F; # LETTER TTA
\u0C20>\uE020; # LETTER TTHA
\u0C21>\uE021; # LETTER DDA
\u0C22>\uE022; # LETTER DDHA
\u0C23>\uE023; # LETTER NNA
\u0C24>\uE024; # LETTER TA
\u0C25>\uE025; # LETTER THA
\u0C26>\uE026; # LETTER DA
\u0C27>\uE027; # LETTER DHA
\u0C28>\uE028; # LETTER NA
\u0C2A>\uE02A; # LETTER PA
\u0C2B>\uE02B; # LETTER PHA
\u0C2C>\uE02C; # LETTER BA
\u0C2D>\uE02D; # LETTER BHA
\u0C2E>\uE02E; # LETTER MA
\u0C2F>\uE02F; # LETTER YA
\u0C30>\uE030; # LETTER RA
\u0C31>\uE031; # LETTER RRA
\u0C32>\uE032; # LETTER LA
\u0C33>\uE033; # LETTER LLA
\u0C35>\uE035; # LETTER VA
\u0C36>\uE036; # LETTER SHA
\u0C37>\uE037; # LETTER SSA
\u0C38>\uE038; # LETTER SA
\u0C39>\uE039; # LETTER HA
\u0C3E>\uE03E; # VOWEL SIGN AA
\u0C3F>\uE03F; # VOWEL SIGN I
\u0C40>\uE040; # VOWEL SIGN II
\u0C41>\uE041; # VOWEL SIGN U
\u0C42>\uE042; # VOWEL SIGN UU
\u0C43>\uE043; # VOWEL SIGN VOCALIC R
\u0C44>\uE044; # VOWEL SIGN VOCALIC RR
\u0C46>\uE046; # VOWEL SIGN E
\u0C47>\uE047; # VOWEL SIGN EE
\u0C4A>\uE04A; # VOWEL SIGN O
\u0C4B>\uE04B; # VOWEL SIGN OO
\u0C4C>\uE04C; # VOWEL SIGN AU
\u0C4D>\uE04D; # SIGN VIRAMA
\u0C55>\uE055; # LENGTH MARK
\u0C56>\uE056; # AI LENGTH MARK
\u0C60>\uE060; # LETTER VOCALIC RR
\u0C61>\uE061; # LETTER VOCALIC LL
\u0C66>\uE066; # DIGIT ZERO
\u0C67>\uE067; # DIGIT ONE
\u0C68>\uE068; # DIGIT TWO
\u0C69>\uE069; # DIGIT THREE
\u0C6A>\uE06A; # DIGIT FOUR
\u0C6B>\uE06B; # DIGIT FIVE
\u0C6C>\uE06C; # DIGIT SIX
\u0C6D>\uE06D; # DIGIT SEVEN
\u0C6E>\uE06E; # DIGIT EIGHT
\u0C6F>\uE06F; # DIGIT NINE
# :: NFC (NFD) ;
# eof

View file

@ -0,0 +1,187 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Thai-Latin
# This set of rules follows ISO 11940
# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
# except that that document does not mention an implicit vowel, so we use ọ
#
# The transcription is fairly ugly, so we ought to also do the UNGEGN version
# see: http://www.eki.ee/wgrs/rom1_th.pdf
# and probably make that the main variant.
# Note: this is an internal file. The NFD/NFC is handled externally, in the index
# The insertion of spaces between words, the reversal of the vowels
# and the conversion of space to semicolon are done *outside* of these rules.
# So as far as these rules are concerned, the vowels are in logical order!
# insert implicit vowel (and remove it going the other way)
# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
#$consonant = [ก-ฮ];
#$vowel = [ะ-ฺเ-ไ็];
#{ ( $consonant ) } [^$vowel ] > | $1  ;
# > ọ ;
# < ọ ;
$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
$notBelow = [^\p{ccc=0}\p{ccc=below}] ;
# Consonants
# Warning: the 'h's need to be handled carefully!
# What we really want to say is the following, but we can't
# $notHAccent = !($notAbove* ̄ | $notBelow* ̣) ;
# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
# Helper sets used as look-ahead context by the kh / ph / ch / th rules below.
# $freeStandingBelow: U+0325 (combining ring below).
$freeStandingBelow = [\u0325 ];
# $hAccent: the combining marks listed in the "What we really want to say" note above.
# Every variable definition must be terminated by ';'.
$hAccent = [ ̄ ̣];
# $notHAccent0: one character that is neither a free-standing-below mark nor an h accent.
$notHAccent0 = [^$freeStandingBelow$hAccent];
# $notHAccent1: a free-standing-below mark followed by a character that is not an h accent.
$notHAccent1 = $freeStandingBelow [^$hAccent];
ห > h̄ ; # THAI CHARACTER HO HIP
ห | $1 < h ($notAbove*) ̄; # backward case, account for reordering
ฮ <> ḥ ; # THAI CHARACTER HO NOKHUK
ข <> k̄h ; # THAI CHARACTER KHO KHAI
ฃ <> ḳ̄h ; # THAI CHARACTER KHO KHUAT
ฅ <> kʹh ; # THAI CHARACTER KHO KHON
ฆ <> ḳh ; # THAI CHARACTER KHO RAKHANG
ค < kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
ค <> kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
ก <> k ; # THAI CHARACTER KO KAI
ภ <> p̣h ; # THAI CHARACTER PHO SAMPHAO
ผ <> p̄h ; # THAI CHARACTER PHO PHUNG
พ < ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
พ <> ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
ป <> p ; # THAI CHARACTER PO PLA
ฉ <> c̄h ; # THAI CHARACTER CHO CHING
ฌ <> c̣h ; # THAI CHARACTER CHO CHOE
ช < ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
ช <> ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
จ <> c ; # THAI CHARACTER CHO CHAN
ฐ <> ṭ̄h ; # THAI CHARACTER THO THAN
ฑ <> ṯh ; # THAI CHARACTER THO NANGMONTHO
ฒ <> tʹh ; # THAI CHARACTER THO PHUTHAO
ถ <> t̄h ; # THAI CHARACTER THO THUNG
ธ <> ṭh ; # THAI CHARACTER THO THONG
ท < th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
ท <> th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.
ฏ <> t̩ ; # THAI CHARACTER TO PATAK
ต <> t ; # THAI CHARACTER TO TAO
# since there is no singleton g (generated), don't worry about that.
ง <> ng ; # THAI CHARACTER NGO NGU
ณ <> ṇ ; # THAI CHARACTER NO NEN
น <> n ; # THAI CHARACTER NO NU
ญ <> ỵ ; # THAI CHARACTER YO YING
ฎ <> ḍ ; # THAI CHARACTER DO CHADA
ด <> d ; # THAI CHARACTER DO DEK
บ <> b ; # THAI CHARACTER BO BAIMAI
ฝ <> f̄ ; # THAI CHARACTER FO FA
ฝ | $1 < f ($notAbove*) ̄; # backward case, account for reordering
ม <> m ; # THAI CHARACTER MO MA
ย <> y ; # THAI CHARACTER YO YAK
ร <> r ; # THAI CHARACTER RO RUA
ฤ <> v ; # THAI CHARACTER RU
ฦ <> ł ; # THAI CHARACTER LU
ว <> w ; # THAI CHARACTER WO WAEN
ศ <> ṣ̄ ; # THAI CHARACTER SO SALA***
ศ | $1 < s ̣ ($notAbove*) ̄; # backward case, account for reordering
ษ <> s̄ʹ ; # THAI CHARACTER SO RUSI
ส > s̄ ; # THAI CHARACTER SO SUA***
ส | $1 < s ($notAbove*) ̄; # backward case, account for reordering
ฬ <> ḷ ; # THAI CHARACTER LO CHULA
ล <> l ; # THAI CHARACTER LO LING
ฟ <> f ; # THAI CHARACTER FO FAN
อ <> x ; # THAI CHARACTER O ANG
ซ <> s ; # THAI CHARACTER SO SO
# vowels
ั <> ạ ; # THAI CHARACTER MAI HAN-AKAT
า > ā ; # THAI CHARACTER SARA AA
า | $1 < a ($notAbove*) ̄; # backward case, account for reordering
# We deviate from ISO for SARA AM for disambiguation
ำ > a ̉; # THAI CHARACTER SARA AM
ำ | $1 < a ($notAbove*) ̉ ; # backward case, account for reordering
ะ <> a ; # THAI CHARACTER SARA A
ี <> ī ; # THAI CHARACTER SARA II
ี | $1 < i ($notAbove*) ̄ ; # backward case, account for reordering
ื <> ụ̄ ; # THAI CHARACTER SARA UEE
ื | $1 < u ̣ ($notAbove*) ̄ ; # backward case, account for reordering
ึ <> ụ ; # THAI CHARACTER SARA UE
ู <> ū ; # THAI CHARACTER SARA UU
ู | $1 < u ($notAbove*) ̄ ; # backward case, account for reordering
ุ <> u ; # THAI CHARACTER SARA U
ฯ <> ‡ ; # THAI CHARACTER PAIYANNOI
# ฿ <> XXX ; # THAI CURRENCY SYMBOL BAHT
เ <> e ; # THAI CHARACTER SARA E
แ <> æ ; # THAI CHARACTER SARA AE
โ <> o ; # THAI CHARACTER SARA O
ใ <> ı ; # THAI CHARACTER SARA AI MAIMUAN
ไ <> ị ; # THAI CHARACTER SARA AI MAIMALAI
ๅ <> ɨ ; # THAI CHARACTER LAKKHANGYAO
็ <> ̆ ; # THAI CHARACTER MAITAIKHU
่ <> ̀ ; # THAI CHARACTER MAI EK
้ <> ̂ ; # THAI CHARACTER MAI THO
๊ <> ́ ; # THAI CHARACTER MAI TRI
๋ <> ̌ ; # THAI CHARACTER MAI CHATTAWA
์ <> ̒ ; # THAI CHARACTER THANTHAKHAT
๎ <> '~' ; # THAI CHARACTER YAMAKKAN
# We deviate from ISO for disambiguation
ํ <> ̊ ; # THAI CHARACTER NIKHAHIT
๏ <> § ; # THAI CHARACTER FONGMAN
# Thai digits <-> ASCII digits, one rule per digit, in both directions.
# The left side of the ZERO rule must be U+0E50; with an empty left side the
# forward rule matches nothing and the reverse rule would delete every '0'.
๐ <> 0 ; # THAI DIGIT ZERO
๑ <> 1 ; # THAI DIGIT ONE
๒ <> 2 ; # THAI DIGIT TWO
๓ <> 3 ; # THAI DIGIT THREE
๔ <> 4 ; # THAI DIGIT FOUR
๕ <> 5 ; # THAI DIGIT FIVE
๖ <> 6 ; # THAI DIGIT SIX
๗ <> 7 ; # THAI DIGIT SEVEN
๘ <> 8 ; # THAI DIGIT EIGHT
๙ <> 9 ; # THAI DIGIT NINE
๚ <> '||' ; # THAI CHARACTER ANGKHANKHU
๛ <> » ; # THAI CHARACTER KHOMUT
ๆ <> « ; # THAI CHARACTER MAIYAMOK
# moved down to make shorter first
#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
ฺ <> ˌ ; # THAI CHARACTER PHINTHU
ิ <> i ; # THAI CHARACTER SARA I
# fallbacks
| k < g ;
| k < h ;
| c < j ;
| k < q ;
| s < z ;
:: (lower);

View file

@ -0,0 +1,26 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# This reverses the Thai LogicalOrderException vowels, and does (part of) the space handling.
# The rules that convert space into semicolon are in another file,
# since they have to come BEFORE the break iterator.
# $thai: the set of characters treated as Thai by the rules below.
$thai = [[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B] ;
# Reverse direction only: turn '; ' between two Thai characters back into a single space.
' ' < $thai { '; ' } $thai;
# Reverse direction only: delete any other space between two Thai characters (empty left side).
< $thai { ' ' } $thai;
# Now vowels.
# $thai_reversing: Thai characters with the Logical_Order_Exception property.
$thai_reversing = [[:Logical_Order_Exception:] & $thai];
# $thai_non_reversing: every other character in $thai.
$thai_non_reversing = [$thai - $thai_reversing ];
# Forward direction: swap a reversing vowel with the non-reversing character that follows it.
( $thai_reversing ) ( $thai_non_reversing ) > $2 $1;
# Reverse direction: swap them back.
$2 $1 < ( $thai_non_reversing ) ( $thai_reversing ) ;

View file

@ -0,0 +1,11 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# The rules that convert space into semicolon are in this file,
# since they have to come BEFORE the break iterator.
# $thai: the set of characters treated as Thai by the rule below.
$thai = [[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B] ;
# A single space between two Thai characters becomes '; ' (semicolon + space).
# The surrounding Thai characters are context only and are left unchanged.
$thai { ' ' } $thai > '; ' ;

View file

@ -0,0 +1,11 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Only intended for internal use
# Converts between combining tone marks and the digits 1-4, in both directions.
\u0304 <> 1; # U+0304 COMBINING MACRON <-> 1
\u0301 <> 2; # U+0301 COMBINING ACUTE ACCENT <-> 2
\u030C <> 3; # U+030C COMBINING CARON <-> 3
\u0300 <> 4; # U+0300 COMBINING GRAVE ACCENT <-> 4
# Reverse direction only: the digit 5 is replaced by nothing (the empty left side is intentional).
< 5;

View file

@ -0,0 +1,8 @@
// Transliteration data for the "el" bundle.
el{
// TransliterateLATIN holds two strings: a variant label and a transliterator ID.
// NOTE(review): presumably this selects the UNGEGN variant of Greek-Latin for this
// locale - confirm against the code that reads TransliterateLATIN.
TransliterateLATIN {
"UNGEGN",
"::Greek-Latin/UNGEGN;"
}
}

View file

@ -0,0 +1,22 @@
// ***************************************************************************
// *
// * Copyright (C) 2004, International Business Machines
// * Corporation and others. All Rights Reserved.
// *
// ***************************************************************************
//
en{
// Format for the display name of a Transliterator.
// This is the English form of this resource.
// Argument 0 selects the form through a choice pattern: 0 gives an empty name,
// 1 gives argument 1 alone, and 2 gives argument 1, the word "to", then argument 2.
TransliteratorNamePattern { "{0,choice,0#|1#{1}|2#{1} to {2}}" }
// Transliterator display names
// This is the English form of this resource.
// This list is currently incomplete, and care should be taken when translating these identifiers.
// TODO: Reorganize this data like Country, Currencies and Language tables.
// Each key has the form %Translit%<name>; each value is the English display string.
"%Translit%Hex" { "Hex Escape" }
"%Translit%UnicodeName" { "Unicode Name" }
"%Translit%UnicodeChar" { "Unicode Character" }
}

View file

@ -0,0 +1,752 @@
// ***************************************************************************
// *
// * Copyright (C) 2004, International Business Machines
// * Corporation and others. All Rights Reserved.
// *
// ***************************************************************************
//
root{
RuleBasedTransliteratorIDs{
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//
// TRANSLITERATOR INDEX FILE. This file lists the non-algorithmic
// system transliterators. It allows arbitrary mappings between
// transliterator IDs and file names, and also allows the system to
// define aliases for transliterators, so that "Latin-Hangul", for
// example, can be implemented transparently as the compound
// "Latin-Jamo;Jamo-Hangul". Internal IDs may also be defined; these
// are invisible to the user, but can be composed together by the
// system to create visible transliterators.
//
// Entries in this table have one of the following forms (text not
// enclosed by <> is literal):
//
// <id> { file { resource:include{"<resource>"} direction{"<direction>"} } }
// <id> { internal { resource:include{"<resource>"} direction{"<direction>"} } }
// <id> { alias {"<getInstanceArg>"} }
//
// <id> is the ID of the system transliterator being defined. These
// are public IDs enumerated by Transliterator.getAvailableIDs(),
// unless the entry type is "internal".
//
// <resource> is the name of the rule file whose contents are pulled in
// by the include directive, e.g. "Fullwidth_Halfwidth.txt".
//
// NOTE: earlier versions of this index used colon-separated lines of the
// form <id>:file:<resource>:<encoding>:<direction>, with '#' comments.
// The <encoding> field is not present in the table entries below.
//
// <direction> is either "FORWARD" or "REVERSE".
//
// <getInstanceArg> is a string to be passed directly to
// Transliterator.getInstance(). The returned Transliterator object
// then has its ID changed to <id> and is returned.
// Bidirectional rule files
Fullwidth-Halfwidth {
file {
resource:include{"Fullwidth_Halfwidth.txt"}
direction{"FORWARD"}
}
}
Halfwidth-Fullwidth {
file {
resource:include{"Fullwidth_Halfwidth.txt"}
direction{"REVERSE"}
}
}
Latin-Cyrillic {
file {
resource:include{"Cyrillic_Latin.txt"}
direction{"REVERSE"}
}
}
Cyrillic-Latin {
file {
resource:include{"Cyrillic_Latin.txt"}
direction{"FORWARD"}
}
}
Latin-Hebrew {
file {
resource:include{"Hebrew_Latin.txt"}
direction{"REVERSE"}
}
}
Hebrew-Latin {
file {
resource:include{"Hebrew_Latin.txt"}
direction{"FORWARD"}
}
}
Latin-Arabic {
file {
resource:include{"Arabic_Latin.txt"}
direction{"REVERSE"}
}
}
Arabic-Latin {
file {
resource:include{"Arabic_Latin.txt"}
direction{"FORWARD"}
}
}
Tone-Digit {
internal {
resource:include{"Tone_Digit.txt"}
direction{"FORWARD"}
}
}
Digit-Tone {
internal {
resource:include{"Tone_Digit.txt"}
direction{"REVERSE"}
}
}
Latin-NumericPinyin {
file {
resource:include{"Latin_NumericPinyin.txt"}
direction{"FORWARD"}
}
}
NumericPinyin-Latin {
file {
resource:include{"Latin_NumericPinyin.txt"}
direction{"REVERSE"}
}
}
Han-Spacedhan {
internal {
resource:include{"Han_Spacedhan.txt"}
direction{"FORWARD"}
}
}
Spacedhan-Han {
alias {"null"}
}
Han-Latin {
file {
resource:include{"Han_Latin.txt"}
direction{"FORWARD"}
}
}
//Latin-Han:file:Transliterator_Han_Latin.txt:UTF8:REVERSE # no round trip!
Latin-Han {
alias {"null"}
}
// Comment these out; they are only for testing
// Latin-Han/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:REVERSE
// Han-Latin/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:FORWARD
//Latin-Han/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:REVERSE
//Han-Latin/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:FORWARD
Latin-Greek {
file {
resource:include{"Greek_Latin.txt"}
direction{"REVERSE"}
}
}
Greek-Latin {
file {
resource:include{"Greek_Latin.txt"}
direction{"FORWARD"}
}
}
Latin-Greek/UNGEGN {
file {
resource:include{"Greek_Latin_UNGEGN.txt"}
direction{"REVERSE"}
}
}
Greek-Latin/UNGEGN {
file {
resource:include{"Greek_Latin_UNGEGN.txt"}
direction{"FORWARD"}
}
}
Latin-Katakana {
file {
resource:include{"Latin_Katakana.txt"}
direction{"FORWARD"}
}
}
Katakana-Latin {
file {
resource:include{"Latin_Katakana.txt"}
direction{"REVERSE"}
}
}
Latin-Hiragana {
file {
resource:include{"Hiragana_Latin.txt"}
direction{"REVERSE"}
}
}
Hiragana-Latin {
file {
resource:include{"Hiragana_Latin.txt"}
direction{"FORWARD"}
}
}
//Thai Stuff: will change if we get \b into Transliterator
Thai-ThaiSemi {
internal {
resource:include{"Thai_ThaiSemi.txt"}
direction{"FORWARD"}
}
}
Thai-ThaiLogical {
internal {
resource:include{"Thai_ThaiLogical.txt"}
direction{"FORWARD"}
}
}
ThaiLogical-Thai {
internal {
resource:include{"Thai_ThaiLogical.txt"}
direction{"REVERSE"}
}
}
ThaiLogical-Latin {
internal {
resource:include{"ThaiLogical_Latin.txt"}
direction{"FORWARD"}
}
}
Latin-ThaiLogical {
internal {
resource:include{"ThaiLogical_Latin.txt"}
direction{"REVERSE"}
}
}
// Must use the order below!
// We need two separate passes because of the Thai vowel reversal
// Thai-Logical also converts spaces to semicolons. That has to be done before we insert latin spaces
Thai-Latin {
alias {"[[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B];NFD;Thai-ThaiSemi;Any-BreakInternal;Thai-ThaiLogical;ThaiLogical-Latin;NFC"}
}
Latin-Thai {
alias {"[[:Latin:][:Mn:][:Me:] \ \;0-9|~\u00A7\u00AB\u00BB\u02B9\u0374\u2021\u02CC];NFD;Latin-ThaiLogical;ThaiLogical-Thai;NFC"}
}
// end of Thai Stuff
Hiragana-Katakana {
file {
resource:include{"Hiragana_Katakana.txt"}
direction{"FORWARD"}
}
}
Katakana-Hiragana {
file {
resource:include{"Hiragana_Katakana.txt"}
direction{"REVERSE"}
}
}
Any-Accents {
file {
resource:include{"Any_Accents.txt"}
direction{"FORWARD"}
}
}
Accents-Any {
file {
resource:include{"Any_Accents.txt"}
direction{"REVERSE"}
}
}
Any-Publishing {
file {
resource:include{"Any_Publishing.txt"}
direction{"FORWARD"}
}
}
Publishing-Any {
file {
resource:include{"Any_Publishing.txt"}
direction{"REVERSE"}
}
}
// Korean
// N.B. Don't end Latin-Jamo with NFC; that produces Hangul. For
// Hangul output use Latin-Hangul.
LowerLatin-Jamo {
internal {
resource:include{"Latin_Jamo.txt"}
direction{"FORWARD"}
}
}
Jamo-LowerLatin {
internal {
resource:include{"Latin_Jamo.txt"}
direction{"REVERSE"}
}
}
Latin-Jamo {
alias {"['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo"}
}
Jamo-Latin {
alias {"['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\uAC00-\uD7A3];NFD;Jamo-LowerLatin;NFC"}
}
Latin-Hangul {
alias {"['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo;NFC"}
}
Hangul-Latin {
alias {"['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\u3131-\u313F\u3141-\u3143\u3145-\u3163\u3200-\u321C\u3260-\u327B\uAC00-\uD7A3\uFF07\uFFA1-\uFFAF\uFFB1-\uFFB3\uFFB5-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC];NFKD;Jamo-LowerLatin;NFC"}
}
// Inter-Indic composed rules
Latin-InterIndic {
internal {
resource:include{"Latin_InterIndic.txt"}
direction{"FORWARD"}
}
}
Devanagari-InterIndic {
internal {
resource:include{"Devanagari_InterIndic.txt"}
direction{"FORWARD"}
}
}
Bengali-InterIndic {
internal {
resource:include{"Bengali_InterIndic.txt"}
direction{"FORWARD"}
}
}
Gurmukhi-InterIndic {
internal {
resource:include{"Gurmukhi_InterIndic.txt"}
direction{"FORWARD"}
}
}
Gujarati-InterIndic {
internal {
resource:include{"Gujarati_InterIndic.txt"}
direction{"FORWARD"}
}
}
Oriya-InterIndic {
internal {
resource:include{"Oriya_InterIndic.txt"}
direction{"FORWARD"}
}
}
Tamil-InterIndic {
internal {
resource:include{"Tamil_InterIndic.txt"}
direction{"FORWARD"}
}
}
Telugu-InterIndic {
internal {
resource:include{"Telugu_InterIndic.txt"}
direction{"FORWARD"}
}
}
Kannada-InterIndic {
internal {
resource:include{"Kannada_InterIndic.txt"}
direction{"FORWARD"}
}
}
Malayalam-InterIndic {
internal {
resource:include{"Malayalam_InterIndic.txt"}
direction{"FORWARD"}
}
}
InterIndic-Latin {
internal {
resource:include{"InterIndic_Latin.txt"}
direction{"FORWARD"}
}
}
InterIndic-Devanagari {
internal {
resource:include{"InterIndic_Devanagari.txt"}
direction{"FORWARD"}
}
}
InterIndic-Bengali {
internal {
resource:include{"InterIndic_Bengali.txt"}
direction{"FORWARD"}
}
}
InterIndic-Gurmukhi {
internal {
resource:include{"InterIndic_Gurmukhi.txt"}
direction{"FORWARD"}
}
}
InterIndic-Gujarati {
internal {
resource:include{"InterIndic_Gujarati.txt"}
direction{"FORWARD"}
}
}
InterIndic-Oriya {
internal {
resource:include{"InterIndic_Oriya.txt"}
direction{"FORWARD"}
}
}
InterIndic-Tamil {
internal {
resource:include{"InterIndic_Tamil.txt"}
direction{"FORWARD"}
}
}
InterIndic-Telugu {
internal {
resource:include{"InterIndic_Telugu.txt"}
direction{"FORWARD"}
}
}
InterIndic-Kannada {
internal {
resource:include{"InterIndic_Kannada.txt"}
direction{"FORWARD"}
}
}
InterIndic-Malayalam {
internal {
resource:include{"InterIndic_Malayalam.txt"}
direction{"FORWARD"}
}
}
//Latin-Indic transliterators
Latin-Devanagari {
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Devanagari;NFC"}
}
Latin-Bengali {
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Bengali;NFC"}
}
Latin-Gurmukhi {
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gurmukhi;NFC"}
}
Latin-Gujarati {
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gujarati;NFC"}
}
Latin-Oriya {
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Oriya;NFC"}
}
Latin-Tamil {
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Tamil;NFC"}
}
Latin-Telugu {
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Telugu;NFC"}
}
Latin-Kannada {
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Kannada;NFC"}
}
Latin-Malayalam {
alias {"['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Malayalam;NFC"}
}
// Indic-to-Latin transliterators (the Indic-to-Indic aliases follow further down without a separate heading)
// Devanagari -> Latin, defined as an alias for a semicolon-separated chain:
//   [character set] ; NFD ; Devanagari-InterIndic ; InterIndic-Latin ; NFC
// The leading bracketed set lists Devanagari code points (U+0901..U+096F with gaps).
// NOTE(review): the set presumably acts as a filter limiting which characters
// the chain touches -- confirm against the ICU compound-ID documentation.
Devanagari-Latin {
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Latin;NFC"}
}
Bengali-Latin {
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Latin;NFC"}
}
Gurmukhi-Latin {
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Latin;NFC"}
}
Gujarati-Latin {
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Latin;NFC"}
}
// Oriya -> Latin via the InterIndic intermediate form.
// NOTE(review): this set starts with U+0964-U+0965, which the Oriya-to-Indic
// aliases later in this table (Oriya-Devanagari, Oriya-Bengali, ...) do not
// include. The remaining ranges cover the same code points, only spelled
// differently (e.g. "0B35" + "0B36-0B39" here versus "0B35-0B39" there).
// Confirm with the generator whether the difference is intentional.
Oriya-Latin {
alias {"[\u0964-\u0965\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35\u0B36-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B70\u0B71];NFD;Oriya-InterIndic;InterIndic-Latin;NFC"}
}
// Tamil -> Latin via the InterIndic intermediate form.
// NOTE(review): the set begins with a literal ASCII '0' ahead of the Tamil
// ranges, and its last range starts at U+0BE7 rather than U+0BE6. Every
// Tamil-* alias in this table has the same leading '0', so it looks
// deliberate (presumably standing in for a Tamil digit zero) -- confirm
// against the rule generator before "fixing" it.
Tamil-Latin {
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Latin;NFC"}
}
Telugu-Latin {
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Latin;NFC"}
}
Kannada-Latin {
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Latin;NFC"}
}
Malayalam-Latin {
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Latin;NFC"}
}
// Devanagari -> Bengali. From this entry on, the aliases are Indic-to-Indic:
// they keep the same chain shape as the Indic-to-Latin entries before them
// ([set];NFD;<source>-InterIndic;InterIndic-<target>;NFC) but the final
// InterIndic step targets another Indic script instead of Latin.
Devanagari-Bengali {
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Bengali;NFC"}
}
Devanagari-Gurmukhi {
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gurmukhi;NFC"}
}
Devanagari-Gujarati {
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gujarati;NFC"}
}
Devanagari-Oriya {
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Oriya;NFC"}
}
Devanagari-Tamil {
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Tamil;NFC"}
}
Devanagari-Telugu {
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Telugu;NFC"}
}
Devanagari-Kannada {
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Kannada;NFC"}
}
Devanagari-Malayalam {
alias {"[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Malayalam;NFC"}
}
Bengali-Devanagari {
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Devanagari;NFC"}
}
Bengali-Gurmukhi {
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gurmukhi;NFC"}
}
Bengali-Gujarati {
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gujarati;NFC"}
}
Bengali-Oriya {
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Oriya;NFC"}
}
Bengali-Tamil {
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Tamil;NFC"}
}
Bengali-Telugu {
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Telugu;NFC"}
}
Bengali-Kannada {
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Kannada;NFC"}
}
Bengali-Malayalam {
alias {"[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Malayalam;NFC"}
}
Gurmukhi-Devanagari {
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Devanagari;NFC"}
}
Gurmukhi-Bengali {
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Bengali;NFC"}
}
Gurmukhi-Gujarati {
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Gujarati;NFC"}
}
Gurmukhi-Oriya {
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Oriya;NFC"}
}
Gurmukhi-Tamil {
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Tamil;NFC"}
}
Gurmukhi-Telugu {
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Telugu;NFC"}
}
Gurmukhi-Kannada {
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Kannada;NFC"}
}
Gurmukhi-Malayalam {
alias {"[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Malayalam;NFC"}
}
Gujarati-Devanagari {
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Devanagari;NFC"}
}
Gujarati-Bengali {
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Bengali;NFC"}
}
Gujarati-Gurmukhi {
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Gurmukhi;NFC"}
}
Gujarati-Oriya {
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Oriya;NFC"}
}
Gujarati-Tamil {
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Tamil;NFC"}
}
Gujarati-Telugu {
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Telugu;NFC"}
}
Gujarati-Kannada {
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Kannada;NFC"}
}
Gujarati-Malayalam {
alias {"[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Malayalam;NFC"}
}
Oriya-Devanagari {
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Devanagari;NFC"}
}
Oriya-Bengali {
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Bengali;NFC"}
}
Oriya-Gurmukhi {
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gurmukhi;NFC"}
}
Oriya-Gujarati {
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gujarati;NFC"}
}
Oriya-Tamil {
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Tamil;NFC"}
}
Oriya-Telugu {
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Telugu;NFC"}
}
Oriya-Kannada {
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Kannada;NFC"}
}
Oriya-Malayalam {
alias {"[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Malayalam;NFC"}
}
Tamil-Devanagari {
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Devanagari;NFC"}
}
Tamil-Bengali {
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Bengali;NFC"}
}
Tamil-Gurmukhi {
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gurmukhi;NFC"}
}
Tamil-Gujarati {
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gujarati;NFC"}
}
Tamil-Oriya {
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Oriya;NFC"}
}
Tamil-Telugu {
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Telugu;NFC"}
}
Tamil-Kannada {
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Kannada;NFC"}
}
Tamil-Malayalam {
alias {"[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Malayalam;NFC"}
}
Telugu-Devanagari {
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Devanagari;NFC"}
}
Telugu-Bengali {
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Bengali;NFC"}
}
Telugu-Gurmukhi {
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gurmukhi;NFC"}
}
Telugu-Gujarati {
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gujarati;NFC"}
}
Telugu-Oriya {
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Oriya;NFC"}
}
Telugu-Tamil {
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Tamil;NFC"}
}
Telugu-Kannada {
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Kannada;NFC"}
}
Telugu-Malayalam {
alias {"[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Malayalam;NFC"}
}
Kannada-Devanagari {
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Devanagari;NFC"}
}
Kannada-Bengali {
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Bengali;NFC"}
}
Kannada-Gurmukhi {
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gurmukhi;NFC"}
}
Kannada-Gujarati {
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gujarati;NFC"}
}
Kannada-Oriya {
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Oriya;NFC"}
}
Kannada-Tamil {
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Tamil;NFC"}
}
Kannada-Telugu {
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Telugu;NFC"}
}
Kannada-Malayalam {
alias {"[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Malayalam;NFC"}
}
Malayalam-Devanagari {
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Devanagari;NFC"}
}
Malayalam-Bengali {
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Bengali;NFC"}
}
Malayalam-Gurmukhi {
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gurmukhi;NFC"}
}
Malayalam-Gujarati {
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gujarati;NFC"}
}
Malayalam-Oriya {
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Oriya;NFC"}
}
Malayalam-Tamil {
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Tamil;NFC"}
}
Malayalam-Telugu {
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Telugu;NFC"}
}
Malayalam-Kannada {
alias {"[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Kannada;NFC"}
}
// eof
}
TransliteratorNamePattern {
// Format for the display name of a Transliterator.
// This is the language-neutral form of this resource.
// The pattern is a choice format selected by argument {0}:
//   0 -> empty string
//   1 -> "{1}"
//   2 -> "{1}-{2}"
// NOTE(review): {0} is presumably the number of name parts the caller
// supplies -- confirm against the code that formats this pattern.
"{0,choice,0#|1#{1}|2#{1}-{2}}" // Display name
}
// Transliterator display names
// This is the English form of this resource.
// Each of the three entries below uses the same string as both its key and
// its value, so the text stored for a name is the "%Translit%..." key itself.
"%Translit%Hex" { "%Translit%Hex" }
"%Translit%UnicodeName" { "%Translit%UnicodeName" }
"%Translit%UnicodeChar" { "%Translit%UnicodeChar" }
// Array resource holding two empty strings.
// NOTE(review): nothing in this part of the file shows what reads this
// entry -- confirm its consumer before removing it or filling it in.
TransliterateLATIN{
"",
""
}
}

View file

@ -1,306 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Any_Accents.txt
// Date: Tue May 18 17:24:48 2004
//--------------------------------------------------------------------
// Any_Accents
t_Any_Accents {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
":: NFD (NFC) ;"
// to do: make reversible
// define special conversion characters.
// Variants of this could use different characters, or set one or the other to null.
"$pre = \\\< ;"
"$post = \\\> ;"
// Provide keyboard equivalents for common diacritics used in transliteration
"$pre \\\` $post <> \u0300 ;" // COMBINING GRAVE ACCENT
"$pre \\\' $post <> \u0301 ;" // COMBINING ACUTE ACCENT
"$pre \\\^ $post <> \u0302 ;" // COMBINING CIRCUMFLEX ACCENT
"$pre \\\~ $post <> \u0303 ;" // COMBINING TILDE
"$pre \\\- $post <> \u0304 ;" // COMBINING MACRON
"$pre \\\" $post <> \u0308 ;" // COMBINING DIAERESIS
"$pre \\\* $post <> \u030A ;" // COMBINING RING ABOVE
"$pre \\\, $post <> \u0327 ;" // COMBINING CEDILLA
"$pre '/' $post <> \u0338 ;" // COMBINING LONG SOLIDUS OVERLAY
"$pre \\\. $post <> \u0323 ;" // COMBINING DOT BELOW
// Combine common characters
"$pre AE $post <> \u00C6 ;" // LATIN CAPITAL LETTER AE
"$pre ae $post <> \u00E6 ;" // LATIN SMALL LETTER AE
"$pre D $post <> \u00D0 ;" // LATIN CAPITAL LETTER ETH
"$pre d $post <> \u00F0 ;" // LATIN SMALL LETTER ETH
"$pre O'/' $post <> \u00D8 ;" // LATIN CAPITAL LETTER O WITH STROKE
"$pre o'/' $post <> \u00F8 ;" // LATIN SMALL LETTER O WITH STROKE
"$pre TH $post <> \u00DE ;" // LATIN CAPITAL LETTER THORN
"$pre th $post <> \u00FE ;" // LATIN SMALL LETTER THORN
"$pre OE $post <> \u0152 ;" // LATIN CAPITAL LIGATURE OE
"$pre oe $post <> \u0153 ;" // LATIN SMALL LIGATURE OE
"$pre ss $post <> \u00DF ;" // LATIN SMALL LETTER SHARP S
"$pre NG $post <> \u014A ;" // LATIN CAPITAL LETTER ENG
"$pre ng $post <> \u014B ;" // LATIN SMALL LETTER ENG
"$pre T $post <> \u0398 ;" // THETA
"$pre t $post <> \u03B8 ;" // THETA
"$pre SH $post <> \u01A9 ;" // LATIN CAPITAL LETTER ESH
"$pre sh $post <> \u0283 ;" // LATIN SMALL LETTER ESH
"$pre ZH $post <> \u01B7 ;" // LATIN CAPITAL LETTER EZH
"$pre zh $post <> \u0292 ;" // LATIN SMALL LETTER EZH
"$pre U $post <> \u01B1 ;" // LATIN CAPITAL LETTER UPSILON
"$pre u $post <> \u028A ;" // LATIN SMALL LETTER UPSILON
"$pre A $post <> \u018F ;" // LATIN CAPITAL LETTER SCHWA
"$pre a $post <> \u0259 ;" // LATIN SMALL LETTER SCHWA
"$pre O $post <> \u0186 ;" // LATIN CAPITAL LETTER OPEN O
"$pre o $post <> \u0254 ;" // LATIN SMALL LETTER OPEN O
"$pre E $post <> \u0190 ;" // LATIN CAPITAL LETTER OPEN E
"$pre e $post <> \u025B ;" // LATIN SMALL LETTER OPEN E
// three that don't have uppercases
"$pre '?' $post <> \u0294 ;" // LATIN LETTER GLOTTAL STOP
"$pre i $post <> \u026A ;" // LATIN LETTER SMALL CAPITAL I
"$pre v $post <> \u028C ;" // LATIN SMALL LETTER TURNED V
// Additional Characters that may be added in the future
// $pre XXX $post <> \u0306 ; # COMBINING BREVE
// $pre XXX $post <> \u0307 ; # COMBINING DOT ABOVE
// $pre XXX $post <> \u0309 ; # COMBINING HOOK ABOVE
// $pre XXX $post <> \u030B ; # COMBINING DOUBLE ACUTE ACCENT
// $pre XXX $post <> \u030C ; # COMBINING CARON
// $pre XXX $post <> \u030F ; # COMBINING DOUBLE GRAVE ACCENT
// $pre XXX $post <> \u0311 ; # COMBINING INVERTED BREVE
// $pre XXX $post <> \u0313 ; # COMBINING COMMA ABOVE
// $pre XXX $post <> \u0314 ; # COMBINING REVERSED COMMA ABOVE
// $pre XXX $post <> \u031B ; # COMBINING HORN
// $pre XXX $post <> \u0324 ; # COMBINING DIAERESIS BELOW
// $pre XXX $post <> \u0325 ; # COMBINING RING BELOW
// $pre XXX $post <> \u0326 ; # COMBINING COMMA BELOW
// $pre XXX $post <> \u0328 ; # COMBINING OGONEK
// $pre XXX $post <> \u032D ; # COMBINING CIRCUMFLEX ACCENT BELOW
// $pre XXX $post <> \u032E ; # COMBINING BREVE BELOW
// $pre XXX $post <> \u0330 ; # COMBINING TILDE BELOW
// $pre XXX $post <> \u0331 ; # COMBINING MACRON BELOW
// $pre YYY $post <> \u00AA ; # FEMININE ORDINAL INDICATOR
// $pre YYY $post <> \u00BA ; # MASCULINE ORDINAL INDICATOR
// $pre YYY $post <> \u0110 ; # LATIN CAPITAL LETTER D WITH STROKE
// $pre YYY $post <> \u0111 ; # LATIN SMALL LETTER D WITH STROKE
// $pre YYY $post <> \u0126 ; # LATIN CAPITAL LETTER H WITH STROKE
// $pre YYY $post <> \u0127 ; # LATIN SMALL LETTER H WITH STROKE
// $pre YYY $post <> \u0131 ; # LATIN SMALL LETTER DOTLESS I
// $pre YYY $post <> \u0138 ; # LATIN SMALL LETTER KRA
// $pre YYY $post <> \u013F ; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
// $pre YYY $post <> \u0140 ; # LATIN SMALL LETTER L WITH MIDDLE DOT
// $pre YYY $post <> \u0141 ; # LATIN CAPITAL LETTER L WITH STROKE
// $pre YYY $post <> \u0142 ; # LATIN SMALL LETTER L WITH STROKE
// $pre YYY $post <> \u0149 ; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
// $pre YYY $post <> \u0166 ; # LATIN CAPITAL LETTER T WITH STROKE
// $pre YYY $post <> \u0167 ; # LATIN SMALL LETTER T WITH STROKE
// $pre YYY $post <> \u017F ; # LATIN SMALL LETTER LONG S
// $pre YYY $post <> \u0180 ; # LATIN SMALL LETTER B WITH STROKE
// $pre YYY $post <> \u0181 ; # LATIN CAPITAL LETTER B WITH HOOK
// $pre YYY $post <> \u0182 ; # LATIN CAPITAL LETTER B WITH TOPBAR
// $pre YYY $post <> \u0183 ; # LATIN SMALL LETTER B WITH TOPBAR
// $pre YYY $post <> \u0184 ; # LATIN CAPITAL LETTER TONE SIX
// $pre YYY $post <> \u0185 ; # LATIN SMALL LETTER TONE SIX
// $pre YYY $post <> \u0187 ; # LATIN CAPITAL LETTER C WITH HOOK
// $pre YYY $post <> \u0188 ; # LATIN SMALL LETTER C WITH HOOK
// $pre YYY $post <> \u0189 ; # LATIN CAPITAL LETTER AFRICAN D
// $pre YYY $post <> \u018A ; # LATIN CAPITAL LETTER D WITH HOOK
// $pre YYY $post <> \u018B ; # LATIN CAPITAL LETTER D WITH TOPBAR
// $pre YYY $post <> \u018C ; # LATIN SMALL LETTER D WITH TOPBAR
// $pre YYY $post <> \u018D ; # LATIN SMALL LETTER TURNED DELTA
// $pre YYY $post <> \u018E ; # LATIN CAPITAL LETTER REVERSED E
// $pre YYY $post <> \u0191 ; # LATIN CAPITAL LETTER F WITH HOOK
// $pre YYY $post <> \u0192 ; # LATIN SMALL LETTER F WITH HOOK
// $pre YYY $post <> \u0193 ; # LATIN CAPITAL LETTER G WITH HOOK
// $pre YYY $post <> \u0194 ; # LATIN CAPITAL LETTER GAMMA
// $pre YYY $post <> \u0195 ; # LATIN SMALL LETTER HV
// $pre YYY $post <> \u0196 ; # LATIN CAPITAL LETTER IOTA
// $pre YYY $post <> \u0197 ; # LATIN CAPITAL LETTER I WITH STROKE
// $pre YYY $post <> \u0198 ; # LATIN CAPITAL LETTER K WITH HOOK
// $pre YYY $post <> \u0199 ; # LATIN SMALL LETTER K WITH HOOK
// $pre YYY $post <> \u019A ; # LATIN SMALL LETTER L WITH BAR
// $pre YYY $post <> \u019B ; # LATIN SMALL LETTER LAMBDA WITH STROKE
// $pre YYY $post <> \u019C ; # LATIN CAPITAL LETTER TURNED M
// $pre YYY $post <> \u019D ; # LATIN CAPITAL LETTER N WITH LEFT HOOK
// $pre YYY $post <> \u019E ; # LATIN SMALL LETTER N WITH LONG RIGHT LEG
// $pre YYY $post <> \u019F ; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
// $pre YYY $post <> \u01A2 ; # LATIN CAPITAL LETTER OI
// $pre YYY $post <> \u01A3 ; # LATIN SMALL LETTER OI
// $pre YYY $post <> \u01A4 ; # LATIN CAPITAL LETTER P WITH HOOK
// $pre YYY $post <> \u01A5 ; # LATIN SMALL LETTER P WITH HOOK
// $pre YYY $post <> \u01A6 ; # LATIN LETTER YR
// $pre YYY $post <> \u01A7 ; # LATIN CAPITAL LETTER TONE TWO
// $pre YYY $post <> \u01A8 ; # LATIN SMALL LETTER TONE TWO
// $pre YYY $post <> \u01AA ; # LATIN LETTER REVERSED ESH LOOP
// $pre YYY $post <> \u01AB ; # LATIN SMALL LETTER T WITH PALATAL HOOK
// $pre YYY $post <> \u01AC ; # LATIN CAPITAL LETTER T WITH HOOK
// $pre YYY $post <> \u01AD ; # LATIN SMALL LETTER T WITH HOOK
// $pre YYY $post <> \u01AE ; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
// $pre YYY $post <> \u01B2 ; # LATIN CAPITAL LETTER V WITH HOOK
// $pre YYY $post <> \u01B3 ; # LATIN CAPITAL LETTER Y WITH HOOK
// $pre YYY $post <> \u01B4 ; # LATIN SMALL LETTER Y WITH HOOK
// $pre YYY $post <> \u01B5 ; # LATIN CAPITAL LETTER Z WITH STROKE
// $pre YYY $post <> \u01B6 ; # LATIN SMALL LETTER Z WITH STROKE
// $pre YYY $post <> \u01B8 ; # LATIN CAPITAL LETTER EZH REVERSED
// $pre YYY $post <> \u01B9 ; # LATIN SMALL LETTER EZH REVERSED
// $pre YYY $post <> \u01BA ; # LATIN SMALL LETTER EZH WITH TAIL
// $pre YYY $post <> \u01BB ; # LATIN LETTER TWO WITH STROKE
// $pre YYY $post <> \u01BC ; # LATIN CAPITAL LETTER TONE FIVE
// $pre YYY $post <> \u01BD ; # LATIN SMALL LETTER TONE FIVE
// $pre YYY $post <> \u01BE ; # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE
// $pre YYY $post <> \u01BF ; # LATIN LETTER WYNN
// $pre YYY $post <> \u01C0 ; # LATIN LETTER DENTAL CLICK
// $pre YYY $post <> \u01C1 ; # LATIN LETTER LATERAL CLICK
// $pre YYY $post <> \u01C2 ; # LATIN LETTER ALVEOLAR CLICK
// $pre YYY $post <> \u01C3 ; # LATIN LETTER RETROFLEX CLICK
// $pre YYY $post <> \u01C4 ; # LATIN CAPITAL LETTER DZ WITH CARON
// $pre YYY $post <> \u01C5 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
// $pre YYY $post <> \u01C6 ; # LATIN SMALL LETTER DZ WITH CARON
// $pre YYY $post <> \u01C7 ; # LATIN CAPITAL LETTER LJ
// $pre YYY $post <> \u01C8 ; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
// $pre YYY $post <> \u01C9 ; # LATIN SMALL LETTER LJ
// $pre YYY $post <> \u01CA ; # LATIN CAPITAL LETTER NJ
// $pre YYY $post <> \u01CB ; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
// $pre YYY $post <> \u01CC ; # LATIN SMALL LETTER NJ
// $pre YYY $post <> \u01DD ; # LATIN SMALL LETTER TURNED E
// $pre YYY $post <> \u01E4 ; # LATIN CAPITAL LETTER G WITH STROKE
// $pre YYY $post <> \u01E5 ; # LATIN SMALL LETTER G WITH STROKE
// $pre YYY $post <> \u01F1 ; # LATIN CAPITAL LETTER DZ
// $pre YYY $post <> \u01F2 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
// $pre YYY $post <> \u01F3 ; # LATIN SMALL LETTER DZ
// $pre YYY $post <> \u01F6 ; # LATIN CAPITAL LETTER HWAIR
// $pre YYY $post <> \u01F7 ; # LATIN CAPITAL LETTER WYNN
// $pre YYY $post <> \u021C ; # LATIN CAPITAL LETTER YOGH
// $pre YYY $post <> \u021D ; # LATIN SMALL LETTER YOGH
// $pre YYY $post <> \u0222 ; # LATIN CAPITAL LETTER OU
// $pre YYY $post <> \u0223 ; # LATIN SMALL LETTER OU
// $pre YYY $post <> \u0224 ; # LATIN CAPITAL LETTER Z WITH HOOK
// $pre YYY $post <> \u0225 ; # LATIN SMALL LETTER Z WITH HOOK
// $pre YYY $post <> \u0250 ; # LATIN SMALL LETTER TURNED A
// $pre YYY $post <> \u0251 ; # LATIN SMALL LETTER ALPHA
// $pre YYY $post <> \u0252 ; # LATIN SMALL LETTER TURNED ALPHA
// $pre YYY $post <> \u0253 ; # LATIN SMALL LETTER B WITH HOOK
// $pre YYY $post <> \u0255 ; # LATIN SMALL LETTER C WITH CURL
// $pre YYY $post <> \u0256 ; # LATIN SMALL LETTER D WITH TAIL
// $pre YYY $post <> \u0257 ; # LATIN SMALL LETTER D WITH HOOK
// $pre YYY $post <> \u0258 ; # LATIN SMALL LETTER REVERSED E
// $pre YYY $post <> \u025A ; # LATIN SMALL LETTER SCHWA WITH HOOK
// $pre YYY $post <> \u025C ; # LATIN SMALL LETTER REVERSED OPEN E
// $pre YYY $post <> \u025D ; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
// $pre YYY $post <> \u025E ; # LATIN SMALL LETTER CLOSED REVERSED OPEN E
// $pre YYY $post <> \u025F ; # LATIN SMALL LETTER DOTLESS J WITH STROKE
// $pre YYY $post <> \u0260 ; # LATIN SMALL LETTER G WITH HOOK
// $pre YYY $post <> \u0261 ; # LATIN SMALL LETTER SCRIPT G
// $pre YYY $post <> \u0262 ; # LATIN LETTER SMALL CAPITAL G
// $pre YYY $post <> \u0263 ; # LATIN SMALL LETTER GAMMA
// $pre YYY $post <> \u0264 ; # LATIN SMALL LETTER RAMS HORN
// $pre YYY $post <> \u0265 ; # LATIN SMALL LETTER TURNED H
// $pre YYY $post <> \u0266 ; # LATIN SMALL LETTER H WITH HOOK
// $pre YYY $post <> \u0267 ; # LATIN SMALL LETTER HENG WITH HOOK
// $pre YYY $post <> \u0268 ; # LATIN SMALL LETTER I WITH STROKE
// $pre YYY $post <> \u0269 ; # LATIN SMALL LETTER IOTA
// $pre YYY $post <> \u026B ; # LATIN SMALL LETTER L WITH MIDDLE TILDE
// $pre YYY $post <> \u026C ; # LATIN SMALL LETTER L WITH BELT
// $pre YYY $post <> \u026D ; # LATIN SMALL LETTER L WITH RETROFLEX HOOK
// $pre YYY $post <> \u026E ; # LATIN SMALL LETTER LEZH
// $pre YYY $post <> \u026F ; # LATIN SMALL LETTER TURNED M
// $pre YYY $post <> \u0270 ; # LATIN SMALL LETTER TURNED M WITH LONG LEG
// $pre YYY $post <> \u0271 ; # LATIN SMALL LETTER M WITH HOOK
// $pre YYY $post <> \u0272 ; # LATIN SMALL LETTER N WITH LEFT HOOK
// $pre YYY $post <> \u0273 ; # LATIN SMALL LETTER N WITH RETROFLEX HOOK
// $pre YYY $post <> \u0274 ; # LATIN LETTER SMALL CAPITAL N
// $pre YYY $post <> \u0275 ; # LATIN SMALL LETTER BARRED O
// $pre YYY $post <> \u0276 ; # LATIN LETTER SMALL CAPITAL OE
// $pre YYY $post <> \u0277 ; # LATIN SMALL LETTER CLOSED OMEGA
// $pre YYY $post <> \u0278 ; # LATIN SMALL LETTER PHI
// $pre YYY $post <> \u0279 ; # LATIN SMALL LETTER TURNED R
// $pre YYY $post <> \u027A ; # LATIN SMALL LETTER TURNED R WITH LONG LEG
// $pre YYY $post <> \u027B ; # LATIN SMALL LETTER TURNED R WITH HOOK
// $pre YYY $post <> \u027C ; # LATIN SMALL LETTER R WITH LONG LEG
// $pre YYY $post <> \u027D ; # LATIN SMALL LETTER R WITH TAIL
// $pre YYY $post <> \u027E ; # LATIN SMALL LETTER R WITH FISHHOOK
// $pre YYY $post <> \u027F ; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK
// $pre YYY $post <> \u0280 ; # LATIN LETTER SMALL CAPITAL R
// $pre YYY $post <> \u0281 ; # LATIN LETTER SMALL CAPITAL INVERTED R
// $pre YYY $post <> \u0282 ; # LATIN SMALL LETTER S WITH HOOK
// $pre YYY $post <> \u0284 ; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
// $pre YYY $post <> \u0285 ; # LATIN SMALL LETTER SQUAT REVERSED ESH
// $pre YYY $post <> \u0286 ; # LATIN SMALL LETTER ESH WITH CURL
// $pre YYY $post <> \u0287 ; # LATIN SMALL LETTER TURNED T
// $pre YYY $post <> \u0288 ; # LATIN SMALL LETTER T WITH RETROFLEX HOOK
// $pre YYY $post <> \u0289 ; # LATIN SMALL LETTER U BAR
// $pre YYY $post <> \u028B ; # LATIN SMALL LETTER V WITH HOOK
// $pre YYY $post <> \u028D ; # LATIN SMALL LETTER TURNED W
// $pre YYY $post <> \u028E ; # LATIN SMALL LETTER TURNED Y
// $pre YYY $post <> \u028F ; # LATIN LETTER SMALL CAPITAL Y
// $pre YYY $post <> \u0290 ; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK
// $pre YYY $post <> \u0291 ; # LATIN SMALL LETTER Z WITH CURL
// $pre YYY $post <> \u0293 ; # LATIN SMALL LETTER EZH WITH CURL
// $pre YYY $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP
// $pre YYY $post <> \u0295 ; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE
// $pre YYY $post <> \u0296 ; # LATIN LETTER INVERTED GLOTTAL STOP
// $pre YYY $post <> \u0297 ; # LATIN LETTER STRETCHED C
// $pre YYY $post <> \u0298 ; # LATIN LETTER BILABIAL CLICK
// $pre YYY $post <> \u0299 ; # LATIN LETTER SMALL CAPITAL B
// $pre YYY $post <> \u029A ; # LATIN SMALL LETTER CLOSED OPEN E
// $pre YYY $post <> \u029B ; # LATIN LETTER SMALL CAPITAL G WITH HOOK
// $pre YYY $post <> \u029C ; # LATIN LETTER SMALL CAPITAL H
// $pre YYY $post <> \u029D ; # LATIN SMALL LETTER J WITH CROSSED-TAIL
// $pre YYY $post <> \u029E ; # LATIN SMALL LETTER TURNED K
// $pre YYY $post <> \u029F ; # LATIN LETTER SMALL CAPITAL L
// $pre YYY $post <> \u02A0 ; # LATIN SMALL LETTER Q WITH HOOK
// $pre YYY $post <> \u02A1 ; # LATIN LETTER GLOTTAL STOP WITH STROKE
// $pre YYY $post <> \u02A2 ; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE
// $pre YYY $post <> \u02A3 ; # LATIN SMALL LETTER DZ DIGRAPH
// $pre YYY $post <> \u02A4 ; # LATIN SMALL LETTER DEZH DIGRAPH
// $pre YYY $post <> \u02A5 ; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL
// $pre YYY $post <> \u02A6 ; # LATIN SMALL LETTER TS DIGRAPH
// $pre YYY $post <> \u02A7 ; # LATIN SMALL LETTER TESH DIGRAPH
// $pre YYY $post <> \u02A8 ; # LATIN SMALL LETTER TC DIGRAPH WITH CURL
// $pre YYY $post <> \u02A9 ; # LATIN SMALL LETTER FENG DIGRAPH
// $pre YYY $post <> \u02AA ; # LATIN SMALL LETTER LS DIGRAPH
// $pre YYY $post <> \u02AB ; # LATIN SMALL LETTER LZ DIGRAPH
// $pre YYY $post <> \u02AC ; # LATIN LETTER BILABIAL PERCUSSIVE
// $pre YYY $post <> \u02AD ; # LATIN LETTER BIDENTAL PERCUSSIVE
// $pre YYY $post <> \u02B0 ; # MODIFIER LETTER SMALL H
// $pre YYY $post <> \u02B1 ; # MODIFIER LETTER SMALL H WITH HOOK
// $pre YYY $post <> \u02B2 ; # MODIFIER LETTER SMALL J
// $pre YYY $post <> \u02B3 ; # MODIFIER LETTER SMALL R
// $pre YYY $post <> \u02B4 ; # MODIFIER LETTER SMALL TURNED R
// $pre YYY $post <> \u02B5 ; # MODIFIER LETTER SMALL TURNED R WITH HOOK
// $pre YYY $post <> \u02B6 ; # MODIFIER LETTER SMALL CAPITAL INVERTED R
// $pre YYY $post <> \u02B7 ; # MODIFIER LETTER SMALL W
// $pre YYY $post <> \u02B8 ; # MODIFIER LETTER SMALL Y
// $pre YYY $post <> \u02E0 ; # MODIFIER LETTER SMALL GAMMA
// $pre YYY $post <> \u02E1 ; # MODIFIER LETTER SMALL L
// $pre YYY $post <> \u02E2 ; # MODIFIER LETTER SMALL S
// $pre YYY $post <> \u02E3 ; # MODIFIER LETTER SMALL X
// $pre YYY $post <> \u02E4 ; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
// $pre YYY $post <> \u1E9A ; # LATIN SMALL LETTER A WITH RIGHT HALF RING
// $pre YYY $post <> \u207F ; # SUPERSCRIPT LATIN SMALL LETTER N
":: NFC (NFD) ;"
}
}

View file

@ -1,50 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Any_Publishing.txt
// Date: Tue May 18 17:24:48 2004
//--------------------------------------------------------------------
// Any_Publishing
t_Any_Publishing {
Rule {
//--------------------------------------------------------------------
// Any-Publishing: rewrites typewriter-style ASCII punctuation as
// publishing punctuation (curly quotes, em dash) and collapses runs of
// spaces. Rules written with <> also apply in the reverse direction;
// rules written with > are forward-only. Rule order is significant:
// an earlier rule takes precedence over a later one that also matches.
//--------------------------------------------------------------------
// Test case
// "The" "(quick)" ('brown') `fox' ` jumped -- "over?"
// Variables
"$single = \\\' ;"
"$space = ' ' ;"
"$double = \\\" ;"
"$back = \\\` ;"
// U+0009 CHARACTER TABULATION (previously U+0008, which is BACKSPACE).
// No rule in this bundle references $tab.
"$tab = '\u0009' ;"
// Contexts after which a straight quote is treated as an opening quote:
// separators (Z), opening punctuation (Ps), initial quote punctuation (Pi),
// or the start of the text (the trailing $ inside the set).
"$makeRight = [[:Z:][:Ps:][:Pi:]$] ;"
// fix UNIX quotes
// The doubled backquote must come first so it is not consumed one
// backquote at a time by the single-backquote rule.
"$back $back > “ ;"
"$back > ‘ ;"
// fix typewriter quotes, by context
// A quote preceded by a $makeRight context opens; any other quote closes.
"$makeRight {$double} <> “ ;"
"$double <> ” ;"
"$makeRight {$single} <> ‘ ;"
"$single <> ’ ;"
// fix multiple spaces and hyphens
// A space that follows another space is removed (forward only).
"$space {$space} > ;"
"'--' <> — ;"
}
}

View file

@ -1,162 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Arabic_Latin.txt
// Date: Tue May 18 17:24:48 2004
//--------------------------------------------------------------------
// Arabic_Latin
t_Arab_Latn {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Arabic <-> Latin transliteration rules.
// Rules written with <> apply in both directions, > is Arabic-to-Latin only,
// and < is Latin-to-Arabic only. Rule order is significant: longer or more
// specific rules are placed before the shorter ones they would be masked by.
// Generally follows UNGEGN <http://www.eki.ee/wgrs/rom1_ar.pdf>
// Occasionally deviates in the direction of ISO 233 <http://homepage.mac.com/sirbinks/pdf/Arabic.pdf>
// a) where required for disambiguation.
// b) with underdot instead of cedilla for letter like SAD, since
// those are explicitly in Unicode for transliteration.
// c) with extra non-Arabic-language letters, like PEH
// Does *not* do assimilation of "al", nor hyphenation.
// While it could be done, we need to determine whether a prefix "al" could
// occur other than as the definite article (since no space is used).
// Forward filter: the set of characters the Arabic-to-Latin direction touches.
":: [[:Arabic:] [‎ⁿ\u060C\u061B\u061F\u0640\u064B-\u0655\u0660-\u066C\u06F0-\u06F9\uFDFC]] ;"
// Normalize to NFKD before the forward rules run (NFC for the reverse direction).
":: NFKD (NFC);"
// Combining marks, written as literal characters, that are attached to Latin
// output so the reverse direction can tell apart romanizations that would
// otherwise be identical (e.g. THEH "t h $disambig" versus TEH "t" + HEH "h").
"$disambig = ̱ ;"
"$disambig2 = ̰ ;"
"$under = ̣ ;"
// Characters whose canonical combining class is neither 0 nor 230; used by
// the TEH MARBUTA special case below.
"$notAbove = [[:^ccc=0:]&[:^ccc=230:]];"
// non-letters
"٫ <> '.' $disambig ;" // ARABIC DECIMAL SEPARATOR
"٬ <> ',' $disambig ;" // ARABIC THOUSANDS SEPARATOR
// ٭ <> ; # ARABIC FIVE POINTED STAR // no need to transliterate
"، <> ',' ;" // ARABIC COMMA
"؛ <> ';' ;" // ARABIC SEMICOLON
"؟ <> '?' ;" // ARABIC QUESTION MARK
"٪ <> '%' ;" // ARABIC PERCENT SIGN
// Extended (U+06F0-U+06F9) digits carry $disambig so that they round-trip
// separately from the plain Arabic-Indic digits that follow.
"۰ <> 0 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT ZERO
"۱ <> 1 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT ONE
"۲ <> 2 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT TWO
"۳ <> 3 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT THREE
"۴ <> 4 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT FOUR
"۵ <> 5 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT FIVE
"۶ <> 6 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT SIX
"۷ <> 7 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT SEVEN
"۸ <> 8 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT EIGHT
"۹ <> 9 $disambig ;" // EXTENDED ARABIC-INDIC DIGIT NINE
"٠ <> 0 ;" // ARABIC-INDIC DIGIT ZERO
"١ <> 1 ;" // ARABIC-INDIC DIGIT ONE
"٢ <> 2 ;" // ARABIC-INDIC DIGIT TWO
"٣ <> 3 ;" // ARABIC-INDIC DIGIT THREE
"٤ <> 4 ;" // ARABIC-INDIC DIGIT FOUR
"٥ <> 5 ;" // ARABIC-INDIC DIGIT FIVE
"٦ <> 6 ;" // ARABIC-INDIC DIGIT SIX
"٧ <> 7 ;" // ARABIC-INDIC DIGIT SEVEN
"٨ <> 8 ;" // ARABIC-INDIC DIGIT EIGHT
"٩ <> 9 ;" // ARABIC-INDIC DIGIT NINE
// letters
// long vowels
"َا<> ā ;" // ARABIC FATHA, ARABIC LETTER ALEF
"ُو <> ū ;" // ARABIC DAMMA, ARABIC LETTER WAW
"ِي <> ī ;" // ARABIC KASRA, ARABIC LETTER YEH
// longer items moved here to prevent masking
"ث <> t h $disambig ;" // ARABIC LETTER THEH
"ذ <> d h $disambig ;" // ARABIC LETTER THAL
"ش <> s h $disambig ;" // ARABIC LETTER SHEEN
"ص <> s $under ;" // ARABIC LETTER SAD
"ض <> d $under ;" // ARABIC LETTER DAD
"ط <> t $under ;" // ARABIC LETTER TAH
"ظ <> z $under ;" // ARABIC LETTER ZAH
"غ <> g h $disambig ;" // ARABIC LETTER GHAIN
// WARNING: special case
// <t, umlaut, half-ring below> will be canonically ordered as <t, half-ring below, umlaut>
// so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
// ةٕ < ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
"ة <> t \u0308 ;" // ARABIC LETTER TEH MARBUTA
// Reverse only: the intervening $notAbove marks are captured as $1 and
// re-emitted after the cursor (|) so that they are processed again.
"ة | $1 < t ($notAbove+) \u0308 ;" // ARABIC LETTER TEH MARBUTA
// non-Arabic language
"ژ <> z h $disambig ;" // ARABIC LETTER JEH
"ڭ <> n $disambig g ;" // ARABIC LETTER NG
"ۋ <> v $disambig ;" // ARABIC LETTER VE
"ی <> y $disambig2 ;" // ARABIC LETTER FARSI YEH
// Arabic language
"ء <> ʾ ;" // ARABIC LETTER HAMZA
"ا <> a $under;" // ARABIC LETTER ALEF
"ب <> b ;" // ARABIC LETTER BEH
"ت <> t ;" // ARABIC LETTER TEH
"ج <> j ;" // ARABIC LETTER JEEM
"ح <> h $under ;" // ARABIC LETTER HAH
"خ <> k h $disambig ;" // ARABIC LETTER KHAH
"د <> d ;" // ARABIC LETTER DAL
"ر <> r ;" // ARABIC LETTER REH
"ز <> z ;" // ARABIC LETTER ZAIN
"س <> s ;" // ARABIC LETTER SEEN
"ع <> ʿ ;" // ARABIC LETTER AIN
// Forward only: TATWEEL is dropped from the Latin output.
"ـ > ;" // ARABIC TATWEEL
"ف <> f ;" // ARABIC LETTER FEH
"ق <> q ;" // ARABIC LETTER QAF
"ك <> k ;" // ARABIC LETTER KAF
"ل <> l ;" // ARABIC LETTER LAM
"م <> m ;" // ARABIC LETTER MEEM
"ن <> n ;" // ARABIC LETTER NOON
"ه <> h ;" // ARABIC LETTER HEH
"و <> w ;" // ARABIC LETTER WAW
"ى <> y $disambig ;" // ARABIC LETTER ALEF MAKSURA
"ي <> y ;" // ARABIC LETTER YEH
"ً <> aⁿ ;" // ARABIC FATHATAN
"ٌ <> uⁿ ;" // ARABIC DAMMATAN
"ٍ <> iⁿ ;" // ARABIC KASRATAN
"َ <> a ;" // ARABIC FATHA
"ُ <> u ;" // ARABIC DAMMA
"ِ <> i ;" // ARABIC KASRA
"ّ <> ̃ ;" // ARABIC SHADDA
"ْ <> ̊ ;" // ARABIC SUKUN
// special combining marks
"ٓ <> ̂ ;" // ARABIC MADDAH ABOVE
"ٔ <> ̉ ;" // ARABIC HAMZA ABOVE
"ٕ <> ̹ ;" // ARABIC HAMZA BELOW
// Some non-Arabic language (not in UNGEGN)
"پ <> p ;" // ARABIC LETTER PEH
"چ <> c h $disambig ;" // ARABIC LETTER TCHEH
"ڤ <> v ;" // ARABIC LETTER VEH
// ڥ <> v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
// ڢ <> f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
"گ <> g ;" // ARABIC LETTER GAF
// fallbacks
// Reverse-only rules for Latin input that has no rule of its own above: each
// one rewrites the input into letters that do have a rule, and the leading |
// places the cursor before the replacement so that it is transliterated again.
"| s < c } [eiy];"
"| k < c ;"
"| i < e ;"
"| u < o ;"
"| ks < x ;"
"| n < ‎ⁿ;"
// Reverse direction only: lowercase the Latin input before the rules above run.
":: (lower) ;"
"::NFC (NFD);"
// Reverse filter: the set of characters the Latin-to-Arabic direction touches.
":: ( [[:Latin:] [%,.0-9;?\u02BE-\u02BF\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339\u037E]] );"
}
}

View file

@ -1,119 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Bengali_InterIndic.txt
// Date: Tue May 18 17:24:48 2004
//--------------------------------------------------------------------
// Bengali_InterIndic
t_Beng_InterIndic {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Bengali-InterIndic
// One-directional (>) table that maps each Bengali code point to a code
// point in the private-use range U+E0xx. For most entries the low byte of
// the target equals the offset of the source within the Bengali block.
// NOTE(review): U+E0xx is presumably the InterIndic pivot encoding shared by
// the other Indic transliterators - confirm against the companion bundles.
// Two-code-point vowel sequences come first so that they are matched before
// the single-code-point rule for \u09C7 further down.
"\u09C7\u09BE>\uE04B;" // VOWEL SIGN O
"\u09C7\u09D7>\uE04C;" // VOWEL SIGN AU
"\u0981>\uE001;" // SIGN CANDRABINDU
"\u0982>\uE002;" // SIGN ANUSVARA
"\u0983>\uE003;" // SIGN VISARGA
"\u0985>\uE005;" // LETTER A
"\u0986>\uE006;" // LETTER AA
"\u0987>\uE007;" // LETTER I
"\u0988>\uE008;" // LETTER II
"\u0989>\uE009;" // LETTER U
"\u098A>\uE00A;" // LETTER UU
"\u098B>\uE00B;" // LETTER VOCALIC R
"\u098C>\uE00C;" // LETTER VOCALIC L
"\u098F>\uE00F;" // LETTER E
"\u0990>\uE010;" // LETTER AI
"\u0993>\uE013;" // LETTER O
"\u0994>\uE014;" // LETTER AU
"\u0995>\uE015;" // LETTER KA
"\u0996>\uE016;" // LETTER KHA
"\u0997>\uE017;" // LETTER GA
"\u0998>\uE018;" // LETTER GHA
"\u0999>\uE019;" // LETTER NGA
"\u099A>\uE01A;" // LETTER CA
"\u099B>\uE01B;" // LETTER CHA
"\u099C>\uE01C;" // LETTER JA
"\u099D>\uE01D;" // LETTER JHA
"\u099E>\uE01E;" // LETTER NYA
"\u099F>\uE01F;" // LETTER TTA
"\u09A0>\uE020;" // LETTER TTHA
"\u09A1>\uE021;" // LETTER DDA
"\u09A2>\uE022;" // LETTER DDHA
"\u09A3>\uE023;" // LETTER NNA
"\u09A4>\uE024;" // LETTER TA
"\u09A5>\uE025;" // LETTER THA
"\u09A6>\uE026;" // LETTER DA
"\u09A7>\uE027;" // LETTER DHA
"\u09A8>\uE028;" // LETTER NA
"\u09AA>\uE02A;" // LETTER PA
"\u09AB>\uE02B;" // LETTER PHA
"\u09AC>\uE02C;" // LETTER BA
"\u09AD>\uE02D;" // LETTER BHA
"\u09AE>\uE02E;" // LETTER MA
"\u09AF>\uE02F;" // LETTER YA
"\u09B0>\uE030;" // LETTER RA
"\u09B2>\uE032;" // LETTER LA
"\u09B6>\uE036;" // LETTER SHA
"\u09B7>\uE037;" // LETTER SSA
"\u09B8>\uE038;" // LETTER SA
"\u09B9>\uE039;" // LETTER HA
"\u09BC>\uE03C;" // SIGN NUKTA
"\u09BD>\uE03D;" // SIGN AVAGRAHA
"\u09BE>\uE03E;" // VOWEL SIGN AA
"\u09BF>\uE03F;" // VOWEL SIGN I
"\u09C0>\uE040;" // VOWEL SIGN II
"\u09C1>\uE041;" // VOWEL SIGN U
"\u09C2>\uE042;" // VOWEL SIGN UU
"\u09C3>\uE043;" // VOWEL SIGN VOCALIC R
"\u09C4>\uE044;" // VOWEL SIGN VOCALIC RR
"\u09C7>\uE047;" // VOWEL SIGN E
"\u09C8>\uE048;" // VOWEL SIGN AI
"\u09CB>\uE04B;" // VOWEL SIGN O (single code point; same target as \u09C7\u09BE above)
"\u09CC>\uE04C;" // VOWEL SIGN AU (single code point; same target as \u09C7\u09D7 above)
//
"\u09CD>\uE04D;" // SIGN VIRAMA
"\u09D7>\uE057;" // AU LENGTH MARK
//
"\u09E0>\uE060;" // LETTER VOCALIC RR
"\u09E1>\uE061;" // LETTER VOCALIC LL
"\u09E2>\uE062;" // VOWEL SIGN VOCALIC L
"\u09E3>\uE063;" // VOWEL SIGN VOCALIC LL
"\u09E6>\uE066;" // DIGIT ZERO
"\u09E7>\uE067;" // DIGIT ONE
"\u09E8>\uE068;" // DIGIT TWO
"\u09E9>\uE069;" // DIGIT THREE
"\u09EA>\uE06A;" // DIGIT FOUR
"\u09EB>\uE06B;" // DIGIT FIVE
"\u09EC>\uE06C;" // DIGIT SIX
"\u09ED>\uE06D;" // DIGIT SEVEN
"\u09EE>\uE06E;" // DIGIT EIGHT
"\u09EF>\uE06F;" // DIGIT NINE
// From here on the target is no longer the source offset: U+09F0..U+09FA
// are mapped to U+E071..U+E07B (offset + 1).
"\u09F0>\ue071;" // Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
"\u09F1>\ue072;" // Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
"\u09F2>\ue073;" // Bengali-InterIndic: RUPEE MARK
"\u09F3>\ue074;" // Bengali-InterIndic: RUPEE SIGN
"\u09F4>\ue075;" // Bengali-InterIndic: CURRENCY NUMERATOR ONE
"\u09F5>\ue076;" // Bengali-InterIndic: CURRENCY NUMERATOR TWO
"\u09F6>\ue077;" // Bengali-InterIndic: CURRENCY NUMERATOR THREE
"\u09F7>\ue078;" // Bengali-InterIndic: CURRENCY NUMERATOR FOUR
"\u09F8>\ue079;" // Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
"\u09F9>\ue07A;" // Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN
"\u09FA>\ue07B;" // ISSHAR
// The danda marks lie outside the U+098x-U+09Fx range handled above
// (U+0964 and U+0965) but are mapped into the same U+E0xx space.
"\u0964>\ue064;" // DANDA
"\u0965>\ue065;" // DOUBLE DANDA
// The normalization step below is disabled (commented out).
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,322 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Cyrillic_Latin.txt
// Date: Tue May 18 17:24:48 2004
//--------------------------------------------------------------------
// Cyrillic_Latin
t_Cyrl_Latn {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// TODO: add remaining characters
// Should add variants for Russian-English, Russian-German
// Those can use this as a base, and then remap cases
// like a $hat to ya or ja.
// :: [\\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ;
//## WARNING, \u0308 must be added to the generated filters, in both directions ###
// MINIMAL FILTER
":: [\u0308\u0102-\u0103\u0114-\u0115\u011E-\u011F\u012C-\u012D\u014E-\u014F\u016C-\u016D\u0306\u0400-\u045F\u0490-\u0495\u0498-\u0499\u04C1-\u04C2\u04D0-\u04DF\u04E2-\u04E7\u04EC-\u04F5\u04F8-\u04F9\u1E1C-\u1E1D\u1EAE-\u1EB7\u1FB0\u1FB8\u1FD0\u1FD8\u1FE0\u1FE8] ;"
":: NFD (NFC) ;"
"$modprime = \u02B9;"
"$modprime2 = \u02BA;"
"$grave = \u0300;"
"$acute = \u0301;"
"$hat = \u0302;"
"$breve = \u0306 ;"
"$dot = \u0307 ;"
"$caron = \u030C ;"
"$comma = \u0326 ;"
"$under = \u0331 ;"
// move up so not masked
"я <> a $hat ;" // CYRILLIC SMALL LETTER YA
"Я <> A $hat ;" // CYRILLIC CAPITAL LETTER YA
"ч <> c $caron ;" // CYRILLIC SMALL LETTER CHE
"Ч <> C $caron;" // CYRILLIC CAPITAL LETTER CHE
// ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
// Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
// ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
// Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
// ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
// Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
"э <> e $acute;" // CYRILLIC SMALL LETTER E
"Э <> E $acute;" // CYRILLIC CAPITAL LETTER E
"є <> e $hat;" // CYRILLIC SMALL LETTER UKRAINIAN IE
"Є <> E $hat;" // CYRILLIC CAPITAL LETTER UKRAINIAN IE
"ш <> s $caron ;" // CYRILLIC SMALL LETTER SHA
"Ш <> S $caron ;" // CYRILLIC CAPITAL LETTER SHA
"щ <> s $hat ;" // CYRILLIC SMALL LETTER SHCHA
"Щ <> S $hat;" // CYRILLIC CAPITAL LETTER SHCHA
"ѕ <> z $hat ;" // CYRILLIC SMALL LETTER DZE
"Ѕ <> Z $hat;" // CYRILLIC CAPITAL LETTER DZE
// ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
// Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
"ю <> u $hat ;" // CYRILLIC SMALL LETTER YU
"Ю <> U $hat ;" // CYRILLIC CAPITAL LETTER YU
"і <> i $acute;" // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
"І <> I $acute;" // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
"ј <> j $caron;" // CYRILLIC SMALL LETTER JE
"Ј <> J $caron;" // CYRILLIC CAPITAL LETTER JE
"љ <> l $hat ;" // CYRILLIC SMALL LETTER LJE
"Љ <> L $hat ;" // CYRILLIC CAPITAL LETTER LJE
"њ <> n $hat ;" // CYRILLIC SMALL LETTER NJE
"Њ <> N $hat ;" // CYRILLIC CAPITAL LETTER NJE
"ћ <> c $acute ;" // CYRILLIC SMALL LETTER TSHE
"Ћ <> C $acute ;" // CYRILLIC CAPITAL LETTER TSHE
"џ <> d $hat ;" // CYRILLIC SMALL LETTER DZHE
"Џ <> D $hat ;" // CYRILLIC CAPITAL LETTER DZHE
// Normal order
"а <> a ;" // CYRILLIC SMALL LETTER A
"А <> A ;" // CYRILLIC CAPITAL LETTER A
"ә <> \u0259 ;" // CYRILLIC SMALL LETTER SCHWA
"Ә <> \u018F ;" // CYRILLIC CAPITAL LETTER SCHWA
"ӕ <> \u00E6 ;" // CYRILLIC SMALL LIGATURE A IE
"Ӕ <> \u00C6 ;" // CYRILLIC CAPITAL LIGATURE A IE
"б <> b ;" // CYRILLIC SMALL LETTER BE
"Б <> B ;" // CYRILLIC CAPITAL LETTER BE
"в <> v ;" // CYRILLIC SMALL LETTER VE
"В <> V ;" // CYRILLIC CAPITAL LETTER VE
"ґ <> g $grave ;" // CYRILLIC SMALL LETTER GHE WITH UPTURN
"Ґ <> G $grave ;" // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
"ғ <> g $dot ;" // CYRILLIC SMALL LETTER GHE WITH STROKE
"Ғ <> G $dot;" // CYRILLIC CAPITAL LETTER GHE WITH STROKE
"ҕ <> g $breve;" // CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
"Ҕ <> G $breve;" // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
"г <> g ;" // CYRILLIC SMALL LETTER GHE
"Г <> G ;" // CYRILLIC CAPITAL LETTER GHE
"д <> d;" // CYRILLIC SMALL LETTER DE
"Д <> D;" // CYRILLIC CAPITAL LETTER DE
"ђ <> đ ;" // CYRILLIC SMALL LETTER DJE
"Ђ <> Đ ;" // CYRILLIC CAPITAL LETTER DJE
"ҙ <> z $comma ;" // CYRILLIC SMALL LETTER ZE WITH DESCENDER
"Ҙ <> Z $comma ;" // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
"е <> e ;" // CYRILLIC SMALL LETTER IE
"Е <> E;" // CYRILLIC CAPITAL LETTER IE
"ж <> z $caron;" // CYRILLIC SMALL LETTER ZHE
"Ж <> Z $caron;" // CYRILLIC CAPITAL LETTER ZHE
// җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
// Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
"з <> z ;" // CYRILLIC SMALL LETTER ZE
"З <> Z;" // CYRILLIC CAPITAL LETTER ZE
"й <> j ;" // CYRILLIC SMALL LETTER I
"Й <> J ;" // CYRILLIC CAPITAL LETTER I
"и <> i ;" // CYRILLIC SMALL LETTER I
"И <> I ;" // CYRILLIC CAPITAL LETTER I
"к <> k ;" // CYRILLIC SMALL LETTER KA
"К <> K;" // CYRILLIC CAPITAL LETTER KA
// қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
// Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
// ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
// Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
// ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
// Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
// ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
// Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
// ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
// Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
"л <> l ;" // CYRILLIC SMALL LETTER EL
"Л <> L;" // CYRILLIC CAPITAL LETTER EL
"м <> m ;" // CYRILLIC SMALL LETTER EM
"М <> M ;" // CYRILLIC CAPITAL LETTER EM
"н <> n ;" // CYRILLIC SMALL LETTER EN
"Н <> N;" // CYRILLIC CAPITAL LETTER EN
// ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
// Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
// ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
// Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
// ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
// Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
"о <> o ;" // CYRILLIC SMALL LETTER O
"О <> O ;" // CYRILLIC CAPITAL LETTER O
// ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
// Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
"п <> p ;" // CYRILLIC SMALL LETTER PE
"П <> P ;" // CYRILLIC CAPITAL LETTER PE
// ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
// Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
// ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
// Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
"р <> r ;" // CYRILLIC SMALL LETTER ER
"Р <> R ;" // CYRILLIC CAPITAL LETTER ER
// ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
// Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
"с <> s ;" // CYRILLIC SMALL LETTER ES
"С <> S ;" // CYRILLIC CAPITAL LETTER ES
// ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
// Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
"т <> t ;" // CYRILLIC SMALL LETTER TE
"Т <> T ;" // CYRILLIC CAPITAL LETTER TE
// ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
// Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
"у <> u ;" // CYRILLIC SMALL LETTER U
"У <> U ;" // CYRILLIC CAPITAL LETTER U
// ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
// Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
// ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
// Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
// ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
// Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
"ф <> f ;" // CYRILLIC SMALL LETTER EF
"Ф <> F ;" // CYRILLIC CAPITAL LETTER EF
"х <> h ;" // CYRILLIC SMALL LETTER HA
"Х <> H;" // CYRILLIC CAPITAL LETTER HA
// ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
// Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
// һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
// Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
// ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
// Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
// ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
// Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
// ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
// Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
// ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
// Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
"ц <> c ;" // CYRILLIC SMALL LETTER TSE
"Ц <> C;" // CYRILLIC CAPITAL LETTER TSE
// ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
// Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
// ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
// Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
// ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
// Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
"Ъ <> $modprime2 $under ;" // CYRILLIC CAPITAL LETTER HARD SIGN
"ъ <> $modprime2 ;" // CYRILLIC SMALL LETTER HARD SIGN
"Ь <> $modprime $under ;" // CYRILLIC CAPITAL LETTER SOFT SIGN
"ь <> $modprime ;" // CYRILLIC SMALL LETTER SOFT SIGN
"ы <> y ;" // CYRILLIC SMALL LETTER YERU
"Ы <> Y ;" // CYRILLIC CAPITAL LETTER YERU
// ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN
// Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
// ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
// Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
// ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
// Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
// ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
// Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS
// ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS
// Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS
// ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
// Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
// ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
// Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
// ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
// Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
// ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
// Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
// ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
// Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
// ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
// Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
// ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
// Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
// Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA
//## ӑ <> XXX ; # CYRILLIC SMALL LETTER A
//## Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
//## ӓ <> XXX ; # CYRILLIC SMALL LETTER A
//## Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A
//## ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA
//## Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA
//## ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE
//## Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE
//## ѐ <> XXX ; # CYRILLIC SMALL LETTER IE
//## Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE
//## ё <> XXX ; # CYRILLIC SMALL LETTER IE
//## Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE
//## ӗ <> XXX ; # CYRILLIC SMALL LETTER IE
//## Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE
//## ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE
//## Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
//## ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE
//## Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
//## ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE
//## Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE
//## ѝ <> XXX ; # CYRILLIC SMALL LETTER I
//## Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I
//## ӣ <> XXX ; # CYRILLIC SMALL LETTER I
//## Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I
//## ӥ <> XXX ; # CYRILLIC SMALL LETTER I
//## Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I
//## ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
//## Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
//## ӧ <> XXX ; # CYRILLIC SMALL LETTER O
//## Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O
//## ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O
//## Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
//## ќ <> XXX ; # CYRILLIC SMALL LETTER KA
//## Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA
//## ӯ <> XXX ; # CYRILLIC SMALL LETTER U
//## Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U
//## ў <> XXX ; # CYRILLIC SMALL LETTER U
//## Ў <> XXX ; # CYRILLIC CAPITAL LETTER U
//## ӱ <> XXX ; # CYRILLIC SMALL LETTER U
//## Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U
//## ӳ <> XXX ; # CYRILLIC SMALL LETTER U
//## Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U
//## ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE
//## Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE
//## ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU
//## Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU
//## ӭ <> XXX ; # CYRILLIC SMALL LETTER E
//## Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E
//## ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
//## Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
// Completeness
"$ignore = [[:Mark:]''] * ;"
"| k < q ;"
"| K < Q ;"
"| u < w ;"
"| U < W ;"
"| KS < X } $ignore [:UppercaseLetter:] ;"
"| KS < [:UppercaseLetter:] $ignore { X ;"
"| Ks < X ;"
"| ks < x ;"
":: NFC (NFD) ;"
// note: a global filter is more efficient, but MUST include all source chars!!
// :: ([\\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]]);
// MINIMAL FILTER: Latin-Cyrillic
":: ( [\u0308A-Za-z\u00C0-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u018F\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0259\u02B9-\u02BA\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344\u0374\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0400\u0403\u040C-\u040E\u0419\u0439\u0450\u0453\u045C-\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F02-\u1F05\u1F0A-\u1F0D\u1F12-\u1F15\u1F1A-\u1F1D\u1F22-\u1F25\u1F2A-\u1F2D\u1F32-\u1F35\u1F3A-\u1F3D\u1F42-\u1F45\u1F4A-\u1F4D\u1F52-\u1F55\u1F5B\u1F5D\u1F62-\u1F65\u1F6A-\u1F6D\u1F70-\u1F7D\u1F82-\u1F85\u1F8A-\u1F8D\u1F92-\u1F95\u1F9A-\u1F9D\u1FA2-\u1FA5\u1FAA-\u1FAD\u1FB0\u1FB2\u1FB4\u1FB8\u1FBA-\u1FBB\u1FC2\u1FC4\u1FC8-\u1FCB\u1FCD-\u1FCE\u1FD0\u1FD2-\u1FD3\u1FD8\u1FDA-\u1FDB\u1FDD-\u1FDE\u1FE0\u1FE2-\u1FE3\u1FE8\u1FEA-\u1FEB\u1FED-\u1FEE\u1FF2\u1FF4\u1FF8-\u1FFB\u212A-\u212B] ) ;"
}
}

View file

@ -1,133 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Devanagari_InterIndic.txt
// Date: Tue May 18 17:24:48 2004
//--------------------------------------------------------------------
// Devanagari_InterIndic
// Resource table t_Deva_InterIndic: transliteration rules, given as a sequence
// of quoted rule strings under the Rule key.
// Every rule below is one-way ('>') and rewrites a single Devanagari code
// point U+09xx to the Private Use Area code point U+E0xx that has the same
// low byte (U+0901 -> U+E001 ... U+0970 -> U+E070).
// No rules are present for U+093A-U+093B, U+094E-U+094F or U+0955-U+0957.
// NOTE(review): the U+E0xx values are presumably the shared "InterIndic" pivot
// code points consumed by sibling InterIndic-* rule sets -- confirm.
t_Deva_InterIndic {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Devanagari-InterIndic
// The NFD step below is commented out, so no normalization is requested here.
// :: NFD;
//Rules for Decomposed characters
"\u0901>\uE001;" // SIGN CANDRABINDU
"\u0902>\uE002;" // SIGN ANUSVARA
"\u0903>\uE003;" // SIGN VISARGA
"\u0904>\uE004;" // LETTER SHORT A
"\u0905>\uE005;" // LETTER A
"\u0906>\uE006;" // LETTER AA
"\u0907>\uE007;" // LETTER I
"\u0908>\uE008;" // LETTER II
"\u0909>\uE009;" // LETTER U
"\u090A>\uE00A;" // LETTER UU
"\u090B>\uE00B;" // LETTER VOCALIC R
"\u090C>\uE00C;" // LETTER VOCALIC L
"\u090D>\uE00D;" // LETTER CANDRA E (For representing English sounds)
"\u090E>\uE00E;" // UNMAPPED LETTER SHORT E(For Southern Scripts)
"\u090F>\uE00F;" // LETTER E
"\u0910>\uE010;" // LETTER AI
"\u0911>\uE011;" // LETTER CANDRA O (For representing English sounds)
"\u0912>\uE012;" // UNMAPPED LETTER SHORT O (For Southern Scripts)
"\u0913>\uE013;" // LETTER O
"\u0914>\uE014;" // LETTER AU
"\u0915>\uE015;" // LETTER KA
"\u0916>\uE016;" // LETTER KHA
"\u0917>\uE017;" // LETTER GA
"\u0918>\uE018;" // LETTER GHA
"\u0919>\uE019;" // LETTER NGA
"\u091A>\uE01A;" // LETTER CA
"\u091B>\uE01B;" // LETTER CHA
"\u091C>\uE01C;" // LETTER JA
"\u091D>\uE01D;" // LETTER JHA
"\u091E>\uE01E;" // LETTER NYA
"\u091F>\uE01F;" // LETTER TTA
"\u0920>\uE020;" // LETTER TTHA
"\u0921>\uE021;" // LETTER DDA
"\u0922>\uE022;" // LETTER DDHA
"\u0923>\uE023;" // LETTER NNA
"\u0924>\uE024;" // LETTER TA
"\u0925>\uE025;" // LETTER THA
"\u0926>\uE026;" // LETTER DA
"\u0927>\uE027;" // LETTER DHA
"\u0928>\uE028;" // LETTER NA
"\u0929>\uE029;" // LETTER NNNA (precomposed NA + NUKTA)
"\u092A>\uE02A;" // LETTER PA
"\u092B>\uE02B;" // LETTER PHA
"\u092C>\uE02C;" // LETTER BA
"\u092D>\uE02D;" // LETTER BHA
"\u092E>\uE02E;" // LETTER MA
"\u092F>\uE02F;" // LETTER YA
"\u0930>\uE030;" // LETTER RA
"\u0931>\uE031;" // LETTER RRA (precomposed RA + NUKTA)
"\u0932>\uE032;" // LETTER LA
"\u0933>\uE033;" // LETTER LLA
"\u0934>\uE034;" // LETTER LLLA (precomposed LLA + NUKTA)
"\u0935>\uE035;" // LETTER VA
"\u0936>\uE036;" // LETTER SHA
"\u0937>\uE037;" // LETTER SSA
"\u0938>\uE038;" // LETTER SA
"\u0939>\uE039;" // LETTER HA
"\u093C>\uE03C;" // SIGN NUKTA
"\u093D>\uE03D;" // SIGN AVAGRAHA
"\u093E>\uE03E;" // VOWEL SIGN AA
"\u093F>\uE03F;" // VOWEL SIGN I
"\u0940>\uE040;" // VOWEL SIGN II
"\u0941>\uE041;" // VOWEL SIGN U
"\u0942>\uE042;" // VOWEL SIGN UU
"\u0943>\uE043;" // VOWEL SIGN VOCALIC R
"\u0944>\uE044;" // VOWEL SIGN VOCALIC RR
"\u0945>\uE045;" // VOWEL SIGN CANDRA E
"\u0946>\uE046;" // UNMAPPED VOWEL SIGN SHORT E
"\u0947>\uE047;" // VOWEL SIGN E
"\u0948>\uE048;" // VOWEL SIGN AI
"\u0949>\uE049;" // VOWEL SIGN CANDRA O
"\u094A>\uE04A;" // UNMAPPED VOWEL SIGN SHORT O
"\u094B>\uE04B;" // VOWEL SIGN O
"\u094C>\uE04C;" // VOWEL SIGN AU
"\u094D>\uE04D;" // SIGN VIRAMA
"\u0950>\uE050;" // OM
"\u0951>\uE051;" // UNMAPPED STRESS SIGN UDATTA
"\u0952>\uE052;" // UNMAPPED STRESS SIGN ANUDATTA
"\u0953>\uE053;" // UNMAPPED GRAVE ACCENT
"\u0954>\uE054;" // UNMAPPED ACUTE ACCENT
// U+0958-U+095F: precomposed consonant + NUKTA letters
"\u0958>\uE058;" // LETTER QA
"\u0959>\uE059;" // LETTER KHHA
"\u095A>\uE05a;" // LETTER GHHA
"\u095B>\uE05b;" // LETTER ZA
"\u095C>\uE05c;" // LETTER DDDHA
"\u095D>\uE05d;" // LETTER RHA
"\u095E>\uE05e;" // LETTER FA
"\u095F>\uE05f;" // LETTER YYA
"\u0960>\uE060;" // LETTER VOCALIC RR
"\u0961>\uE061;" // LETTER VOCALIC LL
"\u0962>\uE062;" // VOWEL SIGN VOCALIC L
"\u0963>\uE063;" // VOWEL SIGN VOCALIC LL
"\u0964>\ue064;" // DANDA
"\u0965>\ue065;" // DOUBLE DANDA
"\u0966>\uE066;" // DIGIT ZERO
"\u0967>\uE067;" // DIGIT ONE
"\u0968>\uE068;" // DIGIT TWO
"\u0969>\uE069;" // DIGIT THREE
"\u096A>\uE06A;" // DIGIT FOUR
"\u096B>\uE06B;" // DIGIT FIVE
"\u096C>\uE06C;" // DIGIT SIX
"\u096D>\uE06D;" // DIGIT SEVEN
"\u096E>\uE06E;" // DIGIT EIGHT
"\u096F>\uE06F;" // DIGIT NINE
"\u0970>\uE070;" // Devanagari-InterIndic: ABBREVIATION SIGN
// The closing normalization step is likewise commented out.
// :: NFC (NFD) ;
}
}

View file

@ -1,287 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Fullwidth_Halfwidth.txt
// Date: Tue May 18 17:24:48 2004
//--------------------------------------------------------------------
// Fullwidth_Halfwidth
// Resource table t_FWidth_HWidth: two-way ('<>') width-conversion rules, given
// as a sequence of quoted rule strings under the Rule key.
// Going by the trailing comments, each rule pairs a fullwidth (or standard
// width) form on the left with its halfwidth/ASCII form on the right.
// The multi-character voiced-kana rules are listed before the single-character
// rules, presumably so that the longer matches are tried first.
// NOTE(review): as this file is rendered here, several rules show an empty
// side (e.g. the ASCII and HANGUL FILLER rules) or identical text on both
// sides (the kana rules), although the comments name distinct fullwidth and
// halfwidth characters -- verify the literals against the original encoding
// before editing them.
t_FWidth_HWidth {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Fullwidth-Halfwidth
// Mechanically generated from Unicode Character Database
// IDEOGRAPHIC SPACE then added, and
// FULLWIDTH MACRON changed to map to MACRON, not SPACE + COMBINING MACRON
// multicharacter
"ガ<>ガ;" // to KATAKANA LETTER GA
"ギ<>ギ;" // to KATAKANA LETTER GI
"グ<>グ;" // to KATAKANA LETTER GU
"ゲ<>ゲ;" // to KATAKANA LETTER GE
"ゴ<>ゴ;" // to KATAKANA LETTER GO
"ザ<>ザ;" // to KATAKANA LETTER ZA
"ジ<>ジ;" // to KATAKANA LETTER ZI
"ズ<>ズ;" // to KATAKANA LETTER ZU
"ゼ<>ゼ;" // to KATAKANA LETTER ZE
"ゾ<>ゾ;" // to KATAKANA LETTER ZO
"ダ<>ダ;" // to KATAKANA LETTER DA
"ヂ<>ヂ;" // to KATAKANA LETTER DI
"ヅ<>ヅ;" // to KATAKANA LETTER DU
"デ<>デ;" // to KATAKANA LETTER DE
"ド<>ド;" // to KATAKANA LETTER DO
"バ<>バ;" // to KATAKANA LETTER BA
"パ<>パ;" // to KATAKANA LETTER PA
"ビ<>ビ;" // to KATAKANA LETTER BI
"ピ<>ピ;" // to KATAKANA LETTER PI
"ブ<>ブ;" // to KATAKANA LETTER BU
"プ<>プ;" // to KATAKANA LETTER PU
"ベ<>ベ;" // to KATAKANA LETTER BE
"ペ<>ペ;" // to KATAKANA LETTER PE
"ボ<>ボ;" // to KATAKANA LETTER BO
"ポ<>ポ;" // to KATAKANA LETTER PO
"ヴ<>ヴ;" // to KATAKANA LETTER VU
"ヷ<>ヷ;" // to KATAKANA LETTER VA
"ヺ<>ヺ;" // to KATAKANA LETTER VO
// single character
// ASCII punctuation and digits: the right-hand side is quoted because these
// characters would otherwise be read as rule syntax.
"<>'!';" // from FULLWIDTH EXCLAMATION MARK
"<>'\\\"';" // from FULLWIDTH QUOTATION MARK
"<>'#';" // from FULLWIDTH NUMBER SIGN
"<>'$';" // from FULLWIDTH DOLLAR SIGN
"<>'%';" // from FULLWIDTH PERCENT SIGN
"<>'&';" // from FULLWIDTH AMPERSAND
"<>'';" // from FULLWIDTH APOSTROPHE
"<>'(';" // from FULLWIDTH LEFT PARENTHESIS
"<>')';" // from FULLWIDTH RIGHT PARENTHESIS
"<>'*';" // from FULLWIDTH ASTERISK
"<>'+';" // from FULLWIDTH PLUS SIGN
"<>',';" // from FULLWIDTH COMMA
"<>'-';" // from FULLWIDTH HYPHEN-MINUS
"<>'.';" // from FULLWIDTH FULL STOP
"<>'/';" // from FULLWIDTH SOLIDUS
"<>'0';" // from FULLWIDTH DIGIT ZERO
"<>'1';" // from FULLWIDTH DIGIT ONE
"<>'2';" // from FULLWIDTH DIGIT TWO
"<>'3';" // from FULLWIDTH DIGIT THREE
"<>'4';" // from FULLWIDTH DIGIT FOUR
"<>'5';" // from FULLWIDTH DIGIT FIVE
"<>'6';" // from FULLWIDTH DIGIT SIX
"<>'7';" // from FULLWIDTH DIGIT SEVEN
"<>'8';" // from FULLWIDTH DIGIT EIGHT
"<>'9';" // from FULLWIDTH DIGIT NINE
"<>':';" // from FULLWIDTH COLON
"<>';';" // from FULLWIDTH SEMICOLON
"<>'<';" // from FULLWIDTH LESS-THAN SIGN
"<>'=';" // from FULLWIDTH EQUALS SIGN
"<>'>';" // from FULLWIDTH GREATER-THAN SIGN
"<>'?';" // from FULLWIDTH QUESTION MARK
"<>'@';" // from FULLWIDTH COMMERCIAL AT
// Latin capital letters (unquoted on the right-hand side)
"<>A;" // from FULLWIDTH LATIN CAPITAL LETTER A
"<>B;" // from FULLWIDTH LATIN CAPITAL LETTER B
"<>C;" // from FULLWIDTH LATIN CAPITAL LETTER C
"<>D;" // from FULLWIDTH LATIN CAPITAL LETTER D
"<>E;" // from FULLWIDTH LATIN CAPITAL LETTER E
"<>F;" // from FULLWIDTH LATIN CAPITAL LETTER F
"<>G;" // from FULLWIDTH LATIN CAPITAL LETTER G
"<>H;" // from FULLWIDTH LATIN CAPITAL LETTER H
"<>I;" // from FULLWIDTH LATIN CAPITAL LETTER I
"<>J;" // from FULLWIDTH LATIN CAPITAL LETTER J
"<>K;" // from FULLWIDTH LATIN CAPITAL LETTER K
"<>L;" // from FULLWIDTH LATIN CAPITAL LETTER L
"<>M;" // from FULLWIDTH LATIN CAPITAL LETTER M
"<>N;" // from FULLWIDTH LATIN CAPITAL LETTER N
"<>O;" // from FULLWIDTH LATIN CAPITAL LETTER O
"<>P;" // from FULLWIDTH LATIN CAPITAL LETTER P
"<>Q;" // from FULLWIDTH LATIN CAPITAL LETTER Q
"<>R;" // from FULLWIDTH LATIN CAPITAL LETTER R
"<>S;" // from FULLWIDTH LATIN CAPITAL LETTER S
"<>T;" // from FULLWIDTH LATIN CAPITAL LETTER T
"<>U;" // from FULLWIDTH LATIN CAPITAL LETTER U
"<>V;" // from FULLWIDTH LATIN CAPITAL LETTER V
"<>W;" // from FULLWIDTH LATIN CAPITAL LETTER W
"<>X;" // from FULLWIDTH LATIN CAPITAL LETTER X
"<>Y;" // from FULLWIDTH LATIN CAPITAL LETTER Y
"<>Z;" // from FULLWIDTH LATIN CAPITAL LETTER Z
"<>'[';" // from FULLWIDTH LEFT SQUARE BRACKET
"<>'\\';" // from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
"<>']';" // from FULLWIDTH RIGHT SQUARE BRACKET
"<>'^';" // from FULLWIDTH CIRCUMFLEX ACCENT
"_<>'_';" // from FULLWIDTH LOW LINE
"<>'`';" // from FULLWIDTH GRAVE ACCENT
// Latin small letters
"<>a;" // from FULLWIDTH LATIN SMALL LETTER A
"<>b;" // from FULLWIDTH LATIN SMALL LETTER B
"<>c;" // from FULLWIDTH LATIN SMALL LETTER C
"<>d;" // from FULLWIDTH LATIN SMALL LETTER D
"<>e;" // from FULLWIDTH LATIN SMALL LETTER E
"<>f;" // from FULLWIDTH LATIN SMALL LETTER F
"<>g;" // from FULLWIDTH LATIN SMALL LETTER G
"<>h;" // from FULLWIDTH LATIN SMALL LETTER H
"<>i;" // from FULLWIDTH LATIN SMALL LETTER I
"<>j;" // from FULLWIDTH LATIN SMALL LETTER J
"<>k;" // from FULLWIDTH LATIN SMALL LETTER K
"<>l;" // from FULLWIDTH LATIN SMALL LETTER L
"<>m;" // from FULLWIDTH LATIN SMALL LETTER M
"<>n;" // from FULLWIDTH LATIN SMALL LETTER N
"<>o;" // from FULLWIDTH LATIN SMALL LETTER O
"<>p;" // from FULLWIDTH LATIN SMALL LETTER P
"<>q;" // from FULLWIDTH LATIN SMALL LETTER Q
"<>r;" // from FULLWIDTH LATIN SMALL LETTER R
"<>s;" // from FULLWIDTH LATIN SMALL LETTER S
"<>t;" // from FULLWIDTH LATIN SMALL LETTER T
"<>u;" // from FULLWIDTH LATIN SMALL LETTER U
"<>v;" // from FULLWIDTH LATIN SMALL LETTER V
"<>w;" // from FULLWIDTH LATIN SMALL LETTER W
"<>x;" // from FULLWIDTH LATIN SMALL LETTER X
"<>y;" // from FULLWIDTH LATIN SMALL LETTER Y
"<>z;" // from FULLWIDTH LATIN SMALL LETTER Z
"<>'{';" // from FULLWIDTH LEFT CURLY BRACKET
"<>'|';" // from FULLWIDTH VERTICAL LINE
"<>'}';" // from FULLWIDTH RIGHT CURLY BRACKET
"<>'~';" // from FULLWIDTH TILDE
// CJK punctuation and katakana ("to HALFWIDTH ..." names the right-hand side)
"。<>。;" // to HALFWIDTH IDEOGRAPHIC FULL STOP
"「<>「;" // to HALFWIDTH LEFT CORNER BRACKET
"」<>」;" // to HALFWIDTH RIGHT CORNER BRACKET
"、<>、;" // to HALFWIDTH IDEOGRAPHIC COMMA
"・<>・;" // to HALFWIDTH KATAKANA MIDDLE DOT
"ヲ<>ヲ;" // to HALFWIDTH KATAKANA LETTER WO
"ァ<>ァ;" // to HALFWIDTH KATAKANA LETTER SMALL A
"ィ<>ィ;" // to HALFWIDTH KATAKANA LETTER SMALL I
"ゥ<>ゥ;" // to HALFWIDTH KATAKANA LETTER SMALL U
"ェ<>ェ;" // to HALFWIDTH KATAKANA LETTER SMALL E
"ォ<>ォ;" // to HALFWIDTH KATAKANA LETTER SMALL O
"ャ<>ャ;" // to HALFWIDTH KATAKANA LETTER SMALL YA
"ュ<>ュ;" // to HALFWIDTH KATAKANA LETTER SMALL YU
"ョ<>ョ;" // to HALFWIDTH KATAKANA LETTER SMALL YO
"ッ<>ッ;" // to HALFWIDTH KATAKANA LETTER SMALL TU
"ー<>ー;" // to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
"ア<>ア;" // to HALFWIDTH KATAKANA LETTER A
"イ<>イ;" // to HALFWIDTH KATAKANA LETTER I
"ウ<>ウ;" // to HALFWIDTH KATAKANA LETTER U
"エ<>エ;" // to HALFWIDTH KATAKANA LETTER E
"オ<>オ;" // to HALFWIDTH KATAKANA LETTER O
"カ<>カ;" // to HALFWIDTH KATAKANA LETTER KA
"キ<>キ;" // to HALFWIDTH KATAKANA LETTER KI
"ク<>ク;" // to HALFWIDTH KATAKANA LETTER KU
"ケ<>ケ;" // to HALFWIDTH KATAKANA LETTER KE
"コ<>コ;" // to HALFWIDTH KATAKANA LETTER KO
"サ<>サ;" // to HALFWIDTH KATAKANA LETTER SA
"シ<>シ;" // to HALFWIDTH KATAKANA LETTER SI
"ス<>ス;" // to HALFWIDTH KATAKANA LETTER SU
"セ<>セ;" // to HALFWIDTH KATAKANA LETTER SE
"ソ<>ソ;" // to HALFWIDTH KATAKANA LETTER SO
"タ<>タ;" // to HALFWIDTH KATAKANA LETTER TA
"チ<>チ;" // to HALFWIDTH KATAKANA LETTER TI
"ツ<>ツ;" // to HALFWIDTH KATAKANA LETTER TU
"テ<>テ;" // to HALFWIDTH KATAKANA LETTER TE
"ト<>ト;" // to HALFWIDTH KATAKANA LETTER TO
"ナ<>ナ;" // to HALFWIDTH KATAKANA LETTER NA
"ニ<>ニ;" // to HALFWIDTH KATAKANA LETTER NI
"ヌ<>ヌ;" // to HALFWIDTH KATAKANA LETTER NU
"ネ<>ネ;" // to HALFWIDTH KATAKANA LETTER NE
"<>ノ;" // to HALFWIDTH KATAKANA LETTER NO
"ハ<>ハ;" // to HALFWIDTH KATAKANA LETTER HA
"ヒ<>ヒ;" // to HALFWIDTH KATAKANA LETTER HI
"フ<>フ;" // to HALFWIDTH KATAKANA LETTER HU
"ヘ<>ヘ;" // to HALFWIDTH KATAKANA LETTER HE
"ホ<>ホ;" // to HALFWIDTH KATAKANA LETTER HO
"マ<>マ;" // to HALFWIDTH KATAKANA LETTER MA
"ミ<>ミ;" // to HALFWIDTH KATAKANA LETTER MI
"ム<>ム;" // to HALFWIDTH KATAKANA LETTER MU
"メ<>メ;" // to HALFWIDTH KATAKANA LETTER ME
"モ<>モ;" // to HALFWIDTH KATAKANA LETTER MO
"ヤ<>ヤ;" // to HALFWIDTH KATAKANA LETTER YA
"ユ<>ユ;" // to HALFWIDTH KATAKANA LETTER YU
"ヨ<>ヨ;" // to HALFWIDTH KATAKANA LETTER YO
"ラ<>ラ;" // to HALFWIDTH KATAKANA LETTER RA
"リ<>リ;" // to HALFWIDTH KATAKANA LETTER RI
"ル<>ル;" // to HALFWIDTH KATAKANA LETTER RU
"レ<>レ;" // to HALFWIDTH KATAKANA LETTER RE
"ロ<>ロ;" // to HALFWIDTH KATAKANA LETTER RO
"ワ<>ワ;" // to HALFWIDTH KATAKANA LETTER WA
"ン<>ン;" // to HALFWIDTH KATAKANA LETTER N
"゙<>゙;" // to HALFWIDTH KATAKANA VOICED SOUND MARK
"゚<>゚;" // to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
// Hangul letters
"<>;" // to HALFWIDTH HANGUL FILLER
"ᄀ<>ᄀ;" // to HALFWIDTH HANGUL LETTER KIYEOK
"ᄁ<>ᄁ;" // to HALFWIDTH HANGUL LETTER SSANGKIYEOK
"ᆪ<>ᆪ;" // to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
"ᄂ<>ᄂ;" // to HALFWIDTH HANGUL LETTER NIEUN
"ᆬ<>ᆬ;" // to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
"ᆭ<>ᆭ;" // to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
"ᄃ<>ᄃ;" // to HALFWIDTH HANGUL LETTER TIKEUT
"ᄄ<>ᄄ;" // to HALFWIDTH HANGUL LETTER SSANGTIKEUT
"ᄅ<>ᄅ;" // to HALFWIDTH HANGUL LETTER RIEUL
"ᆰ<>ᆰ;" // to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
"ᆱ<>ᆱ;" // to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
"ᆲ<>ᆲ;" // to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
"ᆳ<>ᆳ;" // to HALFWIDTH HANGUL LETTER RIEUL-SIOS
"ᆴ<>ᆴ;" // to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
"ᆵ<>ᆵ;" // to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
"ᄚ<>ᄚ;" // to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
"ᄆ<>ᄆ;" // to HALFWIDTH HANGUL LETTER MIEUM
"ᄇ<>ᄇ;" // to HALFWIDTH HANGUL LETTER PIEUP
"ᄈ<>ᄈ;" // to HALFWIDTH HANGUL LETTER SSANGPIEUP
"ᄡ<>ᄡ;" // to HALFWIDTH HANGUL LETTER PIEUP-SIOS
"ᄉ<>ᄉ;" // to HALFWIDTH HANGUL LETTER SIOS
"ᄊ<>ᄊ;" // to HALFWIDTH HANGUL LETTER SSANGSIOS
"ᄋ<>ᄋ;" // to HALFWIDTH HANGUL LETTER IEUNG
"ᄌ<>ᄌ;" // to HALFWIDTH HANGUL LETTER CIEUC
"ᄍ<>ᄍ;" // to HALFWIDTH HANGUL LETTER SSANGCIEUC
"ᄎ<>ᄎ;" // to HALFWIDTH HANGUL LETTER CHIEUCH
"ᄏ<>ᄏ;" // to HALFWIDTH HANGUL LETTER KHIEUKH
"ᄐ<>ᄐ;" // to HALFWIDTH HANGUL LETTER THIEUTH
"ᄑ<>ᄑ;" // to HALFWIDTH HANGUL LETTER PHIEUPH
"ᄒ<>ᄒ;" // to HALFWIDTH HANGUL LETTER HIEUH
"ᅡ<>ᅡ;" // to HALFWIDTH HANGUL LETTER A
"ᅢ<>ᅢ;" // to HALFWIDTH HANGUL LETTER AE
"ᅣ<>ᅣ;" // to HALFWIDTH HANGUL LETTER YA
"ᅤ<>ᅤ;" // to HALFWIDTH HANGUL LETTER YAE
"ᅥ<>ᅥ;" // to HALFWIDTH HANGUL LETTER EO
"ᅦ<>ᅦ;" // to HALFWIDTH HANGUL LETTER E
"ᅧ<>ᅧ;" // to HALFWIDTH HANGUL LETTER YEO
"ᅨ<>ᅨ;" // to HALFWIDTH HANGUL LETTER YE
"ᅩ<>ᅩ;" // to HALFWIDTH HANGUL LETTER O
"ᅪ<>ᅪ;" // to HALFWIDTH HANGUL LETTER WA
"ᅫ<>ᅫ;" // to HALFWIDTH HANGUL LETTER WAE
"ᅬ<>ᅬ;" // to HALFWIDTH HANGUL LETTER OE
"ᅭ<>ᅭ;" // to HALFWIDTH HANGUL LETTER YO
"ᅮ<>ᅮ;" // to HALFWIDTH HANGUL LETTER U
"ᅯ<>ᅯ;" // to HALFWIDTH HANGUL LETTER WEO
"ᅰ<>ᅰ;" // to HALFWIDTH HANGUL LETTER WE
"ᅱ<>ᅱ;" // to HALFWIDTH HANGUL LETTER WI
"ᅲ<>ᅲ;" // to HALFWIDTH HANGUL LETTER YU
"ᅳ<>ᅳ;" // to HALFWIDTH HANGUL LETTER EU
"ᅴ<>ᅴ;" // to HALFWIDTH HANGUL LETTER YI
"ᅵ<>ᅵ;" // to HALFWIDTH HANGUL LETTER I
// Currency signs, symbols, arrows and shapes
"¢<>'¢';" // from FULLWIDTH CENT SIGN
"£<>'£';" // from FULLWIDTH POUND SIGN
"¬<>'¬';" // from FULLWIDTH NOT SIGN
" ̄<>'¯';" // from FULLWIDTH MACRON
"' '<>' ';" // ideographic space (place this after MACRON)
"¦<>'¦';" // from FULLWIDTH BROKEN BAR
"¥<>'¥';" // from FULLWIDTH YEN SIGN
"₩<>₩;" // from FULLWIDTH WON SIGN
"│<>;" // to HALFWIDTH FORMS LIGHT VERTICAL
"'←'<>'←';" // to HALFWIDTH LEFTWARDS ARROW
"↑<>↑;" // to HALFWIDTH UPWARDS ARROW
"'→'<>'→';" // to HALFWIDTH RIGHTWARDS ARROW
"↓<>↓;" // to HALFWIDTH DOWNWARDS ARROW
"■<>■;" // to HALFWIDTH BLACK SQUARE
"○<>○;" // to HALFWIDTH WHITE CIRCLE
// eof
}
}

View file

@ -1,361 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Greek_Latin.txt
// Date: Tue May 18 17:24:48 2004
//--------------------------------------------------------------------
// Greek_Latin
// Resource table t_Grek_Latn: Greek <-> Latin transliteration rules, given as
// a sequence of quoted rule strings under the Rule key.
// Layout: forward filter and NFD step, variable definitions, conversion rules,
// then the NFC step and the reverse (Latin-Greek) filter in parentheses.
// Rule operators used below: '>' forward only, '<' reverse only, '<>' both
// directions; '{' closes a before-context, '}' opens an after-context, '|'
// sets the cursor position and $1 refers to the first parenthesised segment.
t_Grek_Latn {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Rules are predicated on running NFD first, and NFC afterwards
// :: [\\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ;
// MINIMAL FILTER GENERATED FOR: Greek-Latin
// NOTE(review): the set below contains the range \u03F7-\u07FB, which runs far
// beyond the Greek block; \u03FB may have been intended -- confirm.
":: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u03F7-\u07FB\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u03F9] ;"
":: NFD (NFC) ;"
// TEST CASES
// Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
// ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
// ᾳ ῃ ῳ ὃ ὄ
// ὠς ὡς ὢς ὣς
// Ὠς Ὡς Ὢς Ὣς
// ὨΣ ὩΣ ὪΣ ὫΣ
// Ạ, ạ, Ẹ, ẹ, Ọ, ọ
// Useful variables
"$lower = [[:latin:][:greek:] & [:Ll:]];"
"$glower = [[:greek:] & [:Ll:]];"
"$upper = [[:latin:][:greek:] & [:Lu:]] ;"
"$accent = [:M:] ;"
// NOTE: restrict to just the Greek & Latin accents that we care about
// TODO: broaden out once iteration is fixed
"$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;"
"$macron = \u0304 ;"
"$ddot = \u0308 ;"
"$ddotmac = [$ddot$macron];"
"$lcgvowel = [αεηιουω] ;"
"$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
"$gvowel = [$lcgvowel $ucgvowel] ;"
"$lcgvowelC = [$lcgvowel $accent] ;" // NOTE(review): not referenced by any rule in this table
"$evowel = [aeiouyAEIOUY];"
"$evowel2 = [iuyIUY];"
"$vowel = [ $evowel $gvowel] ;"
"$gammaLike = [ΓΚΞΧγκξχϰ] ;"
"$egammaLike = [GKXCgkxc] ;"
// The next three (and $under further down) are literal combining marks.
"$smooth = ̓ ;"
"$rough = ̔ ;"
"$iotasub = ͅ ;"
"$evowel_i = [$evowel-[iI]] ;" // NOTE(review): not referenced by any rule in this table
"$evowel2_i = [uyUY];"
"$underbar = \u0331;"
"$afterLetter = [:L:] [[:M:]\\\']* ;"
"$beforeLetter = [[:M:]\\\']* [:L:] ;"
"$beforeLower = $accent * $lower ;"
"$notLetter = [^[:L:][:M:]] ;"
// NOTE(review): $under looks like the same mark as $underbar (\u0331) -- confirm.
"$under = ̱;"
// Fix punctuation
// preserve original
"\\\: <> \\\: $under ;"
"\\\? <> \\\? $under ;"
"\\\; <> \\\? ;"
"· <> \\\: ;"
// CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
"\u0342 <> \u0302 ;"
// IOTA: convert iota subscript to iota
// first make previous alpha long!
"$accent_minus = [[$accent]-[$iotasub$macron]];"
"Α } $accent_minus * $iotasub > | Α $macron ;"
"α } $accent_minus * $iotasub > | α $macron ;"
// now convert to uppercase if after uppercase, otherwise to lowercase
"$upper $accent * { $iotasub > I ;"
"$iotasub > i ;"
"| $1 $iotasub < ($evowel $macron $accentMinus *) i ;"
"| $1 $iotasub < ($evowel $macron $accentMinus *) I ;"
// BREATHING
// Convert rough breathing to h, and move before letters.
// Make A ` x = > H a x
"Α ($macron?) $rough } $beforeLower > H | α $1;"
"Ε $rough } $beforeLower > H | ε;"
"Η $rough } $beforeLower > H | η ;"
"Ι ($ddot?) $rough } $beforeLower > H | ι $1;"
"Ο $rough } $beforeLower > H | ο ;"
"Υ $rough } $beforeLower > H | υ ;"
"Ω ($ddot?) $rough } $beforeLower > H | ω $1;"
// Make A x ` = > H a x
"Α ($glower $macron?) $rough > H | α $1 ;"
"Ε ($glower) $rough > H | ε $1 ;"
"Η ($glower) $rough > H | η $1 ;"
"Ι ($glower $ddot?) $rough > H | ι $1 ;"
"Ο ($glower) $rough > H | ο $1 ;"
"Υ ($glower) $rough > H | υ $1 ;"
"Ω ($glower $ddot?) $rough > H | ω $1 ;"
//Otherwise, make x ` into h x and X ` into H X
"($lcgvowel + $ddotmac? ) $rough > h | $1 ;"
"($gvowel + $ddotmac? ) $rough > H | $1 ;"
// Go backwards with H
"| $1 $rough < h ($evowel $macron $ddot? $evowel2_i $macron?) ;"
"| $1 $rough < h ($evowel $ddot? $evowel2 $macron?) ;"
"| $1 $rough < h ($evowel $macron? $ddot?) ;"
"| $1 $rough < H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;"
"| $1 $rough < H ([AEIOUY] $ddot? $evowel2 $macron?) ;"
"| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;"
// titlecase, have to fix individually
// in the future, we should add &uppercase() to make this easier
"| A $1 $rough < H a ($macron $ddot? $evowel2_i $macron?) ;"
"| E $1 $rough < H e ($macron $ddot? $evowel2_i $macron?) ;"
"| I $1 $rough < H i ($macron $ddot? $evowel2_i $macron?) ;"
"| O $1 $rough < H o ($macron $ddot? $evowel2_i $macron?) ;"
"| U $1 $rough < H u ($macron $ddot? $evowel2_i $macron?) ;"
"| Y $1 $rough < H y ($macron $ddot? $evowel2_i $macron?) ;"
"| A $1 $rough < H a ($ddot? $evowel2 $macron?) ;"
"| E $1 $rough < H e ($ddot? $evowel2 $macron?) ;"
"| I $1 $rough < H i ($ddot? $evowel2 $macron?) ;"
"| O $1 $rough < H o ($ddot? $evowel2 $macron?) ;"
"| U $1 $rough < H u ($ddot? $evowel2 $macron?) ;"
"| Y $1 $rough < H y ($ddot? $evowel2 $macron?) ;"
"| A $1 $rough < H a ($macron? $ddot? ) ;"
"| E $1 $rough < H e ($macron? $ddot? ) ;"
"| I $1 $rough < H i ($macron? $ddot? ) ;"
"| O $1 $rough < H o ($macron? $ddot? ) ;"
"| U $1 $rough < H u ($macron? $ddot? ) ;"
"| Y $1 $rough < H y ($macron? $ddot? ) ;"
// Now do smooth
//delete smooth breathing for Latin
"$smooth > ;"
// insert in Greek
// the assumption is that all Marks are on letters.
"| $1 $smooth < $notLetter { ([rR]) } [^hH$smooth$rough] ;"
"| $1 $smooth < $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;"
"| $1 $smooth < $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;"
// TODO: preserve smooth/rough breathing if not
// on initial vowel sequence
// need to have these up here so the rules don't mask
// remove now superfluous macron when returning
"Α < A $macron ;"
"α < a $macron ;"
"η <> e $macron ;"
"Η <> E $macron ;"
"φ <> ph ;"
"Ψ } $beforeLower <> Ps ;"
"Ψ <> PS ;"
"Φ } $beforeLower <> Ph ;"
"Φ <> PH ;"
"ψ <> ps ;"
"ω <> o $macron ;"
"Ω <> O $macron;"
// NORMAL
"α <> a ;"
"Α <> A ;"
"β <> b ;"
"Β <> B ;"
"γ } $gammaLike <> n } $egammaLike ;"
"γ <> g ;"
"Γ } $gammaLike <> N } $egammaLike ;"
"Γ <> G ;"
"δ <> d ;"
"Δ <> D ;"
"ε <> e ;"
"Ε <> E ;"
"ζ <> z ;"
"Ζ <> Z ;"
"θ <> th ;"
"Θ } $beforeLower <> Th ;"
"Θ <> TH ;"
"ι <> i ;"
"Ι <> I ;"
"κ <> k ;"
"Κ <> K ;"
"λ <> l ;"
"Λ <> L ;"
"μ <> m ;"
"Μ <> M ;"
// NOTE(review): the lowercase rule below is forward-only ('>') while the
// uppercase counterpart two lines down is two-way ('<>') -- confirm intent.
"ν } $gammaLike > n\\\' ;"
"ν <> n ;"
"Ν } $gammaLike <> N\\\' ;"
"Ν <> N ;"
"ξ <> x ;"
"Ξ <> X ;"
"ο <> o ;"
"Ο <> O ;"
"π <> p ;"
"Π <> P ;"
"ρ $rough <> rh;"
"Ρ $rough } $beforeLower <> Rh ;"
"Ρ $rough <> RH ;"
"ρ <> r ;"
"Ρ <> R ;"
// insert separator before things that turn into s
"[Pp] { } [ςσΣϷϸϺϻ] > \\\' ;"
// special S variants
"Ϸ <> Š ;" // Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
"ϸ <> š ;" //ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
"Ϻ <> Ŝ ;" // Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
"ϻ <> ŝ ;" // ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
// underbar means exception
// before a letter, initial
"ς } $beforeLetter <> s $underbar } $beforeLetter;"
"σ } $beforeLetter <> s } $beforeLetter;"
// otherwise, after a letter = final
"$afterLetter { σ <> $afterLetter { s $underbar;"
"$afterLetter { ς <> $afterLetter { s ;"
// otherwise (isolated) = initial
"ς <> s $underbar;"
"σ <> s ;"
// [Pp] { Σ <> \\\'S ;
"Σ <> S ;"
"τ <> t ;"
"Τ <> T ;"
// upsilon becomes u after a vowel, y otherwise
"$vowel {υ } <> u ;"
"υ <> y ;"
"$vowel { Υ <> U ;"
"Υ <> Y ;"
"χ <> ch ;"
"Χ } $beforeLower <> Ch ;"
"Χ <> CH ;"
// Completeness for ASCII
// Reverse-only rules: each rewrites a Latin letter with no Greek counterpart
// to another Latin spelling and leaves the cursor ('|') before it, so the
// result is matched again by the rules above.
"$ignore = [[:Mark:]''] * ;"
"| k < c ;"
"| ph < f ;"
"| i < j ;"
"| k < q ;"
"| b < v } $vowel ;"
"| b < w } $vowel;"
"| u < v ;"
"| u < w;"
"| K < C ;"
"| Ph < F ;"
"| I < J ;"
"| K < Q ;"
"| B < V } $vowel ;"
"| B < W } $vowel ;"
"| U < V ;"
"| U < W ;"
"$rough } $ignore [:UppercaseLetter:] > H ;"
"$ignore [:UppercaseLetter:] { $rough > H ;"
"$rough < H ;"
"$rough <> h ;"
// Completeness for Greek
// Forward-only rules: variant Greek symbols are rewritten to the ordinary
// letter with the cursor left before it, so the rules above then apply.
"ϐ > | β ;"
"ϑ > | θ ;"
"ϒ > | Υ ;"
"ϕ > | φ ;"
"ϖ > | π ;"
"ϰ > | κ ;"
"ϱ > | ρ ;"
"ϲ > | σ ;"
"Ϲ > | Σ;" //U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
"ϳ > j ;"
"ϴ > | Θ ;"
"ϵ > | ε ;"
"µ > | μ ;"
"ͺ > i;"
// delete any trailing ' marks used for roundtripping
"< [Ππ] { \\\' } [Ss] ;"
"< [Νν] { \\\' } $egammaLike ;"
"::NFC (NFD) ;"
// ([\\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
// ([\\u0000-\u007F \u00B7 [:Latin:] [:nonspacing mark:]]) ;
// MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
":: ( [':?A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0300-\u0337\u0339-\u0345\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEE\u1FF2-\u1FF4\u1FF6-\u1FFC\u212A-\u212B] ) ;"
}
}

View file

@ -1,268 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Greek_Latin_UNGEGN.txt
// Date: Tue May 18 17:24:48 2004
//--------------------------------------------------------------------
// Greek_Latin_UNGEGN
t_Grek_Latn_UNGEGN {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// For modern Greek, based on UNGEGN rules.
// Rules are predicated on running NFD first, and NFC afterwards
// MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
// WARNING: need to add accents to both filters ###
// :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;
":: [[[:Greek:][:Mn:][:Me:]] [\\\:-;?\u00B7\u037E\u0387]] ;"
"::NFD (NFC) ;"
// Useful variables
"$lower = [[:latin:][:greek:] & [:Ll:]] ;"
"$upper = [[:latin:][:greek:] & [:Lu:]] ;"
"$accent = [[:Mn:][:Me:]] ;"
"$macron = ̄ ;"
"$ddot = ̈ ;"
"$lcgvowel = [αεηιουω] ;"
"$ucgvowel = [ΑΕΗΙΟΥΩ] ;"
"$gvowel = [$lcgvowel $ucgvowel] ;"
"$lcgvowelC = [$lcgvowel $accent] ;"
"$evowel = [aeiouyAEIOUY];"
"$vowel = [ $evowel $gvowel] ;"
"$beforeLower = $accent * $lower ;"
"$gammaLike = [ΓΚΞΧγκξχϰ] ;"
"$egammaLike = [GKXCgkxc] ;"
"$smooth = ̓ ;"
"$rough = ̔ ;"
"$iotasub = ͅ ;"
"$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;"
"$under = ̱;"
"$caron = ̌;"
"$afterLetter = [:L:] [\\\'$accent]* ;"
"$beforeLetter = [\\\'$accent]* [:L:] ;"
// Fix punctuation
// preserve original
"\\\: <> \\\: $under ;"
"\\\? <> \\\? $under ;"
"\\\; <> \\\? ;"
"· <> \\\: ;"
// Fix any ancient characters that creep in
"͂ > ́ ;"
"̂ > ́ ;"
"̀ > ́ ;"
"$smooth > ;"
"$rough > ;"
"$iotasub > ;"
"ͺ > ;"
// need to have these up here so the rules don't mask
"η <> i $under ;"
"Η <> I $under ;"
"Ψ } $beforeLower <> Ps ;"
"Ψ <> PS ;"
"ψ <> ps ;"
"ω <> o $under ;"
"Ω <> O $under;"
// at beginning or end of word, convert mp to b
"[^[:L:]$accent] { μπ > b ;"
"μπ } [^[:L:]$accent] > b ;"
"[^[:L:]$accent] { [Μμ][Ππ] > B ;"
"[Μμ][Ππ] } [^[:L:]$accent] > B ;"
"μπ < b ;"
"Μπ < B } $beforeLower ;"
"ΜΠ < B ;"
// handle diphthongs ending with upsilon
"ου <> ou ;"
"ΟΥ <> OU ;"
"Ου <> Ou ;"
"οΥ <> oU ;"
"$fmaker = [aeiAEI] $under ? ;"
"$shiftForwardVowels = [[:Mn:]-[\u0308]];" // note: a diaeresis keeps the items separate
"$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;"
"υ $1 < ( $shiftForwardVowels )* v $under ;"
"$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;"
"υ $1 < ( $shiftForwardVowels )* f $under ;"
"$fmaker { Υ } $softener <> V $under ;"
"$fmaker { Υ <> U $under ;"
"υ <> y ;"
"Υ <> Y ;"
// NORMAL
"α <> a ;"
"Α <> A ;"
"β <> v ;"
"Β <> V ;"
"γ } $gammaLike <> n } $egammaLike ;"
"γ <> g ;"
"Γ } $gammaLike <> N } $egammaLike ;"
"Γ <> G ;"
"δ <> d ;"
"Δ <> D ;"
"ε <> e ;"
"Ε <> E ;"
"ζ <> z ;"
"Ζ <> Z ;"
"θ <> th ;"
"Θ } $beforeLower <> Th ;"
"Θ <> TH ;"
"ι <> i ;"
"Ι <> I ;"
"κ <> k ;"
"Κ <> K ;"
"λ <> l ;"
"Λ <> L ;"
"μ <> m ;"
"Μ <> M ;"
"ν } $gammaLike > n\\\' ;"
"ν <> n ;"
"Ν } $gammaLike <> N\\\' ;"
"Ν <> N ;"
"ξ <> x ;"
"Ξ <> X ;"
"ο <> o ;"
"Ο <> O ;"
"π <> p ;"
"Π <> P ;"
"ρ <> r ;"
"Ρ <> R ;"
// insert separator before things that turn into s
"[Pp] { } [ςσΣϷϸϺϻ] > \\\' ;"
// special S variants
"Ϸ <> Š ;" // Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
"ϸ <> š ;" //ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
"Ϻ <> Ŝ ;" // Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
"ϻ <> ŝ ;" // ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
// Caron means exception
// before a letter, initial
"ς } $beforeLetter <> s $under } $beforeLetter;"
"σ } $beforeLetter <> s } $beforeLetter;"
// otherwise, after a letter = final
"$afterLetter { σ <> $afterLetter { s $under;"
"$afterLetter { ς <> $afterLetter { s ;"
// otherwise (isolated) = initial
"ς <> s $under;"
"σ <> s ;"
// [Pp] { Σ <> \\\'S ;
"Σ <> S ;"
"τ <> t ;"
"Τ <> T ;"
"φ <> f ;"
"Φ <> F ;"
"χ <> ch ;"
"Χ } $beforeLower <> Ch ;"
"Χ <> CH ;"
// Completeness for ASCII
// $ignore = [[:Mark:]''] * ;
"| ch < h ;"
"| k < c ;"
"| i < j ;"
"| k < q ;"
"| b < u } $vowel ;"
"| b < w } $vowel ;"
"| y < u ;"
"| y < w ;"
"| Ch < H ;"
"| K < C ;"
"| I < J ;"
"| K < Q ;"
"| B < W } $vowel ;"
"| B < U } $vowel ;"
"| Y < W ;"
"| Y < U ;"
// Completeness for Greek
"ϐ > | β ;"
"ϑ > | θ ;"
"ϒ > | Υ ;"
"ϕ > | φ ;"
"ϖ > | π ;"
"ϰ > | κ ;"
"ϱ > | ρ ;"
"ϲ > | σ ;"
"Ϲ > | Σ;" //U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
"ϳ > j ;"
"ϴ > | Θ ;"
"ϵ > | ε ;"
"µ > | μ ;"
// delete any trailing ' marks used for roundtripping
"< [Ππ] { \\\' } [Ss] ;"
"< [Νν] { \\\' } $egammaLike ;"
"::NFC (NFD) ;"
// MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
":: ([[[:Latin:][:Mn:][:Me:]] ['\\\:?]]) ;"
}
}

View file

@ -1,107 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Gujarati_InterIndic.txt
// Date: Tue May 18 17:24:48 2004
//--------------------------------------------------------------------
// Gujarati_InterIndic
t_Gujr_InterIndic {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Gujarati-InterIndic
//:: NFD (NFC) ;
"\u0a81>\ue001;" // SIGN CANDRABINDU
"\u0a82>\ue002;" // SIGN ANUSVARA
"\u0a83>\ue003;" // SIGN VISARGA
"\u0a85>\ue005;" // LETTER A
"\u0a86>\ue006;" // LETTER AA
"\u0a87>\ue007;" // LETTER I
"\u0a88>\ue008;" // LETTER II
"\u0a89>\ue009;" // LETTER U
"\u0a8a>\ue00a;" // LETTER UU
"\u0a8b>\ue00b;" // LETTER VOCALIC R
"\u0a8c>\ue00c;" // LETTER VOCALIC L
"\u0a8d>\ue00d;" // VOWEL CANDRA E
"\u0a8f>\ue00f;" // LETTER E
"\u0a90>\ue010;" // LETTER AI
"\u0a91>\ue011;" // VOWEL CANDRA O
"\u0a93>\ue013;" // LETTER O
"\u0a94>\ue014;" // LETTER AU
"\u0a95>\ue015;" // LETTER KA
"\u0a96>\ue016;" // LETTER KHA
"\u0a97>\ue017;" // LETTER GA
"\u0a98>\ue018;" // LETTER GHA
"\u0a99>\ue019;" // LETTER NGA
"\u0a9a>\ue01a;" // LETTER CA
"\u0a9b>\ue01b;" // LETTER CHA
"\u0a9c>\ue01c;" // LETTER JA
"\u0a9d>\ue01d;" // LETTER JHA
"\u0a9e>\ue01e;" // LETTER NYA
"\u0a9f>\ue01f;" // LETTER TTA
"\u0aa0>\ue020;" // LETTER TTHA
"\u0aa1>\ue021;" // LETTER DDA
"\u0aa2>\ue022;" // LETTER DDHA
"\u0aa3>\ue023;" // LETTER NNA
"\u0aa4>\ue024;" // LETTER TA
"\u0aa5>\ue025;" // LETTER THA
"\u0aa6>\ue026;" // LETTER DA
"\u0aa7>\ue027;" // LETTER DHA
"\u0aa8>\ue028;" // LETTER NA
"\u0aaa>\ue02a;" // LETTER PA
"\u0aab>\ue02b;" // LETTER PHA
"\u0aac>\ue02c;" // LETTER BA
"\u0aad>\ue02d;" // LETTER BHA
"\u0aae>\ue02e;" // LETTER MA
"\u0aaf>\ue02f;" // LETTER YA
"\u0ab0>\ue030;" // LETTER RA
"\u0ab2>\ue032;" // LETTER LA
"\u0ab3>\ue033;" // LETTER LLA
"\u0ab5>\ue035;" // LETTER VA
"\u0ab6>\ue036;" // LETTER SHA
"\u0ab7>\ue037;" // LETTER SSA
"\u0ab8>\ue038;" // LETTER SA
"\u0ab9>\ue039;" // LETTER HA
"\u0abc>\ue03c;" // SIGN NUKTA
"\u0abd>\ue03d;" // SIGN AVAGRAHA
"\u0abe>\ue03e;" // VOWEL SIGN AA
"\u0abf>\ue03f;" // VOWEL SIGN I
"\u0ac0>\ue040;" // VOWEL SIGN II
"\u0ac1>\ue041;" // VOWEL SIGN U
"\u0ac2>\ue042;" // VOWEL SIGN UU
"\u0ac3>\ue043;" // VOWEL SIGN VOCALIC R
"\u0ac4>\ue044;" // VOWEL SIGN VOCALIC RR
"\u0ac5>\ue045;" // VOWEL SIGN CANDRA E
"\u0ac7>\ue047;" // VOWEL SIGN E
"\u0ac8>\ue048;" // VOWEL SIGN AI
"\u0ac9>\ue049;" // VOWEL SIGN CANDRA O
"\u0acb>\ue04b;" // VOWEL SIGN O
"\u0acc>\ue04c;" // VOWEL SIGN AU
"\u0acd>\ue04d;" // SIGN VIRAMA
"\u0ad0>\ue050;" // OM
"\u0ae0>\ue060;" // LETTER VOCALIC RR
"\u0ae1>\ue061;" // LETTER VOCALIC LL
"\u0ae6>\ue066;" // DIGIT ZERO
"\u0ae7>\ue067;" // DIGIT ONE
"\u0ae8>\ue068;" // DIGIT TWO
"\u0ae9>\ue069;" // DIGIT THREE
"\u0aea>\ue06a;" // DIGIT FOUR
"\u0aeb>\ue06b;" // DIGIT FIVE
"\u0aec>\ue06c;" // DIGIT SIX
"\u0aed>\ue06d;" // DIGIT SEVEN
"\u0aee>\ue06e;" // DIGIT EIGHT
"\u0aef>\ue06f;" // DIGIT NINE
"\u0964>\ue064;" // DANDA
"\u0965>\ue065;" // DOUBLE DANDA
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,111 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Gurmukhi_InterIndic.txt
// Date: Tue May 18 17:24:48 2004
//--------------------------------------------------------------------
// Gurmukhi_InterIndic
t_Guru_InterIndic {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Gurmukhi-InterIndic
//:: NFD (NFC) ;
//\u0A16\u0A3C>\uE059; # LETTER KHHA
//\u0A17\u0A3C>\uE05A; # LETTER GHHA
//\u0A1C\u0A3C>\uE05B; # LETTER ZA
//\u0A38\u0A3C>\uE036; # LETTER SHA
//\u0A32\u0A3C>\uE033; # LETTER LLA
//\u0A2B\u0A3C>\uE05E; # LETTER FA
"\u0A01>\uE001;" // SIGN CHANDRABINDU
"\u0A02>\uE002;" // SIGN BINDI
"\u0A05>\uE005;" // LETTER A
"\u0A06>\uE006;" // LETTER AA
"\u0A07>\uE007;" // LETTER I
"\u0A08>\uE008;" // LETTER II
"\u0A09>\uE009;" // LETTER U
"\u0A0A>\uE00A;" // LETTER UU
"\u0A0C>\uE032;" // FALLBACK : VOCALIC LA
"\u0A0F>\uE00F;" // LETTER EE
"\u0A10>\uE010;" // LETTER AI
"\u0A13>\uE013;" // LETTER OO
"\u0A14>\uE014;" // LETTER AU
"\u0A15>\uE015;" // LETTER KA
"\u0A16>\uE016;" // LETTER KHA
"\u0A17>\uE017;" // LETTER GA
"\u0A18>\uE018;" // LETTER GHA
"\u0A19>\uE019;" // LETTER NGA
"\u0A1A>\uE01A;" // LETTER CA
"\u0A1B>\uE01B;" // LETTER CHA
"\u0A1C>\uE01C;" // LETTER JA
"\u0A1D>\uE01D;" // LETTER JHA
"\u0A1E>\uE01E;" // LETTER NYA
"\u0A1F>\uE01F;" // LETTER TTA
"\u0A20>\uE020;" // LETTER TTHA
"\u0A21>\uE021;" // LETTER DDA
"\u0A22>\uE022;" // LETTER DDHA
"\u0A23>\uE023;" // LETTER NNA
"\u0A24>\uE024;" // LETTER TA
"\u0A25>\uE025;" // LETTER THA
"\u0A26>\uE026;" // LETTER DA
"\u0A27>\uE027;" // LETTER DHA
"\u0A28>\uE028;" // LETTER NA
"\u0A2A>\uE02A;" // LETTER PA
"\u0A2B>\uE02B;" // LETTER PHA
"\u0A2C>\uE02C;" // LETTER BA
"\u0A2D>\uE02D;" // LETTER BHA
"\u0A2E>\uE02E;" // LETTER MA
"\u0A2F>\uE02F;" // LETTER YA
"\u0A30>\uE030;" // LETTER RA
"\u0A32>\uE032;" // LETTER LA
"\u0a33>\uE033;" // FALLBACK
"\u0A35>\uE035;" // LETTER VA
"\u0a36>\ue036;"
// Decomposed SHA: SA (U+0A38) followed by NUKTA (U+0A3C) maps to the same
// InterIndic code as the precomposed U+0A36. This rule is listed ahead of the
// plain SA and NUKTA rules so the two-character sequence is matched first.
"\u0A38\u0A3C>\ue036;" // FALLBACK
"\u0A38>\uE038;" // LETTER SA
"\u0A39>\uE039;" // LETTER HA
"\u0A3C>\uE03C;" // SIGN NUKTA
"\u0A3E>\uE03E;" // VOWEL SIGN AA
"\u0A3F>\uE03F;" // VOWEL SIGN I
"\u0A40>\uE040;" // VOWEL SIGN II
"\u0A41>\uE041;" // VOWEL SIGN U
"\u0A42>\uE042;" // VOWEL SIGN UU
"\u0A47>\uE047;" // VOWEL SIGN EE
"\u0A48>\uE048;" // VOWEL SIGN AI
"\u0A4B>\uE04B;" // VOWEL SIGN OO
"\u0A4C>\uE04C;" // VOWEL SIGN AU
"\u0A4D>\uE04D;" // SIGN VIRAMA
"\u0A5C>\uE05C;" // LETTER RRA
"\u0A66>\uE066;" // DIGIT ZERO
"\u0A67>\uE067;" // DIGIT ONE
"\u0A68>\uE068;" // DIGIT TWO
"\u0A69>\uE069;" // DIGIT THREE
"\u0A6A>\uE06A;" // DIGIT FOUR
"\u0A6B>\uE06B;" // DIGIT FIVE
"\u0A6C>\uE06C;" // DIGIT SIX
"\u0A6D>\uE06D;" // DIGIT SEVEN
"\u0A6E>\uE06E;" // DIGIT EIGHT
"\u0A6F>\uE06F;" // DIGIT NINE
"\u0A70>\uE07C;" // TIPPI
"\u0A71>\uE07D;" // ADDAK
"\u0A72>\uE07E;" // IRI
"\u0A73>\uE07F;" // URA
"\u0A74>\uE080;" // EK ONKAR
"\u0964>\ue064;" // DANDA
"\u0965>\ue065;" // DOUBLE DANDA
// :: NFC (NFD) ;
// eof
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,39 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat
// Source: \icu4j\src\com\ibm\icu\impl\data/Transliterator_Han_Spacedhan.txt
// Date: Fri May 28 17:07:31 2004
//--------------------------------------------------------------------
// Han_Spacedhan
t_Hani_SpHan {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Only intended for internal use
":: fullwidth-halfwidth;"
"。 > '.';"
"$terminalPunct = [\\\.\\\,\\\:\\\;\\\?\\\!.,:?!。、;[:Pe:][:Pf:]];"
"$initialPunct = [:Ps:][:Pi:];"
// add space between any Han or terminal punctuation and letters, and
// between letters and Han or initial punct
"[[:Ideographic:] $terminalPunct] {} [:Letter:] > ' ' ;"
"[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] > ' ' ;"
// remove spacing between ideographs and other letters
"< [:Ideographic:] { ' ' } [:Letter:] ;"
"< [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;"
}
}

View file

@ -1,124 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat
// Source: \icu4j\src\com\ibm\icu\impl\data/Transliterator_Hebrew_Latin.txt
// Date: Fri May 28 17:07:31 2004
//--------------------------------------------------------------------
// Hebrew_Latin
t_Hebr_Latn {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Transliteration table for Hebrew
// Based on the UNGEGN table at:
// http://www.eki.ee/wgrs/rom1_he.pdf
//
// Exceptions:
// - Accents are added to disambiguate letters
// - Combinations of dagesh, shin/sin dot that produce different
// letters are not yet encoded.
//
// To test, open:
// http://oss.software.ibm.com/cgi-bin/icu/tr
// Click Edit, paste in this file, Save As hebrew-latin/XXX
// (where XXX is a username)
// Now go back to the main window, and try it out.
// Use hebrew-latin/XXX for Output 1, and (Inverse) for Output 2
// Paste in hebrew text in Input, and hit Transliterate.
//
// For more information, see:
// http://oss.software.ibm.com/icu/userguide/Transliteration.html
":: [[:Hebrew:] [:^ccc=0:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138̄\u05BF] - [\u05BD]] ;"
":: nfkd (nfc) ;"
"$letterAfter = [:M:]* [:L:] ;"
// move longer items here to avoid masking
"ח <> ẖ ;"
"צ <> ẕ } $letterAfter;"
"ץ <> ẕ ;"
"ש <> ş ;"
"ת <> ţ ;"
"א <> ʼ ;"
"ב <> b ;"
"ג <> g ;"
"ד <> d ;"
"ה <> h ;"
"ו <> w ;"
"ז <> z ;"
"ט <> t ;"
"י <> y ;"
"כ <> k } $letterAfter;"
"ך <> k ;"
"ל <> l ;"
"מ <> m } $letterAfter;"
"ם <> m ;"
"נ <> n } $letterAfter;"
"ן <> n ;"
"ס <> s ;"
"ע <> ʻ ;"
"פ <> p } $letterAfter;"
"ף <> p ;"
"ק <> q ;"
"ר <> r ;"
"װ > | וו;" // HEBREW LIGATURE YIDDISH DOUBLE VAV
"ױ > | וי;" // HEBREW LIGATURE YIDDISH VAV YOD
"ײ > | יי ;" // HEBREW LIGATURE YIDDISH DOUBLE YOD
"ּ <> ̇ ;" // dagesh just goes to overdot for now
"ׁ <> ̌ ;" // shin dot -> sh
"ׂ <> ̂ ;" // sin dot -> s
// points
"$above = [^[:ccc=0:][:ccc=230:]]*;"
"‎ֲ‎ > à ;"
"‎ֲ‎ $1< a ($above) ̀;"
"‎ָ‎ > á ;"
"‎ָ‎ $1 < a ($above) ́;"
"‎ֱ‎ > è ;"
"‎ֱ‎ $1 < e ($above) ̀;"
"‎ֵ‎ > é ;"
"‎ֵ‎ $1 < e ($above) ́;"
"‎ְ‎ > e ̆ ;"
"‎ְ‎ $1 < e ($above) ̆;"
"‎ֹ‎ > ò ;"
"‎ֹ‎ $1 < o ($above) ̀;"
"ִ <> i ;"
"ֻ <> u ;"
"ַ <> a ;"
"ֶ <> e ;"
"ֳ <> o ;"
"\u05BF <> ̄ ;"
// fallbacks
"ק < c ;"
"פ < f } $letterAfter;"
"ף < f ;"
"ז < j ;"
"ו < v ;"
"כס < x ;"
":: (lower);"
":: nfc (nfd) ;"
":: ([[:Latin:] [:^ccc=0:] [\u02BB-\u02BC\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 ̄ ]]);"
}
}

View file

@ -1,223 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Hiragana_Katakana.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// Hiragana_Katakana
t_Hira_Kana {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// note: a global filter is more efficient, but MUST include all source chars
":: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;"
":: NFKC ();"
// Hiragana-Katakana
// This is largely a one-to-one mapping, but it has a
// few kinks:
// 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
// Hiragana equivalents. We use Hiragana wa/wi/we/wo
// (308F-3092) with a voicing mark (3099), which is
// semantically equivalent. However, this is a non-
// roundtripping transformation.
// 2. The Katakana small ka/ke (30F5,30F6) have no
// Hiragana equivalents. We convert them to normal
// Hiragana ka/ke (304B,3051). This is a one-way
// information-losing transformation and precludes
// round-tripping of 30F5 and 30F6.
// 3. The combining marks 3099-309C are in the Hiragana
// block, but they apply to Katakana as well, so we
// leave them untouched.
// 4. The Katakana prolonged sound mark 30FC doubles the
// preceding vowel. This is a one-way information-
// losing transformation from Katakana to Hiragana.
// 5. The Katakana middle dot separates words in foreign
// expressions; we leave this unmodified.
// The above points preclude successful round-trip
// transformations of arbitrary input text. However,
// they provide naturalistic results that should conform
// to user expectations.
// Combining equivalents va/vi/ve/vo
"わ゙ <> ヷ;"
"ゐ゙ <> ヸ;"
"ゑ゙ <> ヹ;"
"を゙ <> ヺ;"
// One-to-one mappings, main block
// 3041:3094 <> 30A1:30F4
// 309D,E <> 30FD,E
"ぁ <> ァ;"
"あ <> ア;"
"ぃ <> ィ;"
"い <> イ;"
"ぅ <> ゥ;"
"う <> ウ;"
"ぇ <> ェ;"
"え <> エ;"
"ぉ <> ォ;"
"お <> オ;"
"か <> カ;"
"が <> ガ;"
"き <> キ;"
"ぎ <> ギ;"
"く <> ク;"
"ぐ <> グ;"
"け <> ケ;"
"げ <> ゲ;"
"こ <> コ;"
"ご <> ゴ;"
"さ <> サ;"
"ざ <> ザ;"
"し <> シ;"
"じ <> ジ;"
"す <> ス;"
"ず <> ズ;"
"せ <> セ;"
"ぜ <> ゼ;"
"そ <> ソ;"
"ぞ <> ゾ;"
"た <> タ;"
"だ <> ダ;"
"ち <> チ;"
"ぢ <> ヂ;"
"っ <> ッ;"
"つ <> ツ;"
"づ <> ヅ;"
"て <> テ;"
"で <> デ;"
"と <> ト;"
"ど <> ド;"
"な <> ナ;"
"に <> ニ;"
"ぬ <> ヌ;"
"ね <> ネ;"
"の <> ノ;" // NO: U+306E <> U+30CE
"は <> ハ;"
"ば <> バ;"
"ぱ <> パ;"
"ひ <> ヒ;"
"び <> ビ;"
"ぴ <> ピ;"
"ふ <> フ;"
"ぶ <> ブ;"
"ぷ <> プ;"
"へ <> ヘ;"
"べ <> ベ;"
"ぺ <> ペ;"
"ほ <> ホ;"
"ぼ <> ボ;"
"ぽ <> ポ;"
"ま <> マ;"
"み <> ミ;"
"む <> ム;"
"め <> メ;"
"も <> モ;"
"ゃ <> ャ;"
"や <> ヤ;"
"ゅ <> ュ;"
"ゆ <> ユ;"
"ょ <> ョ;"
"よ <> ヨ;"
"ら <> ラ;"
"り <> リ;"
"る <> ル;"
"れ <> レ;"
"ろ <> ロ;"
"ゎ <> ヮ;"
"わ <> ワ;"
"ゐ <> ヰ;"
"ゑ <> ヱ;"
"を <> ヲ;"
"ん <> ン;"
"ゔ <> ヴ;"
"ゝ <> ヽ;"
"ゞ <> ヾ;"
// One-way Katakana-Hiragana xform of small K ka/ke to
// normal H ka/ke.
"か < ヵ;"
"け < ヶ;"
// Katakana followed by a prolonged sound mark 30FC has
// its final vowel doubled. This is a Katakana-Hiragana
// one-way information-losing transformation. We
// include the small Katakana (e.g., small A 3041) and
// do not distinguish them from their large
// counterparts. It doesn't make sense to double a
// small counterpart vowel as a small Hiragana vowel, so
// we don't do so. In natural text this should never
// occur anyway. If a 30FC is seen without a preceding
// vowel sound (e.g., after n 30F3) we do not change it.
//## $long = ー;
// The following categories are Hiragana, not Katakana
// as might be expected, since by the time we get to the
// 30FC, the preceding character will have already been
// transformed to Hiragana.
// {The following mechanically generated from the
// Unicode 3.0 data:}
"$xa = ["
"ぁ あ か が さ ざ"
"た だ な は ば ぱ"
"ま ゃ や ら ゎ わ"
"];"
"$xi = ["
"ぃ い き ぎ し じ"
"ち ぢ に ひ び ぴ"
"み り ゐ"
"];"
"$xu = ["
"ぅ う く ぐ す ず"
"っ つ づ ぬ ふ ぶ"
"ぷ む ゅ ゆ る ゔ"
"];"
"$xe = ["
"ぇ え け げ せ ぜ"
"て で ね へ べ ぺ"
"め れ ゑ"
"];"
"$xo = ["
"ぉ お こ ご そ ぞ"
"と ど の ほ ぼ ぽ"
"も ょ よ ろ を"
"];"
"あ < $xa {ー};"
"い < $xi {ー};"
"う < $xu {ー};"
"え < $xe {ー};"
"お < $xo {ー};"
":: (NFKC) ;"
// note: a global filter is more efficient, but MUST include all source chars!!
":: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);"
// eof
}
}

View file

@ -1,30 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Hiragana_Latin.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// Hiragana_Latin
// Resource bundle holding the Hiragana-Latin rule set. It defines no
// character-level rules of its own; it only chains other transforms
// using "::" entries.
t_Hira_Latn {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Leading "::" entry with a bare set: filter limiting the forward direction
// to the listed code points (all within U+3041..U+30FE).
":: [\u3041-\u3094\u3099\u309D-\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0\u30D3\u30D6\u30D9\u30DC\u30F4\u30F7-\u30FA\u30FE] ;"
// Forward chain, applied in order: decompose, Hiragana to Katakana,
// Katakana to Latin, then recompose.
":: NFD ;"
":: Hiragana-Katakana;"
":: Katakana-Latin;"
":: NFC ;"
// Parenthesized entries: NOTE(review) per ICU rule syntax these apply only
// when the rule set is run in the inverse (Latin to Hiragana) direction --
// confirm against the ICU transform rule documentation.
":: (Lower) ;"
// Filter for the inverse direction: the Latin letters, punctuation and
// combining marks listed in the set.
":: ([',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]) ;"
}
}

View file

@ -1,163 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_InterIndic_Bengali.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// InterIndic_Bengali
t_InterIndic_Beng {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// InterIndic-Bengali
//:: NFD (NFC) ;
"\uE001>\u0981;" // SIGN CANDRABINDU
"\uE002>\u0982;" // SIGN ANUSVARA
"\uE003>\u0983;" // SIGN VISARGA
"\uE004>\u0985;" // FALLBACK TO LETTER A
"\uE005>\u0985;" // LETTER A
"\uE006>\u0986;" // LETTER AA
"\uE007>\u0987;" // LETTER I
"\uE008>\u0988;" // LETTER II
"\uE009>\u0989;" // LETTER U
"\uE00A>\u098A;" // LETTER UU
"\uE00B>\u098B;" // LETTER VOCALIC R
"\uE00C>\u098C;" // LETTER VOCALIC L
"\uE00D>\u098F;" // FALLBACK
"\uE00E>\u098F;" // FALLBACK
"\uE00F>\u098F;" // LETTER E
"\uE010>\u0990;" // LETTER AI
"\uE011>\u0993;" // FALLBACK
"\uE012>\u0993;" // FALLBACK
"\uE013>\u0993;" // LETTER O
"\uE014>\u0994;" // LETTER AU
"\uE015>\u0995;" // LETTER KA
"\uE016>\u0996;" // LETTER KHA
"\uE017>\u0997;" // LETTER GA
"\uE018>\u0998;" // LETTER GHA
"\uE019>\u0999;" // LETTER NGA
"\uE01A>\u099A;" // LETTER CA
"\uE01B>\u099B;" // LETTER CHA
"\uE01C>\u099C;" // LETTER JA
"\uE01D>\u099D;" // LETTER JHA
"\uE01E>\u099E;" // LETTER NYA
"\uE01F>\u099F;" // LETTER TTA
"\uE020>\u09A0;" // LETTER TTHA
"\uE021>\u09A1;" // LETTER DDA
"\uE022>\u09A2;" // LETTER DDHA
"\uE023>\u09A3;" // LETTER NNA
"\uE024>\u09A4;" // LETTER TA
"\uE025>\u09A5;" // LETTER THA
"\uE026>\u09A6;" // LETTER DA
"\uE027>\u09A7;" // LETTER DHA
"\uE028>\u09A8;" // LETTER NA
"\uE029>\u09A8\u09BC;" // REMAP (indicExceptions.txt): \u09a9>\u09a8 = LETTER NNNA>LETTER NA
"\uE02A>\u09AA;" // LETTER PA
"\uE02B>\u09AB;" // LETTER PHA
"\uE02C>\u09AC;" // LETTER BA
"\uE02D>\u09AD;" // LETTER BHA
"\uE02E>\u09AE;" // LETTER MA
"\uE02F>\u09AF;" // LETTER YA
"\uE030>\u09B0;" // LETTER RA
"\uE031>\u09B0\u09BC;" // FALLBACK to RA
"\uE032>\u09B2;" // LETTER LA
"\uE033>\u09B2;" // REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA
"\uE034>\u09B2;" // REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA
"\uE035>\u09AC;" // REMAP (indicExceptions.txt): \u09b5>\u09ac = LETTER VA>LETTER BA
"\uE036>\u09B6;" // LETTER SHA
"\uE037>\u09B7;" // LETTER SSA
"\uE038>\u09B8;" // LETTER SA
"\uE039>\u09B9;" // LETTER HA
"\uE03C>\u09BC;" // SIGN NUKTA
"\uE03D>\u09bd;" // SIGN AVAGRAHA
"\uE03E>\u09BE;" // VOWEL SIGN AA
"\uE03F>\u09BF;" // VOWEL SIGN I
"\uE040>\u09C0;" // VOWEL SIGN II
"\uE041>\u09C1;" // VOWEL SIGN U
"\uE042>\u09C2;" // VOWEL SIGN UU
"\uE043>\u09C3;" // VOWEL SIGN VOCALIC R
"\uE044>\u09C4;" // VOWEL SIGN VOCALIC RR
"\uE045>\u09C7;" // REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E
"\uE046>\u09C7;" // FALLBACK
"\uE047>\u09C7;" // VOWEL SIGN E
"\uE048>\u09C8;" // VOWEL SIGN AI
"\uE049>\u09C7\u09BE;" // REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O
"\uE04A>\u09C7\u09BE;" // FALLBACK
"\uE04B>\u09C7\u09BE;" // VOWEL SIGN O
"\uE04C>\u09C7\u09D7;" // VOWEL SIGN AU
"\uE04D>\u09CD;" // SIGN VIRAMA
"\uE050>\u0993\u0982;" // InterIndic-Bengali: OM
"\ue051>;"
"\ue052>;"
"\ue053>;"
"\ue054>;"
"\uE055>;" // LENGTH MARK
"\uE056>\u09C8;" // REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI
"\uE057>\u09D7;" // AU LENGTH MARK
"\uE058>\u0995\u09BC;" // FALLBACK
"\uE059>\u0996\u09BC;" // REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA
"\uE05A>\u0997\u09BC;" // REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA
"\uE05B>\u099C\u09BC;" // REMAP (indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA
"\uE05C>\u09A1\u09BC;" // FALLBACK
"\uE05D>\u09A2\u09BC;" // LETTER RHA
"\uE05E>\u09AB\u09BC;" // REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA
"\uE05F>\u09AF\u09BC;" // LETTER YYA
"\uE060>\u09E0;" // LETTER VOCALIC RR
"\uE061>\u09E1;" // LETTER VOCALIC LL
"\uE062>\u09E2;" // VOWEL SIGN VOCALIC L
"\uE063>\u09E3;" // VOWEL SIGN VOCALIC LL
"\uE064>\u0964;" // DANDA
"\uE065>\u0965;" // DOUBLE DANDA
"\uE066>\u09E6;" // DIGIT ZERO
"\uE067>\u09E7;" // DIGIT ONE
"\uE068>\u09E8;" // DIGIT TWO
"\uE069>\u09E9;" // DIGIT THREE
"\uE06A>\u09EA;" // DIGIT FOUR
"\uE06B>\u09EB;" // DIGIT FIVE
"\uE06C>\u09EC;" // DIGIT SIX
"\uE06D>\u09ED;" // DIGIT SEVEN
"\uE06E>\u09EE;" // DIGIT EIGHT
"\uE06F>\u09EF;" // DIGIT NINE
"\ue070>;" // ABBREVIATION SIGN
"\ue071>\u09F0;" // LETTER RA WITH MIDDLE DIAGONAL
"\ue072>\u09F1;" // LETTER RA WITH LOWER DIAGONAL
"\ue073>\u09F2;" // RUPEE MARK
"\ue074>\u09F3;" // RUPEE SIGN
"\ue075>\u09F4;" // CURRENCY NUMERATOR ONE
"\ue076>\u09F5;" // CURRENCY NUMERATOR TWO
"\ue077>\u09F6;" // CURRENCY NUMERATOR THREE
"\ue078>\u09F7;" // CURRENCY NUMERATOR FOUR
"\ue079>\u09F8;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
"\ue07A>\u09F9;" // CURRENCY DENOMINATOR SIXTEEN
"\ue07B>\u09FA;" // ISSHAR
"\uE07C>;" // TIPPI
"\uE07D>;" // ADDAK
"\uE07E>;" // IRI
"\uE07F>;" // URA
"\uE080>;" // EK ONKAR
"\uE081>\u09AC;" // FALLBACK FOR ORIYA LETTER WA
"0 > \u09E6;" // FALLBACK FOR TAMIL
"1 > \u09E7;"
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,174 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_InterIndic_Devanagari.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// InterIndic_Devanagari
t_InterIndic_Deva {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// InterIndic-Devanagari
//:: NFD (NFC) ;
//Rules for Decomposed characters
"\ue028\ue03c > \u0929;" //\ue029
"\ue030\ue03c > \u0931;" //\ue031
"\ue033\ue03c > \u0934;" //\ue034
"\ue015\ue03c > \u0958;" //\ue058 LETTER QA (For Urdu)
"\ue016\ue03c > \u0959;" //\ue059 LETTER KHHA (For Urdu)
"\ue017\ue03c > \u095a;" //\ue05a LETTER GHHA (For Urdu)
"\ue01c\ue03c > \u095b;" //\ue05b LETTER ZA (For Urdu)
"\ue021\ue03c > \u095c;" //\ue05c LETTER DDDHA (pronounced RRA)
"\ue022\ue03c > \u095d;" //\ue05d LETTER RHA (pronounced RRHA)
"\ue02b\ue03c > \u095e;" //\ue05e LETTER FA
"\ue02f\ue03c > \u095f;" //\ue05f LETTER YYA
//Decomposed compatibility transliterations
"\ue012\ue057>\u0914;" // FALLBACK FOR TAMIL AU
"0 > \u0966;" // FALLBACK FOR TAMIL
"1 > \u0967;"
"\ue055>;" // FALLBACK BLOW AWAY KANNADA AND TELUGU LENGTH MARK
"\ue056>;" // FALLBACK BLOW AWAY KANNADA AND TELUGU AI LENGTH MARK
"\ue057>;" // FALLBACK BLOW AWAY TAMIL AU LENGTH MARK
"\ue001 > \u0901;" // SIGN CANDRABINDU
"\ue002 > \u0902;" // SIGN ANUSVARA
"\ue003 > \u0903;" // SIGN VISARGA
"\ue004 > \u0904;" // SIGN SHORT A
"\ue005 > \u0905;" // LETTER A
"\ue006 > \u0906;" // LETTER AA
"\ue007 > \u0907;" // LETTER I
"\ue008 > \u0908;" // LETTER II
"\ue009 > \u0909;" // LETTER U
"\ue00a > \u090a;" // LETTER UU
"\ue00b > \u090b;" // LETTER VOCALIC R
"\ue00c > \u090c;" // LETTER VOCALIC L
"\ue00d > \u090d;" // LETTER CANDRA E (For representing English sounds)
"\ue00e > \u090e;" // LETTER SHORT E(For Southern Scripts)
"\ue00f > \u090f;" // LETTER E
"\ue010 > \u0910;" // LETTER AI
"\ue011 > \u0911;" // LETTER CANDRA O (For representing English sounds)
"\ue012 > \u0912;" // LETTER SHORT O (For Southern Scripts)
"\ue013 > \u0913;" // LETTER O
"\ue014 > \u0914;" // LETTER AU
"\ue015 > \u0915;" // LETTER KA
"\ue016 > \u0916;" // LETTER KHA
"\ue017 > \u0917;" // LETTER GA
"\ue018 > \u0918;" // LETTER GHA
"\ue019 > \u0919;" // LETTER NGA
"\ue01a > \u091a;" // LETTER CA
"\ue01b > \u091b;" // LETTER CHA
"\ue01c > \u091c;" // LETTER JA
"\ue01d > \u091d;" // LETTER JHA
"\ue01e > \u091e;" // LETTER NYA
"\ue01f > \u091f;" // LETTER TTA
"\ue020 > \u0920;" // LETTER TTHA
"\ue021 > \u0921;" // LETTER DDA
"\ue022 > \u0922;" // LETTER DDHA
"\ue023 > \u0923;" // LETTER NNA
"\ue024 > \u0924;" // LETTER TA
"\ue025 > \u0925;" // LETTER THA
"\ue026 > \u0926;" // LETTER DA
"\ue027 > \u0927;" // LETTER DHA
"\ue028 > \u0928;" // LETTER NA
"\ue029 > \u0929;" // LETTER NNNA
"\ue02a > \u092a;" // LETTER PA
"\ue02b > \u092b;" // LETTER PHA
"\ue02c > \u092c;" // LETTER BA
"\ue02d > \u092d;" // LETTER BHA
"\ue02e > \u092e;" // LETTER MA
"\ue02f > \u092f;" // LETTER YA
"\ue030 > \u0930;" // LETTER RA
"\ue031 > \u0931;" // LETTER RRA (Eyelash RA for Southern scripts)
//\ue031 > \u0930;
"\ue032 > \u0932;" // LETTER LA
"\ue033 > \u0933;" // LETTER LLA
"\ue034 > \u0934;" // LETTER LLLA (LLLA for Southern scripts)
//\ue034 > \u0933;
"\ue035 > \u0935;" // LETTER VA
"\ue036 > \u0936;" // LETTER SHA
"\ue037 > \u0937;" // LETTER SSA
"\ue038 > \u0938;" // LETTER SA
"\ue039 > \u0939;" // LETTER HA
"\ue03c > \u093c;" // SIGN NUKTA
"\ue03d > \u093d;" // SIGN AVAGRAHA
"\ue03e > \u093e;" // VOWEL SIGN AA
"\ue03f > \u093f;" // VOWEL SIGN I
"\ue040 > \u0940;" // VOWEL SIGN II
"\ue041 > \u0941;" // VOWEL SIGN U
"\ue042 > \u0942;" // VOWEL SIGN UU
"\ue043 > \u0943;" // VOWEL SIGN VOCALIC R
"\ue044 > \u0944;" // VOWEL SIGN VOCALIC RR
"\ue045 > \u0945;" // VOWEL SIGN CANDRA E
"\ue046 > \u0946;" // VOWEL SIGN SHORT E
"\ue047 > \u0947;" // VOWEL SIGN E
"\ue048 > \u0948;" // VOWEL SIGN AI
"\ue049 > \u0949;" // VOWEL SIGN CANDRA O
"\ue04a > \u094a;" // VOWEL SIGN SHORT O
"\ue04b > \u094b;" // VOWEL SIGN O
"\ue04c > \u094c;" // VOWEL SIGN AU
"\ue04d > \u094d;" // SIGN VIRAMA
"\ue050 > \u0950;" // OM
"\ue051 > \u0951;" // STRESS SIGN UDATTA
"\ue052 > \u0952;" // STRESS SIGN ANUDATTA
"\ue053 > \u0953;" // GRAVE ACCENT
"\ue054 > \u0954;" // ACUTE ACCENT
"\ue058 > \u0958;" // LETTER QA (For Urdu)
"\ue059 > \u0959;" // LETTER KHHA (For Urdu)
"\ue05a > \u095a;" // LETTER GHHA (For Urdu)
"\ue05b > \u095b;" // LETTER ZA (For Urdu)
"\ue05c > \u095c;" // LETTER DDDHA (pronounced RRA)
"\ue05d > \u095d;" // LETTER RHA (pronounced RRHA)
"\ue05e > \u095e;" // LETTER FA
"\ue05f > \u095f;" // LETTER YYA
"\ue060 > \u0960;" // LETTER VOCALIC RR
"\ue061 > \u0961;" // LETTER VOCALIC LL
"\ue062 > \u0962;" // VOWEL SIGN VOCALIC L
"\ue063 > \u0963;" // VOWEL SIGN VOCALIC LL
"\ue064 > \u0964;" // DANDA
"\ue065 > \u0965;" // DOUBLE DANDA
"\ue066 > \u0966;" // DIGIT ZERO
"\ue067 > \u0967;" // DIGIT ONE
"\ue068 > \u0968;" // DIGIT TWO
"\ue069 > \u0969;" // DIGIT THREE
"\ue06a > \u096a;" // DIGIT FOUR
"\ue06b > \u096b;" // DIGIT FIVE
"\ue06c > \u096c;" // DIGIT SIX
"\ue06d > \u096d;" // DIGIT SEVEN
"\ue06e > \u096e;" // DIGIT EIGHT
"\ue06f > \u096f;" // DIGIT NINE
"\ue070>\u0970;" // ABBREVIATION SIGN
"\ue071>\u0930;" // LETTER RA WITH MIDDLE DIAGONAL
"\ue072>\u0930;" // LETTER RA WITH LOWER DIAGONAL
"\ue073>;" // RUPEE MARK
"\ue074>\u0930\u0942;" // RUPEE SIGN
"\ue075>;" // CURRENCY NUMERATOR ONE
"\ue076>;" // CURRENCY NUMERATOR TWO
"\ue077>;" // CURRENCY NUMERATOR THREE
"\ue078>;" // CURRENCY NUMERATOR FOUR
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
"\ue07B>;" // ISSHAR
"\uE07C>;" // TIPPI
"\uE07D>;" // ADDAK
"\uE07E>;" // IRI
"\uE07F>;" // URA
"\uE080>;" // EK ONKAR
"\uE081>\u0935;" // FALLBACK FOR ORIYA LETTER WA
// \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
// :: NFC;
// eof
}
}

View file

@ -1,154 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_InterIndic_Gujarati.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// InterIndic_Gujarati
t_InterIndic_Gujr {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// InterIndic-Gujarati
//:: NFD (NFC) ;
// Each rule maps one InterIndic private-use code point (\ue0xx) to Gujarati.
// A rule with an empty right-hand side ("\ue0xx>;") replaces the source
// character with nothing.
// --- Signs ---
"\ue001>\u0a81;" // SIGN CANDRABINDU
"\ue002>\u0a82;" // SIGN ANUSVARA
"\ue003>\u0a83;" // SIGN VISARGA
// --- Independent vowels ---
"\uE004>\u0a85;" // FALLBACK TO LETTER A
"\ue005>\u0a85;" // LETTER A
"\ue006>\u0a86;" // LETTER AA
"\ue007>\u0a87;" // LETTER I
"\ue008>\u0a88;" // LETTER II
"\ue009>\u0a89;" // LETTER U
"\ue00a>\u0a8a;" // LETTER UU
"\ue00b>\u0a8b;" // LETTER VOCALIC R
"\ue00c>\u0a8c;" // LETTER VOCALIC L
"\ue00d>\u0a8d;" // GUJARATI VOWEL CANDRA E
"\ue00e>\u0a8f;" // FALLBACK: LETTER SHORT E > LETTER E
"\ue00f>\u0a8f;" // InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E)
"\ue010>\u0a90;" // LETTER AI
"\ue011>\u0a91;" // LETTER CANDRA O (target keeps the same block offset, so apparently a direct mapping rather than a fallback)
"\ue012>\u0a93;" // FALLBACK: LETTER SHORT O > LETTER O
"\ue013>\u0a93;" // InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O)
"\ue014>\u0a94;" // LETTER AU
// --- Consonants ---
"\ue015>\u0a95;" // LETTER KA
"\ue016>\u0a96;" // LETTER KHA
"\ue017>\u0a97;" // LETTER GA
"\ue018>\u0a98;" // LETTER GHA
"\ue019>\u0a99;" // LETTER NGA
"\ue01a>\u0a9a;" // LETTER CA
"\ue01b>\u0a9b;" // LETTER CHA
"\ue01c>\u0a9c;" // LETTER JA
"\ue01d>\u0a9d;" // LETTER JHA
"\ue01e>\u0a9e;" // LETTER NYA
"\ue01f>\u0a9f;" // LETTER TTA
"\ue020>\u0aa0;" // LETTER TTHA
"\ue021>\u0aa1;" // LETTER DDA
"\ue022>\u0aa2;" // LETTER DDHA
"\ue023>\u0aa3;" // LETTER NNA
"\ue024>\u0aa4;" // LETTER TA
"\ue025>\u0aa5;" // LETTER THA
"\ue026>\u0aa6;" // LETTER DA
"\ue027>\u0aa7;" // LETTER DHA
"\ue028>\u0aa8;" // LETTER NA
"\ue029>\u0aa8\u0abc;" // FALLBACK to NA+NUKTA
"\ue02a>\u0aaa;" // LETTER PA
"\ue02b>\u0aab;" // LETTER PHA
"\ue02c>\u0aac;" // LETTER BA
"\ue02d>\u0aad;" // LETTER BHA
"\ue02e>\u0aae;" // LETTER MA
"\ue02f>\u0aaf;" // LETTER YA
"\ue030>\u0ab0;" // LETTER RA
"\ue031>\u0ab0\u0abc;" // FALLBACK: LETTER RRA > LETTER RA + NUKTA
"\ue032>\u0ab2;" // LETTER LA
"\ue033>\u0ab3;" // LETTER LLA
"\ue034>\u0ab3\u0abc;" // LETTER LLLA>LETTER LLA+NUKTA
"\ue035>\u0ab5;" // LETTER VA
"\ue036>\u0ab6;" // LETTER SHA
"\ue037>\u0ab7;" // LETTER SSA
"\ue038>\u0ab8;" // LETTER SA
"\ue039>\u0ab9;" // LETTER HA
// --- Nukta, avagraha, dependent vowel signs, virama, OM ---
"\ue03c>\u0abc;" // SIGN NUKTA
"\ue03d>\u0abd;" // SIGN AVAGRAHA
"\ue03e>\u0abe;" // VOWEL SIGN AA
"\ue03f>\u0abf;" // VOWEL SIGN I
"\ue040>\u0ac0;" // VOWEL SIGN II
"\ue041>\u0ac1;" // VOWEL SIGN U
"\ue042>\u0ac2;" // VOWEL SIGN UU
"\ue043>\u0ac3;" // VOWEL SIGN VOCALIC R
"\ue044>\u0ac4;" // VOWEL SIGN VOCALIC RR
"\ue045>\u0ac5;" // VOWEL SIGN CANDRA E
"\ue046>\u0ac7;" // FALLBACK: VOWEL SIGN SHORT E > VOWEL SIGN E
"\ue047>\u0ac7;" // InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E)
"\ue048>\u0ac8;" // VOWEL SIGN AI
"\ue049>\u0ac9;" // VOWEL SIGN CANDRA O
"\ue04a>\u0acb;" // FALLBACK: VOWEL SIGN SHORT O > VOWEL SIGN O
"\ue04b>\u0acb;" // InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O)
"\ue04c>\u0acc;" // VOWEL SIGN AU
"\ue04d>\u0acd;" // SIGN VIRAMA
"\ue050>\u0ad0;" // OM
// --- Stress signs and accents: removed (empty right-hand side) ---
"\ue051>;" // STRESS SIGN UDATTA
"\ue052>;" // STRESS SIGN ANUDATTA
"\ue053>;" // GRAVE ACCENT
"\ue054>;" // ACUTE ACCENT
// --- Length marks ---
"\ue055>;" // UNMAPPED InterIndic-Gujarati: LENGTH MARK
"\ue056>\u0ac8;" // REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u0acc;" // REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU
// --- Nukta-composed letters: emitted as base letter + SIGN NUKTA ---
"\ue058>\u0a95\u0abc;" // FALLBACK: LETTER QA > LETTER KA + NUKTA
"\ue059>\u0a96\u0abc;" // REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA
"\ue05a>\u0a97\u0abc;" // REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA
"\ue05b>\u0a9c\u0abc;" // REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA
"\ue05c>\u0aa1\u0abc;" // FALLBACK: LETTER DDDHA > LETTER DDA + NUKTA
"\ue05d>\u0aa2\u0abc;" // REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA
"\ue05e>\u0aab\u0abc;" // REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER FA>LETTER PHA.SIGN NUKTA
"\ue05f>\u0aaf\u0abc;" // REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA
// --- Vocalic letters and signs ---
"\ue060>\u0ae0;" // LETTER VOCALIC RR
"\ue061>\u0ae1;" // LETTER VOCALIC LL
"\ue062>\u0abf\u0abc;" // REMAP (indicExceptions.txt): \u0ae2>\u0abf\u0abc = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
"\ue063>\u0ac0\u0abc;" // REMAP (indicExceptions.txt): \u0ae3>\u0ac0\u0abc = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
// --- Punctuation: targets are \u0964/\u0965, not code points in the \u0a80 block ---
"\uE064>\u0964;" // DANDA
"\uE065>\u0965;" // DOUBLE DANDA
// --- Digits ---
"\ue066>\u0ae6;" // DIGIT ZERO
"\ue067>\u0ae7;" // DIGIT ONE
"\ue068>\u0ae8;" // DIGIT TWO
"\ue069>\u0ae9;" // DIGIT THREE
"\ue06a>\u0aea;" // DIGIT FOUR
"\ue06b>\u0aeb;" // DIGIT FIVE
"\ue06c>\u0aec;" // DIGIT SIX
"\ue06d>\u0aed;" // DIGIT SEVEN
"\ue06e>\u0aee;" // DIGIT EIGHT
"\ue06f>\u0aef;" // DIGIT NINE
// --- Script-specific extras: RA variants fall back to LETTER RA, the rest are removed ---
"\ue070>;" // ABBREVIATION SIGN
"\ue071>\u0ab0;" // LETTER RA WITH MIDDLE DIAGONAL
"\ue072>\u0ab0;" // LETTER RA WITH LOWER DIAGONAL
"\ue073>;" // RUPEE MARK
"\ue074>;" // RUPEE SIGN
"\ue075>;" // CURRENCY NUMERATOR ONE
"\ue076>;" // CURRENCY NUMERATOR TWO
"\ue077>;" // CURRENCY NUMERATOR THREE
"\ue078>;" // CURRENCY NUMERATOR FOUR
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
"\ue07B>;" // ISSHAR
"\uE07C>;" // TIPPI
"\uE07D>;" // ADDAK
"\uE07E>;" // IRI
"\uE07F>;" // URA
"\uE080>;" // EK ONKAR
"\uE081>\u0ab5;" // FALLBACK FOR ORIYA LETTER WA
// ASCII digits 0 and 1 are mapped to Gujarati digits as well.
"0 > \u0ae6;" // FALLBACK FOR TAMIL
"1 > \u0ae7;" // ASCII 1 > DIGIT ONE (presumably the same Tamil fallback as the line above)
// Stale, disabled rule: \uE080 is handled by the active EK ONKAR rule above, and ISSHAR is \ue07B.
//\ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,163 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_InterIndic_Gurmukhi.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// InterIndic_Gurmukhi
t_InterIndic_Guru {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// InterIndic-Gurmukhi
//:: NFD (NFC) ;
// Each rule maps one InterIndic private-use code point (\ue0xx) to Gurmukhi.
// A rule with an empty right-hand side ("\ue0xx>;") replaces the source
// character with nothing.
// Context sets for the anusvara rules below. Both are written in Gurmukhi
// code points, i.e. they describe the text to the left of the match.
"$vowel = [\u0A05-\u0A14 \u0A3e-\u0A4D];"
"$consonant = [\u0A15-\u0A39];"
"\ue001>\u0A01;" // SIGN CHANDRABINDU
//rules for BINDI
// Anusvara is equivalent to BINDI when preceded by a vowel
"$vowel{\ue002>\u0a02;" // SIGN ANUSVARA (\u0a02 = SIGN BINDI)
// else is equivalent to TIPPI
"$consonant{\ue002>\u0a70;" // SIGN TIPPI
"\ue002>\u0a02;" // no vowel/consonant context: default to SIGN BINDI
"\ue003>;" // FALLBACK BLOW AWAY SIGN VISARGA
// --- Independent vowels ---
"\uE004>\u0a05;" // FALLBACK TO LETTER A
"\ue005>\u0a05;" // LETTER A
"\ue006>\u0a06;" // LETTER AA
"\ue007>\u0a07;" // LETTER I
"\ue008>\u0a08;" // LETTER II
"\ue009>\u0a09;" // LETTER U
"\ue00a>\u0a0a;" // LETTER UU
"\ue00b>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
"\ue00c>\u0a33;" // FALLBACK: LETTER VOCALIC L > LETTER LLA
"\ue00d>\u0a0f;" // FALLBACK: LETTER CANDRA E > LETTER EE
"\ue00e>\u0a0f;" // FALLBACK: LETTER SHORT E > LETTER EE
"\ue00f>\u0a0f;" // LETTER EE
"\ue010>\u0a10;" // LETTER AI
"\ue011>\u0a13;" // FALLBACK: LETTER CANDRA O > LETTER OO
"\ue012>\u0a13;" // FALLBACK: LETTER SHORT O > LETTER OO
"\ue013>\u0a13;" // LETTER OO
"\ue014>\u0a14;" // LETTER AU
// --- Consonants ---
"\ue015>\u0a15;" // LETTER KA
"\ue016>\u0a16;" // LETTER KHA
"\ue017>\u0a17;" // LETTER GA
"\ue018>\u0a18;" // LETTER GHA
"\ue019>\u0a19;" // LETTER NGA
"\ue01a>\u0a1a;" // LETTER CA
"\ue01b>\u0a1b;" // LETTER CHA
"\ue01c>\u0a1c;" // LETTER JA
"\ue01d>\u0a1d;" // LETTER JHA
"\ue01e>\u0a1e;" // LETTER NYA
"\ue01f>\u0a1f;" // LETTER TTA
"\ue020>\u0a20;" // LETTER TTHA
"\ue021>\u0a21;" // LETTER DDA
"\ue022>\u0a22;" // LETTER DDHA
"\ue023>\u0a23;" // LETTER NNA
"\ue024>\u0a24;" // LETTER TA
"\ue025>\u0a25;" // LETTER THA
"\ue026>\u0a26;" // LETTER DA
"\ue027>\u0a27;" // LETTER DHA
"\ue028>\u0a28;" // LETTER NA
"\ue029>\u0a28\u0a3c;" // LETTER NNNA > LETTER NA + SIGN NUKTA (the old indicExceptions.txt note listed plain LETTER NA; the rule also emits the nukta)
"\ue02a>\u0a2a;" // LETTER PA
"\ue02b>\u0a2b;" // LETTER PHA
"\ue02c>\u0a2c;" // LETTER BA
"\ue02d>\u0a2d;" // LETTER BHA
"\ue02e>\u0a2e;" // LETTER MA
"\ue02f>\u0a2f;" // LETTER YA
"\ue030>\u0a30;" // LETTER RA
"\ue031>\u0a30\u0a3c;" // FALLBACK LETTER RA+NUKTA
"\ue032>\u0a32;" // LETTER LA
"\ue033>\u0a33;" // LETTER LLA
"\ue034>\u0a33;" // REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA
"\ue035>\u0a35;" // LETTER VA
"\ue036>\u0a36;" // LETTER SHA
"\ue037>\u0a36;" // REMAP (indicExceptions.txt): \u0a37>\u0a36 = LETTER SSA>LETTER SHA
"\ue038>\u0a38;" // LETTER SA
"\ue039>\u0a39;" // LETTER HA
// --- Nukta, avagraha, dependent vowel signs, virama ---
"\ue03c>\u0a3c;" // SIGN NUKTA
"\ue03d>;" // FALLBACK BLOW AWAY SIGN AVAGRAHA
"\ue03e>\u0a3e;" // VOWEL SIGN AA
"\ue03f>\u0a3f;" // VOWEL SIGN I
"\ue040>\u0a40;" // VOWEL SIGN II
"\ue041>\u0a41;" // VOWEL SIGN U
"\ue042>\u0a42;" // VOWEL SIGN UU
"\ue043>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC R
"\ue044>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR
"\ue045>\u0a48;" // REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI
"\ue046>\u0a47;" // FALLBACK: VOWEL SIGN SHORT E > VOWEL SIGN EE
"\ue047>\u0a47;" // VOWEL SIGN EE
"\ue048>\u0a48;" // VOWEL SIGN AI
"\ue049>\u0a4c;" // REMAP (indicExceptions.txt): \u0a49>\u0a4c = VOWEL SIGN CANDRA O>VOWEL SIGN AU
"\ue04a>\u0a4b;" // FALLBACK: VOWEL SIGN SHORT O > VOWEL SIGN OO
"\ue04b>\u0a4b;" // VOWEL SIGN OO
"\ue04c>\u0a4c;" // VOWEL SIGN AU
"\ue04d>\u0a4d;" // SIGN VIRAMA
// OM has no single-letter mapping here; it is spelled LETTER OO (\u0a13) + SIGN BINDI (\u0a02).
// The rule previously emitted \u0a0f, which this table uses for LETTER EE.
"\ue050>\u0a13\u0a02;" // FALLBACK to OO+BINDI : OM
// --- Stress signs and accents: removed (empty right-hand side) ---
"\ue051>;" // STRESS SIGN UDATTA
"\ue052>;" // STRESS SIGN ANUDATTA
"\ue053>;" // GRAVE ACCENT
"\ue054>;" // ACUTE ACCENT
// --- Length marks ---
"\ue055>;" // FALLBACK BLOW AWAY LENGTH MARK
"\ue056>\u0a48;" // REMAP (indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u0a4c;" // REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU
// --- Nukta-composed letters ---
"\ue058>\u0a15\u0a3c;" // FALLBACK: LETTER QA > LETTER KA + NUKTA
"\ue059>\u0a59;" // LETTER KHHA
"\ue05a>\u0a5a;" // LETTER GHHA
"\ue05b>\u0a5b;" // LETTER ZA
"\ue05c>\u0a5c;" // LETTER RRA
"\ue05d>\u0a22\u0a3c;" // REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA
"\ue05e>\u0a5e;" // LETTER FA
"\ue05f>\u0a2f\u0a3c;" // LETTER YYA > LETTER YA + SIGN NUKTA (the old indicExceptions.txt note listed plain LETTER YA; the rule also emits the nukta)
// --- Vocalic letters and signs ---
"\ue060>\u0a30\u0a3f;" // REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
"\ue061>\u0a32\u0a3c;" // FALLBACK: LETTER VOCALIC LL > LETTER LA + NUKTA
"\ue062>\u0a3f\u0a3c;" // REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
"\ue063>\u0a40\u0a3c;" // REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
// --- Punctuation: targets are \u0964/\u0965, not code points in the \u0a00 block ---
"\uE064>\u0964;" // DANDA
"\uE065>\u0965;" // DOUBLE DANDA
// --- Digits ---
"\ue066>\u0a66;" // DIGIT ZERO
"\ue067>\u0a67;" // DIGIT ONE
"\ue068>\u0a68;" // DIGIT TWO
"\ue069>\u0a69;" // DIGIT THREE
"\ue06a>\u0a6a;" // DIGIT FOUR
"\ue06b>\u0a6b;" // DIGIT FIVE
"\ue06c>\u0a6c;" // DIGIT SIX
"\ue06d>\u0a6d;" // DIGIT SEVEN
"\ue06e>\u0a6e;" // DIGIT EIGHT
"\ue06f>\u0a6f;" // DIGIT NINE
// --- Script-specific extras: RA variants fall back to LETTER RA; TIPPI..EK ONKAR have Gurmukhi targets ---
"\ue070>;" // ABBREVIATION SIGN
"\ue071>\u0a30;" // LETTER RA WITH MIDDLE DIAGONAL
"\ue072>\u0a30;" // LETTER RA WITH LOWER DIAGONAL
"\ue073>;" // RUPEE MARK
"\ue074>;" // RUPEE SIGN
"\ue075>;" // CURRENCY NUMERATOR ONE
"\ue076>;" // CURRENCY NUMERATOR TWO
"\ue077>;" // CURRENCY NUMERATOR THREE
"\ue078>;" // CURRENCY NUMERATOR FOUR
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
"\ue07B>;" // ISSHAR
"\uE07C>\u0a70;" // TIPPI
"\uE07D>\u0a71;" // ADDAK
"\uE07E>\u0a72;" // IRI
"\uE07F>\u0a73;" // URA
"\uE080>\u0a74;" // EK ONKAR
"\uE081>\u0a35;" // FALLBACK FOR ORIYA LETTER WA
// ASCII digits 0 and 1 are mapped to Gurmukhi digits as well.
"0 > \u0a66;" // FALLBACK FOR TAMIL
"1 > \u0a67;" // ASCII 1 > DIGIT ONE (presumably the same Tamil fallback as the line above)
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,157 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_InterIndic_Kannada.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// InterIndic_Kannada
t_InterIndic_Knda {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// InterIndic-Kannada
//:: NFD (NFC) ;
// Each rule maps one InterIndic private-use code point (\ue0xx) to Kannada.
// A rule with an empty right-hand side ("\ue0xx>;") replaces the source
// character with nothing.
// Two-character source LLA + NUKTA; listed ahead of the single-character
// \ue033 rule further down, presumably so the longer match is tried first.
"\ue033\ue03c>\u0cde;" // LETTER FA
// --- Signs ---
"\ue001>\u0c82;" // REMAP (indicExceptions.txt): \u0c81>\u0c82 = SIGN CANDRABINDU>SIGN ANUSVARA
"\ue002>\u0c82;" // SIGN ANUSVARA
"\ue003>\u0c83;" // SIGN VISARGA
// --- Independent vowels ---
"\uE004>\u0c85;" // FALLBACK TO LETTER A
"\ue005>\u0c85;" // LETTER A
"\ue006>\u0c86;" // LETTER AA
"\ue007>\u0c87;" // LETTER I
"\ue008>\u0c88;" // LETTER II
"\ue009>\u0c89;" // LETTER U
"\ue00a>\u0c8a;" // LETTER UU
"\ue00b>\u0c8b;" // LETTER VOCALIC R
"\ue00c>\u0c8c;" // LETTER VOCALIC L
"\ue00d>\u0c8e;" // FALLBACK: LETTER CANDRA E > LETTER E
"\ue00e>\u0c8e;" // LETTER E (InterIndic LETTER SHORT E)
"\ue00f>\u0c8f;" // LETTER EE
"\ue010>\u0c90;" // LETTER AI
"\ue011>\u0c92;" // FALLBACK: LETTER CANDRA O > LETTER O
"\ue012>\u0c92;" // LETTER O
"\ue013>\u0c93;" // LETTER OO
"\ue014>\u0c94;" // LETTER AU
// --- Consonants ---
"\ue015>\u0c95;" // LETTER KA
"\ue016>\u0c96;" // LETTER KHA
"\ue017>\u0c97;" // LETTER GA
"\ue018>\u0c98;" // LETTER GHA
"\ue019>\u0c99;" // LETTER NGA
"\ue01a>\u0c9a;" // LETTER CA
"\ue01b>\u0c9b;" // LETTER CHA
"\ue01c>\u0c9c;" // LETTER JA
"\ue01d>\u0c9d;" // LETTER JHA
"\ue01e>\u0c9e;" // LETTER NYA
"\ue01f>\u0c9f;" // LETTER TTA
"\ue020>\u0ca0;" // LETTER TTHA
"\ue021>\u0ca1;" // LETTER DDA
"\ue022>\u0ca2;" // LETTER DDHA
"\ue023>\u0ca3;" // LETTER NNA
"\ue024>\u0ca4;" // LETTER TA
"\ue025>\u0ca5;" // LETTER THA
"\ue026>\u0ca6;" // LETTER DA
"\ue027>\u0ca7;" // LETTER DHA
"\ue028>\u0ca8;" // LETTER NA
"\ue029>\u0ca8;" // REMAP (indicExceptions.txt): \u0ca9>\u0ca8 = LETTER NNNA>LETTER NA
"\ue02a>\u0caa;" // LETTER PA
"\ue02b>\u0cab;" // LETTER PHA
"\ue02c>\u0cac;" // LETTER BA
"\ue02d>\u0cad;" // LETTER BHA
"\ue02e>\u0cae;" // LETTER MA
"\ue02f>\u0caf;" // LETTER YA
"\ue030\ue03c>\u0cb1;" // LETTER RA + NUKTA > LETTER RRA; listed ahead of the plain RA rule
"\ue030>\u0cb0;" // LETTER RA
"\ue031>\u0cb1;" // LETTER RRA
"\ue032>\u0cb2;" // LETTER LA
"\ue033>\u0cb3;" // LETTER LLA
"\ue034>\u0cde;" // LETTER LLLA > \u0cde, the same target as the LLA + NUKTA rule at the top (the old indicExceptions.txt note listed \u0cb3 = LETTER LLA)
"\ue035>\u0cb5;" // LETTER VA
"\ue036>\u0cb6;" // LETTER SHA
"\ue037>\u0cb7;" // LETTER SSA
"\ue038>\u0cb8;" // LETTER SA
"\ue039>\u0cb9;" // LETTER HA
// --- Nukta, avagraha, dependent vowel signs, virama, OM ---
"\ue03c>\u0cbc;" // NUKTA
"\ue03d>\u0cbd;" // AVAGRAHA
"\ue03e>\u0cbe;" // VOWEL SIGN AA
"\ue03f>\u0cbf;" // VOWEL SIGN I
"\ue040>\u0cc0;" // VOWEL SIGN II
"\ue041>\u0cc1;" // VOWEL SIGN U
"\ue042>\u0cc2;" // VOWEL SIGN UU
"\ue043>\u0cc3;" // VOWEL SIGN VOCALIC R
"\ue044>\u0cc4;" // VOWEL SIGN VOCALIC RR
"\ue045>\u0cc6;" // REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E
"\ue046>\u0cc6;" // VOWEL SIGN E
"\ue047>\u0cc7;" // VOWEL SIGN EE
"\ue048>\u0cc8;" // VOWEL SIGN AI
"\ue049>\u0cca;" // REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O
"\ue04a>\u0cca;" // VOWEL SIGN O
"\ue04b>\u0ccb;" // VOWEL SIGN OO
"\ue04c>\u0ccc;" // VOWEL SIGN AU
"\ue04d>\u0ccd;" // SIGN VIRAMA
"\ue050>\u0c93\u0c82;" // REMAP (indicExceptions.txt): \u0cd0>\u0c93\u0c82 = OM>LETTER OO.SIGN ANUSVARA
// --- Stress signs and accents: removed (empty right-hand side) ---
"\ue051>;" // STRESS SIGN UDATTA
"\ue052>;" // STRESS SIGN ANUDATTA
"\ue053>;" // GRAVE ACCENT
"\ue054>;" // ACUTE ACCENT
// --- Length marks ---
"\ue055>\u0cd5;" // LENGTH MARK
"\ue056>\u0cd6;" // AI LENGTH MARK
"\ue057>\u0ccc;" // REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU
// --- Nukta-composed letters: reduced to a base letter, no nukta emitted ---
"\ue058>\u0c95;" // FALLBACK: LETTER QA > LETTER KA
"\ue059>\u0c96;" // REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA
"\ue05a>\u0c97;" // REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA
"\ue05b>\u0c9c;" // REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA
"\ue05c>\u0ca2;" // FALLBACK: LETTER DDDHA > LETTER DDHA
"\ue05d>\u0ca2;" // REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA
"\ue05e>\u0cde;" // LETTER FA
"\ue05f>\u0caf;" // REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA
// --- Vocalic letters and signs ---
"\ue060>\u0ce0;" // LETTER VOCALIC RR
"\ue061>\u0ce1;" // LETTER VOCALIC LL
"\ue062>\u0cbf;" // REMAP (indicExceptions.txt): \u0ce2>\u0cbf = VOWEL SIGN VOCALIC L>VOWEL SIGN I
"\ue063>\u0cc0;" // REMAP (indicExceptions.txt): \u0ce3>\u0cc0 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
// --- Punctuation: both dandas become a quoted literal full stop ---
"\ue064>'.' ;" // FALLBACK FOR DANDA
"\ue065>'.' ;" // FALLBACK FOR DOUBLE DANDA
// --- Digits ---
"\ue066>\u0ce6;" // DIGIT ZERO
"\ue067>\u0ce7;" // DIGIT ONE
"\ue068>\u0ce8;" // DIGIT TWO
"\ue069>\u0ce9;" // DIGIT THREE
"\ue06a>\u0cea;" // DIGIT FOUR
"\ue06b>\u0ceb;" // DIGIT FIVE
"\ue06c>\u0cec;" // DIGIT SIX
"\ue06d>\u0ced;" // DIGIT SEVEN
"\ue06e>\u0cee;" // DIGIT EIGHT
"\ue06f>\u0cef;" // DIGIT NINE
// --- Script-specific extras: RA variants fall back to LETTER RA, the rest are removed ---
"\ue070>;" // ABBREVIATION SIGN
"\ue071>\u0cb0;" // LETTER RA WITH MIDDLE DIAGONAL
"\ue072>\u0cb0;" // LETTER RA WITH LOWER DIAGONAL
"\ue073>;" // RUPEE MARK
"\ue074>;" // RUPEE SIGN
"\ue075>;" // CURRENCY NUMERATOR ONE
"\ue076>;" // CURRENCY NUMERATOR TWO
"\ue077>;" // CURRENCY NUMERATOR THREE
"\ue078>;" // CURRENCY NUMERATOR FOUR
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
"\ue07B>;" // ISSHAR
"\uE07C>;" // TIPPI
"\uE07D>;" // ADDAK
"\uE07E>;" // IRI
"\uE07F>;" // URA
"\uE080>;" // EK ONKAR
"\uE081>\u0cb5;" // FALLBACK FOR ORIYA LETTER WA
// ASCII digits 0 and 1 are mapped to Kannada digits as well.
"0 > \u0ce6;" // FALLBACK FOR TAMIL
"1 > \u0ce7;" // ASCII 1 > DIGIT ONE (presumably the same Tamil fallback as the line above)
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,545 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_InterIndic_Latin.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// InterIndic_Latin
t_InterIndic_Latn {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// InterIndic-Latin
// Variable definitions: one name per InterIndic private-use code point
// (\ue0xx), followed by the character sets the conversion rules use as context.
// \ue000 reserved
// signs
"$chandrabindu=\ue001;"
"$anusvara=\ue002;"
"$visarga=\ue003;"
// \ue004 reserved (no variable; removed by the last rule of this rule set)
// w<vowel> represents the stand-alone form
"$wa=\ue005;"
"$waa=\ue006;"
"$wi=\ue007;"
"$wii=\ue008;"
"$wu=\ue009;"
"$wuu=\ue00a;"
"$wr=\ue00b;"
"$wl=\ue00c;"
"$wce=\ue00d;" // LETTER CANDRA E
"$wse=\ue00e;" // LETTER SHORT E
"$we=\ue00f;" // \u090f LETTER E
"$wai=\ue010;"
"$wco=\ue011;" // LETTER CANDRA O
"$wso=\ue012;" // LETTER SHORT O
"$wo=\ue013;" // \u0913 LETTER O
"$wau=\ue014;"
// consonants
"$ka=\ue015;"
"$kha=\ue016;"
"$ga=\ue017;"
"$gha=\ue018;"
"$nga=\ue019;"
"$ca=\ue01a;"
"$cha=\ue01b;"
"$ja=\ue01c;"
"$jha=\ue01d;"
"$nya=\ue01e;"
"$tta=\ue01f;"
"$ttha=\ue020;"
"$dda=\ue021;"
"$ddha=\ue022;"
"$nna=\ue023;"
"$ta=\ue024;"
"$tha=\ue025;"
"$da=\ue026;"
"$dha=\ue027;"
"$na=\ue028;"
"$ena=\ue029;" //compatibility
"$pa=\ue02a;"
"$pha=\ue02b;"
"$ba=\ue02c;"
"$bha=\ue02d;"
"$ma=\ue02e;"
"$ya=\ue02f;"
"$ra=\ue030;"
"$vva=\ue081;" // ORIYA LETTER WA slot; defined out of code-point order
"$rra=\ue031;"
"$la=\ue032;"
"$lla=\ue033;"
"$ela=\ue034;" //compatibility
"$va=\ue035;"
"$sha=\ue036;"
"$ssa=\ue037;"
"$sa=\ue038;"
"$ha=\ue039;"
//\u093a Reserved
//\u093b Reserved
"$nukta=\ue03c;"
"$avagraha=\ue03d;" // SIGN AVAGRAHA
// <vowel> represents the dependent form
"$aa=\ue03e;"
"$i=\ue03f;"
"$ii=\ue040;"
"$u=\ue041;"
"$uu=\ue042;"
// NOTE(review): the per-script tables label \ue044 as VOWEL SIGN VOCALIC RR and
// \ue062 as VOWEL SIGN VOCALIC L, yet the rules below romanize $lh (\ue044) as
// l\u0325 and $rrh (\ue062) as r\u0325\u0304. The two assignments look swapped;
// confirm against the reverse (Latin-InterIndic) rules before changing either.
"$rh=\ue043;"
"$lh=\ue044;"
"$ce=\ue045;" //VOWEL SIGN CANDRA E
"$se=\ue046;" //VOWEL SIGN SHORT E
"$e=\ue047;"
"$ai=\ue048;"
"$co=\ue049;" // VOWEL SIGN CANDRA O
"$so=\ue04a;" // VOWEL SIGN SHORT O
"$o=\ue04b;" // \u094b
"$au=\ue04c;"
"$virama=\ue04d;"
// \u094e Reserved
// \u094f Reserved
"$om=\ue050;" // OM
// These four are conversion rules, not definitions: the marks are removed.
"\ue051>;" // UNMAPPED STRESS SIGN UDATTA
"\ue052>;" // UNMAPPED STRESS SIGN ANUDATTA
"\ue053>;" // UNMAPPED GRAVE ACCENT
"\ue054>;" // UNMAPPED ACUTE ACCENT
"$lm = \ue055;"// Telugu Length Mark
"$ailm=\ue056;"// AI Length Mark
"$aulm=\ue057;"// AU Length Mark
// Urdu compatibility forms
"$uka=\ue058;" // LETTER QA
"$ukha=\ue059;" // LETTER KHHA
"$ugha=\ue05a;" // LETTER GHHA
"$ujha=\ue05b;" // LETTER ZA
"$uddha=\ue05c;" // LETTER DDDHA
"$udha=\ue05d;" // LETTER RHA
"$ufa=\ue05e;" // LETTER FA
"$uya=\ue05f;" // LETTER YYA
"$wrr=\ue060;" // LETTER VOCALIC RR
"$wll=\ue061;" // LETTER VOCALIC LL
"$rrh=\ue062;" // see the NOTE(review) at $rh/$lh above
"$llh=\ue063;"
"$danda=\ue064;"
"$doubleDanda=\ue065;"
"$zero=\ue066;" // DIGIT ZERO
"$one=\ue067;" // DIGIT ONE
"$two=\ue068;" // DIGIT TWO
"$three=\ue069;" // DIGIT THREE
"$four=\ue06a;" // DIGIT FOUR
"$five=\ue06b;" // DIGIT FIVE
"$six=\ue06c;" // DIGIT SIX
"$seven=\ue06d;" // DIGIT SEVEN
"$eight=\ue06e;" // DIGIT EIGHT
"$nine=\ue06f;" // DIGIT NINE
// \u0970>; # UNMAPPED ABBREVIATION SIGN
// Ranges of dependent vowel signs; neither set is referenced by a rule in this rule set.
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
"$depVowelBelow=[\ue041-\ue044];"
// $x was originally called '&'; $z was '%'
// $x: every dependent vowel sign. Used as the "followed by a vowel sign" context.
"$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];"
// $z: Latin consonant letters; not referenced by a rule in this rule set.
"$z=[bcdfghjklmnpqrstvwxyz];"
// $vowels: Latin vowel letters plus r and the combining marks used in vowel
// romanizations. Used as the left context for independent vowels.
"$vowels=[aeiour\u0304\u0325\u0306];"
// $forceIndependentMatra: anything that is neither a letter nor a combining
// mark in \u0300-\u034c. Used as the left context for stray vowel signs.
"$forceIndependentMatra = [^[[:L:][\u0300-\u034c]]];"
//#####################################################################
// convert from Native letters to Latin letters
//#####################################################################
// Rule syntax used below: "}" starts the context that must follow the match
// and "{" ends the context that must precede it; context is matched but not
// replaced.
//transliterations for anusvara
// The romanization depends on the class of the consonant that follows; the
// last rule is the default when none of the listed consonants follows.
"$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;"
"$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;"
"$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;"
"$anusvara} [$ta$tha$da$dha$na] > n ;"
"$anusvara} [$pa$pha$ba$bha$ma] > m ;"
"$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;"
"$anusvara> m\u0307;"
// Urdu compatibility
// Most letters here come as three rules: before a dependent vowel sign ($x)
// the bare consonant is emitted; with a virama the virama is consumed and the
// bare consonant is emitted; otherwise the consonant is followed by "a".
// Both the base + nukta sequence and the precomposed compatibility letter are
// handled.
// NOTE(review): $uka, $ugha, $ujha and $udha have only the plain rule, and
// $ukha has no "}$x" rule, so those letters keep the "a" before a vowel sign.
// $udha (LETTER RHA) yields r\u0323a while $ddha$nukta yields r\u0323ha.
// Confirm whether these gaps are intentional.
"$ya$nukta}$x > y\u0307 ;"
"$ya$nukta$virama > y\u0307 ;"
"$ya$nukta > y\u0307a ;"
"$la$nukta }$x > l\u0331 ;"
"$la$nukta$virama > l\u0331 ;"
"$la$nukta > l\u0331a ;"
"$na$nukta }$x > n\u0331 ;"
"$na$nukta$virama > n\u0331 ;"
"$na$nukta > n\u0331a ;"
"$ena }$x > n\u0331 ;"
"$ena$virama > n\u0331 ;"
"$ena > n\u0331a ;"
"$uka > qa ;"
"$ka$nukta }$x > q ;"
"$ka$nukta$virama > q ;"
"$ka$nukta > qa ;"
"$kha$nukta }$x > k\u0331h\u0331 ;"
"$kha$nukta$virama > k\u0331h\u0331 ;"
"$kha$nukta > k\u0331h\u0331a ;"
"$ukha$virama > k\u0331h\u0331;"
"$ukha > k\u0331h\u0331a;"
"$ugha > g\u0307a ;"
"$ga$nukta }$x > g\u0307 ;"
"$ga$nukta$virama > g\u0307 ;"
"$ga$nukta > g\u0307a ;"
"$ujha > za ;"
"$ja$nukta }$x > z ;"
"$ja$nukta$virama > z ;"
"$ja$nukta > za ;"
"$ddha$nukta}$x > r\u0323h ;"
"$ddha$nukta$virama > r\u0323h ;"
"$ddha$nukta > r\u0323ha;"
"$uddha}$x > r\u0323 ;"
"$uddha$virama > r\u0323 ;"
"$uddha > r\u0323a;"
"$udha > r\u0323a ;"
"$dda$nukta}$x > r\u0323 ;"
"$dda$nukta$virama > r\u0323 ;"
"$dda$nukta > r\u0323a ;"
"$pha$nukta }$x > f ;"
"$pha$nukta$virama > f ;"
"$pha$nukta > fa ;"
"$ufa }$x > f ;"
"$ufa$virama > f ;"
"$ufa > fa ;"
"$ra$nukta}$x > r\u0331;"
"$ra$nukta$virama > r\u0331;"
"$ra$nukta > r\u0331a;"
"$lla$nukta}$x > l\u0331;"
"$lla$nukta$virama > l\u0331;"
"$lla$nukta > l\u0331a;"
"$ela}$x > l\u0331;"
"$ela$virama > l\u0331;"
"$ela > l\u0331a;"
"$uya}$x > y\u0307;"
"$uya$virama > y\u0307;"
"$uya > y\u0307a;"
// normal consonants
// Each consonant C is handled by up to four kinds of rule, in this order:
//   C$virama}$ha > c''   C + virama directly before HA (or another listed
//                        consonant): bare consonant plus an apostrophe ('' is
//                        the rule escape for one literal apostrophe);
//                        presumably this keeps e.g. k + h apart from kh
//   C}$x         > c     C before a dependent vowel sign: no inherent "a"
//   C$virama     > c     virama consumed: no inherent "a"
//   C            > ca    otherwise: consonant with inherent "a"
// Aspirates, nasals, YA and VA have no apostrophe rule.
"$ka$virama}$ha>k'';"
"$ka}$x>k;"
"$ka$virama>k;"
"$ka>ka;"
"$kha}$x>kh;"
"$kha$virama>kh;"
"$kha>kha;"
"$ga$virama}$ha>g'';"
"$ga}$x>g;"
"$ga$virama>g;"
"$ga>ga;"
"$gha}$x>gh;"
"$gha$virama>gh;"
"$gha>gha;"
"$nga}$x>n\u0307;"
"$nga$virama>n\u0307;"
"$nga>n\u0307a ;"
"$ca$virama}$ha>c'';"
"$ca}$x>c;"
"$ca$virama>c;"
"$ca>ca;"
"$cha}$x>ch;"
"$cha$virama>ch;"
"$cha>cha;"
"$ja$virama}$ha>j'';"
"$ja}$x>j;"
"$ja$virama>j;"
"$ja>ja;"
"$jha}$x>jh;"
"$jha$virama>jh;"
"$jha>jha;"
"$nya }$x>n\u0303 ;"
"$nya$virama>n\u0303;"
"$nya > n\u0303a ;"
"$tta$virama}$ha>t\u0323'';"
"$tta}$x>t\u0323;"
"$tta$virama>t\u0323;"
"$tta>t\u0323a;"
"$ttha}$x>t\u0323h;"
"$ttha$virama>t\u0323h;"
"$ttha>t\u0323ha;"
// DDA + virama before HA. This rule used to read "$dda}$x$ha", which matched
// DDA + vowel sign + HA instead and left DDA + virama + HA spelled like DDHA.
"$dda$virama}$ha>d\u0323'';"
"$dda}$x>d\u0323;"
"$dda$virama>d\u0323;"
"$dda>d\u0323a;"
"$ddha}$x>d\u0323h;"
"$ddha$virama>d\u0323h;"
"$ddha>d\u0323ha;"
"$nna}$x>n\u0323 ;"
"$nna$virama>n\u0323;"
"$nna>n\u0323a ;"
"$ta$virama}$ha>t'';"
"$ta$virama}$ttha>t'';"
"$ta$virama}$tta>t'';"
"$ta$virama}$tha>t'';"
"$ta}$x>t;"
"$ta$virama>t;"
"$ta>ta;"
"$tha}$x>th;"
"$tha$virama>th;"
"$tha>tha;"
"$da$virama}$ha>d'';"
"$da$virama}$ddha>d'';"
"$da$virama}$dda>d'';"
"$da$virama}$dha>d'';"
"$da}$x>d;"
"$da$virama>d;"
"$da>da;"
"$dha}$x>dh;"
"$dha$virama>dh;"
"$dha>dha;"
"$na$virama}$ga>n'';"
"$na$virama}$ya>n'';"
"$na}$x>n;"
"$na$virama>n;"
"$na>na;"
"$pa$virama}$ha>p'';"
"$pa}$x>p;"
"$pa$virama>p;"
"$pa>pa;"
"$pha}$x>ph;"
"$pha$virama>ph;"
"$pha>pha;"
"$ba$virama}$ha>b'';"
"$ba}$x>b;"
"$ba$virama>b;"
"$ba>ba;"
"$bha}$x>bh;"
"$bha$virama>bh;"
"$bha>bha;"
"$ma$virama}$ma>m'';"
"$ma}$x>m;"
"$ma$virama>m;"
"$ma>ma;"
"$ya}$x>y;"
"$ya$virama>y;"
"$ya>ya;"
"$ra$virama}$ha>r'';"
"$ra}$x>r;"
"$ra$virama>r;"
"$ra>ra;"
"$vva$virama}$ha>w\u0307'';"
"$vva}$x>w\u0307;"
"$vva$virama>w\u0307;"
"$vva>w\u0307a;"
"$rra$virama}$ha>r\u0331'';"
"$rra}$x>r\u0331;"
"$rra$virama>r\u0331;"
"$rra>r\u0331a;"
"$la$virama}$ha>l'';"
"$la}$x>l;"
"$la$virama>l;"
"$la>la;"
"$lla$virama}$ha>l\u0323'';"
"$lla}$x>l\u0323;"
"$lla$virama>l\u0323;"
"$lla>l\u0323a;"
"$va}$x>v;"
"$va$virama>v;"
"$va>va;"
"$sa$virama}$ha>s'';"
"$sa$virama}$sha>s'';"
"$sa$virama}$ssa>s'';"
"$sa$virama}$sa>s'';"
"$sa}$x>s;"
"$sa$virama>s;"
//for gurmukhi
// SA + nukta gets the same romanization as SHA below. These rules sit ahead
// of the plain "$sa>sa" rule, which would otherwise match the SA first.
"$sa$nukta}$x>s\u0301;"
"$sa$nukta$virama>s\u0301;"
"$sa$nukta>s\u0301a;"
"$sa>sa;"
"$sha}$x>s\u0301;"
"$sha$virama>s\u0301;"
"$sha>s\u0301a;"
"$ssa}$x>s\u0323;"
"$ssa$virama>s\u0323;"
"$ssa>s\u0323a;"
"$ha}$x>h;"
"$ha$virama>h;"
"$ha>ha;"
// dependent vowels (should never occur except following consonants)
// A vowel sign whose left neighbour matches $forceIndependentMatra (neither a
// letter nor a combining mark in \u0300-\u034c) is written with a leading
// \u0314 in front of the usual romanization.
"$forceIndependentMatra{$aa > \u0314a\u0304 ;"
"$forceIndependentMatra{$ai > \u0314ai ;"
"$forceIndependentMatra{$au > \u0314au ;"
"$forceIndependentMatra{$ii > \u0314i\u0304 ;"
"$forceIndependentMatra{$i > \u0314i ;"
"$forceIndependentMatra{$uu > \u0314u\u0304 ;"
"$forceIndependentMatra{$u > \u0314u ;"
"$forceIndependentMatra{$rrh > \u0314r\u0325\u0304 ;"
"$forceIndependentMatra{$rh > \u0314r\u0325 ;"
"$forceIndependentMatra{$llh > \u0314l\u0325\u0304 ;"
"$forceIndependentMatra{$lh > \u0314l\u0325 ;"
"$forceIndependentMatra{$e > \u0314e\u0304 ;"
"$forceIndependentMatra{$o > \u0314o\u0304 ;"
//extra vowels
"$forceIndependentMatra{$ce > \u0314e\u0306 ;"
"$forceIndependentMatra{$co > \u0314o\u0306 ;"
"$forceIndependentMatra{$se > \u0314e ;"
"$forceIndependentMatra{$so > \u0314o ;"
"$forceIndependentMatra{$nukta >;" // Nukta cannot appear independently or as first character
"$forceIndependentMatra{$virama >;" // Virama cannot appear independently or as first character
// Dependent vowel signs in the normal position: plain romanization.
"$aa > a\u0304 ;"
"$ai > ai ;"
"$au > au ;"
"$ii > i\u0304 ;"
"$i > i ;"
"$uu > u\u0304 ;"
"$u > u ;"
"$rrh > r\u0325\u0304 ;"
"$rh > r\u0325 ;"
"$llh > l\u0325\u0304 ;"
"$lh > l\u0325 ;"
"$e > e\u0304 ;"
"$o > o\u0304 ;"
//extra vowels
"$ce > e\u0306 ;"
"$co > o\u0306 ;"
"$se > e ;"
"$so > o ;"
//dependent vowels when following independent vowels. Generally Illegal only for roundtripping
// An independent vowel directly followed by a vowel sign ($x) is written with
// a trailing \u0314.
"$waa} $x > a\u0304\u0314 ;"
"$wai} $x > ai\u0314 ;"
"$wau} $x > au\u0314 ;"
"$wii} $x > i\u0304\u0314 ;"
"$wi } $x > i\u0314 ;"
"$wuu} $x > u\u0304\u0314 ;"
"$wu } $x > u\u0314 ;"
"$wrr} $x > r\u0325\u0304\u0314 ;"
"$wr } $x > r\u0325\u0314 ;"
"$wll} $x > l\u0325\u0304\u0314 ;"
"$wl } $x > l\u0325\u0314 ;"
"$we } $x > e\u0304\u0314 ;"
"$wo } $x > o\u0304\u0314 ;"
"$wa } $x > a\u0314 ;"
//extra vowels
"$wce} $x > e\u0306\u0314 ;"
"$wco} $x > o\u0306\u0314 ;"
"$wse} $x > e\u0314 ;"
"$wso} $x > o\u0314 ;"
"$om} $x > ''om\u0314 ;" // OM before a vowel sign: apostrophe, "om", then \u0314
// independent vowels when preceded by vowels
// The left context $vowels is written in Latin characters (vowel letters, r,
// and combining marks). A match puts an apostrophe ('' in rule syntax) in front
// of the romanized vowel.
"$vowels{$waa > ''a\u0304 ;"
"$vowels{$wai > ''ai ;"
"$vowels{$wau > ''au ;"
"$vowels{$wii > ''i\u0304 ;"
"$vowels{$wi > ''i ;"
"$vowels{$wuu > ''u\u0304 ;"
"$vowels{$wu > ''u ;"
"$vowels{$wrr > ''r\u0325\u0304 ;"
"$vowels{$wr > ''r\u0325 ;"
"$vowels{$wll > ''l\u0325\u0304 ;"
"$vowels{$wl > ''l\u0325 ;"
"$vowels{$we > ''e\u0304 ;"
"$vowels{$wo > ''o\u0304 ;"
"$vowels{$wa > ''a ;"
//extra vowels
"$vowels{$wce > ''e\u0306 ;"
"$vowels{$wco > ''o\u0306 ;"
"$vowels{$wse > ''e ;"
"$vowels{$wso > ''o ;"
// independent vowels (otherwise)
// Same romanizations as the dependent vowel signs, without any marker.
"$waa > a\u0304 ;"
"$wai > ai ;"
"$wau > au ;"
"$wii > i\u0304 ;"
"$wi > i ;"
"$wuu > u\u0304 ;"
"$wu > u ;"
"$wrr > r\u0325\u0304 ;"
"$wr > r\u0325 ;"
"$wll > l\u0325\u0304 ;"
"$wl > l\u0325 ;"
"$we > e\u0304 ;"
"$wo > o\u0304 ;"
"$wa > a ;"
//extra vowels
"$wce > e\u0306 ;"
"$wco > o\u0306 ;"
"$wse > e ;"
"$wso > o ;"
"$om > ''om ;" // OM: always written with a leading apostrophe
//stress marks
"$avagraha > \u0315;"
// The two-sign sequence is listed before the single-sign rule for chandrabindu.
"$chandrabindu$anusvara>\u0303;"
"$chandrabindu > m\u0310;"
"$visarga>h\u0323;"
//numbers
"$zero > 0;"
"$one > 1;"
"$two > 2;"
"$three > 3;"
"$four > 4;"
"$five > 5;"
"$six > 6;"
"$seven > 7;"
"$eight > 8;"
"$nine > 9;"
// Length marks have no romanization and are removed.
"$lm >;"
"$ailm >;"
"$aulm >;"
// Both dandas become a quoted literal full stop.
"$danda>'.';"
"$doubleDanda>'.';"
"\ue070>;" // ABBREVIATION SIGN
// LETTER RA WITH MIDDLE DIAGONAL
// NOTE(review): unlike "$ra}$x>r", the "}$x" rules for \ue071 and \ue072 emit
// "ra", keeping the inherent "a" in front of a vowel sign. Confirm whether
// "r" was intended.
"\ue071}$x>ra;"
"\ue071$virama>r;"
"\ue071>ra;"
// LETTER RA WITH LOWER DIAGONAL
"\ue072}$x>ra;"
"\ue072$virama>r;"
"\ue072>ra;"
// The remaining script-specific characters have no romanization and are removed.
"\ue073>;" // RUPEE MARK
"\ue074>;" // RUPEE SIGN
"\ue075>;" // CURRENCY NUMERATOR ONE
"\ue076>;" // CURRENCY NUMERATOR TWO
"\ue077>;" // CURRENCY NUMERATOR THREE
"\ue078>;" // CURRENCY NUMERATOR FOUR
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
"\ue07B>;" // ISSHAR
"\uE07C>;" // TIPPI
"\uE07D>;" // ADDAK
"\uE07E>;" // IRI
"\uE07F>;" // URA
"\uE080>;" // EK ONKAR
"\uE004>;" // DEVANAGARI LETTER SHORT A (U+0904); the per-script tables fall back to LETTER A instead
}
}

View file

@ -1,157 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_InterIndic_Malayalam.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// InterIndic_Malayalam
// Resource table t_InterIndic_Mlym: transliteration rules that rewrite
// InterIndic private-use code points (\ue001-\ue081) as Malayalam
// characters (\u0d02-\u0d6f). A rule with an empty right-hand side
// ("x>;") deletes the matched code point.
t_InterIndic_Mlym {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// InterIndic-Malayalam
// NOTE: the NFD/NFC normalization directives at the top and bottom of
// this rule set are commented out.
//:: NFD (NFC) ;
"\ue001>\u0d02;" // REMAP (indicExceptions.txt): \u0d01>\u0d02 = SIGN CANDRABINDU>SIGN ANUSVARA
"\ue002>\u0d02;" // SIGN ANUSVARA
"\ue003>\u0d03;" // SIGN VISARGA
"\uE004>\u0d05;" // FALLBACK TO LETTER A
"\ue005>\u0d05;" // LETTER A
"\ue006>\u0d06;" // LETTER AA
"\ue007>\u0d07;" // LETTER I
"\ue008>\u0d08;" // LETTER II
"\ue009>\u0d09;" // LETTER U
"\ue00a>\u0d0a;" // LETTER UU
"\ue00b>\u0d0b;" // LETTER VOCALIC R
"\ue00c>\u0d0c;" // LETTER VOCALIC L
"\ue00d>\u0d0e;" // FALLBACK LETTER E
"\ue00e>\u0d0e;" // LETTER E
"\ue00f>\u0d0f;" // LETTER EE
"\ue010>\u0d10;" // LETTER AI
"\ue011>\u0d12;" // FALLBACK TO O
"\ue012>\u0d12;" // LETTER O
"\ue013>\u0d13;" // LETTER OO
"\ue014>\u0d14;" // LETTER AU
"\ue015>\u0d15;" // LETTER KA
"\ue016>\u0d16;" // LETTER KHA
"\ue017>\u0d17;" // LETTER GA
"\ue018>\u0d18;" // LETTER GHA
"\ue019>\u0d19;" // LETTER NGA
"\ue01a>\u0d1a;" // LETTER CA
"\ue01b>\u0d1b;" // LETTER CHA
"\ue01c>\u0d1c;" // LETTER JA
"\ue01d>\u0d1d;" // LETTER JHA
"\ue01e>\u0d1e;" // LETTER NYA
"\ue01f>\u0d1f;" // LETTER TTA
"\ue020>\u0d20;" // LETTER TTHA
"\ue021>\u0d21;" // LETTER DDA
"\ue022>\u0d22;" // LETTER DDHA
"\ue023>\u0d23;" // LETTER NNA
"\ue024>\u0d24;" // LETTER TA
"\ue025>\u0d25;" // LETTER THA
"\ue026>\u0d26;" // LETTER DA
"\ue027>\u0d27;" // LETTER DHA
"\ue028>\u0d28;" // LETTER NA
"\ue029>\u0d28;" // REMAP (indicExceptions.txt): \u0d29>\u0d28 = LETTER NNNA>LETTER NA
"\ue02a>\u0d2a;" // LETTER PA
"\ue02b>\u0d2b;" // LETTER PHA
"\ue02c>\u0d2c;" // LETTER BA
"\ue02d>\u0d2d;" // LETTER BHA
"\ue02e>\u0d2e;" // LETTER MA
"\ue02f>\u0d2f;" // LETTER YA
"\ue030\ue03c>\u0d31;" // LETTER RA + NUKTA > LETTER RRA; listed ahead of the plain RA rule so the longer sequence is tried first
"\ue030>\u0d30;" // LETTER RA
"\ue031>\u0d31;" // LETTER RRA
"\ue032>\u0d32;" // LETTER LA
"\ue033\ue03c>\u0d34;" // LETTER LLA + NUKTA > LETTER LLLA; listed ahead of the plain LLA rule
"\ue033>\u0d33;" // LETTER LLA
"\ue034>\u0d34;" // LETTER LLLA
"\ue035>\u0d35;" // LETTER VA
"\ue036>\u0d36;" // LETTER SHA
"\ue037>\u0d37;" // LETTER SSA
"\ue038>\u0d38;" // LETTER SA
"\ue039>\u0d39;" // LETTER HA
"\ue03c>;" // FALLBACK BLOW AWAY NUKTA
"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA
"\ue03e>\u0d3e;" // VOWEL SIGN AA
"\ue03f>\u0d3f;" // VOWEL SIGN I
"\ue040>\u0d40;" // VOWEL SIGN II
"\ue041>\u0d41;" // VOWEL SIGN U
"\ue042>\u0d42;" // VOWEL SIGN UU
"\ue043>\u0d43;" // VOWEL SIGN VOCALIC R
"\ue044>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR
"\ue045>\u0d3e;" // REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA
"\ue046>\u0d46;" // VOWEL SIGN E
"\ue047>\u0d47;" // VOWEL SIGN EE
"\ue048>\u0d48;" // VOWEL SIGN AI
"\ue049>\u0d4b;" // REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO
"\ue04a>\u0d4a;" // VOWEL SIGN O
"\ue04b>\u0d4b;" // VOWEL SIGN OO
"\ue04c>\u0d4c;" // VOWEL SIGN AU
"\ue04d>\u0d4d;" // SIGN VIRAMA
"\ue050>\u0d13\u0d02;" // UNMAPPED InterIndic-Malayalam: OM
// \ue051-\ue054 have no Malayalam target and are deleted.
"\ue051>;"
"\ue052>;"
"\ue053>;"
"\ue054>;"
"\ue055>;" // FALLBACK BLOW AWAY LENGTH MARK
"\ue056>\u0d48;" // REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u0d57;" // AU LENGTH MARK
"\ue058>\u0d15;" // FALLBACK
"\ue059>\u0d16;" // REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA
"\ue05a>\u0d17;" // REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA
"\ue05b>\u0d1c;" // REMAP (indicExceptions.txt): \u0d5b>\u0d1c = LETTER ZA>LETTER JA
"\ue05d>\u0d22;" // REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA
"\ue05c>\u0d21;" // FALLBACK
"\ue05e>\u0d2b;" // REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA
"\ue05f>\u0d2f;" // REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA
"\ue060>\u0d60;" // LETTER VOCALIC RR
"\ue061>\u0d61;" // LETTER VOCALIC LL
"\ue062>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC L
"\ue063>;" // FALLBACK BLOW AWAY VOWEL SIGN VOCALIC LL
"\ue064>'.' ;" // FALLBACK FOR DANDA
"\ue065>'.' ;" // FALLBACK FOR DOUBLE DANDA
"\ue066>\u0d66;" // DIGIT ZERO
"\ue067>\u0d67;" // DIGIT ONE
"\ue068>\u0d68;" // DIGIT TWO
"\ue069>\u0d69;" // DIGIT THREE
"\ue06a>\u0d6a;" // DIGIT FOUR
"\ue06b>\u0d6b;" // DIGIT FIVE
"\ue06c>\u0d6c;" // DIGIT SIX
"\ue06d>\u0d6d;" // DIGIT SEVEN
"\ue06e>\u0d6e;" // DIGIT EIGHT
"\ue06f>\u0d6f;" // DIGIT NINE
"\ue070>;" // ABBREVIATION SIGN
"\ue071>\u0d30;" // LETTER RA WITH MIDDLE DIAGONAL
"\ue072>\u0d30;" // LETTER RA WITH LOWER DIAGONAL
"\ue073>;" // RUPEE MARK
"\ue074>;" // RUPEE SIGN
"\ue075>;" // CURRENCY NUMERATOR ONE
"\ue076>;" // CURRENCY NUMERATOR TWO
"\ue077>;" // CURRENCY NUMERATOR THREE
"\ue078>;" // CURRENCY NUMERATOR FOUR
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
"\ue07B>;" // ISSHAR
"\uE07C>;" // TIPPI
"\uE07D>;" // ADDAK
"\uE07E>;" // IRI
"\uE07F>;" // URA
"\uE080>;" // EK ONKAR
"\uE081>\u0d35;" // FALLBACK FOR ORIYA LETTER WA
"0 > \u0d66;" // FALLBACK FOR TAMIL
"1 > \u0d67;" // ASCII digit one > DIGIT ONE (same fallback as the rule above)
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,153 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_InterIndic_Oriya.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// InterIndic_Oriya
// Resource table t_InterIndic_Orya: transliteration rules that rewrite
// InterIndic private-use code points (\ue001-\ue081) as Oriya characters
// (\u0b01-\u0b71). Where no direct target is listed, the rules fall back
// to a base character, optionally followed by SIGN NUKTA (\u0b3c); a rule
// with an empty right-hand side ("x>;") deletes the matched code point.
t_InterIndic_Orya {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// InterIndic-Oriya
// NOTE: the NFD/NFC normalization directives at the top and bottom of
// this rule set are commented out.
//:: NFD (NFC) ;
"\ue001>\u0b01;" // SIGN CANDRABINDU
"\ue002>\u0b02;" // SIGN ANUSVARA
"\ue003>\u0b03;" // SIGN VISARGA
"\uE004>\u0b05;" // FALLBACK TO LETTER A
"\ue005>\u0b05;" // LETTER A
"\ue006>\u0b06;" // LETTER AA
"\ue007>\u0b07;" // LETTER I
"\ue008>\u0b08;" // LETTER II
"\ue009>\u0b09;" // LETTER U
"\ue00a>\u0b0a;" // LETTER UU
"\ue00b>\u0b0b;" // LETTER VOCALIC R
"\ue00c>\u0b0c;" // LETTER VOCALIC L
"\ue00d>\u0b0f;" // FALLBACK
"\ue00e>\u0b0f;" // FALLBACK
"\ue00f>\u0b0f;" // LETTER E
"\ue010>\u0b10;" // LETTER AI
"\ue011>\u0b13;" // FALLBACK
"\ue012>\u0b13;" // FALLBACK
"\ue013>\u0b13;" // FALLBACK LETTER OO (\u0b13 = LETTER O)
"\ue014>\u0b14;" // LETTER AU
"\ue015>\u0b15;" // LETTER KA
"\ue016>\u0b16;" // LETTER KHA
"\ue017>\u0b17;" // LETTER GA
"\ue018>\u0b18;" // LETTER GHA
"\ue019>\u0b19;" // LETTER NGA
"\ue01a>\u0b1a;" // LETTER CA
"\ue01b>\u0b1b;" // LETTER CHA
"\ue01c>\u0b1c;" // LETTER JA
"\ue01d>\u0b1d;" // LETTER JHA
"\ue01e>\u0b1e;" // LETTER NYA
"\ue01f>\u0b1f;" // LETTER TTA
"\ue020>\u0b20;" // LETTER TTHA
"\ue021>\u0b21;" // LETTER DDA
"\ue022>\u0b22;" // LETTER DDHA
"\ue023>\u0b23;" // LETTER NNA
"\ue024>\u0b24;" // LETTER TA
"\ue025>\u0b25;" // LETTER THA
"\ue026>\u0b26;" // LETTER DA
"\ue027>\u0b27;" // LETTER DHA
"\ue028>\u0b28;" // LETTER NA
"\ue029>\u0b28\u0b3c;" // FALLBACK \u0b29>\u0b28 = LETTER NNNA>LETTER NA
"\ue02a>\u0b2a;" // LETTER PA
"\ue02b>\u0b2b;" // LETTER PHA
"\ue02c>\u0b2c;" // LETTER BA
"\ue02d>\u0b2d;" // LETTER BHA
"\ue02e>\u0b2e;" // LETTER MA
"\ue02f>\u0b2f;" // LETTER YA
"\ue030>\u0b30;" // LETTER RA
"\ue031>\u0b5c;" // LETTER RRA
"\ue032>\u0b32;" // LETTER LA
"\ue033>\u0b33;" // LETTER LLA
"\ue034>\u0b33\u0b3c;" // FALLBACK LETTER LLLA>LETTER LLA
"\ue035>\u0b35;" // LETTER VA
"\ue036>\u0b36;" // LETTER SHA
"\ue037>\u0b37;" // LETTER SSA
"\ue038>\u0b38;" // LETTER SA
"\ue039>\u0b39;" // LETTER HA
"\ue03c>\u0b3c;" // SIGN NUKTA
"\ue03d>\u0b3d;" // SIGN AVAGRAHA
"\ue03e>\u0b3e;" // VOWEL SIGN AA
"\ue03f>\u0b3f;" // VOWEL SIGN I
"\ue040>\u0b40;" // VOWEL SIGN II
"\ue041>\u0b41;" // VOWEL SIGN U
"\ue042>\u0b42;" // VOWEL SIGN UU
"\ue043>\u0b43;" // VOWEL SIGN VOCALIC R
"\ue044>\u0b43\u0b3c;" // FALLBACK \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA
"\ue045>\u0b47;" // FALLBACK
"\ue046>\u0b47;" // FALLBACK
"\ue047>\u0b47;" // VOWEL SIGN E
"\ue048>\u0b48;" // VOWEL SIGN AI
"\ue049>\u0b4b;" // FALLBACK
"\ue04a>\u0b4b;" // FALLBACK
"\ue04b>\u0b4b;" // VOWEL SIGN OO (\u0b4b = VOWEL SIGN O)
"\ue04c>\u0b4c;" // VOWEL SIGN AU
"\ue04d>\u0b4d;" // SIGN VIRAMA
"\ue050>\u0b13\u0b01;" // FALLBACK \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU
// \ue051-\ue054 have no Oriya target and are deleted.
"\ue051>;"
"\ue052>;"
"\ue053>;"
"\ue054>;"
"\ue055>;" // UNMAPPED InterIndic-Oriya: LENGTH MARK
"\ue056>\u0b56;" // AI LENGTH MARK
"\ue057>\u0b57;" // AU LENGTH MARK
"\ue059>\u0b16\u0b3c;" // FALLBACK \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA
"\ue058>\u0b15\u0b3c;" // FALLBACK
"\ue05a>\u0b17\u0b3c;" // FALLBACK \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA
"\ue05b>\u0b1c\u0b3c;" // FALLBACK \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA
"\ue05c>\u0b21\u0b3c;" // FALLBACK
"\ue05d>\u0b5d;" // LETTER RHA
"\ue05e>\u0b2b\u0b3c;" // FALLBACK \u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA
"\ue05f>\u0b5f;" // LETTER YYA
"\ue060>\u0b60;" // LETTER VOCALIC RR
"\ue061>\u0b61;" // LETTER VOCALIC LL
"\ue062>\u0b56\u0b3c;" // FALLBACK \u0b62>\u0b56\u0b3c = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA
"\ue063>\u0b57\u0b3c;" // FALLBACK \u0b63>\u0b57\u0b3c = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA
"\uE064>\u0964;" // DANDA
"\uE065>\u0965;" // DOUBLE DANDA
"\ue066>\u0b66;" // DIGIT ZERO
"\ue067>\u0b67;" // DIGIT ONE
"\ue068>\u0b68;" // DIGIT TWO
"\ue069>\u0b69;" // DIGIT THREE
"\ue06a>\u0b6a;" // DIGIT FOUR
"\ue06b>\u0b6b;" // DIGIT FIVE
"\ue06c>\u0b6c;" // DIGIT SIX
"\ue06d>\u0b6d;" // DIGIT SEVEN
"\ue06e>\u0b6e;" // DIGIT EIGHT
"\ue06f>\u0b6f;" // DIGIT NINE
"\ue070>;" // ABBREVIATION SIGN
"\ue071>\u0b30;" // LETTER RA WITH MIDDLE DIAGONAL
"\ue072>\u0b30;" // LETTER RA WITH LOWER DIAGONAL
"\ue073>;" // RUPEE MARK
"\ue074>;" // RUPEE SIGN
"\ue075>;" // CURRENCY NUMERATOR ONE
"\ue076>;" // CURRENCY NUMERATOR TWO
"\ue077>;" // CURRENCY NUMERATOR THREE
"\ue078>;" // CURRENCY NUMERATOR FOUR
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
"\ue07B>\u0B70;" // ISSHAR
"\uE07C>;" // TIPPI
"\uE07D>;" // ADDAK
"\uE07E>;" // IRI
"\uE07F>;" // URA
"\uE080>;" // EK ONKAR
"\uE081>\u0B71;" // LETTER WA
"0 > \u0b66;" // FALLBACK FOR TAMIL
"1 > \u0b67;" // ASCII digit one > DIGIT ONE (same fallback as the rule above)
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,167 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_InterIndic_Tamil.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// InterIndic_Tamil
// Resource table t_InterIndic_Taml: transliteration rules that rewrite
// InterIndic private-use code points (\ue001-\ue081) as Tamil characters
// (\u0b82-\u0bf2). Many InterIndic consonants have no listed Tamil target
// and are remapped to the nearest letter; a rule with an empty right-hand
// side ("x>;") deletes the matched code point.
t_InterIndic_Taml {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// InterIndic-Tamil
// NOTE: the NFD/NFC normalization directives at the top and bottom of
// this rule set are commented out.
//:: NFD (NFC) ;
"\ue001>\u0b82;" // FALLBACK SIGN CANDRABINDU
"\ue002>\u0b82;" // SIGN ANUSVARA
"\ue003>\u0b83;" // SIGN VISARGA
"\uE004>\u0b85;" // FALLBACK TO LETTER A
"\ue005>\u0b85;" // LETTER A
"\ue006>\u0b86;" // LETTER AA
"\ue007>\u0b87;" // LETTER I
"\ue008>\u0b88;" // LETTER II
"\ue009>\u0b89;" // LETTER U
"\ue00a>\u0b8a;" // LETTER UU
"\ue00b>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
"\ue00c>\u0bb2;" // FALLBACK LETTER LA
"\ue00d>\u0b8f;" // FALLBACK
"\ue00e>\u0b8e;" // LETTER E
"\ue00f>\u0b8f;" // LETTER EE
"\ue010>\u0b90;" // LETTER AI
"\ue011>\u0b92;" // FALLBACK
"\ue012>\u0b92;" // LETTER O
"\ue013>\u0b93;" // LETTER OO
"\ue014>\u0b94;" // LETTER AU
"\ue015>\u0b95;" // LETTER KA
"\ue016>\u0b95;" // REMAP (indicExceptions.txt): \u0b96>\u0b95 = LETTER KHA>LETTER KA
"\ue017>\u0b95;" // REMAP (indicExceptions.txt): \u0b97>\u0b95 = LETTER GA>LETTER KA
"\ue018>\u0b95;" // REMAP (indicExceptions.txt): \u0b98>\u0b95 = LETTER GHA>LETTER KA
"\ue019>\u0b99;" // LETTER NGA
"\ue01a>\u0b9a;" // LETTER CA
"\ue01b>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9b>\u0b9a = LETTER CHA>LETTER CA
"\ue01c>\u0b9c;" // LETTER JA
"\ue01d>\u0b9a;" // REMAP (indicExceptions.txt): \u0b9d>\u0b9a = LETTER JHA>LETTER CA
"\ue01e>\u0b9e;" // LETTER NYA
"\ue01f>\u0b9f;" // LETTER TTA
"\ue020>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba0>\u0b9f = LETTER TTHA>LETTER TTA
"\ue021>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba1>\u0b9f = LETTER DDA>LETTER TTA
"\ue022>\u0b9f;" // REMAP (indicExceptions.txt): \u0ba2>\u0b9f = LETTER DDHA>LETTER TTA
"\ue023>\u0ba3;" // LETTER NNA
"\ue024>\u0ba4;" // LETTER TA
"\ue025>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba5>\u0ba4 = LETTER THA>LETTER TA
"\ue026>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba6>\u0ba4 = LETTER DA>LETTER TA
"\ue027>\u0ba4;" // REMAP (indicExceptions.txt): \u0ba7>\u0ba4 = LETTER DHA>LETTER TA
"\ue028\ue03c>\u0ba9;" // LETTER NA + NUKTA > LETTER NNNA; listed ahead of the plain NA rule so the longer sequence is tried first
"\ue028>\u0ba8;" // LETTER NA
"\ue029>\u0ba9;" // LETTER NNNA
"\ue02a>\u0baa;" // LETTER PA
"\ue02b>\u0baa;" // REMAP (indicExceptions.txt): \u0bab>\u0baa = LETTER PHA>LETTER PA
"\ue02c>\u0baa;" // REMAP (indicExceptions.txt): \u0bac>\u0baa = LETTER BA>LETTER PA
"\ue02d>\u0baa;" // REMAP (indicExceptions.txt): \u0bad>\u0baa = LETTER BHA>LETTER PA
"\ue02e>\u0bae;" // LETTER MA
"\ue02f>\u0baf;" // LETTER YA
"\ue030\ue03c>\u0bb1;" // LETTER RA + NUKTA > LETTER RRA; listed ahead of the plain RA rule
"\ue030>\u0bb0;" // LETTER RA
"\ue031>\u0bb1;" // LETTER RRA
"\ue032>\u0bb2;" // LETTER LA
"\ue033\ue03c>\u0bb4;" // LETTER LLA + NUKTA > LETTER LLLA; listed ahead of the plain LLA rule
"\ue033>\u0bb3;" // LETTER LLA
"\ue034>\u0bb4;" // LETTER LLLA
"\ue035>\u0bb5;" // LETTER VA
"\ue036>\u0bb7;" // REMAP (indicExceptions.txt): \u0bb6>\u0bb7 = LETTER SHA>LETTER SSA
"\ue037>\u0bb7;" // LETTER SSA
"\ue038>\u0bb8;" // LETTER SA
"\ue039>\u0bb9;" // LETTER HA
"\ue03c>;" // FALLBACK BLOW AWAY NUKTA
"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA
"\ue03e>\u0bbe;" // VOWEL SIGN AA
"\ue03f>\u0bbf;" // VOWEL SIGN I
"\ue040>\u0bc0;" // VOWEL SIGN II
"\ue041>\u0bc1;" // VOWEL SIGN U
"\ue042>\u0bc2;" // VOWEL SIGN UU
"\ue043>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
"\ue044>\u0bcd\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
"\ue045>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA
"\ue046>\u0bc6;" // VOWEL SIGN E
"\ue047>\u0bc7;" // VOWEL SIGN EE
"\ue048>\u0bc8;" // VOWEL SIGN AI
"\ue049>\u0bbe;" // REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA O>VOWEL SIGN AA
"\ue04a>\u0bca;" // VOWEL SIGN O
"\ue04b>\u0bcb;" // VOWEL SIGN OO
"\ue04c>\u0bcc;" // VOWEL SIGN AU
"\ue04d>\u0bcd;" // SIGN VIRAMA
"\ue050>\u0b93\u0bae\u0bcd;" // REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA
// \ue051-\ue054 have no Tamil target and are deleted.
"\ue051>;"
"\ue052>;"
"\ue053>;"
"\ue054>;"
"\ue055>;" // UNMAPPED InterIndic-Tamil: LENGTH MARK
"\ue056>\u0bc8;" // REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI
"\ue057>\u0bd7;" // AU LENGTH MARK
"\ue058>\u0b95;" // FALLBACK
"\ue059>\u0b95;" // REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA
"\ue05a>\u0b95;" // REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA
"\ue05b>\u0b9c;" // REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA
"\ue05c>\u0ba4;" // FALLBACK
"\ue05d>\u0b9f;" // REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA
"\ue05e>\u0baa;" // REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA
"\ue05f>\u0baf;" // REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA
"\ue060>\u0bb0\u0bbf;" // REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
"\ue061>\u0bb3;" // FALLBACK LETTER LLA
"\ue062>\u0bbf;" // FALLBACK VOWEL SIGN VOCALIC L
"\ue063>\u0bc0;" // FALLBACK VOWEL SIGN VOCALIC LL
"\ue064>'.' ;" // FALLBACK FOR DANDA
"\ue065>'.' ;" // FALLBACK FOR DOUBLE DANDA
"\ue066>\u0030;" // FALLBACK DIGIT ZERO
// Digit one followed by three, two or one zeros collapses to a single
// number sign; longest sequence first so it wins over the shorter ones
// and over the plain DIGIT ONE rule.
"\ue067\ue066\ue066\ue066>\u0bF2;" // NUMBER ONE THOUSAND
"\ue067\ue066\ue066>\u0bf1;" // NUMBER ONE HUNDRED
"\ue067\ue066>\u0bF0;" // NUMBER TEN
"\ue067>\u0be7;" // DIGIT ONE
"\ue068>\u0be8;" // DIGIT TWO
"\ue069>\u0be9;" // DIGIT THREE
"\ue06a>\u0bea;" // DIGIT FOUR
"\ue06b>\u0beb;" // DIGIT FIVE
"\ue06c>\u0bec;" // DIGIT SIX
"\ue06d>\u0bed;" // DIGIT SEVEN
"\ue06e>\u0bee;" // DIGIT EIGHT
"\ue06f>\u0bef;" // DIGIT NINE
"\ue070>;" // ABBREVIATION SIGN
// The two RA-with-diagonal letters fall back to LETTER RA (\u0bb0), the
// target of the plain "\ue030>\u0bb0;" rule. They previously produced
// \u0bc0, which this table uses for VOWEL SIGN II.
"\ue071>\u0bb0;" // LETTER RA WITH MIDDLE DIAGONAL
"\ue072>\u0bb0;" // LETTER RA WITH LOWER DIAGONAL
"\ue073>;" // RUPEE MARK
"\ue074>;" // RUPEE SIGN
"\ue075>;" // CURRENCY NUMERATOR ONE
"\ue076>;" // CURRENCY NUMERATOR TWO
"\ue077>;" // CURRENCY NUMERATOR THREE
"\ue078>;" // CURRENCY NUMERATOR FOUR
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
"\ue07B>;" // ISSHAR
"\uE07C>;" // TIPPI
"\uE07D>;" // ADDAK
"\uE07E>;" // IRI
"\uE07F>;" // URA
"\uE080>;" // EK ONKAR
"\uE081>\u0bb5;" // FALLBACK FOR ORIYA LETTER WA
// ASCII digit sequences get the same number-sign treatment, longest first.
"1000 >\u0BF2;" // NUMBER ONE THOUSAND
"100 >\u0BF1;" // NUMBER ONE HUNDRED
"10 >\u0BF0;" // NUMBER TEN
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,157 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_InterIndic_Telugu.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// InterIndic_Telugu
// Resource table t_InterIndic_Telu: transliteration rules that rewrite
// InterIndic private-use code points (\ue001-\ue081) as Telugu characters
// (\u0c01-\u0c6f). A rule with an empty right-hand side ("x>;") deletes
// the matched code point.
t_InterIndic_Telu {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// InterIndic-Telugu
// NOTE: the NFD/NFC normalization directives at the top and bottom of
// this rule set are commented out.
//:: NFD (NFC) ;
"\ue001>\u0c01;" // SIGN CANDRABINDU
"\ue002>\u0c02;" // SIGN ANUSVARA
"\ue003>\u0c03;" // SIGN VISARGA
"\uE004>\u0c05;" // FALLBACK TO LETTER A
"\ue005>\u0c05;" // LETTER A
"\ue006>\u0c06;" // LETTER AA
"\ue007>\u0c07;" // LETTER I
"\ue008>\u0c08;" // LETTER II
"\ue009>\u0c09;" // LETTER U
"\ue00a>\u0c0a;" // LETTER UU
"\ue00b>\u0c0b;" // LETTER VOCALIC R
"\ue00c>\u0c0c;" // LETTER VOCALIC L
"\ue00d>\u0c0E;" // FALLBACK MAPPING
"\ue00e>\u0c0E;" // LETTER E
"\ue00f>\u0c0f;" // LETTER EE
"\ue010>\u0c10;" // LETTER AI
"\ue011>\u0c12;" // FALLBACK MAPPING
"\ue012>\u0c12;" // LETTER O
"\ue013>\u0c13;" // LETTER OO
"\ue014>\u0c14;" // LETTER AU
"\ue015>\u0c15;" // LETTER KA
"\ue016>\u0c16;" // LETTER KHA
"\ue017>\u0c17;" // LETTER GA
"\ue018>\u0c18;" // LETTER GHA
"\ue019>\u0c19;" // LETTER NGA
"\ue01a>\u0c1a;" // LETTER CA
"\ue01b>\u0c1b;" // LETTER CHA
"\ue01c>\u0c1c;" // LETTER JA
"\ue01d>\u0c1d;" // LETTER JHA
"\ue01e>\u0c1e;" // LETTER NYA
"\ue01f>\u0c1f;" // LETTER TTA
"\ue020>\u0c20;" // LETTER TTHA
"\ue021>\u0c21;" // LETTER DDA
"\ue022>\u0c22;" // LETTER DDHA
"\ue023>\u0c23;" // LETTER NNA
"\ue024>\u0c24;" // LETTER TA
"\ue025>\u0c25;" // LETTER THA
"\ue026>\u0c26;" // LETTER DA
"\ue027>\u0c27;" // LETTER DHA
"\ue028>\u0c28;" // LETTER NA
"\ue029>\u0c28;" // REMAP (indicExceptions.txt): \u0c29>\u0c28 = LETTER NNNA>LETTER NA
"\ue02a>\u0c2a;" // LETTER PA
"\ue02b>\u0c2b;" // LETTER PHA
"\ue02c>\u0c2c;" // LETTER BA
"\ue02d>\u0c2d;" // LETTER BHA
"\ue02e>\u0c2e;" // LETTER MA
"\ue02f>\u0c2f;" // LETTER YA
"\ue030\ue03c>\u0c31;" // LETTER RA + NUKTA > LETTER RRA; listed ahead of the plain RA rule so the longer sequence is tried first
"\ue030>\u0c30;" // LETTER RA
"\ue031>\u0c31;" // LETTER RRA
"\ue032>\u0c32;" // LETTER LA
"\ue033>\u0c33;" // LETTER LLA
"\ue034>\u0c33;" // REMAP (indicExceptions.txt): \u0c34>\u0c33 = LETTER LLLA>LETTER LLA
"\ue035>\u0c35;" // LETTER VA
"\ue036>\u0c36;" // LETTER SHA
"\ue037>\u0c37;" // LETTER SSA
"\ue038>\u0c38;" // LETTER SA
"\ue039>\u0c39;" // LETTER HA
"\ue03c>;" // FALLBACK BLOW AWAY NUKTA
"\ue03d>;" // FALLBACK BLOW AWAY AVAGRAHA
"\ue03e>\u0c3e;" // VOWEL SIGN AA
"\ue03f>\u0c3f;" // VOWEL SIGN I
"\ue040>\u0c40;" // VOWEL SIGN II
"\ue041>\u0c41;" // VOWEL SIGN U
"\ue042>\u0c42;" // VOWEL SIGN UU
"\ue043>\u0c43;" // VOWEL SIGN VOCALIC R
"\ue044>\u0c44;" // VOWEL SIGN VOCALIC RR
"\ue045>\u0c46;" // VOWEL SIGN CANDRA E>VOWEL SIGN E
"\ue046>\u0c46;" // VOWEL SIGN E
"\ue047>\u0c47;" // VOWEL SIGN EE
"\ue048>\u0c48;" // VOWEL SIGN AI
"\ue049>\u0c4a;" // REMAP (indicExceptions.txt): \u0c49>\u0c4a = VOWEL SIGN CANDRA O>VOWEL SIGN O
"\ue04a>\u0c4a;" // VOWEL SIGN O
"\ue04b>\u0c4b;" // VOWEL SIGN OO
"\ue04c>\u0c4c;" // VOWEL SIGN AU
"\ue04d>\u0c4d;" // SIGN VIRAMA
"\ue050>\u0c13\u0c02;" // REMAP (indicExceptions.txt): \u0c50>\u0c13\u0c02 = OM>LETTER OO.SIGN ANUSVARA
// \ue051-\ue054 have no Telugu target and are deleted.
"\ue051>;"
"\ue052>;"
"\ue053>;"
"\ue054>;"
"\ue055>\u0c55;" // LENGTH MARK
"\ue056>\u0c56;" // AI LENGTH MARK
"\ue057>\u0c4c;" // REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU
"\ue058>\u0c15;" // REMAP
// NOTE(review): \u0c22 is LETTER DDHA, the same target as \ue05d below;
// confirm that LETTER DDA (\u0c21) was not intended for \ue05c.
"\ue05c>\u0c22;" // REMAP
"\ue059>\u0c16;" // REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA
"\ue05a>\u0c17;" // REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA
"\ue05b>\u0c1c;" // REMAP (indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA
"\ue05d>\u0c22;" // REMAP (indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA
"\ue05e>\u0c2b;" // REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA
"\ue05f>\u0c2f;" // REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA
"\ue060>\u0c60;" // LETTER VOCALIC RR
"\ue061>\u0c61;" // LETTER VOCALIC LL
"\ue062>\u0c3f;" // REMAP (indicExceptions.txt): \u0c62>\u0c3f = VOWEL SIGN VOCALIC L>VOWEL SIGN I
"\ue063>\u0c40;" // REMAP (indicExceptions.txt): \u0c63>\u0c40 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
"\ue064>'.' ;" // FALLBACK FOR DANDA
"\ue065>'.' ;" // FALLBACK FOR DOUBLE DANDA
"\ue066>\u0c66;" // DIGIT ZERO
"\ue067>\u0c67;" // DIGIT ONE
"\ue068>\u0c68;" // DIGIT TWO
"\ue069>\u0c69;" // DIGIT THREE
"\ue06a>\u0c6a;" // DIGIT FOUR
"\ue06b>\u0c6b;" // DIGIT FIVE
"\ue06c>\u0c6c;" // DIGIT SIX
"\ue06d>\u0c6d;" // DIGIT SEVEN
"\ue06e>\u0c6e;" // DIGIT EIGHT
"\ue06f>\u0c6f;" // DIGIT NINE
"\ue070>;" // ABBREVIATION SIGN
"\ue071>\u0c30;" // LETTER RA WITH MIDDLE DIAGONAL
"\ue072>\u0c30;" // LETTER RA WITH LOWER DIAGONAL
"\ue073>;" // RUPEE MARK
"\ue074>;" // RUPEE SIGN
"\ue075>;" // CURRENCY NUMERATOR ONE
"\ue076>;" // CURRENCY NUMERATOR TWO
"\ue077>;" // CURRENCY NUMERATOR THREE
"\ue078>;" // CURRENCY NUMERATOR FOUR
"\ue079>;" // CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
"\ue07A>;" // CURRENCY DENOMINATOR SIXTEEN
"\ue07B>;" // ISSHAR
"\uE07C>;" // TIPPI
"\uE07D>;" // ADDAK
"\uE07E>;" // IRI
"\uE07F>;" // URA
"\uE080>;" // EK ONKAR
"\uE081>\u0c35;" // FALLBACK FOR ORIYA LETTER WA
"0 > \u0c66;" // FALLBACK FOR TAMIL
"1 > \u0c67;" // ASCII digit one > DIGIT ONE (same fallback as the rule above)
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,108 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Kannada_InterIndic.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// Kannada_InterIndic
// Resource table t_Knda_InterIndic: transliteration rules that rewrite
// Kannada characters (\u0C82-\u0CEF) as InterIndic private-use code
// points (\uE002-\uE06F). Rules whose source is a multi-character
// sequence are listed first, ahead of the single-character rules for the
// same leading character, so the longer sequence is tried first.
t_Knda_InterIndic {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Kannada-InterIndic
// Multi-character sources (vowel sign followed by a length mark or a
// second vowel sign). NOTE(review): presumably these are the decomposed
// forms of the precomposed Kannada vowel signs, which have no rule of
// their own below - confirm.
"\u0CC6\u0CD5>\uE047;" // VOWEL SIGN EE
"\u0CC6\u0CCD\u0CD6>\uE048\ue04d;" // VOWEL SIGN AI (VOWEL SIGN E + VIRAMA + AI LENGTH MARK > VOWEL SIGN AI + VIRAMA)
"\u0CC6\u0CD6>\uE048;" // VOWEL SIGN AI
"\u0CC6\u0CC2\u0CD5>\uE04B;" // VOWEL SIGN OO
"\u0CC6\u0CC2>\uE04A;" // VOWEL SIGN O
"\u0CBF\u0CD5>\uE040;" // VOWEL SIGN II
// Single-character rules.
"\u0C82>\uE002;" // SIGN ANUSVARA
"\u0C83>\uE003;" // SIGN VISARGA
"\u0C85>\uE005;" // LETTER A
"\u0C86>\uE006;" // LETTER AA
"\u0C87>\uE007;" // LETTER I
"\u0C88>\uE008;" // LETTER II
"\u0C89>\uE009;" // LETTER U
"\u0C8A>\uE00A;" // LETTER UU
"\u0C8B>\uE00B;" // LETTER VOCALIC R
"\u0C8C>\uE00C;" // LETTER VOCALIC L
"\u0C8E>\uE00E;" // LETTER E
"\u0C8F>\uE00F;" // LETTER EE
"\u0C90>\uE010;" // LETTER AI
"\u0C92>\uE012;" // LETTER O
"\u0C93>\uE013;" // LETTER OO
"\u0C94>\uE014;" // LETTER AU
"\u0C95>\uE015;" // LETTER KA
"\u0C96>\uE016;" // LETTER KHA
"\u0C97>\uE017;" // LETTER GA
"\u0C98>\uE018;" // LETTER GHA
"\u0C99>\uE019;" // LETTER NGA
"\u0C9A>\uE01A;" // LETTER CA
"\u0C9B>\uE01B;" // LETTER CHA
"\u0C9C>\uE01C;" // LETTER JA
"\u0C9D>\uE01D;" // LETTER JHA
"\u0C9E>\uE01E;" // LETTER NYA
"\u0C9F>\uE01F;" // LETTER TTA
"\u0CA0>\uE020;" // LETTER TTHA
"\u0CA1>\uE021;" // LETTER DDA
"\u0CA2>\uE022;" // LETTER DDHA
"\u0CA3>\uE023;" // LETTER NNA
"\u0CA4>\uE024;" // LETTER TA
"\u0CA5>\uE025;" // LETTER THA
"\u0CA6>\uE026;" // LETTER DA
"\u0CA7>\uE027;" // LETTER DHA
"\u0CA8>\uE028;" // LETTER NA
"\u0CAA>\uE02A;" // LETTER PA
"\u0CAB>\uE02B;" // LETTER PHA
"\u0CAC>\uE02C;" // LETTER BA
"\u0CAD>\uE02D;" // LETTER BHA
"\u0CAE>\uE02E;" // LETTER MA
"\u0CAF>\uE02F;" // LETTER YA
"\u0CB0>\uE030;" // LETTER RA
"\u0CB1>\uE031;" // LETTER RRA
"\u0CB2>\uE032;" // LETTER LA
"\u0CB3>\uE033;" // LETTER LLA
"\u0CB5>\uE035;" // LETTER VA
"\u0CB6>\uE036;" // LETTER SHA
"\u0CB7>\uE037;" // LETTER SSA
"\u0CB8>\uE038;" // LETTER SA
"\u0CB9>\uE039;" // LETTER HA
"\u0CBC>\uE03C;" // SIGN NUKTA
"\u0CBD>\uE03D;" // AVAGRAHA
"\u0CBE>\uE03E;" // VOWEL SIGN AA
"\u0CBF>\uE03F;" // VOWEL SIGN I
"\u0CC1>\uE041;" // VOWEL SIGN U
"\u0CC2>\uE042;" // VOWEL SIGN UU
"\u0CC3>\uE043;" // VOWEL SIGN VOCALIC R
"\u0CC4>\uE044;" // VOWEL SIGN VOCALIC RR
"\u0CC6>\uE046;" // VOWEL SIGN E
"\u0CCC>\uE04C;" // VOWEL SIGN AU
"\u0CCD>\uE04D;" // SIGN VIRAMA
"\u0CD5>\uE055;" // LENGTH MARK
"\u0CD6>\uE056;" // AI LENGTH MARK
"\u0CDE>\uE034;" // LETTER LLLA
"\u0CE0>\uE060;" // LETTER VOCALIC RR
"\u0CE1>\uE061;" // LETTER VOCALIC LL
"\u0CE6>\uE066;" // DIGIT ZERO
"\u0CE7>\uE067;" // DIGIT ONE
"\u0CE8>\uE068;" // DIGIT TWO
"\u0CE9>\uE069;" // DIGIT THREE
"\u0CEA>\uE06A;" // DIGIT FOUR
"\u0CEB>\uE06B;" // DIGIT FIVE
"\u0CEC>\uE06C;" // DIGIT SIX
"\u0CED>\uE06D;" // DIGIT SEVEN
"\u0CEE>\uE06E;" // DIGIT EIGHT
"\u0CEF>\uE06F;" // DIGIT NINE
// eof
}
}

View file

@ -1,399 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Latin_InterIndic.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// Latin_InterIndic
t_Latn_InterIndic {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Latin-InterIndic
//:: NFD;
//\ue000 reserved
//consonants
"$chandrabindu=\ue001;"
"$anusvara=\ue002;"
"$visarga=\ue003;"
//\ue004 reserved
// w<vowel> represents the stand-alone form
"$wa=\ue005;"
"$waa=\ue006;"
"$wi=\ue007;"
"$wii=\ue008;"
"$wu=\ue009;"
"$wuu=\ue00a;"
"$wr=\ue00b;"
"$wl=\ue00c;"
"$wce=\ue00d;" // LETTER CANDRA E
"$wse=\ue00e;" // LETTER SHORT E
"$we=\ue00f;" // \u090f LETTER E
"$wai=\ue010;"
"$wco=\ue011;" // LETTER CANDRA O
"$wso=\ue012;" // LETTER SHORT O
"$wo=\ue013;" // \u0913 LETTER O
"$wau=\ue014;"
"$ka=\ue015;"
"$kha=\ue016;"
"$ga=\ue017;"
"$gha=\ue018;"
"$nga=\ue019;"
"$ca=\ue01a;"
"$cha=\ue01b;"
"$ja=\ue01c;"
"$jha=\ue01d;"
"$nya=\ue01e;"
"$tta=\ue01f;"
"$ttha=\ue020;"
"$dda=\ue021;"
"$ddha=\ue022;"
"$nna=\ue023;"
"$ta=\ue024;"
"$tha=\ue025;"
"$da=\ue026;"
"$dha=\ue027;"
"$na=\ue028;"
"$ena=\ue029;" //compatibility
"$pa=\ue02a;"
"$pha=\ue02b;"
"$ba=\ue02c;"
"$bha=\ue02d;"
"$ma=\ue02e;"
"$ya=\ue02f;"
"$ra=\ue030;"
"$rra=\ue031;"
"$la=\ue032;"
"$lla=\ue033;"
"$ela=\ue034;" //compatibility
"$va=\ue035;"
"$vva=\ue081;"
"$sha=\ue036;"
"$ssa=\ue037;"
"$sa=\ue038;"
"$ha=\ue039;"
//\u093a Reserved
//\u093b Reserved
"$nukta=\ue03c;"
"$avagraha=\ue03d;" // SIGN AVAGRAHA
// <vowel> represents the dependent form
"$aa=\ue03e;"
"$i=\ue03f;"
"$ii=\ue040;"
"$u=\ue041;"
"$uu=\ue042;"
"$rh=\ue043;"
"$lh=\ue044;"
"$ce=\ue045;" //VOWEL SIGN CANDRA E
"$se=\ue046;" //VOWEL SIGN SHORT E
"$e=\ue047;"
"$ai=\ue048;"
"$co=\ue049;" // VOWEL SIGN CANDRA O
"$so=\ue04a;" // VOWEL SIGN SHORT O
"$o=\ue04b;" // \u094b
"$au=\ue04c;"
"$virama=\ue04d;"
// \u094e Reserved
// \u094f Reserved
"$om = \ue050;" // OM
// \u0951>; # UNMAPPED STRESS SIGN UDATTA
// \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
// \u0953>; # UNMAPPED GRAVE ACCENT
// \u0954>; # UNMAPPED ACUTE ACCENT
"$lm = \ue055;"// Telugu Length Mark
"$ailm=\ue056;"// AI Length Mark
"$aulm=\ue057;"// AU Length Mark
//urdu compatibility forms
"$uka=\ue058;"
"$ukha=\ue059;"
"$ugha=\ue05a;"
"$ujha=\ue05b;"
"$uddha=\ue05c;"
"$udha=\ue05d;"
"$ufa=\ue05e;"
"$uya=\ue05f;"
"$wrr=\ue060;"
"$wll=\ue061;"
"$rrh=\ue062;"
"$llh=\ue063;"
"$danda=\ue064;"
"$doubleDanda=\ue065;"
"$zero=\ue066;" // DIGIT ZERO
"$one=\ue067;" // DIGIT ONE
"$two=\ue068;" // DIGIT TWO
"$three=\ue069;" // DIGIT THREE
"$four=\ue06a;" // DIGIT FOUR
"$five=\ue06b;" // DIGIT FIVE
"$six=\ue06c;" // DIGIT SIX
"$seven=\ue06d;" // DIGIT SEVEN
"$eight=\ue06e;" // DIGIT EIGHT
"$nine=\ue06f;" // DIGIT NINE
// For all other scripts
"$ecp0=\ue070;"
"$ecp1=\ue071;"
"$ecp2=\ue072;"
"$ecp3=\ue073;"
"$ecp4=\ue074;"
"$ecp5=\ue075;"
"$ecp6=\ue076;"
"$ecp7=\ue077;"
"$ecp8=\ue078;"
"$ecp9=\ue079;"
"$ecpA=\ue07a;"
"$ecpB=\ue07b;"
"$ecpC=\ue07c;"
"$ecpD=\ue07d;"
"$ecpE=\ue07e;"
"$ecpF=\ue07f;"
// \u0970>; # UNMAPPED ABBREVIATION SIGN
"$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];"
"$depVowelBelow=[\ue041-\ue044];"
"$endThing=[$danda$doubleDanda];"
// $x was originally called '&'; $z was '%'
"$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];"
"$z=[bcdfghjklmnpqrstvwxyz];"
"$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];"
"\u0315 > $avagraha;"
"\u0303>$chandrabindu$anusvara;"
"m\u0310>$chandrabindu;"
"h\u0323>$visarga;"
"x>$ka$virama$sa;"
// convert to independent forms at start of word or syllable:
// dependent forms for roundtrip
"\u0314a\u0304>$aa;"
"\u0314ai>$ai;"
"\u0314au>$au;"
"\u0314ii>$ii;"
"\u0314i\u0304>$ii;"
"\u0314i>$i;"
"\u0314u\u0304>$uu;"
"\u0314u>$u;"
"\u0314r\u0325\u0304>$rrh;"
"\u0314r\u0325>$rh;"
"\u0314l\u0325\u0304>$llh;"
"\u0314lh>$lh;"
"\u0314l\u0325>$lh;"
"\u0314e\u0304>$e;"
"\u0314o\u0304>$o;"
"\u0314a>;"
"\u0314e\u0306>$ce;"
"\u0314o\u0306>$co;"
"\u0314e>$se;"
"\u0314o>$so;"
// preceded by consonants
"$consonants{ a\u0304>$aa;"
"$consonants{ ai>$ai;"
"$consonants{ au>$au;"
"$consonants{ ii>$ii;"
"$consonants{ i\u0304>$ii;"
"$consonants{ i>$i;"
"$consonants{ u\u0304>$uu;"
"$consonants{ u>$u;"
"$consonants{ r\u0325\u0304>$rrh;"
"$consonants{ r\u0325a>$rh;"
"$consonants{ r\u0325>$rh;"
"$consonants{ l\u0325\u0304>$llh;"
"$consonants{ lh>$lh;"
"$consonants{ l\u0325>$lh;"
"$consonants{ e\u0304>$e;"
"$consonants{ o\u0304>$o;"
"$consonants{ e\u0306>$ce;"
"$consonants{ o\u0306>$co;"
"$consonants{ e>$se;"
"$consonants{ o>$so;"
// e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
"a\u0304>$waa;"
"ai>$wai;"
"au>$wau;"
"i\u0304>$wii;"
"i>$wi;"
"u\u0304>$wuu;"
"u>$wu;"
"r\u0325\u0304>$wrr;"
"r\u0325>$wr;"
"l\u0325\u0304>$wll;"
"lh>$wl;"
"l\u0325>$wl;"
"e\u0304>$we;"
"o\u0304>$wo;"
"a>$wa;"
"e\u0306>$wce;"
"o\u0306>$wco;"
"e>$wse;"
"''om>$om;"
"o>$wso;"
// rules for anusvara
"n}r\u0325 > $na|$virama;"
"n}l\u0325 > $na|$virama;"
"n}na > $na|$virama;"
"n\u0307}[kg] > $anusvara;"
"n\u0307}n\u0307 > $anusvara;"
"n\u0304}[cj] > $anusvara;"
"n\u0304}n\u0303 > $anusvara;"
"n\u0323}[tdn]\u0323 > $anusvara;"
"n}[tdn] > $anusvara;"
"m}[pbm] > $anusvara;"
"n}[ylvshr] > $anusvara;"
"m\u0307 > $anusvara;"
//urdu compatibility
"q>$uka|$virama;"
"k\u0331h\u0331>$ukha |$virama;"
"g\u0307> $ugha | $virama;"
"z > $ujha |$virama;"
"f > $ufa|$virama;"
// dev
"y\u0307>$uya|$virama;"
"l\u0331>$ela|$virama;"
"n\u0331>$ena|$virama;"
"n\u0307>$nga|$virama;"
"n\u0303>$nya|$virama;"
"n\u0323>$nna|$virama;"
"t\u0323h>$ttha|$virama;"
"t\u0323>$tta|$virama;"
"r\u0323h>$udha|$virama;"
"r\u0323>$uddha|$virama;"
"d\u0323h>$ddha|$virama;"
"d\u0323>$dda|$virama;"
"kh>$kha|$virama;"
"k>$ka|$virama;"
"gh>$gha|$virama;"
"g>$ga|$virama;"
"ch>$cha|$virama;"
"c>$ca|$virama;"
"jh>$jha|$virama;"
"j>$ja|$virama;"
"ny>$nya|$virama;"
"tth>$ttha|$virama;"
"ddh>$ddha|$virama;"
"th>$tha|$virama;"
"t>$ta|$virama;"
"dh>$dha|$virama;"
"d>$da|$virama;"
"n>$na|$virama;"
"ph>$pha|$virama;"
"p>$pa|$virama;"
"bh>$bha|$virama;"
"b>$ba|$virama;"
"m>$ma|$virama;"
"y>$ya|$virama;"
"r\u0331>$rra|$virama;"
"r>$ra|$virama;"
"l\u0323>$lla|$virama;"
"l>$la|$virama;"
"v>$va|$virama;"
"w\u0307>$vva|$virama;"
"w>$va|$virama;"
"sh>$sha|$virama;"
"ss>$ssa|$virama;"
"s\u0323>$ssa|$virama;"
"s\u0301>$sha|$virama;"
"s>$sa|$virama;"
"h>$ha|$virama;"
"'.'>$danda;"
"$danda'.'>$doubleDanda;"
"$depVowelAbove{'~'>$anusvara;"
"$depVowelBelow{'~'>$chandrabindu;"
// convert to dependent forms after consonant with no vowel:
// e.g. kai -> {ka}{virama}ai -> {ka}{ai}
//$virama aa>$aa;
"$virama a\u0304>$aa;"
"$virama ai>$ai;"
"$virama au>$au;"
"$virama ii>$ii;"
"$virama i\u0304>$ii;"
"$virama i>$i;"
//$virama uu>$uu;
"$virama u\u0304>$uu;"
"$virama u>$u;"
//$virama rrh>$rrh;
"$virama r\u0325\u0304>$rrh;"
//$virama rh>$rh;
"$virama r\u0325a>$rh;"
"$virama r\u0325>$rh;"
"$virama l\u0325\u0304>$llh;"
"$virama lh>$lh;"
"$virama l\u0325>$lh;"
"$virama e\u0304>$e;"
"$virama o\u0304>$o;"
"$virama a>;"
"$virama e\u0306>$ce;"
"$virama o\u0306>$co;"
"$virama e>$se;"
"$virama o>$so;"
// otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
//$virama''aa>$waa;
"$virama''a\u0304>$waa;"
"$virama''ai>$wai;"
"$virama''au>$wau;"
//$virama''ii>$wii;
"$virama''i\u0304>$wii;"
"$virama''i>$wi;"
//$virama''uu>$wuu;
"$virama''u\u0304>$wuu;"
"$virama''u>$wu;"
//$virama''rrh>$wrr;
"$virama''r\u0325\u0304>$wrr;"
//$virama''rh>$wr;
"$virama''r\u0325>$wr;"
"$virama''l\u0325\u0304>$wll;"
//$virama''lh>$wl;
"$virama''l\u0325>$wl;"
"$virama''e\u0304>$we;"
"$virama''o\u0304>$wo;"
"$virama''a>$wa;"
"$virama''e\u0306>$wce;"
"$virama''o\u0306>$wco;"
"$virama''e>$wse;"
"$virama''o>$wso;"
// no virama
"''a\u0304>$waa;"
"''ai>$wai;"
"''au>$wau;"
"''i\u0304>$wii;"
"''i>$wi;"
"''u\u0304>$wuu;"
"''u>$wu;"
"''r\u0325\u0304>$wrr;"
"''r\u0325>$wr;"
"''l\u0325\u0304>$wll;"
"''l\u0325>$wl;"
"''e\u0304>$we;"
"''o\u0304>$wo;"
"''a>$wa;"
"''e\u0306>$wce;"
"''o\u0306>$wco;"
"''e>$wse;"
"''o>$wso;"
"$virama } [$z] > $virama;"
"$virama } ' ' > $virama ;"
"$virama}$endThing>;"
"0>$zero;"
"1>$one;"
"2>$two;"
"3>$three;"
"4>$four;"
"5>$five;"
"6>$six;"
"7>$seven;"
"8>$eight;"
"9>$nine;"
"''>;"
//:: NFC (NFD) ;
}
}

View file

@ -1,538 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Latin_Jamo.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// Latin_Jamo
t_Latn_Jamo {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
//- the INDEX file. This transliterator is, by itself, not
//- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
//- inverses thereof.
// Transliteration from Latin characters to Korean script is done in
// two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul
// transliteration is done algorithmically following Unicode 3.0
// section 3.11. This file implements the Latin to Jamo
// transliteration using rules.
// Jamo occupy the block 1100-11FF. Within this block there are three
// groups of characters: initial consonants or choseong (I), medial
// vowels or jungseong (M), and trailing consonants or jongseong (F).
// Standard Korean syllables are of the form I+M+F*.
// Section 3.11 describes the use of 'filler' jamo to convert
// nonstandard syllables to standard form: the choseong filler 115F and
// the jungseong filler 1160. In this transliterator, we will not use
// 115F or 1160.
// We will, however, insert two 'null' jamo to make foreign words
// conform to Korean syllable structure. These are the null initial
// consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,
// we will use the separator in order to disambiguate strings,
// e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).
// We will not use all of the characters in the jamo block. We will
// only use the 19 initials, 21 medials, and 27 finals possessing a
// jamo short name as defined in section 4.4 of the Unicode book.
// Rules of thumb. These guidelines provide the basic framework
// for the rules. They are phrased in terms of Latin-Jamo transliteration.
// The Jamo-Latin rules derive from these, since the Jamo-Latin rules are
// just context-free transliteration of jamo to corresponding short names,
// with the addition of separators to maintain round-trip integrity
// in the context of the Latin-Jamo rules.
// A sequence of vowels:
// - Take the longest sequence you can. If there are too many, or you don't
// have a starting consonant, introduce a 110B (IEUNG) as necessary.
// A sequence of consonants.
// - First join the double consonants: G + G -> GG
// - In the remaining list,
// -- If there is no preceding vowel, take the first consonant, and insert EU
// after it. Continue with the rest of the consonants.
// -- If there is one consonant, attach to the following vowel
// -- If there are two consonants and a following vowel, attach one to the
// preceding vowel, and one to the following vowel.
// -- If there are more than two consonants, join the first two together if you
// can: L + G => LG
// -- If you still end up with more than 2 consonants, insert EU after the
// first one, and continue with the rest of the consonants.
//----------------------------------------------------------------------
// Variables
// Some latin consonants or consonant pairs only occur as initials, and
// some only as finals, but some occur as both. This makes some jamo
// consonants ambiguous when transliterated into latin.
// Initial only: IEUNG BB DD JJ R
// Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
// Initial and Final: B C D G GG H J K M N P S SS T
// Jamo variables, one per code point used by the rules below.
// Naming: a trailing "i" marks the initial form and a trailing "f" the
// final form of a consonant that occurs in both positions; names
// without a suffix occur in one position only.
// Initial consonants (choseong), 1100-1112.
"$Gi = \u1100;"
"$GGi = \u1101;"
"$Ni = \u1102;"
"$Di = \u1103;"
"$DD = \u1104;"
"$R = \u1105;"
"$Mi = \u1106;"
"$Bi = \u1107;"
"$BB = \u1108;"
"$Si = \u1109;"
"$SSi = \u110A;"
"$IEUNG = \u110B;" // null initial, inserted during Latin-Jamo
"$Ji = \u110C;"
"$JJ = \u110D;"
"$Ci = \u110E;"
"$Ki = \u110F;"
"$Ti = \u1110;"
"$Pi = \u1111;"
"$Hi = \u1112;"
// Medial vowels (jungseong), 1161-1175.
"$A = \u1161;"
"$AE = \u1162;"
"$YA = \u1163;"
"$YAE = \u1164;"
"$EO = \u1165;"
"$E = \u1166;"
"$YEO = \u1167;"
"$YE = \u1168;"
"$O = \u1169;"
"$WA = \u116A;"
"$WAE = \u116B;"
"$OE = \u116C;"
"$YO = \u116D;"
"$U = \u116E;"
"$WEO = \u116F;"
"$WE = \u1170;"
"$WI = \u1171;"
"$YU = \u1172;"
"$EU = \u1173;" // null medial, inserted during Latin-Jamo
"$YI = \u1174;"
"$I = \u1175;"
// Final consonants (jongseong), 11A8-11C2.
"$Gf = \u11A8;"
"$GGf = \u11A9;"
"$GS = \u11AA;"
"$Nf = \u11AB;"
"$NJ = \u11AC;"
"$NH = \u11AD;"
"$Df = \u11AE;"
"$L = \u11AF;"
"$LG = \u11B0;"
"$LM = \u11B1;"
"$LB = \u11B2;"
"$LS = \u11B3;"
"$LT = \u11B4;"
"$LP = \u11B5;"
"$LH = \u11B6;"
"$Mf = \u11B7;"
"$Bf = \u11B8;"
"$BS = \u11B9;"
"$Sf = \u11BA;"
"$SSf = \u11BB;"
"$NG = \u11BC;"
"$Jf = \u11BD;"
"$Cf = \u11BE;"
"$Kf = \u11BF;"
"$Tf = \u11C0;"
"$Pf = \u11C1;"
"$Hf = \u11C2;"
"$jamoInitial = [\u1100-\u1112];"
"$jamoMedial = [\u1161-\u1175];"
"$latinInitial = [bcdghjkmnprst];"
// Any character in the latin transliteration of a medial
"$latinMedial = [aeiouwy];"
// The last character of the latin transliteration of a medial
"$latinMedialEnd = [aeiou];"
// Disambiguation separator
"$sep = \\\';"
//----------------------------------------------------------------------
// Jamo-Latin
// Jamo to latin is relatively simple, since it is the latin that is
// ambiguous. Most rules are straightforward, and we encode them below
// as simple add-on back rule, e.g.:
// $jamoMedial {bs} > $BS;
// becomes
// $jamoMedial {bs} <> $BS;
// Furthermore, we don't care about the ordering for Jamo-Latin because
// we are going from single characters, so we can very easily piggyback
// on the Latin-Jamo.
// The main issue with Jamo-Latin is when to insert separators.
// Separators are inserted to obtain correct round trip behavior. For
// example, the sequence Ki A Gf Gi E, if transliterated to "kagge",
// would then round trip to Ki A GGi E. To prevent this, we insert a
// separator: "kag-ge". IMPORTANT: The need for separators depends
// very specifically on the behavior of the Latin-Jamo rules. A change
// in the Latin-Jamo behavior can completely change the way the
// separator insertion must be done.
// First try to preserve actual separators in the jamo text by doubling
// them. This fixes problems like:
// (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol
// => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional
// -- if we don't care about losing separators in the jamo, we can delete
// this rule.
"$sep $sep <> $sep;"
// Triple consonants. For three consonants "axxx" we insert a
// separator between the first and second "x" if XXf, Xf, and Xi all
// exist, and we have A Xf XXi. This prevents the reverse
// transliteration to A XXf Xi.
"$sep < $latinMedialEnd g {} $GGi;"
"$sep < $latinMedialEnd s {} $SSi;"
// For vowels the rule is similar. If there is a vowel "ae" such that
// "a" by itself and "e" by itself are vowels, then we want to map A E
// to "a-e" so as not to round trip to AE. However, in the text Ki EO
// IEUNG E we don't need to map to "keo-e". "keoe" suffices. For
// vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be
// tested. NOTE: These rules used to have a left context of
// $latinInitial instead of [^$latinMedial]. The problem with this is
// sequences where an initial IEUNG is transliterated away:
// (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O)
"$sep < [^$latinMedial] [y w] e {} [$O $OE];"
"$sep < [^$latinMedial] e {} [$O $OE $U];"
"$sep < [^$latinMedial] [o a] {} [$E $EO $EU];"
"$sep < [^$latinMedial] [w y] a {} [$E $EO $EU];"
// Similar to the above, but with an intervening $IEUNG.
"$sep < [^$latinMedial] [y w] e {} $IEUNG [$O $OE];"
"$sep < [^$latinMedial] e {} $IEUNG [$O $OE $U];"
"$sep < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];"
"$sep < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];"
// Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E,
// where Xi also exists, must be transliterated as "ax-e" to prevent
// the round trip conversion to A Xi E.
"$sep < $latinMedialEnd b {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd c {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd d {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd g {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd h {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd j {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd k {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd m {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd n {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd p {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd s {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd t {} $IEUNG $jamoMedial;"
// Double finals followed by IEUNG. Similar to the single finals
// followed by IEUNG. Any latin consonant pair X Y, between medials,
// that we would split by Latin-Jamo, we must handle when it occurs as
// part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi
// E.
"$sep < $latinMedialEnd b s {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd g g {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd g s {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd l b {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd l g {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd l h {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd l m {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd l p {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd l s {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd l t {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd n g {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd n h {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd n j {} $IEUNG $jamoMedial;"
"$sep < $latinMedialEnd s s {} $IEUNG $jamoMedial;"
// Split doubles. Text of the form A Xf Xi E, where XXi also occurs,
// we transliterate as "ax-xe" to prevent round trip transliteration as
// A XXi E.
"$sep < $latinMedialEnd b {} $Bi $jamoMedial;"
"$sep < $latinMedialEnd d {} $Di $jamoMedial;"
"$sep < $latinMedialEnd j {} $Ji $jamoMedial;"
"$sep < $latinMedialEnd g {} $Gi $jamoMedial;"
"$sep < $latinMedialEnd s {} $Si $jamoMedial;"
// XYY. This corresponds to the XYY rule in Latin-Jamo. By default
// Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result,
// "xyy" forms that correspond to XYf Yi must be transliterated as
// "xy-y".
"$sep < $latinMedialEnd b s {} [$Si $SSi];"
"$sep < $latinMedialEnd g s {} [$Si $SSi];"
"$sep < $latinMedialEnd l b {} [$Bi $BB];"
"$sep < $latinMedialEnd l g {} [$Gi $GGi];"
"$sep < $latinMedialEnd l s {} [$Si $SSi];"
"$sep < $latinMedialEnd n g {} [$Gi $GGi];"
"$sep < $latinMedialEnd n j {} [$Ji $JJ];"
// Deletion of IEUNG is handled below.
//----------------------------------------------------------------------
// Latin-Jamo
// [Basic, context-free Jamo-Latin rules are embedded here too. See
// above.]
// Split digraphs: Text of the form 'axye', where 'xy' is a final
// digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
// 'e' are medials, we want to transliterate this as A Xf Yi E rather
// than A XYf IEUNG E. We do NOT include text of the form "axxe",
// since that is handled differently below. These rules are generated
// programmatically from the jamo data.
"$jamoMedial {b s} $latinMedial > $Bf $Si;"
"$jamoMedial {g s} $latinMedial > $Gf $Si;"
"$jamoMedial {l b} $latinMedial > $L $Bi;"
"$jamoMedial {l g} $latinMedial > $L $Gi;"
"$jamoMedial {l h} $latinMedial > $L $Hi;"
"$jamoMedial {l m} $latinMedial > $L $Mi;"
"$jamoMedial {l p} $latinMedial > $L $Pi;"
"$jamoMedial {l s} $latinMedial > $L $Si;"
"$jamoMedial {l t} $latinMedial > $L $Ti;"
"$jamoMedial {n g} $latinMedial > $Nf $Gi;"
"$jamoMedial {n h} $latinMedial > $Nf $Hi;"
"$jamoMedial {n j} $latinMedial > $Nf $Ji;"
// Single consonants are initials: Text of the form 'axe', where 'x'
// can be an initial or a final, and 'a' and 'e' are medials, we want
// to transliterate as A Xi E rather than A Xf IEUNG E.
"$jamoMedial {b} $latinMedial > $Bi;"
"$jamoMedial {c} $latinMedial > $Ci;"
"$jamoMedial {d} $latinMedial > $Di;"
"$jamoMedial {g} $latinMedial > $Gi;"
"$jamoMedial {h} $latinMedial > $Hi;"
"$jamoMedial {j} $latinMedial > $Ji;"
"$jamoMedial {k} $latinMedial > $Ki;"
"$jamoMedial {m} $latinMedial > $Mi;"
"$jamoMedial {n} $latinMedial > $Ni;"
"$jamoMedial {p} $latinMedial > $Pi;"
"$jamoMedial {s} $latinMedial > $Si;"
"$jamoMedial {t} $latinMedial > $Ti;"
// Doubled initials. The sequence "axxe", where XX exists as an initial
// (XXi), and also Xi and Xf exist (true of all digraphs XX), we want
// to transliterate as A XXi E, rather than split to A Xf Xi E.
"$jamoMedial {b b} $latinMedial > $BB;"
"$jamoMedial {d d} $latinMedial > $DD;"
"$jamoMedial {j j} $latinMedial > $JJ;"
"$jamoMedial {g g} $latinMedial > $GGi;"
"$jamoMedial {s s} $latinMedial > $SSi;"
// XYY. Because doubled consonants bind more strongly than XY
// consonants, we must handle the sequence "axyy" specially. Here XYf
// and YYi must exist. In these cases, we map to Xf YYi rather than
// XYf.
"$jamoMedial {b} s s > $Bf;"
"$jamoMedial {g} s s > $Gf;"
"$jamoMedial {l} b b > $L;"
"$jamoMedial {l} g g > $L;"
"$jamoMedial {l} s s > $L;"
"$jamoMedial {n} g g > $Nf;"
"$jamoMedial {n} j j > $Nf;"
// Finals: Attach consonant with preceding medial to preceding medial.
// Do this BEFORE mapping consonants to initials. Longer keys must
// precede shorter keys that they start with, e.g., the rule for 'bs'
// must precede 'b'.
// [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this
// block for Jamo-Latin.]
// Each rule is two-way (<>): the left context $jamoMedial requires an
// already-converted medial before the Latin key. One rule per line.
"$jamoMedial {bs} <> $BS;"
"$jamoMedial {b} <> $Bf;"
"$jamoMedial {c} <> $Cf;"
"$jamoMedial {d} <> $Df;"
"$jamoMedial {gg} <> $GGf;"
"$jamoMedial {gs} <> $GS;"
"$jamoMedial {g} <> $Gf;"
"$jamoMedial {h} <> $Hf;"
"$jamoMedial {j} <> $Jf;"
"$jamoMedial {k} <> $Kf;"
"$jamoMedial {lb} <> $LB;"
"$jamoMedial {lg} <> $LG;"
"$jamoMedial {lh} <> $LH;"
"$jamoMedial {lm} <> $LM;"
"$jamoMedial {lp} <> $LP;"
"$jamoMedial {ls} <> $LS;"
"$jamoMedial {lt} <> $LT;"
"$jamoMedial {l} <> $L;"
"$jamoMedial {m} <> $Mf;"
"$jamoMedial {ng} <> $NG;"
"$jamoMedial {nh} <> $NH;"
"$jamoMedial {nj} <> $NJ;"
"$jamoMedial {n} <> $Nf;"
"$jamoMedial {p} <> $Pf;"
"$jamoMedial {ss} <> $SSf;"
"$jamoMedial {s} <> $Sf;"
"$jamoMedial {t} <> $Tf;"
// Initials: Attach single consonant to following medial. Do this
// AFTER mapping finals. Longer keys must precede shorter keys that
// they start with, e.g., the rule for 'gg' must precede 'g'.
// [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within
// this block for Jamo-Latin.]
"{gg} $latinMedial <> $GGi;"
"{g} $latinMedial <> $Gi;"
"{n} $latinMedial <> $Ni;"
"{dd} $latinMedial <> $DD;"
"{d} $latinMedial <> $Di;"
"{r} $latinMedial <> $R;"
"{m} $latinMedial <> $Mi;"
"{bb} $latinMedial <> $BB;"
"{b} $latinMedial <> $Bi;"
"{ss} $latinMedial <> $SSi;"
"{s} $latinMedial <> $Si;"
"{jj} $latinMedial <> $JJ;"
"{j} $latinMedial <> $Ji;"
"{c} $latinMedial <> $Ci;"
"{k} $latinMedial <> $Ki;"
"{t} $latinMedial <> $Ti;"
"{p} $latinMedial <> $Pi;"
"{h} $latinMedial <> $Hi;"
// 'r' in final position. Because of the equivalency of the 'l' and
// 'r' jamo (the glyphs are the same), we try to provide the same
// equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled
// below. If we see an 'r' in an apparent final position, treat it
// like 'l'. For example, "karka" => Ki A R EU Ki A without this rule.
// Instead, we want Ki A L Ki A.
"$jamoMedial {r} $latinInitial > | l;"
// Initial + Final: If we match the next rule, we have initial then
// final consonant with no intervening medial. We insert the null
// vowel BEFORE it to create a well-formed syllable. (In the next rule
// we insert a null vowel AFTER an anomalous initial.)
"$jamoInitial {} [bcdghjklmnpst] > $EU;"
// Initial + X: This block matches an initial consonant not followed by
// a medial. We insert the null vowel after it. We handle double
// initials explicitly here; for single initial consonants we insert EU
// (as Latin) after them and let standard rules do the rest.
// BREAKS ROUND TRIP INTEGRITY
"gg > $GGi $EU;"
"dd > $DD $EU;"
"bb > $BB $EU;"
"ss > $SSi $EU;"
"jj > $JJ $EU;"
"([bcdghjkmnprst]) > | $1 eu;"
// X + Final: Finally we have to deal with a consonant that can only be
// interpreted as a final (not an initial) and which is preceded
// neither by an initial nor a medial. It is the start of the
// syllable, but cannot be. Most of these will already be handled by
// the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng'
// 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'.
// For this isolated case, we could add a null initial and medial,
// which would give "la" => IEUNG EU L IEUNG A, for example. A more
// economical solution is to transliterate isolated "l" (that is,
// initial "l") to "r". (Other similar conversions of consonants that
// occur neither as initials nor as finals are handled below.)
"l > | r;"
// Medials. If a medial is preceded by an initial, then we proceed
// normally. As usual, longer keys must precede shorter ones.
// [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within
// this block for Jamo-Latin.]
"$jamoInitial {ae} <> $AE;"
"$jamoInitial {a} <> $A;"
"$jamoInitial {eo} <> $EO;"
"$jamoInitial {eu} <> $EU;"
"$jamoInitial {e} <> $E;"
"$jamoInitial {i} <> $I;"
"$jamoInitial {oe} <> $OE;"
"$jamoInitial {o} <> $O;"
"$jamoInitial {u} <> $U;"
"$jamoInitial {wae} <> $WAE;"
"$jamoInitial {wa} <> $WA;"
"$jamoInitial {weo} <> $WEO;"
"$jamoInitial {we} <> $WE;"
"$jamoInitial {wi} <> $WI;"
"$jamoInitial {yae} <> $YAE;"
"$jamoInitial {ya} <> $YA;"
"$jamoInitial {yeo} <> $YEO;"
"$jamoInitial {ye} <> $YE;"
"$jamoInitial {yi} <> $YI;"
"$jamoInitial {yo} <> $YO;"
"$jamoInitial {yu} <> $YU;"
// We may see an anomalous isolated 'w' or 'y'. In that case, we
// interpret it as 'wi' and 'yu', respectively.
// BREAKS ROUND TRIP INTEGRITY
"$jamoInitial {w} > | wi;"
"$jamoInitial {y} > | yu;"
// Otherwise, insert a null consonant IEUNG before the medial (which is
// still an untransliterated latin vowel).
"($latinMedial) > $IEUNG | $1;"
// Convert non-jamo latin consonants to equivalents. These occur as
// neither initials nor finals in jamo. 'l' occurs as a final, but not
// an initial; it is handled above. The following letters (left hand
// side) will never be output by Jamo-Latin.
"f > | p;"
"q > | k;"
"v > | b;"
"x > | ks;"
"z > | s;"
// Delete separators (Latin-Jamo).
"$sep > ;"
// Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,
// since these may also occur in text.
"< $IEUNG;"
//- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
//- the INDEX file. This transliterator is, by itself, not
//- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
//- inverses thereof.
// eof
}
}

View file

@ -1,511 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Latin_Katakana.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// Latin_Katakana
t_Latn_Kana {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// note: a global filter is more efficient, but MUST include all source chars
//:: [\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]] ;
// MINIMAL FILTER GENERATED FOR: Latin-Katakana
//## WARNING -- must add width filter, both here and below!!! ###
":: [[\u1100-\u1112\u111A\u1121\u1160-\u1175\u11AA\u11AC-\u11AD\u11B0-\u11B5\u2190-\u2193\u2502\u25A0\u25CB\u3000-\u3002\u300C-\u300D\u3099-\u309A\u30A1-\u30ED\u30EF\u30F2-\u30F4\u30F7\u30FA-\u30FC\uFF01-\uFF5E\uFFE0-\uFFE6][',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]] ;"
":: [:Latin:] fullwidth-halfwidth ();"
":: NFD (NFC);"
":: Lower ();" // whenever transliterating from cased to uncased script, include this
// :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese
// Uses modified Hepburn. Small changes to make unambiguous.
// | Kunrei-shiki: Hepburn/MHepburn
// | ------------------------------
// | si: shi
// | si ~ya: sha
// | si ~yu: shu
// | si ~yo: sho
// | zi: ji
// | zi ~ya: ja
// | zi ~yu: ju
// | zi ~yo: jo
// | ti: chi
// | ti ~ya: cha
// | ti ~yu: chu
// | ti ~yo: cho
// | tu: tsu
// | di: ji/dji
// | du: zu/dzu
// | hu: fu
// | For foreign words:
// | -----------------
// | se ~i si
// | si ~e she
// |
// | ze ~i zi
// | zi ~e je
// |
// | te ~i ti
// | ti ~e che
// | te ~u tu
// |
// | de ~i di
// | de ~u du
// | de ~i di
// |
// | he ~u: hu
// | hu ~a fa
// | hu ~i fi
// | hu ~e fe
// | hu ~o fo
// Most small forms are generated, but if necessary
// explicit small forms are given with ~a, ~ya, etc.
//------------------------------------------------------
// Variables
"$vowel = [aeiou] ;"
"$consonant = [bcdfghjklmnpqrstvwxyz] ;"
"$macron = \u0304 ;"
// Variables used for doubled-consonants with tsu
"$kana = [\u3041-\u3094] ;"
"$voice = [\u3099\u309B];"
"$semivoice = [\u309A\u309C];"
"$k_start = [カキクケコかきくけこ] ;"
"$s_start = [サシスセソさしすせそ] ;"
"$j_start = [シし] $voice ;"
"$t_start = [タチツテトたちつてと] ;"
"$n_start = [ナニヌネノンなにぬねの] ;"
"$h_start = [ハヒヘホはひへほ] ;"
"$f_start = [フふ] ;"
"$m_start = [マミムメモまみむめも] ;"
"$y_start = [ヤユヨやゆよ] ;"
"$r_start = [ラリルレロらりるれろ] ;"
"$w_start = [ワヰヱヲわゐゑを] ;"
"$v_start = [ワヰヱヲ]゙ ;"
// if ン is followed by $n_quoter, then it needs an
// apostrophe after its romaji form to disambiguate it.
// e.g., ン ア ! = ナ, so represent as "n'a", not "na".
"$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ヤ ユ ヨ ン] ;"
"$small_y = [ャィュェョ] ;"
"$iteration = \u309D ;"
//------------------------------------------------------
// katakana rules
// Punctuation
"'.' <> 。;"
"',' <> 、;"
// ' ' } [a-z] > ; # delete spaces before latin
// ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before hiragana
// Iteration Mark
// Copy previous letter & marks
// TODO
// | $1 $1 < ($kana [[:M:]$voice$semivoice]?) $iteration
// Specials for katakana -- not shared with hiragana
"va <> ヷ ;"
"vi <> ヸ ;"
"ve <> ヹ ;"
"vo <> ヺ ;"
"'~ka' <> ヵ ;"
"'~ke' <> ヶ ;"
// ~~~ begin shared rules ~~~
//special
"ya < '~'ャ;"
"yi < '~'ィ ;"
"yu < '~'ュ;"
"ye < '~'ェ;"
"yo < '~'ョ;"
//normal
"a <> ア ;"
"b | '~' < ヒ ゙} $small_y ;"
"by } $vowel > ビ | '~y' ;"
"ba <> バ ;"
"bi <> ビ ;"
"bu <> ブ ;"
"be <> ベ ;"
"bo <> ボ ;"
"c } i > | s ;"
"c } e > | s ;"
"da <> ダ ;"
"di <> ディ ;"
"du <> デゥ ;"
"de <> デ ;"
"do <> ド ;"
"dzu <> ヅ ;"
"dja < ヂャ ;"
"dji'~i' < ヂィ ;" // liu
"dju < ヂュ ;"
"dje < ヂェ ;"
"djo < ヂョ ;"
"dji <> ヂ ;"
"dj } $vowel > ヂ | '~y' ;"
// TODO: QUESTION: use ĵĴżŻ instead of dj, dz
"cha < チャ ;"
"chi'~i' < チィ ;" // liu
"chu < チュ ;"
"che < チェ ;"
"cho < チョ ;"
"chi <> チ ;"
"ch } $vowel > チ | '~y' ;"
"e <> エ ;"
"g | '~' < ギ} $small_y ;"
"gy } $vowel > ギ | '~y' ;"
"ga <> ガ ;"
"gi <> ギ ;"
"gu <> グ ;"
"ge <> ゲ ;"
"go <> ゴ ;"
"i <> イ ;"
// j } $vowel > ジ | '~y' ;
"ja <> ジャ ;"
"ji'~i' < ジィ ;" // liu
"ju <> ジュ ;"
"je <> ジェ ;"
"jo <> ジョ ;"
"ji <> ジ ;"
"k | '~' < キ} $small_y ;"
"ky } $vowel > キ | '~y' ;"
"ka <> カ ;"
"ki <> キ ;"
"ku <> ク ;"
"ke <> ケ ;"
"ko <> コ ;"
"m | '~' < ミ} $small_y ;"
"my } $vowel > ミ | '~y' ;"
"ma <> マ ;"
"mi <> ミ ;"
"mu <> ム ;"
"me <> メ ;"
"mo <> モ ;"
"m } [pbfv] > ン ;"
// n-row. The first rule (reverse only) turns ニ followed by a small
// y-kana into "n" and leaves a '~' after the cursor for the following
// small-kana rule; the second (forward only) turns "ny" before a vowel
// into ニ and re-processes '~y' with that vowel.
"n | '~' < ニ } $small_y ;"
"ny } $vowel > ニ | '~y' ;"
"na <> ナ ;"
"ni <> ニ ;"
"nu <> ヌ ;"
"ne <> ネ ;"
// Target must be ノ (also listed in $n_start); an empty target would
// delete "no" going forward and leave ノ without a reverse mapping.
"no <> ノ ;"
"o <> オ ;"
"p | '~' < ピ } $small_y ;"
"py } $vowel > ピ | '~y' ;"
"pa <> パ ;"
"pi <> ピ ;"
"pu <> プ ;"
"pe <> ペ ;"
"po <> ポ ;"
"h | '~' < ヒ } $small_y ;"
"hy } $vowel > ヒ | '~y' ;"
"ha <> ハ ;"
"hi <> ヒ ;"
"hu <> ヘゥ ;"
"he <> ヘ ;"
"ho <> ホ ;"
// f | '~' < フ } $small_y ;
// f } $vowel > フ | '~' ;
"fa <> ファ ;"
"fi <> フィ ;"
"fe <> フェ ;"
"fo <> フォ ;"
"fu <> フ ;"
"r | '~' < リ } $small_y ;"
"ry } $vowel > リ | '~y' ;"
"ra <> ラ ;"
"ri <> リ ;"
"ru <> ル ;"
"re <> レ ;"
"ro <> ロ ;"
"za <> ザ ;"
"zi <> ゼィ ;"
"zu <> ズ ;"
"ze <> ゼ ;"
"zo <> ゾ ;"
"sa <> サ ;"
"si <> セィ ;"
"su <> ス ;"
"se <> セ ;"
"so <> ソ ;"
"sha < シャ ;"
"shi'~i' < シィ ;" // liu
"shu < シュ ;"
"she < シェ ;"
"sho < ショ ;"
"shi <> シ ;"
"sh } $vowel > シ | '~y' ;"
"ta <> タ ;"
"ti <> ティ ;"
"tu <> テゥ ;"
"te <> テ ;"
"to <> ト ;"
"tsu <> ツ ;"
// v } $vowel > ヴ | '~' ;
//'v~a' < ヴァ ; # liu
//'v~i' < ヴィ ; # liu
//'v~e' < ヴェ ; # liu
//'v~o' < ヴォ ; # liu
"vu <> ヴ ;"
"u <> ウ ;"
// w } $vowel > ウ | '~' ;
"wa <> ワ ;"
"wi <> ヰ ;"
"wu > ウ ;"
"we <> ヱ ;"
"wo <> ヲ ;"
"ya <> ヤ ;"
"yi > イ ;"
"yu <> ユ ;"
"ye > エ ;"
"yo <> ヨ ;"
// double consonants
//specials
"s } sh > ッ ;"
"t } ch > ッ ;"
//voiced
"j } j <> ッ } $j_start ;"
"b } b <> ッ } [$h_start$f_start] $voice;"
"d } d <> ッ } $t_start $voice;"
"g } g <> ッ } $k_start $voice;"
"p } p <> ッ } [$h_start$f_start] $semivoice;"
// v } v <> ッ } [ワヰウヱヲう] $voice ;
"z } z <> ッ } $s_start $voice;"
"v } v <> ッ } $v_start;"
// normal
"k } k <> ッ } $k_start ;"
"m } m <> ッ } $m_start ;"
"n } n <> ッ } $n_start ;"
"h } h <> ッ } $h_start ;"
"f } f <> ッ } $f_start ;"
"r } r <> ッ } $r_start ;"
"t } t <> ッ } $t_start ;"
"s } s <> ッ } $s_start ;"
"w } w <> ッ } $w_start;"
"y } y <> ッ } $y_start;"
// completeness
"x } x > ッ ;"
"c } k > ッ ;"
"c } c > ッ ;"
"c } q > ッ ;"
"l } l > ッ ;"
"q } q > ッ ;"
// y } y > ッ ;
// w } w > ッ ;
// prolonged vowel mark. this indicates a doubling of
// the preceding vowel sound
//a < a { ー ; # liu
//e < e { ー ; # liu
//i < i { ー ; # liu
//o < o { ー ; # liu
//u < u { ー ; # liu
"$macron <> ー ;"
// small forms
"'~a' <> ァ ;"
"'~i' <> ィ ;"
"'~u' <> ゥ ;"
"'~e' <> ェ ;"
"'~o' <> ォ ;"
"'~tsu' <> ッ ;"
"'~wa' <> ヮ ;"
"'~ya' <> ャ ;"
"'~yi' > ィ ;"
"'~yu' <> ュ ;"
"'~ye' > ェ ;"
"'~yo' <> ョ ;"
// iteration marks
// TODO: make more accurate
// Reverse-only rules. The left context is the romaji already produced
// for the preceding syllable; the key is the iteration mark ヽ followed
// by a voicing mark. The output repeats that syllable ($1 is the
// captured "y* vowel" tail) with the voiced counterpart of its
// consonant: sh->j, ch->dj, ts->dz, k->g, s->z, t->d, h->b, w->v.
"j $1 < sh (y* $vowel) {ヽ$voice ;"
"dj $1 < ch (y* $vowel) {ヽ$voice ;"
"dz $1 < ts (y* $vowel) {ヽ$voice ;"
"g $1 < k (y* $vowel) {ヽ$voice ;"
"z $1 < s (y* $vowel) {ヽ$voice ;"
"d $1 < t (y* $vowel) {ヽ$voice ;"
// Unvoiced h voices to b, like every other pair above (context is the
// unvoiced consonant, output is the voiced one).
"b $1 < h (y* $vowel) {ヽ$voice ;"
"v $1 < w (y* $vowel) {ヽ$voice ;"
// Same-consonant repeats. NOTE(review): the sh/ch/ts entries have the
// same context and key as the first three rules above, so they look
// unreachable -- confirm before relying on them.
"sh $1 < sh (y* $vowel) {ヽ$voice ;"
"j $1 < j (y* $vowel) {ヽ$voice ;"
"ch $1 < ch (y* $vowel) {ヽ$voice ;"
"dj $1 < dj(y* $vowel) {ヽ$voice ;"
"ts $1 < ts (y* $vowel) {ヽ$voice ;"
"dz $1 < dz (y* $vowel) {ヽ$voice ;"
// Fallbacks, with the voicing mark optional.
"$1 < ($consonant y* $vowel) {ヽ$voice? ;"
"$1 < (.) {ヽ $voice? ;" // otherwise repeat last character
"< ヽ $voice? ;" // delete if no characters found
// h- rule: lengthens vowel if not followed by a vowel
"[aeiou] } h > ー ;"
// one-way latin- > kana rules. these do not occur in
// well-formed romaji representing actual japanese text.
// their purpose is to make all romaji map to kana of
// some sort.
// the following are not really necessary, but produce
// slightly more natural results.
"cy > セィ ;"
"dy > ディ ;"
"hy > ヒ ;"
"sy > セィ ;"
"ty > ティ ;"
"zy > ゼィ ;"
"h > ヘ ;"
// isolated consonants listed here so as not to mask
// longer rules above.
"ch > チ;"
"sh > シ ;"
"dz > ヅ ;"
"dj > ヂ;"
"b > ブ ;"
"d > デ ;"
"g > グ ;"
"k > ク ;"
"m > ム ;"
"n'' < ン } $n_quoter ;"
"n <> ン ;"
"p > プ ;"
"r > ル ;"
"s > ス ;"
"t > テ ;"
"y > イ ;"
"z > ズ ;"
"v > ヴ ;"
"f > フ;"
"j > ジ;"
"w > ウ;"
"ß > | ss ;"
"æ > | e ;"
"ð > | d ;"
"ø > | u ;"
"þ > | th ;"
// simple substitutions using backup
"c > | k ;"
"l > | r ;"
"q > | k ;"
"x > | ks ;"
// ~~~ END shared rules ~~~
//------------------------------------------------------
// Final cleanup
"'~' > ;" // delete stray tildes between letters
"[:Katakana:] { '' } [:Latin:] > ;" // delete stray quotes between letters
// [\u02BE[:Nonspacing Mark:]-[\u3099-\u309C]] > ; # delete any non-spacing marks that we didn't use
":: NFC (NFD) ;"
":: ([:Katakana:] halfwidth-fullwidth);"
// note: a global filter is more efficient, but MUST include all source chars!!
//:: ([\\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]]);
// MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
":: ( [[\\\ -~\u00A2-\u00A3\u00A5-\u00A6\u00AC\u0304\u20A9\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE8-\uFFEE][~\u3001-\u3002\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u3099-\u309B\u309E\u30A1-\u30FA\u30FC-\u30FE]] ) ;"
// eof
}
}

View file

@ -1,56 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat
// Source: \icu4j\src\com\ibm\icu\impl\data/Transliterator_Latin_NumericPinyin.txt
// Date: Fri May 28 17:07:31 2004
//--------------------------------------------------------------------
// Latin_NumericPinyin
// Latin <-> NumericPinyin rule set.
// Forward rules (">") replace a combining tone mark with a tone digit placed
// at the end of the syllable; reverse rules ("<") turn a trailing digit back
// into a tone mark deposited after the appropriate vowel. The mark/digit
// conversion itself is delegated to the tone-digit / digit-tone transliterators
// through the &tone-digit(...) and &digit-tone(...) function calls.
t_Latn_NPinyn {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// According to the pinyin definitions I've been able to find:
// 'a', 'e' are the preferred bases
// otherwise 'o'
// otherwise last vowel
// The trailing forms of syllables are the following:
// "a", "ai", "ao", "an", "ang",
// "o", "ou", "ong",
// "e", "ei", "er", "en", "eng",
// "i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong",
// "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng",
// "ü", "üe", "üan", "ün"
// so the letters the tone will 'hop' over are those listed in the second
// forward rule below: i, o, n, u, "on" and "ng" (plus the special case of
// "r" after "e", handled by the first forward rule).
// Decompose before the forward rules run (and recompose as the last reverse step).
"::NFD (NFC);"
// Combining macron, acute, caron, grave and breve.
// NOTE(review): t_Tone_Digit has no rule for \u0306 (breve), so
// &tone-digit() presumably leaves it unchanged - confirm this is intended.
"$tone = [\u0304\u0301\u030C\u0300\u0306] ;"
// Move the tone to the end of a syllable, and convert to number
"e {($tone) r} > r &tone-digit($1);"
"($tone) ( [i o n u {o n} {n g}]) > $2 &tone-digit($1);"
"($tone) > &tone-digit($1);"
// The following backs up until it finds the right vowel, then deposits the tone
"$vowel = [aAeEiIoOuUüÜ];"
"$consonant = [[a-z A-Z] - [$vowel]];"
"$digit = [1-5];"
// Preference order for the base vowel: a/e first, then o (when no a/e
// follows), then the last vowel before the trailing consonants.
"$1 &digit-tone($3) $2 < ([aAeE]) ($vowel* $consonant*) ($digit);"
"$1 &digit-tone($3) $2 < ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit);"
"$1 &digit-tone($3) $2 < ($vowel) ($consonant*) ($digit);"
// Fallback: a digit directly after any letter is converted in place.
"&digit-tone($1) < [:letter:] {($digit)};"
// Recompose after the forward rules have run (and decompose as the first reverse step).
"::NFC (NFD);"
}
}

View file

@ -1,101 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Malayalam_InterIndic.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// Malayalam_InterIndic
// Malayalam -> InterIndic rule set.
// Each rule rewrites one Malayalam code point (or canonically decomposed
// sequence) to the corresponding code point in the private-use \uE0xx range
// that the InterIndic-* rule sets consume.
t_Mlym_InterIndic {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Malayalam-InterIndic
//:: NFD (NFC) ;
// Two-part vowel signs in canonically decomposed form. \u0D4A, \u0D4B and
// \u0D4C decompose to these sequences, so decomposed input would otherwise
// be mapped piecewise (e.g. VOWEL SIGN E + VOWEL SIGN AA instead of O).
// They must come before the single-code-point rules for \u0D46 and \u0D47
// so that the longer match is tried first.
"\u0D46\u0D3E>\uE04A;"// VOWEL SIGN O
"\u0D47\u0D3E>\uE04B;"// VOWEL SIGN OO
"\u0D46\u0D57>\uE04C;"// VOWEL SIGN AU
"\u0D02>\uE002;" // SIGN ANUSVARA
"\u0D03>\uE003;" // SIGN VISARGA
"\u0D05>\uE005;" // LETTER A
"\u0D06>\uE006;" // LETTER AA
"\u0D07>\uE007;" // LETTER I
"\u0D08>\uE008;" // LETTER II
"\u0D09>\uE009;" // LETTER U
"\u0D0A>\uE00A;" // LETTER UU
"\u0D0B>\uE00B;" // LETTER VOCALIC R
"\u0D0C>\uE00C;" // LETTER VOCALIC L
"\u0D0E>\uE00E;" // LETTER E
"\u0D0F>\uE00F;" // LETTER EE
"\u0D10>\uE010;" // LETTER AI
"\u0D12>\uE012;" // LETTER O
"\u0D13>\uE013;" // LETTER OO
"\u0D14>\uE014;" // LETTER AU
"\u0D15>\uE015;" // LETTER KA
"\u0D16>\uE016;" // LETTER KHA
"\u0D17>\uE017;" // LETTER GA
"\u0D18>\uE018;" // LETTER GHA
"\u0D19>\uE019;" // LETTER NGA
"\u0D1A>\uE01A;" // LETTER CA
"\u0D1B>\uE01B;" // LETTER CHA
"\u0D1C>\uE01C;" // LETTER JA
"\u0D1D>\uE01D;" // LETTER JHA
"\u0D1E>\uE01E;" // LETTER NYA
"\u0D1F>\uE01F;" // LETTER TTA
"\u0D20>\uE020;" // LETTER TTHA
"\u0D21>\uE021;" // LETTER DDA
"\u0D22>\uE022;" // LETTER DDHA
"\u0D23>\uE023;" // LETTER NNA
"\u0D24>\uE024;" // LETTER TA
"\u0D25>\uE025;" // LETTER THA
"\u0D26>\uE026;" // LETTER DA
"\u0D27>\uE027;" // LETTER DHA
"\u0D28>\uE028;" // LETTER NA
"\u0D2A>\uE02A;" // LETTER PA
"\u0D2B>\uE02B;" // LETTER PHA
"\u0D2C>\uE02C;" // LETTER BA
"\u0D2D>\uE02D;" // LETTER BHA
"\u0D2E>\uE02E;" // LETTER MA
"\u0D2F>\uE02F;" // LETTER YA
"\u0D30>\uE030;" // LETTER RA
"\u0D31>\uE031;" // LETTER RRA
"\u0D32>\uE032;" // LETTER LA
"\u0D33>\uE033;" // LETTER LLA
"\u0D34>\uE034;" // LETTER LLLA
"\u0D35>\uE035;" // LETTER VA
"\u0D36>\uE036;" // LETTER SHA
"\u0D37>\uE037;" // LETTER SSA
"\u0D38>\uE038;" // LETTER SA
"\u0D39>\uE039;" // LETTER HA
"\u0D3E>\uE03E;" // VOWEL SIGN AA
"\u0D3F>\uE03F;" // VOWEL SIGN I
"\u0D40>\uE040;" // VOWEL SIGN II
"\u0D41>\uE041;" // VOWEL SIGN U
"\u0D42>\uE042;" // VOWEL SIGN UU
"\u0D43>\uE043;" // VOWEL SIGN VOCALIC R
"\u0D46>\uE046;" // VOWEL SIGN E
"\u0D47>\uE047;" // VOWEL SIGN EE
"\u0D48>\uE048;" // VOWEL SIGN AI
// Precomposed two-part vowel signs, for input that has not been decomposed.
"\u0D4A>\uE04A;" // VOWEL SIGN O
"\u0D4B>\uE04B;" // VOWEL SIGN OO
"\u0D4C>\uE04C;" // VOWEL SIGN AU
"\u0D4D>\uE04D;" // SIGN VIRAMA
"\u0D57>\uE057;" // AU LENGTH MARK
"\u0D60>\uE060;" // LETTER VOCALIC RR
"\u0D61>\uE061;" // LETTER VOCALIC LL
"\u0D66>\uE066;" // DIGIT ZERO
"\u0D67>\uE067;" // DIGIT ONE
"\u0D68>\uE068;" // DIGIT TWO
"\u0D69>\uE069;" // DIGIT THREE
"\u0D6A>\uE06A;" // DIGIT FOUR
"\u0D6B>\uE06B;" // DIGIT FIVE
"\u0D6C>\uE06C;" // DIGIT SIX
"\u0D6D>\uE06D;" // DIGIT SEVEN
"\u0D6E>\uE06E;" // DIGIT EIGHT
"\u0D6F>\uE06F;" // DIGIT NINE
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,111 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Oriya_InterIndic.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// Oriya_InterIndic
// Oriya -> InterIndic rule set.
// Each rule rewrites one Oriya code point (or a two-code-point sequence) to a
// code point in the private-use \uE0xx range used by the InterIndic-* rule
// sets. Rules are tried in the order listed, so the two-code-point vowel-sign
// rules are placed ahead of the single-code-point rule for \u0B47.
t_Orya_InterIndic {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Oriya-InterIndic
//:: NFD (NFC) ;
// Disabled: consonant + NUKTA sequences for RRA / RHA. With these commented
// out, \u0B21, \u0B22 and \u0B3C are mapped individually by the rules below.
//\u0B21\u0B3C>\uE05C;# LETTER RRA
//\u0B22\u0B3C>\uE05D;# LETTER RHA
// Two-part vowel signs written as VOWEL SIGN E followed by a second mark.
"\u0B47\u0B56>\uE048;"// VOWEL SIGN AI
"\u0B47\u0B3E>\uE04B;"// VOWEL SIGN O
"\u0B47\u0B57>\uE04C;"// VOWEL SIGN AU
"\u0B01>\uE001;" // SIGN CANDRABINDU
"\u0B02>\uE002;" // SIGN ANUSVARA
"\u0B03>\uE003;" // SIGN VISARGA
"\u0B05>\uE005;" // LETTER A
"\u0B06>\uE006;" // LETTER AA
"\u0B07>\uE007;" // LETTER I
"\u0B08>\uE008;" // LETTER II
"\u0B09>\uE009;" // LETTER U
"\u0B0A>\uE00A;" // LETTER UU
"\u0B0B>\uE00B;" // LETTER VOCALIC R
"\u0B0C>\uE00C;" // LETTER VOCALIC L
"\u0B0F>\uE00F;" // LETTER E
"\u0B10>\uE010;" // LETTER AI
"\u0B13>\uE013;" // LETTER O
"\u0B14>\uE014;" // LETTER AU
"\u0B15>\uE015;" // LETTER KA
"\u0B16>\uE016;" // LETTER KHA
"\u0B17>\uE017;" // LETTER GA
"\u0B18>\uE018;" // LETTER GHA
"\u0B19>\uE019;" // LETTER NGA
"\u0B1A>\uE01A;" // LETTER CA
"\u0B1B>\uE01B;" // LETTER CHA
"\u0B1C>\uE01C;" // LETTER JA
"\u0B1D>\uE01D;" // LETTER JHA
"\u0B1E>\uE01E;" // LETTER NYA
"\u0B1F>\uE01F;" // LETTER TTA
"\u0B20>\uE020;" // LETTER TTHA
"\u0B21>\uE021;" // LETTER DDA
"\u0B22>\uE022;" // LETTER DDHA
"\u0B23>\uE023;" // LETTER NNA
"\u0B24>\uE024;" // LETTER TA
"\u0B25>\uE025;" // LETTER THA
"\u0B26>\uE026;" // LETTER DA
"\u0B27>\uE027;" // LETTER DHA
"\u0B28>\uE028;" // LETTER NA
"\u0B2A>\uE02A;" // LETTER PA
"\u0B2B>\uE02B;" // LETTER PHA
"\u0B2C>\uE02C;" // LETTER BA
"\u0B2D>\uE02D;" // LETTER BHA
"\u0B2E>\uE02E;" // LETTER MA
"\u0B2F>\uE02F;" // LETTER YA
"\u0B30>\uE030;" // LETTER RA
"\u0B32>\uE032;" // LETTER LA
"\u0B33>\uE033;" // LETTER LLA
"\u0B35>\uE035;" // LETTER VA
"\u0B36>\uE036;" // LETTER SHA
"\u0B37>\uE037;" // LETTER SSA
"\u0B38>\uE038;" // LETTER SA
"\u0B39>\uE039;" // LETTER HA
"\u0B3C>\uE03C;" // SIGN NUKTA
"\u0B3D>\uE03D;" // SIGN AVAGRAHA
"\u0B3E>\uE03E;" // VOWEL SIGN AA
"\u0B3F>\uE03F;" // VOWEL SIGN I
"\u0B40>\uE040;" // VOWEL SIGN II
"\u0B41>\uE041;" // VOWEL SIGN U
"\u0B42>\uE042;" // VOWEL SIGN UU
"\u0B43>\uE043;" // VOWEL SIGN VOCALIC R
"\u0B47>\uE047;" // VOWEL SIGN E
//
"\u0B4D>\uE04D;" // SIGN VIRAMA
"\u0B56>\uE056;" // AI LENGTH MARK
"\u0B57>\uE057;" // AU LENGTH MARK
// The danda source code points are in the \u09xx range, not the \u0Bxx
// range used by every other rule in this table.
"\u0964>\ue064;" // DANDA
"\u0965>\ue065;" // DOUBLE DANDA
//
"\u0B5F>\uE05F;" // LETTER YYA
"\u0B60>\uE060;" // LETTER VOCALIC RR
"\u0B61>\uE061;" // LETTER VOCALIC LL
"\u0B66>\uE066;" // DIGIT ZERO
"\u0B67>\uE067;" // DIGIT ONE
"\u0B68>\uE068;" // DIGIT TWO
"\u0B69>\uE069;" // DIGIT THREE
"\u0B6A>\uE06A;" // DIGIT FOUR
"\u0B6B>\uE06B;" // DIGIT FIVE
"\u0B6C>\uE06C;" // DIGIT SIX
"\u0B6D>\uE06D;" // DIGIT SEVEN
"\u0B6E>\uE06E;" // DIGIT EIGHT
"\u0B6F>\uE06F;" // DIGIT NINE
// These two targets do not follow the "same low byte" pattern of the rules above.
"\u0B70>\ue07B;" // ISSHAR
"\u0B71>\ue081;" // LETTER WA
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,92 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Tamil_InterIndic.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// Tamil_InterIndic
// Tamil -> InterIndic rule set.
// Each rule rewrites one Tamil code point (or a two-code-point sequence) to
// one or more code points in the private-use \uE0xx range used by the
// InterIndic-* rule sets. Rules are tried in the order listed, so the
// two-code-point rules are placed ahead of the single-code-point rules for
// \u0BC6, \u0BC7 and \u0B92.
t_Taml_InterIndic {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Tamil-InterIndic
//:: NFD (NFC) ;
// Two-part vowels written as a base followed by a second mark.
"\u0BC6\u0BBE>\uE04A;"// VOWEL SIGN O
"\u0BC7\u0BBE>\uE04B;"// VOWEL SIGN OO
"\u0BC6\u0BD7>\uE04C;"// VOWEL SIGN AU
"\u0B92\u0BD7>\uE014;"// LETTER AU
"\u0B82>\uE002;" // SIGN ANUSVARA
"\u0B83>\uE003;" // SIGN VISARGA
"\u0B85>\uE005;" // LETTER A
"\u0B86>\uE006;" // LETTER AA
"\u0B87>\uE007;" // LETTER I
"\u0B88>\uE008;" // LETTER II
"\u0B89>\uE009;" // LETTER U
"\u0B8A>\uE00A;" // LETTER UU
"\u0B8E>\uE00E;" // LETTER E
"\u0B8F>\uE00F;" // LETTER EE
"\u0B90>\uE010;" // LETTER AI
"\u0B92>\uE012;" // LETTER O
"\u0B93>\uE013;" // LETTER OO
"\u0B94>\uE014;" // LETTER AU
"\u0B95>\uE015;" // LETTER KA
"\u0B99>\uE019;" // LETTER NGA
"\u0B9A>\uE01A;" // LETTER CA
"\u0B9C>\uE01C;" // LETTER JA
"\u0B9E>\uE01E;" // LETTER NYA
"\u0B9F>\uE01F;" // LETTER TTA
"\u0BA3>\uE023;" // LETTER NNA
"\u0BA4>\uE024;" // LETTER TA
"\u0BA8>\uE028;" // LETTER NA
"\u0BA9>\uE029;" // LETTER NNNA
"\u0BAA>\uE02A;" // LETTER PA
"\u0BAE>\uE02E;" // LETTER MA
"\u0BAF>\uE02F;" // LETTER YA
"\u0BB0>\uE030;" // LETTER RA
"\u0BB1>\uE031;" // LETTER RRA
"\u0BB2>\uE032;" // LETTER LA
"\u0BB3>\uE033;" // LETTER LLA
"\u0BB4>\uE034;" // LETTER LLLA
"\u0BB5>\uE035;" // LETTER VA
"\u0BB7>\uE037;" // LETTER SSA
"\u0BB8>\uE038;" // LETTER SA
"\u0BB9>\uE039;" // LETTER HA
"\u0BBE>\uE03E;" // VOWEL SIGN AA
"\u0BBF>\uE03F;" // VOWEL SIGN I
"\u0BC0>\uE040;" // VOWEL SIGN II
"\u0BC1>\uE041;" // VOWEL SIGN U
"\u0BC2>\uE042;" // VOWEL SIGN UU
"\u0BC6>\uE046;" // VOWEL SIGN E
"\u0BC7>\uE047;" // VOWEL SIGN EE
"\u0BC8>\uE048;" // VOWEL SIGN AI
"\u0BCD>\uE04D;" // SIGN VIRAMA
"\u0BD7>\uE057;" // AU LENGTH MARK
"\u0BE7>\uE067;" // DIGIT ONE
"\u0BE8>\uE068;" // DIGIT TWO
"\u0BE9>\uE069;" // DIGIT THREE
"\u0BEA>\uE06A;" // DIGIT FOUR
"\u0BEB>\uE06B;" // DIGIT FIVE
"\u0BEC>\uE06C;" // DIGIT SIX
"\u0BED>\uE06D;" // DIGIT SEVEN
"\u0BEE>\uE06E;" // DIGIT EIGHT
"\u0BEF>\uE06F;" // DIGIT NINE
// The Tamil number signs have no single InterIndic target; they are spelled
// out as InterIndic digit sequences (ONE followed by one, two or three ZEROs).
"\u0BF0>\uE067\uE066;" // UNMAPPED Tamil-InterIndic: NUMBER TEN
"\u0BF1>\uE067\uE066\uE066;" // UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED
"\u0BF2>\uE067\uE066\uE066\uE066;"// UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND
// ASCII digit zero is mapped to the InterIndic DIGIT ZERO; this table has no
// rule for a Tamil digit-zero code point.
"0>\ue066;"
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,106 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: dumpICUrules.bat
// Source: ../../../impl/data/Transliterator_Telugu_InterIndic.txt
// Date: Tue May 18 17:24:49 2004
//--------------------------------------------------------------------
// Telugu_InterIndic
// Telugu -> InterIndic rule set.
// Each rule rewrites one Telugu code point (or a short sequence) to one or
// more code points in the private-use \uE0xx range used by the InterIndic-*
// rule sets. Rules are tried in the order listed, so the multi-code-point
// rules are placed ahead of the single-code-point rule for \u0C46.
t_Telu_InterIndic {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Telugu-InterIndic
//:: NFD (NFC) ;
// VOWEL SIGN E + SIGN VIRAMA + AI LENGTH MARK -> VOWEL SIGN AI + SIGN VIRAMA
"\u0c46\u0c4d\u0c56>\ue048\ue04d;"
"\u0C46\u0C56>\uE048;"// VOWEL SIGN AI
"\u0C01>\uE001;" // SIGN CANDRABINDU
"\u0C02>\uE002;" // SIGN ANUSVARA
"\u0C03>\uE003;" // SIGN VISARGA
"\u0C05>\uE005;" // LETTER A
"\u0C06>\uE006;" // LETTER AA
"\u0C07>\uE007;" // LETTER I
"\u0C08>\uE008;" // LETTER II
"\u0C09>\uE009;" // LETTER U
"\u0C0A>\uE00A;" // LETTER UU
"\u0C0B>\uE00B;" // LETTER VOCALIC R
"\u0C0C>\uE00C;" // LETTER VOCALIC L
"\u0C0E>\uE00E;" // LETTER E
"\u0C0F>\uE00F;" // LETTER EE
"\u0C10>\uE010;" // LETTER AI
"\u0C12>\uE012;" // LETTER O
"\u0C13>\uE013;" // LETTER OO
"\u0C14>\uE014;" // LETTER AU
"\u0C15>\uE015;" // LETTER KA
"\u0C16>\uE016;" // LETTER KHA
"\u0C17>\uE017;" // LETTER GA
"\u0C18>\uE018;" // LETTER GHA
"\u0C19>\uE019;" // LETTER NGA
"\u0C1A>\uE01A;" // LETTER CA
"\u0C1B>\uE01B;" // LETTER CHA
"\u0C1C>\uE01C;" // LETTER JA
"\u0C1D>\uE01D;" // LETTER JHA
"\u0C1E>\uE01E;" // LETTER NYA
"\u0C1F>\uE01F;" // LETTER TTA
"\u0C20>\uE020;" // LETTER TTHA
"\u0C21>\uE021;" // LETTER DDA
"\u0C22>\uE022;" // LETTER DDHA
"\u0C23>\uE023;" // LETTER NNA
"\u0C24>\uE024;" // LETTER TA
"\u0C25>\uE025;" // LETTER THA
"\u0C26>\uE026;" // LETTER DA
"\u0C27>\uE027;" // LETTER DHA
"\u0C28>\uE028;" // LETTER NA
"\u0C2A>\uE02A;" // LETTER PA
"\u0C2B>\uE02B;" // LETTER PHA
"\u0C2C>\uE02C;" // LETTER BA
"\u0C2D>\uE02D;" // LETTER BHA
"\u0C2E>\uE02E;" // LETTER MA
"\u0C2F>\uE02F;" // LETTER YA
"\u0C30>\uE030;" // LETTER RA
"\u0C31>\uE031;" // LETTER RRA
"\u0C32>\uE032;" // LETTER LA
"\u0C33>\uE033;" // LETTER LLA
"\u0C35>\uE035;" // LETTER VA
"\u0C36>\uE036;" // LETTER SHA
"\u0C37>\uE037;" // LETTER SSA
"\u0C38>\uE038;" // LETTER SA
"\u0C39>\uE039;" // LETTER HA
"\u0C3E>\uE03E;" // VOWEL SIGN AA
"\u0C3F>\uE03F;" // VOWEL SIGN I
"\u0C40>\uE040;" // VOWEL SIGN II
"\u0C41>\uE041;" // VOWEL SIGN U
"\u0C42>\uE042;" // VOWEL SIGN UU
"\u0C43>\uE043;" // VOWEL SIGN VOCALIC R
"\u0C44>\uE044;" // VOWEL SIGN VOCALIC RR
"\u0C46>\uE046;" // VOWEL SIGN E
"\u0C47>\uE047;" // VOWEL SIGN EE
"\u0C4A>\uE04A;" // VOWEL SIGN O
"\u0C4B>\uE04B;" // VOWEL SIGN OO
"\u0C4C>\uE04C;" // VOWEL SIGN AU
"\u0C4D>\uE04D;" // SIGN VIRAMA
"\u0C55>\uE055;" // LENGTH MARK
"\u0C56>\uE056;" // AI LENGTH MARK
"\u0C60>\uE060;" // LETTER VOCALIC RR
"\u0C61>\uE061;" // LETTER VOCALIC LL
"\u0C66>\uE066;" // DIGIT ZERO
"\u0C67>\uE067;" // DIGIT ONE
"\u0C68>\uE068;" // DIGIT TWO
"\u0C69>\uE069;" // DIGIT THREE
"\u0C6A>\uE06A;" // DIGIT FOUR
"\u0C6B>\uE06B;" // DIGIT FIVE
"\u0C6C>\uE06C;" // DIGIT SIX
"\u0C6D>\uE06D;" // DIGIT SEVEN
"\u0C6E>\uE06E;" // DIGIT EIGHT
"\u0C6F>\uE06F;" // DIGIT NINE
// :: NFC (NFD) ;
// eof
}
}

View file

@ -1,26 +0,0 @@
 // -*- Coding: utf-8; -*-
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat
// Source: \icu4j\src\com\ibm\icu\impl\data/Transliterator_Tone_Digit.txt
// Date: Fri May 28 17:07:31 2004
//--------------------------------------------------------------------
// Tone_Digit
// Tone <-> Digit rule set: pairs each combining tone mark with a tone digit.
// The first four rules are bidirectional ("<>"); the last applies only in the
// reverse (digit -> tone) direction.
t_Tone_Digit {
Rule {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// Only intended for internal use
"\u0304 <> 1;" // combining macron
"\u0301 <> 2;" // combining acute
"\u030C <> 3;" // combining caron
"\u0300 <> 4;" // combining grave
// Reverse only: the digit 5 is replaced by nothing (no tone mark is produced).
"< 5;"
}
}

View file

@ -1,275 +0,0 @@
//--------------------------------------------------------------------
// Copyright (c) 1999-2004, International Business Machines
// Corporation and others. All Rights Reserved.
//--------------------------------------------------------------------
// THIS IS A MACHINE-GENERATED FILE
// Tool: \icu4j\src\com\ibm\icu\dev\tool\translit\dumpICURules.bat
// Source: Transliterator_index.txt
// Date: Fri May 28 17:07:30 2004
//--------------------------------------------------------------------
//--------------------------------------------------------------------
// N.B.: This file has been generated mechanically from the
// corresponding ICU4J file, which is the master file that receives
// primary updates. The colon-delimited fields have been split into
// separate strings. For 'file' and 'internal' lines, the encoding
// field has been deleted, since the encoding is processed at build
// time in ICU4C. Certain large rule sets not intended for general
// use have been commented out with the notation "Java only".
//--------------------------------------------------------------------
translit_index {
RuleBasedTransliteratorIDs {
//--------------------------------------------------------------------
//--------------------------------------------------------------------
//
// TRANSLITERATOR INDEX FILE. This file lists the non-algorithmic
// system transliterators. It allows arbitrary mappings between
// transliterator IDs and file names, and also allows the system to
// define aliases for transliterators, so that "Latin-Hangul", for
// example, can be implemented transparently as the compound
// "Latin-Jamo;Jamo-Hangul". Internal IDs may also be defined; these
// are invisible to the user, but can be composed together by the
// system to create visible transliterators.
//
// Blank lines and lines beginning with '#' are ignored.
//
// Lines in this file have one of the following forms (text not
// enclosed by <> is literal):
//
// <id>:file:<resource>:<encoding>:<direction>
// <id>:internal:<resource>:<encoding>:<direction>
// <id>:alias:<getInstanceArg>
//
// <id> is the ID of the system transliterator being defined. These
// are public IDs enumerated by Transliterator.getAvailableIDs(),
// unless the second field is "internal".
//
// <resource> is a ResourceReader resource name. Currently these refer
// to file names under com/ibm/text/resources. This string is passed
// directly to ResourceReader, together with <encoding>.
//
// <encoding> is the character encoding to use when reading <resource>;
// passed directly to ResourceReader. E.g., "UTF8".
//
// <direction> is either "FORWARD" or "REVERSE".
//
// <getInstanceArg> is a string to be passed directly to
// Transliterator.getInstance(). The returned Transliterator object
// then has its ID changed to <id> and is returned.
// Bidirectional rule files
{ "Fullwidth-Halfwidth", "file", "t_FWidth_HWidth", "FORWARD" },
{ "Halfwidth-Fullwidth", "file", "t_FWidth_HWidth", "REVERSE" },
{ "Latin-Cyrillic", "file", "t_Cyrl_Latn", "REVERSE" },
{ "Cyrillic-Latin", "file", "t_Cyrl_Latn", "FORWARD" },
{ "Latin-Hebrew", "file", "t_Hebr_Latn", "REVERSE" },
{ "Hebrew-Latin", "file", "t_Hebr_Latn", "FORWARD" },
{ "Latin-Arabic", "file", "t_Arab_Latn", "REVERSE" },
{ "Arabic-Latin", "file", "t_Arab_Latn", "FORWARD" },
{ "Tone-Digit", "internal", "t_Tone_Digit", "FORWARD" },
{ "Digit-Tone", "internal", "t_Tone_Digit", "REVERSE" },
{ "Latin-NumericPinyin", "file", "t_Latn_NPinyn", "FORWARD" },
{ "NumericPinyin-Latin", "file", "t_Latn_NPinyn", "REVERSE" },
{ "Han-Spacedhan", "internal", "t_Hani_SpHan", "FORWARD" },
{ "Spacedhan-Han", "alias", "null", "" },
{ "Han-Latin", "file", "t_Hani_Latn", "FORWARD" },
//Latin-Han:file:Transliterator_Han_Latin.txt:UTF8:REVERSE # no round trip!
{ "Latin-Han", "alias", "null", "" },
// Comment these out; they are only for testing
// Latin-Han/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:REVERSE
// Han-Latin/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:FORWARD
//Latin-Han/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:REVERSE
//Han-Latin/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:FORWARD
{ "Latin-Greek", "file", "t_Grek_Latn", "REVERSE" },
{ "Greek-Latin", "file", "t_Grek_Latn", "FORWARD" },
{ "Latin-Greek/UNGEGN", "file", "t_Grek_Latn_UNGEGN", "REVERSE" },
{ "Greek-Latin/UNGEGN", "file", "t_Grek_Latn_UNGEGN", "FORWARD" },
{ "Latin-Katakana", "file", "t_Latn_Kana", "FORWARD" },
{ "Katakana-Latin", "file", "t_Latn_Kana", "REVERSE" },
{ "Latin-Hiragana", "file", "t_Hira_Latn", "REVERSE" },
{ "Hiragana-Latin", "file", "t_Hira_Latn", "FORWARD" },
//Thai Stuff: will change if we get \b into Transliterator
// Java only: { "Thai-ThaiSemi", "internal", "-", "FORWARD" },
// Java only: { "Thai-ThaiLogical", "internal", "-", "FORWARD" },
// Java only: { "ThaiLogical-Thai", "internal", "-", "REVERSE" },
// Java only: { "ThaiLogical-Latin", "internal", "-", "FORWARD" },
// Java only: { "Latin-ThaiLogical", "internal", "-", "REVERSE" },
// Must use the order below!
// We need two separate passes because of the Thai vowel reversal
// Thai-Logical also converts spaces to semicolons. That has to be done before we insert latin spaces
{ "Thai-Latin", "alias", "[[", "thai", "] \u0E01-\u0E3A\u0E40-\u0E5B];NFD;Thai-ThaiSemi;Any-BreakInternal;Thai-ThaiLogical;ThaiLogical-Latin;NFC", "" },
{ "Latin-Thai", "alias", "[[", "Latin", "][", "Mn", "][", "Me", "] \ \;0-9|~\u00A7\u00AB\u00BB\u02B9\u0374\u2021\u02CC];NFD;Latin-ThaiLogical;ThaiLogical-Thai;NFC", "" },
// end of Thai Stuff
{ "Hiragana-Katakana", "file", "t_Hira_Kana", "FORWARD" },
{ "Katakana-Hiragana", "file", "t_Hira_Kana", "REVERSE" },
{ "Any-Accents", "file", "t_Any_Accents", "FORWARD" },
{ "Accents-Any", "file", "t_Any_Accents", "REVERSE" },
{ "Any-Publishing", "file", "t_Any_Publishing", "FORWARD" },
{ "Publishing-Any", "file", "t_Any_Publishing", "REVERSE" },
// Korean
// N.B. Don't end Latin-Jamo with NFC; that produces Hangul. For
// Hangul output use Latin-Hangul.
{ "LowerLatin-Jamo", "internal", "t_Latn_Jamo", "FORWARD" },
{ "Jamo-LowerLatin", "internal", "t_Latn_Jamo", "REVERSE" },
{ "Latin-Jamo", "alias", "['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo", "" },
{ "Jamo-Latin", "alias", "['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\uAC00-\uD7A3];NFD;Jamo-LowerLatin;NFC", "" },
{ "Latin-Hangul", "alias", "['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo;NFC", "" },
{ "Hangul-Latin", "alias", "['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\u3131-\u313F\u3141-\u3143\u3145-\u3163\u3200-\u321C\u3260-\u327B\uAC00-\uD7A3\uFF07\uFFA1-\uFFAF\uFFB1-\uFFB3\uFFB5-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC];NFKD;Jamo-LowerLatin;NFC", "" },
// Inter-Indic composed rules
{ "Latin-InterIndic", "internal", "t_Latn_InterIndic", "FORWARD" },
{ "Devanagari-InterIndic", "internal", "t_Deva_InterIndic", "FORWARD" },
{ "Bengali-InterIndic", "internal", "t_Beng_InterIndic", "FORWARD" },
{ "Gurmukhi-InterIndic", "internal", "t_Guru_InterIndic", "FORWARD" },
{ "Gujarati-InterIndic", "internal", "t_Gujr_InterIndic", "FORWARD" },
{ "Oriya-InterIndic", "internal", "t_Orya_InterIndic", "FORWARD" },
{ "Tamil-InterIndic", "internal", "t_Taml_InterIndic", "FORWARD" },
{ "Telugu-InterIndic", "internal", "t_Telu_InterIndic", "FORWARD" },
{ "Kannada-InterIndic", "internal", "t_Knda_InterIndic", "FORWARD" },
{ "Malayalam-InterIndic", "internal", "t_Mlym_InterIndic", "FORWARD" },
{ "InterIndic-Latin", "internal", "t_InterIndic_Latn", "FORWARD" },
{ "InterIndic-Devanagari", "internal", "t_InterIndic_Deva", "FORWARD" },
{ "InterIndic-Bengali", "internal", "t_InterIndic_Beng", "FORWARD" },
{ "InterIndic-Gurmukhi", "internal", "t_InterIndic_Guru", "FORWARD" },
{ "InterIndic-Gujarati", "internal", "t_InterIndic_Gujr", "FORWARD" },
{ "InterIndic-Oriya", "internal", "t_InterIndic_Orya", "FORWARD" },
{ "InterIndic-Tamil", "internal", "t_InterIndic_Taml", "FORWARD" },
{ "InterIndic-Telugu", "internal", "t_InterIndic_Telu", "FORWARD" },
{ "InterIndic-Kannada", "internal", "t_InterIndic_Knda", "FORWARD" },
{ "InterIndic-Malayalam", "internal", "t_InterIndic_Mlym", "FORWARD" },
//Latin-Indic transliterators
{ "Latin-Devanagari", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Latin-Bengali", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Latin-Gurmukhi", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Latin-Gujarati", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gujarati;NFC", "" },
{ "Latin-Oriya", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Oriya;NFC", "" },
{ "Latin-Tamil", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Tamil;NFC", "" },
{ "Latin-Telugu", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Telugu;NFC", "" },
{ "Latin-Kannada", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Kannada;NFC", "" },
{ "Latin-Malayalam", "alias", "['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Malayalam;NFC", "" },
//Indic-Latin transliterators
{ "Devanagari-Latin", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Latin;NFC", "" },
{ "Bengali-Latin", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Latin;NFC", "" },
{ "Gurmukhi-Latin", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Latin;NFC", "" },
{ "Gujarati-Latin", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Latin;NFC", "" },
{ "Oriya-Latin", "alias", "[\u0964-\u0965\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35\u0B36-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B70\u0B71];NFD;Oriya-InterIndic;InterIndic-Latin;NFC", "" },
{ "Tamil-Latin", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Latin;NFC", "" },
{ "Telugu-Latin", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Latin;NFC", "" },
{ "Kannada-Latin", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Latin;NFC", "" },
{ "Malayalam-Latin", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Latin;NFC", "" },
{ "Devanagari-Bengali", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Devanagari-Gurmukhi", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Devanagari-Gujarati", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gujarati;NFC", "" },
{ "Devanagari-Oriya", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Oriya;NFC", "" },
{ "Devanagari-Tamil", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Tamil;NFC", "" },
{ "Devanagari-Telugu", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Telugu;NFC", "" },
{ "Devanagari-Kannada", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Kannada;NFC", "" },
{ "Devanagari-Malayalam", "alias", "[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Malayalam;NFC", "" },
{ "Bengali-Devanagari", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Bengali-Gurmukhi", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Bengali-Gujarati", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gujarati;NFC", "" },
{ "Bengali-Oriya", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Oriya;NFC", "" },
{ "Bengali-Tamil", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Tamil;NFC", "" },
{ "Bengali-Telugu", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Telugu;NFC", "" },
{ "Bengali-Kannada", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Kannada;NFC", "" },
{ "Bengali-Malayalam", "alias", "[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Malayalam;NFC", "" },
{ "Gurmukhi-Devanagari", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Gurmukhi-Bengali", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Gurmukhi-Gujarati", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Gujarati;NFC", "" },
{ "Gurmukhi-Oriya", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Oriya;NFC", "" },
{ "Gurmukhi-Tamil", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Tamil;NFC", "" },
{ "Gurmukhi-Telugu", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Telugu;NFC", "" },
{ "Gurmukhi-Kannada", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Kannada;NFC", "" },
{ "Gurmukhi-Malayalam", "alias", "[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Malayalam;NFC", "" },
{ "Gujarati-Devanagari", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Gujarati-Bengali", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Gujarati-Gurmukhi", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Gujarati-Oriya", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Oriya;NFC", "" },
{ "Gujarati-Tamil", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Tamil;NFC", "" },
{ "Gujarati-Telugu", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Telugu;NFC", "" },
{ "Gujarati-Kannada", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Kannada;NFC", "" },
{ "Gujarati-Malayalam", "alias", "[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Malayalam;NFC", "" },
{ "Oriya-Devanagari", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Oriya-Bengali", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Oriya-Gurmukhi", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Oriya-Gujarati", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gujarati;NFC", "" },
{ "Oriya-Tamil", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Tamil;NFC", "" },
{ "Oriya-Telugu", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Telugu;NFC", "" },
{ "Oriya-Kannada", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Kannada;NFC", "" },
{ "Oriya-Malayalam", "alias", "[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Malayalam;NFC", "" },
{ "Tamil-Devanagari", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Tamil-Bengali", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Tamil-Gurmukhi", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Tamil-Gujarati", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gujarati;NFC", "" },
{ "Tamil-Oriya", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Oriya;NFC", "" },
{ "Tamil-Telugu", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Telugu;NFC", "" },
{ "Tamil-Kannada", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Kannada;NFC", "" },
{ "Tamil-Malayalam", "alias", "[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Malayalam;NFC", "" },
{ "Telugu-Devanagari", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Telugu-Bengali", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Telugu-Gurmukhi", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Telugu-Gujarati", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gujarati;NFC", "" },
{ "Telugu-Oriya", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Oriya;NFC", "" },
{ "Telugu-Tamil", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Tamil;NFC", "" },
{ "Telugu-Kannada", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Kannada;NFC", "" },
{ "Telugu-Malayalam", "alias", "[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Malayalam;NFC", "" },
{ "Kannada-Devanagari", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Kannada-Bengali", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Kannada-Gurmukhi", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Kannada-Gujarati", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gujarati;NFC", "" },
{ "Kannada-Oriya", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Oriya;NFC", "" },
{ "Kannada-Tamil", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Tamil;NFC", "" },
{ "Kannada-Telugu", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Telugu;NFC", "" },
{ "Kannada-Malayalam", "alias", "[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Malayalam;NFC", "" },
{ "Malayalam-Devanagari", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Devanagari;NFC", "" },
{ "Malayalam-Bengali", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Bengali;NFC", "" },
{ "Malayalam-Gurmukhi", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gurmukhi;NFC", "" },
{ "Malayalam-Gujarati", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gujarati;NFC", "" },
{ "Malayalam-Oriya", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Oriya;NFC", "" },
{ "Malayalam-Tamil", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Tamil;NFC", "" },
{ "Malayalam-Telugu", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Telugu;NFC", "" },
{ "Malayalam-Kannada", "alias", "[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Kannada;NFC", "" },
// eof
}
}

View file

@ -19,44 +19,8 @@
# * To REPLACE the default list and only build with a few
# transliterators:
# _____________________________________________________
# | TRANLIST_SOURCE = translit_index.txt translit_Any_Publishing.txt
# | TRANSLIT_SOURCE = el.txt th.txt
#
#
TRANSLIT_SOURCE=t_Any_Accents.txt\
t_Any_Publishing.txt\
t_Arab_Latn.txt\
t_Beng_InterIndic.txt\
t_Cyrl_Latn.txt\
t_Deva_InterIndic.txt\
t_FWidth_HWidth.txt\
t_Grek_Latn.txt\
t_Grek_Latn_UNGEGN.txt\
t_Gujr_InterIndic.txt\
t_Guru_InterIndic.txt\
t_Hani_Latn.txt\
t_Hebr_Latn.txt\
t_Hira_Kana.txt\
t_Hira_Latn.txt\
t_InterIndic_Beng.txt\
t_InterIndic_Deva.txt\
t_InterIndic_Gujr.txt\
t_InterIndic_Guru.txt\
t_InterIndic_Knda.txt\
t_InterIndic_Latn.txt\
t_InterIndic_Mlym.txt\
t_InterIndic_Orya.txt\
t_InterIndic_Taml.txt\
t_InterIndic_Telu.txt\
t_Knda_InterIndic.txt\
t_Latn_InterIndic.txt\
t_Latn_Jamo.txt\
t_Latn_Kana.txt\
t_Mlym_InterIndic.txt\
t_Orya_InterIndic.txt\
t_Taml_InterIndic.txt\
t_Telu_InterIndic.txt\
t_Latn_NPinyn.txt\
t_Tone_Digit.txt\
t_Hani_SpHan.txt\
translit_index.txt
TRANSLIT_SOURCE=el.txt en.txt

View file

@ -17,6 +17,9 @@
#include "unicode/translit.h"
#include "unicode/utypes.h"
#include "unicode/parseerr.h"
#include "unicode/udata.h"
#define U_ICUDATA_TRANSLIT U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "translit"
U_NAMESPACE_BEGIN

View file

@ -725,7 +725,7 @@ UnicodeString& Transliterator::getDisplayName(const UnicodeString& id,
UnicodeString& result) {
UErrorCode status = U_ZERO_ERROR;
ResourceBundle bundle(u_getDataDirectory(), inLocale, status);
ResourceBundle bundle(U_ICUDATA_TRANSLIT, inLocale, status);
// Suspend checking status until later...
@ -1411,13 +1411,23 @@ UBool Transliterator::initializeRegistry() {
}
/* The following code parses the index table located in
* icu/data/translit_index.txt. The index is an n x 4 table
* icu/data/translit/root.txt. The index is an n x 4 table
* that follows this format:
*
* <id>:file:<resource>:<direction>
* <id>:internal:<resource>:<direction>
* <id>:alias:<getInstanceArg>:
*
* <id>{
* file{
* resource{"<resource>"}
* direction{"<direction>"}
* }
* }
* <id>{
* internal{
* resource{"<resource>"}
* direction{"<direction>"}
* }
* }
* <id>{
* alias{"<getInstanceArg>"}
* }
* <id> is the ID of the system transliterator being defined. These
* are public IDs enumerated by Transliterator.getAvailableIDs(),
* unless the second field is "internal".
@ -1434,10 +1444,10 @@ UBool Transliterator::initializeRegistry() {
*
* The extra blank field on "alias" lines is to make the array square.
*/
static const char translit_index[] = "translit_index";
//static const char translit_index[] = "translit_index";
UResourceBundle *bundle, *transIDs, *colBund;
bundle = ures_openDirect(0, translit_index, &status);
bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open root bundle*/, &status);
transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status);
int32_t row, maxRows;
@ -1445,11 +1455,11 @@ UBool Transliterator::initializeRegistry() {
maxRows = ures_getSize(transIDs);
for (row = 0; row < maxRows; row++) {
colBund = ures_getByIndex(transIDs, row, 0, &status);
if (U_SUCCESS(status) && ures_getSize(colBund) == 4) {
UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status);
UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0);
UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status);
if (U_SUCCESS(status)) {
UnicodeString id = ures_getKey(colBund);
UResourceBundle* res = ures_getNextResource(colBund, NULL, &status);
const char* typeStr = ures_getKey(res);
UChar type = (UChar)*typeStr;
if (U_SUCCESS(status)) {
switch (type) {
@ -1458,9 +1468,11 @@ UBool Transliterator::initializeRegistry() {
// 'file' or 'internal';
// row[2]=resource, row[3]=direction
{
UnicodeString resString = ures_getUnicodeStringByKey(res, "resource", &status);
UBool visible = (type == 0x0066 /*f*/);
UTransDirection dir =
(ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) ==
(ures_getUnicodeStringByKey(res, "direction", &status).charAt(0) ==
0x0046 /*F*/) ?
UTRANS_FORWARD : UTRANS_REVERSE;
registry->put(id, resString, dir, visible);
@ -1468,12 +1480,13 @@ UBool Transliterator::initializeRegistry() {
break;
case 0x61: // 'a'
// 'alias'; row[2]=createInstance argument
UnicodeString resString = ures_getUnicodeString(res, &status);
registry->put(id, resString, TRUE);
break;
}
}
ures_close(res);
}
ures_close(colBund);
}
}

View file

@ -202,7 +202,7 @@ Spec::Spec(const UnicodeString& theSpec) : top(theSpec) {
UErrorCode status = U_ZERO_ERROR;
CharString topch(top);
Locale toploc(topch);
res = new ResourceBundle(u_getDataDirectory(), toploc, status);
res = new ResourceBundle(U_ICUDATA_TRANSLIT, toploc, status);
/* test for NULL */
if (res == 0) {
return;
@ -1208,10 +1208,10 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID
// 2-d array at static init time, as a locale language. We're
// just using the locale mechanism to map through to a file
// name; this in no way represents an actual locale.
CharString ch(entry->stringArg);
UResourceBundle *bundle = ures_openDirect(0, ch, &status);
UnicodeString rules = ures_getUnicodeStringByKey(bundle, RB_RULE, &status);
ures_close(bundle);
//CharString ch(entry->stringArg);
//UResourceBundle *bundle = ures_openDirect(0, ch, &status);
UnicodeString rules = entry->stringArg;
//ures_close(bundle);
if (U_FAILURE(status)) {
// We have a failure of some kind. Remove the ID from the

View file

@ -31,6 +31,7 @@
#include "cstring.h"
#include "umutex.h"
#include "uassert.h"
#include "cmemory.h"
#ifdef XP_MAC_CONSOLE
#include <console.h>
@ -1491,6 +1492,16 @@ UBool IntlTest::assertEquals(const UnicodeString& message,
const char* actual) {
return assertEquals(extractToAssertBuf(message), expected, actual);
}
//--------------------------------------------------------------------
// Time bomb support: lets a test keep temporary behavior that is
// meant to expire at a given release.
//--------------------------------------------------------------------
/**
 * Reports whether the version obtained from u_getVersion() is at
 * least the version passed in. The two version arrays are compared
 * over U_MAX_VERSION_LENGTH bytes with uprv_memcmp.
 * @param x the version to compare against
 * @return non-zero when the obtained version compares >= x
 */
UBool IntlTest::isICUVersionAtLeast(const UVersionInfo x) {
    UVersionInfo running;
    u_getVersion(running);
    const int32_t order = uprv_memcmp(running, x, U_MAX_VERSION_LENGTH);
    return (order >= 0);
}
#if !UCONFIG_NO_FORMATTING
UBool IntlTest::assertEquals(const UnicodeString& message,

View file

@ -131,6 +131,12 @@ public:
*/
static float random();
/**
* Ascertain whether the version of ICU is at least the given
* version. Useful for time bomb testing, where temporary test
* behavior is meant to expire at a given release.
* @param x the version to compare the ICU version against
* @return TRUE if the ICU version is greater than or equal to x
*/
UBool isICUVersionAtLeast(const UVersionInfo x);
protected:
/* JUnit-like assertions. Each returns TRUE if it succeeds. */
UBool assertTrue(const char* message, UBool condition, UBool quiet=FALSE);

View file

@ -345,16 +345,6 @@
RelativePath=".\tsputil.h">
</File>
</Filter>
<Filter
Name="conversion"
Filter="">
<File
RelativePath=".\convtest.cpp">
</File>
<File
RelativePath=".\convtest.h">
</File>
</Filter>
<Filter
Name="data &amp; memory"
Filter="">
@ -816,6 +806,16 @@
RelativePath=".\trnserr.h">
</File>
</Filter>
<Filter
Name="conversion"
Filter="">
<File
RelativePath=".\convtest.cpp">
</File>
<File
RelativePath=".\convtest.h">
</File>
</Filter>
</Files>
<Globals>
</Globals>

View file

@ -26,6 +26,9 @@
#include "unicode/rep.h"
#include "unicode/locid.h"
#include "unicode/uniset.h"
// Version 3.1.0.0, passed to isICUVersionAtLeast() as the time bomb
// threshold for skipping IDs that contain "Thai".
static const UVersionInfo ICU_31 = {3,1,0,0};
int32_t getInt(UnicodeString str)
{
char buffer[20];
@ -84,6 +87,9 @@ void TransliteratorAPITest::TestgetID() {
for (i=0; i<Transliterator::countAvailableIDs(); i++){
status = U_ZERO_ERROR;
ID = (UnicodeString) Transliterator::getAvailableID(i);
if(ID.indexOf("Thai")>-1 && isICUVersionAtLeast(ICU_31)){
continue;
}
t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
if(t == 0){
errln("FAIL: " + ID);

View file

@ -37,6 +37,7 @@
#include "unesctrn.h"
#include "uni2name.h"
#include "cstring.h"
#include "cmemory.h"
#include <stdio.h>
/***********************************************************************
@ -186,6 +187,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
}
}
// Version 3.1.0.0, passed to isICUVersionAtLeast() as the time bomb
// threshold for skipping transliterator IDs that contain "Thai".
static const UVersionInfo ICU_31 = {3,1,0,0};
/**
* Make sure every system transliterator can be instantiated.
*
@ -220,6 +222,9 @@ void TransliteratorTest::TestInstantiation() {
i + ") != getAvailableIDs().snext()");
continue;
}
if(id2.indexOf("Thai")>-1 && isICUVersionAtLeast(ICU_31)){
continue;
}
UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
Transliterator* t = Transliterator::createInstance(id,
@ -3472,7 +3477,10 @@ void TransliteratorTest::TestIncrementalProgress(void) {
Transliterator::getAvailableVariant(k, source, target, variant);
UnicodeString id = source + "-" + target + "/" + variant;
if(id.indexOf("Thai")>-1 && isICUVersionAtLeast(ICU_31)){
continue;
}
Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
if (U_FAILURE(status)) {
errln((UnicodeString)"FAIL: Could not create " + id);