ICU-4162 Delete the extra sources; these files are now loaded from ICU.

X-SVN-Rev: 16764
This commit is contained in:
Ram Viswanadha 2004-11-05 01:44:03 +00:00
parent d0c241160c
commit 84578841ec
40 changed files with 0 additions and 8041 deletions

View file

@ -1,290 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Keyboard-style input rules: a short ASCII key sequence wrapped in the
# $pre / $post delimiters (by default "<" and ">") maps to a combining
# diacritic or a special Latin letter, and back again (<>).
# Every rule below has the shape:  $pre KEYS $post <> TARGET ;
# Forward direction: decompose (NFD) before the rules run.
# Reverse direction: compose (NFC) is the part in parentheses.
:: NFD (NFC) ;
# to do: make reversible
# define special conversion characters.
# variants of this could use different characters, or set one or the other to null.
$pre = \< ;
$post = \> ;
# Provide keyboard equivalents for common diacritics used in transliteration
$pre \` $post <> \u0300 ; # COMBINING GRAVE ACCENT
$pre \' $post <> \u0301 ; # COMBINING ACUTE ACCENT
$pre \^ $post <> \u0302 ; # COMBINING CIRCUMFLEX ACCENT
$pre \~ $post <> \u0303 ; # COMBINING TILDE
$pre \- $post <> \u0304 ; # COMBINING MACRON
$pre \" $post <> \u0308 ; # COMBINING DIAERESIS
$pre \* $post <> \u030A ; # COMBINING RING ABOVE
$pre \, $post <> \u0327 ; # COMBINING CEDILLA
$pre '/' $post <> \u0338 ; # COMBINING LONG SOLIDUS OVERLAY
$pre \. $post <> \u0323 ; # COMBINING DOT BELOW
# Combine common characters
$pre AE $post <> \u00C6 ; # LATIN CAPITAL LETTER AE
$pre ae $post <> \u00E6 ; # LATIN SMALL LETTER AE
$pre D $post <> \u00D0 ; # LATIN CAPITAL LETTER ETH
$pre d $post <> \u00F0 ; # LATIN SMALL LETTER ETH
$pre O'/' $post <> \u00D8 ; # LATIN CAPITAL LETTER O WITH STROKE
$pre o'/' $post <> \u00F8 ; # LATIN SMALL LETTER O WITH STROKE
$pre TH $post <> \u00DE ; # LATIN CAPITAL LETTER THORN
$pre th $post <> \u00FE ; # LATIN SMALL LETTER THORN
$pre OE $post <> \u0152 ; # LATIN CAPITAL LIGATURE OE
$pre oe $post <> \u0153 ; # LATIN SMALL LIGATURE OE
$pre ss $post <> \u00DF ; # LATIN SMALL LETTER SHARP S
$pre NG $post <> \u014A ; # LATIN CAPITAL LETTER ENG
$pre ng $post <> \u014B ; # LATIN SMALL LETTER ENG
$pre T $post <> \u0398 ; # GREEK CAPITAL LETTER THETA
$pre t $post <> \u03B8 ; # GREEK SMALL LETTER THETA
$pre SH $post <> \u01A9 ; # LATIN CAPITAL LETTER ESH
$pre sh $post <> \u0283 ; # LATIN SMALL LETTER ESH
$pre ZH $post <> \u01B7 ; # LATIN CAPITAL LETTER EZH
$pre zh $post <> \u0292 ; # LATIN SMALL LETTER EZH
$pre U $post <> \u01B1 ; # LATIN CAPITAL LETTER UPSILON
$pre u $post <> \u028A ; # LATIN SMALL LETTER UPSILON
$pre A $post <> \u018F ; # LATIN CAPITAL LETTER SCHWA
$pre a $post <> \u0259 ; # LATIN SMALL LETTER SCHWA
$pre O $post <> \u0186 ; # LATIN CAPITAL LETTER OPEN O
$pre o $post <> \u0254 ; # LATIN SMALL LETTER OPEN O
$pre E $post <> \u0190 ; # LATIN CAPITAL LETTER OPEN E
$pre e $post <> \u025B ; # LATIN SMALL LETTER OPEN E
# three that don't have uppercases
$pre '?' $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP
$pre i $post <> \u026A ; # LATIN LETTER SMALL CAPITAL I
$pre v $post <> \u028C ; # LATIN SMALL LETTER TURNED V
# Additional Characters that may be added in the future
# $pre XXX $post <> \u0306 ; # COMBINING BREVE
# $pre XXX $post <> \u0307 ; # COMBINING DOT ABOVE
# $pre XXX $post <> \u0309 ; # COMBINING HOOK ABOVE
# $pre XXX $post <> \u030B ; # COMBINING DOUBLE ACUTE ACCENT
# $pre XXX $post <> \u030C ; # COMBINING CARON
# $pre XXX $post <> \u030F ; # COMBINING DOUBLE GRAVE ACCENT
# $pre XXX $post <> \u0311 ; # COMBINING INVERTED BREVE
# $pre XXX $post <> \u0313 ; # COMBINING COMMA ABOVE
# $pre XXX $post <> \u0314 ; # COMBINING REVERSED COMMA ABOVE
# $pre XXX $post <> \u031B ; # COMBINING HORN
# $pre XXX $post <> \u0324 ; # COMBINING DIAERESIS BELOW
# $pre XXX $post <> \u0325 ; # COMBINING RING BELOW
# $pre XXX $post <> \u0326 ; # COMBINING COMMA BELOW
# $pre XXX $post <> \u0328 ; # COMBINING OGONEK
# $pre XXX $post <> \u032D ; # COMBINING CIRCUMFLEX ACCENT BELOW
# $pre XXX $post <> \u032E ; # COMBINING BREVE BELOW
# $pre XXX $post <> \u0330 ; # COMBINING TILDE BELOW
# $pre XXX $post <> \u0331 ; # COMBINING MACRON BELOW
# $pre YYY $post <> \u00AA ; # FEMININE ORDINAL INDICATOR
# $pre YYY $post <> \u00BA ; # MASCULINE ORDINAL INDICATOR
# $pre YYY $post <> \u0110 ; # LATIN CAPITAL LETTER D WITH STROKE
# $pre YYY $post <> \u0111 ; # LATIN SMALL LETTER D WITH STROKE
# $pre YYY $post <> \u0126 ; # LATIN CAPITAL LETTER H WITH STROKE
# $pre YYY $post <> \u0127 ; # LATIN SMALL LETTER H WITH STROKE
# $pre YYY $post <> \u0131 ; # LATIN SMALL LETTER DOTLESS I
# $pre YYY $post <> \u0138 ; # LATIN SMALL LETTER KRA
# $pre YYY $post <> \u013F ; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
# $pre YYY $post <> \u0140 ; # LATIN SMALL LETTER L WITH MIDDLE DOT
# $pre YYY $post <> \u0141 ; # LATIN CAPITAL LETTER L WITH STROKE
# $pre YYY $post <> \u0142 ; # LATIN SMALL LETTER L WITH STROKE
# $pre YYY $post <> \u0149 ; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
# $pre YYY $post <> \u0166 ; # LATIN CAPITAL LETTER T WITH STROKE
# $pre YYY $post <> \u0167 ; # LATIN SMALL LETTER T WITH STROKE
# $pre YYY $post <> \u017F ; # LATIN SMALL LETTER LONG S
# $pre YYY $post <> \u0180 ; # LATIN SMALL LETTER B WITH STROKE
# $pre YYY $post <> \u0181 ; # LATIN CAPITAL LETTER B WITH HOOK
# $pre YYY $post <> \u0182 ; # LATIN CAPITAL LETTER B WITH TOPBAR
# $pre YYY $post <> \u0183 ; # LATIN SMALL LETTER B WITH TOPBAR
# $pre YYY $post <> \u0184 ; # LATIN CAPITAL LETTER TONE SIX
# $pre YYY $post <> \u0185 ; # LATIN SMALL LETTER TONE SIX
# $pre YYY $post <> \u0187 ; # LATIN CAPITAL LETTER C WITH HOOK
# $pre YYY $post <> \u0188 ; # LATIN SMALL LETTER C WITH HOOK
# $pre YYY $post <> \u0189 ; # LATIN CAPITAL LETTER AFRICAN D
# $pre YYY $post <> \u018A ; # LATIN CAPITAL LETTER D WITH HOOK
# $pre YYY $post <> \u018B ; # LATIN CAPITAL LETTER D WITH TOPBAR
# $pre YYY $post <> \u018C ; # LATIN SMALL LETTER D WITH TOPBAR
# $pre YYY $post <> \u018D ; # LATIN SMALL LETTER TURNED DELTA
# $pre YYY $post <> \u018E ; # LATIN CAPITAL LETTER REVERSED E
# $pre YYY $post <> \u0191 ; # LATIN CAPITAL LETTER F WITH HOOK
# $pre YYY $post <> \u0192 ; # LATIN SMALL LETTER F WITH HOOK
# $pre YYY $post <> \u0193 ; # LATIN CAPITAL LETTER G WITH HOOK
# $pre YYY $post <> \u0194 ; # LATIN CAPITAL LETTER GAMMA
# $pre YYY $post <> \u0195 ; # LATIN SMALL LETTER HV
# $pre YYY $post <> \u0196 ; # LATIN CAPITAL LETTER IOTA
# $pre YYY $post <> \u0197 ; # LATIN CAPITAL LETTER I WITH STROKE
# $pre YYY $post <> \u0198 ; # LATIN CAPITAL LETTER K WITH HOOK
# $pre YYY $post <> \u0199 ; # LATIN SMALL LETTER K WITH HOOK
# $pre YYY $post <> \u019A ; # LATIN SMALL LETTER L WITH BAR
# $pre YYY $post <> \u019B ; # LATIN SMALL LETTER LAMBDA WITH STROKE
# $pre YYY $post <> \u019C ; # LATIN CAPITAL LETTER TURNED M
# $pre YYY $post <> \u019D ; # LATIN CAPITAL LETTER N WITH LEFT HOOK
# $pre YYY $post <> \u019E ; # LATIN SMALL LETTER N WITH LONG RIGHT LEG
# $pre YYY $post <> \u019F ; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
# $pre YYY $post <> \u01A2 ; # LATIN CAPITAL LETTER OI
# $pre YYY $post <> \u01A3 ; # LATIN SMALL LETTER OI
# $pre YYY $post <> \u01A4 ; # LATIN CAPITAL LETTER P WITH HOOK
# $pre YYY $post <> \u01A5 ; # LATIN SMALL LETTER P WITH HOOK
# $pre YYY $post <> \u01A6 ; # LATIN LETTER YR
# $pre YYY $post <> \u01A7 ; # LATIN CAPITAL LETTER TONE TWO
# $pre YYY $post <> \u01A8 ; # LATIN SMALL LETTER TONE TWO
# $pre YYY $post <> \u01AA ; # LATIN LETTER REVERSED ESH LOOP
# $pre YYY $post <> \u01AB ; # LATIN SMALL LETTER T WITH PALATAL HOOK
# $pre YYY $post <> \u01AC ; # LATIN CAPITAL LETTER T WITH HOOK
# $pre YYY $post <> \u01AD ; # LATIN SMALL LETTER T WITH HOOK
# $pre YYY $post <> \u01AE ; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
# $pre YYY $post <> \u01B2 ; # LATIN CAPITAL LETTER V WITH HOOK
# $pre YYY $post <> \u01B3 ; # LATIN CAPITAL LETTER Y WITH HOOK
# $pre YYY $post <> \u01B4 ; # LATIN SMALL LETTER Y WITH HOOK
# $pre YYY $post <> \u01B5 ; # LATIN CAPITAL LETTER Z WITH STROKE
# $pre YYY $post <> \u01B6 ; # LATIN SMALL LETTER Z WITH STROKE
# $pre YYY $post <> \u01B8 ; # LATIN CAPITAL LETTER EZH REVERSED
# $pre YYY $post <> \u01B9 ; # LATIN SMALL LETTER EZH REVERSED
# $pre YYY $post <> \u01BA ; # LATIN SMALL LETTER EZH WITH TAIL
# $pre YYY $post <> \u01BB ; # LATIN LETTER TWO WITH STROKE
# $pre YYY $post <> \u01BC ; # LATIN CAPITAL LETTER TONE FIVE
# $pre YYY $post <> \u01BD ; # LATIN SMALL LETTER TONE FIVE
# $pre YYY $post <> \u01BE ; # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE
# $pre YYY $post <> \u01BF ; # LATIN LETTER WYNN
# $pre YYY $post <> \u01C0 ; # LATIN LETTER DENTAL CLICK
# $pre YYY $post <> \u01C1 ; # LATIN LETTER LATERAL CLICK
# $pre YYY $post <> \u01C2 ; # LATIN LETTER ALVEOLAR CLICK
# $pre YYY $post <> \u01C3 ; # LATIN LETTER RETROFLEX CLICK
# $pre YYY $post <> \u01C4 ; # LATIN CAPITAL LETTER DZ WITH CARON
# $pre YYY $post <> \u01C5 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
# $pre YYY $post <> \u01C6 ; # LATIN SMALL LETTER DZ WITH CARON
# $pre YYY $post <> \u01C7 ; # LATIN CAPITAL LETTER LJ
# $pre YYY $post <> \u01C8 ; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
# $pre YYY $post <> \u01C9 ; # LATIN SMALL LETTER LJ
# $pre YYY $post <> \u01CA ; # LATIN CAPITAL LETTER NJ
# $pre YYY $post <> \u01CB ; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
# $pre YYY $post <> \u01CC ; # LATIN SMALL LETTER NJ
# $pre YYY $post <> \u01DD ; # LATIN SMALL LETTER TURNED E
# $pre YYY $post <> \u01E4 ; # LATIN CAPITAL LETTER G WITH STROKE
# $pre YYY $post <> \u01E5 ; # LATIN SMALL LETTER G WITH STROKE
# $pre YYY $post <> \u01F1 ; # LATIN CAPITAL LETTER DZ
# $pre YYY $post <> \u01F2 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
# $pre YYY $post <> \u01F3 ; # LATIN SMALL LETTER DZ
# $pre YYY $post <> \u01F6 ; # LATIN CAPITAL LETTER HWAIR
# $pre YYY $post <> \u01F7 ; # LATIN CAPITAL LETTER WYNN
# $pre YYY $post <> \u021C ; # LATIN CAPITAL LETTER YOGH
# $pre YYY $post <> \u021D ; # LATIN SMALL LETTER YOGH
# $pre YYY $post <> \u0222 ; # LATIN CAPITAL LETTER OU
# $pre YYY $post <> \u0223 ; # LATIN SMALL LETTER OU
# $pre YYY $post <> \u0224 ; # LATIN CAPITAL LETTER Z WITH HOOK
# $pre YYY $post <> \u0225 ; # LATIN SMALL LETTER Z WITH HOOK
# $pre YYY $post <> \u0250 ; # LATIN SMALL LETTER TURNED A
# $pre YYY $post <> \u0251 ; # LATIN SMALL LETTER ALPHA
# $pre YYY $post <> \u0252 ; # LATIN SMALL LETTER TURNED ALPHA
# $pre YYY $post <> \u0253 ; # LATIN SMALL LETTER B WITH HOOK
# $pre YYY $post <> \u0255 ; # LATIN SMALL LETTER C WITH CURL
# $pre YYY $post <> \u0256 ; # LATIN SMALL LETTER D WITH TAIL
# $pre YYY $post <> \u0257 ; # LATIN SMALL LETTER D WITH HOOK
# $pre YYY $post <> \u0258 ; # LATIN SMALL LETTER REVERSED E
# $pre YYY $post <> \u025A ; # LATIN SMALL LETTER SCHWA WITH HOOK
# $pre YYY $post <> \u025C ; # LATIN SMALL LETTER REVERSED OPEN E
# $pre YYY $post <> \u025D ; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
# $pre YYY $post <> \u025E ; # LATIN SMALL LETTER CLOSED REVERSED OPEN E
# $pre YYY $post <> \u025F ; # LATIN SMALL LETTER DOTLESS J WITH STROKE
# $pre YYY $post <> \u0260 ; # LATIN SMALL LETTER G WITH HOOK
# $pre YYY $post <> \u0261 ; # LATIN SMALL LETTER SCRIPT G
# $pre YYY $post <> \u0262 ; # LATIN LETTER SMALL CAPITAL G
# $pre YYY $post <> \u0263 ; # LATIN SMALL LETTER GAMMA
# $pre YYY $post <> \u0264 ; # LATIN SMALL LETTER RAMS HORN
# $pre YYY $post <> \u0265 ; # LATIN SMALL LETTER TURNED H
# $pre YYY $post <> \u0266 ; # LATIN SMALL LETTER H WITH HOOK
# $pre YYY $post <> \u0267 ; # LATIN SMALL LETTER HENG WITH HOOK
# $pre YYY $post <> \u0268 ; # LATIN SMALL LETTER I WITH STROKE
# $pre YYY $post <> \u0269 ; # LATIN SMALL LETTER IOTA
# $pre YYY $post <> \u026B ; # LATIN SMALL LETTER L WITH MIDDLE TILDE
# $pre YYY $post <> \u026C ; # LATIN SMALL LETTER L WITH BELT
# $pre YYY $post <> \u026D ; # LATIN SMALL LETTER L WITH RETROFLEX HOOK
# $pre YYY $post <> \u026E ; # LATIN SMALL LETTER LEZH
# $pre YYY $post <> \u026F ; # LATIN SMALL LETTER TURNED M
# $pre YYY $post <> \u0270 ; # LATIN SMALL LETTER TURNED M WITH LONG LEG
# $pre YYY $post <> \u0271 ; # LATIN SMALL LETTER M WITH HOOK
# $pre YYY $post <> \u0272 ; # LATIN SMALL LETTER N WITH LEFT HOOK
# $pre YYY $post <> \u0273 ; # LATIN SMALL LETTER N WITH RETROFLEX HOOK
# $pre YYY $post <> \u0274 ; # LATIN LETTER SMALL CAPITAL N
# $pre YYY $post <> \u0275 ; # LATIN SMALL LETTER BARRED O
# $pre YYY $post <> \u0276 ; # LATIN LETTER SMALL CAPITAL OE
# $pre YYY $post <> \u0277 ; # LATIN SMALL LETTER CLOSED OMEGA
# $pre YYY $post <> \u0278 ; # LATIN SMALL LETTER PHI
# $pre YYY $post <> \u0279 ; # LATIN SMALL LETTER TURNED R
# $pre YYY $post <> \u027A ; # LATIN SMALL LETTER TURNED R WITH LONG LEG
# $pre YYY $post <> \u027B ; # LATIN SMALL LETTER TURNED R WITH HOOK
# $pre YYY $post <> \u027C ; # LATIN SMALL LETTER R WITH LONG LEG
# $pre YYY $post <> \u027D ; # LATIN SMALL LETTER R WITH TAIL
# $pre YYY $post <> \u027E ; # LATIN SMALL LETTER R WITH FISHHOOK
# $pre YYY $post <> \u027F ; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK
# $pre YYY $post <> \u0280 ; # LATIN LETTER SMALL CAPITAL R
# $pre YYY $post <> \u0281 ; # LATIN LETTER SMALL CAPITAL INVERTED R
# $pre YYY $post <> \u0282 ; # LATIN SMALL LETTER S WITH HOOK
# $pre YYY $post <> \u0284 ; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
# $pre YYY $post <> \u0285 ; # LATIN SMALL LETTER SQUAT REVERSED ESH
# $pre YYY $post <> \u0286 ; # LATIN SMALL LETTER ESH WITH CURL
# $pre YYY $post <> \u0287 ; # LATIN SMALL LETTER TURNED T
# $pre YYY $post <> \u0288 ; # LATIN SMALL LETTER T WITH RETROFLEX HOOK
# $pre YYY $post <> \u0289 ; # LATIN SMALL LETTER U BAR
# $pre YYY $post <> \u028B ; # LATIN SMALL LETTER V WITH HOOK
# $pre YYY $post <> \u028D ; # LATIN SMALL LETTER TURNED W
# $pre YYY $post <> \u028E ; # LATIN SMALL LETTER TURNED Y
# $pre YYY $post <> \u028F ; # LATIN LETTER SMALL CAPITAL Y
# $pre YYY $post <> \u0290 ; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK
# $pre YYY $post <> \u0291 ; # LATIN SMALL LETTER Z WITH CURL
# $pre YYY $post <> \u0293 ; # LATIN SMALL LETTER EZH WITH CURL
# $pre YYY $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP
# $pre YYY $post <> \u0295 ; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE
# $pre YYY $post <> \u0296 ; # LATIN LETTER INVERTED GLOTTAL STOP
# $pre YYY $post <> \u0297 ; # LATIN LETTER STRETCHED C
# $pre YYY $post <> \u0298 ; # LATIN LETTER BILABIAL CLICK
# $pre YYY $post <> \u0299 ; # LATIN LETTER SMALL CAPITAL B
# $pre YYY $post <> \u029A ; # LATIN SMALL LETTER CLOSED OPEN E
# $pre YYY $post <> \u029B ; # LATIN LETTER SMALL CAPITAL G WITH HOOK
# $pre YYY $post <> \u029C ; # LATIN LETTER SMALL CAPITAL H
# $pre YYY $post <> \u029D ; # LATIN SMALL LETTER J WITH CROSSED-TAIL
# $pre YYY $post <> \u029E ; # LATIN SMALL LETTER TURNED K
# $pre YYY $post <> \u029F ; # LATIN LETTER SMALL CAPITAL L
# $pre YYY $post <> \u02A0 ; # LATIN SMALL LETTER Q WITH HOOK
# $pre YYY $post <> \u02A1 ; # LATIN LETTER GLOTTAL STOP WITH STROKE
# $pre YYY $post <> \u02A2 ; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE
# $pre YYY $post <> \u02A3 ; # LATIN SMALL LETTER DZ DIGRAPH
# $pre YYY $post <> \u02A4 ; # LATIN SMALL LETTER DEZH DIGRAPH
# $pre YYY $post <> \u02A5 ; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL
# $pre YYY $post <> \u02A6 ; # LATIN SMALL LETTER TS DIGRAPH
# $pre YYY $post <> \u02A7 ; # LATIN SMALL LETTER TESH DIGRAPH
# $pre YYY $post <> \u02A8 ; # LATIN SMALL LETTER TC DIGRAPH WITH CURL
# $pre YYY $post <> \u02A9 ; # LATIN SMALL LETTER FENG DIGRAPH
# $pre YYY $post <> \u02AA ; # LATIN SMALL LETTER LS DIGRAPH
# $pre YYY $post <> \u02AB ; # LATIN SMALL LETTER LZ DIGRAPH
# $pre YYY $post <> \u02AC ; # LATIN LETTER BILABIAL PERCUSSIVE
# $pre YYY $post <> \u02AD ; # LATIN LETTER BIDENTAL PERCUSSIVE
# $pre YYY $post <> \u02B0 ; # MODIFIER LETTER SMALL H
# $pre YYY $post <> \u02B1 ; # MODIFIER LETTER SMALL H WITH HOOK
# $pre YYY $post <> \u02B2 ; # MODIFIER LETTER SMALL J
# $pre YYY $post <> \u02B3 ; # MODIFIER LETTER SMALL R
# $pre YYY $post <> \u02B4 ; # MODIFIER LETTER SMALL TURNED R
# $pre YYY $post <> \u02B5 ; # MODIFIER LETTER SMALL TURNED R WITH HOOK
# $pre YYY $post <> \u02B6 ; # MODIFIER LETTER SMALL CAPITAL INVERTED R
# $pre YYY $post <> \u02B7 ; # MODIFIER LETTER SMALL W
# $pre YYY $post <> \u02B8 ; # MODIFIER LETTER SMALL Y
# $pre YYY $post <> \u02E0 ; # MODIFIER LETTER SMALL GAMMA
# $pre YYY $post <> \u02E1 ; # MODIFIER LETTER SMALL L
# $pre YYY $post <> \u02E2 ; # MODIFIER LETTER SMALL S
# $pre YYY $post <> \u02E3 ; # MODIFIER LETTER SMALL X
# $pre YYY $post <> \u02E4 ; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
# $pre YYY $post <> \u1E9A ; # LATIN SMALL LETTER A WITH RIGHT HALF RING
# $pre YYY $post <> \u207F ; # SUPERSCRIPT LATIN SMALL LETTER N
# Forward direction: compose the result (NFC). Reverse direction: decompose (NFD).
:: NFC (NFD) ;

View file

@ -1,34 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Converts typewriter-style ASCII punctuation into typographic punctuation:
# straight and UNIX-style quotes become curly quotes, "--" becomes an em dash,
# and a space that follows another space is removed.
# Test case
# "The" "(quick)" ('brown') `fox' ` jumped -- "over?"
# Variables
$single = \' ;
$space = ' ' ;
$double = \" ;
$back = \` ;
# NOTE(review): $tab is not referenced by any rule in this file, and U+0008 is
# BACKSPACE (TAB is U+0009) -- confirm the intended value before using it.
$tab = '\u0008' ;
# Preceding context after which a quote is an opening quote: separators,
# opening punctuation, initial quote punctuation, or the start of the text ($).
$makeRight = [[:Z:][:Ps:][:Pi:]$] ;
# fix UNIX quotes
# (one-way: `` becomes an opening double quote, a lone ` an opening single quote;
# the two-character rule comes first so the one-character rule cannot mask it)
$back $back > “ ;
$back > ‘ ;
# fix typewriter quotes, by context
# (a quote preceded by $makeRight opens; any other quote closes)
$makeRight {$double} <> “ ;
$double <> ” ;
$makeRight {$single} <> ‘ ;
$single <> ’ ;
# fix multiple spaces and hyphens
# (one-way: a space preceded by another space is deleted)
$space {$space} > ;
'--' <> — ;

View file

@ -1,146 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Generally follows UNGEGN <http://www.eki.ee/wgrs/rom1_ar.pdf>
# Occasionally deviates in the direction of ISO 233 <http://homepage.mac.com/sirbinks/pdf/Arabic.pdf>
# a) where required for disambiguation.
# b) with underdot instead of cedilla for letter like SAD, since
# those are explicitly in Unicode for transliteration.
# c) with extra non-Arabic-language letters, like PEH
# Does *not* do assimilation of "al", nor hyphenation.
# While it could be done, we need to determine whether a prefix "al" could
# occur other than as the definite article (since no space is used).
# Forward-direction filter: only characters in this set are touched by the
# Arabic-to-Latin rules.
:: [[:Arabic:] [‎ⁿ\u060C\u061B\u061F\u0640\u064B-\u0655\u0660-\u066C\u06F0-\u06F9\uFDFC]] ;
# Forward direction: decompose (NFKD) before the rules run.
# Reverse direction: compose (NFC) is the part in parentheses.
:: NFKD (NFC);
# Combining marks attached to Latin output so that distinct Arabic characters
# with the same base romanization can be told apart on the way back.
# NOTE(review): these appear to be U+0331, U+0330 and U+0323 respectively --
# confirm against the raw bytes of the file.
$disambig = ̱ ;
$disambig2 = ̰ ;
$under = ̣ ;
# Combining marks whose canonical combining class is neither 0 nor 230;
# used by the reverse TEH MARBUTA rule to skip over intervening marks.
$notAbove = [[:^ccc=0:]&[:^ccc=230:]];
# Arabic <> Latin mapping rules. Rules are tried in order, so a rule that
# appears earlier takes precedence over a later rule matching the same text.
# non-letters
٫ <> '.' $disambig ; # ARABIC DECIMAL SEPARATOR
٬ <> ',' $disambig ; # ARABIC THOUSANDS SEPARATOR
# ٭ <> ; # ARABIC FIVE POINTED STAR // no need to transliterate
، <> ',' ; # ARABIC COMMA
؛ <> ';' ; # ARABIC SEMICOLON
؟ <> '?' ; # ARABIC QUESTION MARK
٪ <> '%' ; # ARABIC PERCENT SIGN
# Extended digits carry $disambig so they map back to themselves rather than
# to the plain Arabic-Indic digits below.
۰ <> 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
۱ <> 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
۲ <> 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
۳ <> 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
۴ <> 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
۵ <> 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
۶ <> 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
۷ <> 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
۸ <> 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
۹ <> 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
٠ <> 0 ; # ARABIC-INDIC DIGIT ZERO
١ <> 1 ; # ARABIC-INDIC DIGIT ONE
٢ <> 2 ; # ARABIC-INDIC DIGIT TWO
٣ <> 3 ; # ARABIC-INDIC DIGIT THREE
٤ <> 4 ; # ARABIC-INDIC DIGIT FOUR
٥ <> 5 ; # ARABIC-INDIC DIGIT FIVE
٦ <> 6 ; # ARABIC-INDIC DIGIT SIX
٧ <> 7 ; # ARABIC-INDIC DIGIT SEVEN
٨ <> 8 ; # ARABIC-INDIC DIGIT EIGHT
٩ <> 9 ; # ARABIC-INDIC DIGIT NINE
# letters
# long vowels
َا<> ā ; # ARABIC FATHA, ARABIC LETTER ALEF
ُو <> ū ; # ARABIC DAMMA, ARABIC LETTER WAW
ِي <> ī ; # ARABIC KASRA, ARABIC LETTER YEH
# longer items moved here to prevent masking
ث <> t h $disambig ; # ARABIC LETTER THEH
ذ <> d h $disambig ; # ARABIC LETTER THAL
ش <> s h $disambig ; # ARABIC LETTER SHEEN
ص <> s $under ; # ARABIC LETTER SAD
ض <> d $under ; # ARABIC LETTER DAD
ط <> t $under ; # ARABIC LETTER TAH
ظ <> z $under ; # ARABIC LETTER ZAH
غ <> g h $disambig ; # ARABIC LETTER GHAIN
# WARNING: special case
# <t, umlaut, half-ring below> will be canonically ordered as <t, half-ring below, umlaut>
# so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
# ةٕ < ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
ة <> t \u0308 ; # ARABIC LETTER TEH MARBUTA
# Reverse only: t + one or more $notAbove marks + diaeresis becomes TEH MARBUTA
# followed by the captured marks ($1); the cursor (|) is left before the marks
# so that later rules still convert them.
ة | $1 < t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA
# non-Arabic language
ژ <> z h $disambig ; # ARABIC LETTER JEH
ڭ <> n $disambig g ; # ARABIC LETTER NG
ۋ <> v $disambig ; # ARABIC LETTER VE
ی <> y $disambig2 ; # ARABIC LETTER FARSI YEH
# Arabic language
ء <> ʾ ; # ARABIC LETTER HAMZA
ا <> a $under; # ARABIC LETTER ALEF
ب <> b ; # ARABIC LETTER BEH
ت <> t ; # ARABIC LETTER TEH
ج <> j ; # ARABIC LETTER JEEM
ح <> h $under ; # ARABIC LETTER HAH
خ <> k h $disambig ; # ARABIC LETTER KHAH
د <> d ; # ARABIC LETTER DAL
ر <> r ; # ARABIC LETTER REH
ز <> z ; # ARABIC LETTER ZAIN
س <> s ; # ARABIC LETTER SEEN
ع <> ʿ ; # ARABIC LETTER AIN
# One-way (forward only): the tatweel is dropped and has no Latin counterpart.
ـ > ; # ARABIC TATWEEL
ف <> f ; # ARABIC LETTER FEH
ق <> q ; # ARABIC LETTER QAF
ك <> k ; # ARABIC LETTER KAF
ل <> l ; # ARABIC LETTER LAM
م <> m ; # ARABIC LETTER MEEM
ن <> n ; # ARABIC LETTER NOON
ه <> h ; # ARABIC LETTER HEH
و <> w ; # ARABIC LETTER WAW
ى <> y $disambig ; # ARABIC LETTER ALEF MAKSURA
ي <> y ; # ARABIC LETTER YEH
ً <> aⁿ ; # ARABIC FATHATAN
ٌ <> uⁿ ; # ARABIC DAMMATAN
ٍ <> iⁿ ; # ARABIC KASRATAN
َ <> a ; # ARABIC FATHA
ُ <> u ; # ARABIC DAMMA
ِ <> i ; # ARABIC KASRA
ّ <> ̃ ; # ARABIC SHADDA
ْ <> ̊ ; # ARABIC SUKUN
# special combining marks
ٓ <> ̂ ; # ARABIC MADDAH ABOVE
ٔ <> ̉ ; # ARABIC HAMZA ABOVE
ٕ <> ̹ ; # ARABIC HAMZA BELOW
# Some non-Arabic language (not in UNGEGN)
پ <> p ; # ARABIC LETTER PEH
چ <> c h $disambig ; # ARABIC LETTER TCHEH
ڤ <> v ; # ARABIC LETTER VEH
# ڥ <> v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
# ڢ <> f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
گ <> g ; # ARABIC LETTER GAF
# fallbacks
# Reverse-only (<) rules for Latin input that no rule above produces. Each one
# rewrites the input to a spelling that does have a mapping and leaves the
# cursor (|) in front of the replacement, so the rules above then convert it.
| s < c } [eiy];
| k < c ;
| i < e ;
| u < o ;
| ks < x ;
| n < ‎ⁿ;
# Reverse direction only: lowercase the Latin input.
:: (lower) ;
# Forward direction: compose the Latin output (NFC). Reverse: decompose (NFD).
::NFC (NFD);
# Reverse-direction filter: only characters in this set are touched when
# converting Latin back to Arabic.
:: ( [[:Latin:] [%,.0-9;?\u02BE-\u02BF\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339\u037E]] );

View file

@ -1,103 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Bengali-InterIndic
# One-way (>) rules that map each Bengali code point to a private-use
# (U+E0xx) InterIndic code point.
# The two-code-point vowel signs come first so that the single-code-point
# rules for \u09C7, \u09BE and \u09D7 further down cannot mask them.
\u09C7\u09BE>\uE04B; # VOWEL SIGN O
\u09C7\u09D7>\uE04C; # VOWEL SIGN AU
\u0981>\uE001; # SIGN CANDRABINDU
\u0982>\uE002; # SIGN ANUSVARA
\u0983>\uE003; # SIGN VISARGA
\u0985>\uE005; # LETTER A
\u0986>\uE006; # LETTER AA
\u0987>\uE007; # LETTER I
\u0988>\uE008; # LETTER II
\u0989>\uE009; # LETTER U
\u098A>\uE00A; # LETTER UU
\u098B>\uE00B; # LETTER VOCALIC R
\u098C>\uE00C; # LETTER VOCALIC L
\u098F>\uE00F; # LETTER E
\u0990>\uE010; # LETTER AI
\u0993>\uE013; # LETTER O
\u0994>\uE014; # LETTER AU
\u0995>\uE015; # LETTER KA
\u0996>\uE016; # LETTER KHA
\u0997>\uE017; # LETTER GA
\u0998>\uE018; # LETTER GHA
\u0999>\uE019; # LETTER NGA
\u099A>\uE01A; # LETTER CA
\u099B>\uE01B; # LETTER CHA
\u099C>\uE01C; # LETTER JA
\u099D>\uE01D; # LETTER JHA
\u099E>\uE01E; # LETTER NYA
\u099F>\uE01F; # LETTER TTA
\u09A0>\uE020; # LETTER TTHA
\u09A1>\uE021; # LETTER DDA
\u09A2>\uE022; # LETTER DDHA
\u09A3>\uE023; # LETTER NNA
\u09A4>\uE024; # LETTER TA
\u09A5>\uE025; # LETTER THA
\u09A6>\uE026; # LETTER DA
\u09A7>\uE027; # LETTER DHA
\u09A8>\uE028; # LETTER NA
\u09AA>\uE02A; # LETTER PA
\u09AB>\uE02B; # LETTER PHA
\u09AC>\uE02C; # LETTER BA
\u09AD>\uE02D; # LETTER BHA
\u09AE>\uE02E; # LETTER MA
\u09AF>\uE02F; # LETTER YA
\u09B0>\uE030; # LETTER RA
\u09B2>\uE032; # LETTER LA
\u09B6>\uE036; # LETTER SHA
\u09B7>\uE037; # LETTER SSA
\u09B8>\uE038; # LETTER SA
\u09B9>\uE039; # LETTER HA
\u09BC>\uE03C; # SIGN NUKTA
\u09BD>\uE03D; # SIGN AVAGRAHA
\u09BE>\uE03E; # VOWEL SIGN AA
\u09BF>\uE03F; # VOWEL SIGN I
\u09C0>\uE040; # VOWEL SIGN II
\u09C1>\uE041; # VOWEL SIGN U
\u09C2>\uE042; # VOWEL SIGN UU
\u09C3>\uE043; # VOWEL SIGN VOCALIC R
\u09C4>\uE044; # VOWEL SIGN VOCALIC RR
\u09C7>\uE047; # VOWEL SIGN E
\u09C8>\uE048; # VOWEL SIGN AI
\u09CB>\uE04B; # VOWEL SIGN O
\u09CC>\uE04C; # VOWEL SIGN AU
#
\u09CD>\uE04D; # SIGN VIRAMA
\u09D7>\uE057; # AU LENGTH MARK
#
\u09E0>\uE060; # LETTER VOCALIC RR
\u09E1>\uE061; # LETTER VOCALIC LL
\u09E2>\uE062; # VOWEL SIGN VOCALIC L
\u09E3>\uE063; # VOWEL SIGN VOCALIC LL
\u09E6>\uE066; # DIGIT ZERO
\u09E7>\uE067; # DIGIT ONE
\u09E8>\uE068; # DIGIT TWO
\u09E9>\uE069; # DIGIT THREE
\u09EA>\uE06A; # DIGIT FOUR
\u09EB>\uE06B; # DIGIT FIVE
\u09EC>\uE06C; # DIGIT SIX
\u09ED>\uE06D; # DIGIT SEVEN
\u09EE>\uE06E; # DIGIT EIGHT
\u09EF>\uE06F; # DIGIT NINE
\u09F0>\ue071; # Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
\u09F1>\ue072; # Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
\u09F2>\ue073; # Bengali-InterIndic: RUPEE MARK
\u09F3>\ue074; # Bengali-InterIndic: RUPEE SIGN
\u09F4>\ue075; # Bengali-InterIndic: CURRENCY NUMERATOR ONE
\u09F5>\ue076; # Bengali-InterIndic: CURRENCY NUMERATOR TWO
\u09F6>\ue077; # Bengali-InterIndic: CURRENCY NUMERATOR THREE
\u09F7>\ue078; # Bengali-InterIndic: CURRENCY NUMERATOR FOUR
\u09F8>\ue079; # Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\u09F9>\ue07A; # Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN
\u09FA>\ue07B; # ISSHAR
# Danda and double danda are encoded in the Devanagari block (U+0964, U+0965).
\u0964>\ue064; # DANDA
\u0965>\ue065; # DOUBLE DANDA
# :: NFC (NFD) ;
# eof

View file

@ -1,306 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# TODO: add remaining characters
# Should add variants for Russian-English, Russian-German
# Those can use this as a base, and then remap cases
# like a $hat to ya or ja.
# :: [\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ;
### WARNING, \u0308 must be added to the generated filters, in both directions ###
# MINIMAL FILTER
:: [\u0308\u0102-\u0103\u0114-\u0115\u011E-\u011F\u012C-\u012D\u014E-\u014F\u016C-\u016D\u0306\u0400-\u045F\u0490-\u0495\u0498-\u0499\u04C1-\u04C2\u04D0-\u04DF\u04E2-\u04E7\u04EC-\u04F5\u04F8-\u04F9\u1E1C-\u1E1D\u1EAE-\u1EB7\u1FB0\u1FB8\u1FD0\u1FD8\u1FE0\u1FE8] ;
:: NFD (NFC) ;
$modprime = \u02B9;
$modprime2 = \u02BA;
$grave = \u0300;
$acute = \u0301;
$hat = \u0302;
$breve = \u0306 ;
$dot = \u0307 ;
$caron = \u030C ;
$comma = \u0326 ;
$under = \u0331 ;
# move up so not masked
я <> a $hat ; # CYRILLIC SMALL LETTER YA
Я <> A $hat ; # CYRILLIC CAPITAL LETTER YA
ч <> c $caron ; # CYRILLIC SMALL LETTER CHE
Ч <> C $caron; # CYRILLIC CAPITAL LETTER CHE
# ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
# Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
# ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
# Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
# ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
# Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
э <> e $acute; # CYRILLIC SMALL LETTER E
Э <> E $acute; # CYRILLIC CAPITAL LETTER E
є <> e $hat; # CYRILLIC SMALL LETTER UKRAINIAN IE
Є <> E $hat; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
ш <> s $caron ; # CYRILLIC SMALL LETTER SHA
Ш <> S $caron ; # CYRILLIC CAPITAL LETTER SHA
щ <> s $hat ; # CYRILLIC SMALL LETTER SHCHA
Щ <> S $hat; # CYRILLIC CAPITAL LETTER SHCHA
ѕ <> z $hat ; # CYRILLIC SMALL LETTER DZE
Ѕ <> Z $hat; # CYRILLIC CAPITAL LETTER DZE
# ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
# Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
ю <> u $hat ; # CYRILLIC SMALL LETTER YU
Ю <> U $hat ; # CYRILLIC CAPITAL LETTER YU
і <> i $acute; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
І <> I $acute; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
ј <> j $caron; # CYRILLIC SMALL LETTER JE
Ј <> J $caron; # CYRILLIC CAPITAL LETTER JE
љ <> l $hat ; # CYRILLIC SMALL LETTER LJE
Љ <> L $hat ; # CYRILLIC CAPITAL LETTER LJE
њ <> n $hat ; # CYRILLIC SMALL LETTER NJE
Њ <> N $hat ; # CYRILLIC CAPITAL LETTER NJE
ћ <> c $acute ; # CYRILLIC SMALL LETTER TSHE
Ћ <> C $acute ; # CYRILLIC CAPITAL LETTER TSHE
џ <> d $hat ; # CYRILLIC SMALL LETTER DZHE
Џ <> D $hat ; # CYRILLIC CAPITAL LETTER DZHE
# Normal order
а <> a ; # CYRILLIC SMALL LETTER A
А <> A ; # CYRILLIC CAPITAL LETTER A
ә <> \u0259 ; # CYRILLIC SMALL LETTER SCHWA
Ә <> \u018F ; # CYRILLIC CAPITAL LETTER SCHWA
ӕ <> \u00E6 ; # CYRILLIC SMALL LIGATURE A IE
Ӕ <> \u00C6 ; # CYRILLIC CAPITAL LIGATURE A IE
б <> b ; # CYRILLIC SMALL LETTER BE
Б <> B ; # CYRILLIC CAPITAL LETTER BE
в <> v ; # CYRILLIC SMALL LETTER VE
В <> V ; # CYRILLIC CAPITAL LETTER VE
ґ <> g $grave ; # CYRILLIC SMALL LETTER GHE WITH UPTURN
Ґ <> G $grave ; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
ғ <> g $dot ; # CYRILLIC SMALL LETTER GHE WITH STROKE
Ғ <> G $dot; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
ҕ <> g $breve; # CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
Ҕ <> G $breve; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
г <> g ; # CYRILLIC SMALL LETTER GHE
Г <> G ; # CYRILLIC CAPITAL LETTER GHE
д <> d; # CYRILLIC SMALL LETTER DE
Д <> D; # CYRILLIC CAPITAL LETTER DE
ђ <> đ ; # CYRILLIC SMALL LETTER DJE
Ђ <> Đ ; # CYRILLIC CAPITAL LETTER DJE
ҙ <> z $comma ; # CYRILLIC SMALL LETTER ZE WITH DESCENDER
Ҙ <> Z $comma ; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
е <> e ; # CYRILLIC SMALL LETTER IE
Е <> E; # CYRILLIC CAPITAL LETTER IE
ж <> z $caron; # CYRILLIC SMALL LETTER ZHE
Ж <> Z $caron; # CYRILLIC CAPITAL LETTER ZHE
# җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
# Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
з <> z ; # CYRILLIC SMALL LETTER ZE
З <> Z; # CYRILLIC CAPITAL LETTER ZE
й <> j ; # CYRILLIC SMALL LETTER I
Й <> J ; # CYRILLIC CAPITAL LETTER I
и <> i ; # CYRILLIC SMALL LETTER I
И <> I ; # CYRILLIC CAPITAL LETTER I
к <> k ; # CYRILLIC SMALL LETTER KA
К <> K; # CYRILLIC CAPITAL LETTER KA
# қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
# Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
# ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
# Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
# ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
# Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
# ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
# Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
# ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
# Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
л <> l ; # CYRILLIC SMALL LETTER EL
Л <> L; # CYRILLIC CAPITAL LETTER EL
м <> m ; # CYRILLIC SMALL LETTER EM
М <> M ; # CYRILLIC CAPITAL LETTER EM
н <> n ; # CYRILLIC SMALL LETTER EN
Н <> N; # CYRILLIC CAPITAL LETTER EN
# ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
# Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
# ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
# Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
# ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
# Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
о <> o ; # CYRILLIC SMALL LETTER O
О <> O ; # CYRILLIC CAPITAL LETTER O
# ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
# Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
п <> p ; # CYRILLIC SMALL LETTER PE
П <> P ; # CYRILLIC CAPITAL LETTER PE
# ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
# Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
# ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
# Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
р <> r ; # CYRILLIC SMALL LETTER ER
Р <> R ; # CYRILLIC CAPITAL LETTER ER
# ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
# Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
с <> s ; # CYRILLIC SMALL LETTER ES
С <> S ; # CYRILLIC CAPITAL LETTER ES
# ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
# Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
т <> t ; # CYRILLIC SMALL LETTER TE
Т <> T ; # CYRILLIC CAPITAL LETTER TE
# ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
# Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
у <> u ; # CYRILLIC SMALL LETTER U
У <> U ; # CYRILLIC CAPITAL LETTER U
# ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
# Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
# ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
# Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
# ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
# Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
ф <> f ; # CYRILLIC SMALL LETTER EF
Ф <> F ; # CYRILLIC CAPITAL LETTER EF
х <> h ; # CYRILLIC SMALL LETTER HA
Х <> H; # CYRILLIC CAPITAL LETTER HA
# ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
# Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
# һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
# Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
# ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
# Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
# ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
# Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
# ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
# Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
# ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
# Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
ц <> c ; # CYRILLIC SMALL LETTER TSE
Ц <> C; # CYRILLIC CAPITAL LETTER TSE
# ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
# Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
# ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
# Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
# ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
# Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
Ъ <> $modprime2 $under ; # CYRILLIC CAPITAL LETTER HARD SIGN
ъ <> $modprime2 ; # CYRILLIC SMALL LETTER HARD SIGN
Ь <> $modprime $under ; # CYRILLIC CAPITAL LETTER SOFT SIGN
ь <> $modprime ; # CYRILLIC SMALL LETTER SOFT SIGN
ы <> y ; # CYRILLIC SMALL LETTER YERU
Ы <> Y ; # CYRILLIC CAPITAL LETTER YERU
# ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN
# Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
# ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
# Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
# ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
# Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
# ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
# Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS
# ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS
# Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS
# ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
# Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
# ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
# Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
# ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
# Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
# ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
# Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
# ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
# Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
# ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
# Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
# ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
# Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
# Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA
### ӑ <> XXX ; # CYRILLIC SMALL LETTER A
### Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
### ӓ <> XXX ; # CYRILLIC SMALL LETTER A
### Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A
### ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA
### Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA
### ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE
### Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE
### ѐ <> XXX ; # CYRILLIC SMALL LETTER IE
### Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE
### ё <> XXX ; # CYRILLIC SMALL LETTER IE
### Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE
### ӗ <> XXX ; # CYRILLIC SMALL LETTER IE
### Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE
### ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE
### Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
### ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE
### Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
### ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE
### Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE
### ѝ <> XXX ; # CYRILLIC SMALL LETTER I
### Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I
### ӣ <> XXX ; # CYRILLIC SMALL LETTER I
### Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I
### ӥ <> XXX ; # CYRILLIC SMALL LETTER I
### Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I
### ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
### Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
### ӧ <> XXX ; # CYRILLIC SMALL LETTER O
### Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O
### ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O
### Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
### ќ <> XXX ; # CYRILLIC SMALL LETTER KA
### Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA
### ӯ <> XXX ; # CYRILLIC SMALL LETTER U
### Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U
### ў <> XXX ; # CYRILLIC SMALL LETTER U
### Ў <> XXX ; # CYRILLIC CAPITAL LETTER U
### ӱ <> XXX ; # CYRILLIC SMALL LETTER U
### Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U
### ӳ <> XXX ; # CYRILLIC SMALL LETTER U
### Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U
### ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE
### Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE
### ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU
### Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU
### ӭ <> XXX ; # CYRILLIC SMALL LETTER E
### Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E
### ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
### Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
# Completeness
# The rules below use only '<', so they apply in the reverse direction only.
# Each one rewrites a Latin letter (q, w, x) into Latin text (k, u, ks); the
# leading '|' puts the cursor in front of the rewritten text so that it is
# processed again by the other rules.
# NOTE(review): presumably q, w and x are the Latin letters that no other rule
# in this file consumes - confirm against the part of the file above.
# $ignore matches any run (possibly empty) of combining marks; the '' inside the
# set is presumably an escaped apostrophe - TODO confirm.
$ignore = [[:Mark:]''] * ;
| k < q ;
| K < Q ;
| u < w ;
| U < W ;
# Capital X becomes all-caps KS when an uppercase letter follows it ...
| KS < X } $ignore [:UppercaseLetter:] ;
# ... or precedes it (skipping $ignore in both cases) ...
| KS < [:UppercaseLetter:] $ignore { X ;
# ... and title-case Ks everywhere else.
| Ks < X ;
| ks < x ;
# Final normalization pass: NFC going forward, NFD (in parentheses) going in reverse.
:: NFC (NFD) ;
# note: a global filter is more efficient, but MUST include all source chars!!
# :: ([\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]]);
# MINIMAL FILTER: Latin-Cyrillic
:: ( [\u0308A-Za-z\u00C0-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u018F\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0259\u02B9-\u02BA\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344\u0374\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0400\u0403\u040C-\u040E\u0419\u0439\u0450\u0453\u045C-\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F02-\u1F05\u1F0A-\u1F0D\u1F12-\u1F15\u1F1A-\u1F1D\u1F22-\u1F25\u1F2A-\u1F2D\u1F32-\u1F35\u1F3A-\u1F3D\u1F42-\u1F45\u1F4A-\u1F4D\u1F52-\u1F55\u1F5B\u1F5D\u1F62-\u1F65\u1F6A-\u1F6D\u1F70-\u1F7D\u1F82-\u1F85\u1F8A-\u1F8D\u1F92-\u1F95\u1F9A-\u1F9D\u1FA2-\u1FA5\u1FAA-\u1FAD\u1FB0\u1FB2\u1FB4\u1FB8\u1FBA-\u1FBB\u1FC2\u1FC4\u1FC8-\u1FCB\u1FCD-\u1FCE\u1FD0\u1FD2-\u1FD3\u1FD8\u1FDA-\u1FDB\u1FDD-\u1FDE\u1FE0\u1FE2-\u1FE3\u1FE8\u1FEA-\u1FEB\u1FED-\u1FEE\u1FF2\u1FF4\u1FF8-\u1FFB\u212A-\u212B] ) ;

View file

@ -1,117 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Devanagari-InterIndic
# Forward-only table ('>'): every Devanagari code point U+09xx listed below is
# mapped to the private-use code point U+E0xx that has the same low byte.
# Names in the trailing comments are the Unicode character names without the
# leading "DEVANAGARI".
# NOTE(review): the NFD / NFC passes at the top and bottom are commented out,
# so no normalization is performed by this file itself.
# :: NFD;
# Rules for decomposed characters
\u0901>\uE001; # SIGN CANDRABINDU
\u0902>\uE002; # SIGN ANUSVARA
\u0903>\uE003; # SIGN VISARGA
\u0904>\uE004; # LETTER SHORT A
\u0905>\uE005; # LETTER A
\u0906>\uE006; # LETTER AA
\u0907>\uE007; # LETTER I
\u0908>\uE008; # LETTER II
\u0909>\uE009; # LETTER U
\u090A>\uE00A; # LETTER UU
\u090B>\uE00B; # LETTER VOCALIC R
\u090C>\uE00C; # LETTER VOCALIC L
\u090D>\uE00D; # LETTER CANDRA E (For representing English sounds)
\u090E>\uE00E; # UNMAPPED LETTER SHORT E (For Southern Scripts)
\u090F>\uE00F; # LETTER E
\u0910>\uE010; # LETTER AI
\u0911>\uE011; # LETTER CANDRA O (For representing English sounds)
\u0912>\uE012; # UNMAPPED LETTER SHORT O (For Southern Scripts)
\u0913>\uE013; # LETTER O
\u0914>\uE014; # LETTER AU
\u0915>\uE015; # LETTER KA
\u0916>\uE016; # LETTER KHA
\u0917>\uE017; # LETTER GA
\u0918>\uE018; # LETTER GHA
\u0919>\uE019; # LETTER NGA
\u091A>\uE01A; # LETTER CA
\u091B>\uE01B; # LETTER CHA
\u091C>\uE01C; # LETTER JA
\u091D>\uE01D; # LETTER JHA
\u091E>\uE01E; # LETTER NYA
\u091F>\uE01F; # LETTER TTA
\u0920>\uE020; # LETTER TTHA
\u0921>\uE021; # LETTER DDA
\u0922>\uE022; # LETTER DDHA
\u0923>\uE023; # LETTER NNA
\u0924>\uE024; # LETTER TA
\u0925>\uE025; # LETTER THA
\u0926>\uE026; # LETTER DA
\u0927>\uE027; # LETTER DHA
\u0928>\uE028; # LETTER NA
\u0929>\uE029; # LETTER NNNA
\u092A>\uE02A; # LETTER PA
\u092B>\uE02B; # LETTER PHA
\u092C>\uE02C; # LETTER BA
\u092D>\uE02D; # LETTER BHA
\u092E>\uE02E; # LETTER MA
\u092F>\uE02F; # LETTER YA
\u0930>\uE030; # LETTER RA
\u0931>\uE031; # LETTER RRA
\u0932>\uE032; # LETTER LA
\u0933>\uE033; # LETTER LLA
\u0934>\uE034; # LETTER LLLA
\u0935>\uE035; # LETTER VA
\u0936>\uE036; # LETTER SHA
\u0937>\uE037; # LETTER SSA
\u0938>\uE038; # LETTER SA
\u0939>\uE039; # LETTER HA
\u093C>\uE03C; # SIGN NUKTA
\u093D>\uE03D; # SIGN AVAGRAHA
\u093E>\uE03E; # VOWEL SIGN AA
\u093F>\uE03F; # VOWEL SIGN I
\u0940>\uE040; # VOWEL SIGN II
\u0941>\uE041; # VOWEL SIGN U
\u0942>\uE042; # VOWEL SIGN UU
\u0943>\uE043; # VOWEL SIGN VOCALIC R
\u0944>\uE044; # VOWEL SIGN VOCALIC RR
\u0945>\uE045; # VOWEL SIGN CANDRA E
\u0946>\uE046; # UNMAPPED VOWEL SIGN SHORT E
\u0947>\uE047; # VOWEL SIGN E
\u0948>\uE048; # VOWEL SIGN AI
\u0949>\uE049; # VOWEL SIGN CANDRA O
\u094A>\uE04A; # UNMAPPED VOWEL SIGN SHORT O
\u094B>\uE04B; # VOWEL SIGN O
\u094C>\uE04C; # VOWEL SIGN AU
\u094D>\uE04D; # SIGN VIRAMA
\u0950>\uE050; # OM
\u0951>\uE051; # UNMAPPED STRESS SIGN UDATTA
\u0952>\uE052; # UNMAPPED STRESS SIGN ANUDATTA
\u0953>\uE053; # UNMAPPED GRAVE ACCENT
\u0954>\uE054; # UNMAPPED ACUTE ACCENT
\u0958>\uE058; # LETTER QA
\u0959>\uE059; # LETTER KHHA
\u095A>\uE05a; # LETTER GHHA
\u095B>\uE05b; # LETTER ZA
\u095C>\uE05c; # LETTER DDDHA
\u095D>\uE05d; # LETTER RHA
\u095E>\uE05e; # LETTER FA
\u095F>\uE05f; # LETTER YYA
\u0960>\uE060; # LETTER VOCALIC RR
\u0961>\uE061; # LETTER VOCALIC LL
\u0962>\uE062; # VOWEL SIGN VOCALIC L
\u0963>\uE063; # VOWEL SIGN VOCALIC LL
\u0964>\ue064; # DANDA
\u0965>\ue065; # DOUBLE DANDA
\u0966>\uE066; # DIGIT ZERO
\u0967>\uE067; # DIGIT ONE
\u0968>\uE068; # DIGIT TWO
\u0969>\uE069; # DIGIT THREE
\u096A>\uE06A; # DIGIT FOUR
\u096B>\uE06B; # DIGIT FIVE
\u096C>\uE06C; # DIGIT SIX
\u096D>\uE06D; # DIGIT SEVEN
\u096E>\uE06E; # DIGIT EIGHT
\u096F>\uE06F; # DIGIT NINE
\u0970>\uE070; # ABBREVIATION SIGN
# :: NFC (NFD) ;

View file

@ -1,271 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Fullwidth-Halfwidth
# Mechanically generated from Unicode Character Database
# IDEOGRAPHIC SPACE then added, and
# FULLWIDTH MACRON changed to map to MACRON, not SPACE + COMBINING MACRON
# multicharacter
# Voiced and semi-voiced katakana. The left (fullwidth) side is one precomposed
# letter; the right (halfwidth) side is the halfwidth base letter followed by
# U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK or U+FF9F HALFWIDTH KATAKANA
# SEMI-VOICED SOUND MARK. These rules must stay ahead of the single-character
# rules so the two-character halfwidth sequences are matched first.
# The halfwidth side is spelled with \u escapes: a literal halfwidth sequence
# is folded into the fullwidth letter by compatibility normalization, which
# silently turns the rule into an identity mapping.
ガ<>\uFF76\uFF9E; # to KATAKANA LETTER GA
ギ<>\uFF77\uFF9E; # to KATAKANA LETTER GI
グ<>\uFF78\uFF9E; # to KATAKANA LETTER GU
ゲ<>\uFF79\uFF9E; # to KATAKANA LETTER GE
ゴ<>\uFF7A\uFF9E; # to KATAKANA LETTER GO
ザ<>\uFF7B\uFF9E; # to KATAKANA LETTER ZA
ジ<>\uFF7C\uFF9E; # to KATAKANA LETTER ZI
ズ<>\uFF7D\uFF9E; # to KATAKANA LETTER ZU
ゼ<>\uFF7E\uFF9E; # to KATAKANA LETTER ZE
ゾ<>\uFF7F\uFF9E; # to KATAKANA LETTER ZO
ダ<>\uFF80\uFF9E; # to KATAKANA LETTER DA
ヂ<>\uFF81\uFF9E; # to KATAKANA LETTER DI
ヅ<>\uFF82\uFF9E; # to KATAKANA LETTER DU
デ<>\uFF83\uFF9E; # to KATAKANA LETTER DE
ド<>\uFF84\uFF9E; # to KATAKANA LETTER DO
バ<>\uFF8A\uFF9E; # to KATAKANA LETTER BA
パ<>\uFF8A\uFF9F; # to KATAKANA LETTER PA
ビ<>\uFF8B\uFF9E; # to KATAKANA LETTER BI
ピ<>\uFF8B\uFF9F; # to KATAKANA LETTER PI
ブ<>\uFF8C\uFF9E; # to KATAKANA LETTER BU
プ<>\uFF8C\uFF9F; # to KATAKANA LETTER PU
ベ<>\uFF8D\uFF9E; # to KATAKANA LETTER BE
ペ<>\uFF8D\uFF9F; # to KATAKANA LETTER PE
ボ<>\uFF8E\uFF9E; # to KATAKANA LETTER BO
ポ<>\uFF8E\uFF9F; # to KATAKANA LETTER PO
ヴ<>\uFF73\uFF9E; # to KATAKANA LETTER VU
ヷ<>\uFF9C\uFF9E; # to KATAKANA LETTER VA
ヺ<>\uFF66\uFF9E; # to KATAKANA LETTER VO
# single character
# Fullwidth ASCII variants U+FF01..U+FF5E <> ASCII U+0021..U+007E (offset 0xFEE0).
# The fullwidth side is spelled with \u escapes: a literal fullwidth character
# is easily lost or folded to its ASCII twin by compatibility normalization,
# which leaves a rule with an empty (or identical) left-hand side.
# The right-hand sides are kept exactly as they were, including their quoting.
\uFF01<>'!'; # from FULLWIDTH EXCLAMATION MARK
\uFF02<>'\"'; # from FULLWIDTH QUOTATION MARK
\uFF03<>'#'; # from FULLWIDTH NUMBER SIGN
\uFF04<>'$'; # from FULLWIDTH DOLLAR SIGN
\uFF05<>'%'; # from FULLWIDTH PERCENT SIGN
\uFF06<>'&'; # from FULLWIDTH AMPERSAND
\uFF07<>''; # from FULLWIDTH APOSTROPHE
\uFF08<>'('; # from FULLWIDTH LEFT PARENTHESIS
\uFF09<>')'; # from FULLWIDTH RIGHT PARENTHESIS
\uFF0A<>'*'; # from FULLWIDTH ASTERISK
\uFF0B<>'+'; # from FULLWIDTH PLUS SIGN
\uFF0C<>','; # from FULLWIDTH COMMA
\uFF0D<>'-'; # from FULLWIDTH HYPHEN-MINUS
\uFF0E<>'.'; # from FULLWIDTH FULL STOP
\uFF0F<>'/'; # from FULLWIDTH SOLIDUS
\uFF10<>'0'; # from FULLWIDTH DIGIT ZERO
\uFF11<>'1'; # from FULLWIDTH DIGIT ONE
\uFF12<>'2'; # from FULLWIDTH DIGIT TWO
\uFF13<>'3'; # from FULLWIDTH DIGIT THREE
\uFF14<>'4'; # from FULLWIDTH DIGIT FOUR
\uFF15<>'5'; # from FULLWIDTH DIGIT FIVE
\uFF16<>'6'; # from FULLWIDTH DIGIT SIX
\uFF17<>'7'; # from FULLWIDTH DIGIT SEVEN
\uFF18<>'8'; # from FULLWIDTH DIGIT EIGHT
\uFF19<>'9'; # from FULLWIDTH DIGIT NINE
\uFF1A<>':'; # from FULLWIDTH COLON
\uFF1B<>';'; # from FULLWIDTH SEMICOLON
\uFF1C<>'<'; # from FULLWIDTH LESS-THAN SIGN
\uFF1D<>'='; # from FULLWIDTH EQUALS SIGN
\uFF1E<>'>'; # from FULLWIDTH GREATER-THAN SIGN
\uFF1F<>'?'; # from FULLWIDTH QUESTION MARK
\uFF20<>'@'; # from FULLWIDTH COMMERCIAL AT
\uFF21<>A; # from FULLWIDTH LATIN CAPITAL LETTER A
\uFF22<>B; # from FULLWIDTH LATIN CAPITAL LETTER B
\uFF23<>C; # from FULLWIDTH LATIN CAPITAL LETTER C
\uFF24<>D; # from FULLWIDTH LATIN CAPITAL LETTER D
\uFF25<>E; # from FULLWIDTH LATIN CAPITAL LETTER E
\uFF26<>F; # from FULLWIDTH LATIN CAPITAL LETTER F
\uFF27<>G; # from FULLWIDTH LATIN CAPITAL LETTER G
\uFF28<>H; # from FULLWIDTH LATIN CAPITAL LETTER H
\uFF29<>I; # from FULLWIDTH LATIN CAPITAL LETTER I
\uFF2A<>J; # from FULLWIDTH LATIN CAPITAL LETTER J
\uFF2B<>K; # from FULLWIDTH LATIN CAPITAL LETTER K
\uFF2C<>L; # from FULLWIDTH LATIN CAPITAL LETTER L
\uFF2D<>M; # from FULLWIDTH LATIN CAPITAL LETTER M
\uFF2E<>N; # from FULLWIDTH LATIN CAPITAL LETTER N
\uFF2F<>O; # from FULLWIDTH LATIN CAPITAL LETTER O
\uFF30<>P; # from FULLWIDTH LATIN CAPITAL LETTER P
\uFF31<>Q; # from FULLWIDTH LATIN CAPITAL LETTER Q
\uFF32<>R; # from FULLWIDTH LATIN CAPITAL LETTER R
\uFF33<>S; # from FULLWIDTH LATIN CAPITAL LETTER S
\uFF34<>T; # from FULLWIDTH LATIN CAPITAL LETTER T
\uFF35<>U; # from FULLWIDTH LATIN CAPITAL LETTER U
\uFF36<>V; # from FULLWIDTH LATIN CAPITAL LETTER V
\uFF37<>W; # from FULLWIDTH LATIN CAPITAL LETTER W
\uFF38<>X; # from FULLWIDTH LATIN CAPITAL LETTER X
\uFF39<>Y; # from FULLWIDTH LATIN CAPITAL LETTER Y
\uFF3A<>Z; # from FULLWIDTH LATIN CAPITAL LETTER Z
\uFF3B<>'['; # from FULLWIDTH LEFT SQUARE BRACKET
\uFF3C<>'\\'; # from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
\uFF3D<>']'; # from FULLWIDTH RIGHT SQUARE BRACKET
\uFF3E<>'^'; # from FULLWIDTH CIRCUMFLEX ACCENT
\uFF3F<>'_'; # from FULLWIDTH LOW LINE
\uFF40<>'`'; # from FULLWIDTH GRAVE ACCENT
\uFF41<>a; # from FULLWIDTH LATIN SMALL LETTER A
\uFF42<>b; # from FULLWIDTH LATIN SMALL LETTER B
\uFF43<>c; # from FULLWIDTH LATIN SMALL LETTER C
\uFF44<>d; # from FULLWIDTH LATIN SMALL LETTER D
\uFF45<>e; # from FULLWIDTH LATIN SMALL LETTER E
\uFF46<>f; # from FULLWIDTH LATIN SMALL LETTER F
\uFF47<>g; # from FULLWIDTH LATIN SMALL LETTER G
\uFF48<>h; # from FULLWIDTH LATIN SMALL LETTER H
\uFF49<>i; # from FULLWIDTH LATIN SMALL LETTER I
\uFF4A<>j; # from FULLWIDTH LATIN SMALL LETTER J
\uFF4B<>k; # from FULLWIDTH LATIN SMALL LETTER K
\uFF4C<>l; # from FULLWIDTH LATIN SMALL LETTER L
\uFF4D<>m; # from FULLWIDTH LATIN SMALL LETTER M
\uFF4E<>n; # from FULLWIDTH LATIN SMALL LETTER N
\uFF4F<>o; # from FULLWIDTH LATIN SMALL LETTER O
\uFF50<>p; # from FULLWIDTH LATIN SMALL LETTER P
\uFF51<>q; # from FULLWIDTH LATIN SMALL LETTER Q
\uFF52<>r; # from FULLWIDTH LATIN SMALL LETTER R
\uFF53<>s; # from FULLWIDTH LATIN SMALL LETTER S
\uFF54<>t; # from FULLWIDTH LATIN SMALL LETTER T
\uFF55<>u; # from FULLWIDTH LATIN SMALL LETTER U
\uFF56<>v; # from FULLWIDTH LATIN SMALL LETTER V
\uFF57<>w; # from FULLWIDTH LATIN SMALL LETTER W
\uFF58<>x; # from FULLWIDTH LATIN SMALL LETTER X
\uFF59<>y; # from FULLWIDTH LATIN SMALL LETTER Y
\uFF5A<>z; # from FULLWIDTH LATIN SMALL LETTER Z
\uFF5B<>'{'; # from FULLWIDTH LEFT CURLY BRACKET
\uFF5C<>'|'; # from FULLWIDTH VERTICAL LINE
\uFF5D<>'}'; # from FULLWIDTH RIGHT CURLY BRACKET
\uFF5E<>'~'; # from FULLWIDTH TILDE
# CJK punctuation and katakana: regular (wide) character <> halfwidth form
# U+FF61..U+FF9F. The halfwidth side is spelled with \u escapes because a
# literal halfwidth character is folded into its wide counterpart by
# compatibility normalization, which makes the rule an identity mapping.
。<>\uFF61; # to HALFWIDTH IDEOGRAPHIC FULL STOP
「<>\uFF62; # to HALFWIDTH LEFT CORNER BRACKET
」<>\uFF63; # to HALFWIDTH RIGHT CORNER BRACKET
、<>\uFF64; # to HALFWIDTH IDEOGRAPHIC COMMA
・<>\uFF65; # to HALFWIDTH KATAKANA MIDDLE DOT
ヲ<>\uFF66; # to HALFWIDTH KATAKANA LETTER WO
ァ<>\uFF67; # to HALFWIDTH KATAKANA LETTER SMALL A
ィ<>\uFF68; # to HALFWIDTH KATAKANA LETTER SMALL I
ゥ<>\uFF69; # to HALFWIDTH KATAKANA LETTER SMALL U
ェ<>\uFF6A; # to HALFWIDTH KATAKANA LETTER SMALL E
ォ<>\uFF6B; # to HALFWIDTH KATAKANA LETTER SMALL O
ャ<>\uFF6C; # to HALFWIDTH KATAKANA LETTER SMALL YA
ュ<>\uFF6D; # to HALFWIDTH KATAKANA LETTER SMALL YU
ョ<>\uFF6E; # to HALFWIDTH KATAKANA LETTER SMALL YO
ッ<>\uFF6F; # to HALFWIDTH KATAKANA LETTER SMALL TU
ー<>\uFF70; # to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
ア<>\uFF71; # to HALFWIDTH KATAKANA LETTER A
イ<>\uFF72; # to HALFWIDTH KATAKANA LETTER I
ウ<>\uFF73; # to HALFWIDTH KATAKANA LETTER U
エ<>\uFF74; # to HALFWIDTH KATAKANA LETTER E
オ<>\uFF75; # to HALFWIDTH KATAKANA LETTER O
カ<>\uFF76; # to HALFWIDTH KATAKANA LETTER KA
キ<>\uFF77; # to HALFWIDTH KATAKANA LETTER KI
ク<>\uFF78; # to HALFWIDTH KATAKANA LETTER KU
ケ<>\uFF79; # to HALFWIDTH KATAKANA LETTER KE
コ<>\uFF7A; # to HALFWIDTH KATAKANA LETTER KO
サ<>\uFF7B; # to HALFWIDTH KATAKANA LETTER SA
シ<>\uFF7C; # to HALFWIDTH KATAKANA LETTER SI
ス<>\uFF7D; # to HALFWIDTH KATAKANA LETTER SU
セ<>\uFF7E; # to HALFWIDTH KATAKANA LETTER SE
ソ<>\uFF7F; # to HALFWIDTH KATAKANA LETTER SO
タ<>\uFF80; # to HALFWIDTH KATAKANA LETTER TA
チ<>\uFF81; # to HALFWIDTH KATAKANA LETTER TI
ツ<>\uFF82; # to HALFWIDTH KATAKANA LETTER TU
テ<>\uFF83; # to HALFWIDTH KATAKANA LETTER TE
ト<>\uFF84; # to HALFWIDTH KATAKANA LETTER TO
ナ<>\uFF85; # to HALFWIDTH KATAKANA LETTER NA
ニ<>\uFF86; # to HALFWIDTH KATAKANA LETTER NI
ヌ<>\uFF87; # to HALFWIDTH KATAKANA LETTER NU
ネ<>\uFF88; # to HALFWIDTH KATAKANA LETTER NE
ノ<>\uFF89; # to HALFWIDTH KATAKANA LETTER NO
ハ<>\uFF8A; # to HALFWIDTH KATAKANA LETTER HA
ヒ<>\uFF8B; # to HALFWIDTH KATAKANA LETTER HI
フ<>\uFF8C; # to HALFWIDTH KATAKANA LETTER HU
ヘ<>\uFF8D; # to HALFWIDTH KATAKANA LETTER HE
ホ<>\uFF8E; # to HALFWIDTH KATAKANA LETTER HO
マ<>\uFF8F; # to HALFWIDTH KATAKANA LETTER MA
ミ<>\uFF90; # to HALFWIDTH KATAKANA LETTER MI
ム<>\uFF91; # to HALFWIDTH KATAKANA LETTER MU
メ<>\uFF92; # to HALFWIDTH KATAKANA LETTER ME
モ<>\uFF93; # to HALFWIDTH KATAKANA LETTER MO
ヤ<>\uFF94; # to HALFWIDTH KATAKANA LETTER YA
ユ<>\uFF95; # to HALFWIDTH KATAKANA LETTER YU
ヨ<>\uFF96; # to HALFWIDTH KATAKANA LETTER YO
ラ<>\uFF97; # to HALFWIDTH KATAKANA LETTER RA
リ<>\uFF98; # to HALFWIDTH KATAKANA LETTER RI
ル<>\uFF99; # to HALFWIDTH KATAKANA LETTER RU
レ<>\uFF9A; # to HALFWIDTH KATAKANA LETTER RE
ロ<>\uFF9B; # to HALFWIDTH KATAKANA LETTER RO
ワ<>\uFF9C; # to HALFWIDTH KATAKANA LETTER WA
ン<>\uFF9D; # to HALFWIDTH KATAKANA LETTER N
# The two sound marks are combining characters, so both sides are escaped.
\u3099<>\uFF9E; # to HALFWIDTH KATAKANA VOICED SOUND MARK
\u309A<>\uFF9F; # to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
# Hangul: compatibility jamo U+3131..U+3164 <> halfwidth jamo U+FFA0..U+FFDC.
# Both sides are escaped: compatibility normalization folds either form into
# conjoining jamo (U+11xx), and U+3164 / U+FFA0 are invisible filler characters.
\u3164<>\uFFA0; # to HALFWIDTH HANGUL FILLER
\u3131<>\uFFA1; # to HALFWIDTH HANGUL LETTER KIYEOK
\u3132<>\uFFA2; # to HALFWIDTH HANGUL LETTER SSANGKIYEOK
\u3133<>\uFFA3; # to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
\u3134<>\uFFA4; # to HALFWIDTH HANGUL LETTER NIEUN
\u3135<>\uFFA5; # to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
\u3136<>\uFFA6; # to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
\u3137<>\uFFA7; # to HALFWIDTH HANGUL LETTER TIKEUT
\u3138<>\uFFA8; # to HALFWIDTH HANGUL LETTER SSANGTIKEUT
\u3139<>\uFFA9; # to HALFWIDTH HANGUL LETTER RIEUL
\u313A<>\uFFAA; # to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
\u313B<>\uFFAB; # to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
\u313C<>\uFFAC; # to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
\u313D<>\uFFAD; # to HALFWIDTH HANGUL LETTER RIEUL-SIOS
\u313E<>\uFFAE; # to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
\u313F<>\uFFAF; # to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
\u3140<>\uFFB0; # to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
\u3141<>\uFFB1; # to HALFWIDTH HANGUL LETTER MIEUM
\u3142<>\uFFB2; # to HALFWIDTH HANGUL LETTER PIEUP
\u3143<>\uFFB3; # to HALFWIDTH HANGUL LETTER SSANGPIEUP
\u3144<>\uFFB4; # to HALFWIDTH HANGUL LETTER PIEUP-SIOS
\u3145<>\uFFB5; # to HALFWIDTH HANGUL LETTER SIOS
\u3146<>\uFFB6; # to HALFWIDTH HANGUL LETTER SSANGSIOS
\u3147<>\uFFB7; # to HALFWIDTH HANGUL LETTER IEUNG
\u3148<>\uFFB8; # to HALFWIDTH HANGUL LETTER CIEUC
\u3149<>\uFFB9; # to HALFWIDTH HANGUL LETTER SSANGCIEUC
\u314A<>\uFFBA; # to HALFWIDTH HANGUL LETTER CHIEUCH
\u314B<>\uFFBB; # to HALFWIDTH HANGUL LETTER KHIEUKH
\u314C<>\uFFBC; # to HALFWIDTH HANGUL LETTER THIEUTH
\u314D<>\uFFBD; # to HALFWIDTH HANGUL LETTER PHIEUPH
\u314E<>\uFFBE; # to HALFWIDTH HANGUL LETTER HIEUH
\u314F<>\uFFC2; # to HALFWIDTH HANGUL LETTER A
\u3150<>\uFFC3; # to HALFWIDTH HANGUL LETTER AE
\u3151<>\uFFC4; # to HALFWIDTH HANGUL LETTER YA
\u3152<>\uFFC5; # to HALFWIDTH HANGUL LETTER YAE
\u3153<>\uFFC6; # to HALFWIDTH HANGUL LETTER EO
\u3154<>\uFFC7; # to HALFWIDTH HANGUL LETTER E
\u3155<>\uFFCA; # to HALFWIDTH HANGUL LETTER YEO
\u3156<>\uFFCB; # to HALFWIDTH HANGUL LETTER YE
\u3157<>\uFFCC; # to HALFWIDTH HANGUL LETTER O
\u3158<>\uFFCD; # to HALFWIDTH HANGUL LETTER WA
\u3159<>\uFFCE; # to HALFWIDTH HANGUL LETTER WAE
\u315A<>\uFFCF; # to HALFWIDTH HANGUL LETTER OE
\u315B<>\uFFD2; # to HALFWIDTH HANGUL LETTER YO
\u315C<>\uFFD3; # to HALFWIDTH HANGUL LETTER U
\u315D<>\uFFD4; # to HALFWIDTH HANGUL LETTER WEO
\u315E<>\uFFD5; # to HALFWIDTH HANGUL LETTER WE
\u315F<>\uFFD6; # to HALFWIDTH HANGUL LETTER WI
\u3160<>\uFFD7; # to HALFWIDTH HANGUL LETTER YU
\u3161<>\uFFDA; # to HALFWIDTH HANGUL LETTER EU
\u3162<>\uFFDB; # to HALFWIDTH HANGUL LETTER YI
\u3163<>\uFFDC; # to HALFWIDTH HANGUL LETTER I
# Fullwidth signs U+FFE0..U+FFE6 <> their ordinary (narrow) counterparts.
# The fullwidth side is spelled with \u escapes so that compatibility
# normalization cannot fold it into the right-hand side.
\uFFE0<>'¢'; # from FULLWIDTH CENT SIGN
\uFFE1<>'£'; # from FULLWIDTH POUND SIGN
\uFFE2<>'¬'; # from FULLWIDTH NOT SIGN
\uFFE3<>'¯'; # from FULLWIDTH MACRON
# The quoted left side is a literal U+3000 IDEOGRAPHIC SPACE, not an ASCII space.
'　'<>' '; # ideographic space (place this after MACRON)
\uFFE4<>'¦'; # from FULLWIDTH BROKEN BAR
\uFFE5<>'¥'; # from FULLWIDTH YEN SIGN
\uFFE6<>₩; # from FULLWIDTH WON SIGN
# Ordinary symbol <> halfwidth form U+FFE8..U+FFEE (halfwidth side escaped).
# The arrows on the left stay quoted, as in the original rules.
│<>\uFFE8; # to HALFWIDTH FORMS LIGHT VERTICAL
'←'<>\uFFE9; # to HALFWIDTH LEFTWARDS ARROW
↑<>\uFFEA; # to HALFWIDTH UPWARDS ARROW
'→'<>\uFFEB; # to HALFWIDTH RIGHTWARDS ARROW
↓<>\uFFEC; # to HALFWIDTH DOWNWARDS ARROW
■<>\uFFED; # to HALFWIDTH BLACK SQUARE
○<>\uFFEE; # to HALFWIDTH WHITE CIRCLE
# eof

View file

@ -1,345 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Rules are predicated on running NFD first, and NFC afterwards
# :: [\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ;
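# MINIMAL FILTER GENERATED FOR: Greek-Latin
# NOTE(review): the range \u03F7-\u07FB in the filter below runs far past the Greek block (which ends at \u03FF); it looks like a typo for \u03F7-\u03FB - confirm before regenerating.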
:: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u03F7-\u07FB\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u03F9] ;
:: NFD (NFC) ;
# TEST CASES
# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
# ᾳ ῃ ῳ ὃ ὄ
# ὠς ὡς ὢς ὣς
# Ὠς Ὡς Ὢς Ὣς
# ὨΣ ὩΣ ὪΣ ὫΣ
# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
# Useful variables
$lower = [[:latin:][:greek:] & [:Ll:]];
$glower = [[:greek:] & [:Ll:]];
$upper = [[:latin:][:greek:] & [:Lu:]] ;
$accent = [:M:] ;
# NOTE: restrict to just the Greek & Latin accents that we care about
# TODO: broaden out once iteration is fixed
$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;
$macron = \u0304 ;
$ddot = \u0308 ;
$ddotmac = [$ddot$macron];
$lcgvowel = [αεηιουω] ;
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
$gvowel = [$lcgvowel $ucgvowel] ;
$lcgvowelC = [$lcgvowel $accent] ;
$evowel = [aeiouyAEIOUY];
$evowel2 = [iuyIUY];
$vowel = [ $evowel $gvowel] ;
$gammaLike = [ΓΚΞΧγκξχϰ] ;
$egammaLike = [GKXCgkxc] ;
$smooth = ̓ ;
$rough = ̔ ;
$iotasub = ͅ ;
$evowel_i = [$evowel-[iI]] ;
$evowel2_i = [uyUY];
$underbar = \u0331;
$afterLetter = [:L:] [[:M:]\']* ;
$beforeLetter = [[:M:]\']* [:L:] ;
$beforeLower = $accent * $lower ;
$notLetter = [^[:L:][:M:]] ;
$under = ̱;
# Fix punctuation
# preserve original
\: <> \: $under ;
\? <> \? $under ;
\; <> \? ;
· <> \: ;
# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
\u0342 <> \u0302 ;
# IOTA: convert iota subscript to iota
# first make previous alpha long!
$accent_minus = [[$accent]-[$iotasub$macron]];
Α } $accent_minus * $iotasub > | Α $macron ;
α } $accent_minus * $iotasub > | α $macron ;
# now convert to uppercase if after an uppercase letter, otherwise to lowercase
$upper $accent * { $iotasub > I ;
$iotasub > i ;
| $1 $iotasub < ($evowel $macron $accentMinus *) i ;
| $1 $iotasub < ($evowel $macron $accentMinus *) I ;
# BREATHING
# Convert rough breathing to h, and move before letters.
# Make A ` x = > H a x
Α ($macron?) $rough } $beforeLower > H | α $1;
Ε $rough } $beforeLower > H | ε;
Η $rough } $beforeLower > H | η ;
Ι ($ddot?) $rough } $beforeLower > H | ι $1;
Ο $rough } $beforeLower > H | ο ;
Υ $rough } $beforeLower > H | υ ;
Ω ($ddot?) $rough } $beforeLower > H | ω $1;
# Make A x ` = > H a x
Α ($glower $macron?) $rough > H | α $1 ;
Ε ($glower) $rough > H | ε $1 ;
Η ($glower) $rough > H | η $1 ;
Ι ($glower $ddot?) $rough > H | ι $1 ;
Ο ($glower) $rough > H | ο $1 ;
Υ ($glower) $rough > H | υ $1 ;
Ω ($glower $ddot?) $rough > H | ω $1 ;
#Otherwise, make x ` into h x and X ` into H X
($lcgvowel + $ddotmac? ) $rough > h | $1 ;
($gvowel + $ddotmac? ) $rough > H | $1 ;
# Go backwards with H
| $1 $rough < h ($evowel $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough < h ($evowel $ddot? $evowel2 $macron?) ;
| $1 $rough < h ($evowel $macron? $ddot?) ;
| $1 $rough < H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
| $1 $rough < H ([AEIOUY] $ddot? $evowel2 $macron?) ;
| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;
# titlecase, have to fix individually
# in the future, we should add &uppercase() to make this easier
| A $1 $rough < H a ($macron $ddot? $evowel2_i $macron?) ;
| E $1 $rough < H e ($macron $ddot? $evowel2_i $macron?) ;
| I $1 $rough < H i ($macron $ddot? $evowel2_i $macron?) ;
| O $1 $rough < H o ($macron $ddot? $evowel2_i $macron?) ;
| U $1 $rough < H u ($macron $ddot? $evowel2_i $macron?) ;
| Y $1 $rough < H y ($macron $ddot? $evowel2_i $macron?) ;
| A $1 $rough < H a ($ddot? $evowel2 $macron?) ;
| E $1 $rough < H e ($ddot? $evowel2 $macron?) ;
| I $1 $rough < H i ($ddot? $evowel2 $macron?) ;
| O $1 $rough < H o ($ddot? $evowel2 $macron?) ;
| U $1 $rough < H u ($ddot? $evowel2 $macron?) ;
| Y $1 $rough < H y ($ddot? $evowel2 $macron?) ;
| A $1 $rough < H a ($macron? $ddot? ) ;
| E $1 $rough < H e ($macron? $ddot? ) ;
| I $1 $rough < H i ($macron? $ddot? ) ;
| O $1 $rough < H o ($macron? $ddot? ) ;
| U $1 $rough < H u ($macron? $ddot? ) ;
| Y $1 $rough < H y ($macron? $ddot? ) ;
# Now do smooth
#delete smooth breathing for Latin
$smooth > ;
# insert in Greek
# the assumption is that all Marks are on letters.
| $1 $smooth < $notLetter { ([rR]) } [^hH$smooth$rough] ;
| $1 $smooth < $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
| $1 $smooth < $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
# TODO: preserve smooth/rough breathing if not
# on initial vowel sequence
# need to have these up here so the rules don't mask
# remove now superfluous macron when returning
Α < A $macron ;
α < a $macron ;
η <> e $macron ;
Η <> E $macron ;
φ <> ph ;
Ψ } $beforeLower <> Ps ;
Ψ <> PS ;
Φ } $beforeLower <> Ph ;
Φ <> PH ;
ψ <> ps ;
ω <> o $macron ;
Ω <> O $macron;
# NORMAL
α <> a ;
Α <> A ;
β <> b ;
Β <> B ;
γ } $gammaLike <> n } $egammaLike ;
γ <> g ;
Γ } $gammaLike <> N } $egammaLike ;
Γ <> G ;
δ <> d ;
Δ <> D ;
ε <> e ;
Ε <> E ;
ζ <> z ;
Ζ <> Z ;
θ <> th ;
Θ } $beforeLower <> Th ;
Θ <> TH ;
ι <> i ;
Ι <> I ;
κ <> k ;
Κ <> K ;
λ <> l ;
Λ <> L ;
μ <> m ;
Μ <> M ;
ν } $gammaLike > n\' ;
ν <> n ;
Ν } $gammaLike <> N\' ;
Ν <> N ;
ξ <> x ;
Ξ <> X ;
ο <> o ;
Ο <> O ;
π <> p ;
Π <> P ;
ρ $rough <> rh;
Ρ $rough } $beforeLower <> Rh ;
Ρ $rough <> RH ;
ρ <> r ;
Ρ <> R ;
# insert separator before things that turn into s
[Pp] { } [ςσΣϷϸϺϻ] > \' ;
# special S variants
Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
# underbar means exception
# before a letter, initial
ς } $beforeLetter <> s $underbar } $beforeLetter;
σ } $beforeLetter <> s } $beforeLetter;
# otherwise, after a letter = final
$afterLetter { σ <> $afterLetter { s $underbar;
$afterLetter { ς <> $afterLetter { s ;
# otherwise (isolated) = initial
ς <> s $underbar;
σ <> s ;
# [Pp] { Σ <> \'S ;
Σ <> S ;
τ <> t ;
Τ <> T ;
$vowel {υ } <> u ;
υ <> y ;
$vowel { Υ <> U ;
Υ <> Y ;
χ <> ch ;
Χ } $beforeLower <> Ch ;
Χ <> CH ;
# Completeness for ASCII
$ignore = [[:Mark:]''] * ;
| k < c ;
| ph < f ;
| i < j ;
| k < q ;
| b < v } $vowel ;
| b < w } $vowel;
| u < v ;
| u < w;
| K < C ;
| Ph < F ;
| I < J ;
| K < Q ;
| B < V } $vowel ;
| B < W } $vowel ;
| U < V ;
| U < W ;
$rough } $ignore [:UppercaseLetter:] > H ;
$ignore [:UppercaseLetter:] { $rough > H ;
$rough < H ;
$rough <> h ;
# Completeness for Greek
ϐ > | β ;
ϑ > | θ ;
ϒ > | Υ ;
ϕ > | φ ;
ϖ > | π ;
ϰ > | κ ;
ϱ > | ρ ;
ϲ > | σ ;
Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
ϳ > j ;
ϴ > | Θ ;
ϵ > | ε ;
µ > | μ ;
ͺ > i;
# delete any trailing ' marks used for roundtripping
< [Ππ] { \' } [Ss] ;
< [Νν] { \' } $egammaLike ;
::NFC (NFD) ;
# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
# ([\u0000-\u007F \u00B7 [:Latin:] [:nonspacing mark:]]) ;
# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
:: ( [':?A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0300-\u0337\u0339-\u0345\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEE\u1FF2-\u1FF4\u1FF6-\u1FFC\u212A-\u212B] ) ;

View file

@ -1,252 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# For modern Greek, based on UNGEGN rules.
# Rules are predicated on running NFD first, and NFC afterwards
# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
# WARNING: need to add accents to both filters ###
# :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;
:: [[[:Greek:][:Mn:][:Me:]] [\:-;?\u00B7\u037E\u0387]] ;
::NFD (NFC) ;
# Useful variables
$lower = [[:latin:][:greek:] & [:Ll:]] ;
$upper = [[:latin:][:greek:] & [:Lu:]] ;
$accent = [[:Mn:][:Me:]] ;
$macron = ̄ ;
$ddot = ̈ ;
$lcgvowel = [αεηιουω] ;
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
$gvowel = [$lcgvowel $ucgvowel] ;
$lcgvowelC = [$lcgvowel $accent] ;
$evowel = [aeiouyAEIOUY];
$vowel = [ $evowel $gvowel] ;
$beforeLower = $accent * $lower ;
$gammaLike = [ΓΚΞΧγκξχϰ] ;
$egammaLike = [GKXCgkxc] ;
$smooth = ̓ ;
$rough = ̔ ;
$iotasub = ͅ ;
$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;
$under = ̱;
$caron = ̌;
$afterLetter = [:L:] [\'$accent]* ;
$beforeLetter = [\'$accent]* [:L:] ;
# Fix punctuation
# preserve original
\: <> \: $under ;
\? <> \? $under ;
\; <> \? ;
· <> \: ;
# Fix any ancient characters that creep in
͂ > ́ ;
̂ > ́ ;
̀ > ́ ;
$smooth > ;
$rough > ;
$iotasub > ;
ͺ > ;
# need to have these up here so the rules don't mask
η <> i $under ;
Η <> I $under ;
Ψ } $beforeLower <> Ps ;
Ψ <> PS ;
ψ <> ps ;
ω <> o $under ;
Ω <> O $under;
# at beginning or end of word, convert mp to b
[^[:L:]$accent] { μπ > b ;
μπ } [^[:L:]$accent] > b ;
[^[:L:]$accent] { [Μμ][Ππ] > B ;
[Μμ][Ππ] } [^[:L:]$accent] > B ;
μπ < b ;
Μπ < B } $beforeLower ;
ΜΠ < B ;
# handle diphthongs ending with upsilon
ου <> ou ;
ΟΥ <> OU ;
Ου <> Ou ;
οΥ <> oU ;
$fmaker = [aeiAEI] $under ? ;
$shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate
$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;
υ $1 < ( $shiftForwardVowels )* v $under ;
$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;
υ $1 < ( $shiftForwardVowels )* f $under ;
$fmaker { Υ } $softener <> V $under ;
$fmaker { Υ <> U $under ;
υ <> y ;
Υ <> Y ;
# NORMAL
α <> a ;
Α <> A ;
β <> v ;
Β <> V ;
γ } $gammaLike <> n } $egammaLike ;
γ <> g ;
Γ } $gammaLike <> N } $egammaLike ;
Γ <> G ;
δ <> d ;
Δ <> D ;
ε <> e ;
Ε <> E ;
ζ <> z ;
Ζ <> Z ;
θ <> th ;
Θ } $beforeLower <> Th ;
Θ <> TH ;
ι <> i ;
Ι <> I ;
κ <> k ;
Κ <> K ;
λ <> l ;
Λ <> L ;
μ <> m ;
Μ <> M ;
ν } $gammaLike > n\' ;
ν <> n ;
Ν } $gammaLike <> N\' ;
Ν <> N ;
ξ <> x ;
Ξ <> X ;
ο <> o ;
Ο <> O ;
π <> p ;
Π <> P ;
ρ <> r ;
Ρ <> R ;
# insert separator before things that turn into s
[Pp] { } [ςσΣϷϸϺϻ] > \' ;
# special S variants
Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
# Caron means exception
# before a letter, initial
ς } $beforeLetter <> s $under } $beforeLetter;
σ } $beforeLetter <> s } $beforeLetter;
# otherwise, after a letter = final
$afterLetter { σ <> $afterLetter { s $under;
$afterLetter { ς <> $afterLetter { s ;
# otherwise (isolated) = initial
ς <> s $under;
σ <> s ;
# [Pp] { Σ <> \'S ;
Σ <> S ;
τ <> t ;
Τ <> T ;
φ <> f ;
Φ <> F ;
χ <> ch ;
Χ } $beforeLower <> Ch ;
Χ <> CH ;
# Completeness for ASCII
# $ignore = [[:Mark:]''] * ;
| ch < h ;
| k < c ;
| i < j ;
| k < q ;
| b < u } $vowel ;
| b < w } $vowel ;
| y < u ;
| y < w ;
| Ch < H ;
| K < C ;
| I < J ;
| K < Q ;
| B < W } $vowel ;
| B < U } $vowel ;
| Y < W ;
| Y < U ;
# Completeness for Greek
ϐ > | β ;
ϑ > | θ ;
ϒ > | Υ ;
ϕ > | φ ;
ϖ > | π ;
ϰ > | κ ;
ϱ > | ρ ;
ϲ > | σ ;
Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
ϳ > j ;
ϴ > | Θ ;
ϵ > | ε ;
µ > | μ ;
# delete any trailing ' marks used for roundtripping
< [Ππ] { \' } [Ss] ;
< [Νν] { \' } $egammaLike ;
::NFC (NFD) ;
# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;

View file

@ -1,91 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Gujarati-InterIndic
#:: NFD (NFC) ;
\u0a81>\ue001; # SIGN CANDRABINDU
\u0a82>\ue002; # SIGN ANUSVARA
\u0a83>\ue003; # SIGN VISARGA
\u0a85>\ue005; # LETTER A
\u0a86>\ue006; # LETTER AA
\u0a87>\ue007; # LETTER I
\u0a88>\ue008; # LETTER II
\u0a89>\ue009; # LETTER U
\u0a8a>\ue00a; # LETTER UU
\u0a8b>\ue00b; # LETTER VOCALIC R
\u0a8c>\ue00c; # LETTER VOCALIC L
\u0a8d>\ue00d; # VOWEL CANDRA E
\u0a8f>\ue00f; # LETTER E
\u0a90>\ue010; # LETTER AI
\u0a91>\ue011; # VOWEL CANDRA O
\u0a93>\ue013; # LETTER O
\u0a94>\ue014; # LETTER AU
\u0a95>\ue015; # LETTER KA
\u0a96>\ue016; # LETTER KHA
\u0a97>\ue017; # LETTER GA
\u0a98>\ue018; # LETTER GHA
\u0a99>\ue019; # LETTER NGA
\u0a9a>\ue01a; # LETTER CA
\u0a9b>\ue01b; # LETTER CHA
\u0a9c>\ue01c; # LETTER JA
\u0a9d>\ue01d; # LETTER JHA
\u0a9e>\ue01e; # LETTER NYA
\u0a9f>\ue01f; # LETTER TTA
\u0aa0>\ue020; # LETTER TTHA
\u0aa1>\ue021; # LETTER DDA
\u0aa2>\ue022; # LETTER DDHA
\u0aa3>\ue023; # LETTER NNA
\u0aa4>\ue024; # LETTER TA
\u0aa5>\ue025; # LETTER THA
\u0aa6>\ue026; # LETTER DA
\u0aa7>\ue027; # LETTER DHA
\u0aa8>\ue028; # LETTER NA
\u0aaa>\ue02a; # LETTER PA
\u0aab>\ue02b; # LETTER PHA
\u0aac>\ue02c; # LETTER BA
\u0aad>\ue02d; # LETTER BHA
\u0aae>\ue02e; # LETTER MA
\u0aaf>\ue02f; # LETTER YA
\u0ab0>\ue030; # LETTER RA
\u0ab2>\ue032; # LETTER LA
\u0ab3>\ue033; # LETTER LLA
\u0ab5>\ue035; # LETTER VA
\u0ab6>\ue036; # LETTER SHA
\u0ab7>\ue037; # LETTER SSA
\u0ab8>\ue038; # LETTER SA
\u0ab9>\ue039; # LETTER HA
\u0abc>\ue03c; # SIGN NUKTA
\u0abd>\ue03d; # SIGN AVAGRAHA
\u0abe>\ue03e; # VOWEL SIGN AA
\u0abf>\ue03f; # VOWEL SIGN I
\u0ac0>\ue040; # VOWEL SIGN II
\u0ac1>\ue041; # VOWEL SIGN U
\u0ac2>\ue042; # VOWEL SIGN UU
\u0ac3>\ue043; # VOWEL SIGN VOCALIC R
\u0ac4>\ue044; # VOWEL SIGN VOCALIC RR
\u0ac5>\ue045; # VOWEL SIGN CANDRA E
\u0ac7>\ue047; # VOWEL SIGN E
\u0ac8>\ue048; # VOWEL SIGN AI
\u0ac9>\ue049; # VOWEL SIGN CANDRA O
\u0acb>\ue04b; # VOWEL SIGN O
\u0acc>\ue04c; # VOWEL SIGN AU
\u0acd>\ue04d; # SIGN VIRAMA
\u0ad0>\ue050; # OM
\u0ae0>\ue060; # LETTER VOCALIC RR
\u0ae1>\ue061; # LETTER VOCALIC LL
\u0ae6>\ue066; # DIGIT ZERO
\u0ae7>\ue067; # DIGIT ONE
\u0ae8>\ue068; # DIGIT TWO
\u0ae9>\ue069; # DIGIT THREE
\u0aea>\ue06a; # DIGIT FOUR
\u0aeb>\ue06b; # DIGIT FIVE
\u0aec>\ue06c; # DIGIT SIX
\u0aed>\ue06d; # DIGIT SEVEN
\u0aee>\ue06e; # DIGIT EIGHT
\u0aef>\ue06f; # DIGIT NINE
\u0964>\ue064; # DANDA
\u0965>\ue065; # DOUBLE DANDA
# :: NFC (NFD) ;
# eof

View file

@ -1,95 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Gurmukhi-InterIndic
#:: NFD (NFC) ;
#\u0A16\u0A3C>\uE059; # LETTER KHHA
#\u0A17\u0A3C>\uE05A; # LETTER GHHA
#\u0A1C\u0A3C>\uE05B; # LETTER ZA
#\u0A38\u0A3C>\uE036; # LETTER SHA
#\u0A32\u0A3C>\uE033; # LETTER LLA
#\u0A2B\u0A3C>\uE05E; # LETTER FA
\u0A01>\ue001; # SIGN CHANDRABINDU
\u0A02>\uE002; # SIGN BINDI
\u0A05>\uE005; # LETTER A
\u0A06>\uE006; # LETTER AA
\u0A07>\uE007; # LETTER I
\u0A08>\uE008; # LETTER II
\u0A09>\uE009; # LETTER U
\u0A0A>\uE00A; # LETTER UU
\u0A0C>\uE032; # FALLBACK : VOCALIC LA
\u0A0F>\uE00F; # LETTER EE
\u0A10>\uE010; # LETTER AI
\u0A13>\uE013; # LETTER OO
\u0A14>\uE014; # LETTER AU
\u0A15>\uE015; # LETTER KA
\u0A16>\uE016; # LETTER KHA
\u0A17>\uE017; # LETTER GA
\u0A18>\uE018; # LETTER GHA
\u0A19>\uE019; # LETTER NGA
\u0A1A>\uE01A; # LETTER CA
\u0A1B>\uE01B; # LETTER CHA
\u0A1C>\uE01C; # LETTER JA
\u0A1D>\uE01D; # LETTER JHA
\u0A1E>\uE01E; # LETTER NYA
\u0A1F>\uE01F; # LETTER TTA
\u0A20>\uE020; # LETTER TTHA
\u0A21>\uE021; # LETTER DDA
\u0A22>\uE022; # LETTER DDHA
\u0A23>\uE023; # LETTER NNA
\u0A24>\uE024; # LETTER TA
\u0A25>\uE025; # LETTER THA
\u0A26>\uE026; # LETTER DA
\u0A27>\uE027; # LETTER DHA
\u0A28>\uE028; # LETTER NA
\u0A2A>\uE02A; # LETTER PA
\u0A2B>\uE02B; # LETTER PHA
\u0A2C>\uE02C; # LETTER BA
\u0A2D>\uE02D; # LETTER BHA
\u0A2E>\uE02E; # LETTER MA
\u0A2F>\uE02F; # LETTER YA
\u0A30>\uE030; # LETTER RA
\u0A32>\uE032; # LETTER LA
\u0a33>\uE033; # FALLBACK
\u0A35>\uE035; # LETTER VA
\u0a36>\ue036; # LETTER SHA
# SA + NUKTA is the decomposed spelling of SHA. Rules are tried in order, so
# this two-character rule has to stay ahead of the plain SA rule below.
\u0A38\u0A3C>\ue036; # FALLBACK
\u0A38>\uE038; # LETTER SA
\u0A39>\uE039; # LETTER HA
\u0A3C>\uE03C; # SIGN NUKTA
\u0A3E>\uE03E; # VOWEL SIGN AA
\u0A3F>\uE03F; # VOWEL SIGN I
\u0A40>\uE040; # VOWEL SIGN II
\u0A41>\uE041; # VOWEL SIGN U
\u0A42>\uE042; # VOWEL SIGN UU
\u0A47>\uE047; # VOWEL SIGN EE
\u0A48>\uE048; # VOWEL SIGN AI
\u0A4B>\uE04B; # VOWEL SIGN OO
\u0A4C>\uE04C; # VOWEL SIGN AU
\u0A4D>\uE04D; # SIGN VIRAMA
\u0A5C>\uE05C; # LETTER RRA
\u0A66>\uE066; # DIGIT ZERO
\u0A67>\uE067; # DIGIT ONE
\u0A68>\uE068; # DIGIT TWO
\u0A69>\uE069; # DIGIT THREE
\u0A6A>\uE06A; # DIGIT FOUR
\u0A6B>\uE06B; # DIGIT FIVE
\u0A6C>\uE06C; # DIGIT SIX
\u0A6D>\uE06D; # DIGIT SEVEN
\u0A6E>\uE06E; # DIGIT EIGHT
\u0A6F>\uE06F; # DIGIT NINE
\u0A70>\uE07C; # TIPPI
\u0A71>\uE07D; # ADDAK
\u0A72>\uE07E; # IRI
\u0A73>\uE07F; # URA
\u0A74>\uE080; # EK ONKAR
\u0964>\ue064; # DANDA
\u0965>\ue065; # DOUBLE DANDA
# :: NFC (NFD) ;
# eof

File diff suppressed because it is too large Load diff

View file

@ -1,24 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Only intended for internal use
:: fullwidth-halfwidth;
。 > '.';
$terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]];
$initialPunct = [:Ps:][:Pi:];
# add space between any Han or terminal punctuation and letters, and
# between letters and Han or initial punct
# (forward rules: the empty {} key matches the zero-width position between the
# left and right context, and a single space is inserted there)
[[:Ideographic:] $terminalPunct] {} [:Letter:] > ' ' ;
[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] > ' ' ;
# remove spacing between ideographs and other letters
# (reverse-only rules: nothing is written to the left of '<', so the space
# matched inside { } is replaced by the empty string)
< [:Ideographic:] { ' ' } [:Letter:] ;
< [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;

View file

@ -1,109 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Transliteration table for Hebrew
# Based on the UNGEGN table at:
# http://www.eki.ee/wgrs/rom1_he.pdf
#
# Exceptions:
# - Accents are added to disambiguate letters
# - Combinations of dagesh, shin/sin dot that produce different
# letters are not yet encoded.
#
# To test, open:
# http://oss.software.ibm.com/cgi-bin/icu/tr
# Click Edit, paste in this file, Save As hebrew-latin/XXX
# (where XXX is a username)
# Now go back to the main window, and try it out.
# Use hebrew-latin/XXX for Output 1, and (Inverse) for Output 2
# Paste in hebrew text in Input, and hit Transliterate.
#
# For more information, see:
# http://oss.software.ibm.com/icu/userguide/Transliteration.html
:: [[:Hebrew:] [:^ccc=0:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138̄\u05BF] - [\u05BD]] ;
:: nfkd (nfc) ;
$letterAfter = [:M:]* [:L:] ;
# move longer items here to avoid masking
ח <> ẖ ;
צ <> ẕ } $letterAfter;
ץ <> ẕ ;
ש <> ş ;
ת <> ţ ;
א <> ʼ ;
ב <> b ;
ג <> g ;
ד <> d ;
ה <> h ;
ו <> w ;
ז <> z ;
ט <> t ;
י <> y ;
כ <> k } $letterAfter;
ך <> k ;
ל <> l ;
מ <> m } $letterAfter;
ם <> m ;
נ <> n } $letterAfter;
ן <> n ;
ס <> s ;
ע <> ʻ ;
פ <> p } $letterAfter;
ף <> p ;
ק <> q ;
ר <> r ;
װ > | וו; # HEBREW LIGATURE YIDDISH DOUBLE VAV
ױ > | וי; # HEBREW LIGATURE YIDDISH VAV YOD
ײ > | יי ; # HEBREW LIGATURE YIDDISH DOUBLE YOD
ּ <> ̇ ; # dagesh just goes to overdot for now
ׁ <> ̌ ; # shin dot -> sh
ׂ <> ̂ ; # sin dot -> s
# points
$above = [^[:ccc=0:][:ccc=230:]]*;
‎ֲ‎ > à ;
‎ֲ‎ $1< a ($above) ̀;
‎ָ‎ > á ;
‎ָ‎ $1 < a ($above) ́;
‎ֱ‎ > è ;
‎ֱ‎ $1 < e ($above) ̀;
‎ֵ‎ > é ;
‎ֵ‎ $1 < e ($above) ́;
‎ְ‎ > e ̆ ;
‎ְ‎ $1 < e ($above) ̆;
‎ֹ‎ > ò ;
‎ֹ‎ $1 < o ($above) ̀;
ִ <> i ;
ֻ <> u ;
ַ <> a ;
ֶ <> e ;
ֳ <> o ;
\u05BF <> ̄ ;
# fallbacks
ק < c ;
פ < f } $letterAfter;
ף < f ;
ז < j ;
ו < v ;
כס < x ;
:: (lower);
:: nfc (nfd) ;
:: ([[:Latin:] [:^ccc=0:] [\u02BB-\u02BC\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 ̄ ]]);

View file

@ -1,207 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# note: a global filter is more efficient, but MUST include all source chars
:: [\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;
:: NFKC ();
# Hiragana-Katakana
# This is largely a one-to-one mapping, but it has a
# few kinks:
# 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
# Hiragana equivalents. We use Hiragana wa/wi/we/wo
# (308F-3092) with a voicing mark (3099), which is
# semantically equivalent. However, this is a non-
# roundtripping transformation.
# 2. The Katakana small ka/ke (30F5,30F6) have no
# Hiragana equivalents. We convert them to normal
# Hiragana ka/ke (304B,3051). This is a one-way
# information-losing transformation and precludes
# round-tripping of 30F5 and 30F6.
# 3. The combining marks 3099-309C are in the Hiragana
# block, but they apply to Katakana as well, so we
# leave them untouched.
# 4. The Katakana prolonged sound mark 30FC doubles the
# preceding vowel. This is a one-way information-
# losing transformation from Katakana to Hiragana.
# 5. The Katakana middle dot separates words in foreign
# expressions; we leave this unmodified.
# The above points preclude successful round-trip
# transformations of arbitrary input text. However,
# they provide naturalistic results that should conform
# to user expectations.
# Combining equivalents va/vi/ve/vo
わ゙ <> ヷ;
ゐ゙ <> ヸ;
ゑ゙ <> ヹ;
を゙ <> ヺ;
# One-to-one mappings, main block
# 3041:3094 <> 30A1:30F4
# 309D,E <> 30FD,E
ぁ <> ァ;
あ <> ア;
ぃ <> ィ;
い <> イ;
ぅ <> ゥ;
う <> ウ;
ぇ <> ェ;
え <> エ;
ぉ <> ォ;
お <> オ;
か <> カ;
が <> ガ;
き <> キ;
ぎ <> ギ;
く <> ク;
ぐ <> グ;
け <> ケ;
げ <> ゲ;
こ <> コ;
ご <> ゴ;
さ <> サ;
ざ <> ザ;
し <> シ;
じ <> ジ;
す <> ス;
ず <> ズ;
せ <> セ;
ぜ <> ゼ;
そ <> ソ;
ぞ <> ゾ;
た <> タ;
だ <> ダ;
ち <> チ;
ぢ <> ヂ;
っ <> ッ;
つ <> ツ;
づ <> ヅ;
て <> テ;
で <> デ;
と <> ト;
ど <> ド;
な <> ナ;
に <> ニ;
ぬ <> ヌ;
ね <> ネ;
の <> ノ; # U+306E <> U+30CE (the Katakana side was missing)
は <> ハ;
ば <> バ;
ぱ <> パ;
ひ <> ヒ;
び <> ビ;
ぴ <> ピ;
ふ <> フ;
ぶ <> ブ;
ぷ <> プ;
へ <> ヘ;
べ <> ベ;
ぺ <> ペ;
ほ <> ホ;
ぼ <> ボ;
ぽ <> ポ;
ま <> マ;
み <> ミ;
む <> ム;
め <> メ;
も <> モ;
ゃ <> ャ;
や <> ヤ;
ゅ <> ュ;
ゆ <> ユ;
ょ <> ョ;
よ <> ヨ;
ら <> ラ;
り <> リ;
る <> ル;
れ <> レ;
ろ <> ロ;
ゎ <> ヮ;
わ <> ワ;
ゐ <> ヰ;
ゑ <> ヱ;
を <> ヲ;
ん <> ン;
ゔ <> ヴ;
ゝ <> ヽ;
ゞ <> ヾ;
# One-way Katakana-Hiragana xform of small K ka/ke to
# normal H ka/ke.
か < ヵ;
け < ヶ;
# Katakana followed by a prolonged sound mark 30FC has
# its final vowel doubled. This is a Katakana-Hiragana
# one-way information-losing transformation. We
# include the small Katakana (e.g., small A 3041) and
# do not distinguish them from their large
# counterparts. It doesn't make sense to double a
# small counterpart vowel as a small Hiragana vowel, so
# we don't do so. In natural text this should never
# occur anyway. If a 30FC is seen without a preceding
# vowel sound (e.g., after n 30F3) we do not change it.
### $long = ー;
# The following categories are Hiragana, not Katakana
# as might be expected, since by the time we get to the
# 30FC, the preceding character will have already been
# transformed to Hiragana.
# {The following mechanically generated from the
# Unicode 3.0 data:}
$xa = [ \
ぁ あ か が さ ざ \
た だ な は ば ぱ \
ま ゃ や ら ゎ わ \
];
$xi = [ \
ぃ い き ぎ し じ \
ち ぢ に ひ び ぴ \
み り ゐ \
];
$xu = [ \
ぅ う く ぐ す ず \
っ つ づ ぬ ふ ぶ \
ぷ む ゅ ゆ る ゔ \
];
$xe = [ \
ぇ え け げ せ ぜ \
て で ね へ べ ぺ \
め れ ゑ \
];
$xo = [ \
ぉ お こ ご そ ぞ \
と ど の ほ ぼ ぽ \
も ょ よ ろ を \
];
あ < $xa {ー};
い < $xi {ー};
う < $xu {ー};
え < $xe {ー};
お < $xo {ー};
:: (NFKC) ;
# note: a global filter is more efficient, but MUST include all source chars!!
:: ([\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);
# eof

View file

@ -1,14 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
:: [\u3041-\u3094\u3099\u309D-\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0\u30D3\u30D6\u30D9\u30DC\u30F4\u30F7-\u30FA\u30FE] ;
:: NFD ;
:: Hiragana-Katakana;
:: Katakana-Latin;
:: NFC ;
:: (Lower) ;
:: ([',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]) ;

View file

@ -1,147 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Bengali
#:: NFD (NFC) ;
\uE001>\u0981; # SIGN CANDRABINDU
\uE002>\u0982; # SIGN ANUSVARA
\uE003>\u0983; # SIGN VISARGA
\uE004>\u0985; # FALLBACK TO LETTER A
\uE005>\u0985; # LETTER A
\uE006>\u0986; # LETTER AA
\uE007>\u0987; # LETTER I
\uE008>\u0988; # LETTER II
\uE009>\u0989; # LETTER U
\uE00A>\u098A; # LETTER UU
\uE00B>\u098B; # LETTER VOCALIC R
\uE00C>\u098C; # LETTER VOCALIC L
\uE00D>\u098F; # FALLBACK
\uE00E>\u098F; # FALLBACK
\uE00F>\u098F; # LETTER E
\uE010>\u0990; # LETTER AI
\uE011>\u0993; # FALLBACK
\uE012>\u0993; # FALLBACK
\uE013>\u0993; # LETTER O
\uE014>\u0994; # LETTER AU
\uE015>\u0995; # LETTER KA
\uE016>\u0996; # LETTER KHA
\uE017>\u0997; # LETTER GA
\uE018>\u0998; # LETTER GHA
\uE019>\u0999; # LETTER NGA
\uE01A>\u099A; # LETTER CA
\uE01B>\u099B; # LETTER CHA
\uE01C>\u099C; # LETTER JA
\uE01D>\u099D; # LETTER JHA
\uE01E>\u099E; # LETTER NYA
\uE01F>\u099F; # LETTER TTA
\uE020>\u09A0; # LETTER TTHA
\uE021>\u09A1; # LETTER DDA
\uE022>\u09A2; # LETTER DDHA
\uE023>\u09A3; # LETTER NNA
\uE024>\u09A4; # LETTER TA
\uE025>\u09A5; # LETTER THA
\uE026>\u09A6; # LETTER DA
\uE027>\u09A7; # LETTER DHA
\uE028>\u09A8; # LETTER NA
\uE029>\u09A8\u09BC; # REMAP (indicExceptions.txt): \u09a9>\u09a8 = LETTER NNNA>LETTER NA
\uE02A>\u09AA; # LETTER PA
\uE02B>\u09AB; # LETTER PHA
\uE02C>\u09AC; # LETTER BA
\uE02D>\u09AD; # LETTER BHA
\uE02E>\u09AE; # LETTER MA
\uE02F>\u09AF; # LETTER YA
\uE030>\u09B0; # LETTER RA
\uE031>\u09B0\u09BC; # FALLBACK to RA
\uE032>\u09B2; # LETTER LA
\uE033>\u09B2; # REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA
\uE034>\u09B2; # REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA
\uE035>\u09AC; # REMAP (indicExceptions.txt): \u09b5>\u09ac = LETTER VA>LETTER BA
\uE036>\u09B6; # LETTER SHA
\uE037>\u09B7; # LETTER SSA
\uE038>\u09B8; # LETTER SA
\uE039>\u09B9; # LETTER HA
\uE03C>\u09BC; # SIGN NUKTA
\uE03D>\u09bd; # SIGN AVAGRAHA
\uE03E>\u09BE; # VOWEL SIGN AA
\uE03F>\u09BF; # VOWEL SIGN I
\uE040>\u09C0; # VOWEL SIGN II
\uE041>\u09C1; # VOWEL SIGN U
\uE042>\u09C2; # VOWEL SIGN UU
\uE043>\u09C3; # VOWEL SIGN VOCALIC R
\uE044>\u09C4; # VOWEL SIGN VOCALIC RR
\uE045>\u09C7; # REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E
\uE046>\u09C7; # FALLBACK
\uE047>\u09C7; # VOWEL SIGN E
\uE048>\u09C8; # VOWEL SIGN AI
\uE049>\u09C7\u09BE; # REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O
\uE04A>\u09C7\u09BE; # FALLBACK
\uE04B>\u09C7\u09BE; # VOWEL SIGN O
\uE04C>\u09C7\u09D7; # VOWEL SIGN AU
\uE04D>\u09CD; # SIGN VIRAMA
\uE050>\u0993\u0982; # InterIndic-Bengali: OM
\ue051>;
\ue052>;
\ue053>;
\ue054>;
\uE055>; # LENGTH MARK
\uE056>\u09C8; # REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI
\uE057>\u09D7; # AU LENGTH MARK
\uE058>\u0995\u09BC; # FALLBACK
\uE059>\u0996\u09BC; # REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA
\uE05A>\u0997\u09BC; # REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA
\uE05B>\u099C\u09BC; # REMAP (indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA
\uE05C>\u09A1\u09BC; # FALLBACK
\uE05D>\u09A2\u09BC; # LETTER RHA
\uE05E>\u09AB\u09BC; # REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA
\uE05F>\u09AF\u09BC; # LETTER YYA
\uE060>\u09E0; # LETTER VOCALIC RR
\uE061>\u09E1; # LETTER VOCALIC LL
\uE062>\u09E2; # VOWEL SIGN VOCALIC L
\uE063>\u09E3; # VOWEL SIGN VOCALIC LL
\uE064>\u0964; # DANDA
\uE065>\u0965; # DOUBLE DANDA
\uE066>\u09E6; # DIGIT ZERO
\uE067>\u09E7; # DIGIT ONE
\uE068>\u09E8; # DIGIT TWO
\uE069>\u09E9; # DIGIT THREE
\uE06A>\u09EA; # DIGIT FOUR
\uE06B>\u09EB; # DIGIT FIVE
\uE06C>\u09EC; # DIGIT SIX
\uE06D>\u09ED; # DIGIT SEVEN
\uE06E>\u09EE; # DIGIT EIGHT
\uE06F>\u09EF; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
\ue071>\u09F0; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u09F1; # LETTER RA WITH LOWER DIAGONAL
\ue073>\u09F2; # RUPEE MARK
\ue074>\u09F3; # RUPEE SIGN
\ue075>\u09F4; # CURRENCY NUMERATOR ONE
\ue076>\u09F5; # CURRENCY NUMERATOR TWO
\ue077>\u09F6; # CURRENCY NUMERATOR THREE
\ue078>\u09F7; # CURRENCY NUMERATOR FOUR
\ue079>\u09F8; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>\u09F9; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>\u09FA; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u09AC; # FALLBACK FOR ORIYA LETTER WA
0 > \u09E6; # FALLBACK FOR TAMIL
1 > \u09E7;
# :: NFC (NFD) ;
# eof

View file

@ -1,158 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Devanagari
#:: NFD (NFC) ;
#Rules for Decomposed characters
# Each rule takes a base letter followed by SIGN NUKTA (\ue03c) and emits the
# single precomposed Devanagari letter. The trailing comment names the
# equivalent single InterIndic code point. These two-character rules are
# listed ahead of the single-character rules for the same base letters and
# for the nukta itself, which appear further down in this file.
\ue028\ue03c > \u0929; #\ue029
\ue030\ue03c > \u0931; #\ue031
\ue033\ue03c > \u0934; #\ue034
\ue015\ue03c > \u0958; #\ue058 LETTER QA (For Urdu)
\ue016\ue03c > \u0959; #\ue059 LETTER KHHA (For Urdu)
\ue017\ue03c > \u095a; #\ue05a LETTER GHHA (For Urdu)
\ue01c\ue03c > \u095b; #\ue05b LETTER ZA (For Urdu)
\ue021\ue03c > \u095c; #\ue05c LETTER DDDHA (pronounced RRA)
\ue022\ue03c > \u095d; #\ue05d LETTER RHA (pronounced RRHA)
\ue02b\ue03c > \u095e; #\ue05e LETTER FA
\ue02f\ue03c > \u095f; #\ue05f LETTER YYA
#Decomposed compatibility transliterations
# SHORT O (\ue012) followed by the AU LENGTH MARK (\ue057) becomes LETTER AU.
# This rule sits before the rule below that deletes a lone \ue057.
\ue012\ue057>\u0914; # FALLBACK FOR TAMIL AU
# ASCII digits 0 and 1 are mapped to the Devanagari digits ZERO and ONE.
0 > \u0966; # FALLBACK FOR TAMIL
1 > \u0967;
# Length marks that are still unmatched at this point are deleted (empty right-hand side).
\ue055>; # FALLBACK BLOW AWAY KANNADA AND TELUGU LENGTH MARK
\ue056>; # FALLBACK BLOW AWAY KANNADA AND TELUGU AI LENGTH MARK
\ue057>; # FALLBACK BLOW AWAY TAMIL AU LENGTH MARK
\ue001 > \u0901; # SIGN CANDRABINDU
\ue002 > \u0902; # SIGN ANUSVARA
\ue003 > \u0903; # SIGN VISARGA
\ue004 > \u0904; # SIGN SHORT A
\ue005 > \u0905; # LETTER A
\ue006 > \u0906; # LETTER AA
\ue007 > \u0907; # LETTER I
\ue008 > \u0908; # LETTER II
\ue009 > \u0909; # LETTER U
\ue00a > \u090a; # LETTER UU
\ue00b > \u090b; # LETTER VOCALIC R
\ue00c > \u090c; # LETTER VOCALIC L
\ue00d > \u090d; # LETTER CANDRA E (For representing English sounds)
\ue00e > \u090e; # LETTER SHORT E(For Southern Scripts)
\ue00f > \u090f; # LETTER E
\ue010 > \u0910; # LETTER AI
\ue011 > \u0911; # LETTER CANDRA O (For representing English sounds)
\ue012 > \u0912; # LETTER SHORT O (For Southern Scripts)
\ue013 > \u0913; # LETTER O
\ue014 > \u0914; # LETTER AU
\ue015 > \u0915; # LETTER KA
\ue016 > \u0916; # LETTER KHA
\ue017 > \u0917; # LETTER GA
\ue018 > \u0918; # LETTER GHA
\ue019 > \u0919; # LETTER NGA
\ue01a > \u091a; # LETTER CA
\ue01b > \u091b; # LETTER CHA
\ue01c > \u091c; # LETTER JA
\ue01d > \u091d; # LETTER JHA
\ue01e > \u091e; # LETTER NYA
\ue01f > \u091f; # LETTER TTA
\ue020 > \u0920; # LETTER TTHA
\ue021 > \u0921; # LETTER DDA
\ue022 > \u0922; # LETTER DDHA
\ue023 > \u0923; # LETTER NNA
\ue024 > \u0924; # LETTER TA
\ue025 > \u0925; # LETTER THA
\ue026 > \u0926; # LETTER DA
\ue027 > \u0927; # LETTER DHA
\ue028 > \u0928; # LETTER NA
\ue029 > \u0929; # LETTER NNNA
\ue02a > \u092a; # LETTER PA
\ue02b > \u092b; # LETTER PHA
\ue02c > \u092c; # LETTER BA
\ue02d > \u092d; # LETTER BHA
\ue02e > \u092e; # LETTER MA
\ue02f > \u092f; # LETTER YA
\ue030 > \u0930; # LETTER RA
\ue031 > \u0931; # LETTER RRA (Eyelash RA for Southern scripts)
#\ue031 > \u0930;
\ue032 > \u0932; # LETTER LA
\ue033 > \u0933; # LETTER LLA
\ue034 > \u0934; # LETTER LLLA (LLLA for Southern scripts)
#\ue034 > \u0933;
\ue035 > \u0935; # LETTER VA
\ue036 > \u0936; # LETTER SHA
\ue037 > \u0937; # LETTER SSA
\ue038 > \u0938; # LETTER SA
\ue039 > \u0939; # LETTER HA
\ue03c > \u093c; # SIGN NUKTA
\ue03d > \u093d; # SIGN AVAGRAHA
\ue03e > \u093e; # VOWEL SIGN AA
\ue03f > \u093f; # VOWEL SIGN I
\ue040 > \u0940; # VOWEL SIGN II
\ue041 > \u0941; # VOWEL SIGN U
\ue042 > \u0942; # VOWEL SIGN UU
\ue043 > \u0943; # VOWEL SIGN VOCALIC R
\ue044 > \u0944; # VOWEL SIGN VOCALIC RR
\ue045 > \u0945; # VOWEL SIGN CANDRA E
\ue046 > \u0946; # VOWEL SIGN SHORT E
\ue047 > \u0947; # VOWEL SIGN E
\ue048 > \u0948; # VOWEL SIGN AI
\ue049 > \u0949; # VOWEL SIGN CANDRA O
\ue04a > \u094a; # VOWEL SIGN SHORT O
\ue04b > \u094b; # VOWEL SIGN O
\ue04c > \u094c; # VOWEL SIGN AU
\ue04d > \u094d; # SIGN VIRAMA
\ue050 > \u0950; # OM
\ue051 > \u0951; # STRESS SIGN UDATTA
\ue052 > \u0952; # STRESS SIGN ANUDATTA
\ue053 > \u0953; # GRAVE ACCENT
\ue054 > \u0954; # ACUTE ACCENT
\ue058 > \u0958; # LETTER QA (For Urdu)
\ue059 > \u0959; # LETTER KHHA (For Urdu)
\ue05a > \u095a; # LETTER GHHA (For Urdu)
\ue05b > \u095b; # LETTER ZA (For Urdu)
\ue05c > \u095c; # LETTER DDDHA (pronounced RRA)
\ue05d > \u095d; # LETTER RHA (pronounced RRHA)
\ue05e > \u095e; # LETTER FA
\ue05f > \u095f; # LETTER YYA
\ue060 > \u0960; # LETTER VOCALIC RR
\ue061 > \u0961; # LETTER VOCALIC LL
\ue062 > \u0962; # VOWEL SIGN VOCALIC L
\ue063 > \u0963; # VOWEL SIGN VOCALIC LL
\ue064 > \u0964; # DANDA
\ue065 > \u0965; # DOUBLE DANDA
\ue066 > \u0966; # DIGIT ZERO
\ue067 > \u0967; # DIGIT ONE
\ue068 > \u0968; # DIGIT TWO
\ue069 > \u0969; # DIGIT THREE
\ue06a > \u096a; # DIGIT FOUR
\ue06b > \u096b; # DIGIT FIVE
\ue06c > \u096c; # DIGIT SIX
\ue06d > \u096d; # DIGIT SEVEN
\ue06e > \u096e; # DIGIT EIGHT
\ue06f > \u096f; # DIGIT NINE
\ue070>\u0970; # ABBREVIATION SIGN
\ue071>\u0930; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0930; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>\u0930\u0942; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0935; # FALLBACK FOR ORIYA LETTER WA
# \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
# :: NFC;
# eof

View file

@ -1,138 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Gujarati
#:: NFD (NFC) ;
\ue001>\u0a81; # SIGN CANDRABINDU
\ue002>\u0a82; # SIGN ANUSVARA
\ue003>\u0a83; # SIGN VISARGA
\uE004>\u0a85; # FALLBACK TO LETTER A
\ue005>\u0a85; # LETTER A
\ue006>\u0a86; # LETTER AA
\ue007>\u0a87; # LETTER I
\ue008>\u0a88; # LETTER II
\ue009>\u0a89; # LETTER U
\ue00a>\u0a8a; # LETTER UU
\ue00b>\u0a8b; # LETTER VOCALIC R
\ue00c>\u0a8c; # LETTER VOCALIC L
\ue00d>\u0a8d; # GUJARATI VOWEL CANDRA E
\ue00e>\u0a8f; # FALLBACK
\ue00f>\u0a8f; # InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E)
\ue010>\u0a90; # LETTER AI
\ue011>\u0a91; # FALLBACK
\ue012>\u0a93; # FALLBACK
\ue013>\u0a93; # UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O)
\ue014>\u0a94; # LETTER AU
\ue015>\u0a95; # LETTER KA
\ue016>\u0a96; # LETTER KHA
\ue017>\u0a97; # LETTER GA
\ue018>\u0a98; # LETTER GHA
\ue019>\u0a99; # LETTER NGA
\ue01a>\u0a9a; # LETTER CA
\ue01b>\u0a9b; # LETTER CHA
\ue01c>\u0a9c; # LETTER JA
\ue01d>\u0a9d; # LETTER JHA
\ue01e>\u0a9e; # LETTER NYA
\ue01f>\u0a9f; # LETTER TTA
\ue020>\u0aa0; # LETTER TTHA
\ue021>\u0aa1; # LETTER DDA
\ue022>\u0aa2; # LETTER DDHA
\ue023>\u0aa3; # LETTER NNA
\ue024>\u0aa4; # LETTER TA
\ue025>\u0aa5; # LETTER THA
\ue026>\u0aa6; # LETTER DA
\ue027>\u0aa7; # LETTER DHA
\ue028>\u0aa8; # LETTER NA
\ue029>\u0aa8\u0abc; # FALLBACK to NA+NUKTA
\ue02a>\u0aaa; # LETTER PA
\ue02b>\u0aab; # LETTER PHA
\ue02c>\u0aac; # LETTER BA
\ue02d>\u0aad; # LETTER BHA
\ue02e>\u0aae; # LETTER MA
\ue02f>\u0aaf; # LETTER YA
\ue030>\u0ab0; # LETTER RA
\ue031>\u0ab0\u0abc; # FALLBACK
\ue032>\u0ab2; # LETTER LA
\ue033>\u0ab3; # LETTER LLA
\ue034>\u0ab3\u0abc; # LETTER LLLA>LETTER LLA+NUKTA
\ue035>\u0ab5; # LETTER VA
\ue036>\u0ab6; # LETTER SHA
\ue037>\u0ab7; # LETTER SSA
\ue038>\u0ab8; # LETTER SA
\ue039>\u0ab9; # LETTER HA
\ue03c>\u0abc; # SIGN NUKTA
\ue03d>\u0abd; # SIGN AVAGRAHA
\ue03e>\u0abe; # VOWEL SIGN AA
\ue03f>\u0abf; # VOWEL SIGN I
\ue040>\u0ac0; # VOWEL SIGN II
\ue041>\u0ac1; # VOWEL SIGN U
\ue042>\u0ac2; # VOWEL SIGN UU
\ue043>\u0ac3; # VOWEL SIGN VOCALIC R
\ue044>\u0ac4; # VOWEL SIGN VOCALIC RR
\ue045>\u0ac5; # VOWEL SIGN CANDRA E
\ue046>\u0ac7; # FALLBACK
\ue047>\u0ac7; # InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E)
\ue048>\u0ac8; # VOWEL SIGN AI
\ue049>\u0ac9; # VOWEL SIGN CANDRA O
\ue04a>\u0acb; # FALLBACK
\ue04b>\u0acb; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O)
\ue04c>\u0acc; # VOWEL SIGN AU
\ue04d>\u0acd; # SIGN VIRAMA
\ue050>\u0ad0; # OM
\ue051>;
\ue052>;
\ue053>;
\ue054>;
\ue055>; # UNMAPPED InterIndic-Gujarati: LENGTH MARK
\ue056>\u0ac8; # REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI
\ue057>\u0acc; # REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU
\ue058>\u0a95\u0abc; # FALLBACK
\ue059>\u0a96\u0abc; # REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA
\ue05a>\u0a97\u0abc; # REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA
\ue05b>\u0a9c\u0abc; # REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA
\ue05c>\u0aa1\u0abc; # FALLBACK
\ue05d>\u0aa2\u0abc; # REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA
\ue05e>\u0aab\u0abc; # REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER FA>LETTER PHA.SIGN NUKTA
\ue05f>\u0aaf\u0abc; # REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA
\ue060>\u0ae0; # LETTER VOCALIC RR
\ue061>\u0ae1; # LETTER VOCALIC LL
\ue062>\u0abf\u0abc; # REMAP (indicExceptions.txt): \u0ae2>\u0abf\u0abc = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
\ue063>\u0ac0\u0abc; # REMAP (indicExceptions.txt): \u0ae3>\u0ac0\u0abc = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
\uE064>\u0964; # DANDA
\uE065>\u0965; # DOUBLE DANDA
\ue066>\u0ae6; # DIGIT ZERO
\ue067>\u0ae7; # DIGIT ONE
\ue068>\u0ae8; # DIGIT TWO
\ue069>\u0ae9; # DIGIT THREE
\ue06a>\u0aea; # DIGIT FOUR
\ue06b>\u0aeb; # DIGIT FIVE
\ue06c>\u0aec; # DIGIT SIX
\ue06d>\u0aed; # DIGIT SEVEN
\ue06e>\u0aee; # DIGIT EIGHT
\ue06f>\u0aef; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
\ue071>\u0ab0; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0ab0; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0ab5; # FALLBACK FOR ORIYA LETTER WA
0 > \u0ae6; # FALLBACK FOR TAMIL
1 > \u0ae7;
#\ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR
# :: NFC (NFD) ;
# eof

View file

@ -1,147 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Gurmukhi
#:: NFD (NFC) ;
# Context sets for the anusvara rules below, expressed as Gurmukhi code points:
#   $vowel     = LETTER A..LETTER AU plus VOWEL SIGN AA..SIGN VIRAMA
#   $consonant = LETTER KA..LETTER HA
$vowel = [\u0A05-\u0A14 \u0A3e-\u0A4D];
$consonant = [\u0A15-\u0A39];
\ue001>\u0a01; # SIGN CHANDRABINDU
# Rules for BINDI
# Anusvara is equivalent to BINDI when preceded by a vowel
$vowel{\ue002>\u0a02; # SIGN ANUSVARA (\u0a02 = SIGN BINDI)
# else is equivalent to TIPPI
$consonant{\ue002>\u0a70; # SIGN TIPPI
\ue002>\u0a02; # anusvara in any other context: SIGN BINDI
\ue003>; # FALLBACK BLOW AWAY SIGN VISARGA
\uE004>\u0a05; # FALLBACK TO LETTER A
\ue005>\u0a05; # LETTER A
\ue006>\u0a06; # LETTER AA
\ue007>\u0a07; # LETTER I
\ue008>\u0a08; # LETTER II
\ue009>\u0a09; # LETTER U
\ue00a>\u0a0a; # LETTER UU
\ue00b>\u0a30\u0a3f; # REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
\ue00c>\u0a33; # FALLBACK
\ue00d>\u0a0f; # FALLBACK
\ue00e>\u0a0f; # FALLBACK
\ue00f>\u0a0f; # LETTER EE
\ue010>\u0a10; # LETTER AI
\ue011>\u0a13; # FALLBACK
\ue012>\u0a13; # FALLBACK
\ue013>\u0a13; # LETTER OO
\ue014>\u0a14; # LETTER AU
\ue015>\u0a15; # LETTER KA
\ue016>\u0a16; # LETTER KHA
\ue017>\u0a17; # LETTER GA
\ue018>\u0a18; # LETTER GHA
\ue019>\u0a19; # LETTER NGA
\ue01a>\u0a1a; # LETTER CA
\ue01b>\u0a1b; # LETTER CHA
\ue01c>\u0a1c; # LETTER JA
\ue01d>\u0a1d; # LETTER JHA
\ue01e>\u0a1e; # LETTER NYA
\ue01f>\u0a1f; # LETTER TTA
\ue020>\u0a20; # LETTER TTHA
\ue021>\u0a21; # LETTER DDA
\ue022>\u0a22; # LETTER DDHA
\ue023>\u0a23; # LETTER NNA
\ue024>\u0a24; # LETTER TA
\ue025>\u0a25; # LETTER THA
\ue026>\u0a26; # LETTER DA
\ue027>\u0a27; # LETTER DHA
\ue028>\u0a28; # LETTER NA
\ue029>\u0a28\u0a3c; # REMAP (indicExceptions.txt): \u0a29>\u0a28\u0a3c = LETTER NNNA>LETTER NA.SIGN NUKTA
\ue02a>\u0a2a; # LETTER PA
\ue02b>\u0a2b; # LETTER PHA
\ue02c>\u0a2c; # LETTER BA
\ue02d>\u0a2d; # LETTER BHA
\ue02e>\u0a2e; # LETTER MA
\ue02f>\u0a2f; # LETTER YA
\ue030>\u0a30; # LETTER RA
\ue031>\u0a30\u0a3c; # FALLBACK LETTER RA+NUKTA
\ue032>\u0a32; # LETTER LA
\ue033>\u0a33; # LETTER LLA
\ue034>\u0a33; # REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA
\ue035>\u0a35; # LETTER VA
\ue036>\u0a36; # LETTER SHA
\ue037>\u0a36; # REMAP (indicExceptions.txt): \u0a37>\u0a36 = LETTER SSA>LETTER SHA
\ue038>\u0a38; # LETTER SA
\ue039>\u0a39; # LETTER HA
\ue03c>\u0a3c; # SIGN NUKTA
\ue03d>; # FALLBACK BLOW AWAY SIGN AVAGRAHA
\ue03e>\u0a3e; # VOWEL SIGN AA
\ue03f>\u0a3f; # VOWEL SIGN I
\ue040>\u0a40; # VOWEL SIGN II
\ue041>\u0a41; # VOWEL SIGN U
\ue042>\u0a42; # VOWEL SIGN UU
\ue043>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC R
\ue044>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR
\ue045>\u0a48; # REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI
\ue046>\u0a47; # FALLBACK
\ue047>\u0a47; # VOWEL SIGN EE
\ue048>\u0a48; # VOWEL SIGN AI
\ue049>\u0a4c; # REMAP (indicExceptions.txt): \u0a49>\u0a4c = VOWEL SIGN CANDRA O>VOWEL SIGN AU
\ue04a>\u0a4b; # FALLBACK
\ue04b>\u0a4b; # VOWEL SIGN OO
\ue04c>\u0a4c; # VOWEL SIGN AU
\ue04d>\u0a4d; # SIGN VIRAMA
\ue050>\u0a13\u0a02; # FALLBACK to OO+BINDI : OM (\u0a13 = LETTER OO, \u0a02 = SIGN BINDI)
\ue051>;
\ue052>;
\ue053>;
\ue054>;
\ue055>; # FALLBACK BLOW AWAY LENGTH MARK
\ue056>\u0a48; # REMAP (indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI
\ue057>\u0a4c; # REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU
\ue058>\u0a15\u0a3c; # FALLBACK KA + NUKTA
\ue059>\u0a59; # LETTER KHHA
\ue05a>\u0a5a; # LETTER GHHA
\ue05b>\u0a5b; # LETTER ZA
\ue05c>\u0a5c; # LETTER RRA
\ue05d>\u0a22\u0a3c; # REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA
\ue05e>\u0a5e; # LETTER FA
\ue05f>\u0a2f\u0a3c; # REMAP (indicExceptions.txt): \u0a5f>\u0a2f\u0a3c = LETTER YYA>LETTER YA.SIGN NUKTA
\ue060>\u0a30\u0a3f; # REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
\ue061>\u0a32\u0a3c; # FALLBACK LETTER VOCALIC LL>LETTER LA.SIGN NUKTA
\ue062>\u0a3f\u0a3c; # REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
\ue063>\u0a40\u0a3c; # REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
\uE064>\u0964; # DANDA
\uE065>\u0965; # DOUBLE DANDA
\ue066>\u0a66; # DIGIT ZERO
\ue067>\u0a67; # DIGIT ONE
\ue068>\u0a68; # DIGIT TWO
\ue069>\u0a69; # DIGIT THREE
\ue06a>\u0a6a; # DIGIT FOUR
\ue06b>\u0a6b; # DIGIT FIVE
\ue06c>\u0a6c; # DIGIT SIX
\ue06d>\u0a6d; # DIGIT SEVEN
\ue06e>\u0a6e; # DIGIT EIGHT
\ue06f>\u0a6f; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
\ue071>\u0a30; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0a30; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>\u0a70; # TIPPI
\uE07D>\u0a71; # ADDAK
\uE07E>\u0a72; # IRI
\uE07F>\u0a73; # URA
\uE080>\u0a74; # EK ONKAR
\uE081>\u0a35; # FALLBACK FOR ORIYA LETTER WA
0 > \u0a66; # FALLBACK FOR TAMIL
1 > \u0a67;
# :: NFC (NFD) ;
# eof

View file

@ -1,141 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Kannada
#:: NFD (NFC) ;
\ue033\ue03c>\u0cde; # LETTER FA
\ue001>\u0c82; # REMAP (indicExceptions.txt): \u0c81>\u0c82 = SIGN CANDRABINDU>SIGN ANUSVARA
\ue002>\u0c82; # SIGN ANUSVARA
\ue003>\u0c83; # SIGN VISARGA
\uE004>\u0c85; # FALLBACK TO LETTER A
\ue005>\u0c85; # LETTER A
\ue006>\u0c86; # LETTER AA
\ue007>\u0c87; # LETTER I
\ue008>\u0c88; # LETTER II
\ue009>\u0c89; # LETTER U
\ue00a>\u0c8a; # LETTER UU
\ue00b>\u0c8b; # LETTER VOCALIC R
\ue00c>\u0c8c; # LETTER VOCALIC L
\ue00d>\u0c8e; # FALLBACK (LETTER CANDRA E > LETTER E)
\ue00e>\u0c8e; # LETTER E
\ue00f>\u0c8f; # LETTER EE
\ue010>\u0c90; # LETTER AI
\ue011>\u0c92; # FALLBACK
\ue012>\u0c92; # LETTER O
\ue013>\u0c93; # LETTER OO
\ue014>\u0c94; # LETTER AU
\ue015>\u0c95; # LETTER KA
\ue016>\u0c96; # LETTER KHA
\ue017>\u0c97; # LETTER GA
\ue018>\u0c98; # LETTER GHA
\ue019>\u0c99; # LETTER NGA
\ue01a>\u0c9a; # LETTER CA
\ue01b>\u0c9b; # LETTER CHA
\ue01c>\u0c9c; # LETTER JA
\ue01d>\u0c9d; # LETTER JHA
\ue01e>\u0c9e; # LETTER NYA
\ue01f>\u0c9f; # LETTER TTA
\ue020>\u0ca0; # LETTER TTHA
\ue021>\u0ca1; # LETTER DDA
\ue022>\u0ca2; # LETTER DDHA
\ue023>\u0ca3; # LETTER NNA
\ue024>\u0ca4; # LETTER TA
\ue025>\u0ca5; # LETTER THA
\ue026>\u0ca6; # LETTER DA
\ue027>\u0ca7; # LETTER DHA
\ue028>\u0ca8; # LETTER NA
\ue029>\u0ca8; # REMAP (indicExceptions.txt): \u0ca9>\u0ca8 = LETTER NNNA>LETTER NA
\ue02a>\u0caa; # LETTER PA
\ue02b>\u0cab; # LETTER PHA
\ue02c>\u0cac; # LETTER BA
\ue02d>\u0cad; # LETTER BHA
\ue02e>\u0cae; # LETTER MA
\ue02f>\u0caf; # LETTER YA
\ue030\ue03c>\u0cb1; # LETTER RA.SIGN NUKTA > LETTER RRA (must precede the plain LETTER RA rule)
\ue030>\u0cb0; # LETTER RA
\ue031>\u0cb1; # LETTER RRA
\ue032>\u0cb2; # LETTER LA
\ue033>\u0cb3; # LETTER LLA
\ue034>\u0cde; # REMAP (indicExceptions.txt): \u0cb4>\u0cde = LETTER LLLA>LETTER FA
\ue035>\u0cb5; # LETTER VA
\ue036>\u0cb6; # LETTER SHA
\ue037>\u0cb7; # LETTER SSA
\ue038>\u0cb8; # LETTER SA
\ue039>\u0cb9; # LETTER HA
\ue03c>\u0cbc; # NUKTA
\ue03d>\u0cbd; # AVAGRAHA
\ue03e>\u0cbe; # VOWEL SIGN AA
\ue03f>\u0cbf; # VOWEL SIGN I
\ue040>\u0cc0; # VOWEL SIGN II
\ue041>\u0cc1; # VOWEL SIGN U
\ue042>\u0cc2; # VOWEL SIGN UU
\ue043>\u0cc3; # VOWEL SIGN VOCALIC R
\ue044>\u0cc4; # VOWEL SIGN VOCALIC RR
\ue045>\u0cc6; # REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E
\ue046>\u0cc6; # VOWEL SIGN E
\ue047>\u0cc7; # VOWEL SIGN EE
\ue048>\u0cc8; # VOWEL SIGN AI
\ue049>\u0cca; # REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O
\ue04a>\u0cca; # VOWEL SIGN O
\ue04b>\u0ccb; # VOWEL SIGN OO
\ue04c>\u0ccc; # VOWEL SIGN AU
\ue04d>\u0ccd; # SIGN VIRAMA
\ue050>\u0c93\u0c82; # REMAP (indicExceptions.txt): \u0cd0>\u0c93\u0c82 = OM>LETTER OO.SIGN ANUSVARA
\ue051>;
\ue052>;
\ue053>;
\ue054>;
\ue055>\u0cd5; # LENGTH MARK
\ue056>\u0cd6; # AI LENGTH MARK
\ue057>\u0ccc; # REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU
\ue058>\u0c95; # FALLBACK
\ue059>\u0c96; # REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA
\ue05a>\u0c97; # REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA
\ue05b>\u0c9c; # REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA
\ue05c>\u0ca1; # FALLBACK to LETTER DDA (the DDHA-based fallback belongs to \ue05d below)
\ue05d>\u0ca2; # REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA
\ue05e>\u0cde; # LETTER FA
\ue05f>\u0caf; # REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA
\ue060>\u0ce0; # LETTER VOCALIC RR
\ue061>\u0ce1; # LETTER VOCALIC LL
\ue062>\u0cbf; # REMAP (indicExceptions.txt): \u0ce2>\u0cbf = VOWEL SIGN VOCALIC L>VOWEL SIGN I
\ue063>\u0cc0; # REMAP (indicExceptions.txt): \u0ce3>\u0cc0 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
\ue064>'.' ; # FALLBACK FOR DANDA
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
\ue066>\u0ce6; # DIGIT ZERO
\ue067>\u0ce7; # DIGIT ONE
\ue068>\u0ce8; # DIGIT TWO
\ue069>\u0ce9; # DIGIT THREE
\ue06a>\u0cea; # DIGIT FOUR
\ue06b>\u0ceb; # DIGIT FIVE
\ue06c>\u0cec; # DIGIT SIX
\ue06d>\u0ced; # DIGIT SEVEN
\ue06e>\u0cee; # DIGIT EIGHT
\ue06f>\u0cef; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
\ue071>\u0cb0; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0cb0; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0cb5; # FALLBACK FOR ORIYA LETTER WA
0 > \u0ce6; # FALLBACK FOR TAMIL
1 > \u0ce7;
# :: NFC (NFD) ;
# eof

View file

@ -1,529 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Latin
#\u0e00 reserved
#consonants
$chandrabindu=\ue001;
$anusvara=\ue002;
$visarga=\ue003;
#\u0e004 reserved
# w<vowel> represents the stand-alone form
$wa=\ue005;
$waa=\ue006;
$wi=\ue007;
$wii=\ue008;
$wu=\ue009;
$wuu=\ue00a;
$wr=\ue00b;
$wl=\ue00c;
$wce=\ue00d; # LETTER CANDRA E
$wse=\ue00e; # LETTER SHORT E
$we=\ue00f; # \u090f LETTER E
$wai=\ue010;
$wco=\ue011; # LETTER CANDRA O
$wso=\ue012; # LETTER SHORT O
$wo=\ue013; # \u0913 LETTER O
$wau=\ue014;
$ka=\ue015;
$kha=\ue016;
$ga=\ue017;
$gha=\ue018;
$nga=\ue019;
$ca=\ue01a;
$cha=\ue01b;
$ja=\ue01c;
$jha=\ue01d;
$nya=\ue01e;
$tta=\ue01f;
$ttha=\ue020;
$dda=\ue021;
$ddha=\ue022;
$nna=\ue023;
$ta=\ue024;
$tha=\ue025;
$da=\ue026;
$dha=\ue027;
$na=\ue028;
$ena=\ue029; #compatibility
$pa=\ue02a;
$pha=\ue02b;
$ba=\ue02c;
$bha=\ue02d;
$ma=\ue02e;
$ya=\ue02f;
$ra=\ue030;
$vva=\ue081;
$rra=\ue031;
$la=\ue032;
$lla=\ue033;
$ela=\ue034; #compatibility
$va=\ue035;
$sha=\ue036;
$ssa=\ue037;
$sa=\ue038;
$ha=\ue039;
#\u093a Reserved
#\u093b Reserved
$nukta=\ue03c;
$avagraha=\ue03d; # SIGN AVAGRAHA
# <vowel> represents the dependent form
$aa=\ue03e;
$i=\ue03f;
$ii=\ue040;
$u=\ue041;
$uu=\ue042;
$rh=\ue043;
$lh=\ue044;
$ce=\ue045; #VOWEL SIGN CANDRA E
$se=\ue046; #VOWEL SIGN SHORT E
$e=\ue047;
$ai=\ue048;
$co=\ue049; # VOWEL SIGN CANDRA O
$so=\ue04a; # VOWEL SIGN SHORT O
$o=\ue04b; # \u094b
$au=\ue04c;
$virama=\ue04d;
# \u094e Reserved
# \u094f Reserved
$om=\ue050; # OM
\ue051>; # UNMAPPED STRESS SIGN UDATTA
\ue052>; # UNMAPPED STRESS SIGN ANUDATTA
\ue053>; # UNMAPPED GRAVE ACCENT
\ue054>; # UNMAPPED ACUTE ACCENT
$lm = \ue055;# Telugu Length Mark
$ailm=\ue056;# AI Length Mark
$aulm=\ue057;# AU Length Mark
#urdu compatibility forms
$uka=\ue058;
$ukha=\ue059;
$ugha=\ue05a;
$ujha=\ue05b;
$uddha=\ue05c;
$udha=\ue05d;
$ufa=\ue05e;
$uya=\ue05f;
$wrr=\ue060;
$wll=\ue061;
$rrh=\ue062;
$llh=\ue063;
$danda=\ue064;
$doubleDanda=\ue065;
$zero=\ue066; # DIGIT ZERO
$one=\ue067; # DIGIT ONE
$two=\ue068; # DIGIT TWO
$three=\ue069; # DIGIT THREE
$four=\ue06a; # DIGIT FOUR
$five=\ue06b; # DIGIT FIVE
$six=\ue06c; # DIGIT SIX
$seven=\ue06d; # DIGIT SEVEN
$eight=\ue06e; # DIGIT EIGHT
$nine=\ue06f; # DIGIT NINE
# \u0970>; # UNMAPPED ABBREVIATION SIGN
$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
$depVowelBelow=[\ue041-\ue044];
# $x was originally called '&'; $z was '%'
$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];
$z=[bcdfghjklmnpqrstvwxyz];
$vowels=[aeiour\u0304\u0325\u0306];
$forceIndependentMatra = [^[[:L:][\u0300-\u034c]]];
######################################################################
# convert from Native letters to Latin letters
######################################################################
#transliterations for anusvara
$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;
$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;
$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;
$anusvara} [$ta$tha$da$dha$na] > n ;
$anusvara} [$pa$pha$ba$bha$ma] > m ;
$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;
$anusvara> m\u0307;
# Urdu compatibility
$ya$nukta}$x > y\u0307 ;
$ya$nukta$virama > y\u0307 ;
$ya$nukta > y\u0307a ;
$la$nukta }$x > l\u0331 ;
$la$nukta$virama > l\u0331 ;
$la$nukta > l\u0331a ;
$na$nukta }$x > n\u0331 ;
$na$nukta$virama > n\u0331 ;
$na$nukta > n\u0331a ;
$ena }$x > n\u0331 ;
$ena$virama > n\u0331 ;
$ena > n\u0331a ;
# Each letter below gets the same three rules as every other consonant:
#   1. before a dependent vowel ($x): bare consonant, the vowel rule supplies the vowel
#   2. followed by virama: bare consonant, virama consumed
#   3. otherwise: consonant plus the inherent 'a'
# The precomposed forms ($uka, $ukha, $ugha, $ujha, $udha) mirror their
# <base>$nukta spellings so both produce the same Latin in every context.
$uka }$x > q ;
$uka$virama > q ;
$uka > qa ;
$ka$nukta }$x > q ;
$ka$nukta$virama > q ;
$ka$nukta > qa ;
$kha$nukta }$x > k\u0331h\u0331 ;
$kha$nukta$virama > k\u0331h\u0331 ;
$kha$nukta > k\u0331h\u0331a ;
$ukha }$x > k\u0331h\u0331;
$ukha$virama > k\u0331h\u0331;
$ukha > k\u0331h\u0331a;
$ugha }$x > g\u0307 ;
$ugha$virama > g\u0307 ;
$ugha > g\u0307a ;
$ga$nukta }$x > g\u0307 ;
$ga$nukta$virama > g\u0307 ;
$ga$nukta > g\u0307a ;
$ujha }$x > z ;
$ujha$virama > z ;
$ujha > za ;
$ja$nukta }$x > z ;
$ja$nukta$virama > z ;
$ja$nukta > za ;
$ddha$nukta}$x > r\u0323h ;
$ddha$nukta$virama > r\u0323h ;
$ddha$nukta > r\u0323ha;
$uddha}$x > r\u0323 ;
$uddha$virama > r\u0323 ;
$uddha > r\u0323a;
# NOTE(review): $udha yields the same Latin as $uddha, while the $ddha$nukta
# spelling above yields r\u0323h - confirm which output is intended for $udha.
$udha }$x > r\u0323 ;
$udha$virama > r\u0323 ;
$udha > r\u0323a ;
$dda$nukta}$x > r\u0323 ;
$dda$nukta$virama > r\u0323 ;
$dda$nukta > r\u0323a ;
$pha$nukta }$x > f ;
$pha$nukta$virama > f ;
$pha$nukta > fa ;
$ufa }$x > f ;
$ufa$virama > f ;
$ufa > fa ;
$ra$nukta}$x > r\u0331;
$ra$nukta$virama > r\u0331;
$ra$nukta > r\u0331a;
$lla$nukta}$x > l\u0331;
$lla$nukta$virama > l\u0331;
$lla$nukta > l\u0331a;
$ela}$x > l\u0331;
$ela$virama > l\u0331;
$ela > l\u0331a;
$uya}$x > y\u0307;
$uya$virama > y\u0307;
$uya > y\u0307a;
# normal consonants
$ka$virama}$ha>k'';
$ka}$x>k;
$ka$virama>k;
$ka>ka;
$kha}$x>kh;
$kha$virama>kh;
$kha>kha;
$ga$virama}$ha>g'';
$ga}$x>g;
$ga$virama>g;
$ga>ga;
$gha}$x>gh;
$gha$virama>gh;
$gha>gha;
$nga}$x>n\u0307;
$nga$virama>n\u0307;
$nga>n\u0307a ;
$ca$virama}$ha>c'';
$ca}$x>c;
$ca$virama>c;
$ca>ca;
$cha}$x>ch;
$cha$virama>ch;
$cha>cha;
$ja$virama}$ha>j'';
$ja}$x>j;
$ja$virama>j;
$ja>ja;
$jha}$x>jh;
$jha$virama>jh;
$jha>jha;
$nya }$x>n\u0303 ;
$nya$virama>n\u0303;
$nya > n\u0303a ;
$tta$virama}$ha>t\u0323'';
$tta}$x>t\u0323;
$tta$virama>t\u0323;
$tta>t\u0323a;
$ttha}$x>t\u0323h;
$ttha$virama>t\u0323h;
$ttha>t\u0323ha;
# $dda + virama directly before $ha: emit an apostrophe after the consonant so
# the result is not read as the aspirate $ddha (d\u0323h). Same shape as the
# $tta / $ta / $da groups.
$dda$virama}$ha>d\u0323'';
$dda}$x>d\u0323;
$dda$virama>d\u0323;
$dda>d\u0323a;
$ddha}$x>d\u0323h;
$ddha$virama>d\u0323h;
$ddha>d\u0323ha;
$nna}$x>n\u0323 ;
$nna$virama>n\u0323;
$nna>n\u0323a ;
$ta$virama}$ha>t'';
$ta$virama}$ttha>t'';
$ta$virama}$tta>t'';
$ta$virama}$tha>t'';
$ta}$x>t;
$ta$virama>t;
$ta>ta;
$tha}$x>th;
$tha$virama>th;
$tha>tha;
$da$virama}$ha>d'';
$da$virama}$ddha>d'';
$da$virama}$dda>d'';
$da$virama}$dha>d'';
$da}$x>d;
$da$virama>d;
$da>da;
$dha}$x>dh;
$dha$virama>dh;
$dha>dha;
$na$virama}$ga>n'';
$na$virama}$ya>n'';
$na}$x>n;
$na$virama>n;
$na>na;
$pa$virama}$ha>p'';
$pa}$x>p;
$pa$virama>p;
$pa>pa;
$pha}$x>ph;
$pha$virama>ph;
$pha>pha;
$ba$virama}$ha>b'';
$ba}$x>b;
$ba$virama>b;
$ba>ba;
$bha}$x>bh;
$bha$virama>bh;
$bha>bha;
$ma$virama}$ma>m'';
$ma}$x>m;
$ma$virama>m;
$ma>ma;
$ya}$x>y;
$ya$virama>y;
$ya>ya;
$ra$virama}$ha>r'';
$ra}$x>r;
$ra$virama>r;
$ra>ra;
$vva$virama}$ha>w\u0307'';
$vva}$x>w\u0307;
$vva$virama>w\u0307;
$vva>w\u0307a;
$rra$virama}$ha>r\u0331'';
$rra}$x>r\u0331;
$rra$virama>r\u0331;
$rra>r\u0331a;
$la$virama}$ha>l'';
$la}$x>l;
$la$virama>l;
$la>la;
$lla$virama}$ha>l\u0323'';
$lla}$x>l\u0323;
$lla$virama>l\u0323;
$lla>l\u0323a;
$va}$x>v;
$va$virama>v;
$va>va;
$sa$virama}$ha>s'';
$sa$virama}$sha>s'';
$sa$virama}$ssa>s'';
$sa$virama}$sa>s'';
$sa}$x>s;
$sa$virama>s;
#for gurmukhi
$sa$nukta}$x>s\u0301;
$sa$nukta$virama>s\u0301;
$sa$nukta>s\u0301a;
$sa>sa;
$sha}$x>s\u0301;
$sha$virama>s\u0301;
$sha>s\u0301a;
$ssa}$x>s\u0323;
$ssa$virama>s\u0323;
$ssa>s\u0323a;
$ha}$x>h;
$ha$virama>h;
$ha>ha;
# dependent vowels (should never occur except following consonants)
$forceIndependentMatra{$aa > \u0314a\u0304 ;
$forceIndependentMatra{$ai > \u0314ai ;
$forceIndependentMatra{$au > \u0314au ;
$forceIndependentMatra{$ii > \u0314i\u0304 ;
$forceIndependentMatra{$i > \u0314i ;
$forceIndependentMatra{$uu > \u0314u\u0304 ;
$forceIndependentMatra{$u > \u0314u ;
$forceIndependentMatra{$rrh > \u0314r\u0325\u0304 ;
$forceIndependentMatra{$rh > \u0314r\u0325 ;
$forceIndependentMatra{$llh > \u0314l\u0325\u0304 ;
$forceIndependentMatra{$lh > \u0314l\u0325 ;
$forceIndependentMatra{$e > \u0314e\u0304 ;
$forceIndependentMatra{$o > \u0314o\u0304 ;
#extra vowels
$forceIndependentMatra{$ce > \u0314e\u0306 ;
$forceIndependentMatra{$co > \u0314o\u0306 ;
$forceIndependentMatra{$se > \u0314e ;
$forceIndependentMatra{$so > \u0314o ;
$forceIndependentMatra{$nukta >; # Nukta cannot appear independently or as first character
$forceIndependentMatra{$virama >; # Virama cannot appear independently or as first character
$aa > a\u0304 ;
$ai > ai ;
$au > au ;
$ii > i\u0304 ;
$i > i ;
$uu > u\u0304 ;
$u > u ;
$rrh > r\u0325\u0304 ;
$rh > r\u0325 ;
$llh > l\u0325\u0304 ;
$lh > l\u0325 ;
$e > e\u0304 ;
$o > o\u0304 ;
#extra vowels
$ce > e\u0306 ;
$co > o\u0306 ;
$se > e ;
$so > o ;
#dependent vowels when following independent vowels. Generally Illegal only for roundtripping
$waa} $x > a\u0304\u0314 ;
$wai} $x > ai\u0314 ;
$wau} $x > au\u0314 ;
$wii} $x > i\u0304\u0314 ;
$wi } $x > i\u0314 ;
$wuu} $x > u\u0304\u0314 ;
$wu } $x > u\u0314 ;
$wrr} $x > r\u0325\u0304\u0314 ;
$wr } $x > r\u0325\u0314 ;
$wll} $x > l\u0325\u0304\u0314 ;
$wl } $x > l\u0325\u0314 ;
$we } $x > e\u0304\u0314 ;
$wo } $x > o\u0304\u0314 ;
$wa } $x > a\u0314 ;
#extra vowels
$wce} $x > e\u0306\u0314 ;
$wco} $x > o\u0306\u0314 ;
$wse} $x > e\u0314 ;
$wso} $x > o\u0314 ;
$om} $x > ''om\u0314 ;
# independent vowels when preceded by vowels
$vowels{$waa > ''a\u0304 ;
$vowels{$wai > ''ai ;
$vowels{$wau > ''au ;
$vowels{$wii > ''i\u0304 ;
$vowels{$wi > ''i ;
$vowels{$wuu > ''u\u0304 ;
$vowels{$wu > ''u ;
$vowels{$wrr > ''r\u0325\u0304 ;
$vowels{$wr > ''r\u0325 ;
$vowels{$wll > ''l\u0325\u0304 ;
$vowels{$wl > ''l\u0325 ;
$vowels{$we > ''e\u0304 ;
$vowels{$wo > ''o\u0304 ;
$vowels{$wa > ''a ;
#extra vowels
$vowels{$wce > ''e\u0306 ;
$vowels{$wco > ''o\u0306 ;
$vowels{$wse > ''e ;
$vowels{$wso > ''o ;
# independent vowels (otherwise)
$waa > a\u0304 ;
$wai > ai ;
$wau > au ;
$wii > i\u0304 ;
$wi > i ;
$wuu > u\u0304 ;
$wu > u ;
$wrr > r\u0325\u0304 ;
$wr > r\u0325 ;
$wll > l\u0325\u0304 ;
$wl > l\u0325 ;
$we > e\u0304 ;
$wo > o\u0304 ;
$wa > a ;
#extra vowels
$wce > e\u0306 ;
$wco > o\u0306 ;
$wse > e ;
$wso > o ;
$om > ''om ;
#stress marks
$avagraha > \u0315;
$chandrabindu$anusvara>\u0303;
$chandrabindu > m\u0310;
$visarga>h\u0323;
#numbers
$zero > 0;
$one > 1;
$two > 2;
$three > 3;
$four > 4;
$five > 5;
$six > 6;
$seven > 7;
$eight > 8;
$nine > 9;
$lm >;
$ailm >;
$aulm >;
$danda>'.';
$doubleDanda>'.';
\ue070>; # ABBREVIATION SIGN
# LETTER RA WITH MIDDLE DIAGONAL
\ue071}$x>ra;
\ue071$virama>r;
\ue071>ra;
# LETTER RA WITH LOWER DIAGONAL
\ue072}$x>ra;
\ue072$virama>r;
\ue072>ra;
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE004>; # DEVANAGARI VOWEL SIGN SHORT A

View file

@ -1,141 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Malayalam
#:: NFD (NFC) ;
\ue001>\u0d02; # REMAP (indicExceptions.txt): \u0d01>\u0d02 = SIGN CANDRABINDU>SIGN ANUSVARA
\ue002>\u0d02; # SIGN ANUSVARA
\ue003>\u0d03; # SIGN VISARGA
\uE004>\u0d05; # FALLBACK TO LETTER A
\ue005>\u0d05; # LETTER A
\ue006>\u0d06; # LETTER AA
\ue007>\u0d07; # LETTER I
\ue008>\u0d08; # LETTER II
\ue009>\u0d09; # LETTER U
\ue00a>\u0d0a; # LETTER UU
\ue00b>\u0d0b; # LETTER VOCALIC R
\ue00c>\u0d0c; # LETTER VOCALIC L
\ue00d>\u0d0e; # FALLBACK LETTER E
\ue00e>\u0d0e; # LETTER E
\ue00f>\u0d0f; # LETTER EE
\ue010>\u0d10; # LETTER AI
\ue011>\u0d12; # FALLBACK TO O
\ue012>\u0d12; # LETTER O
\ue013>\u0d13; # LETTER OO
\ue014>\u0d14; # LETTER AU
\ue015>\u0d15; # LETTER KA
\ue016>\u0d16; # LETTER KHA
\ue017>\u0d17; # LETTER GA
\ue018>\u0d18; # LETTER GHA
\ue019>\u0d19; # LETTER NGA
\ue01a>\u0d1a; # LETTER CA
\ue01b>\u0d1b; # LETTER CHA
\ue01c>\u0d1c; # LETTER JA
\ue01d>\u0d1d; # LETTER JHA
\ue01e>\u0d1e; # LETTER NYA
\ue01f>\u0d1f; # LETTER TTA
\ue020>\u0d20; # LETTER TTHA
\ue021>\u0d21; # LETTER DDA
\ue022>\u0d22; # LETTER DDHA
\ue023>\u0d23; # LETTER NNA
\ue024>\u0d24; # LETTER TA
\ue025>\u0d25; # LETTER THA
\ue026>\u0d26; # LETTER DA
\ue027>\u0d27; # LETTER DHA
\ue028>\u0d28; # LETTER NA
\ue029>\u0d28; # REMAP (indicExceptions.txt): \u0d29>\u0d28 = LETTER NNNA>LETTER NA
\ue02a>\u0d2a; # LETTER PA
\ue02b>\u0d2b; # LETTER PHA
\ue02c>\u0d2c; # LETTER BA
\ue02d>\u0d2d; # LETTER BHA
\ue02e>\u0d2e; # LETTER MA
\ue02f>\u0d2f; # LETTER YA
\ue030\ue03c>\u0d31; # LETTER RA.SIGN NUKTA > LETTER RRA (must precede the plain LETTER RA rule)
\ue030>\u0d30; # LETTER RA
\ue031>\u0d31; # LETTER RRA
\ue032>\u0d32; # LETTER LA
\ue033\ue03c>\u0d34; # LETTER LLA.SIGN NUKTA > LETTER LLLA (must precede the plain LETTER LLA rule)
\ue033>\u0d33; # LETTER LLA
\ue034>\u0d34; # LETTER LLLA
\ue035>\u0d35; # LETTER VA
\ue036>\u0d36; # LETTER SHA
\ue037>\u0d37; # LETTER SSA
\ue038>\u0d38; # LETTER SA
\ue039>\u0d39; # LETTER HA
\ue03c>; # FALLBACK BLOW AWAY NUKTA
\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA
\ue03e>\u0d3e; # VOWEL SIGN AA
\ue03f>\u0d3f; # VOWEL SIGN I
\ue040>\u0d40; # VOWEL SIGN II
\ue041>\u0d41; # VOWEL SIGN U
\ue042>\u0d42; # VOWEL SIGN UU
\ue043>\u0d43; # VOWEL SIGN VOCALIC R
\ue044>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR
\ue045>\u0d3e; # REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA
\ue046>\u0d46; # VOWEL SIGN E
\ue047>\u0d47; # VOWEL SIGN EE
\ue048>\u0d48; # VOWEL SIGN AI
\ue049>\u0d4b; # REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO
\ue04a>\u0d4a; # VOWEL SIGN O
\ue04b>\u0d4b; # VOWEL SIGN OO
\ue04c>\u0d4c; # VOWEL SIGN AU
\ue04d>\u0d4d; # SIGN VIRAMA
\ue050>\u0d13\u0d02; # FALLBACK OM>LETTER OO.SIGN ANUSVARA
\ue051>;
\ue052>;
\ue053>;
\ue054>;
\ue055>; # FALLBACK BLOW AWAY LENGTH MARK
\ue056>\u0d48; # REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI
\ue057>\u0d57; # AU LENGTH MARK
\ue058>\u0d15; # FALLBACK
\ue059>\u0d16; # REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA
\ue05a>\u0d17; # REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA
\ue05b>\u0d1c; # REMAP (indicExceptions.txt): \u0d5b>\u0d1c = LETTER ZA>LETTER JA
\ue05d>\u0d22; # REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA
\ue05c>\u0d21; # FALLBACK
\ue05e>\u0d2b; # REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA
\ue05f>\u0d2f; # REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA
\ue060>\u0d60; # LETTER VOCALIC RR
\ue061>\u0d61; # LETTER VOCALIC LL
\ue062>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC L
\ue063>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC LL
\ue064>'.' ; # FALLBACK FOR DANDA
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
\ue066>\u0d66; # DIGIT ZERO
\ue067>\u0d67; # DIGIT ONE
\ue068>\u0d68; # DIGIT TWO
\ue069>\u0d69; # DIGIT THREE
\ue06a>\u0d6a; # DIGIT FOUR
\ue06b>\u0d6b; # DIGIT FIVE
\ue06c>\u0d6c; # DIGIT SIX
\ue06d>\u0d6d; # DIGIT SEVEN
\ue06e>\u0d6e; # DIGIT EIGHT
\ue06f>\u0d6f; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
\ue071>\u0d30; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0d30; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0d35; # FALLBACK FOR ORIYA LETTER WA
0 > \u0d66; # FALLBACK FOR TAMIL
1 > \u0d67;
# :: NFC (NFD) ;
# eof

View file

@ -1,137 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Oriya
#:: NFD (NFC) ;
\ue001>\u0b01; # SIGN CANDRABINDU
\ue002>\u0b02; # SIGN ANUSVARA
\ue003>\u0b03; # SIGN VISARGA
\uE004>\u0b05; # FALLBACK TO LETTER A
\ue005>\u0b05; # LETTER A
\ue006>\u0b06; # LETTER AA
\ue007>\u0b07; # LETTER I
\ue008>\u0b08; # LETTER II
\ue009>\u0b09; # LETTER U
\ue00a>\u0b0a; # LETTER UU
\ue00b>\u0b0b; # LETTER VOCALIC R
\ue00c>\u0b0c; # LETTER VOCALIC L
\ue00d>\u0b0f; # FALLBACK
\ue00e>\u0b0f; # FALLBACK
\ue00f>\u0b0f; # LETTER E
\ue010>\u0b10; # LETTER AI
\ue011>\u0b13; # FALLBACK
\ue012>\u0b13; # FALLBACK
\ue013>\u0b13; # FALLBACK LETTER OO (\u0b13 = LETTER O)
\ue014>\u0b14; # LETTER AU
\ue015>\u0b15; # LETTER KA
\ue016>\u0b16; # LETTER KHA
\ue017>\u0b17; # LETTER GA
\ue018>\u0b18; # LETTER GHA
\ue019>\u0b19; # LETTER NGA
\ue01a>\u0b1a; # LETTER CA
\ue01b>\u0b1b; # LETTER CHA
\ue01c>\u0b1c; # LETTER JA
\ue01d>\u0b1d; # LETTER JHA
\ue01e>\u0b1e; # LETTER NYA
\ue01f>\u0b1f; # LETTER TTA
\ue020>\u0b20; # LETTER TTHA
\ue021>\u0b21; # LETTER DDA
\ue022>\u0b22; # LETTER DDHA
\ue023>\u0b23; # LETTER NNA
\ue024>\u0b24; # LETTER TA
\ue025>\u0b25; # LETTER THA
\ue026>\u0b26; # LETTER DA
\ue027>\u0b27; # LETTER DHA
\ue028>\u0b28; # LETTER NA
\ue029>\u0b28\u0b3c; # FALLBACK \u0b29>\u0b28 = LETTER NNNA>LETTER NA
\ue02a>\u0b2a; # LETTER PA
\ue02b>\u0b2b; # LETTER PHA
\ue02c>\u0b2c; # LETTER BA
\ue02d>\u0b2d; # LETTER BHA
\ue02e>\u0b2e; # LETTER MA
\ue02f>\u0b2f; # LETTER YA
\ue030>\u0b30; # LETTER RA
\ue031>\u0b5c; # LETTER RRA
\ue032>\u0b32; # LETTER LA
\ue033>\u0b33; # LETTER LLA
\ue034>\u0b33\u0b3c; # FALLBACK LETTER LLLA>LETTER LLA
\ue035>\u0b35; # LETTER VA
\ue036>\u0b36; # LETTER SHA
\ue037>\u0b37; # LETTER SSA
\ue038>\u0b38; # LETTER SA
\ue039>\u0b39; # LETTER HA
\ue03c>\u0b3c; # SIGN NUKTA
\ue03d>\u0b3d; # SIGN AVAGRAHA
\ue03e>\u0b3e; # VOWEL SIGN AA
\ue03f>\u0b3f; # VOWEL SIGN I
\ue040>\u0b40; # VOWEL SIGN II
\ue041>\u0b41; # VOWEL SIGN U
\ue042>\u0b42; # VOWEL SIGN UU
\ue043>\u0b43; # VOWEL SIGN VOCALIC R
\ue044>\u0b43\u0b3c; # FALLBACK \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA
\ue045>\u0b47; # FALLBACK
\ue046>\u0b47; # FALLBACK
\ue047>\u0b47; # VOWEL SIGN E
\ue048>\u0b48; # VOWEL SIGN AI
\ue049>\u0b4b; # FALLBACK
\ue04a>\u0b4b; # FALLBACK
\ue04b>\u0b4b; # VOWEL SIGN OO (\u0b4b = VOWEL SIGN O)
\ue04c>\u0b4c; # VOWEL SIGN AU
\ue04d>\u0b4d; # SIGN VIRAMA
\ue050>\u0b13\u0b01; # FALLBACK \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU
\ue051>;
\ue052>;
\ue053>;
\ue054>;
\ue055>; # UNMAPPED InterIndic-Oriya: LENGTH MARK
\ue056>\u0b56; # AI LENGTH MARK
\ue057>\u0b57; # AU LENGTH MARK
\ue059>\u0b16\u0b3c; # FALLBACK \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA
\ue058>\u0b15\u0b3c; # FALLBACK
\ue05a>\u0b17\u0b3c; # FALLBACK \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA
\ue05b>\u0b1c\u0b3c; # FALLBACK \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA
\ue05c>\u0b21\u0b3c; # FALLBACK
\ue05d>\u0b5d; # LETTER RHA
\ue05e>\u0b2b\u0b3c; # FALLBACK \u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA
\ue05f>\u0b5f; # LETTER YYA
\ue060>\u0b60; # LETTER VOCALIC RR
\ue061>\u0b61; # LETTER VOCALIC LL
\ue062>\u0b56\u0b3c; # FALLBACK \u0b62>\u0b56\u0b3c = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA
\ue063>\u0b57\u0b3c; # FALLBACK \u0b63>\u0b57\u0b3c = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA
\uE064>\u0964; # DANDA
\uE065>\u0965; # DOUBLE DANDA
\ue066>\u0b66; # DIGIT ZERO
\ue067>\u0b67; # DIGIT ONE
\ue068>\u0b68; # DIGIT TWO
\ue069>\u0b69; # DIGIT THREE
\ue06a>\u0b6a; # DIGIT FOUR
\ue06b>\u0b6b; # DIGIT FIVE
\ue06c>\u0b6c; # DIGIT SIX
\ue06d>\u0b6d; # DIGIT SEVEN
\ue06e>\u0b6e; # DIGIT EIGHT
\ue06f>\u0b6f; # DIGIT NINE
\ue070>; # ABBREVIATION SIGN
\ue071>\u0b30; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0b30; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>\u0B70; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0B71; # LETTER WA
0 > \u0b66; # FALLBACK FOR TAMIL
1 > \u0b67;
# :: NFC (NFD) ;
# eof

View file

@ -1,151 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Tamil
# Maps the InterIndic pivot encoding (private-use code points U+E001..U+E081)
# to Tamil.  Tamil has fewer letters than the pivot repertoire, so many pivot
# characters are folded onto the nearest Tamil letter; those rules are tagged
# FALLBACK or REMAP in their trailing comments.  A rule with an empty right
# side deletes the pivot character.
# Rules whose left side is a multi-character sequence (consonant + nukta,
# digit runs) are listed before the single-character rule that starts with
# the same code point, so the longer sequence is tried first.
#:: NFD (NFC) ;
# --- Signs and independent vowels ---
\ue001>\u0b82; # FALLBACK SIGN CANDRABINDU
\ue002>\u0b82; # SIGN ANUSVARA
\ue003>\u0b83; # SIGN VISARGA
\uE004>\u0b85; # FALLBACK TO LETTER A
\ue005>\u0b85; # LETTER A
\ue006>\u0b86; # LETTER AA
\ue007>\u0b87; # LETTER I
\ue008>\u0b88; # LETTER II
\ue009>\u0b89; # LETTER U
\ue00a>\u0b8a; # LETTER UU
\ue00b>\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
\ue00c>\u0bb2; # FALLBACK LETTER LA
\ue00d>\u0b8f; # FALLBACK
\ue00e>\u0b8e; # LETTER E
\ue00f>\u0b8f; # LETTER EE
\ue010>\u0b90; # LETTER AI
\ue011>\u0b92; # FALLBACK
\ue012>\u0b92; # LETTER O
\ue013>\u0b93; # LETTER OO
\ue014>\u0b94; # LETTER AU
# --- Consonants (aspirated/voiced pivot letters fold onto the plain Tamil letter) ---
\ue015>\u0b95; # LETTER KA
\ue016>\u0b95; # REMAP (indicExceptions.txt): \u0b96>\u0b95 = LETTER KHA>LETTER KA
\ue017>\u0b95; # REMAP (indicExceptions.txt): \u0b97>\u0b95 = LETTER GA>LETTER KA
\ue018>\u0b95; # REMAP (indicExceptions.txt): \u0b98>\u0b95 = LETTER GHA>LETTER KA
\ue019>\u0b99; # LETTER NGA
\ue01a>\u0b9a; # LETTER CA
\ue01b>\u0b9a; # REMAP (indicExceptions.txt): \u0b9b>\u0b9a = LETTER CHA>LETTER CA
\ue01c>\u0b9c; # LETTER JA
\ue01d>\u0b9a; # REMAP (indicExceptions.txt): \u0b9d>\u0b9a = LETTER JHA>LETTER CA
\ue01e>\u0b9e; # LETTER NYA
\ue01f>\u0b9f; # LETTER TTA
\ue020>\u0b9f; # REMAP (indicExceptions.txt): \u0ba0>\u0b9f = LETTER TTHA>LETTER TTA
\ue021>\u0b9f; # REMAP (indicExceptions.txt): \u0ba1>\u0b9f = LETTER DDA>LETTER TTA
\ue022>\u0b9f; # REMAP (indicExceptions.txt): \u0ba2>\u0b9f = LETTER DDHA>LETTER TTA
\ue023>\u0ba3; # LETTER NNA
\ue024>\u0ba4; # LETTER TA
\ue025>\u0ba4; # REMAP (indicExceptions.txt): \u0ba5>\u0ba4 = LETTER THA>LETTER TA
\ue026>\u0ba4; # REMAP (indicExceptions.txt): \u0ba6>\u0ba4 = LETTER DA>LETTER TA
\ue027>\u0ba4; # REMAP (indicExceptions.txt): \u0ba7>\u0ba4 = LETTER DHA>LETTER TA
\ue028\ue03c>\u0ba9; # LETTER NA + NUKTA > LETTER NNNA (must precede the bare LETTER NA rule)
\ue028>\u0ba8; # LETTER NA
\ue029>\u0ba9; # LETTER NNNA
\ue02a>\u0baa; # LETTER PA
\ue02b>\u0baa; # REMAP (indicExceptions.txt): \u0bab>\u0baa = LETTER PHA>LETTER PA
\ue02c>\u0baa; # REMAP (indicExceptions.txt): \u0bac>\u0baa = LETTER BA>LETTER PA
\ue02d>\u0baa; # REMAP (indicExceptions.txt): \u0bad>\u0baa = LETTER BHA>LETTER PA
\ue02e>\u0bae; # LETTER MA
\ue02f>\u0baf; # LETTER YA
\ue030\ue03c>\u0bb1; # LETTER RA + NUKTA > LETTER RRA (must precede the bare LETTER RA rule)
\ue030>\u0bb0; # LETTER RA
\ue031>\u0bb1; # LETTER RRA
\ue032>\u0bb2; # LETTER LA
\ue033\ue03c>\u0bb4; # LETTER LLA + NUKTA > LETTER LLLA (must precede the bare LETTER LLA rule)
\ue033>\u0bb3; # LETTER LLA
\ue034>\u0bb4; # LETTER LLLA
\ue035>\u0bb5; # LETTER VA
\ue036>\u0bb7; # REMAP (indicExceptions.txt): \u0bb6>\u0bb7 = LETTER SHA>LETTER SSA
\ue037>\u0bb7; # LETTER SSA
\ue038>\u0bb8; # LETTER SA
\ue039>\u0bb9; # LETTER HA
# --- Nukta, avagraha, dependent vowel signs, virama ---
# Any nukta not consumed by one of the consonant + nukta rules above is deleted.
\ue03c>; # FALLBACK BLOW AWAY NUKTA
\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA
\ue03e>\u0bbe; # VOWEL SIGN AA
\ue03f>\u0bbf; # VOWEL SIGN I
\ue040>\u0bc0; # VOWEL SIGN II
\ue041>\u0bc1; # VOWEL SIGN U
\ue042>\u0bc2; # VOWEL SIGN UU
\ue043>\u0bcd\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
\ue044>\u0bcd\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
\ue045>\u0bbe; # REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA
\ue046>\u0bc6; # VOWEL SIGN E
\ue047>\u0bc7; # VOWEL SIGN EE
\ue048>\u0bc8; # VOWEL SIGN AI
\ue049>\u0bbe; # REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA O>VOWEL SIGN AA
\ue04a>\u0bca; # VOWEL SIGN O
\ue04b>\u0bcb; # VOWEL SIGN OO
\ue04c>\u0bcc; # VOWEL SIGN AU
\ue04d>\u0bcd; # SIGN VIRAMA
# --- OM, unmapped marks, length marks ---
\ue050>\u0b93\u0bae\u0bcd; # REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA
\ue051>; # UNMAPPED (deleted)
\ue052>; # UNMAPPED (deleted)
\ue053>; # UNMAPPED (deleted)
\ue054>; # UNMAPPED (deleted)
\ue055>; # UNMAPPED InterIndic-Tamil: LENGTH MARK
\ue056>\u0bc8; # REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI
\ue057>\u0bd7; # AU LENGTH MARK
# --- Additional consonants and vocalic letters/signs (all folded to basic Tamil letters) ---
\ue058>\u0b95; # FALLBACK
\ue059>\u0b95; # REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA
\ue05a>\u0b95; # REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA
\ue05b>\u0b9c; # REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA
\ue05c>\u0ba4; # FALLBACK
\ue05d>\u0b9f; # REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA
\ue05e>\u0baa; # REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA
\ue05f>\u0baf; # REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA
\ue060>\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
\ue061>\u0bb3; # FALLBACK LETTER LLA
\ue062>\u0bbf; # FALLBACK VOWEL SIGN VOCALIC L
\ue063>\u0bc0; # FALLBACK VOWEL SIGN VOCALIC LL
# --- Punctuation and digits ---
\ue064>'.' ; # FALLBACK FOR DANDA
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
# Pivot DIGIT ZERO is emitted as ASCII '0'; no Tamil zero digit is used by this table.
\ue066>\u0030; # FALLBACK DIGIT ZERO
# Digit runs 1000, 100 and 10 become the single Tamil number signs; the
# longest run is listed first, ahead of the plain DIGIT ONE rule.
\ue067\ue066\ue066\ue066>\u0bF2; # DIGITS 1 0 0 0 > NUMBER ONE THOUSAND
\ue067\ue066\ue066>\u0bf1; # DIGITS 1 0 0 > NUMBER ONE HUNDRED
\ue067\ue066>\u0bF0; # DIGITS 1 0 > NUMBER TEN
\ue067>\u0be7; # DIGIT ONE
\ue068>\u0be8; # DIGIT TWO
\ue069>\u0be9; # DIGIT THREE
\ue06a>\u0bea; # DIGIT FOUR
\ue06b>\u0beb; # DIGIT FIVE
\ue06c>\u0bec; # DIGIT SIX
\ue06d>\u0bed; # DIGIT SEVEN
\ue06e>\u0bee; # DIGIT EIGHT
\ue06f>\u0bef; # DIGIT NINE
# --- Pivot code points U+E070..U+E081: characters specific to other Indic scripts ---
# Most have no Tamil counterpart and are deleted (empty right side).
\ue070>; # ABBREVIATION SIGN
# Both RA variants fall back to Tamil LETTER RA (U+0BB0), matching the sibling
# InterIndic-Oriya (U+0B30) and InterIndic-Telugu (U+0C30) tables.  They were
# previously mapped to U+0BC0, which is VOWEL SIGN II -- a dependent vowel
# sign, not a letter -- almost certainly a one-digit typo for U+0BB0.
\ue071>\u0bb0; # FALLBACK LETTER RA WITH MIDDLE DIAGONAL > LETTER RA
\ue072>\u0bb0; # FALLBACK LETTER RA WITH LOWER DIAGONAL > LETTER RA
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0bb5; # FALLBACK FOR ORIYA LETTER WA
# ASCII digit runs are also converted to the Tamil number signs; the longest
# run is listed first so that "1000" is not consumed as "100" or "10".
1000 >\u0BF2; # NUMBER ONE THOUSAND
100 >\u0BF1; # NUMBER ONE HUNDRED
10 >\u0BF0; # NUMBER TEN
# :: NFC (NFD) ;
# eof

View file

@ -1,141 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# InterIndic-Telugu
# Maps the InterIndic pivot encoding (private-use code points U+E001..U+E081)
# to Telugu.  Most pivot characters map to the Telugu character at the same
# offset within the block; pivot characters without a Telugu counterpart are
# either folded onto a nearby letter (tagged FALLBACK or REMAP) or deleted
# (empty right side).
#:: NFD (NFC) ;
# --- Signs and independent vowels ---
\ue001>\u0c01; # SIGN CANDRABINDU
\ue002>\u0c02; # SIGN ANUSVARA
\ue003>\u0c03; # SIGN VISARGA
\uE004>\u0c05; # FALLBACK TO LETTER A
\ue005>\u0c05; # LETTER A
\ue006>\u0c06; # LETTER AA
\ue007>\u0c07; # LETTER I
\ue008>\u0c08; # LETTER II
\ue009>\u0c09; # LETTER U
\ue00a>\u0c0a; # LETTER UU
\ue00b>\u0c0b; # LETTER VOCALIC R
\ue00c>\u0c0c; # LETTER VOCALIC L
\ue00d>\u0c0E; # FALLBACK MAPPING
\ue00e>\u0c0E; # LETTER E
\ue00f>\u0c0f; # LETTER EE
\ue010>\u0c10; # LETTER AI
\ue011>\u0c12; # FALLBACK MAPPING
\ue012>\u0c12; # LETTER O
\ue013>\u0c13; # LETTER OO
\ue014>\u0c14; # LETTER AU
# --- Consonants ---
\ue015>\u0c15; # LETTER KA
\ue016>\u0c16; # LETTER KHA
\ue017>\u0c17; # LETTER GA
\ue018>\u0c18; # LETTER GHA
\ue019>\u0c19; # LETTER NGA
\ue01a>\u0c1a; # LETTER CA
\ue01b>\u0c1b; # LETTER CHA
\ue01c>\u0c1c; # LETTER JA
\ue01d>\u0c1d; # LETTER JHA
\ue01e>\u0c1e; # LETTER NYA
\ue01f>\u0c1f; # LETTER TTA
\ue020>\u0c20; # LETTER TTHA
\ue021>\u0c21; # LETTER DDA
\ue022>\u0c22; # LETTER DDHA
\ue023>\u0c23; # LETTER NNA
\ue024>\u0c24; # LETTER TA
\ue025>\u0c25; # LETTER THA
\ue026>\u0c26; # LETTER DA
\ue027>\u0c27; # LETTER DHA
\ue028>\u0c28; # LETTER NA
\ue029>\u0c28; # REMAP (indicExceptions.txt): \u0c29>\u0c28 = LETTER NNNA>LETTER NA
\ue02a>\u0c2a; # LETTER PA
\ue02b>\u0c2b; # LETTER PHA
\ue02c>\u0c2c; # LETTER BA
\ue02d>\u0c2d; # LETTER BHA
\ue02e>\u0c2e; # LETTER MA
\ue02f>\u0c2f; # LETTER YA
\ue030\ue03c>\u0c31; # LETTER RA + NUKTA > LETTER RRA (must precede the bare LETTER RA rule)
\ue030>\u0c30; # LETTER RA
\ue031>\u0c31; # LETTER RRA
\ue032>\u0c32; # LETTER LA
\ue033>\u0c33; # LETTER LLA
\ue034>\u0c33; # REMAP (indicExceptions.txt): \u0c34>\u0c33 = LETTER LLLA>LETTER LLA
\ue035>\u0c35; # LETTER VA
\ue036>\u0c36; # LETTER SHA
\ue037>\u0c37; # LETTER SSA
\ue038>\u0c38; # LETTER SA
\ue039>\u0c39; # LETTER HA
# --- Nukta, avagraha, dependent vowel signs, virama ---
# Any nukta not consumed by the LETTER RA + NUKTA rule above is deleted.
\ue03c>; # FALLBACK BLOW AWAY NUKTA
\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA
\ue03e>\u0c3e; # VOWEL SIGN AA
\ue03f>\u0c3f; # VOWEL SIGN I
\ue040>\u0c40; # VOWEL SIGN II
\ue041>\u0c41; # VOWEL SIGN U
\ue042>\u0c42; # VOWEL SIGN UU
\ue043>\u0c43; # VOWEL SIGN VOCALIC R
\ue044>\u0c44; # VOWEL SIGN VOCALIC RR
\ue045>\u0c46; # VOWEL SIGN CANDRA E>VOWEL SIGN E
\ue046>\u0c46; # VOWEL SIGN E
\ue047>\u0c47; # VOWEL SIGN EE
\ue048>\u0c48; # VOWEL SIGN AI
\ue049>\u0c4a; # REMAP (indicExceptions.txt): \u0c49>\u0c4a = VOWEL SIGN CANDRA O>VOWEL SIGN O
\ue04a>\u0c4a; # VOWEL SIGN O
\ue04b>\u0c4b; # VOWEL SIGN OO
\ue04c>\u0c4c; # VOWEL SIGN AU
\ue04d>\u0c4d; # SIGN VIRAMA
# --- OM, unmapped marks, length marks ---
\ue050>\u0c13\u0c02; # REMAP (indicExceptions.txt): \u0c50>\u0c13\u0c02 = OM>LETTER OO.SIGN ANUSVARA
\ue051>; # UNMAPPED (deleted)
\ue052>; # UNMAPPED (deleted)
\ue053>; # UNMAPPED (deleted)
\ue054>; # UNMAPPED (deleted)
\ue055>\u0c55; # LENGTH MARK
\ue056>\u0c56; # AI LENGTH MARK
\ue057>\u0c4c; # REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU
# --- Additional consonants and vocalic letters/signs ---
\ue058>\u0c15; # REMAP
\ue059>\u0c16; # REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA
\ue05a>\u0c17; # REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA
\ue05b>\u0c1c; # REMAP (indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA
\ue05c>\u0c22; # REMAP
\ue05d>\u0c22; # REMAP (indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA
\ue05e>\u0c2b; # REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA
\ue05f>\u0c2f; # REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA
\ue060>\u0c60; # LETTER VOCALIC RR
\ue061>\u0c61; # LETTER VOCALIC LL
\ue062>\u0c3f; # REMAP (indicExceptions.txt): \u0c62>\u0c3f = VOWEL SIGN VOCALIC L>VOWEL SIGN I
\ue063>\u0c40; # REMAP (indicExceptions.txt): \u0c63>\u0c40 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
# --- Punctuation and digits ---
\ue064>'.' ; # FALLBACK FOR DANDA
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
\ue066>\u0c66; # DIGIT ZERO
\ue067>\u0c67; # DIGIT ONE
\ue068>\u0c68; # DIGIT TWO
\ue069>\u0c69; # DIGIT THREE
\ue06a>\u0c6a; # DIGIT FOUR
\ue06b>\u0c6b; # DIGIT FIVE
\ue06c>\u0c6c; # DIGIT SIX
\ue06d>\u0c6d; # DIGIT SEVEN
\ue06e>\u0c6e; # DIGIT EIGHT
\ue06f>\u0c6f; # DIGIT NINE
# --- Pivot code points U+E070..U+E081: characters specific to other Indic scripts ---
\ue070>; # ABBREVIATION SIGN
\ue071>\u0c30; # LETTER RA WITH MIDDLE DIAGONAL
\ue072>\u0c30; # LETTER RA WITH LOWER DIAGONAL
\ue073>; # RUPEE MARK
\ue074>; # RUPEE SIGN
\ue075>; # CURRENCY NUMERATOR ONE
\ue076>; # CURRENCY NUMERATOR TWO
\ue077>; # CURRENCY NUMERATOR THREE
\ue078>; # CURRENCY NUMERATOR FOUR
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
\ue07B>; # ISSHAR
\uE07C>; # TIPPI
\uE07D>; # ADDAK
\uE07E>; # IRI
\uE07F>; # URA
\uE080>; # EK ONKAR
\uE081>\u0c35; # FALLBACK FOR ORIYA LETTER WA
# ASCII 0 and 1 are converted to Telugu digits.
# NOTE(review): presumably this covers text coming from Tamil, whose table
# emits ASCII '0' for DIGIT ZERO -- confirm why only 0 and 1 are handled.
0 > \u0c66; # FALLBACK FOR TAMIL
1 > \u0c67;
# :: NFC (NFD) ;
# eof

View file

@ -1,92 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Kannada-InterIndic
# Maps Kannada characters to the InterIndic pivot encoding (private-use code
# points starting at U+E001).
#
# The first six rules match multi-code-point vowel sign sequences.  These are
# the canonical decompositions of the two-part Kannada vowel signs (U+0CC0,
# U+0CC7, U+0CC8, U+0CCA, U+0CCB), which have no single-character rule below.
# NOTE(review): this presumably relies on the input having been normalized to
# NFD before these rules run -- confirm where that normalization is applied.
# The sequence rules must stay ahead of the single-character rules for
# \u0CC6, \u0CBF, \u0CC2, \u0CD5 and \u0CD6 so the longer match is tried first.
\u0CC6\u0CD5>\uE047; # VOWEL SIGN E + LENGTH MARK > VOWEL SIGN EE
\u0CC6\u0CCD\u0CD6>\uE048\ue04d; # VOWEL SIGN E + VIRAMA + AI LENGTH MARK > VOWEL SIGN AI + VIRAMA
\u0CC6\u0CD6>\uE048; # VOWEL SIGN E + AI LENGTH MARK > VOWEL SIGN AI
\u0CC6\u0CC2\u0CD5>\uE04B; # VOWEL SIGN E + VOWEL SIGN UU + LENGTH MARK > VOWEL SIGN OO
\u0CC6\u0CC2>\uE04A; # VOWEL SIGN E + VOWEL SIGN UU > VOWEL SIGN O
\u0CBF\u0CD5>\uE040; # VOWEL SIGN I + LENGTH MARK > VOWEL SIGN II
# --- Signs and independent vowels ---
\u0C82>\uE002; # SIGN ANUSVARA
\u0C83>\uE003; # SIGN VISARGA
\u0C85>\uE005; # LETTER A
\u0C86>\uE006; # LETTER AA
\u0C87>\uE007; # LETTER I
\u0C88>\uE008; # LETTER II
\u0C89>\uE009; # LETTER U
\u0C8A>\uE00A; # LETTER UU
\u0C8B>\uE00B; # LETTER VOCALIC R
\u0C8C>\uE00C; # LETTER VOCALIC L
\u0C8E>\uE00E; # LETTER E
\u0C8F>\uE00F; # LETTER EE
\u0C90>\uE010; # LETTER AI
\u0C92>\uE012; # LETTER O
\u0C93>\uE013; # LETTER OO
\u0C94>\uE014; # LETTER AU
# --- Consonants ---
\u0C95>\uE015; # LETTER KA
\u0C96>\uE016; # LETTER KHA
\u0C97>\uE017; # LETTER GA
\u0C98>\uE018; # LETTER GHA
\u0C99>\uE019; # LETTER NGA
\u0C9A>\uE01A; # LETTER CA
\u0C9B>\uE01B; # LETTER CHA
\u0C9C>\uE01C; # LETTER JA
\u0C9D>\uE01D; # LETTER JHA
\u0C9E>\uE01E; # LETTER NYA
\u0C9F>\uE01F; # LETTER TTA
\u0CA0>\uE020; # LETTER TTHA
\u0CA1>\uE021; # LETTER DDA
\u0CA2>\uE022; # LETTER DDHA
\u0CA3>\uE023; # LETTER NNA
\u0CA4>\uE024; # LETTER TA
\u0CA5>\uE025; # LETTER THA
\u0CA6>\uE026; # LETTER DA
\u0CA7>\uE027; # LETTER DHA
\u0CA8>\uE028; # LETTER NA
\u0CAA>\uE02A; # LETTER PA
\u0CAB>\uE02B; # LETTER PHA
\u0CAC>\uE02C; # LETTER BA
\u0CAD>\uE02D; # LETTER BHA
\u0CAE>\uE02E; # LETTER MA
\u0CAF>\uE02F; # LETTER YA
\u0CB0>\uE030; # LETTER RA
\u0CB1>\uE031; # LETTER RRA
\u0CB2>\uE032; # LETTER LA
\u0CB3>\uE033; # LETTER LLA
\u0CB5>\uE035; # LETTER VA
\u0CB6>\uE036; # LETTER SHA
\u0CB7>\uE037; # LETTER SSA
\u0CB8>\uE038; # LETTER SA
\u0CB9>\uE039; # LETTER HA
# --- Nukta, avagraha, single-code-point vowel signs, virama, length marks ---
\u0CBC>\uE03C; # SIGN NUKTA
\u0CBD>\uE03D; # AVAGRAHA
\u0CBE>\uE03E; # VOWEL SIGN AA
\u0CBF>\uE03F; # VOWEL SIGN I
\u0CC1>\uE041; # VOWEL SIGN U
\u0CC2>\uE042; # VOWEL SIGN UU
\u0CC3>\uE043; # VOWEL SIGN VOCALIC R
\u0CC4>\uE044; # VOWEL SIGN VOCALIC RR
\u0CC6>\uE046; # VOWEL SIGN E
\u0CCC>\uE04C; # VOWEL SIGN AU
\u0CCD>\uE04D; # SIGN VIRAMA
\u0CD5>\uE055; # LENGTH MARK
\u0CD6>\uE056; # AI LENGTH MARK
# --- Additional letters ---
\u0CDE>\uE034; # LETTER LLLA
\u0CE0>\uE060; # LETTER VOCALIC RR
\u0CE1>\uE061; # LETTER VOCALIC LL
# --- Digits ---
\u0CE6>\uE066; # DIGIT ZERO
\u0CE7>\uE067; # DIGIT ONE
\u0CE8>\uE068; # DIGIT TWO
\u0CE9>\uE069; # DIGIT THREE
\u0CEA>\uE06A; # DIGIT FOUR
\u0CEB>\uE06B; # DIGIT FIVE
\u0CEC>\uE06C; # DIGIT SIX
\u0CED>\uE06D; # DIGIT SEVEN
\u0CEE>\uE06E; # DIGIT EIGHT
\u0CEF>\uE06F; # DIGIT NINE
# eof

View file

@ -1,383 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Latin-InterIndic
# Converts romanized (Latin) text to the InterIndic pivot encoding, which uses
# private-use code points starting at U+E001.  The variables defined first
# give each pivot code point a short mnemonic name; the conversion rules that
# follow refer to the pivot characters only through these names.
#:: NFD;
#\ue000 reserved
# signs
$chandrabindu=\ue001;
$anusvara=\ue002;
$visarga=\ue003;
#\ue004 reserved
# w<vowel> represents the stand-alone form
$wa=\ue005;
$waa=\ue006;
$wi=\ue007;
$wii=\ue008;
$wu=\ue009;
$wuu=\ue00a;
$wr=\ue00b;
$wl=\ue00c;
$wce=\ue00d; # LETTER CANDRA E
$wse=\ue00e; # LETTER SHORT E
$we=\ue00f; # \u090f LETTER E
$wai=\ue010;
$wco=\ue011; # LETTER CANDRA O
$wso=\ue012; # LETTER SHORT O
$wo=\ue013; # \u0913 LETTER O
$wau=\ue014;
# consonants (each name carries the inherent vowel 'a')
$ka=\ue015;
$kha=\ue016;
$ga=\ue017;
$gha=\ue018;
$nga=\ue019;
$ca=\ue01a;
$cha=\ue01b;
$ja=\ue01c;
$jha=\ue01d;
$nya=\ue01e;
$tta=\ue01f;
$ttha=\ue020;
$dda=\ue021;
$ddha=\ue022;
$nna=\ue023;
$ta=\ue024;
$tha=\ue025;
$da=\ue026;
$dha=\ue027;
$na=\ue028;
$ena=\ue029; #compatibility
$pa=\ue02a;
$pha=\ue02b;
$ba=\ue02c;
$bha=\ue02d;
$ma=\ue02e;
$ya=\ue02f;
$ra=\ue030;
$rra=\ue031;
$la=\ue032;
$lla=\ue033;
$ela=\ue034; #compatibility
$va=\ue035;
$vva=\ue081; # out of sequence: the InterIndic-<script> tables label \ue081 as Oriya LETTER WA
$sha=\ue036;
$ssa=\ue037;
$sa=\ue038;
$ha=\ue039;
#\u093a Reserved
#\u093b Reserved
$nukta=\ue03c;
$avagraha=\ue03d; # SIGN AVAGRAHA
# <vowel> represents the dependent form
$aa=\ue03e;
$i=\ue03f;
$ii=\ue040;
$u=\ue041;
$uu=\ue042;
# NOTE(review): the InterIndic-<script> tables label \ue043/\ue044 as VOWEL
# SIGN VOCALIC R / VOCALIC RR and \ue062/\ue063 as VOWEL SIGN VOCALIC L /
# VOCALIC LL, whereas the names here pair \ue044 with 'lh' and \ue062 with
# 'rrh'.  Confirm against the reverse (InterIndic-Latin) rules before changing
# anything, since round-tripping depends on both sides agreeing.
$rh=\ue043;
$lh=\ue044;
$ce=\ue045; #VOWEL SIGN CANDRA E
$se=\ue046; #VOWEL SIGN SHORT E
$e=\ue047;
$ai=\ue048;
$co=\ue049; # VOWEL SIGN CANDRA O
$so=\ue04a; # VOWEL SIGN SHORT O
$o=\ue04b; # \u094b
$au=\ue04c;
$virama=\ue04d;
# \u094e Reserved
# \u094f Reserved
$om = \ue050; # OM
# \u0951>; # UNMAPPED STRESS SIGN UDATTA
# \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
# \u0953>; # UNMAPPED GRAVE ACCENT
# \u0954>; # UNMAPPED ACUTE ACCENT
$lm = \ue055;# Telugu Length Mark
$ailm=\ue056;# AI Length Mark
$aulm=\ue057;# AU Length Mark
#urdu compatibility forms
$uka=\ue058;
$ukha=\ue059;
$ugha=\ue05a;
$ujha=\ue05b;
$uddha=\ue05c;
$udha=\ue05d;
$ufa=\ue05e;
$uya=\ue05f;
# additional vocalic letters (w-prefixed, stand-alone) and signs (dependent)
$wrr=\ue060;
$wll=\ue061;
$rrh=\ue062;
$llh=\ue063;
# punctuation
$danda=\ue064;
$doubleDanda=\ue065;
$zero=\ue066; # DIGIT ZERO
$one=\ue067; # DIGIT ONE
$two=\ue068; # DIGIT TWO
$three=\ue069; # DIGIT THREE
$four=\ue06a; # DIGIT FOUR
$five=\ue06b; # DIGIT FIVE
$six=\ue06c; # DIGIT SIX
$seven=\ue06d; # DIGIT SEVEN
$eight=\ue06e; # DIGIT EIGHT
$nine=\ue06f; # DIGIT NINE
# For all other scripts
# (extended code points \ue070-\ue07f; none of the rules below reference them)
$ecp0=\ue070;
$ecp1=\ue071;
$ecp2=\ue072;
$ecp3=\ue073;
$ecp4=\ue074;
$ecp5=\ue075;
$ecp6=\ue076;
$ecp7=\ue077;
$ecp8=\ue078;
$ecp9=\ue079;
$ecpA=\ue07a;
$ecpB=\ue07b;
$ecpC=\ue07c;
$ecpD=\ue07d;
$ecpE=\ue07e;
$ecpF=\ue07f;
# \u0970>; # UNMAPPED ABBREVIATION SIGN
# Character classes used as rule context.
# Dependent vowel signs \ue03e-\ue040 and \ue045-\ue04c: a following '~' becomes anusvara.
$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
# Dependent vowel signs \ue041-\ue044: a following '~' becomes chandrabindu.
$depVowelBelow=[\ue041-\ue044];
# Sentence-final punctuation; a virama directly before one of these is dropped.
$endThing=[$danda$doubleDanda];
# $x was originally called '&'; $z was '%'
# $x collects the virama and dependent vowel signs; no rule in this file
# references it.  $z is the set of Latin consonant letters.
$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
$z=[bcdfghjklmnpqrstvwxyz];
# Anything that counts as a consonant when it precedes a vowel: pivot
# consonants, Latin consonant letters, and the consonant ranges (xx15-xx39)
# of the nine Indic script blocks listed.
$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];
# Signs written with combining marks, and 'x' as the cluster ka + virama + sa.
\u0315 > $avagraha;
\u0303>$chandrabindu$anusvara;
m\u0310>$chandrabindu;
h\u0323>$visarga;
x>$ka$virama$sa;
# convert to independent forms at start of word or syllable:
# dependent forms for roundtrip
# A vowel prefixed with U+0314 is forced to its dependent (vowel sign) form;
# U+0314 followed by a bare 'a' is simply deleted.  Within each group the
# longer spelling (with macron/breve or second letter) precedes the shorter.
\u0314a\u0304>$aa;
\u0314ai>$ai;
\u0314au>$au;
\u0314ii>$ii;
\u0314i\u0304>$ii;
\u0314i>$i;
\u0314u\u0304>$uu;
\u0314u>$u;
\u0314r\u0325\u0304>$rrh;
\u0314r\u0325>$rh;
\u0314l\u0325\u0304>$llh;
\u0314lh>$lh;
\u0314l\u0325>$lh;
\u0314e\u0304>$e;
\u0314o\u0304>$o;
\u0314a>;
\u0314e\u0306>$ce;
\u0314o\u0306>$co;
\u0314e>$se;
\u0314o>$so;
# preceded by consonants
# ('$consonants{' is before-context: the consonant must be present to the
# left but is not itself replaced; the vowel becomes a dependent vowel sign.)
$consonants{ a\u0304>$aa;
$consonants{ ai>$ai;
$consonants{ au>$au;
$consonants{ ii>$ii;
$consonants{ i\u0304>$ii;
$consonants{ i>$i;
$consonants{ u\u0304>$uu;
$consonants{ u>$u;
$consonants{ r\u0325\u0304>$rrh;
$consonants{ r\u0325a>$rh;
$consonants{ r\u0325>$rh;
$consonants{ l\u0325\u0304>$llh;
$consonants{ lh>$lh;
$consonants{ l\u0325>$lh;
$consonants{ e\u0304>$e;
$consonants{ o\u0304>$o;
$consonants{ e\u0306>$ce;
$consonants{ o\u0306>$co;
$consonants{ e>$se;
$consonants{ o>$so;
# e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
# Any vowel not caught by the rules above becomes an independent
# (stand-alone, w-prefixed) vowel letter.
a\u0304>$waa;
ai>$wai;
au>$wau;
i\u0304>$wii;
i>$wi;
u\u0304>$wuu;
u>$wu;
r\u0325\u0304>$wrr;
r\u0325>$wr;
l\u0325\u0304>$wll;
lh>$wl;
l\u0325>$wl;
e\u0304>$we;
o\u0304>$wo;
a>$wa;
e\u0306>$wce;
o\u0306>$wco;
e>$wse;
# apostrophe + "om" is the OM sign; listed before the plain 'o' rule
''om>$om;
o>$wso;
# rules for anusvara
# In these rules '}' introduces after-context (text that must follow but is
# not consumed), and '|' in the replacement places the cursor, so the emitted
# virama is re-examined by the later '$virama <vowel>' rules.
# n before vocalic r/l or before "na" stays a real consonant NA + virama.
n}r\u0325 > $na|$virama;
n}l\u0325 > $na|$virama;
n}na > $na|$virama;
# A nasal directly before a consonant of the listed class becomes anusvara.
n\u0307}[kg] > $anusvara;
n\u0307}n\u0307 > $anusvara;
n\u0304}[cj] > $anusvara;
n\u0304}n\u0303 > $anusvara;
n\u0323}[tdn]\u0323 > $anusvara;
n}[tdn] > $anusvara;
m}[pbm] > $anusvara;
n}[ylvshr] > $anusvara;
m\u0307 > $anusvara;
# Every consonant rule below emits the consonant followed by a virama, with
# the cursor left before the virama; a following vowel then replaces the
# virama (see the '$virama <vowel>' rules further down).
#urdu compatibility
q>$uka|$virama;
k\u0331h\u0331>$ukha |$virama;
g\u0307> $ugha | $virama;
z > $ujha |$virama;
f > $ufa|$virama;
# dev
# Letters with diacritics and digraphs are listed ahead of the plain letter
# they start with (e.g. t\u0323h, t\u0323, tth, th before t).
y\u0307>$uya|$virama;
l\u0331>$ela|$virama;
n\u0331>$ena|$virama;
n\u0307>$nga|$virama;
n\u0303>$nya|$virama;
n\u0323>$nna|$virama;
t\u0323h>$ttha|$virama;
t\u0323>$tta|$virama;
r\u0323h>$udha|$virama;
r\u0323>$uddha|$virama;
d\u0323h>$ddha|$virama;
d\u0323>$dda|$virama;
kh>$kha|$virama;
k>$ka|$virama;
gh>$gha|$virama;
g>$ga|$virama;
ch>$cha|$virama;
c>$ca|$virama;
jh>$jha|$virama;
j>$ja|$virama;
ny>$nya|$virama;
tth>$ttha|$virama;
ddh>$ddha|$virama;
th>$tha|$virama;
t>$ta|$virama;
dh>$dha|$virama;
d>$da|$virama;
n>$na|$virama;
ph>$pha|$virama;
p>$pa|$virama;
bh>$bha|$virama;
b>$ba|$virama;
m>$ma|$virama;
y>$ya|$virama;
r\u0331>$rra|$virama;
r>$ra|$virama;
l\u0323>$lla|$virama;
l>$la|$virama;
v>$va|$virama;
w\u0307>$vva|$virama;
w>$va|$virama;
sh>$sha|$virama;
ss>$ssa|$virama;
s\u0323>$ssa|$virama;
s\u0301>$sha|$virama;
s>$sa|$virama;
h>$ha|$virama;
# punctuation: '.' is danda; an existing danda followed by '.' is double danda
'.'>$danda;
$danda'.'>$doubleDanda;
# '~' after a dependent vowel sign: anusvara or chandrabindu, depending on
# which of the two vowel-sign classes precedes it
$depVowelAbove{'~'>$anusvara;
$depVowelBelow{'~'>$chandrabindu;
# convert to dependent forms after consonant with no vowel:
# e.g. kai -> {ka}{virama}ai -> {ka}{ai}
# The virama left by a consonant rule is consumed together with the vowel and
# replaced by the dependent vowel sign; a bare 'a' just removes the virama
# (inherent vowel).  Lines starting with '#$virama' are disabled rules.
#$virama aa>$aa;
$virama a\u0304>$aa;
$virama ai>$ai;
$virama au>$au;
$virama ii>$ii;
$virama i\u0304>$ii;
$virama i>$i;
#$virama uu>$uu;
$virama u\u0304>$uu;
$virama u>$u;
#$virama rrh>$rrh;
$virama r\u0325\u0304>$rrh;
#$virama rh>$rh;
$virama r\u0325a>$rh;
$virama r\u0325>$rh;
$virama l\u0325\u0304>$llh;
$virama lh>$lh;
$virama l\u0325>$lh;
$virama e\u0304>$e;
$virama o\u0304>$o;
$virama a>;
$virama e\u0306>$ce;
$virama o\u0306>$co;
$virama e>$se;
$virama o>$so;
# otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
# ('' inside a rule is a literal apostrophe.  Here the virama and apostrophe
# are consumed and replaced by the independent vowel letter.)
#$virama''aa>$waa;
$virama''a\u0304>$waa;
$virama''ai>$wai;
$virama''au>$wau;
#$virama''ii>$wii;
$virama''i\u0304>$wii;
$virama''i>$wi;
#$virama''uu>$wuu;
$virama''u\u0304>$wuu;
$virama''u>$wu;
#$virama''rrh>$wrr;
$virama''r\u0325\u0304>$wrr;
#$virama''rh>$wr;
$virama''r\u0325>$wr;
$virama''l\u0325\u0304>$wll;
#$virama''lh>$wl;
$virama''l\u0325>$wl;
$virama''e\u0304>$we;
$virama''o\u0304>$wo;
$virama''a>$wa;
$virama''e\u0306>$wce;
$virama''o\u0306>$wco;
$virama''e>$wse;
$virama''o>$wso;
# no virama
# apostrophe + vowel with no preceding virama: independent vowel letter
''a\u0304>$waa;
''ai>$wai;
''au>$wau;
''i\u0304>$wii;
''i>$wi;
''u\u0304>$wuu;
''u>$wu;
''r\u0325\u0304>$wrr;
''r\u0325>$wr;
''l\u0325\u0304>$wll;
''l\u0325>$wl;
''e\u0304>$we;
''o\u0304>$wo;
''a>$wa;
''e\u0306>$wce;
''o\u0306>$wco;
''e>$wse;
''o>$wso;
# A virama followed by a Latin consonant or a space is rewritten to itself
# (kept, with the cursor moved past it); a virama directly before a danda or
# double danda is deleted.
$virama } [$z] > $virama;
$virama } ' ' > $virama ;
$virama}$endThing>;
# ASCII digits to pivot digits
0>$zero;
1>$one;
2>$two;
3>$three;
4>$four;
5>$five;
6>$six;
7>$seven;
8>$eight;
9>$nine;
# finally, delete any apostrophe that no earlier rule consumed
''>;
#:: NFC (NFD) ;

View file

@ -1,522 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
#- the INDEX file. This transliterator is, by itself, not
#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
#- inverses thereof.
# Transliteration from Latin characters to Korean script is done in
# two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul
# transliteration is done algorithmically following Unicode 3.0
# section 3.11. This file implements the Latin to Jamo
# transliteration using rules.
# Jamo occupy the block 1100-11FF. Within this block there are three
# groups of characters: initial consonants or choseong (I), medial
# vowels or jungseong (M), and trailing consonants or jongseong (F).
# Standard Korean syllables are of the form I+M+F*.
# Section 3.11 describes the use of 'filler' jamo to convert
# nonstandard syllables to standard form: the choseong filler 115F and
# the jungseong filler 1160. In this transliterator, we will not use
# 115F or 1160.
# We will, however, insert two 'null' jamo to make foreign words
# conform to Korean syllable structure. These are the null initial
# consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,
# we will use the separator in order to disambiguate strings,
# e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).
# We will not use all of the characters in the jamo block. We will
# only use the 19 initials, 21 medials, and 27 finals possessing a
# jamo short name as defined in section 4.4 of the Unicode book.
# Rules of thumb. These guidelines provide the basic framework
# for the rules. They are phrased in terms of Latin-Jamo transliteration.
# The Jamo-Latin rules derive from these, since the Jamo-Latin rules are
# just context-free transliteration of jamo to corresponding short names,
# with the addition of separators to maintain round-trip integrity
# in the context of the Latin-Jamo rules.
# A sequence of vowels:
# - Take the longest sequence you can. If there are too many, or you don't
# have a starting consonant, introduce a 110B as necessary.
# A sequence of consonants.
# - First join the double consonants: G + G -> GG
# - In the remaining list,
# -- If there is no preceding vowel, take the first consonant, and insert EU
# after it. Continue with the rest of the consonants.
# -- If there is one consonant, attach to the following vowel
# -- If there are two consonants and a following vowel, attach one to the
# preceding vowel, and one to the following vowel.
# -- If there are more than two consonants, join the first two together if you
# can: L + G => LG
# -- If you still end up with more than 2 consonants, insert EU after the
# first one, and continue with the rest of the consonants.
#----------------------------------------------------------------------
# Variables
# Some latin consonants or consonant pairs only occur as initials, and
# some only as finals, but some occur as both. This makes some jamo
# consonants ambiguous when transliterated into latin.
# Initial only: IEUNG BB DD JJ R
# Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
# Initial and Final: B C D G GG H J K M N P S SS T
$Gi = \u1100;
$GGi = \u1101;
$Ni = \u1102;
$Di = \u1103;
$DD = \u1104;
$R = \u1105;
$Mi = \u1106;
$Bi = \u1107;
$BB = \u1108;
$Si = \u1109;
$SSi = \u110A;
$IEUNG = \u110B; # null initial, inserted during Latin-Jamo
$Ji = \u110C;
$JJ = \u110D;
$Ci = \u110E;
$Ki = \u110F;
$Ti = \u1110;
$Pi = \u1111;
$Hi = \u1112;
$A = \u1161;
$AE = \u1162;
$YA = \u1163;
$YAE = \u1164;
$EO = \u1165;
$E = \u1166;
$YEO = \u1167;
$YE = \u1168;
$O = \u1169;
$WA = \u116A;
$WAE = \u116B;
$OE = \u116C;
$YO = \u116D;
$U = \u116E;
$WEO = \u116F;
$WE = \u1170;
$WI = \u1171;
$YU = \u1172;
$EU = \u1173; # null medial, inserted during Latin-Jamo
$YI = \u1174;
$I = \u1175;
$Gf = \u11A8;
$GGf = \u11A9;
$GS = \u11AA;
$Nf = \u11AB;
$NJ = \u11AC;
$NH = \u11AD;
$Df = \u11AE;
$L = \u11AF;
$LG = \u11B0;
$LM = \u11B1;
$LB = \u11B2;
$LS = \u11B3;
$LT = \u11B4;
$LP = \u11B5;
$LH = \u11B6;
$Mf = \u11B7;
$Bf = \u11B8;
$BS = \u11B9;
$Sf = \u11BA;
$SSf = \u11BB;
$NG = \u11BC;
$Jf = \u11BD;
$Cf = \u11BE;
$Kf = \u11BF;
$Tf = \u11C0;
$Pf = \u11C1;
$Hf = \u11C2;
$jamoInitial = [\u1100-\u1112];
$jamoMedial = [\u1161-\u1175];
$latinInitial = [bcdghjkmnprst];
# Any character in the latin transliteration of a medial
$latinMedial = [aeiouwy];
# The last character of the latin transliteration of a medial
$latinMedialEnd = [aeiou];
# Disambiguation separator
$sep = \';
#----------------------------------------------------------------------
# Jamo-Latin
# Jamo to latin is relatively simple, since it is the latin that is
# ambiguous. Most rules are straightforward, and we encode them below
# as simple add-on back rule, e.g.:
# $jamoMedial {bs} > $BS;
# becomes
# $jamoMedial {bs} <> $BS;
# Furthermore, we don't care about the ordering for Jamo-Latin because
# we are going from single characters, so we can very easily piggyback
# on the Latin-Jamo.
# The main issue with Jamo-Latin is when to insert separators.
# Separators are inserted to obtain correct round trip behavior. For
# example, the sequence Ki A Gf Gi E, if transliterated to "kagge",
# would then round trip to Ki A GGi E. To prevent this, we insert a
# separator: "kag-ge". IMPORTANT: The need for separators depends
# very specifically on the behavior of the Latin-Jamo rules. A change
# in the Latin-Jamo behavior can completely change the way the
# separator insertion must be done.
# First try to preserve actual separators in the jamo text by doubling
# them. This fixes problems like:
# (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol
# => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional
# -- if we don't care about losing separators in the jamo, we can delete
# this rule.
$sep $sep <> $sep;
# Triple consonants. For three consonants "axxx" we insert a
# separator between the first and second "x" if XXf, Xf, and Xi all
# exist, and we have A Xf XXi. This prevents the reverse
# transliteration to A XXf Xi.
$sep < $latinMedialEnd g {} $GGi;
$sep < $latinMedialEnd s {} $SSi;
# For vowels the rule is similar. If there is a vowel "ae" such that
# "a" by itself and "e" by itself are vowels, then we want to map A E
# to "a-e" so as not to round trip to AE. However, in the text Ki EO
# IEUNG E we don't need to map to "keo-e". "keoe" suffices. For
# vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be
# tested. NOTE: These rules used to have a left context of
# $latinInitial instead of [^$latinMedial]. The problem with this is
# sequences where an initial IEUNG is transliterated away:
# (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O)
$sep < [^$latinMedial] [y w] e {} [$O $OE];
$sep < [^$latinMedial] e {} [$O $OE $U];
$sep < [^$latinMedial] [o a] {} [$E $EO $EU];
$sep < [^$latinMedial] [w y] a {} [$E $EO $EU];
# Similar to the above, but with an intervening $IEUNG.
$sep < [^$latinMedial] [y w] e {} $IEUNG [$O $OE];
$sep < [^$latinMedial] e {} $IEUNG [$O $OE $U];
$sep < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];
$sep < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];
# Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E,
# where Xi also exists, must be transliterated as "ax-e" to prevent
# the round trip conversion to A Xi E.
$sep < $latinMedialEnd b {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd c {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd d {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd g {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd h {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd j {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd k {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd m {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd n {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd p {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd s {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd t {} $IEUNG $jamoMedial;
# Double finals followed by IEUNG. Similar to the single finals
# followed by IEUNG. Any latin consonant pair X Y, between medials,
# that we would split by Latin-Jamo, we must handle when it occurs as
# part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi
# E.
$sep < $latinMedialEnd b s {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd g g {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd g s {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l b {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l g {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l h {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l m {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l p {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l s {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd l t {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd n g {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd n h {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd n j {} $IEUNG $jamoMedial;
$sep < $latinMedialEnd s s {} $IEUNG $jamoMedial;
# Split doubles. Text of the form A Xf Xi E, where XXi also occurs,
# we transliterate as "ax-xe" to prevent round trip transliteration as
# A XXi E.
$sep < $latinMedialEnd b {} $Bi $jamoMedial;
$sep < $latinMedialEnd d {} $Di $jamoMedial;
$sep < $latinMedialEnd j {} $Ji $jamoMedial;
$sep < $latinMedialEnd g {} $Gi $jamoMedial;
$sep < $latinMedialEnd s {} $Si $jamoMedial;
# XYY. This corresponds to the XYY rule in Latin-Jamo. By default
# Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result,
# "xyy" forms that correspond to XYf Yi must be transliterated as
# "xy-y".
$sep < $latinMedialEnd b s {} [$Si $SSi];
$sep < $latinMedialEnd g s {} [$Si $SSi];
$sep < $latinMedialEnd l b {} [$Bi $BB];
$sep < $latinMedialEnd l g {} [$Gi $GGi];
$sep < $latinMedialEnd l s {} [$Si $SSi];
$sep < $latinMedialEnd n g {} [$Gi $GGi];
$sep < $latinMedialEnd n j {} [$Ji $JJ];
# Deletion of IEUNG is handled below.
#----------------------------------------------------------------------
# Latin-Jamo
# [Basic, context-free Jamo-Latin rules are embedded here too. See
# above.]
# Split digraphs: Text of the form 'axye', where 'xy' is a final
# digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
# 'e' are medials, we want to transliterate this as A Xf Yi E rather
# than A XYf IEUNG E. We do NOT include text of the form "axxe",
# since that is handled differently below. These rules are generated
# programmatically from the jamo data.
$jamoMedial {b s} $latinMedial > $Bf $Si;
$jamoMedial {g s} $latinMedial > $Gf $Si;
$jamoMedial {l b} $latinMedial > $L $Bi;
$jamoMedial {l g} $latinMedial > $L $Gi;
$jamoMedial {l h} $latinMedial > $L $Hi;
$jamoMedial {l m} $latinMedial > $L $Mi;
$jamoMedial {l p} $latinMedial > $L $Pi;
$jamoMedial {l s} $latinMedial > $L $Si;
$jamoMedial {l t} $latinMedial > $L $Ti;
$jamoMedial {n g} $latinMedial > $Nf $Gi;
$jamoMedial {n h} $latinMedial > $Nf $Hi;
$jamoMedial {n j} $latinMedial > $Nf $Ji;
# Single consonants are initials: Text of the form 'axe', where 'x'
# can be an initial or a final, and 'a' and 'e' are medials, we want
# to transliterate as A Xi E rather than A Xf IEUNG E.
$jamoMedial {b} $latinMedial > $Bi;
$jamoMedial {c} $latinMedial > $Ci;
$jamoMedial {d} $latinMedial > $Di;
$jamoMedial {g} $latinMedial > $Gi;
$jamoMedial {h} $latinMedial > $Hi;
$jamoMedial {j} $latinMedial > $Ji;
$jamoMedial {k} $latinMedial > $Ki;
$jamoMedial {m} $latinMedial > $Mi;
$jamoMedial {n} $latinMedial > $Ni;
$jamoMedial {p} $latinMedial > $Pi;
$jamoMedial {s} $latinMedial > $Si;
$jamoMedial {t} $latinMedial > $Ti;
# Doubled initials. The sequence "axxe", where XX exists as an initial
# (XXi), and also Xi and Xf exist (true of all digraphs XX), we want
# to transliterate as A XXi E, rather than split to A Xf Xi E.
$jamoMedial {b b} $latinMedial > $BB;
$jamoMedial {d d} $latinMedial > $DD;
$jamoMedial {j j} $latinMedial > $JJ;
$jamoMedial {g g} $latinMedial > $GGi;
$jamoMedial {s s} $latinMedial > $SSi;
# XYY. Because doubled consonants bind more strongly than XY
# consonants, we must handle the sequence "axyy" specially. Here XYf
# and YYi must exist. In these cases, we map to Xf YYi rather than
# XYf.
$jamoMedial {b} s s > $Bf;
$jamoMedial {g} s s > $Gf;
$jamoMedial {l} b b > $L;
$jamoMedial {l} g g > $L;
$jamoMedial {l} s s > $L;
$jamoMedial {n} g g > $Nf;
$jamoMedial {n} j j > $Nf;
# Finals: Attach consonant with preceding medial to preceding medial.
# Do this BEFORE mapping consonants to initials. Longer keys must
# precede shorter keys that they start with, e.g., the rule for 'bs'
# must precede 'b'.
# [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this
# block for Jamo-Latin.]
# In each rule $jamoMedial is context only; the text inside {} is what
# gets replaced.
$jamoMedial {bs} <> $BS;
$jamoMedial {b} <> $Bf;
$jamoMedial {c} <> $Cf;
$jamoMedial {d} <> $Df;
$jamoMedial {gg} <> $GGf;
$jamoMedial {gs} <> $GS;
$jamoMedial {g} <> $Gf;
$jamoMedial {h} <> $Hf;
$jamoMedial {j} <> $Jf;
$jamoMedial {k} <> $Kf;
$jamoMedial {lb} <> $LB;
$jamoMedial {lg} <> $LG;
$jamoMedial {lh} <> $LH;
$jamoMedial {lm} <> $LM;
$jamoMedial {lp} <> $LP;
$jamoMedial {ls} <> $LS;
$jamoMedial {lt} <> $LT;
$jamoMedial {l} <> $L;
$jamoMedial {m} <> $Mf;
$jamoMedial {ng} <> $NG;
$jamoMedial {nh} <> $NH;
$jamoMedial {nj} <> $NJ;
$jamoMedial {n} <> $Nf;
$jamoMedial {p} <> $Pf;
$jamoMedial {ss} <> $SSf;
$jamoMedial {s} <> $Sf;
$jamoMedial {t} <> $Tf;
# Initials: Attach single consonant to following medial. Do this
# AFTER mapping finals. Longer keys must precede shorter keys that
# they start with, e.g., the rule for 'gg' must precede 'g'.
# [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within
# this block for Jamo-Latin.]
{gg} $latinMedial <> $GGi;
{g} $latinMedial <> $Gi;
{n} $latinMedial <> $Ni;
{dd} $latinMedial <> $DD;
{d} $latinMedial <> $Di;
{r} $latinMedial <> $R;
{m} $latinMedial <> $Mi;
{bb} $latinMedial <> $BB;
{b} $latinMedial <> $Bi;
{ss} $latinMedial <> $SSi;
{s} $latinMedial <> $Si;
{jj} $latinMedial <> $JJ;
{j} $latinMedial <> $Ji;
{c} $latinMedial <> $Ci;
{k} $latinMedial <> $Ki;
{t} $latinMedial <> $Ti;
{p} $latinMedial <> $Pi;
{h} $latinMedial <> $Hi;
# 'r' in final position. Because of the equivalency of the 'l' and
# 'r' jamo (the glyphs are the same), we try to provide the same
# equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled
# below. If we see an 'r' in an apparent final position, treat it
# like 'l'. For example, "karka" => Ki A R EU Ki A without this rule.
# Instead, we want Ki A L Ki A.
$jamoMedial {r} $latinInitial > | l;
# Initial + Final: If we match the next rule, we have initial then
# final consonant with no intervening medial. We insert the null
# vowel BEFORE it to create a well-formed syllable. (In the next rule
# we insert a null vowel AFTER an anomalous initial.)
$jamoInitial {} [bcdghjklmnpst] > $EU;
# Initial + X: This block matches an initial consonant not followed by
# a medial. We insert the null vowel after it. We handle double
# initials explicitly here; for single initial consonants we insert EU
# (as Latin) after them and let standard rules do the rest.
# BREAKS ROUND TRIP INTEGRITY
gg > $GGi $EU;
dd > $DD $EU;
bb > $BB $EU;
ss > $SSi $EU;
jj > $JJ $EU;
([bcdghjkmnprst]) > | $1 eu;
# X + Final: Finally we have to deal with a consonant that can only be
# interpreted as a final (not an initial) and which is preceded
# neither by an initial nor a medial. It is the start of the
# syllable, but cannot be. Most of these will already be handled by
# the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng'
# 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'.
# For this isolated case, we could add a null initial and medial,
# which would give "la" => IEUNG EU L IEUNG A, for example. A more
# economical solution is to transliterate isolated "l" (that is,
# initial "l") to "r". (Other similar conversions of consonants that
# occur neither as initials nor as finals are handled below.)
l > | r;
# Medials. If a medial is preceded by an initial, then we proceed
# normally. As usual, longer keys must precede shorter ones.
# [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within
# this block for Jamo-Latin.]
$jamoInitial {ae} <> $AE;
$jamoInitial {a} <> $A;
$jamoInitial {eo} <> $EO;
$jamoInitial {eu} <> $EU;
$jamoInitial {e} <> $E;
$jamoInitial {i} <> $I;
$jamoInitial {oe} <> $OE;
$jamoInitial {o} <> $O;
$jamoInitial {u} <> $U;
$jamoInitial {wae} <> $WAE;
$jamoInitial {wa} <> $WA;
$jamoInitial {weo} <> $WEO;
$jamoInitial {we} <> $WE;
$jamoInitial {wi} <> $WI;
$jamoInitial {yae} <> $YAE;
$jamoInitial {ya} <> $YA;
$jamoInitial {yeo} <> $YEO;
$jamoInitial {ye} <> $YE;
$jamoInitial {yi} <> $YI;
$jamoInitial {yo} <> $YO;
$jamoInitial {yu} <> $YU;
# We may see an anomalous isolated 'w' or 'y'. In that case, we
# interpret it as 'wi' and 'yu', respectively.
# BREAKS ROUND TRIP INTEGRITY
$jamoInitial {w} > | wi;
$jamoInitial {y} > | yu;
# Otherwise, insert a null consonant IEUNG before the medial (which is
# still an untransliterated latin vowel).
($latinMedial) > $IEUNG | $1;
# Convert non-jamo latin consonants to equivalents. These occur as
# neither initials nor finals in jamo. 'l' occurs as a final, but not
# an initial; it is handled above. The following letters (left hand
# side) will never be output by Jamo-Latin.
f > | p;
q > | k;
v > | b;
x > | ks;
z > | s;
# Delete separators (Latin-Jamo).
$sep > ;
# Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,
# since these may also occur in text.
< $IEUNG;
#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
#- the INDEX file. This transliterator is, by itself, not
#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
#- inverses thereof.
# eof

View file

@ -1,495 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# note: a global filter is more efficient, but MUST include all source chars
#:: [\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]] ;
# MINIMAL FILTER GENERATED FOR: Latin-Katakana
### WARNING -- must add width filter, both here and below!!! ###
:: [[\u1100-\u1112\u111A\u1121\u1160-\u1175\u11AA\u11AC-\u11AD\u11B0-\u11B5\u2190-\u2193\u2502\u25A0\u25CB\u3000-\u3002\u300C-\u300D\u3099-\u309A\u30A1-\u30ED\u30EF\u30F2-\u30F4\u30F7\u30FA-\u30FC\uFF01-\uFF5E\uFFE0-\uFFE6][',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]] ;
:: [:Latin:] fullwidth-halfwidth ();
:: NFD (NFC);
:: Lower (); # whenever transliterating from cased to uncased script, include this
# :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese
# Uses modified Hepburn. Small changes to make unambiguous.
# | Kunrei-shiki: Hepburn/MHepburn
# | ------------------------------
# | si: shi
# | si ~ya: sha
# | si ~yu: shu
# | si ~yo: sho
# | zi: ji
# | zi ~ya: ja
# | zi ~yu: ju
# | zi ~yo: jo
# | ti: chi
# | ti ~ya: cha
# | ti ~yu: chu
# | ti ~yo: cho
# | tu: tsu
# | di: ji/dji
# | du: zu/dzu
# | hu: fu
# | For foreign words:
# | -----------------
# | se ~i si
# | si ~e she
# |
# | ze ~i zi
# | zi ~e je
# |
# | te ~i ti
# | ti ~e che
# | te ~u tu
# |
# | de ~i di
# | de ~u du
# | de ~i di
# |
# | he ~u: hu
# | hu ~a fa
# | hu ~i fi
# | hu ~e fe
# | hu ~o fo
# Most small forms are generated, but if necessary
# explicit small forms are given with ~a, ~ya, etc.
#------------------------------------------------------
# Variables
$vowel = [aeiou] ;
$consonant = [bcdfghjklmnpqrstvwxyz] ;
$macron = \u0304 ;
# Variables used for doubled-consonants with tsu
$kana = [\u3041-\u3094] ;
$voice = [\u3099\u309B];
$semivoice = [\u309A\u309C];
$k_start = [カキクケコかきくけこ] ;
$s_start = [サシスセソさしすせそ] ;
$j_start = [シし] $voice ;
$t_start = [タチツテトたちつてと] ;
$n_start = [ナニヌネノンなにぬねの] ;
$h_start = [ハヒヘホはひへほ] ;
$f_start = [フふ] ;
$m_start = [マミムメモまみむめも] ;
$y_start = [ヤユヨやゆよ] ;
$r_start = [ラリルレロらりるれろ] ;
$w_start = [ワヰヱヲわゐゑを] ;
$v_start = [ワヰヱヲ]゙ ;
# if ン is followed by $n_quoter, then it needs an
# apostrophe after its romaji form to disambiguate it.
# e.g., ン ア != ナ, so represent as "n'a", not "na".
# ノ is listed with the rest of the n-row: without the apostrophe, ンノ
# would be written "nno", which the doubled-consonant rule for 'n'
# converts back to ッノ rather than ンノ.
$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;
$small_y = [ャィュェョ] ;
$iteration = \u309D ;
#------------------------------------------------------
# katakana rules
# Punctuation
'.' <> 。;
',' <> 、;
# ' ' } [a-z] > ; # delete spaces before latin
# ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before hiragana
# Iteration Mark
# Copy previous letter & marks
# TODO
# | $1 $1 < ($kana [[:M:]$voice$semivoice]?) $iteration
# Specials for katakana -- not shared with hiragana
va <> ヷ ;
vi <> ヸ ;
ve <> ヹ ;
vo <> ヺ ;
'~ka' <> ヵ ;
'~ke' <> ヶ ;
# ~~~ begin shared rules ~~~
#special
ya < '~'ャ;
yi < '~'ィ ;
yu < '~'ュ;
ye < '~'ェ;
yo < '~'ョ;
#normal
a <> ア ;
b | '~' < ヒ ゙} $small_y ;
by } $vowel > ビ | '~y' ;
ba <> バ ;
bi <> ビ ;
bu <> ブ ;
be <> ベ ;
bo <> ボ ;
c } i > | s ;
c } e > | s ;
da <> ダ ;
di <> ディ ;
du <> デゥ ;
de <> デ ;
do <> ド ;
dzu <> ヅ ;
dja < ヂャ ;
dji'~i' < ヂィ ; # liu
dju < ヂュ ;
dje < ヂェ ;
djo < ヂョ ;
dji <> ヂ ;
dj } $vowel > ヂ | '~y' ;
# TODO: QUESTION: use ĵĴżŻ instead of dj, dz
cha < チャ ;
chi'~i' < チィ ; # liu
chu < チュ ;
che < チェ ;
cho < チョ ;
chi <> チ ;
ch } $vowel > チ | '~y' ;
e <> エ ;
g | '~' < ギ} $small_y ;
gy } $vowel > ギ | '~y' ;
ga <> ガ ;
gi <> ギ ;
gu <> グ ;
ge <> ゲ ;
go <> ゴ ;
i <> イ ;
# j } $vowel > ジ | '~y' ;
ja <> ジャ ;
ji'~i' < ジィ ; # liu
ju <> ジュ ;
je <> ジェ ;
jo <> ジョ ;
ji <> ジ ;
k | '~' < キ} $small_y ;
ky } $vowel > キ | '~y' ;
ka <> カ ;
ki <> キ ;
ku <> ク ;
ke <> ケ ;
ko <> コ ;
m | '~' < ミ} $small_y ;
my } $vowel > ミ | '~y' ;
ma <> マ ;
mi <> ミ ;
mu <> ム ;
me <> メ ;
mo <> モ ;
m } [pbfv] > ン ;
# n-row.
# Katakana->Latin: ニ before a small y-kana yields "n" plus a '~'; the
# cursor is left before the '~' so it is re-matched together with the
# following small kana.
n | '~' < ニ } $small_y ;
# Latin->Katakana: "ny" before a vowel emits ニ and leaves '~y' to be
# re-matched with that vowel.
ny } $vowel > ニ | '~y' ;
na <> ナ ;
ni <> ニ ;
nu <> ヌ ;
ne <> ネ ;
# "no" must map to ノ; with an empty right-hand side the syllable would be
# dropped going to Katakana and the reverse rule would have no source.
no <> ノ ;
o <> オ ;
p | '~' < ピ } $small_y ;
py } $vowel > ピ | '~y' ;
pa <> パ ;
pi <> ピ ;
pu <> プ ;
pe <> ペ ;
po <> ポ ;
h | '~' < ヒ } $small_y ;
hy } $vowel > ヒ | '~y' ;
ha <> ハ ;
hi <> ヒ ;
hu <> ヘゥ ;
he <> ヘ ;
ho <> ホ ;
# f | '~' < フ } $small_y ;
# f } $vowel > フ | '~' ;
fa <> ファ ;
fi <> フィ ;
fe <> フェ ;
fo <> フォ ;
fu <> フ ;
r | '~' < リ } $small_y ;
ry } $vowel > リ | '~y' ;
ra <> ラ ;
ri <> リ ;
ru <> ル ;
re <> レ ;
ro <> ロ ;
za <> ザ ;
zi <> ゼィ ;
zu <> ズ ;
ze <> ゼ ;
zo <> ゾ ;
sa <> サ ;
si <> セィ ;
su <> ス ;
se <> セ ;
so <> ソ ;
sha < シャ ;
shi'~i' < シィ ; # liu
shu < シュ ;
she < シェ ;
sho < ショ ;
shi <> シ ;
sh } $vowel > シ | '~y' ;
ta <> タ ;
ti <> ティ ;
tu <> テゥ ;
te <> テ ;
to <> ト ;
tsu <> ツ ;
# v } $vowel > ヴ | '~' ;
#'v~a' < ヴァ ; # liu
#'v~i' < ヴィ ; # liu
#'v~e' < ヴェ ; # liu
#'v~o' < ヴォ ; # liu
vu <> ヴ ;
u <> ウ ;
# w } $vowel > ウ | '~' ;
wa <> ワ ;
wi <> ヰ ;
wu > ウ ;
we <> ヱ ;
wo <> ヲ ;
ya <> ヤ ;
yi > イ ;
yu <> ユ ;
ye > エ ;
yo <> ヨ ;
# double consonants
#specials
s } sh > ッ ;
t } ch > ッ ;
#voiced
j } j <> ッ } $j_start ;
b } b <> ッ } [$h_start$f_start] $voice;
d } d <> ッ } $t_start $voice;
g } g <> ッ } $k_start $voice;
p } p <> ッ } [$h_start$f_start] $semivoice;
# v } v <> ッ } [ワヰウヱヲう] $voice ;
z } z <> ッ } $s_start $voice;
v } v <> ッ } $v_start;
# normal
k } k <> ッ } $k_start ;
m } m <> ッ } $m_start ;
n } n <> ッ } $n_start ;
h } h <> ッ } $h_start ;
f } f <> ッ } $f_start ;
r } r <> ッ } $r_start ;
t } t <> ッ } $t_start ;
s } s <> ッ } $s_start ;
w } w <> ッ } $w_start;
y } y <> ッ } $y_start;
# completeness
x } x > ッ ;
c } k > ッ ;
c } c > ッ ;
c } q > ッ ;
l } l > ッ ;
q } q > ッ ;
# y } y > ッ ;
# w } w > ッ ;
# prolonged vowel mark. this indicates a doubling of
# the preceding vowel sound
#a < a { ー ; # liu
#e < e { ー ; # liu
#i < i { ー ; # liu
#o < o { ー ; # liu
#u < u { ー ; # liu
$macron <> ー ;
# small forms
'~a' <> ァ ;
'~i' <> ィ ;
'~u' <> ゥ ;
'~e' <> ェ ;
'~o' <> ォ ;
'~tsu' <> ッ ;
'~wa' <> ヮ ;
'~ya' <> ャ ;
'~yi' > ィ ;
'~yu' <> ュ ;
'~ye' > ェ ;
'~yo' <> ョ ;
# iteration marks
# TODO: make more accurate
# Katakana->Latin only. A voiced iteration mark (ヽ followed by $voice)
# repeats the already-romanized syllable in front of it with its consonant
# voiced: sh->j, ch->dj, ts->dz, k->g, s->z, t->d, h->b, w->v.
# $1 is the captured (optional y +) vowel of that preceding syllable.
j $1 < sh (y* $vowel) {ヽ$voice ;
dj $1 < ch (y* $vowel) {ヽ$voice ;
dz $1 < ts (y* $vowel) {ヽ$voice ;
g $1 < k (y* $vowel) {ヽ$voice ;
z $1 < s (y* $vowel) {ヽ$voice ;
d $1 < t (y* $vowel) {ヽ$voice ;
b $1 < h (y* $vowel) {ヽ$voice ;
v $1 < w (y* $vowel) {ヽ$voice ;
sh $1 < sh (y* $vowel) {ヽ$voice ;
j $1 < j (y* $vowel) {ヽ$voice ;
ch $1 < ch (y* $vowel) {ヽ$voice ;
dj $1 < dj(y* $vowel) {ヽ$voice ;
ts $1 < ts (y* $vowel) {ヽ$voice ;
dz $1 < dz (y* $vowel) {ヽ$voice ;
$1 < ($consonant y* $vowel) {ヽ$voice? ;
$1 < (.) {ヽ $voice? ; # otherwise repeat last character
< ヽ $voice? ; # delete if no characters found
# h- rule: lengthens vowel if not followed by a vowel
[aeiou] } h > ー ;
# one-way latin- > kana rules. these do not occur in
# well-formed romaji representing actual japanese text.
# their purpose is to make all romaji map to kana of
# some sort.
# the following are not really necessary, but produce
# slightly more natural results.
cy > セィ ;
dy > ディ ;
hy > ヒ ;
sy > セィ ;
ty > ティ ;
zy > ゼィ ;
h > ヘ ;
# isolated consonants listed here so as not to mask
# longer rules above.
ch > チ;
sh > シ ;
dz > ヅ ;
dj > ヂ;
b > ブ ;
d > デ ;
g > グ ;
k > ク ;
m > ム ;
n'' < ン } $n_quoter ;
n <> ン ;
p > プ ;
r > ル ;
s > ス ;
t > テ ;
y > イ ;
z > ズ ;
v > ヴ ;
f > フ;
j > ジ;
w > ウ;
ß > | ss ;
æ > | e ;
ð > | d ;
ø > | u ;
þ > | th ;
# simple substitutions using backup
c > | k ;
l > | r ;
q > | k ;
x > | ks ;
# ~~~ END shared rules ~~~
#------------------------------------------------------
# Final cleanup
'~' > ; # delete stray tildes between letters
[:Katakana:] { '' } [:Latin:] > ; # delete stray quotes between letters
# [\u02BE[:Nonspacing Mark:]-[\u3099-\u309C]] > ; # delete any non-spacing marks that we didn't use
:: NFC (NFD) ;
:: ([:Katakana:] halfwidth-fullwidth);
# note: a global filter is more efficient, but MUST include all source chars!!
#:: ([\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]]);
# MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
:: ( [[\ -~\u00A2-\u00A3\u00A5-\u00A6\u00AC\u0304\u20A9\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE8-\uFFEE][~\u3001-\u3002\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u3099-\u309B\u309E\u30A1-\u30FA\u30FC-\u30FE]] ) ;
# eof

View file

@ -1,41 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# According to the pinyin definitions I've been able to find:
# 'a', 'e' are the preferred bases
# otherwise 'o'
# otherwise last vowel
# The trailing form of syllables are the following:
# "a", "ai", "ao", "an", "ang",
# "o", "ou", "ong",
# "e", "ei", "er", "en", "eng",
# "i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong",
# "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng",
# "ü", "üe", "üan", "ün"
# so the letters the tone will 'hop' are:
# Forward pass starts with NFD (NFC when run in reverse), so tone marks are
# seen as separate combining characters.
::NFD (NFC);
# Combining marks treated as tones: macron, acute, caron, grave, breve.
$tone = [\u0304\u0301\u030C\u0300\u0306] ;
# Move the tone to the end of a syllable, and convert to number
# "er": the tone sits after the e; the digit is emitted after the r.
e {($tone) r} > r &tone-digit($1);
# Tone followed by i, o, n, u, "on" or "ng": emit that trailing text first,
# then the digit produced by tone-digit.
($tone) ( [i o n u {o n} {n g}]) > $2 &tone-digit($1);
# Any other tone mark is converted where it stands.
($tone) > &tone-digit($1);
# The following backs up until it finds the right vowel, then deposits the tone
$vowel = [aAeEiIoOuUüÜ];
$consonant = [[a-z A-Z] - [$vowel]];
$digit = [1-5];
# Reverse rules, tried in order:
#  1. an a/e followed by any vowels, then consonants, then the digit;
#  2. an o followed by vowels other than a/e, then consonants, then the digit;
#  3. any vowel followed only by consonants, then the digit.
# In each case the tone (digit-tone of $3) is placed right after the vowel $1.
$1 &digit-tone($3) $2 < ([aAeE]) ($vowel* $consonant*) ($digit);
$1 &digit-tone($3) $2 < ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit);
$1 &digit-tone($3) $2 < ($vowel) ($consonant*) ($digit);
# Fallback: a digit directly after any letter is converted in place.
&digit-tone($1) < [:letter:] {($digit)};
# Forward pass ends with NFC (NFD when run in reverse).
::NFC (NFD);

View file

@ -1,85 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Malayalam-InterIndic
#:: NFD (NFC) ;
\u0D02>\uE002; # SIGN ANUSVARA
\u0D03>\uE003; # SIGN VISARGA
\u0D05>\uE005; # LETTER A
\u0D06>\uE006; # LETTER AA
\u0D07>\uE007; # LETTER I
\u0D08>\uE008; # LETTER II
\u0D09>\uE009; # LETTER U
\u0D0A>\uE00A; # LETTER UU
\u0D0B>\uE00B; # LETTER VOCALIC R
\u0D0C>\uE00C; # LETTER VOCALIC L
\u0D0E>\uE00E; # LETTER E
\u0D0F>\uE00F; # LETTER EE
\u0D10>\uE010; # LETTER AI
\u0D12>\uE012; # LETTER O
\u0D13>\uE013; # LETTER OO
\u0D14>\uE014; # LETTER AU
\u0D15>\uE015; # LETTER KA
\u0D16>\uE016; # LETTER KHA
\u0D17>\uE017; # LETTER GA
\u0D18>\uE018; # LETTER GHA
\u0D19>\uE019; # LETTER NGA
\u0D1A>\uE01A; # LETTER CA
\u0D1B>\uE01B; # LETTER CHA
\u0D1C>\uE01C; # LETTER JA
\u0D1D>\uE01D; # LETTER JHA
\u0D1E>\uE01E; # LETTER NYA
\u0D1F>\uE01F; # LETTER TTA
\u0D20>\uE020; # LETTER TTHA
\u0D21>\uE021; # LETTER DDA
\u0D22>\uE022; # LETTER DDHA
\u0D23>\uE023; # LETTER NNA
\u0D24>\uE024; # LETTER TA
\u0D25>\uE025; # LETTER THA
\u0D26>\uE026; # LETTER DA
\u0D27>\uE027; # LETTER DHA
\u0D28>\uE028; # LETTER NA
\u0D2A>\uE02A; # LETTER PA
\u0D2B>\uE02B; # LETTER PHA
\u0D2C>\uE02C; # LETTER BA
\u0D2D>\uE02D; # LETTER BHA
\u0D2E>\uE02E; # LETTER MA
\u0D2F>\uE02F; # LETTER YA
\u0D30>\uE030; # LETTER RA
\u0D31>\uE031; # LETTER RRA
\u0D32>\uE032; # LETTER LA
\u0D33>\uE033; # LETTER LLA
\u0D34>\uE034; # LETTER LLLA
\u0D35>\uE035; # LETTER VA
\u0D36>\uE036; # LETTER SHA
\u0D37>\uE037; # LETTER SSA
\u0D38>\uE038; # LETTER SA
\u0D39>\uE039; # LETTER HA
\u0D3E>\uE03E; # VOWEL SIGN AA
\u0D3F>\uE03F; # VOWEL SIGN I
\u0D40>\uE040; # VOWEL SIGN II
\u0D41>\uE041; # VOWEL SIGN U
\u0D42>\uE042; # VOWEL SIGN UU
\u0D43>\uE043; # VOWEL SIGN VOCALIC R
\u0D46>\uE046; # VOWEL SIGN E
\u0D47>\uE047; # VOWEL SIGN EE
\u0D48>\uE048; # VOWEL SIGN AI
\u0D4D>\uE04D; # SIGN VIRAMA
\u0D57>\uE057; # AU LENGTH MARK
\u0D60>\uE060; # LETTER VOCALIC RR
\u0D61>\uE061; # LETTER VOCALIC LL
\u0D66>\uE066; # DIGIT ZERO
\u0D67>\uE067; # DIGIT ONE
\u0D68>\uE068; # DIGIT TWO
\u0D69>\uE069; # DIGIT THREE
\u0D6A>\uE06A; # DIGIT FOUR
\u0D6B>\uE06B; # DIGIT FIVE
\u0D6C>\uE06C; # DIGIT SIX
\u0D6D>\uE06D; # DIGIT SEVEN
\u0D6E>\uE06E; # DIGIT EIGHT
\u0D6F>\uE06F; # DIGIT NINE
# :: NFC (NFD) ;
# eof

View file

@ -1,95 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Oriya-InterIndic
#:: NFD (NFC) ;
#\u0B21\u0B3C>\uE05C;# LETTER RRA
#\u0B22\u0B3C>\uE05D;# LETTER RHA
\u0B47\u0B56>\uE048;# VOWEL SIGN AI
\u0B47\u0B3E>\uE04B;# VOWEL SIGN O
\u0B47\u0B57>\uE04C;# VOWEL SIGN AU
\u0B01>\uE001; # SIGN CANDRABINDU
\u0B02>\uE002; # SIGN ANUSVARA
\u0B03>\uE003; # SIGN VISARGA
\u0B05>\uE005; # LETTER A
\u0B06>\uE006; # LETTER AA
\u0B07>\uE007; # LETTER I
\u0B08>\uE008; # LETTER II
\u0B09>\uE009; # LETTER U
\u0B0A>\uE00A; # LETTER UU
\u0B0B>\uE00B; # LETTER VOCALIC R
\u0B0C>\uE00C; # LETTER VOCALIC L
\u0B0F>\uE00F; # LETTER E
\u0B10>\uE010; # LETTER AI
\u0B13>\uE013; # LETTER O
\u0B14>\uE014; # LETTER AU
\u0B15>\uE015; # LETTER KA
\u0B16>\uE016; # LETTER KHA
\u0B17>\uE017; # LETTER GA
\u0B18>\uE018; # LETTER GHA
\u0B19>\uE019; # LETTER NGA
\u0B1A>\uE01A; # LETTER CA
\u0B1B>\uE01B; # LETTER CHA
\u0B1C>\uE01C; # LETTER JA
\u0B1D>\uE01D; # LETTER JHA
\u0B1E>\uE01E; # LETTER NYA
\u0B1F>\uE01F; # LETTER TTA
\u0B20>\uE020; # LETTER TTHA
\u0B21>\uE021; # LETTER DDA
\u0B22>\uE022; # LETTER DDHA
\u0B23>\uE023; # LETTER NNA
\u0B24>\uE024; # LETTER TA
\u0B25>\uE025; # LETTER THA
\u0B26>\uE026; # LETTER DA
\u0B27>\uE027; # LETTER DHA
\u0B28>\uE028; # LETTER NA
\u0B2A>\uE02A; # LETTER PA
\u0B2B>\uE02B; # LETTER PHA
\u0B2C>\uE02C; # LETTER BA
\u0B2D>\uE02D; # LETTER BHA
\u0B2E>\uE02E; # LETTER MA
\u0B2F>\uE02F; # LETTER YA
\u0B30>\uE030; # LETTER RA
\u0B32>\uE032; # LETTER LA
\u0B33>\uE033; # LETTER LLA
\u0B35>\uE035; # LETTER VA
\u0B36>\uE036; # LETTER SHA
\u0B37>\uE037; # LETTER SSA
\u0B38>\uE038; # LETTER SA
\u0B39>\uE039; # LETTER HA
\u0B3C>\uE03C; # SIGN NUKTA
\u0B3D>\uE03D; # SIGN AVAGRAHA
\u0B3E>\uE03E; # VOWEL SIGN AA
\u0B3F>\uE03F; # VOWEL SIGN I
\u0B40>\uE040; # VOWEL SIGN II
\u0B41>\uE041; # VOWEL SIGN U
\u0B42>\uE042; # VOWEL SIGN UU
\u0B43>\uE043; # VOWEL SIGN VOCALIC R
\u0B47>\uE047; # VOWEL SIGN E
#
\u0B4D>\uE04D; # SIGN VIRAMA
\u0B56>\uE056; # AI LENGTH MARK
\u0B57>\uE057; # AU LENGTH MARK
\u0964>\ue064; # DANDA
\u0965>\ue065; # DOUBLE DANDA
#
\u0B5F>\uE05F; # LETTER YYA
\u0B60>\uE060; # LETTER VOCALIC RR
\u0B61>\uE061; # LETTER VOCALIC LL
\u0B66>\uE066; # DIGIT ZERO
\u0B67>\uE067; # DIGIT ONE
\u0B68>\uE068; # DIGIT TWO
\u0B69>\uE069; # DIGIT THREE
\u0B6A>\uE06A; # DIGIT FOUR
\u0B6B>\uE06B; # DIGIT FIVE
\u0B6C>\uE06C; # DIGIT SIX
\u0B6D>\uE06D; # DIGIT SEVEN
\u0B6E>\uE06E; # DIGIT EIGHT
\u0B6F>\uE06F; # DIGIT NINE
\u0B70>\ue07B; # ISSHAR
\u0B71>\ue081; # LETTER WA
# :: NFC (NFD) ;
# eof

View file

@ -1,76 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Tamil-InterIndic
#:: NFD (NFC) ;
\u0BC6\u0BBE>\uE04A;# VOWEL SIGN O
\u0BC7\u0BBE>\uE04B;# VOWEL SIGN OO
\u0BC6\u0BD7>\uE04C;# VOWEL SIGN AU
\u0B92\u0BD7>\uE014;# LETTER AU
\u0B82>\uE002; # SIGN ANUSVARA
\u0B83>\uE003; # SIGN VISARGA
\u0B85>\uE005; # LETTER A
\u0B86>\uE006; # LETTER AA
\u0B87>\uE007; # LETTER I
\u0B88>\uE008; # LETTER II
\u0B89>\uE009; # LETTER U
\u0B8A>\uE00A; # LETTER UU
\u0B8E>\uE00E; # LETTER E
\u0B8F>\uE00F; # LETTER EE
\u0B90>\uE010; # LETTER AI
\u0B92>\uE012; # LETTER O
\u0B93>\uE013; # LETTER OO
\u0B94>\uE014; # LETTER AU
\u0B95>\uE015; # LETTER KA
\u0B99>\uE019; # LETTER NGA
\u0B9A>\uE01A; # LETTER CA
\u0B9C>\uE01C; # LETTER JA
\u0B9E>\uE01E; # LETTER NYA
\u0B9F>\uE01F; # LETTER TTA
\u0BA3>\uE023; # LETTER NNA
\u0BA4>\uE024; # LETTER TA
\u0BA8>\uE028; # LETTER NA
\u0BA9>\uE029; # LETTER NNNA
\u0BAA>\uE02A; # LETTER PA
\u0BAE>\uE02E; # LETTER MA
\u0BAF>\uE02F; # LETTER YA
\u0BB0>\uE030; # LETTER RA
\u0BB1>\uE031; # LETTER RRA
\u0BB2>\uE032; # LETTER LA
\u0BB3>\uE033; # LETTER LLA
\u0BB4>\uE034; # LETTER LLLA
\u0BB5>\uE035; # LETTER VA
\u0BB7>\uE037; # LETTER SSA
\u0BB8>\uE038; # LETTER SA
\u0BB9>\uE039; # LETTER HA
\u0BBE>\uE03E; # VOWEL SIGN AA
\u0BBF>\uE03F; # VOWEL SIGN I
\u0BC0>\uE040; # VOWEL SIGN II
\u0BC1>\uE041; # VOWEL SIGN U
\u0BC2>\uE042; # VOWEL SIGN UU
\u0BC6>\uE046; # VOWEL SIGN E
\u0BC7>\uE047; # VOWEL SIGN EE
\u0BC8>\uE048; # VOWEL SIGN AI
\u0BCD>\uE04D; # SIGN VIRAMA
\u0BD7>\uE057; # AU LENGTH MARK
\u0BE7>\uE067; # DIGIT ONE
\u0BE8>\uE068; # DIGIT TWO
\u0BE9>\uE069; # DIGIT THREE
\u0BEA>\uE06A; # DIGIT FOUR
\u0BEB>\uE06B; # DIGIT FIVE
\u0BEC>\uE06C; # DIGIT SIX
\u0BED>\uE06D; # DIGIT SEVEN
\u0BEE>\uE06E; # DIGIT EIGHT
\u0BEF>\uE06F; # DIGIT NINE
\u0BF0>\uE067\uE066; # UNMAPPED Tamil-InterIndic: NUMBER TEN
\u0BF1>\uE067\uE066\uE066; # UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED
\u0BF2>\uE067\uE066\uE066\uE066;# UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND
0>\ue066;
# :: NFC (NFD) ;
# eof

View file

@ -1,90 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Telugu-InterIndic
#:: NFD (NFC) ;
\u0c46\u0c4d\u0c56>\ue048\ue04d;
\u0C46\u0C56>\uE048;# VOWEL SIGN AI
\u0C01>\uE001; # SIGN CANDRABINDU
\u0C02>\uE002; # SIGN ANUSVARA
\u0C03>\uE003; # SIGN VISARGA
\u0C05>\uE005; # LETTER A
\u0C06>\uE006; # LETTER AA
\u0C07>\uE007; # LETTER I
\u0C08>\uE008; # LETTER II
\u0C09>\uE009; # LETTER U
\u0C0A>\uE00A; # LETTER UU
\u0C0B>\uE00B; # LETTER VOCALIC R
\u0C0C>\uE00C; # LETTER VOCALIC L
\u0C0E>\uE00E; # LETTER E
\u0C0F>\uE00F; # LETTER EE
\u0C10>\uE010; # LETTER AI
\u0C12>\uE012; # LETTER O
\u0C13>\uE013; # LETTER OO
\u0C14>\uE014; # LETTER AU
\u0C15>\uE015; # LETTER KA
\u0C16>\uE016; # LETTER KHA
\u0C17>\uE017; # LETTER GA
\u0C18>\uE018; # LETTER GHA
\u0C19>\uE019; # LETTER NGA
\u0C1A>\uE01A; # LETTER CA
\u0C1B>\uE01B; # LETTER CHA
\u0C1C>\uE01C; # LETTER JA
\u0C1D>\uE01D; # LETTER JHA
\u0C1E>\uE01E; # LETTER NYA
\u0C1F>\uE01F; # LETTER TTA
\u0C20>\uE020; # LETTER TTHA
\u0C21>\uE021; # LETTER DDA
\u0C22>\uE022; # LETTER DDHA
\u0C23>\uE023; # LETTER NNA
\u0C24>\uE024; # LETTER TA
\u0C25>\uE025; # LETTER THA
\u0C26>\uE026; # LETTER DA
\u0C27>\uE027; # LETTER DHA
\u0C28>\uE028; # LETTER NA
\u0C2A>\uE02A; # LETTER PA
\u0C2B>\uE02B; # LETTER PHA
\u0C2C>\uE02C; # LETTER BA
\u0C2D>\uE02D; # LETTER BHA
\u0C2E>\uE02E; # LETTER MA
\u0C2F>\uE02F; # LETTER YA
\u0C30>\uE030; # LETTER RA
\u0C31>\uE031; # LETTER RRA
\u0C32>\uE032; # LETTER LA
\u0C33>\uE033; # LETTER LLA
\u0C35>\uE035; # LETTER VA
\u0C36>\uE036; # LETTER SHA
\u0C37>\uE037; # LETTER SSA
\u0C38>\uE038; # LETTER SA
\u0C39>\uE039; # LETTER HA
\u0C3E>\uE03E; # VOWEL SIGN AA
\u0C3F>\uE03F; # VOWEL SIGN I
\u0C40>\uE040; # VOWEL SIGN II
\u0C41>\uE041; # VOWEL SIGN U
\u0C42>\uE042; # VOWEL SIGN UU
\u0C43>\uE043; # VOWEL SIGN VOCALIC R
\u0C44>\uE044; # VOWEL SIGN VOCALIC RR
\u0C46>\uE046; # VOWEL SIGN E
\u0C47>\uE047; # VOWEL SIGN EE
\u0C4A>\uE04A; # VOWEL SIGN O
\u0C4B>\uE04B; # VOWEL SIGN OO
\u0C4C>\uE04C; # VOWEL SIGN AU
\u0C4D>\uE04D; # SIGN VIRAMA
\u0C55>\uE055; # LENGTH MARK
\u0C56>\uE056; # AI LENGTH MARK
\u0C60>\uE060; # LETTER VOCALIC RR
\u0C61>\uE061; # LETTER VOCALIC LL
\u0C66>\uE066; # DIGIT ZERO
\u0C67>\uE067; # DIGIT ONE
\u0C68>\uE068; # DIGIT TWO
\u0C69>\uE069; # DIGIT THREE
\u0C6A>\uE06A; # DIGIT FOUR
\u0C6B>\uE06B; # DIGIT FIVE
\u0C6C>\uE06C; # DIGIT SIX
\u0C6D>\uE06D; # DIGIT SEVEN
\u0C6E>\uE06E; # DIGIT EIGHT
\u0C6F>\uE06F; # DIGIT NINE
# :: NFC (NFD) ;
# eof

View file

@ -1,187 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Thai-Latin
# This set of rules follows ISO 11940
# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
# except that that does not mention an implicit vowel, so we use ọ
#
# The transcription is fairly ugly, so we ought to also do the UNGEGN version
# see: http://www.eki.ee/wgrs/rom1_th.pdf
# and probably make that the main variant.
# Note: this is an internal file. The NFD/NFC is handled externally, in the index
# The insertion of spaces between words, the reversal of the vowels
# and the conversion of space to semicolon are done *outside* of these rules.
# So as far as these rules are concerned, the vowels are in logical order!
# insert implicit vowel (and remove it going the other way)
# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
#$consonant = [ก-ฮ];
#$vowel = [ะ-ฺเ-ไ็];
#{ ( $consonant ) } [^$vowel ] > | $1  ;
# > ọ ;
# < ọ ;
$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
$notBelow = [^\p{ccc=0}\p{ccc=below}] ;
# Consonants
# Warning: the 'h's need to be handled carefully!
# What we really want to say is the following, but we can't
# $notHAccent = !($notAbove* ̄ | $notBelow* ̣) ;
# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
# Accent sets used as look-ahead context by the kh/ph/ch/th rules below.
$freeStandingBelow = [\u0325 ];
# Each variable definition must end with ';' or it runs into the next one.
$hAccent = [ ̄ ̣];
# A single character that is neither a free-standing below accent nor an h accent.
$notHAccent0 = [^$freeStandingBelow$hAccent];
# A free-standing below accent followed by a character that is not an h accent.
$notHAccent1 = $freeStandingBelow [^$hAccent];
ห > h̄ ; # THAI CHARACTER HO HIP
ห | $1 < h ($notAbove*) ̄; # backward case, account for reordering
ฮ <> ḥ ; # THAI CHARACTER HO NOKHUK
ข <> k̄h ; # THAI CHARACTER KHO KHAI
ฃ <> ḳ̄h ; # THAI CHARACTER KHO KHUAT
ฅ <> kʹh ; # THAI CHARACTER KHO KHON
ฆ <> ḳh ; # THAI CHARACTER KHO RAKHANG
ค < kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
ค <> kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
ก <> k ; # THAI CHARACTER KO KAI
ภ <> p̣h ; # THAI CHARACTER PHO SAMPHAO
ผ <> p̄h ; # THAI CHARACTER PHO PHUNG
พ < ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
พ <> ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
ป <> p ; # THAI CHARACTER PO PLA
ฉ <> c̄h ; # THAI CHARACTER CHO CHING
ฌ <> c̣h ; # THAI CHARACTER CHO CHOE
ช < ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
ช <> ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
จ <> c ; # THAI CHARACTER CHO CHAN
ฐ <> ṭ̄h ; # THAI CHARACTER THO THAN
ฑ <> ṯh ; # THAI CHARACTER THO NANGMONTHO
ฒ <> tʹh ; # THAI CHARACTER THO PHUTHAO
ถ <> t̄h ; # THAI CHARACTER THO THUNG
ธ <> ṭh ; # THAI CHARACTER THO THONG
ท < th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
ท <> th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.
ฏ <> t̩ ; # THAI CHARACTER TO PATAK
ต <> t ; # THAI CHARACTER TO TAO
# since there is no singleton g (generated), don't worry about that.
ง <> ng ; # THAI CHARACTER NGO NGU
ณ <> ṇ ; # THAI CHARACTER NO NEN
น <> n ; # THAI CHARACTER NO NU
ญ <> ỵ ; # THAI CHARACTER YO YING
ฎ <> ḍ ; # THAI CHARACTER DO CHADA
ด <> d ; # THAI CHARACTER DO DEK
บ <> b ; # THAI CHARACTER BO BAIMAI
ฝ <> f̄ ; # THAI CHARACTER FO FA
ฝ | $1 < f ($notAbove*) ̄; # backward case, account for reordering
ม <> m ; # THAI CHARACTER MO MA
ย <> y ; # THAI CHARACTER YO YAK
ร <> r ; # THAI CHARACTER RO RUA
ฤ <> v ; # THAI CHARACTER RU
ฦ <> ł ; # THAI CHARACTER LU
ว <> w ; # THAI CHARACTER WO WAEN
ศ <> ṣ̄ ; # THAI CHARACTER SO SALA***
ศ | $1 < s ̣ ($notAbove*) ̄; # backward case, account for reordering
ษ <> s̄ʹ ; # THAI CHARACTER SO RUSI
ส > s̄ ; # THAI CHARACTER SO SUA***
ส | $1 < s ($notAbove*) ̄; # backward case, account for reordering
ฬ <> ḷ ; # THAI CHARACTER LO CHULA
ล <> l ; # THAI CHARACTER LO LING
ฟ <> f ; # THAI CHARACTER FO FAN
อ <> x ; # THAI CHARACTER O ANG
ซ <> s ; # THAI CHARACTER SO SO
# vowels
ั <> ạ ; # THAI CHARACTER MAI HAN-AKAT
า > ā ; # THAI CHARACTER SARA AA
า | $1 < a ($notAbove*) ̄; # backward case, account for reordering
# We deviate from ISO for SARA AM for disambiguation
ำ > a ̉; # THAI CHARACTER SARA AM
ำ | $1 < a ($notAbove*) ̉ ; # backward case, account for reordering
ะ <> a ; # THAI CHARACTER SARA A
ี <> ī ; # THAI CHARACTER SARA II
ี | $1 < i ($notAbove*) ̄ ; # backward case, account for reordering
ื <> ụ̄ ; # THAI CHARACTER SARA UEE
ื | $1 < u ̣ ($notAbove*) ̄ ; # backward case, account for reordering
ึ <> ụ ; # THAI CHARACTER SARA UE
ู <> ū ; # THAI CHARACTER SARA UU
ู | $1 < u ($notAbove*) ̄ ; # backward case, account for reordering
ุ <> u ; # THAI CHARACTER SARA U
ฯ <> ‡ ; # THAI CHARACTER PAIYANNOI
# ฿ <> XXX ; # THAI CURRENCY SYMBOL BAHT
เ <> e ; # THAI CHARACTER SARA E
แ <> æ ; # THAI CHARACTER SARA AE
โ <> o ; # THAI CHARACTER SARA O
ใ <> ı ; # THAI CHARACTER SARA AI MAIMUAN
ไ <> ị ; # THAI CHARACTER SARA AI MAIMALAI
ๅ <> ɨ ; # THAI CHARACTER LAKKHANGYAO
็ <> ̆ ; # THAI CHARACTER MAITAIKHU
่ <> ̀ ; # THAI CHARACTER MAI EK
้ <> ̂ ; # THAI CHARACTER MAI THO
๊ <> ́ ; # THAI CHARACTER MAI TRI
๋ <> ̌ ; # THAI CHARACTER MAI CHATTAWA
์ <> ̒ ; # THAI CHARACTER THANTHAKHAT
๎ <> '~' ; # THAI CHARACTER YAMAKKAN
# We deviate from ISO for disambiguation
ํ <> ̊ ; # THAI CHARACTER NIKHAHIT
๏ <> § ; # THAI CHARACTER FONGMAN
# Thai digits map one-to-one to ASCII digits in both directions.
๐ <> 0 ; # THAI DIGIT ZERO
๑ <> 1 ; # THAI DIGIT ONE
๒ <> 2 ; # THAI DIGIT TWO
๓ <> 3 ; # THAI DIGIT THREE
๔ <> 4 ; # THAI DIGIT FOUR
๕ <> 5 ; # THAI DIGIT FIVE
๖ <> 6 ; # THAI DIGIT SIX
๗ <> 7 ; # THAI DIGIT SEVEN
๘ <> 8 ; # THAI DIGIT EIGHT
๙ <> 9 ; # THAI DIGIT NINE
๚ <> '||' ; # THAI CHARACTER ANGKHANKHU
๛ <> » ; # THAI CHARACTER KHOMUT
ๆ <> « ; # THAI CHARACTER MAIYAMOK
# moved down to make shorter first
#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
ฺ <> ˌ ; # THAI CHARACTER PHINTHU
ิ <> i ; # THAI CHARACTER SARA I
# fallbacks
| k < g ;
| k < h ;
| c < j ;
| k < q ;
| s < z ;
:: (lower);

View file

@ -1,26 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Thai logical-order handling: swaps each Logical_Order_Exception vowel with
# the Thai character next to it, and handles (part of) the spacing.
# The space-to-semicolon rules live in another file, because they have to
# come BEFORE the break iterator.
$thaiChar = [[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B] ;
# Reverse direction, first step: turn '; ' between Thai characters back into a space.
' ' < $thaiChar { '; ' } $thaiChar ;
# Reverse direction, second step: delete any other space between Thai characters.
< $thaiChar { ' ' } $thaiChar ;
# Vowel reordering: split the Thai set into reordering vowels and everything else.
$leadingVowel = [[:Logical_Order_Exception:] & $thaiChar] ;
$nonLeading = [$thaiChar - $leadingVowel] ;
# Forward: vowel + following character -> following character + vowel.
( $leadingVowel ) ( $nonLeading ) > $2 $1 ;
# Reverse: character + vowel -> vowel + character.
$2 $1 < ( $nonLeading ) ( $leadingVowel ) ;

View file

@ -1,11 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Forward-only rule: a space between two Thai characters becomes '; '.
# It is kept in this file because it has to come BEFORE the break iterator.
$thaiChar = [[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B] ;
$thaiChar { ' ' } $thaiChar > '; ' ;

View file

@ -1,11 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Only intended for internal use
# Two-way mapping between four combining marks and the digits 1-4.
# NOTE(review): presumably the Tone-Digit / Digit-Tone rule set listed in the
# transliterator index -- confirm against the file name.
\u0304 <> 1; # COMBINING MACRON
\u0301 <> 2; # COMBINING ACUTE ACCENT
\u030C <> 3; # COMBINING CARON
\u0300 <> 4; # COMBINING GRAVE ACCENT
# Reverse direction only: the digit 5 is deleted (it has no corresponding mark here).
< 5;

View file

@ -1,253 +0,0 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2004, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
#
# TRANSLITERATOR INDEX FILE. This file lists the non-algorithmic
# system transliterators. It allows arbitrary mappings between
# transliterator IDs and file names, and also allows the system to
# define aliases for transliterators, so that "Latin-Hangul", for
# example, can be implemented transparently as the compound
# "Latin-Jamo;Jamo-Hangul". Internal IDs may also be defined; these
# are invisible to the user, but can be composed together by the
# system to create visible transliterators.
#
# Blank lines and lines beginning with '#' are ignored.
#
# Lines in this file have one of the following forms (text not
# enclosed by <> is literal):
#
# <id>:file:<resource>:<encoding>:<direction>
# <id>:internal:<resource>:<encoding>:<direction>
# <id>:alias:<getInstanceArg>
#
# <id> is the ID of the system transliterator being defined. These
# are public IDs enumerated by Transliterator.getAvailableIDs(),
# unless the second field is "internal".
#
# <resource> is a ResourceReader resource name. Currently these refer
# to file names under com/ibm/text/resources. This string is passed
# directly to ResourceReader, together with <encoding>.
#
# <encoding> is the character encoding to use when reading <resource>;
# passed directly to ResourceReader. E.g., "UTF8".
#
# <direction> is either "FORWARD" or "REVERSE".
#
# <getInstanceArg> is a string to be passed directly to
# Transliterator.getInstance(). The returned Transliterator object
# then has its ID changed to <id> and is returned.
# Bidirectional rule files
Fullwidth-Halfwidth:file:Transliterator_Fullwidth_Halfwidth.txt:UTF8:FORWARD
Halfwidth-Fullwidth:file:Transliterator_Fullwidth_Halfwidth.txt:UTF8:REVERSE
Latin-Cyrillic:file:Transliterator_Cyrillic_Latin.txt:UTF8:REVERSE
Cyrillic-Latin:file:Transliterator_Cyrillic_Latin.txt:UTF8:FORWARD
Latin-Hebrew:file:Transliterator_Hebrew_Latin.txt:UTF8:REVERSE
Hebrew-Latin:file:Transliterator_Hebrew_Latin.txt:UTF8:FORWARD
Latin-Arabic:file:Transliterator_Arabic_Latin.txt:UTF8:REVERSE
Arabic-Latin:file:Transliterator_Arabic_Latin.txt:UTF8:FORWARD
Tone-Digit:internal:Transliterator_Tone_Digit.txt:UTF8:FORWARD
Digit-Tone:internal:Transliterator_Tone_Digit.txt:UTF8:REVERSE
Latin-NumericPinyin:file:Transliterator_Latin_NumericPinyin.txt:UTF8:FORWARD
NumericPinyin-Latin:file:Transliterator_Latin_NumericPinyin.txt:UTF8:REVERSE
Han-Spacedhan:internal:Transliterator_Han_Spacedhan.txt:UTF8:FORWARD
Spacedhan-Han:alias:null
Han-Latin:file:Transliterator_Han_Latin.txt:UTF8:FORWARD
#Latin-Han:file:Transliterator_Han_Latin.txt:UTF8:REVERSE # no round trip!
Latin-Han:alias:null
# Comment these out; they are only for testing
# Latin-Han/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:REVERSE
# Han-Latin/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:FORWARD
#Latin-Han/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:REVERSE
#Han-Latin/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:FORWARD
Latin-Greek:file:Transliterator_Greek_Latin.txt:UTF8:REVERSE
Greek-Latin:file:Transliterator_Greek_Latin.txt:UTF8:FORWARD
Latin-Greek/UNGEGN:file:Transliterator_Greek_Latin_UNGEGN.txt:UTF8:REVERSE
Greek-Latin/UNGEGN:file:Transliterator_Greek_Latin_UNGEGN.txt:UTF8:FORWARD
Latin-Katakana:file:Transliterator_Latin_Katakana.txt:UTF8:FORWARD
Katakana-Latin:file:Transliterator_Latin_Katakana.txt:UTF8:REVERSE
Latin-Hiragana:file:Transliterator_Hiragana_Latin.txt:UTF8:REVERSE
Hiragana-Latin:file:Transliterator_Hiragana_Latin.txt:UTF8:FORWARD
#Thai Stuff: will change if we get \b into Transliterator
Thai-ThaiSemi:internal:Transliterator_Thai_ThaiSemi.txt:UTF8:FORWARD
Thai-ThaiLogical:internal:Transliterator_Thai_ThaiLogical.txt:UTF8:FORWARD
ThaiLogical-Thai:internal:Transliterator_Thai_ThaiLogical.txt:UTF8:REVERSE
ThaiLogical-Latin:internal:Transliterator_ThaiLogical_Latin.txt:UTF8:FORWARD
Latin-ThaiLogical:internal:Transliterator_ThaiLogical_Latin.txt:UTF8:REVERSE
# Must use the order below!
# We need two separate passes because of the Thai vowel reversal
# Thai-ThaiSemi converts spaces between Thai characters to '; ' (semicolon + space). That has to be done before we insert Latin spaces.
Thai-Latin:alias:[[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B];NFD;Thai-ThaiSemi;Any-BreakInternal;Thai-ThaiLogical;ThaiLogical-Latin;NFC
Latin-Thai:alias:[[:Latin:][:Mn:][:Me:] \ \;0-9|~\u00A7\u00AB\u00BB\u02B9\u0374\u2021\u02CC];NFD;Latin-ThaiLogical;ThaiLogical-Thai;NFC
# end of Thai Stuff
Hiragana-Katakana:file:Transliterator_Hiragana_Katakana.txt:UTF8:FORWARD
Katakana-Hiragana:file:Transliterator_Hiragana_Katakana.txt:UTF8:REVERSE
Any-Accents:file:Transliterator_Any_Accents.txt:UTF8:FORWARD
Accents-Any:file:Transliterator_Any_Accents.txt:UTF8:REVERSE
Any-Publishing:file:Transliterator_Any_Publishing.txt:UTF8:FORWARD
Publishing-Any:file:Transliterator_Any_Publishing.txt:UTF8:REVERSE
# Korean
# N.B. Don't end Latin-Jamo with NFC; that produces Hangul. For
# Hangul output use Latin-Hangul.
LowerLatin-Jamo:internal:Transliterator_Latin_Jamo.txt:UTF8:FORWARD
Jamo-LowerLatin:internal:Transliterator_Latin_Jamo.txt:UTF8:REVERSE
Latin-Jamo:alias:['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo
Jamo-Latin:alias:['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\uAC00-\uD7A3];NFD;Jamo-LowerLatin;NFC
Latin-Hangul:alias:['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo;NFC
Hangul-Latin:alias:['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\u3131-\u313F\u3141-\u3143\u3145-\u3163\u3200-\u321C\u3260-\u327B\uAC00-\uD7A3\uFF07\uFFA1-\uFFAF\uFFB1-\uFFB3\uFFB5-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC];NFKD;Jamo-LowerLatin;NFC
# Inter-Indic composed rules
Latin-InterIndic:internal:Transliterator_Latin_InterIndic.txt:UTF8:FORWARD
Devanagari-InterIndic:internal:Transliterator_Devanagari_InterIndic.txt:UTF8:FORWARD
Bengali-InterIndic:internal:Transliterator_Bengali_InterIndic.txt:UTF8:FORWARD
Gurmukhi-InterIndic:internal:Transliterator_Gurmukhi_InterIndic.txt:UTF8:FORWARD
Gujarati-InterIndic:internal:Transliterator_Gujarati_InterIndic.txt:UTF8:FORWARD
Oriya-InterIndic:internal:Transliterator_Oriya_InterIndic.txt:UTF8:FORWARD
Tamil-InterIndic:internal:Transliterator_Tamil_InterIndic.txt:UTF8:FORWARD
Telugu-InterIndic:internal:Transliterator_Telugu_InterIndic.txt:UTF8:FORWARD
Kannada-InterIndic:internal:Transliterator_Kannada_InterIndic.txt:UTF8:FORWARD
Malayalam-InterIndic:internal:Transliterator_Malayalam_InterIndic.txt:UTF8:FORWARD
InterIndic-Latin:internal:Transliterator_InterIndic_Latin.txt:UTF8:FORWARD
InterIndic-Devanagari:internal:Transliterator_InterIndic_Devanagari.txt:UTF8:FORWARD
InterIndic-Bengali:internal:Transliterator_InterIndic_Bengali.txt:UTF8:FORWARD
InterIndic-Gurmukhi:internal:Transliterator_InterIndic_Gurmukhi.txt:UTF8:FORWARD
InterIndic-Gujarati:internal:Transliterator_InterIndic_Gujarati.txt:UTF8:FORWARD
InterIndic-Oriya:internal:Transliterator_InterIndic_Oriya.txt:UTF8:FORWARD
InterIndic-Tamil:internal:Transliterator_InterIndic_Tamil.txt:UTF8:FORWARD
InterIndic-Telugu:internal:Transliterator_InterIndic_Telugu.txt:UTF8:FORWARD
InterIndic-Kannada:internal:Transliterator_InterIndic_Kannada.txt:UTF8:FORWARD
InterIndic-Malayalam:internal:Transliterator_InterIndic_Malayalam.txt:UTF8:FORWARD
#Latin-Indic transliterators
Latin-Devanagari:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Devanagari;NFC
Latin-Bengali:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Bengali;NFC
Latin-Gurmukhi:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gurmukhi;NFC
Latin-Gujarati:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gujarati;NFC
Latin-Oriya:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Oriya;NFC
Latin-Tamil:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Tamil;NFC
Latin-Telugu:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Telugu;NFC
Latin-Kannada:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Kannada;NFC
Latin-Malayalam:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Malayalam;NFC
# Indic-Latin transliterators
# Entry shape: <ID>:alias:<source filter>;NFD;<Script>-InterIndic;InterIndic-Latin;NFC
# The bracketed set at the start of each alias lists the source-script code
# points named for that entry; the rest is the chain NFD, <Script>-InterIndic,
# InterIndic-Latin, NFC.
# NOTE(review): the Tamil-Latin filter starts with a literal ASCII "0". The
# Tamil digits listed are \u0BE7-\u0BF2 only, so this is presumably a stand-in
# for a missing Tamil zero - confirm before removing it.
# NOTE(review): Oriya-Latin lists \u0964-\u0965 and splits \u0B35\u0B36-\u0B39
# and \u0B66-\u0B70\u0B71, unlike the Oriya-<Indic> entries; Kannada-Latin
# writes \u0CBC-\u0CC4 where the Kannada-<Indic> entries write
# \u0CBC\u0CBD\u0CBE-\u0CC4 (same code points) - confirm whether the
# differences are intentional.
Devanagari-Latin:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Latin;NFC
Bengali-Latin:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Latin;NFC
Gurmukhi-Latin:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Latin;NFC
Gujarati-Latin:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Latin;NFC
Oriya-Latin:alias:[\u0964-\u0965\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35\u0B36-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B70\u0B71];NFD;Oriya-InterIndic;InterIndic-Latin;NFC
Tamil-Latin:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Latin;NFC
Telugu-Latin:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Latin;NFC
Kannada-Latin:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Latin;NFC
Malayalam-Latin:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Latin;NFC
# Indic-Indic transliterators: Devanagari source.
# Each alias is <filter>;NFD;Devanagari-InterIndic;InterIndic-<Target>;NFC,
# i.e. conversion is routed through the InterIndic pivot; only the final
# InterIndic-<Target> step differs between entries.
# The filter spells \u0901-\u0903\u0904\u0905-\u0939 as three pieces; together
# they cover the contiguous range \u0901-\u0939.
Devanagari-Bengali:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Bengali;NFC
Devanagari-Gurmukhi:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gurmukhi;NFC
Devanagari-Gujarati:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gujarati;NFC
Devanagari-Oriya:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Oriya;NFC
Devanagari-Tamil:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Tamil;NFC
Devanagari-Telugu:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Telugu;NFC
Devanagari-Kannada:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Kannada;NFC
Devanagari-Malayalam:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Malayalam;NFC
# Indic-Indic transliterators: Bengali source.
# Each alias is <filter>;NFD;Bengali-InterIndic;InterIndic-<Target>;NFC; only
# the final InterIndic-<Target> step differs between entries.
# The filter lists \u0964-\u0965 (code points in the Devanagari block range)
# ahead of the Bengali-block ranges.
Bengali-Devanagari:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Devanagari;NFC
Bengali-Gurmukhi:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gurmukhi;NFC
Bengali-Gujarati:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gujarati;NFC
Bengali-Oriya:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Oriya;NFC
Bengali-Tamil:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Tamil;NFC
Bengali-Telugu:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Telugu;NFC
Bengali-Kannada:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Kannada;NFC
Bengali-Malayalam:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Malayalam;NFC
# Indic-Indic transliterators: Gurmukhi source.
# Each alias is <filter>;NFD;Gurmukhi-InterIndic;InterIndic-<Target>;NFC; only
# the final InterIndic-<Target> step differs between entries.
# The filter lists \u0964-\u0965 (code points in the Devanagari block range)
# ahead of the Gurmukhi-block ranges.
Gurmukhi-Devanagari:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Devanagari;NFC
Gurmukhi-Bengali:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Bengali;NFC
Gurmukhi-Gujarati:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Gujarati;NFC
Gurmukhi-Oriya:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Oriya;NFC
Gurmukhi-Tamil:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Tamil;NFC
Gurmukhi-Telugu:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Telugu;NFC
Gurmukhi-Kannada:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Kannada;NFC
Gurmukhi-Malayalam:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Malayalam;NFC
# Indic-Indic transliterators: Gujarati source.
# Each alias is <filter>;NFD;Gujarati-InterIndic;InterIndic-<Target>;NFC; only
# the final InterIndic-<Target> step differs between entries.
# The filter lists \u0964-\u0965 (code points in the Devanagari block range)
# ahead of the Gujarati-block ranges.
Gujarati-Devanagari:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Devanagari;NFC
Gujarati-Bengali:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Bengali;NFC
Gujarati-Gurmukhi:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Gurmukhi;NFC
Gujarati-Oriya:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Oriya;NFC
Gujarati-Tamil:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Tamil;NFC
Gujarati-Telugu:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Telugu;NFC
Gujarati-Kannada:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Kannada;NFC
Gujarati-Malayalam:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Malayalam;NFC
# Indic-Indic transliterators: Oriya source.
# Each alias is <filter>;NFD;Oriya-InterIndic;InterIndic-<Target>;NFC; only
# the final InterIndic-<Target> step differs between entries.
# NOTE(review): these filters do not list \u0964-\u0965, although the
# Oriya-Latin entry and the Bengali/Gurmukhi/Gujarati entries in this file do -
# confirm whether the omission is intentional.
Oriya-Devanagari:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Devanagari;NFC
Oriya-Bengali:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Bengali;NFC
Oriya-Gurmukhi:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gurmukhi;NFC
Oriya-Gujarati:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gujarati;NFC
Oriya-Tamil:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Tamil;NFC
Oriya-Telugu:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Telugu;NFC
Oriya-Kannada:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Kannada;NFC
Oriya-Malayalam:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Malayalam;NFC
# Indic-Indic transliterators: Tamil source.
# Each alias is <filter>;NFD;Tamil-InterIndic;InterIndic-<Target>;NFC; only
# the final InterIndic-<Target> step differs between entries.
# NOTE(review): every filter here starts with a literal ASCII "0" before the
# Tamil-block ranges. The Tamil digits listed are \u0BE7-\u0BF2 only, so the
# "0" is presumably a stand-in for a missing Tamil zero - confirm before
# treating it as a typo.
Tamil-Devanagari:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Devanagari;NFC
Tamil-Bengali:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Bengali;NFC
Tamil-Gurmukhi:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gurmukhi;NFC
Tamil-Gujarati:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gujarati;NFC
Tamil-Oriya:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Oriya;NFC
Tamil-Telugu:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Telugu;NFC
Tamil-Kannada:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Kannada;NFC
Tamil-Malayalam:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Malayalam;NFC
# Indic-Indic transliterators: Telugu source.
# Each alias is <filter>;NFD;Telugu-InterIndic;InterIndic-<Target>;NFC; only
# the final InterIndic-<Target> step differs between entries.
# The filter lists Telugu-block ranges only (\u0C01-\u0C6F).
Telugu-Devanagari:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Devanagari;NFC
Telugu-Bengali:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Bengali;NFC
Telugu-Gurmukhi:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gurmukhi;NFC
Telugu-Gujarati:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gujarati;NFC
Telugu-Oriya:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Oriya;NFC
Telugu-Tamil:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Tamil;NFC
Telugu-Kannada:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Kannada;NFC
Telugu-Malayalam:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Malayalam;NFC
# Indic-Indic transliterators: Kannada source.
# Each alias is <filter>;NFD;Kannada-InterIndic;InterIndic-<Target>;NFC; only
# the final InterIndic-<Target> step differs between entries.
# The filter spells \u0CBC\u0CBD\u0CBE-\u0CC4 as three pieces; together they
# cover the contiguous range \u0CBC-\u0CC4, which is how the Kannada-Latin
# entry in this file writes it.
Kannada-Devanagari:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Devanagari;NFC
Kannada-Bengali:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Bengali;NFC
Kannada-Gurmukhi:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gurmukhi;NFC
Kannada-Gujarati:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gujarati;NFC
Kannada-Oriya:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Oriya;NFC
Kannada-Tamil:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Tamil;NFC
Kannada-Telugu:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Telugu;NFC
Kannada-Malayalam:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Malayalam;NFC
# Indic-Indic transliterators: Malayalam source.
# Each alias is <filter>;NFD;Malayalam-InterIndic;InterIndic-<Target>;NFC; only
# the final InterIndic-<Target> step differs between entries.
# The filter lists Malayalam-block ranges only (\u0D02-\u0D6F).
Malayalam-Devanagari:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Devanagari;NFC
Malayalam-Bengali:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Bengali;NFC
Malayalam-Gurmukhi:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gurmukhi;NFC
Malayalam-Gujarati:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gujarati;NFC
Malayalam-Oriya:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Oriya;NFC
Malayalam-Tamil:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Tamil;NFC
Malayalam-Telugu:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Telugu;NFC
Malayalam-Kannada:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Kannada;NFC
# eof