mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-4162 delete the extra sources.. these files are now loaded from icu
X-SVN-Rev: 16764
This commit is contained in:
parent
d0c241160c
commit
84578841ec
40 changed files with 0 additions and 8041 deletions
|
@ -1,290 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
:: NFD (NFC) ;
|
||||
|
||||
# to do: make reversible
|
||||
|
||||
# define special conversion characters.
|
||||
# variants of this could use different characters, or set one or the other to null.
|
||||
|
||||
$pre = \< ;
|
||||
$post = \> ;
|
||||
|
||||
# Provide keyboard equivalents for common diacritics used in transliteration
|
||||
|
||||
$pre \` $post <> \u0300 ; # COMBINING GRAVE ACCENT
|
||||
$pre \' $post <> \u0301 ; # COMBINING ACUTE ACCENT
|
||||
$pre \^ $post <> \u0302 ; # COMBINING CIRCUMFLEX ACCENT
|
||||
$pre \~ $post <> \u0303 ; # COMBINING TILDE
|
||||
$pre \- $post <> \u0304 ; # COMBINING MACRON
|
||||
$pre \" $post <> \u0308 ; # COMBINING DIAERESIS
|
||||
$pre \* $post <> \u030A ; # COMBINING RING ABOVE
|
||||
$pre \, $post <> \u0327 ; # COMBINING CEDILLA
|
||||
$pre '/' $post <> \u0338 ; # COMBINING LONG SOLIDUS OVERLAY
|
||||
$pre \. $post <> \u0323 ; # COMBINING DOT BELOW
|
||||
|
||||
# Combine common characters
|
||||
|
||||
$pre AE $post <> \u00C6 ; # LATIN CAPITAL LETTER AE
|
||||
$pre ae $post <> \u00E6 ; # LATIN SMALL LETTER AE
|
||||
$pre D $post <> \u00D0 ; # LATIN CAPITAL LETTER ETH
|
||||
$pre d $post <> \u00F0 ; # LATIN SMALL LETTER ETH
|
||||
$pre O'/' $post <> \u00D8 ; # LATIN CAPITAL LETTER O WITH STROKE
|
||||
$pre o'/' $post <> \u00F8 ; # LATIN SMALL LETTER O WITH STROKE
|
||||
$pre TH $post <> \u00DE ; # LATIN CAPITAL LETTER THORN
|
||||
$pre th $post <> \u00FE ; # LATIN SMALL LETTER THORN
|
||||
$pre OE $post <> \u0152 ; # LATIN CAPITAL LIGATURE OE
|
||||
$pre oe $post <> \u0153 ; # LATIN SMALL LIGATURE OE
|
||||
|
||||
$pre ss $post <> \u00DF ; # LATIN SMALL LETTER SHARP S
|
||||
|
||||
$pre NG $post <> \u014A ; # LATIN CAPITAL LETTER ENG
|
||||
$pre ng $post <> \u014B ; # LATIN SMALL LETTER ENG
|
||||
|
||||
$pre T $post <> \u0398 ; # GREEK CAPITAL LETTER THETA
|
||||
$pre t $post <> \u03B8 ; # GREEK SMALL LETTER THETA
|
||||
$pre SH $post <> \u01A9 ; # LATIN CAPITAL LETTER ESH
|
||||
$pre sh $post <> \u0283 ; # LATIN SMALL LETTER ESH
|
||||
$pre ZH $post <> \u01B7 ; # LATIN CAPITAL LETTER EZH
|
||||
$pre zh $post <> \u0292 ; # LATIN SMALL LETTER EZH
|
||||
|
||||
$pre U $post <> \u01B1 ; # LATIN CAPITAL LETTER UPSILON
|
||||
$pre u $post <> \u028A ; # LATIN SMALL LETTER UPSILON
|
||||
$pre A $post <> \u018F ; # LATIN CAPITAL LETTER SCHWA
|
||||
$pre a $post <> \u0259 ; # LATIN SMALL LETTER SCHWA
|
||||
$pre O $post <> \u0186 ; # LATIN CAPITAL LETTER OPEN O
|
||||
$pre o $post <> \u0254 ; # LATIN SMALL LETTER OPEN O
|
||||
$pre E $post <> \u0190 ; # LATIN CAPITAL LETTER OPEN E
|
||||
$pre e $post <> \u025B ; # LATIN SMALL LETTER OPEN E
|
||||
|
||||
# three that don't have uppercases
|
||||
|
||||
$pre '?' $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP
|
||||
$pre i $post <> \u026A ; # LATIN LETTER SMALL CAPITAL I
|
||||
$pre v $post <> \u028C ; # LATIN SMALL LETTER TURNED V
|
||||
|
||||
# Additional Characters that may be added in the future
|
||||
|
||||
# $pre XXX $post <> \u0306 ; # COMBINING BREVE
|
||||
# $pre XXX $post <> \u0307 ; # COMBINING DOT ABOVE
|
||||
# $pre XXX $post <> \u0309 ; # COMBINING HOOK ABOVE
|
||||
# $pre XXX $post <> \u030B ; # COMBINING DOUBLE ACUTE ACCENT
|
||||
# $pre XXX $post <> \u030C ; # COMBINING CARON
|
||||
# $pre XXX $post <> \u030F ; # COMBINING DOUBLE GRAVE ACCENT
|
||||
# $pre XXX $post <> \u0311 ; # COMBINING INVERTED BREVE
|
||||
# $pre XXX $post <> \u0313 ; # COMBINING COMMA ABOVE
|
||||
# $pre XXX $post <> \u0314 ; # COMBINING REVERSED COMMA ABOVE
|
||||
# $pre XXX $post <> \u031B ; # COMBINING HORN
|
||||
# $pre XXX $post <> \u0324 ; # COMBINING DIAERESIS BELOW
|
||||
# $pre XXX $post <> \u0325 ; # COMBINING RING BELOW
|
||||
# $pre XXX $post <> \u0326 ; # COMBINING COMMA BELOW
|
||||
# $pre XXX $post <> \u0328 ; # COMBINING OGONEK
|
||||
# $pre XXX $post <> \u032D ; # COMBINING CIRCUMFLEX ACCENT BELOW
|
||||
# $pre XXX $post <> \u032E ; # COMBINING BREVE BELOW
|
||||
# $pre XXX $post <> \u0330 ; # COMBINING TILDE BELOW
|
||||
# $pre XXX $post <> \u0331 ; # COMBINING MACRON BELOW
|
||||
|
||||
# $pre YYY $post <> \u00AA ; # FEMININE ORDINAL INDICATOR
|
||||
# $pre YYY $post <> \u00BA ; # MASCULINE ORDINAL INDICATOR
|
||||
# $pre YYY $post <> \u0110 ; # LATIN CAPITAL LETTER D WITH STROKE
|
||||
# $pre YYY $post <> \u0111 ; # LATIN SMALL LETTER D WITH STROKE
|
||||
# $pre YYY $post <> \u0126 ; # LATIN CAPITAL LETTER H WITH STROKE
|
||||
# $pre YYY $post <> \u0127 ; # LATIN SMALL LETTER H WITH STROKE
|
||||
# $pre YYY $post <> \u0131 ; # LATIN SMALL LETTER DOTLESS I
|
||||
# $pre YYY $post <> \u0138 ; # LATIN SMALL LETTER KRA
|
||||
# $pre YYY $post <> \u013F ; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
|
||||
# $pre YYY $post <> \u0140 ; # LATIN SMALL LETTER L WITH MIDDLE DOT
|
||||
# $pre YYY $post <> \u0141 ; # LATIN CAPITAL LETTER L WITH STROKE
|
||||
# $pre YYY $post <> \u0142 ; # LATIN SMALL LETTER L WITH STROKE
|
||||
# $pre YYY $post <> \u0149 ; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||
# $pre YYY $post <> \u0166 ; # LATIN CAPITAL LETTER T WITH STROKE
|
||||
# $pre YYY $post <> \u0167 ; # LATIN SMALL LETTER T WITH STROKE
|
||||
# $pre YYY $post <> \u017F ; # LATIN SMALL LETTER LONG S
|
||||
# $pre YYY $post <> \u0180 ; # LATIN SMALL LETTER B WITH STROKE
|
||||
# $pre YYY $post <> \u0181 ; # LATIN CAPITAL LETTER B WITH HOOK
|
||||
# $pre YYY $post <> \u0182 ; # LATIN CAPITAL LETTER B WITH TOPBAR
|
||||
# $pre YYY $post <> \u0183 ; # LATIN SMALL LETTER B WITH TOPBAR
|
||||
# $pre YYY $post <> \u0184 ; # LATIN CAPITAL LETTER TONE SIX
|
||||
# $pre YYY $post <> \u0185 ; # LATIN SMALL LETTER TONE SIX
|
||||
# $pre YYY $post <> \u0187 ; # LATIN CAPITAL LETTER C WITH HOOK
|
||||
# $pre YYY $post <> \u0188 ; # LATIN SMALL LETTER C WITH HOOK
|
||||
# $pre YYY $post <> \u0189 ; # LATIN CAPITAL LETTER AFRICAN D
|
||||
# $pre YYY $post <> \u018A ; # LATIN CAPITAL LETTER D WITH HOOK
|
||||
# $pre YYY $post <> \u018B ; # LATIN CAPITAL LETTER D WITH TOPBAR
|
||||
# $pre YYY $post <> \u018C ; # LATIN SMALL LETTER D WITH TOPBAR
|
||||
# $pre YYY $post <> \u018D ; # LATIN SMALL LETTER TURNED DELTA
|
||||
# $pre YYY $post <> \u018E ; # LATIN CAPITAL LETTER REVERSED E
|
||||
# $pre YYY $post <> \u0191 ; # LATIN CAPITAL LETTER F WITH HOOK
|
||||
# $pre YYY $post <> \u0192 ; # LATIN SMALL LETTER F WITH HOOK
|
||||
# $pre YYY $post <> \u0193 ; # LATIN CAPITAL LETTER G WITH HOOK
|
||||
# $pre YYY $post <> \u0194 ; # LATIN CAPITAL LETTER GAMMA
|
||||
# $pre YYY $post <> \u0195 ; # LATIN SMALL LETTER HV
|
||||
# $pre YYY $post <> \u0196 ; # LATIN CAPITAL LETTER IOTA
|
||||
# $pre YYY $post <> \u0197 ; # LATIN CAPITAL LETTER I WITH STROKE
|
||||
# $pre YYY $post <> \u0198 ; # LATIN CAPITAL LETTER K WITH HOOK
|
||||
# $pre YYY $post <> \u0199 ; # LATIN SMALL LETTER K WITH HOOK
|
||||
# $pre YYY $post <> \u019A ; # LATIN SMALL LETTER L WITH BAR
|
||||
# $pre YYY $post <> \u019B ; # LATIN SMALL LETTER LAMBDA WITH STROKE
|
||||
# $pre YYY $post <> \u019C ; # LATIN CAPITAL LETTER TURNED M
|
||||
# $pre YYY $post <> \u019D ; # LATIN CAPITAL LETTER N WITH LEFT HOOK
|
||||
# $pre YYY $post <> \u019E ; # LATIN SMALL LETTER N WITH LONG RIGHT LEG
|
||||
# $pre YYY $post <> \u019F ; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
|
||||
# $pre YYY $post <> \u01A2 ; # LATIN CAPITAL LETTER OI
|
||||
# $pre YYY $post <> \u01A3 ; # LATIN SMALL LETTER OI
|
||||
# $pre YYY $post <> \u01A4 ; # LATIN CAPITAL LETTER P WITH HOOK
|
||||
# $pre YYY $post <> \u01A5 ; # LATIN SMALL LETTER P WITH HOOK
|
||||
# $pre YYY $post <> \u01A6 ; # LATIN LETTER YR
|
||||
# $pre YYY $post <> \u01A7 ; # LATIN CAPITAL LETTER TONE TWO
|
||||
# $pre YYY $post <> \u01A8 ; # LATIN SMALL LETTER TONE TWO
|
||||
# $pre YYY $post <> \u01AA ; # LATIN LETTER REVERSED ESH LOOP
|
||||
# $pre YYY $post <> \u01AB ; # LATIN SMALL LETTER T WITH PALATAL HOOK
|
||||
# $pre YYY $post <> \u01AC ; # LATIN CAPITAL LETTER T WITH HOOK
|
||||
# $pre YYY $post <> \u01AD ; # LATIN SMALL LETTER T WITH HOOK
|
||||
# $pre YYY $post <> \u01AE ; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
|
||||
# $pre YYY $post <> \u01B2 ; # LATIN CAPITAL LETTER V WITH HOOK
|
||||
# $pre YYY $post <> \u01B3 ; # LATIN CAPITAL LETTER Y WITH HOOK
|
||||
# $pre YYY $post <> \u01B4 ; # LATIN SMALL LETTER Y WITH HOOK
|
||||
# $pre YYY $post <> \u01B5 ; # LATIN CAPITAL LETTER Z WITH STROKE
|
||||
# $pre YYY $post <> \u01B6 ; # LATIN SMALL LETTER Z WITH STROKE
|
||||
# $pre YYY $post <> \u01B8 ; # LATIN CAPITAL LETTER EZH REVERSED
|
||||
# $pre YYY $post <> \u01B9 ; # LATIN SMALL LETTER EZH REVERSED
|
||||
# $pre YYY $post <> \u01BA ; # LATIN SMALL LETTER EZH WITH TAIL
|
||||
# $pre YYY $post <> \u01BB ; # LATIN LETTER TWO WITH STROKE
|
||||
# $pre YYY $post <> \u01BC ; # LATIN CAPITAL LETTER TONE FIVE
|
||||
# $pre YYY $post <> \u01BD ; # LATIN SMALL LETTER TONE FIVE
|
||||
# $pre YYY $post <> \u01BE ; # LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE
|
||||
# $pre YYY $post <> \u01BF ; # LATIN LETTER WYNN
|
||||
# $pre YYY $post <> \u01C0 ; # LATIN LETTER DENTAL CLICK
|
||||
# $pre YYY $post <> \u01C1 ; # LATIN LETTER LATERAL CLICK
|
||||
# $pre YYY $post <> \u01C2 ; # LATIN LETTER ALVEOLAR CLICK
|
||||
# $pre YYY $post <> \u01C3 ; # LATIN LETTER RETROFLEX CLICK
|
||||
# $pre YYY $post <> \u01C4 ; # LATIN CAPITAL LETTER DZ WITH CARON
|
||||
# $pre YYY $post <> \u01C5 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
|
||||
# $pre YYY $post <> \u01C6 ; # LATIN SMALL LETTER DZ WITH CARON
|
||||
# $pre YYY $post <> \u01C7 ; # LATIN CAPITAL LETTER LJ
|
||||
# $pre YYY $post <> \u01C8 ; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
|
||||
# $pre YYY $post <> \u01C9 ; # LATIN SMALL LETTER LJ
|
||||
# $pre YYY $post <> \u01CA ; # LATIN CAPITAL LETTER NJ
|
||||
# $pre YYY $post <> \u01CB ; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
|
||||
# $pre YYY $post <> \u01CC ; # LATIN SMALL LETTER NJ
|
||||
# $pre YYY $post <> \u01DD ; # LATIN SMALL LETTER TURNED E
|
||||
# $pre YYY $post <> \u01E4 ; # LATIN CAPITAL LETTER G WITH STROKE
|
||||
# $pre YYY $post <> \u01E5 ; # LATIN SMALL LETTER G WITH STROKE
|
||||
# $pre YYY $post <> \u01F1 ; # LATIN CAPITAL LETTER DZ
|
||||
# $pre YYY $post <> \u01F2 ; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
|
||||
# $pre YYY $post <> \u01F3 ; # LATIN SMALL LETTER DZ
|
||||
# $pre YYY $post <> \u01F6 ; # LATIN CAPITAL LETTER HWAIR
|
||||
# $pre YYY $post <> \u01F7 ; # LATIN CAPITAL LETTER WYNN
|
||||
# $pre YYY $post <> \u021C ; # LATIN CAPITAL LETTER YOGH
|
||||
# $pre YYY $post <> \u021D ; # LATIN SMALL LETTER YOGH
|
||||
# $pre YYY $post <> \u0222 ; # LATIN CAPITAL LETTER OU
|
||||
# $pre YYY $post <> \u0223 ; # LATIN SMALL LETTER OU
|
||||
# $pre YYY $post <> \u0224 ; # LATIN CAPITAL LETTER Z WITH HOOK
|
||||
# $pre YYY $post <> \u0225 ; # LATIN SMALL LETTER Z WITH HOOK
|
||||
# $pre YYY $post <> \u0250 ; # LATIN SMALL LETTER TURNED A
|
||||
# $pre YYY $post <> \u0251 ; # LATIN SMALL LETTER ALPHA
|
||||
# $pre YYY $post <> \u0252 ; # LATIN SMALL LETTER TURNED ALPHA
|
||||
# $pre YYY $post <> \u0253 ; # LATIN SMALL LETTER B WITH HOOK
|
||||
# $pre YYY $post <> \u0255 ; # LATIN SMALL LETTER C WITH CURL
|
||||
# $pre YYY $post <> \u0256 ; # LATIN SMALL LETTER D WITH TAIL
|
||||
# $pre YYY $post <> \u0257 ; # LATIN SMALL LETTER D WITH HOOK
|
||||
# $pre YYY $post <> \u0258 ; # LATIN SMALL LETTER REVERSED E
|
||||
# $pre YYY $post <> \u025A ; # LATIN SMALL LETTER SCHWA WITH HOOK
|
||||
# $pre YYY $post <> \u025C ; # LATIN SMALL LETTER REVERSED OPEN E
|
||||
# $pre YYY $post <> \u025D ; # LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
|
||||
# $pre YYY $post <> \u025E ; # LATIN SMALL LETTER CLOSED REVERSED OPEN E
|
||||
# $pre YYY $post <> \u025F ; # LATIN SMALL LETTER DOTLESS J WITH STROKE
|
||||
# $pre YYY $post <> \u0260 ; # LATIN SMALL LETTER G WITH HOOK
|
||||
# $pre YYY $post <> \u0261 ; # LATIN SMALL LETTER SCRIPT G
|
||||
# $pre YYY $post <> \u0262 ; # LATIN LETTER SMALL CAPITAL G
|
||||
# $pre YYY $post <> \u0263 ; # LATIN SMALL LETTER GAMMA
|
||||
# $pre YYY $post <> \u0264 ; # LATIN SMALL LETTER RAMS HORN
|
||||
# $pre YYY $post <> \u0265 ; # LATIN SMALL LETTER TURNED H
|
||||
# $pre YYY $post <> \u0266 ; # LATIN SMALL LETTER H WITH HOOK
|
||||
# $pre YYY $post <> \u0267 ; # LATIN SMALL LETTER HENG WITH HOOK
|
||||
# $pre YYY $post <> \u0268 ; # LATIN SMALL LETTER I WITH STROKE
|
||||
# $pre YYY $post <> \u0269 ; # LATIN SMALL LETTER IOTA
|
||||
# $pre YYY $post <> \u026B ; # LATIN SMALL LETTER L WITH MIDDLE TILDE
|
||||
# $pre YYY $post <> \u026C ; # LATIN SMALL LETTER L WITH BELT
|
||||
# $pre YYY $post <> \u026D ; # LATIN SMALL LETTER L WITH RETROFLEX HOOK
|
||||
# $pre YYY $post <> \u026E ; # LATIN SMALL LETTER LEZH
|
||||
# $pre YYY $post <> \u026F ; # LATIN SMALL LETTER TURNED M
|
||||
# $pre YYY $post <> \u0270 ; # LATIN SMALL LETTER TURNED M WITH LONG LEG
|
||||
# $pre YYY $post <> \u0271 ; # LATIN SMALL LETTER M WITH HOOK
|
||||
# $pre YYY $post <> \u0272 ; # LATIN SMALL LETTER N WITH LEFT HOOK
|
||||
# $pre YYY $post <> \u0273 ; # LATIN SMALL LETTER N WITH RETROFLEX HOOK
|
||||
# $pre YYY $post <> \u0274 ; # LATIN LETTER SMALL CAPITAL N
|
||||
# $pre YYY $post <> \u0275 ; # LATIN SMALL LETTER BARRED O
|
||||
# $pre YYY $post <> \u0276 ; # LATIN LETTER SMALL CAPITAL OE
|
||||
# $pre YYY $post <> \u0277 ; # LATIN SMALL LETTER CLOSED OMEGA
|
||||
# $pre YYY $post <> \u0278 ; # LATIN SMALL LETTER PHI
|
||||
# $pre YYY $post <> \u0279 ; # LATIN SMALL LETTER TURNED R
|
||||
# $pre YYY $post <> \u027A ; # LATIN SMALL LETTER TURNED R WITH LONG LEG
|
||||
# $pre YYY $post <> \u027B ; # LATIN SMALL LETTER TURNED R WITH HOOK
|
||||
# $pre YYY $post <> \u027C ; # LATIN SMALL LETTER R WITH LONG LEG
|
||||
# $pre YYY $post <> \u027D ; # LATIN SMALL LETTER R WITH TAIL
|
||||
# $pre YYY $post <> \u027E ; # LATIN SMALL LETTER R WITH FISHHOOK
|
||||
# $pre YYY $post <> \u027F ; # LATIN SMALL LETTER REVERSED R WITH FISHHOOK
|
||||
# $pre YYY $post <> \u0280 ; # LATIN LETTER SMALL CAPITAL R
|
||||
# $pre YYY $post <> \u0281 ; # LATIN LETTER SMALL CAPITAL INVERTED R
|
||||
# $pre YYY $post <> \u0282 ; # LATIN SMALL LETTER S WITH HOOK
|
||||
# $pre YYY $post <> \u0284 ; # LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
|
||||
# $pre YYY $post <> \u0285 ; # LATIN SMALL LETTER SQUAT REVERSED ESH
|
||||
# $pre YYY $post <> \u0286 ; # LATIN SMALL LETTER ESH WITH CURL
|
||||
# $pre YYY $post <> \u0287 ; # LATIN SMALL LETTER TURNED T
|
||||
# $pre YYY $post <> \u0288 ; # LATIN SMALL LETTER T WITH RETROFLEX HOOK
|
||||
# $pre YYY $post <> \u0289 ; # LATIN SMALL LETTER U BAR
|
||||
# $pre YYY $post <> \u028B ; # LATIN SMALL LETTER V WITH HOOK
|
||||
# $pre YYY $post <> \u028D ; # LATIN SMALL LETTER TURNED W
|
||||
# $pre YYY $post <> \u028E ; # LATIN SMALL LETTER TURNED Y
|
||||
# $pre YYY $post <> \u028F ; # LATIN LETTER SMALL CAPITAL Y
|
||||
# $pre YYY $post <> \u0290 ; # LATIN SMALL LETTER Z WITH RETROFLEX HOOK
|
||||
# $pre YYY $post <> \u0291 ; # LATIN SMALL LETTER Z WITH CURL
|
||||
# $pre YYY $post <> \u0293 ; # LATIN SMALL LETTER EZH WITH CURL
|
||||
# $pre YYY $post <> \u0294 ; # LATIN LETTER GLOTTAL STOP
|
||||
# $pre YYY $post <> \u0295 ; # LATIN LETTER PHARYNGEAL VOICED FRICATIVE
|
||||
# $pre YYY $post <> \u0296 ; # LATIN LETTER INVERTED GLOTTAL STOP
|
||||
# $pre YYY $post <> \u0297 ; # LATIN LETTER STRETCHED C
|
||||
# $pre YYY $post <> \u0298 ; # LATIN LETTER BILABIAL CLICK
|
||||
# $pre YYY $post <> \u0299 ; # LATIN LETTER SMALL CAPITAL B
|
||||
# $pre YYY $post <> \u029A ; # LATIN SMALL LETTER CLOSED OPEN E
|
||||
# $pre YYY $post <> \u029B ; # LATIN LETTER SMALL CAPITAL G WITH HOOK
|
||||
# $pre YYY $post <> \u029C ; # LATIN LETTER SMALL CAPITAL H
|
||||
# $pre YYY $post <> \u029D ; # LATIN SMALL LETTER J WITH CROSSED-TAIL
|
||||
# $pre YYY $post <> \u029E ; # LATIN SMALL LETTER TURNED K
|
||||
# $pre YYY $post <> \u029F ; # LATIN LETTER SMALL CAPITAL L
|
||||
# $pre YYY $post <> \u02A0 ; # LATIN SMALL LETTER Q WITH HOOK
|
||||
# $pre YYY $post <> \u02A1 ; # LATIN LETTER GLOTTAL STOP WITH STROKE
|
||||
# $pre YYY $post <> \u02A2 ; # LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE
|
||||
# $pre YYY $post <> \u02A3 ; # LATIN SMALL LETTER DZ DIGRAPH
|
||||
# $pre YYY $post <> \u02A4 ; # LATIN SMALL LETTER DEZH DIGRAPH
|
||||
# $pre YYY $post <> \u02A5 ; # LATIN SMALL LETTER DZ DIGRAPH WITH CURL
|
||||
# $pre YYY $post <> \u02A6 ; # LATIN SMALL LETTER TS DIGRAPH
|
||||
# $pre YYY $post <> \u02A7 ; # LATIN SMALL LETTER TESH DIGRAPH
|
||||
# $pre YYY $post <> \u02A8 ; # LATIN SMALL LETTER TC DIGRAPH WITH CURL
|
||||
# $pre YYY $post <> \u02A9 ; # LATIN SMALL LETTER FENG DIGRAPH
|
||||
# $pre YYY $post <> \u02AA ; # LATIN SMALL LETTER LS DIGRAPH
|
||||
# $pre YYY $post <> \u02AB ; # LATIN SMALL LETTER LZ DIGRAPH
|
||||
# $pre YYY $post <> \u02AC ; # LATIN LETTER BILABIAL PERCUSSIVE
|
||||
# $pre YYY $post <> \u02AD ; # LATIN LETTER BIDENTAL PERCUSSIVE
|
||||
# $pre YYY $post <> \u02B0 ; # MODIFIER LETTER SMALL H
|
||||
# $pre YYY $post <> \u02B1 ; # MODIFIER LETTER SMALL H WITH HOOK
|
||||
# $pre YYY $post <> \u02B2 ; # MODIFIER LETTER SMALL J
|
||||
# $pre YYY $post <> \u02B3 ; # MODIFIER LETTER SMALL R
|
||||
# $pre YYY $post <> \u02B4 ; # MODIFIER LETTER SMALL TURNED R
|
||||
# $pre YYY $post <> \u02B5 ; # MODIFIER LETTER SMALL TURNED R WITH HOOK
|
||||
# $pre YYY $post <> \u02B6 ; # MODIFIER LETTER SMALL CAPITAL INVERTED R
|
||||
# $pre YYY $post <> \u02B7 ; # MODIFIER LETTER SMALL W
|
||||
# $pre YYY $post <> \u02B8 ; # MODIFIER LETTER SMALL Y
|
||||
# $pre YYY $post <> \u02E0 ; # MODIFIER LETTER SMALL GAMMA
|
||||
# $pre YYY $post <> \u02E1 ; # MODIFIER LETTER SMALL L
|
||||
# $pre YYY $post <> \u02E2 ; # MODIFIER LETTER SMALL S
|
||||
# $pre YYY $post <> \u02E3 ; # MODIFIER LETTER SMALL X
|
||||
# $pre YYY $post <> \u02E4 ; # MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
|
||||
# $pre YYY $post <> \u1E9A ; # LATIN SMALL LETTER A WITH RIGHT HALF RING
|
||||
# $pre YYY $post <> \u207F ; # SUPERSCRIPT LATIN SMALL LETTER N
|
||||
|
||||
:: NFC (NFD) ;
|
|
@ -1,34 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Test case
|
||||
# "The" "(quick)" ('brown') `fox' ` jumped -- "over?"
|
||||
|
||||
# Variables
|
||||
|
||||
$single = \' ;
|
||||
$space = ' ' ;
|
||||
$double = \" ;
|
||||
$back = \` ;
|
||||
$tab = '\u0009' ; # CHARACTER TABULATION (U+0008 is BACKSPACE, not tab)
|
||||
$makeRight = [[:Z:][:Ps:][:Pi:]$] ;
|
||||
|
||||
# fix UNIX quotes
|
||||
|
||||
$back $back > “ ;
|
||||
$back > ‘ ;
|
||||
|
||||
# fix typewriter quotes, by context
|
||||
|
||||
$makeRight {$double} <> “ ;
|
||||
$double <> ” ;
|
||||
|
||||
$makeRight {$single} <> ‘ ;
|
||||
$single <> ’;
|
||||
|
||||
# fix multiple spaces and hyphens
|
||||
|
||||
$space {$space} > ;
|
||||
'--' <> — ;
|
|
@ -1,146 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Generally follows UNGEGN <http://www.eki.ee/wgrs/rom1_ar.pdf>
|
||||
# Occasionally deviates in the direction of ISO 233 <http://homepage.mac.com/sirbinks/pdf/Arabic.pdf>
|
||||
# a) where required for disambiguation.
|
||||
# b) with underdot instead of cedilla for letters like SAD, since
|
||||
# those are explicitly in Unicode for transliteration.
|
||||
# c) with extra non-Arabic-language letters, like PEH
|
||||
|
||||
# Does *not* do assimilation of "al", nor hyphenation.
|
||||
# While it could be done, we need to determine whether a prefix "al" could
|
||||
# occur other than as the definite article (since no space is used).
|
||||
|
||||
:: [[:Arabic:] [ⁿ\u060C\u061B\u061F\u0640\u064B-\u0655\u0660-\u066C\u06F0-\u06F9\uFDFC]] ;
|
||||
:: NFKD (NFC);
|
||||
$disambig = ̱ ;
|
||||
$disambig2 = ̰ ;
|
||||
$under = ̣ ;
|
||||
|
||||
$notAbove = [[:^ccc=0:]&[:^ccc=230:]];
|
||||
|
||||
# non-letters
|
||||
|
||||
٫ <> '.' $disambig ; # ARABIC DECIMAL SEPARATOR
|
||||
٬ <> ',' $disambig ; # ARABIC THOUSANDS SEPARATOR
|
||||
# ٭ <> ; # ARABIC FIVE POINTED STAR // no need to transliterate
|
||||
|
||||
، <> ',' ; # ARABIC COMMA
|
||||
؛ <> ';' ; # ARABIC SEMICOLON
|
||||
؟ <> '?' ; # ARABIC QUESTION MARK
|
||||
٪ <> '%' ; # ARABIC PERCENT SIGN
|
||||
|
||||
۰ <> 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
|
||||
۱ <> 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
|
||||
۲ <> 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
|
||||
۳ <> 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
|
||||
۴ <> 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
|
||||
۵ <> 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
|
||||
۶ <> 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
|
||||
۷ <> 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
|
||||
۸ <> 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
|
||||
۹ <> 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
|
||||
|
||||
٠ <> 0 ; # ARABIC-INDIC DIGIT ZERO
|
||||
١ <> 1 ; # ARABIC-INDIC DIGIT ONE
|
||||
٢ <> 2 ; # ARABIC-INDIC DIGIT TWO
|
||||
٣ <> 3 ; # ARABIC-INDIC DIGIT THREE
|
||||
٤ <> 4 ; # ARABIC-INDIC DIGIT FOUR
|
||||
٥ <> 5 ; # ARABIC-INDIC DIGIT FIVE
|
||||
٦ <> 6 ; # ARABIC-INDIC DIGIT SIX
|
||||
٧ <> 7 ; # ARABIC-INDIC DIGIT SEVEN
|
||||
٨ <> 8 ; # ARABIC-INDIC DIGIT EIGHT
|
||||
٩ <> 9 ; # ARABIC-INDIC DIGIT NINE
|
||||
|
||||
# letters
|
||||
|
||||
# long vowels
|
||||
َا<> ā ; # ARABIC FATHA, ARABIC LETTER ALEF
|
||||
ُو <> ū ; # ARABIC DAMMA, ARABIC LETTER WAW
|
||||
ِي <> ī ; # ARABIC KASRA, ARABIC LETTER YEH
|
||||
|
||||
# longer items moved here to prevent masking
|
||||
ث <> t h $disambig ; # ARABIC LETTER THEH
|
||||
ذ <> d h $disambig ; # ARABIC LETTER THAL
|
||||
ش <> s h $disambig ; # ARABIC LETTER SHEEN
|
||||
ص <> s $under ; # ARABIC LETTER SAD
|
||||
ض <> d $under ; # ARABIC LETTER DAD
|
||||
ط <> t $under ; # ARABIC LETTER TAH
|
||||
ظ <> z $under ; # ARABIC LETTER ZAH
|
||||
غ <> g h $disambig ; # ARABIC LETTER GHAIN
|
||||
|
||||
# WARNING: special case
|
||||
# <t, umlaut, half-ring below> will be canonically ordered as <t, half-ring below, umlaut>
|
||||
# so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
|
||||
# ةٕ < ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
|
||||
|
||||
ة <> t \u0308 ; # ARABIC LETTER TEH MARBUTA
|
||||
ة | $1 < t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA
|
||||
|
||||
# non-Arabic language
|
||||
ژ <> z h $disambig ; # ARABIC LETTER JEH
|
||||
ڭ <> n $disambig g ; # ARABIC LETTER NG
|
||||
ۋ <> v $disambig ; # ARABIC LETTER VE
|
||||
ی <> y $disambig2 ; # ARABIC LETTER FARSI YEH
|
||||
|
||||
# Arabic language
|
||||
|
||||
ء <> ʾ ; # ARABIC LETTER HAMZA
|
||||
ا <> a $under; # ARABIC LETTER ALEF
|
||||
ب <> b ; # ARABIC LETTER BEH
|
||||
ت <> t ; # ARABIC LETTER TEH
|
||||
ج <> j ; # ARABIC LETTER JEEM
|
||||
ح <> h $under ; # ARABIC LETTER HAH
|
||||
خ <> k h $disambig ; # ARABIC LETTER KHAH
|
||||
د <> d ; # ARABIC LETTER DAL
|
||||
ر <> r ; # ARABIC LETTER REH
|
||||
ز <> z ; # ARABIC LETTER ZAIN
|
||||
س <> s ; # ARABIC LETTER SEEN
|
||||
ع <> ʿ ; # ARABIC LETTER AIN
|
||||
ـ > ; # ARABIC TATWEEL
|
||||
ف <> f ; # ARABIC LETTER FEH
|
||||
ق <> q ; # ARABIC LETTER QAF
|
||||
ك <> k ; # ARABIC LETTER KAF
|
||||
ل <> l ; # ARABIC LETTER LAM
|
||||
م <> m ; # ARABIC LETTER MEEM
|
||||
ن <> n ; # ARABIC LETTER NOON
|
||||
ه <> h ; # ARABIC LETTER HEH
|
||||
و <> w ; # ARABIC LETTER WAW
|
||||
ى <> y $disambig ; # ARABIC LETTER ALEF MAKSURA
|
||||
ي <> y ; # ARABIC LETTER YEH
|
||||
ً <> aⁿ ; # ARABIC FATHATAN
|
||||
ٌ <> uⁿ ; # ARABIC DAMMATAN
|
||||
ٍ <> iⁿ ; # ARABIC KASRATAN
|
||||
َ <> a ; # ARABIC FATHA
|
||||
ُ <> u ; # ARABIC DAMMA
|
||||
ِ <> i ; # ARABIC KASRA
|
||||
ّ <> ̃ ; # ARABIC SHADDA
|
||||
ْ <> ̊ ; # ARABIC SUKUN
|
||||
|
||||
# special combining marks
|
||||
ٓ <> ̂ ; # ARABIC MADDAH ABOVE
|
||||
ٔ <> ̉ ; # ARABIC HAMZA ABOVE
|
||||
ٕ <> ̹ ; # ARABIC HAMZA BELOW
|
||||
|
||||
# Some non-Arabic language (not in UNGEGN)
|
||||
پ <> p ; # ARABIC LETTER PEH
|
||||
چ <> c h $disambig ; # ARABIC LETTER TCHEH
|
||||
ڤ <> v ; # ARABIC LETTER VEH
|
||||
# ڥ <> v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
|
||||
# ڢ <> f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
|
||||
گ <> g ; # ARABIC LETTER GAF
|
||||
|
||||
# fallbacks
|
||||
| s < c } [eiy];
|
||||
| k < c ;
|
||||
| i < e ;
|
||||
| u < o ;
|
||||
| ks < x ;
|
||||
| n < ⁿ;
|
||||
|
||||
:: (lower) ;
|
||||
::NFC (NFD);
|
||||
:: ( [[:Latin:] [%,.0-9;?\u02BE-\u02BF\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339\u037E]] );
|
|
@ -1,103 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Bengali-InterIndic
|
||||
|
||||
\u09C7\u09BE>\uE04B; # VOWEL SIGN O
|
||||
\u09C7\u09D7>\uE04C; # VOWEL SIGN AU
|
||||
\u0981>\uE001; # SIGN CANDRABINDU
|
||||
\u0982>\uE002; # SIGN ANUSVARA
|
||||
\u0983>\uE003; # SIGN VISARGA
|
||||
\u0985>\uE005; # LETTER A
|
||||
\u0986>\uE006; # LETTER AA
|
||||
\u0987>\uE007; # LETTER I
|
||||
\u0988>\uE008; # LETTER II
|
||||
\u0989>\uE009; # LETTER U
|
||||
\u098A>\uE00A; # LETTER UU
|
||||
\u098B>\uE00B; # LETTER VOCALIC R
|
||||
\u098C>\uE00C; # LETTER VOCALIC L
|
||||
\u098F>\uE00F; # LETTER E
|
||||
\u0990>\uE010; # LETTER AI
|
||||
\u0993>\uE013; # LETTER O
|
||||
\u0994>\uE014; # LETTER AU
|
||||
\u0995>\uE015; # LETTER KA
|
||||
\u0996>\uE016; # LETTER KHA
|
||||
\u0997>\uE017; # LETTER GA
|
||||
\u0998>\uE018; # LETTER GHA
|
||||
\u0999>\uE019; # LETTER NGA
|
||||
\u099A>\uE01A; # LETTER CA
|
||||
\u099B>\uE01B; # LETTER CHA
|
||||
\u099C>\uE01C; # LETTER JA
|
||||
\u099D>\uE01D; # LETTER JHA
|
||||
\u099E>\uE01E; # LETTER NYA
|
||||
\u099F>\uE01F; # LETTER TTA
|
||||
\u09A0>\uE020; # LETTER TTHA
|
||||
\u09A1>\uE021; # LETTER DDA
|
||||
\u09A2>\uE022; # LETTER DDHA
|
||||
\u09A3>\uE023; # LETTER NNA
|
||||
\u09A4>\uE024; # LETTER TA
|
||||
\u09A5>\uE025; # LETTER THA
|
||||
\u09A6>\uE026; # LETTER DA
|
||||
\u09A7>\uE027; # LETTER DHA
|
||||
\u09A8>\uE028; # LETTER NA
|
||||
\u09AA>\uE02A; # LETTER PA
|
||||
\u09AB>\uE02B; # LETTER PHA
|
||||
\u09AC>\uE02C; # LETTER BA
|
||||
\u09AD>\uE02D; # LETTER BHA
|
||||
\u09AE>\uE02E; # LETTER MA
|
||||
\u09AF>\uE02F; # LETTER YA
|
||||
\u09B0>\uE030; # LETTER RA
|
||||
\u09B2>\uE032; # LETTER LA
|
||||
\u09B6>\uE036; # LETTER SHA
|
||||
\u09B7>\uE037; # LETTER SSA
|
||||
\u09B8>\uE038; # LETTER SA
|
||||
\u09B9>\uE039; # LETTER HA
|
||||
\u09BC>\uE03C; # SIGN NUKTA
|
||||
\u09BD>\uE03D; # SIGN AVAGRAHA
|
||||
\u09BE>\uE03E; # VOWEL SIGN AA
|
||||
\u09BF>\uE03F; # VOWEL SIGN I
|
||||
\u09C0>\uE040; # VOWEL SIGN II
|
||||
\u09C1>\uE041; # VOWEL SIGN U
|
||||
\u09C2>\uE042; # VOWEL SIGN UU
|
||||
\u09C3>\uE043; # VOWEL SIGN VOCALIC R
|
||||
\u09C4>\uE044; # VOWEL SIGN VOCALIC RR
|
||||
\u09C7>\uE047; # VOWEL SIGN E
|
||||
\u09C8>\uE048; # VOWEL SIGN AI
|
||||
\u09CB>\uE04B; # VOWEL SIGN O
|
||||
\u09CC>\uE04C; # VOWEL SIGN AU
|
||||
#
|
||||
\u09CD>\uE04D; # SIGN VIRAMA
|
||||
\u09D7>\uE057; # AU LENGTH MARK
|
||||
#
|
||||
\u09E0>\uE060; # LETTER VOCALIC RR
|
||||
\u09E1>\uE061; # LETTER VOCALIC LL
|
||||
\u09E2>\uE062; # VOWEL SIGN VOCALIC L
|
||||
\u09E3>\uE063; # VOWEL SIGN VOCALIC LL
|
||||
\u09E6>\uE066; # DIGIT ZERO
|
||||
\u09E7>\uE067; # DIGIT ONE
|
||||
\u09E8>\uE068; # DIGIT TWO
|
||||
\u09E9>\uE069; # DIGIT THREE
|
||||
\u09EA>\uE06A; # DIGIT FOUR
|
||||
\u09EB>\uE06B; # DIGIT FIVE
|
||||
\u09EC>\uE06C; # DIGIT SIX
|
||||
\u09ED>\uE06D; # DIGIT SEVEN
|
||||
\u09EE>\uE06E; # DIGIT EIGHT
|
||||
\u09EF>\uE06F; # DIGIT NINE
|
||||
\u09F0>\ue071; # Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
|
||||
\u09F1>\ue072; # Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
|
||||
\u09F2>\ue073; # Bengali-InterIndic: RUPEE MARK
|
||||
\u09F3>\ue074; # Bengali-InterIndic: RUPEE SIGN
|
||||
\u09F4>\ue075; # Bengali-InterIndic: CURRENCY NUMERATOR ONE
|
||||
\u09F5>\ue076; # Bengali-InterIndic: CURRENCY NUMERATOR TWO
|
||||
\u09F6>\ue077; # Bengali-InterIndic: CURRENCY NUMERATOR THREE
|
||||
\u09F7>\ue078; # Bengali-InterIndic: CURRENCY NUMERATOR FOUR
|
||||
\u09F8>\ue079; # Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\u09F9>\ue07A; # Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN
|
||||
\u09FA>\ue07B; # ISSHAR
|
||||
|
||||
\u0964>\ue064; # DANDA
|
||||
\u0965>\ue065; # DOUBLE DANDA
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,306 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# TODO: add remaining characters
|
||||
# Should add variants for Russian-English, Russian-German
|
||||
# Those can use this as a base, and then remap cases
|
||||
# like a $hat to ya or ja.
|
||||
|
||||
# :: [\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ;
|
||||
### WARNING, \u0308 must be added to the generated filters, in both directions ###
|
||||
# MINIMAL FILTER
|
||||
:: [\u0308\u0102-\u0103\u0114-\u0115\u011E-\u011F\u012C-\u012D\u014E-\u014F\u016C-\u016D\u0306\u0400-\u045F\u0490-\u0495\u0498-\u0499\u04C1-\u04C2\u04D0-\u04DF\u04E2-\u04E7\u04EC-\u04F5\u04F8-\u04F9\u1E1C-\u1E1D\u1EAE-\u1EB7\u1FB0\u1FB8\u1FD0\u1FD8\u1FE0\u1FE8] ;
|
||||
:: NFD (NFC) ;
|
||||
|
||||
$modprime = \u02B9;
|
||||
$modprime2 = \u02BA;
|
||||
|
||||
$grave = \u0300;
|
||||
$acute = \u0301;
|
||||
$hat = \u0302;
|
||||
$breve = \u0306 ;
|
||||
$dot = \u0307 ;
|
||||
$caron = \u030C ;
|
||||
$comma = \u0326 ;
|
||||
$under = \u0331 ;
|
||||
|
||||
# move up so not masked
|
||||
|
||||
я <> a $hat ; # CYRILLIC SMALL LETTER YA
|
||||
Я <> A $hat ; # CYRILLIC CAPITAL LETTER YA
|
||||
|
||||
ч <> c $caron ; # CYRILLIC SMALL LETTER CHE
|
||||
Ч <> C $caron; # CYRILLIC CAPITAL LETTER CHE
|
||||
# ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER
|
||||
# Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
|
||||
# ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE
|
||||
# Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
|
||||
# ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
|
||||
# Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
|
||||
|
||||
э <> e $acute; # CYRILLIC SMALL LETTER E
|
||||
Э <> E $acute; # CYRILLIC CAPITAL LETTER E
|
||||
є <> e $hat; # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
Є <> E $hat; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
|
||||
ш <> s $caron ; # CYRILLIC SMALL LETTER SHA
|
||||
Ш <> S $caron ; # CYRILLIC CAPITAL LETTER SHA
|
||||
щ <> s $hat ; # CYRILLIC SMALL LETTER SHCHA
|
||||
Щ <> S $hat; # CYRILLIC CAPITAL LETTER SHCHA
|
||||
|
||||
ѕ <> z $hat ; # CYRILLIC SMALL LETTER DZE
|
||||
Ѕ <> Z $hat; # CYRILLIC CAPITAL LETTER DZE
|
||||
# ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE
|
||||
# Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
|
||||
|
||||
ю <> u $hat ; # CYRILLIC SMALL LETTER YU
|
||||
Ю <> U $hat ; # CYRILLIC CAPITAL LETTER YU
|
||||
|
||||
і <> i $acute; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
І <> I $acute; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
ј <> j $caron; # CYRILLIC SMALL LETTER JE
|
||||
Ј <> J $caron; # CYRILLIC CAPITAL LETTER JE
|
||||
|
||||
љ <> l $hat ; # CYRILLIC SMALL LETTER LJE
|
||||
Љ <> L $hat ; # CYRILLIC CAPITAL LETTER LJE
|
||||
њ <> n $hat ; # CYRILLIC SMALL LETTER NJE
|
||||
Њ <> N $hat ; # CYRILLIC CAPITAL LETTER NJE
|
||||
|
||||
ћ <> c $acute ; # CYRILLIC SMALL LETTER TSHE
|
||||
Ћ <> C $acute ; # CYRILLIC CAPITAL LETTER TSHE
|
||||
|
||||
џ <> d $hat ; # CYRILLIC SMALL LETTER DZHE
|
||||
Џ <> D $hat ; # CYRILLIC CAPITAL LETTER DZHE
|
||||
|
||||
# Normal order
|
||||
|
||||
а <> a ; # CYRILLIC SMALL LETTER A
|
||||
А <> A ; # CYRILLIC CAPITAL LETTER A
|
||||
ә <> \u0259 ; # CYRILLIC SMALL LETTER SCHWA
|
||||
Ә <> \u018F ; # CYRILLIC CAPITAL LETTER SCHWA
|
||||
ӕ <> \u00E6 ; # CYRILLIC SMALL LIGATURE A IE
|
||||
Ӕ <> \u00C6 ; # CYRILLIC CAPITAL LIGATURE A IE
|
||||
б <> b ; # CYRILLIC SMALL LETTER BE
|
||||
Б <> B ; # CYRILLIC CAPITAL LETTER BE
|
||||
в <> v ; # CYRILLIC SMALL LETTER VE
|
||||
В <> V ; # CYRILLIC CAPITAL LETTER VE
|
||||
|
||||
ґ <> g $grave ; # CYRILLIC SMALL LETTER GHE WITH UPTURN
|
||||
Ґ <> G $grave ; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
|
||||
ғ <> g $dot ; # CYRILLIC SMALL LETTER GHE WITH STROKE
|
||||
Ғ <> G $dot; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
|
||||
ҕ <> g $breve; # CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
|
||||
Ҕ <> G $breve; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
|
||||
г <> g ; # CYRILLIC SMALL LETTER GHE
|
||||
Г <> G ; # CYRILLIC CAPITAL LETTER GHE
|
||||
|
||||
д <> d; # CYRILLIC SMALL LETTER DE
|
||||
Д <> D; # CYRILLIC CAPITAL LETTER DE
|
||||
ђ <> đ ; # CYRILLIC SMALL LETTER DJE
|
||||
Ђ <> Đ ; # CYRILLIC CAPITAL LETTER DJE
|
||||
ҙ <> z $comma ; # CYRILLIC SMALL LETTER ZE WITH DESCENDER
|
||||
Ҙ <> Z $comma ; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
|
||||
е <> e ; # CYRILLIC SMALL LETTER IE
|
||||
Е <> E; # CYRILLIC CAPITAL LETTER IE
|
||||
|
||||
ж <> z $caron; # CYRILLIC SMALL LETTER ZHE
|
||||
Ж <> Z $caron; # CYRILLIC CAPITAL LETTER ZHE
|
||||
|
||||
# җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER
|
||||
# Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
|
||||
|
||||
з <> z ; # CYRILLIC SMALL LETTER ZE
|
||||
З <> Z; # CYRILLIC CAPITAL LETTER ZE
|
||||
|
||||
й <> j ; # CYRILLIC SMALL LETTER SHORT I
|
||||
Й <> J ; # CYRILLIC CAPITAL LETTER SHORT I
|
||||
и <> i ; # CYRILLIC SMALL LETTER I
|
||||
И <> I ; # CYRILLIC CAPITAL LETTER I
|
||||
|
||||
к <> k ; # CYRILLIC SMALL LETTER KA
|
||||
К <> K; # CYRILLIC CAPITAL LETTER KA
|
||||
|
||||
# қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER
|
||||
# Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
|
||||
# ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK
|
||||
# Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK
|
||||
# ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA
|
||||
# Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA
|
||||
# ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE
|
||||
# Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE
|
||||
# ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
|
||||
# Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
|
||||
л <> l ; # CYRILLIC SMALL LETTER EL
|
||||
Л <> L; # CYRILLIC CAPITAL LETTER EL
|
||||
|
||||
м <> m ; # CYRILLIC SMALL LETTER EM
|
||||
М <> M ; # CYRILLIC CAPITAL LETTER EM
|
||||
н <> n ; # CYRILLIC SMALL LETTER EN
|
||||
Н <> N; # CYRILLIC CAPITAL LETTER EN
|
||||
# ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER
|
||||
# Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
|
||||
# ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK
|
||||
# Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK
|
||||
# ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE
|
||||
# Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE
|
||||
|
||||
о <> o ; # CYRILLIC SMALL LETTER O
|
||||
О <> O ; # CYRILLIC CAPITAL LETTER O
|
||||
# ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O
|
||||
# Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
п <> p ; # CYRILLIC SMALL LETTER PE
|
||||
П <> P ; # CYRILLIC CAPITAL LETTER PE
|
||||
# ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
|
||||
# Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
|
||||
# ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA
|
||||
# Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA
|
||||
р <> r ; # CYRILLIC SMALL LETTER ER
|
||||
Р <> R ; # CYRILLIC CAPITAL LETTER ER
|
||||
# ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK
|
||||
# Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK
|
||||
с <> s ; # CYRILLIC SMALL LETTER ES
|
||||
С <> S ; # CYRILLIC CAPITAL LETTER ES
|
||||
# ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER
|
||||
# Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
|
||||
т <> t ; # CYRILLIC SMALL LETTER TE
|
||||
Т <> T ; # CYRILLIC CAPITAL LETTER TE
|
||||
# ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER
|
||||
# Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
|
||||
|
||||
у <> u ; # CYRILLIC SMALL LETTER U
|
||||
У <> U ; # CYRILLIC CAPITAL LETTER U
|
||||
# ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U
|
||||
# Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U
|
||||
# ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
|
||||
# Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
|
||||
# ѹ <> XXX ; # CYRILLIC SMALL LETTER UK
|
||||
# Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK
|
||||
ф <> f ; # CYRILLIC SMALL LETTER EF
|
||||
Ф <> F ; # CYRILLIC CAPITAL LETTER EF
|
||||
х <> h ; # CYRILLIC SMALL LETTER HA
|
||||
Х <> H; # CYRILLIC CAPITAL LETTER HA
|
||||
# ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER
|
||||
# Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
|
||||
# һ <> XXX ; # CYRILLIC SMALL LETTER SHHA
|
||||
# Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA
|
||||
# ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA
|
||||
# Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA
|
||||
# ѿ <> XXX ; # CYRILLIC SMALL LETTER OT
|
||||
# Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT
|
||||
# ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO
|
||||
# Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
|
||||
# ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA
|
||||
# Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA
|
||||
ц <> c ; # CYRILLIC SMALL LETTER TSE
|
||||
Ц <> C; # CYRILLIC CAPITAL LETTER TSE
|
||||
# ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE
|
||||
# Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE
|
||||
|
||||
# ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE
|
||||
# Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
|
||||
# ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||
# Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||
|
||||
|
||||
Ъ <> $modprime2 $under ; # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
ъ <> $modprime2 ; # CYRILLIC SMALL LETTER HARD SIGN
|
||||
Ь <> $modprime $under ; # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
ь <> $modprime ; # CYRILLIC SMALL LETTER SOFT SIGN
|
||||
|
||||
ы <> y ; # CYRILLIC SMALL LETTER YERU
|
||||
Ы <> Y ; # CYRILLIC CAPITAL LETTER YERU
|
||||
|
||||
# ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN
|
||||
# Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
|
||||
# ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT
|
||||
# Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT
|
||||
|
||||
# ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E
|
||||
# Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E
|
||||
# ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS
|
||||
# Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS
|
||||
# ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS
|
||||
# Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS
|
||||
# ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
|
||||
# Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
|
||||
# ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS
|
||||
# Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
|
||||
# ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI
|
||||
# Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI
|
||||
# ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI
|
||||
# Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI
|
||||
# ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA
|
||||
# Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA
|
||||
# ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
|
||||
# Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
|
||||
# ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA
|
||||
# Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
|
||||
# Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA
|
||||
### ӑ <> XXX ; # CYRILLIC SMALL LETTER A
|
||||
### Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A
|
||||
### ӓ <> XXX ; # CYRILLIC SMALL LETTER A
|
||||
### Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A
|
||||
### ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA
|
||||
### Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA
|
||||
### ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE
|
||||
### Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE
|
||||
### ѐ <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
### Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
### ё <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
### Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
### ӗ <> XXX ; # CYRILLIC SMALL LETTER IE
|
||||
### Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE
|
||||
### ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE
|
||||
### Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
|
||||
### ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE
|
||||
### Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE
|
||||
### ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE
|
||||
### Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE
|
||||
### ѝ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
### Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
### ӣ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
### Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
### ӥ <> XXX ; # CYRILLIC SMALL LETTER I
|
||||
### Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I
|
||||
### ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
### Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
### ӧ <> XXX ; # CYRILLIC SMALL LETTER O
|
||||
### Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O
|
||||
### ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O
|
||||
### Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O
|
||||
### ќ <> XXX ; # CYRILLIC SMALL LETTER KA
|
||||
### Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA
|
||||
### ӯ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
### Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
### ў <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
### Ў <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
### ӱ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
### Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
### ӳ <> XXX ; # CYRILLIC SMALL LETTER U
|
||||
### Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U
|
||||
### ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE
|
||||
### Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE
|
||||
### ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU
|
||||
### Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU
|
||||
### ӭ <> XXX ; # CYRILLIC SMALL LETTER E
|
||||
### Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E
|
||||
### ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA
|
||||
### Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA
|
||||
|
||||
# Completeness
|
||||
$ignore = [[:Mark:]''] * ; # any run of combining marks (the '' is presumably a literal apostrophe -- confirm)
|
||||
| k < q ; # reverse (Latin-Cyrillic) only: q is rewritten as k; the leading | puts the cursor before k so it is transliterated again
|
||||
| K < Q ; # reverse only: Q is rewritten as K, then reprocessed
|
||||
| u < w ; # reverse only: w is rewritten as u, then reprocessed
|
||||
| U < W ; # reverse only: W is rewritten as U, then reprocessed
|
||||
| KS < X } $ignore [:UppercaseLetter:] ; # reverse only: X followed by an uppercase letter (skipping $ignore) becomes all-caps KS
|
||||
| KS < [:UppercaseLetter:] $ignore { X ; # reverse only: X preceded by an uppercase letter (skipping $ignore) becomes all-caps KS
|
||||
| Ks < X ; # reverse only: any other X becomes title-case Ks
|
||||
| ks < x ; # reverse only: x becomes ks
|
||||
|
||||
:: NFC (NFD) ;
|
||||
# note: a global filter is more efficient, but MUST include all source chars!!
|
||||
# :: ([\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]]);
|
||||
# MINIMAL FILTER: Latin-Cyrillic
|
||||
:: ( [\u0308A-Za-z\u00C0-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u018F\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0259\u02B9-\u02BA\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344\u0374\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0400\u0403\u040C-\u040E\u0419\u0439\u0450\u0453\u045C-\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F02-\u1F05\u1F0A-\u1F0D\u1F12-\u1F15\u1F1A-\u1F1D\u1F22-\u1F25\u1F2A-\u1F2D\u1F32-\u1F35\u1F3A-\u1F3D\u1F42-\u1F45\u1F4A-\u1F4D\u1F52-\u1F55\u1F5B\u1F5D\u1F62-\u1F65\u1F6A-\u1F6D\u1F70-\u1F7D\u1F82-\u1F85\u1F8A-\u1F8D\u1F92-\u1F95\u1F9A-\u1F9D\u1FA2-\u1FA5\u1FAA-\u1FAD\u1FB0\u1FB2\u1FB4\u1FB8\u1FBA-\u1FBB\u1FC2\u1FC4\u1FC8-\u1FCB\u1FCD-\u1FCE\u1FD0\u1FD2-\u1FD3\u1FD8\u1FDA-\u1FDB\u1FDD-\u1FDE\u1FE0\u1FE2-\u1FE3\u1FE8\u1FEA-\u1FEB\u1FED-\u1FEE\u1FF2\u1FF4\u1FF8-\u1FFB\u212A-\u212B] ) ;
|
|
@ -1,117 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Devanagari-InterIndic
|
||||
# :: NFD;
|
||||
#Rules for Decomposed characters
|
||||
|
||||
|
||||
\u0901>\uE001; # SIGN CANDRABINDU
|
||||
\u0902>\uE002; # SIGN ANUSVARA
|
||||
\u0903>\uE003; # SIGN VISARGA
|
||||
\u0904>\uE004; # LETTER SHORT A
|
||||
\u0905>\uE005; # LETTER A
|
||||
\u0906>\uE006; # LETTER AA
|
||||
\u0907>\uE007; # LETTER I
|
||||
\u0908>\uE008; # LETTER II
|
||||
\u0909>\uE009; # LETTER U
|
||||
\u090A>\uE00A; # LETTER UU
|
||||
\u090B>\uE00B; # LETTER VOCALIC R
|
||||
\u090C>\uE00C; # LETTER VOCALIC L
|
||||
\u090D>\uE00D; # LETTER CANDRA E (For representing English sounds)
|
||||
\u090E>\uE00E; # UNMAPPED LETTER SHORT E(For Southern Scripts)
|
||||
\u090F>\uE00F; # LETTER E
|
||||
\u0910>\uE010; # LETTER AI
|
||||
\u0911>\uE011; # LETTER CANDRA O (For representing English sounds)
|
||||
\u0912>\uE012; # UNMAPPED LETTER SHORT O (For Southern Scripts)
|
||||
\u0913>\uE013; # LETTER O
|
||||
\u0914>\uE014; # LETTER AU
|
||||
\u0915>\uE015; # LETTER KA
|
||||
\u0916>\uE016; # LETTER KHA
|
||||
\u0917>\uE017; # LETTER GA
|
||||
\u0918>\uE018; # LETTER GHA
|
||||
\u0919>\uE019; # LETTER NGA
|
||||
\u091A>\uE01A; # LETTER CA
|
||||
\u091B>\uE01B; # LETTER CHA
|
||||
\u091C>\uE01C; # LETTER JA
|
||||
\u091D>\uE01D; # LETTER JHA
|
||||
\u091E>\uE01E; # LETTER NYA
|
||||
\u091F>\uE01F; # LETTER TTA
|
||||
\u0920>\uE020; # LETTER TTHA
|
||||
\u0921>\uE021; # LETTER DDA
|
||||
\u0922>\uE022; # LETTER DDHA
|
||||
\u0923>\uE023; # LETTER NNA
|
||||
\u0924>\uE024; # LETTER TA
|
||||
\u0925>\uE025; # LETTER THA
|
||||
\u0926>\uE026; # LETTER DA
|
||||
\u0927>\uE027; # LETTER DHA
|
||||
\u0928>\uE028; # LETTER NA
|
||||
\u0929>\uE029; # LETTER NNNA
|
||||
\u092A>\uE02A; # LETTER PA
|
||||
\u092B>\uE02B; # LETTER PHA
|
||||
\u092C>\uE02C; # LETTER BA
|
||||
\u092D>\uE02D; # LETTER BHA
|
||||
\u092E>\uE02E; # LETTER MA
|
||||
\u092F>\uE02F; # LETTER YA
|
||||
\u0930>\uE030; # LETTER RA
|
||||
\u0931>\uE031; # LETTER RRA
|
||||
\u0932>\uE032; # LETTER LA
|
||||
\u0933>\uE033; # LETTER LLA
|
||||
\u0934>\uE034; # LETTER LLLA
|
||||
|
||||
\u0935>\uE035; # LETTER VA
|
||||
\u0936>\uE036; # LETTER SHA
|
||||
\u0937>\uE037; # LETTER SSA
|
||||
\u0938>\uE038; # LETTER SA
|
||||
\u0939>\uE039; # LETTER HA
|
||||
\u093C>\uE03C; # SIGN NUKTA
|
||||
\u093D>\uE03D; # SIGN AVAGRAHA
|
||||
\u093E>\uE03E; # VOWEL SIGN AA
|
||||
\u093F>\uE03F; # VOWEL SIGN I
|
||||
\u0940>\uE040; # VOWEL SIGN II
|
||||
\u0941>\uE041; # VOWEL SIGN U
|
||||
\u0942>\uE042; # VOWEL SIGN UU
|
||||
\u0943>\uE043; # VOWEL SIGN VOCALIC R
|
||||
\u0944>\uE044; # VOWEL SIGN VOCALIC RR
|
||||
\u0945>\uE045; # VOWEL SIGN CANDRA E
|
||||
\u0946>\uE046; # UNMAPPED VOWEL SIGN SHORT E
|
||||
\u0947>\uE047; # VOWEL SIGN E
|
||||
\u0948>\uE048; # VOWEL SIGN AI
|
||||
\u0949>\uE049; # VOWEL SIGN CANDRA O
|
||||
\u094A>\uE04A; # UNMAPPED VOWEL SIGN SHORT O
|
||||
\u094B>\uE04B; # VOWEL SIGN O
|
||||
\u094C>\uE04C; # VOWEL SIGN AU
|
||||
\u094D>\uE04D; # SIGN VIRAMA
|
||||
\u0950>\uE050; # OM
|
||||
\u0951>\uE051; # UNMAPPED STRESS SIGN UDATTA
|
||||
\u0952>\uE052; # UNMAPPED STRESS SIGN ANUDATTA
|
||||
\u0953>\uE053; # UNMAPPED GRAVE ACCENT
|
||||
\u0954>\uE054; # UNMAPPED ACUTE ACCENT
|
||||
\u0958>\uE058; # LETTER QA
|
||||
\u0959>\uE059; # LETTER KHHA
|
||||
\u095A>\uE05a; # LETTER GHHA
|
||||
\u095B>\uE05b; # LETTER ZA
|
||||
\u095C>\uE05c; # LETTER DDDHA
|
||||
\u095D>\uE05d; # LETTER RHA
|
||||
\u095E>\uE05e; # LETTER FA
|
||||
\u095F>\uE05f; # LETTER YYA
|
||||
\u0960>\uE060; # LETTER VOCALIC RR
|
||||
\u0961>\uE061; # LETTER VOCALIC LL
|
||||
\u0962>\uE062; # VOWEL SIGN VOCALIC L
|
||||
\u0963>\uE063; # VOWEL SIGN VOCALIC LL
|
||||
\u0964>\ue064; # DANDA
|
||||
\u0965>\ue065; # DOUBLE DANDA
|
||||
\u0966>\uE066; # DIGIT ZERO
|
||||
\u0967>\uE067; # DIGIT ONE
|
||||
\u0968>\uE068; # DIGIT TWO
|
||||
\u0969>\uE069; # DIGIT THREE
|
||||
\u096A>\uE06A; # DIGIT FOUR
|
||||
\u096B>\uE06B; # DIGIT FIVE
|
||||
\u096C>\uE06C; # DIGIT SIX
|
||||
\u096D>\uE06D; # DIGIT SEVEN
|
||||
\u096E>\uE06E; # DIGIT EIGHT
|
||||
\u096F>\uE06F; # DIGIT NINE
|
||||
\u0970>\uE070; # Devanagari-InterIndic: ABBREVIATION SIGN
|
||||
# :: NFC (NFD) ;
|
|
@ -1,271 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Fullwidth-Halfwidth
|
||||
|
||||
# Mechanically generated from Unicode Character Database
|
||||
# IDEOGRAPHIC SPACE then added, and
|
||||
# FULLWIDTH MACRON changed to map to MACRON, not SPACE + COMBINING MACRON
|
||||
|
||||
# multicharacter: precomposed voiced / semi-voiced katakana <> halfwidth base letter + halfwidth (semi-)voiced sound mark; the halfwidth side is spelled with backslash-u escapes so it cannot be folded into the left side by compatibility normalization
|
||||
|
||||
ガ<>\uFF76\uFF9E;	# to KATAKANA LETTER GA
|
||||
ギ<>\uFF77\uFF9E;	# to KATAKANA LETTER GI
|
||||
グ<>\uFF78\uFF9E;	# to KATAKANA LETTER GU
|
||||
ゲ<>\uFF79\uFF9E;	# to KATAKANA LETTER GE
|
||||
ゴ<>\uFF7A\uFF9E;	# to KATAKANA LETTER GO
|
||||
ザ<>\uFF7B\uFF9E;	# to KATAKANA LETTER ZA
|
||||
ジ<>\uFF7C\uFF9E;	# to KATAKANA LETTER ZI
|
||||
ズ<>\uFF7D\uFF9E;	# to KATAKANA LETTER ZU
|
||||
ゼ<>\uFF7E\uFF9E;	# to KATAKANA LETTER ZE
|
||||
ゾ<>\uFF7F\uFF9E;	# to KATAKANA LETTER ZO
|
||||
ダ<>\uFF80\uFF9E;	# to KATAKANA LETTER DA
|
||||
ヂ<>\uFF81\uFF9E;	# to KATAKANA LETTER DI
|
||||
ヅ<>\uFF82\uFF9E;	# to KATAKANA LETTER DU
|
||||
デ<>\uFF83\uFF9E;	# to KATAKANA LETTER DE
|
||||
ド<>\uFF84\uFF9E;	# to KATAKANA LETTER DO
|
||||
バ<>\uFF8A\uFF9E;	# to KATAKANA LETTER BA
|
||||
パ<>\uFF8A\uFF9F;	# to KATAKANA LETTER PA
|
||||
ビ<>\uFF8B\uFF9E;	# to KATAKANA LETTER BI
|
||||
ピ<>\uFF8B\uFF9F;	# to KATAKANA LETTER PI
|
||||
ブ<>\uFF8C\uFF9E;	# to KATAKANA LETTER BU
|
||||
プ<>\uFF8C\uFF9F;	# to KATAKANA LETTER PU
|
||||
ベ<>\uFF8D\uFF9E;	# to KATAKANA LETTER BE
|
||||
ペ<>\uFF8D\uFF9F;	# to KATAKANA LETTER PE
|
||||
ボ<>\uFF8E\uFF9E;	# to KATAKANA LETTER BO
|
||||
ポ<>\uFF8E\uFF9F;	# to KATAKANA LETTER PO
|
||||
ヴ<>\uFF73\uFF9E;	# to KATAKANA LETTER VU
|
||||
ヷ<>\uFF9C\uFF9E;	# to KATAKANA LETTER VA
|
||||
ヺ<>\uFF66\uFF9E;	# to KATAKANA LETTER VO
|
||||
|
||||
# single character: the fullwidth / halfwidth-form side of every rule is spelled with a backslash-u escape (Halfwidth and Fullwidth Forms block, plus U+3000 and the Hangul compatibility jamo) so the two sides stay distinct even if this file passes through compatibility normalization; the ordinary-width side of each rule is left as written
|
||||
|
||||
\uFF01<>'!';	# from FULLWIDTH EXCLAMATION MARK
|
||||
\uFF02<>'\"';	# from FULLWIDTH QUOTATION MARK
|
||||
\uFF03<>'#';	# from FULLWIDTH NUMBER SIGN
|
||||
\uFF04<>'$';	# from FULLWIDTH DOLLAR SIGN
|
||||
\uFF05<>'%';	# from FULLWIDTH PERCENT SIGN
|
||||
\uFF06<>'&';	# from FULLWIDTH AMPERSAND
|
||||
\uFF07<>'';	# from FULLWIDTH APOSTROPHE
|
||||
\uFF08<>'(';	# from FULLWIDTH LEFT PARENTHESIS
|
||||
\uFF09<>')';	# from FULLWIDTH RIGHT PARENTHESIS
|
||||
\uFF0A<>'*';	# from FULLWIDTH ASTERISK
|
||||
\uFF0B<>'+';	# from FULLWIDTH PLUS SIGN
|
||||
\uFF0C<>',';	# from FULLWIDTH COMMA
|
||||
\uFF0D<>'-';	# from FULLWIDTH HYPHEN-MINUS
|
||||
\uFF0E<>'.';	# from FULLWIDTH FULL STOP
|
||||
\uFF0F<>'/';	# from FULLWIDTH SOLIDUS
|
||||
\uFF10<>'0';	# from FULLWIDTH DIGIT ZERO
|
||||
\uFF11<>'1';	# from FULLWIDTH DIGIT ONE
|
||||
\uFF12<>'2';	# from FULLWIDTH DIGIT TWO
|
||||
\uFF13<>'3';	# from FULLWIDTH DIGIT THREE
|
||||
\uFF14<>'4';	# from FULLWIDTH DIGIT FOUR
|
||||
\uFF15<>'5';	# from FULLWIDTH DIGIT FIVE
|
||||
\uFF16<>'6';	# from FULLWIDTH DIGIT SIX
|
||||
\uFF17<>'7';	# from FULLWIDTH DIGIT SEVEN
|
||||
\uFF18<>'8';	# from FULLWIDTH DIGIT EIGHT
|
||||
\uFF19<>'9';	# from FULLWIDTH DIGIT NINE
|
||||
\uFF1A<>':';	# from FULLWIDTH COLON
|
||||
\uFF1B<>';';	# from FULLWIDTH SEMICOLON
|
||||
\uFF1C<>'<';	# from FULLWIDTH LESS-THAN SIGN
|
||||
\uFF1D<>'=';	# from FULLWIDTH EQUALS SIGN
|
||||
\uFF1E<>'>';	# from FULLWIDTH GREATER-THAN SIGN
|
||||
\uFF1F<>'?';	# from FULLWIDTH QUESTION MARK
|
||||
\uFF20<>'@';	# from FULLWIDTH COMMERCIAL AT
|
||||
\uFF21<>A;	# from FULLWIDTH LATIN CAPITAL LETTER A
|
||||
\uFF22<>B;	# from FULLWIDTH LATIN CAPITAL LETTER B
|
||||
\uFF23<>C;	# from FULLWIDTH LATIN CAPITAL LETTER C
|
||||
\uFF24<>D;	# from FULLWIDTH LATIN CAPITAL LETTER D
|
||||
\uFF25<>E;	# from FULLWIDTH LATIN CAPITAL LETTER E
|
||||
\uFF26<>F;	# from FULLWIDTH LATIN CAPITAL LETTER F
|
||||
\uFF27<>G;	# from FULLWIDTH LATIN CAPITAL LETTER G
|
||||
\uFF28<>H;	# from FULLWIDTH LATIN CAPITAL LETTER H
|
||||
\uFF29<>I;	# from FULLWIDTH LATIN CAPITAL LETTER I
|
||||
\uFF2A<>J;	# from FULLWIDTH LATIN CAPITAL LETTER J
|
||||
\uFF2B<>K;	# from FULLWIDTH LATIN CAPITAL LETTER K
|
||||
\uFF2C<>L;	# from FULLWIDTH LATIN CAPITAL LETTER L
|
||||
\uFF2D<>M;	# from FULLWIDTH LATIN CAPITAL LETTER M
|
||||
\uFF2E<>N;	# from FULLWIDTH LATIN CAPITAL LETTER N
|
||||
\uFF2F<>O;	# from FULLWIDTH LATIN CAPITAL LETTER O
|
||||
\uFF30<>P;	# from FULLWIDTH LATIN CAPITAL LETTER P
|
||||
\uFF31<>Q;	# from FULLWIDTH LATIN CAPITAL LETTER Q
|
||||
\uFF32<>R;	# from FULLWIDTH LATIN CAPITAL LETTER R
|
||||
\uFF33<>S;	# from FULLWIDTH LATIN CAPITAL LETTER S
|
||||
\uFF34<>T;	# from FULLWIDTH LATIN CAPITAL LETTER T
|
||||
\uFF35<>U;	# from FULLWIDTH LATIN CAPITAL LETTER U
|
||||
\uFF36<>V;	# from FULLWIDTH LATIN CAPITAL LETTER V
|
||||
\uFF37<>W;	# from FULLWIDTH LATIN CAPITAL LETTER W
|
||||
\uFF38<>X;	# from FULLWIDTH LATIN CAPITAL LETTER X
|
||||
\uFF39<>Y;	# from FULLWIDTH LATIN CAPITAL LETTER Y
|
||||
\uFF3A<>Z;	# from FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
\uFF3B<>'[';	# from FULLWIDTH LEFT SQUARE BRACKET
|
||||
\uFF3C<>'\\';	# from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
|
||||
\uFF3D<>']';	# from FULLWIDTH RIGHT SQUARE BRACKET
|
||||
\uFF3E<>'^';	# from FULLWIDTH CIRCUMFLEX ACCENT
|
||||
\uFF3F<>'_';	# from FULLWIDTH LOW LINE
|
||||
\uFF40<>'`';	# from FULLWIDTH GRAVE ACCENT
|
||||
\uFF41<>a;	# from FULLWIDTH LATIN SMALL LETTER A
|
||||
\uFF42<>b;	# from FULLWIDTH LATIN SMALL LETTER B
|
||||
\uFF43<>c;	# from FULLWIDTH LATIN SMALL LETTER C
|
||||
\uFF44<>d;	# from FULLWIDTH LATIN SMALL LETTER D
|
||||
\uFF45<>e;	# from FULLWIDTH LATIN SMALL LETTER E
|
||||
\uFF46<>f;	# from FULLWIDTH LATIN SMALL LETTER F
|
||||
\uFF47<>g;	# from FULLWIDTH LATIN SMALL LETTER G
|
||||
\uFF48<>h;	# from FULLWIDTH LATIN SMALL LETTER H
|
||||
\uFF49<>i;	# from FULLWIDTH LATIN SMALL LETTER I
|
||||
\uFF4A<>j;	# from FULLWIDTH LATIN SMALL LETTER J
|
||||
\uFF4B<>k;	# from FULLWIDTH LATIN SMALL LETTER K
|
||||
\uFF4C<>l;	# from FULLWIDTH LATIN SMALL LETTER L
|
||||
\uFF4D<>m;	# from FULLWIDTH LATIN SMALL LETTER M
|
||||
\uFF4E<>n;	# from FULLWIDTH LATIN SMALL LETTER N
|
||||
\uFF4F<>o;	# from FULLWIDTH LATIN SMALL LETTER O
|
||||
\uFF50<>p;	# from FULLWIDTH LATIN SMALL LETTER P
|
||||
\uFF51<>q;	# from FULLWIDTH LATIN SMALL LETTER Q
|
||||
\uFF52<>r;	# from FULLWIDTH LATIN SMALL LETTER R
|
||||
\uFF53<>s;	# from FULLWIDTH LATIN SMALL LETTER S
|
||||
\uFF54<>t;	# from FULLWIDTH LATIN SMALL LETTER T
|
||||
\uFF55<>u;	# from FULLWIDTH LATIN SMALL LETTER U
|
||||
\uFF56<>v;	# from FULLWIDTH LATIN SMALL LETTER V
|
||||
\uFF57<>w;	# from FULLWIDTH LATIN SMALL LETTER W
|
||||
\uFF58<>x;	# from FULLWIDTH LATIN SMALL LETTER X
|
||||
\uFF59<>y;	# from FULLWIDTH LATIN SMALL LETTER Y
|
||||
\uFF5A<>z;	# from FULLWIDTH LATIN SMALL LETTER Z
|
||||
\uFF5B<>'{';	# from FULLWIDTH LEFT CURLY BRACKET
|
||||
\uFF5C<>'|';	# from FULLWIDTH VERTICAL LINE
|
||||
\uFF5D<>'}';	# from FULLWIDTH RIGHT CURLY BRACKET
|
||||
\uFF5E<>'~';	# from FULLWIDTH TILDE
|
||||
。<>\uFF61;	# to HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
「<>\uFF62;	# to HALFWIDTH LEFT CORNER BRACKET
|
||||
」<>\uFF63;	# to HALFWIDTH RIGHT CORNER BRACKET
|
||||
、<>\uFF64;	# to HALFWIDTH IDEOGRAPHIC COMMA
|
||||
・<>\uFF65;	# to HALFWIDTH KATAKANA MIDDLE DOT
|
||||
ヲ<>\uFF66;	# to HALFWIDTH KATAKANA LETTER WO
|
||||
ァ<>\uFF67;	# to HALFWIDTH KATAKANA LETTER SMALL A
|
||||
ィ<>\uFF68;	# to HALFWIDTH KATAKANA LETTER SMALL I
|
||||
ゥ<>\uFF69;	# to HALFWIDTH KATAKANA LETTER SMALL U
|
||||
ェ<>\uFF6A;	# to HALFWIDTH KATAKANA LETTER SMALL E
|
||||
ォ<>\uFF6B;	# to HALFWIDTH KATAKANA LETTER SMALL O
|
||||
ャ<>\uFF6C;	# to HALFWIDTH KATAKANA LETTER SMALL YA
|
||||
ュ<>\uFF6D;	# to HALFWIDTH KATAKANA LETTER SMALL YU
|
||||
ョ<>\uFF6E;	# to HALFWIDTH KATAKANA LETTER SMALL YO
|
||||
ッ<>\uFF6F;	# to HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
ー<>\uFF70;	# to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
ア<>\uFF71;	# to HALFWIDTH KATAKANA LETTER A
|
||||
イ<>\uFF72;	# to HALFWIDTH KATAKANA LETTER I
|
||||
ウ<>\uFF73;	# to HALFWIDTH KATAKANA LETTER U
|
||||
エ<>\uFF74;	# to HALFWIDTH KATAKANA LETTER E
|
||||
オ<>\uFF75;	# to HALFWIDTH KATAKANA LETTER O
|
||||
カ<>\uFF76;	# to HALFWIDTH KATAKANA LETTER KA
|
||||
キ<>\uFF77;	# to HALFWIDTH KATAKANA LETTER KI
|
||||
ク<>\uFF78;	# to HALFWIDTH KATAKANA LETTER KU
|
||||
ケ<>\uFF79;	# to HALFWIDTH KATAKANA LETTER KE
|
||||
コ<>\uFF7A;	# to HALFWIDTH KATAKANA LETTER KO
|
||||
サ<>\uFF7B;	# to HALFWIDTH KATAKANA LETTER SA
|
||||
シ<>\uFF7C;	# to HALFWIDTH KATAKANA LETTER SI
|
||||
ス<>\uFF7D;	# to HALFWIDTH KATAKANA LETTER SU
|
||||
セ<>\uFF7E;	# to HALFWIDTH KATAKANA LETTER SE
|
||||
ソ<>\uFF7F;	# to HALFWIDTH KATAKANA LETTER SO
|
||||
タ<>\uFF80;	# to HALFWIDTH KATAKANA LETTER TA
|
||||
チ<>\uFF81;	# to HALFWIDTH KATAKANA LETTER TI
|
||||
ツ<>\uFF82;	# to HALFWIDTH KATAKANA LETTER TU
|
||||
テ<>\uFF83;	# to HALFWIDTH KATAKANA LETTER TE
|
||||
ト<>\uFF84;	# to HALFWIDTH KATAKANA LETTER TO
|
||||
ナ<>\uFF85;	# to HALFWIDTH KATAKANA LETTER NA
|
||||
ニ<>\uFF86;	# to HALFWIDTH KATAKANA LETTER NI
|
||||
ヌ<>\uFF87;	# to HALFWIDTH KATAKANA LETTER NU
|
||||
ネ<>\uFF88;	# to HALFWIDTH KATAKANA LETTER NE
|
||||
ノ<>\uFF89;	# to HALFWIDTH KATAKANA LETTER NO
|
||||
ハ<>\uFF8A;	# to HALFWIDTH KATAKANA LETTER HA
|
||||
ヒ<>\uFF8B;	# to HALFWIDTH KATAKANA LETTER HI
|
||||
フ<>\uFF8C;	# to HALFWIDTH KATAKANA LETTER HU
|
||||
ヘ<>\uFF8D;	# to HALFWIDTH KATAKANA LETTER HE
|
||||
ホ<>\uFF8E;	# to HALFWIDTH KATAKANA LETTER HO
|
||||
マ<>\uFF8F;	# to HALFWIDTH KATAKANA LETTER MA
|
||||
ミ<>\uFF90;	# to HALFWIDTH KATAKANA LETTER MI
|
||||
ム<>\uFF91;	# to HALFWIDTH KATAKANA LETTER MU
|
||||
メ<>\uFF92;	# to HALFWIDTH KATAKANA LETTER ME
|
||||
モ<>\uFF93;	# to HALFWIDTH KATAKANA LETTER MO
|
||||
ヤ<>\uFF94;	# to HALFWIDTH KATAKANA LETTER YA
|
||||
ユ<>\uFF95;	# to HALFWIDTH KATAKANA LETTER YU
|
||||
ヨ<>\uFF96;	# to HALFWIDTH KATAKANA LETTER YO
|
||||
ラ<>\uFF97;	# to HALFWIDTH KATAKANA LETTER RA
|
||||
リ<>\uFF98;	# to HALFWIDTH KATAKANA LETTER RI
|
||||
ル<>\uFF99;	# to HALFWIDTH KATAKANA LETTER RU
|
||||
レ<>\uFF9A;	# to HALFWIDTH KATAKANA LETTER RE
|
||||
ロ<>\uFF9B;	# to HALFWIDTH KATAKANA LETTER RO
|
||||
ワ<>\uFF9C;	# to HALFWIDTH KATAKANA LETTER WA
|
||||
ン<>\uFF9D;	# to HALFWIDTH KATAKANA LETTER N
|
||||
\u3099<>\uFF9E;	# to HALFWIDTH KATAKANA VOICED SOUND MARK
|
||||
\u309A<>\uFF9F;	# to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
\u3164<>\uFFA0;	# to HALFWIDTH HANGUL FILLER
|
||||
\u3131<>\uFFA1;	# to HALFWIDTH HANGUL LETTER KIYEOK
|
||||
\u3132<>\uFFA2;	# to HALFWIDTH HANGUL LETTER SSANGKIYEOK
|
||||
\u3133<>\uFFA3;	# to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
|
||||
\u3134<>\uFFA4;	# to HALFWIDTH HANGUL LETTER NIEUN
|
||||
\u3135<>\uFFA5;	# to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
|
||||
\u3136<>\uFFA6;	# to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
|
||||
\u3137<>\uFFA7;	# to HALFWIDTH HANGUL LETTER TIKEUT
|
||||
\u3138<>\uFFA8;	# to HALFWIDTH HANGUL LETTER SSANGTIKEUT
|
||||
\u3139<>\uFFA9;	# to HALFWIDTH HANGUL LETTER RIEUL
|
||||
\u313A<>\uFFAA;	# to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
|
||||
\u313B<>\uFFAB;	# to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
|
||||
\u313C<>\uFFAC;	# to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
|
||||
\u313D<>\uFFAD;	# to HALFWIDTH HANGUL LETTER RIEUL-SIOS
|
||||
\u313E<>\uFFAE;	# to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
|
||||
\u313F<>\uFFAF;	# to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
|
||||
\u3140<>\uFFB0;	# to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
|
||||
\u3141<>\uFFB1;	# to HALFWIDTH HANGUL LETTER MIEUM
|
||||
\u3142<>\uFFB2;	# to HALFWIDTH HANGUL LETTER PIEUP
|
||||
\u3143<>\uFFB3;	# to HALFWIDTH HANGUL LETTER SSANGPIEUP
|
||||
\u3144<>\uFFB4;	# to HALFWIDTH HANGUL LETTER PIEUP-SIOS
|
||||
\u3145<>\uFFB5;	# to HALFWIDTH HANGUL LETTER SIOS
|
||||
\u3146<>\uFFB6;	# to HALFWIDTH HANGUL LETTER SSANGSIOS
|
||||
\u3147<>\uFFB7;	# to HALFWIDTH HANGUL LETTER IEUNG
|
||||
\u3148<>\uFFB8;	# to HALFWIDTH HANGUL LETTER CIEUC
|
||||
\u3149<>\uFFB9;	# to HALFWIDTH HANGUL LETTER SSANGCIEUC
|
||||
\u314A<>\uFFBA;	# to HALFWIDTH HANGUL LETTER CHIEUCH
|
||||
\u314B<>\uFFBB;	# to HALFWIDTH HANGUL LETTER KHIEUKH
|
||||
\u314C<>\uFFBC;	# to HALFWIDTH HANGUL LETTER THIEUTH
|
||||
\u314D<>\uFFBD;	# to HALFWIDTH HANGUL LETTER PHIEUPH
|
||||
\u314E<>\uFFBE;	# to HALFWIDTH HANGUL LETTER HIEUH
|
||||
\u314F<>\uFFC2;	# to HALFWIDTH HANGUL LETTER A
|
||||
\u3150<>\uFFC3;	# to HALFWIDTH HANGUL LETTER AE
|
||||
\u3151<>\uFFC4;	# to HALFWIDTH HANGUL LETTER YA
|
||||
\u3152<>\uFFC5;	# to HALFWIDTH HANGUL LETTER YAE
|
||||
\u3153<>\uFFC6;	# to HALFWIDTH HANGUL LETTER EO
|
||||
\u3154<>\uFFC7;	# to HALFWIDTH HANGUL LETTER E
|
||||
\u3155<>\uFFCA;	# to HALFWIDTH HANGUL LETTER YEO
|
||||
\u3156<>\uFFCB;	# to HALFWIDTH HANGUL LETTER YE
|
||||
\u3157<>\uFFCC;	# to HALFWIDTH HANGUL LETTER O
|
||||
\u3158<>\uFFCD;	# to HALFWIDTH HANGUL LETTER WA
|
||||
\u3159<>\uFFCE;	# to HALFWIDTH HANGUL LETTER WAE
|
||||
\u315A<>\uFFCF;	# to HALFWIDTH HANGUL LETTER OE
|
||||
\u315B<>\uFFD2;	# to HALFWIDTH HANGUL LETTER YO
|
||||
\u315C<>\uFFD3;	# to HALFWIDTH HANGUL LETTER U
|
||||
\u315D<>\uFFD4;	# to HALFWIDTH HANGUL LETTER WEO
|
||||
\u315E<>\uFFD5;	# to HALFWIDTH HANGUL LETTER WE
|
||||
\u315F<>\uFFD6;	# to HALFWIDTH HANGUL LETTER WI
|
||||
\u3160<>\uFFD7;	# to HALFWIDTH HANGUL LETTER YU
|
||||
\u3161<>\uFFDA;	# to HALFWIDTH HANGUL LETTER EU
|
||||
\u3162<>\uFFDB;	# to HALFWIDTH HANGUL LETTER YI
|
||||
\u3163<>\uFFDC;	# to HALFWIDTH HANGUL LETTER I
|
||||
\uFFE0<>'¢';	# from FULLWIDTH CENT SIGN
|
||||
\uFFE1<>'£';	# from FULLWIDTH POUND SIGN
|
||||
\uFFE2<>'¬';	# from FULLWIDTH NOT SIGN
|
||||
\uFFE3<>'¯';	# from FULLWIDTH MACRON
|
||||
\u3000<>' ';	# ideographic space (place this after MACRON)
|
||||
\uFFE4<>'¦';	# from FULLWIDTH BROKEN BAR
|
||||
\uFFE5<>'¥';	# from FULLWIDTH YEN SIGN
|
||||
\uFFE6<>₩;	# from FULLWIDTH WON SIGN
|
||||
│<>\uFFE8;	# to HALFWIDTH FORMS LIGHT VERTICAL
|
||||
'←'<>\uFFE9;	# to HALFWIDTH LEFTWARDS ARROW
|
||||
↑<>\uFFEA;	# to HALFWIDTH UPWARDS ARROW
|
||||
'→'<>\uFFEB;	# to HALFWIDTH RIGHTWARDS ARROW
|
||||
↓<>\uFFEC;	# to HALFWIDTH DOWNWARDS ARROW
|
||||
■<>\uFFED;	# to HALFWIDTH BLACK SQUARE
|
||||
○<>\uFFEE;	# to HALFWIDTH WHITE CIRCLE
|
||||
|
||||
# eof
|
||||
|
|
@ -1,345 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Rules are predicated on running NFD first, and NFC afterwards
|
||||
# :: [\u0000-\u007F \u0370-\u03FF [:Greek:] [:nonspacing mark:]] ;
|
||||
# MINIMAL FILTER GENERATED FOR: Greek-Latin
|
||||
:: [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u03F7-\u07FB\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u03F9] ;
|
||||
|
||||
:: NFD (NFC) ;
|
||||
|
||||
# TEST CASES
|
||||
|
||||
# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
|
||||
# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
|
||||
# ᾳ ῃ ῳ ὃ ὄ
|
||||
# ὠς ὡς ὢς ὣς
|
||||
# Ὠς Ὡς Ὢς Ὣς
|
||||
# ὨΣ ὩΣ ὪΣ ὫΣ
|
||||
# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
|
||||
|
||||
# Useful variables
|
||||
|
||||
$lower = [[:latin:][:greek:] & [:Ll:]];
|
||||
$glower = [[:greek:] & [:Ll:]];
|
||||
$upper = [[:latin:][:greek:] & [:Lu:]] ;
|
||||
$accent = [:M:] ;
|
||||
|
||||
# NOTE: restrict to just the Greek & Latin accents that we care about
|
||||
# TODO: broaden out once iteration is fixed
|
||||
$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;
|
||||
|
||||
$macron = \u0304 ;
|
||||
$ddot = \u0308 ;
|
||||
$ddotmac = [$ddot$macron];
|
||||
|
||||
$lcgvowel = [αεηιουω] ;
|
||||
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
|
||||
$gvowel = [$lcgvowel $ucgvowel] ;
|
||||
$lcgvowelC = [$lcgvowel $accent] ;
|
||||
|
||||
$evowel = [aeiouyAEIOUY];
|
||||
$evowel2 = [iuyIUY];
|
||||
$vowel = [ $evowel $gvowel] ;
|
||||
|
||||
$gammaLike = [ΓΚΞΧγκξχϰ] ;
|
||||
$egammaLike = [GKXCgkxc] ;
|
||||
$smooth = ̓ ;
|
||||
$rough = ̔ ;
|
||||
$iotasub = ͅ ;
|
||||
|
||||
$evowel_i = [$evowel-[iI]] ;
|
||||
$evowel2_i = [uyUY];
|
||||
|
||||
$underbar = \u0331;
|
||||
|
||||
$afterLetter = [:L:] [[:M:]\']* ;
|
||||
$beforeLetter = [[:M:]\']* [:L:] ;
|
||||
$beforeLower = $accent * $lower ;
|
||||
|
||||
$notLetter = [^[:L:][:M:]] ;
|
||||
$under = ̱;
|
||||
|
||||
# Fix punctuation
|
||||
# preserve original
|
||||
\: <> \: $under ;
|
||||
\? <> \? $under ;
|
||||
|
||||
\; <> \? ;
|
||||
· <> \: ;
|
||||
|
||||
# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
|
||||
|
||||
\u0342 <> \u0302 ;
|
||||
|
||||
# IOTA: convert iota subscript to iota
|
||||
# first make previous alpha long!
|
||||
|
||||
$accent_minus = [[$accent]-[$iotasub$macron]];
|
||||
|
||||
Α } $accent_minus * $iotasub > | Α $macron ;
|
||||
α } $accent_minus * $iotasub > | α $macron ;
|
||||
|
||||
# now convert to uppercase if after uppercase, otherwise to lowercase
|
||||
|
||||
$upper $accent * { $iotasub > I ;
|
||||
$iotasub > i ;
|
||||
|
||||
| $1 $iotasub < ($evowel $macron $accentMinus *) i ;
|
||||
| $1 $iotasub < ($evowel $macron $accentMinus *) I ;
|
||||
|
||||
# BREATHING
|
||||
|
||||
# Convert rough breathing to h, and move before letters.
|
||||
|
||||
# Make A ` x = > H a x
|
||||
|
||||
Α ($macron?) $rough } $beforeLower > H | α $1;
|
||||
Ε $rough } $beforeLower > H | ε;
|
||||
Η $rough } $beforeLower > H | η ;
|
||||
Ι ($ddot?) $rough } $beforeLower > H | ι $1;
|
||||
Ο $rough } $beforeLower > H | ο ;
|
||||
Υ $rough } $beforeLower > H | υ ;
|
||||
Ω ($ddot?) $rough } $beforeLower > H | ω $1;
|
||||
|
||||
# Make A x ` = > H a x
|
||||
|
||||
Α ($glower $macron?) $rough > H | α $1 ;
|
||||
Ε ($glower) $rough > H | ε $1 ;
|
||||
Η ($glower) $rough > H | η $1 ;
|
||||
Ι ($glower $ddot?) $rough > H | ι $1 ;
|
||||
Ο ($glower) $rough > H | ο $1 ;
|
||||
Υ ($glower) $rough > H | υ $1 ;
|
||||
Ω ($glower $ddot?) $rough > H | ω $1 ;
|
||||
|
||||
#Otherwise, make x ` into h x and X ` into H X
|
||||
|
||||
($lcgvowel + $ddotmac? ) $rough > h | $1 ;
|
||||
($gvowel + $ddotmac? ) $rough > H | $1 ;
|
||||
|
||||
# Go backwards with H
|
||||
|
||||
| $1 $rough < h ($evowel $macron $ddot? $evowel2_i $macron?) ;
|
||||
| $1 $rough < h ($evowel $ddot? $evowel2 $macron?) ;
|
||||
| $1 $rough < h ($evowel $macron? $ddot?) ;
|
||||
|
||||
| $1 $rough < H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
|
||||
| $1 $rough < H ([AEIOUY] $ddot? $evowel2 $macron?) ;
|
||||
| $1 $rough < H ([AEIOUY] $macron? $ddot?) ;
|
||||
|
||||
# titlecase, have to fix individually
|
||||
# in the future, we should add &uppercase() to make this easier
|
||||
|
||||
| A $1 $rough < H a ($macron $ddot? $evowel2_i $macron?) ;
|
||||
| E $1 $rough < H e ($macron $ddot? $evowel2_i $macron?) ;
|
||||
| I $1 $rough < H i ($macron $ddot? $evowel2_i $macron?) ;
|
||||
| O $1 $rough < H o ($macron $ddot? $evowel2_i $macron?) ;
|
||||
| U $1 $rough < H u ($macron $ddot? $evowel2_i $macron?) ;
|
||||
| Y $1 $rough < H y ($macron $ddot? $evowel2_i $macron?) ;
|
||||
|
||||
| A $1 $rough < H a ($ddot? $evowel2 $macron?) ;
|
||||
| E $1 $rough < H e ($ddot? $evowel2 $macron?) ;
|
||||
| I $1 $rough < H i ($ddot? $evowel2 $macron?) ;
|
||||
| O $1 $rough < H o ($ddot? $evowel2 $macron?) ;
|
||||
| U $1 $rough < H u ($ddot? $evowel2 $macron?) ;
|
||||
| Y $1 $rough < H y ($ddot? $evowel2 $macron?) ;
|
||||
|
||||
| A $1 $rough < H a ($macron? $ddot? ) ;
|
||||
| E $1 $rough < H e ($macron? $ddot? ) ;
|
||||
| I $1 $rough < H i ($macron? $ddot? ) ;
|
||||
| O $1 $rough < H o ($macron? $ddot? ) ;
|
||||
| U $1 $rough < H u ($macron? $ddot? ) ;
|
||||
| Y $1 $rough < H y ($macron? $ddot? ) ;
|
||||
|
||||
# Now do smooth
|
||||
|
||||
#delete smooth breathing for Latin
|
||||
$smooth > ;
|
||||
|
||||
# insert in Greek
|
||||
# the assumption is that all Marks are on letters.
|
||||
|
||||
| $1 $smooth < $notLetter { ([rR]) } [^hH$smooth$rough] ;
|
||||
| $1 $smooth < $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
|
||||
| $1 $smooth < $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
|
||||
|
||||
# TODO: preserve smooth/rough breathing if not
|
||||
# on initial vowel sequence
|
||||
|
||||
# need to have these up here so the rules don't mask
|
||||
|
||||
# remove now superfluous macron when returning
|
||||
|
||||
Α < A $macron ;
|
||||
α < a $macron ;
|
||||
|
||||
η <> e $macron ;
|
||||
Η <> E $macron ;
|
||||
|
||||
φ <> ph ;
|
||||
Ψ } $beforeLower <> Ps ;
|
||||
Ψ <> PS ;
|
||||
|
||||
Φ } $beforeLower <> Ph ;
|
||||
Φ <> PH ;
|
||||
ψ <> ps ;
|
||||
|
||||
ω <> o $macron ;
|
||||
Ω <> O $macron;
|
||||
|
||||
# NORMAL
|
||||
|
||||
α <> a ;
|
||||
Α <> A ;
|
||||
|
||||
β <> b ;
|
||||
Β <> B ;
|
||||
|
||||
γ } $gammaLike <> n } $egammaLike ;
|
||||
γ <> g ;
|
||||
Γ } $gammaLike <> N } $egammaLike ;
|
||||
Γ <> G ;
|
||||
|
||||
δ <> d ;
|
||||
Δ <> D ;
|
||||
|
||||
ε <> e ;
|
||||
Ε <> E ;
|
||||
|
||||
ζ <> z ;
|
||||
Ζ <> Z ;
|
||||
|
||||
θ <> th ;
|
||||
Θ } $beforeLower <> Th ;
|
||||
Θ <> TH ;
|
||||
|
||||
ι <> i ;
|
||||
Ι <> I ;
|
||||
|
||||
κ <> k ;
|
||||
Κ <> K ;
|
||||
|
||||
λ <> l ;
|
||||
Λ <> L ;
|
||||
|
||||
μ <> m ;
|
||||
Μ <> M ;
|
||||
|
||||
ν } $gammaLike > n\' ;
|
||||
ν <> n ;
|
||||
Ν } $gammaLike <> N\' ;
|
||||
Ν <> N ;
|
||||
|
||||
ξ <> x ;
|
||||
Ξ <> X ;
|
||||
|
||||
ο <> o ;
|
||||
Ο <> O ;
|
||||
|
||||
π <> p ;
|
||||
Π <> P ;
|
||||
|
||||
ρ $rough <> rh;
|
||||
Ρ $rough } $beforeLower <> Rh ;
|
||||
Ρ $rough <> RH ;
|
||||
ρ <> r ;
|
||||
Ρ <> R ;
|
||||
|
||||
# insert separator before things that turn into s
|
||||
|
||||
[Pp] { } [ςσΣϷϸϺϻ] > \' ;
|
||||
|
||||
# special S variants
|
||||
|
||||
Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
||||
ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
||||
Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
||||
ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
||||
|
||||
# underbar means exception
|
||||
|
||||
# before a letter, initial
|
||||
ς } $beforeLetter <> s $underbar } $beforeLetter;
|
||||
σ } $beforeLetter <> s } $beforeLetter;
|
||||
|
||||
# otherwise, after a letter = final
|
||||
$afterLetter { σ <> $afterLetter { s $underbar;
|
||||
$afterLetter { ς <> $afterLetter { s ;
|
||||
|
||||
# otherwise (isolated) = initial
|
||||
ς <> s $underbar;
|
||||
σ <> s ;
|
||||
|
||||
# [Pp] { Σ <> \'S ;
|
||||
Σ <> S ;
|
||||
|
||||
τ <> t ;
|
||||
Τ <> T ;
|
||||
|
||||
$vowel {υ } <> u ;
|
||||
υ <> y ;
|
||||
$vowel { Υ <> U ;
|
||||
Υ <> Y ;
|
||||
|
||||
χ <> ch ;
|
||||
Χ } $beforeLower <> Ch ;
|
||||
Χ <> CH ;
|
||||
|
||||
# Completeness for ASCII
|
||||
|
||||
$ignore = [[:Mark:]''] * ;
|
||||
|
||||
| k < c ;
|
||||
| ph < f ;
|
||||
| i < j ;
|
||||
| k < q ;
|
||||
| b < v } $vowel ;
|
||||
| b < w } $vowel;
|
||||
| u < v ;
|
||||
| u < w;
|
||||
| K < C ;
|
||||
| Ph < F ;
|
||||
| I < J ;
|
||||
| K < Q ;
|
||||
| B < V } $vowel ;
|
||||
| B < W } $vowel ;
|
||||
| U < V ;
|
||||
| U < W ;
|
||||
|
||||
$rough } $ignore [:UppercaseLetter:] > H ;
|
||||
$ignore [:UppercaseLetter:] { $rough > H ;
|
||||
$rough < H ;
|
||||
$rough <> h ;
|
||||
|
||||
# Completeness for Greek
|
||||
|
||||
ϐ > | β ;
|
||||
ϑ > | θ ;
|
||||
ϒ > | Υ ;
|
||||
ϕ > | φ ;
|
||||
ϖ > | π ;
|
||||
|
||||
ϰ > | κ ;
|
||||
ϱ > | ρ ;
|
||||
ϲ > | σ ;
|
||||
Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
|
||||
ϳ > j ;
|
||||
ϴ > | Θ ;
|
||||
ϵ > | ε ;
|
||||
|
||||
µ > | μ ;
|
||||
|
||||
ͺ > i;
|
||||
|
||||
# delete any trailing ' marks used for roundtripping
|
||||
|
||||
< [Ππ] { \' } [Ss] ;
|
||||
< [Νν] { \' } $egammaLike ;
|
||||
|
||||
::NFC (NFD) ;
|
||||
# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
|
||||
# ([\u0000-\u007F \u00B7 [:Latin:] [:nonspacing mark:]]) ;
|
||||
# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
|
||||
:: ( [':?A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0300-\u0337\u0339-\u0345\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AA-\u03B0\u03CA-\u03CE\u03D3-\u03D4\u0400-\u0401\u0403\u0407\u040C-\u040E\u0419\u0439\u0450-\u0451\u0453\u0457\u045C-\u045E\u0476-\u0477\u04C1-\u04C2\u04D0-\u04D3\u04D6-\u04D7\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F5\u04F8-\u04F9\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FC1-\u1FC4\u1FC6-\u1FD3\u1FD6-\u1FDB\u1FDD-\u1FEE\u1FF2-\u1FF4\u1FF6-\u1FFC\u212A-\u212B] ) ;
|
|
@ -1,252 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# For modern Greek, based on UNGEGN rules.
|
||||
|
||||
# Rules are predicated on running NFD first, and NFC afterwards
|
||||
# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
|
||||
# WARNING: need to add accents to both filters ###
|
||||
# :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ\u03F7-\u07FB\u03F9] ;
|
||||
|
||||
:: [[[:Greek:][:Mn:][:Me:]] [\:-;?\u00B7\u037E\u0387]] ;
|
||||
::NFD (NFC) ;
|
||||
|
||||
# Useful variables
|
||||
|
||||
$lower = [[:latin:][:greek:] & [:Ll:]] ;
|
||||
$upper = [[:latin:][:greek:] & [:Lu:]] ;
|
||||
$accent = [[:Mn:][:Me:]] ;
|
||||
|
||||
$macron = ̄ ;
|
||||
$ddot = ̈ ;
|
||||
|
||||
$lcgvowel = [αεηιουω] ;
|
||||
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
|
||||
$gvowel = [$lcgvowel $ucgvowel] ;
|
||||
$lcgvowelC = [$lcgvowel $accent] ;
|
||||
|
||||
$evowel = [aeiouyAEIOUY];
|
||||
$vowel = [ $evowel $gvowel] ;
|
||||
|
||||
$beforeLower = $accent * $lower ;
|
||||
|
||||
$gammaLike = [ΓΚΞΧγκξχϰ] ;
|
||||
$egammaLike = [GKXCgkxc] ;
|
||||
$smooth = ̓ ;
|
||||
$rough = ̔ ;
|
||||
$iotasub = ͅ ;
|
||||
|
||||
$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;
|
||||
|
||||
$under = ̱;
|
||||
|
||||
$caron = ̌;
|
||||
|
||||
$afterLetter = [:L:] [\'$accent]* ;
|
||||
$beforeLetter = [\'$accent]* [:L:] ;
|
||||
|
||||
# Fix punctuation
|
||||
|
||||
# preserve original
|
||||
\: <> \: $under ;
|
||||
\? <> \? $under ;
|
||||
|
||||
\; <> \? ;
|
||||
· <> \: ;
|
||||
|
||||
# Fix any ancient characters that creep in
|
||||
|
||||
͂ > ́ ;
|
||||
̂ > ́ ;
|
||||
̀ > ́ ;
|
||||
$smooth > ;
|
||||
$rough > ;
|
||||
$iotasub > ;
|
||||
ͺ > ;
|
||||
|
||||
# need to have these up here so the rules don't mask
|
||||
|
||||
η <> i $under ;
|
||||
Η <> I $under ;
|
||||
|
||||
Ψ } $beforeLower <> Ps ;
|
||||
Ψ <> PS ;
|
||||
ψ <> ps ;
|
||||
|
||||
ω <> o $under ;
|
||||
Ω <> O $under;
|
||||
|
||||
# at beginning or end of word, convert mp to b
|
||||
|
||||
[^[:L:]$accent] { μπ > b ;
|
||||
μπ } [^[:L:]$accent] > b ;
|
||||
[^[:L:]$accent] { [Μμ][Ππ] > B ;
|
||||
[Μμ][Ππ] } [^[:L:]$accent] > B ;
|
||||
|
||||
μπ < b ;
|
||||
Μπ < B } $beforeLower ;
|
||||
ΜΠ < B ;
|
||||
|
||||
# handle diphthongs ending with upsilon
|
||||
|
||||
ου <> ou ;
|
||||
ΟΥ <> OU ;
|
||||
Ου <> Ou ;
|
||||
οΥ <> oU ;
|
||||
|
||||
$fmaker = [aeiAEI] $under ? ;
|
||||
$shiftForwardVowels = [[:Mn:]-[\u0308]]; # note: a diaeresis keeps the items separate
|
||||
|
||||
$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;
|
||||
υ $1 < ( $shiftForwardVowels )* v $under ;
|
||||
|
||||
$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;
|
||||
υ $1 < ( $shiftForwardVowels )* f $under ;
|
||||
|
||||
$fmaker { Υ } $softener <> V $under ;
|
||||
$fmaker { Υ <> U $under ;
|
||||
|
||||
υ <> y ;
|
||||
Υ <> Y ;
|
||||
|
||||
# NORMAL
|
||||
|
||||
α <> a ;
|
||||
Α <> A ;
|
||||
|
||||
β <> v ;
|
||||
Β <> V ;
|
||||
|
||||
γ } $gammaLike <> n } $egammaLike ;
|
||||
γ <> g ;
|
||||
Γ } $gammaLike <> N } $egammaLike ;
|
||||
Γ <> G ;
|
||||
|
||||
δ <> d ;
|
||||
Δ <> D ;
|
||||
|
||||
ε <> e ;
|
||||
Ε <> E ;
|
||||
|
||||
ζ <> z ;
|
||||
Ζ <> Z ;
|
||||
|
||||
θ <> th ;
|
||||
Θ } $beforeLower <> Th ;
|
||||
Θ <> TH ;
|
||||
|
||||
ι <> i ;
|
||||
Ι <> I ;
|
||||
|
||||
κ <> k ;
|
||||
Κ <> K ;
|
||||
|
||||
λ <> l ;
|
||||
Λ <> L ;
|
||||
|
||||
μ <> m ;
|
||||
Μ <> M ;
|
||||
|
||||
ν } $gammaLike > n\' ;
|
||||
ν <> n ;
|
||||
Ν } $gammaLike <> N\' ;
|
||||
Ν <> N ;
|
||||
|
||||
ξ <> x ;
|
||||
Ξ <> X ;
|
||||
|
||||
ο <> o ;
|
||||
Ο <> O ;
|
||||
|
||||
π <> p ;
|
||||
Π <> P ;
|
||||
|
||||
ρ <> r ;
|
||||
Ρ <> R ;
|
||||
|
||||
# insert separator before things that turn into s
|
||||
[Pp] { } [ςσΣϷϸϺϻ] > \' ;
|
||||
|
||||
# special S variants
|
||||
|
||||
Ϸ <> Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
||||
ϸ <> š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
||||
Ϻ <> Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
||||
ϻ <> ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
||||
|
||||
# Underbar means exception
|
||||
|
||||
# before a letter, initial
|
||||
ς } $beforeLetter <> s $under } $beforeLetter;
|
||||
σ } $beforeLetter <> s } $beforeLetter;
|
||||
|
||||
# otherwise, after a letter = final
|
||||
$afterLetter { σ <> $afterLetter { s $under;
|
||||
$afterLetter { ς <> $afterLetter { s ;
|
||||
|
||||
# otherwise (isolated) = initial
|
||||
ς <> s $under;
|
||||
σ <> s ;
|
||||
|
||||
# [Pp] { Σ <> \'S ;
|
||||
Σ <> S ;
|
||||
|
||||
τ <> t ;
|
||||
Τ <> T ;
|
||||
|
||||
φ <> f ;
|
||||
Φ <> F ;
|
||||
|
||||
χ <> ch ;
|
||||
Χ } $beforeLower <> Ch ;
|
||||
Χ <> CH ;
|
||||
|
||||
# Completeness for ASCII
|
||||
|
||||
# $ignore = [[:Mark:]''] * ;
|
||||
|
||||
| ch < h ;
|
||||
| k < c ;
|
||||
| i < j ;
|
||||
| k < q ;
|
||||
| b < u } $vowel ;
|
||||
| b < w } $vowel ;
|
||||
| y < u ;
|
||||
| y < w ;
|
||||
|
||||
| Ch < H ;
|
||||
| K < C ;
|
||||
| I < J ;
|
||||
| K < Q ;
|
||||
| B < W } $vowel ;
|
||||
| B < U } $vowel ;
|
||||
| Y < W ;
|
||||
| Y < U ;
|
||||
|
||||
# Completeness for Greek
|
||||
|
||||
ϐ > | β ;
|
||||
ϑ > | θ ;
|
||||
ϒ > | Υ ;
|
||||
ϕ > | φ ;
|
||||
ϖ > | π ;
|
||||
|
||||
ϰ > | κ ;
|
||||
ϱ > | ρ ;
|
||||
ϲ > | σ ;
|
||||
Ϲ > | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
|
||||
ϳ > j ;
|
||||
ϴ > | Θ ;
|
||||
ϵ > | ε ;
|
||||
µ > | μ ;
|
||||
|
||||
# delete any trailing ' marks used for roundtripping
|
||||
|
||||
< [Ππ] { \' } [Ss] ;
|
||||
< [Νν] { \' } $egammaLike ;
|
||||
|
||||
::NFC (NFD) ;
|
||||
|
||||
# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
|
||||
:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;
|
|
@ -1,91 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Gujarati-InterIndic
|
||||
#:: NFD (NFC) ;
|
||||
\u0a81>\ue001; # SIGN CANDRABINDU
|
||||
\u0a82>\ue002; # SIGN ANUSVARA
|
||||
\u0a83>\ue003; # SIGN VISARGA
|
||||
\u0a85>\ue005; # LETTER A
|
||||
\u0a86>\ue006; # LETTER AA
|
||||
\u0a87>\ue007; # LETTER I
|
||||
\u0a88>\ue008; # LETTER II
|
||||
\u0a89>\ue009; # LETTER U
|
||||
\u0a8a>\ue00a; # LETTER UU
|
||||
\u0a8b>\ue00b; # LETTER VOCALIC R
|
||||
\u0a8c>\ue00c; # LETTER VOCALIC L
|
||||
\u0a8d>\ue00d; # VOWEL CANDRA E
|
||||
\u0a8f>\ue00f; # LETTER E
|
||||
\u0a90>\ue010; # LETTER AI
|
||||
\u0a91>\ue011; # VOWEL CANDRA O
|
||||
\u0a93>\ue013; # LETTER O
|
||||
\u0a94>\ue014; # LETTER AU
|
||||
\u0a95>\ue015; # LETTER KA
|
||||
\u0a96>\ue016; # LETTER KHA
|
||||
\u0a97>\ue017; # LETTER GA
|
||||
\u0a98>\ue018; # LETTER GHA
|
||||
\u0a99>\ue019; # LETTER NGA
|
||||
\u0a9a>\ue01a; # LETTER CA
|
||||
\u0a9b>\ue01b; # LETTER CHA
|
||||
\u0a9c>\ue01c; # LETTER JA
|
||||
\u0a9d>\ue01d; # LETTER JHA
|
||||
\u0a9e>\ue01e; # LETTER NYA
|
||||
\u0a9f>\ue01f; # LETTER TTA
|
||||
\u0aa0>\ue020; # LETTER TTHA
|
||||
\u0aa1>\ue021; # LETTER DDA
|
||||
\u0aa2>\ue022; # LETTER DDHA
|
||||
\u0aa3>\ue023; # LETTER NNA
|
||||
\u0aa4>\ue024; # LETTER TA
|
||||
\u0aa5>\ue025; # LETTER THA
|
||||
\u0aa6>\ue026; # LETTER DA
|
||||
\u0aa7>\ue027; # LETTER DHA
|
||||
\u0aa8>\ue028; # LETTER NA
|
||||
\u0aaa>\ue02a; # LETTER PA
|
||||
\u0aab>\ue02b; # LETTER PHA
|
||||
\u0aac>\ue02c; # LETTER BA
|
||||
\u0aad>\ue02d; # LETTER BHA
|
||||
\u0aae>\ue02e; # LETTER MA
|
||||
\u0aaf>\ue02f; # LETTER YA
|
||||
\u0ab0>\ue030; # LETTER RA
|
||||
\u0ab2>\ue032; # LETTER LA
|
||||
\u0ab3>\ue033; # LETTER LLA
|
||||
\u0ab5>\ue035; # LETTER VA
|
||||
\u0ab6>\ue036; # LETTER SHA
|
||||
\u0ab7>\ue037; # LETTER SSA
|
||||
\u0ab8>\ue038; # LETTER SA
|
||||
\u0ab9>\ue039; # LETTER HA
|
||||
\u0abc>\ue03c; # SIGN NUKTA
|
||||
\u0abd>\ue03d; # SIGN AVAGRAHA
|
||||
\u0abe>\ue03e; # VOWEL SIGN AA
|
||||
\u0abf>\ue03f; # VOWEL SIGN I
|
||||
\u0ac0>\ue040; # VOWEL SIGN II
|
||||
\u0ac1>\ue041; # VOWEL SIGN U
|
||||
\u0ac2>\ue042; # VOWEL SIGN UU
|
||||
\u0ac3>\ue043; # VOWEL SIGN VOCALIC R
|
||||
\u0ac4>\ue044; # VOWEL SIGN VOCALIC RR
|
||||
\u0ac5>\ue045; # VOWEL SIGN CANDRA E
|
||||
\u0ac7>\ue047; # VOWEL SIGN E
|
||||
\u0ac8>\ue048; # VOWEL SIGN AI
|
||||
\u0ac9>\ue049; # VOWEL SIGN CANDRA O
|
||||
\u0acb>\ue04b; # VOWEL SIGN O
|
||||
\u0acc>\ue04c; # VOWEL SIGN AU
|
||||
\u0acd>\ue04d; # SIGN VIRAMA
|
||||
\u0ad0>\ue050; # OM
|
||||
\u0ae0>\ue060; # LETTER VOCALIC RR
|
||||
\u0ae1>\ue061; # LETTER VOCALIC LL
|
||||
\u0ae6>\ue066; # DIGIT ZERO
|
||||
\u0ae7>\ue067; # DIGIT ONE
|
||||
\u0ae8>\ue068; # DIGIT TWO
|
||||
\u0ae9>\ue069; # DIGIT THREE
|
||||
\u0aea>\ue06a; # DIGIT FOUR
|
||||
\u0aeb>\ue06b; # DIGIT FIVE
|
||||
\u0aec>\ue06c; # DIGIT SIX
|
||||
\u0aed>\ue06d; # DIGIT SEVEN
|
||||
\u0aee>\ue06e; # DIGIT EIGHT
|
||||
\u0aef>\ue06f; # DIGIT NINE
|
||||
\u0964>\ue064; # DANDA
|
||||
\u0965>\ue065; # DOUBLE DANDA
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,95 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Gurmukhi-InterIndic
|
||||
#:: NFD (NFC) ;
|
||||
|
||||
#\u0A16\u0A3C>\uE059; # LETTER KHHA
|
||||
#\u0A17\u0A3C>\uE05A; # LETTER GHHA
|
||||
#\u0A1C\u0A3C>\uE05B; # LETTER ZA
|
||||
#\u0A38\u0A3C>\uE036; # LETTER SHA
|
||||
#\u0A32\u0A3C>\uE033; # LETTER LLA
|
||||
#\u0A2B\u0A3C>\uE05E; # LETTER FA
|
||||
\u0A01>\ue001; # SIGN CHANDRABINDU
|
||||
\u0A02>\uE002; # SIGN BINDI
|
||||
\u0A05>\uE005; # LETTER A
|
||||
\u0A06>\uE006; # LETTER AA
|
||||
\u0A07>\uE007; # LETTER I
|
||||
\u0A08>\uE008; # LETTER II
|
||||
\u0A09>\uE009; # LETTER U
|
||||
\u0A0A>\uE00A; # LETTER UU
|
||||
\u0A0C>\uE032; # FALLBACK : VOCALIC LA
|
||||
\u0A0F>\uE00F; # LETTER EE
|
||||
\u0A10>\uE010; # LETTER AI
|
||||
\u0A13>\uE013; # LETTER OO
|
||||
\u0A14>\uE014; # LETTER AU
|
||||
\u0A15>\uE015; # LETTER KA
|
||||
\u0A16>\uE016; # LETTER KHA
|
||||
\u0A17>\uE017; # LETTER GA
|
||||
\u0A18>\uE018; # LETTER GHA
|
||||
\u0A19>\uE019; # LETTER NGA
|
||||
\u0A1A>\uE01A; # LETTER CA
|
||||
\u0A1B>\uE01B; # LETTER CHA
|
||||
\u0A1C>\uE01C; # LETTER JA
|
||||
\u0A1D>\uE01D; # LETTER JHA
|
||||
\u0A1E>\uE01E; # LETTER NYA
|
||||
\u0A1F>\uE01F; # LETTER TTA
|
||||
\u0A20>\uE020; # LETTER TTHA
|
||||
\u0A21>\uE021; # LETTER DDA
|
||||
\u0A22>\uE022; # LETTER DDHA
|
||||
\u0A23>\uE023; # LETTER NNA
|
||||
\u0A24>\uE024; # LETTER TA
|
||||
\u0A25>\uE025; # LETTER THA
|
||||
\u0A26>\uE026; # LETTER DA
|
||||
\u0A27>\uE027; # LETTER DHA
|
||||
\u0A28>\uE028; # LETTER NA
|
||||
\u0A2A>\uE02A; # LETTER PA
|
||||
\u0A2B>\uE02B; # LETTER PHA
|
||||
\u0A2C>\uE02C; # LETTER BA
|
||||
\u0A2D>\uE02D; # LETTER BHA
|
||||
\u0A2E>\uE02E; # LETTER MA
|
||||
\u0A2F>\uE02F; # LETTER YA
|
||||
\u0A30>\uE030; # LETTER RA
|
||||
\u0A32>\uE032; # LETTER LA
|
||||
\u0a33>\uE033; # FALLBACK
|
||||
\u0A35>\uE035; # LETTER VA
|
||||
\u0a36>\ue036; # LETTER SHA
|
||||
\u0A38\u0A3C>\ue036; # FALLBACK: LETTER SA + SIGN NUKTA (decomposed form of LETTER SHA)
|
||||
\u0A38>\uE038; # LETTER SA
|
||||
\u0A39>\uE039; # LETTER HA
|
||||
\u0A3C>\uE03C; # SIGN NUKTA
|
||||
\u0A3E>\uE03E; # VOWEL SIGN AA
|
||||
\u0A3F>\uE03F; # VOWEL SIGN I
|
||||
\u0A40>\uE040; # VOWEL SIGN II
|
||||
\u0A41>\uE041; # VOWEL SIGN U
|
||||
\u0A42>\uE042; # VOWEL SIGN UU
|
||||
\u0A47>\uE047; # VOWEL SIGN EE
|
||||
\u0A48>\uE048; # VOWEL SIGN AI
|
||||
\u0A4B>\uE04B; # VOWEL SIGN OO
|
||||
\u0A4C>\uE04C; # VOWEL SIGN AU
|
||||
\u0A4D>\uE04D; # SIGN VIRAMA
|
||||
|
||||
\u0A5C>\uE05C; # LETTER RRA
|
||||
|
||||
\u0A66>\uE066; # DIGIT ZERO
|
||||
\u0A67>\uE067; # DIGIT ONE
|
||||
\u0A68>\uE068; # DIGIT TWO
|
||||
\u0A69>\uE069; # DIGIT THREE
|
||||
\u0A6A>\uE06A; # DIGIT FOUR
|
||||
\u0A6B>\uE06B; # DIGIT FIVE
|
||||
\u0A6C>\uE06C; # DIGIT SIX
|
||||
\u0A6D>\uE06D; # DIGIT SEVEN
|
||||
\u0A6E>\uE06E; # DIGIT EIGHT
|
||||
\u0A6F>\uE06F; # DIGIT NINE
|
||||
\u0A70>\uE07C; # TIPPI
|
||||
\u0A71>\uE07D; # ADDAK
|
||||
\u0A72>\uE07E; # IRI
|
||||
\u0A73>\uE07F; # URA
|
||||
\u0A74>\uE080; # EK ONKAR
|
||||
\u0964>\ue064; # DANDA
|
||||
\u0965>\ue065; # DOUBLE DANDA
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
||||
|
File diff suppressed because it is too large
Load diff
|
@ -1,24 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Only intended for internal use
|
||||
:: fullwidth-halfwidth;
|
||||
|
||||
。 > '.';
|
||||
|
||||
$terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]];
|
||||
# opening punctuation: union of open-bracket (Ps) and initial-quote (Pi) characters
$initialPunct = [[:Ps:][:Pi:]];
|
||||
|
||||
# add space between any Han or terminal punctuation and letters, and
|
||||
# between letters and Han or initial punct
|
||||
|
||||
[[:Ideographic:] $terminalPunct] {} [:Letter:] > ' ' ;
|
||||
[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] > ' ' ;
|
||||
|
||||
# remove spacing between ideographs and other letters
|
||||
|
||||
< [:Ideographic:] { ' ' } [:Letter:] ;
|
||||
< [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;
|
||||
|
|
@ -1,109 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Transliteration table for Hebrew
|
||||
# Based on the UNGEGN table at:
|
||||
# http://www.eki.ee/wgrs/rom1_he.pdf
|
||||
#
|
||||
# Exceptions:
|
||||
# - Accents are added to disambiguate letters
|
||||
# - Combinations of dagesh, shin/sin dot that produce different
|
||||
# letters are not yet encoded.
|
||||
#
|
||||
# To test, open:
|
||||
# http://oss.software.ibm.com/cgi-bin/icu/tr
|
||||
# Click Edit, paste in this file, Save As hebrew-latin/XXX
|
||||
# (where XXX is a username)
|
||||
# Now go back to the main window, and try it out.
|
||||
# Use hebrew-latin/XXX for Output 1, and (Inverse) for Output 2
|
||||
# Paste in hebrew text in Input, and hit Transliterate.
|
||||
#
|
||||
# For more information, see:
|
||||
# http://oss.software.ibm.com/icu/userguide/Transliteration.html
|
||||
|
||||
:: [[:Hebrew:] [:^ccc=0:] [\u05B0-\u05B9\u05BB-\u05BC\u05C1-\u05C2\u2135-\u2138̄\u05BF] - [\u05BD]] ;
|
||||
:: nfkd (nfc) ;
|
||||
$letterAfter = [:M:]* [:L:] ;
|
||||
|
||||
# move longer items here to avoid masking
|
||||
|
||||
ח <> ẖ ;
|
||||
צ <> ẕ } $letterAfter;
|
||||
ץ <> ẕ ;
|
||||
ש <> ş ;
|
||||
ת <> ţ ;
|
||||
|
||||
א <> ʼ ;
|
||||
ב <> b ;
|
||||
ג <> g ;
|
||||
ד <> d ;
|
||||
ה <> h ;
|
||||
ו <> w ;
|
||||
ז <> z ;
|
||||
ט <> t ;
|
||||
י <> y ;
|
||||
כ <> k } $letterAfter;
|
||||
ך <> k ;
|
||||
ל <> l ;
|
||||
מ <> m } $letterAfter;
|
||||
ם <> m ;
|
||||
נ <> n } $letterAfter;
|
||||
ן <> n ;
|
||||
ס <> s ;
|
||||
ע <> ʻ ;
|
||||
פ <> p } $letterAfter;
|
||||
ף <> p ;
|
||||
ק <> q ;
|
||||
ר <> r ;
|
||||
|
||||
װ > | וו; # HEBREW LIGATURE YIDDISH DOUBLE VAV
|
||||
ױ > | וי; # HEBREW LIGATURE YIDDISH VAV YOD
|
||||
ײ > | יי ; # HEBREW LIGATURE YIDDISH DOUBLE YOD
|
||||
|
||||
|
||||
ּ <> ̇ ; # dagesh just goes to overdot for now
|
||||
ׁ <> ̌ ; # shin dot -> sh
|
||||
ׂ <> ̂ ; # sin dot -> s
|
||||
|
||||
# points
|
||||
$above = [^[:ccc=0:][:ccc=230:]]*;
|
||||
|
||||
ֲ > à ;
|
||||
ֲ $1< a ($above) ̀;
|
||||
|
||||
ָ > á ;
|
||||
ָ $1 < a ($above) ́;
|
||||
|
||||
ֱ > è ;
|
||||
ֱ $1 < e ($above) ̀;
|
||||
|
||||
ֵ > é ;
|
||||
ֵ $1 < e ($above) ́;
|
||||
|
||||
ְ > e ̆ ;
|
||||
ְ $1 < e ($above) ̆;
|
||||
|
||||
ֹ > ò ;
|
||||
ֹ $1 < o ($above) ̀;
|
||||
|
||||
ִ <> i ;
|
||||
ֻ <> u ;
|
||||
ַ <> a ;
|
||||
ֶ <> e ;
|
||||
ֳ <> o ;
|
||||
|
||||
\u05BF <> ̄ ;
|
||||
|
||||
# fallbacks
|
||||
ק < c ;
|
||||
פ < f } $letterAfter;
|
||||
ף < f ;
|
||||
ז < j ;
|
||||
ו < v ;
|
||||
כס < x ;
|
||||
|
||||
:: (lower);
|
||||
:: nfc (nfd) ;
|
||||
:: ([[:Latin:] [:^ccc=0:] [\u02BB-\u02BC\u0300-\u0302\u0307\u030C\u0327\u0331\u0340-\u0341 ̄ ]]);
|
|
@ -1,207 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# note: a global filter is more efficient, but MUST include all source chars
|
||||
:: [\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]] ;
|
||||
:: NFKC ();
|
||||
|
||||
# Hiragana-Katakana
|
||||
|
||||
# This is largely a one-to-one mapping, but it has a
|
||||
# few kinks:
|
||||
|
||||
# 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
|
||||
# Hiragana equivalents. We use Hiragana wa/wi/we/wo
|
||||
# (308F-3092) with a voicing mark (3099), which is
|
||||
# semantically equivalent. However, this is a non-
|
||||
# roundtripping transformation.
|
||||
|
||||
# 2. The Katakana small ka/ke (30F5,30F6) have no
|
||||
# Hiragana equivalents. We convert them to normal
|
||||
# Hiragana ka/ke (304B,3051). This is a one-way
|
||||
# information-losing transformation and precludes
|
||||
# round-tripping of 30F5 and 30F6.
|
||||
|
||||
# 3. The combining marks 3099-309C are in the Hiragana
|
||||
# block, but they apply to Katakana as well, so we
|
||||
# leave them untouched.
|
||||
|
||||
# 4. The Katakana prolonged sound mark 30FC doubles the
|
||||
# preceding vowel. This is a one-way information-
|
||||
# losing transformation from Katakana to Hiragana.
|
||||
|
||||
# 5. The Katakana middle dot separates words in foreign
|
||||
# expressions; we leave this unmodified.
|
||||
|
||||
# The above points preclude successful round-trip
|
||||
# transformations of arbitrary input text. However,
|
||||
# they provide naturalistic results that should conform
|
||||
# to user expectations.
|
||||
|
||||
|
||||
# Combining equivalents va/vi/ve/vo
|
||||
わ゙ <> ヷ;
|
||||
ゐ゙ <> ヸ;
|
||||
ゑ゙ <> ヹ;
|
||||
を゙ <> ヺ;
|
||||
|
||||
# One-to-one mappings, main block
|
||||
# 3041:3094 <> 30A1:30F4
|
||||
# 309D,E <> 30FD,E
|
||||
ぁ <> ァ;
|
||||
あ <> ア;
|
||||
ぃ <> ィ;
|
||||
い <> イ;
|
||||
ぅ <> ゥ;
|
||||
う <> ウ;
|
||||
ぇ <> ェ;
|
||||
え <> エ;
|
||||
ぉ <> ォ;
|
||||
お <> オ;
|
||||
か <> カ;
|
||||
が <> ガ;
|
||||
き <> キ;
|
||||
ぎ <> ギ;
|
||||
く <> ク;
|
||||
ぐ <> グ;
|
||||
け <> ケ;
|
||||
げ <> ゲ;
|
||||
こ <> コ;
|
||||
ご <> ゴ;
|
||||
さ <> サ;
|
||||
ざ <> ザ;
|
||||
し <> シ;
|
||||
じ <> ジ;
|
||||
す <> ス;
|
||||
ず <> ズ;
|
||||
せ <> セ;
|
||||
ぜ <> ゼ;
|
||||
そ <> ソ;
|
||||
ぞ <> ゾ;
|
||||
た <> タ;
|
||||
だ <> ダ;
|
||||
ち <> チ;
|
||||
ぢ <> ヂ;
|
||||
っ <> ッ;
|
||||
つ <> ツ;
|
||||
づ <> ヅ;
|
||||
て <> テ;
|
||||
で <> デ;
|
||||
と <> ト;
|
||||
ど <> ド;
|
||||
な <> ナ;
|
||||
に <> ニ;
|
||||
ぬ <> ヌ;
|
||||
ね <> ネ;
|
||||
の <> ノ;
|
||||
は <> ハ;
|
||||
ば <> バ;
|
||||
ぱ <> パ;
|
||||
ひ <> ヒ;
|
||||
び <> ビ;
|
||||
ぴ <> ピ;
|
||||
ふ <> フ;
|
||||
ぶ <> ブ;
|
||||
ぷ <> プ;
|
||||
へ <> ヘ;
|
||||
べ <> ベ;
|
||||
ぺ <> ペ;
|
||||
ほ <> ホ;
|
||||
ぼ <> ボ;
|
||||
ぽ <> ポ;
|
||||
ま <> マ;
|
||||
み <> ミ;
|
||||
む <> ム;
|
||||
め <> メ;
|
||||
も <> モ;
|
||||
ゃ <> ャ;
|
||||
や <> ヤ;
|
||||
ゅ <> ュ;
|
||||
ゆ <> ユ;
|
||||
ょ <> ョ;
|
||||
よ <> ヨ;
|
||||
ら <> ラ;
|
||||
り <> リ;
|
||||
る <> ル;
|
||||
れ <> レ;
|
||||
ろ <> ロ;
|
||||
ゎ <> ヮ;
|
||||
わ <> ワ;
|
||||
ゐ <> ヰ;
|
||||
ゑ <> ヱ;
|
||||
を <> ヲ;
|
||||
ん <> ン;
|
||||
ゔ <> ヴ;
|
||||
ゝ <> ヽ;
|
||||
ゞ <> ヾ;
|
||||
|
||||
# One-way Katakana-Hiragana xform of small K ka/ke to
|
||||
# normal H ka/ke.
|
||||
か < ヵ;
|
||||
け < ヶ;
|
||||
|
||||
# Katakana followed by a prolonged sound mark 30FC has
|
||||
# its final vowel doubled. This is a Katakana-Hiragana
|
||||
# one-way information-losing transformation. We
|
||||
# include the small Katakana (e.g., small A 3041) and
|
||||
# do not distinguish them from their large
|
||||
# counterparts. It doesn't make sense to double a
|
||||
# small counterpart vowel as a small Hiragana vowel, so
|
||||
# we don't do so. In natural text this should never
|
||||
# occur anyway. If a 30FC is seen without a preceding
|
||||
# vowel sound (e.g., after n 30F3) we do not change it.
|
||||
|
||||
### $long = ー;
|
||||
|
||||
# The following categories are Hiragana, not Katakana
|
||||
# as might be expected, since by the time we get to the
|
||||
# 30FC, the preceding character will have already been
|
||||
# transformed to Hiragana.
|
||||
|
||||
# {The following mechanically generated from the
|
||||
# Unicode 3.0 data:}
|
||||
|
||||
$xa = [ \
|
||||
ぁ あ か が さ ざ \
|
||||
た だ な は ば ぱ \
|
||||
ま ゃ や ら ゎ わ \
|
||||
];
|
||||
|
||||
$xi = [ \
|
||||
ぃ い き ぎ し じ \
|
||||
ち ぢ に ひ び ぴ \
|
||||
み り ゐ \
|
||||
];
|
||||
|
||||
$xu = [ \
|
||||
ぅ う く ぐ す ず \
|
||||
っ つ づ ぬ ふ ぶ \
|
||||
ぷ む ゅ ゆ る ゔ \
|
||||
];
|
||||
|
||||
$xe = [ \
|
||||
ぇ え け げ せ ぜ \
|
||||
て で ね へ べ ぺ \
|
||||
め れ ゑ \
|
||||
];
|
||||
|
||||
$xo = [ \
|
||||
ぉ お こ ご そ ぞ \
|
||||
と ど の ほ ぼ ぽ \
|
||||
も ょ よ ろ を \
|
||||
];
|
||||
|
||||
あ < $xa {ー};
|
||||
い < $xi {ー};
|
||||
う < $xu {ー};
|
||||
え < $xe {ー};
|
||||
お < $xo {ー};
|
||||
|
||||
:: (NFKC) ;
|
||||
|
||||
# note: a global filter is more efficient, but MUST include all source chars!!
|
||||
:: ([\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9Fー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]);
|
||||
|
||||
# eof
|
|
@ -1,14 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
:: [\u3041-\u3094\u3099\u309D-\u309E\u30AC\u30AE\u30B0\u30B2\u30B4\u30B6\u30B8\u30BA\u30BC\u30BE\u30C0\u30C2\u30C5\u30C7\u30C9\u30D0\u30D3\u30D6\u30D9\u30DC\u30F4\u30F7-\u30FA\u30FE] ;
|
||||
:: NFD ;
|
||||
|
||||
:: Hiragana-Katakana;
|
||||
:: Katakana-Latin;
|
||||
|
||||
:: NFC ;
|
||||
:: (Lower) ;
|
||||
:: ([',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]) ;
|
|
@ -1,147 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Bengali
|
||||
#:: NFD (NFC) ;
|
||||
\uE001>\u0981; # SIGN CANDRABINDU
|
||||
\uE002>\u0982; # SIGN ANUSVARA
|
||||
\uE003>\u0983; # SIGN VISARGA
|
||||
\uE004>\u0985; # FALLBACK TO LETTER A
|
||||
\uE005>\u0985; # LETTER A
|
||||
\uE006>\u0986; # LETTER AA
|
||||
\uE007>\u0987; # LETTER I
|
||||
\uE008>\u0988; # LETTER II
|
||||
\uE009>\u0989; # LETTER U
|
||||
\uE00A>\u098A; # LETTER UU
|
||||
\uE00B>\u098B; # LETTER VOCALIC R
|
||||
\uE00C>\u098C; # LETTER VOCALIC L
|
||||
\uE00D>\u098F; # FALLBACK
|
||||
\uE00E>\u098F; # FALLBACK
|
||||
\uE00F>\u098F; # LETTER E
|
||||
\uE010>\u0990; # LETTER AI
|
||||
\uE011>\u0993; # FALLBACK
|
||||
\uE012>\u0993; # FALLBACK
|
||||
\uE013>\u0993; # LETTER O
|
||||
\uE014>\u0994; # LETTER AU
|
||||
\uE015>\u0995; # LETTER KA
|
||||
\uE016>\u0996; # LETTER KHA
|
||||
\uE017>\u0997; # LETTER GA
|
||||
\uE018>\u0998; # LETTER GHA
|
||||
\uE019>\u0999; # LETTER NGA
|
||||
\uE01A>\u099A; # LETTER CA
|
||||
\uE01B>\u099B; # LETTER CHA
|
||||
\uE01C>\u099C; # LETTER JA
|
||||
\uE01D>\u099D; # LETTER JHA
|
||||
\uE01E>\u099E; # LETTER NYA
|
||||
\uE01F>\u099F; # LETTER TTA
|
||||
\uE020>\u09A0; # LETTER TTHA
|
||||
\uE021>\u09A1; # LETTER DDA
|
||||
\uE022>\u09A2; # LETTER DDHA
|
||||
\uE023>\u09A3; # LETTER NNA
|
||||
\uE024>\u09A4; # LETTER TA
|
||||
\uE025>\u09A5; # LETTER THA
|
||||
\uE026>\u09A6; # LETTER DA
|
||||
\uE027>\u09A7; # LETTER DHA
|
||||
\uE028>\u09A8; # LETTER NA
|
||||
\uE029>\u09A8\u09BC; # REMAP (indicExceptions.txt): \u09a9>\u09a8 = LETTER NNNA>LETTER NA
|
||||
\uE02A>\u09AA; # LETTER PA
|
||||
\uE02B>\u09AB; # LETTER PHA
|
||||
\uE02C>\u09AC; # LETTER BA
|
||||
\uE02D>\u09AD; # LETTER BHA
|
||||
\uE02E>\u09AE; # LETTER MA
|
||||
\uE02F>\u09AF; # LETTER YA
|
||||
\uE030>\u09B0; # LETTER RA
|
||||
\uE031>\u09B0\u09BC; # FALLBACK to RA
|
||||
\uE032>\u09B2; # LETTER LA
|
||||
\uE033>\u09B2; # REMAP (indicExceptions.txt): \u09b3>\u09b2 = LETTER LLA>LETTER LA
|
||||
\uE034>\u09B2; # REMAP (indicExceptions.txt): \u09b4>\u09b2 = LETTER LLLA>LETTER LA
|
||||
\uE035>\u09AC; # REMAP (indicExceptions.txt): \u09b5>\u09ac = LETTER VA>LETTER BA
|
||||
\uE036>\u09B6; # LETTER SHA
|
||||
\uE037>\u09B7; # LETTER SSA
|
||||
\uE038>\u09B8; # LETTER SA
|
||||
\uE039>\u09B9; # LETTER HA
|
||||
\uE03C>\u09BC; # SIGN NUKTA
|
||||
\uE03D>\u09bd; # SIGN AVAGRAHA
|
||||
\uE03E>\u09BE; # VOWEL SIGN AA
|
||||
\uE03F>\u09BF; # VOWEL SIGN I
|
||||
\uE040>\u09C0; # VOWEL SIGN II
|
||||
\uE041>\u09C1; # VOWEL SIGN U
|
||||
\uE042>\u09C2; # VOWEL SIGN UU
|
||||
\uE043>\u09C3; # VOWEL SIGN VOCALIC R
|
||||
\uE044>\u09C4; # VOWEL SIGN VOCALIC RR
|
||||
\uE045>\u09C7; # REMAP (indicExceptions.txt): \u09c5>\u09c7 = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
\uE046>\u09C7; # FALLBACK
|
||||
\uE047>\u09C7; # VOWEL SIGN E
|
||||
\uE048>\u09C8; # VOWEL SIGN AI
|
||||
\uE049>\u09C7\u09BE; # REMAP (indicExceptions.txt): \u09c9>\u09cb = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
\uE04A>\u09C7\u09BE; # FALLBACK
|
||||
\uE04B>\u09C7\u09BE; # VOWEL SIGN O
|
||||
\uE04C>\u09C7\u09D7; # VOWEL SIGN AU
|
||||
\uE04D>\u09CD; # SIGN VIRAMA
|
||||
\uE050>\u0993\u0982; # InterIndic-Bengali: OM
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\uE055>; # LENGTH MARK
|
||||
\uE056>\u09C8; # REMAP (indicExceptions.txt): \u09d6>\u09c8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\uE057>\u09D7; # AU LENGTH MARK
|
||||
\uE058>\u0995\u09BC; # FALLBACK
|
||||
\uE059>\u0996\u09BC; # REMAP (indicExceptions.txt): \u09d9>\u0996 = LETTER KHHA>LETTER KHA
|
||||
\uE05A>\u0997\u09BC; # REMAP (indicExceptions.txt): \u09da>\u0997 = LETTER GHHA>LETTER GA
|
||||
\uE05B>\u099C\u09BC; # REMAP (indicExceptions.txt): \u09db>\u099c = LETTER ZA>LETTER JA
|
||||
\uE05C>\u09A1\u09BC; # FALLBACK
|
||||
\uE05D>\u09A2\u09BC; # LETTER RHA
|
||||
\uE05E>\u09AB\u09BC; # REMAP (indicExceptions.txt): \u09de>\u09ab = LETTER FA>LETTER PHA
|
||||
\uE05F>\u09AF\u09BC; # LETTER YYA
|
||||
\uE060>\u09E0; # LETTER VOCALIC RR
|
||||
\uE061>\u09E1; # LETTER VOCALIC LL
|
||||
\uE062>\u09E2; # VOWEL SIGN VOCALIC L
|
||||
\uE063>\u09E3; # VOWEL SIGN VOCALIC LL
|
||||
\uE064>\u0964; # DANDA
|
||||
\uE065>\u0965; # DOUBLE DANDA
|
||||
\uE066>\u09E6; # DIGIT ZERO
|
||||
\uE067>\u09E7; # DIGIT ONE
|
||||
\uE068>\u09E8; # DIGIT TWO
|
||||
\uE069>\u09E9; # DIGIT THREE
|
||||
\uE06A>\u09EA; # DIGIT FOUR
|
||||
\uE06B>\u09EB; # DIGIT FIVE
|
||||
\uE06C>\u09EC; # DIGIT SIX
|
||||
\uE06D>\u09ED; # DIGIT SEVEN
|
||||
\uE06E>\u09EE; # DIGIT EIGHT
|
||||
\uE06F>\u09EF; # DIGIT NINE
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u09F0; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u09F1; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>\u09F2; # RUPEE MARK
|
||||
\ue074>\u09F3; # RUPEE SIGN
|
||||
\ue075>\u09F4; # CURRENCY NUMERATOR ONE
|
||||
\ue076>\u09F5; # CURRENCY NUMERATOR TWO
|
||||
\ue077>\u09F6; # CURRENCY NUMERATOR THREE
|
||||
\ue078>\u09F7; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>\u09F8; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>\u09F9; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>\u09FA; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u09AC; # FALLBACK FOR ORIYA LETTER WA
|
||||
0 > \u09E6; # FALLBACK FOR TAMIL
|
||||
1 > \u09E7;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,158 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Devanagari
|
||||
#:: NFD (NFC) ;
|
||||
#Rules for Decomposed characters
|
||||
\ue028\ue03c > \u0929; #\ue029
|
||||
\ue030\ue03c > \u0931; #\ue031
|
||||
\ue033\ue03c > \u0934; #\ue034
|
||||
\ue015\ue03c > \u0958; #\ue058 LETTER QA (For Urdu)
|
||||
\ue016\ue03c > \u0959; #\ue059 LETTER KHHA (For Urdu)
|
||||
\ue017\ue03c > \u095a; #\ue05a LETTER GHHA (For Urdu)
|
||||
\ue01c\ue03c > \u095b; #\ue05b LETTER ZA (For Urdu)
|
||||
\ue021\ue03c > \u095c; #\ue05c LETTER DDDHA (pronounced RRA)
|
||||
\ue022\ue03c > \u095d; #\ue05d LETTER RHA (pronounced RRHA)
|
||||
\ue02b\ue03c > \u095e; #\ue05e LETTER FA
|
||||
\ue02f\ue03c > \u095f; #\ue05f LETTER YYA
|
||||
|
||||
#Decomposed compatibility transliterations
|
||||
\ue012\ue057>\u0914; # FALLBACK FOR TAMIL AU
|
||||
0 > \u0966; # FALLBACK FOR TAMIL
|
||||
1 > \u0967;
|
||||
|
||||
\ue055>; # FALLBACK BLOW AWAY KANNADA AND TELUGU LENGTH MARK
|
||||
\ue056>; # FALLBACK BLOW AWAY KANNADA AND TELUGU AI LENGTH MARK
|
||||
\ue057>; # FALLBACK BLOW AWAY TAMIL AU LENGTH MARK
|
||||
|
||||
\ue001 > \u0901; # SIGN CANDRABINDU
|
||||
\ue002 > \u0902; # SIGN ANUSVARA
|
||||
\ue003 > \u0903; # SIGN VISARGA
|
||||
\ue004 > \u0904; # SIGN SHORT A
|
||||
\ue005 > \u0905; # LETTER A
|
||||
\ue006 > \u0906; # LETTER AA
|
||||
\ue007 > \u0907; # LETTER I
|
||||
\ue008 > \u0908; # LETTER II
|
||||
\ue009 > \u0909; # LETTER U
|
||||
\ue00a > \u090a; # LETTER UU
|
||||
\ue00b > \u090b; # LETTER VOCALIC R
|
||||
\ue00c > \u090c; # LETTER VOCALIC L
|
||||
\ue00d > \u090d; # LETTER CANDRA E (For representing English sounds)
|
||||
\ue00e > \u090e; # LETTER SHORT E(For Southern Scripts)
|
||||
\ue00f > \u090f; # LETTER E
|
||||
\ue010 > \u0910; # LETTER AI
|
||||
\ue011 > \u0911; # LETTER CANDRA O (For representing English sounds)
|
||||
\ue012 > \u0912; # LETTER SHORT O (For Southern Scripts)
|
||||
\ue013 > \u0913; # LETTER O
|
||||
\ue014 > \u0914; # LETTER AU
|
||||
\ue015 > \u0915; # LETTER KA
|
||||
\ue016 > \u0916; # LETTER KHA
|
||||
\ue017 > \u0917; # LETTER GA
|
||||
\ue018 > \u0918; # LETTER GHA
|
||||
\ue019 > \u0919; # LETTER NGA
|
||||
\ue01a > \u091a; # LETTER CA
|
||||
\ue01b > \u091b; # LETTER CHA
|
||||
\ue01c > \u091c; # LETTER JA
|
||||
\ue01d > \u091d; # LETTER JHA
|
||||
\ue01e > \u091e; # LETTER NYA
|
||||
\ue01f > \u091f; # LETTER TTA
|
||||
\ue020 > \u0920; # LETTER TTHA
|
||||
\ue021 > \u0921; # LETTER DDA
|
||||
\ue022 > \u0922; # LETTER DDHA
|
||||
\ue023 > \u0923; # LETTER NNA
|
||||
\ue024 > \u0924; # LETTER TA
|
||||
\ue025 > \u0925; # LETTER THA
|
||||
\ue026 > \u0926; # LETTER DA
|
||||
\ue027 > \u0927; # LETTER DHA
|
||||
\ue028 > \u0928; # LETTER NA
|
||||
\ue029 > \u0929; # LETTER NNNA
|
||||
\ue02a > \u092a; # LETTER PA
|
||||
\ue02b > \u092b; # LETTER PHA
|
||||
\ue02c > \u092c; # LETTER BA
|
||||
\ue02d > \u092d; # LETTER BHA
|
||||
\ue02e > \u092e; # LETTER MA
|
||||
\ue02f > \u092f; # LETTER YA
|
||||
\ue030 > \u0930; # LETTER RA
|
||||
\ue031 > \u0931; # LETTER RRA (Eyelash RA for Southern scripts)
|
||||
#\ue031 > \u0930;
|
||||
\ue032 > \u0932; # LETTER LA
|
||||
\ue033 > \u0933; # LETTER LLA
|
||||
\ue034 > \u0934; # LETTER LLLA (LLLA for Southern scripts)
|
||||
#\ue034 > \u0933;
|
||||
\ue035 > \u0935; # LETTER VA
|
||||
\ue036 > \u0936; # LETTER SHA
|
||||
\ue037 > \u0937; # LETTER SSA
|
||||
\ue038 > \u0938; # LETTER SA
|
||||
\ue039 > \u0939; # LETTER HA
|
||||
\ue03c > \u093c; # SIGN NUKTA
|
||||
\ue03d > \u093d; # SIGN AVAGRAHA
|
||||
\ue03e > \u093e; # VOWEL SIGN AA
|
||||
\ue03f > \u093f; # VOWEL SIGN I
|
||||
\ue040 > \u0940; # VOWEL SIGN II
|
||||
\ue041 > \u0941; # VOWEL SIGN U
|
||||
\ue042 > \u0942; # VOWEL SIGN UU
|
||||
\ue043 > \u0943; # VOWEL SIGN VOCALIC R
|
||||
\ue044 > \u0944; # VOWEL SIGN VOCALIC RR
|
||||
\ue045 > \u0945; # VOWEL SIGN CANDRA E
|
||||
\ue046 > \u0946; # VOWEL SIGN SHORT E
|
||||
\ue047 > \u0947; # VOWEL SIGN E
|
||||
\ue048 > \u0948; # VOWEL SIGN AI
|
||||
\ue049 > \u0949; # VOWEL SIGN CANDRA O
|
||||
\ue04a > \u094a; # VOWEL SIGN SHORT O
|
||||
\ue04b > \u094b; # VOWEL SIGN O
|
||||
\ue04c > \u094c; # VOWEL SIGN AU
|
||||
\ue04d > \u094d; # SIGN VIRAMA
|
||||
\ue050 > \u0950; # OM
|
||||
\ue051 > \u0951; # STRESS SIGN UDATTA
|
||||
\ue052 > \u0952; # STRESS SIGN ANUDATTA
|
||||
\ue053 > \u0953; # GRAVE ACCENT
|
||||
\ue054 > \u0954; # ACUTE ACCENT
|
||||
\ue058 > \u0958; # LETTER QA (For Urdu)
|
||||
\ue059 > \u0959; # LETTER KHHA (For Urdu)
|
||||
\ue05a > \u095a; # LETTER GHHA (For Urdu)
|
||||
\ue05b > \u095b; # LETTER ZA (For Urdu)
|
||||
\ue05c > \u095c; # LETTER DDDHA (pronounced RRA)
|
||||
\ue05d > \u095d; # LETTER RHA (pronounced RRHA)
|
||||
\ue05e > \u095e; # LETTER FA
|
||||
\ue05f > \u095f; # LETTER YYA
|
||||
\ue060 > \u0960; # LETTER VOCALIC RR
|
||||
\ue061 > \u0961; # LETTER VOCALIC LL
|
||||
\ue062 > \u0962; # VOWEL SIGN VOCALIC L
|
||||
\ue063 > \u0963; # VOWEL SIGN VOCALIC LL
|
||||
\ue064 > \u0964; # DANDA
|
||||
\ue065 > \u0965; # DOUBLE DANDA
|
||||
\ue066 > \u0966; # DIGIT ZERO
|
||||
\ue067 > \u0967; # DIGIT ONE
|
||||
\ue068 > \u0968; # DIGIT TWO
|
||||
\ue069 > \u0969; # DIGIT THREE
|
||||
\ue06a > \u096a; # DIGIT FOUR
|
||||
\ue06b > \u096b; # DIGIT FIVE
|
||||
\ue06c > \u096c; # DIGIT SIX
|
||||
\ue06d > \u096d; # DIGIT SEVEN
|
||||
\ue06e > \u096e; # DIGIT EIGHT
|
||||
\ue06f > \u096f; # DIGIT NINE
|
||||
|
||||
\ue070>\u0970; # ABBREVIATION SIGN
|
||||
\ue071>\u0930; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0930; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>\u0930\u0942; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0935; # FALLBACK FOR ORIYA LETTER WA
|
||||
|
||||
# \u0970 # UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
|
||||
# :: NFC;
|
||||
# eof
|
|
@ -1,138 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Gujarati
|
||||
#:: NFD (NFC) ;
|
||||
\ue001>\u0a81; # SIGN CANDRABINDU
|
||||
\ue002>\u0a82; # SIGN ANUSVARA
|
||||
\ue003>\u0a83; # SIGN VISARGA
|
||||
\uE004>\u0a85; # FALLBACK TO LETTER A
|
||||
\ue005>\u0a85; # LETTER A
|
||||
\ue006>\u0a86; # LETTER AA
|
||||
\ue007>\u0a87; # LETTER I
|
||||
\ue008>\u0a88; # LETTER II
|
||||
\ue009>\u0a89; # LETTER U
|
||||
\ue00a>\u0a8a; # LETTER UU
|
||||
\ue00b>\u0a8b; # LETTER VOCALIC R
|
||||
\ue00c>\u0a8c; # LETTER VOCALIC L
|
||||
\ue00d>\u0a8d; # GUJARATI VOWEL CANDRA E
|
||||
\ue00e>\u0a8f; # FALLBACK
|
||||
\ue00f>\u0a8f; # InterIndic-Gujarati: LETTER EE (\u0a8f = LETTER E)
|
||||
\ue010>\u0a90; # LETTER AI
|
||||
\ue011>\u0a91; # GUJARATI VOWEL CANDRA O
|
||||
\ue012>\u0a93; # FALLBACK
|
||||
\ue013>\u0a93; # UNMAPPED InterIndic-Gujarati: LETTER OO (\u0a93 = LETTER O)
|
||||
\ue014>\u0a94; # LETTER AU
|
||||
\ue015>\u0a95; # LETTER KA
|
||||
\ue016>\u0a96; # LETTER KHA
|
||||
\ue017>\u0a97; # LETTER GA
|
||||
\ue018>\u0a98; # LETTER GHA
|
||||
\ue019>\u0a99; # LETTER NGA
|
||||
\ue01a>\u0a9a; # LETTER CA
|
||||
\ue01b>\u0a9b; # LETTER CHA
|
||||
\ue01c>\u0a9c; # LETTER JA
|
||||
\ue01d>\u0a9d; # LETTER JHA
|
||||
\ue01e>\u0a9e; # LETTER NYA
|
||||
\ue01f>\u0a9f; # LETTER TTA
|
||||
\ue020>\u0aa0; # LETTER TTHA
|
||||
\ue021>\u0aa1; # LETTER DDA
|
||||
\ue022>\u0aa2; # LETTER DDHA
|
||||
\ue023>\u0aa3; # LETTER NNA
|
||||
\ue024>\u0aa4; # LETTER TA
|
||||
\ue025>\u0aa5; # LETTER THA
|
||||
\ue026>\u0aa6; # LETTER DA
|
||||
\ue027>\u0aa7; # LETTER DHA
|
||||
\ue028>\u0aa8; # LETTER NA
|
||||
\ue029>\u0aa8\u0abc; # FALLBACK to NA+NUKTA
|
||||
\ue02a>\u0aaa; # LETTER PA
|
||||
\ue02b>\u0aab; # LETTER PHA
|
||||
\ue02c>\u0aac; # LETTER BA
|
||||
\ue02d>\u0aad; # LETTER BHA
|
||||
\ue02e>\u0aae; # LETTER MA
|
||||
\ue02f>\u0aaf; # LETTER YA
|
||||
\ue030>\u0ab0; # LETTER RA
|
||||
\ue031>\u0ab0\u0abc; # FALLBACK
|
||||
\ue032>\u0ab2; # LETTER LA
|
||||
\ue033>\u0ab3; # LETTER LLA
|
||||
\ue034>\u0ab3\u0abc; # LETTER LLLA>LETTER LLA+NUKTA
|
||||
\ue035>\u0ab5; # LETTER VA
|
||||
\ue036>\u0ab6; # LETTER SHA
|
||||
\ue037>\u0ab7; # LETTER SSA
|
||||
\ue038>\u0ab8; # LETTER SA
|
||||
\ue039>\u0ab9; # LETTER HA
|
||||
\ue03c>\u0abc; # SIGN NUKTA
|
||||
\ue03d>\u0abd; # SIGN AVAGRAHA
|
||||
\ue03e>\u0abe; # VOWEL SIGN AA
|
||||
\ue03f>\u0abf; # VOWEL SIGN I
|
||||
\ue040>\u0ac0; # VOWEL SIGN II
|
||||
\ue041>\u0ac1; # VOWEL SIGN U
|
||||
\ue042>\u0ac2; # VOWEL SIGN UU
|
||||
\ue043>\u0ac3; # VOWEL SIGN VOCALIC R
|
||||
\ue044>\u0ac4; # VOWEL SIGN VOCALIC RR
|
||||
\ue045>\u0ac5; # VOWEL SIGN CANDRA E
|
||||
\ue046>\u0ac7; # FALLBACK
|
||||
\ue047>\u0ac7; # InterIndic-Gujarati: VOWEL SIGN EE (\u0ac7 = VOWEL SIGN E)
|
||||
\ue048>\u0ac8; # VOWEL SIGN AI
|
||||
\ue049>\u0ac9; # VOWEL SIGN CANDRA O
|
||||
\ue04a>\u0acb; # FALLBACK
|
||||
\ue04b>\u0acb; # UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (\u0acb = VOWEL SIGN O)
|
||||
\ue04c>\u0acc; # VOWEL SIGN AU
|
||||
\ue04d>\u0acd; # SIGN VIRAMA
|
||||
\ue050>\u0ad0; # OM
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>; # UNMAPPED InterIndic-Gujarati: LENGTH MARK
|
||||
\ue056>\u0ac8; # REMAP (indicExceptions.txt): \u0ad6>\u0ac8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\ue057>\u0acc; # REMAP (indicExceptions.txt): \u0ad7>\u0acc = AU LENGTH MARK>VOWEL SIGN AU
|
||||
\ue058>\u0a95\u0abc; # FALLBACK
|
||||
\ue059>\u0a96\u0abc; # REMAP (indicExceptions.txt): \u0ad9>\u0a96\u0abc = LETTER KHHA>LETTER KHA.SIGN NUKTA
|
||||
\ue05a>\u0a97\u0abc; # REMAP (indicExceptions.txt): \u0ada>\u0a97\u0abc = LETTER GHHA>LETTER GA.SIGN NUKTA
|
||||
\ue05b>\u0a9c\u0abc; # REMAP (indicExceptions.txt): \u0adb>\u0a9c\u0abc = LETTER ZA>LETTER JA.SIGN NUKTA
|
||||
\ue05c>\u0aa1\u0abc; # FALLBACK
|
||||
\ue05d>\u0aa2\u0abc; # REMAP (indicExceptions.txt): \u0add>\u0aa2\u0abc = LETTER RHA>LETTER DDHA.SIGN NUKTA
|
||||
\ue05e>\u0aab\u0abc; # REMAP (indicExceptions.txt): \u0ade>\u0aab\u0abc = LETTER FA>LETTER PHA.SIGN NUKTA
|
||||
\ue05f>\u0aaf\u0abc; # REMAP (indicExceptions.txt): \u0adf>\u0aaf\u0abc = LETTER YYA>LETTER YA.SIGN NUKTA
|
||||
\ue060>\u0ae0; # LETTER VOCALIC RR
|
||||
\ue061>\u0ae1; # LETTER VOCALIC LL
|
||||
\ue062>\u0abf\u0abc; # REMAP (indicExceptions.txt): \u0ae2>\u0abf\u0abc = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
|
||||
\ue063>\u0ac0\u0abc; # REMAP (indicExceptions.txt): \u0ae3>\u0ac0\u0abc = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
|
||||
\uE064>\u0964; # DANDA
|
||||
\uE065>\u0965; # DOUBLE DANDA
|
||||
\ue066>\u0ae6; # DIGIT ZERO
|
||||
\ue067>\u0ae7; # DIGIT ONE
|
||||
\ue068>\u0ae8; # DIGIT TWO
|
||||
\ue069>\u0ae9; # DIGIT THREE
|
||||
\ue06a>\u0aea; # DIGIT FOUR
|
||||
\ue06b>\u0aeb; # DIGIT FIVE
|
||||
\ue06c>\u0aec; # DIGIT SIX
|
||||
\ue06d>\u0aed; # DIGIT SEVEN
|
||||
\ue06e>\u0aee; # DIGIT EIGHT
|
||||
\ue06f>\u0aef; # DIGIT NINE
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u0ab0; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0ab0; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0ab5; # FALLBACK FOR ORIYA LETTER WA
|
||||
0 > \u0ae6; # FALLBACK FOR TAMIL
|
||||
1 > \u0ae7;
|
||||
|
||||
#\ue080>; # UNMAPPED InterIndic-Gujarati: ISSHAR
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,147 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Gurmukhi
|
||||
#:: NFD (NFC) ;
|
||||
$vowel = [\u0A05-\u0A14 \u0A3e-\u0A4D];
|
||||
$consonant = [\u0A15-\u0A39];
|
||||
|
||||
\ue001>\u0a01; # SIGN CHANDRABINDU
|
||||
#rules for BINDI
|
||||
|
||||
# Anusvara is equivalent to BINDI when preceded by a vowel
|
||||
$vowel{\ue002>\u0a02; # SIGN ANUSVARA (\u0a02 = SIGN BINDI)
|
||||
# else is equivalent to TIPPI
|
||||
$consonant{\ue002>\u0a70; # SIGN TIPPI
|
||||
\ue002>\u0a02;
|
||||
|
||||
\ue003>; # FALLBACK BLOW AWAY SIGN VISARGA
|
||||
\uE004>\u0a05; # FALLBACK TO LETTER A
|
||||
\ue005>\u0a05; # LETTER A
|
||||
\ue006>\u0a06; # LETTER AA
|
||||
\ue007>\u0a07; # LETTER I
|
||||
\ue008>\u0a08; # LETTER II
|
||||
\ue009>\u0a09; # LETTER U
|
||||
\ue00a>\u0a0a; # LETTER UU
|
||||
\ue00b>\u0a30\u0a3f; # REMAP (indicExceptions.txt): \u0a0b>\u0a30\u0a3f = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
|
||||
\ue00c>\u0a33; # FALLBACK
|
||||
\ue00d>\u0a0f; # FALLBACK
|
||||
\ue00e>\u0a0f; # FALLBACK
|
||||
\ue00f>\u0a0f; # LETTER EE
|
||||
\ue010>\u0a10; # LETTER AI
|
||||
\ue011>\u0a13; # FALLBACK
|
||||
\ue012>\u0a13; # FALLBACK
|
||||
\ue013>\u0a13; # LETTER OO
|
||||
\ue014>\u0a14; # LETTER AU
|
||||
\ue015>\u0a15; # LETTER KA
|
||||
\ue016>\u0a16; # LETTER KHA
|
||||
\ue017>\u0a17; # LETTER GA
|
||||
\ue018>\u0a18; # LETTER GHA
|
||||
\ue019>\u0a19; # LETTER NGA
|
||||
\ue01a>\u0a1a; # LETTER CA
|
||||
\ue01b>\u0a1b; # LETTER CHA
|
||||
\ue01c>\u0a1c; # LETTER JA
|
||||
\ue01d>\u0a1d; # LETTER JHA
|
||||
\ue01e>\u0a1e; # LETTER NYA
|
||||
\ue01f>\u0a1f; # LETTER TTA
|
||||
\ue020>\u0a20; # LETTER TTHA
|
||||
\ue021>\u0a21; # LETTER DDA
|
||||
\ue022>\u0a22; # LETTER DDHA
|
||||
\ue023>\u0a23; # LETTER NNA
|
||||
\ue024>\u0a24; # LETTER TA
|
||||
\ue025>\u0a25; # LETTER THA
|
||||
\ue026>\u0a26; # LETTER DA
|
||||
\ue027>\u0a27; # LETTER DHA
|
||||
\ue028>\u0a28; # LETTER NA
|
||||
\ue029>\u0a28\u0a3c; # REMAP (indicExceptions.txt): \u0a29>\u0a28 = LETTER NNNA>LETTER NA
|
||||
\ue02a>\u0a2a; # LETTER PA
|
||||
\ue02b>\u0a2b; # LETTER PHA
|
||||
\ue02c>\u0a2c; # LETTER BA
|
||||
\ue02d>\u0a2d; # LETTER BHA
|
||||
\ue02e>\u0a2e; # LETTER MA
|
||||
\ue02f>\u0a2f; # LETTER YA
|
||||
\ue030>\u0a30; # LETTER RA
|
||||
\ue031>\u0a30\u0a3c; # FALLBACK LETTER RA+NUKTA
|
||||
\ue032>\u0a32; # LETTER LA
|
||||
\ue033>\u0a33; # LETTER LLA
|
||||
\ue034>\u0a33; # REMAP (indicExceptions.txt): \u0a34>\u0a33 = LETTER LLLA>LETTER LLA
|
||||
\ue035>\u0a35; # LETTER VA
|
||||
\ue036>\u0a36; # LETTER SHA
|
||||
\ue037>\u0a36; # REMAP (indicExceptions.txt): \u0a37>\u0a36 = LETTER SSA>LETTER SHA
|
||||
\ue038>\u0a38; # LETTER SA
|
||||
\ue039>\u0a39; # LETTER HA
|
||||
\ue03c>\u0a3c; # SIGN NUKTA
|
||||
\ue03d>; # FALLBACK BLOW AWAY SIGN AVAGRAHA
|
||||
\ue03e>\u0a3e; # VOWEL SIGN AA
|
||||
\ue03f>\u0a3f; # VOWEL SIGN I
|
||||
\ue040>\u0a40; # VOWEL SIGN II
|
||||
\ue041>\u0a41; # VOWEL SIGN U
|
||||
\ue042>\u0a42; # VOWEL SIGN UU
|
||||
\ue043>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC R
|
||||
\ue044>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR
|
||||
\ue045>\u0a48; # REMAP (indicExceptions.txt): \u0a45>\u0a48 = VOWEL SIGN CANDRA E>VOWEL SIGN AI
|
||||
\ue046>\u0a47; # FALLBACK
|
||||
\ue047>\u0a47; # VOWEL SIGN EE
|
||||
\ue048>\u0a48; # VOWEL SIGN AI
|
||||
\ue049>\u0a4c; # REMAP (indicExceptions.txt): \u0a49>\u0a4c = VOWEL SIGN CANDRA O>VOWEL SIGN AU
|
||||
\ue04a>\u0a4b; # FALLBACK
|
||||
\ue04b>\u0a4b; # VOWEL SIGN OO
|
||||
\ue04c>\u0a4c; # VOWEL SIGN AU
|
||||
\ue04d>\u0a4d; # SIGN VIRAMA
|
||||
\ue050>\u0a13\u0a02; # OM: FALLBACK to LETTER OO (\u0a13) + SIGN BINDI (\u0a02)
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>; # FALLBACK BLOW AWAY LENGTH MARK
|
||||
\ue056>\u0a48; # REMAP (indicExceptions.txt): \u0a56>\u0a48 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\ue057>\u0a4c; # REMAP (indicExceptions.txt): \u0a57>\u0a4c = AU LENGTH MARK>VOWEL SIGN AU
|
||||
\ue058>\u0a15\u0a3c; # FALLBACK KA + NUKTA
|
||||
\ue059>\u0a59; # LETTER KHHA
|
||||
\ue05a>\u0a5a; # LETTER GHHA
|
||||
\ue05b>\u0a5b; # LETTER ZA
|
||||
\ue05c>\u0a5c; # LETTER RRA
|
||||
\ue05d>\u0a22\u0a3c; # REMAP (indicExceptions.txt): \u0a5d>\u0a22\u0a3c = LETTER RHA>LETTER DDHA.SIGN NUKTA
|
||||
\ue05e>\u0a5e; # LETTER FA
|
||||
\ue05f>\u0a2f\u0a3c; # REMAP (indicExceptions.txt): \u0a5f>\u0a2f = LETTER YYA>LETTER YA
|
||||
\ue060>\u0a30\u0a3f; # REMAP (indicExceptions.txt): \u0a60>\u0a30\u0a3f = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
|
||||
\ue061>\u0a32\u0a3c; # FALLBACK: LETTER VOCALIC LL > LETTER LA + SIGN NUKTA
|
||||
\ue062>\u0a3f\u0a3c; # REMAP (indicExceptions.txt): \u0a62>\u0a3f\u0a3c = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
|
||||
\ue063>\u0a40\u0a3c; # REMAP (indicExceptions.txt): \u0a63>\u0a40\u0a3c = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
|
||||
\uE064>\u0964; # DANDA
|
||||
\uE065>\u0965; # DOUBLE DANDA
|
||||
\ue066>\u0a66; # DIGIT ZERO
|
||||
\ue067>\u0a67; # DIGIT ONE
|
||||
\ue068>\u0a68; # DIGIT TWO
|
||||
\ue069>\u0a69; # DIGIT THREE
|
||||
\ue06a>\u0a6a; # DIGIT FOUR
|
||||
\ue06b>\u0a6b; # DIGIT FIVE
|
||||
\ue06c>\u0a6c; # DIGIT SIX
|
||||
\ue06d>\u0a6d; # DIGIT SEVEN
|
||||
\ue06e>\u0a6e; # DIGIT EIGHT
|
||||
\ue06f>\u0a6f; # DIGIT NINE
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u0a30; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0a30; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>\u0a70; # TIPPI
|
||||
\uE07D>\u0a71; # ADDAK
|
||||
\uE07E>\u0a72; # IRI
|
||||
\uE07F>\u0a73; # URA
|
||||
\uE080>\u0a74; # EK ONKAR
|
||||
\uE081>\u0a35; # FALLBACK FOR ORIYA LETTER WA
|
||||
|
||||
0 > \u0a66; # FALLBACK FOR TAMIL
|
||||
1 > \u0a67;
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,141 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Kannada
|
||||
#:: NFD (NFC) ;
|
||||
\ue033\ue03c>\u0cde; # LETTER FA
|
||||
\ue001>\u0c82; # REMAP (indicExceptions.txt): \u0c81>\u0c82 = SIGN CANDRABINDU>SIGN ANUSVARA
|
||||
\ue002>\u0c82; # SIGN ANUSVARA
|
||||
\ue003>\u0c83; # SIGN VISARGA
|
||||
\uE004>\u0c85; # FALLBACK TO LETTER A
|
||||
\ue005>\u0c85; # LETTER A
|
||||
\ue006>\u0c86; # LETTER AA
|
||||
\ue007>\u0c87; # LETTER I
|
||||
\ue008>\u0c88; # LETTER II
|
||||
\ue009>\u0c89; # LETTER U
|
||||
\ue00a>\u0c8a; # LETTER UU
|
||||
\ue00b>\u0c8b; # LETTER VOCALIC R
|
||||
\ue00c>\u0c8c; # LETTER VOCALIC L
|
||||
\ue00d>\u0c8e; # FALLBACK (CANDRA E > LETTER E)
|
||||
\ue00e>\u0c8e; # LETTER E
|
||||
\ue00f>\u0c8f; # LETTER EE
|
||||
\ue010>\u0c90; # LETTER AI
|
||||
\ue011>\u0c92; # FALLBACK
|
||||
\ue012>\u0c92; # LETTER O
|
||||
\ue013>\u0c93; # LETTER OO
|
||||
\ue014>\u0c94; # LETTER AU
|
||||
\ue015>\u0c95; # LETTER KA
|
||||
\ue016>\u0c96; # LETTER KHA
|
||||
\ue017>\u0c97; # LETTER GA
|
||||
\ue018>\u0c98; # LETTER GHA
|
||||
\ue019>\u0c99; # LETTER NGA
|
||||
\ue01a>\u0c9a; # LETTER CA
|
||||
\ue01b>\u0c9b; # LETTER CHA
|
||||
\ue01c>\u0c9c; # LETTER JA
|
||||
\ue01d>\u0c9d; # LETTER JHA
|
||||
\ue01e>\u0c9e; # LETTER NYA
|
||||
\ue01f>\u0c9f; # LETTER TTA
|
||||
\ue020>\u0ca0; # LETTER TTHA
|
||||
\ue021>\u0ca1; # LETTER DDA
|
||||
\ue022>\u0ca2; # LETTER DDHA
|
||||
\ue023>\u0ca3; # LETTER NNA
|
||||
\ue024>\u0ca4; # LETTER TA
|
||||
\ue025>\u0ca5; # LETTER THA
|
||||
\ue026>\u0ca6; # LETTER DA
|
||||
\ue027>\u0ca7; # LETTER DHA
|
||||
\ue028>\u0ca8; # LETTER NA
|
||||
\ue029>\u0ca8; # REMAP (indicExceptions.txt): \u0ca9>\u0ca8 = LETTER NNNA>LETTER NA
|
||||
\ue02a>\u0caa; # LETTER PA
|
||||
\ue02b>\u0cab; # LETTER PHA
|
||||
\ue02c>\u0cac; # LETTER BA
|
||||
\ue02d>\u0cad; # LETTER BHA
|
||||
\ue02e>\u0cae; # LETTER MA
|
||||
\ue02f>\u0caf; # LETTER YA
|
||||
\ue030\ue03c>\u0cb1;
|
||||
\ue030>\u0cb0; # LETTER RA
|
||||
\ue031>\u0cb1; # LETTER RRA
|
||||
\ue032>\u0cb2; # LETTER LA
|
||||
\ue033>\u0cb3; # LETTER LLA
|
||||
\ue034>\u0cde; # LETTER LLLA > \u0cde, the same target as the LLA+NUKTA rule (\ue033\ue03c); not \u0cb3 LETTER LLA
|
||||
\ue035>\u0cb5; # LETTER VA
|
||||
\ue036>\u0cb6; # LETTER SHA
|
||||
\ue037>\u0cb7; # LETTER SSA
|
||||
\ue038>\u0cb8; # LETTER SA
|
||||
\ue039>\u0cb9; # LETTER HA
|
||||
|
||||
\ue03c>\u0cbc; # NUKTA
|
||||
\ue03d>\u0cbd; # AVAGRAHA
|
||||
|
||||
\ue03e>\u0cbe; # VOWEL SIGN AA
|
||||
\ue03f>\u0cbf; # VOWEL SIGN I
|
||||
\ue040>\u0cc0; # VOWEL SIGN II
|
||||
\ue041>\u0cc1; # VOWEL SIGN U
|
||||
\ue042>\u0cc2; # VOWEL SIGN UU
|
||||
\ue043>\u0cc3; # VOWEL SIGN VOCALIC R
|
||||
\ue044>\u0cc4; # VOWEL SIGN VOCALIC RR
|
||||
\ue045>\u0cc6; # REMAP (indicExceptions.txt): \u0cc5>\u0cc6 = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
\ue046>\u0cc6; # VOWEL SIGN E
|
||||
\ue047>\u0cc7; # VOWEL SIGN EE
|
||||
\ue048>\u0cc8; # VOWEL SIGN AI
|
||||
\ue049>\u0cca; # REMAP (indicExceptions.txt): \u0cc9>\u0cca = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
\ue04a>\u0cca; # VOWEL SIGN O
|
||||
\ue04b>\u0ccb; # VOWEL SIGN OO
|
||||
\ue04c>\u0ccc; # VOWEL SIGN AU
|
||||
\ue04d>\u0ccd; # SIGN VIRAMA
|
||||
\ue050>\u0c93\u0c82; # REMAP (indicExceptions.txt): \u0cd0>\u0c93\u0c82 = OM>LETTER OO.SIGN ANUSVARA
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>\u0cd5; # LENGTH MARK
|
||||
\ue056>\u0cd6; # AI LENGTH MARK
|
||||
\ue057>\u0ccc; # REMAP (indicExceptions.txt): \u0cd7>\u0ccc = AU LENGTH MARK>VOWEL SIGN AU
|
||||
\ue058>\u0c95; # FALLBACK
|
||||
\ue059>\u0c96; # REMAP (indicExceptions.txt): \u0cd9>\u0c96 = LETTER KHHA>LETTER KHA
|
||||
\ue05a>\u0c97; # REMAP (indicExceptions.txt): \u0cda>\u0c97 = LETTER GHHA>LETTER GA
|
||||
\ue05b>\u0c9c; # REMAP (indicExceptions.txt): \u0cdb>\u0c9c = LETTER ZA>LETTER JA
|
||||
\ue05c>\u0ca2; # FALLBACK
|
||||
\ue05d>\u0ca2; # REMAP (indicExceptions.txt): \u0cdd>\u0ca2 = LETTER RHA>LETTER DDHA
|
||||
\ue05e>\u0cde; # LETTER FA
|
||||
\ue05f>\u0caf; # REMAP (indicExceptions.txt): \u0cdf>\u0caf = LETTER YYA>LETTER YA
|
||||
\ue060>\u0ce0; # LETTER VOCALIC RR
|
||||
\ue061>\u0ce1; # LETTER VOCALIC LL
|
||||
\ue062>\u0cbf; # REMAP (indicExceptions.txt): \u0ce2>\u0cbf = VOWEL SIGN VOCALIC L>VOWEL SIGN I
|
||||
\ue063>\u0cc0; # REMAP (indicExceptions.txt): \u0ce3>\u0cc0 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
|
||||
\ue064>'.' ; # FALLBACK FOR DANDA
|
||||
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
|
||||
\ue066>\u0ce6; # DIGIT ZERO
|
||||
\ue067>\u0ce7; # DIGIT ONE
|
||||
\ue068>\u0ce8; # DIGIT TWO
|
||||
\ue069>\u0ce9; # DIGIT THREE
|
||||
\ue06a>\u0cea; # DIGIT FOUR
|
||||
\ue06b>\u0ceb; # DIGIT FIVE
|
||||
\ue06c>\u0cec; # DIGIT SIX
|
||||
\ue06d>\u0ced; # DIGIT SEVEN
|
||||
\ue06e>\u0cee; # DIGIT EIGHT
|
||||
\ue06f>\u0cef; # DIGIT NINE
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u0cb0; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0cb0; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0cb5; # FALLBACK FOR ORIYA LETTER WA
|
||||
0 > \u0ce6; # FALLBACK FOR TAMIL
|
||||
1 > \u0ce7;
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,529 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Latin
|
||||
#\ue000 reserved
|
||||
#consonants
|
||||
$chandrabindu=\ue001;
|
||||
$anusvara=\ue002;
|
||||
$visarga=\ue003;
|
||||
#\ue004 reserved
|
||||
# w<vowel> represents the stand-alone form
|
||||
$wa=\ue005;
|
||||
$waa=\ue006;
|
||||
$wi=\ue007;
|
||||
$wii=\ue008;
|
||||
$wu=\ue009;
|
||||
$wuu=\ue00a;
|
||||
$wr=\ue00b;
|
||||
$wl=\ue00c;
|
||||
$wce=\ue00d; # LETTER CANDRA E
|
||||
$wse=\ue00e; # LETTER SHORT E
|
||||
$we=\ue00f; # \u090f LETTER E
|
||||
$wai=\ue010;
|
||||
$wco=\ue011; # LETTER CANDRA O
|
||||
$wso=\ue012; # LETTER SHORT O
|
||||
$wo=\ue013; # \u0913 LETTER O
|
||||
$wau=\ue014;
|
||||
$ka=\ue015;
|
||||
$kha=\ue016;
|
||||
$ga=\ue017;
|
||||
$gha=\ue018;
|
||||
$nga=\ue019;
|
||||
$ca=\ue01a;
|
||||
$cha=\ue01b;
|
||||
$ja=\ue01c;
|
||||
$jha=\ue01d;
|
||||
$nya=\ue01e;
|
||||
$tta=\ue01f;
|
||||
$ttha=\ue020;
|
||||
$dda=\ue021;
|
||||
$ddha=\ue022;
|
||||
$nna=\ue023;
|
||||
$ta=\ue024;
|
||||
$tha=\ue025;
|
||||
$da=\ue026;
|
||||
$dha=\ue027;
|
||||
$na=\ue028;
|
||||
$ena=\ue029; #compatibility
|
||||
$pa=\ue02a;
|
||||
$pha=\ue02b;
|
||||
$ba=\ue02c;
|
||||
$bha=\ue02d;
|
||||
$ma=\ue02e;
|
||||
$ya=\ue02f;
|
||||
$ra=\ue030;
|
||||
$vva=\ue081;
|
||||
$rra=\ue031;
|
||||
$la=\ue032;
|
||||
$lla=\ue033;
|
||||
$ela=\ue034; #compatibility
|
||||
$va=\ue035;
|
||||
$sha=\ue036;
|
||||
$ssa=\ue037;
|
||||
$sa=\ue038;
|
||||
$ha=\ue039;
|
||||
#\u093a Reserved
|
||||
#\u093b Reserved
|
||||
$nukta=\ue03c;
|
||||
$avagraha=\ue03d; # SIGN AVAGRAHA
|
||||
# <vowel> represents the dependent form
|
||||
$aa=\ue03e;
|
||||
$i=\ue03f;
|
||||
$ii=\ue040;
|
||||
$u=\ue041;
|
||||
$uu=\ue042;
|
||||
$rh=\ue043;
|
||||
$lh=\ue044;
|
||||
$ce=\ue045; #VOWEL SIGN CANDRA E
|
||||
$se=\ue046; #VOWEL SIGN SHORT E
|
||||
$e=\ue047;
|
||||
$ai=\ue048;
|
||||
$co=\ue049; # VOWEL SIGN CANDRA O
|
||||
$so=\ue04a; # VOWEL SIGN SHORT O
|
||||
$o=\ue04b; # \u094b
|
||||
$au=\ue04c;
|
||||
$virama=\ue04d;
|
||||
# \u094e Reserved
|
||||
# \u094f Reserved
|
||||
$om=\ue050; # OM
|
||||
\ue051>; # UNMAPPED STRESS SIGN UDATTA
|
||||
\ue052>; # UNMAPPED STRESS SIGN ANUDATTA
|
||||
\ue053>; # UNMAPPED GRAVE ACCENT
|
||||
\ue054>; # UNMAPPED ACUTE ACCENT
|
||||
$lm = \ue055;# Telugu Length Mark
|
||||
$ailm=\ue056;# AI Length Mark
|
||||
$aulm=\ue057;# AU Length Mark
|
||||
# Urdu compatibility forms
|
||||
$uka=\ue058;
|
||||
$ukha=\ue059;
|
||||
$ugha=\ue05a;
|
||||
$ujha=\ue05b;
|
||||
$uddha=\ue05c;
|
||||
$udha=\ue05d;
|
||||
$ufa=\ue05e;
|
||||
$uya=\ue05f;
|
||||
$wrr=\ue060;
|
||||
$wll=\ue061;
|
||||
$rrh=\ue062;
|
||||
$llh=\ue063;
|
||||
$danda=\ue064;
|
||||
$doubleDanda=\ue065;
|
||||
$zero=\ue066; # DIGIT ZERO
|
||||
$one=\ue067; # DIGIT ONE
|
||||
$two=\ue068; # DIGIT TWO
|
||||
$three=\ue069; # DIGIT THREE
|
||||
$four=\ue06a; # DIGIT FOUR
|
||||
$five=\ue06b; # DIGIT FIVE
|
||||
$six=\ue06c; # DIGIT SIX
|
||||
$seven=\ue06d; # DIGIT SEVEN
|
||||
$eight=\ue06e; # DIGIT EIGHT
|
||||
$nine=\ue06f; # DIGIT NINE
|
||||
|
||||
# \u0970>; # UNMAPPED ABBREVIATION SIGN
|
||||
$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
|
||||
$depVowelBelow=[\ue041-\ue044];
|
||||
# $x was originally called '&'; $z was '%'
|
||||
$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];
|
||||
$z=[bcdfghjklmnpqrstvwxyz];
|
||||
$vowels=[aeiour\u0304\u0325\u0306];
|
||||
$forceIndependentMatra = [^[[:L:][\u0300-\u034c]]];
|
||||
######################################################################
|
||||
# convert from Native letters to Latin letters
|
||||
######################################################################
|
||||
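# Transliterations for anusvara.
# Before a consonant, anusvara is written as a nasal chosen by the class of that
# consonant (velar, palatal, retroflex, dental, labial, then the remaining
# semivowels, sibilants and ha). Anywhere else it falls back to m\u0307.
# NOTE(review): the palatal case emits n\u0304 (macron) although $nya itself maps
# to n\u0303 (tilde), while the other classes reuse the spelling of their own
# nasal letter. Confirm that this difference is intended.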
|
||||
$anusvara} [$ka$kha$ga$gha$nga] > n\u0307;
|
||||
$anusvara} [$ca$cha$ja$jha$nya] > n\u0304;
|
||||
$anusvara} [$tta$ttha$dda$ddha$nna] > n\u0323;
|
||||
$anusvara} [$ta$tha$da$dha$na] > n ;
|
||||
$anusvara} [$pa$pha$ba$bha$ma] > m ;
|
||||
$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] > n ;
|
||||
$anusvara> m\u0307;
|
||||
|
||||
# Urdu compatibility
|
||||
$ya$nukta}$x > y\u0307 ;
|
||||
$ya$nukta$virama > y\u0307 ;
|
||||
$ya$nukta > y\u0307a ;
|
||||
|
||||
$la$nukta }$x > l\u0331 ;
|
||||
$la$nukta$virama > l\u0331 ;
|
||||
$la$nukta > l\u0331a ;
|
||||
|
||||
$na$nukta }$x > n\u0331 ;
|
||||
$na$nukta$virama > n\u0331 ;
|
||||
$na$nukta > n\u0331a ;
|
||||
|
||||
$ena }$x > n\u0331 ;
|
||||
$ena$virama > n\u0331 ;
|
||||
$ena > n\u0331a ;
|
||||
# $uka (Urdu compatibility form of ka + nukta): drop the inherent "a" when a
# dependent vowel follows, and absorb a following virama, exactly as the
# $ka$nukta rules do. The bare form keeps the inherent "a".
$uka }$x > q ;
$uka$virama > q ;
$uka > qa ;
|
||||
$ka$nukta }$x > q ;
|
||||
$ka$nukta$virama > q ;
|
||||
$ka$nukta > qa ;
|
||||
$kha$nukta }$x > k\u0331h\u0331 ;
|
||||
$kha$nukta$virama > k\u0331h\u0331 ;
|
||||
$kha$nukta > k\u0331h\u0331a ;
|
||||
# $ukha: before a dependent vowel the inherent "a" must not be written
# (same output as the $kha$nukta rules).
$ukha }$x > k\u0331h\u0331 ;
$ukha$virama > k\u0331h\u0331;
|
||||
$ukha > k\u0331h\u0331a;
|
||||
# $ugha (Urdu compatibility form of ga + nukta): handle a following dependent
# vowel or virama the same way the $ga$nukta rules do.
$ugha }$x > g\u0307 ;
$ugha$virama > g\u0307 ;
$ugha > g\u0307a ;
|
||||
$ga$nukta }$x > g\u0307 ;
|
||||
$ga$nukta$virama > g\u0307 ;
|
||||
$ga$nukta > g\u0307a ;
|
||||
|
||||
# $ujha (Urdu compatibility form of ja + nukta): handle a following dependent
# vowel or virama the same way the $ja$nukta rules do.
$ujha }$x > z ;
$ujha$virama > z ;
$ujha > za ;
|
||||
$ja$nukta }$x > z ;
|
||||
$ja$nukta$virama > z ;
|
||||
$ja$nukta > za ;
|
||||
$ddha$nukta}$x > r\u0323h ;
|
||||
$ddha$nukta$virama > r\u0323h ;
|
||||
$ddha$nukta > r\u0323ha;
|
||||
|
||||
$uddha}$x > r\u0323 ;
|
||||
$uddha$virama > r\u0323 ;
|
||||
$uddha > r\u0323a;
|
||||
|
||||
# $udha (\ue05d) is LETTER RHA, i.e. ddha + nukta, so it is transliterated like
# the $ddha$nukta rules (r\u0323h...) and not like $uddha (r\u0323...), which is
# the dda + nukta form. A following dependent vowel or virama replaces the
# inherent "a".
$udha }$x > r\u0323h ;
$udha$virama > r\u0323h ;
$udha > r\u0323ha ;
|
||||
$dda$nukta}$x > r\u0323 ;
|
||||
$dda$nukta$virama > r\u0323 ;
|
||||
$dda$nukta > r\u0323a ;
|
||||
$pha$nukta }$x > f ;
|
||||
$pha$nukta$virama > f ;
|
||||
$pha$nukta > fa ;
|
||||
$ufa }$x > f ;
|
||||
$ufa$virama > f ;
|
||||
$ufa > fa ;
|
||||
|
||||
$ra$nukta}$x > r\u0331;
|
||||
$ra$nukta$virama > r\u0331;
|
||||
$ra$nukta > r\u0331a;
|
||||
$lla$nukta}$x > l\u0331;
|
||||
$lla$nukta$virama > l\u0331;
|
||||
$lla$nukta > l\u0331a;
|
||||
|
||||
$ela}$x > l\u0331;
|
||||
$ela$virama > l\u0331;
|
||||
$ela > l\u0331a;
|
||||
|
||||
$uya}$x > y\u0307;
|
||||
$uya$virama > y\u0307;
|
||||
$uya > y\u0307a;
|
||||
|
||||
|
||||
# normal consonants
|
||||
$ka$virama}$ha>k'';
|
||||
$ka}$x>k;
|
||||
$ka$virama>k;
|
||||
$ka>ka;
|
||||
$kha}$x>kh;
|
||||
$kha$virama>kh;
|
||||
$kha>kha;
|
||||
$ga$virama}$ha>g'';
|
||||
$ga}$x>g;
|
||||
$ga$virama>g;
|
||||
$ga>ga;
|
||||
|
||||
$gha}$x>gh;
|
||||
$gha$virama>gh;
|
||||
$gha>gha;
|
||||
|
||||
$nga}$x>n\u0307;
|
||||
$nga$virama>n\u0307;
|
||||
$nga>n\u0307a ;
|
||||
$ca$virama}$ha>c'';
|
||||
$ca}$x>c;
|
||||
$ca$virama>c;
|
||||
$ca>ca;
|
||||
|
||||
$cha}$x>ch;
|
||||
$cha$virama>ch;
|
||||
$cha>cha;
|
||||
$ja$virama}$ha>j'';
|
||||
$ja}$x>j;
|
||||
$ja$virama>j;
|
||||
$ja>ja;
|
||||
|
||||
$jha}$x>jh;
|
||||
$jha$virama>jh;
|
||||
$jha>jha;
|
||||
|
||||
$nya }$x>n\u0303 ;
|
||||
$nya$virama>n\u0303;
|
||||
$nya > n\u0303a ;
|
||||
|
||||
|
||||
$tta$virama}$ha>t\u0323'';
|
||||
$tta}$x>t\u0323;
|
||||
$tta$virama>t\u0323;
|
||||
$tta>t\u0323a;
|
||||
|
||||
$ttha}$x>t\u0323h;
|
||||
$ttha$virama>t\u0323h;
|
||||
$ttha>t\u0323ha;
|
||||
# dda + virama directly before ha: append an apostrophe so the result stays
# distinct from ddha (d\u0323h). Same pattern as the $tta, $ta and $da rules.
$dda$virama}$ha>d\u0323'';
|
||||
$dda}$x>d\u0323;
|
||||
$dda$virama>d\u0323;
|
||||
$dda>d\u0323a;
|
||||
|
||||
$ddha}$x>d\u0323h;
|
||||
$ddha$virama>d\u0323h;
|
||||
$ddha>d\u0323ha;
|
||||
|
||||
$nna}$x>n\u0323 ;
|
||||
$nna$virama>n\u0323;
|
||||
$nna>n\u0323a ;
|
||||
|
||||
|
||||
$ta$virama}$ha>t'';
|
||||
$ta$virama}$ttha>t'';
|
||||
$ta$virama}$tta>t'';
|
||||
$ta$virama}$tha>t'';
|
||||
$ta}$x>t;
|
||||
$ta$virama>t;
|
||||
$ta>ta;
|
||||
$tha}$x>th;
|
||||
$tha$virama>th;
|
||||
$tha>tha;
|
||||
|
||||
$da$virama}$ha>d'';
|
||||
$da$virama}$ddha>d'';
|
||||
$da$virama}$dda>d'';
|
||||
$da$virama}$dha>d'';
|
||||
$da}$x>d;
|
||||
$da$virama>d;
|
||||
$da>da;
|
||||
$dha}$x>dh;
|
||||
$dha$virama>dh;
|
||||
$dha>dha;
|
||||
$na$virama}$ga>n'';
|
||||
$na$virama}$ya>n'';
|
||||
$na}$x>n;
|
||||
$na$virama>n;
|
||||
$na>na;
|
||||
|
||||
|
||||
$pa$virama}$ha>p'';
|
||||
$pa}$x>p;
|
||||
$pa$virama>p;
|
||||
$pa>pa;
|
||||
$pha}$x>ph;
|
||||
$pha$virama>ph;
|
||||
$pha>pha;
|
||||
$ba$virama}$ha>b'';
|
||||
$ba}$x>b;
|
||||
$ba$virama>b;
|
||||
$ba>ba;
|
||||
|
||||
$bha}$x>bh;
|
||||
$bha$virama>bh;
|
||||
$bha>bha;
|
||||
|
||||
$ma$virama}$ma>m'';
|
||||
$ma}$x>m;
|
||||
$ma$virama>m;
|
||||
$ma>ma;
|
||||
|
||||
$ya}$x>y;
|
||||
$ya$virama>y;
|
||||
$ya>ya;
|
||||
$ra$virama}$ha>r'';
|
||||
$ra}$x>r;
|
||||
$ra$virama>r;
|
||||
$ra>ra;
|
||||
$vva$virama}$ha>w\u0307'';
|
||||
$vva}$x>w\u0307;
|
||||
$vva$virama>w\u0307;
|
||||
$vva>w\u0307a;
|
||||
$rra$virama}$ha>r\u0331'';
|
||||
$rra}$x>r\u0331;
|
||||
$rra$virama>r\u0331;
|
||||
$rra>r\u0331a;
|
||||
$la$virama}$ha>l'';
|
||||
$la}$x>l;
|
||||
$la$virama>l;
|
||||
$la>la;
|
||||
$lla$virama}$ha>l\u0323'';
|
||||
$lla}$x>l\u0323;
|
||||
$lla$virama>l\u0323;
|
||||
$lla>l\u0323a;
|
||||
$va}$x>v;
|
||||
$va$virama>v;
|
||||
$va>va;
|
||||
$sa$virama}$ha>s'';
|
||||
$sa$virama}$sha>s'';
|
||||
$sa$virama}$ssa>s'';
|
||||
$sa$virama}$sa>s'';
|
||||
$sa}$x>s;
|
||||
$sa$virama>s;
|
||||
|
||||
#for gurmukhi
|
||||
$sa$nukta}$x>s\u0301;
|
||||
$sa$nukta$virama>s\u0301;
|
||||
$sa$nukta>s\u0301a;
|
||||
$sa>sa;
|
||||
|
||||
$sha}$x>s\u0301;
|
||||
$sha$virama>s\u0301;
|
||||
$sha>s\u0301a;
|
||||
|
||||
$ssa}$x>s\u0323;
|
||||
$ssa$virama>s\u0323;
|
||||
$ssa>s\u0323a;
|
||||
$ha}$x>h;
|
||||
$ha$virama>h;
|
||||
$ha>ha;
|
||||
|
||||
# dependent vowels (should never occur except following consonants)
|
||||
$forceIndependentMatra{$aa > \u0314a\u0304 ;
|
||||
$forceIndependentMatra{$ai > \u0314ai ;
|
||||
$forceIndependentMatra{$au > \u0314au ;
|
||||
$forceIndependentMatra{$ii > \u0314i\u0304 ;
|
||||
$forceIndependentMatra{$i > \u0314i ;
|
||||
$forceIndependentMatra{$uu > \u0314u\u0304 ;
|
||||
$forceIndependentMatra{$u > \u0314u ;
|
||||
$forceIndependentMatra{$rrh > \u0314r\u0325\u0304 ;
|
||||
$forceIndependentMatra{$rh > \u0314r\u0325 ;
|
||||
$forceIndependentMatra{$llh > \u0314l\u0325\u0304 ;
|
||||
$forceIndependentMatra{$lh > \u0314l\u0325 ;
|
||||
$forceIndependentMatra{$e > \u0314e\u0304 ;
|
||||
$forceIndependentMatra{$o > \u0314o\u0304 ;
|
||||
#extra vowels
|
||||
$forceIndependentMatra{$ce > \u0314e\u0306 ;
|
||||
$forceIndependentMatra{$co > \u0314o\u0306 ;
|
||||
$forceIndependentMatra{$se > \u0314e ;
|
||||
$forceIndependentMatra{$so > \u0314o ;
|
||||
$forceIndependentMatra{$nukta >; # Nukta cannot appear independently or as first character
|
||||
$forceIndependentMatra{$virama >; # Virama cannot appear independently or as first character
|
||||
$aa > a\u0304 ;
|
||||
$ai > ai ;
|
||||
$au > au ;
|
||||
$ii > i\u0304 ;
|
||||
$i > i ;
|
||||
$uu > u\u0304 ;
|
||||
$u > u ;
|
||||
$rrh > r\u0325\u0304 ;
|
||||
$rh > r\u0325 ;
|
||||
$llh > l\u0325\u0304 ;
|
||||
$lh > l\u0325 ;
|
||||
$e > e\u0304 ;
|
||||
$o > o\u0304 ;
|
||||
#extra vowels
|
||||
$ce > e\u0306 ;
|
||||
$co > o\u0306 ;
|
||||
$se > e ;
|
||||
$so > o ;
|
||||
# Dependent vowels following independent vowels. Generally illegal; handled only for round-tripping.
|
||||
$waa} $x > a\u0304\u0314 ;
|
||||
$wai} $x > ai\u0314 ;
|
||||
$wau} $x > au\u0314 ;
|
||||
$wii} $x > i\u0304\u0314 ;
|
||||
$wi } $x > i\u0314 ;
|
||||
$wuu} $x > u\u0304\u0314 ;
|
||||
$wu } $x > u\u0314 ;
|
||||
$wrr} $x > r\u0325\u0304\u0314 ;
|
||||
$wr } $x > r\u0325\u0314 ;
|
||||
$wll} $x > l\u0325\u0304\u0314 ;
|
||||
$wl } $x > l\u0325\u0314 ;
|
||||
$we } $x > e\u0304\u0314 ;
|
||||
$wo } $x > o\u0304\u0314 ;
|
||||
$wa } $x > a\u0314 ;
|
||||
#extra vowels
|
||||
$wce} $x > e\u0306\u0314 ;
|
||||
$wco} $x > o\u0306\u0314 ;
|
||||
$wse} $x > e\u0314 ;
|
||||
$wso} $x > o\u0314 ;
|
||||
$om} $x > ''om\u0314 ;
|
||||
|
||||
# independent vowels when preceded by vowels
|
||||
$vowels{$waa > ''a\u0304 ;
|
||||
$vowels{$wai > ''ai ;
|
||||
$vowels{$wau > ''au ;
|
||||
$vowels{$wii > ''i\u0304 ;
|
||||
$vowels{$wi > ''i ;
|
||||
$vowels{$wuu > ''u\u0304 ;
|
||||
$vowels{$wu > ''u ;
|
||||
$vowels{$wrr > ''r\u0325\u0304 ;
|
||||
$vowels{$wr > ''r\u0325 ;
|
||||
$vowels{$wll > ''l\u0325\u0304 ;
|
||||
$vowels{$wl > ''l\u0325 ;
|
||||
$vowels{$we > ''e\u0304 ;
|
||||
$vowels{$wo > ''o\u0304 ;
|
||||
$vowels{$wa > ''a ;
|
||||
#extra vowels
|
||||
$vowels{$wce > ''e\u0306 ;
|
||||
$vowels{$wco > ''o\u0306 ;
|
||||
$vowels{$wse > ''e ;
|
||||
$vowels{$wso > ''o ;
|
||||
|
||||
# independent vowels (otherwise)
|
||||
$waa > a\u0304 ;
|
||||
$wai > ai ;
|
||||
$wau > au ;
|
||||
$wii > i\u0304 ;
|
||||
$wi > i ;
|
||||
$wuu > u\u0304 ;
|
||||
$wu > u ;
|
||||
$wrr > r\u0325\u0304 ;
|
||||
$wr > r\u0325 ;
|
||||
$wll > l\u0325\u0304 ;
|
||||
$wl > l\u0325 ;
|
||||
$we > e\u0304 ;
|
||||
$wo > o\u0304 ;
|
||||
$wa > a ;
|
||||
#extra vowels
|
||||
$wce > e\u0306 ;
|
||||
$wco > o\u0306 ;
|
||||
$wse > e ;
|
||||
$wso > o ;
|
||||
$om > ''om ;
|
||||
|
||||
# signs: avagraha, candrabindu, visarga
|
||||
$avagraha > \u0315;
|
||||
$chandrabindu$anusvara>\u0303;
|
||||
$chandrabindu > m\u0310;
|
||||
$visarga>h\u0323;
|
||||
#numbers
|
||||
$zero > 0;
|
||||
$one > 1;
|
||||
$two > 2;
|
||||
$three > 3;
|
||||
$four > 4;
|
||||
$five > 5;
|
||||
$six > 6;
|
||||
$seven > 7;
|
||||
$eight > 8;
|
||||
$nine > 9;
|
||||
$lm >;
|
||||
$ailm >;
|
||||
$aulm >;
|
||||
|
||||
$danda>'.';
|
||||
$doubleDanda>'.';
|
||||
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
# LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue071}$x>ra;
|
||||
\ue071$virama>r;
|
||||
\ue071>ra;
|
||||
# LETTER RA WITH LOWER DIAGONAL
|
||||
\ue072}$x>ra;
|
||||
\ue072$virama>r;
|
||||
\ue072>ra;
|
||||
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE004>; # DEVANAGARI LETTER SHORT A (no Latin equivalent; removed)
|
||||
|
|
@ -1,141 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Malayalam
|
||||
#:: NFD (NFC) ;
|
||||
\ue001>\u0d02; # REMAP (indicExceptions.txt): \u0d01>\u0d02 = SIGN CANDRABINDU>SIGN ANUSVARA
|
||||
\ue002>\u0d02; # SIGN ANUSVARA
|
||||
\ue003>\u0d03; # SIGN VISARGA
|
||||
\uE004>\u0d05; # FALLBACK TO LETTER A
|
||||
\ue005>\u0d05; # LETTER A
|
||||
\ue006>\u0d06; # LETTER AA
|
||||
\ue007>\u0d07; # LETTER I
|
||||
\ue008>\u0d08; # LETTER II
|
||||
\ue009>\u0d09; # LETTER U
|
||||
\ue00a>\u0d0a; # LETTER UU
|
||||
\ue00b>\u0d0b; # LETTER VOCALIC R
|
||||
\ue00c>\u0d0c; # LETTER VOCALIC L
|
||||
\ue00d>\u0d0e; # FALLBACK TO LETTER E
|
||||
\ue00e>\u0d0e; # LETTER E
|
||||
\ue00f>\u0d0f; # LETTER EE
|
||||
\ue010>\u0d10; # LETTER AI
|
||||
\ue011>\u0d12; # FALLBACK TO O
|
||||
\ue012>\u0d12; # LETTER O
|
||||
\ue013>\u0d13; # LETTER OO
|
||||
\ue014>\u0d14; # LETTER AU
|
||||
\ue015>\u0d15; # LETTER KA
|
||||
\ue016>\u0d16; # LETTER KHA
|
||||
\ue017>\u0d17; # LETTER GA
|
||||
\ue018>\u0d18; # LETTER GHA
|
||||
\ue019>\u0d19; # LETTER NGA
|
||||
\ue01a>\u0d1a; # LETTER CA
|
||||
\ue01b>\u0d1b; # LETTER CHA
|
||||
\ue01c>\u0d1c; # LETTER JA
|
||||
\ue01d>\u0d1d; # LETTER JHA
|
||||
\ue01e>\u0d1e; # LETTER NYA
|
||||
\ue01f>\u0d1f; # LETTER TTA
|
||||
\ue020>\u0d20; # LETTER TTHA
|
||||
\ue021>\u0d21; # LETTER DDA
|
||||
\ue022>\u0d22; # LETTER DDHA
|
||||
\ue023>\u0d23; # LETTER NNA
|
||||
\ue024>\u0d24; # LETTER TA
|
||||
\ue025>\u0d25; # LETTER THA
|
||||
\ue026>\u0d26; # LETTER DA
|
||||
\ue027>\u0d27; # LETTER DHA
|
||||
\ue028>\u0d28; # LETTER NA
|
||||
\ue029>\u0d28; # REMAP (indicExceptions.txt): \u0d29>\u0d28 = LETTER NNNA>LETTER NA
|
||||
\ue02a>\u0d2a; # LETTER PA
|
||||
\ue02b>\u0d2b; # LETTER PHA
|
||||
\ue02c>\u0d2c; # LETTER BA
|
||||
\ue02d>\u0d2d; # LETTER BHA
|
||||
\ue02e>\u0d2e; # LETTER MA
|
||||
\ue02f>\u0d2f; # LETTER YA
|
||||
\ue030\ue03c>\u0d31;
|
||||
\ue030>\u0d30; # LETTER RA
|
||||
\ue031>\u0d31; # LETTER RRA
|
||||
\ue032>\u0d32; # LETTER LA
|
||||
\ue033\ue03c>\u0d34;
|
||||
\ue033>\u0d33; # LETTER LLA
|
||||
\ue034>\u0d34; # LETTER LLLA
|
||||
\ue035>\u0d35; # LETTER VA
|
||||
\ue036>\u0d36; # LETTER SHA
|
||||
\ue037>\u0d37; # LETTER SSA
|
||||
\ue038>\u0d38; # LETTER SA
|
||||
\ue039>\u0d39; # LETTER HA
|
||||
|
||||
\ue03c>; # FALLBACK BLOW AWAY NUKTA
|
||||
\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA
|
||||
|
||||
\ue03e>\u0d3e; # VOWEL SIGN AA
|
||||
\ue03f>\u0d3f; # VOWEL SIGN I
|
||||
\ue040>\u0d40; # VOWEL SIGN II
|
||||
\ue041>\u0d41; # VOWEL SIGN U
|
||||
\ue042>\u0d42; # VOWEL SIGN UU
|
||||
\ue043>\u0d43; # VOWEL SIGN VOCALIC R
|
||||
\ue044>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC RR
|
||||
\ue045>\u0d3e; # REMAP (indicExceptions.txt): \u0d45>\u0d3e = VOWEL SIGN CANDRA E>VOWEL SIGN AA
|
||||
\ue046>\u0d46; # VOWEL SIGN E
|
||||
\ue047>\u0d47; # VOWEL SIGN EE
|
||||
\ue048>\u0d48; # VOWEL SIGN AI
|
||||
\ue049>\u0d4b; # REMAP (indicExceptions.txt): \u0d49>\u0d4b = VOWEL SIGN CANDRA O>VOWEL SIGN OO
|
||||
\ue04a>\u0d4a; # VOWEL SIGN O
|
||||
\ue04b>\u0d4b; # VOWEL SIGN OO
|
||||
\ue04c>\u0d4c; # VOWEL SIGN AU
|
||||
\ue04d>\u0d4d; # SIGN VIRAMA
|
||||
\ue050>\u0d13\u0d02; # FALLBACK InterIndic-Malayalam: OM>LETTER OO.SIGN ANUSVARA
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>; # FALLBACK BLOW AWAY LENGTH MARK
|
||||
\ue056>\u0d48; # REMAP (indicExceptions.txt): \u0d56>\u0d48 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\ue057>\u0d57; # AU LENGTH MARK
|
||||
\ue058>\u0d15; # FALLBACK
|
||||
\ue059>\u0d16; # REMAP (indicExceptions.txt): \u0d59>\u0d16 = LETTER KHHA>LETTER KHA
|
||||
\ue05a>\u0d17; # REMAP (indicExceptions.txt): \u0d5a>\u0d17 = LETTER GHHA>LETTER GA
|
||||
\ue05b>\u0d1c; # REMAP (indicExceptions.txt): \u0d5b>\u0d1c = LETTER ZA>LETTER JA
|
||||
\ue05d>\u0d22; # REMAP (indicExceptions.txt): \u0d5d>\u0d22 = LETTER RHA>LETTER DDHA
|
||||
\ue05c>\u0d21; # FALLBACK
|
||||
\ue05e>\u0d2b; # REMAP (indicExceptions.txt): \u0d5e>\u0d2b = LETTER FA>LETTER PHA
|
||||
\ue05f>\u0d2f; # REMAP (indicExceptions.txt): \u0d5f>\u0d2f = LETTER YYA>LETTER YA
|
||||
\ue060>\u0d60; # LETTER VOCALIC RR
|
||||
\ue061>\u0d61; # LETTER VOCALIC LL
|
||||
\ue062>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC L
|
||||
\ue063>; # FALLBACK BLOW AWAY VOWEL SIGN VOCALIC LL
|
||||
\ue064>'.' ; # FALLBACK FOR DANDA
|
||||
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
|
||||
\ue066>\u0d66; # DIGIT ZERO
|
||||
\ue067>\u0d67; # DIGIT ONE
|
||||
\ue068>\u0d68; # DIGIT TWO
|
||||
\ue069>\u0d69; # DIGIT THREE
|
||||
\ue06a>\u0d6a; # DIGIT FOUR
|
||||
\ue06b>\u0d6b; # DIGIT FIVE
|
||||
\ue06c>\u0d6c; # DIGIT SIX
|
||||
\ue06d>\u0d6d; # DIGIT SEVEN
|
||||
\ue06e>\u0d6e; # DIGIT EIGHT
|
||||
\ue06f>\u0d6f; # DIGIT NINE
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u0d30; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0d30; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0d35; # FALLBACK FOR ORIYA LETTER WA
|
||||
0 > \u0d66; # FALLBACK FOR TAMIL
|
||||
1 > \u0d67;
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,137 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Oriya
|
||||
#:: NFD (NFC) ;
|
||||
\ue001>\u0b01; # SIGN CANDRABINDU
|
||||
\ue002>\u0b02; # SIGN ANUSVARA
|
||||
\ue003>\u0b03; # SIGN VISARGA
|
||||
\uE004>\u0b05; # FALLBACK TO LETTER A
|
||||
\ue005>\u0b05; # LETTER A
|
||||
\ue006>\u0b06; # LETTER AA
|
||||
\ue007>\u0b07; # LETTER I
|
||||
\ue008>\u0b08; # LETTER II
|
||||
\ue009>\u0b09; # LETTER U
|
||||
\ue00a>\u0b0a; # LETTER UU
|
||||
\ue00b>\u0b0b; # LETTER VOCALIC R
|
||||
\ue00c>\u0b0c; # LETTER VOCALIC L
|
||||
\ue00d>\u0b0f; # FALLBACK
|
||||
\ue00e>\u0b0f; # FALLBACK
|
||||
\ue00f>\u0b0f; # LETTER E
|
||||
\ue010>\u0b10; # LETTER AI
|
||||
\ue011>\u0b13; # FALLBACK
|
||||
\ue012>\u0b13; # FALLBACK
|
||||
\ue013>\u0b13; # FALLBACK LETTER OO (\u0b13 = LETTER O)
|
||||
\ue014>\u0b14; # LETTER AU
|
||||
\ue015>\u0b15; # LETTER KA
|
||||
\ue016>\u0b16; # LETTER KHA
|
||||
\ue017>\u0b17; # LETTER GA
|
||||
\ue018>\u0b18; # LETTER GHA
|
||||
\ue019>\u0b19; # LETTER NGA
|
||||
\ue01a>\u0b1a; # LETTER CA
|
||||
\ue01b>\u0b1b; # LETTER CHA
|
||||
\ue01c>\u0b1c; # LETTER JA
|
||||
\ue01d>\u0b1d; # LETTER JHA
|
||||
\ue01e>\u0b1e; # LETTER NYA
|
||||
\ue01f>\u0b1f; # LETTER TTA
|
||||
\ue020>\u0b20; # LETTER TTHA
|
||||
\ue021>\u0b21; # LETTER DDA
|
||||
\ue022>\u0b22; # LETTER DDHA
|
||||
\ue023>\u0b23; # LETTER NNA
|
||||
\ue024>\u0b24; # LETTER TA
|
||||
\ue025>\u0b25; # LETTER THA
|
||||
\ue026>\u0b26; # LETTER DA
|
||||
\ue027>\u0b27; # LETTER DHA
|
||||
\ue028>\u0b28; # LETTER NA
|
||||
\ue029>\u0b28\u0b3c; # FALLBACK \u0b29>\u0b28\u0b3c = LETTER NNNA>LETTER NA.SIGN NUKTA
|
||||
\ue02a>\u0b2a; # LETTER PA
|
||||
\ue02b>\u0b2b; # LETTER PHA
|
||||
\ue02c>\u0b2c; # LETTER BA
|
||||
\ue02d>\u0b2d; # LETTER BHA
|
||||
\ue02e>\u0b2e; # LETTER MA
|
||||
\ue02f>\u0b2f; # LETTER YA
|
||||
\ue030>\u0b30; # LETTER RA
|
||||
\ue031>\u0b5c; # LETTER RRA
|
||||
\ue032>\u0b32; # LETTER LA
|
||||
\ue033>\u0b33; # LETTER LLA
|
||||
\ue034>\u0b33\u0b3c; # FALLBACK LETTER LLLA>LETTER LLA.SIGN NUKTA
|
||||
\ue035>\u0b35; # LETTER VA
|
||||
\ue036>\u0b36; # LETTER SHA
|
||||
\ue037>\u0b37; # LETTER SSA
|
||||
\ue038>\u0b38; # LETTER SA
|
||||
\ue039>\u0b39; # LETTER HA
|
||||
\ue03c>\u0b3c; # SIGN NUKTA
|
||||
\ue03d>\u0b3d; # SIGN AVAGRAHA
|
||||
\ue03e>\u0b3e; # VOWEL SIGN AA
|
||||
\ue03f>\u0b3f; # VOWEL SIGN I
|
||||
\ue040>\u0b40; # VOWEL SIGN II
|
||||
\ue041>\u0b41; # VOWEL SIGN U
|
||||
\ue042>\u0b42; # VOWEL SIGN UU
|
||||
\ue043>\u0b43; # VOWEL SIGN VOCALIC R
|
||||
\ue044>\u0b43\u0b3c; # FALLBACK \u0b44>\u0b43\u0b3c = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA
|
||||
\ue045>\u0b47; # FALLBACK
|
||||
\ue046>\u0b47; # FALLBACK
|
||||
\ue047>\u0b47; # VOWEL SIGN E
|
||||
\ue048>\u0b48; # VOWEL SIGN AI
|
||||
\ue049>\u0b4b; # FALLBACK
|
||||
\ue04a>\u0b4b; # FALLBACK
|
||||
\ue04b>\u0b4b; # VOWEL SIGN O
|
||||
\ue04c>\u0b4c; # VOWEL SIGN AU
|
||||
\ue04d>\u0b4d; # SIGN VIRAMA
|
||||
\ue050>\u0b13\u0b01; # FALLBACK \u0b50>\u0b13\u0b01 = OM>LETTER O.SIGN CANDRABINDU
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>; # UNMAPPED InterIndic-Oriya: LENGTH MARK
|
||||
\ue056>\u0b56; # AI LENGTH MARK
|
||||
\ue057>\u0b57; # AU LENGTH MARK
|
||||
\ue059>\u0b16\u0b3c; # FALLBACK \u0b59>\u0b16\u0b3c = LETTER KHHA>LETTER KHA.SIGN NUKTA
|
||||
\ue058>\u0b15\u0b3c; # FALLBACK
|
||||
\ue05a>\u0b17\u0b3c; # FALLBACK \u0b5a>\u0b17\u0b3c = LETTER GHHA>LETTER GA.SIGN NUKTA
|
||||
\ue05b>\u0b1c\u0b3c; # FALLBACK \u0b5b>\u0b1c\u0b3c = LETTER ZA>LETTER JA.SIGN NUKTA
|
||||
\ue05c>\u0b21\u0b3c; # FALLBACK
|
||||
\ue05d>\u0b5d; # LETTER RHA
|
||||
\ue05e>\u0b2b\u0b3c; # FALLBACK \u0b5e>\u0b2b\u0b3c = LETTER FA>LETTER PHA.SIGN NUKTA
|
||||
\ue05f>\u0b5f; # LETTER YYA
|
||||
\ue060>\u0b60; # LETTER VOCALIC RR
|
||||
\ue061>\u0b61; # LETTER VOCALIC LL
|
||||
\ue062>\u0b56\u0b3c; # FALLBACK \u0b62>\u0b56\u0b3c = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA
|
||||
\ue063>\u0b57\u0b3c; # FALLBACK \u0b63>\u0b57\u0b3c = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA
|
||||
\uE064>\u0964; # DANDA
|
||||
\uE065>\u0965; # DOUBLE DANDA
|
||||
\ue066>\u0b66; # DIGIT ZERO
|
||||
\ue067>\u0b67; # DIGIT ONE
|
||||
\ue068>\u0b68; # DIGIT TWO
|
||||
\ue069>\u0b69; # DIGIT THREE
|
||||
\ue06a>\u0b6a; # DIGIT FOUR
|
||||
\ue06b>\u0b6b; # DIGIT FIVE
|
||||
\ue06c>\u0b6c; # DIGIT SIX
|
||||
\ue06d>\u0b6d; # DIGIT SEVEN
|
||||
\ue06e>\u0b6e; # DIGIT EIGHT
|
||||
\ue06f>\u0b6f; # DIGIT NINE
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u0b30; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0b30; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>\u0B70; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0B71; # LETTER WA
|
||||
0 > \u0b66; # FALLBACK FOR TAMIL
|
||||
1 > \u0b67;
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,151 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Tamil
|
||||
#:: NFD (NFC) ;
|
||||
\ue001>\u0b82; # FALLBACK SIGN CANDRABINDU
|
||||
\ue002>\u0b82; # SIGN ANUSVARA
|
||||
\ue003>\u0b83; # SIGN VISARGA
|
||||
\uE004>\u0b85; # FALLBACK TO LETTER A
|
||||
\ue005>\u0b85; # LETTER A
|
||||
\ue006>\u0b86; # LETTER AA
|
||||
\ue007>\u0b87; # LETTER I
|
||||
\ue008>\u0b88; # LETTER II
|
||||
\ue009>\u0b89; # LETTER U
|
||||
\ue00a>\u0b8a; # LETTER UU
|
||||
\ue00b>\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0b8b>\u0bb0\u0bbf = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
|
||||
\ue00c>\u0bb2; # FALLBACK LETTER LA
|
||||
\ue00d>\u0b8f; # FALLBACK
|
||||
\ue00e>\u0b8e; # LETTER E
|
||||
\ue00f>\u0b8f; # LETTER EE
|
||||
\ue010>\u0b90; # LETTER AI
|
||||
\ue011>\u0b92; # FALLBACK
|
||||
\ue012>\u0b92; # LETTER O
|
||||
\ue013>\u0b93; # LETTER OO
|
||||
\ue014>\u0b94; # LETTER AU
|
||||
\ue015>\u0b95; # LETTER KA
|
||||
\ue016>\u0b95; # REMAP (indicExceptions.txt): \u0b96>\u0b95 = LETTER KHA>LETTER KA
|
||||
\ue017>\u0b95; # REMAP (indicExceptions.txt): \u0b97>\u0b95 = LETTER GA>LETTER KA
|
||||
\ue018>\u0b95; # REMAP (indicExceptions.txt): \u0b98>\u0b95 = LETTER GHA>LETTER KA
|
||||
\ue019>\u0b99; # LETTER NGA
|
||||
\ue01a>\u0b9a; # LETTER CA
|
||||
\ue01b>\u0b9a; # REMAP (indicExceptions.txt): \u0b9b>\u0b9a = LETTER CHA>LETTER CA
|
||||
\ue01c>\u0b9c; # LETTER JA
|
||||
\ue01d>\u0b9a; # REMAP (indicExceptions.txt): \u0b9d>\u0b9a = LETTER JHA>LETTER CA
|
||||
\ue01e>\u0b9e; # LETTER NYA
|
||||
\ue01f>\u0b9f; # LETTER TTA
|
||||
\ue020>\u0b9f; # REMAP (indicExceptions.txt): \u0ba0>\u0b9f = LETTER TTHA>LETTER TTA
|
||||
\ue021>\u0b9f; # REMAP (indicExceptions.txt): \u0ba1>\u0b9f = LETTER DDA>LETTER TTA
|
||||
\ue022>\u0b9f; # REMAP (indicExceptions.txt): \u0ba2>\u0b9f = LETTER DDHA>LETTER TTA
|
||||
\ue023>\u0ba3; # LETTER NNA
|
||||
\ue024>\u0ba4; # LETTER TA
|
||||
\ue025>\u0ba4; # REMAP (indicExceptions.txt): \u0ba5>\u0ba4 = LETTER THA>LETTER TA
|
||||
\ue026>\u0ba4; # REMAP (indicExceptions.txt): \u0ba6>\u0ba4 = LETTER DA>LETTER TA
|
||||
\ue027>\u0ba4; # REMAP (indicExceptions.txt): \u0ba7>\u0ba4 = LETTER DHA>LETTER TA
|
||||
\ue028\ue03c>\u0ba9;
|
||||
\ue028>\u0ba8; # LETTER NA
|
||||
\ue029>\u0ba9; # LETTER NNNA
|
||||
\ue02a>\u0baa; # LETTER PA
|
||||
\ue02b>\u0baa; # REMAP (indicExceptions.txt): \u0bab>\u0baa = LETTER PHA>LETTER PA
|
||||
\ue02c>\u0baa; # REMAP (indicExceptions.txt): \u0bac>\u0baa = LETTER BA>LETTER PA
|
||||
\ue02d>\u0baa; # REMAP (indicExceptions.txt): \u0bad>\u0baa = LETTER BHA>LETTER PA
|
||||
\ue02e>\u0bae; # LETTER MA
|
||||
\ue02f>\u0baf; # LETTER YA
|
||||
\ue030\ue03c>\u0bb1;
|
||||
\ue030>\u0bb0; # LETTER RA
|
||||
\ue031>\u0bb1; # LETTER RRA
|
||||
\ue032>\u0bb2; # LETTER LA
|
||||
\ue033\ue03c>\u0bb4;
|
||||
\ue033>\u0bb3; # LETTER LLA
|
||||
\ue034>\u0bb4; # LETTER LLLA
|
||||
\ue035>\u0bb5; # LETTER VA
|
||||
\ue036>\u0bb7; # REMAP (indicExceptions.txt): \u0bb6>\u0bb7 = LETTER SHA>LETTER SSA
|
||||
\ue037>\u0bb7; # LETTER SSA
|
||||
\ue038>\u0bb8; # LETTER SA
|
||||
\ue039>\u0bb9; # LETTER HA
|
||||
|
||||
\ue03c>; # FALLBACK BLOW AWAY NUKTA
|
||||
\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA
|
||||
|
||||
\ue03e>\u0bbe; # VOWEL SIGN AA
|
||||
\ue03f>\u0bbf; # VOWEL SIGN I
|
||||
\ue040>\u0bc0; # VOWEL SIGN II
|
||||
\ue041>\u0bc1; # VOWEL SIGN U
|
||||
\ue042>\u0bc2; # VOWEL SIGN UU
|
||||
\ue043>\u0bcd\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0bc3>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
|
||||
\ue044>\u0bcd\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0bc4>\u0bcd\u0bb0\u0bbf = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
|
||||
\ue045>\u0bbe; # REMAP (indicExceptions.txt): \u0bc5>\u0bbe = VOWEL SIGN CANDRA E>VOWEL SIGN AA
|
||||
\ue046>\u0bc6; # VOWEL SIGN E
|
||||
\ue047>\u0bc7; # VOWEL SIGN EE
|
||||
\ue048>\u0bc8; # VOWEL SIGN AI
|
||||
\ue049>\u0bbe; # REMAP (indicExceptions.txt): \u0bc9>\u0bbe = VOWEL SIGN CANDRA O>VOWEL SIGN AA
|
||||
\ue04a>\u0bca; # VOWEL SIGN O
|
||||
\ue04b>\u0bcb; # VOWEL SIGN OO
|
||||
\ue04c>\u0bcc; # VOWEL SIGN AU
|
||||
\ue04d>\u0bcd; # SIGN VIRAMA
|
||||
\ue050>\u0b93\u0bae\u0bcd; # REMAP (indicExceptions.txt): \u0bd0>\u0b93\u0bae\u0bcd = OM>LETTER OO.LETTER MA.SIGN VIRAMA
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>; # UNMAPPED InterIndic-Tamil: LENGTH MARK
|
||||
\ue056>\u0bc8; # REMAP (indicExceptions.txt): \u0bd6>\u0bc8 = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\ue057>\u0bd7; # AU LENGTH MARK
|
||||
\ue058>\u0b95; # FALLBACK
|
||||
\ue059>\u0b95; # REMAP (indicExceptions.txt): \u0bd9>\u0b95 = LETTER KHHA>LETTER KA
|
||||
\ue05a>\u0b95; # REMAP (indicExceptions.txt): \u0bda>\u0b95 = LETTER GHHA>LETTER KA
|
||||
\ue05b>\u0b9c; # REMAP (indicExceptions.txt): \u0bdb>\u0b9c = LETTER ZA>LETTER JA
|
||||
\ue05c>\u0b9f; # FALLBACK to LETTER TTA, the same target as \ue021 (LETTER DDA) and \ue05d (LETTER RHA)
|
||||
\ue05d>\u0b9f; # REMAP (indicExceptions.txt): \u0bdd>\u0b9f = LETTER RHA>LETTER TTA
|
||||
\ue05e>\u0baa; # REMAP (indicExceptions.txt): \u0bde>\u0baa = LETTER FA>LETTER PA
|
||||
\ue05f>\u0baf; # REMAP (indicExceptions.txt): \u0bdf>\u0baf = LETTER YYA>LETTER YA
|
||||
\ue060>\u0bb0\u0bbf; # REMAP (indicExceptions.txt): \u0be0>\u0bb0\u0bbf = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
|
||||
\ue061>\u0bb3; # FALLBACK LETTER LLA
|
||||
\ue062>\u0bbf; # FALLBACK VOWEL SIGN VOCALIC L
|
||||
\ue063>\u0bc0; # FALLBACK VOWEL SIGN VOCALIC LL
|
||||
\ue064>'.' ; # FALLBACK FOR DANDA
|
||||
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
|
||||
|
||||
\ue066>\u0030; # FALLBACK DIGIT ZERO
|
||||
|
||||
\ue067\ue066\ue066\ue066>\u0bF2;
|
||||
\ue067\ue066\ue066>\u0bf1;
|
||||
\ue067\ue066>\u0bF0;
|
||||
|
||||
\ue067>\u0be7; # DIGIT ONE
|
||||
\ue068>\u0be8; # DIGIT TWO
|
||||
\ue069>\u0be9; # DIGIT THREE
|
||||
\ue06a>\u0bea; # DIGIT FOUR
|
||||
\ue06b>\u0beb; # DIGIT FIVE
|
||||
\ue06c>\u0bec; # DIGIT SIX
|
||||
\ue06d>\u0bed; # DIGIT SEVEN
|
||||
\ue06e>\u0bee; # DIGIT EIGHT
|
||||
\ue06f>\u0bef; # DIGIT NINE
|
||||
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u0bb0; # LETTER RA WITH MIDDLE DIAGONAL (falls back to LETTER RA; \u0bc0 is VOWEL SIGN II)
|
||||
\ue072>\u0bb0; # LETTER RA WITH LOWER DIAGONAL (falls back to LETTER RA; \u0bc0 is VOWEL SIGN II)
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0bb5; # FALLBACK FOR ORIYA LETTER WA
|
||||
|
||||
1000 >\u0BF2; # NUMBER ONE THOUSAND
|
||||
100 >\u0BF1; # NUMBER ONE HUNDRED
|
||||
10 >\u0BF0; # NUMBER TEN
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,141 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Telugu
|
||||
#:: NFD (NFC) ;
|
||||
\ue001>\u0c01; # SIGN CANDRABINDU
|
||||
\ue002>\u0c02; # SIGN ANUSVARA
|
||||
\ue003>\u0c03; # SIGN VISARGA
|
||||
\uE004>\u0c05; # FALLBACK TO LETTER A
|
||||
\ue005>\u0c05; # LETTER A
|
||||
\ue006>\u0c06; # LETTER AA
|
||||
\ue007>\u0c07; # LETTER I
|
||||
\ue008>\u0c08; # LETTER II
|
||||
\ue009>\u0c09; # LETTER U
|
||||
\ue00a>\u0c0a; # LETTER UU
|
||||
\ue00b>\u0c0b; # LETTER VOCALIC R
|
||||
\ue00c>\u0c0c; # LETTER VOCALIC L
|
||||
\ue00d>\u0c0E; # FALLBACK MAPPING
|
||||
\ue00e>\u0c0E; # LETTER E
|
||||
\ue00f>\u0c0f; # LETTER EE
|
||||
\ue010>\u0c10; # LETTER AI
|
||||
\ue011>\u0c12; # FALLBACK MAPPING
|
||||
\ue012>\u0c12; # LETTER O
|
||||
\ue013>\u0c13; # LETTER OO
|
||||
\ue014>\u0c14; # LETTER AU
|
||||
\ue015>\u0c15; # LETTER KA
|
||||
\ue016>\u0c16; # LETTER KHA
|
||||
\ue017>\u0c17; # LETTER GA
|
||||
\ue018>\u0c18; # LETTER GHA
|
||||
\ue019>\u0c19; # LETTER NGA
|
||||
\ue01a>\u0c1a; # LETTER CA
|
||||
\ue01b>\u0c1b; # LETTER CHA
|
||||
\ue01c>\u0c1c; # LETTER JA
|
||||
\ue01d>\u0c1d; # LETTER JHA
|
||||
\ue01e>\u0c1e; # LETTER NYA
|
||||
\ue01f>\u0c1f; # LETTER TTA
|
||||
\ue020>\u0c20; # LETTER TTHA
|
||||
\ue021>\u0c21; # LETTER DDA
|
||||
\ue022>\u0c22; # LETTER DDHA
|
||||
\ue023>\u0c23; # LETTER NNA
|
||||
\ue024>\u0c24; # LETTER TA
|
||||
\ue025>\u0c25; # LETTER THA
|
||||
\ue026>\u0c26; # LETTER DA
|
||||
\ue027>\u0c27; # LETTER DHA
|
||||
\ue028>\u0c28; # LETTER NA
|
||||
\ue029>\u0c28; # REMAP (indicExceptions.txt): \u0c29>\u0c28 = LETTER NNNA>LETTER NA
|
||||
\ue02a>\u0c2a; # LETTER PA
|
||||
\ue02b>\u0c2b; # LETTER PHA
|
||||
\ue02c>\u0c2c; # LETTER BA
|
||||
\ue02d>\u0c2d; # LETTER BHA
|
||||
\ue02e>\u0c2e; # LETTER MA
|
||||
\ue02f>\u0c2f; # LETTER YA
|
||||
\ue030\ue03c>\u0c31;
|
||||
\ue030>\u0c30; # LETTER RA
|
||||
\ue031>\u0c31; # LETTER RRA
|
||||
\ue032>\u0c32; # LETTER LA
|
||||
\ue033>\u0c33; # LETTER LLA
|
||||
\ue034>\u0c33; # REMAP (indicExceptions.txt): \u0c34>\u0c33 = LETTER LLLA>LETTER LLA
|
||||
\ue035>\u0c35; # LETTER VA
|
||||
\ue036>\u0c36; # LETTER SHA
|
||||
\ue037>\u0c37; # LETTER SSA
|
||||
\ue038>\u0c38; # LETTER SA
|
||||
\ue039>\u0c39; # LETTER HA
|
||||
|
||||
\ue03c>; # FALLBACK BLOW AWAY NUKTA
|
||||
\ue03d>; # FALLBACK BLOW AWAY AVAGRAHA
|
||||
|
||||
\ue03e>\u0c3e; # VOWEL SIGN AA
|
||||
\ue03f>\u0c3f; # VOWEL SIGN I
|
||||
\ue040>\u0c40; # VOWEL SIGN II
|
||||
\ue041>\u0c41; # VOWEL SIGN U
|
||||
\ue042>\u0c42; # VOWEL SIGN UU
|
||||
\ue043>\u0c43; # VOWEL SIGN VOCALIC R
|
||||
\ue044>\u0c44; # VOWEL SIGN VOCALIC RR
|
||||
\ue045>\u0c46; # VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
\ue046>\u0c46; # VOWEL SIGN E
|
||||
\ue047>\u0c47; # VOWEL SIGN EE
|
||||
\ue048>\u0c48; # VOWEL SIGN AI
|
||||
\ue049>\u0c4a; # REMAP (indicExceptions.txt): \u0c49>\u0c4a = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
\ue04a>\u0c4a; # VOWEL SIGN O
|
||||
\ue04b>\u0c4b; # VOWEL SIGN OO
|
||||
\ue04c>\u0c4c; # VOWEL SIGN AU
|
||||
\ue04d>\u0c4d; # SIGN VIRAMA
|
||||
\ue050>\u0c13\u0c02; # REMAP (indicExceptions.txt): \u0c50>\u0c13\u0c02 = OM>LETTER OO.SIGN ANUSVARA
|
||||
\ue051>;
|
||||
\ue052>;
|
||||
\ue053>;
|
||||
\ue054>;
|
||||
\ue055>\u0c55; # LENGTH MARK
|
||||
\ue056>\u0c56; # AI LENGTH MARK
|
||||
\ue057>\u0c4c; # REMAP (indicExceptions.txt): \u0c57>\u0c4c = AU LENGTH MARK>VOWEL SIGN AU
|
||||
\ue058>\u0c15; # REMAP
|
||||
\ue059>\u0c16; # REMAP (indicExceptions.txt): \u0c59>\u0c16 = LETTER KHHA>LETTER KHA
|
||||
\ue05a>\u0c17; # REMAP (indicExceptions.txt): \u0c5a>\u0c17 = LETTER GHHA>LETTER GA
|
||||
\ue05b>\u0c1c; # REMAP (indicExceptions.txt): \u0c5b>\u0c1c = LETTER ZA>LETTER JA
|
||||
\ue05c>\u0c22; # REMAP
|
||||
\ue05d>\u0c22; # REMAP (indicExceptions.txt): \u0c5d>\u0c22 = LETTER RHA>LETTER DDHA
|
||||
\ue05e>\u0c2b; # REMAP (indicExceptions.txt): \u0c5e>\u0c2b = LETTER FA>LETTER PHA
|
||||
\ue05f>\u0c2f; # REMAP (indicExceptions.txt): \u0c5f>\u0c2f = LETTER YYA>LETTER YA
|
||||
\ue060>\u0c60; # LETTER VOCALIC RR
|
||||
\ue061>\u0c61; # LETTER VOCALIC LL
|
||||
\ue062>\u0c3f; # REMAP (indicExceptions.txt): \u0c62>\u0c3f = VOWEL SIGN VOCALIC L>VOWEL SIGN I
|
||||
\ue063>\u0c40; # REMAP (indicExceptions.txt): \u0c63>\u0c40 = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
|
||||
\ue064>'.' ; # FALLBACK FOR DANDA
|
||||
\ue065>'.' ; # FALLBACK FOR DOUBLE DANDA
|
||||
\ue066>\u0c66; # DIGIT ZERO
|
||||
\ue067>\u0c67; # DIGIT ONE
|
||||
\ue068>\u0c68; # DIGIT TWO
|
||||
\ue069>\u0c69; # DIGIT THREE
|
||||
\ue06a>\u0c6a; # DIGIT FOUR
|
||||
\ue06b>\u0c6b; # DIGIT FIVE
|
||||
\ue06c>\u0c6c; # DIGIT SIX
|
||||
\ue06d>\u0c6d; # DIGIT SEVEN
|
||||
\ue06e>\u0c6e; # DIGIT EIGHT
|
||||
\ue06f>\u0c6f; # DIGIT NINE
|
||||
|
||||
\ue070>; # ABBREVIATION SIGN
|
||||
\ue071>\u0c30; # LETTER RA WITH MIDDLE DIAGONAL
|
||||
\ue072>\u0c30; # LETTER RA WITH LOWER DIAGONAL
|
||||
\ue073>; # RUPEE MARK
|
||||
\ue074>; # RUPEE SIGN
|
||||
\ue075>; # CURRENCY NUMERATOR ONE
|
||||
\ue076>; # CURRENCY NUMERATOR TWO
|
||||
\ue077>; # CURRENCY NUMERATOR THREE
|
||||
\ue078>; # CURRENCY NUMERATOR FOUR
|
||||
\ue079>; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
\ue07A>; # CURRENCY DENOMINATOR SIXTEEN
|
||||
\ue07B>; # ISSHAR
|
||||
\uE07C>; # TIPPI
|
||||
\uE07D>; # ADDAK
|
||||
\uE07E>; # IRI
|
||||
\uE07F>; # URA
|
||||
\uE080>; # EK ONKAR
|
||||
\uE081>\u0c35; # FALLBACK FOR ORIYA LETTER WA
|
||||
0 > \u0c66; # FALLBACK FOR TAMIL
|
||||
1 > \u0c67;
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,92 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Kannada-InterIndic
|
||||
\u0CC6\u0CD5>\uE047; # VOWEL SIGN EE
|
||||
\u0CC6\u0CCD\u0CD6>\uE048\ue04d; # VOWEL SIGN AI
|
||||
\u0CC6\u0CD6>\uE048; # VOWEL SIGN AI
|
||||
\u0CC6\u0CC2\u0CD5>\uE04B; # VOWEL SIGN OO
|
||||
\u0CC6\u0CC2>\uE04A; # VOWEL SIGN O
|
||||
\u0CBF\u0CD5>\uE040; # VOWEL SIGN II
|
||||
|
||||
\u0C82>\uE002; # SIGN ANUSVARA
|
||||
\u0C83>\uE003; # SIGN VISARGA
|
||||
\u0C85>\uE005; # LETTER A
|
||||
\u0C86>\uE006; # LETTER AA
|
||||
\u0C87>\uE007; # LETTER I
|
||||
\u0C88>\uE008; # LETTER II
|
||||
\u0C89>\uE009; # LETTER U
|
||||
\u0C8A>\uE00A; # LETTER UU
|
||||
\u0C8B>\uE00B; # LETTER VOCALIC R
|
||||
\u0C8C>\uE00C; # LETTER VOCALIC L
|
||||
\u0C8E>\uE00E; # LETTER E
|
||||
\u0C8F>\uE00F; # LETTER EE
|
||||
\u0C90>\uE010; # LETTER AI
|
||||
\u0C92>\uE012; # LETTER O
|
||||
\u0C93>\uE013; # LETTER OO
|
||||
\u0C94>\uE014; # LETTER AU
|
||||
\u0C95>\uE015; # LETTER KA
|
||||
\u0C96>\uE016; # LETTER KHA
|
||||
\u0C97>\uE017; # LETTER GA
|
||||
\u0C98>\uE018; # LETTER GHA
|
||||
\u0C99>\uE019; # LETTER NGA
|
||||
\u0C9A>\uE01A; # LETTER CA
|
||||
\u0C9B>\uE01B; # LETTER CHA
|
||||
\u0C9C>\uE01C; # LETTER JA
|
||||
\u0C9D>\uE01D; # LETTER JHA
|
||||
\u0C9E>\uE01E; # LETTER NYA
|
||||
\u0C9F>\uE01F; # LETTER TTA
|
||||
\u0CA0>\uE020; # LETTER TTHA
|
||||
\u0CA1>\uE021; # LETTER DDA
|
||||
\u0CA2>\uE022; # LETTER DDHA
|
||||
\u0CA3>\uE023; # LETTER NNA
|
||||
\u0CA4>\uE024; # LETTER TA
|
||||
\u0CA5>\uE025; # LETTER THA
|
||||
\u0CA6>\uE026; # LETTER DA
|
||||
\u0CA7>\uE027; # LETTER DHA
|
||||
\u0CA8>\uE028; # LETTER NA
|
||||
\u0CAA>\uE02A; # LETTER PA
|
||||
\u0CAB>\uE02B; # LETTER PHA
|
||||
\u0CAC>\uE02C; # LETTER BA
|
||||
\u0CAD>\uE02D; # LETTER BHA
|
||||
\u0CAE>\uE02E; # LETTER MA
|
||||
\u0CAF>\uE02F; # LETTER YA
|
||||
\u0CB0>\uE030; # LETTER RA
|
||||
\u0CB1>\uE031; # LETTER RRA
|
||||
\u0CB2>\uE032; # LETTER LA
|
||||
\u0CB3>\uE033; # LETTER LLA
|
||||
\u0CB5>\uE035; # LETTER VA
|
||||
\u0CB6>\uE036; # LETTER SHA
|
||||
\u0CB7>\uE037; # LETTER SSA
|
||||
\u0CB8>\uE038; # LETTER SA
|
||||
\u0CB9>\uE039; # LETTER HA
|
||||
\u0CBC>\uE03C; # SIGN NUKTA
|
||||
\u0CBD>\uE03D; # AVAGRAHA
|
||||
\u0CBE>\uE03E; # VOWEL SIGN AA
|
||||
\u0CBF>\uE03F; # VOWEL SIGN I
|
||||
\u0CC1>\uE041; # VOWEL SIGN U
|
||||
\u0CC2>\uE042; # VOWEL SIGN UU
|
||||
\u0CC3>\uE043; # VOWEL SIGN VOCALIC R
|
||||
\u0CC4>\uE044; # VOWEL SIGN VOCALIC RR
|
||||
\u0CC6>\uE046; # VOWEL SIGN E
|
||||
\u0CCC>\uE04C; # VOWEL SIGN AU
|
||||
\u0CCD>\uE04D; # SIGN VIRAMA
|
||||
\u0CD5>\uE055; # LENGTH MARK
|
||||
\u0CD6>\uE056; # AI LENGTH MARK
|
||||
\u0CDE>\uE034; # LETTER LLLA
|
||||
\u0CE0>\uE060; # LETTER VOCALIC RR
|
||||
\u0CE1>\uE061; # LETTER VOCALIC LL
|
||||
\u0CE6>\uE066; # DIGIT ZERO
|
||||
\u0CE7>\uE067; # DIGIT ONE
|
||||
\u0CE8>\uE068; # DIGIT TWO
|
||||
\u0CE9>\uE069; # DIGIT THREE
|
||||
\u0CEA>\uE06A; # DIGIT FOUR
|
||||
\u0CEB>\uE06B; # DIGIT FIVE
|
||||
\u0CEC>\uE06C; # DIGIT SIX
|
||||
\u0CED>\uE06D; # DIGIT SEVEN
|
||||
\u0CEE>\uE06E; # DIGIT EIGHT
|
||||
\u0CEF>\uE06F; # DIGIT NINE
|
||||
|
||||
# eof
|
|
@ -1,383 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Latin-InterIndic
|
||||
#:: NFD;
|
||||
#\ue000 reserved
|
||||
#consonants
|
||||
$chandrabindu=\ue001;
|
||||
$anusvara=\ue002;
|
||||
$visarga=\ue003;
|
||||
#\ue004 reserved
|
||||
# w<vowel> represents the stand-alone form
|
||||
$wa=\ue005;
|
||||
$waa=\ue006;
|
||||
$wi=\ue007;
|
||||
$wii=\ue008;
|
||||
$wu=\ue009;
|
||||
$wuu=\ue00a;
|
||||
$wr=\ue00b;
|
||||
$wl=\ue00c;
|
||||
$wce=\ue00d; # LETTER CANDRA E
|
||||
$wse=\ue00e; # LETTER SHORT E
|
||||
$we=\ue00f; # \u090f LETTER E
|
||||
$wai=\ue010;
|
||||
$wco=\ue011; # LETTER CANDRA O
|
||||
$wso=\ue012; # LETTER SHORT O
|
||||
$wo=\ue013; # \u0913 LETTER O
|
||||
$wau=\ue014;
|
||||
$ka=\ue015;
|
||||
$kha=\ue016;
|
||||
$ga=\ue017;
|
||||
$gha=\ue018;
|
||||
$nga=\ue019;
|
||||
$ca=\ue01a;
|
||||
$cha=\ue01b;
|
||||
$ja=\ue01c;
|
||||
$jha=\ue01d;
|
||||
$nya=\ue01e;
|
||||
$tta=\ue01f;
|
||||
$ttha=\ue020;
|
||||
$dda=\ue021;
|
||||
$ddha=\ue022;
|
||||
$nna=\ue023;
|
||||
$ta=\ue024;
|
||||
$tha=\ue025;
|
||||
$da=\ue026;
|
||||
$dha=\ue027;
|
||||
$na=\ue028;
|
||||
$ena=\ue029; #compatibility
|
||||
$pa=\ue02a;
|
||||
$pha=\ue02b;
|
||||
$ba=\ue02c;
|
||||
$bha=\ue02d;
|
||||
$ma=\ue02e;
|
||||
$ya=\ue02f;
|
||||
$ra=\ue030;
|
||||
$rra=\ue031;
|
||||
$la=\ue032;
|
||||
$lla=\ue033;
|
||||
$ela=\ue034; #compatibility
|
||||
$va=\ue035;
|
||||
$vva=\ue081;
|
||||
$sha=\ue036;
|
||||
$ssa=\ue037;
|
||||
$sa=\ue038;
|
||||
$ha=\ue039;
|
||||
#\u093a Reserved
|
||||
#\u093b Reserved
|
||||
$nukta=\ue03c;
|
||||
$avagraha=\ue03d; # SIGN AVAGRAHA
|
||||
# <vowel> represents the dependent form
|
||||
$aa=\ue03e;
|
||||
$i=\ue03f;
|
||||
$ii=\ue040;
|
||||
$u=\ue041;
|
||||
$uu=\ue042;
|
||||
$rh=\ue043;
|
||||
$lh=\ue044;
|
||||
$ce=\ue045; #VOWEL SIGN CANDRA E
|
||||
$se=\ue046; #VOWEL SIGN SHORT E
|
||||
$e=\ue047;
|
||||
$ai=\ue048;
|
||||
$co=\ue049; # VOWEL SIGN CANDRA O
|
||||
$so=\ue04a; # VOWEL SIGN SHORT O
|
||||
$o=\ue04b; # \u094b
|
||||
$au=\ue04c;
|
||||
$virama=\ue04d;
|
||||
# \u094e Reserved
|
||||
# \u094f Reserved
|
||||
$om = \ue050; # OM
|
||||
# \u0951>; # UNMAPPED STRESS SIGN UDATTA
|
||||
# \u0952>; # UNMAPPED STRESS SIGN ANUDATTA
|
||||
# \u0953>; # UNMAPPED GRAVE ACCENT
|
||||
# \u0954>; # UNMAPPED ACUTE ACCENT
|
||||
$lm = \ue055;# Telugu Length Mark
|
||||
$ailm=\ue056;# AI Length Mark
|
||||
$aulm=\ue057;# AU Length Mark
|
||||
#urdu compatibility forms
|
||||
$uka=\ue058;
|
||||
$ukha=\ue059;
|
||||
$ugha=\ue05a;
|
||||
$ujha=\ue05b;
|
||||
$uddha=\ue05c;
|
||||
$udha=\ue05d;
|
||||
$ufa=\ue05e;
|
||||
$uya=\ue05f;
|
||||
$wrr=\ue060;
|
||||
$wll=\ue061;
|
||||
$rrh=\ue062;
|
||||
$llh=\ue063;
|
||||
$danda=\ue064;
|
||||
$doubleDanda=\ue065;
|
||||
$zero=\ue066; # DIGIT ZERO
|
||||
$one=\ue067; # DIGIT ONE
|
||||
$two=\ue068; # DIGIT TWO
|
||||
$three=\ue069; # DIGIT THREE
|
||||
$four=\ue06a; # DIGIT FOUR
|
||||
$five=\ue06b; # DIGIT FIVE
|
||||
$six=\ue06c; # DIGIT SIX
|
||||
$seven=\ue06d; # DIGIT SEVEN
|
||||
$eight=\ue06e; # DIGIT EIGHT
|
||||
$nine=\ue06f; # DIGIT NINE
|
||||
# For all other scripts
|
||||
$ecp0=\ue070;
|
||||
$ecp1=\ue071;
|
||||
$ecp2=\ue072;
|
||||
$ecp3=\ue073;
|
||||
$ecp4=\ue074;
|
||||
$ecp5=\ue075;
|
||||
$ecp6=\ue076;
|
||||
$ecp7=\ue077;
|
||||
$ecp8=\ue078;
|
||||
$ecp9=\ue079;
|
||||
$ecpA=\ue07a;
|
||||
$ecpB=\ue07b;
|
||||
$ecpC=\ue07c;
|
||||
$ecpD=\ue07d;
|
||||
$ecpE=\ue07e;
|
||||
$ecpF=\ue07f;
|
||||
# \u0970>; # UNMAPPED ABBREVIATION SIGN
|
||||
$depVowelAbove=[\ue03e-\ue040\ue045-\ue04c];
|
||||
$depVowelBelow=[\ue041-\ue044];
|
||||
$endThing=[$danda$doubleDanda];
|
||||
# $x was originally called '&'; $z was '%'
|
||||
$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
|
||||
$z=[bcdfghjklmnpqrstvwxyz];
|
||||
$consonants=[[$ka-$ha]$z[\u0915-\u0939][\u0995-\u09b9][\u0a15-\u0a39][\u0a95-\u0ab9][\u0b15-\u0b39][\u0b95-\u0bb9][\u0c15-\u0c39][\u0c95-\u0cb9][\u0d15-\u0d39]];
|
||||
\u0315 > $avagraha;
|
||||
\u0303>$chandrabindu$anusvara;
|
||||
m\u0310>$chandrabindu;
|
||||
h\u0323>$visarga;
|
||||
x>$ka$virama$sa;
|
||||
# convert to independent forms at start of word or syllable:
|
||||
# dependent forms for roundtrip
|
||||
\u0314a\u0304>$aa;
|
||||
\u0314ai>$ai;
|
||||
\u0314au>$au;
|
||||
\u0314ii>$ii;
|
||||
\u0314i\u0304>$ii;
|
||||
\u0314i>$i;
|
||||
\u0314u\u0304>$uu;
|
||||
\u0314u>$u;
|
||||
\u0314r\u0325\u0304>$rrh;
|
||||
\u0314r\u0325>$rh;
|
||||
\u0314l\u0325\u0304>$llh;
|
||||
\u0314lh>$lh;
|
||||
\u0314l\u0325>$lh;
|
||||
\u0314e\u0304>$e;
|
||||
\u0314o\u0304>$o;
|
||||
\u0314a>;
|
||||
\u0314e\u0306>$ce;
|
||||
\u0314o\u0306>$co;
|
||||
\u0314e>$se;
|
||||
\u0314o>$so;
|
||||
|
||||
# preceded by consonants
|
||||
$consonants{ a\u0304>$aa;
|
||||
$consonants{ ai>$ai;
|
||||
$consonants{ au>$au;
|
||||
$consonants{ ii>$ii;
|
||||
$consonants{ i\u0304>$ii;
|
||||
$consonants{ i>$i;
|
||||
$consonants{ u\u0304>$uu;
|
||||
$consonants{ u>$u;
|
||||
$consonants{ r\u0325\u0304>$rrh;
|
||||
$consonants{ r\u0325a>$rh;
|
||||
$consonants{ r\u0325>$rh;
|
||||
$consonants{ l\u0325\u0304>$llh;
|
||||
$consonants{ lh>$lh;
|
||||
$consonants{ l\u0325>$lh;
|
||||
$consonants{ e\u0304>$e;
|
||||
$consonants{ o\u0304>$o;
|
||||
$consonants{ e\u0306>$ce;
|
||||
$consonants{ o\u0306>$co;
|
||||
$consonants{ e>$se;
|
||||
$consonants{ o>$so;
|
||||
|
||||
# e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
|
||||
a\u0304>$waa;
|
||||
ai>$wai;
|
||||
au>$wau;
|
||||
i\u0304>$wii;
|
||||
i>$wi;
|
||||
u\u0304>$wuu;
|
||||
u>$wu;
|
||||
r\u0325\u0304>$wrr;
|
||||
r\u0325>$wr;
|
||||
l\u0325\u0304>$wll;
|
||||
lh>$wl;
|
||||
l\u0325>$wl;
|
||||
e\u0304>$we;
|
||||
o\u0304>$wo;
|
||||
a>$wa;
|
||||
e\u0306>$wce;
|
||||
o\u0306>$wco;
|
||||
e>$wse;
|
||||
''om>$om;
|
||||
o>$wso;
|
||||
|
||||
# rules for anusvara
# A nasal written directly before certain consonants becomes anusvara.
# In each rule the text after "}" is look-ahead context: it must follow for
# the rule to match, but only the text before "}" is replaced.
# The first three rules are exceptions and are listed ahead of the general
# n}[tdn] and n}[ylvshr] rules further down: "n" before vocalic r, vocalic l
# or "na" is emitted as NA + virama instead of anusvara.
# NOTE(review): the "|" in those outputs presumably places the cursor before
# the virama so that the later "$virama ..." vowel rules can still match it --
# confirm against the ICU transform rule syntax.
|
||||
n}r\u0325 > $na|$virama; # n before vocalic r (r + ring below)
|
||||
n}l\u0325 > $na|$virama; # n before vocalic l (l + ring below)
|
||||
n}na > $na|$virama; # n before "na"
|
||||
n\u0307}[kg] > $anusvara; # n + dot above, before k or g
|
||||
n\u0307}n\u0307 > $anusvara; # n + dot above, before another n + dot above
|
||||
n\u0304}[cj] > $anusvara; # n + macron, before c or j
|
||||
n\u0304}n\u0303 > $anusvara; # n + macron, before n + tilde
|
||||
n\u0323}[tdn]\u0323 > $anusvara; # n + dot below, before t/d/n + dot below
|
||||
n}[tdn] > $anusvara; # plain n before t, d or n
|
||||
m}[pbm] > $anusvara; # m before p, b or m
|
||||
n}[ylvshr] > $anusvara; # plain n before y, l, v, s, h or r
|
||||
m\u0307 > $anusvara; # m + dot above is anusvara in any context
|
||||
|
||||
#urdu compatibility
|
||||
q>$uka|$virama;
|
||||
k\u0331h\u0331>$ukha |$virama;
|
||||
g\u0307> $ugha | $virama;
|
||||
z > $ujha |$virama;
|
||||
f > $ufa|$virama;
|
||||
|
||||
# dev
|
||||
y\u0307>$uya|$virama;
|
||||
l\u0331>$ela|$virama;
|
||||
n\u0331>$ena|$virama;
|
||||
n\u0307>$nga|$virama;
|
||||
n\u0303>$nya|$virama;
|
||||
n\u0323>$nna|$virama;
|
||||
t\u0323h>$ttha|$virama;
|
||||
t\u0323>$tta|$virama;
|
||||
r\u0323h>$udha|$virama;
|
||||
r\u0323>$uddha|$virama;
|
||||
d\u0323h>$ddha|$virama;
|
||||
d\u0323>$dda|$virama;
|
||||
kh>$kha|$virama;
|
||||
k>$ka|$virama;
|
||||
gh>$gha|$virama;
|
||||
g>$ga|$virama;
|
||||
ch>$cha|$virama;
|
||||
c>$ca|$virama;
|
||||
jh>$jha|$virama;
|
||||
j>$ja|$virama;
|
||||
ny>$nya|$virama;
|
||||
tth>$ttha|$virama;
|
||||
ddh>$ddha|$virama;
|
||||
th>$tha|$virama;
|
||||
t>$ta|$virama;
|
||||
dh>$dha|$virama;
|
||||
d>$da|$virama;
|
||||
n>$na|$virama;
|
||||
ph>$pha|$virama;
|
||||
p>$pa|$virama;
|
||||
bh>$bha|$virama;
|
||||
b>$ba|$virama;
|
||||
m>$ma|$virama;
|
||||
y>$ya|$virama;
|
||||
r\u0331>$rra|$virama;
|
||||
r>$ra|$virama;
|
||||
l\u0323>$lla|$virama;
|
||||
l>$la|$virama;
|
||||
v>$va|$virama;
|
||||
w\u0307>$vva|$virama;
|
||||
w>$va|$virama;
|
||||
sh>$sha|$virama;
|
||||
ss>$ssa|$virama;
|
||||
s\u0323>$ssa|$virama;
|
||||
s\u0301>$sha|$virama;
|
||||
s>$sa|$virama;
|
||||
h>$ha|$virama;
|
||||
'.'>$danda;
|
||||
$danda'.'>$doubleDanda;
|
||||
$depVowelAbove{'~'>$anusvara;
|
||||
$depVowelBelow{'~'>$chandrabindu;
|
||||
# convert to dependent forms after consonant with no vowel:
|
||||
# e.g. kai -> {ka}{virama}ai -> {ka}{ai}
|
||||
#$virama aa>$aa;
|
||||
$virama a\u0304>$aa;
|
||||
$virama ai>$ai;
|
||||
$virama au>$au;
|
||||
$virama ii>$ii;
|
||||
$virama i\u0304>$ii;
|
||||
$virama i>$i;
|
||||
#$virama uu>$uu;
|
||||
$virama u\u0304>$uu;
|
||||
$virama u>$u;
|
||||
#$virama rrh>$rrh;
|
||||
$virama r\u0325\u0304>$rrh;
|
||||
#$virama rh>$rh;
|
||||
$virama r\u0325a>$rh;
|
||||
$virama r\u0325>$rh;
|
||||
$virama l\u0325\u0304>$llh;
|
||||
$virama lh>$lh;
|
||||
$virama l\u0325>$lh;
|
||||
$virama e\u0304>$e;
|
||||
$virama o\u0304>$o;
|
||||
$virama a>;
|
||||
$virama e\u0306>$ce;
|
||||
$virama o\u0306>$co;
|
||||
$virama e>$se;
|
||||
$virama o>$so;
|
||||
|
||||
|
||||
# otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
|
||||
#$virama''aa>$waa;
|
||||
$virama''a\u0304>$waa;
|
||||
$virama''ai>$wai;
|
||||
$virama''au>$wau;
|
||||
#$virama''ii>$wii;
|
||||
$virama''i\u0304>$wii;
|
||||
$virama''i>$wi;
|
||||
#$virama''uu>$wuu;
|
||||
$virama''u\u0304>$wuu;
|
||||
$virama''u>$wu;
|
||||
#$virama''rrh>$wrr;
|
||||
$virama''r\u0325\u0304>$wrr;
|
||||
#$virama''rh>$wr;
|
||||
$virama''r\u0325>$wr;
|
||||
$virama''l\u0325\u0304>$wll;
|
||||
#$virama''lh>$wl;
|
||||
$virama''l\u0325>$wl;
|
||||
$virama''e\u0304>$we;
|
||||
$virama''o\u0304>$wo;
|
||||
$virama''a>$wa;
|
||||
$virama''e\u0306>$wce;
|
||||
$virama''o\u0306>$wco;
|
||||
$virama''e>$wse;
|
||||
$virama''o>$wso;
|
||||
# no virama
|
||||
''a\u0304>$waa;
|
||||
''ai>$wai;
|
||||
''au>$wau;
|
||||
''i\u0304>$wii;
|
||||
''i>$wi;
|
||||
''u\u0304>$wuu;
|
||||
''u>$wu;
|
||||
''r\u0325\u0304>$wrr;
|
||||
''r\u0325>$wr;
|
||||
''l\u0325\u0304>$wll;
|
||||
''l\u0325>$wl;
|
||||
''e\u0304>$we;
|
||||
''o\u0304>$wo;
|
||||
''a>$wa;
|
||||
''e\u0306>$wce;
|
||||
''o\u0306>$wco;
|
||||
''e>$wse;
|
||||
''o>$wso;
|
||||
|
||||
$virama } [$z] > $virama;
|
||||
$virama } ' ' > $virama ;
|
||||
$virama}$endThing>;
|
||||
0>$zero;
|
||||
1>$one;
|
||||
2>$two;
|
||||
3>$three;
|
||||
4>$four;
|
||||
5>$five;
|
||||
6>$six;
|
||||
7>$seven;
|
||||
8>$eight;
|
||||
9>$nine;
|
||||
''>;
|
||||
#:: NFC (NFD) ;
|
|
@ -1,522 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
|
||||
#- the INDEX file. This transliterator is, by itself, not
|
||||
#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
|
||||
#- inverses thereof.
|
||||
|
||||
# Transliteration from Latin characters to Korean script is done in
|
||||
# two steps: Latin to Jamo, then Jamo to Hangul. The Jamo-Hangul
|
||||
# transliteration is done algorithmically following Unicode 3.0
|
||||
# section 3.11. This file implements the Latin to Jamo
|
||||
# transliteration using rules.
|
||||
|
||||
# Jamo occupy the block 1100-11FF. Within this block there are three
|
||||
# groups of characters: initial consonants or choseong (I), medial
|
||||
# vowels or jungseong (M), and trailing consonants or jongseong (F).
|
||||
# Standard Korean syllables are of the form I+M+F*.
|
||||
|
||||
# Section 3.11 describes the use of 'filler' jamo to convert
|
||||
# nonstandard syllables to standard form: the choseong filler 115F and
|
||||
# the jungseong filler 1160. In this transliterator, we will not use
|
||||
# 115F or 1160.
|
||||
|
||||
# We will, however, insert two 'null' jamo to make foreign words
|
||||
# conform to Korean syllable structure. These are the null initial
|
||||
# consonant 110B (IEUNG) and the null vowel 1173 (EU). In Latin text,
|
||||
# we will use the separator in order to disambiguate strings,
|
||||
# e.g. "kan-ggan" (initial GG) vs. "kanggan" (final NG + initial G).
|
||||
|
||||
# We will not use all of the characters in the jamo block. We will
|
||||
# only use the 19 initials, 21 medials, and 27 finals possessing a
|
||||
# jamo short name as defined in section 4.4 of the Unicode book.
|
||||
|
||||
# Rules of thumb. These guidelines provide the basic framework
|
||||
# for the rules. They are phrased in terms of Latin-Jamo transliteration.
|
||||
# The Jamo-Latin rules derive from these, since the Jamo-Latin rules are
|
||||
# just context-free transliteration of jamo to corresponding short names,
|
||||
# with the addition of separators to maintain round-trip integrity
|
||||
# in the context of the Latin-Jamo rules.
|
||||
|
||||
# A sequence of vowels:
|
||||
# - Take the longest sequence you can. If there are too many, or you don't
|
||||
# have a starting consonant, introduce a 110B as necessary.
|
||||
|
||||
# A sequence of consonants.
|
||||
# - First join the double consonants: G + G -> GG
|
||||
# - In the remaining list,
|
||||
# -- If there is no preceding vowel, take the first consonant, and insert EU
|
||||
# after it. Continue with the rest of the consonants.
|
||||
# -- If there is one consonant, attach to the following vowel
|
||||
# -- If there are two consonants and a following vowel, attach one to the
|
||||
# preceding vowel, and one to the following vowel.
|
||||
# -- If there are more than two consonants, join the first two together if you
|
||||
# can: L + G => LG
|
||||
# -- If you still end up with more than 2 consonants, insert EU after the
|
||||
# first one, and continue with the rest of the consonants.
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Variables
|
||||
|
||||
# Some latin consonants or consonant pairs only occur as initials, and
|
||||
# some only as finals, but some occur as both. This makes some jamo
|
||||
# consonants ambiguous when transliterated into latin.
|
||||
# Initial only: IEUNG BB DD JJ R
|
||||
# Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
|
||||
# Initial and Final: B C D G GG H J K M N P S SS T
|
||||
|
||||
$Gi = \u1100;
|
||||
$GGi = \u1101;
|
||||
$Ni = \u1102;
|
||||
$Di = \u1103;
|
||||
$DD = \u1104;
|
||||
$R = \u1105;
|
||||
$Mi = \u1106;
|
||||
$Bi = \u1107;
|
||||
$BB = \u1108;
|
||||
$Si = \u1109;
|
||||
$SSi = \u110A;
|
||||
$IEUNG = \u110B; # null initial, inserted during Latin-Jamo
|
||||
$Ji = \u110C;
|
||||
$JJ = \u110D;
|
||||
$Ci = \u110E;
|
||||
$Ki = \u110F;
|
||||
$Ti = \u1110;
|
||||
$Pi = \u1111;
|
||||
$Hi = \u1112;
|
||||
|
||||
$A = \u1161;
|
||||
$AE = \u1162;
|
||||
$YA = \u1163;
|
||||
$YAE = \u1164;
|
||||
$EO = \u1165;
|
||||
$E = \u1166;
|
||||
$YEO = \u1167;
|
||||
$YE = \u1168;
|
||||
$O = \u1169;
|
||||
$WA = \u116A;
|
||||
$WAE = \u116B;
|
||||
$OE = \u116C;
|
||||
$YO = \u116D;
|
||||
$U = \u116E;
|
||||
$WEO = \u116F;
|
||||
$WE = \u1170;
|
||||
$WI = \u1171;
|
||||
$YU = \u1172;
|
||||
$EU = \u1173; # null medial, inserted during Latin-Jamo
|
||||
$YI = \u1174;
|
||||
$I = \u1175;
|
||||
|
||||
$Gf = \u11A8;
|
||||
$GGf = \u11A9;
|
||||
$GS = \u11AA;
|
||||
$Nf = \u11AB;
|
||||
$NJ = \u11AC;
|
||||
$NH = \u11AD;
|
||||
$Df = \u11AE;
|
||||
$L = \u11AF;
|
||||
$LG = \u11B0;
|
||||
$LM = \u11B1;
|
||||
$LB = \u11B2;
|
||||
$LS = \u11B3;
|
||||
$LT = \u11B4;
|
||||
$LP = \u11B5;
|
||||
$LH = \u11B6;
|
||||
$Mf = \u11B7;
|
||||
$Bf = \u11B8;
|
||||
$BS = \u11B9;
|
||||
$Sf = \u11BA;
|
||||
$SSf = \u11BB;
|
||||
$NG = \u11BC;
|
||||
$Jf = \u11BD;
|
||||
$Cf = \u11BE;
|
||||
$Kf = \u11BF;
|
||||
$Tf = \u11C0;
|
||||
$Pf = \u11C1;
|
||||
$Hf = \u11C2;
|
||||
|
||||
$jamoInitial = [\u1100-\u1112];
|
||||
|
||||
$jamoMedial = [\u1161-\u1175];
|
||||
|
||||
$latinInitial = [bcdghjkmnprst];
|
||||
|
||||
# Any character in the latin transliteration of a medial
|
||||
$latinMedial = [aeiouwy];
|
||||
|
||||
# The last character of the latin transliteration of a medial
|
||||
$latinMedialEnd = [aeiou];
|
||||
|
||||
# Disambiguation separator
|
||||
$sep = \';
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Jamo-Latin
|
||||
|
||||
# Jamo to latin is relatively simple, since it is the latin that is
|
||||
# ambiguous. Most rules are straightforward, and we encode them below
|
||||
# as simple add-on back rule, e.g.:
|
||||
|
||||
# $jamoMedial {bs} > $BS;
|
||||
|
||||
# becomes
|
||||
|
||||
# $jamoMedial {bs} <> $BS;
|
||||
|
||||
# Furthermore, we don't care about the ordering for Jamo-Latin because
|
||||
# we are going from single characters, so we can very easily piggyback
|
||||
# on the Latin-Jamo.
|
||||
|
||||
# The main issue with Jamo-Latin is when to insert separators.
|
||||
# Separators are inserted to obtain correct round trip behavior. For
|
||||
# example, the sequence Ki A Gf Gi E, if transliterated to "kagge",
|
||||
# would then round trip to Ki A GGi E. To prevent this, we insert a
|
||||
# separator: "kag-ge". IMPORTANT: The need for separators depends
|
||||
# very specifically on the behavior of the Latin-Jamo rules. A change
|
||||
# in the Latin-Jamo behavior can completely change the way the
|
||||
# separator insertion must be done.
|
||||
|
||||
# First try to preserve actual separators in the jamo text by doubling
|
||||
# them. This fixes problems like:
|
||||
# (Di)(A)(Ji)(U)(NG)-(IEUNG)(YEO)(Nf)(Gi)(YEO)(L) => dajung-yeongyeol
|
||||
# => (Di)(A)(Ji)(U)(NG)(IEUNG)(YEO)(Nf)(Gi)(YEO)(L). This is optional
|
||||
# -- if we don't care about losing separators in the jamo, we can delete
|
||||
# this rule.
|
||||
|
||||
$sep $sep <> $sep;
|
||||
|
||||
# Triple consonants. For three consonants "axxx" we insert a
|
||||
# separator between the first and second "x" if XXf, Xf, and Xi all
|
||||
# exist, and we have A Xf XXi. This prevents the reverse
|
||||
# transliteration to A XXf Xi.
|
||||
|
||||
$sep < $latinMedialEnd g {} $GGi;
|
||||
$sep < $latinMedialEnd s {} $SSi;
|
||||
|
||||
# For vowels the rule is similar. If there is a vowel "ae" such that
|
||||
# "a" by itself and "e" by itself are vowels, then we want to map A E
|
||||
# to "a-e" so as not to round trip to AE. However, in the text Ki EO
|
||||
# IEUNG E we don't need to map to "keo-e". "keoe" suffices. For
|
||||
# vowels of the form "aei", both "ae" + "i" and "a" + "ei" must be
|
||||
# tested. NOTE: These rules used to have a left context of
|
||||
# $latinInitial instead of [^$latinMedial]. The problem with this is
|
||||
# sequences where an initial IEUNG is transliterated away:
|
||||
# (IEUNG)(A)(IEUNG)(EO) => aeo => (IEUNG)(AE)(IEUNG)(O)
|
||||
|
||||
$sep < [^$latinMedial] [y w] e {} [$O $OE];
|
||||
$sep < [^$latinMedial] e {} [$O $OE $U];
|
||||
$sep < [^$latinMedial] [o a] {} [$E $EO $EU];
|
||||
$sep < [^$latinMedial] [w y] a {} [$E $EO $EU];
|
||||
|
||||
# Similar to the above, but with an intervening $IEUNG.
|
||||
|
||||
$sep < [^$latinMedial] [y w] e {} $IEUNG [$O $OE];
|
||||
$sep < [^$latinMedial] e {} $IEUNG [$O $OE $U];
|
||||
$sep < [^$latinMedial] [o a] {} $IEUNG [$E $EO $EU];
|
||||
$sep < [^$latinMedial] [w y] a {} $IEUNG [$E $EO $EU];
|
||||
|
||||
# Single finals followed by IEUNG. The jamo sequence A Xf IEUNG E,
|
||||
# where Xi also exists, must be transliterated as "ax-e" to prevent
|
||||
# the round trip conversion to A Xi E.
|
||||
|
||||
$sep < $latinMedialEnd b {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd c {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd d {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd g {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd h {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd j {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd k {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd m {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd n {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd p {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd s {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd t {} $IEUNG $jamoMedial;
|
||||
|
||||
# Double finals followed by IEUNG. Similar to the single finals
|
||||
# followed by IEUNG. Any latin consonant pair X Y, between medials,
|
||||
# that we would split by Latin-Jamo, we must handle when it occurs as
|
||||
# part of A XYf IEUNG E, to prevent round trip conversion to A Xf Yi
|
||||
# E.
|
||||
|
||||
$sep < $latinMedialEnd b s {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd g g {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd g s {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l b {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l g {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l h {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l m {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l p {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l s {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd l t {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd n g {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd n h {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd n j {} $IEUNG $jamoMedial;
|
||||
$sep < $latinMedialEnd s s {} $IEUNG $jamoMedial;
|
||||
|
||||
# Split doubles. Text of the form A Xf Xi E, where XXi also occurs,
|
||||
# we transliterate as "ax-xe" to prevent round trip transliteration as
|
||||
# A XXi E.
|
||||
|
||||
$sep < $latinMedialEnd b {} $Bi $jamoMedial;
|
||||
$sep < $latinMedialEnd d {} $Di $jamoMedial;
|
||||
$sep < $latinMedialEnd j {} $Ji $jamoMedial;
|
||||
$sep < $latinMedialEnd g {} $Gi $jamoMedial;
|
||||
$sep < $latinMedialEnd s {} $Si $jamoMedial;
|
||||
|
||||
# XYY. This corresponds to the XYY rule in Latin-Jamo. By default
|
||||
# Latin-Jamo maps "xyy" to Xf YYi, to keep YY together. As a result,
|
||||
# "xyy" forms that correspond to XYf Yi must be transliterated as
|
||||
# "xy-y".
|
||||
|
||||
$sep < $latinMedialEnd b s {} [$Si $SSi];
|
||||
$sep < $latinMedialEnd g s {} [$Si $SSi];
|
||||
$sep < $latinMedialEnd l b {} [$Bi $BB];
|
||||
$sep < $latinMedialEnd l g {} [$Gi $GGi];
|
||||
$sep < $latinMedialEnd l s {} [$Si $SSi];
|
||||
$sep < $latinMedialEnd n g {} [$Gi $GGi];
|
||||
$sep < $latinMedialEnd n j {} [$Ji $JJ];
|
||||
|
||||
# Deletion of IEUNG is handled below.
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Latin-Jamo
|
||||
|
||||
# [Basic, context-free Jamo-Latin rules are embedded here too. See
|
||||
# above.]
|
||||
|
||||
# Split digraphs: Text of the form 'axye', where 'xy' is a final
|
||||
# digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
|
||||
# 'e' are medials, we want to transliterate this as A Xf Yi E rather
|
||||
# than A XYf IEUNG E. We do NOT include text of the form "axxe",
|
||||
# since that is handled differently below. These rules are generated
|
||||
# programmatically from the jamo data.
|
||||
|
||||
$jamoMedial {b s} $latinMedial > $Bf $Si;
|
||||
$jamoMedial {g s} $latinMedial > $Gf $Si;
|
||||
$jamoMedial {l b} $latinMedial > $L $Bi;
|
||||
$jamoMedial {l g} $latinMedial > $L $Gi;
|
||||
$jamoMedial {l h} $latinMedial > $L $Hi;
|
||||
$jamoMedial {l m} $latinMedial > $L $Mi;
|
||||
$jamoMedial {l p} $latinMedial > $L $Pi;
|
||||
$jamoMedial {l s} $latinMedial > $L $Si;
|
||||
$jamoMedial {l t} $latinMedial > $L $Ti;
|
||||
$jamoMedial {n g} $latinMedial > $Nf $Gi;
|
||||
$jamoMedial {n h} $latinMedial > $Nf $Hi;
|
||||
$jamoMedial {n j} $latinMedial > $Nf $Ji;
|
||||
|
||||
# Single consonants are initials: Text of the form 'axe', where 'x'
|
||||
# can be an initial or a final, and 'a' and 'e' are medials, we want
|
||||
# to transliterate as A Xi E rather than A Xf IEUNG E.
|
||||
|
||||
$jamoMedial {b} $latinMedial > $Bi;
|
||||
$jamoMedial {c} $latinMedial > $Ci;
|
||||
$jamoMedial {d} $latinMedial > $Di;
|
||||
$jamoMedial {g} $latinMedial > $Gi;
|
||||
$jamoMedial {h} $latinMedial > $Hi;
|
||||
$jamoMedial {j} $latinMedial > $Ji;
|
||||
$jamoMedial {k} $latinMedial > $Ki;
|
||||
$jamoMedial {m} $latinMedial > $Mi;
|
||||
$jamoMedial {n} $latinMedial > $Ni;
|
||||
$jamoMedial {p} $latinMedial > $Pi;
|
||||
$jamoMedial {s} $latinMedial > $Si;
|
||||
$jamoMedial {t} $latinMedial > $Ti;
|
||||
|
||||
# Doubled initials. The sequence "axxe", where XX exists as an initial
|
||||
# (XXi), and also Xi and Xf exist (true of all digraphs XX), we want
|
||||
# to transliterate as A XXi E, rather than split to A Xf Xi E.
|
||||
|
||||
$jamoMedial {b b} $latinMedial > $BB;
|
||||
$jamoMedial {d d} $latinMedial > $DD;
|
||||
$jamoMedial {j j} $latinMedial > $JJ;
|
||||
$jamoMedial {g g} $latinMedial > $GGi;
|
||||
$jamoMedial {s s} $latinMedial > $SSi;
|
||||
|
||||
# XYY. Because doubled consonants bind more strongly than XY
|
||||
# consonants, we must handle the sequence "axyy" specially. Here XYf
|
||||
# and YYi must exist. In these cases, we map to Xf YYi rather than
|
||||
# XYf.
|
||||
|
||||
$jamoMedial {b} s s > $Bf;
|
||||
$jamoMedial {g} s s > $Gf;
|
||||
$jamoMedial {l} b b > $L;
|
||||
$jamoMedial {l} g g > $L;
|
||||
$jamoMedial {l} s s > $L;
|
||||
$jamoMedial {n} g g > $Nf;
|
||||
$jamoMedial {n} j j > $Nf;
|
||||
|
||||
# Finals: Attach consonant with preceding medial to preceding medial.
|
||||
# Do this BEFORE mapping consonants to initials. Longer keys must
|
||||
# precede shorter keys that they start with, e.g., the rule for 'bs'
|
||||
# must precede 'b'.
|
||||
|
||||
# [BASIC Jamo-Latin FINALS handled here. Order irrelevant within this
|
||||
# block for Jamo-Latin.]
|
||||
|
||||
$jamoMedial {bs} <> $BS;
|
||||
$jamoMedial {b} <> $Bf;
|
||||
$jamoMedial {c} <> $Cf;
|
||||
$jamoMedial {d} <> $Df;
|
||||
$jamoMedial {gg} <> $GGf;
|
||||
$jamoMedial {gs} <> $GS;
|
||||
$jamoMedial {g} <> $Gf;
|
||||
$jamoMedial {h} <> $Hf;
|
||||
$jamoMedial {j} <> $Jf;
|
||||
$jamoMedial {k} <> $Kf;
|
||||
$jamoMedial {lb} <> $LB; $jamoMedial {lg} <> $LG;
|
||||
$jamoMedial {lh} <> $LH;
|
||||
$jamoMedial {lm} <> $LM;
|
||||
$jamoMedial {lp} <> $LP;
|
||||
$jamoMedial {ls} <> $LS;
|
||||
$jamoMedial {lt} <> $LT;
|
||||
$jamoMedial {l} <> $L;
|
||||
$jamoMedial {m} <> $Mf;
|
||||
$jamoMedial {ng} <> $NG;
|
||||
$jamoMedial {nh} <> $NH;
|
||||
$jamoMedial {nj} <> $NJ;
|
||||
$jamoMedial {n} <> $Nf;
|
||||
$jamoMedial {p} <> $Pf;
|
||||
$jamoMedial {ss} <> $SSf;
|
||||
$jamoMedial {s} <> $Sf;
|
||||
$jamoMedial {t} <> $Tf;
|
||||
|
||||
# Initials: Attach single consonant to following medial. Do this
|
||||
# AFTER mapping finals. Longer keys must precede shorter keys that
|
||||
# they start with, e.g., the rule for 'gg' must precede 'g'.
|
||||
|
||||
# [BASIC Jamo-Latin INITIALS handled here. Order irrelevant within
|
||||
# this block for Jamo-Latin.]
|
||||
|
||||
{gg} $latinMedial <> $GGi;
|
||||
{g} $latinMedial <> $Gi;
|
||||
{n} $latinMedial <> $Ni;
|
||||
{dd} $latinMedial <> $DD;
|
||||
{d} $latinMedial <> $Di;
|
||||
{r} $latinMedial <> $R;
|
||||
{m} $latinMedial <> $Mi;
|
||||
{bb} $latinMedial <> $BB;
|
||||
{b} $latinMedial <> $Bi;
|
||||
{ss} $latinMedial <> $SSi;
|
||||
{s} $latinMedial <> $Si;
|
||||
{jj} $latinMedial <> $JJ;
|
||||
{j} $latinMedial <> $Ji;
|
||||
{c} $latinMedial <> $Ci;
|
||||
{k} $latinMedial <> $Ki;
|
||||
{t} $latinMedial <> $Ti;
|
||||
{p} $latinMedial <> $Pi;
|
||||
{h} $latinMedial <> $Hi;
|
||||
|
||||
# 'r' in final position. Because of the equivalency of the 'l' and
|
||||
# 'r' jamo (the glyphs are the same), we try to provide the same
|
||||
# equivalency in Latin-Jamo. The 'l' to 'r' conversion is handled
|
||||
# below. If we see an 'r' in an apparent final position, treat it
|
||||
# like 'l'. For example, "karka" => Ki A R EU Ki A without this rule.
|
||||
# Instead, we want Ki A L Ki A.
|
||||
|
||||
$jamoMedial {r} $latinInitial > | l;
|
||||
|
||||
# Initial + Final: If we match the next rule, we have initial then
|
||||
# final consonant with no intervening medial. We insert the null
|
||||
# vowel BEFORE it to create a well-formed syllable. (In the next rule
|
||||
# we insert a null vowel AFTER an anomalous initial.)
|
||||
|
||||
$jamoInitial {} [bcdghjklmnpst] > $EU;
|
||||
|
||||
# Initial + X: This block matches an initial consonant not followed by
|
||||
# a medial. We insert the null vowel after it. We handle double
|
||||
# initials explicitly here; for single initial consonants we insert EU
|
||||
# (as Latin) after them and let standard rules do the rest.
|
||||
|
||||
# BREAKS ROUND TRIP INTEGRITY
|
||||
|
||||
gg > $GGi $EU;
|
||||
dd > $DD $EU;
|
||||
bb > $BB $EU;
|
||||
ss > $SSi $EU;
|
||||
jj > $JJ $EU;
|
||||
|
||||
([bcdghjkmnprst]) > | $1 eu;
|
||||
|
||||
# X + Final: Finally we have to deal with a consonant that can only be
|
||||
# interpreted as a final (not an initial) and which is preceded
|
||||
# neither by an initial nor a medial. It is the start of the
|
||||
# syllable, but cannot be. Most of these will already be handled by
|
||||
# the above rules. 'bs' splits into Bi EU Sf. Similar for 'gs' 'ng'
|
||||
# 'nh' 'nj'. The only problem is 'l' and digraphs starting with 'l'.
|
||||
# For this isolated case, we could add a null initial and medial,
|
||||
# which would give "la" => IEUNG EU L IEUNG A, for example. A more
|
||||
# economical solution is to transliterate isolated "l" (that is,
|
||||
# initial "l") to "r". (Other similar conversions of consonants that
|
||||
# occur neither as initials nor as finals are handled below.)
|
||||
|
||||
l > | r;
|
||||
|
||||
# Medials. If a medial is preceded by an initial, then we proceed
|
||||
# normally. As usual, longer keys must precede shorter ones.
|
||||
|
||||
# [BASIC Jamo-Latin MEDIALS handled here. Order irrelevant within
|
||||
# this block for Jamo-Latin.]
|
||||
|
||||
$jamoInitial {ae} <> $AE;
|
||||
$jamoInitial {a} <> $A;
|
||||
$jamoInitial {eo} <> $EO;
|
||||
$jamoInitial {eu} <> $EU;
|
||||
$jamoInitial {e} <> $E;
|
||||
$jamoInitial {i} <> $I;
|
||||
$jamoInitial {oe} <> $OE;
|
||||
$jamoInitial {o} <> $O;
|
||||
$jamoInitial {u} <> $U;
|
||||
$jamoInitial {wae} <> $WAE;
|
||||
$jamoInitial {wa} <> $WA;
|
||||
$jamoInitial {weo} <> $WEO;
|
||||
$jamoInitial {we} <> $WE;
|
||||
$jamoInitial {wi} <> $WI;
|
||||
$jamoInitial {yae} <> $YAE;
|
||||
$jamoInitial {ya} <> $YA;
|
||||
$jamoInitial {yeo} <> $YEO;
|
||||
$jamoInitial {ye} <> $YE;
|
||||
$jamoInitial {yi} <> $YI;
|
||||
$jamoInitial {yo} <> $YO;
|
||||
$jamoInitial {yu} <> $YU;
|
||||
|
||||
# We may see an anomalous isolated 'w' or 'y'. In that case, we
|
||||
# interpret it as 'wi' and 'yu', respectively.
|
||||
|
||||
# BREAKS ROUND TRIP INTEGRITY
|
||||
|
||||
$jamoInitial {w} > | wi;
|
||||
$jamoInitial {y} > | yu;
|
||||
|
||||
# Otherwise, insert a null consonant IEUNG before the medial (which is
|
||||
# still an untransliterated latin vowel).
|
||||
|
||||
($latinMedial) > $IEUNG | $1;
|
||||
|
||||
# Convert non-jamo latin consonants to equivalents. These occur as
|
||||
# neither initials nor finals in jamo. 'l' occurs as a final, but not
|
||||
# an initial; it is handled above. The following letters (left hand
|
||||
# side) will never be output by Jamo-Latin.
|
||||
|
||||
f > | p;
|
||||
q > | k;
|
||||
v > | b;
|
||||
x > | ks;
|
||||
z > | s;
|
||||
|
||||
# Delete separators (Latin-Jamo).
|
||||
|
||||
$sep > ;
|
||||
|
||||
# Delete null consonants (Jamo-Latin). Do NOT delete null EU vowels,
|
||||
# since these may also occur in text.
|
||||
|
||||
< $IEUNG;
|
||||
|
||||
#- N.B. DO NOT put any filters, NFD, etc. here -- those are aliased in
|
||||
#- the INDEX file. This transliterator is, by itself, not
|
||||
#- instantiated. It is used as a part of Latin-Jamo, Latin-Hangul, or
|
||||
#- inverses thereof.
|
||||
|
||||
# eof
|
|
@ -1,495 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# note: a global filter is more efficient, but MUST include all source chars
|
||||
#:: [\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]] ;
|
||||
# MINIMAL FILTER GENERATED FOR: Latin-Katakana
|
||||
### WARNING -- must add width filter, both here and below!!! ###
|
||||
:: [[\u1100-\u1112\u111A\u1121\u1160-\u1175\u11AA\u11AC-\u11AD\u11B0-\u11B5\u2190-\u2193\u2502\u25A0\u25CB\u3000-\u3002\u300C-\u300D\u3099-\u309A\u30A1-\u30ED\u30EF\u30F2-\u30F4\u30F7\u30FA-\u30FC\uFF01-\uFF5E\uFFE0-\uFFE6][',.A-Za-z~\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0304\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1FB1\u1FB9\u1FD1\u1FD9\u1FE1\u1FE9\u212A-\u212B]] ;
|
||||
|
||||
:: [:Latin:] fullwidth-halfwidth ();
|
||||
:: NFD (NFC);
|
||||
:: Lower (); # whenever transliterating from cased to uncased script, include this
|
||||
# :: NFD () ; # this would catch the odd cases where a lowercase is not in NFD, but none are important for Japanese
|
||||
|
||||
# Uses modified Hepburn. Small changes to make unambiguous.
|
||||
|
||||
# | Kunrei-shiki: Hepburn/MHepburn
|
||||
# | ------------------------------
|
||||
# | si: shi
|
||||
# | si ~ya: sha
|
||||
# | si ~yu: shu
|
||||
# | si ~yo: sho
|
||||
# | zi: ji
|
||||
# | zi ~ya: ja
|
||||
# | zi ~yu: ju
|
||||
# | zi ~yo: jo
|
||||
# | ti: chi
|
||||
# | ti ~ya: cha
|
||||
# | ti ~yu: chu
|
||||
# | ti ~yo: cho
|
||||
# | tu: tsu
|
||||
# | di: ji/dji
|
||||
# | du: zu/dzu
|
||||
# | hu: fu
|
||||
|
||||
# | For foreign words:
|
||||
# | -----------------
|
||||
# | se ~i si
|
||||
# | si ~e she
|
||||
# |
|
||||
# | ze ~i zi
|
||||
# | zi ~e je
|
||||
# |
|
||||
# | te ~i ti
|
||||
# | ti ~e che
|
||||
# | te ~u tu
|
||||
# |
|
||||
# | de ~i di
|
||||
# | de ~u du
|
||||
# | de ~i di
|
||||
# |
|
||||
# | he ~u: hu
|
||||
# | hu ~a fa
|
||||
# | hu ~i fi
|
||||
# | hu ~e fe
|
||||
# | hu ~o fo
|
||||
|
||||
# Most small forms are generated, but if necessary
|
||||
# explicit small forms are given with ~a, ~ya, etc.
|
||||
|
||||
#------------------------------------------------------
|
||||
# Variables
|
||||
|
||||
$vowel = [aeiou] ;
|
||||
$consonant = [bcdfghjklmnpqrstvwxyz] ;
|
||||
$macron = \u0304 ;
|
||||
|
||||
# Variables used for doubled-consonants with tsu
|
||||
|
||||
$kana = [\u3041-\u3094] ;
|
||||
|
||||
$voice = [\u3099\u309B];
|
||||
$semivoice = [\u309A\u309C];
|
||||
|
||||
$k_start = [カキクケコかきくけこ] ;
|
||||
|
||||
$s_start = [サシスセソさしすせそ] ;
|
||||
|
||||
$j_start = [シし] $voice ;
|
||||
|
||||
$t_start = [タチツテトたちつてと] ;
|
||||
|
||||
$n_start = [ナニヌネノンなにぬねの] ;
|
||||
|
||||
$h_start = [ハヒヘホはひへほ] ;
|
||||
$f_start = [フふ] ;
|
||||
|
||||
$m_start = [マミムメモまみむめも] ;
|
||||
|
||||
$y_start = [ヤユヨやゆよ] ;
|
||||
|
||||
$r_start = [ラリルレロらりるれろ] ;
|
||||
|
||||
$w_start = [ワヰヱヲわゐゑを] ;
|
||||
|
||||
$v_start = [ワヰヱヲ]゙ ;
|
||||
|
||||
# if ン is followed by $n_quoter, then it needs an
|
||||
# apostrophe after its romaji form to disambiguate it.
|
||||
# e.g., ン ア != ナ, so represent as "n'a", not "na".
|
||||
|
||||
$n_quoter = [ア イ ウ エ オ ナ ニ ヌ ネ ノ ヤ ユ ヨ ン] ;
|
||||
|
||||
$small_y = [ャィュェョ] ;
|
||||
|
||||
$iteration = \u309D ;
|
||||
|
||||
#------------------------------------------------------
|
||||
# katakana rules
|
||||
|
||||
# Punctuation
|
||||
|
||||
'.' <> 。;
|
||||
',' <> 、;
|
||||
# ' ' } [a-z] > ; # delete spaces before latin
|
||||
# ' ' < [^' '\u30A0-\u30ff] {} ['\u30A0-\u30ff] ; #insert spaces before katakana
|
||||
|
||||
# Iteration Mark
|
||||
# Copy previous letter & marks
|
||||
|
||||
# TODO
|
||||
# | $1 $1 < ($kana [[:M:]$voice$semivoice]?) $iteration
|
||||
|
||||
# Specials for katakana -- not shared with hiragana
|
||||
|
||||
va <> ヷ ;
|
||||
vi <> ヸ ;
|
||||
ve <> ヹ ;
|
||||
vo <> ヺ ;
|
||||
'~ka' <> ヵ ;
|
||||
'~ke' <> ヶ ;
|
||||
|
||||
# ~~~ begin shared rules ~~~
|
||||
|
||||
#special
|
||||
|
||||
ya < '~'ャ;
|
||||
yi < '~'ィ ;
|
||||
yu < '~'ュ;
|
||||
ye < '~'ェ;
|
||||
yo < '~'ョ;
|
||||
|
||||
#normal
|
||||
|
||||
a <> ア ;
|
||||
|
||||
b | '~' < ヒ ゙} $small_y ;
|
||||
by } $vowel > ビ | '~y' ;
|
||||
|
||||
ba <> バ ;
|
||||
bi <> ビ ;
|
||||
bu <> ブ ;
|
||||
be <> ベ ;
|
||||
bo <> ボ ;
|
||||
|
||||
c } i > | s ;
|
||||
c } e > | s ;
|
||||
|
||||
da <> ダ ;
|
||||
di <> ディ ;
|
||||
du <> デゥ ;
|
||||
de <> デ ;
|
||||
do <> ド ;
|
||||
dzu <> ヅ ;
|
||||
dja < ヂャ ;
|
||||
dji'~i' < ヂィ ; # liu
|
||||
dju < ヂュ ;
|
||||
dje < ヂェ ;
|
||||
djo < ヂョ ;
|
||||
dji <> ヂ ;
|
||||
dj } $vowel > ヂ | '~y' ;
|
||||
|
||||
# TODO: QUESTION: use ĵĴżŻ instead of dj, dz
|
||||
|
||||
cha < チャ ;
|
||||
chi'~i' < チィ ; # liu
|
||||
chu < チュ ;
|
||||
che < チェ ;
|
||||
cho < チョ ;
|
||||
chi <> チ ;
|
||||
ch } $vowel > チ | '~y' ;
|
||||
|
||||
e <> エ ;
|
||||
|
||||
g | '~' < ギ} $small_y ;
|
||||
gy } $vowel > ギ | '~y' ;
|
||||
|
||||
ga <> ガ ;
|
||||
gi <> ギ ;
|
||||
gu <> グ ;
|
||||
ge <> ゲ ;
|
||||
go <> ゴ ;
|
||||
|
||||
i <> イ ;
|
||||
|
||||
# j } $vowel > ジ | '~y' ;
|
||||
|
||||
ja <> ジャ ;
|
||||
ji'~i' < ジィ ; # liu
|
||||
ju <> ジュ ;
|
||||
je <> ジェ ;
|
||||
jo <> ジョ ;
|
||||
ji <> ジ ;
|
||||
|
||||
k | '~' < キ} $small_y ;
|
||||
ky } $vowel > キ | '~y' ;
|
||||
|
||||
ka <> カ ;
|
||||
ki <> キ ;
|
||||
ku <> ク ;
|
||||
ke <> ケ ;
|
||||
ko <> コ ;
|
||||
|
||||
m | '~' < ミ} $small_y ;
|
||||
my } $vowel > ミ | '~y' ;
|
||||
|
||||
ma <> マ ;
|
||||
mi <> ミ ;
|
||||
mu <> ム ;
|
||||
me <> メ ;
|
||||
mo <> モ ;
|
||||
|
||||
m } [pbfv] > ン ;
|
||||
|
||||
n | '~' < ニ } $small_y ;
|
||||
ny } $vowel > ニ | '~y' ;
|
||||
|
||||
na <> ナ ;
|
||||
ni <> ニ ;
|
||||
nu <> ヌ ;
|
||||
ne <> ネ ;
|
||||
no <> ノ ;
|
||||
|
||||
o <> オ ;
|
||||
|
||||
p | '~' < ピ } $small_y ;
|
||||
py } $vowel > ピ | '~y' ;
|
||||
|
||||
pa <> パ ;
|
||||
pi <> ピ ;
|
||||
pu <> プ ;
|
||||
pe <> ペ ;
|
||||
po <> ポ ;
|
||||
|
||||
h | '~' < ヒ } $small_y ;
|
||||
hy } $vowel > ヒ | '~y' ;
|
||||
|
||||
ha <> ハ ;
|
||||
hi <> ヒ ;
|
||||
hu <> ヘゥ ;
|
||||
he <> ヘ ;
|
||||
ho <> ホ ;
|
||||
|
||||
# f | '~' < フ } $small_y ;
|
||||
# f } $vowel > フ | '~' ;
|
||||
|
||||
fa <> ファ ;
|
||||
fi <> フィ ;
|
||||
fe <> フェ ;
|
||||
fo <> フォ ;
|
||||
fu <> フ ;
|
||||
|
||||
r | '~' < リ } $small_y ;
|
||||
ry } $vowel > リ | '~y' ;
|
||||
|
||||
ra <> ラ ;
|
||||
ri <> リ ;
|
||||
ru <> ル ;
|
||||
re <> レ ;
|
||||
ro <> ロ ;
|
||||
|
||||
za <> ザ ;
|
||||
zi <> ゼィ ;
|
||||
zu <> ズ ;
|
||||
ze <> ゼ ;
|
||||
zo <> ゾ ;
|
||||
|
||||
sa <> サ ;
|
||||
si <> セィ ;
|
||||
su <> ス ;
|
||||
se <> セ ;
|
||||
so <> ソ ;
|
||||
|
||||
sha < シャ ;
|
||||
shi'~i' < シィ ; # liu
|
||||
shu < シュ ;
|
||||
she < シェ ;
|
||||
sho < ショ ;
|
||||
shi <> シ ;
|
||||
sh } $vowel > シ | '~y' ;
|
||||
|
||||
ta <> タ ;
|
||||
ti <> ティ ;
|
||||
tu <> テゥ ;
|
||||
te <> テ ;
|
||||
to <> ト ;
|
||||
|
||||
tsu <> ツ ;
|
||||
|
||||
# v } $vowel > ヴ | '~' ;
|
||||
|
||||
#'v~a' < ヴァ ; # liu
|
||||
#'v~i' < ヴィ ; # liu
|
||||
#'v~e' < ヴェ ; # liu
|
||||
#'v~o' < ヴォ ; # liu
|
||||
vu <> ヴ ;
|
||||
|
||||
u <> ウ ;
|
||||
|
||||
# w } $vowel > ウ | '~' ;
|
||||
|
||||
wa <> ワ ;
|
||||
wi <> ヰ ;
|
||||
wu > ウ ;
|
||||
we <> ヱ ;
|
||||
wo <> ヲ ;
|
||||
|
||||
ya <> ヤ ;
|
||||
yi > イ ;
|
||||
yu <> ユ ;
|
||||
ye > エ ;
|
||||
yo <> ヨ ;
|
||||
|
||||
# double consonants
|
||||
|
||||
#specials
|
||||
s } sh > ッ ;
|
||||
t } ch > ッ ;
|
||||
|
||||
#voiced
|
||||
|
||||
j } j <> ッ } $j_start ;
|
||||
b } b <> ッ } [$h_start$f_start] $voice;
|
||||
d } d <> ッ } $t_start $voice;
|
||||
g } g <> ッ } $k_start $voice;
|
||||
p } p <> ッ } [$h_start$f_start] $semivoice;
|
||||
# v } v <> ッ } [ワヰウヱヲう] $voice ;
|
||||
z } z <> ッ } $s_start $voice;
|
||||
v } v <> ッ } $v_start;
|
||||
|
||||
# normal
|
||||
|
||||
k } k <> ッ } $k_start ;
|
||||
m } m <> ッ } $m_start ;
|
||||
n } n <> ッ } $n_start ;
|
||||
h } h <> ッ } $h_start ;
|
||||
f } f <> ッ } $f_start ;
|
||||
r } r <> ッ } $r_start ;
|
||||
t } t <> ッ } $t_start ;
|
||||
s } s <> ッ } $s_start ;
|
||||
|
||||
w } w <> ッ } $w_start;
|
||||
y } y <> ッ } $y_start;
|
||||
|
||||
# completeness
|
||||
x } x > ッ ;
|
||||
c } k > ッ ;
|
||||
c } c > ッ ;
|
||||
c } q > ッ ;
|
||||
l } l > ッ ;
|
||||
q } q > ッ ;
|
||||
# y } y > ッ ;
|
||||
# w } w > ッ ;
|
||||
|
||||
# prolonged vowel mark. this indicates a doubling of
|
||||
# the preceding vowel sound
|
||||
|
||||
#a < a { ー ; # liu
|
||||
#e < e { ー ; # liu
|
||||
#i < i { ー ; # liu
|
||||
#o < o { ー ; # liu
|
||||
#u < u { ー ; # liu
|
||||
|
||||
$macron <> ー ;
|
||||
|
||||
# small forms
|
||||
|
||||
'~a' <> ァ ;
|
||||
'~i' <> ィ ;
|
||||
'~u' <> ゥ ;
|
||||
'~e' <> ェ ;
|
||||
'~o' <> ォ ;
|
||||
'~tsu' <> ッ ;
|
||||
'~wa' <> ヮ ;
|
||||
'~ya' <> ャ ;
|
||||
'~yi' > ィ ;
|
||||
'~yu' <> ュ ;
|
||||
'~ye' > ェ ;
|
||||
'~yo' <> ョ ;
|
||||
|
||||
# iteration marks
|
||||
# TODO: make more accurate
|
||||
|
||||
j $1 < sh (y* $vowel) {ヽ$voice ;
|
||||
dj $1 < ch (y* $vowel) {ヽ$voice ;
|
||||
dz $1 < ts (y* $vowel) {ヽ$voice ;
|
||||
|
||||
g $1 < k (y* $vowel) {ヽ$voice ;
|
||||
z $1 < s (y* $vowel) {ヽ$voice ;
|
||||
d $1 < t (y* $vowel) {ヽ$voice ;
|
||||
b $1 < h (y* $vowel) {ヽ$voice ; # voiced iteration mark after an h-syllable repeats it voiced as b (h -> b), parallel to k -> g, s -> z, t -> d
|
||||
v $1 < w (y* $vowel) {ヽ$voice ;
|
||||
|
||||
sh $1 < sh (y* $vowel) {ヽ$voice ;
|
||||
j $1 < j (y* $vowel) {ヽ$voice ;
|
||||
ch $1 < ch (y* $vowel) {ヽ$voice ;
|
||||
dj $1 < dj(y* $vowel) {ヽ$voice ;
|
||||
ts $1 < ts (y* $vowel) {ヽ$voice ;
|
||||
dz $1 < dz (y* $vowel) {ヽ$voice ;
|
||||
|
||||
$1 < ($consonant y* $vowel) {ヽ$voice? ;
|
||||
$1 < (.) {ヽ $voice? ; # otherwise repeat last character
|
||||
< ヽ $voice? ; # delete if no characters found
|
||||
|
||||
# h- rule: lengthens vowel if not followed by a vowel
|
||||
|
||||
[aeiou] } h > ー ;
|
||||
|
||||
# one-way latin -> kana rules. these do not occur in
|
||||
# well-formed romaji representing actual japanese text.
|
||||
# their purpose is to make all romaji map to kana of
|
||||
# some sort.
|
||||
|
||||
# the following are not really necessary, but produce
|
||||
# slightly more natural results.
|
||||
|
||||
cy > セィ ;
|
||||
dy > ディ ;
|
||||
hy > ヒ ;
|
||||
sy > セィ ;
|
||||
ty > ティ ;
|
||||
zy > ゼィ ;
|
||||
|
||||
h > ヘ ;
|
||||
|
||||
# isolated consonants listed here so as not to mask
|
||||
# longer rules above.
|
||||
|
||||
ch > チ;
|
||||
sh > シ ;
|
||||
dz > ヅ ;
|
||||
dj > ヂ;
|
||||
|
||||
b > ブ ;
|
||||
d > デ ;
|
||||
g > グ ;
|
||||
k > ク ;
|
||||
m > ム ;
|
||||
n'' < ン } $n_quoter ;
|
||||
n <> ン ;
|
||||
p > プ ;
|
||||
r > ル ;
|
||||
s > ス ;
|
||||
t > テ ;
|
||||
y > イ ;
|
||||
z > ズ ;
|
||||
v > ヴ ;
|
||||
|
||||
f > フ;
|
||||
j > ジ;
|
||||
w > ウ;
|
||||
|
||||
ß > | ss ;
|
||||
æ > | e ;
|
||||
ð > | d ;
|
||||
ø > | u ;
|
||||
þ > | th ;
|
||||
|
||||
# simple substitutions using backup
|
||||
|
||||
c > | k ;
|
||||
l > | r ;
|
||||
q > | k ;
|
||||
x > | ks ;
|
||||
|
||||
# ~~~ END shared rules ~~~
|
||||
|
||||
#------------------------------------------------------
|
||||
# Final cleanup
|
||||
|
||||
'~' > ; # delete stray tildes between letters
|
||||
[:Katakana:] { '' } [:Latin:] > ; # delete stray quotes between letters
|
||||
# [\u02BE[:Nonspacing Mark:]-[\u3099-\u309C]] > ; # delete any non-spacing marks that we didn't use
|
||||
|
||||
:: NFC (NFD) ;
|
||||
:: ([:Katakana:] halfwidth-fullwidth);
|
||||
|
||||
# note: a global filter is more efficient, but MUST include all source chars!!
|
||||
#:: ([\u0000-\u007E \u3001\u3002 \u3099-\u309C \u30A1-\u30FC \uFF61-\uFF9F [:Latin:][:Katakana:] [:nonspacing mark:]]);
|
||||
# MINIMAL FILTER GENERATED FOR: Latin-Katakana BACKWARD
|
||||
:: ( [[\ -~\u00A2-\u00A3\u00A5-\u00A6\u00AC\u0304\u20A9\uFF61-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC\uFFE8-\uFFEE][~\u3001-\u3002\u304C\u304E\u3050\u3052\u3054\u3056\u3058\u305A\u305C\u305E\u3060\u3062\u3065\u3067\u3069\u3070-\u3071\u3073-\u3074\u3076-\u3077\u3079-\u307A\u307C-\u307D\u3094\u3099-\u309B\u309E\u30A1-\u30FA\u30FC-\u30FE]] ) ;
|
||||
|
||||
# eof
|
|
@ -1,41 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# According to the pinyin definitions I've been able to find:
|
||||
# 'a', 'e' are the preferred bases
|
||||
# otherwise 'o'
|
||||
# otherwise last vowel
|
||||
|
||||
# The trailing form of syllables are the following:
|
||||
# "a", "ai", "ao", "an", "ang",
|
||||
# "o", "ou", "ong",
|
||||
# "e", "ei", "er", "en", "eng",
|
||||
# "i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong",
|
||||
# "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng",
|
||||
# "ü", "üe", "üan", "ün"
|
||||
# so the letters the tone will 'hop' are:
|
||||
|
||||
::NFD (NFC);
|
||||
$tone = [\u0304\u0301\u030C\u0300\u0306] ;
|
||||
|
||||
# Move the tone to the end of a syllable, and convert to number
|
||||
e {($tone) r} > r &tone-digit($1);
|
||||
($tone) ( [i o n u {o n} {n g}]) > $2 &tone-digit($1);
|
||||
($tone) > &tone-digit($1);
|
||||
|
||||
# The following backs up until it finds the right vowel, then deposits the tone
|
||||
|
||||
$vowel = [aAeEiIoOuUüÜ];
|
||||
$consonant = [[a-z A-Z] - [$vowel]];
|
||||
$digit = [1-5];
|
||||
$1 &digit-tone($3) $2 < ([aAeE]) ($vowel* $consonant*) ($digit);
|
||||
$1 &digit-tone($3) $2 < ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit);
|
||||
$1 &digit-tone($3) $2 < ($vowel) ($consonant*) ($digit);
|
||||
&digit-tone($1) < [:letter:] {($digit)};
|
||||
|
||||
::NFC (NFD);
|
||||
|
||||
|
||||
|
|
@ -1,85 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Malayalam-InterIndic
|
||||
#:: NFD (NFC) ;
|
||||
|
||||
\u0D02>\uE002; # SIGN ANUSVARA
|
||||
\u0D03>\uE003; # SIGN VISARGA
|
||||
\u0D05>\uE005; # LETTER A
|
||||
\u0D06>\uE006; # LETTER AA
|
||||
\u0D07>\uE007; # LETTER I
|
||||
\u0D08>\uE008; # LETTER II
|
||||
\u0D09>\uE009; # LETTER U
|
||||
\u0D0A>\uE00A; # LETTER UU
|
||||
\u0D0B>\uE00B; # LETTER VOCALIC R
|
||||
\u0D0C>\uE00C; # LETTER VOCALIC L
|
||||
\u0D0E>\uE00E; # LETTER E
|
||||
\u0D0F>\uE00F; # LETTER EE
|
||||
\u0D10>\uE010; # LETTER AI
|
||||
\u0D12>\uE012; # LETTER O
|
||||
\u0D13>\uE013; # LETTER OO
|
||||
\u0D14>\uE014; # LETTER AU
|
||||
\u0D15>\uE015; # LETTER KA
|
||||
\u0D16>\uE016; # LETTER KHA
|
||||
\u0D17>\uE017; # LETTER GA
|
||||
\u0D18>\uE018; # LETTER GHA
|
||||
\u0D19>\uE019; # LETTER NGA
|
||||
\u0D1A>\uE01A; # LETTER CA
|
||||
\u0D1B>\uE01B; # LETTER CHA
|
||||
\u0D1C>\uE01C; # LETTER JA
|
||||
\u0D1D>\uE01D; # LETTER JHA
|
||||
\u0D1E>\uE01E; # LETTER NYA
|
||||
\u0D1F>\uE01F; # LETTER TTA
|
||||
\u0D20>\uE020; # LETTER TTHA
|
||||
\u0D21>\uE021; # LETTER DDA
|
||||
\u0D22>\uE022; # LETTER DDHA
|
||||
\u0D23>\uE023; # LETTER NNA
|
||||
\u0D24>\uE024; # LETTER TA
|
||||
\u0D25>\uE025; # LETTER THA
|
||||
\u0D26>\uE026; # LETTER DA
|
||||
\u0D27>\uE027; # LETTER DHA
|
||||
\u0D28>\uE028; # LETTER NA
|
||||
\u0D2A>\uE02A; # LETTER PA
|
||||
\u0D2B>\uE02B; # LETTER PHA
|
||||
\u0D2C>\uE02C; # LETTER BA
|
||||
\u0D2D>\uE02D; # LETTER BHA
|
||||
\u0D2E>\uE02E; # LETTER MA
|
||||
\u0D2F>\uE02F; # LETTER YA
|
||||
\u0D30>\uE030; # LETTER RA
|
||||
\u0D31>\uE031; # LETTER RRA
|
||||
\u0D32>\uE032; # LETTER LA
|
||||
\u0D33>\uE033; # LETTER LLA
|
||||
\u0D34>\uE034; # LETTER LLLA
|
||||
\u0D35>\uE035; # LETTER VA
|
||||
\u0D36>\uE036; # LETTER SHA
|
||||
\u0D37>\uE037; # LETTER SSA
|
||||
\u0D38>\uE038; # LETTER SA
|
||||
\u0D39>\uE039; # LETTER HA
|
||||
\u0D3E>\uE03E; # VOWEL SIGN AA
|
||||
\u0D3F>\uE03F; # VOWEL SIGN I
|
||||
\u0D40>\uE040; # VOWEL SIGN II
|
||||
\u0D41>\uE041; # VOWEL SIGN U
|
||||
\u0D42>\uE042; # VOWEL SIGN UU
|
||||
\u0D43>\uE043; # VOWEL SIGN VOCALIC R
|
||||
\u0D46>\uE046; # VOWEL SIGN E
|
||||
\u0D47>\uE047; # VOWEL SIGN EE
|
||||
\u0D48>\uE048; # VOWEL SIGN AI
|
||||
\u0D4D>\uE04D; # SIGN VIRAMA
|
||||
\u0D57>\uE057; # AU LENGTH MARK
|
||||
\u0D60>\uE060; # LETTER VOCALIC RR
|
||||
\u0D61>\uE061; # LETTER VOCALIC LL
|
||||
\u0D66>\uE066; # DIGIT ZERO
|
||||
\u0D67>\uE067; # DIGIT ONE
|
||||
\u0D68>\uE068; # DIGIT TWO
|
||||
\u0D69>\uE069; # DIGIT THREE
|
||||
\u0D6A>\uE06A; # DIGIT FOUR
|
||||
\u0D6B>\uE06B; # DIGIT FIVE
|
||||
\u0D6C>\uE06C; # DIGIT SIX
|
||||
\u0D6D>\uE06D; # DIGIT SEVEN
|
||||
\u0D6E>\uE06E; # DIGIT EIGHT
|
||||
\u0D6F>\uE06F; # DIGIT NINE
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,95 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Oriya-InterIndic
|
||||
#:: NFD (NFC) ;
|
||||
#\u0B21\u0B3C>\uE05C;# LETTER RRA
|
||||
#\u0B22\u0B3C>\uE05D;# LETTER RHA
|
||||
\u0B47\u0B56>\uE048;# VOWEL SIGN AI
|
||||
\u0B47\u0B3E>\uE04B;# VOWEL SIGN O
|
||||
\u0B47\u0B57>\uE04C;# VOWEL SIGN AU
|
||||
|
||||
\u0B01>\uE001; # SIGN CANDRABINDU
|
||||
\u0B02>\uE002; # SIGN ANUSVARA
|
||||
\u0B03>\uE003; # SIGN VISARGA
|
||||
\u0B05>\uE005; # LETTER A
|
||||
\u0B06>\uE006; # LETTER AA
|
||||
\u0B07>\uE007; # LETTER I
|
||||
\u0B08>\uE008; # LETTER II
|
||||
\u0B09>\uE009; # LETTER U
|
||||
\u0B0A>\uE00A; # LETTER UU
|
||||
\u0B0B>\uE00B; # LETTER VOCALIC R
|
||||
\u0B0C>\uE00C; # LETTER VOCALIC L
|
||||
\u0B0F>\uE00F; # LETTER E
|
||||
\u0B10>\uE010; # LETTER AI
|
||||
\u0B13>\uE013; # LETTER O
|
||||
\u0B14>\uE014; # LETTER AU
|
||||
\u0B15>\uE015; # LETTER KA
|
||||
\u0B16>\uE016; # LETTER KHA
|
||||
\u0B17>\uE017; # LETTER GA
|
||||
\u0B18>\uE018; # LETTER GHA
|
||||
\u0B19>\uE019; # LETTER NGA
|
||||
\u0B1A>\uE01A; # LETTER CA
|
||||
\u0B1B>\uE01B; # LETTER CHA
|
||||
\u0B1C>\uE01C; # LETTER JA
|
||||
\u0B1D>\uE01D; # LETTER JHA
|
||||
\u0B1E>\uE01E; # LETTER NYA
|
||||
\u0B1F>\uE01F; # LETTER TTA
|
||||
\u0B20>\uE020; # LETTER TTHA
|
||||
\u0B21>\uE021; # LETTER DDA
|
||||
\u0B22>\uE022; # LETTER DDHA
|
||||
\u0B23>\uE023; # LETTER NNA
|
||||
\u0B24>\uE024; # LETTER TA
|
||||
\u0B25>\uE025; # LETTER THA
|
||||
\u0B26>\uE026; # LETTER DA
|
||||
\u0B27>\uE027; # LETTER DHA
|
||||
\u0B28>\uE028; # LETTER NA
|
||||
\u0B2A>\uE02A; # LETTER PA
|
||||
\u0B2B>\uE02B; # LETTER PHA
|
||||
\u0B2C>\uE02C; # LETTER BA
|
||||
\u0B2D>\uE02D; # LETTER BHA
|
||||
\u0B2E>\uE02E; # LETTER MA
|
||||
\u0B2F>\uE02F; # LETTER YA
|
||||
\u0B30>\uE030; # LETTER RA
|
||||
\u0B32>\uE032; # LETTER LA
|
||||
\u0B33>\uE033; # LETTER LLA
|
||||
\u0B35>\uE035; # LETTER VA
|
||||
\u0B36>\uE036; # LETTER SHA
|
||||
\u0B37>\uE037; # LETTER SSA
|
||||
\u0B38>\uE038; # LETTER SA
|
||||
\u0B39>\uE039; # LETTER HA
|
||||
\u0B3C>\uE03C; # SIGN NUKTA
|
||||
\u0B3D>\uE03D; # SIGN AVAGRAHA
|
||||
\u0B3E>\uE03E; # VOWEL SIGN AA
|
||||
\u0B3F>\uE03F; # VOWEL SIGN I
|
||||
\u0B40>\uE040; # VOWEL SIGN II
|
||||
\u0B41>\uE041; # VOWEL SIGN U
|
||||
\u0B42>\uE042; # VOWEL SIGN UU
|
||||
\u0B43>\uE043; # VOWEL SIGN VOCALIC R
|
||||
\u0B47>\uE047; # VOWEL SIGN E
|
||||
#
|
||||
\u0B4D>\uE04D; # SIGN VIRAMA
|
||||
\u0B56>\uE056; # AI LENGTH MARK
|
||||
\u0B57>\uE057; # AU LENGTH MARK
|
||||
\u0964>\ue064; # DANDA
|
||||
\u0965>\ue065; # DOUBLE DANDA
|
||||
#
|
||||
\u0B5F>\uE05F; # LETTER YYA
|
||||
\u0B60>\uE060; # LETTER VOCALIC RR
|
||||
\u0B61>\uE061; # LETTER VOCALIC LL
|
||||
\u0B66>\uE066; # DIGIT ZERO
|
||||
\u0B67>\uE067; # DIGIT ONE
|
||||
\u0B68>\uE068; # DIGIT TWO
|
||||
\u0B69>\uE069; # DIGIT THREE
|
||||
\u0B6A>\uE06A; # DIGIT FOUR
|
||||
\u0B6B>\uE06B; # DIGIT FIVE
|
||||
\u0B6C>\uE06C; # DIGIT SIX
|
||||
\u0B6D>\uE06D; # DIGIT SEVEN
|
||||
\u0B6E>\uE06E; # DIGIT EIGHT
|
||||
\u0B6F>\uE06F; # DIGIT NINE
|
||||
\u0B70>\ue07B; # ISSHAR
|
||||
\u0B71>\ue081; # LETTER WA
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,76 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Tamil-InterIndic
|
||||
#:: NFD (NFC) ;
|
||||
|
||||
\u0BC6\u0BBE>\uE04A;# VOWEL SIGN O
|
||||
\u0BC7\u0BBE>\uE04B;# VOWEL SIGN OO
|
||||
\u0BC6\u0BD7>\uE04C;# VOWEL SIGN AU
|
||||
\u0B92\u0BD7>\uE014;# LETTER AU
|
||||
|
||||
\u0B82>\uE002; # SIGN ANUSVARA
|
||||
\u0B83>\uE003; # SIGN VISARGA
|
||||
\u0B85>\uE005; # LETTER A
|
||||
\u0B86>\uE006; # LETTER AA
|
||||
\u0B87>\uE007; # LETTER I
|
||||
\u0B88>\uE008; # LETTER II
|
||||
\u0B89>\uE009; # LETTER U
|
||||
\u0B8A>\uE00A; # LETTER UU
|
||||
\u0B8E>\uE00E; # LETTER E
|
||||
\u0B8F>\uE00F; # LETTER EE
|
||||
\u0B90>\uE010; # LETTER AI
|
||||
\u0B92>\uE012; # LETTER O
|
||||
\u0B93>\uE013; # LETTER OO
|
||||
\u0B94>\uE014; # LETTER AU
|
||||
\u0B95>\uE015; # LETTER KA
|
||||
\u0B99>\uE019; # LETTER NGA
|
||||
\u0B9A>\uE01A; # LETTER CA
|
||||
\u0B9C>\uE01C; # LETTER JA
|
||||
\u0B9E>\uE01E; # LETTER NYA
|
||||
\u0B9F>\uE01F; # LETTER TTA
|
||||
\u0BA3>\uE023; # LETTER NNA
|
||||
\u0BA4>\uE024; # LETTER TA
|
||||
\u0BA8>\uE028; # LETTER NA
|
||||
\u0BA9>\uE029; # LETTER NNNA
|
||||
\u0BAA>\uE02A; # LETTER PA
|
||||
\u0BAE>\uE02E; # LETTER MA
|
||||
\u0BAF>\uE02F; # LETTER YA
|
||||
\u0BB0>\uE030; # LETTER RA
|
||||
\u0BB1>\uE031; # LETTER RRA
|
||||
\u0BB2>\uE032; # LETTER LA
|
||||
\u0BB3>\uE033; # LETTER LLA
|
||||
\u0BB4>\uE034; # LETTER LLLA
|
||||
\u0BB5>\uE035; # LETTER VA
|
||||
\u0BB7>\uE037; # LETTER SSA
|
||||
\u0BB8>\uE038; # LETTER SA
|
||||
\u0BB9>\uE039; # LETTER HA
|
||||
\u0BBE>\uE03E; # VOWEL SIGN AA
|
||||
\u0BBF>\uE03F; # VOWEL SIGN I
|
||||
\u0BC0>\uE040; # VOWEL SIGN II
|
||||
\u0BC1>\uE041; # VOWEL SIGN U
|
||||
\u0BC2>\uE042; # VOWEL SIGN UU
|
||||
\u0BC6>\uE046; # VOWEL SIGN E
|
||||
\u0BC7>\uE047; # VOWEL SIGN EE
|
||||
\u0BC8>\uE048; # VOWEL SIGN AI
|
||||
|
||||
\u0BCD>\uE04D; # SIGN VIRAMA
|
||||
\u0BD7>\uE057; # AU LENGTH MARK
|
||||
\u0BE7>\uE067; # DIGIT ONE
|
||||
\u0BE8>\uE068; # DIGIT TWO
|
||||
\u0BE9>\uE069; # DIGIT THREE
|
||||
\u0BEA>\uE06A; # DIGIT FOUR
|
||||
\u0BEB>\uE06B; # DIGIT FIVE
|
||||
\u0BEC>\uE06C; # DIGIT SIX
|
||||
\u0BED>\uE06D; # DIGIT SEVEN
|
||||
\u0BEE>\uE06E; # DIGIT EIGHT
|
||||
\u0BEF>\uE06F; # DIGIT NINE
|
||||
\u0BF0>\uE067\uE066; # UNMAPPED Tamil-InterIndic: NUMBER TEN
|
||||
\u0BF1>\uE067\uE066\uE066; # UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED
|
||||
\u0BF2>\uE067\uE066\uE066\uE066;# UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND
|
||||
0>\ue066;
|
||||
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,90 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Telugu-InterIndic
|
||||
#:: NFD (NFC) ;
|
||||
\u0c46\u0c4d\u0c56>\ue048\ue04d;
|
||||
\u0C46\u0C56>\uE048;# VOWEL SIGN AI
|
||||
\u0C01>\uE001; # SIGN CANDRABINDU
|
||||
\u0C02>\uE002; # SIGN ANUSVARA
|
||||
\u0C03>\uE003; # SIGN VISARGA
|
||||
\u0C05>\uE005; # LETTER A
|
||||
\u0C06>\uE006; # LETTER AA
|
||||
\u0C07>\uE007; # LETTER I
|
||||
\u0C08>\uE008; # LETTER II
|
||||
\u0C09>\uE009; # LETTER U
|
||||
\u0C0A>\uE00A; # LETTER UU
|
||||
\u0C0B>\uE00B; # LETTER VOCALIC R
|
||||
\u0C0C>\uE00C; # LETTER VOCALIC L
|
||||
\u0C0E>\uE00E; # LETTER E
|
||||
\u0C0F>\uE00F; # LETTER EE
|
||||
\u0C10>\uE010; # LETTER AI
|
||||
\u0C12>\uE012; # LETTER O
|
||||
\u0C13>\uE013; # LETTER OO
|
||||
\u0C14>\uE014; # LETTER AU
|
||||
\u0C15>\uE015; # LETTER KA
|
||||
\u0C16>\uE016; # LETTER KHA
|
||||
\u0C17>\uE017; # LETTER GA
|
||||
\u0C18>\uE018; # LETTER GHA
|
||||
\u0C19>\uE019; # LETTER NGA
|
||||
\u0C1A>\uE01A; # LETTER CA
|
||||
\u0C1B>\uE01B; # LETTER CHA
|
||||
\u0C1C>\uE01C; # LETTER JA
|
||||
\u0C1D>\uE01D; # LETTER JHA
|
||||
\u0C1E>\uE01E; # LETTER NYA
|
||||
\u0C1F>\uE01F; # LETTER TTA
|
||||
\u0C20>\uE020; # LETTER TTHA
|
||||
\u0C21>\uE021; # LETTER DDA
|
||||
\u0C22>\uE022; # LETTER DDHA
|
||||
\u0C23>\uE023; # LETTER NNA
|
||||
\u0C24>\uE024; # LETTER TA
|
||||
\u0C25>\uE025; # LETTER THA
|
||||
\u0C26>\uE026; # LETTER DA
|
||||
\u0C27>\uE027; # LETTER DHA
|
||||
\u0C28>\uE028; # LETTER NA
|
||||
\u0C2A>\uE02A; # LETTER PA
|
||||
\u0C2B>\uE02B; # LETTER PHA
|
||||
\u0C2C>\uE02C; # LETTER BA
|
||||
\u0C2D>\uE02D; # LETTER BHA
|
||||
\u0C2E>\uE02E; # LETTER MA
|
||||
\u0C2F>\uE02F; # LETTER YA
|
||||
\u0C30>\uE030; # LETTER RA
|
||||
\u0C31>\uE031; # LETTER RRA
|
||||
\u0C32>\uE032; # LETTER LA
|
||||
\u0C33>\uE033; # LETTER LLA
|
||||
\u0C35>\uE035; # LETTER VA
|
||||
\u0C36>\uE036; # LETTER SHA
|
||||
\u0C37>\uE037; # LETTER SSA
|
||||
\u0C38>\uE038; # LETTER SA
|
||||
\u0C39>\uE039; # LETTER HA
|
||||
\u0C3E>\uE03E; # VOWEL SIGN AA
|
||||
\u0C3F>\uE03F; # VOWEL SIGN I
|
||||
\u0C40>\uE040; # VOWEL SIGN II
|
||||
\u0C41>\uE041; # VOWEL SIGN U
|
||||
\u0C42>\uE042; # VOWEL SIGN UU
|
||||
\u0C43>\uE043; # VOWEL SIGN VOCALIC R
|
||||
\u0C44>\uE044; # VOWEL SIGN VOCALIC RR
|
||||
\u0C46>\uE046; # VOWEL SIGN E
|
||||
\u0C47>\uE047; # VOWEL SIGN EE
|
||||
\u0C4A>\uE04A; # VOWEL SIGN O
|
||||
\u0C4B>\uE04B; # VOWEL SIGN OO
|
||||
\u0C4C>\uE04C; # VOWEL SIGN AU
|
||||
\u0C4D>\uE04D; # SIGN VIRAMA
|
||||
\u0C55>\uE055; # LENGTH MARK
|
||||
\u0C56>\uE056; # AI LENGTH MARK
|
||||
\u0C60>\uE060; # LETTER VOCALIC RR
|
||||
\u0C61>\uE061; # LETTER VOCALIC LL
|
||||
\u0C66>\uE066; # DIGIT ZERO
|
||||
\u0C67>\uE067; # DIGIT ONE
|
||||
\u0C68>\uE068; # DIGIT TWO
|
||||
\u0C69>\uE069; # DIGIT THREE
|
||||
\u0C6A>\uE06A; # DIGIT FOUR
|
||||
\u0C6B>\uE06B; # DIGIT FIVE
|
||||
\u0C6C>\uE06C; # DIGIT SIX
|
||||
\u0C6D>\uE06D; # DIGIT SEVEN
|
||||
\u0C6E>\uE06E; # DIGIT EIGHT
|
||||
\u0C6F>\uE06F; # DIGIT NINE
|
||||
# :: NFC (NFD) ;
|
||||
# eof
|
|
@ -1,187 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Thai-Latin
|
||||
# This set of rules follows ISO 11940
|
||||
# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
|
||||
# except that that does not mention an implicit vowel, so we use ọ
|
||||
#
|
||||
# The transcription is fairly ugly, so we ought to also do the UNGEGN version
|
||||
# see: http://www.eki.ee/wgrs/rom1_th.pdf
|
||||
# and probably make that the main variant.
|
||||
|
||||
# Note: this is an internal file. The NFD/NFC is handled externally, in the index
|
||||
# The insertion of spaces between words, the reversal of the vowels
|
||||
# and the conversion of space to semicolon are done *outside* of these rules.
|
||||
# So as far as these rules are concerned, the vowels are in logical order!
|
||||
|
||||
# insert implicit vowel (and remove it going the other way)
|
||||
# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
|
||||
#$consonant = [ก-ฮ];
|
||||
#$vowel = [ะ-ฺเ-ไ็];
|
||||
|
||||
#{ ( $consonant ) } [^$vowel ] > | $1 ;
|
||||
# > ọ ;
|
||||
# < ọ ;
|
||||
|
||||
$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
|
||||
$notBelow = [^\p{ccc=0}\p{ccc=below}] ;
|
||||
|
||||
# Consonants
|
||||
# Warning: the 'h's need to be handled carefully!
|
||||
# What we really want to say is the following, but we can't
|
||||
# $notHAccent = !($notAbove* ̄ | $notBelow* ̣) ;
|
||||
|
||||
# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
|
||||
$freeStandingBelow = [\u0325 ];
|
||||
$hAccent = [ ̄ ̣]; # combining macron and combining dot below, the accents used on the 'h' transcriptions
|
||||
$notHAccent0 = [^$freeStandingBelow$hAccent];
|
||||
$notHAccent1 = $freeStandingBelow [^$hAccent];
|
||||
|
||||
ห > h̄ ; # THAI CHARACTER HO HIP
|
||||
ห | $1 < h ($notAbove*) ̄; # backward case, account for reordering
|
||||
ฮ <> ḥ ; # THAI CHARACTER HO NOKHUK
|
||||
|
||||
ข <> k̄h ; # THAI CHARACTER KHO KHAI
|
||||
ฃ <> ḳ̄h ; # THAI CHARACTER KHO KHUAT
|
||||
ฅ <> kʹh ; # THAI CHARACTER KHO KHON
|
||||
ฆ <> ḳh ; # THAI CHARACTER KHO RAKHANG
|
||||
ค < kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
|
||||
ค <> kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
|
||||
ก <> k ; # THAI CHARACTER KO KAI
|
||||
|
||||
ภ <> p̣h ; # THAI CHARACTER PHO SAMPHAO
|
||||
ผ <> p̄h ; # THAI CHARACTER PHO PHUNG
|
||||
พ < ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
|
||||
พ <> ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
|
||||
ป <> p ; # THAI CHARACTER PO PLA
|
||||
|
||||
ฉ <> c̄h ; # THAI CHARACTER CHO CHING
|
||||
ฌ <> c̣h ; # THAI CHARACTER CHO CHOE
|
||||
ช < ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
|
||||
ช <> ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
|
||||
จ <> c ; # THAI CHARACTER CHO CHAN
|
||||
|
||||
ฐ <> ṭ̄h ; # THAI CHARACTER THO THAN
|
||||
ฑ <> ṯh ; # THAI CHARACTER THO NANGMONTHO
|
||||
ฒ <> tʹh ; # THAI CHARACTER THO PHUTHAO
|
||||
ถ <> t̄h ; # THAI CHARACTER THO THUNG
|
||||
ธ <> ṭh ; # THAI CHARACTER THO THONG
|
||||
ท < th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
|
||||
ท <> th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
|
||||
#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.
|
||||
ฏ <> t̩ ; # THAI CHARACTER TO PATAK
|
||||
ต <> t ; # THAI CHARACTER TO TAO
|
||||
|
||||
# since there is no singleton g (generated), don't worry about that.
|
||||
ง <> ng ; # THAI CHARACTER NGO NGU
|
||||
ณ <> ṇ ; # THAI CHARACTER NO NEN
|
||||
น <> n ; # THAI CHARACTER NO NU
|
||||
|
||||
ญ <> ỵ ; # THAI CHARACTER YO YING
|
||||
ฎ <> ḍ ; # THAI CHARACTER DO CHADA
|
||||
ด <> d ; # THAI CHARACTER DO DEK
|
||||
|
||||
บ <> b ; # THAI CHARACTER BO BAIMAI
|
||||
ฝ <> f̄ ; # THAI CHARACTER FO FA
|
||||
ฝ | $1 < f ($notAbove*) ̄; # backward case, account for reordering
|
||||
|
||||
ม <> m ; # THAI CHARACTER MO MA
|
||||
ย <> y ; # THAI CHARACTER YO YAK
|
||||
ร <> r ; # THAI CHARACTER RO RUA
|
||||
ฤ <> v ; # THAI CHARACTER RU
|
||||
ฦ <> ł ; # THAI CHARACTER LU
|
||||
ว <> w ; # THAI CHARACTER WO WAEN
|
||||
|
||||
ศ <> ṣ̄ ; # THAI CHARACTER SO SALA***
|
||||
ศ | $1 < s ̣ ($notAbove*) ̄; # backward case, account for reordering
|
||||
ษ <> s̄ʹ ; # THAI CHARACTER SO RUSI
|
||||
ส > s̄ ; # THAI CHARACTER SO SUA***
|
||||
ส | $1 < s ($notAbove*) ̄; # backward case, account for reordering
|
||||
|
||||
ฬ <> ḷ ; # THAI CHARACTER LO CHULA
|
||||
ล <> l ; # THAI CHARACTER LO LING
|
||||
ฟ <> f ; # THAI CHARACTER FO FAN
|
||||
|
||||
อ <> x ; # THAI CHARACTER O ANG
|
||||
ซ <> s ; # THAI CHARACTER SO SO
|
||||
|
||||
# vowels
|
||||
|
||||
ั <> ạ ; # THAI CHARACTER MAI HAN-AKAT
|
||||
|
||||
า > ā ; # THAI CHARACTER SARA AA
|
||||
า | $1 < a ($notAbove*) ̄; # backward case, account for reordering
|
||||
|
||||
# We deviate from ISO for SARA AM for disambiguation
|
||||
ำ > a ̉; # THAI CHARACTER SARA AM
|
||||
ำ | $1 < a ($notAbove*) ̉ ; # backward case, account for reordering
|
||||
|
||||
ะ <> a ; # THAI CHARACTER SARA A
|
||||
ี <> ī ; # THAI CHARACTER SARA II
|
||||
ี | $1 < i ($notAbove*) ̄ ; # backward case, account for reordering
|
||||
|
||||
ื <> ụ̄ ; # THAI CHARACTER SARA UEE
|
||||
ื | $1 < u ̣ ($notAbove*) ̄ ; # backward case, account for reordering
|
||||
|
||||
ึ <> ụ ; # THAI CHARACTER SARA UE
|
||||
ู <> ū ; # THAI CHARACTER SARA UU
|
||||
ู | $1 < u ($notAbove*) ̄ ; # backward case, account for reordering
|
||||
|
||||
ุ <> u ; # THAI CHARACTER SARA U
|
||||
|
||||
ฯ <> ‡ ; # THAI CHARACTER PAIYANNOI
|
||||
|
||||
# ฿ <> XXX ; # THAI CURRENCY SYMBOL BAHT
|
||||
|
||||
เ <> e ; # THAI CHARACTER SARA E
|
||||
แ <> æ ; # THAI CHARACTER SARA AE
|
||||
โ <> o ; # THAI CHARACTER SARA O
|
||||
ใ <> ı ; # THAI CHARACTER SARA AI MAIMUAN
|
||||
ไ <> ị ; # THAI CHARACTER SARA AI MAIMALAI
|
||||
ๅ <> ɨ ; # THAI CHARACTER LAKKHANGYAO
|
||||
็ <> ̆ ; # THAI CHARACTER MAITAIKHU
|
||||
่ <> ̀ ; # THAI CHARACTER MAI EK
|
||||
้ <> ̂ ; # THAI CHARACTER MAI THO
|
||||
๊ <> ́ ; # THAI CHARACTER MAI TRI
|
||||
๋ <> ̌ ; # THAI CHARACTER MAI CHATTAWA
|
||||
์ <> ̒ ; # THAI CHARACTER THANTHAKHAT
|
||||
๎ <> '~' ; # THAI CHARACTER YAMAKKAN
|
||||
|
||||
# We deviate from ISO for disambiguation
|
||||
ํ <> ̊ ; # THAI CHARACTER NIKHAHIT
|
||||
|
||||
๏ <> § ; # THAI CHARACTER FONGMAN
|
||||
|
||||
๐ <> 0 ; # THAI DIGIT ZERO
|
||||
๑ <> 1 ; # THAI DIGIT ONE
|
||||
๒ <> 2 ; # THAI DIGIT TWO
|
||||
๓ <> 3 ; # THAI DIGIT THREE
|
||||
๔ <> 4 ; # THAI DIGIT FOUR
|
||||
๕ <> 5 ; # THAI DIGIT FIVE
|
||||
๖ <> 6 ; # THAI DIGIT SIX
|
||||
๗ <> 7 ; # THAI DIGIT SEVEN
|
||||
๘ <> 8 ; # THAI DIGIT EIGHT
|
||||
๙ <> 9 ; # THAI DIGIT NINE
|
||||
|
||||
๚ <> '||' ; # THAI CHARACTER ANGKHANKHU
|
||||
|
||||
๛ <> » ; # THAI CHARACTER KHOMUT
|
||||
ๆ <> « ; # THAI CHARACTER MAIYAMOK
|
||||
|
||||
# moved down to make shorter first
|
||||
#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
|
||||
ฺ <> ˌ ; # THAI CHARACTER PHINTHU
|
||||
ิ <> i ; # THAI CHARACTER SARA I
|
||||
|
||||
# fallbacks
|
||||
|
||||
| k < g ;
|
||||
| k < h ;
|
||||
| c < j ;
|
||||
| k < q ;
|
||||
| s < z ;
|
||||
|
||||
:: (lower);
|
|
@ -1,26 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# This reverses the Thai LogicalOrderException vowels, and does (part of) spaces
|
||||
# The rules that convert space into semicolon are in another file;
|
||||
# since they have to come BEFORE the break iterator
|
||||
|
||||
$thai = [[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B] ;
|
||||
|
||||
# First convert the semicolon back
|
||||
|
||||
' ' < $thai { '; ' } $thai;
|
||||
|
||||
# Remove any other spaces between thai letters
|
||||
|
||||
< $thai { ' ' } $thai;
|
||||
|
||||
# Now vowels
|
||||
$thai_reversing = [[:Logical_Order_Exception:] & $thai];
|
||||
$thai_non_reversing = [$thai - $thai_reversing ];
|
||||
|
||||
( $thai_reversing ) ( $thai_non_reversing ) > $2 $1;
|
||||
# other direction
|
||||
$2 $1 < ( $thai_non_reversing ) ( $thai_reversing ) ;
|
|
@ -1,11 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# The rules that convert space into semicolon are in this file;
|
||||
# since they have to come BEFORE the break iterator.
|
||||
|
||||
$thai = [[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B] ;
|
||||
|
||||
$thai { ' ' } $thai > '; ' ;
|
|
@ -1,11 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Only intended for internal use
|
||||
\u0304 <> 1; # COMBINING MACRON <> digit 1
|
||||
\u0301 <> 2; # COMBINING ACUTE ACCENT <> digit 2
|
||||
\u030C <> 3; # COMBINING CARON <> digit 3
|
||||
\u0300 <> 4; # COMBINING GRAVE ACCENT <> digit 4
|
||||
< 5; # reverse direction only: digit 5 is replaced by nothing
|
|
@ -1,253 +0,0 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2004, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
#
|
||||
# TRANSLITERATOR INDEX FILE. This file lists the non-algorithmic
|
||||
# system transliterators. It allows arbitrary mappings between
|
||||
# transliterator IDs and file names, and also allows the system to
|
||||
# define aliases for transliterators, so that "Latin-Hangul", for
|
||||
# example, can be implemented transparently as the compound
|
||||
# "Latin-Jamo;Jamo-Hangul". Internal IDs may also be defined; these
|
||||
# are invisible to the user, but can be composed together by the
|
||||
# system to create visible transliterators.
|
||||
#
|
||||
# Blank lines and lines beginning with '#' are ignored.
|
||||
#
|
||||
# Lines in this file have one of the following forms (text not
|
||||
# enclosed by <> is literal):
|
||||
#
|
||||
# <id>:file:<resource>:<encoding>:<direction>
|
||||
# <id>:internal:<resource>:<encoding>:<direction>
|
||||
# <id>:alias:<getInstanceArg>
|
||||
#
|
||||
# <id> is the ID of the system transliterator being defined. These
|
||||
# are public IDs enumerated by Transliterator.getAvailableIDs(),
|
||||
# unless the second field is "internal".
|
||||
#
|
||||
# <resource> is a ResourceReader resource name. Currently these refer
|
||||
# to file names under com/ibm/text/resources. This string is passed
|
||||
# directly to ResourceReader, together with <encoding>.
|
||||
#
|
||||
# <encoding> is the character encoding to use when reading <resource>;
|
||||
# passed directly to ResourceReader. E.g., "UTF8".
|
||||
#
|
||||
# <direction> is either "FORWARD" or "REVERSE".
|
||||
#
|
||||
# <getInstanceArg> is a string to be passed directly to
|
||||
# Transliterator.getInstance(). The returned Transliterator object
|
||||
# then has its ID changed to <id> and is returned.
|
||||
|
||||
|
||||
# Bidirectional rule files
|
||||
|
||||
Fullwidth-Halfwidth:file:Transliterator_Fullwidth_Halfwidth.txt:UTF8:FORWARD
|
||||
Halfwidth-Fullwidth:file:Transliterator_Fullwidth_Halfwidth.txt:UTF8:REVERSE
|
||||
|
||||
Latin-Cyrillic:file:Transliterator_Cyrillic_Latin.txt:UTF8:REVERSE
|
||||
Cyrillic-Latin:file:Transliterator_Cyrillic_Latin.txt:UTF8:FORWARD
|
||||
|
||||
Latin-Hebrew:file:Transliterator_Hebrew_Latin.txt:UTF8:REVERSE
|
||||
Hebrew-Latin:file:Transliterator_Hebrew_Latin.txt:UTF8:FORWARD
|
||||
|
||||
Latin-Arabic:file:Transliterator_Arabic_Latin.txt:UTF8:REVERSE
|
||||
Arabic-Latin:file:Transliterator_Arabic_Latin.txt:UTF8:FORWARD
|
||||
|
||||
Tone-Digit:internal:Transliterator_Tone_Digit.txt:UTF8:FORWARD
|
||||
Digit-Tone:internal:Transliterator_Tone_Digit.txt:UTF8:REVERSE
|
||||
|
||||
Latin-NumericPinyin:file:Transliterator_Latin_NumericPinyin.txt:UTF8:FORWARD
|
||||
NumericPinyin-Latin:file:Transliterator_Latin_NumericPinyin.txt:UTF8:REVERSE
|
||||
|
||||
Han-Spacedhan:internal:Transliterator_Han_Spacedhan.txt:UTF8:FORWARD
|
||||
Spacedhan-Han:alias:null
|
||||
|
||||
Han-Latin:file:Transliterator_Han_Latin.txt:UTF8:FORWARD
|
||||
#Latin-Han:file:Transliterator_Han_Latin.txt:UTF8:REVERSE # no round trip!
|
||||
Latin-Han:alias:null
|
||||
|
||||
# Comment these out; they are only for testing
|
||||
# Latin-Han/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:REVERSE
|
||||
# Han-Latin/definition:file:Transliterator_Han_Latin_Definition.txt:UTF8:FORWARD
|
||||
|
||||
#Latin-Han/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:REVERSE
|
||||
#Han-Latin/EDICT:file:Transliterator_Han_Latin_EDICT.txt:UTF8:FORWARD
|
||||
|
||||
Latin-Greek:file:Transliterator_Greek_Latin.txt:UTF8:REVERSE
|
||||
Greek-Latin:file:Transliterator_Greek_Latin.txt:UTF8:FORWARD
|
||||
|
||||
Latin-Greek/UNGEGN:file:Transliterator_Greek_Latin_UNGEGN.txt:UTF8:REVERSE
|
||||
Greek-Latin/UNGEGN:file:Transliterator_Greek_Latin_UNGEGN.txt:UTF8:FORWARD
|
||||
|
||||
Latin-Katakana:file:Transliterator_Latin_Katakana.txt:UTF8:FORWARD
|
||||
Katakana-Latin:file:Transliterator_Latin_Katakana.txt:UTF8:REVERSE
|
||||
|
||||
Latin-Hiragana:file:Transliterator_Hiragana_Latin.txt:UTF8:REVERSE
|
||||
Hiragana-Latin:file:Transliterator_Hiragana_Latin.txt:UTF8:FORWARD
|
||||
|
||||
#Thai Stuff: will change if we get \b into Transliterator
|
||||
|
||||
Thai-ThaiSemi:internal:Transliterator_Thai_ThaiSemi.txt:UTF8:FORWARD
|
||||
|
||||
Thai-ThaiLogical:internal:Transliterator_Thai_ThaiLogical.txt:UTF8:FORWARD
|
||||
ThaiLogical-Thai:internal:Transliterator_Thai_ThaiLogical.txt:UTF8:REVERSE
|
||||
|
||||
ThaiLogical-Latin:internal:Transliterator_ThaiLogical_Latin.txt:UTF8:FORWARD
|
||||
Latin-ThaiLogical:internal:Transliterator_ThaiLogical_Latin.txt:UTF8:REVERSE
|
||||
|
||||
# Must use the order below!
|
||||
# We need two separate passes because of the Thai vowel reversal
|
||||
# Thai-ThaiSemi converts spaces to semicolons. That has to be done before we insert latin spaces
|
||||
|
||||
Thai-Latin:alias:[[:thai:] \u0E01-\u0E3A\u0E40-\u0E5B];NFD;Thai-ThaiSemi;Any-BreakInternal;Thai-ThaiLogical;ThaiLogical-Latin;NFC
|
||||
Latin-Thai:alias:[[:Latin:][:Mn:][:Me:] \ \;0-9|~\u00A7\u00AB\u00BB\u02B9\u0374\u2021\u02CC];NFD;Latin-ThaiLogical;ThaiLogical-Thai;NFC
|
||||
|
||||
# end of Thai Stuff
|
||||
|
||||
Hiragana-Katakana:file:Transliterator_Hiragana_Katakana.txt:UTF8:FORWARD
|
||||
Katakana-Hiragana:file:Transliterator_Hiragana_Katakana.txt:UTF8:REVERSE
|
||||
|
||||
Any-Accents:file:Transliterator_Any_Accents.txt:UTF8:FORWARD
|
||||
Accents-Any:file:Transliterator_Any_Accents.txt:UTF8:REVERSE
|
||||
|
||||
Any-Publishing:file:Transliterator_Any_Publishing.txt:UTF8:FORWARD
|
||||
Publishing-Any:file:Transliterator_Any_Publishing.txt:UTF8:REVERSE
|
||||
|
||||
# Korean
|
||||
# N.B. Don't end Latin-Jamo with NFC; that produces Hangul. For
|
||||
# Hangul output use Latin-Hangul.
|
||||
|
||||
LowerLatin-Jamo:internal:Transliterator_Latin_Jamo.txt:UTF8:FORWARD
|
||||
Jamo-LowerLatin:internal:Transliterator_Latin_Jamo.txt:UTF8:REVERSE
|
||||
Latin-Jamo:alias:['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo
|
||||
Jamo-Latin:alias:['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\uAC00-\uD7A3];NFD;Jamo-LowerLatin;NFC
|
||||
Latin-Hangul:alias:['A-Za-z\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E1\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u01FB\u0200-\u021B\u021E-\u021F\u0226-\u0233\u1E00-\u1E99\u1EA0-\u1EF9\u212A-\u212B];NFD;Lower;LowerLatin-Jamo;NFC
|
||||
Hangul-Latin:alias:['\u1100-\u1112\u1161-\u1175\u11A8-\u11C2\u3131-\u313F\u3141-\u3143\u3145-\u3163\u3200-\u321C\u3260-\u327B\uAC00-\uD7A3\uFF07\uFFA1-\uFFAF\uFFB1-\uFFB3\uFFB5-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC];NFKD;Jamo-LowerLatin;NFC
|
||||
|
||||
# Inter-Indic composed rules
|
||||
Latin-InterIndic:internal:Transliterator_Latin_InterIndic.txt:UTF8:FORWARD
|
||||
Devanagari-InterIndic:internal:Transliterator_Devanagari_InterIndic.txt:UTF8:FORWARD
|
||||
Bengali-InterIndic:internal:Transliterator_Bengali_InterIndic.txt:UTF8:FORWARD
|
||||
Gurmukhi-InterIndic:internal:Transliterator_Gurmukhi_InterIndic.txt:UTF8:FORWARD
|
||||
Gujarati-InterIndic:internal:Transliterator_Gujarati_InterIndic.txt:UTF8:FORWARD
|
||||
Oriya-InterIndic:internal:Transliterator_Oriya_InterIndic.txt:UTF8:FORWARD
|
||||
Tamil-InterIndic:internal:Transliterator_Tamil_InterIndic.txt:UTF8:FORWARD
|
||||
Telugu-InterIndic:internal:Transliterator_Telugu_InterIndic.txt:UTF8:FORWARD
|
||||
Kannada-InterIndic:internal:Transliterator_Kannada_InterIndic.txt:UTF8:FORWARD
|
||||
Malayalam-InterIndic:internal:Transliterator_Malayalam_InterIndic.txt:UTF8:FORWARD
|
||||
|
||||
InterIndic-Latin:internal:Transliterator_InterIndic_Latin.txt:UTF8:FORWARD
|
||||
InterIndic-Devanagari:internal:Transliterator_InterIndic_Devanagari.txt:UTF8:FORWARD
|
||||
InterIndic-Bengali:internal:Transliterator_InterIndic_Bengali.txt:UTF8:FORWARD
|
||||
InterIndic-Gurmukhi:internal:Transliterator_InterIndic_Gurmukhi.txt:UTF8:FORWARD
|
||||
InterIndic-Gujarati:internal:Transliterator_InterIndic_Gujarati.txt:UTF8:FORWARD
|
||||
InterIndic-Oriya:internal:Transliterator_InterIndic_Oriya.txt:UTF8:FORWARD
|
||||
InterIndic-Tamil:internal:Transliterator_InterIndic_Tamil.txt:UTF8:FORWARD
|
||||
InterIndic-Telugu:internal:Transliterator_InterIndic_Telugu.txt:UTF8:FORWARD
|
||||
InterIndic-Kannada:internal:Transliterator_InterIndic_Kannada.txt:UTF8:FORWARD
|
||||
InterIndic-Malayalam:internal:Transliterator_InterIndic_Malayalam.txt:UTF8:FORWARD
|
||||
|
||||
#Latin-Indic transliterators
|
||||
Latin-Devanagari:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Devanagari;NFC
|
||||
Latin-Bengali:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Bengali;NFC
|
||||
Latin-Gurmukhi:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gurmukhi;NFC
|
||||
Latin-Gujarati:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Gujarati;NFC
|
||||
Latin-Oriya:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Oriya;NFC
|
||||
Latin-Tamil:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Tamil;NFC
|
||||
Latin-Telugu:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Telugu;NFC
|
||||
Latin-Kannada:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Kannada;NFC
|
||||
Latin-Malayalam:alias:['.0-9A-Za-z~\u00C0-\u00C5\u00C7-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00E5\u00E7-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u010F\u0112-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01ED\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0301\u0303-\u0304\u0306-\u0307\u0310\u0314-\u0315\u0323\u0325\u0331\u0341\u0344\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0403\u040C\u040E\u0419\u0439\u0453\u045C\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u04E2-\u04E3\u04EE-\u04EF\u1E00-\u1E99\u1EA0-\u1EF9\u1F01\u1F03-\u1F05\u1F07\u1F09\u1F0B-\u1F0D\u1F0F\u1F11\u1F13-\u1F15\u1F19\u1F1B-\u1F1D\u1F21\u1F23-\u1F25\u1F27\u1F29\u1F2B-\u1F2D\u1F2F\u1F31\u1F33-\u1F35\u1F37\u1F39\u1F3B-\u1F3D\u1F3F\u1F41\u1F43-\u1F45\u1F49\u1F4B-\u1F4D\u1F51\u1F53-\u1F55\u1F57\u1F59\u1F5B\u1F5D\u1F5F\u1F61\u1F63-\u1F65\u1F67\u1F69\u1F6B-\u1F6D\u1F6F\u1F71\u1F73\u1F75\u1F77\u1F79\u1F7B\u1F7D\u1F81\u1F83-\u1F85\u1F87\u1F89\u1F8B-\u1F8D\u1F8F\u1F91\u1F93-\u1F95\u1F97\u1F99\u1F9B-\u1F9D\u1F9F\u1FA1\u1FA3-\u1FA5\u1FA7\u1FA9\u1FAB-\u1FAD\u1FAF-\u1FB1\u1FB4\u1FB8-\u1FB9\u1FBB\u1FC4\u1FC9\u1FCB\u1FCE\u1FD0-\u1FD1\u1FD3\u1FD8-\u1FD9\u1FDB\u1FDE\u1FE0-\u1FE1\u1FE3\u1FE5\u1FE8-\u1FE9\u1FEB-\u1FEC\u1FEE\u1FF4\u1FF9\u1FFB\u212A-\u212B\uE04D\uE064];NFD;Lower;Latin-InterIndic;InterIndic-Malayalam;NFC
|
||||
|
||||
#Indic-Latin transliterators
|
||||
Devanagari-Latin:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Latin;NFC
|
||||
Bengali-Latin:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Latin;NFC
|
||||
Gurmukhi-Latin:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Latin;NFC
|
||||
Gujarati-Latin:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Latin;NFC
|
||||
Oriya-Latin:alias:[\u0964-\u0965\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35\u0B36-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B70\u0B71];NFD;Oriya-InterIndic;InterIndic-Latin;NFC
|
||||
Tamil-Latin:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Latin;NFC
|
||||
Telugu-Latin:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Latin;NFC
|
||||
Kannada-Latin:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Latin;NFC
|
||||
Malayalam-Latin:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Latin;NFC
|
||||
|
||||
Devanagari-Bengali:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Bengali;NFC
|
||||
Devanagari-Gurmukhi:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gurmukhi;NFC
|
||||
Devanagari-Gujarati:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Gujarati;NFC
|
||||
Devanagari-Oriya:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Oriya;NFC
|
||||
Devanagari-Tamil:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Tamil;NFC
|
||||
Devanagari-Telugu:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Telugu;NFC
|
||||
Devanagari-Kannada:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Kannada;NFC
|
||||
Devanagari-Malayalam:alias:[\u0901-\u0903\u0904\u0905-\u0939\u093C-\u094D\u0950-\u0954\u0958-\u096F];NFD;Devanagari-InterIndic;InterIndic-Malayalam;NFC
|
||||
|
||||
Bengali-Devanagari:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Devanagari;NFC
|
||||
Bengali-Gurmukhi:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gurmukhi;NFC
|
||||
Bengali-Gujarati:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Gujarati;NFC
|
||||
Bengali-Oriya:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Oriya;NFC
|
||||
Bengali-Tamil:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Tamil;NFC
|
||||
Bengali-Telugu:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Telugu;NFC
|
||||
Bengali-Kannada:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Kannada;NFC
|
||||
Bengali-Malayalam:alias:[\u0964-\u0965\u0981-\u0983\u0985-\u098C\u098F-\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7-\u09C8\u09CB-\u09CD\u09D7\u09DC-\u09DD\u09DF-\u09E3\u09E6-\u09FA];NFD;Bengali-InterIndic;InterIndic-Malayalam;NFC
|
||||
|
||||
Gurmukhi-Devanagari:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Devanagari;NFC
|
||||
Gurmukhi-Bengali:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Bengali;NFC
|
||||
Gurmukhi-Gujarati:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Gujarati;NFC
|
||||
Gurmukhi-Oriya:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Oriya;NFC
|
||||
Gurmukhi-Tamil:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Tamil;NFC
|
||||
Gurmukhi-Telugu:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Telugu;NFC
|
||||
Gurmukhi-Kannada:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Kannada;NFC
|
||||
Gurmukhi-Malayalam:alias:[\u0964-\u0965\u0A01\u0A02\u0A05-\u0A0A\u0A0F-\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32-\u0A33\u0A35-\u0A36\u0A38-\u0A39\u0A3C\u0A3E-\u0A42\u0A47-\u0A48\u0A4B-\u0A4D\u0A59-\u0A5C\u0A5E\u0A66-\u0A74];NFD;Gurmukhi-InterIndic;InterIndic-Malayalam;NFC
|
||||
|
||||
Gujarati-Devanagari:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Devanagari;NFC
|
||||
Gujarati-Bengali:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Bengali;NFC
|
||||
Gujarati-Gurmukhi:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Gurmukhi;NFC
|
||||
Gujarati-Oriya:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Oriya;NFC
|
||||
Gujarati-Tamil:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Tamil;NFC
|
||||
Gujarati-Telugu:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Telugu;NFC
|
||||
Gujarati-Kannada:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Kannada;NFC
|
||||
Gujarati-Malayalam:alias:[\u0964-\u0965\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2-\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0\u0AE1\u0AE6-\u0AEF];NFD;Gujarati-InterIndic;InterIndic-Malayalam;NFC
|
||||
|
||||
Oriya-Devanagari:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Devanagari;NFC
|
||||
Oriya-Bengali:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Bengali;NFC
|
||||
Oriya-Gurmukhi:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gurmukhi;NFC
|
||||
Oriya-Gujarati:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Gujarati;NFC
|
||||
Oriya-Tamil:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Tamil;NFC
|
||||
Oriya-Telugu:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Telugu;NFC
|
||||
Oriya-Kannada:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Kannada;NFC
|
||||
Oriya-Malayalam:alias:[\u0B01-\u0B03\u0B05-\u0B0C\u0B0F-\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32-\u0B33\u0B35-\u0B39\u0B3C-\u0B43\u0B47-\u0B48\u0B4B-\u0B4D\u0B56-\u0B57\u0B5C-\u0B5D\u0B5F-\u0B61\u0B66-\u0B71];NFD;Oriya-InterIndic;InterIndic-Malayalam;NFC
|
||||
|
||||
Tamil-Devanagari:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Devanagari;NFC
|
||||
Tamil-Bengali:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Bengali;NFC
|
||||
Tamil-Gurmukhi:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gurmukhi;NFC
|
||||
Tamil-Gujarati:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Gujarati;NFC
|
||||
Tamil-Oriya:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Oriya;NFC
|
||||
Tamil-Telugu:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Telugu;NFC
|
||||
Tamil-Kannada:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Kannada;NFC
|
||||
Tamil-Malayalam:alias:[0\u0B82-\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99-\u0B9A\u0B9C\u0B9E-\u0B9F\u0BA3-\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB5\u0BB7-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0BE7-\u0BF2];NFD;Tamil-InterIndic;InterIndic-Malayalam;NFC
|
||||
|
||||
Telugu-Devanagari:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Devanagari;NFC
|
||||
Telugu-Bengali:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Bengali;NFC
|
||||
Telugu-Gurmukhi:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gurmukhi;NFC
|
||||
Telugu-Gujarati:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Gujarati;NFC
|
||||
Telugu-Oriya:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Oriya;NFC
|
||||
Telugu-Tamil:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Tamil;NFC
|
||||
Telugu-Kannada:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Kannada;NFC
|
||||
Telugu-Malayalam:alias:[\u0C01-\u0C03\u0C05-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C33\u0C35-\u0C39\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55-\u0C56\u0C60-\u0C61\u0C66-\u0C6F];NFD;Telugu-InterIndic;InterIndic-Malayalam;NFC
|
||||
|
||||
Kannada-Devanagari:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Devanagari;NFC
|
||||
Kannada-Bengali:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Bengali;NFC
|
||||
Kannada-Gurmukhi:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gurmukhi;NFC
|
||||
Kannada-Gujarati:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Gujarati;NFC
|
||||
Kannada-Oriya:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Oriya;NFC
|
||||
Kannada-Tamil:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Tamil;NFC
|
||||
Kannada-Telugu:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Telugu;NFC
|
||||
Kannada-Malayalam:alias:[\u0C82-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC\u0CBD\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5-\u0CD6\u0CDE\u0CE0-\u0CE1\u0CE6-\u0CEF];NFD;Kannada-InterIndic;InterIndic-Malayalam;NFC
|
||||
|
||||
Malayalam-Devanagari:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Devanagari;NFC
|
||||
Malayalam-Bengali:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Bengali;NFC
|
||||
Malayalam-Gurmukhi:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gurmukhi;NFC
|
||||
Malayalam-Gujarati:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Gujarati;NFC
|
||||
Malayalam-Oriya:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Oriya;NFC
|
||||
Malayalam-Tamil:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Tamil;NFC
|
||||
Malayalam-Telugu:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Telugu;NFC
|
||||
Malayalam-Kannada:alias:[\u0D02-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D28\u0D2A-\u0D39\u0D3E-\u0D43\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D60-\u0D61\u0D66-\u0D6F];NFD;Malayalam-InterIndic;InterIndic-Kannada;NFC
|
||||
|
||||
# eof
|
Loading…
Add table
Reference in a new issue