Updated to Mark Davis' new rules

X-SVN-Rev: 451
This commit is contained in:
Alan Liu 1999-12-21 23:02:36 +00:00
parent 6edcd7320e
commit 8623cf04e2

View file

@ -8,359 +8,304 @@ public class TransliterationRule$Latin$Cyrillic extends ListResourceBundle {
*/
public Object[][] getContents() {
return new Object[][] {
{ "Description",
"xxxxxxxxxxxx" },
{ "Rule", "" // Russian Letters
{ "Rule",
// Russian Letters
+ "YO=\u0401;"
+ "J=\u0408;"
+ "A=\u0410;"
+ "B=\u0411;"
+ "V=\u0412;"
+ "G=\u0413;"
+ "D=\u0414;"
+ "YE=\u0415;"
+ "ZH=\u0416;"
+ "Z=\u0417;"
+ "YI=\u0418;"
+ "Y=\u0419;"
+ "K=\u041A;"
+ "L=\u041B;"
+ "M=\u041C;"
+ "N=\u041D;"
+ "O=\u041E;"
+ "P=\u041F;"
+ "R=\u0420;"
+ "S=\u0421;"
+ "T=\u0422;"
+ "U=\u0423;"
+ "F=\u0424;"
+ "KH=\u0425;"
+ "TS=\u0426;"
+ "CH=\u0427;"
+ "SH=\u0428;"
+ "SHCH=\u0429;"
+ "HARD=\u042A;"
+ "I=\u042B;"
+ "SOFT=\u042C;"
+ "E=\u042D;"
+ "YU=\u042E;"
+ "YA=\u042F;"
// Lowercase
"cyA=\u0410;" +
"cyBe=\u0411;" +
"cyVe=\u0412;" +
"cyGe=\u0413;" +
"cyDe=\u0414;" +
"cyYe=\u0415;" +
"cyYo=\u0416;" +
"cyZhe=\u0417;" +
"cyZe=\u0418;" +
"cyYi=\u0419;" +
"cyY=\u0419;" +
"cyKe=\u041a;" +
"cyLe=\u041b;" +
"cyMe=\u041c;" +
"cyNe=\u041d;" +
"cyO=\u041e;" +
"cyPe=\u041f;" +
+ "a=\u0430;"
+ "b=\u0431;"
+ "v=\u0432;"
+ "g=\u0433;"
+ "d=\u0434;"
+ "ye=\u0435;"
+ "zh=\u0436;"
+ "z=\u0437;"
+ "yi=\u0438;"
+ "y=\u0439;"
+ "k=\u043a;"
+ "l=\u043b;"
+ "m=\u043c;"
+ "n=\u043d;"
+ "o=\u043e;"
+ "p=\u043f;"
+ "r=\u0440;"
+ "s=\u0441;"
+ "t=\u0442;"
+ "u=\u0443;"
+ "f=\u0444;"
+ "kh=\u0445;"
+ "ts=\u0446;"
+ "ch=\u0447;"
+ "sh=\u0448;"
+ "shch=\u0449;"
+ "hard=\u044a;"
+ "i=\u044b;"
+ "soft=\u044c;"
+ "e=\u044d;"
+ "yu=\u044e;"
+ "ya=\u044f;"
"cyRe=\u0420;" +
"cySe=\u0421;" +
"cyTe=\u0422;" +
"cyU=\u0423;" +
"cyFe=\u0424;" +
"cyKhe=\u0425;" +
"cyTse=\u0426;" +
"cyChe=\u0427;" +
"cyShe=\u0428;" +
"cyShche=\u0429;" +
"cyHard=\u042a;" +
"cyI=\u042b;" +
"cySoft=\u042c;" +
"cyE=\u042d;" +
"cyYu=\u042e;" +
"cyYa=\u042f;" +
+ "yo=\u0451;"
+ "j=\u0458;"
// variables
// some are duplicated so lowercasing works
+ "csoft=[eiyEIY];"
+ "CSOFT=[eiyEIY];"
+ "BECOMES_H=[{HARD}{hard}];"
+ "becomes_h=[{HARD}{hard}];"
+ "BECOMES_S=[{S}{s}];"
+ "becomes_s=[{S}{s}];"
+ "BECOMES_C=[{CH}{ch}];"
+ "becomes_c=[{CH}{ch}];"
+ "BECOMES_VOWEL=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
+ "becomes_vowel=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
+ "letter=[[:Lu:][:Ll:]];"
+ "lower=[[:Ll:]];"
+ "Agrave=\u00C0;"
+ "Egrave=\u00C8;"
+ "Igrave=\u00CC;"
+ "Ograve=\u00D2;"
+ "Ugrave=\u00D9;"
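+ "Ydiaeresis=\u009F;" // Non-standard? NOTE(review): U+009F is a C1 control code; Latin capital Y with diaeresis is U+0178 (0x9F is its Windows-1252 byte) - confirm the intended value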
"cya=\u0430;" +
"cybe=\u0431;" +
"cyve=\u0432;" +
"cyge=\u0433;" +
"cyde=\u0434;" +
"cyye=\u0435;" +
"cyzhe=\u0436;" +
"cyze=\u0437;" +
"cyyi=\u0438;" +
"cyy=\u0439;" +
"cyke=\u043a;" +
"cyle=\u043b;" +
"cyme=\u043c;" +
"cyne=\u043d;" +
"cyo=\u043e;" +
"cype=\u043f;" +
+ "agrave=\u00E0;"
+ "egrave=\u00E8;"
+ "igrave=\u00EC;"
+ "ograve=\u00F2;"
+ "ugrave=\u00F9;"
+ "ydiaeresis=\u00FF;"
"cyre=\u0440;" +
"cyse=\u0441;" +
"cyte=\u0442;" +
"cyu=\u0443;" +
"cyfe=\u0444;" +
"cykhe=\u0445;" +
"cytse=\u0446;" +
"cyche=\u0447;" +
"cyshe=\u0448;" +
"cyshche=\u0449;" +
"cyhard=\u044a;" +
"cyi=\u044b;" +
"cysoft=\u044c;" +
"cye=\u044d;" +
"cyyu=\u044e;" +
"cyya=\u044f;" +
/*
Modified to combine the display transliterator and the typing transliterator.
The display mapping uses accented letters for the "soft" vowels.
Although it could, it does not use single characters like \u009A in place of
digraphs like sh.
*/
// #############################################
// Special titlecase forms, not duplicated
// #############################################
+ "Ch>{CH};" + "Ch<{CH}[{lower};"
+ "Kh>{KH};" + "Kh<{KH}[{lower};"
+ "Shch>{SHCH};" + "Shch<{SHCH}[{lower};"
+ "Sh>{SH};" + "Sh<{SH}[{lower};"
+ "Ts>{TS};" + "Ts<{TS}[{lower};"
+ "Zh>{ZH};" + "Zh<{ZH}[{lower};"
+ "Yi>{YI};" //+ "Yi<{YI}[{lower};"
+ "Ye>{YE};" //+ "Ye<{YE}[{lower};"
+ "Yo>{YO};" //+ "Yo<{YO}[{lower};"
+ "Yu>{YU};" //+ "Yu<{YU}[{lower};"
+ "Ya>{YA};" //+ "Ya<{YA}[{lower};"
// #############################################
// Rules to Duplicate
// To get the lowercase versions, copy these and lowercase
// #############################################
"cyyo=\u0451;" +
// variant spellings in English
+ "SHTCH>{SHCH};"
+ "TCH>{CH};"
+ "TH>{Z};"
+ "Q>{K};"
+ "WH>{V};"
+ "W>{V};"
+ "X>{K}{S};" //+ "X<{K}{S};"
// Separate letters that would otherwise join
+ "SH''<{SH}[{BECOMES_C};"
+ "T''<{T}[{BECOMES_S};"
+ "K''<{K}[{BECOMES_H};"
+ "S''<{S}[{BECOMES_H};"
+ "T''<{T}[{BECOMES_H};"
+ "Z''<{Z}[{BECOMES_H};"
+ "Y''<{Y}[{BECOMES_VOWEL};"
// Main letters
"a=[aA];" +
"c=[cC];" +
"e=[eE];" +
"h=[hH];" +
"i=[iI];" +
"o=[oO];" +
"s=[sS];" +
"t=[tT];" +
"u=[uU];" +
"iey=[ieyIEY];" +
"lower=[:Lu:];" +
+ "A<>{A};"
+ "B<>{B};"
+ "CH<>{CH};"
+ "D<>{D};"
+ "E<>{E};"
+ "F<>{F};"
+ "G<>{G};"
+ "{Igrave}<>{YI};"
+ "I<>{I};"
+ "KH<>{KH};"
+ "K<>{K};"
+ "L<>{L};"
+ "M<>{M};"
+ "N<>{N};"
+ "O<>{O};"
+ "P<>{P};"
+ "R<>{R};"
+ "SHCH<>{SHCH};"
+ "SH<>{SH};"
+ "S<>{S};"
+ "TS<>{TS};"
+ "T<>{T};"
+ "U<>{U};"
+ "V<>{V};"
//AEOU + grave
+ "YE>{YE};" //+ "YE<{YE};"
+ "{Egrave}<>{YE};"
+ "YO>{YO};" //+ "YO<{YO};"
+ "{Ograve}<>{YO};"
+ "YU>{YU};" //+ "YU<{YU};"
+ "{Ugrave}<>{YU};"
+ "YA>{YA};" //+ "YA<{YA};"
+ "{Agrave}<>{YA};"
+ "Y<>{Y};"
+ "ZH<>{ZH};"
+ "Z<>{Z};"
// convert English to Russian
"Russian>\u041f\u0420\u0410\u0412\u0414\u0410\u00D1\u0020\u0411\u044d\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f\u002c\u0020\u043a\u044b\u0440\u0433\u044b\u0437\u002c\u0020\u041c\u043e\u043b\u0434\u043e\u0432\u044d\u043d\u044f\u0441\u043a\u044d\u002e;" +
+ "H<>{HARD};"
+ "{Ydiaeresis}<>{SOFT};"
// Non-russian
+ "J<>{J};"
//special equivs for ay, oy, ...
"Y{a}{i}>{cyYa}{cyY};" +
"Y{e}{i}>{cyYe}{cyY};" +
"Y{i}{i}>{cyYi}{cyY};" +
"Y{o}{i}>{cyYo}{cyY};" +
"Y{u}{i}>{cyYu}{cyY};" +
"A{i}>{cyA}{cyY};" +
"E{i}>{cyE}{cyY};" +
//skip II, since it is the soft sign
"O{i}>{cyO}{cyY};" +
"U{i}>{cyU}{cyY};" +
// variant spellings in English
+ "C[{csoft}>{S};"
+ "C>{K};"
"A>{cyA};" +
"B>{cyBe};" +
"C{h}>{cyChe};" +
"C[{iey}>{cySe};" +
"C>{cyKe};" +
"D>{cyDe};" +
"E>{cyE};" +
"F>{cyFe};" +
"G>{cyGe};" +
"H>{cyHard};" +
"I{i}>{cySoft};" +
"I>{cyI};" +
"J>{cyDe}{cyZhe};" +
"K{h}>{cyKhe};" +
"K>{cyKe};" +
"L>{cyLe};" +
"M>{cyMe};" +
"N>{cyNe};" +
"O>{cyO};" +
"P>{cyPe};" +
"Q{u}>{cyKe}{cyVe};" +
"R>{cyRe};" +
"S{h}{t}{c}{h}>{cyShche};" +
"S{h}{c}{h}>{cyShche};" +
"S{h}>{cyShe};" +
"S>{cySe};" +
"T{c}{h}>{cyChe};" +
"T{h}>{cyZe};" +
"T{s}>{cyTse};" +
"T>{cyTe};" +
"U>{cyU};" +
"V>{cyVe};" +
"W{h}>{cyVe};" +
"W>{cyVe};" +
"X>{cyKe}{cySe};" +
"Y{e}>{cyYe};" +
"Y{o}>{cyYo};" +
"Y{u}>{cyYu};" +
"Y{a}>{cyYa};" +
"Y{i}>{cyYi};" +
"Y>{cyY};" +
"Z{h}>{cyZhe};" +
"Z>{cyZe};" +
"X>{cyKe}{cySe};" +
// #############################################
// Duplicated Rules
// Copy and lowercase the above rules
// #############################################
// variant spellings in english
+ "shtch>{shch};"
+ "tch>{ch};"
+ "th>{z};"
+ "q>{k};"
+ "wh>{v};"
+ "w>{v};"
+ "x>{k}{s};" //+ "x<{k}{s};"
// separate letters that would otherwise join
+ "sh''<{sh}[{becomes_c};"
+ "t''<{t}[{becomes_s};"
+ "k''<{k}[{becomes_h};"
+ "s''<{s}[{becomes_h};"
+ "t''<{t}[{becomes_h};"
+ "z''<{z}[{becomes_h};"
+ "y''<{y}[{becomes_vowel};"
// main letters
//lower case: doesn't solve join bug
"y{a}{i}>{cyya}{cyy};" +
"y{e}{i}>{cyye}{cyy};" +
"y{i}{i}>{cyyi}{cyy};" +
"y{o}{i}>{cyyo}{cyy};" +
"y{u}{i}>{cyyu}{cyy};" +
"a{i}>{cya}{cyy};" +
"e{i}>{cye}{cyy};" +
//skip ii, since it is the soft sign
"o{i}>{cyo}{cyy};" +
"u{i}>{cyu}{cyy};" +
+ "a<>{a};"
+ "b<>{b};"
+ "ch<>{ch};"
+ "d<>{d};"
+ "e<>{e};"
+ "f<>{f};"
+ "g<>{g};"
+ "{igrave}<>{yi};"
+ "i<>{i};"
+ "kh<>{kh};"
+ "k<>{k};"
+ "l<>{l};"
+ "m<>{m};"
+ "n<>{n};"
+ "o<>{o};"
+ "p<>{p};"
+ "r<>{r};"
+ "shch<>{shch};"
+ "sh<>{sh};"
+ "s<>{s};"
+ "ts<>{ts};"
+ "t<>{t};"
+ "u<>{u};"
+ "v<>{v};"
//aeou + grave
+ "ye>{ye};" //+ "ye<{ye};"
+ "{egrave}<>{ye};"
+ "yo>{yo};" //+ "yo<{yo};"
+ "{ograve}<>{yo};"
+ "yu>{yu};" //+ "yu<{yu};"
+ "{ugrave}<>{yu};"
+ "ya>{ya};" //+ "ya<{ya};"
+ "{agrave}<>{ya};"
+ "y<>{y};"
+ "zh<>{zh};"
+ "z<>{z};"
"a>{cya};" +
"b>{cybe};" +
"c{h}>{cyche};" +
"c[{iey}>{cyse};" +
"c>{cyke};" +
"d>{cyde};" +
"e>{cye};" +
"f>{cyfe};" +
"g>{cyge};" +
"h>{cyhard};" +
"i{i}>{cysoft};" +
"i>{cyi};" +
"j>{cyde}{cyzhe};" +
"k{h}>{cykhe};" +
"k>{cyke};" +
"l>{cyle};" +
"m>{cyme};" +
"n>{cyne};" +
"o>{cyo};" +
"p>{cype};" +
"q{u}>{cyke}{cyve};" +
"r>{cyre};" +
"s{h}{t}{c}{h}>{cyshche};" +
"s{h}{c}{h}>{cyshche};" +
"s{h}>{cyshe};" +
"s>{cyse};" +
"t{c}{h}>{cyche};" +
"t{h}>{cyze};" +
"t{s}>{cytse};" +
"t>{cyte};" +
"u>{cyu};" +
"v>{cyve};" +
"w{h}>{cyve};" +
"w>{cyve};" +
"x>{cyke}{cyse};" +
"y{e}>{cyye};" +
"y{o}>{cyyo};" +
"y{u}>{cyyu};" +
"y{a}>{cyya};" +
"y{i}>{cyyi};" +
"y>{cyy};" +
"z{h}>{cyzhe};" +
"z>{cyze};" +
"x>{cyke}{cyse};" +
+ "h<>{hard};"
+ "{ydiaeresis}<>{soft};"
// non-russian
+ "j<>{j};"
//generally the last rule
"''>;" +
// variant spellings in english
+ "c[{csoft}>{s};"
+ "c>{k};"
//now Russian to English
"Y''<{cyY}[{cyA};" +
"Y''<{cyY}[{cyE};" +
"Y''<{cyY}[{cyI};" +
"Y''<{cyY}[{cyO};" +
"Y''<{cyY}[{cyU};" +
"Y''<{cyY}[{cya};" +
"Y''<{cyY}[{cye};" +
"Y''<{cyY}[{cyi};" +
"Y''<{cyY}[{cyo};" +
"Y''<{cyY}[{cyu};" +
"A<{cyA};" +
"B<{cyBe};" +
"J<{cyDe}{cyZhe};" +
"J<{cyDe}{cyzhe};" +
"D<{cyDe};" +
"V<{cyVe};" +
"G<{cyGe};" +
"Zh<{cyZhe}[{lower};" +
"ZH<{cyZhe};" +
"Z''<{cyZe}[{cyHard};" +
"Z''<{cyZe}[{cyhard};" +
"Z<{cyZe};" +
"Ye<{cyYe}[{lower};" +
"YE<{cyYe};" +
"Yo<{cyYo}[{lower};" +
"YO<{cyYo};" +
"Yu<{cyYu}[{lower};" +
"YU<{cyYu};" +
"Ya<{cyYa}[{lower};" +
"YA<{cyYa};" +
"Yi<{cyYi}[{lower};" +
"YI<{cyYi};" +
"Y<{cyY};" +
"Kh<{cyKhe}[{lower};" +
"KH<{cyKhe};" +
"K''<{cyKe}[{cyHard};" +
"K''<{cyKe}[{cyhard};" +
"X<{cyKe}{cySe};" +
"X<{cyKe}{cyse};" +
"K<{cyKe};" +
"L<{cyLe};" +
"M<{cyMe};" +
"N<{cyNe};" +
"O<{cyO};" +
"P<{cyPe};" +
"R<{cyRe};" +
"Shch<{cyShche}[{lower};" +
"SHCH<{cyShche};" +
"Sh''<{cyShe}[{cyche};" +
"SH''<{cyShe}[{cyChe};" +
"Sh<{cyShe}[{lower};" +
"SH<{cyShe};" +
"S''<{cySe}[{cyHard};" +
"S''<{cySe}[{cyhard};" +
"S<{cySe};" +
"Ts<{cyTse}[{lower};" +
"TS<{cyTse};" +
"T''<{cyTe}[{cySe};" +
"T''<{cyTe}[{cyse};" +
"T''<{cyTe}[{cyHard};" +
"T''<{cyTe}[{cyhard};" +
"T<{cyTe};" +
"U<{cyU};" +
"F<{cyFe};" +
"Ch<{cyChe}[{lower};" +
"CH<{cyChe};" +
"H<{cyHard};" +
"I''<{cyI}[{cyI};" +
"I''<{cyI}[{cyi};" +
"I<{cyI};" +
"Ii<{cySoft}[{lower};" +
"II<{cySoft};" +
"E<{cyE};" +
//lowercase
"y''<{cyy}[{cya};" +
"y''<{cyy}[{cye};" +
"y''<{cyy}[{cyi};" +
"y''<{cyy}[{cyo};" +
"y''<{cyy}[{cyu};" +
"y''<{cyy}[{cyA};" +
"y''<{cyy}[{cyE};" +
"y''<{cyy}[{cyI};" +
"y''<{cyy}[{cyO};" +
"y''<{cyy}[{cyU};" +
"a<{cya};" +
"b<{cybe};" +
"j<{cyde}{cyzhe};" +
"j<{cyde}{cyZhe};" +
"d<{cyde};" +
"v<{cyve};" +
"g<{cyge};" +
"zh<{cyzhe};" +
"z''<{cyze}[{cyhard};" +
"z''<{cyze}[{cyHard};" +
"z<{cyze};" +
"ye<{cyye};" +
"yo<{cyyo};" +
"yu<{cyyu};" +
"ya<{cyya};" +
"yi<{cyyi};" +
"y<{cyy};" +
"kh<{cykhe};" +
"k''<{cyke}[{cyhard};" +
"k''<{cyke}[{cyHard};" +
"x<{cyke}{cyse};" +
"x<{cyke}{cySe};" +
"k<{cyke};" +
"l<{cyle};" +
"m<{cyme};" +
"n<{cyne};" +
"o<{cyo};" +
"p<{cype};" +
"r<{cyre};" +
"shch<{cyshche};" +
"sh''<{cyshe}[{cyche};" +
"sh''<{cyshe}[{cyChe};" +
"sh<{cyshe};" +
"s''<{cyse}[{cyhard};" +
"s''<{cyse}[{cyHard};" +
"s<{cyse};" +
"ts<{cytse};" +
"t''<{cyte}[{cyse};" +
"t''<{cyte}[{cySe};" +
"t''<{cyte}[{cyhard};" +
"t''<{cyte}[{cyHard};" +
"t<{cyte};" +
"u<{cyu};" +
"f<{cyfe};" +
"ch<{cyche};" +
"h<{cyhard};" +
"i''<{cyi}[{cyI};" +
"i''<{cyi}[{cyi};" +
"i<{cyi};" +
"ii<{cysoft};" +
"e<{cye};" +
//generally the last rule
"''>;"
//the end
// #############################################
// End of Duplicated Rules
// #############################################
//generally the last rule
+ "''>;"
//the end
}
};
}