mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-21 12:40:02 +00:00
ICU-199 new rule syntax; update rules
X-SVN-Rev: 562
This commit is contained in:
parent
031b4b6ca0
commit
2169d01e5e
11 changed files with 6283 additions and 7919 deletions
File diff suppressed because it is too large
Load diff
|
@ -10,119 +10,119 @@
|
|||
|
||||
kbdescl1 {
|
||||
Rule {
|
||||
"esc=''\n"
|
||||
"grave=`\n"
|
||||
"acute=''\n"
|
||||
"hat=^\n"
|
||||
"tilde=~\n"
|
||||
"umlaut=:\n"
|
||||
"ring=.\n"
|
||||
"cedilla=,\n"
|
||||
"slash=/\n"
|
||||
"super=^\n"
|
||||
"esc='';"
|
||||
"grave=`;"
|
||||
"acute='';"
|
||||
"hat=^;"
|
||||
"tilde=~;"
|
||||
"umlaut=:;"
|
||||
"ring=.;"
|
||||
"cedilla=,;"
|
||||
"slash=/;"
|
||||
"super=^;"
|
||||
|
||||
// Make keyboard entry of {esc} possible
|
||||
// and of backslash
|
||||
"'\\'{esc}>{esc}\n"
|
||||
"'\\\\'>'\\'\n"
|
||||
"'\\'{esc}>{esc};"
|
||||
"'\\\\'>'\\';"
|
||||
|
||||
// Long keys
|
||||
"cur{esc}>\u00A4\n"
|
||||
"sec{esc}>\u00A7\n"
|
||||
"not{esc}>\u00AC\n"
|
||||
"mul{esc}>\u00D7\n"
|
||||
"div{esc}>\u00F7\n"
|
||||
"cur{esc}>\u00A4;"
|
||||
"sec{esc}>\u00A7;"
|
||||
"not{esc}>\u00AC;"
|
||||
"mul{esc}>\u00D7;"
|
||||
"div{esc}>\u00F7;"
|
||||
|
||||
" {esc}>\u00A0\n" // non-breaking space
|
||||
"!{esc}>\u00A1\n" // inverted exclamation
|
||||
"c/{esc}>\u00A2\n" // cent sign
|
||||
"lb{esc}>\u00A3\n" // pound sign
|
||||
"'|'{esc}>\u00A6\n" // broken vertical bar
|
||||
":{esc}>\u00A8\n" // umlaut
|
||||
"{super}a{esc}>\u00AA\n" // feminine ordinal
|
||||
"'<<'{esc}>\u00AB\n"
|
||||
"r{esc}>\u00AE\n"
|
||||
"--{esc}>\u00AF\n"
|
||||
"-{esc}>\u00AD\n"
|
||||
"+-{esc}>\u00B1\n"
|
||||
"{super}2{esc}>\u00B2\n"
|
||||
"{super}3{esc}>\u00B3\n"
|
||||
"{acute}{esc}>\u00B4\n"
|
||||
"m{esc}>\u00B5\n"
|
||||
"para{esc}>\u00B6\n"
|
||||
"dot{esc}>\u00B7\n"
|
||||
"{cedilla}{esc}>\u00B8\n"
|
||||
"{super}1{esc}>\u00B9\n"
|
||||
"{super}o{esc}>\u00BA\n" // masculine ordinal
|
||||
"'>>'{esc}>\u00BB\n"
|
||||
"1/4{esc}>\u00BC\n"
|
||||
"1/2{esc}>\u00BD\n"
|
||||
"3/4{esc}>\u00BE\n"
|
||||
"?{esc}>\u00BF\n"
|
||||
"A{grave}{esc}>\u00C0\n"
|
||||
"A{acute}{esc}>\u00C1\n"
|
||||
"A{hat}{esc}>\u00C2\n"
|
||||
"A{tilde}{esc}>\u00C3\n"
|
||||
"A{umlaut}{esc}>\u00C4\n"
|
||||
"A{ring}{esc}>\u00C5\n"
|
||||
"AE{esc}>\u00C6\n"
|
||||
"C{cedilla}{esc}>\u00C7\n"
|
||||
"E{grave}{esc}>\u00C8\n"
|
||||
"E{acute}{esc}>\u00C9\n"
|
||||
"E{hat}{esc}>\u00CA\n"
|
||||
"E{umlaut}{esc}>\u00CB\n"
|
||||
"I{grave}{esc}>\u00CC\n"
|
||||
"I{acute}{esc}>\u00CD\n"
|
||||
"I{hat}{esc}>\u00CE\n"
|
||||
"I{umlaut}{esc}>\u00CF\n"
|
||||
"D-{esc}>\u00D0\n"
|
||||
"N{tilde}{esc}>\u00D1\n"
|
||||
"O{grave}{esc}>\u00D2\n"
|
||||
"O{acute}{esc}>\u00D3\n"
|
||||
"O{hat}{esc}>\u00D4\n"
|
||||
"O{tilde}{esc}>\u00D5\n"
|
||||
"O{umlaut}{esc}>\u00D6\n"
|
||||
"O{slash}{esc}>\u00D8\n"
|
||||
"U{grave}{esc}>\u00D9\n"
|
||||
"U{acute}{esc}>\u00DA\n"
|
||||
"U{hat}{esc}>\u00DB\n"
|
||||
"U{umlaut}{esc}>\u00DC\n"
|
||||
"Y{acute}{esc}>\u00DD\n"
|
||||
"TH{esc}>\u00DE\n"
|
||||
"ss{esc}>\u00DF\n"
|
||||
"a{grave}{esc}>\u00E0\n"
|
||||
"a{acute}{esc}>\u00E1\n"
|
||||
"a{hat}{esc}>\u00E2\n"
|
||||
"a{tilde}{esc}>\u00E3\n"
|
||||
"a{umlaut}{esc}>\u00E4\n"
|
||||
"a{ring}{esc}>\u00E5\n"
|
||||
"ae{esc}>\u00E6\n"
|
||||
"c{cedilla}{esc}>\u00E7\n"
|
||||
"c{esc}>\u00A9\n" // copyright - after c{cedilla}
|
||||
"e{grave}{esc}>\u00E8\n"
|
||||
"e{acute}{esc}>\u00E9\n"
|
||||
"e{hat}{esc}>\u00EA\n"
|
||||
"e{umlaut}{esc}>\u00EB\n"
|
||||
"i{grave}{esc}>\u00EC\n"
|
||||
"i{acute}{esc}>\u00ED\n"
|
||||
"i{hat}{esc}>\u00EE\n"
|
||||
"i{umlaut}{esc}>\u00EF\n"
|
||||
"d-{esc}>\u00F0\n"
|
||||
"n{tilde}{esc}>\u00F1\n"
|
||||
"o{grave}{esc}>\u00F2\n"
|
||||
"o{acute}{esc}>\u00F3\n"
|
||||
"o{hat}{esc}>\u00F4\n"
|
||||
"o{tilde}{esc}>\u00F5\n"
|
||||
"o{umlaut}{esc}>\u00F6\n"
|
||||
"o{slash}{esc}>\u00F8\n"
|
||||
"o{esc}>\u00B0\n"
|
||||
"u{grave}{esc}>\u00F9\n"
|
||||
"u{acute}{esc}>\u00FA\n"
|
||||
"u{hat}{esc}>\u00FB\n"
|
||||
"u{umlaut}{esc}>\u00FC\n"
|
||||
"y{acute}{esc}>\u00FD\n"
|
||||
"y{esc}>\u00A5\n" // yen sign
|
||||
"th{esc}>\u00FE\n"
|
||||
"ss{esc}>\u00FF\n"
|
||||
"\\ {esc}>\u00A0;" // non-breaking space
|
||||
"!{esc}>\u00A1;" // inverted exclamation
|
||||
"c/{esc}>\u00A2;" // cent sign
|
||||
"lb{esc}>\u00A3;" // pound sign
|
||||
"'|'{esc}>\u00A6;" // broken vertical bar
|
||||
":{esc}>\u00A8;" // umlaut
|
||||
"{super}a{esc}>\u00AA;" // feminine ordinal
|
||||
"'<<'{esc}>\u00AB;"
|
||||
"r{esc}>\u00AE;"
|
||||
"--{esc}>\u00AF;"
|
||||
"-{esc}>\u00AD;"
|
||||
"+-{esc}>\u00B1;"
|
||||
"{super}2{esc}>\u00B2;"
|
||||
"{super}3{esc}>\u00B3;"
|
||||
"{acute}{esc}>\u00B4;"
|
||||
"m{esc}>\u00B5;"
|
||||
"para{esc}>\u00B6;"
|
||||
"dot{esc}>\u00B7;"
|
||||
"{cedilla}{esc}>\u00B8;"
|
||||
"{super}1{esc}>\u00B9;"
|
||||
"{super}o{esc}>\u00BA;" // masculine ordinal
|
||||
"'>>'{esc}>\u00BB;"
|
||||
"1/4{esc}>\u00BC;"
|
||||
"1/2{esc}>\u00BD;"
|
||||
"3/4{esc}>\u00BE;"
|
||||
"?{esc}>\u00BF;"
|
||||
"A{grave}{esc}>\u00C0;"
|
||||
"A{acute}{esc}>\u00C1;"
|
||||
"A{hat}{esc}>\u00C2;"
|
||||
"A{tilde}{esc}>\u00C3;"
|
||||
"A{umlaut}{esc}>\u00C4;"
|
||||
"A{ring}{esc}>\u00C5;"
|
||||
"AE{esc}>\u00C6;"
|
||||
"C{cedilla}{esc}>\u00C7;"
|
||||
"E{grave}{esc}>\u00C8;"
|
||||
"E{acute}{esc}>\u00C9;"
|
||||
"E{hat}{esc}>\u00CA;"
|
||||
"E{umlaut}{esc}>\u00CB;"
|
||||
"I{grave}{esc}>\u00CC;"
|
||||
"I{acute}{esc}>\u00CD;"
|
||||
"I{hat}{esc}>\u00CE;"
|
||||
"I{umlaut}{esc}>\u00CF;"
|
||||
"D-{esc}>\u00D0;"
|
||||
"N{tilde}{esc}>\u00D1;"
|
||||
"O{grave}{esc}>\u00D2;"
|
||||
"O{acute}{esc}>\u00D3;"
|
||||
"O{hat}{esc}>\u00D4;"
|
||||
"O{tilde}{esc}>\u00D5;"
|
||||
"O{umlaut}{esc}>\u00D6;"
|
||||
"O{slash}{esc}>\u00D8;"
|
||||
"U{grave}{esc}>\u00D9;"
|
||||
"U{acute}{esc}>\u00DA;"
|
||||
"U{hat}{esc}>\u00DB;"
|
||||
"U{umlaut}{esc}>\u00DC;"
|
||||
"Y{acute}{esc}>\u00DD;"
|
||||
"TH{esc}>\u00DE;"
|
||||
"ss{esc}>\u00DF;"
|
||||
"a{grave}{esc}>\u00E0;"
|
||||
"a{acute}{esc}>\u00E1;"
|
||||
"a{hat}{esc}>\u00E2;"
|
||||
"a{tilde}{esc}>\u00E3;"
|
||||
"a{umlaut}{esc}>\u00E4;"
|
||||
"a{ring}{esc}>\u00E5;"
|
||||
"ae{esc}>\u00E6;"
|
||||
"c{cedilla}{esc}>\u00E7;"
|
||||
"c{esc}>\u00A9;" // copyright - after c{cedilla}
|
||||
"e{grave}{esc}>\u00E8;"
|
||||
"e{acute}{esc}>\u00E9;"
|
||||
"e{hat}{esc}>\u00EA;"
|
||||
"e{umlaut}{esc}>\u00EB;"
|
||||
"i{grave}{esc}>\u00EC;"
|
||||
"i{acute}{esc}>\u00ED;"
|
||||
"i{hat}{esc}>\u00EE;"
|
||||
"i{umlaut}{esc}>\u00EF;"
|
||||
"d-{esc}>\u00F0;"
|
||||
"n{tilde}{esc}>\u00F1;"
|
||||
"o{grave}{esc}>\u00F2;"
|
||||
"o{acute}{esc}>\u00F3;"
|
||||
"o{hat}{esc}>\u00F4;"
|
||||
"o{tilde}{esc}>\u00F5;"
|
||||
"o{umlaut}{esc}>\u00F6;"
|
||||
"o{slash}{esc}>\u00F8;"
|
||||
"o{esc}>\u00B0;"
|
||||
"u{grave}{esc}>\u00F9;"
|
||||
"u{acute}{esc}>\u00FA;"
|
||||
"u{hat}{esc}>\u00FB;"
|
||||
"u{umlaut}{esc}>\u00FC;"
|
||||
"y{acute}{esc}>\u00FD;"
|
||||
"y{esc}>\u00A5;" // yen sign
|
||||
"th{esc}>\u00FE;"
|
||||
//masked: "ss{esc}>\u00FF;"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,229 +12,229 @@ larabic {
|
|||
Rule {
|
||||
// To Do: finish adding shadda, add sokoon
|
||||
|
||||
"alefmadda=\u0622\n"
|
||||
"alefuhamza=\u0623\n"
|
||||
"wauuhamza=\u0624\n"
|
||||
"alefhamza=\u0625\n"
|
||||
"yehuhamza=\u0626\n"
|
||||
"alef=\u0627\n"
|
||||
"beh=\u0628\n"
|
||||
"tehmarbuta=\u0629\n"
|
||||
"teh=\u062A\n"
|
||||
"theh=\u062B\n"
|
||||
"geem=\u062C\n"
|
||||
"hah=\u062D\n"
|
||||
"kha=\u062E\n"
|
||||
"dal=\u062F\n"
|
||||
"dhal=\u0630\n"
|
||||
"reh=\u0631\n"
|
||||
"zain=\u0632\n"
|
||||
"seen=\u0633\n"
|
||||
"sheen=\u0634\n"
|
||||
"sad=\u0635\n"
|
||||
"dad=\u0636\n"
|
||||
"tah=\u0637\n"
|
||||
"zah=\u0638\n"
|
||||
"ein=\u0639\n"
|
||||
"ghein=\u063A\n"
|
||||
"feh=\u0641\n"
|
||||
"qaaf=\u0642\n"
|
||||
"kaf=\u0643\n"
|
||||
"lam=\u0644\n"
|
||||
"meem=\u0645\n"
|
||||
"noon=\u0646\n"
|
||||
"heh=\u0647\n"
|
||||
"wau=\u0648\n"
|
||||
"yehmaqsura=\u0649\n"
|
||||
"yeh=\u064A\n"
|
||||
"peh=\u06A4\n"
|
||||
"alefmadda=\u0622;"
|
||||
"alefuhamza=\u0623;"
|
||||
"wauuhamza=\u0624;"
|
||||
"alefhamza=\u0625;"
|
||||
"yehuhamza=\u0626;"
|
||||
"alef=\u0627;"
|
||||
"beh=\u0628;"
|
||||
"tehmarbuta=\u0629;"
|
||||
"teh=\u062A;"
|
||||
"theh=\u062B;"
|
||||
"geem=\u062C;"
|
||||
"hah=\u062D;"
|
||||
"kha=\u062E;"
|
||||
"dal=\u062F;"
|
||||
"dhal=\u0630;"
|
||||
"reh=\u0631;"
|
||||
"zain=\u0632;"
|
||||
"seen=\u0633;"
|
||||
"sheen=\u0634;"
|
||||
"sad=\u0635;"
|
||||
"dad=\u0636;"
|
||||
"tah=\u0637;"
|
||||
"zah=\u0638;"
|
||||
"ein=\u0639;"
|
||||
"ghein=\u063A;"
|
||||
"feh=\u0641;"
|
||||
"qaaf=\u0642;"
|
||||
"kaf=\u0643;"
|
||||
"lam=\u0644;"
|
||||
"meem=\u0645;"
|
||||
"noon=\u0646;"
|
||||
"heh=\u0647;"
|
||||
"wau=\u0648;"
|
||||
"yehmaqsura=\u0649;"
|
||||
"yeh=\u064A;"
|
||||
"peh=\u06A4;"
|
||||
|
||||
"hamza=\u0621\n"
|
||||
"fathatein=\u064B\n"
|
||||
"dammatein=\u064C\n"
|
||||
"kasratein=\u064D\n"
|
||||
"fatha=\u064E\n"
|
||||
"damma=\u064F\n"
|
||||
"kasra=\u0650\n"
|
||||
"shadda=\u0651\n"
|
||||
"sokoon=\u0652\n"
|
||||
"hamza=\u0621;"
|
||||
"fathatein=\u064B;"
|
||||
"dammatein=\u064C;"
|
||||
"kasratein=\u064D;"
|
||||
"fatha=\u064E;"
|
||||
"damma=\u064F;"
|
||||
"kasra=\u0650;"
|
||||
"shadda=\u0651;"
|
||||
"sokoon=\u0652;"
|
||||
|
||||
// convert English to Arabic
|
||||
"Arabic>"
|
||||
"\u062a\u062a\u0645\u062a\u0639\u0020"
|
||||
"\u0627\u0644\u0644\u063a\u0629\u0020"
|
||||
"\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"
|
||||
"\u0628\u0628\u0646\u0638\u0645\u0020"
|
||||
"\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"
|
||||
"\u062c\u0645\u064a\u0644\u0629\n"
|
||||
"\u062a\u062a\u0645\u062a\u0639' '"
|
||||
"\u0627\u0644\u0644\u063a\u0629' '"
|
||||
"\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629' '"
|
||||
"\u0628\u0628\u0646\u0638\u0645' '"
|
||||
"\u0643\u062a\u0627\u0628\u0628\u064a\u0629' '"
|
||||
"\u062c\u0645\u064a\u0644\u0629;"
|
||||
|
||||
"ai>{alefmadda}\n"
|
||||
"ae>{alefuhamza}\n"
|
||||
"ao>{alefhamza}\n"
|
||||
"aa>{alef}\n"
|
||||
"an>{fathatein}\n"
|
||||
"a>{fatha}\n"
|
||||
"b>{beh}\n"
|
||||
"c>{kaf}\n"
|
||||
"{dhal}]dh>{shadda}\n"
|
||||
"dh>{dhal}\n"
|
||||
"{dad}]dd>{shadda}\n"
|
||||
"dd>{dad}\n"
|
||||
"{dal}]d>{shadda}\n"
|
||||
"d>{dal}\n"
|
||||
"e>{ein}\n"
|
||||
"f>{feh}\n"
|
||||
"gh>{ghein}\n"
|
||||
"g>{geem}\n"
|
||||
"hh>{hah}\n"
|
||||
"h>{heh}\n"
|
||||
"ii>{kasratein}\n"
|
||||
"i>{kasra}\n"
|
||||
"j>{geem}\n"
|
||||
"kh>{kha}\n"
|
||||
"k>{kaf}\n"
|
||||
"l>{lam}\n"
|
||||
"m>{meem}\n"
|
||||
"n>{noon}\n"
|
||||
"o>{hamza}\n"
|
||||
"p>{peh}\n"
|
||||
"q>{qaaf}\n"
|
||||
"r>{reh}\n"
|
||||
"sh>{sheen}\n"
|
||||
"ss>{sad}\n"
|
||||
"s>{seen}\n"
|
||||
"th>{theh}\n"
|
||||
"tm>{tehmarbuta}\n"
|
||||
"tt>{tah}\n"
|
||||
"t>{teh}\n"
|
||||
"uu>{dammatein}\n"
|
||||
"u>{damma}\n"
|
||||
"v>{beh}\n"
|
||||
"we>{wauuhamza}\n"
|
||||
"w>{wau}\n"
|
||||
"x>{kaf}{shadda}{seen}\n"
|
||||
"ye>{yehuhamza}\n"
|
||||
"ym>{yehmaqsura}\n"
|
||||
"y>{yeh}\n"
|
||||
"zz>{zah}\n"
|
||||
"z>{zain}\n"
|
||||
"ai>{alefmadda};"
|
||||
"ae>{alefuhamza};"
|
||||
"ao>{alefhamza};"
|
||||
"aa>{alef};"
|
||||
"an>{fathatein};"
|
||||
"a>{fatha};"
|
||||
"b>{beh};"
|
||||
"c>{kaf};"
|
||||
"{dhal})dh>{shadda};"
|
||||
"dh>{dhal};"
|
||||
"{dad})dd>{shadda};"
|
||||
"dd>{dad};"
|
||||
"{dal})d>{shadda};"
|
||||
"d>{dal};"
|
||||
"e>{ein};"
|
||||
"f>{feh};"
|
||||
"gh>{ghein};"
|
||||
"g>{geem};"
|
||||
"hh>{hah};"
|
||||
"h>{heh};"
|
||||
"ii>{kasratein};"
|
||||
"i>{kasra};"
|
||||
"j>{geem};"
|
||||
"kh>{kha};"
|
||||
"k>{kaf};"
|
||||
"l>{lam};"
|
||||
"m>{meem};"
|
||||
"n>{noon};"
|
||||
"o>{hamza};"
|
||||
"p>{peh};"
|
||||
"q>{qaaf};"
|
||||
"r>{reh};"
|
||||
"sh>{sheen};"
|
||||
"ss>{sad};"
|
||||
"s>{seen};"
|
||||
"th>{theh};"
|
||||
"tm>{tehmarbuta};"
|
||||
"tt>{tah};"
|
||||
"t>{teh};"
|
||||
"uu>{dammatein};"
|
||||
"u>{damma};"
|
||||
"v>{beh};"
|
||||
"we>{wauuhamza};"
|
||||
"w>{wau};"
|
||||
"x>{kaf}{shadda}{seen};"
|
||||
"ye>{yehuhamza};"
|
||||
"ym>{yehmaqsura};"
|
||||
"y>{yeh};"
|
||||
"zz>{zah};"
|
||||
"z>{zain};"
|
||||
|
||||
"0>\u0660\n"+ // Arabic digit 0
|
||||
"1>\u0661\n"+ // Arabic digit 1
|
||||
"2>\u0662\n"+ // Arabic digit 2
|
||||
"3>\u0663\n"+ // Arabic digit 3
|
||||
"4>\u0664\n"+ // Arabic digit 4
|
||||
"5>\u0665\n"+ // Arabic digit 5
|
||||
"6>\u0666\n"+ // Arabic digit 6
|
||||
"7>\u0667\n"+ // Arabic digit 7
|
||||
"8>\u0668\n"+ // Arabic digit 8
|
||||
"9>\u0669\n"+ // Arabic digit 9
|
||||
"%>\u066A\n"+ // Arabic %
|
||||
".>\u066B\n"+ // Arabic decimal separator
|
||||
",>\u066C\n"+ // Arabic thousands separator
|
||||
"*>\u066D\n"+ // Arabic five-pointed star
|
||||
"0>\u0660;"+ // Arabic digit 0
|
||||
"1>\u0661;"+ // Arabic digit 1
|
||||
"2>\u0662;"+ // Arabic digit 2
|
||||
"3>\u0663;"+ // Arabic digit 3
|
||||
"4>\u0664;"+ // Arabic digit 4
|
||||
"5>\u0665;"+ // Arabic digit 5
|
||||
"6>\u0666;"+ // Arabic digit 6
|
||||
"7>\u0667;"+ // Arabic digit 7
|
||||
"8>\u0668;"+ // Arabic digit 8
|
||||
"9>\u0669;"+ // Arabic digit 9
|
||||
"%>\u066A;"+ // Arabic %
|
||||
".>\u066B;"+ // Arabic decimal separator
|
||||
",>\u066C;"+ // Arabic thousands separator
|
||||
"*>\u066D;"+ // Arabic five-pointed star
|
||||
|
||||
"`0>0\n"+ // Escaped forms of the above
|
||||
"`1>1\n"
|
||||
"`2>2\n"
|
||||
"`3>3\n"
|
||||
"`4>4\n"
|
||||
"`5>5\n"
|
||||
"`6>6\n"
|
||||
"`7>7\n"
|
||||
"`8>8\n"
|
||||
"`9>9\n"
|
||||
"`%>%\n"
|
||||
"`.>.\n"
|
||||
"`,>,\n"
|
||||
"`*>*\n"
|
||||
"``>`\n"
|
||||
"`0>0;"+ // Escaped forms of the above
|
||||
"`1>1;"
|
||||
"`2>2;"
|
||||
"`3>3;"
|
||||
"`4>4;"
|
||||
"`5>5;"
|
||||
"`6>6;"
|
||||
"`7>7;"
|
||||
"`8>8;"
|
||||
"`9>9;"
|
||||
"`%>%;"
|
||||
"`.>.;"
|
||||
"`,>,;"
|
||||
"`*>*;"
|
||||
"``>`;"
|
||||
|
||||
"''>\n"
|
||||
"''>;"
|
||||
|
||||
// now Arabic to English
|
||||
|
||||
"''ai<a]{alefmadda}\n"
|
||||
"ai<{alefmadda}\n"
|
||||
"''ae<a]{alefuhamza}\n"
|
||||
"ae<{alefuhamza}\n"
|
||||
"''ao<a]{alefhamza}\n"
|
||||
"ao<{alefhamza}\n"
|
||||
"''aa<a]{alef}\n"
|
||||
"aa<{alef}\n"
|
||||
"''an<a]{fathatein}\n"
|
||||
"an<{fathatein}\n"
|
||||
"''a<a]{fatha}\n"
|
||||
"a<{fatha}\n"
|
||||
"b<{beh}\n"
|
||||
"''dh<d]{dhal}\n"
|
||||
"dh<{dhal}\n"
|
||||
"''dd<d]{dad}\n"
|
||||
"dd<{dad}\n"
|
||||
"''d<d]{dal}\n"
|
||||
"d<{dal}\n"
|
||||
"''e<a]{ein}\n"
|
||||
"''e<w]{ein}\n"
|
||||
"''e<y]{ein}\n"
|
||||
"e<{ein}\n"
|
||||
"f<{feh}\n"
|
||||
"gh<{ghein}\n"
|
||||
"''hh<d]{hah}\n"
|
||||
"''hh<t]{hah}\n"
|
||||
"''hh<k]{hah}\n"
|
||||
"''hh<s]{hah}\n"
|
||||
"hh<{hah}\n"
|
||||
"''h<d]{heh}\n"
|
||||
"''h<t]{heh}\n"
|
||||
"''h<k]{heh}\n"
|
||||
"''h<s]{heh}\n"
|
||||
"h<{heh}\n"
|
||||
"''ii<i]{kasratein}\n"
|
||||
"ii<{kasratein}\n"
|
||||
"''i<i]{kasra}\n"
|
||||
"i<{kasra}\n"
|
||||
"j<{geem}\n"
|
||||
"kh<{kha}\n"
|
||||
"x<{kaf}{shadda}{seen}\n"
|
||||
"k<{kaf}\n"
|
||||
"l<{lam}\n"
|
||||
"''m<y]{meem}\n"
|
||||
"''m<t]{meem}\n"
|
||||
"m<{meem}\n"
|
||||
"n<{noon}\n"
|
||||
"''o<a]{hamza}\n"
|
||||
"o<{hamza}\n"
|
||||
"p<{peh}\n"
|
||||
"q<{qaaf}\n"
|
||||
"r<{reh}\n"
|
||||
"sh<{sheen}\n"
|
||||
"''ss<s]{sad}\n"
|
||||
"ss<{sad}\n"
|
||||
"''s<s]{seen}\n"
|
||||
"s<{seen}\n"
|
||||
"th<{theh}\n"
|
||||
"tm<{tehmarbuta}\n"
|
||||
"''tt<t]{tah}\n"
|
||||
"tt<{tah}\n"
|
||||
"''t<t]{teh}\n"
|
||||
"t<{teh}\n"
|
||||
"''uu<u]{dammatein}\n"
|
||||
"uu<{dammatein}\n"
|
||||
"''u<u]{damma}\n"
|
||||
"u<{damma}\n"
|
||||
"we<{wauuhamza}\n"
|
||||
"w<{wau}\n"
|
||||
"ye<{yehuhamza}\n"
|
||||
"ym<{yehmaqsura}\n"
|
||||
"''y<y]{yeh}\n"
|
||||
"y<{yeh}\n"
|
||||
"''zz<z]{zah}\n"
|
||||
"zz<{zah}\n"
|
||||
"''z<z]{zain}\n"
|
||||
"z<{zain}\n"
|
||||
"''ai<a){alefmadda};"
|
||||
"ai<{alefmadda};"
|
||||
"''ae<a){alefuhamza};"
|
||||
"ae<{alefuhamza};"
|
||||
"''ao<a){alefhamza};"
|
||||
"ao<{alefhamza};"
|
||||
"''aa<a){alef};"
|
||||
"aa<{alef};"
|
||||
"''an<a){fathatein};"
|
||||
"an<{fathatein};"
|
||||
"''a<a){fatha};"
|
||||
"a<{fatha};"
|
||||
"b<{beh};"
|
||||
"''dh<d){dhal};"
|
||||
"dh<{dhal};"
|
||||
"''dd<d){dad};"
|
||||
"dd<{dad};"
|
||||
"''d<d){dal};"
|
||||
"d<{dal};"
|
||||
"''e<a){ein};"
|
||||
"''e<w){ein};"
|
||||
"''e<y){ein};"
|
||||
"e<{ein};"
|
||||
"f<{feh};"
|
||||
"gh<{ghein};"
|
||||
"''hh<d){hah};"
|
||||
"''hh<t){hah};"
|
||||
"''hh<k){hah};"
|
||||
"''hh<s){hah};"
|
||||
"hh<{hah};"
|
||||
"''h<d){heh};"
|
||||
"''h<t){heh};"
|
||||
"''h<k){heh};"
|
||||
"''h<s){heh};"
|
||||
"h<{heh};"
|
||||
"''ii<i){kasratein};"
|
||||
"ii<{kasratein};"
|
||||
"''i<i){kasra};"
|
||||
"i<{kasra};"
|
||||
"j<{geem};"
|
||||
"kh<{kha};"
|
||||
"x<{kaf}{shadda}{seen};"
|
||||
"k<{kaf};"
|
||||
"l<{lam};"
|
||||
"''m<y){meem};"
|
||||
"''m<t){meem};"
|
||||
"m<{meem};"
|
||||
"n<{noon};"
|
||||
"''o<a){hamza};"
|
||||
"o<{hamza};"
|
||||
"p<{peh};"
|
||||
"q<{qaaf};"
|
||||
"r<{reh};"
|
||||
"sh<{sheen};"
|
||||
"''ss<s){sad};"
|
||||
"ss<{sad};"
|
||||
"''s<s){seen};"
|
||||
"s<{seen};"
|
||||
"th<{theh};"
|
||||
"tm<{tehmarbuta};"
|
||||
"''tt<t){tah};"
|
||||
"tt<{tah};"
|
||||
"''t<t){teh};"
|
||||
"t<{teh};"
|
||||
"''uu<u){dammatein};"
|
||||
"uu<{dammatein};"
|
||||
"''u<u){damma};"
|
||||
"u<{damma};"
|
||||
"we<{wauuhamza};"
|
||||
"w<{wau};"
|
||||
"ye<{yehuhamza};"
|
||||
"ym<{yehmaqsura};"
|
||||
"''y<y){yeh};"
|
||||
"y<{yeh};"
|
||||
"''zz<z){zah};"
|
||||
"zz<{zah};"
|
||||
"''z<z){zain};"
|
||||
"z<{zain};"
|
||||
|
||||
"dh<dh]{shadda}\n"
|
||||
"dd<dd]{shadda}\n"
|
||||
"''d<d]{shadda}\n"
|
||||
"dh<dh){shadda};"
|
||||
"dd<dd){shadda};"
|
||||
"''d<d){shadda};"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,94 +43,94 @@ ldevan {
|
|||
//#####################################################################
|
||||
|
||||
//consonants
|
||||
"candrabindu=\u0901\n"
|
||||
"bindu=\u0902\n"
|
||||
"visarga=\u0903\n"
|
||||
"candrabindu=\u0901;"
|
||||
"bindu=\u0902;"
|
||||
"visarga=\u0903;"
|
||||
|
||||
// w<vowel> represents the stand-alone form
|
||||
"wa=\u0905\n"
|
||||
"waa=\u0906\n"
|
||||
"wi=\u0907\n"
|
||||
"wii=\u0908\n"
|
||||
"wu=\u0909\n"
|
||||
"wuu=\u090A\n"
|
||||
"wr=\u090B\n"
|
||||
"wl=\u090C\n"
|
||||
"we=\u090F\n"
|
||||
"wai=\u0910\n"
|
||||
"wo=\u0913\n"
|
||||
"wau=\u0914\n"
|
||||
"wa=\u0905;"
|
||||
"waa=\u0906;"
|
||||
"wi=\u0907;"
|
||||
"wii=\u0908;"
|
||||
"wu=\u0909;"
|
||||
"wuu=\u090A;"
|
||||
"wr=\u090B;"
|
||||
"wl=\u090C;"
|
||||
"we=\u090F;"
|
||||
"wai=\u0910;"
|
||||
"wo=\u0913;"
|
||||
"wau=\u0914;"
|
||||
|
||||
"ka=\u0915\n"
|
||||
"kha=\u0916\n"
|
||||
"ga=\u0917\n"
|
||||
"gha=\u0918\n"
|
||||
"nga=\u0919\n"
|
||||
"ka=\u0915;"
|
||||
"kha=\u0916;"
|
||||
"ga=\u0917;"
|
||||
"gha=\u0918;"
|
||||
"nga=\u0919;"
|
||||
|
||||
"ca=\u091A\n"
|
||||
"cha=\u091B\n"
|
||||
"ja=\u091C\n"
|
||||
"jha=\u091D\n"
|
||||
"nya=\u091E\n"
|
||||
"ca=\u091A;"
|
||||
"cha=\u091B;"
|
||||
"ja=\u091C;"
|
||||
"jha=\u091D;"
|
||||
"nya=\u091E;"
|
||||
|
||||
"tta=\u091F\n"
|
||||
"ttha=\u0920\n"
|
||||
"dda=\u0921\n"
|
||||
"ddha=\u0922\n"
|
||||
"nna=\u0923\n"
|
||||
"tta=\u091F;"
|
||||
"ttha=\u0920;"
|
||||
"dda=\u0921;"
|
||||
"ddha=\u0922;"
|
||||
"nna=\u0923;"
|
||||
|
||||
"ta=\u0924\n"
|
||||
"tha=\u0925\n"
|
||||
"da=\u0926\n"
|
||||
"dha=\u0927\n"
|
||||
"na=\u0928\n"
|
||||
"ta=\u0924;"
|
||||
"tha=\u0925;"
|
||||
"da=\u0926;"
|
||||
"dha=\u0927;"
|
||||
"na=\u0928;"
|
||||
|
||||
"pa=\u092A\n"
|
||||
"pha=\u092B\n"
|
||||
"ba=\u092C\n"
|
||||
"bha=\u092D\n"
|
||||
"ma=\u092E\n"
|
||||
"pa=\u092A;"
|
||||
"pha=\u092B;"
|
||||
"ba=\u092C;"
|
||||
"bha=\u092D;"
|
||||
"ma=\u092E;"
|
||||
|
||||
"ya=\u092F\n"
|
||||
"ra=\u0930\n"
|
||||
"rra=\u0931\n"
|
||||
"la=\u0933\n"
|
||||
"va=\u0935\n"
|
||||
"ya=\u092F;"
|
||||
"ra=\u0930;"
|
||||
"rra=\u0931;"
|
||||
"la=\u0933;"
|
||||
"va=\u0935;"
|
||||
|
||||
"sha=\u0936\n"
|
||||
"ssa=\u0937\n"
|
||||
"sa=\u0938\n"
|
||||
"ha=\u0939\n"
|
||||
"sha=\u0936;"
|
||||
"ssa=\u0937;"
|
||||
"sa=\u0938;"
|
||||
"ha=\u0939;"
|
||||
|
||||
// <vowel> represents the dependent form
|
||||
"aa=\u093E\n"
|
||||
"i=\u093F\n"
|
||||
"ii=\u0940\n"
|
||||
"u=\u0941\n"
|
||||
"uu=\u0942\n"
|
||||
"rh=\u0943\n"
|
||||
"lh=\u0944\n"
|
||||
"e=\u0947\n"
|
||||
"ai=\u0948\n"
|
||||
"o=\u094B\n"
|
||||
"au=\u094C\n"
|
||||
"aa=\u093E;"
|
||||
"i=\u093F;"
|
||||
"ii=\u0940;"
|
||||
"u=\u0941;"
|
||||
"uu=\u0942;"
|
||||
"rh=\u0943;"
|
||||
"lh=\u0944;"
|
||||
"e=\u0947;"
|
||||
"ai=\u0948;"
|
||||
"o=\u094B;"
|
||||
"au=\u094C;"
|
||||
|
||||
"virama=\u094D\n"
|
||||
"virama=\u094D;"
|
||||
|
||||
"wrr=\u0960\n"
|
||||
"rrh=\u0962\n"
|
||||
"wrr=\u0960;"
|
||||
"rrh=\u0962;"
|
||||
|
||||
"danda=\u0964\n"
|
||||
"doubleDanda=\u0965\n"
|
||||
"depVowelAbove=[\u093E-\u0940\u0945-\u094C]\n"
|
||||
"depVowelBelow=[\u0941-\u0944]\n"
|
||||
"danda=\u0964;"
|
||||
"doubleDanda=\u0965;"
|
||||
"depVowelAbove=[\u093E-\u0940\u0945-\u094C];"
|
||||
"depVowelBelow=[\u0941-\u0944];"
|
||||
// Ech: Double escape U+0000, so UnicodeString doesn't consider it
|
||||
// to be the end of the string. This is only necessary for U+0000
|
||||
// right now. [liu]
|
||||
"endThing=[{danda}{doubleDanda}\\u0000-\u08FF\u0980-\uFFFF]\n"
|
||||
"endThing=[{danda}{doubleDanda}\\u0000-\u08FF\u0980-\uFFFF];"
|
||||
|
||||
"&=[{virama}{aa}{ai}{au}{ii}{i}{uu}{u}{rrh}{rh}{lh}{e}{o}]\n"
|
||||
"%=[bcdfghjklmnpqrstvwxyz]\n"
|
||||
"&=[{virama}{aa}{ai}{au}{ii}{i}{uu}{u}{rrh}{rh}{lh}{e}{o}];"
|
||||
"%=[bcdfghjklmnpqrstvwxyz];"
|
||||
|
||||
//#####################################################################
|
||||
// convert from Latin letters to Native letters
|
||||
|
@ -139,110 +139,110 @@ ldevan {
|
|||
|
||||
// special forms with no good conversion
|
||||
|
||||
"mm>{bindu}\n"
|
||||
"x>{visarga}\n"
|
||||
"mm>{bindu};"
|
||||
"x>{visarga};"
|
||||
|
||||
// convert to independent forms at start of word or syllable:
|
||||
// e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
|
||||
// Moved up [LIU]
|
||||
|
||||
"aa>{waa}\n"
|
||||
"ai>{wai}\n"
|
||||
"au>{wau}\n"
|
||||
"ii>{wii}\n"
|
||||
"i>{wi}\n"
|
||||
"uu>{wuu}\n"
|
||||
"u>{wu}\n"
|
||||
"rrh>{wrr}\n"
|
||||
"rh>{wr}\n"
|
||||
"lh>{wl}\n"
|
||||
"e>{we}\n"
|
||||
"o>{wo}\n"
|
||||
"a>{wa}\n"
|
||||
"aa>{waa};"
|
||||
"ai>{wai};"
|
||||
"au>{wau};"
|
||||
"ii>{wii};"
|
||||
"i>{wi};"
|
||||
"uu>{wuu};"
|
||||
"u>{wu};"
|
||||
"rrh>{wrr};"
|
||||
"rh>{wr};"
|
||||
"lh>{wl};"
|
||||
"e>{we};"
|
||||
"o>{wo};"
|
||||
"a>{wa};"
|
||||
|
||||
// normal consonants
|
||||
|
||||
"kh>{kha}|{virama}\n"
|
||||
"k>{ka}|{virama}\n"
|
||||
"q>{ka}|{virama}\n"
|
||||
"gh>{gha}|{virama}\n"
|
||||
"g>{ga}|{virama}\n"
|
||||
"ng>{nga}|{virama}\n"
|
||||
"ch>{cha}|{virama}\n"
|
||||
"c>{ca}|{virama}\n"
|
||||
"jh>{jha}|{virama}\n"
|
||||
"j>{ja}|{virama}\n"
|
||||
"ny>{nya}|{virama}\n"
|
||||
"tth>{ttha}|{virama}\n"
|
||||
"tt>{tta}|{virama}\n"
|
||||
"ddh>{ddha}|{virama}\n"
|
||||
"dd>{dda}|{virama}\n"
|
||||
"nn>{nna}|{virama}\n"
|
||||
"th>{tha}|{virama}\n"
|
||||
"t>{ta}|{virama}\n"
|
||||
"dh>{dha}|{virama}\n"
|
||||
"d>{da}|{virama}\n"
|
||||
"n>{na}|{virama}\n"
|
||||
"ph>{pha}|{virama}\n"
|
||||
"p>{pa}|{virama}\n"
|
||||
"bh>{bha}|{virama}\n"
|
||||
"b>{ba}|{virama}\n"
|
||||
"m>{ma}|{virama}\n"
|
||||
"y>{ya}|{virama}\n"
|
||||
"r>{ra}|{virama}\n"
|
||||
"l>{la}|{virama}\n"
|
||||
"v>{va}|{virama}\n"
|
||||
"f>{va}|{virama}\n"
|
||||
"w>{va}|{virama}\n"
|
||||
"sh>{sha}|{virama}\n"
|
||||
"ss>{ssa}|{virama}\n"
|
||||
"s>{sa}|{virama}\n"
|
||||
"z>{sa}|{virama}\n"
|
||||
"h>{ha}|{virama}\n"
|
||||
"kh>{kha}|{virama};"
|
||||
"k>{ka}|{virama};"
|
||||
"q>{ka}|{virama};"
|
||||
"gh>{gha}|{virama};"
|
||||
"g>{ga}|{virama};"
|
||||
"ng>{nga}|{virama};"
|
||||
"ch>{cha}|{virama};"
|
||||
"c>{ca}|{virama};"
|
||||
"jh>{jha}|{virama};"
|
||||
"j>{ja}|{virama};"
|
||||
"ny>{nya}|{virama};"
|
||||
"tth>{ttha}|{virama};"
|
||||
"tt>{tta}|{virama};"
|
||||
"ddh>{ddha}|{virama};"
|
||||
"dd>{dda}|{virama};"
|
||||
"nn>{nna}|{virama};"
|
||||
"th>{tha}|{virama};"
|
||||
"t>{ta}|{virama};"
|
||||
"dh>{dha}|{virama};"
|
||||
"d>{da}|{virama};"
|
||||
"n>{na}|{virama};"
|
||||
"ph>{pha}|{virama};"
|
||||
"p>{pa}|{virama};"
|
||||
"bh>{bha}|{virama};"
|
||||
"b>{ba}|{virama};"
|
||||
"m>{ma}|{virama};"
|
||||
"y>{ya}|{virama};"
|
||||
"r>{ra}|{virama};"
|
||||
"l>{la}|{virama};"
|
||||
"v>{va}|{virama};"
|
||||
"f>{va}|{virama};"
|
||||
"w>{va}|{virama};"
|
||||
"sh>{sha}|{virama};"
|
||||
"ss>{ssa}|{virama};"
|
||||
"s>{sa}|{virama};"
|
||||
"z>{sa}|{virama};"
|
||||
"h>{ha}|{virama};"
|
||||
|
||||
".>{danda}\n"
|
||||
"{danda}.>{doubleDanda}\n"
|
||||
"{depVowelAbove}]~>{bindu}\n"
|
||||
"{depVowelBelow}]~>{candrabindu}\n"
|
||||
".>{danda};"
|
||||
"{danda}.>{doubleDanda};"
|
||||
"{depVowelAbove})~>{bindu};"
|
||||
"{depVowelBelow})~>{candrabindu};"
|
||||
|
||||
// convert to dependent forms after consonant with no vowel:
|
||||
// e.g. kai -> {ka}{virama}ai -> {ka}{ai}
|
||||
|
||||
"{virama}aa>{aa}\n"
|
||||
"{virama}ai>{ai}\n"
|
||||
"{virama}au>{au}\n"
|
||||
"{virama}ii>{ii}\n"
|
||||
"{virama}i>{i}\n"
|
||||
"{virama}uu>{uu}\n"
|
||||
"{virama}u>{u}\n"
|
||||
"{virama}rrh>{rrh}\n"
|
||||
"{virama}rh>{rh}\n"
|
||||
"{virama}lh>{lh}\n"
|
||||
"{virama}e>{e}\n"
|
||||
"{virama}o>{o}\n"
|
||||
"{virama}a>\n"
|
||||
"{virama}aa>{aa};"
|
||||
"{virama}ai>{ai};"
|
||||
"{virama}au>{au};"
|
||||
"{virama}ii>{ii};"
|
||||
"{virama}i>{i};"
|
||||
"{virama}uu>{uu};"
|
||||
"{virama}u>{u};"
|
||||
"{virama}rrh>{rrh};"
|
||||
"{virama}rh>{rh};"
|
||||
"{virama}lh>{lh};"
|
||||
"{virama}e>{e};"
|
||||
"{virama}o>{o};"
|
||||
"{virama}a>;"
|
||||
|
||||
// otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
|
||||
|
||||
"{virama}''aa>{waa}\n"
|
||||
"{virama}''ai>{wai}\n"
|
||||
"{virama}''au>{wau}\n"
|
||||
"{virama}''ii>{wii}\n"
|
||||
"{virama}''i>{wi}\n"
|
||||
"{virama}''uu>{wuu}\n"
|
||||
"{virama}''u>{wu}\n"
|
||||
"{virama}''rrh>{wrr}\n"
|
||||
"{virama}''rh>{wr}\n"
|
||||
"{virama}''lh>{wl}\n"
|
||||
"{virama}''e>{we}\n"
|
||||
"{virama}''o>{wo}\n"
|
||||
"{virama}''a>{wa}\n"
|
||||
"{virama}''aa>{waa};"
|
||||
"{virama}''ai>{wai};"
|
||||
"{virama}''au>{wau};"
|
||||
"{virama}''ii>{wii};"
|
||||
"{virama}''i>{wi};"
|
||||
"{virama}''uu>{wuu};"
|
||||
"{virama}''u>{wu};"
|
||||
"{virama}''rrh>{wrr};"
|
||||
"{virama}''rh>{wr};"
|
||||
"{virama}''lh>{wl};"
|
||||
"{virama}''e>{we};"
|
||||
"{virama}''o>{wo};"
|
||||
"{virama}''a>{wa};"
|
||||
|
||||
"{virama}[{endThing}>\n"
|
||||
"{virama}({endThing}>;"
|
||||
|
||||
// convert any left-over apostrophes used for separation
|
||||
|
||||
"''>\n"
|
||||
"''>;"
|
||||
|
||||
//#####################################################################
|
||||
// convert from Native letters to Latin letters
|
||||
|
@ -250,162 +250,162 @@ ldevan {
|
|||
|
||||
// special forms with no good conversion
|
||||
|
||||
"mm<{bindu}\n"
|
||||
"x<{visarga}\n"
|
||||
"mm<{bindu};"
|
||||
"x<{visarga};"
|
||||
|
||||
// normal consonants
|
||||
|
||||
"kh<{kha}[&\n"
|
||||
"kha<{kha}\n"
|
||||
"k''<{ka}{virama}[{ha}\n"
|
||||
"k<{ka}[&\n"
|
||||
"ka<{ka}\n"
|
||||
"gh<{gha}[&\n"
|
||||
"gha<{gha}\n"
|
||||
"g''<{ga}{virama}[{ha}\n"
|
||||
"g<{ga}[&\n"
|
||||
"ga<{ga}\n"
|
||||
"ng<{nga}[&\n"
|
||||
"nga<{nga}\n"
|
||||
"ch<{cha}[&\n"
|
||||
"cha<{cha}\n"
|
||||
"c''<{ca}{virama}[{ha}\n"
|
||||
"c<{ca}[&\n"
|
||||
"ca<{ca}\n"
|
||||
"jh<{jha}[&\n"
|
||||
"jha<{jha}\n"
|
||||
"j''<{ja}{virama}[{ha}\n"
|
||||
"j<{ja}[&\n"
|
||||
"ja<{ja}\n"
|
||||
"ny<{nya}[&\n"
|
||||
"nya<{nya}\n"
|
||||
"tth<{ttha}[&\n"
|
||||
"ttha<{ttha}\n"
|
||||
"tt''<{tta}{virama}[{ha}\n"
|
||||
"tt<{tta}[&\n"
|
||||
"tta<{tta}\n"
|
||||
"ddh<{ddha}[&\n"
|
||||
"ddha<{ddha}\n"
|
||||
"dd''<{dda}[&{ha}\n"
|
||||
"dd<{dda}[&\n"
|
||||
"dda<{dda}\n"
|
||||
"dh<{dha}[&\n"
|
||||
"dha<{dha}\n"
|
||||
"d''<{da}{virama}[{ha}\n"
|
||||
"d''<{da}{virama}[{ddha}\n"
|
||||
"d''<{da}{virama}[{dda}\n"
|
||||
"d''<{da}{virama}[{dha}\n"
|
||||
"d''<{da}{virama}[{da}\n"
|
||||
"d<{da}[&\n"
|
||||
"da<{da}\n"
|
||||
"th<{tha}[&\n"
|
||||
"tha<{tha}\n"
|
||||
"t''<{ta}{virama}[{ha}\n"
|
||||
"t''<{ta}{virama}[{ttha}\n"
|
||||
"t''<{ta}{virama}[{tta}\n"
|
||||
"t''<{ta}{virama}[{tha}\n"
|
||||
"t''<{ta}{virama}[{ta}\n"
|
||||
"t<{ta}[&\n"
|
||||
"ta<{ta}\n"
|
||||
"n''<{na}{virama}[{ga}\n"
|
||||
"n''<{na}{virama}[{ya}\n"
|
||||
"n<{na}[&\n"
|
||||
"na<{na}\n"
|
||||
"ph<{pha}[&\n"
|
||||
"pha<{pha}\n"
|
||||
"p''<{pa}{virama}[{ha}\n"
|
||||
"p<{pa}[&\n"
|
||||
"pa<{pa}\n"
|
||||
"bh<{bha}[&\n"
|
||||
"bha<{bha}\n"
|
||||
"b''<{ba}{virama}[{ha}\n"
|
||||
"b<{ba}[&\n"
|
||||
"ba<{ba}\n"
|
||||
"m''<{ma}{virama}[{ma}\n"
|
||||
"m''<{ma}{virama}[{bindu}\n"
|
||||
"m<{ma}[&\n"
|
||||
"ma<{ma}\n"
|
||||
"y<{ya}[&\n"
|
||||
"ya<{ya}\n"
|
||||
"r''<{ra}{virama}[{ha}\n"
|
||||
"r<{ra}[&\n"
|
||||
"ra<{ra}\n"
|
||||
"l''<{la}{virama}[{ha}\n"
|
||||
"l<{la}[&\n"
|
||||
"la<{la}\n"
|
||||
"v<{va}[&\n"
|
||||
"va<{va}\n"
|
||||
"sh<{sha}[&\n"
|
||||
"sha<{sha}\n"
|
||||
"ss<{ssa}[&\n"
|
||||
"ssa<{ssa}\n"
|
||||
"s''<{sa}{virama}[{ha}\n"
|
||||
"s''<{sa}{virama}[{sha}\n"
|
||||
"s''<{sa}{virama}[{ssa}\n"
|
||||
"s''<{sa}{virama}[{sa}\n"
|
||||
"s<{sa}[&\n"
|
||||
"sa<{sa}\n"
|
||||
"h<{ha}[&\n"
|
||||
"ha<{ha}\n"
|
||||
"kh<{kha}(&;"
|
||||
"kha<{kha};"
|
||||
"k''<{ka}{virama}({ha};"
|
||||
"k<{ka}(&;"
|
||||
"ka<{ka};"
|
||||
"gh<{gha}(&;"
|
||||
"gha<{gha};"
|
||||
"g''<{ga}{virama}({ha};"
|
||||
"g<{ga}(&;"
|
||||
"ga<{ga};"
|
||||
"ng<{nga}(&;"
|
||||
"nga<{nga};"
|
||||
"ch<{cha}(&;"
|
||||
"cha<{cha};"
|
||||
"c''<{ca}{virama}({ha};"
|
||||
"c<{ca}(&;"
|
||||
"ca<{ca};"
|
||||
"jh<{jha}(&;"
|
||||
"jha<{jha};"
|
||||
"j''<{ja}{virama}({ha};"
|
||||
"j<{ja}(&;"
|
||||
"ja<{ja};"
|
||||
"ny<{nya}(&;"
|
||||
"nya<{nya};"
|
||||
"tth<{ttha}(&;"
|
||||
"ttha<{ttha};"
|
||||
"tt''<{tta}{virama}({ha};"
|
||||
"tt<{tta}(&;"
|
||||
"tta<{tta};"
|
||||
"ddh<{ddha}(&;"
|
||||
"ddha<{ddha};"
|
||||
"dd''<{dda}(&{ha};"
|
||||
"dd<{dda}(&;"
|
||||
"dda<{dda};"
|
||||
"dh<{dha}(&;"
|
||||
"dha<{dha};"
|
||||
"d''<{da}{virama}({ha};"
|
||||
"d''<{da}{virama}({ddha};"
|
||||
"d''<{da}{virama}({dda};"
|
||||
"d''<{da}{virama}({dha};"
|
||||
"d''<{da}{virama}({da};"
|
||||
"d<{da}(&;"
|
||||
"da<{da};"
|
||||
"th<{tha}(&;"
|
||||
"tha<{tha};"
|
||||
"t''<{ta}{virama}({ha};"
|
||||
"t''<{ta}{virama}({ttha};"
|
||||
"t''<{ta}{virama}({tta};"
|
||||
"t''<{ta}{virama}({tha};"
|
||||
"t''<{ta}{virama}({ta};"
|
||||
"t<{ta}(&;"
|
||||
"ta<{ta};"
|
||||
"n''<{na}{virama}({ga};"
|
||||
"n''<{na}{virama}({ya};"
|
||||
"n<{na}(&;"
|
||||
"na<{na};"
|
||||
"ph<{pha}(&;"
|
||||
"pha<{pha};"
|
||||
"p''<{pa}{virama}({ha};"
|
||||
"p<{pa}(&;"
|
||||
"pa<{pa};"
|
||||
"bh<{bha}(&;"
|
||||
"bha<{bha};"
|
||||
"b''<{ba}{virama}({ha};"
|
||||
"b<{ba}(&;"
|
||||
"ba<{ba};"
|
||||
"m''<{ma}{virama}({ma};"
|
||||
"m''<{ma}{virama}({bindu};"
|
||||
"m<{ma}(&;"
|
||||
"ma<{ma};"
|
||||
"y<{ya}(&;"
|
||||
"ya<{ya};"
|
||||
"r''<{ra}{virama}({ha};"
|
||||
"r<{ra}(&;"
|
||||
"ra<{ra};"
|
||||
"l''<{la}{virama}({ha};"
|
||||
"l<{la}(&;"
|
||||
"la<{la};"
|
||||
"v<{va}(&;"
|
||||
"va<{va};"
|
||||
"sh<{sha}(&;"
|
||||
"sha<{sha};"
|
||||
"ss<{ssa}(&;"
|
||||
"ssa<{ssa};"
|
||||
"s''<{sa}{virama}({ha};"
|
||||
"s''<{sa}{virama}({sha};"
|
||||
"s''<{sa}{virama}({ssa};"
|
||||
"s''<{sa}{virama}({sa};"
|
||||
"s<{sa}(&;"
|
||||
"sa<{sa};"
|
||||
"h<{ha}(&;"
|
||||
"ha<{ha};"
|
||||
|
||||
// dependent vowels (should never occur except following consonants)
|
||||
|
||||
"aa<{aa}\n"
|
||||
"ai<{ai}\n"
|
||||
"au<{au}\n"
|
||||
"ii<{ii}\n"
|
||||
"i<{i}\n"
|
||||
"uu<{uu}\n"
|
||||
"u<{u}\n"
|
||||
"rrh<{rrh}\n"
|
||||
"rh<{rh}\n"
|
||||
"lh<{lh}\n"
|
||||
"e<{e}\n"
|
||||
"o<{o}\n"
|
||||
"aa<{aa};"
|
||||
"ai<{ai};"
|
||||
"au<{au};"
|
||||
"ii<{ii};"
|
||||
"i<{i};"
|
||||
"uu<{uu};"
|
||||
"u<{u};"
|
||||
"rrh<{rrh};"
|
||||
"rh<{rh};"
|
||||
"lh<{lh};"
|
||||
"e<{e};"
|
||||
"o<{o};"
|
||||
|
||||
// independent vowels (when following consonants)
|
||||
|
||||
"''aa<a]{waa}\n"
|
||||
"''aa<%]{waa}\n"
|
||||
"''ai<a]{wai}\n"
|
||||
"''ai<%]{wai}\n"
|
||||
"''au<a]{wau}\n"
|
||||
"''au<%]{wau}\n"
|
||||
"''ii<a]{wii}\n"
|
||||
"''ii<%]{wii}\n"
|
||||
"''i<a]{wi}\n"
|
||||
"''i<%]{wi}\n"
|
||||
"''uu<a]{wuu}\n"
|
||||
"''uu<%]{wuu}\n"
|
||||
"''u<a]{wu}\n"
|
||||
"''u<%]{wu}\n"
|
||||
"''rrh<%]{wrr}\n"
|
||||
"''rh<%]{wr}\n"
|
||||
"''lh<%]{wl}\n"
|
||||
"''e<%]{we}\n"
|
||||
"''o<%]{wo}\n"
|
||||
"''a<a]{wa}\n"
|
||||
"''a<%]{wa}\n"
|
||||
"''aa<a){waa};"
|
||||
"''aa<%){waa};"
|
||||
"''ai<a){wai};"
|
||||
"''ai<%){wai};"
|
||||
"''au<a){wau};"
|
||||
"''au<%){wau};"
|
||||
"''ii<a){wii};"
|
||||
"''ii<%){wii};"
|
||||
"''i<a){wi};"
|
||||
"''i<%){wi};"
|
||||
"''uu<a){wuu};"
|
||||
"''uu<%){wuu};"
|
||||
"''u<a){wu};"
|
||||
"''u<%){wu};"
|
||||
"''rrh<%){wrr};"
|
||||
"''rh<%){wr};"
|
||||
"''lh<%){wl};"
|
||||
"''e<%){we};"
|
||||
"''o<%){wo};"
|
||||
"''a<a){wa};"
|
||||
"''a<%){wa};"
|
||||
|
||||
|
||||
// independent vowels (otherwise)
|
||||
|
||||
"aa<{waa}\n"
|
||||
"ai<{wai}\n"
|
||||
"au<{wau}\n"
|
||||
"ii<{wii}\n"
|
||||
"i<{wi}\n"
|
||||
"uu<{wuu}\n"
|
||||
"u<{wu}\n"
|
||||
"rrh<{wrr}\n"
|
||||
"rh<{wr}\n"
|
||||
"lh<{wl}\n"
|
||||
"e<{we}\n"
|
||||
"o<{wo}\n"
|
||||
"a<{wa}\n"
|
||||
"aa<{waa};"
|
||||
"ai<{wai};"
|
||||
"au<{wau};"
|
||||
"ii<{wii};"
|
||||
"i<{wi};"
|
||||
"uu<{wuu};"
|
||||
"u<{wu};"
|
||||
"rrh<{wrr};"
|
||||
"rh<{wr};"
|
||||
"lh<{wl};"
|
||||
"e<{we};"
|
||||
"o<{wo};"
|
||||
"a<{wa};"
|
||||
|
||||
// blow away any remaining viramas
|
||||
|
||||
"<{virama}\n"
|
||||
"<{virama};"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,371 +10,345 @@
|
|||
|
||||
lgreek {
|
||||
Rule {
|
||||
// ==============================================
|
||||
// Modern Greek Transliteration Rules
|
||||
//
|
||||
// This transliterates modern Greek characters, but using rules
|
||||
// that are traditional for Ancient Greek, and
|
||||
// thus more resemble Greek words that have become part
|
||||
// of English. It differs from the official Greek
|
||||
// transliteration, which is more phonetic (since
|
||||
// most modern Greek vowels, for example, have
|
||||
// degenerated simply to sound like "ee").
|
||||
//
|
||||
// There are only a few tricky parts.
|
||||
// 1. eta and omega don't map directly to Latin vowels,
|
||||
// so we use a macron on e and o, and some
|
||||
// other combinations if they are accented.
|
||||
// 2. The accented, diaeresis i and y are substituted too.
|
||||
// 3. Some letters use digraphs, like "ph". While typical,
|
||||
// they need some special handling.
|
||||
// 4. A gamma before a gamma or a few other letters is
|
||||
// transliterated as an "n", as in "Anglo"
|
||||
// 5. An ypsilon after a vowel is a "u", as in
|
||||
// "Mouseio". Otherwise it is a "y" as in "Physikon"
|
||||
// 6. The construction of the rules is made simpler by making sure
|
||||
// that most rules for lowercase letters exactly correspond to the
|
||||
// rules for uppercase letters, *except* for the case of the letters
|
||||
// in the rule itself. That way, after modifying the uppercase rules,
|
||||
// you can just copy, paste, and "set to lowercase" to get
|
||||
// the rules for lowercase letters!
|
||||
// ==============================================
|
||||
|
||||
// ==============================================
|
||||
// Variables, used to make the rules more comprehensible
|
||||
// and for conditionals.
|
||||
// ==============================================
|
||||
|
||||
// Latin Letters
|
||||
|
||||
"E-MACRON=\u0112;"
|
||||
"e-macron=\u0113;"
|
||||
"O-MACRON=\u014C;"
|
||||
"o-macron=\u014D;"
|
||||
"Y-UMLAUT=\u0178;"
|
||||
"y-umlaut=\u00FF;"
|
||||
|
||||
//* // with real accents.
|
||||
//* "E-MACRON-ACUTE=\u0112\u0301;"
|
||||
//* "e-macron-acute=\u0113\u0301;"
|
||||
//* "O-MACRON-ACUTE=\u014C\u0301;"
|
||||
//* "o-macron-acute=\u014D\u0301;"
|
||||
//* "y-umlaut-acute=\u00FF\u0301;"
|
||||
//* "\u00ef-acute=\u00ef\u0301;"
|
||||
//* "\u00fc-acute=\u00fc\u0301;"
|
||||
//* //
|
||||
|
||||
// single letter equivalents
|
||||
|
||||
"E-MACRON-ACUTE=\u00CA;"
|
||||
"e-macron-acute=\u00EA;"
|
||||
"O-MACRON-ACUTE=\u00D4;"
|
||||
"o-macron-acute=\u00F4;"
|
||||
"y-umlaut-acute=\u0177;"
|
||||
"\u00ef-acute=\u00EE;"
|
||||
"\u00fc-acute=\u00FB;"
|
||||
|
||||
// Greek Letters
|
||||
|
||||
"grAl=\u0391\n"
|
||||
"grBe=\u0392\n"
|
||||
"grGa=\u0393\n"
|
||||
"grDe=\u0394\n"
|
||||
"grEp=\u0395\n"
|
||||
"grZe=\u0396\n"
|
||||
"grEt=\u0397\n"
|
||||
"grTh=\u0398\n"
|
||||
"grIo=\u0399\n"
|
||||
"grKa=\u039A\n"
|
||||
"grLa=\u039B\n"
|
||||
"grMu=\u039C\n"
|
||||
"grNu=\u039D\n"
|
||||
"grKs=\u039E\n"
|
||||
"grOm=\u039F\n"
|
||||
"grPi=\u03A0\n"
|
||||
"grRh=\u03A1\n"
|
||||
"grSi=\u03A3\n"
|
||||
"grTa=\u03A4\n"
|
||||
"grUp=\u03A5\n"
|
||||
"grPh=\u03A6\n"
|
||||
"grKh=\u03A7\n"
|
||||
"grPs=\u03A8\n"
|
||||
"grOme=\u03A9\n"
|
||||
"ALPHA=\u0391;"
|
||||
"BETA=\u0392;"
|
||||
"GAMMA=\u0393;"
|
||||
"DELTA=\u0394;"
|
||||
"EPSILON=\u0395;"
|
||||
"ZETA=\u0396;"
|
||||
"ETA=\u0397;"
|
||||
"THETA=\u0398;"
|
||||
"IOTA=\u0399;"
|
||||
"KAPPA=\u039A;"
|
||||
"LAMBDA=\u039B;"
|
||||
"MU=\u039C;"
|
||||
"NU=\u039D;"
|
||||
"XI=\u039E;"
|
||||
"OMICRON=\u039F;"
|
||||
"PI=\u03A0;"
|
||||
"RHO=\u03A1;"
|
||||
"SIGMA=\u03A3;"
|
||||
"TAU=\u03A4;"
|
||||
"YPSILON=\u03A5;"
|
||||
"PHI=\u03A6;"
|
||||
"CHI=\u03A7;"
|
||||
"PSI=\u03A8;"
|
||||
"OMEGA=\u03A9;"
|
||||
|
||||
"gral=\u03B1\n"
|
||||
"grbe=\u03B2\n"
|
||||
"grga=\u03B3\n"
|
||||
"grde=\u03B4\n"
|
||||
"grep=\u03B5\n"
|
||||
"grze=\u03B6\n"
|
||||
"gret=\u03B7\n"
|
||||
"grth=\u03B8\n"
|
||||
"grio=\u03B9\n"
|
||||
"grka=\u03BA\n"
|
||||
"grla=\u03BB\n"
|
||||
"grmu=\u03BC\n"
|
||||
"grnu=\u03BD\n"
|
||||
"grks=\u03BE\n"
|
||||
"grom=\u03BF\n"
|
||||
"grpi=\u03C0\n"
|
||||
"grrh=\u03C1\n"
|
||||
"grsi=\u03C3\n"
|
||||
"grta=\u03C4\n"
|
||||
"grup=\u03C5\n"
|
||||
"grph=\u03C6\n"
|
||||
"grkh=\u03C7\n"
|
||||
"grps=\u03C8\n"
|
||||
"grome=\u03C9\n"
|
||||
"ALPHA+=\u0386;"
|
||||
"EPSILON+=\u0388;"
|
||||
"ETA+=\u0389;"
|
||||
"IOTA+=\u038A;"
|
||||
"OMICRON+=\u038C;"
|
||||
"YPSILON+=\u038E;"
|
||||
"OMEGA+=\u038F;"
|
||||
"IOTA\u00a8=\u03AA;"
|
||||
"YPSILON\u00a8=\u03AB;"
|
||||
|
||||
"alpha=\u03B1;"
|
||||
"beta=\u03B2;"
|
||||
"gamma=\u03B3;"
|
||||
"delta=\u03B4;"
|
||||
"epsilon=\u03B5;"
|
||||
"zeta=\u03B6;"
|
||||
"eta=\u03B7;"
|
||||
"theta=\u03B8;"
|
||||
"iota=\u03B9;"
|
||||
"kappa=\u03BA;"
|
||||
"lambda=\u03BB;"
|
||||
"mu=\u03BC;"
|
||||
"nu=\u03BD;"
|
||||
"xi=\u03BE;"
|
||||
"omicron=\u03BF;"
|
||||
"pi=\u03C0;"
|
||||
"rho=\u03C1;"
|
||||
"sigma=\u03C3;"
|
||||
"tau=\u03C4;"
|
||||
"ypsilon=\u03C5;"
|
||||
"phi=\u03C6;"
|
||||
"chi=\u03C7;"
|
||||
"psi=\u03C8;"
|
||||
"omega=\u03C9;"
|
||||
|
||||
//forms
|
||||
"grfinal=\u03C2\n"
|
||||
|
||||
"grAcAl=\u0386\n"
|
||||
"grAcEp=\u0388\n"
|
||||
"grAcEt=\u0389\n"
|
||||
"grAcIo=\u038A\n"
|
||||
"grAcOm=\u038C\n"
|
||||
"grAcUp=\u038E\n"
|
||||
"grAcOme=\u038F\n"
|
||||
"grDiIo=\u03AA\n"
|
||||
"grDiUp=\u03AB\n"
|
||||
"alpha+=\u03AC;"
|
||||
"epsilon+=\u03AD;"
|
||||
"eta+=\u03AE;"
|
||||
"iota+=\u03AF;"
|
||||
"omicron+=\u03CC;"
|
||||
"ypsilon+=\u03CD;"
|
||||
"omega+=\u03CE;"
|
||||
"iota\u00a8=\u03CA;"
|
||||
"ypsilon\u00a8=\u03CB;"
|
||||
"iota\u00a8+=\u0390;"
|
||||
"ypsilon\u00a8+=\u03B0;"
|
||||
"sigma+=\u03C2;"
|
||||
|
||||
"gracal=\u03AC\n"
|
||||
"gracep=\u03AD\n"
|
||||
"gracet=\u03AE\n"
|
||||
"gracio=\u03AF\n"
|
||||
"gracom=\u03CC\n"
|
||||
"gracup=\u03CD\n"
|
||||
"gracome=\u03CE\n"
|
||||
"grdiio=\u03CA\n"
|
||||
"grdiup=\u03CB\n"
|
||||
// Variables for conditional mappings
|
||||
|
||||
//gracdiio=\u00FD
|
||||
//gracdiup=\u00FE
|
||||
// Use lowercase for all variable names, to allow cut/paste below.
|
||||
|
||||
"letter=[[:Lu:][:Ll:]]\n"
|
||||
"letter=[[:Lu:][:Ll:]];"
|
||||
"lower=[[:Ll:]];"
|
||||
"vowel=[aeiouAEIOU"
|
||||
"{ALPHA}{EPSILON}{ETA}{IOTA}{OMICRON}{YPSILON}{OMEGA}"
|
||||
"{ALPHA+}{EPSILON+}{ETA+}{IOTA+}{OMICRON+}{YPSILON+}{OMEGA+}"
|
||||
"{IOTA\u00a8}{YPSILON\u00a8}"
|
||||
"{alpha}{epsilon}{eta}{iota}{omicron}{ypsilon}{omega}"
|
||||
"{alpha+}{epsilon+}{eta+}{iota+}{omicron+}{ypsilon+}{omega+}"
|
||||
"{iota\u00a8}{ypsilon\u00a8}"
|
||||
"{iota\u00a8+}{ypsilon\u00a8+}"
|
||||
"];"
|
||||
"n-gamma=[GKXCgkxc];"
|
||||
"gamma-n=[{GAMMA}{KAPPA}{CHI}{XI}{gamma}{kappa}{chi}{xi}];"
|
||||
"pp=[Pp];"
|
||||
|
||||
// convert Roman to Native
|
||||
"Greek>\u039c\u0397\u039d\u0399\u039d\u0020\u0391\u0395\u0399\u0394\u0395\u002c\u0020\u0398\u0395\u0391\u002c\u0020--\u0397\u039b\u0397\u0399\u0391\u0394\u0395\u03a9\u0020\u0391\u03a7\u0399\u039b\u0397\u039f\u03a3\n"
|
||||
// ==============================================
|
||||
// Rules
|
||||
// ==============================================
|
||||
// The following are special titlecases, and should
|
||||
// not be copied when duplicating the lowercase
|
||||
// ==============================================
|
||||
|
||||
"AV`>{grAl}{grAcUp}\n"
|
||||
"EV`>{grEp}{grAcUp}\n"
|
||||
"AV>{grAl}{grUp}\n"
|
||||
"EV>{grEp}{grUp}\n"
|
||||
"NG>{grGa}{grGa}\n"
|
||||
"NK>{grGa}{grKa}\n"
|
||||
"NX>{grGa}{grKs}\n"
|
||||
"NCH>{grGa}{grKh}\n"
|
||||
"Th<{THETA}({lower};"
|
||||
"Ph<{PHI}({lower};"
|
||||
"Ch<{CHI}({lower};"
|
||||
//masked: "Ps<{PHI}({lower};"
|
||||
|
||||
//+ "final = [ .;]\n" // Syntax error, unused anyway - Liu
|
||||
// Because there are no uppercase forms for final sigma,
|
||||
// we had to move all the sigma rules up here.
|
||||
|
||||
"A`>{grAcAl}\n"
|
||||
"EE`>{grAcEt}\n"
|
||||
"E`>{grAcEp}\n"
|
||||
"I`>{grAcIo}\n"
|
||||
"U`>{grAcUp}\n"
|
||||
"OO`>{grAcOme}\n"
|
||||
"O`>{grAcOm}\n"
|
||||
"''I>{grDiIo}\n"
|
||||
"''U>{grDiUp}\n"
|
||||
"A>{grAl}\n"
|
||||
"B>{grBe}\n"
|
||||
"C[I>{grSi}\n"
|
||||
"C[E>{grSi}\n"
|
||||
"C[Y>{grSi}\n"
|
||||
"CH>{grKh}\n"
|
||||
"C>{grKa}\n"
|
||||
"D>{grDe}\n"
|
||||
"EE>{grEt}\n"
|
||||
"E>{grEp}\n"
|
||||
"F>{grPh}\n"
|
||||
"G>{grGa}\n"
|
||||
"H>{grKh}\n"
|
||||
"I>{grIo}\n"
|
||||
"J>{grIo}\n"
|
||||
"KS>{grKs}\n"
|
||||
"KH>{grKh}\n"
|
||||
"K>{grKa}\n"
|
||||
"L>{grLa}\n"
|
||||
"M>{grMu}\n"
|
||||
"N>{grNu}\n"
|
||||
"OO>{grOme}\n"
|
||||
"O>{grOm}\n"
|
||||
"PS>{grPs}\n"
|
||||
"PH>{grPh}\n"
|
||||
"P>{grPi}\n"
|
||||
"Q>{grKa}\n"
|
||||
"R>{grRh}\n"
|
||||
"S>{grSi}\n"
|
||||
"TH>{grTh}\n"
|
||||
"T>{grTa}\n"
|
||||
"W>{grUp}{grUp}\n"
|
||||
"U>{grUp}\n"
|
||||
"V>{grUp}\n"
|
||||
"X>{grKs}\n"
|
||||
"Y>{grUp}\n"
|
||||
"Z>{grZe}\n"
|
||||
// insert ' to preserve round trip, for double letters
|
||||
// don't need to do this for the digraphs with h,
|
||||
// since it is not created when mapping back from greek
|
||||
|
||||
//now Native to Roman
|
||||
"''S<{pp}){SIGMA};" // for PS
|
||||
"''s<{pp}){sigma};" // for ps
|
||||
"''s<{pp}){sigma+};" // for ps
|
||||
|
||||
"AV<{grAl}{grUp}\n"
|
||||
"EV<{grEp}{grUp}\n"
|
||||
"AV`<{grAl}{grAcUp}\n"
|
||||
"EV`<{grEp}{grAcUp}\n"
|
||||
"N''<{grNu}[{grGa}\n"
|
||||
"NG<{grGa}{grGa}\n"
|
||||
"N''<{grNu}[{grKa}\n"
|
||||
"NK<{grGa}{grKa}\n"
|
||||
"N''<{grNu}[{grKs}\n"
|
||||
"NX<{grGa}{grKs}\n"
|
||||
"N''<{grNu}[{grKh}\n"
|
||||
"NCH<{grGa}{grKh}\n"
|
||||
"S({letter}>{SIGMA};" "S<{SIGMA};"
|
||||
"s({letter}>{sigma};" "s<{sigma};"
|
||||
"s<>{sigma+};"
|
||||
|
||||
"A<{grAl}\n"
|
||||
"B<{grBe}\n"
|
||||
"G<{grGa}\n"
|
||||
"D<{grDe}\n"
|
||||
"E''<{grEp}[{grEp}\n"
|
||||
"E''<{grEp}[{grEt}\n"
|
||||
"E''<{grEp}[{grAcEp}\n"
|
||||
"E''<{grEp}[{grAcEt}\n"
|
||||
"E<{grEp}\n"
|
||||
"Z<{grZe}\n"
|
||||
"EE<{grEt}\n"
|
||||
"TH<{grTh}\n"
|
||||
"I<{grIo}\n"
|
||||
"K<{grKa}\n"
|
||||
"L<{grLa}\n"
|
||||
"M<{grMu}\n"
|
||||
"N<{grNu}\n"
|
||||
"X<{grKs}\n"
|
||||
"O''<{grOm}[{grOm}\n"
|
||||
"O''<{grOm}[{grOme}\n"
|
||||
"O''<{grOm}[{grAcOm}\n"
|
||||
"O''<{grOm}[{grAcOme}\n"
|
||||
"O<{grOm}\n"
|
||||
"P''<{grPi}[{grSi}\n"
|
||||
"P''<{grPi}[{grfinal}\n"
|
||||
"P<{grPi}\n"
|
||||
"R<{grRh}\n"
|
||||
"S<{grSi}\n"
|
||||
"T<{grTa}\n"
|
||||
"W<{grUp}{grUp}\n"
|
||||
// because there are no uppercase forms, had to move these up too.
|
||||
|
||||
"V<{grUp}[{grAcAl}\n"
|
||||
"V<{grUp}[{grAcEp}\n"
|
||||
"V<{grUp}[{grAcEt}\n"
|
||||
"V<{grUp}[{grAcIo}\n"
|
||||
"V<{grUp}[{grAcOm}\n"
|
||||
"V<{grUp}[{grAcUp}\n"
|
||||
"V<{grUp}[{grAcOme}\n"
|
||||
"i\"`>{iota\u00a8+};"
|
||||
"y\"`>{ypsilon\u00a8+};"
|
||||
|
||||
"V<{grUp}[{grAl}\n"
|
||||
"V<{grUp}[{grEp}\n"
|
||||
"V<{grUp}[{grEt}\n"
|
||||
"V<{grUp}[{grIo}\n"
|
||||
"V<{grUp}[{grOm}\n"
|
||||
//{grUp}[{grUp}<V
|
||||
"V<{grUp}[{grOme}\n"
|
||||
"{\u00ef-acute}<>{iota\u00a8+};"
|
||||
"{vowel}){\u00fc-acute}>{ypsilon\u00a8+};" "{\u00fc-acute}<{vowel}){ypsilon\u00a8+};"
|
||||
"{y-umlaut-acute}<>{ypsilon\u00a8+};"
|
||||
|
||||
"U<{grUp}\n"
|
||||
"PH<{grPh}\n"
|
||||
"CH<{grKh}\n"
|
||||
"PS<{grPs}\n"
|
||||
"OO<{grOme}\n"
|
||||
//forms
|
||||
"A`<{grAcAl}\n"
|
||||
"E`<{grAcEp}\n"
|
||||
"EE`<{grAcEt}\n"
|
||||
"I`<{grAcIo}\n"
|
||||
"O`<{grAcOm}\n"
|
||||
"U`<{grAcUp}\n"
|
||||
"OO`<{grAcOme}\n"
|
||||
"''I<{grDiIo}\n"
|
||||
"''U<{grDiUp}\n"
|
||||
// ==============================================
|
||||
// Uppercase Forms.
|
||||
// To make lowercase forms, just copy and lowercase below
|
||||
// ==============================================
|
||||
|
||||
//{gracdiio}<XX
|
||||
//{gracdiup}<XX
|
||||
//{grfinal}<XX
|
||||
// Typing variants, in case the keyboard doesn't have accents
|
||||
|
||||
"av`>{gral}{gracup}\n"
|
||||
"ev`>{grep}{gracup}\n"
|
||||
"av>{gral}{grup}\n"
|
||||
"ev>{grep}{grup}\n"
|
||||
"ng>{grga}{grga}\n"
|
||||
"nk>{grga}{grka}\n"
|
||||
"nx>{grga}{grks}\n"
|
||||
"nch>{grga}{grkh}\n"
|
||||
"A`>{ALPHA+};"
|
||||
"E`>{EPSILON+};"
|
||||
"EE`>{ETA+};"
|
||||
"EE>{ETA};"
|
||||
"I`>{IOTA+};"
|
||||
"O`>{OMICRON+};"
|
||||
"OO`>{OMEGA+};"
|
||||
"OO>{OMEGA};"
|
||||
"I\">{IOTA\u00a8};"
|
||||
"Y\">{YPSILON\u00a8};"
|
||||
|
||||
"a`>{gracal}\n"
|
||||
"ee`>{gracet}\n"
|
||||
"e`>{gracep}\n"
|
||||
"i`>{gracio}\n"
|
||||
"u`>{gracup}\n"
|
||||
"oo`>{gracome}\n"
|
||||
"o`>{gracom}\n"
|
||||
"''i>{grdiio}\n"
|
||||
"''u>{grdiup}\n"
|
||||
"a>{gral}\n"
|
||||
"b>{grbe}\n"
|
||||
"c[i>{grsi}\n"
|
||||
"c[e>{grsi}\n"
|
||||
"c[y>{grsi}\n"
|
||||
"ch>{grkh}\n"
|
||||
"c>{grka}\n"
|
||||
"d>{grde}\n"
|
||||
"ee>{gret}\n"
|
||||
"e>{grep}\n"
|
||||
"f>{grph}\n"
|
||||
"g>{grga}\n"
|
||||
"h>{grkh}\n"
|
||||
"i>{grio}\n"
|
||||
"j>{grio}\n"
|
||||
"ks>{grks}\n"
|
||||
"kh>{grkh}\n"
|
||||
"k>{grka}\n"
|
||||
"l>{grla}\n"
|
||||
"m>{grmu}\n"
|
||||
"n>{grnu}\n"
|
||||
"oo>{grome}\n"
|
||||
"o>{grom}\n"
|
||||
"ps>{grps}\n"
|
||||
"ph>{grph}\n"
|
||||
"p>{grpi}\n"
|
||||
"q>{grka}\n"
|
||||
"r>{grrh}\n"
|
||||
"s>|{grfinal}\n"
|
||||
"{grfinal}[{letter}>{grsi}\n"
|
||||
"th>{grth}\n"
|
||||
"t>{grta}\n"
|
||||
"w>{grup}{grup}\n"
|
||||
"u>{grup}\n"
|
||||
"v>{grup}\n"
|
||||
"x>{grks}\n"
|
||||
"y>{grup}\n"
|
||||
"z>{grze}\n"
|
||||
// Basic Letters
|
||||
|
||||
"A<>{ALPHA};"
|
||||
"\u00c1<>{ALPHA+};"
|
||||
"B<>{BETA};"
|
||||
"N){n-gamma}>{GAMMA};" "N<{GAMMA}({gamma-n};"
|
||||
"G<>{GAMMA};"
|
||||
"D<>{DELTA};"
|
||||
"E<>{EPSILON};"
|
||||
"\u00c9<>{EPSILON+};"
|
||||
"Z<>{ZETA};"
|
||||
"{E-MACRON-ACUTE}<>{ETA+};"
|
||||
"{E-MACRON}<>{ETA};"
|
||||
"TH<>{THETA};"
|
||||
"I<>{IOTA};"
|
||||
"\u00cd<>{IOTA+};"
|
||||
"\u00cf<>{IOTA\u00a8};"
|
||||
"K<>{KAPPA};"
|
||||
"L<>{LAMBDA};"
|
||||
"M<>{MU};"
|
||||
"N<>{NU};"
|
||||
"X<>{XI};"
|
||||
"O<>{OMICRON};"
|
||||
"\u00d3>{OMICRON+};" "\u00d3<{OMEGA+};"
|
||||
"PH<>{PHI};" // needs ordering before P
|
||||
"PS<>{PSI};" // needs ordering before P
|
||||
"P<>{PI};"
|
||||
"R<>{RHO};"
|
||||
"T<>{TAU};"
|
||||
"{vowel})U>{YPSILON};" "U<{vowel}){YPSILON};"
|
||||
"{vowel})\u00da>{YPSILON+};" "\u00da<{vowel}){YPSILON+};"
|
||||
"{vowel})\u00dc>{YPSILON\u00a8};" "\u00dc<{vowel}){YPSILON\u00a8};"
|
||||
"Y<>{YPSILON};"
|
||||
"\u00dd<>{YPSILON+};"
|
||||
"{Y-UMLAUT}<>{YPSILON\u00a8};"
|
||||
"CH<>{CHI};"
|
||||
"{O-MACRON-ACUTE}>{OMEGA+};" "{O-MACRON-ACUTE}<{OMICRON+};"
|
||||
"{O-MACRON}<>{OMEGA};"
|
||||
|
||||
//forms
|
||||
"''>\n"
|
||||
//now native to roman
|
||||
// Extra English Letters. Mapped for completeness
|
||||
|
||||
"av<{gral}{grup}\n"
|
||||
"ev<{grep}{grup}\n"
|
||||
"av`<{gral}{gracup}\n"
|
||||
"ev`<{grep}{gracup}\n"
|
||||
"n''<{grnu}[{grga}\n"
|
||||
"ng<{grga}{grga}\n"
|
||||
"n''<{grnu}[{grka}\n"
|
||||
"nk<{grga}{grka}\n"
|
||||
"n''<{grnu}[{grks}\n"
|
||||
"nx<{grga}{grks}\n"
|
||||
"n''<{grnu}[{grkh}\n"
|
||||
"nch<{grga}{grkh}\n"
|
||||
"C(I>{SIGMA};"
|
||||
"C(E>{SIGMA};"
|
||||
"C(Y>{SIGMA};"
|
||||
"C>{KAPPA};"
|
||||
"F>{PHI};"
|
||||
"H>{CHI};"
|
||||
"J>{IOTA};"
|
||||
"Q>{KAPPA};"
|
||||
"V>{YPSILON};"
|
||||
"W>{YPSILON};"
|
||||
|
||||
"a<{gral}\n"
|
||||
"b<{grbe}\n"
|
||||
"g<{grga}\n"
|
||||
"d<{grde}\n"
|
||||
"e''<{grep}[{grep}\n"
|
||||
"e''<{grep}[{gret}\n"
|
||||
"e''<{grep}[{gracep}\n"
|
||||
"e''<{grep}[{gracet}\n"
|
||||
"e<{grep}\n"
|
||||
"z<{grze}\n"
|
||||
"ee<{gret}\n"
|
||||
"th<{grth}\n"
|
||||
"i<{grio}\n"
|
||||
"k<{grka}\n"
|
||||
"l<{grla}\n"
|
||||
"m<{grmu}\n"
|
||||
"n<{grnu}\n"
|
||||
"x<{grks}\n"
|
||||
"o''<{grom}[{grom}\n"
|
||||
"o''<{grom}[{grome}\n"
|
||||
"o''<{grom}[{gracom}\n"
|
||||
"o''<{grom}[{gracome}\n"
|
||||
"o<{grom}\n"
|
||||
"p''<{grpi}[{grsi}\n"
|
||||
"p''<{grpi}[{grfinal}\n"
|
||||
"p<{grpi}\n"
|
||||
"r<{grrh}\n"
|
||||
"s<{grsi}\n"
|
||||
"s<{grfinal}\n"
|
||||
"t<{grta}\n"
|
||||
"w<{grup}{grup}\n"
|
||||
// ==============================================
|
||||
// Lowercase Forms. Just copy above and lowercase
|
||||
// ==============================================
|
||||
|
||||
"v<{grup}[{gracal}\n"
|
||||
"v<{grup}[{gracep}\n"
|
||||
"v<{grup}[{gracet}\n"
|
||||
"v<{grup}[{gracio}\n"
|
||||
"v<{grup}[{gracom}\n"
|
||||
"v<{grup}[{gracup}\n"
|
||||
"v<{grup}[{gracome}\n"
|
||||
// typing variants, in case the keyboard doesn't have accents
|
||||
|
||||
"v<{grup}[{gral}\n"
|
||||
"v<{grup}[{grep}\n"
|
||||
"v<{grup}[{gret}\n"
|
||||
"v<{grup}[{grio}\n"
|
||||
"v<{grup}[{grom}\n"
|
||||
//{grup}[{grup}<v
|
||||
"v<{grup}[{grome}\n"
|
||||
"a`>{alpha+};"
|
||||
"e`>{epsilon+};"
|
||||
"ee`>{eta+};"
|
||||
"ee>{eta};"
|
||||
"i`>{iota+};"
|
||||
"o`>{omicron+};"
|
||||
"oo`>{omega+};"
|
||||
"oo>{omega};"
|
||||
"i\">{iota\u00a8};"
|
||||
"y\">{ypsilon\u00a8};"
|
||||
|
||||
"u<{grup}\n"
|
||||
"ph<{grph}\n"
|
||||
"ch<{grkh}\n"
|
||||
"ps<{grps}\n"
|
||||
"oo<{grome}\n"
|
||||
//forms
|
||||
"a`<{gracal}\n"
|
||||
"e`<{gracep}\n"
|
||||
"ee`<{gracet}\n"
|
||||
"i`<{gracio}\n"
|
||||
"o`<{gracom}\n"
|
||||
"u`<{gracup}\n"
|
||||
"oo`<{gracome}\n"
|
||||
"''i<{grdiio}\n"
|
||||
"''u<{grdiup}\n"
|
||||
"<''\n"
|
||||
// basic letters
|
||||
|
||||
//{gracdiio}<xx
|
||||
//{gracdiup}<xx
|
||||
//{grfinal}<xx
|
||||
"a<>{alpha};"
|
||||
"\u00e1<>{alpha+};"
|
||||
"b<>{beta};"
|
||||
"n){n-gamma}>{gamma};" "n<{gamma}({gamma-n};"
|
||||
"g<>{gamma};"
|
||||
"d<>{delta};"
|
||||
"e<>{epsilon};"
|
||||
"\u00e9<>{epsilon+};"
|
||||
"z<>{zeta};"
|
||||
"{e-macron-acute}<>{eta+};"
|
||||
"{e-macron}<>{eta};"
|
||||
"th<>{theta};"
|
||||
"i<>{iota};"
|
||||
"\u00ed<>{iota+};"
|
||||
"\u00ef<>{iota\u00a8};"
|
||||
"k<>{kappa};"
|
||||
"l<>{lambda};"
|
||||
"m<>{mu};"
|
||||
"n<>{nu};"
|
||||
"x<>{xi};"
|
||||
"o<>{omicron};"
|
||||
"\u00f3>{omicron+};" "\u00f3<{omega+};"
|
||||
"ph<>{phi};" // needs ordering before p
|
||||
"ps<>{psi};" // needs ordering before p
|
||||
"p<>{pi};"
|
||||
"r<>{rho};"
|
||||
"t<>{tau};"
|
||||
"{vowel})u>{ypsilon};" "u<{vowel}){ypsilon};"
|
||||
"{vowel})\u00fa>{ypsilon+};" "\u00fa<{vowel}){ypsilon+};"
|
||||
"{vowel})\u00fc>{ypsilon\u00a8};" "\u00fc<{vowel}){ypsilon\u00a8};"
|
||||
"y<>{ypsilon};"
|
||||
"\u00fd<>{ypsilon+};"
|
||||
"{y-umlaut}<>{ypsilon\u00a8};"
|
||||
"ch<>{chi};"
|
||||
"{o-macron-acute}>{omega+};" "{o-macron-acute}<{omicron+};"
|
||||
"{o-macron}<>{omega};"
|
||||
|
||||
// extra english letters. mapped for completeness
|
||||
|
||||
"c(i>{sigma};"
|
||||
"c(e>{sigma};"
|
||||
"c(y>{sigma};"
|
||||
"c>{kappa};"
|
||||
"f>{phi};"
|
||||
"h>{chi};"
|
||||
"j>{iota};"
|
||||
"q>{kappa};"
|
||||
"v>{ypsilon};"
|
||||
"w>{ypsilon};"
|
||||
|
||||
// ====================================
|
||||
// Normal final rule: remove '
|
||||
// ====================================
|
||||
|
||||
"''>;"
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -12,66 +12,66 @@ lhebrew {
|
|||
Rule {
|
||||
//variable names, derived from the Unicode names.
|
||||
|
||||
"POINT_SHEVA=\u05B0\n"
|
||||
"POINT_HATAF_SEGOL=\u05B1\n"
|
||||
"POINT_HATAF_PATAH=\u05B2\n"
|
||||
"POINT_HATAF_QAMATS=\u05B3\n"
|
||||
"POINT_HIRIQ=\u05B4\n"
|
||||
"POINT_TSERE=\u05B5\n"
|
||||
"POINT_SEGOL=\u05B6\n"
|
||||
"POINT_PATAH=\u05B7\n"
|
||||
"POINT_QAMATS=\u05B8\n"
|
||||
"POINT_HOLAM=\u05B9\n"
|
||||
"POINT_QUBUTS=\u05BB\n"
|
||||
"POINT_DAGESH_OR_MAPIQ=\u05BC\n"
|
||||
"POINT_METEG=\u05BD\n"
|
||||
"PUNCTUATION_MAQAF=\u05BE\n"
|
||||
"POINT_RAFE=\u05BF\n"
|
||||
"PUNCTUATION_PASEQ=\u05C0\n"
|
||||
"POINT_SHIN_DOT=\u05C1\n"
|
||||
"POINT_SIN_DOT=\u05C2\n"
|
||||
"PUNCTUATION_SOF_PASUQ=\u05C3\n"
|
||||
"ALEF=\u05D0\n"
|
||||
"BET=\u05D1\n"
|
||||
"GIMEL=\u05D2\n"
|
||||
"DALET=\u05D3\n"
|
||||
"HE=\u05D4\n"
|
||||
"VAV=\u05D5\n"
|
||||
"ZAYIN=\u05D6\n"
|
||||
"HET=\u05D7\n"
|
||||
"TET=\u05D8\n"
|
||||
"YOD=\u05D9\n"
|
||||
"FINAL_KAF=\u05DA\n"
|
||||
"KAF=\u05DB\n"
|
||||
"LAMED=\u05DC\n"
|
||||
"FINAL_MEM=\u05DD\n"
|
||||
"MEM=\u05DE\n"
|
||||
"FINAL_NUN=\u05DF\n"
|
||||
"NUN=\u05E0\n"
|
||||
"SAMEKH=\u05E1\n"
|
||||
"AYIN=\u05E2\n"
|
||||
"FINAL_PE=\u05E3\n"
|
||||
"PE=\u05E4\n"
|
||||
"FINAL_TSADI=\u05E5\n"
|
||||
"TSADI=\u05E6\n"
|
||||
"QOF=\u05E7\n"
|
||||
"RESH=\u05E8\n"
|
||||
"SHIN=\u05E9\n"
|
||||
"TAV=\u05EA\n"
|
||||
"YIDDISH_DOUBLE_VAV=\u05F0\n"
|
||||
"YIDDISH_VAV_YOD=\u05F1\n"
|
||||
"YIDDISH_DOUBLE_YOD=\u05F2\n"
|
||||
"PUNCTUATION_GERESH=\u05F3\n"
|
||||
"PUNCTUATION_GERSHAYIM=\u05F4\n"
|
||||
"POINT_SHEVA=\u05B0;"
|
||||
"POINT_HATAF_SEGOL=\u05B1;"
|
||||
"POINT_HATAF_PATAH=\u05B2;"
|
||||
"POINT_HATAF_QAMATS=\u05B3;"
|
||||
"POINT_HIRIQ=\u05B4;"
|
||||
"POINT_TSERE=\u05B5;"
|
||||
"POINT_SEGOL=\u05B6;"
|
||||
"POINT_PATAH=\u05B7;"
|
||||
"POINT_QAMATS=\u05B8;"
|
||||
"POINT_HOLAM=\u05B9;"
|
||||
"POINT_QUBUTS=\u05BB;"
|
||||
"POINT_DAGESH_OR_MAPIQ=\u05BC;"
|
||||
"POINT_METEG=\u05BD;"
|
||||
"PUNCTUATION_MAQAF=\u05BE;"
|
||||
"POINT_RAFE=\u05BF;"
|
||||
"PUNCTUATION_PASEQ=\u05C0;"
|
||||
"POINT_SHIN_DOT=\u05C1;"
|
||||
"POINT_SIN_DOT=\u05C2;"
|
||||
"PUNCTUATION_SOF_PASUQ=\u05C3;"
|
||||
"ALEF=\u05D0;"
|
||||
"BET=\u05D1;"
|
||||
"GIMEL=\u05D2;"
|
||||
"DALET=\u05D3;"
|
||||
"HE=\u05D4;"
|
||||
"VAV=\u05D5;"
|
||||
"ZAYIN=\u05D6;"
|
||||
"HET=\u05D7;"
|
||||
"TET=\u05D8;"
|
||||
"YOD=\u05D9;"
|
||||
"FINAL_KAF=\u05DA;"
|
||||
"KAF=\u05DB;"
|
||||
"LAMED=\u05DC;"
|
||||
"FINAL_MEM=\u05DD;"
|
||||
"MEM=\u05DE;"
|
||||
"FINAL_NUN=\u05DF;"
|
||||
"NUN=\u05E0;"
|
||||
"SAMEKH=\u05E1;"
|
||||
"AYIN=\u05E2;"
|
||||
"FINAL_PE=\u05E3;"
|
||||
"PE=\u05E4;"
|
||||
"FINAL_TSADI=\u05E5;"
|
||||
"TSADI=\u05E6;"
|
||||
"QOF=\u05E7;"
|
||||
"RESH=\u05E8;"
|
||||
"SHIN=\u05E9;"
|
||||
"TAV=\u05EA;"
|
||||
"YIDDISH_DOUBLE_VAV=\u05F0;"
|
||||
"YIDDISH_VAV_YOD=\u05F1;"
|
||||
"YIDDISH_DOUBLE_YOD=\u05F2;"
|
||||
"PUNCTUATION_GERESH=\u05F3;"
|
||||
"PUNCTUATION_GERSHAYIM=\u05F4;"
|
||||
|
||||
//wildcards
|
||||
//The values can be anything we don't use in this file: start at E000.
|
||||
|
||||
"letter=[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]\n"
|
||||
"letter=[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ];"
|
||||
|
||||
"softvowel=[eiyEIY]\n"
|
||||
"softvowel=[eiyEIY];"
|
||||
|
||||
"vowellike=[{ALEF}{AYIN}{YOD}{VAV}]\n"
|
||||
"vowellike=[{ALEF}{AYIN}{YOD}{VAV}];"
|
||||
|
||||
//?>{POINT_SHEVA}
|
||||
//?>{POINT_HATAF_SEGOL}
|
||||
|
@ -93,117 +93,117 @@ lhebrew {
|
|||
//?>{POINT_SIN_DOT}
|
||||
//?>{PUNCTUATION_SOF_PASUQ}
|
||||
|
||||
"a>{ALEF}\n"
|
||||
"A>{ALEF}\n"
|
||||
"a>{ALEF};"
|
||||
"A>{ALEF};"
|
||||
|
||||
"b>{BET}\n"
|
||||
"B>{BET}\n"
|
||||
"b>{BET};"
|
||||
"B>{BET};"
|
||||
|
||||
"c[{softvowel}>{SAMEKH}\n"
|
||||
"C[{softvowel}>{SAMEKH}\n"
|
||||
"c[{letter}>{KAF}\n"
|
||||
"C[{letter}>{KAF}\n"
|
||||
"c>{FINAL_KAF}\n"
|
||||
"C>{FINAL_KAF}\n"
|
||||
"c({softvowel}>{SAMEKH};"
|
||||
"C({softvowel}>{SAMEKH};"
|
||||
"c({letter}>{KAF};"
|
||||
"C({letter}>{KAF};"
|
||||
"c>{FINAL_KAF};"
|
||||
"C>{FINAL_KAF};"
|
||||
|
||||
"d>{DALET}\n"
|
||||
"D>{DALET}\n"
|
||||
"d>{DALET};"
|
||||
"D>{DALET};"
|
||||
|
||||
"e>{AYIN}\n"
|
||||
"E>{AYIN}\n"
|
||||
"e>{AYIN};"
|
||||
"E>{AYIN};"
|
||||
|
||||
"f[{letter}>{PE}\n"
|
||||
"f>{FINAL_PE}\n"
|
||||
"F[{letter}>{PE}\n"
|
||||
"F>{FINAL_PE}\n"
|
||||
"f({letter}>{PE};"
|
||||
"f>{FINAL_PE};"
|
||||
"F({letter}>{PE};"
|
||||
"F>{FINAL_PE};"
|
||||
|
||||
"g>{GIMEL}\n"
|
||||
"G>{GIMEL}\n"
|
||||
"g>{GIMEL};"
|
||||
"G>{GIMEL};"
|
||||
|
||||
"h>{HE}\n"
|
||||
"H>{HE}\n"
|
||||
"h>{HE};"
|
||||
"H>{HE};"
|
||||
|
||||
"i>{YOD}\n"
|
||||
"I>{YOD}\n"
|
||||
"i>{YOD};"
|
||||
"I>{YOD};"
|
||||
|
||||
"j>{DALET}{SHIN}\n"
|
||||
"J>{DALET}{SHIN}\n"
|
||||
"j>{DALET}{SHIN};"
|
||||
"J>{DALET}{SHIN};"
|
||||
|
||||
"kH>{HET}\n"
|
||||
"kh>{HET}\n"
|
||||
"KH>{HET}\n"
|
||||
"Kh>{HET}\n"
|
||||
"k[{letter}>{KAF}\n"
|
||||
"K[{letter}>{KAF}\n"
|
||||
"k>{FINAL_KAF}\n"
|
||||
"K>{FINAL_KAF}\n"
|
||||
"kH>{HET};"
|
||||
"kh>{HET};"
|
||||
"KH>{HET};"
|
||||
"Kh>{HET};"
|
||||
"k({letter}>{KAF};"
|
||||
"K({letter}>{KAF};"
|
||||
"k>{FINAL_KAF};"
|
||||
"K>{FINAL_KAF};"
|
||||
|
||||
"l>{LAMED}\n"
|
||||
"L>{LAMED}\n"
|
||||
"l>{LAMED};"
|
||||
"L>{LAMED};"
|
||||
|
||||
"m[{letter}>{MEM}\n"
|
||||
"m>{FINAL_MEM}\n"
|
||||
"M[{letter}>{MEM}\n"
|
||||
"M>{FINAL_MEM}\n"
|
||||
"m({letter}>{MEM};"
|
||||
"m>{FINAL_MEM};"
|
||||
"M({letter}>{MEM};"
|
||||
"M>{FINAL_MEM};"
|
||||
|
||||
"n[{letter}>{NUN}\n"
|
||||
"n>{FINAL_NUN}\n"
|
||||
"N[{letter}>{NUN}\n"
|
||||
"N>{FINAL_NUN}\n"
|
||||
"n({letter}>{NUN};"
|
||||
"n>{FINAL_NUN};"
|
||||
"N({letter}>{NUN};"
|
||||
"N>{FINAL_NUN};"
|
||||
|
||||
"o>{VAV}\n"
|
||||
"O>{VAV}\n"
|
||||
"o>{VAV};"
|
||||
"O>{VAV};"
|
||||
|
||||
"p[{letter}>{PE}\n"
|
||||
"p>{FINAL_PE}\n"
|
||||
"P[{letter}>{PE}\n"
|
||||
"P>{FINAL_PE}\n"
|
||||
"p({letter}>{PE};"
|
||||
"p>{FINAL_PE};"
|
||||
"P({letter}>{PE};"
|
||||
"P>{FINAL_PE};"
|
||||
|
||||
"q>{QOF}\n"
|
||||
"Q>{QOF}\n"
|
||||
"q>{QOF};"
|
||||
"Q>{QOF};"
|
||||
|
||||
"r>{RESH}\n"
|
||||
"R>{RESH}\n"
|
||||
"r>{RESH};"
|
||||
"R>{RESH};"
|
||||
|
||||
"sH>{SHIN}\n"
|
||||
"sh>{SHIN}\n"
|
||||
"SH>{SHIN}\n"
|
||||
"Sh>{SHIN}\n"
|
||||
"s>{SAMEKH}\n"
|
||||
"S>{SAMEKH}\n"
|
||||
"sH>{SHIN};"
|
||||
"sh>{SHIN};"
|
||||
"SH>{SHIN};"
|
||||
"Sh>{SHIN};"
|
||||
"s>{SAMEKH};"
|
||||
"S>{SAMEKH};"
|
||||
|
||||
"th>{TAV}\n"
|
||||
"tH>{TAV}\n"
|
||||
"TH>{TAV}\n"
|
||||
"Th>{TAV}\n"
|
||||
"tS[{letter}>{TSADI}\n"
|
||||
"ts[{letter}>{TSADI}\n"
|
||||
"Ts[{letter}>{TSADI}\n"
|
||||
"TS[{letter}>{TSADI}\n"
|
||||
"tS>{FINAL_TSADI}\n"
|
||||
"ts>{FINAL_TSADI}\n"
|
||||
"Ts>{FINAL_TSADI}\n"
|
||||
"TS>{FINAL_TSADI}\n"
|
||||
"t>{TET}\n"
|
||||
"T>{TET}\n"
|
||||
"th>{TAV};"
|
||||
"tH>{TAV};"
|
||||
"TH>{TAV};"
|
||||
"Th>{TAV};"
|
||||
"tS({letter}>{TSADI};"
|
||||
"ts({letter}>{TSADI};"
|
||||
"Ts({letter}>{TSADI};"
|
||||
"TS({letter}>{TSADI};"
|
||||
"tS>{FINAL_TSADI};"
|
||||
"ts>{FINAL_TSADI};"
|
||||
"Ts>{FINAL_TSADI};"
|
||||
"TS>{FINAL_TSADI};"
|
||||
"t>{TET};"
|
||||
"T>{TET};"
|
||||
|
||||
"u>{VAV}\n"
|
||||
"U>{VAV}\n"
|
||||
"u>{VAV};"
|
||||
"U>{VAV};"
|
||||
|
||||
"v>{VAV}\n"
|
||||
"V>{VAV}\n"
|
||||
"v>{VAV};"
|
||||
"V>{VAV};"
|
||||
|
||||
"w>{VAV}\n"
|
||||
"W>{VAV}\n"
|
||||
"w>{VAV};"
|
||||
"W>{VAV};"
|
||||
|
||||
"x>{KAF}{SAMEKH}\n"
|
||||
"X>{KAF}{SAMEKH}\n"
|
||||
"x>{KAF}{SAMEKH};"
|
||||
"X>{KAF}{SAMEKH};"
|
||||
|
||||
"y>{YOD}\n"
|
||||
"Y>{YOD}\n"
|
||||
"y>{YOD};"
|
||||
"Y>{YOD};"
|
||||
|
||||
"z>{ZAYIN}\n"
|
||||
"Z>{ZAYIN}\n"
|
||||
"z>{ZAYIN};"
|
||||
"Z>{ZAYIN};"
|
||||
|
||||
//#?>{YIDDISH_DOUBLE_VAV}
|
||||
//?>{YIDDISH_VAV_YOD}
|
||||
|
@ -211,7 +211,7 @@ lhebrew {
|
|||
//?>{PUNCTUATION_GERESH}
|
||||
//?>{PUNCTUATION_GERSHAYIM}
|
||||
|
||||
"''>\n"
|
||||
"''>;"
|
||||
|
||||
//{POINT_SHEVA}>@
|
||||
//{POINT_HATAF_SEGOL}>@
|
||||
|
@ -233,40 +233,39 @@ lhebrew {
|
|||
//{POINT_SIN_DOT}>@
|
||||
//{PUNCTUATION_SOF_PASUQ}>@
|
||||
|
||||
"a<{ALEF}\n"
|
||||
"e<{AYIN}\n"
|
||||
"b<{BET}\n"
|
||||
"d<{DALET}\n"
|
||||
"k<{FINAL_KAF}\n"
|
||||
"m<{FINAL_MEM}\n"
|
||||
"n<{FINAL_NUN}\n"
|
||||
"p<{FINAL_PE}\n"
|
||||
"ts<{FINAL_TSADI}\n"
|
||||
"g<{GIMEL}\n"
|
||||
"kh<{HET}\n"
|
||||
"h<{HE}\n"
|
||||
"k''<{KAF}[{HE}\n"
|
||||
"k<{KAF}\n"
|
||||
"l<{LAMED}\n"
|
||||
"m<{MEM}\n"
|
||||
"n<{NUN}\n"
|
||||
"p<{PE}\n"
|
||||
"q<{QOF}\n"
|
||||
"r<{RESH}\n"
|
||||
"s''<{SAMEKH}[{HE}\n"
|
||||
"s<{SAMEKH}\n"
|
||||
"sh<{SHIN}\n"
|
||||
"th<{TAV}\n"
|
||||
"t''<{TET}[{HE}\n"
|
||||
"t''<{TET}[{HE}\n"
|
||||
"t''<{TET}[{SAMEKH}\n"
|
||||
"t''<{TET}[{SHIN}\n"
|
||||
"t<{TET}\n"
|
||||
"ts<{TSADI}\n"
|
||||
"v<{VAV}[{vowellike}\n"
|
||||
"u<{VAV}\n"
|
||||
"y<{YOD}\n"
|
||||
"z<{ZAYIN}\n"
|
||||
"a<{ALEF};"
|
||||
"e<{AYIN};"
|
||||
"b<{BET};"
|
||||
"d<{DALET};"
|
||||
"k<{FINAL_KAF};"
|
||||
"m<{FINAL_MEM};"
|
||||
"n<{FINAL_NUN};"
|
||||
"p<{FINAL_PE};"
|
||||
"ts<{FINAL_TSADI};"
|
||||
"g<{GIMEL};"
|
||||
"kh<{HET};"
|
||||
"h<{HE};"
|
||||
"k''<{KAF}({HE};"
|
||||
"k<{KAF};"
|
||||
"l<{LAMED};"
|
||||
"m<{MEM};"
|
||||
"n<{NUN};"
|
||||
"p<{PE};"
|
||||
"q<{QOF};"
|
||||
"r<{RESH};"
|
||||
"s''<{SAMEKH}({HE};"
|
||||
"s<{SAMEKH};"
|
||||
"sh<{SHIN};"
|
||||
"th<{TAV};"
|
||||
"t''<{TET}({HE};"
|
||||
"t''<{TET}({SAMEKH};"
|
||||
"t''<{TET}({SHIN};"
|
||||
"t<{TET};"
|
||||
"ts<{TSADI};"
|
||||
"v<{VAV}({vowellike};"
|
||||
"u<{VAV};"
|
||||
"y<{YOD};"
|
||||
"z<{ZAYIN};"
|
||||
|
||||
//{YIDDISH_DOUBLE_VAV}>@
|
||||
//{YIDDISH_VAV_YOD}>@
|
||||
|
@ -274,6 +273,6 @@ lhebrew {
|
|||
//{PUNCTUATION_GERESH}>@
|
||||
//{PUNCTUATION_GERSHAYIM}>@
|
||||
|
||||
"<''\n"
|
||||
"<'';"
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -7,358 +7,301 @@
|
|||
// 12/10/99 aliu Fix case handling.
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Latin-Russian
|
||||
|
||||
lrussian {
|
||||
Rule {
|
||||
// Russian Letters
|
||||
//* /* This class is designed to be a general Latin-Cyrillic
|
||||
//* transliteration. The standard Russian transliterations
|
||||
//* are generally used for the letters from Russian,
|
||||
//* with additional Cyrillic characters given consistent
|
||||
//* mappings.
|
||||
//* */
|
||||
|
||||
"cyA=\u0410\n"
|
||||
"cyBe=\u0411\n"
|
||||
"cyVe=\u0412\n"
|
||||
"cyGe=\u0413\n"
|
||||
"cyDe=\u0414\n"
|
||||
"cyYe=\u0415\n"
|
||||
"cyYo=\u0416\n"
|
||||
"cyZhe=\u0417\n"
|
||||
"cyZe=\u0418\n"
|
||||
"cyYi=\u0419\n"
|
||||
"cyY=\u0419\n"
|
||||
"cyKe=\u041a\n"
|
||||
"cyLe=\u041b\n"
|
||||
"cyMe=\u041c\n"
|
||||
"cyNe=\u041d\n"
|
||||
"cyO=\u041e\n"
|
||||
"cyPe=\u041f\n"
|
||||
"S-hacek=\u0160;"
|
||||
"s-hacek=\u0161;"
|
||||
|
||||
"cyRe=\u0420\n"
|
||||
"cySe=\u0421\n"
|
||||
"cyTe=\u0422\n"
|
||||
"cyU=\u0423\n"
|
||||
"cyFe=\u0424\n"
|
||||
"cyKhe=\u0425\n"
|
||||
"cyTse=\u0426\n"
|
||||
"cyChe=\u0427\n"
|
||||
"cyShe=\u0428\n"
|
||||
"cyShche=\u0429\n"
|
||||
"cyHard=\u042a\n"
|
||||
"cyI=\u042b\n"
|
||||
"cySoft=\u042c\n"
|
||||
"cyE=\u042d\n"
|
||||
"cyYu=\u042e\n"
|
||||
"cyYa=\u042f\n"
|
||||
"YO=\u0401;"
|
||||
"J=\u0408;"
|
||||
"A=\u0410;"
|
||||
"B=\u0411;"
|
||||
"V=\u0412;"
|
||||
"G=\u0413;"
|
||||
"D=\u0414;"
|
||||
"YE=\u0415;"
|
||||
"ZH=\u0416;"
|
||||
"Z=\u0417;"
|
||||
"YI=\u0418;"
|
||||
"Y=\u0419;"
|
||||
"K=\u041A;"
|
||||
"L=\u041B;"
|
||||
"M=\u041C;"
|
||||
"N=\u041D;"
|
||||
"O=\u041E;"
|
||||
"P=\u041F;"
|
||||
"R=\u0420;"
|
||||
"S=\u0421;"
|
||||
"T=\u0422;"
|
||||
"U=\u0423;"
|
||||
"F=\u0424;"
|
||||
"KH=\u0425;"
|
||||
"TS=\u0426;"
|
||||
"CH=\u0427;"
|
||||
"SH=\u0428;"
|
||||
"SHCH=\u0429;"
|
||||
"HARD=\u042A;"
|
||||
"I=\u042B;"
|
||||
"SOFT=\u042C;"
|
||||
"E=\u042D;"
|
||||
"YU=\u042E;"
|
||||
"YA=\u042F;"
|
||||
|
||||
"cya=\u0430\n"
|
||||
"cybe=\u0431\n"
|
||||
"cyve=\u0432\n"
|
||||
"cyge=\u0433\n"
|
||||
"cyde=\u0434\n"
|
||||
"cyye=\u0435\n"
|
||||
"cyzhe=\u0436\n"
|
||||
"cyze=\u0437\n"
|
||||
"cyyi=\u0438\n"
|
||||
"cyy=\u0439\n"
|
||||
"cyke=\u043a\n"
|
||||
"cyle=\u043b\n"
|
||||
"cyme=\u043c\n"
|
||||
"cyne=\u043d\n"
|
||||
"cyo=\u043e\n"
|
||||
"cype=\u043f\n"
|
||||
// Lowercase
|
||||
|
||||
"cyre=\u0440\n"
|
||||
"cyse=\u0441\n"
|
||||
"cyte=\u0442\n"
|
||||
"cyu=\u0443\n"
|
||||
"cyfe=\u0444\n"
|
||||
"cykhe=\u0445\n"
|
||||
"cytse=\u0446\n"
|
||||
"cyche=\u0447\n"
|
||||
"cyshe=\u0448\n"
|
||||
"cyshche=\u0449\n"
|
||||
"cyhard=\u044a\n"
|
||||
"cyi=\u044b\n"
|
||||
"cysoft=\u044c\n"
|
||||
"cye=\u044d\n"
|
||||
"cyyu=\u044e\n"
|
||||
"cyya=\u044f\n"
|
||||
"a=\u0430;"
|
||||
"b=\u0431;"
|
||||
"v=\u0432;"
|
||||
"g=\u0433;"
|
||||
"d=\u0434;"
|
||||
"ye=\u0435;"
|
||||
"zh=\u0436;"
|
||||
"z=\u0437;"
|
||||
"yi=\u0438;"
|
||||
"y=\u0439;"
|
||||
"k=\u043a;"
|
||||
"l=\u043b;"
|
||||
"m=\u043c;"
|
||||
"n=\u043d;"
|
||||
"o=\u043e;"
|
||||
"p=\u043f;"
|
||||
"r=\u0440;"
|
||||
"s=\u0441;"
|
||||
"t=\u0442;"
|
||||
"u=\u0443;"
|
||||
"f=\u0444;"
|
||||
"kh=\u0445;"
|
||||
"ts=\u0446;"
|
||||
"ch=\u0447;"
|
||||
"sh=\u0448;"
|
||||
"shch=\u0449;"
|
||||
"hard=\u044a;"
|
||||
"i=\u044b;"
|
||||
"soft=\u044c;"
|
||||
"e=\u044d;"
|
||||
"yu=\u044e;"
|
||||
"ya=\u044f;"
|
||||
|
||||
"cyyo=\u0451\n"
|
||||
"yo=\u0451;"
|
||||
"j=\u0458;"
|
||||
|
||||
"a=[aA]\n"
|
||||
"c=[cC]\n"
|
||||
"e=[eE]\n"
|
||||
"h=[hH]\n"
|
||||
"i=[iI]\n"
|
||||
"o=[oO]\n"
|
||||
"s=[sS]\n"
|
||||
"t=[tT]\n"
|
||||
"u=[uU]\n"
|
||||
"iey=[ieyIEY]\n"
|
||||
"lower=[:Lu:]\n"
|
||||
// variables
|
||||
// some are duplicated so lowercasing works
|
||||
|
||||
// convert English to Russian
|
||||
"Russian>\u041f\u0420\u0410\u0412\u0414\u0410\u00D1\u0020\u0411\u044d\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f\u002c\u0020\u043a\u044b\u0440\u0433\u044b\u0437\u002c\u0020\u041c\u043e\u043b\u0434\u043e\u0432\u044d\u043d\u044f\u0441\u043a\u044d\u002e\n"
|
||||
"csoft=[eiyEIY];"
|
||||
"CSOFT=[eiyEIY];"
|
||||
|
||||
//special equivs for ay, oy, ...
|
||||
"Y{a}{i}>{cyYa}{cyY}\n"
|
||||
"Y{e}{i}>{cyYe}{cyY}\n"
|
||||
"Y{i}{i}>{cyYi}{cyY}\n"
|
||||
"Y{o}{i}>{cyYo}{cyY}\n"
|
||||
"Y{u}{i}>{cyYu}{cyY}\n"
|
||||
"A{i}>{cyA}{cyY}\n"
|
||||
"E{i}>{cyE}{cyY}\n"
|
||||
//skip II, since it is the soft sign
|
||||
"O{i}>{cyO}{cyY}\n"
|
||||
"U{i}>{cyU}{cyY}\n"
|
||||
"BECOMES_H=[{HARD}{hard}];"
|
||||
"becomes_h=[{HARD}{hard}];"
|
||||
|
||||
"A>{cyA}\n"
|
||||
"B>{cyBe}\n"
|
||||
"C{h}>{cyChe}\n"
|
||||
"C[{iey}>{cySe}\n"
|
||||
"C>{cyKe}\n"
|
||||
"D>{cyDe}\n"
|
||||
"E>{cyE}\n"
|
||||
"F>{cyFe}\n"
|
||||
"G>{cyGe}\n"
|
||||
"H>{cyHard}\n"
|
||||
"I{i}>{cySoft}\n"
|
||||
"I>{cyI}\n"
|
||||
"J>{cyDe}{cyZhe}\n"
|
||||
"K{h}>{cyKhe}\n"
|
||||
"K>{cyKe}\n"
|
||||
"L>{cyLe}\n"
|
||||
"M>{cyMe}\n"
|
||||
"N>{cyNe}\n"
|
||||
"O>{cyO}\n"
|
||||
"P>{cyPe}\n"
|
||||
"Q{u}>{cyKe}{cyVe}\n"
|
||||
"R>{cyRe}\n"
|
||||
"S{h}{t}{c}{h}>{cyShche}\n"
|
||||
"S{h}{c}{h}>{cyShche}\n"
|
||||
"S{h}>{cyShe}\n"
|
||||
"S>{cySe}\n"
|
||||
"T{c}{h}>{cyChe}\n"
|
||||
"T{h}>{cyZe}\n"
|
||||
"T{s}>{cyTse}\n"
|
||||
"T>{cyTe}\n"
|
||||
"U>{cyU}\n"
|
||||
"V>{cyVe}\n"
|
||||
"W{h}>{cyVe}\n"
|
||||
"W>{cyVe}\n"
|
||||
"X>{cyKe}{cySe}\n"
|
||||
"Y{e}>{cyYe}\n"
|
||||
"Y{o}>{cyYo}\n"
|
||||
"Y{u}>{cyYu}\n"
|
||||
"Y{a}>{cyYa}\n"
|
||||
"Y{i}>{cyYi}\n"
|
||||
"Y>{cyY}\n"
|
||||
"Z{h}>{cyZhe}\n"
|
||||
"Z>{cyZe}\n"
|
||||
"X>{cyKe}{cySe}\n"
|
||||
"BECOMES_S=[{S}{s}];"
|
||||
"becomes_s=[{S}{s}];"
|
||||
|
||||
//lower case: doesn''t solve join bug
|
||||
"y{a}{i}>{cyya}{cyy}\n"
|
||||
"y{e}{i}>{cyye}{cyy}\n"
|
||||
"y{i}{i}>{cyyi}{cyy}\n"
|
||||
"y{o}{i}>{cyyo}{cyy}\n"
|
||||
"y{u}{i}>{cyyu}{cyy}\n"
|
||||
"a{i}>{cya}{cyy}\n"
|
||||
"e{i}>{cye}{cyy}\n"
|
||||
//skip ii, since it is the soft sign
|
||||
"o{i}>{cyo}{cyy}\n"
|
||||
"u{i}>{cyu}{cyy}\n"
|
||||
"BECOMES_C=[{CH}{ch}];"
|
||||
"becomes_c=[{CH}{ch}];"
|
||||
|
||||
"a>{cya}\n"
|
||||
"b>{cybe}\n"
|
||||
"c{h}>{cyche}\n"
|
||||
"c[{iey}>{cyse}\n"
|
||||
"c>{cyke}\n"
|
||||
"d>{cyde}\n"
|
||||
"e>{cye}\n"
|
||||
"f>{cyfe}\n"
|
||||
"g>{cyge}\n"
|
||||
"h>{cyhard}\n"
|
||||
"i{i}>{cysoft}\n"
|
||||
"i>{cyi}\n"
|
||||
"j>{cyde}{cyzhe}\n"
|
||||
"k{h}>{cykhe}\n"
|
||||
"k>{cyke}\n"
|
||||
"l>{cyle}\n"
|
||||
"m>{cyme}\n"
|
||||
"n>{cyne}\n"
|
||||
"o>{cyo}\n"
|
||||
"p>{cype}\n"
|
||||
"q{u}>{cyke}{cyve}\n"
|
||||
"r>{cyre}\n"
|
||||
"s{h}{t}{c}{h}>{cyshche}\n"
|
||||
"s{h}{c}{h}>{cyshche}\n"
|
||||
"s{h}>{cyshe}\n"
|
||||
"s>{cyse}\n"
|
||||
"t{c}{h}>{cyche}\n"
|
||||
"t{h}>{cyze}\n"
|
||||
"t{s}>{cytse}\n"
|
||||
"t>{cyte}\n"
|
||||
"u>{cyu}\n"
|
||||
"v>{cyve}\n"
|
||||
"w{h}>{cyve}\n"
|
||||
"w>{cyve}\n"
|
||||
"x>{cyke}{cyse}\n"
|
||||
"y{e}>{cyye}\n"
|
||||
"y{o}>{cyyo}\n"
|
||||
"y{u}>{cyyu}\n"
|
||||
"y{a}>{cyya}\n"
|
||||
"y{i}>{cyyi}\n"
|
||||
"y>{cyy}\n"
|
||||
"z{h}>{cyzhe}\n"
|
||||
"z>{cyze}\n"
|
||||
"x>{cyke}{cyse}\n"
|
||||
"BECOMES_VOWEL=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
|
||||
"becomes_vowel=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
|
||||
|
||||
"letter=[[:Lu:][:Ll:]];"
|
||||
"lower=[[:Ll:]];"
|
||||
|
||||
//* /*
|
||||
//* Modified to combine display transliterator and typing transliterator.
|
||||
//* The display mapping uses accents for the "soft" vowels.
|
||||
//* It does not, although it could, use characters like \u0161 instead of digraphs
|
||||
//* like sh.
|
||||
//* */
|
||||
|
||||
// #############################################
|
||||
// Special titlecase forms, not duplicated
|
||||
// #############################################
|
||||
|
||||
"Ch>{CH};" "Ch<{CH}({lower};"
|
||||
"Kh>{KH};" "Kh<{KH}({lower};"
|
||||
"Shch>{SHCH};" "Shch<{SHCH}({lower};"
|
||||
"Sh>{SH};" "Sh<{SH}({lower};"
|
||||
"Ts>{TS};" "Ts<{TS}({lower};"
|
||||
"Zh>{ZH};" "Zh<{ZH}({lower};"
|
||||
"Yi>{YI};" //+ "Yi<{YI}({lower};"
|
||||
"Ye>{YE};" //+ "Ye<{YE}({lower};"
|
||||
"Yo>{YO};" //+ "Yo<{YO}({lower};"
|
||||
"Yu>{YU};" //+ "Yu<{YU}({lower};"
|
||||
"Ya>{YA};" //+ "Ya<{YA}({lower};"
|
||||
|
||||
// #############################################
|
||||
// Rules to Duplicate
|
||||
// To get the lowercase versions, copy these and lowercase
|
||||
// #############################################
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
"SHTCH>{SHCH};"
|
||||
"TCH>{CH};"
|
||||
"TH>{Z};"
|
||||
"Q>{K};"
|
||||
"WH>{V};"
|
||||
"W>{V};"
|
||||
"X>{K}{S};" //+ "X<{K}{S};"
|
||||
|
||||
// Separate letters that would otherwise join
|
||||
|
||||
"SH''<{SH}({BECOMES_C};"
|
||||
"T''<{T}({BECOMES_S};"
|
||||
|
||||
"K''<{K}({BECOMES_H};"
|
||||
"S''<{S}({BECOMES_H};"
|
||||
"T''<{T}({BECOMES_H};"
|
||||
"Z''<{Z}({BECOMES_H};"
|
||||
|
||||
"Y''<{Y}({BECOMES_VOWEL};"
|
||||
|
||||
// Main letters
|
||||
|
||||
"A<>{A};"
|
||||
"B<>{B};"
|
||||
"CH<>{CH};"
|
||||
"D<>{D};"
|
||||
"E<>{E};"
|
||||
"F<>{F};"
|
||||
"G<>{G};"
|
||||
"\u00cc<>{YI};"
|
||||
"I<>{I};"
|
||||
"KH<>{KH};"
|
||||
"K<>{K};"
|
||||
"L<>{L};"
|
||||
"M<>{M};"
|
||||
"N<>{N};"
|
||||
"O<>{O};"
|
||||
"P<>{P};"
|
||||
"R<>{R};"
|
||||
"SHCH<>{SHCH};"
|
||||
"SH>{SH};" //+ "SH<{SH};"
|
||||
"{S-hacek}<>{SH};"
|
||||
"S<>{S};"
|
||||
"TS<>{TS};"
|
||||
"T<>{T};"
|
||||
"U<>{U};"
|
||||
"V<>{V};"
|
||||
//\u00cc\u00c0\u00c8\u00d2\u00d9
|
||||
"YE>{YE};" //+ "YE<{YE};"
|
||||
"\u00c8<>{YE};"
|
||||
"YO>{YO};" //+ "YO<{YO};"
|
||||
"\u00d2<>{YO};"
|
||||
"YU>{YU};" //+ "YU<{YU};"
|
||||
"\u00d9<>{YU};"
|
||||
"YA>{YA};" //+ "YA<{YA};"
|
||||
"\u00c0<>{YA};"
|
||||
"Y<>{Y};"
|
||||
"ZH<>{ZH};"
|
||||
"Z<>{Z};"
|
||||
|
||||
"H<>{HARD};"
|
||||
"\u0178<>{SOFT};"
|
||||
|
||||
// Non-russian
|
||||
|
||||
"J<>{J};"
|
||||
|
||||
// variant spellings in English
|
||||
|
||||
"C({csoft}>{S};"
|
||||
"C>{K};"
|
||||
|
||||
// #############################################
|
||||
// Duplicated Rules
|
||||
// Copy and lowercase the above rules
|
||||
// #############################################
|
||||
|
||||
// variant spellings in english
|
||||
|
||||
"shtch>{shch};"
|
||||
"tch>{ch};"
|
||||
"th>{z};"
|
||||
"q>{k};"
|
||||
"wh>{v};"
|
||||
"w>{v};"
|
||||
"x>{k}{s};" //+ "x<{k}{s};"
|
||||
|
||||
// separate letters that would otherwise join
|
||||
|
||||
"sh''<{sh}({becomes_c};"
|
||||
"t''<{t}({becomes_s};"
|
||||
|
||||
"k''<{k}({becomes_h};"
|
||||
"s''<{s}({becomes_h};"
|
||||
"t''<{t}({becomes_h};"
|
||||
"z''<{z}({becomes_h};"
|
||||
|
||||
"y''<{y}({becomes_vowel};"
|
||||
|
||||
// main letters
|
||||
|
||||
"a<>{a};"
|
||||
"b<>{b};"
|
||||
"ch<>{ch};"
|
||||
"d<>{d};"
|
||||
"e<>{e};"
|
||||
"f<>{f};"
|
||||
"g<>{g};"
|
||||
"\u00ec<>{yi};"
|
||||
"i<>{i};"
|
||||
"kh<>{kh};"
|
||||
"k<>{k};"
|
||||
"l<>{l};"
|
||||
"m<>{m};"
|
||||
"n<>{n};"
|
||||
"o<>{o};"
|
||||
"p<>{p};"
|
||||
"r<>{r};"
|
||||
"shch<>{shch};"
|
||||
"sh>{sh};" //+ "sh<{sh};"
|
||||
"{s-hacek}<>{sh};"
|
||||
"s<>{s};"
|
||||
"ts<>{ts};"
|
||||
"t<>{t};"
|
||||
"u<>{u};"
|
||||
"v<>{v};"
|
||||
//\u00ec\u00e0\u00e8\u00f2\u00f9
|
||||
"ye>{ye};" //+ "ye<{ye};"
|
||||
"\u00e8<>{ye};"
|
||||
"yo>{yo};" //+ "yo<{yo};"
|
||||
"\u00f2<>{yo};"
|
||||
"yu>{yu};" //+ "yu<{yu};"
|
||||
"\u00f9<>{yu};"
|
||||
"ya>{ya};" //+ "ya<{ya};"
|
||||
"\u00e0<>{ya};"
|
||||
"y<>{y};"
|
||||
"zh<>{zh};"
|
||||
"z<>{z};"
|
||||
|
||||
"h<>{hard};"
|
||||
"\u00ff<>{soft};"
|
||||
|
||||
// non-russian
|
||||
|
||||
"j<>{j};"
|
||||
|
||||
// variant spellings in english
|
||||
|
||||
"c({csoft}>{s};"
|
||||
"c>{k};"
|
||||
|
||||
|
||||
|
||||
// #############################################
|
||||
// End of Duplicated Rules
|
||||
// #############################################
|
||||
|
||||
//generally the last rule
|
||||
"''>\n"
|
||||
|
||||
//now Russian to English
|
||||
|
||||
"Y''<{cyY}[{cyA}\n"
|
||||
"Y''<{cyY}[{cyE}\n"
|
||||
"Y''<{cyY}[{cyI}\n"
|
||||
"Y''<{cyY}[{cyO}\n"
|
||||
"Y''<{cyY}[{cyU}\n"
|
||||
"Y''<{cyY}[{cya}\n"
|
||||
"Y''<{cyY}[{cye}\n"
|
||||
"Y''<{cyY}[{cyi}\n"
|
||||
"Y''<{cyY}[{cyo}\n"
|
||||
"Y''<{cyY}[{cyu}\n"
|
||||
"A<{cyA}\n"
|
||||
"B<{cyBe}\n"
|
||||
"J<{cyDe}{cyZhe}\n"
|
||||
"J<{cyDe}{cyzhe}\n"
|
||||
"D<{cyDe}\n"
|
||||
"V<{cyVe}\n"
|
||||
"G<{cyGe}\n"
|
||||
"Zh<{cyZhe}[{lower}\n"
|
||||
"ZH<{cyZhe}\n"
|
||||
"Z''<{cyZe}[{cyHard}\n"
|
||||
"Z''<{cyZe}[{cyhard}\n"
|
||||
"Z<{cyZe}\n"
|
||||
"Ye<{cyYe}[{lower}\n"
|
||||
"YE<{cyYe}\n"
|
||||
"Yo<{cyYo}[{lower}\n"
|
||||
"YO<{cyYo}\n"
|
||||
"Yu<{cyYu}[{lower}\n"
|
||||
"YU<{cyYu}\n"
|
||||
"Ya<{cyYa}[{lower}\n"
|
||||
"YA<{cyYa}\n"
|
||||
"Yi<{cyYi}[{lower}\n"
|
||||
"YI<{cyYi}\n"
|
||||
"Y<{cyY}\n"
|
||||
"Kh<{cyKhe}[{lower}\n"
|
||||
"KH<{cyKhe}\n"
|
||||
"K''<{cyKe}[{cyHard}\n"
|
||||
"K''<{cyKe}[{cyhard}\n"
|
||||
"X<{cyKe}{cySe}\n"
|
||||
"X<{cyKe}{cyse}\n"
|
||||
"K<{cyKe}\n"
|
||||
"L<{cyLe}\n"
|
||||
"M<{cyMe}\n"
|
||||
"N<{cyNe}\n"
|
||||
"O<{cyO}\n"
|
||||
"P<{cyPe}\n"
|
||||
|
||||
"R<{cyRe}\n"
|
||||
"Shch<{cyShche}[{lower}\n"
|
||||
"SHCH<{cyShche}\n"
|
||||
"Sh''<{cyShe}[{cyche}\n"
|
||||
"SH''<{cyShe}[{cyChe}\n"
|
||||
"Sh<{cyShe}[{lower}\n"
|
||||
"SH<{cyShe}\n"
|
||||
"S''<{cySe}[{cyHard}\n"
|
||||
"S''<{cySe}[{cyhard}\n"
|
||||
"S<{cySe}\n"
|
||||
"Ts<{cyTse}[{lower}\n"
|
||||
"TS<{cyTse}\n"
|
||||
"T''<{cyTe}[{cySe}\n"
|
||||
"T''<{cyTe}[{cyse}\n"
|
||||
"T''<{cyTe}[{cyHard}\n"
|
||||
"T''<{cyTe}[{cyhard}\n"
|
||||
"T<{cyTe}\n"
|
||||
"U<{cyU}\n"
|
||||
"F<{cyFe}\n"
|
||||
"Ch<{cyChe}[{lower}\n"
|
||||
"CH<{cyChe}\n"
|
||||
"H<{cyHard}\n"
|
||||
"I''<{cyI}[{cyI}\n"
|
||||
"I''<{cyI}[{cyi}\n"
|
||||
"I<{cyI}\n"
|
||||
"Ii<{cySoft}[{lower}\n"
|
||||
"II<{cySoft}\n"
|
||||
"E<{cyE}\n"
|
||||
|
||||
//lowercase
|
||||
"y''<{cyy}[{cya}\n"
|
||||
"y''<{cyy}[{cye}\n"
|
||||
"y''<{cyy}[{cyi}\n"
|
||||
"y''<{cyy}[{cyo}\n"
|
||||
"y''<{cyy}[{cyu}\n"
|
||||
"y''<{cyy}[{cyA}\n"
|
||||
"y''<{cyy}[{cyE}\n"
|
||||
"y''<{cyy}[{cyI}\n"
|
||||
"y''<{cyy}[{cyO}\n"
|
||||
"y''<{cyy}[{cyU}\n"
|
||||
"a<{cya}\n"
|
||||
"b<{cybe}\n"
|
||||
"j<{cyde}{cyzhe}\n"
|
||||
"j<{cyde}{cyZhe}\n"
|
||||
"d<{cyde}\n"
|
||||
"v<{cyve}\n"
|
||||
"g<{cyge}\n"
|
||||
"zh<{cyzhe}\n"
|
||||
"z''<{cyze}[{cyhard}\n"
|
||||
"z''<{cyze}[{cyHard}\n"
|
||||
"z<{cyze}\n"
|
||||
"ye<{cyye}\n"
|
||||
"yo<{cyyo}\n"
|
||||
"yu<{cyyu}\n"
|
||||
"ya<{cyya}\n"
|
||||
"yi<{cyyi}\n"
|
||||
"y<{cyy}\n"
|
||||
"kh<{cykhe}\n"
|
||||
"k''<{cyke}[{cyhard}\n"
|
||||
"k''<{cyke}[{cyHard}\n"
|
||||
"x<{cyke}{cyse}\n"
|
||||
"x<{cyke}{cySe}\n"
|
||||
"k<{cyke}\n"
|
||||
"l<{cyle}\n"
|
||||
"m<{cyme}\n"
|
||||
"n<{cyne}\n"
|
||||
"o<{cyo}\n"
|
||||
"p<{cype}\n"
|
||||
|
||||
"r<{cyre}\n"
|
||||
"shch<{cyshche}\n"
|
||||
"sh''<{cyshe}[{cyche}\n"
|
||||
"sh''<{cyshe}[{cyChe}\n"
|
||||
"sh<{cyshe}\n"
|
||||
"s''<{cyse}[{cyhard}\n"
|
||||
"s''<{cyse}[{cyHard}\n"
|
||||
"s<{cyse}\n"
|
||||
"ts<{cytse}\n"
|
||||
"t''<{cyte}[{cyse}\n"
|
||||
"t''<{cyte}[{cySe}\n"
|
||||
"t''<{cyte}[{cyhard}\n"
|
||||
"t''<{cyte}[{cyHard}\n"
|
||||
"t<{cyte}\n"
|
||||
"u<{cyu}\n"
|
||||
"f<{cyfe}\n"
|
||||
"ch<{cyche}\n"
|
||||
"h<{cyhard}\n"
|
||||
"i''<{cyi}[{cyI}\n"
|
||||
"i''<{cyi}[{cyi}\n"
|
||||
"i<{cyi}\n"
|
||||
"ii<{cysoft}\n"
|
||||
"e<{cye}\n"
|
||||
|
||||
//generally the last rule
|
||||
"''>\n"
|
||||
"''>;"
|
||||
//the end
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,73 +11,73 @@
|
|||
quotes {
|
||||
Rule {
|
||||
// Rewritten using character codes [LIU]
|
||||
"white=[[:Zs:][:Zl:][:Zp:]]\n"
|
||||
"black=[^[:Zs:][:Zl:][:Zp:]]\n"
|
||||
"open=[[:Ps:]]\n"
|
||||
"dquote=\"\n"
|
||||
"white=[[:Zs:][:Zl:][:Zp:]];"
|
||||
"black=[^{white}];"
|
||||
"open=[:Ps:];"
|
||||
"dquote=\";"
|
||||
|
||||
"lAng=\u3008\n"
|
||||
"ldAng=\u300A\n"
|
||||
"lBrk='['\n"
|
||||
"lBrc='{'\n"
|
||||
"lAng=\u3008;"
|
||||
"ldAng=\u300A;"
|
||||
"lBrk='[';"
|
||||
"lBrc='{';"
|
||||
|
||||
"lquote=\u2018\n"
|
||||
"rquote=\u2019\n"
|
||||
"ldquote=\u201C\n"
|
||||
"rdquote=\u201D\n"
|
||||
"lquote=\u2018;"
|
||||
"rquote=\u2019;"
|
||||
"ldquote=\u201C;"
|
||||
"rdquote=\u201D;"
|
||||
|
||||
"ldguill=\u00AB\n"
|
||||
"rdguill=\u00BB\n"
|
||||
"lguill=\u2039\n"
|
||||
"rguill=\u203A\n"
|
||||
"ldguill=\u00AB;"
|
||||
"rdguill=\u00BB;"
|
||||
"lguill=\u2039;"
|
||||
"rguill=\u203A;"
|
||||
|
||||
"mdash=\u2014\n"
|
||||
"mdash=\u2014;"
|
||||
|
||||
//#######################################
|
||||
// Conversions from input
|
||||
//#######################################
|
||||
|
||||
// join single quotes
|
||||
"{lquote}''>{ldquote}\n"
|
||||
"{lquote}{lquote}>{ldquote}\n"
|
||||
"{rquote}''>{rdquote}\n"
|
||||
"{rquote}{rquote}>{rdquote}\n"
|
||||
"{lquote}''>{ldquote};"
|
||||
"{lquote}{lquote}>{ldquote};"
|
||||
"{rquote}''>{rdquote};"
|
||||
"{rquote}{rquote}>{rdquote};"
|
||||
|
||||
//smart single quotes
|
||||
"{white}]''>{lquote}\n"
|
||||
"{open}]''>{lquote}\n"
|
||||
"{black}]''>{rquote}\n"
|
||||
"''>{lquote}\n"
|
||||
"{white})''>{lquote};"
|
||||
"{open})''>{lquote};"
|
||||
"{black})''>{rquote};"
|
||||
"''>{lquote};"
|
||||
|
||||
//smart doubles
|
||||
"{white}]{dquote}>{ldquote}\n"
|
||||
"{open}]{dquote}>{ldquote}\n"
|
||||
"{black}]{dquote}>{rdquote}\n"
|
||||
"{dquote}>{ldquote}\n"
|
||||
"{white}){dquote}>{ldquote};"
|
||||
"{open}){dquote}>{ldquote};"
|
||||
"{black}){dquote}>{rdquote};"
|
||||
"{dquote}>{ldquote};"
|
||||
|
||||
// join single guillemets
|
||||
"{rguill}{rguill}>{rdguill}\n"
|
||||
"'>>'>{rdguill}\n"
|
||||
"{lguill}{lguill}>{ldguill}\n"
|
||||
"'<<'>{ldguill}\n"
|
||||
"{rguill}{rguill}>{rdguill};"
|
||||
"'>>'>{rdguill};"
|
||||
"{lguill}{lguill}>{ldguill};"
|
||||
"'<<'>{ldguill};"
|
||||
|
||||
// prevent double spaces
|
||||
" ] >\n"
|
||||
"\\ )\\ >;"
|
||||
|
||||
// join hyphens into dash
|
||||
"-->{mdash}\n"
|
||||
"-->{mdash};"
|
||||
|
||||
//#######################################
|
||||
// Conversions back to input
|
||||
//#######################################
|
||||
|
||||
//smart quotes
|
||||
"''<{lquote}\n"
|
||||
"''<{rquote}\n"
|
||||
"{dquote}<{ldquote}\n"
|
||||
"{dquote}<{rdquote}\n"
|
||||
|
||||
"''<{lquote};"
|
||||
"''<{rquote};"
|
||||
"{dquote}<{ldquote};"
|
||||
"{dquote}<{rdquote};"
|
||||
|
||||
//hyphens
|
||||
"--<{mdash}\n"
|
||||
"--<{mdash};"
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue