mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
Load RuleBasedTransliterator files from UTF8 files instead of ResourceBundles
X-SVN-Rev: 3550
This commit is contained in:
parent
1ac04345f0
commit
189ab5836c
40 changed files with 40290 additions and 70 deletions
33
.gitattributes
vendored
33
.gitattributes
vendored
|
@ -91,6 +91,39 @@ icu4j/src/com/ibm/icu/dev/data/unicode/Draft-TestSuite.txt -text
|
|||
icu4j/src/com/ibm/icu/impl/data/thai_dict -text
|
||||
icu4j/src/com/ibm/icu/text/unames.dat -text
|
||||
icu4j/src/com/ibm/icu/text/uprops.dat -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Bengali_InterIndic.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Devanagari_InterIndic.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Fullwidth_Halfwidth.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Gujarati_InterIndic.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Gurmukhi_InterIndic.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Han_Pinyin.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Hiragana_Katakana.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Bengali.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Devanagari.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Gujarati.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Gurmukhi.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Kannada.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Malayalam.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Oriya.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Tamil.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Telugu.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Kanji_English.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Kanji_OnRomaji.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Kannada_InterIndic.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_KeyboardEscape_Latin1.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Arabic.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Cyrillic.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Devanagari.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Greek.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Hebrew.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Jamo.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Kana.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Malayalam_InterIndic.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Oriya_InterIndic.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_StraightQuotes_CurlyQuotes.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Tamil_InterIndic.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_Telugu_InterIndic.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/Transliterator_UnicodeName_UnicodeChar.utf8.txt -text
|
||||
icu4j/src/com/ibm/text/resources/thai_dict -text
|
||||
icu4j/src/data/holidays_jp.ucs -text
|
||||
icu4j/src/data/thai6.ucs -text
|
||||
|
|
88
icu4j/src/com/ibm/icu/impl/data/ResourceReader.java
Executable file
88
icu4j/src/com/ibm/icu/impl/data/ResourceReader.java
Executable file
|
@ -0,0 +1,88 @@
|
|||
package com.ibm.text.resources;
|
||||
import java.io.*;
|
||||
|
||||
/**
 * A reader for text resource data in the current package.  The
 * resource data is loaded through the class loader, so it will
 * typically be a file in the same directory as the *.class files, or
 * a file within a JAR file in the corresponding subdirectory.  The
 * file must be a text file in one of the supported encodings; the
 * encoding is specified when the resource is opened by constructing
 * a <code>ResourceReader</code> object.
 *
 * <p>Each (re)opening of the resource creates a new stream.  The
 * previously opened stream is closed when the reader is reset, and
 * the current one can be released explicitly with {@link #close()}.
 *
 * <p>Although this class has a public API, it is designed for
 * internal use by classes in the <code>com.ibm.text</code> package.
 *
 * @author Alan Liu
 */
public class ResourceReader {
    /** Reader over the currently open copy of the resource. */
    private BufferedReader reader;
    /** Name handed to <code>getResourceAsStream()</code> on every (re)open. */
    private String resourceName;
    /** Name of the character encoding used to decode the resource. */
    private String encoding;
    /** True if we are at the start of the file, i.e. nothing has been read since the last open. */
    private boolean isReset;

    /**
     * Construct a reader object for the text file of the given name
     * in this package, in the given encoding.
     * @param resourceName the name of the text file located in this
     * package
     * @param encoding the encoding of the text file; if unsupported
     * an exception is thrown
     * @exception UnsupportedEncodingException if
     * <code>encoding</code> is not supported by the JDK.
     * @exception IllegalArgumentException if the resource cannot be
     * opened.
     */
    public ResourceReader(String resourceName, String encoding)
        throws UnsupportedEncodingException {

        this.resourceName = resourceName;
        this.encoding = encoding;
        isReset = false;
        _reset();
    }

    /**
     * Read and return the next line of the file or <code>null</code>
     * if the end of the file has been reached.
     * @exception IOException if the underlying reader fails, or if
     * this reader has been closed and not reset since.
     */
    public String readLine() throws IOException {
        // Any read moves us off the start of the file, so the next
        // reset() has to reopen the resource.
        isReset = false;
        return reader.readLine();
    }

    /**
     * Reset this reader so that the next call to
     * <code>readLine()</code> returns the first line of the file
     * again.  This is a somewhat expensive call; however, calling
     * <code>reset()</code> after calling it the first time does
     * nothing if <code>readLine()</code> has not been called in
     * between.
     * @exception IllegalArgumentException if the resource can no
     * longer be opened.
     */
    public void reset() {
        try {
            _reset();
        } catch (UnsupportedEncodingException ignored) {
            // We swallow this exception, if there is one.  If the encoding is
            // invalid, the constructor will have thrown this exception already
            // and the caller shouldn't use the object afterwards.
        }
    }

    /**
     * Close the currently open stream.  A later call to
     * <code>reset()</code> reopens the resource; until then
     * <code>readLine()</code> fails with an <code>IOException</code>.
     * Errors raised while closing are ignored.
     */
    public void close() {
        closeQuietly(reader);
        // Force the next reset() to reopen instead of short-circuiting.
        isReset = false;
    }

    /**
     * Reset to the start by reconstructing the stream and readers.
     * We could also use mark() and reset() on the stream or reader,
     * but that would cause them to keep the stream data around in
     * memory.  We don't want that because some of the resource files
     * are large, e.g., 400k.
     *
     * <p>The stream that was open before the call is closed once its
     * replacement is in place, so repeated resets do not accumulate
     * open streams.
     */
    private void _reset() throws UnsupportedEncodingException {
        if (isReset) {
            return;
        }
        InputStream is = getClass().getResourceAsStream(resourceName);
        if (is == null) {
            throw new IllegalArgumentException("Can't open " + resourceName);
        }

        BufferedReader fresh = null;
        try {
            fresh = new BufferedReader(new InputStreamReader(is, encoding));
        } finally {
            if (fresh == null) {
                // The decoder could not be created (e.g. unsupported
                // encoding); release the raw stream before propagating.
                try {
                    is.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done; the original exception wins.
                }
            }
        }

        BufferedReader stale = reader;
        reader = fresh;
        isReset = true;
        closeQuietly(stale);
    }

    /**
     * Close the given reader, if any, ignoring I/O errors.
     */
    private static void closeQuietly(BufferedReader r) {
        if (r == null) {
            return;
        }
        try {
            r.close();
        } catch (IOException ignored) {
            // Best effort: the reader is being discarded anyway.
        }
    }
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $
|
||||
* $Date: 2000/08/31 17:11:42 $
|
||||
* $Revision: 1.39 $
|
||||
* $Date: 2001/02/03 00:46:21 $
|
||||
* $Revision: 1.40 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -16,6 +16,7 @@ import java.util.Hashtable;
|
|||
import java.util.Vector;
|
||||
import java.text.ParsePosition;
|
||||
import com.ibm.util.Utility;
|
||||
import com.ibm.text.resources.ResourceReader;
|
||||
|
||||
/**
|
||||
* <code>RuleBasedTransliterator</code> is a transliterator
|
||||
|
@ -278,7 +279,7 @@ import com.ibm.util.Utility;
|
|||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.39 $ $Date: 2000/08/31 17:11:42 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.40 $ $Date: 2001/02/03 00:46:21 $
|
||||
*/
|
||||
public class RuleBasedTransliterator extends Transliterator {
|
||||
|
||||
|
@ -329,6 +330,10 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
return parse(new String[] { rules }, direction);
|
||||
}
|
||||
|
||||
static Data parse(ResourceReader rules, int direction) {
|
||||
return new Parser(rules, direction).getData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
|
@ -597,6 +602,86 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
private static final char SEGMENT_OPEN = '(';
|
||||
private static final char SEGMENT_CLOSE = ')';
|
||||
|
||||
/**
|
||||
* A private abstract class representing the interface to rule
|
||||
* source code that is broken up into lines. Handles the
|
||||
* folding of lines terminated by a backslash. This folding
|
||||
* is limited; it does not account for comments, quotes, or
|
||||
* escapes, so its use to be limited.
|
||||
*/
|
||||
private abstract class RuleBody {
|
||||
|
||||
/**
|
||||
* Retrieve the next line of the source, or return null if
|
||||
* none. Folds lines terminated by a backslash into the
|
||||
* next line, without regard for comments, quotes, or
|
||||
* escapes.
|
||||
*/
|
||||
String nextLine() {
|
||||
String s = handleNextLine();
|
||||
if (s != null &&
|
||||
s.length() > 0 &&
|
||||
s.charAt(s.length() - 1) == '\\') {
|
||||
|
||||
StringBuffer b = new StringBuffer(s);
|
||||
do {
|
||||
b.deleteCharAt(b.length()-1);
|
||||
s = handleNextLine();
|
||||
if (s == null) {
|
||||
break;
|
||||
}
|
||||
b.append(s);
|
||||
} while (s.length() > 0 &&
|
||||
s.charAt(s.length() - 1) == '\\');
|
||||
|
||||
s = b.toString();
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset to the first line of the source.
|
||||
*/
|
||||
abstract void reset();
|
||||
|
||||
/**
|
||||
* Subclass method to return the next line of the source.
|
||||
*/
|
||||
abstract String handleNextLine();
|
||||
};
|
||||
|
||||
/**
|
||||
* RuleBody subclass for a String[] array.
|
||||
*/
|
||||
private class RuleArray extends RuleBody {
|
||||
String[] array;
|
||||
int i;
|
||||
public RuleArray(String[] array) { this.array = array; i = 0; }
|
||||
public String handleNextLine() {
|
||||
return (i < array.length) ? array[i++] : null;
|
||||
}
|
||||
public void reset() {
|
||||
i = 0;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* RuleBody subclass for a ResourceReader.
|
||||
*/
|
||||
private class RuleReader extends RuleBody {
|
||||
ResourceReader reader;
|
||||
public RuleReader(ResourceReader reader) { this.reader = reader; }
|
||||
public String handleNextLine() {
|
||||
try {
|
||||
return reader.readLine();
|
||||
} catch (java.io.IOException e) {}
|
||||
return null;
|
||||
}
|
||||
public void reset() {
|
||||
reader.reset();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @param rules list of rules, separated by semicolon characters
|
||||
* @exception IllegalArgumentException if there is a syntax error in the
|
||||
|
@ -605,7 +690,16 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
public Parser(String[] ruleArray, int direction) {
|
||||
this.direction = direction;
|
||||
data = new Data();
|
||||
parseRules(ruleArray);
|
||||
parseRules(new RuleArray(ruleArray));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param rules resource reader for the rules
|
||||
*/
|
||||
public Parser(ResourceReader rules, int direction) {
|
||||
this.direction = direction;
|
||||
data = new Data();
|
||||
parseRules(new RuleReader(rules));
|
||||
}
|
||||
|
||||
public Data getData() {
|
||||
|
@ -622,7 +716,7 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
* @exception IllegalArgumentException if there is a syntax error in the
|
||||
* rules
|
||||
*/
|
||||
private void parseRules(String[] ruleArray) {
|
||||
private void parseRules(RuleBody ruleArray) {
|
||||
determineVariableRange(ruleArray);
|
||||
setVariablesVector = new Vector();
|
||||
parseData = new ParseData();
|
||||
|
@ -630,9 +724,13 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
StringBuffer errors = null;
|
||||
int errorCount = 0;
|
||||
|
||||
ruleArray.reset();
|
||||
main:
|
||||
for (int i=0; i<ruleArray.length; ++i) {
|
||||
String rule = ruleArray[i];
|
||||
for (;;) {
|
||||
String rule = ruleArray.nextLine();
|
||||
if (rule == null) {
|
||||
break;
|
||||
}
|
||||
int pos = 0;
|
||||
int limit = rule.length();
|
||||
while (pos < limit) {
|
||||
|
@ -1192,7 +1290,7 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
* When done, everything not in the hash is available for use. In practice,
|
||||
* this method may employ some other algorithm for improved speed.
|
||||
*/
|
||||
private final void determineVariableRange(String[] ruleArray) {
|
||||
private final void determineVariableRange(RuleBody ruleArray) {
|
||||
// As an initial implementation, we just run through all the
|
||||
// characters, ignoring any quoting. This works since the quote
|
||||
// mechanisms are outside the private use area.
|
||||
|
@ -1309,12 +1407,16 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
* characters in this range, then this range itself is
|
||||
* returned.
|
||||
*/
|
||||
Range largestUnusedSubrange(String[] strings) {
|
||||
Range largestUnusedSubrange(RuleBody strings) {
|
||||
Vector v = new Vector(1);
|
||||
v.addElement(clone());
|
||||
|
||||
for (int k=0; k<strings.length; ++k) {
|
||||
String str = strings[k];
|
||||
strings.reset();
|
||||
for (;;) {
|
||||
String str = strings.nextLine();
|
||||
if (str == null) {
|
||||
break;
|
||||
}
|
||||
int n = str.length();
|
||||
for (int i=0; i<n; ++i) {
|
||||
char c = str.charAt(i);
|
||||
|
@ -1349,6 +1451,9 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
|
||||
/**
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.40 2001/02/03 00:46:21 alan4j
|
||||
* Load RuleBasedTransliterator files from UTF8 files instead of ResourceBundles
|
||||
*
|
||||
* Revision 1.39 2000/08/31 17:11:42 alan4j
|
||||
* Implement anchors.
|
||||
*
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
|
||||
* $Date: 2000/10/06 23:07:40 $
|
||||
* $Revision: 1.22 $
|
||||
* $Date: 2001/02/03 00:46:21 $
|
||||
* $Revision: 1.23 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -14,6 +14,8 @@ package com.ibm.text;
|
|||
|
||||
import java.util.*;
|
||||
import java.text.MessageFormat;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import com.ibm.text.resources.ResourceReader;
|
||||
|
||||
/**
|
||||
* <code>Transliterator</code> is an abstract class that
|
||||
|
@ -210,7 +212,7 @@ import java.text.MessageFormat;
|
|||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.22 $ $Date: 2000/10/06 23:07:40 $
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.23 $ $Date: 2001/02/03 00:46:21 $
|
||||
*/
|
||||
public abstract class Transliterator {
|
||||
/**
|
||||
|
@ -418,6 +420,16 @@ public abstract class Transliterator {
|
|||
*/
|
||||
private static final String RB_RULE = "Rule";
|
||||
|
||||
/**
|
||||
* Prefix string to identify UTF8 RuleBasedTransliterator resource.
|
||||
*/
|
||||
private static final String RBT_UTF8_PREFIX = "Transliterator_";
|
||||
|
||||
/**
|
||||
* Suffix string to identify UTF8 RuleBasedTransliterator resource.
|
||||
*/
|
||||
private static final String RBT_UTF8_SUFFIX = ".utf8.txt";
|
||||
|
||||
private static final String COPYRIGHT =
|
||||
"\u00A9 IBM Corporation 1999. All rights reserved.";
|
||||
|
||||
|
@ -873,37 +885,60 @@ public abstract class Transliterator {
|
|||
} else {
|
||||
synchronized (cache) {
|
||||
boolean isReverse = (obj == REVERSE_RULE_BASED_PLACEHOLDER);
|
||||
String resourceName = RB_RULE_BASED_PREFIX;
|
||||
String resourceName = ID;
|
||||
int i = ID.indexOf('-');
|
||||
if (i < 0) {
|
||||
resourceName += ID;
|
||||
} else {
|
||||
if (i > 0) {
|
||||
String IDLeft = ID.substring(0, i);
|
||||
String IDRight = ID.substring(i+1);
|
||||
resourceName += isReverse ? (IDRight + RB_RULE_BASED_SEPARATOR + IDLeft)
|
||||
: (IDLeft + RB_RULE_BASED_SEPARATOR + IDRight);
|
||||
resourceName = isReverse ? (IDRight + RB_RULE_BASED_SEPARATOR + IDLeft)
|
||||
: (IDLeft + RB_RULE_BASED_SEPARATOR + IDRight);
|
||||
}
|
||||
|
||||
ResourceReader r = null;
|
||||
try {
|
||||
ResourceBundle resource = ResourceBundle.getBundle(resourceName);
|
||||
|
||||
// We allow the resource bundle to contain either an array
|
||||
// of rules, or a single rule string.
|
||||
String[] ruleArray;
|
||||
try {
|
||||
ruleArray = resource.getStringArray(RB_RULE);
|
||||
} catch (Exception e) {
|
||||
// This is a ClassCastException under JDK 1.1.8
|
||||
ruleArray = new String[] { resource.getString(RB_RULE) };
|
||||
}
|
||||
|
||||
data = RuleBasedTransliterator.parse(ruleArray,
|
||||
r = new ResourceReader(RBT_UTF8_PREFIX + resourceName + RBT_UTF8_SUFFIX,
|
||||
"UTF8");
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
// This should never happen; UTF8 is always supported
|
||||
} catch (IllegalArgumentException e2) {
|
||||
// Can't load UTF8 file
|
||||
}
|
||||
|
||||
if (r != null) {
|
||||
data = RuleBasedTransliterator.parse(r,
|
||||
isReverse
|
||||
? RuleBasedTransliterator.REVERSE
|
||||
: RuleBasedTransliterator.FORWARD);
|
||||
|
||||
|
||||
cache.put(ID, data);
|
||||
// Fall through to construct transliterator from Data object.
|
||||
} catch (MissingResourceException e) {}
|
||||
} else {
|
||||
// Unable to load the UTF8 file; try the resource
|
||||
// bundles. Eventually, when we phase support for this
|
||||
// out, we can delete this clause. Leave it in for now.
|
||||
try {
|
||||
ResourceBundle resource = ResourceBundle.getBundle(RB_RULE_BASED_PREFIX +
|
||||
resourceName);
|
||||
|
||||
// We allow the resource bundle to contain either an array
|
||||
// of rules, or a single rule string.
|
||||
String[] ruleArray;
|
||||
try {
|
||||
ruleArray = resource.getStringArray(RB_RULE);
|
||||
} catch (Exception e) {
|
||||
// This is a ClassCastException under JDK 1.1.8
|
||||
ruleArray = new String[] { resource.getString(RB_RULE) };
|
||||
}
|
||||
|
||||
data = RuleBasedTransliterator.parse(ruleArray,
|
||||
isReverse
|
||||
? RuleBasedTransliterator.REVERSE
|
||||
: RuleBasedTransliterator.FORWARD);
|
||||
|
||||
cache.put(ID, data);
|
||||
// Fall through to construct transliterator from Data object.
|
||||
} catch (MissingResourceException e) {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $
|
||||
* $Date: 2000/08/31 17:11:42 $
|
||||
* $Revision: 1.39 $
|
||||
* $Date: 2001/02/03 00:46:21 $
|
||||
* $Revision: 1.40 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -16,6 +16,7 @@ import java.util.Hashtable;
|
|||
import java.util.Vector;
|
||||
import java.text.ParsePosition;
|
||||
import com.ibm.util.Utility;
|
||||
import com.ibm.text.resources.ResourceReader;
|
||||
|
||||
/**
|
||||
* <code>RuleBasedTransliterator</code> is a transliterator
|
||||
|
@ -278,7 +279,7 @@ import com.ibm.util.Utility;
|
|||
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.39 $ $Date: 2000/08/31 17:11:42 $
|
||||
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.40 $ $Date: 2001/02/03 00:46:21 $
|
||||
*/
|
||||
public class RuleBasedTransliterator extends Transliterator {
|
||||
|
||||
|
@ -329,6 +330,10 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
return parse(new String[] { rules }, direction);
|
||||
}
|
||||
|
||||
static Data parse(ResourceReader rules, int direction) {
|
||||
return new Parser(rules, direction).getData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Implements {@link Transliterator#handleTransliterate}.
|
||||
*/
|
||||
|
@ -597,6 +602,86 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
private static final char SEGMENT_OPEN = '(';
|
||||
private static final char SEGMENT_CLOSE = ')';
|
||||
|
||||
/**
|
||||
* A private abstract class representing the interface to rule
|
||||
* source code that is broken up into lines. Handles the
|
||||
* folding of lines terminated by a backslash. This folding
|
||||
* is limited; it does not account for comments, quotes, or
|
||||
* escapes, so its use to be limited.
|
||||
*/
|
||||
private abstract class RuleBody {
|
||||
|
||||
/**
|
||||
* Retrieve the next line of the source, or return null if
|
||||
* none. Folds lines terminated by a backslash into the
|
||||
* next line, without regard for comments, quotes, or
|
||||
* escapes.
|
||||
*/
|
||||
String nextLine() {
|
||||
String s = handleNextLine();
|
||||
if (s != null &&
|
||||
s.length() > 0 &&
|
||||
s.charAt(s.length() - 1) == '\\') {
|
||||
|
||||
StringBuffer b = new StringBuffer(s);
|
||||
do {
|
||||
b.deleteCharAt(b.length()-1);
|
||||
s = handleNextLine();
|
||||
if (s == null) {
|
||||
break;
|
||||
}
|
||||
b.append(s);
|
||||
} while (s.length() > 0 &&
|
||||
s.charAt(s.length() - 1) == '\\');
|
||||
|
||||
s = b.toString();
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset to the first line of the source.
|
||||
*/
|
||||
abstract void reset();
|
||||
|
||||
/**
|
||||
* Subclass method to return the next line of the source.
|
||||
*/
|
||||
abstract String handleNextLine();
|
||||
};
|
||||
|
||||
/**
|
||||
* RuleBody subclass for a String[] array.
|
||||
*/
|
||||
private class RuleArray extends RuleBody {
|
||||
String[] array;
|
||||
int i;
|
||||
public RuleArray(String[] array) { this.array = array; i = 0; }
|
||||
public String handleNextLine() {
|
||||
return (i < array.length) ? array[i++] : null;
|
||||
}
|
||||
public void reset() {
|
||||
i = 0;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* RuleBody subclass for a ResourceReader.
|
||||
*/
|
||||
private class RuleReader extends RuleBody {
|
||||
ResourceReader reader;
|
||||
public RuleReader(ResourceReader reader) { this.reader = reader; }
|
||||
public String handleNextLine() {
|
||||
try {
|
||||
return reader.readLine();
|
||||
} catch (java.io.IOException e) {}
|
||||
return null;
|
||||
}
|
||||
public void reset() {
|
||||
reader.reset();
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @param rules list of rules, separated by semicolon characters
|
||||
* @exception IllegalArgumentException if there is a syntax error in the
|
||||
|
@ -605,7 +690,16 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
public Parser(String[] ruleArray, int direction) {
|
||||
this.direction = direction;
|
||||
data = new Data();
|
||||
parseRules(ruleArray);
|
||||
parseRules(new RuleArray(ruleArray));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param rules resource reader for the rules
|
||||
*/
|
||||
public Parser(ResourceReader rules, int direction) {
|
||||
this.direction = direction;
|
||||
data = new Data();
|
||||
parseRules(new RuleReader(rules));
|
||||
}
|
||||
|
||||
public Data getData() {
|
||||
|
@ -622,7 +716,7 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
* @exception IllegalArgumentException if there is a syntax error in the
|
||||
* rules
|
||||
*/
|
||||
private void parseRules(String[] ruleArray) {
|
||||
private void parseRules(RuleBody ruleArray) {
|
||||
determineVariableRange(ruleArray);
|
||||
setVariablesVector = new Vector();
|
||||
parseData = new ParseData();
|
||||
|
@ -630,9 +724,13 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
StringBuffer errors = null;
|
||||
int errorCount = 0;
|
||||
|
||||
ruleArray.reset();
|
||||
main:
|
||||
for (int i=0; i<ruleArray.length; ++i) {
|
||||
String rule = ruleArray[i];
|
||||
for (;;) {
|
||||
String rule = ruleArray.nextLine();
|
||||
if (rule == null) {
|
||||
break;
|
||||
}
|
||||
int pos = 0;
|
||||
int limit = rule.length();
|
||||
while (pos < limit) {
|
||||
|
@ -1192,7 +1290,7 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
* When done, everything not in the hash is available for use. In practice,
|
||||
* this method may employ some other algorithm for improved speed.
|
||||
*/
|
||||
private final void determineVariableRange(String[] ruleArray) {
|
||||
private final void determineVariableRange(RuleBody ruleArray) {
|
||||
// As an initial implementation, we just run through all the
|
||||
// characters, ignoring any quoting. This works since the quote
|
||||
// mechanisms are outside the private use area.
|
||||
|
@ -1309,12 +1407,16 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
* characters in this range, then this range itself is
|
||||
* returned.
|
||||
*/
|
||||
Range largestUnusedSubrange(String[] strings) {
|
||||
Range largestUnusedSubrange(RuleBody strings) {
|
||||
Vector v = new Vector(1);
|
||||
v.addElement(clone());
|
||||
|
||||
for (int k=0; k<strings.length; ++k) {
|
||||
String str = strings[k];
|
||||
strings.reset();
|
||||
for (;;) {
|
||||
String str = strings.nextLine();
|
||||
if (str == null) {
|
||||
break;
|
||||
}
|
||||
int n = str.length();
|
||||
for (int i=0; i<n; ++i) {
|
||||
char c = str.charAt(i);
|
||||
|
@ -1349,6 +1451,9 @@ public class RuleBasedTransliterator extends Transliterator {
|
|||
|
||||
/**
|
||||
* $Log: RuleBasedTransliterator.java,v $
|
||||
* Revision 1.40 2001/02/03 00:46:21 alan4j
|
||||
* Load RuleBasedTransliterator files from UTF8 files instead of ResourceBundles
|
||||
*
|
||||
* Revision 1.39 2000/08/31 17:11:42 alan4j
|
||||
* Implement anchors.
|
||||
*
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $
|
||||
* $Date: 2000/10/06 23:07:40 $
|
||||
* $Revision: 1.22 $
|
||||
* $Date: 2001/02/03 00:46:21 $
|
||||
* $Revision: 1.23 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -14,6 +14,8 @@ package com.ibm.text;
|
|||
|
||||
import java.util.*;
|
||||
import java.text.MessageFormat;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import com.ibm.text.resources.ResourceReader;
|
||||
|
||||
/**
|
||||
* <code>Transliterator</code> is an abstract class that
|
||||
|
@ -210,7 +212,7 @@ import java.text.MessageFormat;
|
|||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||
*
|
||||
* @author Alan Liu
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.22 $ $Date: 2000/10/06 23:07:40 $
|
||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.23 $ $Date: 2001/02/03 00:46:21 $
|
||||
*/
|
||||
public abstract class Transliterator {
|
||||
/**
|
||||
|
@ -418,6 +420,16 @@ public abstract class Transliterator {
|
|||
*/
|
||||
private static final String RB_RULE = "Rule";
|
||||
|
||||
/**
|
||||
* Prefix string to identify UTF8 RuleBasedTransliterator resource.
|
||||
*/
|
||||
private static final String RBT_UTF8_PREFIX = "Transliterator_";
|
||||
|
||||
/**
|
||||
* Suffix string to identify UTF8 RuleBasedTransliterator resource.
|
||||
*/
|
||||
private static final String RBT_UTF8_SUFFIX = ".utf8.txt";
|
||||
|
||||
private static final String COPYRIGHT =
|
||||
"\u00A9 IBM Corporation 1999. All rights reserved.";
|
||||
|
||||
|
@ -873,37 +885,60 @@ public abstract class Transliterator {
|
|||
} else {
|
||||
synchronized (cache) {
|
||||
boolean isReverse = (obj == REVERSE_RULE_BASED_PLACEHOLDER);
|
||||
String resourceName = RB_RULE_BASED_PREFIX;
|
||||
String resourceName = ID;
|
||||
int i = ID.indexOf('-');
|
||||
if (i < 0) {
|
||||
resourceName += ID;
|
||||
} else {
|
||||
if (i > 0) {
|
||||
String IDLeft = ID.substring(0, i);
|
||||
String IDRight = ID.substring(i+1);
|
||||
resourceName += isReverse ? (IDRight + RB_RULE_BASED_SEPARATOR + IDLeft)
|
||||
: (IDLeft + RB_RULE_BASED_SEPARATOR + IDRight);
|
||||
resourceName = isReverse ? (IDRight + RB_RULE_BASED_SEPARATOR + IDLeft)
|
||||
: (IDLeft + RB_RULE_BASED_SEPARATOR + IDRight);
|
||||
}
|
||||
|
||||
ResourceReader r = null;
|
||||
try {
|
||||
ResourceBundle resource = ResourceBundle.getBundle(resourceName);
|
||||
|
||||
// We allow the resource bundle to contain either an array
|
||||
// of rules, or a single rule string.
|
||||
String[] ruleArray;
|
||||
try {
|
||||
ruleArray = resource.getStringArray(RB_RULE);
|
||||
} catch (Exception e) {
|
||||
// This is a ClassCastException under JDK 1.1.8
|
||||
ruleArray = new String[] { resource.getString(RB_RULE) };
|
||||
}
|
||||
|
||||
data = RuleBasedTransliterator.parse(ruleArray,
|
||||
r = new ResourceReader(RBT_UTF8_PREFIX + resourceName + RBT_UTF8_SUFFIX,
|
||||
"UTF8");
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
// This should never happen; UTF8 is always supported
|
||||
} catch (IllegalArgumentException e2) {
|
||||
// Can't load UTF8 file
|
||||
}
|
||||
|
||||
if (r != null) {
|
||||
data = RuleBasedTransliterator.parse(r,
|
||||
isReverse
|
||||
? RuleBasedTransliterator.REVERSE
|
||||
: RuleBasedTransliterator.FORWARD);
|
||||
|
||||
|
||||
cache.put(ID, data);
|
||||
// Fall through to construct transliterator from Data object.
|
||||
} catch (MissingResourceException e) {}
|
||||
} else {
|
||||
// Unable to load the UTF8 file; try the resource
|
||||
// bundles. Eventually, when we phase support for this
|
||||
// out, we can delete this clause. Leave it in for now.
|
||||
try {
|
||||
ResourceBundle resource = ResourceBundle.getBundle(RB_RULE_BASED_PREFIX +
|
||||
resourceName);
|
||||
|
||||
// We allow the resource bundle to contain either an array
|
||||
// of rules, or a single rule string.
|
||||
String[] ruleArray;
|
||||
try {
|
||||
ruleArray = resource.getStringArray(RB_RULE);
|
||||
} catch (Exception e) {
|
||||
// This is a ClassCastException under JDK 1.1.8
|
||||
ruleArray = new String[] { resource.getString(RB_RULE) };
|
||||
}
|
||||
|
||||
data = RuleBasedTransliterator.parse(ruleArray,
|
||||
isReverse
|
||||
? RuleBasedTransliterator.REVERSE
|
||||
: RuleBasedTransliterator.FORWARD);
|
||||
|
||||
cache.put(ID, data);
|
||||
// Fall through to construct transliterator from Data object.
|
||||
} catch (MissingResourceException e) {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
88
icu4j/src/com/ibm/text/resources/ResourceReader.java
Executable file
88
icu4j/src/com/ibm/text/resources/ResourceReader.java
Executable file
|
@ -0,0 +1,88 @@
|
|||
package com.ibm.text.resources;
|
||||
import java.io.*;
|
||||
|
||||
/**
|
||||
* A reader for text resource data in the current package. The
|
||||
* resource data is loaded through the class loader, so it will
|
||||
* typically be a file in the same directory as the *.class files, or
|
||||
* a file within a JAR file in the corresponding subdirectory. The
|
||||
* file must be a text file in one of the supported encodings; when the
|
||||
* resource is opened by constructing a <code>ResourceReader</code>
|
||||
* object the encoding is specified.
|
||||
*
|
||||
* <p>Although this class has a public API, it is designed for
|
||||
* internal use by classes in the <code>com.ibm.text</code> package.
|
||||
*
|
||||
* @author Alan Liu
|
||||
*/
|
||||
public class ResourceReader {
|
||||
private BufferedReader reader;
|
||||
private String resourceName;
|
||||
private String encoding;
|
||||
private boolean isReset; // TRUE if we are at the start of the file
|
||||
|
||||
/**
|
||||
* Construct a reader object for the text file of the given name
|
||||
* in this package, in the given encoding.
|
||||
* @param resourceName thqe name of the text file located in this
|
||||
* package
|
||||
* @param encoding the encoding of the text file; if unsupported
|
||||
* an exception is thrown
|
||||
* @exception UnsupportedEncodingException if
|
||||
* <code>encoding</code> is not supported by the JDK.
|
||||
*/
|
||||
public ResourceReader(String resourceName, String encoding)
|
||||
throws UnsupportedEncodingException {
|
||||
|
||||
this.resourceName = resourceName;
|
||||
this.encoding = encoding;
|
||||
isReset = false;
|
||||
_reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Read and return the next line of the file or <code>null</code>
|
||||
* if the end of the file has been reached.
|
||||
*/
|
||||
public String readLine() throws IOException {
|
||||
isReset = false;
|
||||
return reader.readLine();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset this reader so that the next call to
|
||||
* <code>readLine()</code> returns the first line of the file
|
||||
* again. This is a somewhat expensive call, however, calling
|
||||
* <code>reset()</code> after calling it the first time does
|
||||
* nothing if <code>readLine()</code> has not been called in
|
||||
* between.
|
||||
*/
|
||||
public void reset() {
|
||||
try {
|
||||
_reset();
|
||||
} catch (UnsupportedEncodingException e) {}
|
||||
// We swallow this exception, if there is one. If the encoding is
|
||||
// invalid, the constructor will have thrown this exception already and
|
||||
// the caller shouldn't use the object afterwards.
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset to the start by reconstructing the stream and readers.
|
||||
* We could also use mark() and reset() on the stream or reader,
|
||||
* but that would cause them to keep the stream data around in
|
||||
* memory. We don't want that because some of the resource files
|
||||
* are large, e.g., 400k.
|
||||
*/
|
||||
private void _reset() throws UnsupportedEncodingException {
|
||||
if (isReset) {
|
||||
return;
|
||||
}
|
||||
InputStream is = getClass().getResourceAsStream(resourceName);
|
||||
if (is == null) {
|
||||
throw new IllegalArgumentException("Can't open " + resourceName);
|
||||
}
|
||||
InputStreamReader isr = new InputStreamReader(is, encoding);
|
||||
reader = new BufferedReader(isr);
|
||||
isReset = true;
|
||||
}
|
||||
}
|
100
icu4j/src/com/ibm/text/resources/Transliterator_Bengali_InterIndic.utf8.txt
Executable file
100
icu4j/src/com/ibm/text/resources/Transliterator_Bengali_InterIndic.utf8.txt
Executable file
|
@ -0,0 +1,100 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:41:57 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Bengali-InterIndic
|
||||
|
||||
ঁ>\uE001; # SIGN CANDRABINDU
|
||||
ং>\uE002; # SIGN ANUSVARA
|
||||
ঃ>\uE003; # SIGN VISARGA
|
||||
অ>\uE005; # LETTER A
|
||||
আ>\uE006; # LETTER AA
|
||||
ই>\uE007; # LETTER I
|
||||
ঈ>\uE008; # LETTER II
|
||||
উ>\uE009; # LETTER U
|
||||
ঊ>\uE00A; # LETTER UU
|
||||
ঋ>\uE00B; # LETTER VOCALIC R
|
||||
ঌ>\uE00C; # LETTER VOCALIC L
|
||||
এ>\uE081; # LETTER E
|
||||
ঐ>\uE010; # LETTER AI
|
||||
ও>\uE082; # LETTER O
|
||||
ঔ>\uE014; # LETTER AU
|
||||
ক>\uE015; # LETTER KA
|
||||
খ>\uE016; # LETTER KHA
|
||||
গ>\uE017; # LETTER GA
|
||||
ঘ>\uE018; # LETTER GHA
|
||||
ঙ>\uE019; # LETTER NGA
|
||||
চ>\uE01A; # LETTER CA
|
||||
ছ>\uE01B; # LETTER CHA
|
||||
জ>\uE01C; # LETTER JA
|
||||
ঝ>\uE01D; # LETTER JHA
|
||||
ঞ>\uE01E; # LETTER NYA
|
||||
ট>\uE01F; # LETTER TTA
|
||||
ঠ>\uE020; # LETTER TTHA
|
||||
ড>\uE021; # LETTER DDA
|
||||
ঢ>\uE022; # LETTER DDHA
|
||||
ণ>\uE023; # LETTER NNA
|
||||
ত>\uE024; # LETTER TA
|
||||
থ>\uE025; # LETTER THA
|
||||
দ>\uE026; # LETTER DA
|
||||
ধ>\uE027; # LETTER DHA
|
||||
ন>\uE028; # LETTER NA
|
||||
প>\uE02A; # LETTER PA
|
||||
ফ>\uE02B; # LETTER PHA
|
||||
ব>\uE02C; # LETTER BA
|
||||
ভ>\uE02D; # LETTER BHA
|
||||
ম>\uE02E; # LETTER MA
|
||||
য>\uE02F; # LETTER YA
|
||||
র>\uE030; # LETTER RA
|
||||
ল>\uE032; # LETTER LA
|
||||
শ>\uE036; # LETTER SHA
|
||||
ষ>\uE037; # LETTER SSA
|
||||
স>\uE038; # LETTER SA
|
||||
হ>\uE039; # LETTER HA
|
||||
়>\uE03C; # SIGN NUKTA
|
||||
া>\uE03E; # VOWEL SIGN AA
|
||||
ি>\uE03F; # VOWEL SIGN I
|
||||
ী>\uE040; # VOWEL SIGN II
|
||||
ু>\uE041; # VOWEL SIGN U
|
||||
ূ>\uE042; # VOWEL SIGN UU
|
||||
ৃ>\uE043; # VOWEL SIGN VOCALIC R
|
||||
ৄ>\uE044; # VOWEL SIGN VOCALIC RR
|
||||
ে>\uE084; # VOWEL SIGN E
|
||||
ৈ>\uE048; # VOWEL SIGN AI
|
||||
ো>\uE085; # VOWEL SIGN O
|
||||
ৌ>\uE04C; # VOWEL SIGN AU
|
||||
্>\uE04D; # SIGN VIRAMA
|
||||
ৗ>\uE057; # AU LENGTH MARK
|
||||
ড়>\uE083; # LETTER RRA
|
||||
ঢ়>\uE05D; # LETTER RHA
|
||||
য়>\uE05F; # LETTER YYA
|
||||
ৠ>\uE060; # LETTER VOCALIC RR
|
||||
ৡ>\uE061; # LETTER VOCALIC LL
|
||||
ৢ>\uE062; # VOWEL SIGN VOCALIC L
|
||||
ৣ>\uE063; # VOWEL SIGN VOCALIC LL
|
||||
০>\uE066; # DIGIT ZERO
|
||||
১>\uE067; # DIGIT ONE
|
||||
২>\uE068; # DIGIT TWO
|
||||
৩>\uE069; # DIGIT THREE
|
||||
৪>\uE06A; # DIGIT FOUR
|
||||
৫>\uE06B; # DIGIT FIVE
|
||||
৬>\uE06C; # DIGIT SIX
|
||||
৭>\uE06D; # DIGIT SEVEN
|
||||
৮>\uE06E; # DIGIT EIGHT
|
||||
৯>\uE06F; # DIGIT NINE
|
||||
# ৰ>; // UNMAPPED Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
|
||||
# ৱ>; // UNMAPPED Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
|
||||
# ৲>; // UNMAPPED Bengali-InterIndic: RUPEE MARK
|
||||
# ৳>; // UNMAPPED Bengali-InterIndic: RUPEE SIGN
|
||||
# ৴>; // UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE
|
||||
# ৵>; // UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR TWO
|
||||
# ৶>; // UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR THREE
|
||||
# ৷>; // UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR FOUR
|
||||
# ৸>; // UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
|
||||
# ৹>; // UNMAPPED Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN
|
||||
৺>\uE080; # ISSHAR
|
||||
|
||||
# eof
|
115
icu4j/src/com/ibm/text/resources/Transliterator_Devanagari_InterIndic.utf8.txt
Executable file
115
icu4j/src/com/ibm/text/resources/Transliterator_Devanagari_InterIndic.utf8.txt
Executable file
|
@ -0,0 +1,115 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:41:57 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Devanagari-InterIndic
|
||||
|
||||
ँ>\uE001; # SIGN CANDRABINDU
|
||||
ं>\uE002; # SIGN ANUSVARA
|
||||
ः>\uE003; # SIGN VISARGA
|
||||
अ>\uE005; # LETTER A
|
||||
आ>\uE006; # LETTER AA
|
||||
इ>\uE007; # LETTER I
|
||||
ई>\uE008; # LETTER II
|
||||
उ>\uE009; # LETTER U
|
||||
ऊ>\uE00A; # LETTER UU
|
||||
ऋ>\uE00B; # LETTER VOCALIC R
|
||||
ऌ>\uE00C; # LETTER VOCALIC L
|
||||
# ऍ>; // UNMAPPED Devanagari-InterIndic: LETTER CANDRA E
|
||||
# ऎ>; // UNMAPPED Devanagari-InterIndic: LETTER SHORT E
|
||||
ए>\uE081; # LETTER E
|
||||
ऐ>\uE010; # LETTER AI
|
||||
# ऑ>; // UNMAPPED Devanagari-InterIndic: LETTER CANDRA O
|
||||
# ऒ>; // UNMAPPED Devanagari-InterIndic: LETTER SHORT O
|
||||
ओ>\uE082; # LETTER O
|
||||
औ>\uE014; # LETTER AU
|
||||
क>\uE015; # LETTER KA
|
||||
ख>\uE016; # LETTER KHA
|
||||
ग>\uE017; # LETTER GA
|
||||
घ>\uE018; # LETTER GHA
|
||||
ङ>\uE019; # LETTER NGA
|
||||
च>\uE01A; # LETTER CA
|
||||
छ>\uE01B; # LETTER CHA
|
||||
ज>\uE01C; # LETTER JA
|
||||
झ>\uE01D; # LETTER JHA
|
||||
ञ>\uE01E; # LETTER NYA
|
||||
ट>\uE01F; # LETTER TTA
|
||||
ठ>\uE020; # LETTER TTHA
|
||||
ड>\uE021; # LETTER DDA
|
||||
ढ>\uE022; # LETTER DDHA
|
||||
ण>\uE023; # LETTER NNA
|
||||
त>\uE024; # LETTER TA
|
||||
थ>\uE025; # LETTER THA
|
||||
द>\uE026; # LETTER DA
|
||||
ध>\uE027; # LETTER DHA
|
||||
न>\uE028; # LETTER NA
|
||||
ऩ>\uE029; # LETTER NNNA
|
||||
प>\uE02A; # LETTER PA
|
||||
फ>\uE02B; # LETTER PHA
|
||||
ब>\uE02C; # LETTER BA
|
||||
भ>\uE02D; # LETTER BHA
|
||||
म>\uE02E; # LETTER MA
|
||||
य>\uE02F; # LETTER YA
|
||||
र>\uE030; # LETTER RA
|
||||
ऱ>\uE083; # LETTER RRA
|
||||
ल>\uE032; # LETTER LA
|
||||
ळ>\uE033; # LETTER LLA
|
||||
ऴ>\uE034; # LETTER LLLA
|
||||
व>\uE035; # LETTER VA
|
||||
श>\uE036; # LETTER SHA
|
||||
ष>\uE037; # LETTER SSA
|
||||
स>\uE038; # LETTER SA
|
||||
ह>\uE039; # LETTER HA
|
||||
़>\uE03C; # SIGN NUKTA
|
||||
ऽ>\uE03D; # SIGN AVAGRAHA
|
||||
ा>\uE03E; # VOWEL SIGN AA
|
||||
ि>\uE03F; # VOWEL SIGN I
|
||||
ी>\uE040; # VOWEL SIGN II
|
||||
ु>\uE041; # VOWEL SIGN U
|
||||
ू>\uE042; # VOWEL SIGN UU
|
||||
ृ>\uE043; # VOWEL SIGN VOCALIC R
|
||||
ॄ>\uE044; # VOWEL SIGN VOCALIC RR
|
||||
ॅ>\uE045; # VOWEL SIGN CANDRA E
|
||||
# ॆ>; // UNMAPPED Devanagari-InterIndic: VOWEL SIGN SHORT E
|
||||
े>\uE084; # VOWEL SIGN E
|
||||
ै>\uE048; # VOWEL SIGN AI
|
||||
ॉ>\uE049; # VOWEL SIGN CANDRA O
|
||||
# ॊ>; // UNMAPPED Devanagari-InterIndic: VOWEL SIGN SHORT O
|
||||
ो>\uE085; # VOWEL SIGN O
|
||||
ौ>\uE04C; # VOWEL SIGN AU
|
||||
्>\uE04D; # SIGN VIRAMA
|
||||
ॐ>\uE050; # OM
|
||||
# ॑>; // UNMAPPED Devanagari-InterIndic: STRESS SIGN UDATTA
|
||||
# ॒>; // UNMAPPED Devanagari-InterIndic: STRESS SIGN ANUDATTA
|
||||
# ॓>; // UNMAPPED Devanagari-InterIndic: GRAVE ACCENT
|
||||
# ॔>; // UNMAPPED Devanagari-InterIndic: ACUTE ACCENT
|
||||
# क़>; // UNMAPPED Devanagari-InterIndic: LETTER QA
|
||||
ख़>\uE059; # LETTER KHHA
|
||||
ग़>\uE05A; # LETTER GHHA
|
||||
ज़>\uE05B; # LETTER ZA
|
||||
# ड़>; // UNMAPPED Devanagari-InterIndic: LETTER DDDHA
|
||||
ढ़>\uE05D; # LETTER RHA
|
||||
फ़>\uE05E; # LETTER FA
|
||||
य़>\uE05F; # LETTER YYA
|
||||
ॠ>\uE060; # LETTER VOCALIC RR
|
||||
ॡ>\uE061; # LETTER VOCALIC LL
|
||||
ॢ>\uE062; # VOWEL SIGN VOCALIC L
|
||||
ॣ>\uE063; # VOWEL SIGN VOCALIC LL
|
||||
# ।>; // UNMAPPED Devanagari-InterIndic: DANDA
|
||||
# ॥>; // UNMAPPED Devanagari-InterIndic: DOUBLE DANDA
|
||||
०>\uE066; # DIGIT ZERO
|
||||
१>\uE067; # DIGIT ONE
|
||||
२>\uE068; # DIGIT TWO
|
||||
३>\uE069; # DIGIT THREE
|
||||
४>\uE06A; # DIGIT FOUR
|
||||
५>\uE06B; # DIGIT FIVE
|
||||
६>\uE06C; # DIGIT SIX
|
||||
७>\uE06D; # DIGIT SEVEN
|
||||
८>\uE06E; # DIGIT EIGHT
|
||||
९>\uE06F; # DIGIT NINE
|
||||
# ॰>; // UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
|
||||
|
||||
# eof
|
269
icu4j/src/com/ibm/text/resources/Transliterator_Fullwidth_Halfwidth.utf8.txt
Executable file
269
icu4j/src/com/ibm/text/resources/Transliterator_Fullwidth_Halfwidth.utf8.txt
Executable file
|
@ -0,0 +1,269 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:41:57 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Fullwidth-Halfwidth
|
||||
|
||||
# Mechanically generated from Unicode Character Database
|
||||
|
||||
# multicharacter
|
||||
|
||||
ガ<>ガ; # to KATAKANA LETTER GA
|
||||
ギ<>ギ; # to KATAKANA LETTER GI
|
||||
グ<>グ; # to KATAKANA LETTER GU
|
||||
ゲ<>ゲ; # to KATAKANA LETTER GE
|
||||
ゴ<>ゴ; # to KATAKANA LETTER GO
|
||||
ザ<>ザ; # to KATAKANA LETTER ZA
|
||||
ジ<>ジ; # to KATAKANA LETTER ZI
|
||||
ズ<>ズ; # to KATAKANA LETTER ZU
|
||||
ゼ<>ゼ; # to KATAKANA LETTER ZE
|
||||
ゾ<>ゾ; # to KATAKANA LETTER ZO
|
||||
ダ<>ダ; # to KATAKANA LETTER DA
|
||||
ヂ<>ヂ; # to KATAKANA LETTER DI
|
||||
ヅ<>ヅ; # to KATAKANA LETTER DU
|
||||
デ<>デ; # to KATAKANA LETTER DE
|
||||
ド<>ド; # to KATAKANA LETTER DO
|
||||
バ<>バ; # to KATAKANA LETTER BA
|
||||
パ<>パ; # to KATAKANA LETTER PA
|
||||
ビ<>ビ; # to KATAKANA LETTER BI
|
||||
ピ<>ピ; # to KATAKANA LETTER PI
|
||||
ブ<>ブ; # to KATAKANA LETTER BU
|
||||
プ<>プ; # to KATAKANA LETTER PU
|
||||
ベ<>ベ; # to KATAKANA LETTER BE
|
||||
ペ<>ペ; # to KATAKANA LETTER PE
|
||||
ボ<>ボ; # to KATAKANA LETTER BO
|
||||
ポ<>ポ; # to KATAKANA LETTER PO
|
||||
ヴ<>ヴ; # to KATAKANA LETTER VU
|
||||
ヷ<>ヷ; # to KATAKANA LETTER VA
|
||||
ヺ<>ヺ; # to KATAKANA LETTER VO
|
||||
|
||||
# single character
|
||||
|
||||
!<>'!'; # from FULLWIDTH EXCLAMATION MARK
|
||||
"<>'\"'; # from FULLWIDTH QUOTATION MARK
|
||||
#<>'#'; # from FULLWIDTH NUMBER SIGN
|
||||
$<>'$'; # from FULLWIDTH DOLLAR SIGN
|
||||
%<>'%'; # from FULLWIDTH PERCENT SIGN
|
||||
&<>'&'; # from FULLWIDTH AMPERSAND
|
||||
'<>''; # from FULLWIDTH APOSTROPHE
|
||||
(<>'('; # from FULLWIDTH LEFT PARENTHESIS
|
||||
)<>')'; # from FULLWIDTH RIGHT PARENTHESIS
|
||||
*<>'*'; # from FULLWIDTH ASTERISK
|
||||
+<>'+'; # from FULLWIDTH PLUS SIGN
|
||||
,<>','; # from FULLWIDTH COMMA
|
||||
-<>'-'; # from FULLWIDTH HYPHEN-MINUS
|
||||
.<>'.'; # from FULLWIDTH FULL STOP
|
||||
/<>'/'; # from FULLWIDTH SOLIDUS
|
||||
0<>'0'; # from FULLWIDTH DIGIT ZERO
|
||||
1<>'1'; # from FULLWIDTH DIGIT ONE
|
||||
2<>'2'; # from FULLWIDTH DIGIT TWO
|
||||
3<>'3'; # from FULLWIDTH DIGIT THREE
|
||||
4<>'4'; # from FULLWIDTH DIGIT FOUR
|
||||
5<>'5'; # from FULLWIDTH DIGIT FIVE
|
||||
6<>'6'; # from FULLWIDTH DIGIT SIX
|
||||
7<>'7'; # from FULLWIDTH DIGIT SEVEN
|
||||
8<>'8'; # from FULLWIDTH DIGIT EIGHT
|
||||
9<>'9'; # from FULLWIDTH DIGIT NINE
|
||||
:<>':'; # from FULLWIDTH COLON
|
||||
;<>';'; # from FULLWIDTH SEMICOLON
|
||||
<<>'<'; # from FULLWIDTH LESS-THAN SIGN
|
||||
=<>'='; # from FULLWIDTH EQUALS SIGN
|
||||
><>'>'; # from FULLWIDTH GREATER-THAN SIGN
|
||||
?<>'?'; # from FULLWIDTH QUESTION MARK
|
||||
@<>'@'; # from FULLWIDTH COMMERCIAL AT
|
||||
A<>A; # from FULLWIDTH LATIN CAPITAL LETTER A
|
||||
B<>B; # from FULLWIDTH LATIN CAPITAL LETTER B
|
||||
C<>C; # from FULLWIDTH LATIN CAPITAL LETTER C
|
||||
D<>D; # from FULLWIDTH LATIN CAPITAL LETTER D
|
||||
E<>E; # from FULLWIDTH LATIN CAPITAL LETTER E
|
||||
F<>F; # from FULLWIDTH LATIN CAPITAL LETTER F
|
||||
G<>G; # from FULLWIDTH LATIN CAPITAL LETTER G
|
||||
H<>H; # from FULLWIDTH LATIN CAPITAL LETTER H
|
||||
I<>I; # from FULLWIDTH LATIN CAPITAL LETTER I
|
||||
J<>J; # from FULLWIDTH LATIN CAPITAL LETTER J
|
||||
K<>K; # from FULLWIDTH LATIN CAPITAL LETTER K
|
||||
L<>L; # from FULLWIDTH LATIN CAPITAL LETTER L
|
||||
M<>M; # from FULLWIDTH LATIN CAPITAL LETTER M
|
||||
N<>N; # from FULLWIDTH LATIN CAPITAL LETTER N
|
||||
O<>O; # from FULLWIDTH LATIN CAPITAL LETTER O
|
||||
P<>P; # from FULLWIDTH LATIN CAPITAL LETTER P
|
||||
Q<>Q; # from FULLWIDTH LATIN CAPITAL LETTER Q
|
||||
R<>R; # from FULLWIDTH LATIN CAPITAL LETTER R
|
||||
S<>S; # from FULLWIDTH LATIN CAPITAL LETTER S
|
||||
T<>T; # from FULLWIDTH LATIN CAPITAL LETTER T
|
||||
U<>U; # from FULLWIDTH LATIN CAPITAL LETTER U
|
||||
V<>V; # from FULLWIDTH LATIN CAPITAL LETTER V
|
||||
W<>W; # from FULLWIDTH LATIN CAPITAL LETTER W
|
||||
X<>X; # from FULLWIDTH LATIN CAPITAL LETTER X
|
||||
Y<>Y; # from FULLWIDTH LATIN CAPITAL LETTER Y
|
||||
Z<>Z; # from FULLWIDTH LATIN CAPITAL LETTER Z
|
||||
[<>'['; # from FULLWIDTH LEFT SQUARE BRACKET
|
||||
\<>'\\'; # from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
|
||||
]<>']'; # from FULLWIDTH RIGHT SQUARE BRACKET
|
||||
^<>'^'; # from FULLWIDTH CIRCUMFLEX ACCENT
|
||||
_<>'_'; # from FULLWIDTH LOW LINE
|
||||
`<>'`'; # from FULLWIDTH GRAVE ACCENT
|
||||
a<>a; # from FULLWIDTH LATIN SMALL LETTER A
|
||||
b<>b; # from FULLWIDTH LATIN SMALL LETTER B
|
||||
c<>c; # from FULLWIDTH LATIN SMALL LETTER C
|
||||
d<>d; # from FULLWIDTH LATIN SMALL LETTER D
|
||||
e<>e; # from FULLWIDTH LATIN SMALL LETTER E
|
||||
f<>f; # from FULLWIDTH LATIN SMALL LETTER F
|
||||
g<>g; # from FULLWIDTH LATIN SMALL LETTER G
|
||||
h<>h; # from FULLWIDTH LATIN SMALL LETTER H
|
||||
i<>i; # from FULLWIDTH LATIN SMALL LETTER I
|
||||
j<>j; # from FULLWIDTH LATIN SMALL LETTER J
|
||||
k<>k; # from FULLWIDTH LATIN SMALL LETTER K
|
||||
l<>l; # from FULLWIDTH LATIN SMALL LETTER L
|
||||
m<>m; # from FULLWIDTH LATIN SMALL LETTER M
|
||||
n<>n; # from FULLWIDTH LATIN SMALL LETTER N
|
||||
o<>o; # from FULLWIDTH LATIN SMALL LETTER O
|
||||
p<>p; # from FULLWIDTH LATIN SMALL LETTER P
|
||||
q<>q; # from FULLWIDTH LATIN SMALL LETTER Q
|
||||
r<>r; # from FULLWIDTH LATIN SMALL LETTER R
|
||||
s<>s; # from FULLWIDTH LATIN SMALL LETTER S
|
||||
t<>t; # from FULLWIDTH LATIN SMALL LETTER T
|
||||
u<>u; # from FULLWIDTH LATIN SMALL LETTER U
|
||||
v<>v; # from FULLWIDTH LATIN SMALL LETTER V
|
||||
w<>w; # from FULLWIDTH LATIN SMALL LETTER W
|
||||
x<>x; # from FULLWIDTH LATIN SMALL LETTER X
|
||||
y<>y; # from FULLWIDTH LATIN SMALL LETTER Y
|
||||
z<>z; # from FULLWIDTH LATIN SMALL LETTER Z
|
||||
{<>'{'; # from FULLWIDTH LEFT CURLY BRACKET
|
||||
|<>'|'; # from FULLWIDTH VERTICAL LINE
|
||||
}<>'}'; # from FULLWIDTH RIGHT CURLY BRACKET
|
||||
~<>'~'; # from FULLWIDTH TILDE
|
||||
。<>。; # to HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
「<>「; # to HALFWIDTH LEFT CORNER BRACKET
|
||||
」<>」; # to HALFWIDTH RIGHT CORNER BRACKET
|
||||
、<>、; # to HALFWIDTH IDEOGRAPHIC COMMA
|
||||
・<>・; # to HALFWIDTH KATAKANA MIDDLE DOT
|
||||
ヲ<>ヲ; # to HALFWIDTH KATAKANA LETTER WO
|
||||
ァ<>ァ; # to HALFWIDTH KATAKANA LETTER SMALL A
|
||||
ィ<>ィ; # to HALFWIDTH KATAKANA LETTER SMALL I
|
||||
ゥ<>ゥ; # to HALFWIDTH KATAKANA LETTER SMALL U
|
||||
ェ<>ェ; # to HALFWIDTH KATAKANA LETTER SMALL E
|
||||
ォ<>ォ; # to HALFWIDTH KATAKANA LETTER SMALL O
|
||||
ャ<>ャ; # to HALFWIDTH KATAKANA LETTER SMALL YA
|
||||
ュ<>ュ; # to HALFWIDTH KATAKANA LETTER SMALL YU
|
||||
ョ<>ョ; # to HALFWIDTH KATAKANA LETTER SMALL YO
|
||||
ッ<>ッ; # to HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
ー<>ー; # to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
ア<>ア; # to HALFWIDTH KATAKANA LETTER A
|
||||
イ<>イ; # to HALFWIDTH KATAKANA LETTER I
|
||||
ウ<>ウ; # to HALFWIDTH KATAKANA LETTER U
|
||||
エ<>エ; # to HALFWIDTH KATAKANA LETTER E
|
||||
オ<>オ; # to HALFWIDTH KATAKANA LETTER O
|
||||
カ<>カ; # to HALFWIDTH KATAKANA LETTER KA
|
||||
キ<>キ; # to HALFWIDTH KATAKANA LETTER KI
|
||||
ク<>ク; # to HALFWIDTH KATAKANA LETTER KU
|
||||
ケ<>ケ; # to HALFWIDTH KATAKANA LETTER KE
|
||||
コ<>コ; # to HALFWIDTH KATAKANA LETTER KO
|
||||
サ<>サ; # to HALFWIDTH KATAKANA LETTER SA
|
||||
シ<>シ; # to HALFWIDTH KATAKANA LETTER SI
|
||||
ス<>ス; # to HALFWIDTH KATAKANA LETTER SU
|
||||
セ<>セ; # to HALFWIDTH KATAKANA LETTER SE
|
||||
ソ<>ソ; # to HALFWIDTH KATAKANA LETTER SO
|
||||
タ<>タ; # to HALFWIDTH KATAKANA LETTER TA
|
||||
チ<>チ; # to HALFWIDTH KATAKANA LETTER TI
|
||||
ツ<>ツ; # to HALFWIDTH KATAKANA LETTER TU
|
||||
テ<>テ; # to HALFWIDTH KATAKANA LETTER TE
|
||||
ト<>ト; # to HALFWIDTH KATAKANA LETTER TO
|
||||
ナ<>ナ; # to HALFWIDTH KATAKANA LETTER NA
|
||||
ニ<>ニ; # to HALFWIDTH KATAKANA LETTER NI
|
||||
ヌ<>ヌ; # to HALFWIDTH KATAKANA LETTER NU
|
||||
ネ<>ネ; # to HALFWIDTH KATAKANA LETTER NE
|
||||
ノ<>ノ; # to HALFWIDTH KATAKANA LETTER NO
|
||||
ハ<>ハ; # to HALFWIDTH KATAKANA LETTER HA
|
||||
ヒ<>ヒ; # to HALFWIDTH KATAKANA LETTER HI
|
||||
フ<>フ; # to HALFWIDTH KATAKANA LETTER HU
|
||||
ヘ<>ヘ; # to HALFWIDTH KATAKANA LETTER HE
|
||||
ホ<>ホ; # to HALFWIDTH KATAKANA LETTER HO
|
||||
マ<>マ; # to HALFWIDTH KATAKANA LETTER MA
|
||||
ミ<>ミ; # to HALFWIDTH KATAKANA LETTER MI
|
||||
ム<>ム; # to HALFWIDTH KATAKANA LETTER MU
|
||||
メ<>メ; # to HALFWIDTH KATAKANA LETTER ME
|
||||
モ<>モ; # to HALFWIDTH KATAKANA LETTER MO
|
||||
ヤ<>ヤ; # to HALFWIDTH KATAKANA LETTER YA
|
||||
ユ<>ユ; # to HALFWIDTH KATAKANA LETTER YU
|
||||
ヨ<>ヨ; # to HALFWIDTH KATAKANA LETTER YO
|
||||
ラ<>ラ; # to HALFWIDTH KATAKANA LETTER RA
|
||||
リ<>リ; # to HALFWIDTH KATAKANA LETTER RI
|
||||
ル<>ル; # to HALFWIDTH KATAKANA LETTER RU
|
||||
レ<>レ; # to HALFWIDTH KATAKANA LETTER RE
|
||||
ロ<>ロ; # to HALFWIDTH KATAKANA LETTER RO
|
||||
ワ<>ワ; # to HALFWIDTH KATAKANA LETTER WA
|
||||
ン<>ン; # to HALFWIDTH KATAKANA LETTER N
|
||||
゙<>゙; # to HALFWIDTH KATAKANA VOICED SOUND MARK
|
||||
゚<>゚; # to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
||||
ᅠ<>ᅠ; # to HALFWIDTH HANGUL FILLER
|
||||
ᄀ<>ᄀ; # to HALFWIDTH HANGUL LETTER KIYEOK
|
||||
ᄁ<>ᄁ; # to HALFWIDTH HANGUL LETTER SSANGKIYEOK
|
||||
ᆪ<>ᆪ; # to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
|
||||
ᄂ<>ᄂ; # to HALFWIDTH HANGUL LETTER NIEUN
|
||||
ᆬ<>ᆬ; # to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
|
||||
ᆭ<>ᆭ; # to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
|
||||
ᄃ<>ᄃ; # to HALFWIDTH HANGUL LETTER TIKEUT
|
||||
ᄄ<>ᄄ; # to HALFWIDTH HANGUL LETTER SSANGTIKEUT
|
||||
ᄅ<>ᄅ; # to HALFWIDTH HANGUL LETTER RIEUL
|
||||
ᆰ<>ᆰ; # to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
|
||||
ᆱ<>ᆱ; # to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
|
||||
ᆲ<>ᆲ; # to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
|
||||
ᆳ<>ᆳ; # to HALFWIDTH HANGUL LETTER RIEUL-SIOS
|
||||
ᆴ<>ᆴ; # to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
|
||||
ᆵ<>ᆵ; # to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
|
||||
ᄚ<>ᄚ; # to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
|
||||
ᄆ<>ᄆ; # to HALFWIDTH HANGUL LETTER MIEUM
|
||||
ᄇ<>ᄇ; # to HALFWIDTH HANGUL LETTER PIEUP
|
||||
ᄈ<>ᄈ; # to HALFWIDTH HANGUL LETTER SSANGPIEUP
|
||||
ᄡ<>ᄡ; # to HALFWIDTH HANGUL LETTER PIEUP-SIOS
|
||||
ᄉ<>ᄉ; # to HALFWIDTH HANGUL LETTER SIOS
|
||||
ᄊ<>ᄊ; # to HALFWIDTH HANGUL LETTER SSANGSIOS
|
||||
ᄋ<>ᄋ; # to HALFWIDTH HANGUL LETTER IEUNG
|
||||
ᄌ<>ᄌ; # to HALFWIDTH HANGUL LETTER CIEUC
|
||||
ᄍ<>ᄍ; # to HALFWIDTH HANGUL LETTER SSANGCIEUC
|
||||
ᄎ<>ᄎ; # to HALFWIDTH HANGUL LETTER CHIEUCH
|
||||
ᄏ<>ᄏ; # to HALFWIDTH HANGUL LETTER KHIEUKH
|
||||
ᄐ<>ᄐ; # to HALFWIDTH HANGUL LETTER THIEUTH
|
||||
ᄑ<>ᄑ; # to HALFWIDTH HANGUL LETTER PHIEUPH
|
||||
ᄒ<>ᄒ; # to HALFWIDTH HANGUL LETTER HIEUH
|
||||
ᅡ<>ᅡ; # to HALFWIDTH HANGUL LETTER A
|
||||
ᅢ<>ᅢ; # to HALFWIDTH HANGUL LETTER AE
|
||||
ᅣ<>ᅣ; # to HALFWIDTH HANGUL LETTER YA
|
||||
ᅤ<>ᅤ; # to HALFWIDTH HANGUL LETTER YAE
|
||||
ᅥ<>ᅥ; # to HALFWIDTH HANGUL LETTER EO
|
||||
ᅦ<>ᅦ; # to HALFWIDTH HANGUL LETTER E
|
||||
ᅧ<>ᅧ; # to HALFWIDTH HANGUL LETTER YEO
|
||||
ᅨ<>ᅨ; # to HALFWIDTH HANGUL LETTER YE
|
||||
ᅩ<>ᅩ; # to HALFWIDTH HANGUL LETTER O
|
||||
ᅪ<>ᅪ; # to HALFWIDTH HANGUL LETTER WA
|
||||
ᅫ<>ᅫ; # to HALFWIDTH HANGUL LETTER WAE
|
||||
ᅬ<>ᅬ; # to HALFWIDTH HANGUL LETTER OE
|
||||
ᅭ<>ᅭ; # to HALFWIDTH HANGUL LETTER YO
|
||||
ᅮ<>ᅮ; # to HALFWIDTH HANGUL LETTER U
|
||||
ᅯ<>ᅯ; # to HALFWIDTH HANGUL LETTER WEO
|
||||
ᅰ<>ᅰ; # to HALFWIDTH HANGUL LETTER WE
|
||||
ᅱ<>ᅱ; # to HALFWIDTH HANGUL LETTER WI
|
||||
ᅲ<>ᅲ; # to HALFWIDTH HANGUL LETTER YU
|
||||
ᅳ<>ᅳ; # to HALFWIDTH HANGUL LETTER EU
|
||||
ᅴ<>ᅴ; # to HALFWIDTH HANGUL LETTER YI
|
||||
ᅵ<>ᅵ; # to HALFWIDTH HANGUL LETTER I
|
||||
¢<>'¢'; # from FULLWIDTH CENT SIGN
|
||||
£<>'£'; # from FULLWIDTH POUND SIGN
|
||||
¬<>'¬'; # from FULLWIDTH NOT SIGN
|
||||
 ̄<>' '̄; # from FULLWIDTH MACRON
|
||||
¦<>'¦'; # from FULLWIDTH BROKEN BAR
|
||||
¥<>'¥'; # from FULLWIDTH YEN SIGN
|
||||
₩<>₩; # from FULLWIDTH WON SIGN
|
||||
│<>│; # to HALFWIDTH FORMS LIGHT VERTICAL
|
||||
←<>←; # to HALFWIDTH LEFTWARDS ARROW
|
||||
↑<>↑; # to HALFWIDTH UPWARDS ARROW
|
||||
→<>→; # to HALFWIDTH RIGHTWARDS ARROW
|
||||
↓<>↓; # to HALFWIDTH DOWNWARDS ARROW
|
||||
■<>■; # to HALFWIDTH BLACK SQUARE
|
||||
○<>○; # to HALFWIDTH WHITE CIRCLE
|
||||
|
||||
# eof
|
89
icu4j/src/com/ibm/text/resources/Transliterator_Gujarati_InterIndic.utf8.txt
Executable file
89
icu4j/src/com/ibm/text/resources/Transliterator_Gujarati_InterIndic.utf8.txt
Executable file
|
@ -0,0 +1,89 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:41:58 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Gujarati-InterIndic
|
||||
|
||||
ઁ>\uE001; # SIGN CANDRABINDU
|
||||
ં>\uE002; # SIGN ANUSVARA
|
||||
ઃ>\uE003; # SIGN VISARGA
|
||||
અ>\uE005; # LETTER A
|
||||
આ>\uE006; # LETTER AA
|
||||
ઇ>\uE007; # LETTER I
|
||||
ઈ>\uE008; # LETTER II
|
||||
ઉ>\uE009; # LETTER U
|
||||
ઊ>\uE00A; # LETTER UU
|
||||
ઋ>\uE00B; # LETTER VOCALIC R
|
||||
# ઍ>; // UNMAPPED Gujarati-InterIndic: VOWEL CANDRA E
|
||||
એ>\uE081; # LETTER E
|
||||
ઐ>\uE010; # LETTER AI
|
||||
# ઑ>; // UNMAPPED Gujarati-InterIndic: VOWEL CANDRA O
|
||||
ઓ>\uE082; # LETTER O
|
||||
ઔ>\uE014; # LETTER AU
|
||||
ક>\uE015; # LETTER KA
|
||||
ખ>\uE016; # LETTER KHA
|
||||
ગ>\uE017; # LETTER GA
|
||||
ઘ>\uE018; # LETTER GHA
|
||||
ઙ>\uE019; # LETTER NGA
|
||||
ચ>\uE01A; # LETTER CA
|
||||
છ>\uE01B; # LETTER CHA
|
||||
જ>\uE01C; # LETTER JA
|
||||
ઝ>\uE01D; # LETTER JHA
|
||||
ઞ>\uE01E; # LETTER NYA
|
||||
ટ>\uE01F; # LETTER TTA
|
||||
ઠ>\uE020; # LETTER TTHA
|
||||
ડ>\uE021; # LETTER DDA
|
||||
ઢ>\uE022; # LETTER DDHA
|
||||
ણ>\uE023; # LETTER NNA
|
||||
ત>\uE024; # LETTER TA
|
||||
થ>\uE025; # LETTER THA
|
||||
દ>\uE026; # LETTER DA
|
||||
ધ>\uE027; # LETTER DHA
|
||||
ન>\uE028; # LETTER NA
|
||||
પ>\uE02A; # LETTER PA
|
||||
ફ>\uE02B; # LETTER PHA
|
||||
બ>\uE02C; # LETTER BA
|
||||
ભ>\uE02D; # LETTER BHA
|
||||
મ>\uE02E; # LETTER MA
|
||||
ય>\uE02F; # LETTER YA
|
||||
ર>\uE030; # LETTER RA
|
||||
લ>\uE032; # LETTER LA
|
||||
ળ>\uE033; # LETTER LLA
|
||||
વ>\uE035; # LETTER VA
|
||||
શ>\uE036; # LETTER SHA
|
||||
ષ>\uE037; # LETTER SSA
|
||||
સ>\uE038; # LETTER SA
|
||||
હ>\uE039; # LETTER HA
|
||||
઼>\uE03C; # SIGN NUKTA
|
||||
ઽ>\uE03D; # SIGN AVAGRAHA
|
||||
ા>\uE03E; # VOWEL SIGN AA
|
||||
િ>\uE03F; # VOWEL SIGN I
|
||||
ી>\uE040; # VOWEL SIGN II
|
||||
ુ>\uE041; # VOWEL SIGN U
|
||||
ૂ>\uE042; # VOWEL SIGN UU
|
||||
ૃ>\uE043; # VOWEL SIGN VOCALIC R
|
||||
ૄ>\uE044; # VOWEL SIGN VOCALIC RR
|
||||
ૅ>\uE045; # VOWEL SIGN CANDRA E
|
||||
ે>\uE084; # VOWEL SIGN E
|
||||
ૈ>\uE048; # VOWEL SIGN AI
|
||||
ૉ>\uE049; # VOWEL SIGN CANDRA O
|
||||
ો>\uE085; # VOWEL SIGN O
|
||||
ૌ>\uE04C; # VOWEL SIGN AU
|
||||
્>\uE04D; # SIGN VIRAMA
|
||||
ૐ>\uE050; # OM
|
||||
ૠ>\uE060; # LETTER VOCALIC RR
|
||||
૦>\uE066; # DIGIT ZERO
|
||||
૧>\uE067; # DIGIT ONE
|
||||
૨>\uE068; # DIGIT TWO
|
||||
૩>\uE069; # DIGIT THREE
|
||||
૪>\uE06A; # DIGIT FOUR
|
||||
૫>\uE06B; # DIGIT FIVE
|
||||
૬>\uE06C; # DIGIT SIX
|
||||
૭>\uE06D; # DIGIT SEVEN
|
||||
૮>\uE06E; # DIGIT EIGHT
|
||||
૯>\uE06F; # DIGIT NINE
|
||||
|
||||
# eof
|
86
icu4j/src/com/ibm/text/resources/Transliterator_Gurmukhi_InterIndic.utf8.txt
Executable file
86
icu4j/src/com/ibm/text/resources/Transliterator_Gurmukhi_InterIndic.utf8.txt
Executable file
|
@ -0,0 +1,86 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:41:58 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Gurmukhi-InterIndic
|
||||
|
||||
ਂ>\uE001; # REMAP (indicExceptions.txt): ਁ>ਂ = SIGN CANDRABINDU>SIGN BINDI
|
||||
ਅ>\uE005; # LETTER A
|
||||
ਆ>\uE006; # LETTER AA
|
||||
ਇ>\uE007; # LETTER I
|
||||
ਈ>\uE008; # LETTER II
|
||||
ਉ>\uE009; # LETTER U
|
||||
ਊ>\uE00A; # LETTER UU
|
||||
ਏ>\uE00F; # LETTER EE
|
||||
ਐ>\uE010; # LETTER AI
|
||||
ਓ>\uE013; # LETTER OO
|
||||
ਔ>\uE014; # LETTER AU
|
||||
ਕ>\uE015; # LETTER KA
|
||||
ਖ>\uE016; # LETTER KHA
|
||||
ਗ>\uE017; # LETTER GA
|
||||
ਘ>\uE018; # LETTER GHA
|
||||
ਙ>\uE019; # LETTER NGA
|
||||
ਚ>\uE01A; # LETTER CA
|
||||
ਛ>\uE01B; # LETTER CHA
|
||||
ਜ>\uE01C; # LETTER JA
|
||||
ਝ>\uE01D; # LETTER JHA
|
||||
ਞ>\uE01E; # LETTER NYA
|
||||
ਟ>\uE01F; # LETTER TTA
|
||||
ਠ>\uE020; # LETTER TTHA
|
||||
ਡ>\uE021; # LETTER DDA
|
||||
ਢ>\uE022; # LETTER DDHA
|
||||
ਣ>\uE023; # LETTER NNA
|
||||
ਤ>\uE024; # LETTER TA
|
||||
ਥ>\uE025; # LETTER THA
|
||||
ਦ>\uE026; # LETTER DA
|
||||
ਧ>\uE027; # LETTER DHA
|
||||
ਨ>\uE028; # LETTER NA
|
||||
ਪ>\uE02A; # LETTER PA
|
||||
ਫ>\uE02B; # LETTER PHA
|
||||
ਬ>\uE02C; # LETTER BA
|
||||
ਭ>\uE02D; # LETTER BHA
|
||||
ਮ>\uE02E; # LETTER MA
|
||||
ਯ>\uE02F; # LETTER YA
|
||||
ਰ>\uE030; # LETTER RA
|
||||
ਲ>\uE032; # LETTER LA
|
||||
ਲ਼>\uE033; # LETTER LLA
|
||||
ਵ>\uE035; # LETTER VA
|
||||
ਸ਼>\uE036; # LETTER SHA
|
||||
ਸ>\uE038; # LETTER SA
|
||||
ਹ>\uE039; # LETTER HA
|
||||
਼>\uE03C; # SIGN NUKTA
|
||||
ਾ>\uE03E; # VOWEL SIGN AA
|
||||
ਿ>\uE03F; # VOWEL SIGN I
|
||||
ੀ>\uE040; # VOWEL SIGN II
|
||||
ੁ>\uE041; # VOWEL SIGN U
|
||||
ੂ>\uE042; # VOWEL SIGN UU
|
||||
ੇ>\uE047; # VOWEL SIGN EE
|
||||
ੈ>\uE048; # VOWEL SIGN AI
|
||||
ੋ>\uE04B; # VOWEL SIGN OO
|
||||
ੌ>\uE04C; # VOWEL SIGN AU
|
||||
੍>\uE04D; # SIGN VIRAMA
|
||||
ਖ਼>\uE059; # LETTER KHHA
|
||||
ਗ਼>\uE05A; # LETTER GHHA
|
||||
ਜ਼>\uE05B; # LETTER ZA
|
||||
ੜ>\uE083; # LETTER RRA
|
||||
ਫ਼>\uE05E; # LETTER FA
|
||||
੦>\uE066; # DIGIT ZERO
|
||||
੧>\uE067; # DIGIT ONE
|
||||
੨>\uE068; # DIGIT TWO
|
||||
੩>\uE069; # DIGIT THREE
|
||||
੪>\uE06A; # DIGIT FOUR
|
||||
੫>\uE06B; # DIGIT FIVE
|
||||
੬>\uE06C; # DIGIT SIX
|
||||
੭>\uE06D; # DIGIT SEVEN
|
||||
੮>\uE06E; # DIGIT EIGHT
|
||||
੯>\uE06F; # DIGIT NINE
|
||||
# ੰ>; // UNMAPPED Gurmukhi-InterIndic: TIPPI
|
||||
# ੱ>; // UNMAPPED Gurmukhi-InterIndic: ADDAK
|
||||
# ੲ>; // UNMAPPED Gurmukhi-InterIndic: IRI
|
||||
# ੳ>; // UNMAPPED Gurmukhi-InterIndic: URA
|
||||
# ੴ>; // UNMAPPED Gurmukhi-InterIndic: EK ONKAR
|
||||
|
||||
# eof
|
20365
icu4j/src/com/ibm/text/resources/Transliterator_Han_Pinyin.utf8.txt
Executable file
20365
icu4j/src/com/ibm/text/resources/Transliterator_Han_Pinyin.utf8.txt
Executable file
File diff suppressed because it is too large
Load diff
200
icu4j/src/com/ibm/text/resources/Transliterator_Hiragana_Katakana.utf8.txt
Executable file
200
icu4j/src/com/ibm/text/resources/Transliterator_Hiragana_Katakana.utf8.txt
Executable file
|
@ -0,0 +1,200 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Hiragana-Katakana
|
||||
|
||||
# This is largely a one-to-one mapping, but it has a
|
||||
# few kinks:
|
||||
|
||||
# 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
|
||||
# Hiragana equivalents. We use Hiragana wa/wi/we/wo
|
||||
# (308F-3092) with a voicing mark (3099), which is
|
||||
# semantically equivalent. However, this is a non-
|
||||
# roundtripping transformation.
|
||||
|
||||
# 2. The Katakana small ka/ke (30F5,30F6) have no
|
||||
# Hiragana equivalents. We convert them to normal
|
||||
# Hiragana ka/ke (304B,3051). This is a one-way
|
||||
# information-losing transformation and precludes
|
||||
# round-tripping of 30F5 and 30F6.
|
||||
|
||||
# 3. The combining marks 3099-309C are in the Hiragana
|
||||
# block, but they apply to Katakana as well, so we
|
||||
# leave them untouched.
|
||||
|
||||
# 4. The Katakana prolonged sound mark 30FC doubles the
|
||||
# preceding vowel. This is a one-way information-
|
||||
# losing transformation from Katakana to Hiragana.
|
||||
|
||||
# 5. The Katakana middle dot separates words in foreign
|
||||
# expressions; we leave this unmodified.
|
||||
|
||||
# The above points preclude successful round-trip
|
||||
# transformations of arbitrary input text. However,
|
||||
# they provide naturalistic results that should conform
|
||||
# to user expectations.
|
||||
|
||||
|
||||
# Combining equivalents va/vi/ve/vo
|
||||
わ゙ <> ヷ;
|
||||
ゐ゙ <> ヸ;
|
||||
ゑ゙ <> ヹ;
|
||||
を゙ <> ヺ;
|
||||
|
||||
# One-to-one mappings, main block
|
||||
# 3041:3094 <> 30A1:30F4
|
||||
# 309D,E <> 30FD,E
|
||||
ぁ <> ァ;
|
||||
あ <> ア;
|
||||
ぃ <> ィ;
|
||||
い <> イ;
|
||||
ぅ <> ゥ;
|
||||
う <> ウ;
|
||||
ぇ <> ェ;
|
||||
え <> エ;
|
||||
ぉ <> ォ;
|
||||
お <> オ;
|
||||
か <> カ;
|
||||
が <> ガ;
|
||||
き <> キ;
|
||||
ぎ <> ギ;
|
||||
く <> ク;
|
||||
ぐ <> グ;
|
||||
け <> ケ;
|
||||
げ <> ゲ;
|
||||
こ <> コ;
|
||||
ご <> ゴ;
|
||||
さ <> サ;
|
||||
ざ <> ザ;
|
||||
し <> シ;
|
||||
じ <> ジ;
|
||||
す <> ス;
|
||||
ず <> ズ;
|
||||
せ <> セ;
|
||||
ぜ <> ゼ;
|
||||
そ <> ソ;
|
||||
ぞ <> ゾ;
|
||||
た <> タ;
|
||||
だ <> ダ;
|
||||
ち <> チ;
|
||||
ぢ <> ヂ;
|
||||
っ <> ッ;
|
||||
つ <> ツ;
|
||||
づ <> ヅ;
|
||||
て <> テ;
|
||||
で <> デ;
|
||||
と <> ト;
|
||||
ど <> ド;
|
||||
な <> ナ;
|
||||
に <> ニ;
|
||||
ぬ <> ヌ;
|
||||
ね <> ネ;
|
||||
の <> ノ;
|
||||
は <> ハ;
|
||||
ば <> バ;
|
||||
ぱ <> パ;
|
||||
ひ <> ヒ;
|
||||
び <> ビ;
|
||||
ぴ <> ピ;
|
||||
ふ <> フ;
|
||||
ぶ <> ブ;
|
||||
ぷ <> プ;
|
||||
へ <> ヘ;
|
||||
べ <> ベ;
|
||||
ぺ <> ペ;
|
||||
ほ <> ホ;
|
||||
ぼ <> ボ;
|
||||
ぽ <> ポ;
|
||||
ま <> マ;
|
||||
み <> ミ;
|
||||
む <> ム;
|
||||
め <> メ;
|
||||
も <> モ;
|
||||
ゃ <> ャ;
|
||||
や <> ヤ;
|
||||
ゅ <> ュ;
|
||||
ゆ <> ユ;
|
||||
ょ <> ョ;
|
||||
よ <> ヨ;
|
||||
ら <> ラ;
|
||||
り <> リ;
|
||||
る <> ル;
|
||||
れ <> レ;
|
||||
ろ <> ロ;
|
||||
ゎ <> ヮ;
|
||||
わ <> ワ;
|
||||
ゐ <> ヰ;
|
||||
ゑ <> ヱ;
|
||||
を <> ヲ;
|
||||
ん <> ン;
|
||||
ゔ <> ヴ;
|
||||
ゝ <> ヽ;
|
||||
ゞ <> ヾ;
|
||||
|
||||
# One-way Katakana-Hiragana xform of small K ka/ke to
|
||||
# normal H ka/ke.
|
||||
か < ヵ;
|
||||
け < ヶ;
|
||||
|
||||
# Katakana followed by a prolonged sound mark 30FC has
|
||||
# its final vowel doubled. This is a Katakana-Hiragana
|
||||
# one-way information-losing transformation. We
|
||||
# include the small Katakana (e.g., small A 3041) and
|
||||
# do not distinguish them from their large
|
||||
# counterparts. It doesn't make sense to double a
|
||||
# small counterpart vowel as a small Hiragana vowel, so
|
||||
# we don't do so. In natural text this should never
|
||||
# occur anyway. If a 30FC is seen without a preceding
|
||||
# vowel sound (e.g., after n 30F3) we do not change it.
|
||||
|
||||
### $long = ー;
|
||||
|
||||
# The following categories are Hiragana, not Katakana
|
||||
# as might be expected, since by the time we get to the
|
||||
# 30FC, the preceding character will have already been
|
||||
# transformed to Hiragana.
|
||||
|
||||
# {The following mechanically generated from the
|
||||
# Unicode 3.0 data:}
|
||||
|
||||
$xa = [ \
|
||||
ぁ あ か が さ ざ \
|
||||
た だ な は ば ぱ \
|
||||
ま ゃ や ら ゎ わ \
|
||||
];
|
||||
|
||||
$xi = [ \
|
||||
ぃ い き ぎ し じ \
|
||||
ち ぢ に ひ び ぴ \
|
||||
み り ゐ \
|
||||
];
|
||||
|
||||
$xu = [ \
|
||||
ぅ う く ぐ す ず \
|
||||
っ つ づ ぬ ふ ぶ \
|
||||
ぷ む ゅ ゆ る ゔ \
|
||||
];
|
||||
|
||||
$xe = [ \
|
||||
ぇ え け げ せ ぜ \
|
||||
て で ね へ べ ぺ \
|
||||
め れ ゑ \
|
||||
];
|
||||
|
||||
$xo = [ \
|
||||
ぉ お こ ご そ ぞ \
|
||||
と ど の ほ ぼ ぽ \
|
||||
も ょ よ ろ を \
|
||||
];
|
||||
|
||||
あ < $xa {ー};
|
||||
い < $xi {ー};
|
||||
う < $xu {ー};
|
||||
え < $xe {ー};
|
||||
お < $xo {ー};
|
||||
|
||||
# eof
|
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Bengali.utf8.txt
Executable file
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Bengali.utf8.txt
Executable file
|
@ -0,0 +1,108 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:41:59 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Bengali
|
||||
|
||||
\uE001>ঁ; # SIGN CANDRABINDU
|
||||
\uE002>ং; # SIGN ANUSVARA
|
||||
\uE003>ঃ; # SIGN VISARGA
|
||||
\uE005>অ; # LETTER A
|
||||
\uE006>আ; # LETTER AA
|
||||
\uE007>ই; # LETTER I
|
||||
\uE008>ঈ; # LETTER II
|
||||
\uE009>উ; # LETTER U
|
||||
\uE00A>ঊ; # LETTER UU
|
||||
\uE00B>ঋ; # LETTER VOCALIC R
|
||||
\uE00C>ঌ; # LETTER VOCALIC L
|
||||
# \uE00F>; // UNMAPPED InterIndic-Bengali: LETTER EE (এ = LETTER E)
|
||||
\uE010>ঐ; # LETTER AI
|
||||
# \uE013>; // UNMAPPED InterIndic-Bengali: LETTER OO (ও = LETTER O)
|
||||
\uE014>ঔ; # LETTER AU
|
||||
\uE015>ক; # LETTER KA
|
||||
\uE016>খ; # LETTER KHA
|
||||
\uE017>গ; # LETTER GA
|
||||
\uE018>ঘ; # LETTER GHA
|
||||
\uE019>ঙ; # LETTER NGA
|
||||
\uE01A>চ; # LETTER CA
|
||||
\uE01B>ছ; # LETTER CHA
|
||||
\uE01C>জ; # LETTER JA
|
||||
\uE01D>ঝ; # LETTER JHA
|
||||
\uE01E>ঞ; # LETTER NYA
|
||||
\uE01F>ট; # LETTER TTA
|
||||
\uE020>ঠ; # LETTER TTHA
|
||||
\uE021>ড; # LETTER DDA
|
||||
\uE022>ঢ; # LETTER DDHA
|
||||
\uE023>ণ; # LETTER NNA
|
||||
\uE024>ত; # LETTER TA
|
||||
\uE025>থ; # LETTER THA
|
||||
\uE026>দ; # LETTER DA
|
||||
\uE027>ধ; # LETTER DHA
|
||||
\uE028>ন; # LETTER NA
|
||||
\uE029>ন; # REMAP (indicExceptions.txt): >ন = LETTER NNNA>LETTER NA
|
||||
\uE02A>প; # LETTER PA
|
||||
\uE02B>ফ; # LETTER PHA
|
||||
\uE02C>ব; # LETTER BA
|
||||
\uE02D>ভ; # LETTER BHA
|
||||
\uE02E>ম; # LETTER MA
|
||||
\uE02F>য; # LETTER YA
|
||||
\uE030>র; # LETTER RA
|
||||
\uE032>ল; # LETTER LA
|
||||
\uE033>ল; # REMAP (indicExceptions.txt): >ল = LETTER LLA>LETTER LA
|
||||
\uE034>ল; # REMAP (indicExceptions.txt): >ল = LETTER LLLA>LETTER LA
|
||||
\uE035>ব; # REMAP (indicExceptions.txt): >ব = LETTER VA>LETTER BA
|
||||
\uE036>শ; # LETTER SHA
|
||||
\uE037>ষ; # LETTER SSA
|
||||
\uE038>স; # LETTER SA
|
||||
\uE039>হ; # LETTER HA
|
||||
\uE03C>়; # SIGN NUKTA
|
||||
# \uE03D>; // UNMAPPED InterIndic-Bengali: SIGN AVAGRAHA
|
||||
\uE03E>া; # VOWEL SIGN AA
|
||||
\uE03F>ি; # VOWEL SIGN I
|
||||
\uE040>ী; # VOWEL SIGN II
|
||||
\uE041>ু; # VOWEL SIGN U
|
||||
\uE042>ূ; # VOWEL SIGN UU
|
||||
\uE043>ৃ; # VOWEL SIGN VOCALIC R
|
||||
\uE044>ৄ; # VOWEL SIGN VOCALIC RR
|
||||
\uE045>ে; # REMAP (indicExceptions.txt): >ে = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
# \uE047>; // UNMAPPED InterIndic-Bengali: VOWEL SIGN EE (ে = VOWEL SIGN E)
|
||||
\uE048>ৈ; # VOWEL SIGN AI
|
||||
\uE049>ো; # REMAP (indicExceptions.txt): >ো = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
# \uE04B>; // UNMAPPED InterIndic-Bengali: VOWEL SIGN OO (ো = VOWEL SIGN O)
|
||||
\uE04C>ৌ; # VOWEL SIGN AU
|
||||
\uE04D>্; # SIGN VIRAMA
|
||||
# \uE050>; // UNMAPPED InterIndic-Bengali: OM
|
||||
# \uE055>; // UNMAPPED InterIndic-Bengali: LENGTH MARK
|
||||
\uE056>ৈ; # REMAP (indicExceptions.txt): >ৈ = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\uE057>ৗ; # AU LENGTH MARK
|
||||
\uE059>খ; # REMAP (indicExceptions.txt): >খ = LETTER KHHA>LETTER KHA
|
||||
\uE05A>গ; # REMAP (indicExceptions.txt): >গ = LETTER GHHA>LETTER GA
|
||||
\uE05B>জ; # REMAP (indicExceptions.txt): >জ = LETTER ZA>LETTER JA
|
||||
\uE05D>ঢ়; # LETTER RHA
|
||||
\uE05E>ফ; # REMAP (indicExceptions.txt): >ফ = LETTER FA>LETTER PHA
|
||||
\uE05F>য়; # LETTER YYA
|
||||
\uE060>ৠ; # LETTER VOCALIC RR
|
||||
\uE061>ৡ; # LETTER VOCALIC LL
|
||||
\uE062>ৢ; # VOWEL SIGN VOCALIC L
|
||||
\uE063>ৣ; # VOWEL SIGN VOCALIC LL
|
||||
\uE066>০; # DIGIT ZERO
|
||||
\uE067>১; # DIGIT ONE
|
||||
\uE068>২; # DIGIT TWO
|
||||
\uE069>৩; # DIGIT THREE
|
||||
\uE06A>৪; # DIGIT FOUR
|
||||
\uE06B>৫; # DIGIT FIVE
|
||||
\uE06C>৬; # DIGIT SIX
|
||||
\uE06D>৭; # DIGIT SEVEN
|
||||
\uE06E>৮; # DIGIT EIGHT
|
||||
\uE06F>৯; # DIGIT NINE
|
||||
\uE080>৺; # ISSHAR
|
||||
\uE081>এ; # LETTER E
|
||||
\uE082>ও; # LETTER O
|
||||
\uE083>ড়; # LETTER RRA
|
||||
\uE084>ে; # VOWEL SIGN E
|
||||
\uE085>ো; # VOWEL SIGN O
|
||||
|
||||
# eof
|
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Devanagari.utf8.txt
Executable file
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Devanagari.utf8.txt
Executable file
|
@ -0,0 +1,108 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:41:59 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Devanagari
|
||||
|
||||
\uE001>ँ; # SIGN CANDRABINDU
|
||||
\uE002>ं; # SIGN ANUSVARA
|
||||
\uE003>ः; # SIGN VISARGA
|
||||
\uE005>अ; # LETTER A
|
||||
\uE006>आ; # LETTER AA
|
||||
\uE007>इ; # LETTER I
|
||||
\uE008>ई; # LETTER II
|
||||
\uE009>उ; # LETTER U
|
||||
\uE00A>ऊ; # LETTER UU
|
||||
\uE00B>ऋ; # LETTER VOCALIC R
|
||||
\uE00C>ऌ; # LETTER VOCALIC L
|
||||
# \uE00F>; // UNMAPPED InterIndic-Devanagari: LETTER EE (ए = LETTER E)
|
||||
\uE010>ऐ; # LETTER AI
|
||||
# \uE013>; // UNMAPPED InterIndic-Devanagari: LETTER OO (ओ = LETTER O)
|
||||
\uE014>औ; # LETTER AU
|
||||
\uE015>क; # LETTER KA
|
||||
\uE016>ख; # LETTER KHA
|
||||
\uE017>ग; # LETTER GA
|
||||
\uE018>घ; # LETTER GHA
|
||||
\uE019>ङ; # LETTER NGA
|
||||
\uE01A>च; # LETTER CA
|
||||
\uE01B>छ; # LETTER CHA
|
||||
\uE01C>ज; # LETTER JA
|
||||
\uE01D>झ; # LETTER JHA
|
||||
\uE01E>ञ; # LETTER NYA
|
||||
\uE01F>ट; # LETTER TTA
|
||||
\uE020>ठ; # LETTER TTHA
|
||||
\uE021>ड; # LETTER DDA
|
||||
\uE022>ढ; # LETTER DDHA
|
||||
\uE023>ण; # LETTER NNA
|
||||
\uE024>त; # LETTER TA
|
||||
\uE025>थ; # LETTER THA
|
||||
\uE026>द; # LETTER DA
|
||||
\uE027>ध; # LETTER DHA
|
||||
\uE028>न; # LETTER NA
|
||||
\uE029>ऩ; # LETTER NNNA
|
||||
\uE02A>प; # LETTER PA
|
||||
\uE02B>फ; # LETTER PHA
|
||||
\uE02C>ब; # LETTER BA
|
||||
\uE02D>भ; # LETTER BHA
|
||||
\uE02E>म; # LETTER MA
|
||||
\uE02F>य; # LETTER YA
|
||||
\uE030>र; # LETTER RA
|
||||
\uE032>ल; # LETTER LA
|
||||
\uE033>ळ; # LETTER LLA
|
||||
\uE034>ऴ; # LETTER LLLA
|
||||
\uE035>व; # LETTER VA
|
||||
\uE036>श; # LETTER SHA
|
||||
\uE037>ष; # LETTER SSA
|
||||
\uE038>स; # LETTER SA
|
||||
\uE039>ह; # LETTER HA
|
||||
\uE03C>़; # SIGN NUKTA
|
||||
\uE03D>ऽ; # SIGN AVAGRAHA
|
||||
\uE03E>ा; # VOWEL SIGN AA
|
||||
\uE03F>ि; # VOWEL SIGN I
|
||||
\uE040>ी; # VOWEL SIGN II
|
||||
\uE041>ु; # VOWEL SIGN U
|
||||
\uE042>ू; # VOWEL SIGN UU
|
||||
\uE043>ृ; # VOWEL SIGN VOCALIC R
|
||||
\uE044>ॄ; # VOWEL SIGN VOCALIC RR
|
||||
\uE045>ॅ; # VOWEL SIGN CANDRA E
|
||||
# \uE047>; // UNMAPPED InterIndic-Devanagari: VOWEL SIGN EE (े = VOWEL SIGN E)
|
||||
\uE048>ै; # VOWEL SIGN AI
|
||||
\uE049>ॉ; # VOWEL SIGN CANDRA O
|
||||
# \uE04B>; // UNMAPPED InterIndic-Devanagari: VOWEL SIGN OO (ो = VOWEL SIGN O)
|
||||
\uE04C>ौ; # VOWEL SIGN AU
|
||||
\uE04D>्; # SIGN VIRAMA
|
||||
\uE050>ॐ; # OM
|
||||
# \uE055>; // UNMAPPED InterIndic-Devanagari: LENGTH MARK
|
||||
\uE056>ै; # REMAP (indicExceptions.txt): ॖ>ै = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\uE057>ौ; # REMAP (indicExceptions.txt): ॗ>ौ = AU LENGTH MARK>VOWEL SIGN AU
|
||||
\uE059>ख़; # LETTER KHHA
|
||||
\uE05A>ग़; # LETTER GHHA
|
||||
\uE05B>ज़; # LETTER ZA
|
||||
\uE05D>ढ़; # LETTER RHA
|
||||
\uE05E>फ़; # LETTER FA
|
||||
\uE05F>य़; # LETTER YYA
|
||||
\uE060>ॠ; # LETTER VOCALIC RR
|
||||
\uE061>ॡ; # LETTER VOCALIC LL
|
||||
\uE062>ॢ; # VOWEL SIGN VOCALIC L
|
||||
\uE063>ॣ; # VOWEL SIGN VOCALIC LL
|
||||
\uE066>०; # DIGIT ZERO
|
||||
\uE067>१; # DIGIT ONE
|
||||
\uE068>२; # DIGIT TWO
|
||||
\uE069>३; # DIGIT THREE
|
||||
\uE06A>४; # DIGIT FOUR
|
||||
\uE06B>५; # DIGIT FIVE
|
||||
\uE06C>६; # DIGIT SIX
|
||||
\uE06D>७; # DIGIT SEVEN
|
||||
\uE06E>८; # DIGIT EIGHT
|
||||
\uE06F>९; # DIGIT NINE
|
||||
# \uE080>; // UNMAPPED InterIndic-Devanagari: ISSHAR
|
||||
\uE081>ए; # LETTER E
|
||||
\uE082>ओ; # LETTER O
|
||||
\uE083>ऱ; # LETTER RRA
|
||||
\uE084>े; # VOWEL SIGN E
|
||||
\uE085>ो; # VOWEL SIGN O
|
||||
|
||||
# eof
|
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Gujarati.utf8.txt
Executable file
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Gujarati.utf8.txt
Executable file
|
@ -0,0 +1,108 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:41:59 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Gujarati
|
||||
|
||||
\uE001>ઁ; # SIGN CANDRABINDU
|
||||
\uE002>ં; # SIGN ANUSVARA
|
||||
\uE003>ઃ; # SIGN VISARGA
|
||||
\uE005>અ; # LETTER A
|
||||
\uE006>આ; # LETTER AA
|
||||
\uE007>ઇ; # LETTER I
|
||||
\uE008>ઈ; # LETTER II
|
||||
\uE009>ઉ; # LETTER U
|
||||
\uE00A>ઊ; # LETTER UU
|
||||
\uE00B>ઋ; # LETTER VOCALIC R
|
||||
\uE00C>લૃ; # REMAP (indicExceptions.txt): ઌ>લૃ = LETTER VOCALIC L>LETTER LA.VOWEL SIGN VOCALIC R
|
||||
# \uE00F>; // UNMAPPED InterIndic-Gujarati: LETTER EE (એ = LETTER E)
|
||||
\uE010>ઐ; # LETTER AI
|
||||
# \uE013>; // UNMAPPED InterIndic-Gujarati: LETTER OO (ઓ = LETTER O)
|
||||
\uE014>ઔ; # LETTER AU
|
||||
\uE015>ક; # LETTER KA
|
||||
\uE016>ખ; # LETTER KHA
|
||||
\uE017>ગ; # LETTER GA
|
||||
\uE018>ઘ; # LETTER GHA
|
||||
\uE019>ઙ; # LETTER NGA
|
||||
\uE01A>ચ; # LETTER CA
|
||||
\uE01B>છ; # LETTER CHA
|
||||
\uE01C>જ; # LETTER JA
|
||||
\uE01D>ઝ; # LETTER JHA
|
||||
\uE01E>ઞ; # LETTER NYA
|
||||
\uE01F>ટ; # LETTER TTA
|
||||
\uE020>ઠ; # LETTER TTHA
|
||||
\uE021>ડ; # LETTER DDA
|
||||
\uE022>ઢ; # LETTER DDHA
|
||||
\uE023>ણ; # LETTER NNA
|
||||
\uE024>ત; # LETTER TA
|
||||
\uE025>થ; # LETTER THA
|
||||
\uE026>દ; # LETTER DA
|
||||
\uE027>ધ; # LETTER DHA
|
||||
\uE028>ન; # LETTER NA
|
||||
\uE029>ન; # REMAP (indicExceptions.txt): >ન = LETTER NNNA>LETTER NA
|
||||
\uE02A>પ; # LETTER PA
|
||||
\uE02B>ફ; # LETTER PHA
|
||||
\uE02C>બ; # LETTER BA
|
||||
\uE02D>ભ; # LETTER BHA
|
||||
\uE02E>મ; # LETTER MA
|
||||
\uE02F>ય; # LETTER YA
|
||||
\uE030>ર; # LETTER RA
|
||||
\uE032>લ; # LETTER LA
|
||||
\uE033>ળ; # LETTER LLA
|
||||
\uE034>ળ; # REMAP (indicExceptions.txt): >ળ = LETTER LLLA>LETTER LLA
|
||||
\uE035>વ; # LETTER VA
|
||||
\uE036>શ; # LETTER SHA
|
||||
\uE037>ષ; # LETTER SSA
|
||||
\uE038>સ; # LETTER SA
|
||||
\uE039>હ; # LETTER HA
|
||||
\uE03C>઼; # SIGN NUKTA
|
||||
\uE03D>ઽ; # SIGN AVAGRAHA
|
||||
\uE03E>ા; # VOWEL SIGN AA
|
||||
\uE03F>િ; # VOWEL SIGN I
|
||||
\uE040>ી; # VOWEL SIGN II
|
||||
\uE041>ુ; # VOWEL SIGN U
|
||||
\uE042>ૂ; # VOWEL SIGN UU
|
||||
\uE043>ૃ; # VOWEL SIGN VOCALIC R
|
||||
\uE044>ૄ; # VOWEL SIGN VOCALIC RR
|
||||
\uE045>ૅ; # VOWEL SIGN CANDRA E
|
||||
# \uE047>; // UNMAPPED InterIndic-Gujarati: VOWEL SIGN EE (ે = VOWEL SIGN E)
|
||||
\uE048>ૈ; # VOWEL SIGN AI
|
||||
\uE049>ૉ; # VOWEL SIGN CANDRA O
|
||||
# \uE04B>; // UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (ો = VOWEL SIGN O)
|
||||
\uE04C>ૌ; # VOWEL SIGN AU
|
||||
\uE04D>્; # SIGN VIRAMA
|
||||
\uE050>ૐ; # OM
|
||||
# \uE055>; // UNMAPPED InterIndic-Gujarati: LENGTH MARK
|
||||
\uE056>ૈ; # REMAP (indicExceptions.txt): >ૈ = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\uE057>ૌ; # REMAP (indicExceptions.txt): >ૌ = AU LENGTH MARK>VOWEL SIGN AU
|
||||
\uE059>ખ઼; # REMAP (indicExceptions.txt): >ખ઼ = LETTER KHHA>LETTER KHA.SIGN NUKTA
|
||||
\uE05A>ગ઼; # REMAP (indicExceptions.txt): >ગ઼ = LETTER GHHA>LETTER GA.SIGN NUKTA
|
||||
\uE05B>જ઼; # REMAP (indicExceptions.txt): >જ઼ = LETTER ZA>LETTER JA.SIGN NUKTA
|
||||
\uE05D>ઢ઼; # REMAP (indicExceptions.txt): >ઢ઼ = LETTER RHA>LETTER DDHA.SIGN NUKTA
|
||||
\uE05E>ફ઼; # REMAP (indicExceptions.txt): >ફ઼ = LETTER FA>LETTER PHA.SIGN NUKTA
|
||||
\uE05F>ય઼; # REMAP (indicExceptions.txt): >ય઼ = LETTER YYA>LETTER YA.SIGN NUKTA
|
||||
\uE060>ૠ; # LETTER VOCALIC RR
|
||||
\uE061>લૃ; # REMAP (indicExceptions.txt): ૡ>લૃ = LETTER VOCALIC LL>LETTER LA.VOWEL SIGN VOCALIC R
|
||||
\uE062>િ઼; # REMAP (indicExceptions.txt): ૢ>િ઼ = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
|
||||
\uE063>ી઼; # REMAP (indicExceptions.txt): ૣ>ી઼ = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
|
||||
\uE066>૦; # DIGIT ZERO
|
||||
\uE067>૧; # DIGIT ONE
|
||||
\uE068>૨; # DIGIT TWO
|
||||
\uE069>૩; # DIGIT THREE
|
||||
\uE06A>૪; # DIGIT FOUR
|
||||
\uE06B>૫; # DIGIT FIVE
|
||||
\uE06C>૬; # DIGIT SIX
|
||||
\uE06D>૭; # DIGIT SEVEN
|
||||
\uE06E>૮; # DIGIT EIGHT
|
||||
\uE06F>૯; # DIGIT NINE
|
||||
# \uE080>; // UNMAPPED InterIndic-Gujarati: ISSHAR
|
||||
\uE081>એ; # LETTER E
|
||||
\uE082>ઓ; # LETTER O
|
||||
# \uE083>; // UNMAPPED InterIndic-Gujarati: LETTER RRA (ઃ = SIGN VISARGA)
|
||||
\uE084>ે; # VOWEL SIGN E
|
||||
\uE085>ો; # VOWEL SIGN O
|
||||
|
||||
# eof
|
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Gurmukhi.utf8.txt
Executable file
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Gurmukhi.utf8.txt
Executable file
|
@ -0,0 +1,108 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:00 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Gurmukhi
|
||||
|
||||
\uE001>ਂ; # REMAP (indicExceptions.txt): ਁ>ਂ = SIGN CANDRABINDU>SIGN BINDI
|
||||
# \uE002>; // UNMAPPED InterIndic-Gurmukhi: SIGN ANUSVARA (ਂ = SIGN BINDI)
|
||||
# \uE003>; // UNMAPPED InterIndic-Gurmukhi: SIGN VISARGA
|
||||
\uE005>ਅ; # LETTER A
|
||||
\uE006>ਆ; # LETTER AA
|
||||
\uE007>ਇ; # LETTER I
|
||||
\uE008>ਈ; # LETTER II
|
||||
\uE009>ਉ; # LETTER U
|
||||
\uE00A>ਊ; # LETTER UU
|
||||
\uE00B>ਰਿ; # REMAP (indicExceptions.txt): >ਰਿ = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
|
||||
\uE00C>ਇ; # REMAP (indicExceptions.txt): >ਇ = LETTER VOCALIC L>LETTER I
|
||||
\uE00F>ਏ; # LETTER EE
|
||||
\uE010>ਐ; # LETTER AI
|
||||
\uE013>ਓ; # LETTER OO
|
||||
\uE014>ਔ; # LETTER AU
|
||||
\uE015>ਕ; # LETTER KA
|
||||
\uE016>ਖ; # LETTER KHA
|
||||
\uE017>ਗ; # LETTER GA
|
||||
\uE018>ਘ; # LETTER GHA
|
||||
\uE019>ਙ; # LETTER NGA
|
||||
\uE01A>ਚ; # LETTER CA
|
||||
\uE01B>ਛ; # LETTER CHA
|
||||
\uE01C>ਜ; # LETTER JA
|
||||
\uE01D>ਝ; # LETTER JHA
|
||||
\uE01E>ਞ; # LETTER NYA
|
||||
\uE01F>ਟ; # LETTER TTA
|
||||
\uE020>ਠ; # LETTER TTHA
|
||||
\uE021>ਡ; # LETTER DDA
|
||||
\uE022>ਢ; # LETTER DDHA
|
||||
\uE023>ਣ; # LETTER NNA
|
||||
\uE024>ਤ; # LETTER TA
|
||||
\uE025>ਥ; # LETTER THA
|
||||
\uE026>ਦ; # LETTER DA
|
||||
\uE027>ਧ; # LETTER DHA
|
||||
\uE028>ਨ; # LETTER NA
|
||||
\uE029>ਨ; # REMAP (indicExceptions.txt): >ਨ = LETTER NNNA>LETTER NA
|
||||
\uE02A>ਪ; # LETTER PA
|
||||
\uE02B>ਫ; # LETTER PHA
|
||||
\uE02C>ਬ; # LETTER BA
|
||||
\uE02D>ਭ; # LETTER BHA
|
||||
\uE02E>ਮ; # LETTER MA
|
||||
\uE02F>ਯ; # LETTER YA
|
||||
\uE030>ਰ; # LETTER RA
|
||||
\uE032>ਲ; # LETTER LA
|
||||
\uE033>ਲ਼; # LETTER LLA
|
||||
\uE034>ਲ਼; # REMAP (indicExceptions.txt): >ਲ਼ = LETTER LLLA>LETTER LLA
|
||||
\uE035>ਵ; # LETTER VA
|
||||
\uE036>ਸ਼; # LETTER SHA
|
||||
\uE037>ਸ਼; # REMAP (indicExceptions.txt): >ਸ਼ = LETTER SSA>LETTER SHA
|
||||
\uE038>ਸ; # LETTER SA
|
||||
\uE039>ਹ; # LETTER HA
|
||||
\uE03C>਼; # SIGN NUKTA
|
||||
# \uE03D>; // UNMAPPED InterIndic-Gurmukhi: SIGN AVAGRAHA
|
||||
\uE03E>ਾ; # VOWEL SIGN AA
|
||||
\uE03F>ਿ; # VOWEL SIGN I
|
||||
\uE040>ੀ; # VOWEL SIGN II
|
||||
\uE041>ੁ; # VOWEL SIGN U
|
||||
\uE042>ੂ; # VOWEL SIGN UU
|
||||
# \uE043>; // UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC R
|
||||
# \uE044>; // UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC RR
|
||||
\uE045>ੈ; # REMAP (indicExceptions.txt): >ੈ = VOWEL SIGN CANDRA E>VOWEL SIGN AI
|
||||
\uE047>ੇ; # VOWEL SIGN EE
|
||||
\uE048>ੈ; # VOWEL SIGN AI
|
||||
\uE049>ੌ; # REMAP (indicExceptions.txt): >ੌ = VOWEL SIGN CANDRA O>VOWEL SIGN AU
|
||||
\uE04B>ੋ; # VOWEL SIGN OO
|
||||
\uE04C>ੌ; # VOWEL SIGN AU
|
||||
\uE04D>੍; # SIGN VIRAMA
|
||||
# \uE050>; // UNMAPPED InterIndic-Gurmukhi: OM
|
||||
# \uE055>; // UNMAPPED InterIndic-Gurmukhi: LENGTH MARK
|
||||
\uE056>ੈ; # REMAP (indicExceptions.txt): >ੈ = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\uE057>ੌ; # REMAP (indicExceptions.txt): >ੌ = AU LENGTH MARK>VOWEL SIGN AU
|
||||
\uE059>ਖ਼; # LETTER KHHA
|
||||
\uE05A>ਗ਼; # LETTER GHHA
|
||||
\uE05B>ਜ਼; # LETTER ZA
|
||||
\uE05D>ਢ਼; # REMAP (indicExceptions.txt): >ਢ਼ = LETTER RHA>LETTER DDHA.SIGN NUKTA
|
||||
\uE05E>ਫ਼; # LETTER FA
|
||||
\uE05F>ਯ; # REMAP (indicExceptions.txt): >ਯ = LETTER YYA>LETTER YA
|
||||
\uE060>ਰਿ; # REMAP (indicExceptions.txt): >ਰਿ = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
|
||||
\uE061>ਈ਼; # REMAP (indicExceptions.txt): >ਈ਼ = LETTER VOCALIC LL>LETTER II.SIGN NUKTA
|
||||
\uE062>ਿ਼; # REMAP (indicExceptions.txt): >ਿ਼ = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
|
||||
\uE063>ੀ਼; # REMAP (indicExceptions.txt): >ੀ਼ = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
|
||||
\uE066>੦; # DIGIT ZERO
|
||||
\uE067>੧; # DIGIT ONE
|
||||
\uE068>੨; # DIGIT TWO
|
||||
\uE069>੩; # DIGIT THREE
|
||||
\uE06A>੪; # DIGIT FOUR
|
||||
\uE06B>੫; # DIGIT FIVE
|
||||
\uE06C>੬; # DIGIT SIX
|
||||
\uE06D>੭; # DIGIT SEVEN
|
||||
\uE06E>੮; # DIGIT EIGHT
|
||||
\uE06F>੯; # DIGIT NINE
|
||||
# \uE080>; // UNMAPPED InterIndic-Gurmukhi: ISSHAR
|
||||
# \uE081>; // UNMAPPED InterIndic-Gurmukhi: LETTER E
|
||||
# \uE082>; // UNMAPPED InterIndic-Gurmukhi: LETTER O (ਂ = SIGN BINDI)
|
||||
\uE083>ੜ; # LETTER RRA
|
||||
# \uE084>; // UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN E
|
||||
# \uE085>; // UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN O (ਅ = LETTER A)
|
||||
|
||||
# eof
|
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Kannada.utf8.txt
Executable file
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Kannada.utf8.txt
Executable file
|
@ -0,0 +1,108 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:00 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Kannada
|
||||
|
||||
\uE001>ಂ; # REMAP (indicExceptions.txt): ಁ>ಂ = SIGN CANDRABINDU>SIGN ANUSVARA
|
||||
\uE002>ಂ; # SIGN ANUSVARA
|
||||
\uE003>ಃ; # SIGN VISARGA
|
||||
\uE005>ಅ; # LETTER A
|
||||
\uE006>ಆ; # LETTER AA
|
||||
\uE007>ಇ; # LETTER I
|
||||
\uE008>ಈ; # LETTER II
|
||||
\uE009>ಉ; # LETTER U
|
||||
\uE00A>ಊ; # LETTER UU
|
||||
\uE00B>ಋ; # LETTER VOCALIC R
|
||||
\uE00C>ಌ; # LETTER VOCALIC L
|
||||
\uE00F>ಏ; # LETTER EE
|
||||
\uE010>ಐ; # LETTER AI
|
||||
\uE013>ಓ; # LETTER OO
|
||||
\uE014>ಔ; # LETTER AU
|
||||
\uE015>ಕ; # LETTER KA
|
||||
\uE016>ಖ; # LETTER KHA
|
||||
\uE017>ಗ; # LETTER GA
|
||||
\uE018>ಘ; # LETTER GHA
|
||||
\uE019>ಙ; # LETTER NGA
|
||||
\uE01A>ಚ; # LETTER CA
|
||||
\uE01B>ಛ; # LETTER CHA
|
||||
\uE01C>ಜ; # LETTER JA
|
||||
\uE01D>ಝ; # LETTER JHA
|
||||
\uE01E>ಞ; # LETTER NYA
|
||||
\uE01F>ಟ; # LETTER TTA
|
||||
\uE020>ಠ; # LETTER TTHA
|
||||
\uE021>ಡ; # LETTER DDA
|
||||
\uE022>ಢ; # LETTER DDHA
|
||||
\uE023>ಣ; # LETTER NNA
|
||||
\uE024>ತ; # LETTER TA
|
||||
\uE025>ಥ; # LETTER THA
|
||||
\uE026>ದ; # LETTER DA
|
||||
\uE027>ಧ; # LETTER DHA
|
||||
\uE028>ನ; # LETTER NA
|
||||
\uE029>ನ; # REMAP (indicExceptions.txt): >ನ = LETTER NNNA>LETTER NA
|
||||
\uE02A>ಪ; # LETTER PA
|
||||
\uE02B>ಫ; # LETTER PHA
|
||||
\uE02C>ಬ; # LETTER BA
|
||||
\uE02D>ಭ; # LETTER BHA
|
||||
\uE02E>ಮ; # LETTER MA
|
||||
\uE02F>ಯ; # LETTER YA
|
||||
\uE030>ರ; # LETTER RA
|
||||
\uE032>ಲ; # LETTER LA
|
||||
\uE033>ಳ; # LETTER LLA
|
||||
\uE034>ಳ; # REMAP (indicExceptions.txt): >ಳ = LETTER LLLA>LETTER LLA
|
||||
\uE035>ವ; # LETTER VA
|
||||
\uE036>ಶ; # LETTER SHA
|
||||
\uE037>ಷ; # LETTER SSA
|
||||
\uE038>ಸ; # LETTER SA
|
||||
\uE039>ಹ; # LETTER HA
|
||||
# \uE03C>; // UNMAPPED InterIndic-Kannada: SIGN NUKTA
|
||||
# \uE03D>; // UNMAPPED InterIndic-Kannada: SIGN AVAGRAHA
|
||||
\uE03E>ಾ; # VOWEL SIGN AA
|
||||
\uE03F>ಿ; # VOWEL SIGN I
|
||||
\uE040>ೀ; # VOWEL SIGN II
|
||||
\uE041>ು; # VOWEL SIGN U
|
||||
\uE042>ೂ; # VOWEL SIGN UU
|
||||
\uE043>ೃ; # VOWEL SIGN VOCALIC R
|
||||
\uE044>ೄ; # VOWEL SIGN VOCALIC RR
|
||||
\uE045>ೆ; # REMAP (indicExceptions.txt): >ೆ = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
\uE047>ೇ; # VOWEL SIGN EE
|
||||
\uE048>ೈ; # VOWEL SIGN AI
|
||||
\uE049>ೊ; # REMAP (indicExceptions.txt): >ೊ = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
\uE04B>ೋ; # VOWEL SIGN OO
|
||||
\uE04C>ೌ; # VOWEL SIGN AU
|
||||
\uE04D>್; # SIGN VIRAMA
|
||||
\uE050>ಓಂ; # REMAP (indicExceptions.txt): >ಓಂ = OM>LETTER OO.SIGN ANUSVARA
|
||||
\uE055>ೕ; # LENGTH MARK
|
||||
\uE056>ೖ; # AI LENGTH MARK
|
||||
\uE057>ೌ; # REMAP (indicExceptions.txt): >ೌ = AU LENGTH MARK>VOWEL SIGN AU
|
||||
\uE059>ಖ; # REMAP (indicExceptions.txt): >ಖ = LETTER KHHA>LETTER KHA
|
||||
\uE05A>ಗ; # REMAP (indicExceptions.txt): >ಗ = LETTER GHHA>LETTER GA
|
||||
\uE05B>ಜ; # REMAP (indicExceptions.txt): >ಜ = LETTER ZA>LETTER JA
|
||||
\uE05D>ಢ; # REMAP (indicExceptions.txt): ೝ>ಢ = LETTER RHA>LETTER DDHA
|
||||
\uE05E>ೞ; # LETTER FA
|
||||
\uE05F>ಯ; # REMAP (indicExceptions.txt): >ಯ = LETTER YYA>LETTER YA
|
||||
\uE060>ೠ; # LETTER VOCALIC RR
|
||||
\uE061>ೡ; # LETTER VOCALIC LL
|
||||
\uE062>ಿ; # REMAP (indicExceptions.txt): ೢ>ಿ = VOWEL SIGN VOCALIC L>VOWEL SIGN I
|
||||
\uE063>ೀ; # REMAP (indicExceptions.txt): ೣ>ೀ = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
|
||||
\uE066>೦; # DIGIT ZERO
|
||||
\uE067>೧; # DIGIT ONE
|
||||
\uE068>೨; # DIGIT TWO
|
||||
\uE069>೩; # DIGIT THREE
|
||||
\uE06A>೪; # DIGIT FOUR
|
||||
\uE06B>೫; # DIGIT FIVE
|
||||
\uE06C>೬; # DIGIT SIX
|
||||
\uE06D>೭; # DIGIT SEVEN
|
||||
\uE06E>೮; # DIGIT EIGHT
|
||||
\uE06F>೯; # DIGIT NINE
|
||||
# \uE080>; // UNMAPPED InterIndic-Kannada: ISSHAR
|
||||
\uE081>ಎ; # LETTER E
|
||||
\uE082>ಒ; # LETTER O
|
||||
\uE083>ಱ; # LETTER RRA
|
||||
\uE084>ೆ; # VOWEL SIGN E
|
||||
\uE085>ೊ; # VOWEL SIGN O
|
||||
|
||||
# eof
|
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Malayalam.utf8.txt
Executable file
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Malayalam.utf8.txt
Executable file
|
@ -0,0 +1,108 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:00 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Malayalam
|
||||
|
||||
\uE001>ം; # REMAP (indicExceptions.txt): ഁ>ം = SIGN CANDRABINDU>SIGN ANUSVARA
|
||||
\uE002>ം; # SIGN ANUSVARA
|
||||
\uE003>ഃ; # SIGN VISARGA
|
||||
\uE005>അ; # LETTER A
|
||||
\uE006>ആ; # LETTER AA
|
||||
\uE007>ഇ; # LETTER I
|
||||
\uE008>ഈ; # LETTER II
|
||||
\uE009>ഉ; # LETTER U
|
||||
\uE00A>ഊ; # LETTER UU
|
||||
\uE00B>ഋ; # LETTER VOCALIC R
|
||||
\uE00C>ഌ; # LETTER VOCALIC L
|
||||
\uE00F>ഏ; # LETTER EE
|
||||
\uE010>ഐ; # LETTER AI
|
||||
\uE013>ഓ; # LETTER OO
|
||||
\uE014>ഔ; # LETTER AU
|
||||
\uE015>ക; # LETTER KA
|
||||
\uE016>ഖ; # LETTER KHA
|
||||
\uE017>ഗ; # LETTER GA
|
||||
\uE018>ഘ; # LETTER GHA
|
||||
\uE019>ങ; # LETTER NGA
|
||||
\uE01A>ച; # LETTER CA
|
||||
\uE01B>ഛ; # LETTER CHA
|
||||
\uE01C>ജ; # LETTER JA
|
||||
\uE01D>ഝ; # LETTER JHA
|
||||
\uE01E>ഞ; # LETTER NYA
|
||||
\uE01F>ട; # LETTER TTA
|
||||
\uE020>ഠ; # LETTER TTHA
|
||||
\uE021>ഡ; # LETTER DDA
|
||||
\uE022>ഢ; # LETTER DDHA
|
||||
\uE023>ണ; # LETTER NNA
|
||||
\uE024>ത; # LETTER TA
|
||||
\uE025>ഥ; # LETTER THA
|
||||
\uE026>ദ; # LETTER DA
|
||||
\uE027>ധ; # LETTER DHA
|
||||
\uE028>ന; # LETTER NA
|
||||
\uE029>ന; # REMAP (indicExceptions.txt): ഩ>ന = LETTER NNNA>LETTER NA
|
||||
\uE02A>പ; # LETTER PA
|
||||
\uE02B>ഫ; # LETTER PHA
|
||||
\uE02C>ബ; # LETTER BA
|
||||
\uE02D>ഭ; # LETTER BHA
|
||||
\uE02E>മ; # LETTER MA
|
||||
\uE02F>യ; # LETTER YA
|
||||
\uE030>ര; # LETTER RA
|
||||
\uE032>ല; # LETTER LA
|
||||
\uE033>ള; # LETTER LLA
|
||||
\uE034>ഴ; # LETTER LLLA
|
||||
\uE035>വ; # LETTER VA
|
||||
\uE036>ശ; # LETTER SHA
|
||||
\uE037>ഷ; # LETTER SSA
|
||||
\uE038>സ; # LETTER SA
|
||||
\uE039>ഹ; # LETTER HA
|
||||
# \uE03C>; // UNMAPPED InterIndic-Malayalam: SIGN NUKTA
|
||||
# \uE03D>; // UNMAPPED InterIndic-Malayalam: SIGN AVAGRAHA
|
||||
\uE03E>ാ; # VOWEL SIGN AA
|
||||
\uE03F>ി; # VOWEL SIGN I
|
||||
\uE040>ീ; # VOWEL SIGN II
|
||||
\uE041>ു; # VOWEL SIGN U
|
||||
\uE042>ൂ; # VOWEL SIGN UU
|
||||
\uE043>ൃ; # VOWEL SIGN VOCALIC R
|
||||
# \uE044>; // UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC RR
|
||||
\uE045>ാ; # REMAP (indicExceptions.txt): >ാ = VOWEL SIGN CANDRA E>VOWEL SIGN AA
|
||||
\uE047>േ; # VOWEL SIGN EE
|
||||
\uE048>ൈ; # VOWEL SIGN AI
|
||||
\uE049>ോ; # REMAP (indicExceptions.txt): >ോ = VOWEL SIGN CANDRA O>VOWEL SIGN OO
|
||||
\uE04B>ോ; # VOWEL SIGN OO
|
||||
\uE04C>ൌ; # VOWEL SIGN AU
|
||||
\uE04D>്; # SIGN VIRAMA
|
||||
# \uE050>; // UNMAPPED InterIndic-Malayalam: OM
|
||||
# \uE055>; // UNMAPPED InterIndic-Malayalam: LENGTH MARK
|
||||
\uE056>ൈ; # REMAP (indicExceptions.txt): ൖ>ൈ = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\uE057>ൗ; # AU LENGTH MARK
|
||||
\uE059>ഖ; # REMAP (indicExceptions.txt): ൙>ഖ = LETTER KHHA>LETTER KHA
|
||||
\uE05A>ഗ; # REMAP (indicExceptions.txt): ൚>ഗ = LETTER GHHA>LETTER GA
|
||||
\uE05B>ജ; # REMAP (indicExceptions.txt): ൛>ജ = LETTER ZA>LETTER JA
|
||||
\uE05D>ഢ; # REMAP (indicExceptions.txt): ൝>ഢ = LETTER RHA>LETTER DDHA
|
||||
\uE05E>ഫ; # REMAP (indicExceptions.txt): ൞>ഫ = LETTER FA>LETTER PHA
|
||||
\uE05F>യ; # REMAP (indicExceptions.txt): ൟ>യ = LETTER YYA>LETTER YA
|
||||
\uE060>ൠ; # LETTER VOCALIC RR
|
||||
\uE061>ൡ; # LETTER VOCALIC LL
|
||||
# \uE062>; // UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC L
|
||||
# \uE063>; // UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC LL
|
||||
\uE066>൦; # DIGIT ZERO
|
||||
\uE067>൧; # DIGIT ONE
|
||||
\uE068>൨; # DIGIT TWO
|
||||
\uE069>൩; # DIGIT THREE
|
||||
\uE06A>൪; # DIGIT FOUR
|
||||
\uE06B>൫; # DIGIT FIVE
|
||||
\uE06C>൬; # DIGIT SIX
|
||||
\uE06D>൭; # DIGIT SEVEN
|
||||
\uE06E>൮; # DIGIT EIGHT
|
||||
\uE06F>൯; # DIGIT NINE
|
||||
# \uE080>; // UNMAPPED InterIndic-Malayalam: ISSHAR
|
||||
\uE081>എ; # LETTER E
|
||||
\uE082>ഒ; # LETTER O
|
||||
\uE083>റ; # LETTER RRA
|
||||
\uE084>െ; # VOWEL SIGN E
|
||||
\uE085>ൊ; # VOWEL SIGN O
|
||||
|
||||
# eof
|
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Oriya.utf8.txt
Executable file
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Oriya.utf8.txt
Executable file
|
@ -0,0 +1,108 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:01 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Oriya
|
||||
|
||||
\uE001>ଁ; # SIGN CANDRABINDU
|
||||
\uE002>ଂ; # SIGN ANUSVARA
|
||||
\uE003>ଃ; # SIGN VISARGA
|
||||
\uE005>ଅ; # LETTER A
|
||||
\uE006>ଆ; # LETTER AA
|
||||
\uE007>ଇ; # LETTER I
|
||||
\uE008>ଈ; # LETTER II
|
||||
\uE009>ଉ; # LETTER U
|
||||
\uE00A>ଊ; # LETTER UU
|
||||
\uE00B>ଋ; # LETTER VOCALIC R
|
||||
\uE00C>ଌ; # LETTER VOCALIC L
|
||||
# \uE00F>; // UNMAPPED InterIndic-Oriya: LETTER EE (ଏ = LETTER E)
|
||||
\uE010>ଐ; # LETTER AI
|
||||
# \uE013>; // UNMAPPED InterIndic-Oriya: LETTER OO (ଓ = LETTER O)
|
||||
\uE014>ଔ; # LETTER AU
|
||||
\uE015>କ; # LETTER KA
|
||||
\uE016>ଖ; # LETTER KHA
|
||||
\uE017>ଗ; # LETTER GA
|
||||
\uE018>ଘ; # LETTER GHA
|
||||
\uE019>ଙ; # LETTER NGA
|
||||
\uE01A>ଚ; # LETTER CA
|
||||
\uE01B>ଛ; # LETTER CHA
|
||||
\uE01C>ଜ; # LETTER JA
|
||||
\uE01D>ଝ; # LETTER JHA
|
||||
\uE01E>ଞ; # LETTER NYA
|
||||
\uE01F>ଟ; # LETTER TTA
|
||||
\uE020>ଠ; # LETTER TTHA
|
||||
\uE021>ଡ; # LETTER DDA
|
||||
\uE022>ଢ; # LETTER DDHA
|
||||
\uE023>ଣ; # LETTER NNA
|
||||
\uE024>ତ; # LETTER TA
|
||||
\uE025>ଥ; # LETTER THA
|
||||
\uE026>ଦ; # LETTER DA
|
||||
\uE027>ଧ; # LETTER DHA
|
||||
\uE028>ନ; # LETTER NA
|
||||
\uE029>ନ; # REMAP (indicExceptions.txt): >ନ = LETTER NNNA>LETTER NA
|
||||
\uE02A>ପ; # LETTER PA
|
||||
\uE02B>ଫ; # LETTER PHA
|
||||
\uE02C>ବ; # LETTER BA
|
||||
\uE02D>ଭ; # LETTER BHA
|
||||
\uE02E>ମ; # LETTER MA
|
||||
\uE02F>ଯ; # LETTER YA
|
||||
\uE030>ର; # LETTER RA
|
||||
\uE032>ଲ; # LETTER LA
|
||||
\uE033>ଳ; # LETTER LLA
|
||||
\uE034>ଳ; # REMAP (indicExceptions.txt): >ଳ = LETTER LLLA>LETTER LLA
|
||||
\uE035>ବ; # REMAP (indicExceptions.txt): ଵ>ବ = LETTER VA>LETTER BA
|
||||
\uE036>ଶ; # LETTER SHA
|
||||
\uE037>ଷ; # LETTER SSA
|
||||
\uE038>ସ; # LETTER SA
|
||||
\uE039>ହ; # LETTER HA
|
||||
\uE03C>଼; # SIGN NUKTA
|
||||
\uE03D>ଽ; # SIGN AVAGRAHA
|
||||
\uE03E>ା; # VOWEL SIGN AA
|
||||
\uE03F>ି; # VOWEL SIGN I
|
||||
\uE040>ୀ; # VOWEL SIGN II
|
||||
\uE041>ୁ; # VOWEL SIGN U
|
||||
\uE042>ୂ; # VOWEL SIGN UU
|
||||
\uE043>ୃ; # VOWEL SIGN VOCALIC R
|
||||
\uE044>ୃ଼; # REMAP (indicExceptions.txt): ୄ>ୃ଼ = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA
|
||||
\uE045>େ; # REMAP (indicExceptions.txt): >େ = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
# \uE047>; // UNMAPPED InterIndic-Oriya: VOWEL SIGN EE (େ = VOWEL SIGN E)
|
||||
\uE048>ୈ; # VOWEL SIGN AI
|
||||
\uE049>ୋ; # REMAP (indicExceptions.txt): >ୋ = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
# \uE04B>; // UNMAPPED InterIndic-Oriya: VOWEL SIGN OO (ୋ = VOWEL SIGN O)
|
||||
\uE04C>ୌ; # VOWEL SIGN AU
|
||||
\uE04D>୍; # SIGN VIRAMA
|
||||
\uE050>ଓଁ; # REMAP (indicExceptions.txt): >ଓଁ = OM>LETTER O.SIGN CANDRABINDU
|
||||
# \uE055>; // UNMAPPED InterIndic-Oriya: LENGTH MARK
|
||||
\uE056>ୖ; # AI LENGTH MARK
|
||||
\uE057>ୗ; # AU LENGTH MARK
|
||||
\uE059>ଖ଼; # REMAP (indicExceptions.txt): >ଖ଼ = LETTER KHHA>LETTER KHA.SIGN NUKTA
|
||||
\uE05A>ଗ଼; # REMAP (indicExceptions.txt): >ଗ଼ = LETTER GHHA>LETTER GA.SIGN NUKTA
|
||||
\uE05B>ଜ଼; # REMAP (indicExceptions.txt): >ଜ଼ = LETTER ZA>LETTER JA.SIGN NUKTA
|
||||
\uE05D>ଢ଼; # LETTER RHA
|
||||
\uE05E>ଫ଼; # REMAP (indicExceptions.txt): >ଫ଼ = LETTER FA>LETTER PHA.SIGN NUKTA
|
||||
\uE05F>ୟ; # LETTER YYA
|
||||
\uE060>ୠ; # LETTER VOCALIC RR
|
||||
\uE061>ୡ; # LETTER VOCALIC LL
|
||||
\uE062>ୖ଼; # REMAP (indicExceptions.txt): ୢ>ୖ଼ = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA
|
||||
\uE063>ୗ଼; # REMAP (indicExceptions.txt): ୣ>ୗ଼ = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA
|
||||
\uE066>୦; # DIGIT ZERO
|
||||
\uE067>୧; # DIGIT ONE
|
||||
\uE068>୨; # DIGIT TWO
|
||||
\uE069>୩; # DIGIT THREE
|
||||
\uE06A>୪; # DIGIT FOUR
|
||||
\uE06B>୫; # DIGIT FIVE
|
||||
\uE06C>୬; # DIGIT SIX
|
||||
\uE06D>୭; # DIGIT SEVEN
|
||||
\uE06E>୮; # DIGIT EIGHT
|
||||
\uE06F>୯; # DIGIT NINE
|
||||
\uE080>୰; # ISSHAR
|
||||
\uE081>ଏ; # LETTER E
|
||||
\uE082>ଓ; # LETTER O
|
||||
\uE083>ଡ଼; # LETTER RRA
|
||||
\uE084>େ; # VOWEL SIGN E
|
||||
\uE085>ୋ; # VOWEL SIGN O
|
||||
|
||||
# eof
|
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Tamil.utf8.txt
Executable file
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Tamil.utf8.txt
Executable file
|
@ -0,0 +1,108 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:01 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Tamil
|
||||
|
||||
# \uE001>; // UNMAPPED InterIndic-Tamil: SIGN CANDRABINDU
|
||||
\uE002>ஂ; # SIGN ANUSVARA
|
||||
\uE003>ஃ; # SIGN VISARGA
|
||||
\uE005>அ; # LETTER A
|
||||
\uE006>ஆ; # LETTER AA
|
||||
\uE007>இ; # LETTER I
|
||||
\uE008>ஈ; # LETTER II
|
||||
\uE009>உ; # LETTER U
|
||||
\uE00A>ஊ; # LETTER UU
|
||||
\uE00B>ரி; # REMAP (indicExceptions.txt): >ரி = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
|
||||
\uE00C>இ; # REMAP (indicExceptions.txt): >இ = LETTER VOCALIC L>LETTER I
|
||||
\uE00F>ஏ; # LETTER EE
|
||||
\uE010>ஐ; # LETTER AI
|
||||
\uE013>ஓ; # LETTER OO
|
||||
\uE014>ஔ; # LETTER AU
|
||||
\uE015>க; # LETTER KA
|
||||
\uE016>க; # REMAP (indicExceptions.txt): >க = LETTER KHA>LETTER KA
|
||||
\uE017>க; # REMAP (indicExceptions.txt): >க = LETTER GA>LETTER KA
|
||||
\uE018>க; # REMAP (indicExceptions.txt): >க = LETTER GHA>LETTER KA
|
||||
\uE019>ங; # LETTER NGA
|
||||
\uE01A>ச; # LETTER CA
|
||||
\uE01B>ச; # REMAP (indicExceptions.txt): >ச = LETTER CHA>LETTER CA
|
||||
\uE01C>ஜ; # LETTER JA
|
||||
\uE01D>ச; # REMAP (indicExceptions.txt): >ச = LETTER JHA>LETTER CA
|
||||
\uE01E>ஞ; # LETTER NYA
|
||||
\uE01F>ட; # LETTER TTA
|
||||
\uE020>ட; # REMAP (indicExceptions.txt): >ட = LETTER TTHA>LETTER TTA
|
||||
\uE021>ட; # REMAP (indicExceptions.txt): >ட = LETTER DDA>LETTER TTA
|
||||
\uE022>ட; # REMAP (indicExceptions.txt): >ட = LETTER DDHA>LETTER TTA
|
||||
\uE023>ண; # LETTER NNA
|
||||
\uE024>த; # LETTER TA
|
||||
\uE025>த; # REMAP (indicExceptions.txt): >த = LETTER THA>LETTER TA
|
||||
\uE026>த; # REMAP (indicExceptions.txt): >த = LETTER DA>LETTER TA
|
||||
\uE027>த; # REMAP (indicExceptions.txt): >த = LETTER DHA>LETTER TA
|
||||
\uE028>ந; # LETTER NA
|
||||
\uE029>ன; # LETTER NNNA
|
||||
\uE02A>ப; # LETTER PA
|
||||
\uE02B>ப; # REMAP (indicExceptions.txt): >ப = LETTER PHA>LETTER PA
|
||||
\uE02C>ப; # REMAP (indicExceptions.txt): >ப = LETTER BA>LETTER PA
|
||||
\uE02D>ப; # REMAP (indicExceptions.txt): >ப = LETTER BHA>LETTER PA
|
||||
\uE02E>ம; # LETTER MA
|
||||
\uE02F>ய; # LETTER YA
|
||||
\uE030>ர; # LETTER RA
|
||||
\uE032>ல; # LETTER LA
|
||||
\uE033>ள; # LETTER LLA
|
||||
\uE034>ழ; # LETTER LLLA
|
||||
\uE035>வ; # LETTER VA
|
||||
\uE036>ஷ; # REMAP (indicExceptions.txt): ஶ>ஷ = LETTER SHA>LETTER SSA
|
||||
\uE037>ஷ; # LETTER SSA
|
||||
\uE038>ஸ; # LETTER SA
|
||||
\uE039>ஹ; # LETTER HA
|
||||
# \uE03C>; // UNMAPPED InterIndic-Tamil: SIGN NUKTA
|
||||
# \uE03D>; // UNMAPPED InterIndic-Tamil: SIGN AVAGRAHA
|
||||
\uE03E>ா; # VOWEL SIGN AA
|
||||
\uE03F>ி; # VOWEL SIGN I
|
||||
\uE040>ீ; # VOWEL SIGN II
|
||||
\uE041>ு; # VOWEL SIGN U
|
||||
\uE042>ூ; # VOWEL SIGN UU
|
||||
\uE043>்ரி; # REMAP (indicExceptions.txt): >்ரி = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
|
||||
\uE044>்ரி; # REMAP (indicExceptions.txt): >்ரி = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
|
||||
\uE045>ா; # REMAP (indicExceptions.txt): >ா = VOWEL SIGN CANDRA E>VOWEL SIGN AA
|
||||
\uE047>ே; # VOWEL SIGN EE
|
||||
\uE048>ை; # VOWEL SIGN AI
|
||||
\uE049>ா; # REMAP (indicExceptions.txt): >ா = VOWEL SIGN CANDRA O>VOWEL SIGN AA
|
||||
\uE04B>ோ; # VOWEL SIGN OO
|
||||
\uE04C>ௌ; # VOWEL SIGN AU
|
||||
\uE04D>்; # SIGN VIRAMA
|
||||
\uE050>ஓம்; # REMAP (indicExceptions.txt): ௐ>ஓம் = OM>LETTER OO.LETTER MA.SIGN VIRAMA
|
||||
# \uE055>; // UNMAPPED InterIndic-Tamil: LENGTH MARK
|
||||
\uE056>ை; # REMAP (indicExceptions.txt): >ை = AI LENGTH MARK>VOWEL SIGN AI
|
||||
\uE057>ௗ; # AU LENGTH MARK
|
||||
\uE059>க; # REMAP (indicExceptions.txt): >க = LETTER KHHA>LETTER KA
|
||||
\uE05A>க; # REMAP (indicExceptions.txt): >க = LETTER GHHA>LETTER KA
|
||||
\uE05B>ஜ; # REMAP (indicExceptions.txt): >ஜ = LETTER ZA>LETTER JA
|
||||
\uE05D>ட; # REMAP (indicExceptions.txt): >ட = LETTER RHA>LETTER TTA
|
||||
\uE05E>ப; # REMAP (indicExceptions.txt): >ப = LETTER FA>LETTER PA
|
||||
\uE05F>ய; # REMAP (indicExceptions.txt): >ய = LETTER YYA>LETTER YA
|
||||
\uE060>ரி; # REMAP (indicExceptions.txt): >ரி = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
|
||||
\uE061>ஈ; # REMAP (indicExceptions.txt): >ஈ = LETTER VOCALIC LL>LETTER II
|
||||
# \uE062>; // UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC L
|
||||
# \uE063>; // UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC LL
|
||||
# \uE066>; // UNMAPPED InterIndic-Tamil: DIGIT ZERO
|
||||
\uE067>௧; # DIGIT ONE
|
||||
\uE068>௨; # DIGIT TWO
|
||||
\uE069>௩; # DIGIT THREE
|
||||
\uE06A>௪; # DIGIT FOUR
|
||||
\uE06B>௫; # DIGIT FIVE
|
||||
\uE06C>௬; # DIGIT SIX
|
||||
\uE06D>௭; # DIGIT SEVEN
|
||||
\uE06E>௮; # DIGIT EIGHT
|
||||
\uE06F>௯; # DIGIT NINE
|
||||
# \uE080>; // UNMAPPED InterIndic-Tamil: ISSHAR
|
||||
\uE081>எ; # LETTER E
|
||||
\uE082>ஒ; # LETTER O
|
||||
\uE083>ற; # LETTER RRA
|
||||
\uE084>ெ; # VOWEL SIGN E
|
||||
\uE085>ொ; # VOWEL SIGN O
|
||||
|
||||
# eof
|
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Telugu.utf8.txt
Executable file
108
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Telugu.utf8.txt
Executable file
|
@ -0,0 +1,108 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:01 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# InterIndic-Telugu
|
||||
|
||||
\uE001>ఁ; # SIGN CANDRABINDU
|
||||
\uE002>ం; # SIGN ANUSVARA
|
||||
\uE003>ః; # SIGN VISARGA
|
||||
\uE005>అ; # LETTER A
|
||||
\uE006>ఆ; # LETTER AA
|
||||
\uE007>ఇ; # LETTER I
|
||||
\uE008>ఈ; # LETTER II
|
||||
\uE009>ఉ; # LETTER U
|
||||
\uE00A>ఊ; # LETTER UU
|
||||
\uE00B>ఋ; # LETTER VOCALIC R
|
||||
\uE00C>ఌ; # LETTER VOCALIC L
|
||||
\uE00F>ఏ; # LETTER EE
|
||||
\uE010>ఐ; # LETTER AI
|
||||
\uE013>ఓ; # LETTER OO
|
||||
\uE014>ఔ; # LETTER AU
|
||||
\uE015>క; # LETTER KA
|
||||
\uE016>ఖ; # LETTER KHA
|
||||
\uE017>గ; # LETTER GA
|
||||
\uE018>ఘ; # LETTER GHA
|
||||
\uE019>ఙ; # LETTER NGA
|
||||
\uE01A>చ; # LETTER CA
|
||||
\uE01B>ఛ; # LETTER CHA
|
||||
\uE01C>జ; # LETTER JA
|
||||
\uE01D>ఝ; # LETTER JHA
|
||||
\uE01E>ఞ; # LETTER NYA
|
||||
\uE01F>ట; # LETTER TTA
|
||||
\uE020>ఠ; # LETTER TTHA
|
||||
\uE021>డ; # LETTER DDA
|
||||
\uE022>ఢ; # LETTER DDHA
|
||||
\uE023>ణ; # LETTER NNA
|
||||
\uE024>త; # LETTER TA
|
||||
\uE025>థ; # LETTER THA
|
||||
\uE026>ద; # LETTER DA
|
||||
\uE027>ధ; # LETTER DHA
|
||||
\uE028>న; # LETTER NA
|
||||
\uE029>న; # REMAP (indicExceptions.txt): >న = LETTER NNNA>LETTER NA
|
||||
\uE02A>ప; # LETTER PA
|
||||
\uE02B>ఫ; # LETTER PHA
|
||||
\uE02C>బ; # LETTER BA
|
||||
\uE02D>భ; # LETTER BHA
|
||||
\uE02E>మ; # LETTER MA
|
||||
\uE02F>య; # LETTER YA
|
||||
\uE030>ర; # LETTER RA
|
||||
\uE032>ల; # LETTER LA
|
||||
\uE033>ళ; # LETTER LLA
|
||||
\uE034>ళ; # REMAP (indicExceptions.txt): ఴ>ళ = LETTER LLLA>LETTER LLA
|
||||
\uE035>వ; # LETTER VA
|
||||
\uE036>శ; # LETTER SHA
|
||||
\uE037>ష; # LETTER SSA
|
||||
\uE038>స; # LETTER SA
|
||||
\uE039>హ; # LETTER HA
|
||||
# \uE03C>; // UNMAPPED InterIndic-Telugu: SIGN NUKTA
|
||||
# \uE03D>; // UNMAPPED InterIndic-Telugu: SIGN AVAGRAHA
|
||||
\uE03E>ా; # VOWEL SIGN AA
|
||||
\uE03F>ి; # VOWEL SIGN I
|
||||
\uE040>ీ; # VOWEL SIGN II
|
||||
\uE041>ు; # VOWEL SIGN U
|
||||
\uE042>ూ; # VOWEL SIGN UU
|
||||
\uE043>ృ; # VOWEL SIGN VOCALIC R
|
||||
\uE044>ౄ; # VOWEL SIGN VOCALIC RR
|
||||
\uE045>ె; # REMAP (indicExceptions.txt): >ె = VOWEL SIGN CANDRA E>VOWEL SIGN E
|
||||
\uE047>ే; # VOWEL SIGN EE
|
||||
\uE048>ై; # VOWEL SIGN AI
|
||||
\uE049>ొ; # REMAP (indicExceptions.txt): >ొ = VOWEL SIGN CANDRA O>VOWEL SIGN O
|
||||
\uE04B>ో; # VOWEL SIGN OO
|
||||
\uE04C>ౌ; # VOWEL SIGN AU
|
||||
\uE04D>్; # SIGN VIRAMA
|
||||
\uE050>ఓం; # REMAP (indicExceptions.txt): >ఓం = OM>LETTER OO.SIGN ANUSVARA
|
||||
\uE055>ౕ; # LENGTH MARK
|
||||
\uE056>ౖ; # AI LENGTH MARK
|
||||
\uE057>ౌ; # REMAP (indicExceptions.txt): >ౌ = AU LENGTH MARK>VOWEL SIGN AU
|
||||
\uE059>ఖ; # REMAP (indicExceptions.txt): ౙ>ఖ = LETTER KHHA>LETTER KHA
|
||||
\uE05A>గ; # REMAP (indicExceptions.txt): ౚ>గ = LETTER GHHA>LETTER GA
|
||||
\uE05B>జ; # REMAP (indicExceptions.txt): >జ = LETTER ZA>LETTER JA
|
||||
\uE05D>ఢ; # REMAP (indicExceptions.txt): ౝ>ఢ = LETTER RHA>LETTER DDHA
|
||||
\uE05E>ఫ; # REMAP (indicExceptions.txt): >ఫ = LETTER FA>LETTER PHA
|
||||
\uE05F>య; # REMAP (indicExceptions.txt): >య = LETTER YYA>LETTER YA
|
||||
\uE060>ౠ; # LETTER VOCALIC RR
|
||||
\uE061>ౡ; # LETTER VOCALIC LL
|
||||
\uE062>ి; # REMAP (indicExceptions.txt): ౢ>ి = VOWEL SIGN VOCALIC L>VOWEL SIGN I
|
||||
\uE063>ీ; # REMAP (indicExceptions.txt): ౣ>ీ = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
|
||||
\uE066>౦; # DIGIT ZERO
|
||||
\uE067>౧; # DIGIT ONE
|
||||
\uE068>౨; # DIGIT TWO
|
||||
\uE069>౩; # DIGIT THREE
|
||||
\uE06A>౪; # DIGIT FOUR
|
||||
\uE06B>౫; # DIGIT FIVE
|
||||
\uE06C>౬; # DIGIT SIX
|
||||
\uE06D>౭; # DIGIT SEVEN
|
||||
\uE06E>౮; # DIGIT EIGHT
|
||||
\uE06F>౯; # DIGIT NINE
|
||||
# \uE080>; // UNMAPPED InterIndic-Telugu: ISSHAR
|
||||
\uE081>ఎ; # LETTER E
|
||||
\uE082>ఒ; # LETTER O
|
||||
\uE083>ఱ; # LETTER RRA
|
||||
\uE084>ె; # VOWEL SIGN E
|
||||
\uE085>ొ; # VOWEL SIGN O
|
||||
|
||||
# eof
|
6366
icu4j/src/com/ibm/text/resources/Transliterator_Kanji_English.utf8.txt
Executable file
6366
icu4j/src/com/ibm/text/resources/Transliterator_Kanji_English.utf8.txt
Executable file
File diff suppressed because it is too large
Load diff
6216
icu4j/src/com/ibm/text/resources/Transliterator_Kanji_OnRomaji.utf8.txt
Executable file
6216
icu4j/src/com/ibm/text/resources/Transliterator_Kanji_OnRomaji.utf8.txt
Executable file
File diff suppressed because it is too large
Load diff
91
icu4j/src/com/ibm/text/resources/Transliterator_Kannada_InterIndic.utf8.txt
Executable file
91
icu4j/src/com/ibm/text/resources/Transliterator_Kannada_InterIndic.utf8.txt
Executable file
|
@ -0,0 +1,91 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:05 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Kannada-InterIndic
|
||||
|
||||
ಂ>\uE002; # SIGN ANUSVARA
|
||||
ಃ>\uE003; # SIGN VISARGA
|
||||
ಅ>\uE005; # LETTER A
|
||||
ಆ>\uE006; # LETTER AA
|
||||
ಇ>\uE007; # LETTER I
|
||||
ಈ>\uE008; # LETTER II
|
||||
ಉ>\uE009; # LETTER U
|
||||
ಊ>\uE00A; # LETTER UU
|
||||
ಋ>\uE00B; # LETTER VOCALIC R
|
||||
ಌ>\uE00C; # LETTER VOCALIC L
|
||||
ಎ>\uE081; # LETTER E
|
||||
ಏ>\uE00F; # LETTER EE
|
||||
ಐ>\uE010; # LETTER AI
|
||||
ಒ>\uE082; # LETTER O
|
||||
ಓ>\uE013; # LETTER OO
|
||||
ಔ>\uE014; # LETTER AU
|
||||
ಕ>\uE015; # LETTER KA
|
||||
ಖ>\uE016; # LETTER KHA
|
||||
ಗ>\uE017; # LETTER GA
|
||||
ಘ>\uE018; # LETTER GHA
|
||||
ಙ>\uE019; # LETTER NGA
|
||||
ಚ>\uE01A; # LETTER CA
|
||||
ಛ>\uE01B; # LETTER CHA
|
||||
ಜ>\uE01C; # LETTER JA
|
||||
ಝ>\uE01D; # LETTER JHA
|
||||
ಞ>\uE01E; # LETTER NYA
|
||||
ಟ>\uE01F; # LETTER TTA
|
||||
ಠ>\uE020; # LETTER TTHA
|
||||
ಡ>\uE021; # LETTER DDA
|
||||
ಢ>\uE022; # LETTER DDHA
|
||||
ಣ>\uE023; # LETTER NNA
|
||||
ತ>\uE024; # LETTER TA
|
||||
ಥ>\uE025; # LETTER THA
|
||||
ದ>\uE026; # LETTER DA
|
||||
ಧ>\uE027; # LETTER DHA
|
||||
ನ>\uE028; # LETTER NA
|
||||
ಪ>\uE02A; # LETTER PA
|
||||
ಫ>\uE02B; # LETTER PHA
|
||||
ಬ>\uE02C; # LETTER BA
|
||||
ಭ>\uE02D; # LETTER BHA
|
||||
ಮ>\uE02E; # LETTER MA
|
||||
ಯ>\uE02F; # LETTER YA
|
||||
ರ>\uE030; # LETTER RA
|
||||
ಱ>\uE083; # LETTER RRA
|
||||
ಲ>\uE032; # LETTER LA
|
||||
ಳ>\uE033; # LETTER LLA
|
||||
ವ>\uE035; # LETTER VA
|
||||
ಶ>\uE036; # LETTER SHA
|
||||
ಷ>\uE037; # LETTER SSA
|
||||
ಸ>\uE038; # LETTER SA
|
||||
ಹ>\uE039; # LETTER HA
|
||||
ಾ>\uE03E; # VOWEL SIGN AA
|
||||
ಿ>\uE03F; # VOWEL SIGN I
|
||||
ೀ>\uE040; # VOWEL SIGN II
|
||||
ು>\uE041; # VOWEL SIGN U
|
||||
ೂ>\uE042; # VOWEL SIGN UU
|
||||
ೃ>\uE043; # VOWEL SIGN VOCALIC R
|
||||
ೄ>\uE044; # VOWEL SIGN VOCALIC RR
|
||||
ೆ>\uE084; # VOWEL SIGN E
|
||||
ೇ>\uE047; # VOWEL SIGN EE
|
||||
ೈ>\uE048; # VOWEL SIGN AI
|
||||
ೊ>\uE085; # VOWEL SIGN O
|
||||
ೋ>\uE04B; # VOWEL SIGN OO
|
||||
ೌ>\uE04C; # VOWEL SIGN AU
|
||||
್>\uE04D; # SIGN VIRAMA
|
||||
ೕ>\uE055; # LENGTH MARK
|
||||
ೖ>\uE056; # AI LENGTH MARK
|
||||
ೞ>\uE05E; # LETTER FA
|
||||
ೠ>\uE060; # LETTER VOCALIC RR
|
||||
ೡ>\uE061; # LETTER VOCALIC LL
|
||||
೦>\uE066; # DIGIT ZERO
|
||||
೧>\uE067; # DIGIT ONE
|
||||
೨>\uE068; # DIGIT TWO
|
||||
೩>\uE069; # DIGIT THREE
|
||||
೪>\uE06A; # DIGIT FOUR
|
||||
೫>\uE06B; # DIGIT FIVE
|
||||
೬>\uE06C; # DIGIT SIX
|
||||
೭>\uE06D; # DIGIT SEVEN
|
||||
೮>\uE06E; # DIGIT EIGHT
|
||||
೯>\uE06F; # DIGIT NINE
|
||||
|
||||
# eof
|
125
icu4j/src/com/ibm/text/resources/Transliterator_KeyboardEscape_Latin1.utf8.txt
Executable file
125
icu4j/src/com/ibm/text/resources/Transliterator_KeyboardEscape_Latin1.utf8.txt
Executable file
|
@ -0,0 +1,125 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:05 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# KeyboardEscape-Latin1
|
||||
|
||||
### $esc='';
|
||||
### $grave='`';
|
||||
### $acute='';
|
||||
### $hat='^';
|
||||
### $tilde='~';
|
||||
### $umlaut=':';
|
||||
### $ring='.';
|
||||
### $cedilla=',';
|
||||
### $slash='/';
|
||||
### $super='^';
|
||||
|
||||
# Make keyboard entry of {esc} possible
|
||||
# and of backslash
|
||||
'\\'''>'';
|
||||
'\\\\'>'\\';
|
||||
|
||||
# Long keys
|
||||
cur''>¤;
|
||||
sec''>§;
|
||||
not''>¬;
|
||||
mul''>×;
|
||||
div''>÷;
|
||||
|
||||
\\ ''> ; # non-breaking space
|
||||
'!'''>¡; # inverted exclamation
|
||||
c'/'''>¢; # cent sign
|
||||
lb''>£; # pound sign
|
||||
'|'''>¦; # broken vertical bar
|
||||
':'''>¨; # umlaut
|
||||
'^' a''>ª; # feminine ordinal
|
||||
'<<'''>«;
|
||||
r''>®;
|
||||
'--'''>¯;
|
||||
'-'''>;
|
||||
'+-'''>±;
|
||||
'^' 2''>²;
|
||||
'^' 3''>³;
|
||||
''''>´;
|
||||
m''>µ;
|
||||
para''>¶;
|
||||
dot''>·;
|
||||
','''>¸;
|
||||
'^' 1''>¹;
|
||||
'^' o''>º; # masculine ordinal
|
||||
'>>'''>»;
|
||||
'1/4'''>¼;
|
||||
'1/2'''>½;
|
||||
'3/4'''>¾;
|
||||
'?'''>¿;
|
||||
A'`'''>À;
|
||||
A''''>Á;
|
||||
A'^'''>Â;
|
||||
A'~'''>Ã;
|
||||
A':'''>Ä;
|
||||
A'.'''>Å;
|
||||
AE''>Æ;
|
||||
C','''>Ç;
|
||||
E'`'''>È;
|
||||
E''''>É;
|
||||
E'^'''>Ê;
|
||||
E':'''>Ë;
|
||||
I'`'''>Ì;
|
||||
I''''>Í;
|
||||
I'^'''>Î;
|
||||
I':'''>Ï;
|
||||
'D-'''>Ð;
|
||||
N'~'''>Ñ;
|
||||
O'`'''>Ò;
|
||||
O''''>Ó;
|
||||
O'^'''>Ô;
|
||||
O'~'''>Õ;
|
||||
O':'''>Ö;
|
||||
O'/'''>Ø;
|
||||
U'`'''>Ù;
|
||||
U''''>Ú;
|
||||
U'^'''>Û;
|
||||
U':'''>Ü;
|
||||
Y''''>Ý;
|
||||
TH''>Þ;
|
||||
ss''>ß;
|
||||
a'`'''>à;
|
||||
a''''>á;
|
||||
a'^'''>â;
|
||||
a'~'''>ã;
|
||||
a':'''>ä;
|
||||
a'.'''>å;
|
||||
ae''>æ;
|
||||
c','''>ç;
|
||||
c''>©; # copyright - after c{cedilla}
|
||||
e'`'''>è;
|
||||
e''''>é;
|
||||
e'^'''>ê;
|
||||
e':'''>ë;
|
||||
i'`'''>ì;
|
||||
i''''>í;
|
||||
i'^'''>î;
|
||||
i':'''>ï;
|
||||
'd-'''>ð;
|
||||
n'~'''>ñ;
|
||||
o'`'''>ò;
|
||||
o''''>ó;
|
||||
o'^'''>ô;
|
||||
o'~'''>õ;
|
||||
o':'''>ö;
|
||||
o'/'''>ø;
|
||||
o''>°;
|
||||
u'`'''>ù;
|
||||
u''''>ú;
|
||||
u'^'''>û;
|
||||
u':'''>ü;
|
||||
y''''>ý;
|
||||
y''>¥; # yen sign
|
||||
th''>þ;
|
||||
#masked: + "ss''>ÿ;"
|
||||
|
||||
# eof
|
189
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Arabic.utf8.txt
Executable file
189
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Arabic.utf8.txt
Executable file
|
@ -0,0 +1,189 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# To Do: finish adding shadda, add sokoon
|
||||
# [Is this done? Can someone finish this?]
|
||||
|
||||
### $alefmadda=آ;
|
||||
### $alefuhamza=أ;
|
||||
### $wauuhamza=ؤ;
|
||||
### $alefhamza=إ;
|
||||
### $yehuhamza=ئ;
|
||||
### $alef=ا;
|
||||
### $beh=ب;
|
||||
### $tehmarbuta=ة;
|
||||
### $teh=ت;
|
||||
### $theh=ث;
|
||||
### $geem=ج;
|
||||
### $hah=ح;
|
||||
### $kha=خ;
|
||||
### $dal=د;
|
||||
### $dhal=ذ;
|
||||
### $reh=ر;
|
||||
### $zain=ز;
|
||||
### $seen=س;
|
||||
### $sheen=ش;
|
||||
### $sad=ص;
|
||||
### $dad=ض;
|
||||
### $tah=ط;
|
||||
### $zah=ظ;
|
||||
### $ein=ع;
|
||||
### $ghein=غ;
|
||||
### $feh=ف;
|
||||
### $qaaf=ق;
|
||||
### $kaf=ك;
|
||||
### $lam=ل;
|
||||
### $meem=م;
|
||||
### $noon=ن;
|
||||
### $heh=ه;
|
||||
### $wau=و;
|
||||
### $yehmaqsura=ى;
|
||||
### $yeh=ي;
|
||||
### $peh=ڤ;
|
||||
|
||||
### $hamza=ء;
|
||||
### $fathatein=ً;
|
||||
### $dammatein=ٌ;
|
||||
### $kasratein=ٍ;
|
||||
### $fatha=َ;
|
||||
### $damma=ُ;
|
||||
### $kasra=ِ;
|
||||
### $shadda=ّ;
|
||||
### $sokoon=ْ;
|
||||
|
||||
# Doubles - liu
|
||||
t'' < {ت} [تثةط];
|
||||
h'' < {ه} [هح];
|
||||
s'' < {س} ش;
|
||||
|
||||
# A few pathological special cases to make round
|
||||
# trip work. - liu
|
||||
d'~'d <> د د;
|
||||
dh'~'dh <> ذ ذ;
|
||||
dd'~'dd <> ض ض;
|
||||
|
||||
# Shadda: Map x ّ to x x, where x is dh, dd, or
|
||||
# d (that is, ذ, ض, or د). If x is d, d'd is
|
||||
# output. Net effect is to map s.th. like ض ّ
|
||||
# to dd'dd. - liu
|
||||
ذ {dh} <> dh {ّ};
|
||||
ض {dd} <> dd {ّ};
|
||||
د {''d} [^dh] <> d {ّ}; # Avoid d'dd or d'dh
|
||||
|
||||
# [This should be removed, but it's good for demos]
|
||||
Arabic> \
|
||||
تتمتع' ' \
|
||||
اللغة' ' \
|
||||
العرببية' ' \
|
||||
ببنظم' ' \
|
||||
كتاببية' ' \
|
||||
جميلة;
|
||||
|
||||
# Main rules
|
||||
''ai<a{آ;
|
||||
ai<>آ;
|
||||
''ae<a{أ;
|
||||
ae<>أ;
|
||||
''ao<a{إ;
|
||||
ao<>إ;
|
||||
''aa<a{ا;
|
||||
aa<>ا;
|
||||
''an<a{ً;
|
||||
an<>ً;
|
||||
''a<a{َ;
|
||||
a<>َ;
|
||||
b<>ب;
|
||||
''dh<d{ذ;
|
||||
dh<>ذ;
|
||||
''dd<d{ض;
|
||||
dd<>ض;
|
||||
''d<d{د;
|
||||
d<>د;
|
||||
''e<a{ع;
|
||||
''e<w{ع;
|
||||
''e<y{ع;
|
||||
e<>ع;
|
||||
f<>ف;
|
||||
gh<>غ;
|
||||
''hh<d{ح;
|
||||
''hh<t{ح;
|
||||
''hh<k{ح;
|
||||
''hh<s{ح;
|
||||
hh<>ح;
|
||||
''h<d{ه;
|
||||
''h<t{ه;
|
||||
''h<k{ه;
|
||||
''h<s{ه;
|
||||
h<>ه;
|
||||
''ii<i{ٍ;
|
||||
ii<>ٍ;
|
||||
''i<i{ِ;
|
||||
i<>ِ;
|
||||
j<>ج;
|
||||
kh<>خ;
|
||||
k<>ك;
|
||||
l<>ل;
|
||||
''m<y{م;
|
||||
''m<t{م;
|
||||
m<>م;
|
||||
n<>ن;
|
||||
''o<a{ء;
|
||||
o<>ء;
|
||||
p<>ڤ;
|
||||
q<>ق;
|
||||
r<>ر;
|
||||
sh<>ش;
|
||||
''ss<s{ص;
|
||||
ss<>ص;
|
||||
''s<s{س;
|
||||
s<>س;
|
||||
th<>ث;
|
||||
tm<>ة;
|
||||
''tt<t{ط;
|
||||
tt<>ط;
|
||||
''t<t{ت;
|
||||
t<>ت;
|
||||
''uu<u{ٌ;
|
||||
uu<>ٌ;
|
||||
''u<u{ُ;
|
||||
u<>ُ;
|
||||
we<>ؤ;
|
||||
w<>و;
|
||||
ye<>ئ;
|
||||
ym<>ى;
|
||||
''y<y{ي;
|
||||
y<>ي;
|
||||
''zz<z{ظ;
|
||||
zz<>ظ;
|
||||
''z<z{ز;
|
||||
z<>ز;
|
||||
|
||||
# One-way Latin-Arabic compatibility rules
|
||||
c>ك;
|
||||
g>ج;
|
||||
x>كّس;
|
||||
v>ب;
|
||||
|
||||
# Digits
|
||||
0<>٠; # Arabic digit 0
|
||||
1<>١; # Arabic digit 1
|
||||
2<>٢; # Arabic digit 2
|
||||
3<>٣; # Arabic digit 3
|
||||
4<>٤; # Arabic digit 4
|
||||
5<>٥; # Arabic digit 5
|
||||
6<>٦; # Arabic digit 6
|
||||
7<>٧; # Arabic digit 7
|
||||
8<>٨; # Arabic digit 8
|
||||
9<>٩; # Arabic digit 9
|
||||
'%'<>٪; # Arabic %
|
||||
'.'<>٫; # Arabic decimal separator
|
||||
','<>٬; # Arabic thousands separator
|
||||
'*'<>٭; # Arabic five-pointed star
|
||||
|
||||
''>;
|
||||
|
||||
# eof
|
305
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Cyrillic.utf8.txt
Executable file
305
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Cyrillic.utf8.txt
Executable file
|
@ -0,0 +1,305 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:05 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Latin-Cyrillic
|
||||
|
||||
|
||||
# These rules provide general Latin-Cyrillic
|
||||
# transliteration. The standard Russian transliterations
|
||||
# are generally used for the letters from Russian,
|
||||
# with additional Cyrillic characters given consistent
|
||||
# mappings.
|
||||
|
||||
### $S_hacek=Š;
|
||||
### $s_hacek=š;
|
||||
|
||||
### $YO=Ё;
|
||||
### $J=Ј;
|
||||
### $A=А;
|
||||
### $B=Б;
|
||||
### $V=В;
|
||||
### $G=Г;
|
||||
### $D=Д;
|
||||
### $YE=Е;
|
||||
### $ZH=Ж;
|
||||
### $Z=З;
|
||||
### $YI=И;
|
||||
### $Y=Й;
|
||||
### $K=К;
|
||||
### $L=Л;
|
||||
### $M=М;
|
||||
### $N=Н;
|
||||
### $O=О;
|
||||
### $P=П;
|
||||
### $R=Р;
|
||||
### $S=С;
|
||||
### $T=Т;
|
||||
### $U=У;
|
||||
### $F=Ф;
|
||||
### $KH=Х;
|
||||
### $TS=Ц;
|
||||
### $CH=Ч;
|
||||
### $SH=Ш;
|
||||
### $SHCH=Щ;
|
||||
### $HARD=Ъ;
|
||||
### $I=Ы;
|
||||
### $SOFT=Ь;
|
||||
### $E=Э;
|
||||
### $YU=Ю;
|
||||
### $YA=Я;
|
||||
|
||||
# Lowercase
|
||||
|
||||
### $a=а;
|
||||
### $b=б;
|
||||
### $v=в;
|
||||
### $g=г;
|
||||
### $d=д;
|
||||
### $ye=е;
|
||||
### $zh=ж;
|
||||
### $z=з;
|
||||
### $yi=и;
|
||||
### $y=й;
|
||||
### $k=к;
|
||||
### $l=л;
|
||||
### $m=м;
|
||||
### $n=н;
|
||||
### $o=о;
|
||||
### $p=п;
|
||||
### $r=р;
|
||||
### $s=с;
|
||||
### $t=т;
|
||||
### $u=у;
|
||||
### $f=ф;
|
||||
### $kh=х;
|
||||
### $ts=ц;
|
||||
### $ch=ч;
|
||||
### $sh=ш;
|
||||
### $shch=щ;
|
||||
### $hard=ъ;
|
||||
### $i=ы;
|
||||
### $soft=ь;
|
||||
### $e=э;
|
||||
### $yu=ю;
|
||||
### $ya=я;
|
||||
|
||||
### $yo=ё;
|
||||
### $j=ј;
|
||||
|
||||
# variables
|
||||
# some are duplicated so lowercasing works
|
||||
|
||||
$csoft=[eiyEIY];
|
||||
$CSOFT=[eiyEIY];
|
||||
|
||||
$BECOMES_H=[Ъъ];
|
||||
$becomes_h=[Ъъ];
|
||||
|
||||
$BECOMES_S=[Сс];
|
||||
$becomes_s=[Сс];
|
||||
|
||||
$BECOMES_C=[Чч];
|
||||
$becomes_c=[Чч];
|
||||
|
||||
$BECOMES_VOWEL=[АЭЫОУаэыоу];
|
||||
$becomes_vowel=[АЭЫОУаэыоу];
|
||||
|
||||
$letter=[[:Lu:][:Ll:]];
|
||||
$lower=[[:Ll:]];
|
||||
|
||||
# Modified to combine display transliterator and typing transliterator.
|
||||
# The display mapping uses accents for the "soft" vowels.
|
||||
# It does not, although it could, use characters like š instead of digraphs
|
||||
# like sh.
|
||||
|
||||
# #############################################
|
||||
# Special titlecase forms, not duplicated
|
||||
# #############################################
|
||||
|
||||
Sh''ch<>Шч; # LIU Distinguish Шч from Щ
|
||||
|
||||
Ch <> {Ч} $lower;
|
||||
Kh <> {Х} $lower;
|
||||
Shch <> {Щ}$lower;
|
||||
Sh <> {Ш} $lower;
|
||||
Ts <> {Ц} $lower;
|
||||
Zh <> {Ж} $lower;
|
||||
Yi>И;
|
||||
Ye>Е;
|
||||
Yo>Ё;
|
||||
Yu>Ю;
|
||||
Ya>Я;
|
||||
|
||||
# #############################################
|
||||
# Rules to Duplicate
|
||||
# To get the lowercase versions, copy these and lowercase
|
||||
# #############################################
|
||||
|
||||
# variant spellings in English
|
||||
|
||||
SHTCH>Щ;
|
||||
TCH>Ч;
|
||||
TH>З;
|
||||
Q>К;
|
||||
WH>В;
|
||||
W>В;
|
||||
X>КС; #+ "X<КС;"
|
||||
|
||||
# Separate letters that would otherwise join
|
||||
|
||||
SH''<Ш}$BECOMES_C;
|
||||
T''<Т}$BECOMES_S;
|
||||
T''<Т}[ЧЩщ]; # LIU add special cases
|
||||
|
||||
K''<К}$BECOMES_H;
|
||||
S''<С}$BECOMES_H;
|
||||
T''<Т}$BECOMES_H;
|
||||
Z''<З}$BECOMES_H;
|
||||
|
||||
Y''<Й}$BECOMES_VOWEL;
|
||||
|
||||
# Main letters
|
||||
|
||||
A<>А;
|
||||
B<>Б;
|
||||
CH<>Ч;
|
||||
D<>Д;
|
||||
E<>Э;
|
||||
F<>Ф;
|
||||
G<>Г;
|
||||
Ì<>И;
|
||||
I<>Ы;
|
||||
KH<>Х;
|
||||
K<>К;
|
||||
L<>Л;
|
||||
M<>М;
|
||||
N<>Н;
|
||||
O<>О;
|
||||
P<>П;
|
||||
R<>Р;
|
||||
SHCH<>Щ;
|
||||
SH>Ш; #+ "SH<Ш;"
|
||||
Š<>Ш;
|
||||
S<>С;
|
||||
TS<>Ц;
|
||||
T<>Т;
|
||||
U<>У;
|
||||
V<>В;
|
||||
#ÌÀÈÒÙ
|
||||
YE>Е; #+ "YE<Е;"
|
||||
È<>Е;
|
||||
YO>Ё; #+ "YO<Ё;"
|
||||
Ò<>Ё;
|
||||
YU>Ю; #+ "YU<Ю;"
|
||||
Ù<>Ю;
|
||||
YA>Я; #+ "YA<Я;"
|
||||
À<>Я;
|
||||
Y<>Й;
|
||||
ZH<>Ж;
|
||||
Z<>З;
|
||||
|
||||
H<>Ъ;
|
||||
Ÿ<>Ь;
|
||||
|
||||
# Non-Russian
|
||||
|
||||
J<>Ј;
|
||||
|
||||
# variant spellings in English
|
||||
|
||||
C}$csoft>С;
|
||||
C>К;
|
||||
|
||||
# #############################################
|
||||
# Duplicated Rules
|
||||
# Copy and lowercase the above rules
|
||||
# #############################################
|
||||
|
||||
# variant spellings in english
|
||||
|
||||
shtch>щ;
|
||||
tch>ч;
|
||||
th>з;
|
||||
q>к;
|
||||
wh>в;
|
||||
w>в;
|
||||
x>кс; #+ "x<кс;"
|
||||
|
||||
# separate letters that would otherwise join
|
||||
|
||||
sh''<ш}$becomes_c;
|
||||
t''<т}$becomes_s;
|
||||
t''<т}[чщ]; # LIU add special cases
|
||||
|
||||
k''<к}$becomes_h;
|
||||
s''<с}$becomes_h;
|
||||
t''<т}$becomes_h;
|
||||
z''<з}$becomes_h;
|
||||
|
||||
y''<й}$becomes_vowel;
|
||||
|
||||
# main letters
|
||||
|
||||
a<>а;
|
||||
b<>б;
|
||||
ch<>ч;
|
||||
d<>д;
|
||||
e<>э;
|
||||
f<>ф;
|
||||
g<>г;
|
||||
ì<>и;
|
||||
i<>ы;
|
||||
kh<>х;
|
||||
k<>к;
|
||||
l<>л;
|
||||
m<>м;
|
||||
n<>н;
|
||||
o<>о;
|
||||
p<>п;
|
||||
r<>р;
|
||||
shch<>щ;
|
||||
sh>ш; #+ "sh<ш;"
|
||||
š<>ш;
|
||||
s<>с;
|
||||
ts<>ц;
|
||||
t<>т;
|
||||
u<>у;
|
||||
v<>в;
|
||||
#ìàèòù
|
||||
ye>е; #+ "ye<е;"
|
||||
è<>е;
|
||||
yo>ё; #+ "yo<ё;"
|
||||
ò<>ё;
|
||||
yu>ю; #+ "yu<ю;"
|
||||
ù<>ю;
|
||||
ya>я; #+ "ya<я;"
|
||||
à<>я;
|
||||
y<>й;
|
||||
zh<>ж;
|
||||
z<>з;
|
||||
|
||||
h<>ъ;
|
||||
ÿ<>ь;
|
||||
|
||||
# non-russian
|
||||
|
||||
j<>ј;
|
||||
|
||||
# variant spellings in english
|
||||
|
||||
c}$csoft>с;
|
||||
c>к;
|
||||
|
||||
|
||||
# #############################################
|
||||
# End of Duplicated Rules
|
||||
# #############################################
|
||||
|
||||
#generally the last rule
|
||||
''>;
|
||||
|
||||
# eof
|
BIN
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Devanagari.utf8.txt
Executable file
BIN
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Devanagari.utf8.txt
Executable file
Binary file not shown.
375
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Greek.utf8.txt
Executable file
375
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Greek.utf8.txt
Executable file
|
@ -0,0 +1,375 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:18:45 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Latin-Greek
|
||||
|
||||
# ==============================================
|
||||
# Modern Greek Transliteration Rules
|
||||
#
|
||||
# This transliterates modern Greek characters, but using rules
|
||||
# that are traditional for Ancient Greek, and
|
||||
# thus more resemble Greek words that have become part
|
||||
# of English. It differs from the official Greek
|
||||
# transliteration, which is more phonetic (since
|
||||
# most modern Greek vowels, for example, have
|
||||
# degenerated simply to sound like "ee").
|
||||
#
|
||||
# There are only a few tricky parts.
|
||||
# 1. eta and omega don't map directly to Latin vowels,
|
||||
# so we use a macron on e and o, and some
|
||||
# other combinations if they are accented.
|
||||
# 2. The accented, diaeresis i and y are substituted too.
|
||||
# 3. Some letters use digraphs, like "ph". While typical,
|
||||
# they need some special handling.
|
||||
# 4. A gamma before a gamma or a few other letters is
|
||||
# transliterated as an "n", as in "Anglo"
|
||||
# 5. An ypsilon after a vowel is a "u", as in
|
||||
# "Mouseio". Otherwise it is a "y" as in "Physikon"
|
||||
# 6. The construction of the rules is made simpler by making sure
|
||||
# that most rules for lowercase letters exactly correspond to the
|
||||
# rules for uppercase letters, *except* for the case of the letters
|
||||
# in the rule itself. That way, after modifying the uppercase rules,
|
||||
# you can just copy, paste, and "set to lowercase" to get
|
||||
# the rules for lowercase letters!
|
||||
# ==============================================
|
||||
|
||||
# ==============================================
|
||||
# Variables, used to make the rules more comprehensible
|
||||
# and for conditionals.
|
||||
# ==============================================
|
||||
|
||||
### $quote='\"';
|
||||
|
||||
# Latin Letters
|
||||
|
||||
### $E_MACRON=Ē;
|
||||
### $e_macron=ē;
|
||||
### $O_MACRON=Ō;
|
||||
### $o_macron=ō;
|
||||
### $Y_UMLAUT=Ÿ;
|
||||
### $y_umlaut=ÿ;
|
||||
|
||||
#! // with real accents.
|
||||
#! + "$E_MACRON_ACUTE=Ḗ;"
|
||||
#! + "$e_macron_acute=ḗ;"
|
||||
#! + "$O_MACRON_ACUTE=Ṓ;"
|
||||
#! + "$o_macron_acute=ṓ;"
|
||||
#! + "$y_umlaut_acute=ÿ́;"
|
||||
#! + "$u00ef_acute=ḯ;"
|
||||
#! + "$u00fc_acute=ǘ;"
|
||||
#! //
|
||||
|
||||
# single letter equivalents
|
||||
|
||||
### $E_MACRON_ACUTE=Ê;
|
||||
### $e_macron_acute=ê;
|
||||
### $O_MACRON_ACUTE=Ô;
|
||||
### $o_macron_acute=ô;
|
||||
### $y_umlaut_acute=ŷ;
|
||||
### $u00ef_acute=î;
|
||||
### $u00fc_acute=û;
|
||||
|
||||
# Greek Letters
|
||||
|
||||
### $ALPHA=Α;
|
||||
### $BETA=Β;
|
||||
### $GAMMA=Γ;
|
||||
### $DELTA=Δ;
|
||||
### $EPSILON=Ε;
|
||||
### $ZETA=Ζ;
|
||||
### $ETA=Η;
|
||||
### $THETA=Θ;
|
||||
### $IOTA=Ι;
|
||||
### $KAPPA=Κ;
|
||||
### $LAMBDA=Λ;
|
||||
### $MU=Μ;
|
||||
### $NU=Ν;
|
||||
### $XI=Ξ;
|
||||
### $OMICRON=Ο;
|
||||
### $PI=Π;
|
||||
### $RHO=Ρ;
|
||||
### $SIGMA=Σ;
|
||||
### $TAU=Τ;
|
||||
### $YPSILON=Υ;
|
||||
### $PHI=Φ;
|
||||
### $CHI=Χ;
|
||||
### $PSI=Ψ;
|
||||
### $OMEGA=Ω;
|
||||
|
||||
### $ALPHA2=Ά;
|
||||
### $EPSILON2=Έ;
|
||||
### $ETA2=Ή;
|
||||
### $IOTA2=Ί;
|
||||
### $OMICRON2=Ό;
|
||||
### $YPSILON2=Ύ;
|
||||
### $OMEGA2=Ώ;
|
||||
### $IOTA_DIAERESIS=Ϊ;
|
||||
### $YPSILON_DIAERESIS=Ϋ;
|
||||
|
||||
### $alpha=α;
|
||||
### $beta=β;
|
||||
### $gamma=γ;
|
||||
### $delta=δ;
|
||||
### $epsilon=ε;
|
||||
### $zeta=ζ;
|
||||
### $eta=η;
|
||||
### $theta=θ;
|
||||
### $iota=ι;
|
||||
### $kappa=κ;
|
||||
### $lambda=λ;
|
||||
### $mu=μ;
|
||||
### $nu=ν;
|
||||
### $xi=ξ;
|
||||
### $omicron=ο;
|
||||
### $pi=π;
|
||||
### $rho=ρ;
|
||||
### $sigma=σ;
|
||||
### $tau=τ;
|
||||
### $ypsilon=υ;
|
||||
### $phi=φ;
|
||||
### $chi=χ;
|
||||
### $psi=ψ;
|
||||
### $omega=ω;
|
||||
|
||||
#forms
|
||||
|
||||
### $alpha2=ά;
|
||||
### $epsilon2=έ;
|
||||
### $eta2=ή;
|
||||
### $iota2=ί;
|
||||
### $omicron2=ό;
|
||||
### $ypsilon2=ύ;
|
||||
### $omega2=ώ;
|
||||
### $iota_diaeresis=ϊ;
|
||||
### $ypsilon_diaeresis=ϋ;
|
||||
### $iota_diaeresis2=ΐ;
|
||||
### $ypsilon_diaeresis2=ΰ;
|
||||
### $sigma2=ς;
|
||||
|
||||
# Variables for conditional mappings
|
||||
|
||||
# Use lowercase for all variable names, to allow cut/paste below.
|
||||
|
||||
$letter=[~[:Lu:][:Ll:]];
|
||||
$lower=[[:Ll:]];
|
||||
$softener=[eiyEIY];
|
||||
$vowel=[aeiouAEIOU \
|
||||
ΑΕΗΙΟΥΩ \
|
||||
ΆΈΉΊΌΎΏ \
|
||||
ΪΫ \
|
||||
αεηιουω \
|
||||
άέήίόύώ \
|
||||
ϊϋ \
|
||||
ΐΰ \
|
||||
];
|
||||
$n_gamma=[GKXCgkxc];
|
||||
$gamma_n=[ΓΚΧΞγκχξ];
|
||||
$pp=[Pp];
|
||||
|
||||
# ==============================================
|
||||
# Rules
|
||||
# ==============================================
|
||||
# The following are special titlecases, and should
|
||||
# not be copied when duplicating the lowercase
|
||||
# ==============================================
|
||||
|
||||
Th <> Θ}$lower;
|
||||
Ph <> Φ}$lower;
|
||||
Ch <> Χ}$lower;
|
||||
#masked: + "Ps<Φ}$lower;"
|
||||
|
||||
# Because there are no uppercase forms for final sigma,
|
||||
# we had to move all the sigma rules up here.
|
||||
|
||||
# Remember to insert ' to preserve round trip, for double letters
|
||||
# don't need to do this for the digraphs with h,
|
||||
# since it is not created when mapping back from greek
|
||||
|
||||
# use special form for s
|
||||
|
||||
''S <> $pp{Σ; # handle PS
|
||||
S <> Σ;
|
||||
|
||||
# The following are a bit tricky. 's' takes two forms in greek
|
||||
# final or non final.
|
||||
# We use ~s to represent the abnormal form: final before letter
|
||||
# or non-final before non-letter.
|
||||
# We use 's to separate p and s (otherwise ps is one letter)
|
||||
# so, we break out the following forms:
|
||||
|
||||
''s < $pp{σ}$letter;
|
||||
s < σ}$letter;
|
||||
'~'s < σ;
|
||||
|
||||
'~'s < ς}$letter;
|
||||
''s < $pp{ς;
|
||||
s < ς;
|
||||
|
||||
'~'s }$letter>ς;
|
||||
'~'s > σ;
|
||||
''s }$letter>σ;
|
||||
''s > ς;
|
||||
s }$letter>σ;
|
||||
s > ς;
|
||||
|
||||
# because there are no uppercase forms, had to move these up too.
|
||||
|
||||
i'\"''`'>ΐ;
|
||||
y'\"''`'>ΰ;
|
||||
|
||||
î<>ΐ;
|
||||
û<>$vowel{ΰ;
|
||||
ŷ<>ΰ;
|
||||
|
||||
# ==============================================
|
||||
# Uppercase Forms.
|
||||
# To make lowercase forms, just copy and lowercase below
|
||||
# ==============================================
|
||||
|
||||
# Typing variants, in case the keyboard doesn't have accents
|
||||
|
||||
'A`'>Ά;
|
||||
'E`'>Έ;
|
||||
'EE`'>Ή;
|
||||
EE>Η;
|
||||
'I`'>Ί;
|
||||
'O`'>Ό;
|
||||
'OO`'>Ώ;
|
||||
OO>Ω;
|
||||
I'\"'>Ϊ;
|
||||
Y'\"'>Ϋ;
|
||||
|
||||
# Basic Letters
|
||||
|
||||
A<>Α;
|
||||
Á<>Ά;
|
||||
B<>Β;
|
||||
N }$n_gamma<>Γ}$gamma_n;
|
||||
G<>Γ;
|
||||
D<>Δ;
|
||||
''E <> [Ee]{Ε; # handle EE
|
||||
E<>Ε;
|
||||
É<>Έ;
|
||||
Z<>Ζ;
|
||||
Ê<>Ή;
|
||||
Ē<>Η;
|
||||
TH<>Θ;
|
||||
I<>Ι;
|
||||
Í<>Ί;
|
||||
Ï<>Ϊ;
|
||||
K<>Κ;
|
||||
L<>Λ;
|
||||
M<>Μ;
|
||||
N'' <> Ν}$gamma_n;
|
||||
N<>Ν;
|
||||
X<>Ξ;
|
||||
''O <> [Oo]{ Ο; # handle OO
|
||||
O<>Ο;
|
||||
Ó<>Ό;
|
||||
PH<>Φ; # needs ordering before P
|
||||
PS<>Ψ; # needs ordering before P
|
||||
P<>Π;
|
||||
R<>Ρ;
|
||||
T<>Τ;
|
||||
U <> $vowel{Υ;
|
||||
Ú <> $vowel{Ύ;
|
||||
Ü <> $vowel{Ϋ;
|
||||
Y<>Υ;
|
||||
Ý<>Ύ;
|
||||
Ÿ<>Ϋ;
|
||||
CH<>Χ;
|
||||
Ô<>Ώ;
|
||||
Ō<>Ω;
|
||||
|
||||
# Extra English Letters. Mapped for completeness
|
||||
|
||||
C}$softener>|S;
|
||||
C>|K;
|
||||
F>|PH;
|
||||
H>|CH;
|
||||
J>|I;
|
||||
Q>|K;
|
||||
V>|U;
|
||||
W>|U;
|
||||
|
||||
# ==============================================
|
||||
# Lowercase Forms. Just copy above and lowercase
|
||||
# ==============================================
|
||||
|
||||
# typing variants, in case the keyboard doesn't have accents
|
||||
|
||||
'a`'>ά;
|
||||
'e`'>έ;
|
||||
'ee`'>ή;
|
||||
ee>η;
|
||||
'i`'>ί;
|
||||
'o`'>ό;
|
||||
'oo`'>ώ;
|
||||
oo>ω;
|
||||
i'\"'>ϊ;
|
||||
y'\"'>ϋ;
|
||||
|
||||
# basic letters
|
||||
|
||||
a<>α;
|
||||
á<>ά;
|
||||
b<>β;
|
||||
n }$n_gamma<>γ}$gamma_n;
|
||||
g<>γ;
|
||||
d<>δ;
|
||||
''e <> [Ee]{ε; # handle EE
|
||||
e<>ε;
|
||||
é<>έ;
|
||||
z<>ζ;
|
||||
ê<>ή;
|
||||
ē<>η;
|
||||
th<>θ;
|
||||
i<>ι;
|
||||
í<>ί;
|
||||
ï<>ϊ;
|
||||
k<>κ;
|
||||
l<>λ;
|
||||
m<>μ;
|
||||
n'' <> ν}$gamma_n;
|
||||
n<>ν;
|
||||
x<>ξ;
|
||||
''o <> [Oo]{ ο; # handle OO
|
||||
o<>ο;
|
||||
ó<>ό;
|
||||
ph<>φ; # needs ordering before p
|
||||
ps<>ψ; # needs ordering before p
|
||||
p<>π;
|
||||
r<>ρ;
|
||||
t<>τ;
|
||||
u <> $vowel{υ;
|
||||
ú <> $vowel{ύ;
|
||||
ü <> $vowel{ϋ;
|
||||
y<>υ;
|
||||
ý<>ύ;
|
||||
ÿ<>ϋ;
|
||||
ch<>χ;
|
||||
ô<>ώ;
|
||||
ō<>ω;
|
||||
|
||||
# extra english letters. mapped for completeness
|
||||
|
||||
c}$softener>|s;
|
||||
c>|k;
|
||||
f>|ph;
|
||||
h>|ch;
|
||||
j>|i;
|
||||
q>|k;
|
||||
v>|u;
|
||||
w>|u;
|
||||
|
||||
# ====================================
|
||||
# Normal final rule: remove '
|
||||
# ====================================
|
||||
|
||||
#+ "''>;"
|
||||
|
||||
# eof
|
216
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Hebrew.utf8.txt
Executable file
216
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Hebrew.utf8.txt
Executable file
|
@ -0,0 +1,216 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:06 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Latin-Hebrew
|
||||
|
||||
|
||||
# Variable names, derived from the Unicode names.
|
||||
### $POINT_SHEVA=ְ;
|
||||
### $POINT_HATAF_SEGOL=ֱ;
|
||||
### $POINT_HATAF_PATAH=ֲ;
|
||||
### $POINT_HATAF_QAMATS=ֳ;
|
||||
### $POINT_HIRIQ=ִ;
|
||||
### $POINT_TSERE=ֵ;
|
||||
### $POINT_SEGOL=ֶ;
|
||||
### $POINT_PATAH=ַ;
|
||||
### $POINT_QAMATS=ָ;
|
||||
### $POINT_HOLAM=ֹ;
|
||||
### $POINT_QUBUTS=ֻ;
|
||||
### $POINT_DAGESH_OR_MAPIQ=ּ;
|
||||
### $POINT_METEG=ֽ;
|
||||
### $PUNCTUATION_MAQAF=־;
|
||||
### $POINT_RAFE=ֿ;
|
||||
### $PUNCTUATION_PASEQ=׀;
|
||||
### $POINT_SHIN_DOT=ׁ;
|
||||
### $POINT_SIN_DOT=ׂ;
|
||||
### $PUNCTUATION_SOF_PASUQ=׃;
|
||||
### $ALEF=א;
|
||||
### $BET=ב;
|
||||
### $GIMEL=ג;
|
||||
### $DALET=ד;
|
||||
### $HE=ה;
|
||||
### $VAV=ו;
|
||||
### $ZAYIN=ז;
|
||||
### $HET=ח;
|
||||
### $TET=ט;
|
||||
### $YOD=י;
|
||||
### $FINAL_KAF=ך;
|
||||
### $KAF=כ;
|
||||
### $LAMED=ל;
|
||||
### $FINAL_MEM=ם;
|
||||
### $MEM=מ;
|
||||
### $FINAL_NUN=ן;
|
||||
### $NUN=נ;
|
||||
### $SAMEKH=ס;
|
||||
### $AYIN=ע;
|
||||
### $FINAL_PE=ף;
|
||||
### $PE=פ;
|
||||
### $FINAL_TSADI=ץ;
|
||||
### $TSADI=צ;
|
||||
### $QOF=ק;
|
||||
### $RESH=ר;
|
||||
### $SHIN=ש;
|
||||
### $TAV=ת;
|
||||
### $YIDDISH_DOUBLE_VAV=װ;
|
||||
### $YIDDISH_VAV_YOD=ױ;
|
||||
### $YIDDISH_DOUBLE_YOD=ײ;
|
||||
### $PUNCTUATION_GERESH=׳;
|
||||
### $PUNCTUATION_GERSHAYIM=״;
|
||||
|
||||
$letter=[a-zA-Z];
|
||||
$softvowel=[eiyEIY];
|
||||
$vowellike=[אעיו];
|
||||
$hebrew=[-]; # the whole block -liu
|
||||
|
||||
# [Why is this a special case? -liu]
|
||||
k''h <> כ ה ;
|
||||
|
||||
# Mark non-final forms in final position as x~ -liu
|
||||
k < כ } $hebrew ;
|
||||
m < מ } $hebrew ;
|
||||
n < נ } $hebrew ;
|
||||
p < פ } $hebrew ;
|
||||
ts < צ } $hebrew ;
|
||||
k'~' <> כ ;
|
||||
m'~' <> מ ;
|
||||
n'~' <> נ ;
|
||||
p'~' <> פ ;
|
||||
ts'~'<> צ ;
|
||||
|
||||
# Mark final forms in non-final position as x^ -liu
|
||||
k'^' <> ך } $hebrew ;
|
||||
m'^' <> ם } $hebrew ;
|
||||
n'^' <> ן } $hebrew ;
|
||||
p'^' <> ף } $hebrew ;
|
||||
ts'^'<> ץ } $hebrew ;
|
||||
k < ך;
|
||||
m < ם;
|
||||
n < ן;
|
||||
p < ף;
|
||||
ts < ץ;
|
||||
|
||||
# Main rules
|
||||
a<>א;
|
||||
A>א;
|
||||
|
||||
b<>ב;
|
||||
B>ב;
|
||||
|
||||
c}$softvowel>ס;
|
||||
C}$softvowel>ס;
|
||||
c}$letter>כ;
|
||||
C}$letter>כ;
|
||||
c>ך;
|
||||
C>ך;
|
||||
|
||||
d<>ד;
|
||||
D>ד;
|
||||
|
||||
e<>ע;
|
||||
E>ע;
|
||||
|
||||
f}$letter>פ;
|
||||
f>ף;
|
||||
F}$letter>פ;
|
||||
F>ף;
|
||||
|
||||
g<>ג;
|
||||
G>ג;
|
||||
|
||||
h<>ה;
|
||||
H>ה;
|
||||
|
||||
i>י;
|
||||
I>י;
|
||||
|
||||
j>דש;
|
||||
J>דש;
|
||||
|
||||
kh<>ח;
|
||||
kH>ח;
|
||||
Kh>ח;
|
||||
KH>ח;
|
||||
k}$letter>כ;
|
||||
K}$letter>כ;
|
||||
k>ך;
|
||||
K>ך;
|
||||
|
||||
l<>ל;
|
||||
L>ל;
|
||||
|
||||
m}$letter>מ;
|
||||
m>ם;
|
||||
M}$letter>מ;
|
||||
M>ם;
|
||||
|
||||
n}$letter>נ;
|
||||
n>ן;
|
||||
N}$letter>נ;
|
||||
N>ן;
|
||||
|
||||
o>ו;
|
||||
O>ו;
|
||||
|
||||
p}$letter>פ;
|
||||
p>ף;
|
||||
P}$letter>פ;
|
||||
P>ף;
|
||||
|
||||
q<>ק;
|
||||
Q>ק;
|
||||
|
||||
r<>ר;
|
||||
R>ר;
|
||||
|
||||
sh<>ש;
|
||||
sH>ש;
|
||||
Sh>ש;
|
||||
SH>ש;
|
||||
s''<ס}ה;
|
||||
s<>ס;
|
||||
S>ס;
|
||||
|
||||
th<>ת;
|
||||
tH>ת;
|
||||
Th>ת;
|
||||
TH>ת;
|
||||
tS}$letter>צ;
|
||||
ts}$letter>צ;
|
||||
Ts}$letter>צ;
|
||||
TS}$letter>צ;
|
||||
tS>ץ;
|
||||
ts>ץ;
|
||||
Ts>ץ;
|
||||
TS>ץ;
|
||||
t''<ט}[ה ס ש];
|
||||
t<>ט;
|
||||
T>ט;
|
||||
|
||||
v<ו}$vowellike;
|
||||
u<>ו;
|
||||
U>ו;
|
||||
|
||||
v>ו;
|
||||
V>ו;
|
||||
|
||||
w>ו;
|
||||
W>ו;
|
||||
|
||||
x>כס;
|
||||
X>כס;
|
||||
|
||||
y<>י;
|
||||
Y>י;
|
||||
|
||||
z<>ז;
|
||||
Z>ז;
|
||||
|
||||
# Delete stray apostrophes
|
||||
''>;
|
||||
<'';
|
||||
|
||||
# eof
|
742
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Jamo.utf8.txt
Executable file
742
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Jamo.utf8.txt
Executable file
|
@ -0,0 +1,742 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:18:45 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Latin-Jamo
|
||||
|
||||
|
||||
# VARIABLES
|
||||
|
||||
$initial=[ᄀ-ᅟ];
|
||||
$INITIAL=[bcdghjklmnpst];
|
||||
$medial=[ᅠ-ᆧ];
|
||||
$MEDIAL=[aeiou]; # as a left context
|
||||
$comp_med=[ᅠᅶ-ᆧ]; # compound medials and filler
|
||||
$final=[ᆨ-ᇹ]; # added - aliu
|
||||
$vowel=[aeiouwy$medial];
|
||||
# following line used to read "..$medial$final]"
|
||||
# assume this was a typo - liu
|
||||
$consonant=[bcdfghjklmnpqrstvxz$initial$final];
|
||||
$ye_=[yeYE];
|
||||
$ywe_=[yweYWE];
|
||||
$yw_=[ywYW];
|
||||
$nl_=[nlNL];
|
||||
$gnl_=[gnlGNL];
|
||||
$lsgb_=[lsgbLSGB];
|
||||
$ywao_=[ywaoYWAO];
|
||||
$bl_=[blBL];
|
||||
|
||||
### $ieung = ᄋ;
|
||||
|
||||
# RULES
|
||||
|
||||
# Hangul structure is IMF or IM
|
||||
# So you can have, because of adjacent sequences
|
||||
# IM, but not II or IF
|
||||
# MF or MI, but not MM
|
||||
# FI, but not FF or FM
|
||||
|
||||
# For English, we just have C or V.
|
||||
# To generate valid Hangul:
|
||||
# Vowels:
|
||||
# We insert IEUNG between VV, and otherwise map V to M
|
||||
# We also insert IEUNG if there is no initial in front of the vowel
|
||||
# Consonants:
|
||||
# We don't break doubles
|
||||
# Cases like lmgg, we have to break at lm
|
||||
# So to guess whether a consonant is I or F
|
||||
# we map all C's to F, except when followed by a vowel, e.g.
|
||||
# X[{vowel}>CHOSEONG (initial)
|
||||
# X>JONGSEONG (final)
|
||||
|
||||
# Below, insert an empty consonant in front of a vowel, if there is no Initial in front.
|
||||
|
||||
|
||||
# General strategy.
|
||||
#
|
||||
# 1. We support both the normal Jamo block, 1100 - 11FF, and the
|
||||
# compatibility block, 3130 - 318F. The former uses lowercase latin;
|
||||
# the latter uses uppercase. See notes below for details of the
|
||||
# compatibility block. Remaining items in this list pertain to the
|
||||
# normal Jamo block.
|
||||
#
|
||||
# 2. Canonical syllables should transliterate without special
|
||||
# characters. Canonical syllables are either IMF or IM.
|
||||
#
|
||||
# 3. We want to support round-trip integrity from jamo to latin and back
|
||||
# to Jamo. To do this we have to mark the jamo with special characters
|
||||
# when they occur in non-canonical positions.
|
||||
#
|
||||
# 4. When initial jamo occur in a non-canonical position, they are
|
||||
# marked with a leading '['.
|
||||
#
|
||||
# 5. When final jamo occur in a non-canonical position, they are marked
|
||||
# with a trailing ']'.
|
||||
#
|
||||
# 6. When medial jamo occur in a non-canonical position, they are marked
|
||||
# with a leading '~'.
|
||||
#
|
||||
# 7. Compound jamo characters are handled by enclosing them in
|
||||
# parentheses. Initials are '((x)', medials are '(x)', and finals are
|
||||
# '(x))'.
|
||||
#
|
||||
# 8. Disambiguation of 'g' + 'g' vs. 'gg' is accomplished by inserting a
|
||||
# '' character between them.
|
||||
#
|
||||
# 9. IEUNG is used to mark medials not occurring after initials.
|
||||
# Isolated IEUNG is transliterated as a back tick.
|
||||
#
|
||||
# 10. Some old special case and completeness rules have been commented
|
||||
# out. These can be reintroduced (and the existing rules modified as
|
||||
# needed) so long as round-trip integrity is maintained.
|
||||
|
||||
# We use the uppercase latin letters for the compatibility Jamo
|
||||
# U+3130 - U+318F. The following rules are generated
|
||||
# programmatically by a perl script that analyzes the Unicode
|
||||
# database. These rules are much simpler because there are no
|
||||
# separate code points for initial vs. final consonants, so no
|
||||
# contextual rules are needed. The one wrinkle is, as usual, the
|
||||
# need to distinguish doubles from two singles, that is, GG vs G G.
|
||||
# The perl script finds these special cases by exhaustive search and
|
||||
# adds only the minimal rules needed to resolve these cases. The one
|
||||
# modification that is made by hand is to replace '' with '/' so as
|
||||
# not to conflict with the normal IEUNG in the standard Jamo range. -
|
||||
# liu
|
||||
A '' <> {ㅏ} [ㅓㅡㅔ];
|
||||
B '' <> {ㅂ} [ㅂㅃ];
|
||||
D '' <> {ㄷ} [ㄷㄸ];
|
||||
E '' <> {ㅔ} [ㅚㅗㅜ];
|
||||
G '' <> {ㄱ} [ㄲㄳㄱㅆㅅ];
|
||||
J '' <> {ㅈ} [ㅉㅈ];
|
||||
L '' <> {ㄹ} [ㄲㄳㄱㅁㅂㅃㅆㅅㅌㅍ];
|
||||
N '' <> {ㄴ} [ㅉㅈㅎ];
|
||||
O '' <> {ㅗ} [ㅓㅡㅔ];
|
||||
S '' <> {ㅅ} [ㅆㅅ];
|
||||
WA '' <> {ㅘ} [ㅓㅡㅔ];
|
||||
WE '' <> {ㅞ} [ㅚㅗ];
|
||||
YA '' <> {ㅑ} [ㅓㅡㅔ];
|
||||
YE '' <> {ㅖ} [ㅚㅗ];
|
||||
YU <> ㅠ;
|
||||
YO <> ㅛ;
|
||||
YI <> ㅢ;
|
||||
YEO <> ㅕ;
|
||||
YE <> ㅖ;
|
||||
YAE <> ㅒ;
|
||||
YA <> ㅑ;
|
||||
WI <> ㅟ;
|
||||
WEO <> ㅝ;
|
||||
WE <> ㅞ;
|
||||
WAE <> ㅙ;
|
||||
WA <> ㅘ;
|
||||
U <> ㅜ;
|
||||
T <> ㅌ;
|
||||
S S <> ㅆ;
|
||||
S <> ㅅ;
|
||||
P <> ㅍ;
|
||||
OE <> ㅚ;
|
||||
O <> ㅗ;
|
||||
N J <> ㄵ;
|
||||
N H <> ㄶ;
|
||||
N <> ㄴ;
|
||||
M <> ㅁ;
|
||||
L T <> ㄾ;
|
||||
L S <> ㄽ;
|
||||
L P <> ㄿ;
|
||||
L M <> ㄻ;
|
||||
L G <> ㄺ;
|
||||
L B <> ㄼ;
|
||||
L <> ㄹ;
|
||||
K <> ㅋ;
|
||||
J J <> ㅉ;
|
||||
J <> ㅈ;
|
||||
I <> ㅣ;
|
||||
H <> ㅎ;
|
||||
G S <> ㄳ;
|
||||
G G <> ㄲ;
|
||||
G <> ㄱ;
|
||||
EU <> ㅡ;
|
||||
EO <> ㅓ;
|
||||
E <> ㅔ;
|
||||
D D <> ㄸ;
|
||||
D <> ㄷ;
|
||||
C <> ㅊ;
|
||||
B B <> ㅃ;
|
||||
B <> ㅂ;
|
||||
AE <> ㅐ;
|
||||
A <> ㅏ;
|
||||
'/' <> ㅇ;
|
||||
'(' YU YEO ')' <> ㆊ;
|
||||
'(' YU YE ')' <> ㆋ;
|
||||
'(' YU I ')' <> ㆌ;
|
||||
'(' YR ')' <> ㆆ;
|
||||
'(' YO YAE ')' <> ㆈ;
|
||||
'(' YO YA ')' <> ㆇ;
|
||||
'(' YO I ')' <> ㆉ;
|
||||
'(' YES S ')' <> ㆂ;
|
||||
'(' YES PAN ')' <> ㆃ;
|
||||
'(' YES ')' <> ㆁ;
|
||||
'(' S N ')' <> ㅻ;
|
||||
'(' S J ')' <> ㅾ;
|
||||
'(' S G ')' <> ㅺ;
|
||||
'(' S D ')' <> ㅼ;
|
||||
'(' S B ')' <> ㅽ;
|
||||
'(' PAN ')' <> ㅿ;
|
||||
'(' P '' ')' <> ㆄ;
|
||||
'(' N S ')' <> ㅧ;
|
||||
'(' N PAN ')' <> ㅨ;
|
||||
'(' N N ')' <> ㅥ;
|
||||
'(' N D ')' <> ㅦ;
|
||||
'(' M S ')' <> ㅯ;
|
||||
'(' M PAN ')' <> ㅰ;
|
||||
'(' M B ')' <> ㅮ;
|
||||
'(' M '' ')' <> ㅱ;
|
||||
'(' L YR ')' <> ㅭ;
|
||||
'(' L PAN ')' <> ㅬ;
|
||||
'(' L H ')' <> ㅀ;
|
||||
'(' L G S ')' <> ㅩ;
|
||||
'(' L D ')' <> ㅪ;
|
||||
'(' L B S ')' <> ㅫ;
|
||||
'(' HJF ')' <> ㅤ;
|
||||
'(' H H ')' <> ㆅ;
|
||||
'(' B T ')' <> ㅷ;
|
||||
'(' B S G ')' <> ㅴ;
|
||||
'(' B S D ')' <> ㅵ;
|
||||
'(' B S ')' <> ㅄ;
|
||||
'(' B J ')' <> ㅶ;
|
||||
'(' B G ')' <> ㅲ;
|
||||
'(' B D ')' <> ㅳ;
|
||||
'(' B B '' ')' <> ㅹ;
|
||||
'(' B '' ')' <> ㅸ;
|
||||
'(' AR I ')' <> ㆎ;
|
||||
'(' AR ')' <> ㆍ;
|
||||
'(' '' '' ')' <> ㆀ;
|
||||
|
||||
# APOSTROPHE
|
||||
|
||||
# As always, an apostrophe is used to separate digraphs into
|
||||
# singles. That is, if you really wanted [KAN][GGAN], instead
|
||||
# of [KANG][GAN] you would write "kan'ggan".
|
||||
|
||||
# Rules for inserting ' when mapping separated digraphs back
|
||||
# from Hangul to Latin. Catch every letter that can be the
|
||||
# LAST of a digraph (or multigraph) AND first of an initial
|
||||
|
||||
# special insertion for funny sequences of vowels, and for empty consonant
|
||||
|
||||
# + "'' < l{ }ᇀ;" // hangul jongseong thieuth
|
||||
# + "'' < $lsgb_{}ᆺ;" // hangul jongseong sios
|
||||
# + "'' < l{ }ᇁ;" // hangul jongseong phieuph
|
||||
# + "'' < l{ }ᆷ;" // hangul jongseong mieum
|
||||
# + "'' < n{ }ᆽ;" // hangul jongseong cieuc
|
||||
# + "'' < $nl_{}ᇂ;" // hangul jongseong hieuh
|
||||
# + "'' < $gnl_{}ᆩ;" // hangul jongseong ssangkiyeok
|
||||
# + "'' < $bl_{}ᆸ;" // hangul jongseong pieup
|
||||
# + "'' < d{ }ᆮ;" // hangul jongseong tikeut
|
||||
#
|
||||
# + "'' < $ye_{}ᅮ;" // hangul jungseong u
|
||||
# + "'' < $ywe_{}ᅩ;" // hangul jungseong o
|
||||
# + "'' < $yw_{}ᅵ;" // hangul jungseong i
|
||||
# + "'' < $ywao_{}ᅦ;" // hangul jungseong e
|
||||
# + "'' < $yw_{}ᅡ;" // hangul jungseong a
|
||||
#
|
||||
# + "'' < l{ }ᄐ;" // hangul choseong thieuth
|
||||
# + "'' < $lsgb_{}ᄊ;" // hangul choseong ssangsios
|
||||
# + "'' < $lsgb_{}ᄉ;" // hangul choseong sios
|
||||
# + "'' < l{ }ᄑ;" // hangul choseong phieuph
|
||||
# + "'' < l{ }ᄆ;" // hangul choseong mieum
|
||||
# + "'' < n{ }ᄌ;" // hangul choseong cieuc
|
||||
# + "'' < n{ }ᄍ;"
|
||||
# + "'' < $nl_{}ᄒ;" // hangul choseong hieuh
|
||||
# + "'' < $gnl_{}ᄁ;" // hangul choseong ssangkiyeok
|
||||
# + "'' < $gnl_{}ᄀ;" // hangul choseong kiyeok
|
||||
# + "'' < d{ }ᄃ;" // hangul choseong tikeut
|
||||
# + "'' < d{ }ᄄ;"
|
||||
# + "'' < $bl_{}ᄇ;" // hangul choseong pieup
|
||||
# + "'' < $bl_{}ᄈ;"
|
||||
|
||||
# We transliterate the compound Jamo code points using ((x) for
|
||||
# initials, (x) for medials, and (x)) for finals. - liu
|
||||
'((' n g ')' <> ᄓ;
|
||||
'((' n n ')' <> ᄔ;
|
||||
'((' n d ')' <> ᄕ;
|
||||
'((' n b ')' <> ᄖ;
|
||||
'((' d g ')' <> ᄗ;
|
||||
'((' l n ')' <> ᄘ;
|
||||
'((' l l ')' <> ᄙ;
|
||||
'((' l h ')' <> ᄚ;
|
||||
'((' l '' ')' <> ᄛ;
|
||||
'((' m b ')' <> ᄜ;
|
||||
'((' m '' ')' <> ᄝ;
|
||||
'((' b g ')' <> ᄞ;
|
||||
'((' b n ')' <> ᄟ;
|
||||
'((' b d ')' <> ᄠ;
|
||||
'((' b s ')' <> ᄡ;
|
||||
'((' b s g ')' <> ᄢ;
|
||||
'((' b s d ')' <> ᄣ;
|
||||
'((' b s b ')' <> ᄤ;
|
||||
'((' b s s ')' <> ᄥ;
|
||||
'((' b s j ')' <> ᄦ;
|
||||
'((' b j ')' <> ᄧ;
|
||||
'((' b c ')' <> ᄨ;
|
||||
'((' b t ')' <> ᄩ;
|
||||
'((' b p ')' <> ᄪ;
|
||||
'((' b '' ')' <> ᄫ;
|
||||
'((' b b '' ')' <> ᄬ;
|
||||
'((' s g ')' <> ᄭ;
|
||||
'((' s n ')' <> ᄮ;
|
||||
'((' s d ')' <> ᄯ;
|
||||
'((' s l ')' <> ᄰ;
|
||||
'((' s m ')' <> ᄱ;
|
||||
'((' s b ')' <> ᄲ;
|
||||
'((' s b g ')' <> ᄳ;
|
||||
'((' s s s ')' <> ᄴ;
|
||||
'((' s '' ')' <> ᄵ;
|
||||
'((' s j ')' <> ᄶ;
|
||||
'((' s c ')' <> ᄷ;
|
||||
'((' s k ')' <> ᄸ;
|
||||
'((' s t ')' <> ᄹ;
|
||||
'((' s p ')' <> ᄺ;
|
||||
'((' s h ')' <> ᄻ;
|
||||
'((' chs ')' <> ᄼ;
|
||||
'((' chs chs ')' <> ᄽ;
|
||||
'((' ces ')' <> ᄾ;
|
||||
'((' ces ces ')' <> ᄿ;
|
||||
'((' pan ')' <> ᅀ;
|
||||
'((' '' g ')' <> ᅁ;
|
||||
'((' '' d ')' <> ᅂ;
|
||||
'((' '' m ')' <> ᅃ;
|
||||
'((' '' b ')' <> ᅄ;
|
||||
'((' '' s ')' <> ᅅ;
|
||||
'((' '' pan ')' <> ᅆ;
|
||||
'((' '' '' ')' <> ᅇ;
|
||||
'((' '' j ')' <> ᅈ;
|
||||
'((' '' c ')' <> ᅉ;
|
||||
'((' '' t ')' <> ᅊ;
|
||||
'((' '' p ')' <> ᅋ;
|
||||
'((' yes ')' <> ᅌ;
|
||||
'((' j '' ')' <> ᅍ;
|
||||
'((' chc ')' <> ᅎ;
|
||||
'((' chc chc ')' <> ᅏ;
|
||||
'((' cec ')' <> ᅐ;
|
||||
'((' cec cec ')' <> ᅑ;
|
||||
'((' c k ')' <> ᅒ;
|
||||
'((' c h ')' <> ᅓ;
|
||||
'((' cch ')' <> ᅔ;
|
||||
'((' ceh ')' <> ᅕ;
|
||||
'((' p b ')' <> ᅖ;
|
||||
'((' p '' ')' <> ᅗ;
|
||||
'((' h h ')' <> ᅘ;
|
||||
'((' yr ')' <> ᅙ;
|
||||
'((' hcf ')' <> ᅟ;
|
||||
'(' ahjf ')' <> ᅠ; # must start with vowel, hence 'a' + hjf
|
||||
'(' a o ')' <> ᅶ;
|
||||
'(' a u ')' <> ᅷ;
|
||||
'(' ya o ')' <> ᅸ;
|
||||
'(' ya yo ')' <> ᅹ;
|
||||
'(' eo o ')' <> ᅺ;
|
||||
'(' eo u ')' <> ᅻ;
|
||||
'(' eo eu ')' <> ᅼ;
|
||||
'(' yeo o ')' <> ᅽ;
|
||||
'(' yeo u ')' <> ᅾ;
|
||||
'(' o eo ')' <> ᅿ;
|
||||
'(' o e ')' <> ᆀ;
|
||||
'(' o ye ')' <> ᆁ;
|
||||
'(' o o ')' <> ᆂ;
|
||||
'(' o u ')' <> ᆃ;
|
||||
'(' yo ya ')' <> ᆄ;
|
||||
'(' yo yae ')' <> ᆅ;
|
||||
'(' yo yeo ')' <> ᆆ;
|
||||
'(' yo o ')' <> ᆇ;
|
||||
'(' yo i ')' <> ᆈ;
|
||||
'(' u a ')' <> ᆉ;
|
||||
'(' u ae ')' <> ᆊ;
|
||||
'(' u eo eu ')' <> ᆋ;
|
||||
'(' u ye ')' <> ᆌ;
|
||||
'(' u u ')' <> ᆍ;
|
||||
'(' yu a ')' <> ᆎ;
|
||||
'(' yu eo ')' <> ᆏ;
|
||||
'(' yu e ')' <> ᆐ;
|
||||
'(' yu yeo ')' <> ᆑ;
|
||||
'(' yu ye ')' <> ᆒ;
|
||||
'(' yu u ')' <> ᆓ;
|
||||
'(' yu i ')' <> ᆔ;
|
||||
'(' eu u ')' <> ᆕ;
|
||||
'(' eu eu ')' <> ᆖ;
|
||||
'(' yi u ')' <> ᆗ;
|
||||
'(' i a ')' <> ᆘ;
|
||||
'(' i ya ')' <> ᆙ;
|
||||
'(' i o ')' <> ᆚ;
|
||||
'(' i u ')' <> ᆛ;
|
||||
'(' i eu ')' <> ᆜ;
|
||||
'(' i ar ')' <> ᆝ;
|
||||
'(' ar ')' <> ᆞ;
|
||||
'(' ar eo ')' <> ᆟ;
|
||||
'(' ar u ')' <> ᆠ;
|
||||
'(' ar i ')' <> ᆡ;
|
||||
'(' ar ar ')' <> ᆢ;
|
||||
'(' g l '))' <> ᇃ;
|
||||
'(' g s g '))' <> ᇄ;
|
||||
'(' n g '))' <> ᇅ;
|
||||
'(' n d '))' <> ᇆ;
|
||||
'(' n s '))' <> ᇇ;
|
||||
'(' n pan '))' <> ᇈ;
|
||||
'(' n t '))' <> ᇉ;
|
||||
'(' d g '))' <> ᇊ;
|
||||
'(' d l '))' <> ᇋ;
|
||||
'(' l g s '))' <> ᇌ;
|
||||
'(' l n '))' <> ᇍ;
|
||||
'(' l d '))' <> ᇎ;
|
||||
'(' l d h '))' <> ᇏ;
|
||||
'(' l l '))' <> ᇐ;
|
||||
'(' l m g '))' <> ᇑ;
|
||||
'(' l m s '))' <> ᇒ;
|
||||
'(' l b s '))' <> ᇓ;
|
||||
'(' l b h '))' <> ᇔ;
|
||||
'(' l b ng '))' <> ᇕ;
|
||||
'(' l s s '))' <> ᇖ;
|
||||
'(' l pan '))' <> ᇗ;
|
||||
'(' l k '))' <> ᇘ;
|
||||
'(' l yr '))' <> ᇙ;
|
||||
'(' m g '))' <> ᇚ;
|
||||
'(' m l '))' <> ᇛ;
|
||||
'(' m b '))' <> ᇜ;
|
||||
'(' m s '))' <> ᇝ;
|
||||
'(' m s s '))' <> ᇞ;
|
||||
'(' m pan '))' <> ᇟ;
|
||||
'(' m c '))' <> ᇠ;
|
||||
'(' m h '))' <> ᇡ;
|
||||
'(' m ng '))' <> ᇢ;
|
||||
'(' b l '))' <> ᇣ;
|
||||
'(' b p '))' <> ᇤ;
|
||||
'(' b h '))' <> ᇥ;
|
||||
'(' b ng '))' <> ᇦ;
|
||||
'(' s g '))' <> ᇧ;
|
||||
'(' s d '))' <> ᇨ;
|
||||
'(' s l '))' <> ᇩ;
|
||||
'(' s b '))' <> ᇪ;
|
||||
'(' pan '))' <> ᇫ;
|
||||
'(' ng g '))' <> ᇬ;
|
||||
'(' ng g g '))' <> ᇭ;
|
||||
'(' ng ng '))' <> ᇮ;
|
||||
'(' ng k '))' <> ᇯ;
|
||||
'(' yes '))' <> ᇰ;
|
||||
'(' yes s '))' <> ᇱ;
|
||||
'(' yes pan '))' <> ᇲ;
|
||||
'(' p b '))' <> ᇳ;
|
||||
'(' p ng '))' <> ᇴ;
|
||||
'(' h n '))' <> ᇵ;
|
||||
'(' h l '))' <> ᇶ;
|
||||
'(' h m '))' <> ᇷ;
|
||||
'(' h b '))' <> ᇸ;
|
||||
'(' yr '))' <> ᇹ;
|
||||
|
||||
|
||||
# INITIALS
|
||||
|
||||
# Added }$vowel post context - liu
|
||||
bb}$vowel<>ᄈ } $vowel;
|
||||
jj}$vowel<>ᄍ } $vowel;
|
||||
dd}$vowel<>ᄄ } $vowel;
|
||||
t }$vowel<>ᄐ } $vowel; # hangul choseong thieuth
|
||||
ss}$vowel<>ᄊ } $vowel; # hangul choseong ssangsios
|
||||
s }$vowel<>ᄉ } $vowel; # hangul choseong sios
|
||||
p }$vowel<>ᄑ } $vowel; # hangul choseong phieuph
|
||||
n }$vowel<>ᄂ } $vowel; # hangul choseong nieun
|
||||
m }$vowel<>ᄆ } $vowel; # hangul choseong mieum
|
||||
l }$vowel<>ᄅ } $vowel; # hangul choseong rieul
|
||||
k }$vowel<>ᄏ } $vowel; # hangul choseong khieukh
|
||||
j }$vowel<>ᄌ } $vowel; # hangul choseong cieuc
|
||||
h }$vowel<>ᄒ } $vowel; # hangul choseong hieuh
|
||||
gg}$vowel<>ᄁ } $vowel; # hangul choseong ssangkiyeok
|
||||
g }$vowel<>ᄀ } $vowel; # hangul choseong kiyeok
|
||||
d }$vowel<>ᄃ } $vowel; # hangul choseong tikeut
|
||||
c }$vowel<>ᄎ } $vowel; # hangul choseong chieuch
|
||||
b }$vowel<>ᄇ } $vowel; # hangul choseong pieup
|
||||
|
||||
# Take care of initial-compound medial - '(' $vowel - liu
|
||||
bb} '(' $vowel <> ᄈ } $comp_med;
|
||||
jj} '(' $vowel <> ᄍ } $comp_med;
|
||||
dd} '(' $vowel <> ᄄ } $comp_med;
|
||||
t } '(' $vowel <> ᄐ } $comp_med; # hangul choseong thieuth
|
||||
ss} '(' $vowel <> ᄊ } $comp_med; # hangul choseong ssangsios
|
||||
s } '(' $vowel <> ᄉ } $comp_med; # hangul choseong sios
|
||||
p } '(' $vowel <> ᄑ } $comp_med; # hangul choseong phieuph
|
||||
n } '(' $vowel <> ᄂ } $comp_med; # hangul choseong nieun
|
||||
m } '(' $vowel <> ᄆ } $comp_med; # hangul choseong mieum
|
||||
l } '(' $vowel <> ᄅ } $comp_med; # hangul choseong rieul
|
||||
k } '(' $vowel <> ᄏ } $comp_med; # hangul choseong khieukh
|
||||
j } '(' $vowel <> ᄌ } $comp_med; # hangul choseong cieuc
|
||||
h } '(' $vowel <> ᄒ } $comp_med; # hangul choseong hieuh
|
||||
gg} '(' $vowel <> ᄁ } $comp_med; # hangul choseong ssangkiyeok
|
||||
g } '(' $vowel <> ᄀ } $comp_med; # hangul choseong kiyeok
|
||||
d } '(' $vowel <> ᄃ } $comp_med; # hangul choseong tikeut
|
||||
c } '(' $vowel <> ᄎ } $comp_med; # hangul choseong chieuch
|
||||
b } '(' $vowel <> ᄇ } $comp_med; # hangul choseong pieup
|
||||
|
||||
# Mark non-canonical initials with '[' - liu
|
||||
'[' bb <> ᄈ;
|
||||
'[' jj <> ᄍ;
|
||||
'[' dd <> ᄄ;
|
||||
'[' t <> ᄐ; # hangul choseong thieuth
|
||||
'[' ss <> ᄊ; # hangul choseong ssangsios
|
||||
'[' s <> ᄉ; # hangul choseong sios
|
||||
'[' p <> ᄑ; # hangul choseong phieuph
|
||||
'[' n <> ᄂ; # hangul choseong nieun
|
||||
'[' m <> ᄆ; # hangul choseong mieum
|
||||
'[' l <> ᄅ; # hangul choseong rieul
|
||||
'[' k <> ᄏ; # hangul choseong khieukh
|
||||
'[' j <> ᄌ; # hangul choseong cieuc
|
||||
'[' h <> ᄒ; # hangul choseong hieuh
|
||||
'[' gg <> ᄁ; # hangul choseong ssangkiyeok
|
||||
'[' g <> ᄀ; # hangul choseong kiyeok
|
||||
'[' d <> ᄃ; # hangul choseong tikeut
|
||||
'[' c <> ᄎ; # hangul choseong chieuch
|
||||
'[' b <> ᄇ; # hangul choseong pieup
|
||||
|
||||
|
||||
# If we have gotten through to these rules, and we start with
|
||||
# a consonant, then the remaining mappings would be to F,
|
||||
# because we must have CC (or C<non-letter>), not CV.
|
||||
# If we have F before us, then
|
||||
# we would end up with FF, which is wrong. The simplest fix is
|
||||
# to still make it an initial, but also insert an "u",
|
||||
# so we end up with F, I, u, and then continue with the C
|
||||
|
||||
# special, only initial
|
||||
# + "bb > 뿌;" // bb u hangul choseong ssangpieup
|
||||
# + "jj > 쭈;" // jj u hangul choseong ssangcieuc
|
||||
# + "dd > 뚜;" // dd u hangul choseong ssangtikeut
|
||||
|
||||
# + "$final{ t > 투;" // hangul choseong thieuth
|
||||
# + "$final{ ss> 쑤;" // hangul choseong ssangsios
|
||||
# + "$final{ s > 수;" // hangul choseong sios
|
||||
# + "$final{ p > 푸;" // hangul choseong phieuph
|
||||
# + "$final{ n > 누;" // hangul choseong nieun
|
||||
# + "$final{ m > 무;" // hangul choseong mieum
|
||||
# + "$final{ l > 루;" // hangul choseong rieul
|
||||
# + "$final{ k > 쿠;" // hangul choseong khieukh
|
||||
# + "$final{ j > 주;" // hangul choseong cieuc
|
||||
# + "$final{ h > 후;" // hangul choseong hieuh
|
||||
# + "$final{ gg> 꾸;" // hangul choseong ssangkiyeok
|
||||
# + "$final{ g > 구;" // hangul choseong kiyeok
|
||||
# + "$final{ d > 두;" // hangul choseong tikeut
|
||||
# + "$final{ c > 추;" // hangul choseong chieuch
|
||||
# + "$final{ b > 부;" // hangul choseong pieup
|
||||
|
||||
# MEDIALS after INITIALS
|
||||
|
||||
# MEDIALS (vowels) not after INITIALs
|
||||
# Added left $initial context - liu
|
||||
$initial{ yu <> $INITIAL{ ᅲ; # hangul jungseong yu
|
||||
$initial{ yo <> $INITIAL{ ᅭ; # hangul jungseong yo
|
||||
$initial{ yi <> $INITIAL{ ᅴ; # hangul jungseong yi
|
||||
$initial{ yeo<> $INITIAL{ ᅧ; # hangul jungseong yeo
|
||||
$initial{ ye <> $INITIAL{ ᅨ; # hangul jungseong ye
|
||||
$initial{ yae<> $INITIAL{ ᅤ; # hangul jungseong yae
|
||||
$initial{ ya <> $INITIAL{ ᅣ; # hangul jungseong ya
|
||||
$initial{ wi <> $INITIAL{ ᅱ; # hangul jungseong wi
|
||||
$initial{ weo<> $INITIAL{ ᅯ; # hangul jungseong weo
|
||||
$initial{ we <> $INITIAL{ ᅰ; # hangul jungseong we
|
||||
$initial{ wae<> $INITIAL{ ᅫ; # hangul jungseong wae
|
||||
$initial{ wa <> $INITIAL{ ᅪ; # hangul jungseong wa
|
||||
$initial{ u <> $INITIAL{ ᅮ; # hangul jungseong u
|
||||
$initial{ oe <> $INITIAL{ ᅬ; # hangul jungseong oe
|
||||
$initial{ o <> $INITIAL{ ᅩ; # hangul jungseong o
|
||||
$initial{ i <> $INITIAL{ ᅵ; # hangul jungseong i
|
||||
$initial{ eu <> $INITIAL{ ᅳ; # hangul jungseong eu
|
||||
$initial{ eo <> $INITIAL{ ᅥ; # hangul jungseong eo
|
||||
$initial{ e <> $INITIAL{ ᅦ; # hangul jungseong e
|
||||
$initial{ ae <> $INITIAL{ ᅢ; # hangul jungseong ae
|
||||
$initial{ a <> $INITIAL{ ᅡ; # hangul jungseong a
|
||||
|
||||
# Handle non-canonical isolated jungseong - liu
|
||||
'~'yu <> ᅲ; # hangul jungseong yu
|
||||
'~'yo <> ᅭ; # hangul jungseong yo
|
||||
'~'yi <> ᅴ; # hangul jungseong yi
|
||||
'~'yeo<> ᅧ; # hangul jungseong yeo
|
||||
'~'ye <> ᅨ; # hangul jungseong ye
|
||||
'~'yae<> ᅤ; # hangul jungseong yae
|
||||
'~'ya <> ᅣ; # hangul jungseong ya
|
||||
'~'wi <> ᅱ; # hangul jungseong wi
|
||||
'~'weo<> ᅯ; # hangul jungseong weo
|
||||
'~'we <> ᅰ; # hangul jungseong we
|
||||
'~'wae<> ᅫ; # hangul jungseong wae
|
||||
'~'wa <> ᅪ; # hangul jungseong wa
|
||||
'~'u <> ᅮ; # hangul jungseong u
|
||||
'~'oe <> ᅬ; # hangul jungseong oe
|
||||
'~'o <> ᅩ; # hangul jungseong o
|
||||
'~'i <> ᅵ; # hangul jungseong i
|
||||
'~'eu <> ᅳ; # hangul jungseong eu
|
||||
'~'eo <> ᅥ; # hangul jungseong eo
|
||||
'~'e <> ᅦ; # hangul jungseong e
|
||||
'~'ae <> ᅢ; # hangul jungseong ae
|
||||
'~'a <> ᅡ; # hangul jungseong a
|
||||
|
||||
# MEDIALS (vowels) not after INITIALs
|
||||
# Changed from > to <> - liu
|
||||
yu <> ᄋ ᅲ; # hangul jungseong yu
|
||||
yo <> ᄋ ᅭ; # hangul jungseong yo
|
||||
yi <> ᄋ ᅴ; # hangul jungseong yi
|
||||
yeo<> ᄋ ᅧ; # hangul jungseong yeo
|
||||
ye <> ᄋ ᅨ; # hangul jungseong ye
|
||||
yae<> ᄋ ᅤ; # hangul jungseong yae
|
||||
ya <> ᄋ ᅣ; # hangul jungseong ya
|
||||
wi <> ᄋ ᅱ; # hangul jungseong wi
|
||||
weo<> ᄋ ᅯ; # hangul jungseong weo
|
||||
we <> ᄋ ᅰ; # hangul jungseong we
|
||||
wae<> ᄋ ᅫ; # hangul jungseong wae
|
||||
wa <> ᄋ ᅪ; # hangul jungseong wa
|
||||
u <> ᄋ ᅮ; # hangul jungseong u
|
||||
oe <> ᄋ ᅬ; # hangul jungseong oe
|
||||
o <> ᄋ ᅩ; # hangul jungseong o
|
||||
i <> ᄋ ᅵ; # hangul jungseong i
|
||||
eu <> ᄋ ᅳ; # hangul jungseong eu
|
||||
eo <> ᄋ ᅥ; # hangul jungseong eo
|
||||
e <> ᄋ ᅦ; # hangul jungseong e
|
||||
ae <> ᄋ ᅢ; # hangul jungseong ae
|
||||
a <> ᄋ ᅡ; # hangul jungseong a
|
||||
|
||||
\` <> ᄋ;
|
||||
# Moved down so as not to mask above rules - liu
|
||||
# + "'' < $consonant{ᄋ;" // insert a break between any consonant and the empty consonant.
|
||||
# + "$medial{}$vowel<>ᄋ;" // HANGUL CHOSEONG IEUNG
|
||||
|
||||
|
||||
# FINALS
|
||||
|
||||
'' t <> $consonant { ᇀ; # hangul jongseong thieuth
|
||||
'' ss <> $consonant { ᆻ; # hangul jongseong ssangsios
|
||||
'' s <> $consonant { ᆺ; # hangul jongseong sios
|
||||
'' p <> $consonant { ᇁ; # hangul jongseong phieuph
|
||||
'' nj <> $consonant { ᆬ; # hangul jongseong nieun-cieuc
|
||||
'' nh <> $consonant { ᆭ; # hangul jongseong nieun-hieuh
|
||||
'' ng <> $consonant { ᆼ; # hangul jongseong ieung
|
||||
'' n <> $consonant { ᆫ; # hangul jongseong nieun
|
||||
'' m <> $consonant { ᆷ; # hangul jongseong mieum
|
||||
'' lt <> $consonant { ᆴ; # hangul jongseong rieul-thieuth
|
||||
'' ls <> $consonant { ᆳ; # hangul jongseong rieul-sios
|
||||
'' lp <> $consonant { ᆵ; # hangul jongseong rieul-phieuph
|
||||
'' lm <> $consonant { ᆱ; # hangul jongseong rieul-mieum
|
||||
'' lh <> $consonant { ᆶ; # hangul jongseong rieul-hieuh
|
||||
'' lg <> $consonant { ᆰ; # hangul jongseong rieul-kiyeok
|
||||
'' lb <> $consonant { ᆲ; # hangul jongseong rieul-pieup
|
||||
'' l <> $consonant { ᆯ; # hangul jongseong rieul
|
||||
'' k <> $consonant { ᆿ; # hangul jongseong khieukh
|
||||
'' j <> $consonant { ᆽ; # hangul jongseong cieuc
|
||||
'' h <> $consonant { ᇂ; # hangul jongseong hieuh
|
||||
'' gs <> $consonant { ᆪ; # hangul jongseong kiyeok-sios
|
||||
'' gg <> $consonant { ᆩ; # hangul jongseong ssangkiyeok
|
||||
'' g <> $consonant { ᆨ; # hangul jongseong kiyeok
|
||||
'' d <> $consonant { ᆮ; # hangul jongseong tikeut
|
||||
'' c <> $consonant { ᆾ; # hangul jongseong chieuch
|
||||
'' bs <> $consonant { ᆹ; # hangul jongseong pieup-sios
|
||||
'' b <> $consonant { ᆸ; # hangul jongseong pieup
|
||||
|
||||
t ']'> ᇀ; # hangul jongseong thieuth
|
||||
ss ']'> ᆻ; # hangul jongseong ssangsios
|
||||
s ']'> ᆺ; # hangul jongseong sios
|
||||
p ']'> ᇁ; # hangul jongseong phieuph
|
||||
nj ']'> ᆬ; # hangul jongseong nieun-cieuc
|
||||
nh ']'> ᆭ; # hangul jongseong nieun-hieuh
|
||||
ng ']'> ᆼ; # hangul jongseong ieung
|
||||
n ']'> ᆫ; # hangul jongseong nieun
|
||||
m ']'> ᆷ; # hangul jongseong mieum
|
||||
lt ']'> ᆴ; # hangul jongseong rieul-thieuth
|
||||
ls ']'> ᆳ; # hangul jongseong rieul-sios
|
||||
lp ']'> ᆵ; # hangul jongseong rieul-phieuph
|
||||
lm ']'> ᆱ; # hangul jongseong rieul-mieum
|
||||
lh ']'> ᆶ; # hangul jongseong rieul-hieuh
|
||||
lg ']'> ᆰ; # hangul jongseong rieul-kiyeok
|
||||
lb ']'> ᆲ; # hangul jongseong rieul-pieup
|
||||
l ']'> ᆯ; # hangul jongseong rieul
|
||||
k ']'> ᆿ; # hangul jongseong khieukh
|
||||
j ']'> ᆽ; # hangul jongseong cieuc
|
||||
h ']'> ᇂ; # hangul jongseong hieuh
|
||||
gs ']'> ᆪ; # hangul jongseong kiyeok-sios
|
||||
gg ']'> ᆩ; # hangul jongseong ssangkiyeok
|
||||
g ']'> ᆨ; # hangul jongseong kiyeok
|
||||
d ']'> ᆮ; # hangul jongseong tikeut
|
||||
c ']'> ᆾ; # hangul jongseong chieuch
|
||||
bs ']'> ᆹ; # hangul jongseong pieup-sios
|
||||
b ']'> ᆸ; # hangul jongseong pieup
|
||||
|
||||
$medial{ t <> $MEDIAL{ ᇀ; # hangul jongseong thieuth
|
||||
$medial{ ss <> $MEDIAL{ ᆻ; # hangul jongseong ssangsios
|
||||
$medial{ s <> $MEDIAL{ ᆺ; # hangul jongseong sios
|
||||
$medial{ p <> $MEDIAL{ ᇁ; # hangul jongseong phieuph
|
||||
$medial{ nj <> $MEDIAL{ ᆬ; # hangul jongseong nieun-cieuc
|
||||
$medial{ nh <> $MEDIAL{ ᆭ; # hangul jongseong nieun-hieuh
|
||||
$medial{ ng <> $MEDIAL{ ᆼ; # hangul jongseong ieung
|
||||
$medial{ n <> $MEDIAL{ ᆫ; # hangul jongseong nieun
|
||||
$medial{ m <> $MEDIAL{ ᆷ; # hangul jongseong mieum
|
||||
$medial{ lt <> $MEDIAL{ ᆴ; # hangul jongseong rieul-thieuth
|
||||
$medial{ ls <> $MEDIAL{ ᆳ; # hangul jongseong rieul-sios
|
||||
$medial{ lp <> $MEDIAL{ ᆵ; # hangul jongseong rieul-phieuph
|
||||
$medial{ lm <> $MEDIAL{ ᆱ; # hangul jongseong rieul-mieum
|
||||
$medial{ lh <> $MEDIAL{ ᆶ; # hangul jongseong rieul-hieuh
|
||||
$medial{ lg <> $MEDIAL{ ᆰ; # hangul jongseong rieul-kiyeok
|
||||
$medial{ lb <> $MEDIAL{ ᆲ; # hangul jongseong rieul-pieup
|
||||
$medial{ l <> $MEDIAL{ ᆯ; # hangul jongseong rieul
|
||||
$medial{ k <> $MEDIAL{ ᆿ; # hangul jongseong khieukh
|
||||
$medial{ j <> $MEDIAL{ ᆽ; # hangul jongseong cieuc
|
||||
$medial{ h <> $MEDIAL{ ᇂ; # hangul jongseong hieuh
|
||||
$medial{ gs <> $MEDIAL{ ᆪ; # hangul jongseong kiyeok-sios
|
||||
$medial{ gg <> $MEDIAL{ ᆩ; # hangul jongseong ssangkiyeok
|
||||
$medial{ g <> $MEDIAL{ ᆨ; # hangul jongseong kiyeok
|
||||
$medial{ d <> $MEDIAL{ ᆮ; # hangul jongseong tikeut
|
||||
$medial{ c <> $MEDIAL{ ᆾ; # hangul jongseong chieuch
|
||||
$medial{ bs <> $MEDIAL{ ᆹ; # hangul jongseong pieup-sios
|
||||
$medial{ b <> $MEDIAL{ ᆸ; # hangul jongseong pieup
|
||||
|
||||
t ']'< ᇀ; # hangul jongseong thieuth
|
||||
ss ']'< ᆻ; # hangul jongseong ssangsios
|
||||
s ']'< ᆺ; # hangul jongseong sios
|
||||
p ']'< ᇁ; # hangul jongseong phieuph
|
||||
nj ']'< ᆬ; # hangul jongseong nieun-cieuc
|
||||
nh ']'< ᆭ; # hangul jongseong nieun-hieuh
|
||||
ng ']'< ᆼ; # hangul jongseong ieung
|
||||
n ']'< ᆫ; # hangul jongseong nieun
|
||||
m ']'< ᆷ; # hangul jongseong mieum
|
||||
lt ']'< ᆴ; # hangul jongseong rieul-thieuth
|
||||
ls ']'< ᆳ; # hangul jongseong rieul-sios
|
||||
lp ']'< ᆵ; # hangul jongseong rieul-phieuph
|
||||
lm ']'< ᆱ; # hangul jongseong rieul-mieum
|
||||
lh ']'< ᆶ; # hangul jongseong rieul-hieuh
|
||||
lg ']'< ᆰ; # hangul jongseong rieul-kiyeok
|
||||
lb ']'< ᆲ; # hangul jongseong rieul-pieup
|
||||
l ']'< ᆯ; # hangul jongseong rieul
|
||||
k ']'< ᆿ; # hangul jongseong khieukh
|
||||
j ']'< ᆽ; # hangul jongseong cieuc
|
||||
h ']'< ᇂ; # hangul jongseong hieuh
|
||||
gs ']'< ᆪ; # hangul jongseong kiyeok-sios
|
||||
gg ']'< ᆩ; # hangul jongseong ssangkiyeok
|
||||
g ']'< ᆨ; # hangul jongseong kiyeok
|
||||
d ']'< ᆮ; # hangul jongseong tikeut
|
||||
c ']'< ᆾ; # hangul jongseong chieuch
|
||||
bs ']'< ᆹ; # hangul jongseong pieup-sios
|
||||
b ']'< ᆸ; # hangul jongseong pieup
|
||||
|
||||
# extra English letters
|
||||
|
||||
# + "z > |s;"
|
||||
# //{ + "Z > |s;" } masked
|
||||
# + "x > |ks;"
|
||||
# + "X > |ks;"
|
||||
# + "v > |b;"
|
||||
# + "V > |b;"
|
||||
# + "r > |l;"
|
||||
# + "R > |l;"
|
||||
# + "q > |k;"
|
||||
# + "Q > |k;"
|
||||
# + "f > |p;"
|
||||
# + "F > |p;"
|
||||
# //{ + "c > |k;" } masked
|
||||
# + "C > |k;"
|
||||
|
||||
# + "y > ᅲ;" // hangul jungseong yu
|
||||
# + "w > ᅱ;" // hangul jungseong wi
|
||||
|
||||
# eof
|
969
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Kana.utf8.txt
Executable file
969
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Kana.utf8.txt
Executable file
|
@ -0,0 +1,969 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:18:46 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Latin-Kana
|
||||
|
||||
|
||||
# Japanese hiragana and katakana to and from latin
|
||||
# (romaji). Lower case latin corresponds to hiragana;
|
||||
# upper case latin to katakana. The handling of
|
||||
# Hiragana and Katakana is largely the same. The bulk
|
||||
# of the transliterator consists of two identical sets
|
||||
# of rules, differing only in case.
|
||||
|
||||
# Because of minor differences between the two blocks
|
||||
# (e.g., the existence of small katakana ka and ke, but
|
||||
# no corresponding hiragana), some rules exist for only
|
||||
# one script.
|
||||
|
||||
# Uses modified Hepburn. Small changes to make
|
||||
# unambiguous.
|
||||
|
||||
#| Kunrei-shiki: Hepburn/MHepburn
|
||||
#| ------------------------------
|
||||
#| si: shi
|
||||
#| si ~ya: sha
|
||||
#| si ~yu: shu
|
||||
#| si ~yo: sho
|
||||
#| zi: ji
|
||||
#| zi ~ya: ja
|
||||
#| zi ~yu: ju
|
||||
#| zi ~yo: jo
|
||||
#| ti: chi
|
||||
#| ti ~ya: cha
|
||||
#| ti ~yu: chu
|
||||
#| ti ~yo: cho
|
||||
#| tu: tsu
|
||||
#| di: ji/dji
|
||||
#| du: zu/dzu
|
||||
#| hu: fu
|
||||
|
||||
#| For foreign words:
|
||||
#| -----------------
|
||||
#| se ~i si
|
||||
#| si ~e she
|
||||
#|
|
||||
#| ze ~i zi
|
||||
#| zi ~e je
|
||||
#|
|
||||
#| te ~i ti
|
||||
#| ti ~e che
|
||||
#| te ~u tu
|
||||
#|
|
||||
#| de ~i di
|
||||
#| de ~u du
|
||||
#| de ~i di
|
||||
#|
|
||||
#| he ~u: hu
|
||||
#| hu ~a fa
|
||||
#| hu ~i fi
|
||||
#| hu ~e fe
|
||||
#| hu ~o fo
|
||||
|
||||
# Most small forms are generated, but if necessary
|
||||
# explicit small forms are given with ~a, ~ya, etc.
|
||||
|
||||
#------------------------------------------------------
|
||||
# Variables
|
||||
|
||||
$vowel=[aeiou];
|
||||
### $QUOTE='';
|
||||
|
||||
# Hiragana block
|
||||
|
||||
### $a2=ぁ;
|
||||
### $a=あ;
|
||||
### $i2=ぃ;
|
||||
### $i=い;
|
||||
### $u2=ぅ;
|
||||
### $u=う;
|
||||
### $e2=ぇ;
|
||||
### $e=え;
|
||||
### $o2=ぉ;
|
||||
### $o=お;
|
||||
|
||||
### $ka=か;
|
||||
### $ga=が;
|
||||
### $ki=き;
|
||||
### $gi=ぎ;
|
||||
### $ku=く;
|
||||
### $gu=ぐ;
|
||||
### $ke=け;
|
||||
### $ge=げ;
|
||||
### $ko=こ;
|
||||
### $go=ご;
|
||||
|
||||
### $sa=さ;
|
||||
### $za=ざ;
|
||||
### $si=し;
|
||||
### $zi=じ;
|
||||
### $su=す;
|
||||
### $zu=ず;
|
||||
### $se=せ;
|
||||
### $ze=ぜ;
|
||||
### $so=そ;
|
||||
### $zo=ぞ;
|
||||
|
||||
### $ta=た;
|
||||
### $da=だ;
|
||||
### $ti=ち;
|
||||
### $di=ぢ;
|
||||
### $tu2=っ;
|
||||
### $tu=つ;
|
||||
### $du=づ;
|
||||
### $te=て;
|
||||
### $de=で;
|
||||
### $to=と;
|
||||
### $do=ど;
|
||||
|
||||
### $na=な;
|
||||
### $ni=に;
|
||||
### $nu=ぬ;
|
||||
### $ne=ね;
|
||||
### $no=の;
|
||||
|
||||
### $ha=は;
|
||||
### $ba=ば;
|
||||
### $pa=ぱ;
|
||||
### $hi=ひ;
|
||||
### $bi=び;
|
||||
### $pi=ぴ;
|
||||
### $hu=ふ;
|
||||
### $bu=ぶ;
|
||||
### $pu=ぷ;
|
||||
### $he=へ;
|
||||
### $be=べ;
|
||||
### $pe=ぺ;
|
||||
### $ho=ほ;
|
||||
### $bo=ぼ;
|
||||
### $po=ぽ;
|
||||
|
||||
### $ma=ま;
|
||||
### $mi=み;
|
||||
### $mu=む;
|
||||
### $me=め;
|
||||
### $mo=も;
|
||||
|
||||
### $ya2=ゃ;
|
||||
### $ya=や;
|
||||
### $yu2=ゅ;
|
||||
### $yu=ゆ;
|
||||
### $yo2=ょ;
|
||||
### $yo=よ;
|
||||
|
||||
### $ra=ら;
|
||||
### $ri=り;
|
||||
### $ru=る;
|
||||
### $re=れ;
|
||||
### $ro=ろ;
|
||||
|
||||
### $wa2=ゎ;
|
||||
### $wa=わ;
|
||||
### $wi=ゐ;
|
||||
### $we=ゑ;
|
||||
### $wo=を;
|
||||
|
||||
### $n=ん;
|
||||
### $vu=ゔ;
|
||||
|
||||
# Alternates, just to make the rules easier
|
||||
### $yi2=ぃ;
|
||||
### $yi=い;
|
||||
### $ye2=ぇ;
|
||||
### $ye=え;
|
||||
### $wu=$u;
|
||||
# End alternates
|
||||
|
||||
# Katakana block
|
||||
|
||||
### $A2=ァ;
|
||||
### $A=ア;
|
||||
### $I2=ィ;
|
||||
### $I=イ;
|
||||
### $U2=ゥ;
|
||||
### $U=ウ;
|
||||
### $E2=ェ;
|
||||
### $E=エ;
|
||||
### $O2=ォ;
|
||||
### $O=オ;
|
||||
|
||||
### $KA=カ;
|
||||
### $GA=ガ;
|
||||
### $KI=キ;
|
||||
### $GI=ギ;
|
||||
### $KU=ク;
|
||||
### $GU=グ;
|
||||
### $KE=ケ;
|
||||
### $GE=ゲ;
|
||||
### $KO=コ;
|
||||
### $GO=ゴ;
|
||||
|
||||
### $KA2=ヵ; # Small Katakana KA; no Hiragana equiv.
|
||||
### $KE2=ヶ; # Small Katakana KE; no Hiragana equiv.
|
||||
|
||||
### $SA=サ;
|
||||
### $ZA=ザ;
|
||||
### $SI=シ;
|
||||
### $ZI=ジ;
|
||||
### $SU=ス;
|
||||
### $ZU=ズ;
|
||||
### $SE=セ;
|
||||
### $ZE=ゼ;
|
||||
### $SO=ソ;
|
||||
### $ZO=ゾ;
|
||||
|
||||
### $TA=タ;
|
||||
### $DA=ダ;
|
||||
### $TI=チ;
|
||||
### $DI=ヂ;
|
||||
### $TU2=ッ;
|
||||
### $TU=ツ;
|
||||
### $DU=ヅ;
|
||||
### $TE=テ;
|
||||
### $DE=デ;
|
||||
### $TO=ト;
|
||||
### $DO=ド;
|
||||
|
||||
### $NA=ナ;
|
||||
### $NI=ニ;
|
||||
### $NU=ヌ;
|
||||
### $NE=ネ;
|
||||
### $NO=ノ;
|
||||
|
||||
### $HA=ハ;
|
||||
### $BA=バ;
|
||||
### $PA=パ;
|
||||
### $HI=ヒ;
|
||||
### $BI=ビ;
|
||||
### $PI=ピ;
|
||||
### $HU=フ;
|
||||
### $BU=ブ;
|
||||
### $PU=プ;
|
||||
### $HE=ヘ;
|
||||
### $BE=ベ;
|
||||
### $PE=ペ;
|
||||
### $HO=ホ;
|
||||
### $BO=ボ;
|
||||
### $PO=ポ;
|
||||
|
||||
### $MA=マ;
|
||||
### $MI=ミ;
|
||||
### $MU=ム;
|
||||
### $ME=メ;
|
||||
### $MO=モ;
|
||||
|
||||
### $YA2=ャ;
|
||||
### $YA=ヤ;
|
||||
### $YU2=ュ;
|
||||
### $YU=ユ;
|
||||
### $YO2=ョ;
|
||||
### $YO=ヨ;
|
||||
### $WA2=ヮ;
|
||||
|
||||
# Alternates, just to make the rules easier
|
||||
### $YI2=ィ;
|
||||
### $YI=イ;
|
||||
### $YE2=ェ;
|
||||
### $YE=エ;
|
||||
### $WU=$U;
|
||||
# End alternates
|
||||
|
||||
### $RA=ラ;
|
||||
### $RI=リ;
|
||||
### $RU=ル;
|
||||
### $RE=レ;
|
||||
### $RO=ロ;
|
||||
|
||||
### $VA=ヷ;
|
||||
### $VI=ヸ;
|
||||
### $VU=ヴ;
|
||||
### $VE=ヹ;
|
||||
### $VO=ヺ;
|
||||
|
||||
### $WA=ワ;
|
||||
### $WI=ヰ;
|
||||
### $WE=ヱ;
|
||||
### $WO=ヲ;
|
||||
|
||||
### $N=ン;
|
||||
### $LONG=ー;
|
||||
|
||||
# Variables used for doubled-consonants with tsu
|
||||
|
||||
$K_START=[カキクケコかきくけこ];
|
||||
$G_START=[ガギグゲゴがぎぐげご];
|
||||
|
||||
$S_START=[サシスセソさしすせそ];
|
||||
$Z_START=[ザズゼゾざずぜぞ];
|
||||
$J_START=[ジじ];
|
||||
|
||||
$T_START=[タチツテトたちつてと];
|
||||
$D_START=[ダヂヅデドだぢづでど];
|
||||
|
||||
$N_START=[ナニヌネノなにぬねの];
|
||||
|
||||
$H_START=[ハヒヘホはひへほ];
|
||||
$F_START=[フふ];
|
||||
$B_START=[バビブベボばびぶべぼ];
|
||||
$P_START=[パピプペポぱぴぷぺぽ];
|
||||
|
||||
$M_START=[マミムメモまみむめも];
|
||||
|
||||
$Y_START=[ヤユヨやゆよ];
|
||||
|
||||
$R_START=[ラリルレロらりるれろ];
|
||||
|
||||
$W_START=[ワヰヱヲわゐゑを];
|
||||
|
||||
$V_START=[ヷヸヴヹヺゔ];
|
||||
|
||||
# If ン is followed by $N_QUOTER, then it needs an
|
||||
# apostrophe after its romaji form to disambiguate it.
|
||||
# E.g., ン ア != ナ, so represent as "n'a", not "na".
|
||||
|
||||
$N_QUOTER = [ア イ ウ エ オ ナ ニ ヌ ネ ノ \
|
||||
ヤ ユ ヨ ン];
|
||||
|
||||
$n_quoter = [あ い う え お な に ぬ ね の \
|
||||
や ゆ よ ん];
|
||||
|
||||
# Lowercase copies for convenience in making hiragana
|
||||
# rule set copy
|
||||
|
||||
### $long = $LONG;
|
||||
### $quote = $QUOTE;
|
||||
### $k_start=$K_START;
|
||||
### $g_start=$G_START;
|
||||
### $s_start=$S_START;
|
||||
### $z_start=$Z_START;
|
||||
### $j_start=$J_START;
|
||||
### $t_start=$T_START;
|
||||
### $d_start=$D_START;
|
||||
### $n_start=$N_START;
|
||||
### $h_start=$H_START;
|
||||
### $f_start=$F_START;
|
||||
### $b_start=$B_START;
|
||||
### $p_start=$P_START;
|
||||
### $m_start=$M_START;
|
||||
### $y_start=$Y_START;
|
||||
### $r_start=$R_START;
|
||||
### $w_start=$W_START;
|
||||
### $v_start=$V_START;
|
||||
|
||||
#------------------------------------------------------
|
||||
# Katakana rules
|
||||
|
||||
# The rules immediately following are not shared. That
|
||||
# is, they exist only for katakana, not for hiragana.
|
||||
|
||||
VA<>ヷ;
|
||||
VI<>ヸ;
|
||||
VE<>ヹ;
|
||||
VO<>ヺ;
|
||||
'~KA'<>ヵ;
|
||||
'~KE'<>ヶ;
|
||||
|
||||
# ~~~ BEGIN shared rules ~~~
|
||||
|
||||
# The shared rules are copied from katakana to hiragana
|
||||
# and then mechanically lowercased.
|
||||
|
||||
A<>ア;
|
||||
|
||||
BA<>バ;
|
||||
BYA<ビャ;
|
||||
BYI<ビィ;
|
||||
BYU<ビュ;
|
||||
BYE<ビェ;
|
||||
BYO<ビョ;
|
||||
BI<>ビ;
|
||||
BU<>ブ;
|
||||
BE<>ベ;
|
||||
BO<>ボ;
|
||||
BY>ビ|'~Y';
|
||||
|
||||
CHA<チャ;
|
||||
CHI'~I'<チィ; # Liu
|
||||
CHU<チュ;
|
||||
CHE<チェ;
|
||||
CHO<チョ;
|
||||
CHI<>チ;
|
||||
CH>チ|'~Y';
|
||||
|
||||
C}I>|S;
|
||||
C}E>|S;
|
||||
|
||||
DA<>ダ;
|
||||
DI<>ディ;
|
||||
DU<>デゥ;
|
||||
DE<>デ;
|
||||
DO<>ド;
|
||||
DZU<>ヅ;
|
||||
DJA<ヂャ;
|
||||
DJI'~I'<ヂィ; # Liu
|
||||
DJU<ヂュ;
|
||||
DJE<ヂェ;
|
||||
DJO<ヂョ;
|
||||
DJI<>ヂ;
|
||||
DJ>ヂ|'~Y';
|
||||
|
||||
E<>エ;
|
||||
|
||||
FA<ファ;
|
||||
FI<フィ;
|
||||
FE<フェ;
|
||||
FO<フォ;
|
||||
FU<>フ;
|
||||
|
||||
GA<>ガ;
|
||||
GYA<ギャ;
|
||||
GYI<ギィ;
|
||||
GYU<ギュ;
|
||||
GYE<ギェ;
|
||||
GYO<ギョ;
|
||||
GI<>ギ;
|
||||
GU<>グ;
|
||||
GE<>ゲ;
|
||||
GO<>ゴ;
|
||||
GY>ギ|'~Y';
|
||||
|
||||
HA<>ハ;
|
||||
HI<>ヒ;
|
||||
HU<>ヘゥ;
|
||||
HE<>ヘ;
|
||||
HO<>ホ;
|
||||
|
||||
I<>イ;
|
||||
|
||||
JA<ジャ;
|
||||
JI'~I'<ジィ; # Liu
|
||||
JU<ジュ;
|
||||
JE<ジェ;
|
||||
JO<ジョ;
|
||||
JI<>ジ;
|
||||
|
||||
KA<>カ;
|
||||
KYA<キャ;
|
||||
KYI<キィ;
|
||||
KYU<キュ;
|
||||
KYE<キェ;
|
||||
KYO<キョ;
|
||||
KI<>キ;
|
||||
KU<>ク;
|
||||
KE<>ケ;
|
||||
KO<>コ;
|
||||
KY>キ|'~Y';
|
||||
|
||||
MA<>マ;
|
||||
MYA<ミャ;
|
||||
MYI<ミィ;
|
||||
MYU<ミュ;
|
||||
MYE<ミェ;
|
||||
MYO<ミョ;
|
||||
MI<>ミ;
|
||||
MU<>ム;
|
||||
ME<>メ;
|
||||
MO<>モ;
|
||||
MY>ミ|'~Y';
|
||||
|
||||
M}P>ン;
|
||||
M}B>ン;
|
||||
M}F>ン;
|
||||
M}V>ン;
|
||||
|
||||
NA<>ナ;
|
||||
NYA<ニャ;
|
||||
NYI<ニィ;
|
||||
NYU<ニュ;
|
||||
NYE<ニェ;
|
||||
NYO<ニョ;
|
||||
NI<>ニ;
|
||||
NU<>ヌ;
|
||||
NE<>ネ;
|
||||
NO<>ノ;
|
||||
NY>ニ|'~Y';
|
||||
|
||||
O<>オ;
|
||||
|
||||
PA<>パ;
|
||||
PYA<ピャ;
|
||||
PYI<ピィ;
|
||||
PYU<ピュ;
|
||||
PYE<ピェ;
|
||||
PYO<ピョ;
|
||||
PI<>ピ;
|
||||
PU<>プ;
|
||||
PE<>ペ;
|
||||
PO<>ポ;
|
||||
PY>ピ|'~Y';
|
||||
|
||||
RA<>ラ;
|
||||
RYA<リャ;
|
||||
RYI<リィ;
|
||||
RYU<リュ;
|
||||
RYE<リェ;
|
||||
RYO<リョ;
|
||||
RI<>リ;
|
||||
RU<>ル;
|
||||
RE<>レ;
|
||||
RO<>ロ;
|
||||
RY>リ|'~Y';
|
||||
|
||||
SA<>サ;
|
||||
SI<>セィ;
|
||||
SU<>ス;
|
||||
SE<>セ;
|
||||
SO<>ソ;
|
||||
|
||||
SHA<シャ;
|
||||
SHI'~I'<シィ; # Liu
|
||||
SHU<シュ;
|
||||
SHE<シェ;
|
||||
SHO<ショ;
|
||||
SHI<>シ;
|
||||
SH>シ|'~Y';
|
||||
|
||||
TA<>タ;
|
||||
TI<>ティ;
|
||||
TU<>テゥ;
|
||||
TE<>テ;
|
||||
TO<>ト;
|
||||
|
||||
# Double consonants
|
||||
|
||||
B}B<>ッ}$B_START;
|
||||
C}K>ッ;
|
||||
C}C>ッ;
|
||||
C}Q>ッ;
|
||||
D}D<>ッ}$D_START;
|
||||
F}F<>ッ}$F_START;
|
||||
G}G<>ッ}$G_START;
|
||||
H}H<>ッ}$H_START;
|
||||
J}J<>ッ}$J_START;
|
||||
K}K<>ッ}$K_START;
|
||||
L}L>ッ;
|
||||
M}M<>ッ}$M_START;
|
||||
N}N<>ッ}$N_START;
|
||||
P}P<>ッ}$P_START;
|
||||
Q}Q>ッ;
|
||||
R}R<>ッ}$R_START;
|
||||
S}SH>ッ;
|
||||
S}S<>ッ}$S_START;
|
||||
T}CH>ッ;
|
||||
T}T<>ッ}$T_START;
|
||||
V}V<>ッ}$V_START;
|
||||
W}W<>ッ}$W_START;
|
||||
X}X>ッ;
|
||||
Y}Y<>ッ}$Y_START;
|
||||
Z}Z<>ッ}$Z_START;
|
||||
|
||||
TSU<>ツ;
|
||||
|
||||
U<>ウ;
|
||||
|
||||
'V~A'<ヴァ; # Liu
|
||||
'V~I'<ヴィ; # Liu
|
||||
'V~E'<ヴェ; # Liu
|
||||
'V~O'<ヴォ; # Liu
|
||||
VU<>ヴ;
|
||||
|
||||
WA<>ワ;
|
||||
WI<>ヰ;
|
||||
WU>ウ;
|
||||
WE<>ヱ;
|
||||
WO<>ヲ;
|
||||
|
||||
YA<>ヤ;
|
||||
YI>イ;
|
||||
YU<>ユ;
|
||||
YE>エ;
|
||||
YO<>ヨ;
|
||||
|
||||
ZA<>ザ;
|
||||
ZI<>ゼィ;
|
||||
ZU<>ズ;
|
||||
ZE<>ゼ;
|
||||
ZO<>ゾ;
|
||||
|
||||
# Prolonged vowel mark. This indicates a doubling of
|
||||
# the preceding vowel sound in both katakana and
|
||||
# hiragana.
|
||||
|
||||
A<A{ー; # Liu
|
||||
E<E{ー; # Liu
|
||||
I<I{ー; # Liu
|
||||
O<O{ー; # Liu
|
||||
U<U{ー; # Liu
|
||||
|
||||
# Small forms
|
||||
|
||||
'~A'<>ァ;
|
||||
'~I'<>ィ;
|
||||
'~U'<>ゥ;
|
||||
'~E'<>ェ;
|
||||
'~O'<>ォ;
|
||||
'~TSU'<>ッ;
|
||||
'~WA'<>ヮ;
|
||||
'~YA'<>ャ;
|
||||
'~YI'>ィ;
|
||||
'~YU'<>ュ;
|
||||
'~YE'>ェ;
|
||||
'~YO'<>ョ;
|
||||
|
||||
# One-way latin->kana rules. These do not occur in
|
||||
# well-formed romaji representing actual Japanese text.
|
||||
# Their purpose is to make all romaji map to kana of
|
||||
# some sort.
|
||||
|
||||
# The following are not really necessary, but produce
|
||||
# slightly more natural results.
|
||||
|
||||
CY>セィ;
|
||||
DY>ディ;
|
||||
HY>ヒ;
|
||||
SY>セィ;
|
||||
TY>ティ;
|
||||
ZY>ゼィ;
|
||||
|
||||
# Simple substitutions using backup
|
||||
|
||||
C>|K;
|
||||
F>フ|'~';
|
||||
J>ジ|'~Y';
|
||||
L>|R;
|
||||
Q>|K;
|
||||
V>ヴ|'~';
|
||||
W>ウ|'~';
|
||||
X>|KS;
|
||||
|
||||
# Isolated consonants listed here so as not to mask
|
||||
# longer rules above.
|
||||
|
||||
B>ブ;
|
||||
D>デ;
|
||||
G>グ;
|
||||
H>ヘ;
|
||||
K>ク;
|
||||
M>ン;
|
||||
N''<ン}$N_QUOTER;
|
||||
N<>ン;
|
||||
P>プ;
|
||||
R>ル;
|
||||
S>ス;
|
||||
T>テ;
|
||||
Y>イ;
|
||||
Z>ズ;
|
||||
|
||||
# ~~~ END shared rules ~~~
|
||||
|
||||
#------------------------------------------------------
|
||||
# Hiragana rules
|
||||
|
||||
# Currently, there are no hiragana rules other than the
|
||||
# shared rules.
|
||||
|
||||
# ~~~ BEGIN shared rules ~~~
|
||||
|
||||
# The shared rules are copied from katakana to hiragana
|
||||
# and then mechanically lowercased.
|
||||
|
||||
a<>あ;
|
||||
|
||||
ba<>ば;
|
||||
bya<びゃ;
|
||||
byi<びぃ;
|
||||
byu<びゅ;
|
||||
bye<びぇ;
|
||||
byo<びょ;
|
||||
bi<>び;
|
||||
bu<>ぶ;
|
||||
be<>べ;
|
||||
bo<>ぼ;
|
||||
by>び|'~y';
|
||||
|
||||
cha<ちゃ;
|
||||
chi'~i'<ちぃ; # liu
|
||||
chu<ちゅ;
|
||||
che<ちぇ;
|
||||
cho<ちょ;
|
||||
chi<>ち;
|
||||
ch>ち|'~y';
|
||||
|
||||
c}i>|s;
|
||||
c}e>|s;
|
||||
|
||||
da<>だ;
|
||||
di<>でぃ;
|
||||
du<>でぅ;
|
||||
de<>で;
|
||||
do<>ど;
|
||||
dzu<>づ;
|
||||
dja<ぢゃ;
|
||||
dji'~i'<ぢぃ; # liu
|
||||
dju<ぢゅ;
|
||||
dje<ぢぇ;
|
||||
djo<ぢょ;
|
||||
dji<>ぢ;
|
||||
dj>ぢ|'~y';
|
||||
|
||||
e<>え;
|
||||
|
||||
fa<ふぁ;
|
||||
fi<ふぃ;
|
||||
fe<ふぇ;
|
||||
fo<ふぉ;
|
||||
fu<>ふ;
|
||||
|
||||
ga<>が;
|
||||
gya<ぎゃ;
|
||||
gyi<ぎぃ;
|
||||
gyu<ぎゅ;
|
||||
gye<ぎぇ;
|
||||
gyo<ぎょ;
|
||||
gi<>ぎ;
|
||||
gu<>ぐ;
|
||||
ge<>げ;
|
||||
go<>ご;
|
||||
gy>ぎ|'~y';
|
||||
|
||||
ha<>は;
|
||||
hi<>ひ;
|
||||
hu<>へぅ;
|
||||
he<>へ;
|
||||
ho<>ほ;
|
||||
|
||||
i<>い;
|
||||
|
||||
ja<じゃ;
|
||||
ji'~i'<じぃ; # liu
|
||||
ju<じゅ;
|
||||
je<じぇ;
|
||||
jo<じょ;
|
||||
ji<>じ;
|
||||
|
||||
ka<>か;
|
||||
kya<きゃ;
|
||||
kyi<きぃ;
|
||||
kyu<きゅ;
|
||||
kye<きぇ;
|
||||
kyo<きょ;
|
||||
ki<>き;
|
||||
ku<>く;
|
||||
ke<>け;
|
||||
ko<>こ;
|
||||
ky>き|'~y';
|
||||
|
||||
ma<>ま;
|
||||
mya<みゃ;
|
||||
myi<みぃ;
|
||||
myu<みゅ;
|
||||
mye<みぇ;
|
||||
myo<みょ;
|
||||
mi<>み;
|
||||
mu<>む;
|
||||
me<>め;
|
||||
mo<>も;
|
||||
my>み|'~y';
|
||||
|
||||
m}p>ん;
|
||||
m}b>ん;
|
||||
m}f>ん;
|
||||
m}v>ん;
|
||||
|
||||
na<>な;
|
||||
nya<にゃ;
|
||||
nyi<にぃ;
|
||||
nyu<にゅ;
|
||||
nye<にぇ;
|
||||
nyo<にょ;
|
||||
ni<>に;
|
||||
nu<>ぬ;
|
||||
ne<>ね;
|
||||
no<>の;
|
||||
ny>に|'~y';
|
||||
|
||||
o<>お;
|
||||
|
||||
pa<>ぱ;
|
||||
pya<ぴゃ;
|
||||
pyi<ぴぃ;
|
||||
pyu<ぴゅ;
|
||||
pye<ぴぇ;
|
||||
pyo<ぴょ;
|
||||
pi<>ぴ;
|
||||
pu<>ぷ;
|
||||
pe<>ぺ;
|
||||
po<>ぽ;
|
||||
py>ぴ|'~y';
|
||||
|
||||
ra<>ら;
|
||||
rya<りゃ;
|
||||
ryi<りぃ;
|
||||
ryu<りゅ;
|
||||
rye<りぇ;
|
||||
ryo<りょ;
|
||||
ri<>り;
|
||||
ru<>る;
|
||||
re<>れ;
|
||||
ro<>ろ;
|
||||
ry>り|'~y';
|
||||
|
||||
sa<>さ;
|
||||
si<>せぃ;
|
||||
su<>す;
|
||||
se<>せ;
|
||||
so<>そ;
|
||||
|
||||
sha<しゃ;
|
||||
shi'~i'<しぃ; # liu
|
||||
shu<しゅ;
|
||||
she<しぇ;
|
||||
sho<しょ;
|
||||
shi<>し;
|
||||
sh>し|'~y';
|
||||
|
||||
ta<>た;
|
||||
ti<>てぃ;
|
||||
tu<>てぅ;
|
||||
te<>て;
|
||||
to<>と;
|
||||
|
||||
# double consonants
|
||||
|
||||
b}b<>っ}$B_START;
|
||||
c}k>っ;
|
||||
c}c>っ;
|
||||
c}q>っ;
|
||||
d}d<>っ}$D_START;
|
||||
f}f<>っ}$F_START;
|
||||
g}g<>っ}$G_START;
|
||||
h}h<>っ}$H_START;
|
||||
j}j<>っ}$J_START;
|
||||
k}k<>っ}$K_START;
|
||||
l}l>っ;
|
||||
m}m<>っ}$M_START;
|
||||
n}n<>っ}$N_START;
|
||||
p}p<>っ}$P_START;
|
||||
q}q>っ;
|
||||
r}r<>っ}$R_START;
|
||||
s}sh>っ;
|
||||
s}s<>っ}$S_START;
|
||||
t}ch>っ;
|
||||
t}t<>っ}$T_START;
|
||||
v}v<>っ}$V_START;
|
||||
w}w<>っ}$W_START;
|
||||
x}x>っ;
|
||||
y}y<>っ}$Y_START;
|
||||
z}z<>っ}$Z_START;
|
||||
|
||||
tsu<>つ;
|
||||
|
||||
u<>う;
|
||||
|
||||
'v~a'<ゔぁ; # liu
|
||||
'v~i'<ゔぃ; # liu
|
||||
'v~e'<ゔぇ; # liu
|
||||
'v~o'<ゔぉ; # liu
|
||||
vu<>ゔ;
|
||||
|
||||
wa<>わ;
|
||||
wi<>ゐ;
|
||||
wu>う;
|
||||
we<>ゑ;
|
||||
wo<>を;
|
||||
|
||||
ya<>や;
|
||||
yi>い;
|
||||
yu<>ゆ;
|
||||
ye>え;
|
||||
yo<>よ;
|
||||
|
||||
za<>ざ;
|
||||
zi<>ぜぃ;
|
||||
zu<>ず;
|
||||
ze<>ぜ;
|
||||
zo<>ぞ;
|
||||
|
||||
# prolonged vowel mark. this indicates a doubling of
|
||||
# the preceding vowel sound in both katakana and
|
||||
# hiragana.
|
||||
|
||||
a<a{ー; # liu
|
||||
e<e{ー; # liu
|
||||
i<i{ー; # liu
|
||||
o<o{ー; # liu
|
||||
u<u{ー; # liu
|
||||
|
||||
# small forms
|
||||
|
||||
'~a'<>ぁ;
|
||||
'~i'<>ぃ;
|
||||
'~u'<>ぅ;
|
||||
'~e'<>ぇ;
|
||||
'~o'<>ぉ;
|
||||
'~tsu'<>っ;
|
||||
'~wa'<>ゎ;
|
||||
'~ya'<>ゃ;
|
||||
'~yi'>ぃ;
|
||||
'~yu'<>ゅ;
|
||||
'~ye'>ぇ;
|
||||
'~yo'<>ょ;
|
||||
|
||||
# one-way latin->kana rules. these do not occur in
|
||||
# well-formed romaji representing actual japanese text.
|
||||
# their purpose is to make all romaji map to kana of
|
||||
# some sort.
|
||||
|
||||
# the following are not really necessary, but produce
|
||||
# slightly more natural results.
|
||||
|
||||
cy>せぃ;
|
||||
dy>でぃ;
|
||||
hy>ひ;
|
||||
sy>せぃ;
|
||||
ty>てぃ;
|
||||
zy>ぜぃ;
|
||||
|
||||
# simple substitutions using backup
|
||||
|
||||
c>|k;
|
||||
f>ふ|'~';
|
||||
j>じ|'~y';
|
||||
l>|r;
|
||||
q>|k;
|
||||
v>ゔ|'~';
|
||||
w>う|'~';
|
||||
x>|ks;
|
||||
|
||||
# isolated consonants listed here so as not to mask
|
||||
# longer rules above.
|
||||
|
||||
b>ぶ;
|
||||
d>で;
|
||||
g>ぐ;
|
||||
h>へ;
|
||||
k>く;
|
||||
m>ん;
|
||||
n''<ん}$n_quoter;
|
||||
n<>ん;
|
||||
p>ぷ;
|
||||
r>る;
|
||||
s>す;
|
||||
t>て;
|
||||
y>い;
|
||||
z>ず;
|
||||
|
||||
# ~~~ END shared rules ~~~
|
||||
|
||||
#------------------------------------------------------
|
||||
# Final cleanup
|
||||
|
||||
'~'>; # delete stray tildes
|
||||
''>; # delete stray quotes
|
||||
'-'>ー;
|
||||
|
||||
# eof
|
|
@ -0,0 +1,89 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:06 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Malayalam-InterIndic
|
||||
|
||||
ം>\uE002; # SIGN ANUSVARA
|
||||
ഃ>\uE003; # SIGN VISARGA
|
||||
അ>\uE005; # LETTER A
|
||||
ആ>\uE006; # LETTER AA
|
||||
ഇ>\uE007; # LETTER I
|
||||
ഈ>\uE008; # LETTER II
|
||||
ഉ>\uE009; # LETTER U
|
||||
ഊ>\uE00A; # LETTER UU
|
||||
ഋ>\uE00B; # LETTER VOCALIC R
|
||||
ഌ>\uE00C; # LETTER VOCALIC L
|
||||
എ>\uE081; # LETTER E
|
||||
ഏ>\uE00F; # LETTER EE
|
||||
ഐ>\uE010; # LETTER AI
|
||||
ഒ>\uE082; # LETTER O
|
||||
ഓ>\uE013; # LETTER OO
|
||||
ഔ>\uE014; # LETTER AU
|
||||
ക>\uE015; # LETTER KA
|
||||
ഖ>\uE016; # LETTER KHA
|
||||
ഗ>\uE017; # LETTER GA
|
||||
ഘ>\uE018; # LETTER GHA
|
||||
ങ>\uE019; # LETTER NGA
|
||||
ച>\uE01A; # LETTER CA
|
||||
ഛ>\uE01B; # LETTER CHA
|
||||
ജ>\uE01C; # LETTER JA
|
||||
ഝ>\uE01D; # LETTER JHA
|
||||
ഞ>\uE01E; # LETTER NYA
|
||||
ട>\uE01F; # LETTER TTA
|
||||
ഠ>\uE020; # LETTER TTHA
|
||||
ഡ>\uE021; # LETTER DDA
|
||||
ഢ>\uE022; # LETTER DDHA
|
||||
ണ>\uE023; # LETTER NNA
|
||||
ത>\uE024; # LETTER TA
|
||||
ഥ>\uE025; # LETTER THA
|
||||
ദ>\uE026; # LETTER DA
|
||||
ധ>\uE027; # LETTER DHA
|
||||
ന>\uE028; # LETTER NA
|
||||
പ>\uE02A; # LETTER PA
|
||||
ഫ>\uE02B; # LETTER PHA
|
||||
ബ>\uE02C; # LETTER BA
|
||||
ഭ>\uE02D; # LETTER BHA
|
||||
മ>\uE02E; # LETTER MA
|
||||
യ>\uE02F; # LETTER YA
|
||||
ര>\uE030; # LETTER RA
|
||||
റ>\uE083; # LETTER RRA
|
||||
ല>\uE032; # LETTER LA
|
||||
ള>\uE033; # LETTER LLA
|
||||
ഴ>\uE034; # LETTER LLLA
|
||||
വ>\uE035; # LETTER VA
|
||||
ശ>\uE036; # LETTER SHA
|
||||
ഷ>\uE037; # LETTER SSA
|
||||
സ>\uE038; # LETTER SA
|
||||
ഹ>\uE039; # LETTER HA
|
||||
ാ>\uE03E; # VOWEL SIGN AA
|
||||
ി>\uE03F; # VOWEL SIGN I
|
||||
ീ>\uE040; # VOWEL SIGN II
|
||||
ു>\uE041; # VOWEL SIGN U
|
||||
ൂ>\uE042; # VOWEL SIGN UU
|
||||
ൃ>\uE043; # VOWEL SIGN VOCALIC R
|
||||
െ>\uE084; # VOWEL SIGN E
|
||||
േ>\uE047; # VOWEL SIGN EE
|
||||
ൈ>\uE048; # VOWEL SIGN AI
|
||||
ൊ>\uE085; # VOWEL SIGN O
|
||||
ോ>\uE04B; # VOWEL SIGN OO
|
||||
ൌ>\uE04C; # VOWEL SIGN AU
|
||||
്>\uE04D; # SIGN VIRAMA
|
||||
ൗ>\uE057; # AU LENGTH MARK
|
||||
ൠ>\uE060; # LETTER VOCALIC RR
|
||||
ൡ>\uE061; # LETTER VOCALIC LL
|
||||
൦>\uE066; # DIGIT ZERO
|
||||
൧>\uE067; # DIGIT ONE
|
||||
൨>\uE068; # DIGIT TWO
|
||||
൩>\uE069; # DIGIT THREE
|
||||
൪>\uE06A; # DIGIT FOUR
|
||||
൫>\uE06B; # DIGIT FIVE
|
||||
൬>\uE06C; # DIGIT SIX
|
||||
൭>\uE06D; # DIGIT SEVEN
|
||||
൮>\uE06E; # DIGIT EIGHT
|
||||
൯>\uE06F; # DIGIT NINE
|
||||
|
||||
# eof
|
90
icu4j/src/com/ibm/text/resources/Transliterator_Oriya_InterIndic.utf8.txt
Executable file
90
icu4j/src/com/ibm/text/resources/Transliterator_Oriya_InterIndic.utf8.txt
Executable file
|
@ -0,0 +1,90 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:07 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Oriya-InterIndic
|
||||
|
||||
ଁ>\uE001; # SIGN CANDRABINDU
|
||||
ଂ>\uE002; # SIGN ANUSVARA
|
||||
ଃ>\uE003; # SIGN VISARGA
|
||||
ଅ>\uE005; # LETTER A
|
||||
ଆ>\uE006; # LETTER AA
|
||||
ଇ>\uE007; # LETTER I
|
||||
ଈ>\uE008; # LETTER II
|
||||
ଉ>\uE009; # LETTER U
|
||||
ଊ>\uE00A; # LETTER UU
|
||||
ଋ>\uE00B; # LETTER VOCALIC R
|
||||
ଌ>\uE00C; # LETTER VOCALIC L
|
||||
ଏ>\uE081; # LETTER E
|
||||
ଐ>\uE010; # LETTER AI
|
||||
ଓ>\uE082; # LETTER O
|
||||
ଔ>\uE014; # LETTER AU
|
||||
କ>\uE015; # LETTER KA
|
||||
ଖ>\uE016; # LETTER KHA
|
||||
ଗ>\uE017; # LETTER GA
|
||||
ଘ>\uE018; # LETTER GHA
|
||||
ଙ>\uE019; # LETTER NGA
|
||||
ଚ>\uE01A; # LETTER CA
|
||||
ଛ>\uE01B; # LETTER CHA
|
||||
ଜ>\uE01C; # LETTER JA
|
||||
ଝ>\uE01D; # LETTER JHA
|
||||
ଞ>\uE01E; # LETTER NYA
|
||||
ଟ>\uE01F; # LETTER TTA
|
||||
ଠ>\uE020; # LETTER TTHA
|
||||
ଡ>\uE021; # LETTER DDA
|
||||
ଢ>\uE022; # LETTER DDHA
|
||||
ଣ>\uE023; # LETTER NNA
|
||||
ତ>\uE024; # LETTER TA
|
||||
ଥ>\uE025; # LETTER THA
|
||||
ଦ>\uE026; # LETTER DA
|
||||
ଧ>\uE027; # LETTER DHA
|
||||
ନ>\uE028; # LETTER NA
|
||||
ପ>\uE02A; # LETTER PA
|
||||
ଫ>\uE02B; # LETTER PHA
|
||||
ବ>\uE02C; # LETTER BA
|
||||
ଭ>\uE02D; # LETTER BHA
|
||||
ମ>\uE02E; # LETTER MA
|
||||
ଯ>\uE02F; # LETTER YA
|
||||
ର>\uE030; # LETTER RA
|
||||
ଲ>\uE032; # LETTER LA
|
||||
ଳ>\uE033; # LETTER LLA
|
||||
ଶ>\uE036; # LETTER SHA
|
||||
ଷ>\uE037; # LETTER SSA
|
||||
ସ>\uE038; # LETTER SA
|
||||
ହ>\uE039; # LETTER HA
|
||||
଼>\uE03C; # SIGN NUKTA
|
||||
ଽ>\uE03D; # SIGN AVAGRAHA
|
||||
ା>\uE03E; # VOWEL SIGN AA
|
||||
ି>\uE03F; # VOWEL SIGN I
|
||||
ୀ>\uE040; # VOWEL SIGN II
|
||||
ୁ>\uE041; # VOWEL SIGN U
|
||||
ୂ>\uE042; # VOWEL SIGN UU
|
||||
ୃ>\uE043; # VOWEL SIGN VOCALIC R
|
||||
େ>\uE084; # VOWEL SIGN E
|
||||
ୈ>\uE048; # VOWEL SIGN AI
|
||||
ୋ>\uE085; # VOWEL SIGN O
|
||||
ୌ>\uE04C; # VOWEL SIGN AU
|
||||
୍>\uE04D; # SIGN VIRAMA
|
||||
ୖ>\uE056; # AI LENGTH MARK
|
||||
ୗ>\uE057; # AU LENGTH MARK
|
||||
ଡ଼>\uE083; # LETTER RRA
|
||||
ଢ଼>\uE05D; # LETTER RHA
|
||||
ୟ>\uE05F; # LETTER YYA
|
||||
ୠ>\uE060; # LETTER VOCALIC RR
|
||||
ୡ>\uE061; # LETTER VOCALIC LL
|
||||
୦>\uE066; # DIGIT ZERO
|
||||
୧>\uE067; # DIGIT ONE
|
||||
୨>\uE068; # DIGIT TWO
|
||||
୩>\uE069; # DIGIT THREE
|
||||
୪>\uE06A; # DIGIT FOUR
|
||||
୫>\uE06B; # DIGIT FIVE
|
||||
୬>\uE06C; # DIGIT SIX
|
||||
୭>\uE06D; # DIGIT SEVEN
|
||||
୮>\uE06E; # DIGIT EIGHT
|
||||
୯>\uE06F; # DIGIT NINE
|
||||
୰>\uE080; # ISSHAR
|
||||
|
||||
# eof
|
|
@ -0,0 +1,77 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:07 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# StraightQuotes-CurlyQuotes
|
||||
|
||||
# Rewritten using character codes [LIU]
|
||||
$white=[[:Zs:][:Zl:][:Zp:]];
|
||||
$black=[^$white];
|
||||
$open=[:Ps:];
|
||||
### $dquote='\"';
|
||||
|
||||
### $lAng=〈;
|
||||
### $ldAng=《;
|
||||
### $lBrk='[';
|
||||
### $lBrc='{';
|
||||
|
||||
### $lquote=‘;
|
||||
### $rquote=’;
|
||||
### $ldquote=“;
|
||||
### $rdquote=”;
|
||||
|
||||
### $ldguill=«;
|
||||
### $rdguill=»;
|
||||
### $lguill=‹;
|
||||
### $rguill=›;
|
||||
|
||||
### $mdash=—;
|
||||
|
||||
########################################
|
||||
# Conversions from input
|
||||
########################################
|
||||
|
||||
# join single quotes
|
||||
‘''>“;
|
||||
‘‘>“;
|
||||
’''>”;
|
||||
’’>”;
|
||||
|
||||
#smart single quotes
|
||||
$white{''>‘;
|
||||
$open{''>‘;
|
||||
$black{''>’;
|
||||
''>‘;
|
||||
|
||||
#smart doubles
|
||||
$white{'\"'>“;
|
||||
$open{'\"'>“;
|
||||
$black{'\"'>”;
|
||||
'\"'>“;
|
||||
|
||||
# join single guillemets
|
||||
››>»;
|
||||
'>>'>»;
|
||||
‹‹>«;
|
||||
'<<'>«;
|
||||
|
||||
# prevent double spaces
|
||||
\\ {\\ >;
|
||||
|
||||
# join hyphens into dash ### BIDIRECTIONAL ###
|
||||
'--'<>—;
|
||||
|
||||
########################################
|
||||
# Conversions back to input
|
||||
########################################
|
||||
|
||||
#smart quotes
|
||||
''<‘;
|
||||
''<’;
|
||||
'\"'<“;
|
||||
'\"'<”;
|
||||
|
||||
# eof
|
72
icu4j/src/com/ibm/text/resources/Transliterator_Tamil_InterIndic.utf8.txt
Executable file
72
icu4j/src/com/ibm/text/resources/Transliterator_Tamil_InterIndic.utf8.txt
Executable file
|
@ -0,0 +1,72 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:07 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Tamil-InterIndic
|
||||
|
||||
ஂ>\uE002; # SIGN ANUSVARA
|
||||
ஃ>\uE003; # SIGN VISARGA
|
||||
அ>\uE005; # LETTER A
|
||||
ஆ>\uE006; # LETTER AA
|
||||
இ>\uE007; # LETTER I
|
||||
ஈ>\uE008; # LETTER II
|
||||
உ>\uE009; # LETTER U
|
||||
ஊ>\uE00A; # LETTER UU
|
||||
எ>\uE081; # LETTER E
|
||||
ஏ>\uE00F; # LETTER EE
|
||||
ஐ>\uE010; # LETTER AI
|
||||
ஒ>\uE082; # LETTER O
|
||||
ஓ>\uE013; # LETTER OO
|
||||
ஔ>\uE014; # LETTER AU
|
||||
க>\uE015; # LETTER KA
|
||||
ங>\uE019; # LETTER NGA
|
||||
ச>\uE01A; # LETTER CA
|
||||
ஜ>\uE01C; # LETTER JA
|
||||
ஞ>\uE01E; # LETTER NYA
|
||||
ட>\uE01F; # LETTER TTA
|
||||
ண>\uE023; # LETTER NNA
|
||||
த>\uE024; # LETTER TA
|
||||
ந>\uE028; # LETTER NA
|
||||
ன>\uE029; # LETTER NNNA
|
||||
ப>\uE02A; # LETTER PA
|
||||
ம>\uE02E; # LETTER MA
|
||||
ய>\uE02F; # LETTER YA
|
||||
ர>\uE030; # LETTER RA
|
||||
ற>\uE083; # LETTER RRA
|
||||
ல>\uE032; # LETTER LA
|
||||
ள>\uE033; # LETTER LLA
|
||||
ழ>\uE034; # LETTER LLLA
|
||||
வ>\uE035; # LETTER VA
|
||||
ஷ>\uE037; # LETTER SSA
|
||||
ஸ>\uE038; # LETTER SA
|
||||
ஹ>\uE039; # LETTER HA
|
||||
ா>\uE03E; # VOWEL SIGN AA
|
||||
ி>\uE03F; # VOWEL SIGN I
|
||||
ீ>\uE040; # VOWEL SIGN II
|
||||
ு>\uE041; # VOWEL SIGN U
|
||||
ூ>\uE042; # VOWEL SIGN UU
|
||||
ெ>\uE084; # VOWEL SIGN E
|
||||
ே>\uE047; # VOWEL SIGN EE
|
||||
ை>\uE048; # VOWEL SIGN AI
|
||||
ொ>\uE085; # VOWEL SIGN O
|
||||
ோ>\uE04B; # VOWEL SIGN OO
|
||||
ௌ>\uE04C; # VOWEL SIGN AU
|
||||
்>\uE04D; # SIGN VIRAMA
|
||||
ௗ>\uE057; # AU LENGTH MARK
|
||||
௧>\uE067; # DIGIT ONE
|
||||
௨>\uE068; # DIGIT TWO
|
||||
௩>\uE069; # DIGIT THREE
|
||||
௪>\uE06A; # DIGIT FOUR
|
||||
௫>\uE06B; # DIGIT FIVE
|
||||
௬>\uE06C; # DIGIT SIX
|
||||
௭>\uE06D; # DIGIT SEVEN
|
||||
௮>\uE06E; # DIGIT EIGHT
|
||||
௯>\uE06F; # DIGIT NINE
|
||||
# ௰>; // UNMAPPED Tamil-InterIndic: NUMBER TEN
|
||||
# ௱>; // UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED
|
||||
# ௲>; // UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND
|
||||
|
||||
# eof
|
91
icu4j/src/com/ibm/text/resources/Transliterator_Telugu_InterIndic.utf8.txt
Executable file
91
icu4j/src/com/ibm/text/resources/Transliterator_Telugu_InterIndic.utf8.txt
Executable file
|
@ -0,0 +1,91 @@
|
|||
#--------------------------------------------------------------------
|
||||
# Copyright (c) 1999-2001, International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# Date: Tue Jan 23 12:42:07 2001
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Telugu-InterIndic
|
||||
|
||||
ఁ>\uE001; # SIGN CANDRABINDU
|
||||
ం>\uE002; # SIGN ANUSVARA
|
||||
ః>\uE003; # SIGN VISARGA
|
||||
అ>\uE005; # LETTER A
|
||||
ఆ>\uE006; # LETTER AA
|
||||
ఇ>\uE007; # LETTER I
|
||||
ఈ>\uE008; # LETTER II
|
||||
ఉ>\uE009; # LETTER U
|
||||
ఊ>\uE00A; # LETTER UU
|
||||
ఋ>\uE00B; # LETTER VOCALIC R
|
||||
ఌ>\uE00C; # LETTER VOCALIC L
|
||||
ఎ>\uE081; # LETTER E
|
||||
ఏ>\uE00F; # LETTER EE
|
||||
ఐ>\uE010; # LETTER AI
|
||||
ఒ>\uE082; # LETTER O
|
||||
ఓ>\uE013; # LETTER OO
|
||||
ఔ>\uE014; # LETTER AU
|
||||
క>\uE015; # LETTER KA
|
||||
ఖ>\uE016; # LETTER KHA
|
||||
గ>\uE017; # LETTER GA
|
||||
ఘ>\uE018; # LETTER GHA
|
||||
ఙ>\uE019; # LETTER NGA
|
||||
చ>\uE01A; # LETTER CA
|
||||
ఛ>\uE01B; # LETTER CHA
|
||||
జ>\uE01C; # LETTER JA
|
||||
ఝ>\uE01D; # LETTER JHA
|
||||
ఞ>\uE01E; # LETTER NYA
|
||||
ట>\uE01F; # LETTER TTA
|
||||
ఠ>\uE020; # LETTER TTHA
|
||||
డ>\uE021; # LETTER DDA
|
||||
ఢ>\uE022; # LETTER DDHA
|
||||
ణ>\uE023; # LETTER NNA
|
||||
త>\uE024; # LETTER TA
|
||||
థ>\uE025; # LETTER THA
|
||||
ద>\uE026; # LETTER DA
|
||||
ధ>\uE027; # LETTER DHA
|
||||
న>\uE028; # LETTER NA
|
||||
ప>\uE02A; # LETTER PA
|
||||
ఫ>\uE02B; # LETTER PHA
|
||||
బ>\uE02C; # LETTER BA
|
||||
భ>\uE02D; # LETTER BHA
|
||||
మ>\uE02E; # LETTER MA
|
||||
య>\uE02F; # LETTER YA
|
||||
ర>\uE030; # LETTER RA
|
||||
ఱ>\uE083; # LETTER RRA
|
||||
ల>\uE032; # LETTER LA
|
||||
ళ>\uE033; # LETTER LLA
|
||||
వ>\uE035; # LETTER VA
|
||||
శ>\uE036; # LETTER SHA
|
||||
ష>\uE037; # LETTER SSA
|
||||
స>\uE038; # LETTER SA
|
||||
హ>\uE039; # LETTER HA
|
||||
ా>\uE03E; # VOWEL SIGN AA
|
||||
ి>\uE03F; # VOWEL SIGN I
|
||||
ీ>\uE040; # VOWEL SIGN II
|
||||
ు>\uE041; # VOWEL SIGN U
|
||||
ూ>\uE042; # VOWEL SIGN UU
|
||||
ృ>\uE043; # VOWEL SIGN VOCALIC R
|
||||
ౄ>\uE044; # VOWEL SIGN VOCALIC RR
|
||||
ె>\uE084; # VOWEL SIGN E
|
||||
ే>\uE047; # VOWEL SIGN EE
|
||||
ై>\uE048; # VOWEL SIGN AI
|
||||
ొ>\uE085; # VOWEL SIGN O
|
||||
ో>\uE04B; # VOWEL SIGN OO
|
||||
ౌ>\uE04C; # VOWEL SIGN AU
|
||||
్>\uE04D; # SIGN VIRAMA
|
||||
ౕ>\uE055; # LENGTH MARK
|
||||
ౖ>\uE056; # AI LENGTH MARK
|
||||
ౠ>\uE060; # LETTER VOCALIC RR
|
||||
ౡ>\uE061; # LETTER VOCALIC LL
|
||||
౦>\uE066; # DIGIT ZERO
|
||||
౧>\uE067; # DIGIT ONE
|
||||
౨>\uE068; # DIGIT TWO
|
||||
౩>\uE069; # DIGIT THREE
|
||||
౪>\uE06A; # DIGIT FOUR
|
||||
౫>\uE06B; # DIGIT FIVE
|
||||
౬>\uE06C; # DIGIT SIX
|
||||
౭>\uE06D; # DIGIT SEVEN
|
||||
౮>\uE06E; # DIGIT EIGHT
|
||||
౯>\uE06F; # DIGIT NINE
|
||||
|
||||
# eof
|
1522
icu4j/src/com/ibm/text/resources/Transliterator_UnicodeName_UnicodeChar.utf8.txt
Executable file
1522
icu4j/src/com/ibm/text/resources/Transliterator_UnicodeName_UnicodeChar.utf8.txt
Executable file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue