Load RuleBasedTransliterator files from UTF8 files instead of ResourceBundles

X-SVN-Rev: 3550
This commit is contained in:
Alan Liu 2001-02-03 00:48:41 +00:00
parent 1ac04345f0
commit 189ab5836c
40 changed files with 40290 additions and 70 deletions

33
.gitattributes vendored
View file

@ -91,6 +91,39 @@ icu4j/src/com/ibm/icu/dev/data/unicode/Draft-TestSuite.txt -text
icu4j/src/com/ibm/icu/impl/data/thai_dict -text
icu4j/src/com/ibm/icu/text/unames.dat -text
icu4j/src/com/ibm/icu/text/uprops.dat -text
icu4j/src/com/ibm/text/resources/Transliterator_Bengali_InterIndic.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Devanagari_InterIndic.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Fullwidth_Halfwidth.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Gujarati_InterIndic.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Gurmukhi_InterIndic.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Han_Pinyin.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Hiragana_Katakana.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Bengali.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Devanagari.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Gujarati.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Gurmukhi.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Kannada.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Malayalam.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Oriya.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Tamil.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_InterIndic_Telugu.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Kanji_English.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Kanji_OnRomaji.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Kannada_InterIndic.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_KeyboardEscape_Latin1.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Arabic.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Cyrillic.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Devanagari.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Greek.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Hebrew.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Jamo.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Latin_Kana.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Malayalam_InterIndic.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Oriya_InterIndic.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_StraightQuotes_CurlyQuotes.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Tamil_InterIndic.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_Telugu_InterIndic.utf8.txt -text
icu4j/src/com/ibm/text/resources/Transliterator_UnicodeName_UnicodeChar.utf8.txt -text
icu4j/src/com/ibm/text/resources/thai_dict -text
icu4j/src/data/holidays_jp.ucs -text
icu4j/src/data/thai6.ucs -text

View file

@ -0,0 +1,88 @@
package com.ibm.text.resources;
import java.io.*;
/**
 * A reader for text resource data in the current package.  The
 * resource data is loaded through the class loader, so it will
 * typically be a file in the same directory as the *.class files, or
 * a file within a JAR file in the corresponding subdirectory.  The
 * file must be a text file in one of the supported encodings; the
 * encoding is specified when the resource is opened by constructing
 * a <code>ResourceReader</code> object.
 *
 * <p>Although this class has a public API, it is designed for
 * internal use by classes in the <code>com.ibm.text</code> package.
 *
 * @author Alan Liu
 */
public class ResourceReader {
    private BufferedReader reader;
    private String resourceName;
    private String encoding;
    private boolean isReset; // TRUE if we are at the start of the file

    /**
     * Construct a reader object for the text file of the given name
     * in this package, in the given encoding.
     * @param resourceName the name of the text file located in this
     * package
     * @param encoding the encoding of the text file; if unsupported
     * an exception is thrown
     * @exception UnsupportedEncodingException if
     * <code>encoding</code> is not supported by the JDK.
     * @exception IllegalArgumentException if the resource cannot be
     * opened
     */
    public ResourceReader(String resourceName, String encoding)
        throws UnsupportedEncodingException {
        this.resourceName = resourceName;
        this.encoding = encoding;
        isReset = false;
        _reset();
    }

    /**
     * Read and return the next line of the file or <code>null</code>
     * if the end of the file has been reached.
     * @exception IOException if the underlying reader fails
     */
    public String readLine() throws IOException {
        // Any read attempt moves us off the start of the file, so the
        // next reset() has to reopen the resource.
        isReset = false;
        return reader.readLine();
    }

    /**
     * Reset this reader so that the next call to
     * <code>readLine()</code> returns the first line of the file
     * again.  This is a somewhat expensive call; however, calling
     * <code>reset()</code> after calling it the first time does
     * nothing if <code>readLine()</code> has not been called in
     * between.
     * @exception IllegalArgumentException if the resource can no
     * longer be opened
     */
    public void reset() {
        try {
            _reset();
        } catch (UnsupportedEncodingException e) {
            // We swallow this exception, if there is one.  If the
            // encoding is invalid, the constructor will have thrown
            // this exception already and the caller shouldn't use the
            // object afterwards.
        }
    }

    /**
     * Reset to the start by reconstructing the stream and readers.
     * We could also use mark() and reset() on the stream or reader,
     * but that would cause them to keep the stream data around in
     * memory.  We don't want that because some of the resource files
     * are large, e.g., 400k.
     *
     * <p>The previously opened reader, if any, is closed once its
     * replacement has been opened successfully, so repeated resets do
     * not accumulate open streams.
     */
    private void _reset() throws UnsupportedEncodingException {
        if (isReset) {
            return;
        }
        InputStream is = getClass().getResourceAsStream(resourceName);
        if (is == null) {
            throw new IllegalArgumentException("Can't open " + resourceName);
        }
        InputStreamReader isr;
        try {
            isr = new InputStreamReader(is, encoding);
        } catch (UnsupportedEncodingException e) {
            // Don't leave the raw stream open when the decoder can't
            // be created.
            try {
                is.close();
            } catch (IOException ignored) {
                // Nothing useful can be done; report the original problem.
            }
            throw e;
        }
        BufferedReader previous = reader;
        reader = new BufferedReader(isr);
        isReset = true;
        if (previous != null) {
            try {
                previous.close();
            } catch (IOException ignored) {
                // The old stream is being discarded anyway.
            }
        }
    }
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $
* $Date: 2000/08/31 17:11:42 $
* $Revision: 1.39 $
* $Date: 2001/02/03 00:46:21 $
* $Revision: 1.40 $
*
*****************************************************************************************
*/
@ -16,6 +16,7 @@ import java.util.Hashtable;
import java.util.Vector;
import java.text.ParsePosition;
import com.ibm.util.Utility;
import com.ibm.text.resources.ResourceReader;
/**
* <code>RuleBasedTransliterator</code> is a transliterator
@ -278,7 +279,7 @@ import com.ibm.util.Utility;
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
*
* @author Alan Liu
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.39 $ $Date: 2000/08/31 17:11:42 $
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.40 $ $Date: 2001/02/03 00:46:21 $
*/
public class RuleBasedTransliterator extends Transliterator {
@ -329,6 +330,10 @@ public class RuleBasedTransliterator extends Transliterator {
return parse(new String[] { rules }, direction);
}
/**
 * Parse the rule source supplied by a <code>ResourceReader</code>.
 * A new <code>Parser</code> is constructed over the reader and the
 * <code>Data</code> object it produced is returned.
 * @param rules reader that supplies the rule source
 * @param direction the direction handed through to the parser
 * @return the <code>Data</code> object obtained from the parser
 */
static Data parse(ResourceReader rules, int direction) {
return new Parser(rules, direction).getData();
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
@ -597,6 +602,86 @@ public class RuleBasedTransliterator extends Transliterator {
private static final char SEGMENT_OPEN = '(';
private static final char SEGMENT_CLOSE = ')';
/**
 * Private abstraction over rule source text that is delivered one
 * line at a time.  It takes care of joining a line that ends in a
 * backslash with the line that follows it.  The joining is
 * deliberately simple: it pays no attention to comments, quotes, or
 * escapes, so it should only be relied on in limited situations.
 */
private abstract class RuleBody {

    /**
     * Return the next logical line of the source, or null when the
     * source is exhausted.  A physical line whose last character is
     * a backslash is merged with the following physical line (the
     * backslash itself is dropped); this is done without regard for
     * comments, quotes, or escapes.
     */
    String nextLine() {
        String line = handleNextLine();
        if (line == null || !line.endsWith("\\")) {
            // Nothing to fold: end of source, or an ordinary line.
            return line;
        }

        StringBuffer folded = new StringBuffer(line);
        boolean continued = true;
        while (continued) {
            // Drop the trailing backslash before splicing on more text.
            folded.setLength(folded.length() - 1);
            String next = handleNextLine();
            if (next == null) {
                break;
            }
            folded.append(next);
            continued = next.endsWith("\\");
        }
        return folded.toString();
    }

    /**
     * Go back to the first line of the source.
     */
    abstract void reset();

    /**
     * Supplied by subclasses: return the next physical line of the
     * source.
     */
    abstract String handleNextLine();
};
/**
 * RuleBody implementation whose lines are the elements of a
 * String[] array, handed out in index order.
 */
private class RuleArray extends RuleBody {
    String[] array;
    int i;

    public RuleArray(String[] array) {
        this.array = array;
        this.i = 0;
    }

    /**
     * Return the array element at the current position and advance,
     * or null once every element has been handed out.
     */
    public String handleNextLine() {
        if (i >= array.length) {
            return null;
        }
        String line = array[i];
        ++i;
        return line;
    }

    /**
     * Start over at the first array element.
     */
    public void reset() {
        i = 0;
    }
};
/**
 * RuleBody implementation whose lines come from a ResourceReader.
 */
private class RuleReader extends RuleBody {
    ResourceReader reader;

    public RuleReader(ResourceReader reader) {
        this.reader = reader;
    }

    /**
     * Return the next line from the reader.  If reading fails with
     * an IOException, null is returned, exactly as at end of input.
     */
    public String handleNextLine() {
        String line = null;
        try {
            line = reader.readLine();
        } catch (java.io.IOException e) {
            // Deliberately ignored: a read failure is reported to the
            // caller as end of input.
        }
        return line;
    }

    /**
     * Rewind the underlying reader to the start of its resource.
     */
    public void reset() {
        reader.reset();
    }
};
/**
* @param rules list of rules, separated by semicolon characters
* @exception IllegalArgumentException if there is a syntax error in the
@ -605,7 +690,16 @@ public class RuleBasedTransliterator extends Transliterator {
public Parser(String[] ruleArray, int direction) {
this.direction = direction;
data = new Data();
parseRules(ruleArray);
parseRules(new RuleArray(ruleArray));
}
/**
 * Construct a parser over rule source read from a resource.  The
 * direction is recorded, a fresh <code>Data</code> object is
 * created, and the rules are parsed immediately from the reader
 * (wrapped in a <code>RuleReader</code>).
 * @param rules resource reader for the rules
 * @param direction the direction stored in this parser before the
 * rules are parsed
 */
public Parser(ResourceReader rules, int direction) {
this.direction = direction;
data = new Data();
parseRules(new RuleReader(rules));
}
public Data getData() {
@ -622,7 +716,7 @@ public class RuleBasedTransliterator extends Transliterator {
* @exception IllegalArgumentException if there is a syntax error in the
* rules
*/
private void parseRules(String[] ruleArray) {
private void parseRules(RuleBody ruleArray) {
determineVariableRange(ruleArray);
setVariablesVector = new Vector();
parseData = new ParseData();
@ -630,9 +724,13 @@ public class RuleBasedTransliterator extends Transliterator {
StringBuffer errors = null;
int errorCount = 0;
ruleArray.reset();
main:
for (int i=0; i<ruleArray.length; ++i) {
String rule = ruleArray[i];
for (;;) {
String rule = ruleArray.nextLine();
if (rule == null) {
break;
}
int pos = 0;
int limit = rule.length();
while (pos < limit) {
@ -1192,7 +1290,7 @@ public class RuleBasedTransliterator extends Transliterator {
* When done, everything not in the hash is available for use. In practice,
* this method may employ some other algorithm for improved speed.
*/
private final void determineVariableRange(String[] ruleArray) {
private final void determineVariableRange(RuleBody ruleArray) {
// As an initial implementation, we just run through all the
// characters, ignoring any quoting. This works since the quote
// mechanisms are outside the private use area.
@ -1309,12 +1407,16 @@ public class RuleBasedTransliterator extends Transliterator {
* characters in this range, then this range itself is
* returned.
*/
Range largestUnusedSubrange(String[] strings) {
Range largestUnusedSubrange(RuleBody strings) {
Vector v = new Vector(1);
v.addElement(clone());
for (int k=0; k<strings.length; ++k) {
String str = strings[k];
strings.reset();
for (;;) {
String str = strings.nextLine();
if (str == null) {
break;
}
int n = str.length();
for (int i=0; i<n; ++i) {
char c = str.charAt(i);
@ -1349,6 +1451,9 @@ public class RuleBasedTransliterator extends Transliterator {
/**
* $Log: RuleBasedTransliterator.java,v $
* Revision 1.40 2001/02/03 00:46:21 alan4j
* Load RuleBasedTransliterator files from UTF8 files instead of ResourceBundles
*
* Revision 1.39 2000/08/31 17:11:42 alan4j
* Implement anchors.
*

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
* $Date: 2000/10/06 23:07:40 $
* $Revision: 1.22 $
* $Date: 2001/02/03 00:46:21 $
* $Revision: 1.23 $
*
*****************************************************************************************
*/
@ -14,6 +14,8 @@ package com.ibm.text;
import java.util.*;
import java.text.MessageFormat;
import java.io.UnsupportedEncodingException;
import com.ibm.text.resources.ResourceReader;
/**
* <code>Transliterator</code> is an abstract class that
@ -210,7 +212,7 @@ import java.text.MessageFormat;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.22 $ $Date: 2000/10/06 23:07:40 $
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.23 $ $Date: 2001/02/03 00:46:21 $
*/
public abstract class Transliterator {
/**
@ -418,6 +420,16 @@ public abstract class Transliterator {
*/
private static final String RB_RULE = "Rule";
/**
* Prefix string to identify UTF8 RuleBasedTransliterator resource.
*/
private static final String RBT_UTF8_PREFIX = "Transliterator_";
/**
* Suffix string to identify UTF8 RuleBasedTransliterator resource.
*/
private static final String RBT_UTF8_SUFFIX = ".utf8.txt";
private static final String COPYRIGHT =
"\u00A9 IBM Corporation 1999. All rights reserved.";
@ -873,37 +885,60 @@ public abstract class Transliterator {
} else {
synchronized (cache) {
boolean isReverse = (obj == REVERSE_RULE_BASED_PLACEHOLDER);
String resourceName = RB_RULE_BASED_PREFIX;
String resourceName = ID;
int i = ID.indexOf('-');
if (i < 0) {
resourceName += ID;
} else {
if (i > 0) {
String IDLeft = ID.substring(0, i);
String IDRight = ID.substring(i+1);
resourceName += isReverse ? (IDRight + RB_RULE_BASED_SEPARATOR + IDLeft)
: (IDLeft + RB_RULE_BASED_SEPARATOR + IDRight);
resourceName = isReverse ? (IDRight + RB_RULE_BASED_SEPARATOR + IDLeft)
: (IDLeft + RB_RULE_BASED_SEPARATOR + IDRight);
}
ResourceReader r = null;
try {
ResourceBundle resource = ResourceBundle.getBundle(resourceName);
// We allow the resource bundle to contain either an array
// of rules, or a single rule string.
String[] ruleArray;
try {
ruleArray = resource.getStringArray(RB_RULE);
} catch (Exception e) {
// This is a ClassCastException under JDK 1.1.8
ruleArray = new String[] { resource.getString(RB_RULE) };
}
data = RuleBasedTransliterator.parse(ruleArray,
r = new ResourceReader(RBT_UTF8_PREFIX + resourceName + RBT_UTF8_SUFFIX,
"UTF8");
} catch (UnsupportedEncodingException e) {
// This should never happen; UTF8 is always supported
} catch (IllegalArgumentException e2) {
// Can't load UTF8 file
}
if (r != null) {
data = RuleBasedTransliterator.parse(r,
isReverse
? RuleBasedTransliterator.REVERSE
: RuleBasedTransliterator.FORWARD);
cache.put(ID, data);
// Fall through to construct transliterator from Data object.
} catch (MissingResourceException e) {}
} else {
// Unable to load the UTF8 file; try the resource
// bundles. Eventually, when we phase support for this
// out, we can delete this clause. Leave it in for now.
try {
ResourceBundle resource = ResourceBundle.getBundle(RB_RULE_BASED_PREFIX +
resourceName);
// We allow the resource bundle to contain either an array
// of rules, or a single rule string.
String[] ruleArray;
try {
ruleArray = resource.getStringArray(RB_RULE);
} catch (Exception e) {
// This is a ClassCastException under JDK 1.1.8
ruleArray = new String[] { resource.getString(RB_RULE) };
}
data = RuleBasedTransliterator.parse(ruleArray,
isReverse
? RuleBasedTransliterator.REVERSE
: RuleBasedTransliterator.FORWARD);
cache.put(ID, data);
// Fall through to construct transliterator from Data object.
} catch (MissingResourceException e) {}
}
}
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $
* $Date: 2000/08/31 17:11:42 $
* $Revision: 1.39 $
* $Date: 2001/02/03 00:46:21 $
* $Revision: 1.40 $
*
*****************************************************************************************
*/
@ -16,6 +16,7 @@ import java.util.Hashtable;
import java.util.Vector;
import java.text.ParsePosition;
import com.ibm.util.Utility;
import com.ibm.text.resources.ResourceReader;
/**
* <code>RuleBasedTransliterator</code> is a transliterator
@ -278,7 +279,7 @@ import com.ibm.util.Utility;
* <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
*
* @author Alan Liu
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.39 $ $Date: 2000/08/31 17:11:42 $
* @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.40 $ $Date: 2001/02/03 00:46:21 $
*/
public class RuleBasedTransliterator extends Transliterator {
@ -329,6 +330,10 @@ public class RuleBasedTransliterator extends Transliterator {
return parse(new String[] { rules }, direction);
}
/**
 * Parse the rule source supplied by a <code>ResourceReader</code>.
 * A new <code>Parser</code> is constructed over the reader and the
 * <code>Data</code> object it produced is returned.
 * @param rules reader that supplies the rule source
 * @param direction the direction handed through to the parser
 * @return the <code>Data</code> object obtained from the parser
 */
static Data parse(ResourceReader rules, int direction) {
return new Parser(rules, direction).getData();
}
/**
* Implements {@link Transliterator#handleTransliterate}.
*/
@ -597,6 +602,86 @@ public class RuleBasedTransliterator extends Transliterator {
private static final char SEGMENT_OPEN = '(';
private static final char SEGMENT_CLOSE = ')';
/**
 * Private abstraction over rule source text that is delivered one
 * line at a time.  It takes care of joining a line that ends in a
 * backslash with the line that follows it.  The joining is
 * deliberately simple: it pays no attention to comments, quotes, or
 * escapes, so it should only be relied on in limited situations.
 */
private abstract class RuleBody {

    /**
     * Return the next logical line of the source, or null when the
     * source is exhausted.  A physical line whose last character is
     * a backslash is merged with the following physical line (the
     * backslash itself is dropped); this is done without regard for
     * comments, quotes, or escapes.
     */
    String nextLine() {
        String line = handleNextLine();
        if (line == null || !line.endsWith("\\")) {
            // Nothing to fold: end of source, or an ordinary line.
            return line;
        }

        StringBuffer folded = new StringBuffer(line);
        boolean continued = true;
        while (continued) {
            // Drop the trailing backslash before splicing on more text.
            folded.setLength(folded.length() - 1);
            String next = handleNextLine();
            if (next == null) {
                break;
            }
            folded.append(next);
            continued = next.endsWith("\\");
        }
        return folded.toString();
    }

    /**
     * Go back to the first line of the source.
     */
    abstract void reset();

    /**
     * Supplied by subclasses: return the next physical line of the
     * source.
     */
    abstract String handleNextLine();
};
/**
 * RuleBody implementation whose lines are the elements of a
 * String[] array, handed out in index order.
 */
private class RuleArray extends RuleBody {
    String[] array;
    int i;

    public RuleArray(String[] array) {
        this.array = array;
        this.i = 0;
    }

    /**
     * Return the array element at the current position and advance,
     * or null once every element has been handed out.
     */
    public String handleNextLine() {
        if (i >= array.length) {
            return null;
        }
        String line = array[i];
        ++i;
        return line;
    }

    /**
     * Start over at the first array element.
     */
    public void reset() {
        i = 0;
    }
};
/**
 * RuleBody implementation whose lines come from a ResourceReader.
 */
private class RuleReader extends RuleBody {
    ResourceReader reader;

    public RuleReader(ResourceReader reader) {
        this.reader = reader;
    }

    /**
     * Return the next line from the reader.  If reading fails with
     * an IOException, null is returned, exactly as at end of input.
     */
    public String handleNextLine() {
        String line = null;
        try {
            line = reader.readLine();
        } catch (java.io.IOException e) {
            // Deliberately ignored: a read failure is reported to the
            // caller as end of input.
        }
        return line;
    }

    /**
     * Rewind the underlying reader to the start of its resource.
     */
    public void reset() {
        reader.reset();
    }
};
/**
* @param rules list of rules, separated by semicolon characters
* @exception IllegalArgumentException if there is a syntax error in the
@ -605,7 +690,16 @@ public class RuleBasedTransliterator extends Transliterator {
public Parser(String[] ruleArray, int direction) {
this.direction = direction;
data = new Data();
parseRules(ruleArray);
parseRules(new RuleArray(ruleArray));
}
/**
 * Construct a parser over rule source read from a resource.  The
 * direction is recorded, a fresh <code>Data</code> object is
 * created, and the rules are parsed immediately from the reader
 * (wrapped in a <code>RuleReader</code>).
 * @param rules resource reader for the rules
 * @param direction the direction stored in this parser before the
 * rules are parsed
 */
public Parser(ResourceReader rules, int direction) {
this.direction = direction;
data = new Data();
parseRules(new RuleReader(rules));
}
public Data getData() {
@ -622,7 +716,7 @@ public class RuleBasedTransliterator extends Transliterator {
* @exception IllegalArgumentException if there is a syntax error in the
* rules
*/
private void parseRules(String[] ruleArray) {
private void parseRules(RuleBody ruleArray) {
determineVariableRange(ruleArray);
setVariablesVector = new Vector();
parseData = new ParseData();
@ -630,9 +724,13 @@ public class RuleBasedTransliterator extends Transliterator {
StringBuffer errors = null;
int errorCount = 0;
ruleArray.reset();
main:
for (int i=0; i<ruleArray.length; ++i) {
String rule = ruleArray[i];
for (;;) {
String rule = ruleArray.nextLine();
if (rule == null) {
break;
}
int pos = 0;
int limit = rule.length();
while (pos < limit) {
@ -1192,7 +1290,7 @@ public class RuleBasedTransliterator extends Transliterator {
* When done, everything not in the hash is available for use. In practice,
* this method may employ some other algorithm for improved speed.
*/
private final void determineVariableRange(String[] ruleArray) {
private final void determineVariableRange(RuleBody ruleArray) {
// As an initial implementation, we just run through all the
// characters, ignoring any quoting. This works since the quote
// mechanisms are outside the private use area.
@ -1309,12 +1407,16 @@ public class RuleBasedTransliterator extends Transliterator {
* characters in this range, then this range itself is
* returned.
*/
Range largestUnusedSubrange(String[] strings) {
Range largestUnusedSubrange(RuleBody strings) {
Vector v = new Vector(1);
v.addElement(clone());
for (int k=0; k<strings.length; ++k) {
String str = strings[k];
strings.reset();
for (;;) {
String str = strings.nextLine();
if (str == null) {
break;
}
int n = str.length();
for (int i=0; i<n; ++i) {
char c = str.charAt(i);
@ -1349,6 +1451,9 @@ public class RuleBasedTransliterator extends Transliterator {
/**
* $Log: RuleBasedTransliterator.java,v $
* Revision 1.40 2001/02/03 00:46:21 alan4j
* Load RuleBasedTransliterator files from UTF8 files instead of ResourceBundles
*
* Revision 1.39 2000/08/31 17:11:42 alan4j
* Implement anchors.
*

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $
* $Date: 2000/10/06 23:07:40 $
* $Revision: 1.22 $
* $Date: 2001/02/03 00:46:21 $
* $Revision: 1.23 $
*
*****************************************************************************************
*/
@ -14,6 +14,8 @@ package com.ibm.text;
import java.util.*;
import java.text.MessageFormat;
import java.io.UnsupportedEncodingException;
import com.ibm.text.resources.ResourceReader;
/**
* <code>Transliterator</code> is an abstract class that
@ -210,7 +212,7 @@ import java.text.MessageFormat;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.22 $ $Date: 2000/10/06 23:07:40 $
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.23 $ $Date: 2001/02/03 00:46:21 $
*/
public abstract class Transliterator {
/**
@ -418,6 +420,16 @@ public abstract class Transliterator {
*/
private static final String RB_RULE = "Rule";
/**
* Prefix string to identify UTF8 RuleBasedTransliterator resource.
*/
private static final String RBT_UTF8_PREFIX = "Transliterator_";
/**
* Suffix string to identify UTF8 RuleBasedTransliterator resource.
*/
private static final String RBT_UTF8_SUFFIX = ".utf8.txt";
private static final String COPYRIGHT =
"\u00A9 IBM Corporation 1999. All rights reserved.";
@ -873,37 +885,60 @@ public abstract class Transliterator {
} else {
synchronized (cache) {
boolean isReverse = (obj == REVERSE_RULE_BASED_PLACEHOLDER);
String resourceName = RB_RULE_BASED_PREFIX;
String resourceName = ID;
int i = ID.indexOf('-');
if (i < 0) {
resourceName += ID;
} else {
if (i > 0) {
String IDLeft = ID.substring(0, i);
String IDRight = ID.substring(i+1);
resourceName += isReverse ? (IDRight + RB_RULE_BASED_SEPARATOR + IDLeft)
: (IDLeft + RB_RULE_BASED_SEPARATOR + IDRight);
resourceName = isReverse ? (IDRight + RB_RULE_BASED_SEPARATOR + IDLeft)
: (IDLeft + RB_RULE_BASED_SEPARATOR + IDRight);
}
ResourceReader r = null;
try {
ResourceBundle resource = ResourceBundle.getBundle(resourceName);
// We allow the resource bundle to contain either an array
// of rules, or a single rule string.
String[] ruleArray;
try {
ruleArray = resource.getStringArray(RB_RULE);
} catch (Exception e) {
// This is a ClassCastException under JDK 1.1.8
ruleArray = new String[] { resource.getString(RB_RULE) };
}
data = RuleBasedTransliterator.parse(ruleArray,
r = new ResourceReader(RBT_UTF8_PREFIX + resourceName + RBT_UTF8_SUFFIX,
"UTF8");
} catch (UnsupportedEncodingException e) {
// This should never happen; UTF8 is always supported
} catch (IllegalArgumentException e2) {
// Can't load UTF8 file
}
if (r != null) {
data = RuleBasedTransliterator.parse(r,
isReverse
? RuleBasedTransliterator.REVERSE
: RuleBasedTransliterator.FORWARD);
cache.put(ID, data);
// Fall through to construct transliterator from Data object.
} catch (MissingResourceException e) {}
} else {
// Unable to load the UTF8 file; try the resource
// bundles. Eventually, when we phase support for this
// out, we can delete this clause. Leave it in for now.
try {
ResourceBundle resource = ResourceBundle.getBundle(RB_RULE_BASED_PREFIX +
resourceName);
// We allow the resource bundle to contain either an array
// of rules, or a single rule string.
String[] ruleArray;
try {
ruleArray = resource.getStringArray(RB_RULE);
} catch (Exception e) {
// This is a ClassCastException under JDK 1.1.8
ruleArray = new String[] { resource.getString(RB_RULE) };
}
data = RuleBasedTransliterator.parse(ruleArray,
isReverse
? RuleBasedTransliterator.REVERSE
: RuleBasedTransliterator.FORWARD);
cache.put(ID, data);
// Fall through to construct transliterator from Data object.
} catch (MissingResourceException e) {}
}
}
}

View file

@ -0,0 +1,88 @@
package com.ibm.text.resources;
import java.io.*;
/**
 * A reader for text resource data in the current package.  The
 * resource data is loaded through the class loader, so it will
 * typically be a file in the same directory as the *.class files, or
 * a file within a JAR file in the corresponding subdirectory.  The
 * file must be a text file in one of the supported encodings; the
 * encoding is specified when the resource is opened by constructing
 * a <code>ResourceReader</code> object.
 *
 * <p>Although this class has a public API, it is designed for
 * internal use by classes in the <code>com.ibm.text</code> package.
 *
 * @author Alan Liu
 */
public class ResourceReader {
    private BufferedReader reader;
    private String resourceName;
    private String encoding;
    private boolean isReset; // TRUE if we are at the start of the file

    /**
     * Construct a reader object for the text file of the given name
     * in this package, in the given encoding.
     * @param resourceName the name of the text file located in this
     * package
     * @param encoding the encoding of the text file; if unsupported
     * an exception is thrown
     * @exception UnsupportedEncodingException if
     * <code>encoding</code> is not supported by the JDK.
     * @exception IllegalArgumentException if the resource cannot be
     * opened
     */
    public ResourceReader(String resourceName, String encoding)
        throws UnsupportedEncodingException {
        this.resourceName = resourceName;
        this.encoding = encoding;
        isReset = false;
        _reset();
    }

    /**
     * Read and return the next line of the file or <code>null</code>
     * if the end of the file has been reached.
     * @exception IOException if the underlying reader fails
     */
    public String readLine() throws IOException {
        // Any read attempt moves us off the start of the file, so the
        // next reset() has to reopen the resource.
        isReset = false;
        return reader.readLine();
    }

    /**
     * Reset this reader so that the next call to
     * <code>readLine()</code> returns the first line of the file
     * again.  This is a somewhat expensive call; however, calling
     * <code>reset()</code> after calling it the first time does
     * nothing if <code>readLine()</code> has not been called in
     * between.
     * @exception IllegalArgumentException if the resource can no
     * longer be opened
     */
    public void reset() {
        try {
            _reset();
        } catch (UnsupportedEncodingException e) {
            // We swallow this exception, if there is one.  If the
            // encoding is invalid, the constructor will have thrown
            // this exception already and the caller shouldn't use the
            // object afterwards.
        }
    }

    /**
     * Reset to the start by reconstructing the stream and readers.
     * We could also use mark() and reset() on the stream or reader,
     * but that would cause them to keep the stream data around in
     * memory.  We don't want that because some of the resource files
     * are large, e.g., 400k.
     *
     * <p>The previously opened reader, if any, is closed once its
     * replacement has been opened successfully, so repeated resets do
     * not accumulate open streams.
     */
    private void _reset() throws UnsupportedEncodingException {
        if (isReset) {
            return;
        }
        InputStream is = getClass().getResourceAsStream(resourceName);
        if (is == null) {
            throw new IllegalArgumentException("Can't open " + resourceName);
        }
        InputStreamReader isr;
        try {
            isr = new InputStreamReader(is, encoding);
        } catch (UnsupportedEncodingException e) {
            // Don't leave the raw stream open when the decoder can't
            // be created.
            try {
                is.close();
            } catch (IOException ignored) {
                // Nothing useful can be done; report the original problem.
            }
            throw e;
        }
        BufferedReader previous = reader;
        reader = new BufferedReader(isr);
        isReset = true;
        if (previous != null) {
            try {
                previous.close();
            } catch (IOException ignored) {
                // The old stream is being discarded anyway.
            }
        }
    }
}

View file

@ -0,0 +1,100 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:41:57 2001
#--------------------------------------------------------------------
# Bengali-InterIndic
ঁ>\uE001; # SIGN CANDRABINDU
ং>\uE002; # SIGN ANUSVARA
ঃ>\uE003; # SIGN VISARGA
অ>\uE005; # LETTER A
আ>\uE006; # LETTER AA
ই>\uE007; # LETTER I
ঈ>\uE008; # LETTER II
উ>\uE009; # LETTER U
ঊ>\uE00A; # LETTER UU
ঋ>\uE00B; # LETTER VOCALIC R
ঌ>\uE00C; # LETTER VOCALIC L
এ>\uE081; # LETTER E
ঐ>\uE010; # LETTER AI
ও>\uE082; # LETTER O
ঔ>\uE014; # LETTER AU
ক>\uE015; # LETTER KA
খ>\uE016; # LETTER KHA
গ>\uE017; # LETTER GA
ঘ>\uE018; # LETTER GHA
ঙ>\uE019; # LETTER NGA
চ>\uE01A; # LETTER CA
ছ>\uE01B; # LETTER CHA
জ>\uE01C; # LETTER JA
ঝ>\uE01D; # LETTER JHA
ঞ>\uE01E; # LETTER NYA
ট>\uE01F; # LETTER TTA
ঠ>\uE020; # LETTER TTHA
ড>\uE021; # LETTER DDA
ঢ>\uE022; # LETTER DDHA
ণ>\uE023; # LETTER NNA
ত>\uE024; # LETTER TA
থ>\uE025; # LETTER THA
দ>\uE026; # LETTER DA
ধ>\uE027; # LETTER DHA
ন>\uE028; # LETTER NA
প>\uE02A; # LETTER PA
ফ>\uE02B; # LETTER PHA
ব>\uE02C; # LETTER BA
ভ>\uE02D; # LETTER BHA
ম>\uE02E; # LETTER MA
য>\uE02F; # LETTER YA
র>\uE030; # LETTER RA
ল>\uE032; # LETTER LA
শ>\uE036; # LETTER SHA
ষ>\uE037; # LETTER SSA
স>\uE038; # LETTER SA
হ>\uE039; # LETTER HA
়>\uE03C; # SIGN NUKTA
া>\uE03E; # VOWEL SIGN AA
ি>\uE03F; # VOWEL SIGN I
ী>\uE040; # VOWEL SIGN II
ু>\uE041; # VOWEL SIGN U
ূ>\uE042; # VOWEL SIGN UU
ৃ>\uE043; # VOWEL SIGN VOCALIC R
ৄ>\uE044; # VOWEL SIGN VOCALIC RR
ে>\uE084; # VOWEL SIGN E
ৈ>\uE048; # VOWEL SIGN AI
ো>\uE085; # VOWEL SIGN O
ৌ>\uE04C; # VOWEL SIGN AU
্>\uE04D; # SIGN VIRAMA
ৗ>\uE057; # AU LENGTH MARK
ড়>\uE083; # LETTER RRA
ঢ়>\uE05D; # LETTER RHA
য়>\uE05F; # LETTER YYA
ৠ>\uE060; # LETTER VOCALIC RR
ৡ>\uE061; # LETTER VOCALIC LL
ৢ>\uE062; # VOWEL SIGN VOCALIC L
ৣ>\uE063; # VOWEL SIGN VOCALIC LL
০>\uE066; # DIGIT ZERO
১>\uE067; # DIGIT ONE
২>\uE068; # DIGIT TWO
৩>\uE069; # DIGIT THREE
৪>\uE06A; # DIGIT FOUR
৫>\uE06B; # DIGIT FIVE
৬>\uE06C; # DIGIT SIX
৭>\uE06D; # DIGIT SEVEN
৮>\uE06E; # DIGIT EIGHT
৯>\uE06F; # DIGIT NINE
# ৰ>; // UNMAPPED Bengali-InterIndic: LETTER RA WITH MIDDLE DIAGONAL
# ৱ>; // UNMAPPED Bengali-InterIndic: LETTER RA WITH LOWER DIAGONAL
# ৲>; // UNMAPPED Bengali-InterIndic: RUPEE MARK
# ৳>; // UNMAPPED Bengali-InterIndic: RUPEE SIGN
# ৴>; // UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE
# ৵>; // UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR TWO
# ৶>; // UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR THREE
# ৷>; // UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR FOUR
# ৸>; // UNMAPPED Bengali-InterIndic: CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
# ৹>; // UNMAPPED Bengali-InterIndic: CURRENCY DENOMINATOR SIXTEEN
৺>\uE080; # ISSHAR
# eof

View file

@ -0,0 +1,115 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:41:57 2001
#--------------------------------------------------------------------
# Devanagari-InterIndic
ँ>\uE001; # SIGN CANDRABINDU
ं>\uE002; # SIGN ANUSVARA
\u0903>\uE003; # SIGN VISARGA
अ>\uE005; # LETTER A
आ>\uE006; # LETTER AA
इ>\uE007; # LETTER I
ई>\uE008; # LETTER II
उ>\uE009; # LETTER U
ऊ>\uE00A; # LETTER UU
ऋ>\uE00B; # LETTER VOCALIC R
ऌ>\uE00C; # LETTER VOCALIC L
# ऍ>; // UNMAPPED Devanagari-InterIndic: LETTER CANDRA E
# ऎ>; // UNMAPPED Devanagari-InterIndic: LETTER SHORT E
ए>\uE081; # LETTER E
ऐ>\uE010; # LETTER AI
# ऑ>; // UNMAPPED Devanagari-InterIndic: LETTER CANDRA O
# ऒ>; // UNMAPPED Devanagari-InterIndic: LETTER SHORT O
ओ>\uE082; # LETTER O
औ>\uE014; # LETTER AU
क>\uE015; # LETTER KA
ख>\uE016; # LETTER KHA
ग>\uE017; # LETTER GA
घ>\uE018; # LETTER GHA
ङ>\uE019; # LETTER NGA
च>\uE01A; # LETTER CA
छ>\uE01B; # LETTER CHA
ज>\uE01C; # LETTER JA
झ>\uE01D; # LETTER JHA
ञ>\uE01E; # LETTER NYA
ट>\uE01F; # LETTER TTA
ठ>\uE020; # LETTER TTHA
ड>\uE021; # LETTER DDA
ढ>\uE022; # LETTER DDHA
ण>\uE023; # LETTER NNA
त>\uE024; # LETTER TA
थ>\uE025; # LETTER THA
द>\uE026; # LETTER DA
ध>\uE027; # LETTER DHA
न>\uE028; # LETTER NA
ऩ>\uE029; # LETTER NNNA
प>\uE02A; # LETTER PA
फ>\uE02B; # LETTER PHA
ब>\uE02C; # LETTER BA
भ>\uE02D; # LETTER BHA
म>\uE02E; # LETTER MA
य>\uE02F; # LETTER YA
र>\uE030; # LETTER RA
ऱ>\uE083; # LETTER RRA
ल>\uE032; # LETTER LA
ळ>\uE033; # LETTER LLA
ऴ>\uE034; # LETTER LLLA
व>\uE035; # LETTER VA
श>\uE036; # LETTER SHA
ष>\uE037; # LETTER SSA
स>\uE038; # LETTER SA
ह>\uE039; # LETTER HA
़>\uE03C; # SIGN NUKTA
ऽ>\uE03D; # SIGN AVAGRAHA
ा>\uE03E; # VOWEL SIGN AA
ि>\uE03F; # VOWEL SIGN I
ी>\uE040; # VOWEL SIGN II
ु>\uE041; # VOWEL SIGN U
ू>\uE042; # VOWEL SIGN UU
ृ>\uE043; # VOWEL SIGN VOCALIC R
ॄ>\uE044; # VOWEL SIGN VOCALIC RR
ॅ>\uE045; # VOWEL SIGN CANDRA E
# ॆ>; // UNMAPPED Devanagari-InterIndic: VOWEL SIGN SHORT E
े>\uE084; # VOWEL SIGN E
ै>\uE048; # VOWEL SIGN AI
ॉ>\uE049; # VOWEL SIGN CANDRA O
# ॊ>; // UNMAPPED Devanagari-InterIndic: VOWEL SIGN SHORT O
ो>\uE085; # VOWEL SIGN O
ौ>\uE04C; # VOWEL SIGN AU
्>\uE04D; # SIGN VIRAMA
ॐ>\uE050; # OM
# ॑>; // UNMAPPED Devanagari-InterIndic: STRESS SIGN UDATTA
# ॒>; // UNMAPPED Devanagari-InterIndic: STRESS SIGN ANUDATTA
# ॓>; // UNMAPPED Devanagari-InterIndic: GRAVE ACCENT
# ॔>; // UNMAPPED Devanagari-InterIndic: ACUTE ACCENT
# क़>; // UNMAPPED Devanagari-InterIndic: LETTER QA
ख़>\uE059; # LETTER KHHA
ग़>\uE05A; # LETTER GHHA
ज़>\uE05B; # LETTER ZA
# ड़>; // UNMAPPED Devanagari-InterIndic: LETTER DDDHA
ढ़>\uE05D; # LETTER RHA
फ़>\uE05E; # LETTER FA
य़>\uE05F; # LETTER YYA
ॠ>\uE060; # LETTER VOCALIC RR
ॡ>\uE061; # LETTER VOCALIC LL
ॢ>\uE062; # VOWEL SIGN VOCALIC L
ॣ>\uE063; # VOWEL SIGN VOCALIC LL
# ।>; // UNMAPPED Devanagari-InterIndic: DANDA
# ॥>; // UNMAPPED Devanagari-InterIndic: DOUBLE DANDA
\u0966>\uE066; # DIGIT ZERO
१>\uE067; # DIGIT ONE
२>\uE068; # DIGIT TWO
३>\uE069; # DIGIT THREE
४>\uE06A; # DIGIT FOUR
५>\uE06B; # DIGIT FIVE
६>\uE06C; # DIGIT SIX
७>\uE06D; # DIGIT SEVEN
८>\uE06E; # DIGIT EIGHT
९>\uE06F; # DIGIT NINE
# ॰>; // UNMAPPED Devanagari-InterIndic: ABBREVIATION SIGN
# eof

View file

@ -0,0 +1,269 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:41:57 2001
#--------------------------------------------------------------------
# Fullwidth-Halfwidth
# Mechanically generated from Unicode Character Database
# multicharacter
ガ<>ガ; # to KATAKANA LETTER GA
ギ<>ギ; # to KATAKANA LETTER GI
グ<>グ; # to KATAKANA LETTER GU
ゲ<>ゲ; # to KATAKANA LETTER GE
ゴ<>ゴ; # to KATAKANA LETTER GO
ザ<>ザ; # to KATAKANA LETTER ZA
ジ<>ジ; # to KATAKANA LETTER ZI
ズ<>ズ; # to KATAKANA LETTER ZU
ゼ<>ゼ; # to KATAKANA LETTER ZE
ゾ<>ゾ; # to KATAKANA LETTER ZO
ダ<>ダ; # to KATAKANA LETTER DA
ヂ<>ヂ; # to KATAKANA LETTER DI
ヅ<>ヅ; # to KATAKANA LETTER DU
デ<>デ; # to KATAKANA LETTER DE
ド<>ド; # to KATAKANA LETTER DO
バ<>バ; # to KATAKANA LETTER BA
パ<>パ; # to KATAKANA LETTER PA
ビ<>ビ; # to KATAKANA LETTER BI
ピ<>ピ; # to KATAKANA LETTER PI
ブ<>ブ; # to KATAKANA LETTER BU
プ<>プ; # to KATAKANA LETTER PU
ベ<>ベ; # to KATAKANA LETTER BE
ペ<>ペ; # to KATAKANA LETTER PE
ボ<>ボ; # to KATAKANA LETTER BO
ポ<>ポ; # to KATAKANA LETTER PO
ヴ<>ヴ; # to KATAKANA LETTER VU
ヷ<>ヷ; # to KATAKANA LETTER VA
ヺ<>ヺ; # to KATAKANA LETTER VO
# single character
\uFF01<>'!'; # from FULLWIDTH EXCLAMATION MARK
\uFF02<>'\"'; # from FULLWIDTH QUOTATION MARK
\uFF03<>'#'; # from FULLWIDTH NUMBER SIGN
\uFF04<>'$'; # from FULLWIDTH DOLLAR SIGN
\uFF05<>'%'; # from FULLWIDTH PERCENT SIGN
\uFF06<>'&'; # from FULLWIDTH AMPERSAND
\uFF07<>''; # from FULLWIDTH APOSTROPHE
\uFF08<>'('; # from FULLWIDTH LEFT PARENTHESIS
\uFF09<>')'; # from FULLWIDTH RIGHT PARENTHESIS
\uFF0A<>'*'; # from FULLWIDTH ASTERISK
\uFF0B<>'+'; # from FULLWIDTH PLUS SIGN
\uFF0C<>','; # from FULLWIDTH COMMA
\uFF0D<>'-'; # from FULLWIDTH HYPHEN-MINUS
\uFF0E<>'.'; # from FULLWIDTH FULL STOP
\uFF0F<>'/'; # from FULLWIDTH SOLIDUS
\uFF10<>'0'; # from FULLWIDTH DIGIT ZERO
\uFF11<>'1'; # from FULLWIDTH DIGIT ONE
\uFF12<>'2'; # from FULLWIDTH DIGIT TWO
\uFF13<>'3'; # from FULLWIDTH DIGIT THREE
\uFF14<>'4'; # from FULLWIDTH DIGIT FOUR
\uFF15<>'5'; # from FULLWIDTH DIGIT FIVE
\uFF16<>'6'; # from FULLWIDTH DIGIT SIX
\uFF17<>'7'; # from FULLWIDTH DIGIT SEVEN
\uFF18<>'8'; # from FULLWIDTH DIGIT EIGHT
\uFF19<>'9'; # from FULLWIDTH DIGIT NINE
\uFF1A<>':'; # from FULLWIDTH COLON
\uFF1B<>';'; # from FULLWIDTH SEMICOLON
\uFF1C<>'<'; # from FULLWIDTH LESS-THAN SIGN
\uFF1D<>'='; # from FULLWIDTH EQUALS SIGN
\uFF1E<>'>'; # from FULLWIDTH GREATER-THAN SIGN
\uFF1F<>'?'; # from FULLWIDTH QUESTION MARK
\uFF20<>'@'; # from FULLWIDTH COMMERCIAL AT
\uFF21<>A; # from FULLWIDTH LATIN CAPITAL LETTER A
\uFF22<>B; # from FULLWIDTH LATIN CAPITAL LETTER B
\uFF23<>C; # from FULLWIDTH LATIN CAPITAL LETTER C
\uFF24<>D; # from FULLWIDTH LATIN CAPITAL LETTER D
\uFF25<>E; # from FULLWIDTH LATIN CAPITAL LETTER E
\uFF26<>F; # from FULLWIDTH LATIN CAPITAL LETTER F
\uFF27<>G; # from FULLWIDTH LATIN CAPITAL LETTER G
\uFF28<>H; # from FULLWIDTH LATIN CAPITAL LETTER H
\uFF29<>I; # from FULLWIDTH LATIN CAPITAL LETTER I
\uFF2A<>J; # from FULLWIDTH LATIN CAPITAL LETTER J
\uFF2B<>K; # from FULLWIDTH LATIN CAPITAL LETTER K
\uFF2C<>L; # from FULLWIDTH LATIN CAPITAL LETTER L
\uFF2D<>M; # from FULLWIDTH LATIN CAPITAL LETTER M
\uFF2E<>N; # from FULLWIDTH LATIN CAPITAL LETTER N
\uFF2F<>O; # from FULLWIDTH LATIN CAPITAL LETTER O
\uFF30<>P; # from FULLWIDTH LATIN CAPITAL LETTER P
\uFF31<>Q; # from FULLWIDTH LATIN CAPITAL LETTER Q
\uFF32<>R; # from FULLWIDTH LATIN CAPITAL LETTER R
\uFF33<>S; # from FULLWIDTH LATIN CAPITAL LETTER S
\uFF34<>T; # from FULLWIDTH LATIN CAPITAL LETTER T
\uFF35<>U; # from FULLWIDTH LATIN CAPITAL LETTER U
\uFF36<>V; # from FULLWIDTH LATIN CAPITAL LETTER V
\uFF37<>W; # from FULLWIDTH LATIN CAPITAL LETTER W
\uFF38<>X; # from FULLWIDTH LATIN CAPITAL LETTER X
\uFF39<>Y; # from FULLWIDTH LATIN CAPITAL LETTER Y
\uFF3A<>Z; # from FULLWIDTH LATIN CAPITAL LETTER Z
\uFF3B<>'['; # from FULLWIDTH LEFT SQUARE BRACKET
\uFF3C<>'\\'; # from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
\uFF3D<>']'; # from FULLWIDTH RIGHT SQUARE BRACKET
\uFF3E<>'^'; # from FULLWIDTH CIRCUMFLEX ACCENT
\uFF3F<>'_'; # from FULLWIDTH LOW LINE
\uFF40<>'`'; # from FULLWIDTH GRAVE ACCENT
\uFF41<>a; # from FULLWIDTH LATIN SMALL LETTER A
\uFF42<>b; # from FULLWIDTH LATIN SMALL LETTER B
\uFF43<>c; # from FULLWIDTH LATIN SMALL LETTER C
\uFF44<>d; # from FULLWIDTH LATIN SMALL LETTER D
\uFF45<>e; # from FULLWIDTH LATIN SMALL LETTER E
\uFF46<>f; # from FULLWIDTH LATIN SMALL LETTER F
\uFF47<>g; # from FULLWIDTH LATIN SMALL LETTER G
\uFF48<>h; # from FULLWIDTH LATIN SMALL LETTER H
\uFF49<>i; # from FULLWIDTH LATIN SMALL LETTER I
\uFF4A<>j; # from FULLWIDTH LATIN SMALL LETTER J
\uFF4B<>k; # from FULLWIDTH LATIN SMALL LETTER K
\uFF4C<>l; # from FULLWIDTH LATIN SMALL LETTER L
\uFF4D<>m; # from FULLWIDTH LATIN SMALL LETTER M
\uFF4E<>n; # from FULLWIDTH LATIN SMALL LETTER N
\uFF4F<>o; # from FULLWIDTH LATIN SMALL LETTER O
\uFF50<>p; # from FULLWIDTH LATIN SMALL LETTER P
\uFF51<>q; # from FULLWIDTH LATIN SMALL LETTER Q
\uFF52<>r; # from FULLWIDTH LATIN SMALL LETTER R
\uFF53<>s; # from FULLWIDTH LATIN SMALL LETTER S
\uFF54<>t; # from FULLWIDTH LATIN SMALL LETTER T
\uFF55<>u; # from FULLWIDTH LATIN SMALL LETTER U
\uFF56<>v; # from FULLWIDTH LATIN SMALL LETTER V
\uFF57<>w; # from FULLWIDTH LATIN SMALL LETTER W
\uFF58<>x; # from FULLWIDTH LATIN SMALL LETTER X
\uFF59<>y; # from FULLWIDTH LATIN SMALL LETTER Y
\uFF5A<>z; # from FULLWIDTH LATIN SMALL LETTER Z
\uFF5B<>'{'; # from FULLWIDTH LEFT CURLY BRACKET
\uFF5C<>'|'; # from FULLWIDTH VERTICAL LINE
\uFF5D<>'}'; # from FULLWIDTH RIGHT CURLY BRACKET
\uFF5E<>'~'; # from FULLWIDTH TILDE
。<>。; # to HALFWIDTH IDEOGRAPHIC FULL STOP
「<>「; # to HALFWIDTH LEFT CORNER BRACKET
」<>」; # to HALFWIDTH RIGHT CORNER BRACKET
、<>、; # to HALFWIDTH IDEOGRAPHIC COMMA
・<>・; # to HALFWIDTH KATAKANA MIDDLE DOT
ヲ<>ヲ; # to HALFWIDTH KATAKANA LETTER WO
ァ<>ァ; # to HALFWIDTH KATAKANA LETTER SMALL A
ィ<>ィ; # to HALFWIDTH KATAKANA LETTER SMALL I
ゥ<>ゥ; # to HALFWIDTH KATAKANA LETTER SMALL U
ェ<>ェ; # to HALFWIDTH KATAKANA LETTER SMALL E
ォ<>ォ; # to HALFWIDTH KATAKANA LETTER SMALL O
ャ<>ャ; # to HALFWIDTH KATAKANA LETTER SMALL YA
ュ<>ュ; # to HALFWIDTH KATAKANA LETTER SMALL YU
ョ<>ョ; # to HALFWIDTH KATAKANA LETTER SMALL YO
ッ<>ッ; # to HALFWIDTH KATAKANA LETTER SMALL TU
ー<>ー; # to HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
ア<>ア; # to HALFWIDTH KATAKANA LETTER A
イ<>イ; # to HALFWIDTH KATAKANA LETTER I
ウ<>ウ; # to HALFWIDTH KATAKANA LETTER U
エ<>エ; # to HALFWIDTH KATAKANA LETTER E
オ<>オ; # to HALFWIDTH KATAKANA LETTER O
カ<>カ; # to HALFWIDTH KATAKANA LETTER KA
キ<>キ; # to HALFWIDTH KATAKANA LETTER KI
ク<>ク; # to HALFWIDTH KATAKANA LETTER KU
ケ<>ケ; # to HALFWIDTH KATAKANA LETTER KE
コ<>コ; # to HALFWIDTH KATAKANA LETTER KO
サ<>サ; # to HALFWIDTH KATAKANA LETTER SA
シ<>シ; # to HALFWIDTH KATAKANA LETTER SI
ス<>ス; # to HALFWIDTH KATAKANA LETTER SU
セ<>セ; # to HALFWIDTH KATAKANA LETTER SE
ソ<>ソ; # to HALFWIDTH KATAKANA LETTER SO
タ<>タ; # to HALFWIDTH KATAKANA LETTER TA
チ<>チ; # to HALFWIDTH KATAKANA LETTER TI
ツ<>ツ; # to HALFWIDTH KATAKANA LETTER TU
テ<>テ; # to HALFWIDTH KATAKANA LETTER TE
ト<>ト; # to HALFWIDTH KATAKANA LETTER TO
ナ<>ナ; # to HALFWIDTH KATAKANA LETTER NA
ニ<>ニ; # to HALFWIDTH KATAKANA LETTER NI
ヌ<>ヌ; # to HALFWIDTH KATAKANA LETTER NU
ネ<>ネ; # to HALFWIDTH KATAKANA LETTER NE
\u30CE<>ノ; # to HALFWIDTH KATAKANA LETTER NO
ハ<>ハ; # to HALFWIDTH KATAKANA LETTER HA
ヒ<>ヒ; # to HALFWIDTH KATAKANA LETTER HI
フ<>フ; # to HALFWIDTH KATAKANA LETTER HU
ヘ<>ヘ; # to HALFWIDTH KATAKANA LETTER HE
ホ<>ホ; # to HALFWIDTH KATAKANA LETTER HO
マ<>マ; # to HALFWIDTH KATAKANA LETTER MA
ミ<>ミ; # to HALFWIDTH KATAKANA LETTER MI
ム<>ム; # to HALFWIDTH KATAKANA LETTER MU
メ<>メ; # to HALFWIDTH KATAKANA LETTER ME
モ<>モ; # to HALFWIDTH KATAKANA LETTER MO
ヤ<>ヤ; # to HALFWIDTH KATAKANA LETTER YA
ユ<>ユ; # to HALFWIDTH KATAKANA LETTER YU
ヨ<>ヨ; # to HALFWIDTH KATAKANA LETTER YO
ラ<>ラ; # to HALFWIDTH KATAKANA LETTER RA
リ<>リ; # to HALFWIDTH KATAKANA LETTER RI
ル<>ル; # to HALFWIDTH KATAKANA LETTER RU
レ<>レ; # to HALFWIDTH KATAKANA LETTER RE
ロ<>ロ; # to HALFWIDTH KATAKANA LETTER RO
ワ<>ワ; # to HALFWIDTH KATAKANA LETTER WA
ン<>ン; # to HALFWIDTH KATAKANA LETTER N
゙<>゙; # to HALFWIDTH KATAKANA VOICED SOUND MARK
゚<>゚; # to HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
\u3164<>\uFFA0; # to HALFWIDTH HANGUL FILLER
ᄀ<>ᄀ; # to HALFWIDTH HANGUL LETTER KIYEOK
ᄁ<>ᄁ; # to HALFWIDTH HANGUL LETTER SSANGKIYEOK
ᆪ<>ᆪ; # to HALFWIDTH HANGUL LETTER KIYEOK-SIOS
ᄂ<>ᄂ; # to HALFWIDTH HANGUL LETTER NIEUN
ᆬ<>ᆬ; # to HALFWIDTH HANGUL LETTER NIEUN-CIEUC
ᆭ<>ᆭ; # to HALFWIDTH HANGUL LETTER NIEUN-HIEUH
ᄃ<>ᄃ; # to HALFWIDTH HANGUL LETTER TIKEUT
ᄄ<>ᄄ; # to HALFWIDTH HANGUL LETTER SSANGTIKEUT
ᄅ<>ᄅ; # to HALFWIDTH HANGUL LETTER RIEUL
ᆰ<>ᆰ; # to HALFWIDTH HANGUL LETTER RIEUL-KIYEOK
ᆱ<>ᆱ; # to HALFWIDTH HANGUL LETTER RIEUL-MIEUM
ᆲ<>ᆲ; # to HALFWIDTH HANGUL LETTER RIEUL-PIEUP
ᆳ<>ᆳ; # to HALFWIDTH HANGUL LETTER RIEUL-SIOS
ᆴ<>ᆴ; # to HALFWIDTH HANGUL LETTER RIEUL-THIEUTH
ᆵ<>ᆵ; # to HALFWIDTH HANGUL LETTER RIEUL-PHIEUPH
ᄚ<>ᄚ; # to HALFWIDTH HANGUL LETTER RIEUL-HIEUH
ᄆ<>ᄆ; # to HALFWIDTH HANGUL LETTER MIEUM
ᄇ<>ᄇ; # to HALFWIDTH HANGUL LETTER PIEUP
ᄈ<>ᄈ; # to HALFWIDTH HANGUL LETTER SSANGPIEUP
ᄡ<>ᄡ; # to HALFWIDTH HANGUL LETTER PIEUP-SIOS
ᄉ<>ᄉ; # to HALFWIDTH HANGUL LETTER SIOS
ᄊ<>ᄊ; # to HALFWIDTH HANGUL LETTER SSANGSIOS
ᄋ<>ᄋ; # to HALFWIDTH HANGUL LETTER IEUNG
ᄌ<>ᄌ; # to HALFWIDTH HANGUL LETTER CIEUC
ᄍ<>ᄍ; # to HALFWIDTH HANGUL LETTER SSANGCIEUC
ᄎ<>ᄎ; # to HALFWIDTH HANGUL LETTER CHIEUCH
ᄏ<>ᄏ; # to HALFWIDTH HANGUL LETTER KHIEUKH
ᄐ<>ᄐ; # to HALFWIDTH HANGUL LETTER THIEUTH
ᄑ<>ᄑ; # to HALFWIDTH HANGUL LETTER PHIEUPH
ᄒ<>ᄒ; # to HALFWIDTH HANGUL LETTER HIEUH
ᅡ<>ᅡ; # to HALFWIDTH HANGUL LETTER A
ᅢ<>ᅢ; # to HALFWIDTH HANGUL LETTER AE
ᅣ<>ᅣ; # to HALFWIDTH HANGUL LETTER YA
ᅤ<>ᅤ; # to HALFWIDTH HANGUL LETTER YAE
ᅥ<>ᅥ; # to HALFWIDTH HANGUL LETTER EO
ᅦ<>ᅦ; # to HALFWIDTH HANGUL LETTER E
ᅧ<>ᅧ; # to HALFWIDTH HANGUL LETTER YEO
ᅨ<>ᅨ; # to HALFWIDTH HANGUL LETTER YE
ᅩ<>ᅩ; # to HALFWIDTH HANGUL LETTER O
ᅪ<>ᅪ; # to HALFWIDTH HANGUL LETTER WA
ᅫ<>ᅫ; # to HALFWIDTH HANGUL LETTER WAE
ᅬ<>ᅬ; # to HALFWIDTH HANGUL LETTER OE
ᅭ<>ᅭ; # to HALFWIDTH HANGUL LETTER YO
ᅮ<>ᅮ; # to HALFWIDTH HANGUL LETTER U
ᅯ<>ᅯ; # to HALFWIDTH HANGUL LETTER WEO
ᅰ<>ᅰ; # to HALFWIDTH HANGUL LETTER WE
ᅱ<>ᅱ; # to HALFWIDTH HANGUL LETTER WI
ᅲ<>ᅲ; # to HALFWIDTH HANGUL LETTER YU
ᅳ<>ᅳ; # to HALFWIDTH HANGUL LETTER EU
ᅴ<>ᅴ; # to HALFWIDTH HANGUL LETTER YI
ᅵ<>ᅵ; # to HALFWIDTH HANGUL LETTER I
¢<>'¢'; # from FULLWIDTH CENT SIGN
£<>'£'; # from FULLWIDTH POUND SIGN
¬<>'¬'; # from FULLWIDTH NOT SIGN
 ̄<>' '̄; # from FULLWIDTH MACRON
¦<>'¦'; # from FULLWIDTH BROKEN BAR
¥<>'¥'; # from FULLWIDTH YEN SIGN
₩<>₩; # from FULLWIDTH WON SIGN
│<>\uFFE8; # to HALFWIDTH FORMS LIGHT VERTICAL
←<>←; # to HALFWIDTH LEFTWARDS ARROW
↑<>↑; # to HALFWIDTH UPWARDS ARROW
→<>→; # to HALFWIDTH RIGHTWARDS ARROW
↓<>↓; # to HALFWIDTH DOWNWARDS ARROW
■<>■; # to HALFWIDTH BLACK SQUARE
○<>○; # to HALFWIDTH WHITE CIRCLE
# eof

View file

@ -0,0 +1,89 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:41:58 2001
#--------------------------------------------------------------------
# Gujarati-InterIndic
ઁ>\uE001; # SIGN CANDRABINDU
ં>\uE002; # SIGN ANUSVARA
\u0A83>\uE003; # SIGN VISARGA
અ>\uE005; # LETTER A
આ>\uE006; # LETTER AA
ઇ>\uE007; # LETTER I
ઈ>\uE008; # LETTER II
ઉ>\uE009; # LETTER U
ઊ>\uE00A; # LETTER UU
ઋ>\uE00B; # LETTER VOCALIC R
# ઍ>; // UNMAPPED Gujarati-InterIndic: VOWEL CANDRA E
એ>\uE081; # LETTER E
ઐ>\uE010; # LETTER AI
# ઑ>; // UNMAPPED Gujarati-InterIndic: VOWEL CANDRA O
ઓ>\uE082; # LETTER O
ઔ>\uE014; # LETTER AU
ક>\uE015; # LETTER KA
ખ>\uE016; # LETTER KHA
ગ>\uE017; # LETTER GA
ઘ>\uE018; # LETTER GHA
ઙ>\uE019; # LETTER NGA
ચ>\uE01A; # LETTER CA
છ>\uE01B; # LETTER CHA
જ>\uE01C; # LETTER JA
ઝ>\uE01D; # LETTER JHA
ઞ>\uE01E; # LETTER NYA
ટ>\uE01F; # LETTER TTA
ઠ>\uE020; # LETTER TTHA
ડ>\uE021; # LETTER DDA
ઢ>\uE022; # LETTER DDHA
ણ>\uE023; # LETTER NNA
ત>\uE024; # LETTER TA
થ>\uE025; # LETTER THA
દ>\uE026; # LETTER DA
ધ>\uE027; # LETTER DHA
ન>\uE028; # LETTER NA
પ>\uE02A; # LETTER PA
ફ>\uE02B; # LETTER PHA
બ>\uE02C; # LETTER BA
ભ>\uE02D; # LETTER BHA
મ>\uE02E; # LETTER MA
ય>\uE02F; # LETTER YA
ર>\uE030; # LETTER RA
લ>\uE032; # LETTER LA
ળ>\uE033; # LETTER LLA
વ>\uE035; # LETTER VA
શ>\uE036; # LETTER SHA
ષ>\uE037; # LETTER SSA
સ>\uE038; # LETTER SA
હ>\uE039; # LETTER HA
઼>\uE03C; # SIGN NUKTA
ઽ>\uE03D; # SIGN AVAGRAHA
ા>\uE03E; # VOWEL SIGN AA
િ>\uE03F; # VOWEL SIGN I
ી>\uE040; # VOWEL SIGN II
ુ>\uE041; # VOWEL SIGN U
ૂ>\uE042; # VOWEL SIGN UU
ૃ>\uE043; # VOWEL SIGN VOCALIC R
ૄ>\uE044; # VOWEL SIGN VOCALIC RR
ૅ>\uE045; # VOWEL SIGN CANDRA E
ે>\uE084; # VOWEL SIGN E
ૈ>\uE048; # VOWEL SIGN AI
ૉ>\uE049; # VOWEL SIGN CANDRA O
ો>\uE085; # VOWEL SIGN O
ૌ>\uE04C; # VOWEL SIGN AU
્>\uE04D; # SIGN VIRAMA
ૐ>\uE050; # OM
ૠ>\uE060; # LETTER VOCALIC RR
\u0AE6>\uE066; # DIGIT ZERO
૧>\uE067; # DIGIT ONE
૨>\uE068; # DIGIT TWO
૩>\uE069; # DIGIT THREE
૪>\uE06A; # DIGIT FOUR
૫>\uE06B; # DIGIT FIVE
૬>\uE06C; # DIGIT SIX
૭>\uE06D; # DIGIT SEVEN
૮>\uE06E; # DIGIT EIGHT
૯>\uE06F; # DIGIT NINE
# eof

View file

@ -0,0 +1,86 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:41:58 2001
#--------------------------------------------------------------------
# Gurmukhi-InterIndic
ਂ>\uE001; # REMAP (indicExceptions.txt): ਁ>ਂ = SIGN CANDRABINDU>SIGN BINDI
ਅ>\uE005; # LETTER A
ਆ>\uE006; # LETTER AA
ਇ>\uE007; # LETTER I
ਈ>\uE008; # LETTER II
ਉ>\uE009; # LETTER U
ਊ>\uE00A; # LETTER UU
ਏ>\uE00F; # LETTER EE
ਐ>\uE010; # LETTER AI
ਓ>\uE013; # LETTER OO
ਔ>\uE014; # LETTER AU
ਕ>\uE015; # LETTER KA
ਖ>\uE016; # LETTER KHA
ਗ>\uE017; # LETTER GA
ਘ>\uE018; # LETTER GHA
ਙ>\uE019; # LETTER NGA
ਚ>\uE01A; # LETTER CA
ਛ>\uE01B; # LETTER CHA
ਜ>\uE01C; # LETTER JA
ਝ>\uE01D; # LETTER JHA
ਞ>\uE01E; # LETTER NYA
ਟ>\uE01F; # LETTER TTA
ਠ>\uE020; # LETTER TTHA
ਡ>\uE021; # LETTER DDA
ਢ>\uE022; # LETTER DDHA
ਣ>\uE023; # LETTER NNA
ਤ>\uE024; # LETTER TA
ਥ>\uE025; # LETTER THA
ਦ>\uE026; # LETTER DA
ਧ>\uE027; # LETTER DHA
ਨ>\uE028; # LETTER NA
ਪ>\uE02A; # LETTER PA
ਫ>\uE02B; # LETTER PHA
ਬ>\uE02C; # LETTER BA
ਭ>\uE02D; # LETTER BHA
ਮ>\uE02E; # LETTER MA
ਯ>\uE02F; # LETTER YA
ਰ>\uE030; # LETTER RA
ਲ>\uE032; # LETTER LA
ਲ਼>\uE033; # LETTER LLA
ਵ>\uE035; # LETTER VA
ਸ਼>\uE036; # LETTER SHA
ਸ>\uE038; # LETTER SA
ਹ>\uE039; # LETTER HA
਼>\uE03C; # SIGN NUKTA
ਾ>\uE03E; # VOWEL SIGN AA
ਿ>\uE03F; # VOWEL SIGN I
ੀ>\uE040; # VOWEL SIGN II
ੁ>\uE041; # VOWEL SIGN U
ੂ>\uE042; # VOWEL SIGN UU
ੇ>\uE047; # VOWEL SIGN EE
ੈ>\uE048; # VOWEL SIGN AI
ੋ>\uE04B; # VOWEL SIGN OO
ੌ>\uE04C; # VOWEL SIGN AU
੍>\uE04D; # SIGN VIRAMA
ਖ਼>\uE059; # LETTER KHHA
ਗ਼>\uE05A; # LETTER GHHA
ਜ਼>\uE05B; # LETTER ZA
ੜ>\uE083; # LETTER RRA
ਫ਼>\uE05E; # LETTER FA
\u0A66>\uE066; # DIGIT ZERO
\u0A67>\uE067; # DIGIT ONE
੨>\uE068; # DIGIT TWO
੩>\uE069; # DIGIT THREE
\u0A6A>\uE06A; # DIGIT FOUR
੫>\uE06B; # DIGIT FIVE
੬>\uE06C; # DIGIT SIX
੭>\uE06D; # DIGIT SEVEN
੮>\uE06E; # DIGIT EIGHT
੯>\uE06F; # DIGIT NINE
# ੰ>; // UNMAPPED Gurmukhi-InterIndic: TIPPI
# ੱ>; // UNMAPPED Gurmukhi-InterIndic: ADDAK
# ੲ>; // UNMAPPED Gurmukhi-InterIndic: IRI
# ੳ>; // UNMAPPED Gurmukhi-InterIndic: URA
# ੴ>; // UNMAPPED Gurmukhi-InterIndic: EK ONKAR
# eof

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,200 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 2001
#--------------------------------------------------------------------
# Hiragana-Katakana
# This is largely a one-to-one mapping, but it has a
# few kinks:
# 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
# Hiragana equivalents. We use Hiragana wa/wi/we/wo
# (308F-3092) with a voicing mark (3099), which is
# semantically equivalent. However, this is a non-
# roundtripping transformation.
# 2. The Katakana small ka/ke (30F5,30F6) have no
# Hiragana equivalents. We convert them to normal
# Hiragana ka/ke (304B,3051). This is a one-way
# information-losing transformation and precludes
# round-tripping of 30F5 and 30F6.
# 3. The combining marks 3099-309C are in the Hiragana
# block, but they apply to Katakana as well, so we
# leave them untouched.
# 4. The Katakana prolonged sound mark 30FC doubles the
# preceding vowel. This is a one-way information-
# losing transformation from Katakana to Hiragana.
# 5. The Katakana middle dot separates words in foreign
# expressions; we leave this unmodified.
# The above points preclude successful round-trip
# transformations of arbitrary input text. However,
# they provide naturalistic results that should conform
# to user expectations.
# Combining equivalents va/vi/ve/vo
わ゙ <> ヷ;
ゐ゙ <> ヸ;
ゑ゙ <> ヹ;
を゙ <> ヺ;
# One-to-one mappings, main block
# 3041:3094 <> 30A1:30F4
# 309D,E <> 30FD,E
ぁ <> ァ;
あ <> ア;
ぃ <> ィ;
い <> イ;
ぅ <> ゥ;
う <> ウ;
ぇ <> ェ;
え <> エ;
ぉ <> ォ;
お <> オ;
か <> カ;
が <> ガ;
き <> キ;
ぎ <> ギ;
く <> ク;
ぐ <> グ;
け <> ケ;
げ <> ゲ;
こ <> コ;
ご <> ゴ;
さ <> サ;
ざ <> ザ;
し <> シ;
じ <> ジ;
す <> ス;
ず <> ズ;
せ <> セ;
ぜ <> ゼ;
そ <> ソ;
ぞ <> ゾ;
た <> タ;
だ <> ダ;
ち <> チ;
ぢ <> ヂ;
っ <> ッ;
つ <> ツ;
づ <> ヅ;
て <> テ;
で <> デ;
と <> ト;
ど <> ド;
な <> ナ;
に <> ニ;
ぬ <> ヌ;
ね <> ネ;
の <> \u30CE;
は <> ハ;
ば <> バ;
ぱ <> パ;
ひ <> ヒ;
び <> ビ;
ぴ <> ピ;
ふ <> フ;
ぶ <> ブ;
ぷ <> プ;
へ <> ヘ;
べ <> ベ;
ぺ <> ペ;
ほ <> ホ;
ぼ <> ボ;
ぽ <> ポ;
ま <> マ;
み <> ミ;
む <> ム;
め <> メ;
も <> モ;
ゃ <> ャ;
や <> ヤ;
ゅ <> ュ;
ゆ <> ユ;
ょ <> ョ;
よ <> ヨ;
ら <> ラ;
り <> リ;
る <> ル;
れ <> レ;
ろ <> ロ;
ゎ <> ヮ;
わ <> ワ;
ゐ <> ヰ;
ゑ <> ヱ;
を <> ヲ;
ん <> ン;
ゔ <> ヴ;
ゝ <> ヽ;
ゞ <> ヾ;
# One-way Katakana-Hiragana xform of small K ka/ke to
# normal H ka/ke.
か < ヵ;
け < ヶ;
# Katakana followed by a prolonged sound mark 30FC has
# its final vowel doubled. This is a Katakana-Hiragana
# one-way information-losing transformation. We
# include the small Katakana (e.g., small A 3041) and
# do not distinguish them from their large
# counterparts. It doesn't make sense to double a
# small counterpart vowel as a small Hiragana vowel, so
# we don't do so. In natural text this should never
# occur anyway. If a 30FC is seen without a preceding
# vowel sound (e.g., after n 30F3) we do not change it.
### $long = ー;
# The following categories are Hiragana, not Katakana
# as might be expected, since by the time we get to the
# 30FC, the preceding character will have already been
# transformed to Hiragana.
# {The following mechanically generated from the
# Unicode 3.0 data:}
$xa = [ \
ぁ あ か が さ ざ \
た だ な は ば ぱ \
ま ゃ や ら ゎ わ \
];
$xi = [ \
ぃ い き ぎ し じ \
ち ぢ に ひ び ぴ \
み り ゐ \
];
$xu = [ \
ぅ う く ぐ す ず \
っ つ づ ぬ ふ ぶ \
ぷ む ゅ ゆ る ゔ \
];
$xe = [ \
ぇ え け げ せ ぜ \
て で ね へ べ ぺ \
め れ ゑ \
];
$xo = [ \
ぉ お こ ご そ ぞ \
と ど の ほ ぼ ぽ \
も ょ よ ろ を \
];
あ < $xa {ー};
い < $xi {ー};
う < $xu {ー};
え < $xe {ー};
お < $xo {ー};
# eof

View file

@ -0,0 +1,108 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:41:59 2001
#--------------------------------------------------------------------
# InterIndic-Bengali
\uE001>ঁ; # SIGN CANDRABINDU
\uE002>ং; # SIGN ANUSVARA
\uE003>ঃ; # SIGN VISARGA
\uE005>অ; # LETTER A
\uE006>আ; # LETTER AA
\uE007>ই; # LETTER I
\uE008>ঈ; # LETTER II
\uE009>উ; # LETTER U
\uE00A>ঊ; # LETTER UU
\uE00B>ঋ; # LETTER VOCALIC R
\uE00C>ঌ; # LETTER VOCALIC L
# \uE00F>; // UNMAPPED InterIndic-Bengali: LETTER EE (এ = LETTER E)
\uE010>ঐ; # LETTER AI
# \uE013>; // UNMAPPED InterIndic-Bengali: LETTER OO (ও = LETTER O)
\uE014>ঔ; # LETTER AU
\uE015>ক; # LETTER KA
\uE016>খ; # LETTER KHA
\uE017>গ; # LETTER GA
\uE018>ঘ; # LETTER GHA
\uE019>ঙ; # LETTER NGA
\uE01A>চ; # LETTER CA
\uE01B>ছ; # LETTER CHA
\uE01C>জ; # LETTER JA
\uE01D>ঝ; # LETTER JHA
\uE01E>ঞ; # LETTER NYA
\uE01F>ট; # LETTER TTA
\uE020>ঠ; # LETTER TTHA
\uE021>ড; # LETTER DDA
\uE022>ঢ; # LETTER DDHA
\uE023>ণ; # LETTER NNA
\uE024>ত; # LETTER TA
\uE025>থ; # LETTER THA
\uE026>দ; # LETTER DA
\uE027>ধ; # LETTER DHA
\uE028>ন; # LETTER NA
\uE029>ন; # REMAP (indicExceptions.txt): ঩>ন = LETTER NNNA>LETTER NA
\uE02A>প; # LETTER PA
\uE02B>ফ; # LETTER PHA
\uE02C>ব; # LETTER BA
\uE02D>ভ; # LETTER BHA
\uE02E>ম; # LETTER MA
\uE02F>য; # LETTER YA
\uE030>র; # LETTER RA
\uE032>ল; # LETTER LA
\uE033>ল; # REMAP (indicExceptions.txt): ঳>ল = LETTER LLA>LETTER LA
\uE034>ল; # REMAP (indicExceptions.txt): ঴>ল = LETTER LLLA>LETTER LA
\uE035>ব; # REMAP (indicExceptions.txt): ঵>ব = LETTER VA>LETTER BA
\uE036>শ; # LETTER SHA
\uE037>ষ; # LETTER SSA
\uE038>স; # LETTER SA
\uE039>হ; # LETTER HA
\uE03C>়; # SIGN NUKTA
# \uE03D>; // UNMAPPED InterIndic-Bengali: SIGN AVAGRAHA
\uE03E>া; # VOWEL SIGN AA
\uE03F>ি; # VOWEL SIGN I
\uE040>ী; # VOWEL SIGN II
\uE041>ু; # VOWEL SIGN U
\uE042>ূ; # VOWEL SIGN UU
\uE043>ৃ; # VOWEL SIGN VOCALIC R
\uE044>ৄ; # VOWEL SIGN VOCALIC RR
\uE045>ে; # REMAP (indicExceptions.txt): ৅>ে = VOWEL SIGN CANDRA E>VOWEL SIGN E
# \uE047>; // UNMAPPED InterIndic-Bengali: VOWEL SIGN EE (ে = VOWEL SIGN E)
\uE048>ৈ; # VOWEL SIGN AI
\uE049>ো; # REMAP (indicExceptions.txt): ৉>ো = VOWEL SIGN CANDRA O>VOWEL SIGN O
# \uE04B>; // UNMAPPED InterIndic-Bengali: VOWEL SIGN OO (ো = VOWEL SIGN O)
\uE04C>ৌ; # VOWEL SIGN AU
\uE04D>্; # SIGN VIRAMA
# \uE050>; // UNMAPPED InterIndic-Bengali: OM
# \uE055>; // UNMAPPED InterIndic-Bengali: LENGTH MARK
\uE056>ৈ; # REMAP (indicExceptions.txt): ৖>ৈ = AI LENGTH MARK>VOWEL SIGN AI
\uE057>ৗ; # AU LENGTH MARK
\uE059>খ; # REMAP (indicExceptions.txt): ৙>খ = LETTER KHHA>LETTER KHA
\uE05A>গ; # REMAP (indicExceptions.txt): ৚>গ = LETTER GHHA>LETTER GA
\uE05B>জ; # REMAP (indicExceptions.txt): ৛>জ = LETTER ZA>LETTER JA
\uE05D>ঢ়; # LETTER RHA
\uE05E>ফ; # REMAP (indicExceptions.txt): ৞>ফ = LETTER FA>LETTER PHA
\uE05F>য়; # LETTER YYA
\uE060>ৠ; # LETTER VOCALIC RR
\uE061>ৡ; # LETTER VOCALIC LL
\uE062>ৢ; # VOWEL SIGN VOCALIC L
\uE063>ৣ; # VOWEL SIGN VOCALIC LL
\uE066>\u09E6; # DIGIT ZERO
\uE067>১; # DIGIT ONE
\uE068>২; # DIGIT TWO
\uE069>৩; # DIGIT THREE
\uE06A>\u09EA; # DIGIT FOUR
\uE06B>৫; # DIGIT FIVE
\uE06C>৬; # DIGIT SIX
\uE06D>\u09ED; # DIGIT SEVEN
\uE06E>৮; # DIGIT EIGHT
\uE06F>৯; # DIGIT NINE
\uE080>৺; # ISSHAR
\uE081>এ; # LETTER E
\uE082>ও; # LETTER O
\uE083>ড়; # LETTER RRA
\uE084>ে; # VOWEL SIGN E
\uE085>ো; # VOWEL SIGN O
# eof

View file

@ -0,0 +1,108 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:41:59 2001
#--------------------------------------------------------------------
# InterIndic-Devanagari
\uE001>ँ; # SIGN CANDRABINDU
\uE002>ं; # SIGN ANUSVARA
\uE003>\u0903; # SIGN VISARGA
\uE005>अ; # LETTER A
\uE006>आ; # LETTER AA
\uE007>इ; # LETTER I
\uE008>ई; # LETTER II
\uE009>उ; # LETTER U
\uE00A>ऊ; # LETTER UU
\uE00B>ऋ; # LETTER VOCALIC R
\uE00C>ऌ; # LETTER VOCALIC L
# \uE00F>; // UNMAPPED InterIndic-Devanagari: LETTER EE (ए = LETTER E)
\uE010>ऐ; # LETTER AI
# \uE013>; // UNMAPPED InterIndic-Devanagari: LETTER OO (ओ = LETTER O)
\uE014>औ; # LETTER AU
\uE015>क; # LETTER KA
\uE016>ख; # LETTER KHA
\uE017>ग; # LETTER GA
\uE018>घ; # LETTER GHA
\uE019>ङ; # LETTER NGA
\uE01A>च; # LETTER CA
\uE01B>छ; # LETTER CHA
\uE01C>ज; # LETTER JA
\uE01D>झ; # LETTER JHA
\uE01E>ञ; # LETTER NYA
\uE01F>ट; # LETTER TTA
\uE020>ठ; # LETTER TTHA
\uE021>ड; # LETTER DDA
\uE022>ढ; # LETTER DDHA
\uE023>ण; # LETTER NNA
\uE024>त; # LETTER TA
\uE025>थ; # LETTER THA
\uE026>द; # LETTER DA
\uE027>ध; # LETTER DHA
\uE028>न; # LETTER NA
\uE029>ऩ; # LETTER NNNA
\uE02A>प; # LETTER PA
\uE02B>फ; # LETTER PHA
\uE02C>ब; # LETTER BA
\uE02D>भ; # LETTER BHA
\uE02E>म; # LETTER MA
\uE02F>य; # LETTER YA
\uE030>र; # LETTER RA
\uE032>ल; # LETTER LA
\uE033>ळ; # LETTER LLA
\uE034>ऴ; # LETTER LLLA
\uE035>व; # LETTER VA
\uE036>श; # LETTER SHA
\uE037>ष; # LETTER SSA
\uE038>स; # LETTER SA
\uE039>ह; # LETTER HA
\uE03C>़; # SIGN NUKTA
\uE03D>ऽ; # SIGN AVAGRAHA
\uE03E>ा; # VOWEL SIGN AA
\uE03F>ि; # VOWEL SIGN I
\uE040>ी; # VOWEL SIGN II
\uE041>ु; # VOWEL SIGN U
\uE042>ू; # VOWEL SIGN UU
\uE043>ृ; # VOWEL SIGN VOCALIC R
\uE044>ॄ; # VOWEL SIGN VOCALIC RR
\uE045>ॅ; # VOWEL SIGN CANDRA E
# \uE047>; // UNMAPPED InterIndic-Devanagari: VOWEL SIGN EE (े = VOWEL SIGN E)
\uE048>ै; # VOWEL SIGN AI
\uE049>ॉ; # VOWEL SIGN CANDRA O
# \uE04B>; // UNMAPPED InterIndic-Devanagari: VOWEL SIGN OO (ो = VOWEL SIGN O)
\uE04C>ौ; # VOWEL SIGN AU
\uE04D>्; # SIGN VIRAMA
\uE050>ॐ; # OM
# \uE055>; // UNMAPPED InterIndic-Devanagari: LENGTH MARK
\uE056>ै; # REMAP (indicExceptions.txt): ॖ>ै = AI LENGTH MARK>VOWEL SIGN AI
\uE057>ौ; # REMAP (indicExceptions.txt): ॗ>ौ = AU LENGTH MARK>VOWEL SIGN AU
\uE059>ख़; # LETTER KHHA
\uE05A>ग़; # LETTER GHHA
\uE05B>ज़; # LETTER ZA
\uE05D>ढ़; # LETTER RHA
\uE05E>फ़; # LETTER FA
\uE05F>य़; # LETTER YYA
\uE060>ॠ; # LETTER VOCALIC RR
\uE061>ॡ; # LETTER VOCALIC LL
\uE062>ॢ; # VOWEL SIGN VOCALIC L
\uE063>ॣ; # VOWEL SIGN VOCALIC LL
\uE066>\u0966; # DIGIT ZERO
\uE067>१; # DIGIT ONE
\uE068>२; # DIGIT TWO
\uE069>३; # DIGIT THREE
\uE06A>४; # DIGIT FOUR
\uE06B>५; # DIGIT FIVE
\uE06C>६; # DIGIT SIX
\uE06D>७; # DIGIT SEVEN
\uE06E>८; # DIGIT EIGHT
\uE06F>९; # DIGIT NINE
# \uE080>; // UNMAPPED InterIndic-Devanagari: ISSHAR
\uE081>ए; # LETTER E
\uE082>ओ; # LETTER O
\uE083>ऱ; # LETTER RRA
\uE084>े; # VOWEL SIGN E
\uE085>ो; # VOWEL SIGN O
# eof

View file

@ -0,0 +1,108 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:41:59 2001
#--------------------------------------------------------------------
# InterIndic-Gujarati
\uE001>ઁ; # SIGN CANDRABINDU
\uE002>ં; # SIGN ANUSVARA
\uE003>\u0A83; # SIGN VISARGA
\uE005>અ; # LETTER A
\uE006>આ; # LETTER AA
\uE007>ઇ; # LETTER I
\uE008>ઈ; # LETTER II
\uE009>ઉ; # LETTER U
\uE00A>ઊ; # LETTER UU
\uE00B>ઋ; # LETTER VOCALIC R
\uE00C>લૃ; # REMAP (indicExceptions.txt): ઌ>લૃ = LETTER VOCALIC L>LETTER LA.VOWEL SIGN VOCALIC R
# \uE00F>; // UNMAPPED InterIndic-Gujarati: LETTER EE (એ = LETTER E)
\uE010>ઐ; # LETTER AI
# \uE013>; // UNMAPPED InterIndic-Gujarati: LETTER OO (ઓ = LETTER O)
\uE014>ઔ; # LETTER AU
\uE015>ક; # LETTER KA
\uE016>ખ; # LETTER KHA
\uE017>ગ; # LETTER GA
\uE018>ઘ; # LETTER GHA
\uE019>ઙ; # LETTER NGA
\uE01A>ચ; # LETTER CA
\uE01B>છ; # LETTER CHA
\uE01C>જ; # LETTER JA
\uE01D>ઝ; # LETTER JHA
\uE01E>ઞ; # LETTER NYA
\uE01F>ટ; # LETTER TTA
\uE020>ઠ; # LETTER TTHA
\uE021>ડ; # LETTER DDA
\uE022>ઢ; # LETTER DDHA
\uE023>ણ; # LETTER NNA
\uE024>ત; # LETTER TA
\uE025>થ; # LETTER THA
\uE026>દ; # LETTER DA
\uE027>ધ; # LETTER DHA
\uE028>ન; # LETTER NA
\uE029>ન; # REMAP (indicExceptions.txt): ઩>ન = LETTER NNNA>LETTER NA
\uE02A>પ; # LETTER PA
\uE02B>ફ; # LETTER PHA
\uE02C>બ; # LETTER BA
\uE02D>ભ; # LETTER BHA
\uE02E>મ; # LETTER MA
\uE02F>ય; # LETTER YA
\uE030>ર; # LETTER RA
\uE032>લ; # LETTER LA
\uE033>ળ; # LETTER LLA
\uE034>ળ; # REMAP (indicExceptions.txt): ઴>ળ = LETTER LLLA>LETTER LLA
\uE035>વ; # LETTER VA
\uE036>શ; # LETTER SHA
\uE037>ષ; # LETTER SSA
\uE038>સ; # LETTER SA
\uE039>હ; # LETTER HA
\uE03C>઼; # SIGN NUKTA
\uE03D>ઽ; # SIGN AVAGRAHA
\uE03E>ા; # VOWEL SIGN AA
\uE03F>િ; # VOWEL SIGN I
\uE040>ી; # VOWEL SIGN II
\uE041>ુ; # VOWEL SIGN U
\uE042>ૂ; # VOWEL SIGN UU
\uE043>ૃ; # VOWEL SIGN VOCALIC R
\uE044>ૄ; # VOWEL SIGN VOCALIC RR
\uE045>ૅ; # VOWEL SIGN CANDRA E
# \uE047>; // UNMAPPED InterIndic-Gujarati: VOWEL SIGN EE (ે = VOWEL SIGN E)
\uE048>ૈ; # VOWEL SIGN AI
\uE049>ૉ; # VOWEL SIGN CANDRA O
# \uE04B>; // UNMAPPED InterIndic-Gujarati: VOWEL SIGN OO (ો = VOWEL SIGN O)
\uE04C>ૌ; # VOWEL SIGN AU
\uE04D>્; # SIGN VIRAMA
\uE050>ૐ; # OM
# \uE055>; // UNMAPPED InterIndic-Gujarati: LENGTH MARK
\uE056>ૈ; # REMAP (indicExceptions.txt): ૖>ૈ = AI LENGTH MARK>VOWEL SIGN AI
\uE057>ૌ; # REMAP (indicExceptions.txt): ૗>ૌ = AU LENGTH MARK>VOWEL SIGN AU
\uE059>ખ઼; # REMAP (indicExceptions.txt): ૙>ખ઼ = LETTER KHHA>LETTER KHA.SIGN NUKTA
\uE05A>ગ઼; # REMAP (indicExceptions.txt): ૚>ગ઼ = LETTER GHHA>LETTER GA.SIGN NUKTA
\uE05B>જ઼; # REMAP (indicExceptions.txt): ૛>જ઼ = LETTER ZA>LETTER JA.SIGN NUKTA
\uE05D>ઢ઼; # REMAP (indicExceptions.txt): ૝>ઢ઼ = LETTER RHA>LETTER DDHA.SIGN NUKTA
\uE05E>ફ઼; # REMAP (indicExceptions.txt): ૞>ફ઼ = LETTER FA>LETTER PHA.SIGN NUKTA
\uE05F>ય઼; # REMAP (indicExceptions.txt): ૟>ય઼ = LETTER YYA>LETTER YA.SIGN NUKTA
\uE060>ૠ; # LETTER VOCALIC RR
\uE061>લૃ; # REMAP (indicExceptions.txt): ૡ>લૃ = LETTER VOCALIC LL>LETTER LA.VOWEL SIGN VOCALIC R
\uE062>િ઼; # REMAP (indicExceptions.txt): ૢ>િ઼ = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
\uE063>ી઼; # REMAP (indicExceptions.txt): ૣ>ી઼ = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
\uE066>\u0AE6; # DIGIT ZERO
\uE067>૧; # DIGIT ONE
\uE068>૨; # DIGIT TWO
\uE069>૩; # DIGIT THREE
\uE06A>૪; # DIGIT FOUR
\uE06B>૫; # DIGIT FIVE
\uE06C>૬; # DIGIT SIX
\uE06D>૭; # DIGIT SEVEN
\uE06E>૮; # DIGIT EIGHT
\uE06F>૯; # DIGIT NINE
# \uE080>; // UNMAPPED InterIndic-Gujarati: ISSHAR
\uE081>એ; # LETTER E
\uE082>ઓ; # LETTER O
# \uE083>; // UNMAPPED InterIndic-Gujarati: LETTER RRA ( = SIGN VISARGA)
\uE084>ે; # VOWEL SIGN E
\uE085>ો; # VOWEL SIGN O
# eof

View file

@ -0,0 +1,108 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:00 2001
#--------------------------------------------------------------------
# InterIndic-Gurmukhi
\uE001>ਂ; # REMAP (indicExceptions.txt): ਁ>ਂ = SIGN CANDRABINDU>SIGN BINDI
# \uE002>; // UNMAPPED InterIndic-Gurmukhi: SIGN ANUSVARA (ਂ = SIGN BINDI)
# \uE003>; // UNMAPPED InterIndic-Gurmukhi: SIGN VISARGA
\uE005>ਅ; # LETTER A
\uE006>ਆ; # LETTER AA
\uE007>ਇ; # LETTER I
\uE008>ਈ; # LETTER II
\uE009>ਉ; # LETTER U
\uE00A>ਊ; # LETTER UU
\uE00B>ਰਿ; # REMAP (indicExceptions.txt): ਋>ਰਿ = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
\uE00C>ਇ; # REMAP (indicExceptions.txt): ਌>ਇ = LETTER VOCALIC L>LETTER I
\uE00F>ਏ; # LETTER EE
\uE010>ਐ; # LETTER AI
\uE013>ਓ; # LETTER OO
\uE014>ਔ; # LETTER AU
\uE015>ਕ; # LETTER KA
\uE016>ਖ; # LETTER KHA
\uE017>ਗ; # LETTER GA
\uE018>ਘ; # LETTER GHA
\uE019>ਙ; # LETTER NGA
\uE01A>ਚ; # LETTER CA
\uE01B>ਛ; # LETTER CHA
\uE01C>ਜ; # LETTER JA
\uE01D>ਝ; # LETTER JHA
\uE01E>ਞ; # LETTER NYA
\uE01F>ਟ; # LETTER TTA
\uE020>ਠ; # LETTER TTHA
\uE021>ਡ; # LETTER DDA
\uE022>ਢ; # LETTER DDHA
\uE023>ਣ; # LETTER NNA
\uE024>ਤ; # LETTER TA
\uE025>ਥ; # LETTER THA
\uE026>ਦ; # LETTER DA
\uE027>ਧ; # LETTER DHA
\uE028>ਨ; # LETTER NA
\uE029>ਨ; # REMAP (indicExceptions.txt): ਩>ਨ = LETTER NNNA>LETTER NA
\uE02A>ਪ; # LETTER PA
\uE02B>ਫ; # LETTER PHA
\uE02C>ਬ; # LETTER BA
\uE02D>ਭ; # LETTER BHA
\uE02E>ਮ; # LETTER MA
\uE02F>ਯ; # LETTER YA
\uE030>ਰ; # LETTER RA
\uE032>ਲ; # LETTER LA
\uE033>ਲ਼; # LETTER LLA
\uE034>ਲ਼; # REMAP (indicExceptions.txt): ਴>ਲ਼ = LETTER LLLA>LETTER LLA
\uE035>ਵ; # LETTER VA
\uE036>ਸ਼; # LETTER SHA
\uE037>ਸ਼; # REMAP (indicExceptions.txt): ਷>ਸ਼ = LETTER SSA>LETTER SHA
\uE038>ਸ; # LETTER SA
\uE039>ਹ; # LETTER HA
\uE03C>਼; # SIGN NUKTA
# \uE03D>; // UNMAPPED InterIndic-Gurmukhi: SIGN AVAGRAHA
\uE03E>ਾ; # VOWEL SIGN AA
\uE03F>ਿ; # VOWEL SIGN I
\uE040>ੀ; # VOWEL SIGN II
\uE041>ੁ; # VOWEL SIGN U
\uE042>ੂ; # VOWEL SIGN UU
# \uE043>; // UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC R
# \uE044>; // UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN VOCALIC RR
\uE045>ੈ; # REMAP (indicExceptions.txt): ੅>ੈ = VOWEL SIGN CANDRA E>VOWEL SIGN AI
\uE047>ੇ; # VOWEL SIGN EE
\uE048>ੈ; # VOWEL SIGN AI
\uE049>ੌ; # REMAP (indicExceptions.txt): ੉>ੌ = VOWEL SIGN CANDRA O>VOWEL SIGN AU
\uE04B>ੋ; # VOWEL SIGN OO
\uE04C>ੌ; # VOWEL SIGN AU
\uE04D>੍; # SIGN VIRAMA
# \uE050>; // UNMAPPED InterIndic-Gurmukhi: OM
# \uE055>; // UNMAPPED InterIndic-Gurmukhi: LENGTH MARK
\uE056>ੈ; # REMAP (indicExceptions.txt): ੖>ੈ = AI LENGTH MARK>VOWEL SIGN AI
\uE057>ੌ; # REMAP (indicExceptions.txt): ੗>ੌ = AU LENGTH MARK>VOWEL SIGN AU
\uE059>ਖ਼; # LETTER KHHA
\uE05A>ਗ਼; # LETTER GHHA
\uE05B>ਜ਼; # LETTER ZA
\uE05D>ਢ਼; # REMAP (indicExceptions.txt): ੝>ਢ਼ = LETTER RHA>LETTER DDHA.SIGN NUKTA
\uE05E>ਫ਼; # LETTER FA
\uE05F>ਯ; # REMAP (indicExceptions.txt): ੟>ਯ = LETTER YYA>LETTER YA
\uE060>ਰਿ; # REMAP (indicExceptions.txt): ੠>ਰਿ = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
\uE061>ਈ਼; # REMAP (indicExceptions.txt): ੡>ਈ਼ = LETTER VOCALIC LL>LETTER II.SIGN NUKTA
\uE062>ਿ਼; # REMAP (indicExceptions.txt): ੢>ਿ਼ = VOWEL SIGN VOCALIC L>VOWEL SIGN I.SIGN NUKTA
\uE063>ੀ਼; # REMAP (indicExceptions.txt): ੣>ੀ਼ = VOWEL SIGN VOCALIC LL>VOWEL SIGN II.SIGN NUKTA
\uE066>੦; # DIGIT ZERO
\uE067>੧; # DIGIT ONE
\uE068>੨; # DIGIT TWO
\uE069>੩; # DIGIT THREE
\uE06A>੪; # DIGIT FOUR
\uE06B>੫; # DIGIT FIVE
\uE06C>੬; # DIGIT SIX
\uE06D>੭; # DIGIT SEVEN
\uE06E>੮; # DIGIT EIGHT
\uE06F>੯; # DIGIT NINE
# \uE080>; // UNMAPPED InterIndic-Gurmukhi: ISSHAR
# \uE081>; // UNMAPPED InterIndic-Gurmukhi: LETTER E
# \uE082>; // UNMAPPED InterIndic-Gurmukhi: LETTER O (ਂ = SIGN BINDI)
\uE083>ੜ; # LETTER RRA
# \uE084>; // UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN E
# \uE085>; // UNMAPPED InterIndic-Gurmukhi: VOWEL SIGN O (ਅ = LETTER A)
# eof

View file

@ -0,0 +1,108 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:00 2001
#--------------------------------------------------------------------
# InterIndic-Kannada
\uE001>ಂ; # REMAP (indicExceptions.txt): ಁ>ಂ = SIGN CANDRABINDU>SIGN ANUSVARA
\uE002>ಂ; # SIGN ANUSVARA
\uE003>ಃ; # SIGN VISARGA
\uE005>ಅ; # LETTER A
\uE006>ಆ; # LETTER AA
\uE007>ಇ; # LETTER I
\uE008>ಈ; # LETTER II
\uE009>ಉ; # LETTER U
\uE00A>ಊ; # LETTER UU
\uE00B>ಋ; # LETTER VOCALIC R
\uE00C>ಌ; # LETTER VOCALIC L
\uE00F>ಏ; # LETTER EE
\uE010>ಐ; # LETTER AI
\uE013>ಓ; # LETTER OO
\uE014>ಔ; # LETTER AU
\uE015>ಕ; # LETTER KA
\uE016>ಖ; # LETTER KHA
\uE017>ಗ; # LETTER GA
\uE018>ಘ; # LETTER GHA
\uE019>ಙ; # LETTER NGA
\uE01A>ಚ; # LETTER CA
\uE01B>ಛ; # LETTER CHA
\uE01C>ಜ; # LETTER JA
\uE01D>ಝ; # LETTER JHA
\uE01E>ಞ; # LETTER NYA
\uE01F>ಟ; # LETTER TTA
\uE020>ಠ; # LETTER TTHA
\uE021>ಡ; # LETTER DDA
\uE022>ಢ; # LETTER DDHA
\uE023>ಣ; # LETTER NNA
\uE024>ತ; # LETTER TA
\uE025>ಥ; # LETTER THA
\uE026>ದ; # LETTER DA
\uE027>ಧ; # LETTER DHA
\uE028>ನ; # LETTER NA
\uE029>ನ; # REMAP (indicExceptions.txt): ಩>ನ = LETTER NNNA>LETTER NA
\uE02A>ಪ; # LETTER PA
\uE02B>ಫ; # LETTER PHA
\uE02C>ಬ; # LETTER BA
\uE02D>ಭ; # LETTER BHA
\uE02E>ಮ; # LETTER MA
\uE02F>ಯ; # LETTER YA
\uE030>ರ; # LETTER RA
\uE032>ಲ; # LETTER LA
\uE033>ಳ; # LETTER LLA
\uE034>ಳ; # REMAP (indicExceptions.txt): ಴>ಳ = LETTER LLLA>LETTER LLA
\uE035>ವ; # LETTER VA
\uE036>ಶ; # LETTER SHA
\uE037>ಷ; # LETTER SSA
\uE038>ಸ; # LETTER SA
\uE039>ಹ; # LETTER HA
# \uE03C>; // UNMAPPED InterIndic-Kannada: SIGN NUKTA
# \uE03D>; // UNMAPPED InterIndic-Kannada: SIGN AVAGRAHA
\uE03E>ಾ; # VOWEL SIGN AA
\uE03F>ಿ; # VOWEL SIGN I
\uE040>ೀ; # VOWEL SIGN II
\uE041>ು; # VOWEL SIGN U
\uE042>ೂ; # VOWEL SIGN UU
\uE043>ೃ; # VOWEL SIGN VOCALIC R
\uE044>ೄ; # VOWEL SIGN VOCALIC RR
\uE045>ೆ; # REMAP (indicExceptions.txt): ೅>ೆ = VOWEL SIGN CANDRA E>VOWEL SIGN E
\uE047>ೇ; # VOWEL SIGN EE
\uE048>ೈ; # VOWEL SIGN AI
\uE049>ೊ; # REMAP (indicExceptions.txt): ೉>ೊ = VOWEL SIGN CANDRA O>VOWEL SIGN O
\uE04B>ೋ; # VOWEL SIGN OO
\uE04C>ೌ; # VOWEL SIGN AU
\uE04D>್; # SIGN VIRAMA
\uE050>ಓಂ; # REMAP (indicExceptions.txt): ೐>ಓಂ = OM>LETTER OO.SIGN ANUSVARA
\uE055>ೕ; # LENGTH MARK
\uE056>ೖ; # AI LENGTH MARK
\uE057>ೌ; # REMAP (indicExceptions.txt): ೗>ೌ = AU LENGTH MARK>VOWEL SIGN AU
\uE059>ಖ; # REMAP (indicExceptions.txt): ೙>ಖ = LETTER KHHA>LETTER KHA
\uE05A>ಗ; # REMAP (indicExceptions.txt): ೚>ಗ = LETTER GHHA>LETTER GA
\uE05B>ಜ; # REMAP (indicExceptions.txt): ೛>ಜ = LETTER ZA>LETTER JA
\uE05D>ಢ; # REMAP (indicExceptions.txt): ೝ>ಢ = LETTER RHA>LETTER DDHA
\uE05E>ೞ; # LETTER FA
\uE05F>ಯ; # REMAP (indicExceptions.txt): ೟>ಯ = LETTER YYA>LETTER YA
\uE060>ೠ; # LETTER VOCALIC RR
\uE061>ೡ; # LETTER VOCALIC LL
\uE062>ಿ; # REMAP (indicExceptions.txt): ೢ>ಿ = VOWEL SIGN VOCALIC L>VOWEL SIGN I
\uE063>ೀ; # REMAP (indicExceptions.txt): ೣ>ೀ = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
\uE066>೦; # DIGIT ZERO
\uE067>೧; # DIGIT ONE
\uE068>೨; # DIGIT TWO
\uE069>೩; # DIGIT THREE
\uE06A>೪; # DIGIT FOUR
\uE06B>೫; # DIGIT FIVE
\uE06C>೬; # DIGIT SIX
\uE06D>೭; # DIGIT SEVEN
\uE06E>೮; # DIGIT EIGHT
\uE06F>೯; # DIGIT NINE
# \uE080>; // UNMAPPED InterIndic-Kannada: ISSHAR
\uE081>ಎ; # LETTER E
\uE082>ಒ; # LETTER O
\uE083>ಱ; # LETTER RRA
\uE084>ೆ; # VOWEL SIGN E
\uE085>ೊ; # VOWEL SIGN O
# eof

View file

@ -0,0 +1,108 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:00 2001
#--------------------------------------------------------------------
# InterIndic-Malayalam
\uE001>ം; # REMAP (indicExceptions.txt): ഁ>ം = SIGN CANDRABINDU>SIGN ANUSVARA
\uE002>ം; # SIGN ANUSVARA
\uE003>ഃ; # SIGN VISARGA
\uE005>അ; # LETTER A
\uE006>ആ; # LETTER AA
\uE007>ഇ; # LETTER I
\uE008>ഈ; # LETTER II
\uE009>ഉ; # LETTER U
\uE00A>ഊ; # LETTER UU
\uE00B>ഋ; # LETTER VOCALIC R
\uE00C>ഌ; # LETTER VOCALIC L
\uE00F>ഏ; # LETTER EE
\uE010>ഐ; # LETTER AI
\uE013>ഓ; # LETTER OO
\uE014>ഔ; # LETTER AU
\uE015>ക; # LETTER KA
\uE016>ഖ; # LETTER KHA
\uE017>ഗ; # LETTER GA
\uE018>ഘ; # LETTER GHA
\uE019>ങ; # LETTER NGA
\uE01A>ച; # LETTER CA
\uE01B>ഛ; # LETTER CHA
\uE01C>ജ; # LETTER JA
\uE01D>ഝ; # LETTER JHA
\uE01E>ഞ; # LETTER NYA
\uE01F>ട; # LETTER TTA
\uE020>ഠ; # LETTER TTHA
\uE021>ഡ; # LETTER DDA
\uE022>ഢ; # LETTER DDHA
\uE023>ണ; # LETTER NNA
\uE024>ത; # LETTER TA
\uE025>ഥ; # LETTER THA
\uE026>ദ; # LETTER DA
\uE027>ധ; # LETTER DHA
\uE028>ന; # LETTER NA
\uE029>ന; # REMAP (indicExceptions.txt): ഩ>ന = LETTER NNNA>LETTER NA
\uE02A>പ; # LETTER PA
\uE02B>ഫ; # LETTER PHA
\uE02C>ബ; # LETTER BA
\uE02D>ഭ; # LETTER BHA
\uE02E>മ; # LETTER MA
\uE02F>യ; # LETTER YA
\uE030>ര; # LETTER RA
\uE032>ല; # LETTER LA
\uE033>ള; # LETTER LLA
\uE034>ഴ; # LETTER LLLA
\uE035>വ; # LETTER VA
\uE036>ശ; # LETTER SHA
\uE037>ഷ; # LETTER SSA
\uE038>സ; # LETTER SA
\uE039>ഹ; # LETTER HA
# \uE03C>; // UNMAPPED InterIndic-Malayalam: SIGN NUKTA
# \uE03D>; // UNMAPPED InterIndic-Malayalam: SIGN AVAGRAHA
\uE03E>ാ; # VOWEL SIGN AA
\uE03F>ി; # VOWEL SIGN I
\uE040>ീ; # VOWEL SIGN II
\uE041>ു; # VOWEL SIGN U
\uE042>ൂ; # VOWEL SIGN UU
\uE043>ൃ; # VOWEL SIGN VOCALIC R
# \uE044>; // UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC RR
\uE045>ാ; # REMAP (indicExceptions.txt): ൅>ാ = VOWEL SIGN CANDRA E>VOWEL SIGN AA
\uE047>േ; # VOWEL SIGN EE
\uE048>ൈ; # VOWEL SIGN AI
\uE049>ോ; # REMAP (indicExceptions.txt): ൉>ോ = VOWEL SIGN CANDRA O>VOWEL SIGN OO
\uE04B>ോ; # VOWEL SIGN OO
\uE04C>ൌ; # VOWEL SIGN AU
\uE04D>്; # SIGN VIRAMA
# \uE050>; // UNMAPPED InterIndic-Malayalam: OM
# \uE055>; // UNMAPPED InterIndic-Malayalam: LENGTH MARK
\uE056>ൈ; # REMAP (indicExceptions.txt): ൖ>ൈ = AI LENGTH MARK>VOWEL SIGN AI
\uE057>ൗ; # AU LENGTH MARK
\uE059>ഖ; # REMAP (indicExceptions.txt): ൙>ഖ = LETTER KHHA>LETTER KHA
\uE05A>ഗ; # REMAP (indicExceptions.txt): ൚>ഗ = LETTER GHHA>LETTER GA
\uE05B>ജ; # REMAP (indicExceptions.txt): ൛>ജ = LETTER ZA>LETTER JA
\uE05D>ഢ; # REMAP (indicExceptions.txt): ൝>ഢ = LETTER RHA>LETTER DDHA
\uE05E>ഫ; # REMAP (indicExceptions.txt): ൞>ഫ = LETTER FA>LETTER PHA
\uE05F>യ; # REMAP (indicExceptions.txt): ൟ>യ = LETTER YYA>LETTER YA
\uE060>ൠ; # LETTER VOCALIC RR
\uE061>ൡ; # LETTER VOCALIC LL
# \uE062>; // UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC L
# \uE063>; // UNMAPPED InterIndic-Malayalam: VOWEL SIGN VOCALIC LL
\uE066>൦; # DIGIT ZERO
\uE067>൧; # DIGIT ONE
\uE068>൨; # DIGIT TWO
\uE069>൩; # DIGIT THREE
\uE06A>൪; # DIGIT FOUR
\uE06B>൫; # DIGIT FIVE
\uE06C>൬; # DIGIT SIX
\uE06D>൭; # DIGIT SEVEN
\uE06E>൮; # DIGIT EIGHT
\uE06F>൯; # DIGIT NINE
# \uE080>; // UNMAPPED InterIndic-Malayalam: ISSHAR
\uE081>എ; # LETTER E
\uE082>ഒ; # LETTER O
\uE083>റ; # LETTER RRA
\uE084>െ; # VOWEL SIGN E
\uE085>ൊ; # VOWEL SIGN O
# eof

View file

@ -0,0 +1,108 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:01 2001
#--------------------------------------------------------------------
# InterIndic-Oriya
\uE001>ଁ; # SIGN CANDRABINDU
\uE002>ଂ; # SIGN ANUSVARA
\uE003>ଃ; # SIGN VISARGA
\uE005>ଅ; # LETTER A
\uE006>ଆ; # LETTER AA
\uE007>ଇ; # LETTER I
\uE008>ଈ; # LETTER II
\uE009>ଉ; # LETTER U
\uE00A>ଊ; # LETTER UU
\uE00B>ଋ; # LETTER VOCALIC R
\uE00C>ଌ; # LETTER VOCALIC L
# \uE00F>; // UNMAPPED InterIndic-Oriya: LETTER EE (ଏ = LETTER E)
\uE010>ଐ; # LETTER AI
# \uE013>; // UNMAPPED InterIndic-Oriya: LETTER OO (ଓ = LETTER O)
\uE014>ଔ; # LETTER AU
\uE015>କ; # LETTER KA
\uE016>ଖ; # LETTER KHA
\uE017>ଗ; # LETTER GA
\uE018>ଘ; # LETTER GHA
\uE019>ଙ; # LETTER NGA
\uE01A>ଚ; # LETTER CA
\uE01B>ଛ; # LETTER CHA
\uE01C>ଜ; # LETTER JA
\uE01D>ଝ; # LETTER JHA
\uE01E>ଞ; # LETTER NYA
\uE01F>ଟ; # LETTER TTA
\uE020>ଠ; # LETTER TTHA
\uE021>ଡ; # LETTER DDA
\uE022>ଢ; # LETTER DDHA
\uE023>ଣ; # LETTER NNA
\uE024>ତ; # LETTER TA
\uE025>ଥ; # LETTER THA
\uE026>ଦ; # LETTER DA
\uE027>ଧ; # LETTER DHA
\uE028>ନ; # LETTER NA
\uE029>ନ; # REMAP (indicExceptions.txt): ଩>ନ = LETTER NNNA>LETTER NA
\uE02A>ପ; # LETTER PA
\uE02B>ଫ; # LETTER PHA
\uE02C>ବ; # LETTER BA
\uE02D>ଭ; # LETTER BHA
\uE02E>ମ; # LETTER MA
\uE02F>ଯ; # LETTER YA
\uE030>ର; # LETTER RA
\uE032>ଲ; # LETTER LA
\uE033>ଳ; # LETTER LLA
\uE034>ଳ; # REMAP (indicExceptions.txt): ଴>ଳ = LETTER LLLA>LETTER LLA
\uE035>ବ; # REMAP (indicExceptions.txt): ଵ>ବ = LETTER VA>LETTER BA
\uE036>ଶ; # LETTER SHA
\uE037>ଷ; # LETTER SSA
\uE038>ସ; # LETTER SA
\uE039>ହ; # LETTER HA
\uE03C>଼; # SIGN NUKTA
\uE03D>ଽ; # SIGN AVAGRAHA
\uE03E>ା; # VOWEL SIGN AA
\uE03F>ି; # VOWEL SIGN I
\uE040>ୀ; # VOWEL SIGN II
\uE041>ୁ; # VOWEL SIGN U
\uE042>ୂ; # VOWEL SIGN UU
\uE043>ୃ; # VOWEL SIGN VOCALIC R
\uE044>ୃ଼; # REMAP (indicExceptions.txt): ୄ>ୃ଼ = VOWEL SIGN VOCALIC RR>VOWEL SIGN VOCALIC R.SIGN NUKTA
\uE045>େ; # REMAP (indicExceptions.txt): ୅>େ = VOWEL SIGN CANDRA E>VOWEL SIGN E
# \uE047>; // UNMAPPED InterIndic-Oriya: VOWEL SIGN EE (େ = VOWEL SIGN E)
\uE048>ୈ; # VOWEL SIGN AI
\uE049>ୋ; # REMAP (indicExceptions.txt): ୉>ୋ = VOWEL SIGN CANDRA O>VOWEL SIGN O
# \uE04B>; // UNMAPPED InterIndic-Oriya: VOWEL SIGN OO (ୋ = VOWEL SIGN O)
\uE04C>ୌ; # VOWEL SIGN AU
\uE04D>୍; # SIGN VIRAMA
\uE050>ଓଁ; # REMAP (indicExceptions.txt): ୐>ଓଁ = OM>LETTER O.SIGN CANDRABINDU
# \uE055>; // UNMAPPED InterIndic-Oriya: LENGTH MARK
\uE056>ୖ; # AI LENGTH MARK
\uE057>ୗ; # AU LENGTH MARK
\uE059>ଖ଼; # REMAP (indicExceptions.txt): ୙>ଖ଼ = LETTER KHHA>LETTER KHA.SIGN NUKTA
\uE05A>ଗ଼; # REMAP (indicExceptions.txt): ୚>ଗ଼ = LETTER GHHA>LETTER GA.SIGN NUKTA
\uE05B>ଜ଼; # REMAP (indicExceptions.txt): ୛>ଜ଼ = LETTER ZA>LETTER JA.SIGN NUKTA
\uE05D>ଢ଼; # LETTER RHA
\uE05E>ଫ଼; # REMAP (indicExceptions.txt): ୞>ଫ଼ = LETTER FA>LETTER PHA.SIGN NUKTA
\uE05F>ୟ; # LETTER YYA
\uE060>ୠ; # LETTER VOCALIC RR
\uE061>ୡ; # LETTER VOCALIC LL
\uE062>ୖ଼; # REMAP (indicExceptions.txt): ୢ>ୖ଼ = VOWEL SIGN VOCALIC L>AI LENGTH MARK.SIGN NUKTA
\uE063>ୗ଼; # REMAP (indicExceptions.txt): ୣ>ୗ଼ = VOWEL SIGN VOCALIC LL>AU LENGTH MARK.SIGN NUKTA
\uE066>୦; # DIGIT ZERO
\uE067>୧; # DIGIT ONE
\uE068>୨; # DIGIT TWO
\uE069>୩; # DIGIT THREE
\uE06A>୪; # DIGIT FOUR
\uE06B>୫; # DIGIT FIVE
\uE06C>୬; # DIGIT SIX
\uE06D>୭; # DIGIT SEVEN
\uE06E>୮; # DIGIT EIGHT
\uE06F>୯; # DIGIT NINE
\uE080>୰; # ISSHAR
\uE081>ଏ; # LETTER E
\uE082>ଓ; # LETTER O
\uE083>ଡ଼; # LETTER RRA
\uE084>େ; # VOWEL SIGN E
\uE085>ୋ; # VOWEL SIGN O
# eof

View file

@ -0,0 +1,108 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:01 2001
#--------------------------------------------------------------------
# InterIndic-Tamil
# \uE001>; // UNMAPPED InterIndic-Tamil: SIGN CANDRABINDU
\uE002>ஂ; # SIGN ANUSVARA
\uE003>ஃ; # SIGN VISARGA
\uE005>அ; # LETTER A
\uE006>ஆ; # LETTER AA
\uE007>இ; # LETTER I
\uE008>ஈ; # LETTER II
\uE009>உ; # LETTER U
\uE00A>ஊ; # LETTER UU
\uE00B>ரி; # REMAP (indicExceptions.txt): ஋>ரி = LETTER VOCALIC R>LETTER RA.VOWEL SIGN I
\uE00C>இ; # REMAP (indicExceptions.txt): ஌>இ = LETTER VOCALIC L>LETTER I
\uE00F>ஏ; # LETTER EE
\uE010>ஐ; # LETTER AI
\uE013>ஓ; # LETTER OO
\uE014>ஔ; # LETTER AU
\uE015>க; # LETTER KA
\uE016>க; # REMAP (indicExceptions.txt): ஖>க = LETTER KHA>LETTER KA
\uE017>க; # REMAP (indicExceptions.txt): ஗>க = LETTER GA>LETTER KA
\uE018>க; # REMAP (indicExceptions.txt): ஘>க = LETTER GHA>LETTER KA
\uE019>ங; # LETTER NGA
\uE01A>ச; # LETTER CA
\uE01B>ச; # REMAP (indicExceptions.txt): ஛>ச = LETTER CHA>LETTER CA
\uE01C>ஜ; # LETTER JA
\uE01D>ச; # REMAP (indicExceptions.txt): ஝>ச = LETTER JHA>LETTER CA
\uE01E>ஞ; # LETTER NYA
\uE01F>ட; # LETTER TTA
\uE020>ட; # REMAP (indicExceptions.txt): ஠>ட = LETTER TTHA>LETTER TTA
\uE021>ட; # REMAP (indicExceptions.txt): ஡>ட = LETTER DDA>LETTER TTA
\uE022>ட; # REMAP (indicExceptions.txt): ஢>ட = LETTER DDHA>LETTER TTA
\uE023>ண; # LETTER NNA
\uE024>த; # LETTER TA
\uE025>த; # REMAP (indicExceptions.txt): ஥>த = LETTER THA>LETTER TA
\uE026>த; # REMAP (indicExceptions.txt): ஦>த = LETTER DA>LETTER TA
\uE027>த; # REMAP (indicExceptions.txt): ஧>த = LETTER DHA>LETTER TA
\uE028>ந; # LETTER NA
\uE029>ன; # LETTER NNNA
\uE02A>ப; # LETTER PA
\uE02B>ப; # REMAP (indicExceptions.txt): ஫>ப = LETTER PHA>LETTER PA
\uE02C>ப; # REMAP (indicExceptions.txt): ஬>ப = LETTER BA>LETTER PA
\uE02D>ப; # REMAP (indicExceptions.txt): ஭>ப = LETTER BHA>LETTER PA
\uE02E>ம; # LETTER MA
\uE02F>ய; # LETTER YA
\uE030>ர; # LETTER RA
\uE032>ல; # LETTER LA
\uE033>ள; # LETTER LLA
\uE034>ழ; # LETTER LLLA
\uE035>வ; # LETTER VA
\uE036>ஷ; # REMAP (indicExceptions.txt): ஶ>ஷ = LETTER SHA>LETTER SSA
\uE037>ஷ; # LETTER SSA
\uE038>ஸ; # LETTER SA
\uE039>ஹ; # LETTER HA
# \uE03C>; // UNMAPPED InterIndic-Tamil: SIGN NUKTA
# \uE03D>; // UNMAPPED InterIndic-Tamil: SIGN AVAGRAHA
\uE03E>ா; # VOWEL SIGN AA
\uE03F>ி; # VOWEL SIGN I
\uE040>ீ; # VOWEL SIGN II
\uE041>ு; # VOWEL SIGN U
\uE042>ூ; # VOWEL SIGN UU
\uE043>்ரி; # REMAP (indicExceptions.txt): ௃>்ரி = VOWEL SIGN VOCALIC R>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
\uE044>்ரி; # REMAP (indicExceptions.txt): ௄>்ரி = VOWEL SIGN VOCALIC RR>SIGN VIRAMA.LETTER RA.VOWEL SIGN I
\uE045>ா; # REMAP (indicExceptions.txt): ௅>ா = VOWEL SIGN CANDRA E>VOWEL SIGN AA
\uE047>ே; # VOWEL SIGN EE
\uE048>ை; # VOWEL SIGN AI
\uE049>ா; # REMAP (indicExceptions.txt): ௉>ா = VOWEL SIGN CANDRA O>VOWEL SIGN AA
\uE04B>ோ; # VOWEL SIGN OO
\uE04C>ௌ; # VOWEL SIGN AU
\uE04D>்; # SIGN VIRAMA
\uE050>ஓம்; # REMAP (indicExceptions.txt): ௐ>ஓம் = OM>LETTER OO.LETTER MA.SIGN VIRAMA
# \uE055>; // UNMAPPED InterIndic-Tamil: LENGTH MARK
\uE056>ை; # REMAP (indicExceptions.txt): ௖>ை = AI LENGTH MARK>VOWEL SIGN AI
\uE057>ௗ; # AU LENGTH MARK
\uE059>க; # REMAP (indicExceptions.txt): ௙>க = LETTER KHHA>LETTER KA
\uE05A>க; # REMAP (indicExceptions.txt): ௚>க = LETTER GHHA>LETTER KA
\uE05B>ஜ; # REMAP (indicExceptions.txt): ௛>ஜ = LETTER ZA>LETTER JA
\uE05D>ட; # REMAP (indicExceptions.txt): ௝>ட = LETTER RHA>LETTER TTA
\uE05E>ப; # REMAP (indicExceptions.txt): ௞>ப = LETTER FA>LETTER PA
\uE05F>ய; # REMAP (indicExceptions.txt): ௟>ய = LETTER YYA>LETTER YA
\uE060>ரி; # REMAP (indicExceptions.txt): ௠>ரி = LETTER VOCALIC RR>LETTER RA.VOWEL SIGN I
\uE061>ஈ; # REMAP (indicExceptions.txt): ௡>ஈ = LETTER VOCALIC LL>LETTER II
# \uE062>; // UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC L
# \uE063>; // UNMAPPED InterIndic-Tamil: VOWEL SIGN VOCALIC LL
# \uE066>; // UNMAPPED InterIndic-Tamil: DIGIT ZERO
\uE067>௧; # DIGIT ONE
\uE068>௨; # DIGIT TWO
\uE069>௩; # DIGIT THREE
\uE06A>௪; # DIGIT FOUR
\uE06B>௫; # DIGIT FIVE
\uE06C>௬; # DIGIT SIX
\uE06D>௭; # DIGIT SEVEN
\uE06E>௮; # DIGIT EIGHT
\uE06F>௯; # DIGIT NINE
# \uE080>; // UNMAPPED InterIndic-Tamil: ISSHAR
\uE081>எ; # LETTER E
\uE082>ஒ; # LETTER O
\uE083>ற; # LETTER RRA
\uE084>ெ; # VOWEL SIGN E
\uE085>ொ; # VOWEL SIGN O
# eof

View file

@ -0,0 +1,108 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:01 2001
#--------------------------------------------------------------------
# InterIndic-Telugu
\uE001>ఁ; # SIGN CANDRABINDU
\uE002>ం; # SIGN ANUSVARA
\uE003>ః; # SIGN VISARGA
\uE005>అ; # LETTER A
\uE006>ఆ; # LETTER AA
\uE007>ఇ; # LETTER I
\uE008>ఈ; # LETTER II
\uE009>ఉ; # LETTER U
\uE00A>ఊ; # LETTER UU
\uE00B>ఋ; # LETTER VOCALIC R
\uE00C>ఌ; # LETTER VOCALIC L
\uE00F>ఏ; # LETTER EE
\uE010>ఐ; # LETTER AI
\uE013>ఓ; # LETTER OO
\uE014>ఔ; # LETTER AU
\uE015>క; # LETTER KA
\uE016>ఖ; # LETTER KHA
\uE017>గ; # LETTER GA
\uE018>ఘ; # LETTER GHA
\uE019>ఙ; # LETTER NGA
\uE01A>చ; # LETTER CA
\uE01B>ఛ; # LETTER CHA
\uE01C>జ; # LETTER JA
\uE01D>ఝ; # LETTER JHA
\uE01E>ఞ; # LETTER NYA
\uE01F>ట; # LETTER TTA
\uE020>ఠ; # LETTER TTHA
\uE021>డ; # LETTER DDA
\uE022>ఢ; # LETTER DDHA
\uE023>ణ; # LETTER NNA
\uE024>త; # LETTER TA
\uE025>థ; # LETTER THA
\uE026>ద; # LETTER DA
\uE027>ధ; # LETTER DHA
\uE028>న; # LETTER NA
\uE029>న; # REMAP (indicExceptions.txt): ఩>న = LETTER NNNA>LETTER NA
\uE02A>ప; # LETTER PA
\uE02B>ఫ; # LETTER PHA
\uE02C>బ; # LETTER BA
\uE02D>భ; # LETTER BHA
\uE02E>మ; # LETTER MA
\uE02F>య; # LETTER YA
\uE030>ర; # LETTER RA
\uE032>ల; # LETTER LA
\uE033>ళ; # LETTER LLA
\uE034>ళ; # REMAP (indicExceptions.txt): ఴ>ళ = LETTER LLLA>LETTER LLA
\uE035>వ; # LETTER VA
\uE036>శ; # LETTER SHA
\uE037>ష; # LETTER SSA
\uE038>స; # LETTER SA
\uE039>హ; # LETTER HA
# \uE03C>; // UNMAPPED InterIndic-Telugu: SIGN NUKTA
# \uE03D>; // UNMAPPED InterIndic-Telugu: SIGN AVAGRAHA
\uE03E>ా; # VOWEL SIGN AA
\uE03F>ి; # VOWEL SIGN I
\uE040>ీ; # VOWEL SIGN II
\uE041>ు; # VOWEL SIGN U
\uE042>ూ; # VOWEL SIGN UU
\uE043>ృ; # VOWEL SIGN VOCALIC R
\uE044>ౄ; # VOWEL SIGN VOCALIC RR
\uE045>ె; # REMAP (indicExceptions.txt): ౅>ె = VOWEL SIGN CANDRA E>VOWEL SIGN E
\uE047>ే; # VOWEL SIGN EE
\uE048>ై; # VOWEL SIGN AI
\uE049>ొ; # REMAP (indicExceptions.txt): ౉>ొ = VOWEL SIGN CANDRA O>VOWEL SIGN O
\uE04B>ో; # VOWEL SIGN OO
\uE04C>ౌ; # VOWEL SIGN AU
\uE04D>్; # SIGN VIRAMA
\uE050>ఓం; # REMAP (indicExceptions.txt): ౐>ఓం = OM>LETTER OO.SIGN ANUSVARA
\uE055>ౕ; # LENGTH MARK
\uE056>ౖ; # AI LENGTH MARK
\uE057>ౌ; # REMAP (indicExceptions.txt): ౗>ౌ = AU LENGTH MARK>VOWEL SIGN AU
\uE059>ఖ; # REMAP (indicExceptions.txt): ౙ>ఖ = LETTER KHHA>LETTER KHA
\uE05A>గ; # REMAP (indicExceptions.txt): ౚ>గ = LETTER GHHA>LETTER GA
\uE05B>జ; # REMAP (indicExceptions.txt): ౛>జ = LETTER ZA>LETTER JA
\uE05D>ఢ; # REMAP (indicExceptions.txt): ౝ>ఢ = LETTER RHA>LETTER DDHA
\uE05E>ఫ; # REMAP (indicExceptions.txt): ౞>ఫ = LETTER FA>LETTER PHA
\uE05F>య; # REMAP (indicExceptions.txt): ౟>య = LETTER YYA>LETTER YA
\uE060>ౠ; # LETTER VOCALIC RR
\uE061>ౡ; # LETTER VOCALIC LL
\uE062>ి; # REMAP (indicExceptions.txt): ౢ>ి = VOWEL SIGN VOCALIC L>VOWEL SIGN I
\uE063>ీ; # REMAP (indicExceptions.txt): ౣ>ీ = VOWEL SIGN VOCALIC LL>VOWEL SIGN II
\uE066>౦; # DIGIT ZERO
\uE067>౧; # DIGIT ONE
\uE068>౨; # DIGIT TWO
\uE069>౩; # DIGIT THREE
\uE06A>౪; # DIGIT FOUR
\uE06B>౫; # DIGIT FIVE
\uE06C>౬; # DIGIT SIX
\uE06D>౭; # DIGIT SEVEN
\uE06E>౮; # DIGIT EIGHT
\uE06F>౯; # DIGIT NINE
# \uE080>; // UNMAPPED InterIndic-Telugu: ISSHAR
\uE081>ఎ; # LETTER E
\uE082>ఒ; # LETTER O
\uE083>ఱ; # LETTER RRA
\uE084>ె; # VOWEL SIGN E
\uE085>ొ; # VOWEL SIGN O
# eof

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,91 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:05 2001
#--------------------------------------------------------------------
# Kannada-InterIndic
ಂ>\uE002; # SIGN ANUSVARA
ಃ>\uE003; # SIGN VISARGA
ಅ>\uE005; # LETTER A
ಆ>\uE006; # LETTER AA
ಇ>\uE007; # LETTER I
ಈ>\uE008; # LETTER II
ಉ>\uE009; # LETTER U
ಊ>\uE00A; # LETTER UU
ಋ>\uE00B; # LETTER VOCALIC R
ಌ>\uE00C; # LETTER VOCALIC L
ಎ>\uE081; # LETTER E
ಏ>\uE00F; # LETTER EE
ಐ>\uE010; # LETTER AI
ಒ>\uE082; # LETTER O
ಓ>\uE013; # LETTER OO
ಔ>\uE014; # LETTER AU
ಕ>\uE015; # LETTER KA
ಖ>\uE016; # LETTER KHA
ಗ>\uE017; # LETTER GA
ಘ>\uE018; # LETTER GHA
ಙ>\uE019; # LETTER NGA
ಚ>\uE01A; # LETTER CA
ಛ>\uE01B; # LETTER CHA
ಜ>\uE01C; # LETTER JA
ಝ>\uE01D; # LETTER JHA
ಞ>\uE01E; # LETTER NYA
ಟ>\uE01F; # LETTER TTA
ಠ>\uE020; # LETTER TTHA
ಡ>\uE021; # LETTER DDA
ಢ>\uE022; # LETTER DDHA
ಣ>\uE023; # LETTER NNA
ತ>\uE024; # LETTER TA
ಥ>\uE025; # LETTER THA
ದ>\uE026; # LETTER DA
ಧ>\uE027; # LETTER DHA
ನ>\uE028; # LETTER NA
ಪ>\uE02A; # LETTER PA
ಫ>\uE02B; # LETTER PHA
ಬ>\uE02C; # LETTER BA
ಭ>\uE02D; # LETTER BHA
ಮ>\uE02E; # LETTER MA
ಯ>\uE02F; # LETTER YA
ರ>\uE030; # LETTER RA
ಱ>\uE083; # LETTER RRA
ಲ>\uE032; # LETTER LA
ಳ>\uE033; # LETTER LLA
ವ>\uE035; # LETTER VA
ಶ>\uE036; # LETTER SHA
ಷ>\uE037; # LETTER SSA
ಸ>\uE038; # LETTER SA
ಹ>\uE039; # LETTER HA
ಾ>\uE03E; # VOWEL SIGN AA
ಿ>\uE03F; # VOWEL SIGN I
ೀ>\uE040; # VOWEL SIGN II
ು>\uE041; # VOWEL SIGN U
ೂ>\uE042; # VOWEL SIGN UU
ೃ>\uE043; # VOWEL SIGN VOCALIC R
ೄ>\uE044; # VOWEL SIGN VOCALIC RR
ೆ>\uE084; # VOWEL SIGN E
ೇ>\uE047; # VOWEL SIGN EE
ೈ>\uE048; # VOWEL SIGN AI
ೊ>\uE085; # VOWEL SIGN O
ೋ>\uE04B; # VOWEL SIGN OO
ೌ>\uE04C; # VOWEL SIGN AU
್>\uE04D; # SIGN VIRAMA
ೕ>\uE055; # LENGTH MARK
ೖ>\uE056; # AI LENGTH MARK
ೞ>\uE05E; # LETTER FA
ೠ>\uE060; # LETTER VOCALIC RR
ೡ>\uE061; # LETTER VOCALIC LL
೦>\uE066; # DIGIT ZERO
೧>\uE067; # DIGIT ONE
೨>\uE068; # DIGIT TWO
೩>\uE069; # DIGIT THREE
೪>\uE06A; # DIGIT FOUR
೫>\uE06B; # DIGIT FIVE
೬>\uE06C; # DIGIT SIX
೭>\uE06D; # DIGIT SEVEN
೮>\uE06E; # DIGIT EIGHT
೯>\uE06F; # DIGIT NINE
# eof

View file

@ -0,0 +1,125 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:05 2001
#--------------------------------------------------------------------
# KeyboardEscape-Latin1
### $esc='';
### $grave='`';
### $acute='';
### $hat='^';
### $tilde='~';
### $umlaut=':';
### $ring='.';
### $cedilla=',';
### $slash='/';
### $super='^';
# Make keyboard entry of {esc} possible
# and of backslash
'\\'''>'';
'\\\\'>'\\';
# Long keys
cur''>¤;
sec''>§;
not''>¬;
mul''>×;
div''>÷;
\\ ''> ; # non-breaking space
'!'''>¡; # inverted exclamation
c'/'''>¢; # cent sign
lb''>£; # pound sign
'|'''>¦; # broken vertical bar
':'''>¨; # umlaut
'^' a''>ª; # feminine ordinal
'<<'''>«;
r''>®;
'--'''>¯;
'-'''>­;
'+-'''>±;
'^' 2''>²;
'^' 3''>³;
''''>´;
m''>µ;
para''>¶;
dot''>·;
','''>¸;
'^' 1''>¹;
'^' o''>º; # masculine ordinal
'>>'''>»;
'1/4'''>¼;
'1/2'''>½;
'3/4'''>¾;
'?'''>¿;
A'`'''>À;
A''''>Á;
A'^'''>Â;
A'~'''>Ã;
A':'''>Ä;
A'.'''>Å;
AE''>Æ;
C','''>Ç;
E'`'''>È;
E''''>É;
E'^'''>Ê;
E':'''>Ë;
I'`'''>Ì;
I''''>Í;
I'^'''>Î;
I':'''>Ï;
'D-'''>Ð;
N'~'''>Ñ;
O'`'''>Ò;
O''''>Ó;
O'^'''>Ô;
O'~'''>Õ;
O':'''>Ö;
O'/'''>Ø;
U'`'''>Ù;
U''''>Ú;
U'^'''>Û;
U':'''>Ü;
Y''''>Ý;
TH''>Þ;
ss''>ß;
a'`'''>à;
a''''>á;
a'^'''>â;
a'~'''>ã;
a':'''>ä;
a'.'''>å;
ae''>æ;
c','''>ç;
c''>©; # copyright - after c{cedilla}
e'`'''>è;
e''''>é;
e'^'''>ê;
e':'''>ë;
i'`'''>ì;
i''''>í;
i'^'''>î;
i':'''>ï;
'd-'''>ð;
n'~'''>ñ;
o'`'''>ò;
o''''>ó;
o'^'''>ô;
o'~'''>õ;
o':'''>ö;
o'/'''>ø;
o''>°;
u'`'''>ù;
u''''>ú;
u'^'''>û;
u':'''>ü;
y''''>ý;
y''>¥; # yen sign
th''>þ;
#masked: + "ss''>ÿ;"
# eof

View file

@ -0,0 +1,189 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 2001
#--------------------------------------------------------------------
# To Do: finish adding shadda, add sokoon
# [Is this done? Can someone finish this?]
### $alefmadda=آ;
### $alefuhamza=أ;
### $wauuhamza=ؤ;
### $alefhamza=إ;
### $yehuhamza=ئ;
### $alef=ا;
### $beh=ب;
### $tehmarbuta=ة;
### $teh=ت;
### $theh=ث;
### $geem=ج;
### $hah=ح;
### $kha=خ;
### $dal=د;
### $dhal=ذ;
### $reh=ر;
### $zain=ز;
### $seen=س;
### $sheen=ش;
### $sad=ص;
### $dad=ض;
### $tah=ط;
### $zah=ظ;
### $ein=ع;
### $ghein=غ;
### $feh=ف;
### $qaaf=ق;
### $kaf=ك;
### $lam=ل;
### $meem=م;
### $noon=ن;
### $heh=ه;
### $wau=و;
### $yehmaqsura=ى;
### $yeh=ي;
### $peh=ڤ;
### $hamza=ء;
### $fathatein=ً;
### $dammatein=ٌ;
### $kasratein=ٍ;
### $fatha=َ;
### $damma=ُ;
### $kasra=ِ;
### $shadda=ّ;
### $sokoon=ْ;
# Doubles - liu
t'' < {ت} [تثةط];
h'' < {ه} [هح];
s'' < {س} ش;
# A few pathological special cases to make round
# trip work. - liu
d'~'d <> د د;
dh'~'dh <> ذ ذ;
dd'~'dd <> ض ض;
# Shadda: Map x ّ to x x, where x is dh, dd, or
# d (that is, ذ, ض, or د). If x is d, d'd is
# output. Net effect is to map s.th. like ض ّ
# to dd'dd. - liu
ذ {dh} <> dh {ّ};
ض {dd} <> dd {ّ};
د {''d} [^dh] <> d {ّ}; # Avoid d'dd or d'dh
# [This should be removed, but it's good for demos]
Arabic> \
تتمتع' ' \
اللغة' ' \
العرببية' ' \
ببنظم' ' \
كتاببية' ' \
جميلة;
# Main rules
''ai<a{آ;
ai<>آ;
''ae<a{أ;
ae<>أ;
''ao<a{إ;
ao<>إ;
''aa<a{ا;
aa<>ا;
''an<a{ً;
an<>ً;
''a<a{َ;
a<>َ;
b<>ب;
''dh<d{ذ;
dh<>ذ;
''dd<d{ض;
dd<>ض;
''d<d{د;
d<>د;
''e<a{ع;
''e<w{ع;
''e<y{ع;
e<>ع;
f<>ف;
gh<>غ;
''hh<d{ح;
''hh<t{ح;
''hh<k{ح;
''hh<s{ح;
hh<>ح;
''h<d{ه;
''h<t{ه;
''h<k{ه;
''h<s{ه;
h<>ه;
''ii<i{ٍ;
ii<>ٍ;
''i<i{ِ;
i<>ِ;
j<>ج;
kh<>خ;
k<>ك;
l<>ل;
''m<y{م;
''m<t{م;
m<>م;
n<>ن;
''o<a{ء;
o<>ء;
p<>ڤ;
q<>ق;
r<>ر;
sh<>ش;
''ss<s{ص;
ss<>ص;
''s<s{س;
s<>س;
th<>ث;
tm<>ة;
''tt<t{ط;
tt<>ط;
''t<t{ت;
t<>ت;
''uu<u{ٌ;
uu<>ٌ;
''u<u{ُ;
u<>ُ;
we<>ؤ;
w<>و;
ye<>ئ;
ym<>ى;
''y<y{ي;
y<>ي;
''zz<z{ظ;
zz<>ظ;
''z<z{ز;
z<>ز;
# One-way Latin-Arabic compatibility rules
c>ك;
g>ج;
x>كّس;
v>ب;
# Digits
0<>٠; # Arabic digit 0
1<>١; # Arabic digit 1
2<>٢; # Arabic digit 2
3<>٣; # Arabic digit 3
4<>٤; # Arabic digit 4
5<>٥; # Arabic digit 5
6<>٦; # Arabic digit 6
7<>٧; # Arabic digit 7
8<>٨; # Arabic digit 8
9<>٩; # Arabic digit 9
'%'<>٪; # Arabic %
'.'<>٫; # Arabic decimal separator
','<>٬; # Arabic thousands separator
'*'<>٭; # Arabic five-pointed star
''>;
# eof

View file

@ -0,0 +1,305 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:05 2001
#--------------------------------------------------------------------
# Latin-Cyrillic
# These rules provide general Latin-Cyrillic
# transliteration. The standard Russian transliterations
# are generally used for the letters from Russian,
# with additional Cyrillic characters given consistent
# mappings.
### $S_hacek=Š;
### $s_hacek=š;
### $YO=Ё;
### $J=Ј;
### $A=А;
### $B=Б;
### $V=В;
### $G=Г;
### $D=Д;
### $YE=Е;
### $ZH=Ж;
### $Z=З;
### $YI=И;
### $Y=Й;
### $K=К;
### $L=Л;
### $M=М;
### $N=Н;
### $O=О;
### $P=П;
### $R=Р;
### $S=С;
### $T=Т;
### $U=У;
### $F=Ф;
### $KH=Х;
### $TS=Ц;
### $CH=Ч;
### $SH=Ш;
### $SHCH=Щ;
### $HARD=Ъ;
### $I=Ы;
### $SOFT=Ь;
### $E=Э;
### $YU=Ю;
### $YA=Я;
# Lowercase
### $a=а;
### $b=б;
### $v=в;
### $g=г;
### $d=д;
### $ye=е;
### $zh=ж;
### $z=з;
### $yi=и;
### $y=й;
### $k=к;
### $l=л;
### $m=м;
### $n=н;
### $o=о;
### $p=п;
### $r=р;
### $s=с;
### $t=т;
### $u=у;
### $f=ф;
### $kh=х;
### $ts=ц;
### $ch=ч;
### $sh=ш;
### $shch=щ;
### $hard=ъ;
### $i=ы;
### $soft=ь;
### $e=э;
### $yu=ю;
### $ya=я;
### $yo=ё;
### $j=ј;
# variables
# some are duplicated so lowercasing works
$csoft=[eiyEIY];
$CSOFT=[eiyEIY];
$BECOMES_H=[Ъъ];
$becomes_h=[Ъъ];
$BECOMES_S=[Сс];
$becomes_s=[Сс];
$BECOMES_C=[Чч];
$becomes_c=[Чч];
$BECOMES_VOWEL=[АЭЫОУаэыоу];
$becomes_vowel=[АЭЫОУаэыоу];
$letter=[[:Lu:][:Ll:]];
$lower=[[:Ll:]];
# Modified to combine display transliterator and typing transliterator.
# The display mapping uses accents for the "soft" vowels.
# It does not, although it could, use characters like š instead of digraphs
# like sh.
# #############################################
# Special titlecase forms, not duplicated
# #############################################
Sh''ch<>Шч; # LIU Distinguish Шч from Щ
Ch <> {Ч} $lower;
Kh <> {Х} $lower;
Shch <> {Щ}$lower;
Sh <> {Ш} $lower;
Ts <> {Ц} $lower;
Zh <> {Ж} $lower;
Yi>И;
Ye>Е;
Yo>Ё;
Yu>Ю;
Ya>Я;
# #############################################
# Rules to Duplicate
# To get the lowercase versions, copy these and lowercase
# #############################################
# variant spellings in English
SHTCH>Щ;
TCH>Ч;
TH>З;
Q>К;
WH>В;
W>В;
X>КС; #+ "X<КС;"
# Separate letters that would otherwise join
SH''<Ш}$BECOMES_C;
T''<Т}$BECOMES_S;
T''<Т}[ЧЩщ]; # LIU add special cases
K''<К}$BECOMES_H;
S''<С}$BECOMES_H;
T''<Т}$BECOMES_H;
Z''<З}$BECOMES_H;
Y''<Й}$BECOMES_VOWEL;
# Main letters
A<>А;
B<>Б;
CH<>Ч;
D<>Д;
E<>Э;
F<>Ф;
G<>Г;
Ì<>И;
I<>Ы;
KH<>Х;
K<>К;
L<>Л;
M<>М;
N<>Н;
O<>О;
P<>П;
R<>Р;
SHCH<>Щ;
SH>Ш; #+ "SH<Ш;"
Š<>Ш;
S<>С;
TS<>Ц;
T<>Т;
U<>У;
V<>В;
#ÌÀÈÒÙ
YE>Е; #+ "YE<Е;"
È<>Е;
YO>Ё; #+ "YO<Ё;"
Ò<>Ё;
YU>Ю; #+ "YU<Ю;"
Ù<>Ю;
YA>Я; #+ "YA<Я;"
À<>Я;
Y<>Й;
ZH<>Ж;
Z<>З;
H<>Ъ;
Ÿ<>Ь;
# Non-russian
J<>Ј;
# variant spellings in English
C}$csoft>С;
C>К;
# #############################################
# Duplicated Rules
# Copy and lowercase the above rules
# #############################################
# variant spellings in english
shtch>щ;
tch>ч;
th>з;
q>к;
wh>в;
w>в;
x>кс; #+ "x<кс;"
# separate letters that would otherwise join
sh''<ш}$becomes_c;
t''<т}$becomes_s;
t''<т}[чщ]; # LIU add special cases
k''<к}$becomes_h;
s''<с}$becomes_h;
t''<т}$becomes_h;
z''<з}$becomes_h;
y''<й}$becomes_vowel;
# main letters
a<>а;
b<>б;
ch<>ч;
d<>д;
e<>э;
f<>ф;
g<>г;
ì<>и;
i<>ы;
kh<>х;
k<>к;
l<>л;
m<>м;
n<>н;
o<>о;
p<>п;
r<>р;
shch<>щ;
sh>ш; #+ "sh<ш;"
š<>ш;
s<>с;
ts<>ц;
t<>т;
u<>у;
v<>в;
#ìàèòù
ye>е; #+ "ye<е;"
è<>е;
yo>ё; #+ "yo<ё;"
ò<>ё;
yu>ю; #+ "yu<ю;"
ù<>ю;
ya>я; #+ "ya<я;"
à<>я;
y<>й;
zh<>ж;
z<>з;
h<>ъ;
ÿ<>ь;
# non-russian
j<>ј;
# variant spellings in english
c}$csoft>с;
c>к;
# #############################################
# End of Duplicated Rules
# #############################################
#generally the last rule
''>;
# eof

View file

@ -0,0 +1,375 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:18:45 2001
#--------------------------------------------------------------------
# Latin-Greek
# ==============================================
# Modern Greek Transliteration Rules
#
# This transliterates modern Greek characters, but using rules
# that are traditional for Ancient Greek, and
# thus more resemble Greek words that have become part
# of English. It differs from the official Greek
# transliteration, which is more phonetic (since
# most modern Greek vowels, for example, have
# degenerated simply to sound like "ee").
#
# There are only a few tricky parts.
# 1. eta and omega don't map directly to Latin vowels,
# so we use a macron on e and o, and some
# other combinations if they are accented.
# 2. The accented, diaeresis i and y are substituted too.
# 3. Some letters use digraphs, like "ph". While typical,
# they need some special handling.
# 4. A gamma before a gamma or a few other letters is
# transliterated as an "n", as in "Anglo"
# 5. An ypsilon after a vowel is a "u", as in
# "Mouseio". Otherwise it is a "y" as in "Physikon"
# 6. The construction of the rules is made simpler by making sure
# that most rules for lowercase letters exactly correspond to the
# rules for uppercase letters, *except* for the case of the letters
# in the rule itself. That way, after modifying the uppercase rules,
# you can just copy, paste, and "set to lowercase" to get
# the rules for lowercase letters!
# ==============================================
# ==============================================
# Variables, used to make the rules more comprehensible
# and for conditionals.
# ==============================================
### $quote='\"';
# Latin Letters
### $E_MACRON=Ē;
### $e_macron=ē;
### $O_MACRON=Ō;
### $o_macron=ō;
### $Y_UMLAUT=Ÿ;
### $y_umlaut=ÿ;
#! // with real accents.
#! + "$E_MACRON_ACUTE=Ḗ;"
#! + "$e_macron_acute=ḗ;"
#! + "$O_MACRON_ACUTE=Ṓ;"
#! + "$o_macron_acute=ṓ;"
#! + "$y_umlaut_acute=ÿ́;"
#! + "$u00ef_acute=ḯ;"
#! + "$u00fc_acute=ǘ;"
#! //
# single letter equivalents
### $E_MACRON_ACUTE=Ê;
### $e_macron_acute=ê;
### $O_MACRON_ACUTE=Ô;
### $o_macron_acute=ô;
### $y_umlaut_acute=ŷ;
### $u00ef_acute=î;
### $u00fc_acute=û;
# Greek Letters
### $ALPHA=Α;
### $BETA=Β;
### $GAMMA=Γ;
### $DELTA=Δ;
### $EPSILON=Ε;
### $ZETA=Ζ;
### $ETA=Η;
### $THETA=Θ;
### $IOTA=Ι;
### $KAPPA=Κ;
### $LAMBDA=Λ;
### $MU=Μ;
### $NU=Ν;
### $XI=Ξ;
### $OMICRON=Ο;
### $PI=Π;
### $RHO=Ρ;
### $SIGMA=Σ;
### $TAU=Τ;
### $YPSILON=Υ;
### $PHI=Φ;
### $CHI=Χ;
### $PSI=Ψ;
### $OMEGA=Ω;
### $ALPHA2=Ά;
### $EPSILON2=Έ;
### $ETA2=Ή;
### $IOTA2=Ί;
### $OMICRON2=Ό;
### $YPSILON2=Ύ;
### $OMEGA2=Ώ;
### $IOTA_DIAERESIS=Ϊ;
### $YPSILON_DIAERESIS=Ϋ;
### $alpha=α;
### $beta=β;
### $gamma=γ;
### $delta=δ;
### $epsilon=ε;
### $zeta=ζ;
### $eta=η;
### $theta=θ;
### $iota=ι;
### $kappa=κ;
### $lambda=λ;
### $mu=μ;
### $nu=ν;
### $xi=ξ;
### $omicron=ο;
### $pi=π;
### $rho=ρ;
### $sigma=σ;
### $tau=τ;
### $ypsilon=υ;
### $phi=φ;
### $chi=χ;
### $psi=ψ;
### $omega=ω;
#forms
### $alpha2=ά;
### $epsilon2=έ;
### $eta2=ή;
### $iota2=ί;
### $omicron2=ό;
### $ypsilon2=ύ;
### $omega2=ώ;
### $iota_diaeresis=ϊ;
### $ypsilon_diaeresis=ϋ;
### $iota_diaeresis2=ΐ;
### $ypsilon_diaeresis2=ΰ;
### $sigma2=ς;
# Variables for conditional mappings
# Use lowercase for all variable names, to allow cut/paste below.
# Right context of the sigma rules: decides between non-final σ and final ς.
# NOTE(review): the leading ~ inside the set is not explained here - confirm whether
# it is meant as a literal member (cf. the '~'s marker used by the sigma rules).
$letter=[~[:Lu:][:Ll:]];
# Any lowercase letter; right context of the titlecase digraph rules (Th, Ph, Ch).
$lower=[[:Ll:]];
# Latin letters before which C/c is rewritten to S/s instead of K/k.
$softener=[eiyEIY];
# Latin and Greek vowels (plain, accented and diaeresis forms); left context
# that makes an ypsilon transliterate as "u" rather than "y".
$vowel=[aeiouAEIOU \
ΑΕΗΙΟΥΩ \
ΆΈΉΊΌΎΏ \
ΪΫ \
αεηιουω \
άέήίόύώ \
ϊϋ \
ΐΰ \
];
# Latin letters after which a preceding N/n stands for gamma (N }$n_gamma rules).
$n_gamma=[GKXCgkxc];
# Greek letters before which gamma is written N/n; also used to keep a real nu
# before these letters distinct by appending an apostrophe (N'' rules).
$gamma_n=[ΓΚΧΞγκχξ];
# Latin P/p; left context used to separate P + S from the PS (psi) digraph.
$pp=[Pp];
# ==============================================
# Rules
# ==============================================
# The following are special titlecases, and should
# not be copied when duplicating the lowercase
# ==============================================
Th <> Θ}$lower;
Ph <> Φ}$lower;
Ch <> Χ}$lower;
#masked: + "Ps<Φ}$lower;"
# Because there are no uppercase forms for final sigma,
# we had to move all the sigma rules up here.
# Remember to insert ' to preserve round trip, for double letters
# don't need to do this for the digraphs with h,
# since it is not created when mapping back from greek
# use special form for s
''S <> $pp{Σ; # handle PS
S <> Σ;
# The following are a bit tricky. 's' takes two forms in greek
# final or non final.
# We use ~s to represent the abnormal form: final before letter
# or non-final before non-letter.
# We use 's to separate p and s (otherwise ps is one letter)
# so, we break out the following forms:
''s < $pp{σ}$letter;
s < σ}$letter;
'~'s < σ;
'~'s < ς}$letter;
''s < $pp{ς;
s < ς;
'~'s }$letter>ς;
'~'s > σ;
''s }$letter>σ;
''s > ς;
s }$letter>σ;
s > ς;
# because there are no uppercase forms, had to move these up too.
i'\"''`'>ΐ;
y'\"''`'>ΰ;
î<>ΐ;
û<>$vowel{ΰ;
ŷ<>ΰ;
# ==============================================
# Uppercase Forms.
# To make lowercase forms, just copy and lowercase below
# ==============================================
# Typing variants, in case the keyboard doesn't have accents
'A`'>Ά;
'E`'>Έ;
'EE`'>Ή;
EE>Η;
'I`'>Ί;
'O`'>Ό;
'OO`'>Ώ;
OO>Ω;
I'\"'>Ϊ;
Y'\"'>Ϋ;
# Basic Letters
A<>Α;
Á<>Ά;
B<>Β;
N }$n_gamma<>Γ}$gamma_n;
G<>Γ;
D<>Δ;
''E <> [Ee]{Ε; # handle EE
E<>Ε;
É<>Έ;
Z<>Ζ;
Ê<>Ή;
Ē<>Η;
TH<>Θ;
I<>Ι;
Í<>Ί;
Ï<>Ϊ;
K<>Κ;
L<>Λ;
M<>Μ;
N'' <> Ν}$gamma_n;
N<>Ν;
X<>Ξ;
''O <> [Oo]{ Ο; # handle OO
O<>Ο;
Ó<>Ό;
PH<>Φ; # needs ordering before P
PS<>Ψ; # needs ordering before P
P<>Π;
R<>Ρ;
T<>Τ;
U <> $vowel{Υ;
Ú <> $vowel{Ύ;
Ü <> $vowel{Ϋ;
Y<>Υ;
Ý<>Ύ;
Ÿ<>Ϋ;
CH<>Χ;
Ô<>Ώ;
Ō<>Ω;
# Extra English Letters. Mapped for completeness
# These are one-way (Latin to Greek) rewrites of letters that have no Greek
# counterpart. The | at the start of each output places the cursor BEFORE the
# replacement text, so the substituted Latin letters are matched again by the
# rules above (e.g. F becomes PH, which PH<>Φ then converts).
# C is S before a letter in $softener, otherwise K.
C}$softener>|S;
C>|K;
F>|PH;
H>|CH;
J>|I;
Q>|K;
V>|U;
W>|U;
# ==============================================
# Lowercase Forms. Just copy above and lowercase
# ==============================================
# typing variants, in case the keyboard doesn't have accents
'a`'>ά;
'e`'>έ;
'ee`'>ή;
ee>η;
'i`'>ί;
'o`'>ό;
'oo`'>ώ;
oo>ω;
i'\"'>ϊ;
y'\"'>ϋ;
# basic letters
a<>α;
á<>ά;
b<>β;
n }$n_gamma<>γ}$gamma_n;
g<>γ;
d<>δ;
''e <> [Ee]{ε; # handle EE
e<>ε;
é<>έ;
z<>ζ;
ê<>ή;
ē<>η;
th<>θ;
i<>ι;
í<>ί;
ï<>ϊ;
k<>κ;
l<>λ;
m<>μ;
n'' <> ν}$gamma_n;
n<>ν;
x<>ξ;
''o <> [Oo]{ ο; # handle OO
o<>ο;
ó<>ό;
ph<>φ; # needs ordering before p
ps<>ψ; # needs ordering before p
p<>π;
r<>ρ;
t<>τ;
u <> $vowel{υ;
ú <> $vowel{ύ;
ü <> $vowel{ϋ;
y<>υ;
ý<>ύ;
ÿ<>ϋ;
ch<>χ;
ô<>ώ;
ō<>ω;
# extra english letters. mapped for completeness
# These are one-way (Latin to Greek) rewrites of letters that have no Greek
# counterpart. The | at the start of each output places the cursor BEFORE the
# replacement text, so the substituted Latin letters are matched again by the
# rules above (e.g. f becomes ph, which ph<>φ then converts).
# c is s before a letter in $softener, otherwise k.
c}$softener>|s;
c>|k;
f>|ph;
h>|ch;
j>|i;
q>|k;
v>|u;
w>|u;
# ====================================
# Normal final rule: remove '
# ====================================
#+ "''>;"
# eof

View file

@ -0,0 +1,216 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:06 2001
#--------------------------------------------------------------------
# Latin-Hebrew
# Variable names, derived from the Unicode names.
### $POINT_SHEVA=ְ;
### $POINT_HATAF_SEGOL=ֱ;
### $POINT_HATAF_PATAH=ֲ;
### $POINT_HATAF_QAMATS=ֳ;
### $POINT_HIRIQ=ִ;
### $POINT_TSERE=ֵ;
### $POINT_SEGOL=ֶ;
### $POINT_PATAH=ַ;
### $POINT_QAMATS=ָ;
### $POINT_HOLAM=ֹ;
### $POINT_QUBUTS=ֻ;
### $POINT_DAGESH_OR_MAPIQ=ּ;
### $POINT_METEG=ֽ;
### $PUNCTUATION_MAQAF=־;
### $POINT_RAFE=ֿ;
### $PUNCTUATION_PASEQ=׀;
### $POINT_SHIN_DOT=ׁ;
### $POINT_SIN_DOT=ׂ;
### $PUNCTUATION_SOF_PASUQ=׃;
### $ALEF=א;
### $BET=ב;
### $GIMEL=ג;
### $DALET=ד;
### $HE=ה;
### $VAV=ו;
### $ZAYIN=ז;
### $HET=ח;
### $TET=ט;
### $YOD=י;
### $FINAL_KAF=ך;
### $KAF=כ;
### $LAMED=ל;
### $FINAL_MEM=ם;
### $MEM=מ;
### $FINAL_NUN=ן;
### $NUN=נ;
### $SAMEKH=ס;
### $AYIN=ע;
### $FINAL_PE=ף;
### $PE=פ;
### $FINAL_TSADI=ץ;
### $TSADI=צ;
### $QOF=ק;
### $RESH=ר;
### $SHIN=ש;
### $TAV=ת;
### $YIDDISH_DOUBLE_VAV=װ;
### $YIDDISH_VAV_YOD=ױ;
### $YIDDISH_DOUBLE_YOD=ײ;
### $PUNCTUATION_GERESH=׳;
### $PUNCTUATION_GERSHAYIM=״;
$letter=[a-zA-Z];
$softvowel=[eiyEIY];
$vowellike=[אעיו];
$hebrew=[֐-׿]; # the whole block -liu
# [Why is this a special case? -liu]
k''h <> כ ה ;
# Mark non-final forms in final position as x~ -liu
k < כ } $hebrew ;
m < מ } $hebrew ;
n < נ } $hebrew ;
p < פ } $hebrew ;
ts < צ } $hebrew ;
k'~' <> כ ;
m'~' <> מ ;
n'~' <> נ ;
p'~' <> פ ;
ts'~'<> צ ;
# Mark final forms in non-final position as x^ -liu
k'^' <> ך } $hebrew ;
m'^' <> ם } $hebrew ;
n'^' <> ן } $hebrew ;
p'^' <> ף } $hebrew ;
ts'^'<> ץ } $hebrew ;
k < ך;
m < ם;
n < ן;
p < ף;
ts < ץ;
# Main rules
a<>א;
A>א;
b<>ב;
B>ב;
c}$softvowel>ס;
C}$softvowel>ס;
c}$letter>כ;
C}$letter>כ;
c>ך;
C>ך;
d<>ד;
D>ד;
e<>ע;
E>ע;
f}$letter>פ;
f>ף;
F}$letter>פ;
F>ף;
g<>ג;
G>ג;
h<>ה;
H>ה;
i>י;
I>י;
j>דש;
J>דש;
kh<>ח;
kH>ח;
Kh>ח;
KH>ח;
k}$letter>כ;
K}$letter>כ;
k>ך;
K>ך;
l<>ל;
L>ל;
m}$letter>מ;
m>ם;
M}$letter>מ;
M>ם;
n}$letter>נ;
n>ן;
N}$letter>נ;
N>ן;
o>ו;
O>ו;
p}$letter>פ;
p>ף;
P}$letter>פ;
P>ף;
q<>ק;
Q>ק;
r<>ר;
R>ר;
sh<>ש;
sH>ש;
Sh>ש;
SH>ש;
s''<ס}ה;
s<>ס;
S>ס;
th<>ת;
tH>ת;
Th>ת;
TH>ת;
tS}$letter>צ;
ts}$letter>צ;
Ts}$letter>צ;
TS}$letter>צ;
tS>ץ;
ts>ץ;
Ts>ץ;
TS>ץ;
t''<ט}[ה ס ש];
t<>ט;
T>ט;
v<ו}$vowellike;
u<>ו;
U>ו;
v>ו;
V>ו;
w>ו;
W>ו;
x>כס;
X>כס;
y<>י;
Y>י;
z<>ז;
Z>ז;
# Delete stray apostrophes
''>;
<'';
# eof

View file

@ -0,0 +1,742 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:18:45 2001
#--------------------------------------------------------------------
# Latin-Jamo
# VARIABLES
$initial=[ᄀ-];
$INITIAL=[bcdghjklmnpst];
$medial=[-ᆧ];
$MEDIAL=[aeiou]; # as a left context
$comp_med=[ᅠᅶ-ᆧ]; # compound medials and filler
$final=[ᆨ-ᇹ]; # added - aliu
$vowel=[aeiouwy$medial];
# following line used to read "..$medial$final]"
# assume this was a typo - liu
$consonant=[bcdfghjklmnpqrstvxz$initial$final];
# Small Latin letter sets, each named after its members (both cases).
# NOTE(review): in the rules visible in this file these are referenced only by
# the commented-out apostrophe-insertion rules (lines of the form
# "'' < $lsgb_{}..."); confirm they are still needed before removing them.
$ye_=[yeYE];
$ywe_=[yweYWE];
$yw_=[ywYW];
$nl_=[nlNL];
$gnl_=[gnlGNL];
$lsgb_=[lsgbLSGB];
$ywao_=[ywaoYWAO];
$bl_=[blBL];
### $ieung = ᄋ;
# RULES
# Hangul structure is IMF or IM
# So you can have, because of adjacent sequences
# IM, but not II or IF
# MF or MI, but not MM
# FI, but not FF or FM
# For English, we just have C or V.
# To generate valid Hangul:
# Vowels:
# We insert IEUNG between VV, and otherwise map V to M
# We also insert IEUNG if there is no initial in front of the vowel
# Consonants:
# We don't break doubles
# Cases like lmgg, we have to break at lm
# So to guess whether a consonant is I or F
# we map all C's to F, except when followed by a vowel, e.g.
# X[{vowel}>CHOSEONG (initial)
# X>JONGSEONG (final)
# Below, insert an empty consonant in front of a vowel, if there is no Initial in front.
# General strategy.
#
# 1. We support both the normal Jamo block, 1100 - 117F, and the
# compatibility block, 3130 - 318F. The former uses lowercase latin;
# the latter uses uppercase. See notes below for details of the
# compatibility block. Remaining items in this list pertain to the
# normal Jamo block.
#
# 2. Canonical syllables should transliterate without special
# characters. Canonical syllables are either IMF or IM.
#
# 3. We want to support round-trip integrity from jamo to latin and back
# to Jamo. To do this we have to mark the jamo with special characters
# when they occur in non-canonical positions.
#
# 4. When initial jamo occur in a non-canonical position, they are
# marked with a leading '['.
#
# 5. When final jamo occur in a non-canonical position, they are marked
# with a trailing ']'.
#
# 6. When medial jamo occur in a non-canonical position, they are marked
# with a leading '~'.
#
# 7. Compound jamo characters are handled by enclosing them in
# parentheses. Initials are '((x)', medials are '(x)', and finals are
# '(x))'.
#
# 8. Disambiguation of 'g' + 'g' vs. 'gg' is accomplished by inserting a
# '' character between them.
#
# 9. IEUNG is used to mark medials not occurring after initials.
# Isolated IEUNG is transliterated as a back tick.
#
# 10. Some old special case and completeness rules have been commented
# out. These can be reintroduced (and the existing rules modified as
# needed) so long as round-trip integrity is maintained.
# We use the uppercase latin letters for the compatibility Jamo
# U+3130 - U+318F. The following rules are generated
# programmatically by a perl script that analyzes the Unicode
# database. These rules are much simpler because there are no
# separate code points for initial vs. final consonants, so no
# contextual rules are needed. The one wrinkle is, as usual, the
# need to distinguish doubles from two singles, that is, GG vs G G.
# The perl script finds these special cases by exhaustive search and
# adds only the minimal rules needed to resolve these cases. The one
# modification that is made by hand is to replace '' with '/' so as
# not to conflict with the normal IEUNG in the standard Jamo range. -
# liu
A '' <> {ㅏ} [ㅓㅡㅔ];
B '' <> {ㅂ} [ㅂㅃ];
D '' <> {ㄷ} [ㄷㄸ];
E '' <> {ㅔ} [ㅚㅗㅜ];
G '' <> {ㄱ} [ㄲㄳㄱㅆㅅ];
J '' <> {ㅈ} [ㅉㅈ];
L '' <> {ㄹ} [ㄲㄳㄱㅁㅂㅃㅆㅅㅌㅍ];
N '' <> {ㄴ} [ㅉㅈㅎ];
O '' <> {ㅗ} [ㅓㅡㅔ];
S '' <> {ㅅ} [ㅆㅅ];
WA '' <> {ㅘ} [ㅓㅡㅔ];
WE '' <> {ㅞ} [ㅚㅗ];
YA '' <> {ㅑ} [ㅓㅡㅔ];
YE '' <> {ㅖ} [ㅚㅗ];
YU <> ㅠ;
YO <> ㅛ;
YI <> ㅢ;
YEO <> ㅕ;
YE <> ㅖ;
YAE <> ㅒ;
YA <> ㅑ;
WI <> ㅟ;
WEO <> ㅝ;
WE <> ㅞ;
WAE <> ㅙ;
WA <> ㅘ;
U <> ㅜ;
T <> ㅌ;
S S <> ㅆ;
S <> ㅅ;
P <> ㅍ;
OE <> ㅚ;
O <> ㅗ;
N J <> ㄵ;
N H <> ㄶ;
N <> ㄴ;
M <> ㅁ;
L T <> ㄾ;
L S <> ㄽ;
L P <> ㄿ;
L M <> ㄻ;
L G <> ㄺ;
L B <> ㄼ;
L <> ㄹ;
K <> ㅋ;
J J <> ㅉ;
J <> ㅈ;
I <> ㅣ;
H <> ㅎ;
G S <> ㄳ;
G G <> ㄲ;
G <> ㄱ;
EU <> ㅡ;
EO <> ㅓ;
E <> ㅔ;
D D <> ㄸ;
D <> ㄷ;
C <> ㅊ;
B B <> ㅃ;
B <> ㅂ;
AE <> ㅐ;
A <> ㅏ;
'/' <> ㅇ;
'(' YU YEO ')' <> ㆊ;
'(' YU YE ')' <> ㆋ;
'(' YU I ')' <> ㆌ;
'(' YR ')' <> ㆆ;
'(' YO YAE ')' <> ㆈ;
'(' YO YA ')' <> ㆇ;
'(' YO I ')' <> ㆉ;
'(' YES S ')' <> ㆂ;
'(' YES PAN ')' <> ㆃ;
'(' YES ')' <> ㆁ;
'(' S N ')' <> ㅻ;
'(' S J ')' <> ㅾ;
'(' S G ')' <> ㅺ;
'(' S D ')' <> ㅼ;
'(' S B ')' <> ㅽ;
'(' PAN ')' <> ㅿ;
'(' P '' ')' <> ㆄ;
'(' N S ')' <> ㅧ;
'(' N PAN ')' <> ㅨ;
'(' N N ')' <> ㅥ;
'(' N D ')' <> ㅦ;
'(' M S ')' <> ㅯ;
'(' M PAN ')' <> ㅰ;
'(' M B ')' <> ㅮ;
'(' M '' ')' <> ㅱ;
'(' L YR ')' <> ㅭ;
'(' L PAN ')' <> ㅬ;
'(' L H ')' <> ㅀ;
'(' L G S ')' <> ㅩ;
'(' L D ')' <> ㅪ;
'(' L B S ')' <> ㅫ;
'(' HJF ')' <> ;
'(' H H ')' <> ㆅ;
'(' B T ')' <> ㅷ;
'(' B S G ')' <> ㅴ;
'(' B S D ')' <> ㅵ;
'(' B S ')' <> ㅄ;
'(' B J ')' <> ㅶ;
'(' B G ')' <> ㅲ;
'(' B D ')' <> ㅳ;
'(' B B '' ')' <> ㅹ;
'(' B '' ')' <> ㅸ;
'(' AR I ')' <> ㆎ;
'(' AR ')' <> ㆍ;
'(' '' '' ')' <> ㆀ;
# APOSTROPHE
# As always, an apostrophe is used to separate digraphs into
# singles. That is, if you really wanted [KAN][GGAN], instead
# of [KANG][GAN] you would write "kan'ggan".
# Rules for inserting ' when mapping separated digraphs back
# from Hangul to Latin. Catch every letter that can be the
# LAST of a digraph (or multigraph) AND first of an initial
# special insertion for funny sequences of vowels, and for empty consonant
# + "'' < l{ }ᇀ;" // hangul jongseong thieuth
# + "'' < $lsgb_{}ᆺ;" // hangul jongseong sios
# + "'' < l{ }ᇁ;" // hangul jongseong phieuph
# + "'' < l{ }ᆷ;" // hangul jongseong mieum
# + "'' < n{ }ᆽ;" // hangul jongseong cieuc
# + "'' < $nl_{}ᇂ;" // hangul jongseong hieuh
# + "'' < $gnl_{}ᆩ;" // hangul jongseong ssangkiyeok
# + "'' < $bl_{}ᆸ;" // hangul jongseong pieup
# + "'' < d{ }ᆮ;" // hangul jongseong tikeut
#
# + "'' < $ye_{}ᅮ;" // hangul jungseong u
# + "'' < $ywe_{}ᅩ;" // hangul jungseong o
# + "'' < $yw_{}ᅵ;" // hangul jungseong i
# + "'' < $ywao_{}ᅦ;" // hangul jungseong e
# + "'' < $yw_{}ᅡ;" // hangul jungseong a
#
# + "'' < l{ }ᄐ;" // hangul choseong thieuth
# + "'' < $lsgb_{}ᄊ;" // hangul choseong ssangsios
# + "'' < $lsgb_{}ᄉ;" // hangul choseong sios
# + "'' < l{ }ᄑ;" // hangul choseong phieuph
# + "'' < l{ }ᄆ;" // hangul choseong mieum
# + "'' < n{ }ᄌ;" // hangul choseong cieuc
# + "'' < n{ }ᄍ;"
# + "'' < $nl_{}ᄒ;" // hangul choseong hieuh
# + "'' < $gnl_{}ᄁ;" // hangul choseong ssangkiyeok
# + "'' < $gnl_{}ᄀ;" // hangul choseong kiyeok
# + "'' < d{ }ᄃ;" // hangul choseong tikeut
# + "'' < d{ }ᄄ;"
# + "'' < $bl_{}ᄇ;" // hangul choseong pieup
# + "'' < $bl_{}ᄈ;"
# We transliterate the compound Jamo code points using ((x) for
# initials, (x) for medials, and (x)) for finals. - liu
'((' n g ')' <> ᄓ;
'((' n n ')' <> ᄔ;
'((' n d ')' <> ᄕ;
'((' n b ')' <> ᄖ;
'((' d g ')' <> ᄗ;
'((' l n ')' <> ᄘ;
'((' l l ')' <> ᄙ;
'((' l h ')' <> ᄚ;
'((' l '' ')' <> ᄛ;
'((' m b ')' <> ᄜ;
'((' m '' ')' <> ᄝ;
'((' b g ')' <> ᄞ;
'((' b n ')' <> ᄟ;
'((' b d ')' <> ᄠ;
'((' b s ')' <> ᄡ;
'((' b s g ')' <> ᄢ;
'((' b s d ')' <> ᄣ;
'((' b s b ')' <> ᄤ;
'((' b s s ')' <> ᄥ;
'((' b s j ')' <> ᄦ;
'((' b j ')' <> ᄧ;
'((' b c ')' <> ᄨ;
'((' b t ')' <> ᄩ;
'((' b p ')' <> ᄪ;
'((' b '' ')' <> ᄫ;
'((' b b '' ')' <> ᄬ;
'((' s g ')' <> ᄭ;
'((' s n ')' <> ᄮ;
'((' s d ')' <> ᄯ;
'((' s l ')' <> ᄰ;
'((' s m ')' <> ᄱ;
'((' s b ')' <> ᄲ;
'((' s b g ')' <> ᄳ;
'((' s s s ')' <> ᄴ;
'((' s '' ')' <> ᄵ;
'((' s j ')' <> ᄶ;
'((' s c ')' <> ᄷ;
'((' s k ')' <> ᄸ;
'((' s t ')' <> ᄹ;
'((' s p ')' <> ᄺ;
'((' s h ')' <> ᄻ;
'((' chs ')' <> ᄼ;
'((' chs chs ')' <> ᄽ;
'((' ces ')' <> ᄾ;
'((' ces ces ')' <> ᄿ;
'((' pan ')' <> ᅀ;
'((' '' g ')' <> ᅁ;
'((' '' d ')' <> ᅂ;
'((' '' m ')' <> ᅃ;
'((' '' b ')' <> ᅄ;
'((' '' s ')' <> ᅅ;
'((' '' pan ')' <> ᅆ;
'((' '' '' ')' <> ᅇ;
'((' '' j ')' <> ᅈ;
'((' '' c ')' <> ᅉ;
'((' '' t ')' <> ᅊ;
'((' '' p ')' <> ᅋ;
'((' yes ')' <> ᅌ;
'((' j '' ')' <> ᅍ;
'((' chc ')' <> ᅎ;
'((' chc chc ')' <> ᅏ;
'((' cec ')' <> ᅐ;
'((' cec cec ')' <> ᅑ;
'((' c k ')' <> ᅒ;
'((' c h ')' <> ᅓ;
'((' cch ')' <> ᅔ;
'((' ceh ')' <> ᅕ;
'((' p b ')' <> ᅖ;
'((' p '' ')' <> ᅗ;
'((' h h ')' <> ᅘ;
'((' yr ')' <> ᅙ;
'((' hcf ')' <> ;
'(' ahjf ')' <> ; # must start with vowel, hence 'a' + hjf
'(' a o ')' <> ᅶ;
'(' a u ')' <> ᅷ;
'(' ya o ')' <> ᅸ;
'(' ya yo ')' <> ᅹ;
'(' eo o ')' <> ᅺ;
'(' eo u ')' <> ᅻ;
'(' eo eu ')' <> ᅼ;
'(' yeo o ')' <> ᅽ;
'(' yeo u ')' <> ᅾ;
'(' o eo ')' <> ᅿ;
'(' o e ')' <> ᆀ;
'(' o ye ')' <> ᆁ;
'(' o o ')' <> ᆂ;
'(' o u ')' <> ᆃ;
'(' yo ya ')' <> ᆄ;
'(' yo yae ')' <> ᆅ;
'(' yo yeo ')' <> ᆆ;
'(' yo o ')' <> ᆇ;
'(' yo i ')' <> ᆈ;
'(' u a ')' <> ᆉ;
'(' u ae ')' <> ᆊ;
'(' u eo eu ')' <> ᆋ;
'(' u ye ')' <> ᆌ;
'(' u u ')' <> ᆍ;
'(' yu a ')' <> ᆎ;
'(' yu eo ')' <> ᆏ;
'(' yu e ')' <> ᆐ;
'(' yu yeo ')' <> ᆑ;
'(' yu ye ')' <> ᆒ;
'(' yu u ')' <> ᆓ;
'(' yu i ')' <> ᆔ;
'(' eu u ')' <> ᆕ;
'(' eu eu ')' <> ᆖ;
'(' yi u ')' <> ᆗ;
'(' i a ')' <> ᆘ;
'(' i ya ')' <> ᆙ;
'(' i o ')' <> ᆚ;
'(' i u ')' <> ᆛ;
'(' i eu ')' <> ᆜ;
'(' i ar ')' <> ᆝ;
'(' ar ')' <> ᆞ;
'(' ar eo ')' <> ᆟ;
'(' ar u ')' <> ᆠ;
'(' ar i ')' <> ᆡ;
'(' ar ar ')' <> ᆢ;
'(' g l '))' <> ᇃ;
'(' g s g '))' <> ᇄ;
'(' n g '))' <> ᇅ;
'(' n d '))' <> ᇆ;
'(' n s '))' <> ᇇ;
'(' n pan '))' <> ᇈ;
'(' n t '))' <> ᇉ;
'(' d g '))' <> ᇊ;
'(' d l '))' <> ᇋ;
'(' l g s '))' <> ᇌ;
'(' l n '))' <> ᇍ;
'(' l d '))' <> ᇎ;
'(' l d h '))' <> ᇏ;
'(' l l '))' <> ᇐ;
'(' l m g '))' <> ᇑ;
'(' l m s '))' <> ᇒ;
'(' l b s '))' <> ᇓ;
'(' l b h '))' <> ᇔ;
'(' l b ng '))' <> ᇕ;
'(' l s s '))' <> ᇖ;
'(' l pan '))' <> ᇗ;
'(' l k '))' <> ᇘ;
'(' l yr '))' <> ᇙ;
'(' m g '))' <> ᇚ;
'(' m l '))' <> ᇛ;
'(' m b '))' <> ᇜ;
'(' m s '))' <> ᇝ;
'(' m s s '))' <> ᇞ;
'(' m pan '))' <> ᇟ;
'(' m c '))' <> ᇠ;
'(' m h '))' <> ᇡ;
'(' m ng '))' <> ᇢ;
'(' b l '))' <> ᇣ;
'(' b p '))' <> ᇤ;
'(' b h '))' <> ᇥ;
'(' b ng '))' <> ᇦ;
'(' s g '))' <> ᇧ;
'(' s d '))' <> ᇨ;
'(' s l '))' <> ᇩ;
'(' s b '))' <> ᇪ;
'(' pan '))' <> ᇫ;
'(' ng g '))' <> ᇬ;
'(' ng g g '))' <> ᇭ;
'(' ng ng '))' <> ᇮ;
'(' ng k '))' <> ᇯ;
'(' yes '))' <> ᇰ;
'(' yes s '))' <> ᇱ;
'(' yes pan '))' <> ᇲ;
'(' p b '))' <> ᇳ;
'(' p ng '))' <> ᇴ;
'(' h n '))' <> ᇵ;
'(' h l '))' <> ᇶ;
'(' h m '))' <> ᇷ;
'(' h b '))' <> ᇸ;
'(' yr '))' <> ᇹ;
# INITIALS
# Added }$vowel post context - liu
bb}$vowel<>ᄈ } $vowel;
jj}$vowel<>ᄍ } $vowel;
dd}$vowel<>ᄄ } $vowel;
t }$vowel<>ᄐ } $vowel; # hangul choseong thieuth
ss}$vowel<>ᄊ } $vowel; # hangul choseong ssangsios
s }$vowel<>ᄉ } $vowel; # hangul choseong sios
p }$vowel<>ᄑ } $vowel; # hangul choseong phieuph
n }$vowel<>ᄂ } $vowel; # hangul choseong nieun
m }$vowel<>ᄆ } $vowel; # hangul choseong mieum
l }$vowel<>ᄅ } $vowel; # hangul choseong rieul
k }$vowel<>ᄏ } $vowel; # hangul choseong khieukh
j }$vowel<>ᄌ } $vowel; # hangul choseong cieuc
h }$vowel<>ᄒ } $vowel; # hangul choseong hieuh
gg}$vowel<>ᄁ } $vowel; # hangul choseong ssangkiyeok
g }$vowel<>ᄀ } $vowel; # hangul choseong kiyeok
d }$vowel<>ᄃ } $vowel; # hangul choseong tikeut
c }$vowel<>ᄎ } $vowel; # hangul choseong chieuch
b }$vowel<>ᄇ } $vowel; # hangul choseong pieup
# Take care of initial-compound medial - '(' $vowel - liu
bb} '(' $vowel <> ᄈ } $comp_med;
jj} '(' $vowel <> ᄍ } $comp_med;
dd} '(' $vowel <> ᄄ } $comp_med;
t } '(' $vowel <> ᄐ } $comp_med; # hangul choseong thieuth
ss} '(' $vowel <> ᄊ } $comp_med; # hangul choseong ssangsios
s } '(' $vowel <> ᄉ } $comp_med; # hangul choseong sios
p } '(' $vowel <> ᄑ } $comp_med; # hangul choseong phieuph
n } '(' $vowel <> ᄂ } $comp_med; # hangul choseong nieun
m } '(' $vowel <> ᄆ } $comp_med; # hangul choseong mieum
l } '(' $vowel <> ᄅ } $comp_med; # hangul choseong rieul
k } '(' $vowel <> ᄏ } $comp_med; # hangul choseong khieukh
j } '(' $vowel <> ᄌ } $comp_med; # hangul choseong cieuc
h } '(' $vowel <> ᄒ } $comp_med; # hangul choseong hieuh
gg} '(' $vowel <> ᄁ } $comp_med; # hangul choseong ssangkiyeok
g } '(' $vowel <> ᄀ } $comp_med; # hangul choseong kiyeok
d } '(' $vowel <> ᄃ } $comp_med; # hangul choseong tikeut
c } '(' $vowel <> ᄎ } $comp_med; # hangul choseong chieuch
b } '(' $vowel <> ᄇ } $comp_med; # hangul choseong pieup
# Mark non-canonical initials with '[' - liu
'[' bb <> ᄈ;
'[' jj <> ᄍ;
'[' dd <> ᄄ;
'[' t <> ᄐ; # hangul choseong thieuth
'[' ss <> ᄊ; # hangul choseong ssangsios
'[' s <> ᄉ; # hangul choseong sios
'[' p <> ᄑ; # hangul choseong phieuph
'[' n <> ᄂ; # hangul choseong nieun
'[' m <> ᄆ; # hangul choseong mieum
'[' l <> ᄅ; # hangul choseong rieul
'[' k <> ᄏ; # hangul choseong khieukh
'[' j <> ᄌ; # hangul choseong cieuc
'[' h <> ᄒ; # hangul choseong hieuh
'[' gg <> ᄁ; # hangul choseong ssangkiyeok
'[' g <> ᄀ; # hangul choseong kiyeok
'[' d <> ᄃ; # hangul choseong tikeut
'[' c <> ᄎ; # hangul choseong chieuch
'[' b <> ᄇ; # hangul choseong pieup
# If we have gotten through to these rules, and we start with
# a consonant, then the remaining mappings would be to F,
# because we must have CC (or C<non-letter>), not CV.
# If we have F before us, then
# we would end up with FF, which is wrong. The simplest fix is
# to still make it an initial, but also insert an "u",
# so we end up with F, I, u, and then continue with the C
# special, only initial
# + "bb > 뿌;" // bb u hangul choseong ssangpieup
# + "jj > 쭈;" // jj u hangul choseong ssangcieuc
# + "dd > 뚜;" // dd u hangul choseong ssangtikeut
# + "$final{ t > 투;" // hangul choseong thieuth
# + "$final{ ss> 쑤;" // hangul choseong ssangsios
# + "$final{ s > 수;" // hangul choseong sios
# + "$final{ p > 푸;" // hangul choseong phieuph
# + "$final{ n > 누;" // hangul choseong nieun
# + "$final{ m > 무;" // hangul choseong mieum
# + "$final{ l > 루;" // hangul choseong rieul
# + "$final{ k > 쿠;" // hangul choseong khieukh
# + "$final{ j > 주;" // hangul choseong cieuc
# + "$final{ h > 후;" // hangul choseong hieuh
# + "$final{ gg> 꾸;" // hangul choseong ssangkiyeok
# + "$final{ g > 구;" // hangul choseong kiyeok
# + "$final{ d > 두;" // hangul choseong tikeut
# + "$final{ c > 추;" // hangul choseong chieuch
# + "$final{ b > 부;" // hangul choseong pieup
# MEDIALS after INITIALS
# (Rules for MEDIALS (vowels) not after INITIALs follow in a later section.)
# Added left $initial context - liu
$initial{ yu <> $INITIAL{ ᅲ; # hangul jungseong yu
$initial{ yo <> $INITIAL{ ᅭ; # hangul jungseong yo
$initial{ yi <> $INITIAL{ ᅴ; # hangul jungseong yi
$initial{ yeo<> $INITIAL{ ᅧ; # hangul jungseong yeo
$initial{ ye <> $INITIAL{ ᅨ; # hangul jungseong ye
$initial{ yae<> $INITIAL{ ᅤ; # hangul jungseong yae
$initial{ ya <> $INITIAL{ ᅣ; # hangul jungseong ya
$initial{ wi <> $INITIAL{ ᅱ; # hangul jungseong wi
$initial{ weo<> $INITIAL{ ᅯ; # hangul jungseong weo
$initial{ we <> $INITIAL{ ᅰ; # hangul jungseong we
$initial{ wae<> $INITIAL{ ᅫ; # hangul jungseong wae
$initial{ wa <> $INITIAL{ ᅪ; # hangul jungseong wa
$initial{ u <> $INITIAL{ ᅮ; # hangul jungseong u
$initial{ oe <> $INITIAL{ ᅬ; # hangul jungseong oe
$initial{ o <> $INITIAL{ ᅩ; # hangul jungseong o
$initial{ i <> $INITIAL{ ᅵ; # hangul jungseong i
$initial{ eu <> $INITIAL{ ᅳ; # hangul jungseong eu
$initial{ eo <> $INITIAL{ ᅥ; # hangul jungseong eo
$initial{ e <> $INITIAL{ ᅦ; # hangul jungseong e
$initial{ ae <> $INITIAL{ ᅢ; # hangul jungseong ae
$initial{ a <> $INITIAL{ ᅡ; # hangul jungseong a
# Handle non-canonical isolated jungseong - liu
'~'yu <> ᅲ; # hangul jungseong yu
'~'yo <> ᅭ; # hangul jungseong yo
'~'yi <> ᅴ; # hangul jungseong yi
'~'yeo<> ᅧ; # hangul jungseong yeo
'~'ye <> ᅨ; # hangul jungseong ye
'~'yae<> ᅤ; # hangul jungseong yae
'~'ya <> ᅣ; # hangul jungseong ya
'~'wi <> ᅱ; # hangul jungseong wi
'~'weo<> ᅯ; # hangul jungseong weo
'~'we <> ᅰ; # hangul jungseong we
'~'wae<> ᅫ; # hangul jungseong wae
'~'wa <> ᅪ; # hangul jungseong wa
'~'u <> ᅮ; # hangul jungseong u
'~'oe <> ᅬ; # hangul jungseong oe
'~'o <> ᅩ; # hangul jungseong o
'~'i <> ᅵ; # hangul jungseong i
'~'eu <> ᅳ; # hangul jungseong eu
'~'eo <> ᅥ; # hangul jungseong eo
'~'e <> ᅦ; # hangul jungseong e
'~'ae <> ᅢ; # hangul jungseong ae
'~'a <> ᅡ; # hangul jungseong a
# MEDIALS (vowels) not after INITIALs
# Changed from > to <> - liu
yu <> ᄋ ᅲ; # hangul jungseong yu
yo <> ᄋ ᅭ; # hangul jungseong yo
yi <> ᄋ ᅴ; # hangul jungseong yi
yeo<> ᄋ ᅧ; # hangul jungseong yeo
ye <> ᄋ ᅨ; # hangul jungseong ye
yae<> ᄋ ᅤ; # hangul jungseong yae
ya <> ᄋ ᅣ; # hangul jungseong ya
wi <> ᄋ ᅱ; # hangul jungseong wi
weo<> ᄋ ᅯ; # hangul jungseong weo
we <> ᄋ ᅰ; # hangul jungseong we
wae<> ᄋ ᅫ; # hangul jungseong wae
wa <> ᄋ ᅪ; # hangul jungseong wa
u <> ᄋ ᅮ; # hangul jungseong u
oe <> ᄋ ᅬ; # hangul jungseong oe
o <> ᄋ ᅩ; # hangul jungseong o
i <> ᄋ ᅵ; # hangul jungseong i
eu <> ᄋ ᅳ; # hangul jungseong eu
eo <> ᄋ ᅥ; # hangul jungseong eo
e <> ᄋ ᅦ; # hangul jungseong e
ae <> ᄋ ᅢ; # hangul jungseong ae
a <> ᄋ ᅡ; # hangul jungseong a
\` <> ᄋ;
# Moved down so as not to mask above rules - liu
# + "'' < $consonant{ᄋ;" // insert a break between any consonant and the empty consonant.
# + "$medial{}$vowel<>ᄋ;" // HANGUL CHOSEONG IEUNG
# FINALS
'' t <> $consonant { ᇀ; # hangul jongseong thieuth
'' ss <> $consonant { ᆻ; # hangul jongseong ssangsios
'' s <> $consonant { ᆺ; # hangul jongseong sios
'' p <> $consonant { ᇁ; # hangul jongseong phieuph
'' nj <> $consonant { ᆬ; # hangul jongseong nieun-cieuc
'' nh <> $consonant { ᆭ; # hangul jongseong nieun-hieuh
'' ng <> $consonant { ᆼ; # hangul jongseong ieung
'' n <> $consonant { ᆫ; # hangul jongseong nieun
'' m <> $consonant { ᆷ; # hangul jongseong mieum
'' lt <> $consonant { ᆴ; # hangul jongseong rieul-thieuth
'' ls <> $consonant { ᆳ; # hangul jongseong rieul-sios
'' lp <> $consonant { ᆵ; # hangul jongseong rieul-phieuph
'' lm <> $consonant { ᆱ; # hangul jongseong rieul-mieum
'' lh <> $consonant { ᆶ; # hangul jongseong rieul-hieuh
'' lg <> $consonant { ᆰ; # hangul jongseong rieul-kiyeok
'' lb <> $consonant { ᆲ; # hangul jongseong rieul-pieup
'' l <> $consonant { ᆯ; # hangul jongseong rieul
'' k <> $consonant { ᆿ; # hangul jongseong khieukh
'' j <> $consonant { ᆽ; # hangul jongseong cieuc
'' h <> $consonant { ᇂ; # hangul jongseong hieuh
'' gs <> $consonant { ᆪ; # hangul jongseong kiyeok-sios
'' gg <> $consonant { ᆩ; # hangul jongseong ssangkiyeok
'' g <> $consonant { ᆨ; # hangul jongseong kiyeok
'' d <> $consonant { ᆮ; # hangul jongseong tikeut
'' c <> $consonant { ᆾ; # hangul jongseong chieuch
'' bs <> $consonant { ᆹ; # hangul jongseong pieup-sios
'' b <> $consonant { ᆸ; # hangul jongseong pieup
t ']'> ᇀ; # hangul jongseong thieuth
ss ']'> ᆻ; # hangul jongseong ssangsios
s ']'> ᆺ; # hangul jongseong sios
p ']'> ᇁ; # hangul jongseong phieuph
nj ']'> ᆬ; # hangul jongseong nieun-cieuc
nh ']'> ᆭ; # hangul jongseong nieun-hieuh
ng ']'> ᆼ; # hangul jongseong ieung
n ']'> ᆫ; # hangul jongseong nieun
m ']'> ᆷ; # hangul jongseong mieum
lt ']'> ᆴ; # hangul jongseong rieul-thieuth
ls ']'> ᆳ; # hangul jongseong rieul-sios
lp ']'> ᆵ; # hangul jongseong rieul-phieuph
lm ']'> ᆱ; # hangul jongseong rieul-mieum
lh ']'> ᆶ; # hangul jongseong rieul-hieuh
lg ']'> ᆰ; # hangul jongseong rieul-kiyeok
lb ']'> ᆲ; # hangul jongseong rieul-pieup
l ']'> ᆯ; # hangul jongseong rieul
k ']'> ᆿ; # hangul jongseong khieukh
j ']'> ᆽ; # hangul jongseong cieuc
h ']'> ᇂ; # hangul jongseong hieuh
gs ']'> ᆪ; # hangul jongseong kiyeok-sios
gg ']'> ᆩ; # hangul jongseong ssangkiyeok
g ']'> ᆨ; # hangul jongseong kiyeok
d ']'> ᆮ; # hangul jongseong tikeut
c ']'> ᆾ; # hangul jongseong chieuch
bs ']'> ᆹ; # hangul jongseong pieup-sios
b ']'> ᆸ; # hangul jongseong pieup
$medial{ t <> $MEDIAL{ ᇀ; # hangul jongseong thieuth
$medial{ ss <> $MEDIAL{ ᆻ; # hangul jongseong ssangsios
$medial{ s <> $MEDIAL{ ᆺ; # hangul jongseong sios
$medial{ p <> $MEDIAL{ ᇁ; # hangul jongseong phieuph
$medial{ nj <> $MEDIAL{ ᆬ; # hangul jongseong nieun-cieuc
$medial{ nh <> $MEDIAL{ ᆭ; # hangul jongseong nieun-hieuh
$medial{ ng <> $MEDIAL{ ᆼ; # hangul jongseong ieung
$medial{ n <> $MEDIAL{ ᆫ; # hangul jongseong nieun
$medial{ m <> $MEDIAL{ ᆷ; # hangul jongseong mieum
$medial{ lt <> $MEDIAL{ ᆴ; # hangul jongseong rieul-thieuth
$medial{ ls <> $MEDIAL{ ᆳ; # hangul jongseong rieul-sios
$medial{ lp <> $MEDIAL{ ᆵ; # hangul jongseong rieul-phieuph
$medial{ lm <> $MEDIAL{ ᆱ; # hangul jongseong rieul-mieum
$medial{ lh <> $MEDIAL{ ᆶ; # hangul jongseong rieul-hieuh
$medial{ lg <> $MEDIAL{ ᆰ; # hangul jongseong rieul-kiyeok
$medial{ lb <> $MEDIAL{ ᆲ; # hangul jongseong rieul-pieup
$medial{ l <> $MEDIAL{ ᆯ; # hangul jongseong rieul
$medial{ k <> $MEDIAL{ ᆿ; # hangul jongseong khieukh
$medial{ j <> $MEDIAL{ ᆽ; # hangul jongseong cieuc
$medial{ h <> $MEDIAL{ ᇂ; # hangul jongseong hieuh
$medial{ gs <> $MEDIAL{ ᆪ; # hangul jongseong kiyeok-sios
$medial{ gg <> $MEDIAL{ ᆩ; # hangul jongseong ssangkiyeok
$medial{ g <> $MEDIAL{ ᆨ; # hangul jongseong kiyeok
$medial{ d <> $MEDIAL{ ᆮ; # hangul jongseong tikeut
$medial{ c <> $MEDIAL{ ᆾ; # hangul jongseong chieuch
$medial{ bs <> $MEDIAL{ ᆹ; # hangul jongseong pieup-sios
$medial{ b <> $MEDIAL{ ᆸ; # hangul jongseong pieup
t ']'< ᇀ; # hangul jongseong thieuth
ss ']'< ᆻ; # hangul jongseong ssangsios
s ']'< ᆺ; # hangul jongseong sios
p ']'< ᇁ; # hangul jongseong phieuph
nj ']'< ᆬ; # hangul jongseong nieun-cieuc
nh ']'< ᆭ; # hangul jongseong nieun-hieuh
ng ']'< ᆼ; # hangul jongseong ieung
n ']'< ᆫ; # hangul jongseong nieun
m ']'< ᆷ; # hangul jongseong mieum
lt ']'< ᆴ; # hangul jongseong rieul-thieuth
ls ']'< ᆳ; # hangul jongseong rieul-sios
lp ']'< ᆵ; # hangul jongseong rieul-phieuph
lm ']'< ᆱ; # hangul jongseong rieul-mieum
lh ']'< ᆶ; # hangul jongseong rieul-hieuh
lg ']'< ᆰ; # hangul jongseong rieul-kiyeok
lb ']'< ᆲ; # hangul jongseong rieul-pieup
l ']'< ᆯ; # hangul jongseong rieul
k ']'< ᆿ; # hangul jongseong khieukh
j ']'< ᆽ; # hangul jongseong cieuc
h ']'< ᇂ; # hangul jongseong hieuh
gs ']'< ᆪ; # hangul jongseong kiyeok-sios
gg ']'< ᆩ; # hangul jongseong ssangkiyeok
g ']'< ᆨ; # hangul jongseong kiyeok
d ']'< ᆮ; # hangul jongseong tikeut
c ']'< ᆾ; # hangul jongseong chieuch
bs ']'< ᆹ; # hangul jongseong pieup-sios
b ']'< ᆸ; # hangul jongseong pieup
# extra English letters
# + "z > |s;"
# //{ + "Z > |s;" } masked
# + "x > |ks;"
# + "X > |ks;"
# + "v > |b;"
# + "V > |b;"
# + "r > |l;"
# + "R > |l;"
# + "q > |k;"
# + "Q > |k;"
# + "f > |p;"
# + "F > |p;"
# //{ + "c > |k;" } masked
# + "C > |k;"
# + "y > ᅲ;" // hangul jungseong yu
# + "w > ᅱ;" // hangul jungseong wi
# eof

View file

@ -0,0 +1,969 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:18:46 2001
#--------------------------------------------------------------------
# Latin-Kana
# Japanese hiragana and katakana to and from latin
# (romaji). Lower case latin corresponds to hiragana;
# upper case latin to katakana. The handling of
# Hiragana and Katakana is largely the same. The bulk
# of the transliterator consists of two identical sets
# of rules, differing only in case.
# Because of minor differences between the two blocks
# (e.g., the existence of small katakana ka and ke, but
# no corresponding hiragana), some rules exist for only
# one script.
# Uses modified Hepburn. Small changes to make
# unambiguous.
#| Kunrei-shiki: Hepburn/MHepburn
#| ------------------------------
#| si: shi
#| si ~ya: sha
#| si ~yu: shu
#| si ~yo: sho
#| zi: ji
#| zi ~ya: ja
#| zi ~yu: ju
#| zi ~yo: jo
#| ti: chi
#| ti ~ya: cha
#| ti ~yu: chu
#| ti ~yo: cho
#| tu: tsu
#| di: ji/dji
#| du: zu/dzu
#| hu: fu
#| For foreign words:
#| -----------------
#| se ~i si
#| si ~e she
#|
#| ze ~i zi
#| zi ~e je
#|
#| te ~i ti
#| ti ~e che
#| te ~u tu
#|
#| de ~i di
#| de ~u du
#| de ~i di
#|
#| he ~u: hu
#| hu ~a fa
#| hu ~i fi
#| hu ~e fe
#| hu ~o fo
# Most small forms are generated, but if necessary
# explicit small forms are given with ~a, ~ya, etc.
#------------------------------------------------------
# Variables
$vowel=[aeiou];
### $QUOTE='';
# Hiragana block
### $a2=ぁ;
### $a=あ;
### $i2=ぃ;
### $i=い;
### $u2=ぅ;
### $u=う;
### $e2=ぇ;
### $e=え;
### $o2=ぉ;
### $o=お;
### $ka=か;
### $ga=が;
### $ki=き;
### $gi=ぎ;
### $ku=く;
### $gu=ぐ;
### $ke=け;
### $ge=げ;
### $ko=こ;
### $go=ご;
### $sa=さ;
### $za=ざ;
### $si=し;
### $zi=じ;
### $su=す;
### $zu=ず;
### $se=せ;
### $ze=ぜ;
### $so=そ;
### $zo=ぞ;
### $ta=た;
### $da=だ;
### $ti=ち;
### $di=ぢ;
### $tu2=っ;
### $tu=つ;
### $du=づ;
### $te=て;
### $de=で;
### $to=と;
### $do=ど;
### $na=な;
### $ni=に;
### $nu=ぬ;
### $ne=ね;
### $no=の;
### $ha=は;
### $ba=ば;
### $pa=ぱ;
### $hi=ひ;
### $bi=び;
### $pi=ぴ;
### $hu=ふ;
### $bu=ぶ;
### $pu=ぷ;
### $he=へ;
### $be=べ;
### $pe=ぺ;
### $ho=ほ;
### $bo=ぼ;
### $po=ぽ;
### $ma=ま;
### $mi=み;
### $mu=む;
### $me=め;
### $mo=も;
### $ya2=ゃ;
### $ya=や;
### $yu2=ゅ;
### $yu=ゆ;
### $yo2=ょ;
### $yo=よ;
### $ra=ら;
### $ri=り;
### $ru=る;
### $re=れ;
### $ro=ろ;
### $wa2=ゎ;
### $wa=わ;
### $wi=ゐ;
### $we=ゑ;
### $wo=を;
### $n=ん;
### $vu=ゔ;
# Alternates, just to make the rules easier
### $yi2=ぃ;
### $yi=い;
### $ye2=ぇ;
### $ye=え;
### $wu=$u;
# End alternates
# Katakana block
### $A2=ァ;
### $A=ア;
### $I2=ィ;
### $I=イ;
### $U2=ゥ;
### $U=ウ;
### $E2=ェ;
### $E=エ;
### $O2=ォ;
### $O=オ;
### $KA=カ;
### $GA=ガ;
### $KI=キ;
### $GI=ギ;
### $KU=ク;
### $GU=グ;
### $KE=ケ;
### $GE=ゲ;
### $KO=コ;
### $GO=ゴ;
### $KA2=ヵ; # Small Katakana KA; no Hiragana equiv.
### $KE2=ヶ; # Small Katakana KE; no Hiragana equiv.
### $SA=サ;
### $ZA=ザ;
### $SI=シ;
### $ZI=ジ;
### $SU=ス;
### $ZU=ズ;
### $SE=セ;
### $ZE=ゼ;
### $SO=ソ;
### $ZO=ゾ;
### $TA=タ;
### $DA=ダ;
### $TI=チ;
### $DI=ヂ;
### $TU2=ッ;
### $TU=ツ;
### $DU=ヅ;
### $TE=テ;
### $DE=デ;
### $TO=ト;
### $DO=ド;
### $NA=ナ;
### $NI=ニ;
### $NU=ヌ;
### $NE=ネ;
### $NO=ノ;
### $HA=ハ;
### $BA=バ;
### $PA=パ;
### $HI=ヒ;
### $BI=ビ;
### $PI=ピ;
### $HU=フ;
### $BU=ブ;
### $PU=プ;
### $HE=ヘ;
### $BE=ベ;
### $PE=ペ;
### $HO=ホ;
### $BO=ボ;
### $PO=ポ;
### $MA=マ;
### $MI=ミ;
### $MU=ム;
### $ME=メ;
### $MO=モ;
### $YA2=ャ;
### $YA=ヤ;
### $YU2=ュ;
### $YU=ユ;
### $YO2=ョ;
### $YO=ヨ;
### $WA2=ヮ;
# Alternates, just to make the rules easier
### $YI2=ィ;
### $YI=イ;
### $YE2=ェ;
### $YE=エ;
### $WU=$U;
# End alternates
### $RA=ラ;
### $RI=リ;
### $RU=ル;
### $RE=レ;
### $RO=ロ;
### $VA=ヷ;
### $VI=ヸ;
### $VU=ヴ;
### $VE=ヹ;
### $VO=ヺ;
### $WA=ワ;
### $WI=ヰ;
### $WE=ヱ;
### $WO=ヲ;
### $N=ン;
### $LONG=ー;
# Variables used for doubled-consonants with tsu
$K_START=[カキクケコかきくけこ];
$G_START=[ガギグゲゴがぎぐげご];
$S_START=[サシスセソさしすせそ];
$Z_START=[ザズゼゾざずぜぞ];
$J_START=[ジじ];
$T_START=[タチツテトたちつてと];
$D_START=[ダヂヅデドだぢづでど];
$N_START=[ナニヌネノなにぬねの];
$H_START=[ハヒヘホはひへほ];
$F_START=[フふ];
$B_START=[バビブベボばびぶべぼ];
$P_START=[パピプペポぱぴぷぺぽ];
$M_START=[マミムメモまみむめも];
$Y_START=[ヤユヨやゆよ];
$R_START=[ラリルレロらりるれろ];
$W_START=[ワヰヱヲわゐゑを];
$V_START=[ヷヸヴヹヺゔ];
# If ン is followed by $N_QUOTER, then it needs an
# apostrophe after its romaji form to disambiguate it.
# E.g., ン ア != ナ, so represent as "n'a", not "na".
# The katakana and hiragana sets are parallel: the five
# vowels, the n-row (na ni nu ne no), ya/yu/yo and
# syllabic n.
$N_QUOTER = [ア イ ウ エ オ ナ ニ ヌ ネ ノ \
ヤ ユ ヨ ン];
$n_quoter = [あ い う え お な に ぬ ね の \
や ゆ よ ん];
# Lowercase copies for convenience in making hiragana
# rule set copy
### $long = $LONG;
### $quote = $QUOTE;
### $k_start=$K_START;
### $g_start=$G_START;
### $s_start=$S_START;
### $z_start=$Z_START;
### $j_start=$J_START;
### $t_start=$T_START;
### $d_start=$D_START;
### $n_start=$N_START;
### $h_start=$H_START;
### $f_start=$F_START;
### $b_start=$B_START;
### $p_start=$P_START;
### $m_start=$M_START;
### $y_start=$Y_START;
### $r_start=$R_START;
### $w_start=$W_START;
### $v_start=$V_START;
#------------------------------------------------------
# Katakana rules
# The rules immediately following are not shared. That
# is, they exist only for katakana, not for hiragana.
VA<>ヷ;
VI<>ヸ;
VE<>ヹ;
VO<>ヺ;
'~KA'<>ヵ;
'~KE'<>ヶ;
# ~~~ BEGIN shared rules ~~~
# The shared rules are copied from katakana to hiragana
# and then mechanically lowercased.
A<>ア;
BA<>バ;
BYA<ビャ;
BYI<ビィ;
BYU<ビュ;
BYE<ビェ;
BYO<ビョ;
BI<>ビ;
BU<>ブ;
BE<>ベ;
BO<>ボ;
BY>ビ|'~Y';
CHA<チャ;
CHI'~I'<チィ; # Liu
CHU<チュ;
CHE<チェ;
CHO<チョ;
CHI<>チ;
CH>チ|'~Y';
C}I>|S;
C}E>|S;
DA<>ダ;
DI<>ディ;
DU<>デゥ;
DE<>デ;
DO<>ド;
DZU<>ヅ;
DJA<ヂャ;
DJI'~I'<ヂィ; # Liu
DJU<ヂュ;
DJE<ヂェ;
DJO<ヂョ;
DJI<>ヂ;
DJ>ヂ|'~Y';
E<>エ;
FA<ファ;
FI<フィ;
FE<フェ;
FO<フォ;
FU<>フ;
GA<>ガ;
GYA<ギャ;
GYI<ギィ;
GYU<ギュ;
GYE<ギェ;
GYO<ギョ;
GI<>ギ;
GU<>グ;
GE<>ゲ;
GO<>ゴ;
GY>ギ|'~Y';
HA<>ハ;
HI<>ヒ;
HU<>ヘゥ;
HE<>ヘ;
HO<>ホ;
I<>イ;
JA<ジャ;
JI'~I'<ジィ; # Liu
JU<ジュ;
JE<ジェ;
JO<ジョ;
JI<>ジ;
KA<>カ;
KYA<キャ;
KYI<キィ;
KYU<キュ;
KYE<キェ;
KYO<キョ;
KI<>キ;
KU<>ク;
KE<>ケ;
KO<>コ;
KY>キ|'~Y';
MA<>マ;
MYA<ミャ;
MYI<ミィ;
MYU<ミュ;
MYE<ミェ;
MYO<ミョ;
MI<>ミ;
MU<>ム;
ME<>メ;
MO<>モ;
MY>ミ|'~Y';
M}P>ン;
M}B>ン;
M}F>ン;
M}V>ン;
# N row (NA NI NU NE NO) and its NY- combinations.
NA<>ナ;
NYA<ニャ;
NYI<ニィ;
NYU<ニュ;
NYE<ニェ;
NYO<ニョ;
NI<>ニ;
NU<>ヌ;
NE<>ネ;
NO<>ノ;
NY>ニ|'~Y';
O<>オ;
PA<>パ;
PYA<ピャ;
PYI<ピィ;
PYU<ピュ;
PYE<ピェ;
PYO<ピョ;
PI<>ピ;
PU<>プ;
PE<>ペ;
PO<>ポ;
PY>ピ|'~Y';
RA<>ラ;
RYA<リャ;
RYI<リィ;
RYU<リュ;
RYE<リェ;
RYO<リョ;
RI<>リ;
RU<>ル;
RE<>レ;
RO<>ロ;
RY>リ|'~Y';
SA<>サ;
SI<>セィ;
SU<>ス;
SE<>セ;
SO<>ソ;
SHA<シャ;
SHI'~I'<シィ; # Liu
SHU<シュ;
SHE<シェ;
SHO<ショ;
SHI<>シ;
SH>シ|'~Y';
TA<>タ;
TI<>ティ;
TU<>テゥ;
TE<>テ;
TO<>ト;
# Double consonants
B}B<>ッ}$B_START;
C}K>ッ;
C}C>ッ;
C}Q>ッ;
D}D<>ッ}$D_START;
F}F<>ッ}$F_START;
G}G<>ッ}$G_START;
H}H<>ッ}$H_START;
J}J<>ッ}$J_START;
K}K<>ッ}$K_START;
L}L>ッ;
M}M<>ッ}$M_START;
N}N<>ッ}$N_START;
P}P<>ッ}$P_START;
Q}Q>ッ;
R}R<>ッ}$R_START;
S}SH>ッ;
S}S<>ッ}$S_START;
T}CH>ッ;
T}T<>ッ}$T_START;
V}V<>ッ}$V_START;
W}W<>ッ}$W_START;
X}X>ッ;
Y}Y<>ッ}$Y_START;
Z}Z<>ッ}$Z_START;
TSU<>ツ;
U<>ウ;
'V~A'<ヴァ; # Liu
'V~I'<ヴィ; # Liu
'V~E'<ヴェ; # Liu
'V~O'<ヴォ; # Liu
VU<>ヴ;
WA<>ワ;
WI<>ヰ;
WU>ウ;
WE<>ヱ;
WO<>ヲ;
YA<>ヤ;
YI>イ;
YU<>ユ;
YE>エ;
YO<>ヨ;
ZA<>ザ;
ZI<>ゼィ;
ZU<>ズ;
ZE<>ゼ;
ZO<>ゾ;
# Prolonged vowel mark. This indicates a doubling of
# the preceding vowel sound in both katakana and
# hiragana.
A<A{ー; # Liu
E<E{ー; # Liu
I<I{ー; # Liu
O<O{ー; # Liu
U<U{ー; # Liu
# Small forms
'~A'<>ァ;
'~I'<>ィ;
'~U'<>ゥ;
'~E'<>ェ;
'~O'<>ォ;
'~TSU'<>ッ;
'~WA'<>ヮ;
'~YA'<>ャ;
'~YI'>ィ;
'~YU'<>ュ;
'~YE'>ェ;
'~YO'<>ョ;
# One-way latin->kana rules. These do not occur in
# well-formed romaji representing actual Japanese text.
# Their purpose is to make all romaji map to kana of
# some sort.
# The following are not really necessary, but produce
# slightly more natural results.
CY>セィ;
DY>ディ;
HY>ヒ;
SY>セィ;
TY>ティ;
ZY>ゼィ;
# Simple substitutions using backup
C>|K;
F>フ|'~';
J>ジ|'~Y';
L>|R;
Q>|K;
V>ヴ|'~';
W>ウ|'~';
X>|KS;
# Isolated consonants listed here so as not to mask
# longer rules above.
B>ブ;
D>デ;
G>グ;
H>ヘ;
K>ク;
M>ン;
N''<ン}$N_QUOTER;
N<>ン;
P>プ;
R>ル;
S>ス;
T>テ;
Y>イ;
Z>ズ;
# ~~~ END shared rules ~~~
#------------------------------------------------------
# Hiragana rules
# Currently, there are no hiragana rules other than the
# shared rules.
# ~~~ BEGIN shared rules ~~~
# The shared rules are copied from katakana to hiragana
# and then mechanically lowercased.
a<>あ;
ba<>ば;
bya<びゃ;
byi<びぃ;
byu<びゅ;
bye<びぇ;
byo<びょ;
bi<>び;
bu<>ぶ;
be<>べ;
bo<>ぼ;
by>び|'~y';
cha<ちゃ;
chi'~i'<ちぃ; # liu
chu<ちゅ;
che<ちぇ;
cho<ちょ;
chi<>ち;
ch>ち|'~y';
c}i>|s;
c}e>|s;
da<>だ;
di<>でぃ;
du<>でぅ;
de<>で;
do<>ど;
dzu<>づ;
dja<ぢゃ;
dji'~i'<ぢぃ; # liu
dju<ぢゅ;
dje<ぢぇ;
djo<ぢょ;
dji<>ぢ;
dj>ぢ|'~y';
e<>え;
fa<ふぁ;
fi<ふぃ;
fe<ふぇ;
fo<ふぉ;
fu<>ふ;
ga<>が;
gya<ぎゃ;
gyi<ぎぃ;
gyu<ぎゅ;
gye<ぎぇ;
gyo<ぎょ;
gi<>ぎ;
gu<>ぐ;
ge<>げ;
go<>ご;
gy>ぎ|'~y';
ha<>は;
hi<>ひ;
hu<>へぅ;
he<>へ;
ho<>ほ;
i<>い;
ja<じゃ;
ji'~i'<じぃ; # liu
ju<じゅ;
je<じぇ;
jo<じょ;
ji<>じ;
ka<>か;
kya<きゃ;
kyi<きぃ;
kyu<きゅ;
kye<きぇ;
kyo<きょ;
ki<>き;
ku<>く;
ke<>け;
ko<>こ;
ky>き|'~y';
ma<>ま;
mya<みゃ;
myi<みぃ;
myu<みゅ;
mye<みぇ;
myo<みょ;
mi<>み;
mu<>む;
me<>め;
mo<>も;
my>み|'~y';
m}p>ん;
m}b>ん;
m}f>ん;
m}v>ん;
na<>な;
nya<にゃ;
nyi<にぃ;
nyu<にゅ;
nye<にぇ;
nyo<にょ;
ni<>に;
nu<>ぬ;
ne<>ね;
no<>の;
ny>に|'~y';
o<>お;
pa<>ぱ;
pya<ぴゃ;
pyi<ぴぃ;
pyu<ぴゅ;
pye<ぴぇ;
pyo<ぴょ;
pi<>ぴ;
pu<>ぷ;
pe<>ぺ;
po<>ぽ;
py>ぴ|'~y';
ra<>ら;
rya<りゃ;
ryi<りぃ;
ryu<りゅ;
rye<りぇ;
ryo<りょ;
ri<>り;
ru<>る;
re<>れ;
ro<>ろ;
ry>り|'~y';
sa<>さ;
si<>せぃ;
su<>す;
se<>せ;
so<>そ;
sha<しゃ;
shi'~i'<しぃ; # liu
shu<しゅ;
she<しぇ;
sho<しょ;
shi<>し;
sh>し|'~y';
ta<>た;
ti<>てぃ;
tu<>てぅ;
te<>て;
to<>と;
# double consonants
b}b<>っ}$B_START;
c}k>っ;
c}c>っ;
c}q>っ;
d}d<>っ}$D_START;
f}f<>っ}$F_START;
g}g<>っ}$G_START;
h}h<>っ}$H_START;
j}j<>っ}$J_START;
k}k<>っ}$K_START;
l}l>っ;
m}m<>っ}$M_START;
n}n<>っ}$N_START;
p}p<>っ}$P_START;
q}q>っ;
r}r<>っ}$R_START;
s}sh>っ;
s}s<>っ}$S_START;
t}ch>っ;
t}t<>っ}$T_START;
v}v<>っ}$V_START;
w}w<>っ}$W_START;
x}x>っ;
y}y<>っ}$Y_START;
z}z<>っ}$Z_START;
tsu<>つ;
u<>う;
'v~a'<ゔぁ; # liu
'v~i'<ゔぃ; # liu
'v~e'<ゔぇ; # liu
'v~o'<ゔぉ; # liu
vu<>ゔ;
wa<>わ;
wi<>ゐ;
wu>う;
we<>ゑ;
wo<>を;
ya<>や;
yi>い;
yu<>ゆ;
ye>え;
yo<>よ;
za<>ざ;
zi<>ぜぃ;
zu<>ず;
ze<>ぜ;
zo<>ぞ;
# prolonged vowel mark. this indicates a doubling of
# the preceding vowel sound in both katakana and
# hiragana.
a<a{ー; # liu
e<e{ー; # liu
i<i{ー; # liu
o<o{ー; # liu
u<u{ー; # liu
# small forms
'~a'<>ぁ;
'~i'<>ぃ;
'~u'<>ぅ;
'~e'<>ぇ;
'~o'<>ぉ;
'~tsu'<>っ;
'~wa'<>ゎ;
'~ya'<>ゃ;
'~yi'>ぃ;
'~yu'<>ゅ;
'~ye'>ぇ;
'~yo'<>ょ;
# one-way latin->kana rules. these do not occur in
# well-formed romaji representing actual japanese text.
# their purpose is to make all romaji map to kana of
# some sort.
# the following are not really necessary, but produce
# slightly more natural results.
cy>せぃ;
dy>でぃ;
hy>ひ;
sy>せぃ;
ty>てぃ;
zy>ぜぃ;
# simple substitutions using backup
c>|k;
f>ふ|'~';
j>じ|'~y';
l>|r;
q>|k;
v>ゔ|'~';
w>う|'~';
x>|ks;
# isolated consonants listed here so as not to mask
# longer rules above.
b>ぶ;
d>で;
g>ぐ;
h>へ;
k>く;
m>ん;
n''<ん}$n_quoter;
n<>ん;
p>ぷ;
r>る;
s>す;
t>て;
y>い;
z>ず;
# ~~~ END shared rules ~~~
#------------------------------------------------------
# Final cleanup
'~'>; # delete stray tildes
''>; # delete stray quotes
'-'>ー;
# eof

View file

@ -0,0 +1,89 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:06 2001
#--------------------------------------------------------------------
# Malayalam-InterIndic
ം>\uE002; # SIGN ANUSVARA
ഃ>\uE003; # SIGN VISARGA
അ>\uE005; # LETTER A
ആ>\uE006; # LETTER AA
ഇ>\uE007; # LETTER I
ഈ>\uE008; # LETTER II
ഉ>\uE009; # LETTER U
ഊ>\uE00A; # LETTER UU
ഋ>\uE00B; # LETTER VOCALIC R
ഌ>\uE00C; # LETTER VOCALIC L
എ>\uE081; # LETTER E
ഏ>\uE00F; # LETTER EE
ഐ>\uE010; # LETTER AI
ഒ>\uE082; # LETTER O
ഓ>\uE013; # LETTER OO
ഔ>\uE014; # LETTER AU
ക>\uE015; # LETTER KA
ഖ>\uE016; # LETTER KHA
ഗ>\uE017; # LETTER GA
ഘ>\uE018; # LETTER GHA
ങ>\uE019; # LETTER NGA
ച>\uE01A; # LETTER CA
ഛ>\uE01B; # LETTER CHA
ജ>\uE01C; # LETTER JA
ഝ>\uE01D; # LETTER JHA
ഞ>\uE01E; # LETTER NYA
ട>\uE01F; # LETTER TTA
ഠ>\uE020; # LETTER TTHA
ഡ>\uE021; # LETTER DDA
ഢ>\uE022; # LETTER DDHA
ണ>\uE023; # LETTER NNA
ത>\uE024; # LETTER TA
ഥ>\uE025; # LETTER THA
ദ>\uE026; # LETTER DA
ധ>\uE027; # LETTER DHA
ന>\uE028; # LETTER NA
പ>\uE02A; # LETTER PA
ഫ>\uE02B; # LETTER PHA
ബ>\uE02C; # LETTER BA
ഭ>\uE02D; # LETTER BHA
മ>\uE02E; # LETTER MA
യ>\uE02F; # LETTER YA
ര>\uE030; # LETTER RA
റ>\uE083; # LETTER RRA
ല>\uE032; # LETTER LA
ള>\uE033; # LETTER LLA
ഴ>\uE034; # LETTER LLLA
വ>\uE035; # LETTER VA
ശ>\uE036; # LETTER SHA
ഷ>\uE037; # LETTER SSA
സ>\uE038; # LETTER SA
ഹ>\uE039; # LETTER HA
ാ>\uE03E; # VOWEL SIGN AA
ി>\uE03F; # VOWEL SIGN I
ീ>\uE040; # VOWEL SIGN II
ു>\uE041; # VOWEL SIGN U
ൂ>\uE042; # VOWEL SIGN UU
ൃ>\uE043; # VOWEL SIGN VOCALIC R
െ>\uE084; # VOWEL SIGN E
േ>\uE047; # VOWEL SIGN EE
ൈ>\uE048; # VOWEL SIGN AI
ൊ>\uE085; # VOWEL SIGN O
ോ>\uE04B; # VOWEL SIGN OO
ൌ>\uE04C; # VOWEL SIGN AU
്>\uE04D; # SIGN VIRAMA
ൗ>\uE057; # AU LENGTH MARK
ൠ>\uE060; # LETTER VOCALIC RR
ൡ>\uE061; # LETTER VOCALIC LL
൦>\uE066; # DIGIT ZERO
൧>\uE067; # DIGIT ONE
൨>\uE068; # DIGIT TWO
൩>\uE069; # DIGIT THREE
൪>\uE06A; # DIGIT FOUR
൫>\uE06B; # DIGIT FIVE
൬>\uE06C; # DIGIT SIX
൭>\uE06D; # DIGIT SEVEN
൮>\uE06E; # DIGIT EIGHT
൯>\uE06F; # DIGIT NINE
# eof

View file

@ -0,0 +1,90 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:07 2001
#--------------------------------------------------------------------
# Oriya-InterIndic
ଁ>\uE001; # SIGN CANDRABINDU
ଂ>\uE002; # SIGN ANUSVARA
ଃ>\uE003; # SIGN VISARGA
ଅ>\uE005; # LETTER A
ଆ>\uE006; # LETTER AA
ଇ>\uE007; # LETTER I
ଈ>\uE008; # LETTER II
ଉ>\uE009; # LETTER U
ଊ>\uE00A; # LETTER UU
ଋ>\uE00B; # LETTER VOCALIC R
ଌ>\uE00C; # LETTER VOCALIC L
ଏ>\uE081; # LETTER E
ଐ>\uE010; # LETTER AI
ଓ>\uE082; # LETTER O
ଔ>\uE014; # LETTER AU
କ>\uE015; # LETTER KA
ଖ>\uE016; # LETTER KHA
ଗ>\uE017; # LETTER GA
ଘ>\uE018; # LETTER GHA
ଙ>\uE019; # LETTER NGA
ଚ>\uE01A; # LETTER CA
ଛ>\uE01B; # LETTER CHA
ଜ>\uE01C; # LETTER JA
ଝ>\uE01D; # LETTER JHA
ଞ>\uE01E; # LETTER NYA
ଟ>\uE01F; # LETTER TTA
ଠ>\uE020; # LETTER TTHA
ଡ>\uE021; # LETTER DDA
ଢ>\uE022; # LETTER DDHA
ଣ>\uE023; # LETTER NNA
ତ>\uE024; # LETTER TA
ଥ>\uE025; # LETTER THA
ଦ>\uE026; # LETTER DA
ଧ>\uE027; # LETTER DHA
ନ>\uE028; # LETTER NA
ପ>\uE02A; # LETTER PA
ଫ>\uE02B; # LETTER PHA
ବ>\uE02C; # LETTER BA
ଭ>\uE02D; # LETTER BHA
ମ>\uE02E; # LETTER MA
ଯ>\uE02F; # LETTER YA
ର>\uE030; # LETTER RA
ଲ>\uE032; # LETTER LA
ଳ>\uE033; # LETTER LLA
ଶ>\uE036; # LETTER SHA
ଷ>\uE037; # LETTER SSA
ସ>\uE038; # LETTER SA
ହ>\uE039; # LETTER HA
଼>\uE03C; # SIGN NUKTA
ଽ>\uE03D; # SIGN AVAGRAHA
ା>\uE03E; # VOWEL SIGN AA
ି>\uE03F; # VOWEL SIGN I
ୀ>\uE040; # VOWEL SIGN II
ୁ>\uE041; # VOWEL SIGN U
ୂ>\uE042; # VOWEL SIGN UU
ୃ>\uE043; # VOWEL SIGN VOCALIC R
େ>\uE084; # VOWEL SIGN E
ୈ>\uE048; # VOWEL SIGN AI
ୋ>\uE085; # VOWEL SIGN O
ୌ>\uE04C; # VOWEL SIGN AU
୍>\uE04D; # SIGN VIRAMA
ୖ>\uE056; # AI LENGTH MARK
ୗ>\uE057; # AU LENGTH MARK
ଡ଼>\uE083; # LETTER RRA
ଢ଼>\uE05D; # LETTER RHA
ୟ>\uE05F; # LETTER YYA
ୠ>\uE060; # LETTER VOCALIC RR
ୡ>\uE061; # LETTER VOCALIC LL
୦>\uE066; # DIGIT ZERO
୧>\uE067; # DIGIT ONE
୨>\uE068; # DIGIT TWO
୩>\uE069; # DIGIT THREE
୪>\uE06A; # DIGIT FOUR
୫>\uE06B; # DIGIT FIVE
୬>\uE06C; # DIGIT SIX
୭>\uE06D; # DIGIT SEVEN
୮>\uE06E; # DIGIT EIGHT
୯>\uE06F; # DIGIT NINE
୰>\uE080; # ISSHAR
# eof

View file

@ -0,0 +1,77 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:07 2001
#--------------------------------------------------------------------
# StraightQuotes-CurlyQuotes
# Rewritten using character codes [LIU]
# Character classes used as context by the quote rules below.
# $white: Unicode space, line and paragraph separators.
$white=[[:Zs:][:Zl:][:Zp:]];
# $black: any character that is not in $white.
$black=[^$white];
# $open: opening punctuation (general category Ps).
$open=[:Ps:];
### $dquote='\"';
### $lAng=〈;
### $ldAng=《;
### $lBrk='[';
### $lBrc='{';
### $lquote=‘;
### $rquote=’;
### $ldquote=“;
### $rdquote=”;
### $ldguill=«;
### $rdguill=»;
### $lguill=‹;
### $rguill=›;
### $mdash=—;
########################################
# Conversions from input
########################################
# join single quotes: a curly single quote followed by a
# straight or matching curly single quote becomes the
# corresponding curly double quote
‘''>“;
‘‘>“;
’''>”;
’’>”;
# smart single quotes: opening quote after whitespace or
# opening punctuation, closing quote after any other
# character, opening quote when nothing precedes
$white{''>‘;
$open{''>‘;
$black{''>’;
''>‘;
#smart doubles
$white{'\"'>“;
$open{'\"'>“;
$black{'\"'>”;
'\"'>“;
# join single guillemets: a doubled single guillemet or a
# doubled ASCII angle bracket becomes a double guillemet
››>»;
'>>'>»;
‹‹>«;
'<<'>«;
# prevent double spaces
# NOTE(review): `\\` is an escaped backslash in rule syntax, so as
# written this deletes a backslash that follows a backslash. It looks
# like Java string escaping carried over from the old resource bundle;
# presumably `\ {\ >;` (escaped space after escaped space) was meant.
# Confirm against the rule loader before changing.
\\ {\\ >;
# join hyphens into dash ### BIDIRECTIONAL ###
'--'<>—;
########################################
# Conversions back to input
########################################
# smart quotes: curly single and double quotes map back to
# their straight forms
''<‘;
''<’;
'\"'<“;
'\"'<”;
# eof

View file

@ -0,0 +1,72 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:07 2001
#--------------------------------------------------------------------
# Tamil-InterIndic
ஂ>\uE002; # SIGN ANUSVARA
ஃ>\uE003; # SIGN VISARGA
அ>\uE005; # LETTER A
ஆ>\uE006; # LETTER AA
இ>\uE007; # LETTER I
ஈ>\uE008; # LETTER II
உ>\uE009; # LETTER U
ஊ>\uE00A; # LETTER UU
எ>\uE081; # LETTER E
ஏ>\uE00F; # LETTER EE
ஐ>\uE010; # LETTER AI
ஒ>\uE082; # LETTER O
ஓ>\uE013; # LETTER OO
ஔ>\uE014; # LETTER AU
க>\uE015; # LETTER KA
ங>\uE019; # LETTER NGA
ச>\uE01A; # LETTER CA
ஜ>\uE01C; # LETTER JA
ஞ>\uE01E; # LETTER NYA
ட>\uE01F; # LETTER TTA
ண>\uE023; # LETTER NNA
த>\uE024; # LETTER TA
ந>\uE028; # LETTER NA
ன>\uE029; # LETTER NNNA
ப>\uE02A; # LETTER PA
ம>\uE02E; # LETTER MA
ய>\uE02F; # LETTER YA
ர>\uE030; # LETTER RA
ற>\uE083; # LETTER RRA
ல>\uE032; # LETTER LA
ள>\uE033; # LETTER LLA
ழ>\uE034; # LETTER LLLA
வ>\uE035; # LETTER VA
ஷ>\uE037; # LETTER SSA
ஸ>\uE038; # LETTER SA
ஹ>\uE039; # LETTER HA
ா>\uE03E; # VOWEL SIGN AA
ி>\uE03F; # VOWEL SIGN I
ீ>\uE040; # VOWEL SIGN II
ு>\uE041; # VOWEL SIGN U
ூ>\uE042; # VOWEL SIGN UU
ெ>\uE084; # VOWEL SIGN E
ே>\uE047; # VOWEL SIGN EE
ை>\uE048; # VOWEL SIGN AI
ொ>\uE085; # VOWEL SIGN O
ோ>\uE04B; # VOWEL SIGN OO
ௌ>\uE04C; # VOWEL SIGN AU
்>\uE04D; # SIGN VIRAMA
ௗ>\uE057; # AU LENGTH MARK
௧>\uE067; # DIGIT ONE
௨>\uE068; # DIGIT TWO
௩>\uE069; # DIGIT THREE
௪>\uE06A; # DIGIT FOUR
௫>\uE06B; # DIGIT FIVE
௬>\uE06C; # DIGIT SIX
௭>\uE06D; # DIGIT SEVEN
௮>\uE06E; # DIGIT EIGHT
௯>\uE06F; # DIGIT NINE
# ௰>; // UNMAPPED Tamil-InterIndic: NUMBER TEN
# ௱>; // UNMAPPED Tamil-InterIndic: NUMBER ONE HUNDRED
# ௲>; // UNMAPPED Tamil-InterIndic: NUMBER ONE THOUSAND
# eof

View file

@ -0,0 +1,91 @@
#--------------------------------------------------------------------
# Copyright (c) 1999-2001, International Business Machines
# Corporation and others. All Rights Reserved.
#--------------------------------------------------------------------
# Date: Tue Jan 23 12:42:07 2001
#--------------------------------------------------------------------
# Telugu-InterIndic
ఁ>\uE001; # SIGN CANDRABINDU
ం>\uE002; # SIGN ANUSVARA
ః>\uE003; # SIGN VISARGA
అ>\uE005; # LETTER A
ఆ>\uE006; # LETTER AA
ఇ>\uE007; # LETTER I
ఈ>\uE008; # LETTER II
ఉ>\uE009; # LETTER U
ఊ>\uE00A; # LETTER UU
ఋ>\uE00B; # LETTER VOCALIC R
ఌ>\uE00C; # LETTER VOCALIC L
ఎ>\uE081; # LETTER E
ఏ>\uE00F; # LETTER EE
ఐ>\uE010; # LETTER AI
ఒ>\uE082; # LETTER O
ఓ>\uE013; # LETTER OO
ఔ>\uE014; # LETTER AU
క>\uE015; # LETTER KA
ఖ>\uE016; # LETTER KHA
గ>\uE017; # LETTER GA
ఘ>\uE018; # LETTER GHA
ఙ>\uE019; # LETTER NGA
చ>\uE01A; # LETTER CA
ఛ>\uE01B; # LETTER CHA
జ>\uE01C; # LETTER JA
ఝ>\uE01D; # LETTER JHA
ఞ>\uE01E; # LETTER NYA
ట>\uE01F; # LETTER TTA
ఠ>\uE020; # LETTER TTHA
డ>\uE021; # LETTER DDA
ఢ>\uE022; # LETTER DDHA
ణ>\uE023; # LETTER NNA
త>\uE024; # LETTER TA
థ>\uE025; # LETTER THA
ద>\uE026; # LETTER DA
ధ>\uE027; # LETTER DHA
న>\uE028; # LETTER NA
ప>\uE02A; # LETTER PA
ఫ>\uE02B; # LETTER PHA
బ>\uE02C; # LETTER BA
భ>\uE02D; # LETTER BHA
మ>\uE02E; # LETTER MA
య>\uE02F; # LETTER YA
ర>\uE030; # LETTER RA
ఱ>\uE083; # LETTER RRA
ల>\uE032; # LETTER LA
ళ>\uE033; # LETTER LLA
వ>\uE035; # LETTER VA
శ>\uE036; # LETTER SHA
ష>\uE037; # LETTER SSA
స>\uE038; # LETTER SA
హ>\uE039; # LETTER HA
ా>\uE03E; # VOWEL SIGN AA
ి>\uE03F; # VOWEL SIGN I
ీ>\uE040; # VOWEL SIGN II
ు>\uE041; # VOWEL SIGN U
ూ>\uE042; # VOWEL SIGN UU
ృ>\uE043; # VOWEL SIGN VOCALIC R
ౄ>\uE044; # VOWEL SIGN VOCALIC RR
ె>\uE084; # VOWEL SIGN E
ే>\uE047; # VOWEL SIGN EE
ై>\uE048; # VOWEL SIGN AI
ొ>\uE085; # VOWEL SIGN O
ో>\uE04B; # VOWEL SIGN OO
ౌ>\uE04C; # VOWEL SIGN AU
్>\uE04D; # SIGN VIRAMA
ౕ>\uE055; # LENGTH MARK
ౖ>\uE056; # AI LENGTH MARK
ౠ>\uE060; # LETTER VOCALIC RR
ౡ>\uE061; # LETTER VOCALIC LL
౦>\uE066; # DIGIT ZERO
౧>\uE067; # DIGIT ONE
౨>\uE068; # DIGIT TWO
౩>\uE069; # DIGIT THREE
౪>\uE06A; # DIGIT FOUR
౫>\uE06B; # DIGIT FIVE
౬>\uE06C; # DIGIT SIX
౭>\uE06D; # DIGIT SEVEN
౮>\uE06E; # DIGIT EIGHT
౯>\uE06F; # DIGIT NINE
# eof

File diff suppressed because it is too large Load diff