From ace18e9730b70c11bda71babbafcc414170501e5 Mon Sep 17 00:00:00 2001
From: Alan Liu <alansliu@gmail.com>
Date: Tue, 20 Feb 2001 20:25:42 +0000
Subject: [PATCH] Rewrite Latin-Jamo and add test cases

X-SVN-Rev: 3690
---
 .../ibm/icu/dev/test/translit/JamoTest.java   |  196 ++++
 .../icu/dev/test/translit/RoundTripTest.java  |    5 +-
 icu4j/src/com/ibm/test/translit/JamoTest.java |  196 ++++
 .../com/ibm/test/translit/RoundTripTest.java  |    5 +-
 .../Transliterator_Latin_Jamo.utf8.txt        | 1019 ++++++-----------
 5 files changed, 723 insertions(+), 698 deletions(-)
 create mode 100755 icu4j/src/com/ibm/icu/dev/test/translit/JamoTest.java
 create mode 100755 icu4j/src/com/ibm/test/translit/JamoTest.java

diff --git a/icu4j/src/com/ibm/icu/dev/test/translit/JamoTest.java b/icu4j/src/com/ibm/icu/dev/test/translit/JamoTest.java
new file mode 100755
index 00000000000..88404ff4464
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/JamoTest.java
@@ -0,0 +1,196 @@
+package com.ibm.test.translit;
+import com.ibm.text.*;
+import com.ibm.test.*;
+import com.ibm.util.Utility;
+import java.text.*;
+import java.util.*;
+
+/**
+ * @test
+ * @summary Test the Latin-Jamo transliterator
+ */
+public class JamoTest extends TransliteratorTest {
+
+    public static void main(String[] args) throws Exception {
+        new JamoTest().run(args);
+    }
+
+    public void TestJamo() {
+        Transliterator latinJamo = Transliterator.getInstance("Latin-Jamo");
+        Transliterator jamoLatin = latinJamo.getInverse();
+
+        String[] CASE = {
+            // Column 1 is the latin text L1 to be fed to Latin-Jamo
+            // to yield output J.
+
+            // Column 2 is expected value of J.  J is fed to
+            // Jamo-Latin to yield output L2.
+
+            // Column 3 is expected value of L2.  If the expected
+            // value of L2 is L1, then column 3 is null.
+            "bab", "(Bi)(A)(Bf)", null,
+            "babb", "(Bi)(A)(Bf)(Bi)(EU)", "babbeu",
+            "babbba", "(Bi)(A)(Bf)(BB)(A)", null,
+            "bagg", "(Bi)(A)(GGf)", null,
+            "baggga", "(Bi)(A)(GGf)(Gi)(A)", null,
+            "bag'gga", "(Bi)(A)(Gf)(GGi)(A)", null,
+            "kabsa", "(Ki)(A)(Bf)(Si)(A)", null,
+            "kabska", "(Ki)(A)(BS)(Ki)(A)", null,
+            "gabsbka", "(Gi)(A)(BS)(Bi)(EU)(Ki)(A)", "gabsbeuka", // not (Kf)
+            "gga", "(GGi)(A)", null,
+            "bsa", "(Bi)(EU)(Si)(A)", "beusa",
+            "agg", "(IEUNG)(A)(GGf)", null,
+            "agga", "(IEUNG)(A)(Gf)(Gi)(A)", null,
+            "la", "(R)(A)", "ra",
+            "bs", "(Bi)(EU)(Sf)", "beus",
+        };
+
+        for (int i=0; i<CASE.length; i+=3) {
+            String jamo = nameToJamo(CASE[i+1]);
+            if (CASE[i+2] == null) {
+                expect(latinJamo, CASE[i], jamo, jamoLatin);
+            } else {
+                // Handle case where round-trip is expected to fail
+                expect(latinJamo, CASE[i], jamo);
+                expect(jamoLatin, jamo, CASE[i+2]);
+            }
+        }
+    }
+
+    // TransliteratorTest override
+    void expectAux(String tag, String summary, boolean pass,
+                   String expectedResult) {
+        super.expectAux(tag, jamoToName(summary),
+                        pass, jamoToName(expectedResult));
+    }
+
+    // UTILITIES
+
+    static final String[] JAMO_NAMES = {
+        "(Gi)", "\u1100",
+        "(GGi)", "\u1101",
+        "(Ni)", "\u1102",
+        "(Di)", "\u1103",
+        "(DD)", "\u1104",
+        "(R)", "\u1105",
+        "(Mi)", "\u1106",
+        "(Bi)", "\u1107",
+        "(BB)", "\u1108",
+        "(Si)", "\u1109",
+        "(SSi)", "\u110A",
+        "(IEUNG)", "\u110B",
+        "(Ji)", "\u110C",
+        "(JJ)", "\u110D",
+        "(Ci)", "\u110E",
+        "(Ki)", "\u110F",
+        "(Ti)", "\u1110",
+        "(Pi)", "\u1111",
+        "(Hi)", "\u1112",
+        
+        "(A)", "\u1161",
+        "(AE)", "\u1162",
+        "(YA)", "\u1163",
+        "(YAE)", "\u1164",
+        "(EO)", "\u1165",
+        "(E)", "\u1166",
+        "(YEO)", "\u1167",
+        "(YE)", "\u1168",
+        "(O)", "\u1169",
+        "(WA)", "\u116A",
+        "(WAE)", "\u116B",
+        "(OE)", "\u116C",
+        "(YO)", "\u116D",
+        "(U)", "\u116E",
+        "(WEO)", "\u116F",
+        "(WE)", "\u1170",
+        "(WI)", "\u1171",
+        "(YU)", "\u1172",
+        "(EU)", "\u1173",
+        "(YI)", "\u1174",
+        "(I)", "\u1175",
+
+        "(Gf)", "\u11A8",
+        "(GGf)", "\u11A9",
+        "(GS)", "\u11AA",
+        "(Nf)", "\u11AB",
+        "(NJ)", "\u11AC",
+        "(NH)", "\u11AD",
+        "(Df)", "\u11AE",
+        "(L)", "\u11AF",
+        "(LG)", "\u11B0",
+        "(LM)", "\u11B1",
+        "(LB)", "\u11B2",
+        "(LS)", "\u11B3",
+        "(LT)", "\u11B4",
+        "(LP)", "\u11B5",
+        "(LH)", "\u11B6",
+        "(Mf)", "\u11B7",
+        "(Bf)", "\u11B8",
+        "(BS)", "\u11B9",
+        "(Sf)", "\u11BA",
+        "(SSf)", "\u11BB",
+        "(NG)", "\u11BC",
+        "(Jf)", "\u11BD",
+        "(Cf)", "\u11BE",
+        "(Kf)", "\u11BF",
+        "(Tf)", "\u11C0",
+        "(Pf)", "\u11C1",
+        "(Hf)", "\u11C2",
+    };
+
+    static Hashtable JAMO_TO_NAME;
+    static Hashtable NAME_TO_JAMO;
+
+    static {
+        JAMO_TO_NAME = new Hashtable();
+        NAME_TO_JAMO = new Hashtable();
+        for (int i=0; i<JAMO_NAMES.length; i+=2) {
+            JAMO_TO_NAME.put(JAMO_NAMES[i+1], JAMO_NAMES[i]);
+            NAME_TO_JAMO.put(JAMO_NAMES[i], JAMO_NAMES[i+1]);
+        }
+    }
+
+    /**
+     * Convert short names to actual jamo.  E.g., "x(LG)y" returns
+     * "x\u11B0y".  See JAMO_NAMES for table of names.
+     */
+    static String nameToJamo(String input) {
+        StringBuffer buf = new StringBuffer();
+        for (int i=0; i<input.length(); ++i) {
+            char c = input.charAt(i);
+            if (c == '(') {
+                int j = input.indexOf(')', i+1);
+                if ((j-i) >= 2 && (j-i) <= 6) { // "(A)", "(IEUNG)"
+                    String jamo = (String) NAME_TO_JAMO.get(input.substring(i, j+1));
+                    if (jamo != null) {
+                        buf.append(jamo);
+                        i = j;
+                        continue;
+                    }
+                }
+            }
+            buf.append(c);
+        }
+        return buf.toString();
+    }
+
+    /**
+     * Convert jamo to short names.  E.g., "x\u11B0y" returns
+     * "x(LG)y".  See JAMO_NAMES for table of names.
+     */
+    static String jamoToName(String input) {
+        StringBuffer buf = new StringBuffer();
+        for (int i=0; i<input.length(); ++i) {
+            char c = input.charAt(i);
+            if (c >= 0x1100 && c <= 0x11C2) {
+                String name = (String) JAMO_TO_NAME.get(input.substring(i, i+1));
+                if (name != null) {
+                    buf.append(name);
+                    continue;
+                }
+            }
+            buf.append(c);
+        }
+        return buf.toString();
+    }
+}
diff --git a/icu4j/src/com/ibm/icu/dev/test/translit/RoundTripTest.java b/icu4j/src/com/ibm/icu/dev/test/translit/RoundTripTest.java
index 69576e65e9a..9d4b7183a9f 100755
--- a/icu4j/src/com/ibm/icu/dev/test/translit/RoundTripTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/RoundTripTest.java
@@ -49,14 +49,15 @@ public class RoundTripTest extends TestFmwk {
         Test t = new Test("Latin-Jamo", 
           TestUtility.LATIN_SCRIPT, TestUtility.JAMO_SCRIPT);
         t.setErrorLimit(200); // Don't run full test -- too long
-        t.test(null, null, this);
+        //t.test("[[a-z]-[fqvxz]]", null, this);
+        t.test("[a-z]", null, this);
     }
 
     public void TestJamoHangul() throws IOException, ParseException {
         Test t = new Test("Latin-Jamo;Jamo-Hangul", 
           TestUtility.LATIN_SCRIPT, TestUtility.HANGUL_SCRIPT);
         t.setErrorLimit(50); // Don't run full test -- too long
-        t.test(null, null, this);
+        t.test("[a-z]", null, this);
     }
 
     public void TestGreek() throws IOException, ParseException {
diff --git a/icu4j/src/com/ibm/test/translit/JamoTest.java b/icu4j/src/com/ibm/test/translit/JamoTest.java
new file mode 100755
index 00000000000..88404ff4464
--- /dev/null
+++ b/icu4j/src/com/ibm/test/translit/JamoTest.java
@@ -0,0 +1,196 @@
+package com.ibm.test.translit;
+import com.ibm.text.*;
+import com.ibm.test.*;
+import com.ibm.util.Utility;
+import java.text.*;
+import java.util.*;
+
+/**
+ * @test
+ * @summary Test the Latin-Jamo transliterator
+ */
+public class JamoTest extends TransliteratorTest {
+
+    public static void main(String[] args) throws Exception {
+        new JamoTest().run(args);
+    }
+
+    public void TestJamo() {
+        Transliterator latinJamo = Transliterator.getInstance("Latin-Jamo");
+        Transliterator jamoLatin = latinJamo.getInverse();
+
+        String[] CASE = {
+            // Column 1 is the latin text L1 to be fed to Latin-Jamo
+            // to yield output J.
+
+            // Column 2 is expected value of J.  J is fed to
+            // Jamo-Latin to yield output L2.
+
+            // Column 3 is expected value of L2.  If the expected
+            // value of L2 is L1, then column 3 is null.
+            "bab", "(Bi)(A)(Bf)", null,
+            "babb", "(Bi)(A)(Bf)(Bi)(EU)", "babbeu",
+            "babbba", "(Bi)(A)(Bf)(BB)(A)", null,
+            "bagg", "(Bi)(A)(GGf)", null,
+            "baggga", "(Bi)(A)(GGf)(Gi)(A)", null,
+            "bag'gga", "(Bi)(A)(Gf)(GGi)(A)", null,
+            "kabsa", "(Ki)(A)(Bf)(Si)(A)", null,
+            "kabska", "(Ki)(A)(BS)(Ki)(A)", null,
+            "gabsbka", "(Gi)(A)(BS)(Bi)(EU)(Ki)(A)", "gabsbeuka", // not (Kf)
+            "gga", "(GGi)(A)", null,
+            "bsa", "(Bi)(EU)(Si)(A)", "beusa",
+            "agg", "(IEUNG)(A)(GGf)", null,
+            "agga", "(IEUNG)(A)(Gf)(Gi)(A)", null,
+            "la", "(R)(A)", "ra",
+            "bs", "(Bi)(EU)(Sf)", "beus",
+        };
+
+        for (int i=0; i<CASE.length; i+=3) {
+            String jamo = nameToJamo(CASE[i+1]);
+            if (CASE[i+2] == null) {
+                expect(latinJamo, CASE[i], jamo, jamoLatin);
+            } else {
+                // Handle case where round-trip is expected to fail
+                expect(latinJamo, CASE[i], jamo);
+                expect(jamoLatin, jamo, CASE[i+2]);
+            }
+        }
+    }
+
+    // TransliteratorTest override
+    void expectAux(String tag, String summary, boolean pass,
+                   String expectedResult) {
+        super.expectAux(tag, jamoToName(summary),
+                        pass, jamoToName(expectedResult));
+    }
+
+    // UTILITIES
+
+    static final String[] JAMO_NAMES = {
+        "(Gi)", "\u1100",
+        "(GGi)", "\u1101",
+        "(Ni)", "\u1102",
+        "(Di)", "\u1103",
+        "(DD)", "\u1104",
+        "(R)", "\u1105",
+        "(Mi)", "\u1106",
+        "(Bi)", "\u1107",
+        "(BB)", "\u1108",
+        "(Si)", "\u1109",
+        "(SSi)", "\u110A",
+        "(IEUNG)", "\u110B",
+        "(Ji)", "\u110C",
+        "(JJ)", "\u110D",
+        "(Ci)", "\u110E",
+        "(Ki)", "\u110F",
+        "(Ti)", "\u1110",
+        "(Pi)", "\u1111",
+        "(Hi)", "\u1112",
+        
+        "(A)", "\u1161",
+        "(AE)", "\u1162",
+        "(YA)", "\u1163",
+        "(YAE)", "\u1164",
+        "(EO)", "\u1165",
+        "(E)", "\u1166",
+        "(YEO)", "\u1167",
+        "(YE)", "\u1168",
+        "(O)", "\u1169",
+        "(WA)", "\u116A",
+        "(WAE)", "\u116B",
+        "(OE)", "\u116C",
+        "(YO)", "\u116D",
+        "(U)", "\u116E",
+        "(WEO)", "\u116F",
+        "(WE)", "\u1170",
+        "(WI)", "\u1171",
+        "(YU)", "\u1172",
+        "(EU)", "\u1173",
+        "(YI)", "\u1174",
+        "(I)", "\u1175",
+
+        "(Gf)", "\u11A8",
+        "(GGf)", "\u11A9",
+        "(GS)", "\u11AA",
+        "(Nf)", "\u11AB",
+        "(NJ)", "\u11AC",
+        "(NH)", "\u11AD",
+        "(Df)", "\u11AE",
+        "(L)", "\u11AF",
+        "(LG)", "\u11B0",
+        "(LM)", "\u11B1",
+        "(LB)", "\u11B2",
+        "(LS)", "\u11B3",
+        "(LT)", "\u11B4",
+        "(LP)", "\u11B5",
+        "(LH)", "\u11B6",
+        "(Mf)", "\u11B7",
+        "(Bf)", "\u11B8",
+        "(BS)", "\u11B9",
+        "(Sf)", "\u11BA",
+        "(SSf)", "\u11BB",
+        "(NG)", "\u11BC",
+        "(Jf)", "\u11BD",
+        "(Cf)", "\u11BE",
+        "(Kf)", "\u11BF",
+        "(Tf)", "\u11C0",
+        "(Pf)", "\u11C1",
+        "(Hf)", "\u11C2",
+    };
+
+    static Hashtable JAMO_TO_NAME;
+    static Hashtable NAME_TO_JAMO;
+
+    static {
+        JAMO_TO_NAME = new Hashtable();
+        NAME_TO_JAMO = new Hashtable();
+        for (int i=0; i<JAMO_NAMES.length; i+=2) {
+            JAMO_TO_NAME.put(JAMO_NAMES[i+1], JAMO_NAMES[i]);
+            NAME_TO_JAMO.put(JAMO_NAMES[i], JAMO_NAMES[i+1]);
+        }
+    }
+
+    /**
+     * Convert short names to actual jamo.  E.g., "x(LG)y" returns
+     * "x\u11B0y".  See JAMO_NAMES for table of names.
+     */
+    static String nameToJamo(String input) {
+        StringBuffer buf = new StringBuffer();
+        for (int i=0; i<input.length(); ++i) {
+            char c = input.charAt(i);
+            if (c == '(') {
+                int j = input.indexOf(')', i+1);
+                if ((j-i) >= 2 && (j-i) <= 6) { // "(A)", "(IEUNG)"
+                    String jamo = (String) NAME_TO_JAMO.get(input.substring(i, j+1));
+                    if (jamo != null) {
+                        buf.append(jamo);
+                        i = j;
+                        continue;
+                    }
+                }
+            }
+            buf.append(c);
+        }
+        return buf.toString();
+    }
+
+    /**
+     * Convert jamo to short names.  E.g., "x\u11B0y" returns
+     * "x(LG)y".  See JAMO_NAMES for table of names.
+     */
+    static String jamoToName(String input) {
+        StringBuffer buf = new StringBuffer();
+        for (int i=0; i<input.length(); ++i) {
+            char c = input.charAt(i);
+            if (c >= 0x1100 && c <= 0x11C2) {
+                String name = (String) JAMO_TO_NAME.get(input.substring(i, i+1));
+                if (name != null) {
+                    buf.append(name);
+                    continue;
+                }
+            }
+            buf.append(c);
+        }
+        return buf.toString();
+    }
+}
diff --git a/icu4j/src/com/ibm/test/translit/RoundTripTest.java b/icu4j/src/com/ibm/test/translit/RoundTripTest.java
index 69576e65e9a..9d4b7183a9f 100755
--- a/icu4j/src/com/ibm/test/translit/RoundTripTest.java
+++ b/icu4j/src/com/ibm/test/translit/RoundTripTest.java
@@ -49,14 +49,15 @@ public class RoundTripTest extends TestFmwk {
         Test t = new Test("Latin-Jamo", 
           TestUtility.LATIN_SCRIPT, TestUtility.JAMO_SCRIPT);
         t.setErrorLimit(200); // Don't run full test -- too long
-        t.test(null, null, this);
+        //t.test("[[a-z]-[fqvxz]]", null, this);
+        t.test("[a-z]", null, this);
     }
 
     public void TestJamoHangul() throws IOException, ParseException {
         Test t = new Test("Latin-Jamo;Jamo-Hangul", 
           TestUtility.LATIN_SCRIPT, TestUtility.HANGUL_SCRIPT);
         t.setErrorLimit(50); // Don't run full test -- too long
-        t.test(null, null, this);
+        t.test("[a-z]", null, this);
     }
 
     public void TestGreek() throws IOException, ParseException {
diff --git a/icu4j/src/com/ibm/text/resources/Transliterator_Latin_Jamo.utf8.txt b/icu4j/src/com/ibm/text/resources/Transliterator_Latin_Jamo.utf8.txt
index 6b28cc7a7bc..489386d7cec 100755
--- a/icu4j/src/com/ibm/text/resources/Transliterator_Latin_Jamo.utf8.txt
+++ b/icu4j/src/com/ibm/text/resources/Transliterator_Latin_Jamo.utf8.txt
@@ -2,741 +2,372 @@
 # Copyright (c) 1999-2001, International Business Machines
 # Corporation and others.  All Rights Reserved.
 #--------------------------------------------------------------------
-# Date: Tue Jan 23 12:18:45 2001
-#--------------------------------------------------------------------
 
 # Latin-Jamo
 
+# Transliteration from Latin characters to Korean script is done in
+# two steps: Latin to Jamo, then Jamo to Hangul.  The Jamo-Hangul
+# transliteration is done algorithmically following Unicode 3.0
+# section 3.11.  This file implements the Latin to Jamo
+# transliteration using rules.
 
-# VARIABLES
+# Jamo occupy the block 1100-11FF.  Within this block there are three
+# groups of characters: initial consonants or choseong (I), medial
+# vowels or jungseong (M), and trailing consonants or jongseong (F).
+# Standard Korean syllables are of the form I+M+F*.
 
-$initial=[ᄀ-ᅟ];
-$INITIAL=[bcdghjklmnpst];
-$medial=[ᅠ-ᆧ];
-$MEDIAL=[aeiou]; # as a left context
-$comp_med=[ᅠᅶ-ᆧ]; # compound medials and filler
-$final=[ᆨ-ᇹ]; # added - aliu
-$vowel=[aeiouwy$medial];
-# following line used to read "..$medial$final]"
-# assume this was a typo - liu
-$consonant=[bcdfghjklmnpqrstvxz$initial$final];
-$ye_=[yeYE];
-$ywe_=[yweYWE];
-$yw_=[ywYW];
-$nl_=[nlNL];
-$gnl_=[gnlGNL];
-$lsgb_=[lsgbLSGB];
-$ywao_=[ywaoYWAO];
-$bl_=[blBL];
+# Section 3.11 describes the use of 'filler' jamo to convert
+# nonstandard syllables to standard form: the choseong filler 115F and
+# the jungseong filler 1160.  In this transliterator, we will not use
+# 115F or 1160.
 
-### $ieung = ᄋ;
+# We will, however, insert two 'null' jamo to make foreign words
+# conform to Korean syllable structure.  These are the null initial
+# consonant 110B (IEUNG) and the null vowel 1173 (EU).  In Latin text,
+# we will use the apostrophe in order to disambiguate strings,
+# e.g. "kan'ggan" (initial GG) vs. "kanggan" (final NG + initial G).
 
-# RULES
+# We will not use all of the characters in the jamo block.  We will
+# only use the 19 initials, 21 medials, and 27 finals possessing a
+# jamo short name as defined in section 4.4 of the Unicode book.
 
-# Hangul structure is IMF or IM
-# So you can have, because of adjacent sequences
-# IM, but not II or IF
-# MF or MI, but not MM
-# FI, but not FF or FM
+#----------------------------------------------------------------------
+# Variables
 
-# For English, we just have C or V.
-# To generate valid Hangul:
-# Vowels:
-# We insert IEUNG between VV, and otherwise map V to M
-# We also insert IEUNG if there is no
-# Consonants:
-# We don't break doubles
-# Cases like lmgg, we have to break at lm
-# So to guess whether a consonant is I or F
-# we map all C's to F, except when followed by a vowel, e.g.
-# X[{vowel}>CHOSEONG (initial)
-# X>JONGSEONG (final)
+# Some latin consonants or consonant pairs only occur as initials, and
+# some only as finals, but some occur as both.  This makes some jamo
+# consonants ambiguous when transliterated into latin.
+#   Initial only: IEUNG BB DD JJ R
+#   Final only: BS GS L LB LG LH LM LP LS LT NG NH NJ
+#   Initial and Final: B C D G GG H J K M N P S SS T
 
-# Below, insert an empty consonant in front of a vowel, if there is no Initial in front.
+  $Gi = \u1100;
+  $GGi = \u1101;
+  $Ni = \u1102;
+  $Di = \u1103;
+  $DD = \u1104;
+  $R = \u1105;
+  $Mi = \u1106;
+  $Bi = \u1107;
+  $BB = \u1108;
+  $Si = \u1109;
+  $SSi = \u110A;
+  $IEUNG = \u110B; # null initial, inserted during Latin-Jamo
+  $Ji = \u110C;
+  $JJ = \u110D;
+  $Ci = \u110E;
+  $Ki = \u110F;
+  $Ti = \u1110;
+  $Pi = \u1111;
+  $Hi = \u1112;
 
+  $A = \u1161;
+  $AE = \u1162;
+  $YA = \u1163;
+  $YAE = \u1164;
+  $EO = \u1165;
+  $E = \u1166;
+  $YEO = \u1167;
+  $YE = \u1168;
+  $O = \u1169;
+  $WA = \u116A;
+  $WAE = \u116B;
+  $OE = \u116C;
+  $YO = \u116D;
+  $U = \u116E;
+  $WEO = \u116F;
+  $WE = \u1170;
+  $WI = \u1171;
+  $YU = \u1172;
+  $EU = \u1173; # null medial, inserted during Latin-Jamo
+  $YI = \u1174;
+  $I = \u1175;
 
-# General strategy.
-#
-# 1. We support both the normal Jamo block, 1100 - 117F, and the
-# compatibility block, 3130 - 318F.  The former uses lowercase latin;
-# the latter uses uppercase.  See notes below for details of the
-# compatibility block.  Remaining items in this list pertain to the
-# normal Jamo block.
-#
-# 2. Canonical syllables should transliterate without special
-# characters.  Canonical syllables are either IMF or IM.
-#
-# 3. We want to support round-trip integrity from jamo to latin and back
-# to Jamo.  To do this we have to mark the jamo with special characters
-# when they occur in non-canonical positions.
-#
-# 4. When initial jamo occur in a non-canonical position, they are
-# marked with a leading '['.
-#
-# 5. When final jamo occur in a non-canonical position, they are marked
-# with a trailing ']'.
-#
-# 6. When medial jamo occur in a non-canonical position, they are marked
-# with a leading '~'.
-#
-# 7. Compound jamo characters are handled by enclosing them in
-# parentheses.  Initials are '((x)', medials are '(x)', and finals are
-# '(x))'.
-#
-# 8. Disambiguation of 'g' + 'g' vs. 'gg' is accomplished by inserting a
-# '' character between them.
-#
-# 9. IEUNG is used to mark medials not occuring after initials.
-# Isolated IEUNG is transliterated as a back tick.
-#
-# 10. Some old special case and completeness rules have been commented
-# out.  These can be reintroduced (and the existing rules modified as
-# needed) so long as round-trip integrity is maintained.
+  $Gf = \u11A8;
+  $GGf = \u11A9;
+  $GS = \u11AA;
+  $Nf = \u11AB;
+  $NJ = \u11AC;
+  $NH = \u11AD;
+  $Df = \u11AE;
+  $L = \u11AF;
+  $LG = \u11B0;
+  $LM = \u11B1;
+  $LB = \u11B2;
+  $LS = \u11B3;
+  $LT = \u11B4;
+  $LP = \u11B5;
+  $LH = \u11B6;
+  $Mf = \u11B7;
+  $Bf = \u11B8;
+  $BS = \u11B9;
+  $Sf = \u11BA;
+  $SSf = \u11BB;
+  $NG = \u11BC;
+  $Jf = \u11BD;
+  $Cf = \u11BE;
+  $Kf = \u11BF;
+  $Tf = \u11C0;
+  $Pf = \u11C1;
+  $Hf = \u11C2;
+  
+  $jamoInitial = [\u1100-\u1112];
 
-# We use the uppercase latin letters for the compatibility Jamo
-# U+3130 - U+318F.  The following rules are generated
-# programmatically by a perl script that analyzes the Unicode
-# database.  These rules are much simpler because there are no
-# separate code points for initial vs. final consonants, so no
-# contextual rules are needed.  The one wrinkle is, as usual, the
-# need to distinguish doubles from two singles, that is, GG vs G G.
-# The perl script finds these special cases by exhaustive search and
-# adds only the minimal rules needed to resolve these cases.  The one
-# modification that is made by hand is to replace '' with '/' so as
-# not to conflict with the normal IEUNG in the standard Jamo range. -
-# liu
-A '' <> {ㅏ} [ㅓㅡㅔ];
-B '' <> {ㅂ} [ㅂㅃ];
-D '' <> {ㄷ} [ㄷㄸ];
-E '' <> {ㅔ} [ㅚㅗㅜ];
-G '' <> {ㄱ} [ㄲㄳㄱㅆㅅ];
-J '' <> {ㅈ} [ㅉㅈ];
-L '' <> {ㄹ} [ㄲㄳㄱㅁㅂㅃㅆㅅㅌㅍ];
-N '' <> {ㄴ} [ㅉㅈㅎ];
-O '' <> {ㅗ} [ㅓㅡㅔ];
-S '' <> {ㅅ} [ㅆㅅ];
-WA '' <> {ㅘ} [ㅓㅡㅔ];
-WE '' <> {ㅞ} [ㅚㅗ];
-YA '' <> {ㅑ} [ㅓㅡㅔ];
-YE '' <> {ㅖ} [ㅚㅗ];
-YU <> ㅠ;
-YO <> ㅛ;
-YI <> ㅢ;
-YEO <> ㅕ;
-YE <> ㅖ;
-YAE <> ㅒ;
-YA <> ㅑ;
-WI <> ㅟ;
-WEO <> ㅝ;
-WE <> ㅞ;
-WAE <> ㅙ;
-WA <> ㅘ;
-U <> ㅜ;
-T <> ㅌ;
-S S <> ㅆ;
-S <> ㅅ;
-P <> ㅍ;
-OE <> ㅚ;
-O <> ㅗ;
-N J <> ㄵ;
-N H <> ㄶ;
-N <> ㄴ;
-M <> ㅁ;
-L T <> ㄾ;
-L S <> ㄽ;
-L P <> ㄿ;
-L M <> ㄻ;
-L G <> ㄺ;
-L B <> ㄼ;
-L <> ㄹ;
-K <> ㅋ;
-J J <> ㅉ;
-J <> ㅈ;
-I <> ㅣ;
-H <> ㅎ;
-G S <> ㄳ;
-G G <> ㄲ;
-G <> ㄱ;
-EU <> ㅡ;
-EO <> ㅓ;
-E <> ㅔ;
-D D <> ㄸ;
-D <> ㄷ;
-C <> ㅊ;
-B B <> ㅃ;
-B <> ㅂ;
-AE <> ㅐ;
-A <> ㅏ;
-'/' <> ㅇ;
-'(' YU YEO ')' <> ㆊ;
-'(' YU YE ')' <> ㆋ;
-'(' YU I ')' <> ㆌ;
-'(' YR ')' <> ㆆ;
-'(' YO YAE ')' <> ㆈ;
-'(' YO YA ')' <> ㆇ;
-'(' YO I ')' <> ㆉ;
-'(' YES S ')' <> ㆂ;
-'(' YES PAN ')' <> ㆃ;
-'(' YES ')' <> ㆁ;
-'(' S N ')' <> ㅻ;
-'(' S J ')' <> ㅾ;
-'(' S G ')' <> ㅺ;
-'(' S D ')' <> ㅼ;
-'(' S B ')' <> ㅽ;
-'(' PAN ')' <> ㅿ;
-'(' P '' ')' <> ㆄ;
-'(' N S ')' <> ㅧ;
-'(' N PAN ')' <> ㅨ;
-'(' N N ')' <> ㅥ;
-'(' N D ')' <> ㅦ;
-'(' M S ')' <> ㅯ;
-'(' M PAN ')' <> ㅰ;
-'(' M B ')' <> ㅮ;
-'(' M '' ')' <> ㅱ;
-'(' L YR ')' <> ㅭ;
-'(' L PAN ')' <> ㅬ;
-'(' L H ')' <> ㅀ;
-'(' L G S ')' <> ㅩ;
-'(' L D ')' <> ㅪ;
-'(' L B S ')' <> ㅫ;
-'(' HJF ')' <> ㅤ;
-'(' H H ')' <> ㆅ;
-'(' B T ')' <> ㅷ;
-'(' B S G ')' <> ㅴ;
-'(' B S D ')' <> ㅵ;
-'(' B S ')' <> ㅄ;
-'(' B J ')' <> ㅶ;
-'(' B G ')' <> ㅲ;
-'(' B D ')' <> ㅳ;
-'(' B B '' ')' <> ㅹ;
-'(' B '' ')' <> ㅸ;
-'(' AR I ')' <> ㆎ;
-'(' AR ')' <> ㆍ;
-'(' '' '' ')' <> ㆀ;
+  $jamoMedial = [\u1161-\u1175];
 
-# APOSTROPHE
+  # Any character in the latin transliteration of a medial
+  $latinMedial = [aeiouwy];
 
-# As always, an apostrophe is used to separate digraphs into
-# singles. That is, if you really wanted [KAN][GGAN], instead
-# of [KANG][GAN] you would write "kan'ggan".
+  # The last character of the latin transliteration of a medial
+  $latinMedialEnd = [aeiou];
 
-# Rules for inserting ' when mapping separated digraphs back
-# from Hangul to Latin. Catch every letter that can be the
-# LAST of a digraph (or multigraph) AND first of an initial
+#----------------------------------------------------------------------
+# Jamo-Latin
 
-# special insertion for funny sequences of vowels, and for empty consonant
+# Jamo to latin is very simple, since it is the latin that is
+# ambiguous.  Most rules are straightforward, and we encode them below
+# as simple add-on back rules, e.g.:
 
-#  + "'' < l{ }ᇀ;"      // hangul jongseong thieuth
-#  + "'' < $lsgb_{}ᆺ;" // hangul jongseong sios
-#  + "'' < l{ }ᇁ;"      // hangul jongseong phieuph
-#  + "'' < l{ }ᆷ;"      // hangul jongseong mieum
-#  + "'' < n{ }ᆽ;"      // hangul jongseong cieuc
-#  + "'' < $nl_{}ᇂ;"   // hangul jongseong hieuh
-#  + "'' < $gnl_{}ᆩ;"  // hangul jongseong ssangkiyeok
-#  + "'' < $bl_{}ᆸ;"   // hangul jongseong pieup
-#  + "'' < d{ }ᆮ;"      // hangul jongseong tikeut
-#
-#  + "'' < $ye_{}ᅮ;"   // hangul jungseong u
-#  + "'' < $ywe_{}ᅩ;"  // hangul jungseong o
-#  + "'' < $yw_{}ᅵ;"   // hangul jungseong i
-#  + "'' < $ywao_{}ᅦ;" // hangul jungseong e
-#  + "'' < $yw_{}ᅡ;"   // hangul jungseong a
-#
-#  + "'' < l{ }ᄐ;"      // hangul choseong thieuth
-#  + "'' < $lsgb_{}ᄊ;" // hangul choseong ssangsios
-#  + "'' < $lsgb_{}ᄉ;" // hangul choseong sios
-#  + "'' < l{ }ᄑ;"      // hangul choseong phieuph
-#  + "'' < l{ }ᄆ;"      // hangul choseong mieum
-#  + "'' < n{ }ᄌ;"      // hangul choseong cieuc
-#  + "'' < n{ }ᄍ;"
-#  + "'' < $nl_{}ᄒ;"   // hangul choseong hieuh
-#  + "'' < $gnl_{}ᄁ;"  // hangul choseong ssangkiyeok
-#  + "'' < $gnl_{}ᄀ;"  // hangul choseong kiyeok
-#  + "'' < d{ }ᄃ;"      // hangul choseong tikeut
-#  + "'' < d{ }ᄄ;"
-#  + "'' < $bl_{}ᄇ;"   // hangul choseong pieup
-#  + "'' < $bl_{}ᄈ;"
+#   $jamoMedial {bs} > $BS;
 
-# We transliterate the compound Jamo code points using ((x) for
-# initials, (x) for medials, and (x)) for finals. - liu
- '((' n g ')' <> ᄓ;
- '((' n n ')' <> ᄔ;
- '((' n d ')' <> ᄕ;
- '((' n b ')' <> ᄖ;
- '((' d g ')' <> ᄗ;
- '((' l n ')' <> ᄘ;
- '((' l l ')' <> ᄙ;
- '((' l h ')' <> ᄚ;
- '((' l '' ')' <> ᄛ;
- '((' m b ')' <> ᄜ;
- '((' m '' ')' <> ᄝ;
- '((' b g ')' <> ᄞ;
- '((' b n ')' <> ᄟ;
- '((' b d ')' <> ᄠ;
- '((' b s ')' <> ᄡ;
- '((' b s g ')' <> ᄢ;
- '((' b s d ')' <> ᄣ;
- '((' b s b ')' <> ᄤ;
- '((' b s s ')' <> ᄥ;
- '((' b s j ')' <> ᄦ;
- '((' b j ')' <> ᄧ;
- '((' b c ')' <> ᄨ;
- '((' b t ')' <> ᄩ;
- '((' b p ')' <> ᄪ;
- '((' b '' ')' <> ᄫ;
- '((' b b '' ')' <> ᄬ;
- '((' s g ')' <> ᄭ;
- '((' s n ')' <> ᄮ;
- '((' s d ')' <> ᄯ;
- '((' s l ')' <> ᄰ;
- '((' s m ')' <> ᄱ;
- '((' s b ')' <> ᄲ;
- '((' s b g ')' <> ᄳ;
- '((' s s s ')' <> ᄴ;
- '((' s '' ')' <> ᄵ;
- '((' s j ')' <> ᄶ;
- '((' s c ')' <> ᄷ;
- '((' s k ')' <> ᄸ;
- '((' s t ')' <> ᄹ;
- '((' s p ')' <> ᄺ;
- '((' s h ')' <> ᄻ;
- '((' chs ')' <> ᄼ;
- '((' chs chs ')' <> ᄽ;
- '((' ces ')' <> ᄾ;
- '((' ces ces ')' <> ᄿ;
- '((' pan ')' <> ᅀ;
- '((' '' g ')' <> ᅁ;
- '((' '' d ')' <> ᅂ;
- '((' '' m ')' <> ᅃ;
- '((' '' b ')' <> ᅄ;
- '((' '' s ')' <> ᅅ;
- '((' '' pan ')' <> ᅆ;
- '((' '' '' ')' <> ᅇ;
- '((' '' j ')' <> ᅈ;
- '((' '' c ')' <> ᅉ;
- '((' '' t ')' <> ᅊ;
- '((' '' p ')' <> ᅋ;
- '((' yes ')' <> ᅌ;
- '((' j '' ')' <> ᅍ;
- '((' chc ')' <> ᅎ;
- '((' chc chc ')' <> ᅏ;
- '((' cec ')' <> ᅐ;
- '((' cec cec ')' <> ᅑ;
- '((' c k ')' <> ᅒ;
- '((' c h ')' <> ᅓ;
- '((' cch ')' <> ᅔ;
- '((' ceh ')' <> ᅕ;
- '((' p b ')' <> ᅖ;
- '((' p '' ')' <> ᅗ;
- '((' h h ')' <> ᅘ;
- '((' yr ')' <> ᅙ;
- '((' hcf ')' <> ᅟ;
- '(' ahjf ')' <> ᅠ; # must start with vowel, hence 'a' + hjf
- '(' a o ')' <> ᅶ;
- '(' a u ')' <> ᅷ;
- '(' ya o ')' <> ᅸ;
- '(' ya yo ')' <> ᅹ;
- '(' eo o ')' <> ᅺ;
- '(' eo u ')' <> ᅻ;
- '(' eo eu ')' <> ᅼ;
- '(' yeo o ')' <> ᅽ;
- '(' yeo u ')' <> ᅾ;
- '(' o eo ')' <> ᅿ;
- '(' o e ')' <> ᆀ;
- '(' o ye ')' <> ᆁ;
- '(' o o ')' <> ᆂ;
- '(' o u ')' <> ᆃ;
- '(' yo ya ')' <> ᆄ;
- '(' yo yae ')' <> ᆅ;
- '(' yo yeo ')' <> ᆆ;
- '(' yo o ')' <> ᆇ;
- '(' yo i ')' <> ᆈ;
- '(' u a ')' <> ᆉ;
- '(' u ae ')' <> ᆊ;
- '(' u eo eu ')' <> ᆋ;
- '(' u ye ')' <> ᆌ;
- '(' u u ')' <> ᆍ;
- '(' yu a ')' <> ᆎ;
- '(' yu eo ')' <> ᆏ;
- '(' yu e ')' <> ᆐ;
- '(' yu yeo ')' <> ᆑ;
- '(' yu ye ')' <> ᆒ;
- '(' yu u ')' <> ᆓ;
- '(' yu i ')' <> ᆔ;
- '(' eu u ')' <> ᆕ;
- '(' eu eu ')' <> ᆖ;
- '(' yi u ')' <> ᆗ;
- '(' i a ')' <> ᆘ;
- '(' i ya ')' <> ᆙ;
- '(' i o ')' <> ᆚ;
- '(' i u ')' <> ᆛ;
- '(' i eu ')' <> ᆜ;
- '(' i ar ')' <> ᆝ;
- '(' ar ')' <> ᆞ;
- '(' ar eo ')' <> ᆟ;
- '(' ar u ')' <> ᆠ;
- '(' ar i ')' <> ᆡ;
- '(' ar ar ')' <> ᆢ;
- '(' g l '))' <> ᇃ;
- '(' g s g '))' <> ᇄ;
- '(' n g '))' <> ᇅ;
- '(' n d '))' <> ᇆ;
- '(' n s '))' <> ᇇ;
- '(' n pan '))' <> ᇈ;
- '(' n t '))' <> ᇉ;
- '(' d g '))' <> ᇊ;
- '(' d l '))' <> ᇋ;
- '(' l g s '))' <> ᇌ;
- '(' l n '))' <> ᇍ;
- '(' l d '))' <> ᇎ;
- '(' l d h '))' <> ᇏ;
- '(' l l '))' <> ᇐ;
- '(' l m g '))' <> ᇑ;
- '(' l m s '))' <> ᇒ;
- '(' l b s '))' <> ᇓ;
- '(' l b h '))' <> ᇔ;
- '(' l b ng '))' <> ᇕ;
- '(' l s s '))' <> ᇖ;
- '(' l pan '))' <> ᇗ;
- '(' l k '))' <> ᇘ;
- '(' l yr '))' <> ᇙ;
- '(' m g '))' <> ᇚ;
- '(' m l '))' <> ᇛ;
- '(' m b '))' <> ᇜ;
- '(' m s '))' <> ᇝ;
- '(' m s s '))' <> ᇞ;
- '(' m pan '))' <> ᇟ;
- '(' m c '))' <> ᇠ;
- '(' m h '))' <> ᇡ;
- '(' m ng '))' <> ᇢ;
- '(' b l '))' <> ᇣ;
- '(' b p '))' <> ᇤ;
- '(' b h '))' <> ᇥ;
- '(' b ng '))' <> ᇦ;
- '(' s g '))' <> ᇧ;
- '(' s d '))' <> ᇨ;
- '(' s l '))' <> ᇩ;
- '(' s b '))' <> ᇪ;
- '(' pan '))' <> ᇫ;
- '(' ng g '))' <> ᇬ;
- '(' ng g g '))' <> ᇭ;
- '(' ng ng '))' <> ᇮ;
- '(' ng k '))' <> ᇯ;
- '(' yes '))' <> ᇰ;
- '(' yes s '))' <> ᇱ;
- '(' yes pan '))' <> ᇲ;
- '(' p b '))' <> ᇳ;
- '(' p ng '))' <> ᇴ;
- '(' h n '))' <> ᇵ;
- '(' h l '))' <> ᇶ;
- '(' h m '))' <> ᇷ;
- '(' h b '))' <> ᇸ;
- '(' yr '))' <> ᇹ;
+# becomes
 
+#   $jamoMedial {bs} <> $BS;
 
-# INITIALS
+# Furthermore, we don't care about the ordering for Jamo-Latin because
+# we are going from single characters, so we can very easily piggyback
+# on the Latin-Jamo.
 
-# Added }$vowel post context - liu
-bb}$vowel<>ᄈ } $vowel;
-jj}$vowel<>ᄍ } $vowel;
-dd}$vowel<>ᄄ } $vowel;
-t }$vowel<>ᄐ } $vowel; # hangul choseong thieuth
-ss}$vowel<>ᄊ } $vowel; # hangul choseong ssangsios
-s }$vowel<>ᄉ } $vowel; # hangul choseong sios
-p }$vowel<>ᄑ } $vowel; # hangul choseong phieuph
-n }$vowel<>ᄂ } $vowel; # hangul choseong nieun
-m }$vowel<>ᄆ } $vowel; # hangul choseong mieum
-l }$vowel<>ᄅ } $vowel; # hangul choseong rieul
-k }$vowel<>ᄏ } $vowel; # hangul choseong khieukh
-j }$vowel<>ᄌ } $vowel; # hangul choseong cieuc
-h }$vowel<>ᄒ } $vowel; # hangul choseong hieuh
-gg}$vowel<>ᄁ } $vowel; # hangul choseong ssangkiyeok
-g }$vowel<>ᄀ } $vowel; # hangul choseong kiyeok
-d }$vowel<>ᄃ } $vowel; # hangul choseong tikeut
-c }$vowel<>ᄎ } $vowel; # hangul choseong chieuch
-b }$vowel<>ᄇ } $vowel; # hangul choseong pieup
+# Apostrophe insertion.  We insert apostrophes only for triple
+# consonants; double consonants between vowels are always split so
+# that "axye" yields A Xf Yi E if possible.  For three (or more)
+# consonants "axyz" we insert an apostrophe between "x" and "y" if
+# XYf, Xf, and YZi all exist, and we have A Xf YZi.  This prevents the
+# reverse transliteration to A XYf.
 
-# Take care of initial-compound medial - '(' $vowel - liu
-bb} '(' $vowel <> ᄈ } $comp_med;
-jj} '(' $vowel <> ᄍ } $comp_med;
-dd} '(' $vowel <> ᄄ } $comp_med;
-t } '(' $vowel <> ᄐ } $comp_med; # hangul choseong thieuth
-ss} '(' $vowel <> ᄊ } $comp_med; # hangul choseong ssangsios
-s } '(' $vowel <> ᄉ } $comp_med; # hangul choseong sios
-p } '(' $vowel <> ᄑ } $comp_med; # hangul choseong phieuph
-n } '(' $vowel <> ᄂ } $comp_med; # hangul choseong nieun
-m } '(' $vowel <> ᄆ } $comp_med; # hangul choseong mieum
-l } '(' $vowel <> ᄅ } $comp_med; # hangul choseong rieul
-k } '(' $vowel <> ᄏ } $comp_med; # hangul choseong khieukh
-j } '(' $vowel <> ᄌ } $comp_med; # hangul choseong cieuc
-h } '(' $vowel <> ᄒ } $comp_med; # hangul choseong hieuh
-gg} '(' $vowel <> ᄁ } $comp_med; # hangul choseong ssangkiyeok
-g } '(' $vowel <> ᄀ } $comp_med; # hangul choseong kiyeok
-d } '(' $vowel <> ᄃ } $comp_med; # hangul choseong tikeut
-c } '(' $vowel <> ᄎ } $comp_med; # hangul choseong chieuch
-b } '(' $vowel <> ᄇ } $comp_med; # hangul choseong pieup
+# For vowels the rule is similar.  (We shouldn't really see long
+# strings of medials, but if we do, we need to disambiguate them.)  If
+# there is a vowel "ae" such that "a" by itself and "e" by itself are
+# vowels, then the rule "'' < a {} [ E ];" must be used to introduce
+# an apostrophe between single "a" and single "e".  For vowels of the
+# form "aei", in theory both "ae" + "i" and "a" + "ei" must be tested,
+# but in practice only the former occurs.
 
-# Mark non-canonical initials with '[' - liu
-'[' bb <> ᄈ;
-'[' jj <> ᄍ;
-'[' dd <> ᄄ;
-'[' t  <> ᄐ; # hangul choseong thieuth
-'[' ss <> ᄊ; # hangul choseong ssangsios
-'[' s  <> ᄉ; # hangul choseong sios
-'[' p  <> ᄑ; # hangul choseong phieuph
-'[' n  <> ᄂ; # hangul choseong nieun
-'[' m  <> ᄆ; # hangul choseong mieum
-'[' l  <> ᄅ; # hangul choseong rieul
-'[' k  <> ᄏ; # hangul choseong khieukh
-'[' j  <> ᄌ; # hangul choseong cieuc
-'[' h  <> ᄒ; # hangul choseong hieuh
-'[' gg <> ᄁ; # hangul choseong ssangkiyeok
-'[' g  <> ᄀ; # hangul choseong kiyeok
-'[' d  <> ᄃ; # hangul choseong tikeut
-'[' c  <> ᄎ; # hangul choseong chieuch
-'[' b  <> ᄇ; # hangul choseong pieup
+# These rules are generated programmatically.  These rules must occur
+# before all other Jamo-Latin rules.
 
+  '' < $latinMedialEnd b {} [$SSi];
+  '' < $latinMedialEnd g {} [$GGi $SSi];
+  '' < $latinMedialEnd l {} [$BB $GGi $SSi];
+  '' < $latinMedialEnd n {} [$GGi $JJ];
+  '' < $latinMedialEnd s {} [$SSi];
+  '' < e {} [$O $U];
+  '' < [o a] {} $E;
 
-# If we have gotten through to these rules, and we start with
-# a consonant, then the remaining mappings would be to F,
-# because must have CC (or C<non-letter>), not CV.
-# If we have F before us, then
-# we would end up with FF, which is wrong. The simplest fix is
-# to still make it an initial, but also insert an "u",
-# so we end up with F, I, u, and then continue with the C
+# The other complication is handling of IEUNG, which we do below,
+# together with the deletion of apostrophes.
 
-# special, only initial
-#  + "bb > 뿌;"  // bb u hangul choseong ssangpieup
-#  + "jj > 쭈;"  // jj u hangul choseong ssangcieuc
-#  + "dd > 뚜;"  // dd u hangul choseong ssangtikeut
+#----------------------------------------------------------------------
+# Latin-Jamo
 
-#  + "$final{ t > 투;"   // hangul choseong thieuth
-#  + "$final{ ss> 쑤;"   // hangul choseong ssangsios
-#  + "$final{ s > 수;"   // hangul choseong sios
-#  + "$final{ p > 푸;"   // hangul choseong phieuph
-#  + "$final{ n > 누;"   // hangul choseong nieun
-#  + "$final{ m > 무;"   // hangul choseong mieum
-#  + "$final{ l > 루;"   // hangul choseong rieul
-#  + "$final{ k > 쿠;"   // hangul choseong khieukh
-#  + "$final{ j > 주;"   // hangul choseong cieuc
-#  + "$final{ h > 후;"   // hangul choseong hieuh
-#  + "$final{ gg> 꾸;"   // hangul choseong ssangkiyeok
-#  + "$final{ g > 구;"   // hangul choseong kiyeok
-#  + "$final{ d > 두;"   // hangul choseong tikeut
-#  + "$final{ c > 추;"   // hangul choseong chieuch
-#  + "$final{ b > 부;"   // hangul choseong pieup
+# [Basic, context-free Jamo-Latin rules are embedded here too.  See
+# above.]
 
-# MEDIALS after INITIALS
+# Split digraphs: Given text of the form 'axye', where 'xy' is a final
+# digraph, 'x' is a final (by itself), 'y' is an initial, and 'a' and
+# 'e' are medials, we want to transliterate this as A Xf Yi E rather
+# than A XYf IEUNG E.  These rules are generated programmatically from
+# the jamo data.
 
-# MEDIALS (vowels) not after INITIALs
-# Added left $initial context - liu
-$initial{ yu <> $INITIAL{ ᅲ; # hangul jungseong yu
-$initial{ yo <> $INITIAL{ ᅭ; # hangul jungseong yo
-$initial{ yi <> $INITIAL{ ᅴ; # hangul jungseong yi
-$initial{ yeo<> $INITIAL{ ᅧ; # hangul jungseong yeo
-$initial{ ye <> $INITIAL{ ᅨ; # hangul jungseong ye
-$initial{ yae<> $INITIAL{ ᅤ; # hangul jungseong yae
-$initial{ ya <> $INITIAL{ ᅣ; # hangul jungseong ya
-$initial{ wi <> $INITIAL{ ᅱ; # hangul jungseong wi
-$initial{ weo<> $INITIAL{ ᅯ; # hangul jungseong weo
-$initial{ we <> $INITIAL{ ᅰ; # hangul jungseong we
-$initial{ wae<> $INITIAL{ ᅫ; # hangul jungseong wae
-$initial{ wa <> $INITIAL{ ᅪ; # hangul jungseong wa
-$initial{ u  <> $INITIAL{ ᅮ; # hangul jungseong u
-$initial{ oe <> $INITIAL{ ᅬ; # hangul jungseong oe
-$initial{ o  <> $INITIAL{ ᅩ; # hangul jungseong o
-$initial{ i  <> $INITIAL{ ᅵ; # hangul jungseong i
-$initial{ eu <> $INITIAL{ ᅳ; # hangul jungseong eu
-$initial{ eo <> $INITIAL{ ᅥ; # hangul jungseong eo
-$initial{ e  <> $INITIAL{ ᅦ; # hangul jungseong e
-$initial{ ae <> $INITIAL{ ᅢ; # hangul jungseong ae
-$initial{ a  <> $INITIAL{ ᅡ; # hangul jungseong a
+  $jamoMedial {b s} $latinMedial > $Bf $Si;
+  $jamoMedial {g g} $latinMedial > $Gf $Gi;
+  $jamoMedial {g s} $latinMedial > $Gf $Si;
+  $jamoMedial {l b} $latinMedial > $L $Bi;
+  $jamoMedial {l g} $latinMedial > $L $Gi;
+  $jamoMedial {l h} $latinMedial > $L $Hi;
+  $jamoMedial {l m} $latinMedial > $L $Mi;
+  $jamoMedial {l p} $latinMedial > $L $Pi;
+  $jamoMedial {l s} $latinMedial > $L $Si;
+  $jamoMedial {l t} $latinMedial > $L $Ti;
+  $jamoMedial {n g} $latinMedial > $Nf $Gi;
+  $jamoMedial {n h} $latinMedial > $Nf $Hi;
+  $jamoMedial {n j} $latinMedial > $Nf $Ji;
+  $jamoMedial {s s} $latinMedial > $Sf $Si;
 
-# Handle non-canonical isolated jungseong - liu
-'~'yu <> ᅲ; # hangul jungseong yu
-'~'yo <> ᅭ; # hangul jungseong yo
-'~'yi <> ᅴ; # hangul jungseong yi
-'~'yeo<> ᅧ; # hangul jungseong yeo
-'~'ye <> ᅨ; # hangul jungseong ye
-'~'yae<> ᅤ; # hangul jungseong yae
-'~'ya <> ᅣ; # hangul jungseong ya
-'~'wi <> ᅱ; # hangul jungseong wi
-'~'weo<> ᅯ; # hangul jungseong weo
-'~'we <> ᅰ; # hangul jungseong we
-'~'wae<> ᅫ; # hangul jungseong wae
-'~'wa <> ᅪ; # hangul jungseong wa
-'~'u  <> ᅮ; # hangul jungseong u
-'~'oe <> ᅬ; # hangul jungseong oe
-'~'o  <> ᅩ; # hangul jungseong o
-'~'i  <> ᅵ; # hangul jungseong i
-'~'eu <> ᅳ; # hangul jungseong eu
-'~'eo <> ᅥ; # hangul jungseong eo
-'~'e  <> ᅦ; # hangul jungseong e
-'~'ae <> ᅢ; # hangul jungseong ae
-'~'a  <> ᅡ; # hangul jungseong a
+# Single consonants are initials: Given text of the form 'axe', where
+# 'x' can be an initial or a final, and 'a' and 'e' are medials, we
+# want to transliterate it as A Xi E rather than A Xf IEUNG E.
 
-# MEDIALS (vowels) not after INITIALs
-# Changed from > to <> - liu
-yu <> ᄋ ᅲ; # hangul jungseong yu
-yo <> ᄋ ᅭ; # hangul jungseong yo
-yi <> ᄋ ᅴ; # hangul jungseong yi
-yeo<> ᄋ ᅧ; # hangul jungseong yeo
-ye <> ᄋ ᅨ; # hangul jungseong ye
-yae<> ᄋ ᅤ; # hangul jungseong yae
-ya <> ᄋ ᅣ; # hangul jungseong ya
-wi <> ᄋ ᅱ; # hangul jungseong wi
-weo<> ᄋ ᅯ; # hangul jungseong weo
-we <> ᄋ ᅰ; # hangul jungseong we
-wae<> ᄋ ᅫ; # hangul jungseong wae
-wa <> ᄋ ᅪ; # hangul jungseong wa
-u  <> ᄋ ᅮ; # hangul jungseong u
-oe <> ᄋ ᅬ; # hangul jungseong oe
-o  <> ᄋ ᅩ; # hangul jungseong o
-i  <> ᄋ ᅵ; # hangul jungseong i
-eu <> ᄋ ᅳ; # hangul jungseong eu
-eo <> ᄋ ᅥ; # hangul jungseong eo
-e  <> ᄋ ᅦ; # hangul jungseong e
-ae <> ᄋ ᅢ; # hangul jungseong ae
-a  <> ᄋ ᅡ; # hangul jungseong a
+  $jamoMedial {b} $latinMedial > $Bi;
+  $jamoMedial {c} $latinMedial > $Ci;
+  $jamoMedial {d} $latinMedial > $Di;
+  $jamoMedial {g} $latinMedial > $Gi;
+  $jamoMedial {h} $latinMedial > $Hi;
+  $jamoMedial {j} $latinMedial > $Ji;
+  $jamoMedial {k} $latinMedial > $Ki;
+  $jamoMedial {m} $latinMedial > $Mi;
+  $jamoMedial {n} $latinMedial > $Ni;
+  $jamoMedial {p} $latinMedial > $Pi;
+  $jamoMedial {s} $latinMedial > $Si;
+  $jamoMedial {t} $latinMedial > $Ti;
 
-\` <> ᄋ;
-# Moved down so as not to mask above rules - liu
-# + "'' < $consonant{ᄋ;" // insert a break between any consonant and the empty consonant.
-#  + "$medial{}$vowel<>ᄋ;"  // HANGUL CHOSEONG IEUNG
+# Finals: Attach a consonant that follows a medial to that medial.
+# Do this BEFORE mapping consonants to initials.  Longer keys must
+# precede shorter keys that they start with, e.g., the rule for 'bs'
+# must precede 'b'.
 
+# [BASIC Jamo-Latin FINALS handled here.  Order irrelevant within this
+# block for Jamo-Latin.]
 
-# FINALS
+  $jamoMedial {bs} <> $BS;
+  $jamoMedial {b} <> $Bf;
+  $jamoMedial {c} <> $Cf;
+  $jamoMedial {d} <> $Df;
+  $jamoMedial {gg} <> $GGf;
+  $jamoMedial {gs} <> $GS;
+  $jamoMedial {g} <> $Gf;
+  $jamoMedial {h} <> $Hf;
+  $jamoMedial {j} <> $Jf;
+  $jamoMedial {k} <> $Kf;
+  $jamoMedial {lb} <> $LB;
+  $jamoMedial {lg} <> $LG;
+  $jamoMedial {lh} <> $LH;
+  $jamoMedial {lm} <> $LM;
+  $jamoMedial {lp} <> $LP;
+  $jamoMedial {ls} <> $LS;
+  $jamoMedial {lt} <> $LT;
+  $jamoMedial {l} <> $L;
+  $jamoMedial {m} <> $Mf;
+  $jamoMedial {ng} <> $NG;
+  $jamoMedial {nh} <> $NH;
+  $jamoMedial {nj} <> $NJ;
+  $jamoMedial {n} <> $Nf;
+  $jamoMedial {p} <> $Pf;
+  $jamoMedial {ss} <> $SSf;
+  $jamoMedial {s} <> $Sf;
+  $jamoMedial {t} <> $Tf;
 
- '' t  <> $consonant { ᇀ; # hangul jongseong thieuth
- '' ss <> $consonant { ᆻ; # hangul jongseong ssangsios
- '' s  <> $consonant { ᆺ; # hangul jongseong sios
- '' p  <> $consonant { ᇁ; # hangul jongseong phieuph
- '' nj <> $consonant { ᆬ; # hangul jongseong nieun-cieuc
- '' nh <> $consonant { ᆭ; # hangul jongseong nieun-hieuh
- '' ng <> $consonant { ᆼ; # hangul jongseong ieung
- '' n  <> $consonant { ᆫ; # hangul jongseong nieun
- '' m  <> $consonant { ᆷ; # hangul jongseong mieum
- '' lt <> $consonant { ᆴ; # hangul jongseong rieul-thieuth
- '' ls <> $consonant { ᆳ; # hangul jongseong rieul-sios
- '' lp <> $consonant { ᆵ; # hangul jongseong rieul-phieuph
- '' lm <> $consonant { ᆱ; # hangul jongseong rieul-mieum
- '' lh <> $consonant { ᆶ; # hangul jongseong rieul-hieuh
- '' lg <> $consonant { ᆰ; # hangul jongseong rieul-kiyeok
- '' lb <> $consonant { ᆲ; # hangul jongseong rieul-pieup
- '' l  <> $consonant { ᆯ; # hangul jongseong rieul
- '' k  <> $consonant { ᆿ; # hangul jongseong khieukh
- '' j  <> $consonant { ᆽ; # hangul jongseong cieuc
- '' h  <> $consonant { ᇂ; # hangul jongseong hieuh
- '' gs <> $consonant { ᆪ; # hangul jongseong kiyeok-sios
- '' gg <> $consonant { ᆩ; # hangul jongseong ssangkiyeok
- '' g  <> $consonant { ᆨ; # hangul jongseong kiyeok
- '' d  <> $consonant { ᆮ; # hangul jongseong tikeut
- '' c  <> $consonant { ᆾ; # hangul jongseong chieuch
- '' bs <> $consonant { ᆹ; # hangul jongseong pieup-sios
- '' b  <> $consonant { ᆸ; # hangul jongseong pieup
+# Initials: Attach single consonant to following medial.  Do this
+# AFTER mapping finals.  Longer keys must precede shorter keys that
+# they start with, e.g., the rule for 'gg' must precede 'g'.
 
-t  ']'> ᇀ; # hangul jongseong thieuth
-ss ']'> ᆻ; # hangul jongseong ssangsios
-s  ']'> ᆺ; # hangul jongseong sios
-p  ']'> ᇁ; # hangul jongseong phieuph
-nj ']'> ᆬ; # hangul jongseong nieun-cieuc
-nh ']'> ᆭ; # hangul jongseong nieun-hieuh
-ng ']'> ᆼ; # hangul jongseong ieung
-n  ']'> ᆫ; # hangul jongseong nieun
-m  ']'> ᆷ; # hangul jongseong mieum
-lt ']'> ᆴ; # hangul jongseong rieul-thieuth
-ls ']'> ᆳ; # hangul jongseong rieul-sios
-lp ']'> ᆵ; # hangul jongseong rieul-phieuph
-lm ']'> ᆱ; # hangul jongseong rieul-mieum
-lh ']'> ᆶ; # hangul jongseong rieul-hieuh
-lg ']'> ᆰ; # hangul jongseong rieul-kiyeok
-lb ']'> ᆲ; # hangul jongseong rieul-pieup
-l  ']'> ᆯ; # hangul jongseong rieul
-k  ']'> ᆿ; # hangul jongseong khieukh
-j  ']'> ᆽ; # hangul jongseong cieuc
-h  ']'> ᇂ; # hangul jongseong hieuh
-gs ']'> ᆪ; # hangul jongseong kiyeok-sios
-gg ']'> ᆩ; # hangul jongseong ssangkiyeok
-g  ']'> ᆨ; # hangul jongseong kiyeok
-d  ']'> ᆮ; # hangul jongseong tikeut
-c  ']'> ᆾ; # hangul jongseong chieuch
-bs ']'> ᆹ; # hangul jongseong pieup-sios
-b  ']'> ᆸ; # hangul jongseong pieup
+# [BASIC Jamo-Latin INITIALS handled here.  Order irrelevant within
+# this block for Jamo-Latin.]
 
-$medial{ t  <> $MEDIAL{ ᇀ; # hangul jongseong thieuth
-$medial{ ss <> $MEDIAL{ ᆻ; # hangul jongseong ssangsios
-$medial{ s  <> $MEDIAL{ ᆺ; # hangul jongseong sios
-$medial{ p  <> $MEDIAL{ ᇁ; # hangul jongseong phieuph
-$medial{ nj <> $MEDIAL{ ᆬ; # hangul jongseong nieun-cieuc
-$medial{ nh <> $MEDIAL{ ᆭ; # hangul jongseong nieun-hieuh
-$medial{ ng <> $MEDIAL{ ᆼ; # hangul jongseong ieung
-$medial{ n  <> $MEDIAL{ ᆫ; # hangul jongseong nieun
-$medial{ m  <> $MEDIAL{ ᆷ; # hangul jongseong mieum
-$medial{ lt <> $MEDIAL{ ᆴ; # hangul jongseong rieul-thieuth
-$medial{ ls <> $MEDIAL{ ᆳ; # hangul jongseong rieul-sios
-$medial{ lp <> $MEDIAL{ ᆵ; # hangul jongseong rieul-phieuph
-$medial{ lm <> $MEDIAL{ ᆱ; # hangul jongseong rieul-mieum
-$medial{ lh <> $MEDIAL{ ᆶ; # hangul jongseong rieul-hieuh
-$medial{ lg <> $MEDIAL{ ᆰ; # hangul jongseong rieul-kiyeok
-$medial{ lb <> $MEDIAL{ ᆲ; # hangul jongseong rieul-pieup
-$medial{ l  <> $MEDIAL{ ᆯ; # hangul jongseong rieul
-$medial{ k  <> $MEDIAL{ ᆿ; # hangul jongseong khieukh
-$medial{ j  <> $MEDIAL{ ᆽ; # hangul jongseong cieuc
-$medial{ h  <> $MEDIAL{ ᇂ; # hangul jongseong hieuh
-$medial{ gs <> $MEDIAL{ ᆪ; # hangul jongseong kiyeok-sios
-$medial{ gg <> $MEDIAL{ ᆩ; # hangul jongseong ssangkiyeok
-$medial{ g  <> $MEDIAL{ ᆨ; # hangul jongseong kiyeok
-$medial{ d  <> $MEDIAL{ ᆮ; # hangul jongseong tikeut
-$medial{ c  <> $MEDIAL{ ᆾ; # hangul jongseong chieuch
-$medial{ bs <> $MEDIAL{ ᆹ; # hangul jongseong pieup-sios
-$medial{ b  <> $MEDIAL{ ᆸ; # hangul jongseong pieup
+  {gg} $latinMedial <> $GGi;
+  {g} $latinMedial <> $Gi;
+  {n} $latinMedial <> $Ni;
+  {dd} $latinMedial <> $DD;
+  {d} $latinMedial <> $Di;
+  {r} $latinMedial <> $R;
+  {m} $latinMedial <> $Mi;
+  {bb} $latinMedial <> $BB;
+  {b} $latinMedial <> $Bi;
+  {ss} $latinMedial <> $SSi;
+  {s} $latinMedial <> $Si;
+  {jj} $latinMedial <> $JJ;
+  {j} $latinMedial <> $Ji;
+  {c} $latinMedial <> $Ci;
+  {k} $latinMedial <> $Ki;
+  {t} $latinMedial <> $Ti;
+  {p} $latinMedial <> $Pi;
+  {h} $latinMedial <> $Hi;
 
-t  ']'< ᇀ; # hangul jongseong thieuth
-ss ']'< ᆻ; # hangul jongseong ssangsios
-s  ']'< ᆺ; # hangul jongseong sios
-p  ']'< ᇁ; # hangul jongseong phieuph
-nj ']'< ᆬ; # hangul jongseong nieun-cieuc
-nh ']'< ᆭ; # hangul jongseong nieun-hieuh
-ng ']'< ᆼ; # hangul jongseong ieung
-n  ']'< ᆫ; # hangul jongseong nieun
-m  ']'< ᆷ; # hangul jongseong mieum
-lt ']'< ᆴ; # hangul jongseong rieul-thieuth
-ls ']'< ᆳ; # hangul jongseong rieul-sios
-lp ']'< ᆵ; # hangul jongseong rieul-phieuph
-lm ']'< ᆱ; # hangul jongseong rieul-mieum
-lh ']'< ᆶ; # hangul jongseong rieul-hieuh
-lg ']'< ᆰ; # hangul jongseong rieul-kiyeok
-lb ']'< ᆲ; # hangul jongseong rieul-pieup
-l  ']'< ᆯ; # hangul jongseong rieul
-k  ']'< ᆿ; # hangul jongseong khieukh
-j  ']'< ᆽ; # hangul jongseong cieuc
-h  ']'< ᇂ; # hangul jongseong hieuh
-gs ']'< ᆪ; # hangul jongseong kiyeok-sios
-gg ']'< ᆩ; # hangul jongseong ssangkiyeok
-g  ']'< ᆨ; # hangul jongseong kiyeok
-d  ']'< ᆮ; # hangul jongseong tikeut
-c  ']'< ᆾ; # hangul jongseong chieuch
-bs ']'< ᆹ; # hangul jongseong pieup-sios
-b  ']'< ᆸ; # hangul jongseong pieup
+# Initial + Final: If we match the next rule, we have an initial
+# followed by a final consonant with no intervening medial.  We insert
+# the null vowel BEFORE the final to create a well-formed syllable.
+# (In the next rule we insert a null vowel AFTER an anomalous initial.)
 
-# extra English letters
+  $jamoInitial {} [bcdghjklmnpst] > $EU;
 
-#  + "z > |s;"
-#  //{ + "Z > |s;" } masked
-#  + "x > |ks;"
-#  + "X > |ks;"
-#  + "v > |b;"
-#  + "V > |b;"
-#  + "r > |l;"
-#  + "R > |l;"
-#  + "q > |k;"
-#  + "Q > |k;"
-#  + "f > |p;"
-#  + "F > |p;"
-#  //{ + "c > |k;" } masked
-#  + "C > |k;"
+# Initial + X: This block matches an initial consonant not followed by
+# a medial.  We insert the null vowel after it.  We handle double
+# initials explicitly here; for single initial consonants we insert EU
+# (as Latin) after them and let standard rules do the rest.
 
-#  + "y > ᅲ;"   // hangul jungseong yu
-#  + "w > ᅱ;"   // hangul jungseong wi
+# BREAKS ROUND TRIP INTEGRITY
+
+  gg > $GGi $EU;
+  dd > $DD $EU;
+  bb > $BB $EU;
+  ss > $SSi $EU;
+  jj > $JJ $EU;
+
+  ([bcdghjkmnprst]) > | $1 eu;
+
+# X + Final: Finally we have to deal with a consonant that can only be
+# interpreted as a final (not an initial) and which is preceded
+# neither by an initial nor a medial.  It sits at the start of a
+# syllable, where no final may occur.  Most are already handled by
+# the above rules.  'bs' splits into Bi EU Sf.  Similar for 'gs' 'ng'
+# 'nh' 'nj'.  The only problem is 'l' and digraphs starting with 'l'.
+# For this isolated case, we could add a null initial and medial,
+# which would give "la" => IEUNG EU L IEUNG A, for example.  A more
+# economical solution is to transliterate isolated "l" (that is,
+# initial "l") to "r".  (Other similar conversions of consonants that
+# occur neither as initials nor as finals are handled below.)
+
+  l > | r;
+
+# Medials.  If a medial is preceded by an initial, then we proceed
+# normally.  As usual, longer keys must precede shorter ones.
+
+# [BASIC Jamo-Latin MEDIALS handled here.  Order irrelevant within
+# this block for Jamo-Latin.]
+
+  $jamoInitial {ae} <> $AE;
+  $jamoInitial {a} <> $A;
+  $jamoInitial {eo} <> $EO;
+  $jamoInitial {eu} <> $EU;
+  $jamoInitial {e} <> $E;
+  $jamoInitial {i} <> $I;
+  $jamoInitial {oe} <> $OE;
+  $jamoInitial {o} <> $O;
+  $jamoInitial {u} <> $U;
+  $jamoInitial {wae} <> $WAE;
+  $jamoInitial {wa} <> $WA;
+  $jamoInitial {weo} <> $WEO;
+  $jamoInitial {we} <> $WE;
+  $jamoInitial {wi} <> $WI;
+  $jamoInitial {yae} <> $YAE;
+  $jamoInitial {ya} <> $YA;
+  $jamoInitial {yeo} <> $YEO;
+  $jamoInitial {ye} <> $YE;
+  $jamoInitial {yi} <> $YI;
+  $jamoInitial {yo} <> $YO;
+  $jamoInitial {yu} <> $YU;
+
+# We may see an anomalous isolated 'w' or 'y'.  In that case, we
+# interpret it as 'wi' and 'yu', respectively.
+
+# BREAKS ROUND TRIP INTEGRITY
+
+  $jamoInitial {w} > | wi;
+  $jamoInitial {y} > | yu;
+
+# Otherwise, insert a null consonant IEUNG before the medial (which is
+# still an untransliterated latin vowel).
+
+  ($latinMedial) > $IEUNG | $1;
+
+# Convert non-jamo latin consonants to equivalents.  These occur as
+# neither initials nor finals in jamo.  'l' occurs as a final, but not
+# an initial; it is handled above.  The following letters (left hand
+# side) will never be output by Jamo-Latin.
+
+  f > | p;
+  q > | k;
+  v > | b;
+  x > | ks;
+  z > | s;
+
+# Delete apostrophes (Latin-Jamo).
+
+  '' > ;
+
+# Delete null consonants (Jamo-Latin).  Do NOT delete null EU vowels,
+# since these may also occur in text.
+
+  < $IEUNG;
 
 # eof