From 6a4883a9a3c3cd45bf2b12814ed8f690f439ea0e Mon Sep 17 00:00:00 2001
From: Mark Davis <mark@macchiato.com>
Date: Mon, 12 Apr 2004 01:45:24 +0000
Subject: [PATCH] updated symbol table

X-SVN-Rev: 14929
---
 .../com/ibm/text/UCD/MakeUnicodeFiles.java    | 66 ++++++++++++++++---
 .../com/ibm/text/UCD/UnicodeInvariants.txt    | 60 +++++++++++++++++
 2 files changed, 118 insertions(+), 8 deletions(-)
 create mode 100644 tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt

diff --git a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
index f380a83f591..f175b5e531e 100644
--- a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
@@ -9,6 +9,7 @@ import java.lang.reflect.Field;
 import java.text.ParseException;
 import java.text.ParsePosition;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -61,7 +62,7 @@ public class MakeUnicodeFiles {
     
     public static void main(String[] args) throws IOException {
         //generateFile();
-        testInvariants(ToolUnicodePropertySource.make(Default.ucdVersion()));
+        testInvariants();
     }
 
     static class Format {
@@ -1105,15 +1106,60 @@ public class MakeUnicodeFiles {
         }       
     }
     
-    static Matcher invariantLine = Pattern.compile("([^=><!?])\\s*([=><!?])\\s*([^=><!?])").matcher("");
+    /**
+     * Chains several SymbolTables: each query tries them in order and returns the first non-null result.
+     * @author Davis
+     */
+    static class ChainedSymbolTable implements SymbolTable {
+        // TODO: add accessors?
+        private List symbolTables; // fixed-size List view of the constructor's array (see Arrays.asList)
+        /**
+         * Each SymbolTable is accessed in order by the other methods,
+         * so the first in the list is accessed first, etc.
+         * @param symbolTables the tables to consult, earliest first; the array is wrapped, not copied
+         */
+        ChainedSymbolTable(SymbolTable[] symbolTables) {
+            this.symbolTables = Arrays.asList(symbolTables);
+        }
+        public char[] lookup(String s) { // first non-null lookup(s) among the chained tables, else null
+            for (Iterator it = symbolTables.iterator(); it.hasNext();) {
+                SymbolTable st = (SymbolTable) it.next();
+                char[] result = st.lookup(s);
+                if (result != null) return result;
+            }
+            return null; // every chained table returned null
+        }
+
+        public UnicodeMatcher lookupMatcher(int ch) { // first non-null lookupMatcher(ch), else null
+            for (Iterator it = symbolTables.iterator(); it.hasNext();) {
+                SymbolTable st = (SymbolTable) it.next();
+                UnicodeMatcher result = st.lookupMatcher(ch);
+                if (result != null) return result;
+            }
+            return null; // every chained table returned null
+        }
+        
+        // Warning: the same pos is handed to each table in turn, so a table that returns null must leave pos unchanged!!
+        public String parseReference(String text, ParsePosition pos, int limit) {
+            for (Iterator it = symbolTables.iterator(); it.hasNext();) {
+                SymbolTable st = (SymbolTable) it.next();
+                String result = st.parseReference(text, pos, limit);
+                if (result != null) return result;
+            }
+            return null; // every chained table returned null
+        }
+    }
     
-    static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[=><!?]");
+    static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[\\= \\! \\? \\< \\> \u2264 \u2265 \u2282 \u2286 \u2283 \u2287]");
     
-    static void testInvariants(UnicodeProperty.Factory factory) throws IOException {
+    static void testInvariants() throws IOException {
         PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "UnicodeInvariantResults.txt");
+        out.write('\uFEFF'); // BOM
         BufferedReader in = BagFormatter.openUTF8Reader("", "UnicodeInvariants.txt");
         BagFormatter bf = new BagFormatter();
-        SymbolTable st = factory.getSymbolTable();
+        ChainedSymbolTable st = new ChainedSymbolTable(new SymbolTable[] {
+            ToolUnicodePropertySource.make("4.0.0").getSymbolTable("\u00D7"),
+            ToolUnicodePropertySource.make(Default.ucdVersion()).getSymbolTable("")});
         ParsePosition pp = new ParsePosition(0);
         int parseErrorCount = 0;
         int testFailureCount = 0;
@@ -1122,6 +1168,7 @@ public class MakeUnicodeFiles {
             String leftSide = null;
             String line = in.readLine();
             if (line == null) break;
+            if (line.startsWith("\uFEFF")) line = line.substring(1);
             line = line.trim();
             int pos = line.indexOf('#');
             if (pos >= 0) line = line.substring(0,pos).trim();
@@ -1137,7 +1184,8 @@ public class MakeUnicodeFiles {
                 eatWhitespace(line, pp);
                 relation = line.charAt(pp.getIndex());
                 if (!INVARIANT_RELATIONS.contains(relation)) {
-                    throw new ParseException("Invalid relation", pp.getIndex());
+                    throw new ParseException("Invalid relation, must be one of " + INVARIANT_RELATIONS.toPattern(false),
+                        pp.getIndex());
                 }
                 pp.setIndex(pp.getIndex()+1); // skip char
                 eatWhitespace(line, pp);
@@ -1172,8 +1220,10 @@ public class MakeUnicodeFiles {
             boolean ok = true;
             switch(relation) {
                 case '=': ok = leftSet.equals(rightSet); break;
-                case '>': ok = leftSet.containsAll(rightSet); break;
-                case '<': ok = rightSet.containsAll(leftSet); break;
+                case '<': case '\u2282': ok = rightSet.containsAll(leftSet) && !leftSet.equals(rightSet); break;
+                case '>': case '\u2283': ok = leftSet.containsAll(rightSet) && !leftSet.equals(rightSet); break;
+                case '\u2264': case '\u2286': ok = rightSet.containsAll(leftSet); break;
+                case '\u2265': case '\u2287': ok = leftSet.containsAll(rightSet); break;
                 case '!': ok = leftSet.containsNone(rightSet); break;
                 case '?': ok = !leftSet.equals(rightSet) 
                         && !leftSet.containsAll(rightSet) 
diff --git a/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt b/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt
new file mode 100644
index 00000000000..bc101f29ef0
--- /dev/null
+++ b/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt
@@ -0,0 +1,60 @@
+﻿# Invariance tests
+# Each line indicates an invariant set relationship to be tested,
+# and is of the form:
+#
+# 	line := set relation set
+#
+#   relation := '='             // has identical contents to
+#            := ('>' | '⊃')    // is proper superset of
+#            := ('≥' | '⊇')    // is superset of 
+#            := ('<' | '⊂')    // is proper subset of
+#            := ('≤' | '⊆')    // is subset of
+#            := '!'             // has no intersection
+#            := '?'             // none of the above (they overlap, and neither contains the other)
+#
+# A set is a standard UnicodeSet, but where $pv can be used to express properties
+#
+# 	pv := '$' '×'? prop (('=' | ':') value)?
+#
+# The × indicates that the property is taken from the previously released version.
+#  That is, if the version is 4.0.1, then the × version is 4.0.0
+# If the value is missing, it is defaulted to true
+# If the value is of the form «...», then the ... is interpreted as a regular expression
+# The property can be the short or long form as in the PropertyAliases.txt
+# The value (if enumerated) can be the short or long form as in PropertyValueAliases.txt
+#
+# A UnicodeSet is a boolean combination of properties and character ranges, as you would see in
+#  Perl or other regular-expression languages. Examples:
+#	[$General_Category:Unassigned-[a-zA-Z]]
+# For details, see http://oss.software.ibm.com/icu/userguide/unicodeSet.html
+#
+# WARNING: do not use \p{...} or [:...:] syntax, since those will be
+# ICU's current version of properties, not the current snapshot's.
+# Use the $ notation for properties (listed above) instead.
+#
+# When this file is parsed, an error message may contain <@>
+#  to indicate the location of an error in the input line.
+
+# The following are not very interesting, but show examples of use
+
+#$GC:Zs ! $GC:Zp
+#$East_Asian_Width:Neutral ? $GC:Uppercase_Letter
+$GC:Zs ? $Name:«.*SPACE.*»
+
+# Examples of parsing errors
+
+# $LBA:Neutral =  $GC:Zp # example of nonexistent property
+# $LB:foo =  $GC:Zp # example of nonexistent value
+# $GC:Zs @ $GC:Zp # example of unknown relation
+
+# The following should be real invariants
+# For illustration, different alias styles are used
+
+$Line_Break:Unknown = [$General_Category:Unassigned $GeneralCategory:PrivateUse]
+$LB:OP = $GC:Ps
+$General_Category:Decimal_Number = $Numeric_Type:Decimal
+$Whitespace ⊃ [$GC:Zs $GC:Zp $GC:Zl]
+$ID_Start ⊇ $×ID_Start
+$ID_Continue ⊇ $×ID_Continue
+
+