mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
Fixing Break Charts & Tests
X-SVN-Rev: 11428
This commit is contained in:
parent
6946f87968
commit
c4392b5dab
6 changed files with 487 additions and 453 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
|
||||
* $Date: 2003/03/19 23:31:12 $
|
||||
* $Revision: 1.30 $
|
||||
* $Date: 2003/04/01 02:51:57 $
|
||||
* $Revision: 1.31 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -2492,17 +2492,26 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
|
||||
static StringBuffer quoteOperandBuffer = new StringBuffer(); // faster
|
||||
|
||||
static UnicodeSet needsQuoting = null;
|
||||
|
||||
static final String quoteOperand(String s) {
|
||||
if (needsQuoting == null) {
|
||||
/*
|
||||
c >= 'a' && c <= 'z'
|
||||
|| c >= 'A' && c <= 'Z'
|
||||
|| c >= '0' && c <= '9'
|
||||
|| (c >= 0xA0 && !UCharacterProperty.isRuleWhiteSpace(c))
|
||||
*/
|
||||
needsQuoting = new UnicodeSet("[a-zA-Z0-9\\u00A0-\\U00010FFF]");
|
||||
needsQuoting.remove();
|
||||
}
|
||||
s = NFC.normalize(s);
|
||||
quoteOperandBuffer.setLength(0);
|
||||
boolean noQuotes = true;
|
||||
boolean inQuote = false;
|
||||
for (int i = 0; i < s.length(); ++i) {
|
||||
char c = s.charAt(i);
|
||||
if (c >= 'a' && c <= 'z'
|
||||
|| c >= 'A' && c <= 'Z'
|
||||
|| c >= '0' && c <= '9'
|
||||
|| (c >= 0xA0 && !UCharacterProperty.isRuleWhiteSpace(c))) {
|
||||
if (!needsQuoting.contains(c)) {
|
||||
if (inQuote) {
|
||||
quoteOperandBuffer.append('\'');
|
||||
inQuote = false;
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
|
||||
* $Date: 2003/03/12 16:01:26 $
|
||||
* $Revision: 1.19 $
|
||||
* $Date: 2003/04/01 02:52:00 $
|
||||
* $Revision: 1.20 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -501,7 +501,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
public boolean hasValue(int cp) {
|
||||
byte cat = ucdData.getCategory(cp);
|
||||
if (cat == Lu || cat == Ll || cat == Lt || cat == Lm || cat == Lo || cat == Nl
|
||||
|| ucdData.getBinaryProperty(cp, Alphabetic)) return true;
|
||||
|| ucdData.getBinaryProperty(cp, Other_Alphabetic)) return true;
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
|
||||
* $Date: 2003/03/20 21:47:26 $
|
||||
* $Revision: 1.21 $
|
||||
* $Date: 2003/04/01 02:52:00 $
|
||||
* $Revision: 1.22 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -190,7 +190,7 @@ public interface UCD_Types {
|
|||
Math_Property = 10,
|
||||
Hex_Digit = 11,
|
||||
ASCII_Hex_Digit = 12,
|
||||
Alphabetic = 13,
|
||||
Other_Alphabetic = 13,
|
||||
Ideographic = 14,
|
||||
Diacritic = 15,
|
||||
Extender = 16,
|
||||
|
@ -215,7 +215,7 @@ public interface UCD_Types {
|
|||
static final int
|
||||
BidiMirroredMask = 1<<BidiMirrored,
|
||||
CompositionExclusionMask = 1<<CompositionExclusion,
|
||||
AlphabeticMask = 1<<Alphabetic,
|
||||
AlphabeticMask = 1<<Other_Alphabetic,
|
||||
Bidi_ControlMask = 1<<Bidi_Control,
|
||||
DashMask = 1<<Dash,
|
||||
DiacriticMask = 1<<Diacritic,
|
||||
|
@ -466,7 +466,7 @@ public static byte
|
|||
|
||||
// DERIVED PROPERTY
|
||||
|
||||
static final int
|
||||
static final byte
|
||||
PropMath = 0,
|
||||
PropAlphabetic = 1,
|
||||
PropLowercase = 2,
|
||||
|
|
94
tools/unicodetools/com/ibm/text/UCD/UnicodeMap.java
Normal file
94
tools/unicodetools/com/ibm/text/UCD/UnicodeMap.java
Normal file
|
@ -0,0 +1,94 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Attic/UnicodeMap.java,v $
|
||||
* $Date: 2003/04/01 02:53:07 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
/**
|
||||
* Class that maps from codepoints to an index, and optionally a label.
|
||||
*/
|
||||
public class UnicodeMap {
|
||||
UnicodeSet[] sets = new UnicodeSet[50];
|
||||
String[] labels = new String[50];
|
||||
int count = 0;
|
||||
|
||||
public int add(String label, UnicodeSet set) {
|
||||
return add(label, set, false, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add set
|
||||
*@param removeOld true: remove any collisions from sets already in the map
|
||||
* if false, remove any collisions from this set
|
||||
*@param signal: print a warning when collisions occur
|
||||
*/
|
||||
public int add(String label, UnicodeSet set, boolean removeOld, boolean signal) {
|
||||
// remove from any preceding!!
|
||||
for (int i = 0; i < count; ++i) {
|
||||
if (!set.containsSome(sets[i])) continue;
|
||||
if (signal) showOverlap(label, set, i);
|
||||
if (removeOld) {
|
||||
sets[i] = sets[i].removeAll(set);
|
||||
} else {
|
||||
set = set.removeAll(sets[i]);
|
||||
}
|
||||
}
|
||||
sets[count] = set;
|
||||
labels[count++] = label;
|
||||
return (short)(count - 1);
|
||||
}
|
||||
|
||||
public void showOverlap(String label, UnicodeSet set, int i) {
|
||||
UnicodeSet delta = new UnicodeSet(set).retainAll(sets[i]);
|
||||
System.out.println("Warning! Overlap with " + label + " and " + labels[i]
|
||||
+ ": " + delta);
|
||||
}
|
||||
|
||||
public int getIndex(int codepoint) {
|
||||
for (int i = count - 1; i >= 0; --i) {
|
||||
if (sets[i].contains(codepoint)) return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
public int getIndexFromLabel(String label) {
|
||||
for (int i = count - 1; i >= 0; --i) {
|
||||
if (labels[i].equalsIgnoreCase(label)) return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
public String getLabel(int codepoint) {
|
||||
return getLabelFromIndex(getIndex(codepoint));
|
||||
}
|
||||
|
||||
public String getLabelFromIndex(int index) {
|
||||
if (index < 0 || index >= count) return null;
|
||||
return labels[index];
|
||||
}
|
||||
|
||||
public UnicodeSet getSetFromIndex(int index) {
|
||||
if (index < 0 || index >= count) return null;
|
||||
return new UnicodeSet(sets[index]); // protect from changes
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return count;
|
||||
}
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2003/03/19 17:30:56 $
|
||||
* $Revision: 1.30 $
|
||||
* $Date: 2003/04/01 02:52:00 $
|
||||
* $Revision: 1.31 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -1071,6 +1071,15 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
|
||||
static PrintWriter showSetNamesPw;
|
||||
|
||||
public static void showSetDifferences(String name1, UnicodeSet set1, String name2, UnicodeSet set2, boolean separateLines, UCD ucd) {
|
||||
UnicodeSet temp = new UnicodeSet(set1).removeAll(set2);
|
||||
showSetNames("In " + name1 + ", but not " + name2, temp, separateLines, false, false, ucd);
|
||||
temp = new UnicodeSet(set2).removeAll(set1);
|
||||
showSetNames("In " + name2 + ", but not " + name1, temp, separateLines, false, false, ucd);
|
||||
temp = new UnicodeSet(set2).retainAll(set1);
|
||||
showSetNames("In " + name1 + " and " + name2, temp, separateLines, false, false, ucd);
|
||||
}
|
||||
|
||||
public static void showSetNames(String prefix, UnicodeSet set, boolean separateLines, UCD ucd) {
|
||||
showSetNames(prefix, set, separateLines, false, false, ucd);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue