mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
added more conformance tests
X-SVN-Rev: 8928
This commit is contained in:
parent
bdfaac55b4
commit
eb6243eb23
7 changed files with 64 additions and 20 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $
|
||||
* $Date: 2002/06/15 02:47:12 $
|
||||
* $Revision: 1.6 $
|
||||
* $Date: 2002/06/22 21:02:16 $
|
||||
* $Revision: 1.7 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -19,8 +19,10 @@ import com.ibm.text.utility.*;
|
|||
public class Main {
|
||||
static final String UCDVersion = "";
|
||||
static final String[] ICU_FILES = {"writeCollationValidityLog", "FractionalUCA",
|
||||
"writeconformance", "writeconformanceshifted",
|
||||
"WriteRules", "WriteRulesWithNames", "WriteRulesXML"};
|
||||
"WriteRules", "WriteRulesWithNames", "WriteRulesXML",
|
||||
"writeconformance", "writeconformanceshifted",
|
||||
"short", "writeconformance", "writeconformanceshifted",
|
||||
};
|
||||
|
||||
public static void main(String args[]) throws Exception {
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
|
||||
* $Date: 2002/06/22 01:21:08 $
|
||||
* $Revision: 1.21 $
|
||||
* $Date: 2002/06/22 21:02:16 $
|
||||
* $Revision: 1.22 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -292,6 +292,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
|
|||
|
||||
|
||||
static void writeConformance(String filename, byte option, boolean shortPrint) throws IOException {
|
||||
Default.setUCD();
|
||||
//UCD ucd30 = UCD.make("3.0.0");
|
||||
|
||||
/*
|
||||
|
@ -405,12 +406,16 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
//log.println(source);
|
||||
char extra = source.charAt(source.length()-1);
|
||||
String clipped = source.substring(0, source.length()-1);
|
||||
if (clipped.charAt(0) == LOW_ACCENT && extra != LOW_ACCENT) {
|
||||
extra = LOW_ACCENT;
|
||||
clipped = source.substring(1);
|
||||
}
|
||||
if (!shortPrint) {
|
||||
log.print(Utility.hex(source));
|
||||
log.print(
|
||||
";\t# " + (extra != LOW_ACCENT ? extra : '.') + " " + ucd.getName(clipped, SHORT) + "\t" + UCA.toString(key));
|
||||
} else {
|
||||
log.print(source + "\t" + Utility.hex(clipped));
|
||||
log.print(Utility.hex(source) + "\t" + Utility.hex(clipped));
|
||||
}
|
||||
log.println();
|
||||
}
|
||||
|
@ -424,17 +429,41 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
|||
addStringX(UTF32.valueOf32(x), option);
|
||||
}
|
||||
|
||||
static final char LOW_ACCENT = '\u0325';
|
||||
static final char LOW_ACCENT = '\u0334';
|
||||
static int addCounter = 0;
|
||||
|
||||
static void addStringX(String s, byte option) {
|
||||
int firstChar = UTF16.charAt(s,0);
|
||||
// add characters with different strengths, to verify the order
|
||||
addStringY(s + 'a', option);
|
||||
addStringY(s + 'A', option);
|
||||
addStringY(s + 'á', option);
|
||||
addStringY(s + 'b', option);
|
||||
addStringY(s + LOW_ACCENT, option);
|
||||
addStringY(s + 'á', option);
|
||||
addStringY(s + 'A', option);
|
||||
addStringY(s + '!', option);
|
||||
if (option == SHIFTED && collator.isVariable(firstChar)) addStringY(s + LOW_ACCENT, option);
|
||||
|
||||
// NOW, if the character decomposes, or is a combining mark (non-zero), try combinations
|
||||
|
||||
if (Default.ucd.getCombiningClass(firstChar) > 0
|
||||
|| !Default.nfd.isNormalized(s) && !Default.ucd.isHangulSyllable(firstChar)) {
|
||||
// if it ends with a non-starter, try the decompositions.
|
||||
String decomp = Default.nfd.normalize(s);
|
||||
if (Default.ucd.getCombiningClass(UTF16.charAt(decomp, decomp.length()-1)) > 0) {
|
||||
if (canIt == null) canIt = new CanonicalIterator(".");
|
||||
canIt.setSource(s + LOW_ACCENT);
|
||||
int limit = 4;
|
||||
for (String can = canIt.next(); can != null; can = canIt.next()) {
|
||||
if (s.equals(can)) continue;
|
||||
if (--limit < 0) continue; // just include a sampling
|
||||
addStringY(can, option);
|
||||
// System.out.println(addCounter++ + " Adding " + Default.ucd.getCodeAndName(can));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static CanonicalIterator canIt = null;
|
||||
|
||||
static char counter;
|
||||
|
||||
static void addStringY(String s, byte option) {
|
||||
|
@ -2162,7 +2191,7 @@ F900..FAFF; CJK Compatibility Ideographs
|
|||
|
||||
Set additionalSet = new HashSet();
|
||||
System.out.println("Loading canonical iterator");
|
||||
CanonicalIterator canIt = new CanonicalIterator(".");
|
||||
if (canIt == null) canIt = new CanonicalIterator(".");
|
||||
Iterator it2 = contentsForCanonicalIteration.iterator();
|
||||
System.out.println("Adding any FCD equivalents that have different sort keys");
|
||||
while (it2.hasNext()) {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
|
||||
* $Date: 2002/06/22 01:21:09 $
|
||||
* $Revision: 1.15 $
|
||||
* $Date: 2002/06/22 21:02:16 $
|
||||
* $Revision: 1.16 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -745,8 +745,8 @@ of characters, the first of which has a non-zero combining class.
|
|||
|
||||
public static void test() {
|
||||
Default.setUCD();
|
||||
DerivedProperty dprop = new DerivedProperty(Default.ucd);
|
||||
/*
|
||||
DerivedProperty dprop = new DerivedProperty(Default.ucd);
|
||||
for (int j = 0; j < LIMIT; ++j) {
|
||||
System.out.println();
|
||||
System.out.println(j + "\t" + dprop.getName(j));
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Normalizer.java,v $
|
||||
* $Date: 2002/06/13 21:14:05 $
|
||||
* $Revision: 1.10 $
|
||||
* $Date: 2002/06/22 21:02:16 $
|
||||
* $Revision: 1.11 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -226,6 +226,19 @@ public final class Normalizer implements UCD_Types {
|
|||
return !data.normalizationDiffers(ch, composition, compatibility);
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility: Checks whether there is a recursive decomposition of a character from the
|
||||
* Unicode Character Database. It is compatibility or canonical according to the particular
|
||||
* normalizer.
|
||||
* @param ch the source character
|
||||
*/
|
||||
public boolean isNormalized(String s) {
|
||||
if (UTF16.countCodePoint(s) > 1) {
|
||||
return !data.normalizationDiffers(UTF16.charAt(s,0), composition, compatibility);
|
||||
}
|
||||
return s.equals(normalize(s)); // TODO: OPTIMIZE LATER
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility: Gets recursive decomposition of a character from the
|
||||
* Unicode Character Database.
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<html><body>
|
||||
<h1
|
||||
>1. Mismatches when NFD is OFF</h1><h2
|
||||
>Date:Fri Jun 21 16:56:03 PDT 2002</h2><h2
|
||||
>Date:Sat Jun 22 13:56:49 PDT 2002</h2><h2
|
||||
>File Version:-3.1.1d1</h2><p
|
||||
>Alternate Handling = NON_IGNORABLE</p><table border="1"
|
||||
><caption
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# Fractional UCA Table, generated from standard UCA
|
||||
# M. Davis, Fri Jun 21 16:56:12 PDT 2002
|
||||
# M. Davis, Sat Jun 22 13:56:57 PDT 2002
|
||||
# VERSION: UCA=3.1.1d1, UCD=3.2.0
|
||||
|
||||
# Generated processed version, as described in ICU design document.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# Fractional UCA Table, generated from standard UCA
|
||||
# M. Davis, Fri Jun 21 16:56:12 PDT 2002
|
||||
# M. Davis, Sat Jun 22 13:56:57 PDT 2002
|
||||
# VERSION: UCA=3.1.1d1, UCD=3.2.0
|
||||
|
||||
# Generated processed version, as described in ICU design document.
|
||||
|
|
Loading…
Add table
Reference in a new issue