added more conformance tests

X-SVN-Rev: 8928
This commit is contained in:
Mark Davis 2002-06-22 21:05:34 +00:00
parent bdfaac55b4
commit eb6243eb23
7 changed files with 64 additions and 20 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $
* $Date: 2002/06/15 02:47:12 $
* $Revision: 1.6 $
* $Date: 2002/06/22 21:02:16 $
* $Revision: 1.7 $
*
*******************************************************************************
*/
@ -19,8 +19,10 @@ import com.ibm.text.utility.*;
public class Main {
static final String UCDVersion = "";
static final String[] ICU_FILES = {"writeCollationValidityLog", "FractionalUCA",
"writeconformance", "writeconformanceshifted",
"WriteRules", "WriteRulesWithNames", "WriteRulesXML"};
"WriteRules", "WriteRulesWithNames", "WriteRulesXML",
"writeconformance", "writeconformanceshifted",
"short", "writeconformance", "writeconformanceshifted",
};
public static void main(String args[]) throws Exception {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
* $Date: 2002/06/22 01:21:08 $
* $Revision: 1.21 $
* $Date: 2002/06/22 21:02:16 $
* $Revision: 1.22 $
*
*******************************************************************************
*/
@ -292,6 +292,7 @@ public class WriteCollationData implements UCD_Types, UCA_Types {
static void writeConformance(String filename, byte option, boolean shortPrint) throws IOException {
Default.setUCD();
//UCD ucd30 = UCD.make("3.0.0");
/*
@ -405,12 +406,16 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
//log.println(source);
char extra = source.charAt(source.length()-1);
String clipped = source.substring(0, source.length()-1);
if (clipped.charAt(0) == LOW_ACCENT && extra != LOW_ACCENT) {
extra = LOW_ACCENT;
clipped = source.substring(1);
}
if (!shortPrint) {
log.print(Utility.hex(source));
log.print(
";\t# " + (extra != LOW_ACCENT ? extra : '.') + " " + ucd.getName(clipped, SHORT) + "\t" + UCA.toString(key));
} else {
log.print(source + "\t" + Utility.hex(clipped));
log.print(Utility.hex(source) + "\t" + Utility.hex(clipped));
}
log.println();
}
@ -424,17 +429,41 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
addStringX(UTF32.valueOf32(x), option);
}
static final char LOW_ACCENT = '\u0325';
static final char LOW_ACCENT = '\u0334';
static int addCounter = 0;
/**
 * Queues test strings derived from s by calling addStringY with s followed by
 * each of several single trailing characters and, for some inputs, with
 * canonically equivalent forms of s + LOW_ACCENT.
 * NOTE(review): this listing appears to come from a commit view that shows removed
 * and added lines together without markers; the repeated 'A' and 'á' calls below
 * may be old/new pairs rather than intentional duplicates -- confirm against the
 * checked-in file.
 * @param s the base string; its first code point drives the checks below
 * @param option passed through to every addStringY call; also compared with SHIFTED
 */
static void addStringX(String s, byte option) {
// First code point of s (read with UTF16.charAt); used for all per-character tests below.
int firstChar = UTF16.charAt(s,0);
// add characters with different strengths, to verify the order
addStringY(s + 'a', option);
addStringY(s + 'A', option);
addStringY(s + 'á', option);
addStringY(s + 'b', option);
addStringY(s + LOW_ACCENT, option);
addStringY(s + 'á', option);
addStringY(s + 'A', option);
addStringY(s + '!', option);
// Extra LOW_ACCENT case, added only for the SHIFTED option when the collator
// reports the first code point as variable.
if (option == SHIFTED && collator.isVariable(firstChar)) addStringY(s + LOW_ACCENT, option);
// NOW, if the character decomposes, or is a combining mark (non-zero), try combinations
// Hangul syllables are excluded from the "not NFD-normalized" arm of this test.
if (Default.ucd.getCombiningClass(firstChar) > 0
|| !Default.nfd.isNormalized(s) && !Default.ucd.isHangulSyllable(firstChar)) {
// if it ends with a non-starter, try the decompositions.
String decomp = Default.nfd.normalize(s);
if (Default.ucd.getCombiningClass(UTF16.charAt(decomp, decomp.length()-1)) > 0) {
// The iterator is created once, on first use, and kept in the static field canIt.
if (canIt == null) canIt = new CanonicalIterator(".");
canIt.setSource(s + LOW_ACCENT);
// At most 4 of the iterator's results are passed to addStringY.
int limit = 4;
for (String can = canIt.next(); can != null; can = canIt.next()) {
// NOTE(review): the iterator source is s + LOW_ACCENT but this compares against s
// alone, so the skip may never fire -- confirm which string was intended.
if (s.equals(can)) continue;
// Once the limit is used up the loop keeps draining the iterator without adding.
if (--limit < 0) continue; // just include a sampling
addStringY(can, option);
// System.out.println(addCounter++ + " Adding " + Default.ucd.getCodeAndName(can));
}
}
}
}
static CanonicalIterator canIt = null;
static char counter;
static void addStringY(String s, byte option) {
@ -2162,7 +2191,7 @@ F900..FAFF; CJK Compatibility Ideographs
Set additionalSet = new HashSet();
System.out.println("Loading canonical iterator");
CanonicalIterator canIt = new CanonicalIterator(".");
if (canIt == null) canIt = new CanonicalIterator(".");
Iterator it2 = contentsForCanonicalIteration.iterator();
System.out.println("Adding any FCD equivalents that have different sort keys");
while (it2.hasNext()) {

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
* $Date: 2002/06/22 01:21:09 $
* $Revision: 1.15 $
* $Date: 2002/06/22 21:02:16 $
* $Revision: 1.16 $
*
*******************************************************************************
*/
@ -745,8 +745,8 @@ of characters, the first of which has a non-zero combining class.
public static void test() {
Default.setUCD();
DerivedProperty dprop = new DerivedProperty(Default.ucd);
/*
DerivedProperty dprop = new DerivedProperty(Default.ucd);
for (int j = 0; j < LIMIT; ++j) {
System.out.println();
System.out.println(j + "\t" + dprop.getName(j));

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Normalizer.java,v $
* $Date: 2002/06/13 21:14:05 $
* $Revision: 1.10 $
* $Date: 2002/06/22 21:02:16 $
* $Revision: 1.11 $
*
*******************************************************************************
*/
@ -226,6 +226,19 @@ public final class Normalizer implements UCD_Types {
return !data.normalizationDiffers(ch, composition, compatibility);
}
/**
 * Utility: Checks whether a string is already in the normalization form of this
 * normalizer, i.e. whether normalizing it would leave it unchanged. The form is
 * compatibility or canonical according to the particular normalizer.
 * @param s the source string
 * @return true if s equals its normalized form
 */
public boolean isNormalized(String s) {
    // Fast path: exactly one code point can be answered from the per-character data,
    // using the same test as the per-character isNormalized check.
    if (UTF16.countCodePoint(s) == 1) {
        return !data.normalizationDiffers(UTF16.charAt(s,0), composition, compatibility);
    }
    // Empty or multi-code-point strings: looking at the first code point alone is not
    // enough, so compare against the fully normalized string.
    return s.equals(normalize(s)); // TODO: OPTIMIZE LATER
}
/**
* Utility: Gets recursive decomposition of a character from the
* Unicode Character Database.

View file

@ -1,7 +1,7 @@
<html><body>
<h1
>1. Mismatches when NFD is OFF</h1><h2
>Date:Fri Jun 21 16:56:03 PDT 2002</h2><h2
>Date:Sat Jun 22 13:56:49 PDT 2002</h2><h2
>File Version:-3.1.1d1</h2><p
>Alternate Handling = NON_IGNORABLE</p><table border="1"
><caption

View file

@ -1,5 +1,5 @@
# Fractional UCA Table, generated from standard UCA
# M. Davis, Fri Jun 21 16:56:12 PDT 2002
# M. Davis, Sat Jun 22 13:56:57 PDT 2002
# VERSION: UCA=3.1.1d1, UCD=3.2.0
# Generated processed version, as described in ICU design document.

View file

@ -1,5 +1,5 @@
# Fractional UCA Table, generated from standard UCA
# M. Davis, Fri Jun 21 16:56:12 PDT 2002
# M. Davis, Sat Jun 22 13:56:57 PDT 2002
# VERSION: UCA=3.1.1d1, UCD=3.2.0
# Generated processed version, as described in ICU design document.