mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
Unicode 3.0 character property system code check in
X-SVN-Rev: 3324
This commit is contained in:
parent
22a81eddc3
commit
bc654bbe86
16 changed files with 6144 additions and 0 deletions
313
icu4j/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java
Executable file
313
icu4j/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java
Executable file
|
@ -0,0 +1,313 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java,v $
|
||||
* $Date: 2000/12/26 20:01:08 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.test.text;
|
||||
|
||||
import com.ibm.icu.text.UCharacter;
|
||||
import com.ibm.icu.text.UCharacterCategoryEnum;
|
||||
import java.io.FileWriter;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.Hashtable;
|
||||
import java.util.Enumeration;
|
||||
|
||||
/**
|
||||
* A class to compare the difference in methods between java.lang.Character and
|
||||
* UCharacter
|
||||
* @author Syn Wee Quek
|
||||
* @since oct 06 2000
|
||||
* @see com.ibm.icu.text.UCharacter
|
||||
*/
|
||||
|
||||
public final class UCharacterCompare
|
||||
{
|
||||
// private variables ================================================
|
||||
|
||||
private static Hashtable m_hashtable_ = new Hashtable();
|
||||
|
||||
// public methods ======================================================
|
||||
|
||||
/**
|
||||
* Main testing method
|
||||
*/
|
||||
public static void main(String arg[])
|
||||
{
|
||||
try
|
||||
{
|
||||
FileWriter f;
|
||||
if (arg.length == 0)
|
||||
f = new FileWriter("compare.txt");
|
||||
else
|
||||
f = new FileWriter(arg[0]);
|
||||
PrintWriter p = new PrintWriter(f);
|
||||
p.print("char character name ");
|
||||
p.println("method name ucharacter character");
|
||||
for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i ++)
|
||||
{
|
||||
if (UCharacter.isDefined(i) != Character.isDefined(i))
|
||||
trackDifference(p, i, "isDefined()", "" + UCharacter.isDefined(i),
|
||||
"" + Character.isDefined(i));
|
||||
else
|
||||
{
|
||||
if (UCharacter.digit(i, 10) != Character.digit(i, 10))
|
||||
trackDifference(p, i, "digit()", "" + UCharacter.digit(i, 10),
|
||||
"" + Character.digit(i, 10));
|
||||
if (UCharacter.getNumericValue(i) != Character.getNumericValue(i))
|
||||
trackDifference(p, i, "getNumericValue()",
|
||||
"" + UCharacter.getNumericValue(i),
|
||||
"" + Character.getNumericValue(i));
|
||||
if (!compareType(UCharacter.getType(i), Character.getType(i)))
|
||||
trackDifference(p, i, "getType()", "" + UCharacter.getType(i),
|
||||
"" + Character.getType(i));
|
||||
if (UCharacter.isDigit(i) != Character.isDigit(i))
|
||||
trackDifference(p, i, "isDigit()",
|
||||
"" + UCharacter.isDigit(i),
|
||||
"" + Character.isDigit(i));
|
||||
if (UCharacter.isISOControl(i) != Character.isISOControl(i))
|
||||
trackDifference(p, i, "isISOControl()",
|
||||
"" + UCharacter.isISOControl(i),
|
||||
"" + Character.isISOControl(i));
|
||||
if (UCharacter.isLetter(i) != Character.isLetter(i))
|
||||
trackDifference(p, i, "isLetter()", "" + UCharacter.isLetter(i),
|
||||
"" + Character.isLetter(i));
|
||||
if (UCharacter.isLetterOrDigit(i) != Character.isLetterOrDigit(i))
|
||||
trackDifference(p, i, "isLetterOrDigit()",
|
||||
"" + UCharacter.isLetterOrDigit(i),
|
||||
"" + Character.isLetterOrDigit(i));
|
||||
if (UCharacter.isLowerCase(i) != Character.isLowerCase(i))
|
||||
trackDifference(p, i, "isLowerCase()",
|
||||
"" + UCharacter.isLowerCase(i),
|
||||
"" + Character.isLowerCase(i));
|
||||
if (UCharacter.isWhitespace(i) != Character.isWhitespace(i))
|
||||
trackDifference(p, i, "isWhitespace()",
|
||||
"" + UCharacter.isWhitespace(i),
|
||||
"" + Character.isWhitespace(i));
|
||||
if (UCharacter.isSpaceChar(i) != Character.isSpaceChar(i))
|
||||
trackDifference(p, i, "isSpaceChar()",
|
||||
"" + UCharacter.isSpaceChar(i),
|
||||
"" + Character.isSpaceChar(i));
|
||||
if (UCharacter.isTitleCase(i) != Character.isTitleCase(i))
|
||||
trackDifference(p, i, "isTitleChar()",
|
||||
"" + UCharacter.isTitleCase(i),
|
||||
"" + Character.isTitleCase(i));
|
||||
if (UCharacter.isUnicodeIdentifierPart(i) !=
|
||||
Character.isUnicodeIdentifierPart(i))
|
||||
trackDifference(p, i, "isUnicodeIdentifierPart()",
|
||||
"" + UCharacter.isUnicodeIdentifierPart(i),
|
||||
"" + Character.isUnicodeIdentifierPart(i));
|
||||
if (UCharacter.isUnicodeIdentifierStart(i) !=
|
||||
Character.isUnicodeIdentifierStart(i))
|
||||
trackDifference(p, i, "isUnicodeIdentifierStart()",
|
||||
"" + UCharacter.isUnicodeIdentifierStart(i),
|
||||
"" + Character.isUnicodeIdentifierStart(i));
|
||||
if (UCharacter.isIdentifierIgnorable(i) !=
|
||||
Character.isIdentifierIgnorable(i))
|
||||
trackDifference(p, i, "isIdentifierIgnorable()",
|
||||
"" + UCharacter.isIdentifierIgnorable(i),
|
||||
"" + Character.isIdentifierIgnorable(i));
|
||||
if (UCharacter.isUpperCase(i) != Character.isUpperCase(i))
|
||||
trackDifference(p, i, "isUpperCase()",
|
||||
"" + UCharacter.isUpperCase(i),
|
||||
"" + Character.isUpperCase(i));
|
||||
if (UCharacter.toLowerCase(i) != Character.toLowerCase(i))
|
||||
trackDifference(p, i, "toLowerCase()",
|
||||
Integer.toHexString(UCharacter.toLowerCase(i)),
|
||||
Integer.toHexString(Character.toLowerCase(i)));
|
||||
if (!UCharacter.toString(i).equals(new Character(i).toString()))
|
||||
trackDifference(p, i, "toString()",
|
||||
UCharacter.toString(i),
|
||||
new Character(i).toString());
|
||||
if (UCharacter.toTitleCase(i) != Character.toTitleCase(i))
|
||||
trackDifference(p, i, "toTitleCase()",
|
||||
Integer.toHexString(UCharacter.toTitleCase(i)),
|
||||
Integer.toHexString(Character.toTitleCase(i)));
|
||||
if (UCharacter.toUpperCase(i) != Character.toUpperCase(i))
|
||||
trackDifference(p, i, "toUpperCase()",
|
||||
Integer.toHexString(UCharacter.toUpperCase(i)),
|
||||
Integer.toHexString(Character.toUpperCase(i)));
|
||||
}
|
||||
}
|
||||
summary(p);
|
||||
p.close();
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
// private methods ===================================================
|
||||
|
||||
/**
|
||||
* Comparing types
|
||||
* @param uchartype UCharacter type
|
||||
* @param jchartype java.lang.Character type
|
||||
*/
|
||||
private static boolean compareType(int uchartype, int jchartype)
|
||||
{
|
||||
if (uchartype == UCharacterCategoryEnum.UNASSIGNED &&
|
||||
jchartype == Character.UNASSIGNED)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.UPPERCASE_LETTER &&
|
||||
jchartype == Character.UPPERCASE_LETTER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.LOWERCASE_LETTER &&
|
||||
jchartype == Character.LOWERCASE_LETTER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.TITLECASE_LETTER &&
|
||||
jchartype == Character.TITLECASE_LETTER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.MODIFIER_LETTER &&
|
||||
jchartype == Character.MODIFIER_LETTER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.OTHER_LETTER &&
|
||||
jchartype == Character.OTHER_LETTER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.NON_SPACING_MARK &&
|
||||
jchartype == Character.NON_SPACING_MARK)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.ENCLOSING_MARK &&
|
||||
jchartype == Character.ENCLOSING_MARK)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.COMBINING_SPACING_MARK &&
|
||||
jchartype == Character.COMBINING_SPACING_MARK)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.DECIMAL_DIGIT_NUMBER &&
|
||||
jchartype == Character.DECIMAL_DIGIT_NUMBER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.LETTER_NUMBER &&
|
||||
jchartype == Character.LETTER_NUMBER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.OTHER_NUMBER &&
|
||||
jchartype == Character.OTHER_NUMBER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.SPACE_SEPARATOR &&
|
||||
jchartype == Character.SPACE_SEPARATOR)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.LINE_SEPARATOR &&
|
||||
jchartype == Character.LINE_SEPARATOR)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.PARAGRAPH_SEPARATOR &&
|
||||
jchartype == Character.PARAGRAPH_SEPARATOR)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.CONTROL &&
|
||||
jchartype == Character.CONTROL)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.FORMAT &&
|
||||
jchartype == Character.FORMAT)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.PRIVATE_USE &&
|
||||
jchartype == Character.PRIVATE_USE)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.SURROGATE &&
|
||||
jchartype == Character.SURROGATE)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.DASH_PUNCTUATION &&
|
||||
jchartype == Character.DASH_PUNCTUATION)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.START_PUNCTUATION &&
|
||||
jchartype == Character.START_PUNCTUATION)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.END_PUNCTUATION &&
|
||||
jchartype == Character.END_PUNCTUATION)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.CONNECTOR_PUNCTUATION &&
|
||||
jchartype == Character.CONNECTOR_PUNCTUATION)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.OTHER_PUNCTUATION &&
|
||||
jchartype == Character.OTHER_PUNCTUATION)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.MATH_SYMBOL &&
|
||||
jchartype == Character.MATH_SYMBOL)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.CURRENCY_SYMBOL &&
|
||||
jchartype == Character.CURRENCY_SYMBOL)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.MODIFIER_SYMBOL &&
|
||||
jchartype == Character.MODIFIER_SYMBOL)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.OTHER_SYMBOL &&
|
||||
jchartype == Character.OTHER_SYMBOL)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.INITIAL_PUNCTUATION &&
|
||||
jchartype == Character.START_PUNCTUATION)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.FINAL_PUNCTUATION &&
|
||||
jchartype == Character.END_PUNCTUATION)
|
||||
return true;
|
||||
/*if (uchartype == UCharacterCategoryEnum.GENERAL_OTHER_TYPES &&
|
||||
jchartype == Character.GENERAL_OTHER_TYPES)
|
||||
return true;*/
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Difference writing to file
|
||||
* @param f file outputstream
|
||||
* @param ch code point
|
||||
* @param method for testing
|
||||
* @param ucharval UCharacter value after running method
|
||||
* @param charval Character value after running method
|
||||
* @exception thrown when error occur in writing to file
|
||||
*/
|
||||
private static void trackDifference(PrintWriter f, int ch, String method,
|
||||
String ucharval, String charval)
|
||||
throws Exception
|
||||
{
|
||||
if (m_hashtable_.containsKey(method))
|
||||
{
|
||||
Integer value = (Integer)m_hashtable_.get(method);
|
||||
m_hashtable_.put(method, new Integer(value.intValue() + 1));
|
||||
}
|
||||
else
|
||||
m_hashtable_.put(method, new Integer(1));
|
||||
|
||||
String temp = Integer.toHexString(ch);
|
||||
StringBuffer s = new StringBuffer(temp);
|
||||
for (int i = 0; i < 6 - temp.length(); i ++)
|
||||
s.append(' ');
|
||||
temp = UCharacter.getName(ch);
|
||||
if (temp == null)
|
||||
temp = " ";
|
||||
s.append(temp);
|
||||
for (int i = 0; i < 73 - temp.length(); i ++)
|
||||
s.append(' ');
|
||||
|
||||
s.append(method);
|
||||
for (int i = 0; i < 27 - method.length(); i ++)
|
||||
s.append(' ');
|
||||
s.append(ucharval);
|
||||
for (int i = 0; i < 11 - ucharval.length(); i ++)
|
||||
s.append(' ');
|
||||
s.append(charval);
|
||||
f.println(s.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Does up a summary of the differences
|
||||
* @param f file outputstream
|
||||
*/
|
||||
private static void summary(PrintWriter f)
|
||||
{
|
||||
f.println("==================================================");
|
||||
f.println("Summary of differences");
|
||||
for (Enumeration e = m_hashtable_.keys() ; e.hasMoreElements() ;)
|
||||
{
|
||||
StringBuffer method = new StringBuffer((String)e.nextElement());
|
||||
int count = ((Integer)m_hashtable_.get(method.toString())).intValue();
|
||||
for (int i = 30 - method.length(); i > 0; i --)
|
||||
method.append(' ');
|
||||
f.println(method + " " + count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
664
icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java
Executable file
664
icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java
Executable file
|
@ -0,0 +1,664 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterTest.java,v $
|
||||
* $Date: 2000/12/26 20:01:08 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.test.text;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileReader;
|
||||
import com.ibm.test.TestFmwk;
|
||||
import com.ibm.icu.text.UCharacter;
|
||||
import com.ibm.icu.text.UCharacterCategoryEnum;
|
||||
import com.ibm.icu.text.UCharacterDirectionEnum;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
|
||||
/**
|
||||
* Testing class for UCharacter
|
||||
* Mostly following the test cases for ICU
|
||||
* @author Syn Wee Quek
|
||||
* @since nov 04 2000
|
||||
*/
|
||||
public final class UCharacterTest extends TestFmwk
|
||||
{
|
||||
// private variables =============================================
|
||||
|
||||
/**
|
||||
* ICU4J data version number
|
||||
*/
|
||||
private final String VERSION_ = "3.0.0.0";
|
||||
|
||||
// constructor ===================================================
|
||||
|
||||
/**
 * Creates the test object; the class has no state of its own to set up.
 */
public UCharacterTest()
{
  super();
}
|
||||
|
||||
// public methods ================================================
|
||||
|
||||
/**
|
||||
* Testing the uppercase and lowercase function of UCharacter
|
||||
*/
|
||||
public void TestUpperLower()
|
||||
{
|
||||
// variables to test the uppercase and lowercase characters
|
||||
int upper[] = {0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0xb1, 0xb2,
|
||||
0xb3, 0x48, 0x49, 0x4a, 0x2e, 0x3f, 0x3a, 0x4b, 0x4c,
|
||||
0x4d, 0x4e, 0x4f, 0x01c4, 0x01c8, 0x000c, 0x0000};
|
||||
int lower[] = {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xb1, 0x00b2,
|
||||
0xb3, 0x68, 0x69, 0x6a, 0x2e, 0x3f, 0x3a, 0x6b, 0x6c,
|
||||
0x6d, 0x6e, 0x6f, 0x01c6, 0x01c9, 0x000c, 0x0000};
|
||||
|
||||
int size = upper.length;
|
||||
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (UCharacter.isLetter(lower[i]) && !UCharacter.isLowerCase(lower[i]))
|
||||
{
|
||||
errln("FAIL isLowerCase test for 0x" +
|
||||
Integer.toHexString(lower[i]));
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isLetter(upper[i]) && !(UCharacter.isUpperCase(upper[i])
|
||||
|| UCharacter.isTitleCase(upper[i])))
|
||||
{
|
||||
errln("FAIL isUpperCase test for 0x" +
|
||||
Integer.toHexString(upper[i]));
|
||||
break;
|
||||
}
|
||||
if (lower[i] != UCharacter.toLowerCase(upper[i]) ||
|
||||
(upper[i] != UCharacter.toUpperCase(lower[i]) &&
|
||||
upper[i] != UCharacter.toTitleCase(lower[i])))
|
||||
{
|
||||
errln("FAIL case conversion test for 0x" +
|
||||
Integer.toHexString(upper[i]) + " to 0x" +
|
||||
Integer.toHexString(lower[i]));
|
||||
break;
|
||||
}
|
||||
if (lower[i] != UCharacter.toLowerCase(lower[i]))
|
||||
{
|
||||
errln("FAIL lower case conversion test for 0x" +
|
||||
Integer.toHexString(lower[i]));
|
||||
break;
|
||||
}
|
||||
if (upper[i] != UCharacter.toUpperCase(upper[i]) &&
|
||||
upper[i] != UCharacter.toTitleCase(upper[i]))
|
||||
{
|
||||
errln("FAIL upper case conversion test for 0x" +
|
||||
Integer.toHexString(upper[i]));
|
||||
break;
|
||||
}
|
||||
logln("Ok 0x" + Integer.toHexString(upper[i]) + " and 0x" +
|
||||
Integer.toHexString(lower[i]));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing the letter and number determination in UCharacter
|
||||
*/
|
||||
public void TestLetterNumber()
|
||||
{
|
||||
for (int i = 0x0041; i < 0x005B; i ++)
|
||||
if (!UCharacter.isLetter(i))
|
||||
errln("FAIL 0x" + Integer.toHexString(i) + " expected to be a letter");
|
||||
|
||||
for (int i = 0x0660; i < 0x066A; i ++)
|
||||
if (UCharacter.isLetter(i))
|
||||
errln("FAIL 0x" + Integer.toHexString(i) +
|
||||
" expected not to be a letter");
|
||||
|
||||
for (int i = 0x0660; i < 0x066A; i ++)
|
||||
if (!UCharacter.isDigit(i))
|
||||
errln("FAIL 0x" + Integer.toHexString(i) + " expected to be a digit");
|
||||
|
||||
for (int i = 0x0041; i < 0x005B; i ++)
|
||||
if (!UCharacter.isLetterOrDigit(i))
|
||||
errln("FAIL 0x" + Integer.toHexString(i) +
|
||||
" expected not to be a digit");
|
||||
|
||||
for (int i = 0x0660; i < 0x066A; i ++)
|
||||
if (!UCharacter.isLetterOrDigit(i))
|
||||
errln("FAIL 0x" + Integer.toHexString(i) +
|
||||
"expected to be either a letter or a digit");
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for space determination in UCharacter
|
||||
*/
|
||||
public void TestSpaces()
|
||||
{
|
||||
int spaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
|
||||
int nonspaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
|
||||
int whitespaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
|
||||
int nonwhitespaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f};
|
||||
|
||||
int size = spaces.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (!UCharacter.isSpaceChar(spaces[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(spaces[i]) +
|
||||
" expected to be a space character");
|
||||
break;
|
||||
}
|
||||
|
||||
if (UCharacter.isSpaceChar(nonspaces[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonspaces[i]) +
|
||||
" expected not to be space character");
|
||||
break;
|
||||
}
|
||||
|
||||
if (!UCharacter.isWhitespace(whitespaces[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(whitespaces[i]) +
|
||||
" expected to be a white space character");
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isWhitespace(nonwhitespaces[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonwhitespaces[i]) +
|
||||
" expected not to be a space character");
|
||||
break;
|
||||
}
|
||||
logln("Ok 0x" + Integer.toHexString(spaces[i]) + " and 0x" +
|
||||
Integer.toHexString(nonspaces[i]) + " and 0x" +
|
||||
Integer.toHexString(whitespaces[i]) + " and 0x" +
|
||||
Integer.toHexString(nonwhitespaces[i]));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for defined and undefined characters
|
||||
*/
|
||||
public void TestDefined()
|
||||
{
|
||||
int undefined[] = {0xfff1, 0xfff7, 0xfa30};
|
||||
int defined[] = {0x523E, 0x4f88, 0xfffd};
|
||||
|
||||
int size = undefined.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (UCharacter.isDefined(undefined[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(undefined[i]) +
|
||||
" expected not to be defined");
|
||||
break;
|
||||
}
|
||||
if (!UCharacter.isDefined(defined[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(defined[i]) +
|
||||
" expected defined");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for base characters and their cellwidth
|
||||
*/
|
||||
public void TestBase()
|
||||
{
|
||||
int base[] = {0x0061, 0x0031, 0x03d2};
|
||||
int nonbase[] = {0x002B, 0x0020, 0x203B};
|
||||
int size = base.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (UCharacter.isBaseForm(nonbase[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonbase[i]) +
|
||||
" expected not to be a base character");
|
||||
break;
|
||||
}
|
||||
if (!UCharacter.isBaseForm(base[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(base[i]) +
|
||||
" expected to be a base character");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for digit characters
|
||||
*/
|
||||
public void TestDigits()
|
||||
{
|
||||
int digits[] = {0x0030, 0x0662, 0x0F23, 0x0ED5, 0x2160};
|
||||
|
||||
//special characters not in the properties table
|
||||
int digits2[] = {0x3007, 0x4e00, 0x4e8c, 0x4e09, 0x56d8, 0x4e94, 0x516d,
|
||||
0x4e03, 0x516b, 0x4e5d};
|
||||
int nondigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
|
||||
|
||||
int digitvalues[] = {0, 2, 3, 5, 1};
|
||||
int digitvalues2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
|
||||
|
||||
int size = digits.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
if (UCharacter.isDigit(digits[i]) &&
|
||||
UCharacter.digit(digits[i]) != digitvalues[i])
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(digits[i]) +
|
||||
" expected digit with value " + digitvalues[i]);
|
||||
break;
|
||||
}
|
||||
|
||||
size = nondigits.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
if (UCharacter.isDigit(nondigits[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nondigits[i]) +
|
||||
" expected nondigit");
|
||||
break;
|
||||
}
|
||||
|
||||
size = digits2.length;
|
||||
for (int i = 0; i < 10; i ++)
|
||||
if (UCharacter.isDigit(digits2[i]) &&
|
||||
UCharacter.digit(digits2[i]) != digitvalues2[i])
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(digits2[i]) +
|
||||
" expected digit with value " + digitvalues2[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for version
|
||||
*/
|
||||
public void TestVersion()
|
||||
{
|
||||
String version = UCharacter.getUnicodeVersion();
|
||||
if (!version.equals(VERSION_))
|
||||
errln("FAIL expected " + VERSION_);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for control characters
|
||||
*/
|
||||
public void TestControl()
|
||||
{
|
||||
int control[] = {0x001b, 0x0097, 0x0082};
|
||||
int noncontrol[] = {0x61, 0x0031, 0x00e2};
|
||||
|
||||
int size = control.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (!UCharacter.isControl(control[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(control[i]) +
|
||||
" expected to be a control character");
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isControl(noncontrol[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(noncontrol[i]) +
|
||||
" expected to be not a control character");
|
||||
break;
|
||||
}
|
||||
|
||||
logln("Ok 0x" + Integer.toHexString(control[i]) + " and 0x" +
|
||||
Integer.toHexString(noncontrol[i]));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for printable characters
|
||||
*/
|
||||
public void TestPrint()
|
||||
{
|
||||
int printable[] = {0x0042, 0x005f, 0x2014};
|
||||
int nonprintable[] = {0x200c, 0x009f, 0x001b};
|
||||
|
||||
int size = printable.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (!UCharacter.isPrintable(printable[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(printable[i]) +
|
||||
" expected to be a printable character");
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isPrintable(nonprintable[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonprintable[i]) +
|
||||
" expected not to be a printable character");
|
||||
break;
|
||||
}
|
||||
logln("Ok 0x" + Integer.toHexString(printable[i]) + " and 0x" +
|
||||
Integer.toHexString(nonprintable[i]));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing for identifier characters
|
||||
*/
|
||||
public void TestIdentifier()
|
||||
{
|
||||
int unicodeidstart[] = {0x0250, 0x00e2, 0x0061};
|
||||
int nonunicodeidstart[] = {0x2000, 0x000a, 0x2019};
|
||||
int unicodeidpart[] = {0x005f, 0x0032, 0x0045};
|
||||
int nonunicodeidpart[] = {0x2030, 0x00a3, 0x0020};
|
||||
int idignore[] = {0x070F, 0x180B, 0x180C};
|
||||
int nonidignore[] = {0x0075, 0x00a3, 0x0061};
|
||||
|
||||
int size = unicodeidstart.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (!UCharacter.isUnicodeIdentifierStart(unicodeidstart[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(unicodeidstart[i]) +
|
||||
" expected to be a unicode identifier start character");
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isUnicodeIdentifierStart(nonunicodeidstart[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonunicodeidstart[i]) +
|
||||
" expected not to be a unicode identifier start character");
|
||||
break;
|
||||
}
|
||||
if (!UCharacter.isUnicodeIdentifierPart(unicodeidpart[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(unicodeidpart[i]) +
|
||||
" expected to be a unicode identifier part character");
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isUnicodeIdentifierPart(nonunicodeidpart[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonunicodeidpart[i]) +
|
||||
" expected not to be a unicode identifier part character");
|
||||
break;
|
||||
}
|
||||
if (!UCharacter.isIdentifierIgnorable(idignore[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(idignore[i]) +
|
||||
" expected to be a ignorable unicode character");
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isIdentifierIgnorable(nonidignore[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonidignore[i]) +
|
||||
" expected not to be a ignorable unicode character");
|
||||
break;
|
||||
}
|
||||
logln("Ok 0x" + Integer.toHexString(unicodeidstart[i]) + " and 0x" +
|
||||
Integer.toHexString(nonunicodeidstart[i]) + " and 0x" +
|
||||
Integer.toHexString(unicodeidpart[i]) + " and 0x" +
|
||||
Integer.toHexString(nonunicodeidpart[i]) + " and 0x" +
|
||||
Integer.toHexString(idignore[i]) + " and 0x" +
|
||||
Integer.toHexString(nonidignore[i]));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for the character types, direction
|
||||
*/
|
||||
public void TestCatDir()
|
||||
{
|
||||
// this is the 2 char category types used in the UnicodeData file
|
||||
final String TYPE =
|
||||
"LuLlLtLmLoMnMeMcNdNlNoZsZlZpCcCfCoCsPdPsPePcPoSmScSkSoPiPf";
|
||||
|
||||
// directory types used in the UnicodeData file
|
||||
// padded by spaces to make each type size 4
|
||||
final String DIR =
|
||||
"L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN ";
|
||||
|
||||
StringBuffer file = new StringBuffer("UnicodeData-");
|
||||
file.append(UCharacter.getUnicodeVersion());
|
||||
file.append(".txt");
|
||||
String s;
|
||||
|
||||
final int LASTUNICODECHAR = 0xFFFD;
|
||||
int ch = 0,
|
||||
index = 0,
|
||||
type = 0,
|
||||
dir = 0;
|
||||
|
||||
try
|
||||
{
|
||||
// reading in the UnicodeData file
|
||||
FileReader fr = new FileReader(file.toString());
|
||||
BufferedReader input = new BufferedReader(fr);
|
||||
|
||||
while (ch != LASTUNICODECHAR)
|
||||
{
|
||||
s= input.readLine();
|
||||
|
||||
// geting the unicode character, its type and its direction
|
||||
ch = Integer.parseInt(s.substring(0, 4), 16);
|
||||
index = s.indexOf(';', 5);
|
||||
String t = s.substring(index + 1, index + 3);
|
||||
index = s.indexOf(';', index + 4);
|
||||
String d = s.substring(index + 1, s.indexOf(';', index + 1));
|
||||
|
||||
// testing the category
|
||||
// we override the general category of some control characters
|
||||
if (ch == 9 || ch == 0xb || ch == 0x1f)
|
||||
type = UCharacterCategoryEnum.SPACE_SEPARATOR;
|
||||
else
|
||||
if (ch == 0xc)
|
||||
type = UCharacterCategoryEnum.LINE_SEPARATOR;
|
||||
else
|
||||
if (ch == 0xa || ch == 0xd || ch == 0x1c || ch == 0x1d ||
|
||||
ch == 0x1e || ch == 0x85)
|
||||
type = UCharacterCategoryEnum.PARAGRAPH_SEPARATOR;
|
||||
else
|
||||
{
|
||||
type = TYPE.indexOf(t);
|
||||
if (type < 0)
|
||||
type = 0;
|
||||
else
|
||||
type = (type >> 1) + 1;
|
||||
}
|
||||
|
||||
if (UCharacter.getType(ch) != type)
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(ch) + " expected type " +
|
||||
type);
|
||||
break;
|
||||
}
|
||||
|
||||
// testing the direction
|
||||
if (d.length() == 1)
|
||||
d = d + " ";
|
||||
|
||||
dir = DIR.indexOf(d) >> 2;
|
||||
if (UCharacter.getDirection(ch) != dir)
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(ch) +
|
||||
" expected wrong direction " + dir);
|
||||
break;
|
||||
}
|
||||
}
|
||||
input.close();
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
if (UCharacter.getDirection(0x10001) !=
|
||||
UCharacterDirectionEnum.LEFT_TO_RIGHT)
|
||||
errln("FAIL 0x10001 expected direction " +
|
||||
UCharacterDirectionEnum.toString(UCharacterDirectionEnum.LEFT_TO_RIGHT));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for the character names
|
||||
*/
|
||||
public void TestNames()
|
||||
{
|
||||
int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xff08, 0xffe5};
|
||||
String name[] = {"LATIN SMALL LETTER A",
|
||||
"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
|
||||
"CJK UNIFIED IDEOGRAPH-3401",
|
||||
"CJK UNIFIED IDEOGRAPH-7FED", "HANGUL SYLLABLE GA",
|
||||
"HANGUL SYLLABLE HIH", "FULLWIDTH LEFT PARENTHESIS",
|
||||
"FULLWIDTH YEN SIGN"};
|
||||
String oldname[] = {"", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "", "",
|
||||
"", "", "FULLWIDTH OPENING PARENTHESIS", ""};
|
||||
int size = c.length;
|
||||
String str;
|
||||
int uc;
|
||||
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
// modern Unicode character name
|
||||
str = UCharacter.getName(c[i]);
|
||||
if (!str.equalsIgnoreCase(name[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(c[i]) + " expected name " +
|
||||
name[i]);
|
||||
break;
|
||||
}
|
||||
|
||||
// 1.0 Unicode character name
|
||||
str = UCharacter.getName1_0(c[i]);
|
||||
if ((str == null && oldname[i].length() > 0) ||
|
||||
(str != null && !str.equalsIgnoreCase(oldname[i])))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(c[i]) + " expected 1.0 name " +
|
||||
oldname[i]);
|
||||
break;
|
||||
}
|
||||
|
||||
// retrieving unicode character from modern name
|
||||
uc = UCharacter.getCharFromName(name[i]);
|
||||
if (uc != c[i])
|
||||
{
|
||||
errln("FAIL " + name[i] + " expected character 0x" +
|
||||
Integer.toHexString(c[i]));
|
||||
break;
|
||||
}
|
||||
|
||||
//retrieving unicode character from 1.0 name
|
||||
uc = UCharacter.getCharFromName1_0(oldname[i]);
|
||||
if (uc != c[i] && i != 0 && (i == 1 || i == 6))
|
||||
{
|
||||
errln("FAIL " + name[i] + " expected 1.0 character " +
|
||||
Integer.toHexString(c[i]));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// extra testing different from icu
|
||||
for (int i = UCharacter.MIN_VALUE; i < UCharacter.MAX_VALUE; i ++)
|
||||
{
|
||||
str = UCharacter.getName(i);
|
||||
if (str != null && UCharacter.getCharFromName(str) != i)
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(i) + " " + str +
|
||||
" retrieval of name and vice versa" );
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing UTF16 class methods append, getCharCount and bounds
|
||||
*/
|
||||
public void TestUTF16AppendBoundCount()
|
||||
{
|
||||
StringBuffer str = new StringBuffer("this is a string ");
|
||||
int length;
|
||||
|
||||
for (int i = UCharacter.MIN_VALUE; i < UCharacter.MAX_VALUE; i ++)
|
||||
{
|
||||
length = str.length();
|
||||
UTF16.append(str, i);
|
||||
if (!UCharacter.isSupplementary(i))
|
||||
{
|
||||
if (UTF16.getCharCount(i) != 1)
|
||||
{
|
||||
errln("FAIL Counting BMP character size error" );
|
||||
break;
|
||||
}
|
||||
if (str.length() != length + 1)
|
||||
{
|
||||
errln("FAIL Adding a BMP character error" );
|
||||
break;
|
||||
}
|
||||
if (!UTF16.isSurrogate((char)i) &&
|
||||
UTF16.bounds(str.toString(), str.length() - 1) !=
|
||||
UTF16.SINGLE_CHAR_BOUNDARY)
|
||||
{
|
||||
errln("FAIL Finding BMP character bounds error" );
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (UTF16.getCharCount(i) != 2)
|
||||
{
|
||||
errln("FAIL Counting Supplementary character size error" );
|
||||
break;
|
||||
}
|
||||
if (str.length() != length + 2)
|
||||
{
|
||||
errln("FAIL Adding a Supplementary character error" );
|
||||
break;
|
||||
}
|
||||
length = str.length();
|
||||
if (UTF16.bounds(str.toString(), str.length() - 2) !=
|
||||
UTF16.LEAD_SURROGATE_BOUNDARY ||
|
||||
UTF16.bounds(str.toString(), str.length() - 1) !=
|
||||
UTF16.TRAIL_SURROGATE_BOUNDARY)
|
||||
{
|
||||
errln("FAIL Finding Supplementary character bounds error" );
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing UTF16 class methods findCPOffset, findOffsetFromCP and charAt
|
||||
*/
|
||||
public void TestUTF16OffsetCharAt()
|
||||
{
|
||||
StringBuffer str = new StringBuffer("12345");
|
||||
UTF16.append(str, 0x10001);
|
||||
str.append("67890");
|
||||
UTF16.append(str, 0x10002);
|
||||
String s = str.toString();
|
||||
if (UTF16.charAt(s, 0) != '1' || UTF16.charAt(s, 2) != '3' ||
|
||||
UTF16.charAt(s, 5) != 0x10001 || UTF16.charAt(s, 6) != 0x10001 ||
|
||||
UTF16.charAt(s, 12) != 0x10002 || UTF16.charAt(s, 13) != 0x10002)
|
||||
errln("FAIL Getting character from string error" );
|
||||
|
||||
if (UTF16.findCPOffset(s, 3) != 3 || UTF16.findCPOffset(s, 5) != 5 ||
|
||||
UTF16.findCPOffset(s, 6) != 6)
|
||||
errln("FAIL Getting codepoint offset from string error" );
|
||||
if (UTF16.findOffsetFromCP(s, 3) != 3 ||
|
||||
UTF16.findOffsetFromCP(s, 5) != 5 ||
|
||||
UTF16.findOffsetFromCP(s, 6) != 7)
|
||||
errln("FAIL Getting UTF16 offset from codepoint in string error" );
|
||||
}
|
||||
|
||||
public static void main(String[] arg)
|
||||
{
|
||||
try
|
||||
{
|
||||
UCharacterTest test = new UCharacterTest();
|
||||
test.run(arg);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
313
icu4j/src/com/ibm/icu/test/text/UCharacterCompare.java
Executable file
313
icu4j/src/com/ibm/icu/test/text/UCharacterCompare.java
Executable file
|
@ -0,0 +1,313 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/test/text/Attic/UCharacterCompare.java,v $
|
||||
* $Date: 2000/12/26 20:01:08 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.test.text;
|
||||
|
||||
import com.ibm.icu.text.UCharacter;
|
||||
import com.ibm.icu.text.UCharacterCategoryEnum;
|
||||
import java.io.FileWriter;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.Hashtable;
|
||||
import java.util.Enumeration;
|
||||
|
||||
/**
|
||||
* A class to compare the difference in methods between java.lang.Character and
|
||||
* UCharacter
|
||||
* @author Syn Wee Quek
|
||||
* @since oct 06 2000
|
||||
* @see com.ibm.icu.text.UCharacter
|
||||
*/
|
||||
|
||||
public final class UCharacterCompare
|
||||
{
|
||||
// private variables ================================================
|
||||
|
||||
private static Hashtable m_hashtable_ = new Hashtable();
|
||||
|
||||
// public methods ======================================================
|
||||
|
||||
/**
|
||||
* Main testing method
|
||||
*/
|
||||
public static void main(String arg[])
|
||||
{
|
||||
try
|
||||
{
|
||||
FileWriter f;
|
||||
if (arg.length == 0)
|
||||
f = new FileWriter("compare.txt");
|
||||
else
|
||||
f = new FileWriter(arg[0]);
|
||||
PrintWriter p = new PrintWriter(f);
|
||||
p.print("char character name ");
|
||||
p.println("method name ucharacter character");
|
||||
for (char i = Character.MIN_VALUE; i < Character.MAX_VALUE; i ++)
|
||||
{
|
||||
if (UCharacter.isDefined(i) != Character.isDefined(i))
|
||||
trackDifference(p, i, "isDefined()", "" + UCharacter.isDefined(i),
|
||||
"" + Character.isDefined(i));
|
||||
else
|
||||
{
|
||||
if (UCharacter.digit(i, 10) != Character.digit(i, 10))
|
||||
trackDifference(p, i, "digit()", "" + UCharacter.digit(i, 10),
|
||||
"" + Character.digit(i, 10));
|
||||
if (UCharacter.getNumericValue(i) != Character.getNumericValue(i))
|
||||
trackDifference(p, i, "getNumericValue()",
|
||||
"" + UCharacter.getNumericValue(i),
|
||||
"" + Character.getNumericValue(i));
|
||||
if (!compareType(UCharacter.getType(i), Character.getType(i)))
|
||||
trackDifference(p, i, "getType()", "" + UCharacter.getType(i),
|
||||
"" + Character.getType(i));
|
||||
if (UCharacter.isDigit(i) != Character.isDigit(i))
|
||||
trackDifference(p, i, "isDigit()",
|
||||
"" + UCharacter.isDigit(i),
|
||||
"" + Character.isDigit(i));
|
||||
if (UCharacter.isISOControl(i) != Character.isISOControl(i))
|
||||
trackDifference(p, i, "isISOControl()",
|
||||
"" + UCharacter.isISOControl(i),
|
||||
"" + Character.isISOControl(i));
|
||||
if (UCharacter.isLetter(i) != Character.isLetter(i))
|
||||
trackDifference(p, i, "isLetter()", "" + UCharacter.isLetter(i),
|
||||
"" + Character.isLetter(i));
|
||||
if (UCharacter.isLetterOrDigit(i) != Character.isLetterOrDigit(i))
|
||||
trackDifference(p, i, "isLetterOrDigit()",
|
||||
"" + UCharacter.isLetterOrDigit(i),
|
||||
"" + Character.isLetterOrDigit(i));
|
||||
if (UCharacter.isLowerCase(i) != Character.isLowerCase(i))
|
||||
trackDifference(p, i, "isLowerCase()",
|
||||
"" + UCharacter.isLowerCase(i),
|
||||
"" + Character.isLowerCase(i));
|
||||
if (UCharacter.isWhitespace(i) != Character.isWhitespace(i))
|
||||
trackDifference(p, i, "isWhitespace()",
|
||||
"" + UCharacter.isWhitespace(i),
|
||||
"" + Character.isWhitespace(i));
|
||||
if (UCharacter.isSpaceChar(i) != Character.isSpaceChar(i))
|
||||
trackDifference(p, i, "isSpaceChar()",
|
||||
"" + UCharacter.isSpaceChar(i),
|
||||
"" + Character.isSpaceChar(i));
|
||||
if (UCharacter.isTitleCase(i) != Character.isTitleCase(i))
|
||||
trackDifference(p, i, "isTitleChar()",
|
||||
"" + UCharacter.isTitleCase(i),
|
||||
"" + Character.isTitleCase(i));
|
||||
if (UCharacter.isUnicodeIdentifierPart(i) !=
|
||||
Character.isUnicodeIdentifierPart(i))
|
||||
trackDifference(p, i, "isUnicodeIdentifierPart()",
|
||||
"" + UCharacter.isUnicodeIdentifierPart(i),
|
||||
"" + Character.isUnicodeIdentifierPart(i));
|
||||
if (UCharacter.isUnicodeIdentifierStart(i) !=
|
||||
Character.isUnicodeIdentifierStart(i))
|
||||
trackDifference(p, i, "isUnicodeIdentifierStart()",
|
||||
"" + UCharacter.isUnicodeIdentifierStart(i),
|
||||
"" + Character.isUnicodeIdentifierStart(i));
|
||||
if (UCharacter.isIdentifierIgnorable(i) !=
|
||||
Character.isIdentifierIgnorable(i))
|
||||
trackDifference(p, i, "isIdentifierIgnorable()",
|
||||
"" + UCharacter.isIdentifierIgnorable(i),
|
||||
"" + Character.isIdentifierIgnorable(i));
|
||||
if (UCharacter.isUpperCase(i) != Character.isUpperCase(i))
|
||||
trackDifference(p, i, "isUpperCase()",
|
||||
"" + UCharacter.isUpperCase(i),
|
||||
"" + Character.isUpperCase(i));
|
||||
if (UCharacter.toLowerCase(i) != Character.toLowerCase(i))
|
||||
trackDifference(p, i, "toLowerCase()",
|
||||
Integer.toHexString(UCharacter.toLowerCase(i)),
|
||||
Integer.toHexString(Character.toLowerCase(i)));
|
||||
if (!UCharacter.toString(i).equals(new Character(i).toString()))
|
||||
trackDifference(p, i, "toString()",
|
||||
UCharacter.toString(i),
|
||||
new Character(i).toString());
|
||||
if (UCharacter.toTitleCase(i) != Character.toTitleCase(i))
|
||||
trackDifference(p, i, "toTitleCase()",
|
||||
Integer.toHexString(UCharacter.toTitleCase(i)),
|
||||
Integer.toHexString(Character.toTitleCase(i)));
|
||||
if (UCharacter.toUpperCase(i) != Character.toUpperCase(i))
|
||||
trackDifference(p, i, "toUpperCase()",
|
||||
Integer.toHexString(UCharacter.toUpperCase(i)),
|
||||
Integer.toHexString(Character.toUpperCase(i)));
|
||||
}
|
||||
}
|
||||
summary(p);
|
||||
p.close();
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
// private methods ===================================================
|
||||
|
||||
/**
|
||||
* Comparing types
|
||||
* @param uchartype UCharacter type
|
||||
* @param jchartype java.lang.Character type
|
||||
*/
|
||||
private static boolean compareType(int uchartype, int jchartype)
|
||||
{
|
||||
if (uchartype == UCharacterCategoryEnum.UNASSIGNED &&
|
||||
jchartype == Character.UNASSIGNED)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.UPPERCASE_LETTER &&
|
||||
jchartype == Character.UPPERCASE_LETTER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.LOWERCASE_LETTER &&
|
||||
jchartype == Character.LOWERCASE_LETTER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.TITLECASE_LETTER &&
|
||||
jchartype == Character.TITLECASE_LETTER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.MODIFIER_LETTER &&
|
||||
jchartype == Character.MODIFIER_LETTER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.OTHER_LETTER &&
|
||||
jchartype == Character.OTHER_LETTER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.NON_SPACING_MARK &&
|
||||
jchartype == Character.NON_SPACING_MARK)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.ENCLOSING_MARK &&
|
||||
jchartype == Character.ENCLOSING_MARK)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.COMBINING_SPACING_MARK &&
|
||||
jchartype == Character.COMBINING_SPACING_MARK)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.DECIMAL_DIGIT_NUMBER &&
|
||||
jchartype == Character.DECIMAL_DIGIT_NUMBER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.LETTER_NUMBER &&
|
||||
jchartype == Character.LETTER_NUMBER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.OTHER_NUMBER &&
|
||||
jchartype == Character.OTHER_NUMBER)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.SPACE_SEPARATOR &&
|
||||
jchartype == Character.SPACE_SEPARATOR)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.LINE_SEPARATOR &&
|
||||
jchartype == Character.LINE_SEPARATOR)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.PARAGRAPH_SEPARATOR &&
|
||||
jchartype == Character.PARAGRAPH_SEPARATOR)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.CONTROL &&
|
||||
jchartype == Character.CONTROL)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.FORMAT &&
|
||||
jchartype == Character.FORMAT)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.PRIVATE_USE &&
|
||||
jchartype == Character.PRIVATE_USE)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.SURROGATE &&
|
||||
jchartype == Character.SURROGATE)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.DASH_PUNCTUATION &&
|
||||
jchartype == Character.DASH_PUNCTUATION)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.START_PUNCTUATION &&
|
||||
jchartype == Character.START_PUNCTUATION)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.END_PUNCTUATION &&
|
||||
jchartype == Character.END_PUNCTUATION)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.CONNECTOR_PUNCTUATION &&
|
||||
jchartype == Character.CONNECTOR_PUNCTUATION)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.OTHER_PUNCTUATION &&
|
||||
jchartype == Character.OTHER_PUNCTUATION)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.MATH_SYMBOL &&
|
||||
jchartype == Character.MATH_SYMBOL)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.CURRENCY_SYMBOL &&
|
||||
jchartype == Character.CURRENCY_SYMBOL)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.MODIFIER_SYMBOL &&
|
||||
jchartype == Character.MODIFIER_SYMBOL)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.OTHER_SYMBOL &&
|
||||
jchartype == Character.OTHER_SYMBOL)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.INITIAL_PUNCTUATION &&
|
||||
jchartype == Character.START_PUNCTUATION)
|
||||
return true;
|
||||
if (uchartype == UCharacterCategoryEnum.FINAL_PUNCTUATION &&
|
||||
jchartype == Character.END_PUNCTUATION)
|
||||
return true;
|
||||
/*if (uchartype == UCharacterCategoryEnum.GENERAL_OTHER_TYPES &&
|
||||
jchartype == Character.GENERAL_OTHER_TYPES)
|
||||
return true;*/
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Difference writing to file
|
||||
* @param f file outputstream
|
||||
* @param ch code point
|
||||
* @param method for testing
|
||||
* @param ucharval UCharacter value after running method
|
||||
* @param charval Character value after running method
|
||||
* @exception thrown when error occur in writing to file
|
||||
*/
|
||||
private static void trackDifference(PrintWriter f, int ch, String method,
|
||||
String ucharval, String charval)
|
||||
throws Exception
|
||||
{
|
||||
if (m_hashtable_.containsKey(method))
|
||||
{
|
||||
Integer value = (Integer)m_hashtable_.get(method);
|
||||
m_hashtable_.put(method, new Integer(value.intValue() + 1));
|
||||
}
|
||||
else
|
||||
m_hashtable_.put(method, new Integer(1));
|
||||
|
||||
String temp = Integer.toHexString(ch);
|
||||
StringBuffer s = new StringBuffer(temp);
|
||||
for (int i = 0; i < 6 - temp.length(); i ++)
|
||||
s.append(' ');
|
||||
temp = UCharacter.getName(ch);
|
||||
if (temp == null)
|
||||
temp = " ";
|
||||
s.append(temp);
|
||||
for (int i = 0; i < 73 - temp.length(); i ++)
|
||||
s.append(' ');
|
||||
|
||||
s.append(method);
|
||||
for (int i = 0; i < 27 - method.length(); i ++)
|
||||
s.append(' ');
|
||||
s.append(ucharval);
|
||||
for (int i = 0; i < 11 - ucharval.length(); i ++)
|
||||
s.append(' ');
|
||||
s.append(charval);
|
||||
f.println(s.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Does up a summary of the differences
|
||||
* @param f file outputstream
|
||||
*/
|
||||
private static void summary(PrintWriter f)
|
||||
{
|
||||
f.println("==================================================");
|
||||
f.println("Summary of differences");
|
||||
for (Enumeration e = m_hashtable_.keys() ; e.hasMoreElements() ;)
|
||||
{
|
||||
StringBuffer method = new StringBuffer((String)e.nextElement());
|
||||
int count = ((Integer)m_hashtable_.get(method.toString())).intValue();
|
||||
for (int i = 30 - method.length(); i > 0; i --)
|
||||
method.append(' ');
|
||||
f.println(method + " " + count);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
664
icu4j/src/com/ibm/icu/test/text/UCharacterTest.java
Executable file
664
icu4j/src/com/ibm/icu/test/text/UCharacterTest.java
Executable file
|
@ -0,0 +1,664 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/test/text/Attic/UCharacterTest.java,v $
|
||||
* $Date: 2000/12/26 20:01:08 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.test.text;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileReader;
|
||||
import com.ibm.test.TestFmwk;
|
||||
import com.ibm.icu.text.UCharacter;
|
||||
import com.ibm.icu.text.UCharacterCategoryEnum;
|
||||
import com.ibm.icu.text.UCharacterDirectionEnum;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
|
||||
/**
|
||||
* Testing class for UCharacter
|
||||
* Mostly following the test cases for ICU
|
||||
* @author Syn Wee Quek
|
||||
* @since nov 04 2000
|
||||
*/
|
||||
public final class UCharacterTest extends TestFmwk
|
||||
{
|
||||
// private variables =============================================
|
||||
|
||||
/**
|
||||
* ICU4J data version number
|
||||
*/
|
||||
private final String VERSION_ = "3.0.0.0";
|
||||
|
||||
// constructor ===================================================
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
*/
|
||||
public UCharacterTest()
{
  // nothing to set up
}
|
||||
|
||||
// public methods ================================================
|
||||
|
||||
/**
|
||||
* Testing the uppercase and lowercase function of UCharacter
|
||||
*/
|
||||
public void TestUpperLower()
|
||||
{
|
||||
// variables to test the uppercase and lowercase characters
|
||||
int upper[] = {0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0xb1, 0xb2,
|
||||
0xb3, 0x48, 0x49, 0x4a, 0x2e, 0x3f, 0x3a, 0x4b, 0x4c,
|
||||
0x4d, 0x4e, 0x4f, 0x01c4, 0x01c8, 0x000c, 0x0000};
|
||||
int lower[] = {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0xb1, 0x00b2,
|
||||
0xb3, 0x68, 0x69, 0x6a, 0x2e, 0x3f, 0x3a, 0x6b, 0x6c,
|
||||
0x6d, 0x6e, 0x6f, 0x01c6, 0x01c9, 0x000c, 0x0000};
|
||||
|
||||
int size = upper.length;
|
||||
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (UCharacter.isLetter(lower[i]) && !UCharacter.isLowerCase(lower[i]))
|
||||
{
|
||||
errln("FAIL isLowerCase test for 0x" +
|
||||
Integer.toHexString(lower[i]));
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isLetter(upper[i]) && !(UCharacter.isUpperCase(upper[i])
|
||||
|| UCharacter.isTitleCase(upper[i])))
|
||||
{
|
||||
errln("FAIL isUpperCase test for 0x" +
|
||||
Integer.toHexString(upper[i]));
|
||||
break;
|
||||
}
|
||||
if (lower[i] != UCharacter.toLowerCase(upper[i]) ||
|
||||
(upper[i] != UCharacter.toUpperCase(lower[i]) &&
|
||||
upper[i] != UCharacter.toTitleCase(lower[i])))
|
||||
{
|
||||
errln("FAIL case conversion test for 0x" +
|
||||
Integer.toHexString(upper[i]) + " to 0x" +
|
||||
Integer.toHexString(lower[i]));
|
||||
break;
|
||||
}
|
||||
if (lower[i] != UCharacter.toLowerCase(lower[i]))
|
||||
{
|
||||
errln("FAIL lower case conversion test for 0x" +
|
||||
Integer.toHexString(lower[i]));
|
||||
break;
|
||||
}
|
||||
if (upper[i] != UCharacter.toUpperCase(upper[i]) &&
|
||||
upper[i] != UCharacter.toTitleCase(upper[i]))
|
||||
{
|
||||
errln("FAIL upper case conversion test for 0x" +
|
||||
Integer.toHexString(upper[i]));
|
||||
break;
|
||||
}
|
||||
logln("Ok 0x" + Integer.toHexString(upper[i]) + " and 0x" +
|
||||
Integer.toHexString(lower[i]));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing the letter and number determination in UCharacter
|
||||
*/
|
||||
public void TestLetterNumber()
|
||||
{
|
||||
for (int i = 0x0041; i < 0x005B; i ++)
|
||||
if (!UCharacter.isLetter(i))
|
||||
errln("FAIL 0x" + Integer.toHexString(i) + " expected to be a letter");
|
||||
|
||||
for (int i = 0x0660; i < 0x066A; i ++)
|
||||
if (UCharacter.isLetter(i))
|
||||
errln("FAIL 0x" + Integer.toHexString(i) +
|
||||
" expected not to be a letter");
|
||||
|
||||
for (int i = 0x0660; i < 0x066A; i ++)
|
||||
if (!UCharacter.isDigit(i))
|
||||
errln("FAIL 0x" + Integer.toHexString(i) + " expected to be a digit");
|
||||
|
||||
for (int i = 0x0041; i < 0x005B; i ++)
|
||||
if (!UCharacter.isLetterOrDigit(i))
|
||||
errln("FAIL 0x" + Integer.toHexString(i) +
|
||||
" expected not to be a digit");
|
||||
|
||||
for (int i = 0x0660; i < 0x066A; i ++)
|
||||
if (!UCharacter.isLetterOrDigit(i))
|
||||
errln("FAIL 0x" + Integer.toHexString(i) +
|
||||
"expected to be either a letter or a digit");
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for space determination in UCharacter
|
||||
*/
|
||||
public void TestSpaces()
|
||||
{
|
||||
int spaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
|
||||
int nonspaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
|
||||
int whitespaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
|
||||
int nonwhitespaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f};
|
||||
|
||||
int size = spaces.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (!UCharacter.isSpaceChar(spaces[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(spaces[i]) +
|
||||
" expected to be a space character");
|
||||
break;
|
||||
}
|
||||
|
||||
if (UCharacter.isSpaceChar(nonspaces[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonspaces[i]) +
|
||||
" expected not to be space character");
|
||||
break;
|
||||
}
|
||||
|
||||
if (!UCharacter.isWhitespace(whitespaces[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(whitespaces[i]) +
|
||||
" expected to be a white space character");
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isWhitespace(nonwhitespaces[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonwhitespaces[i]) +
|
||||
" expected not to be a space character");
|
||||
break;
|
||||
}
|
||||
logln("Ok 0x" + Integer.toHexString(spaces[i]) + " and 0x" +
|
||||
Integer.toHexString(nonspaces[i]) + " and 0x" +
|
||||
Integer.toHexString(whitespaces[i]) + " and 0x" +
|
||||
Integer.toHexString(nonwhitespaces[i]));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for defined and undefined characters
|
||||
*/
|
||||
public void TestDefined()
|
||||
{
|
||||
int undefined[] = {0xfff1, 0xfff7, 0xfa30};
|
||||
int defined[] = {0x523E, 0x4f88, 0xfffd};
|
||||
|
||||
int size = undefined.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (UCharacter.isDefined(undefined[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(undefined[i]) +
|
||||
" expected not to be defined");
|
||||
break;
|
||||
}
|
||||
if (!UCharacter.isDefined(defined[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(defined[i]) +
|
||||
" expected defined");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for base form characters
|
||||
*/
|
||||
public void TestBase()
|
||||
{
|
||||
int base[] = {0x0061, 0x0031, 0x03d2};
|
||||
int nonbase[] = {0x002B, 0x0020, 0x203B};
|
||||
int size = base.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (UCharacter.isBaseForm(nonbase[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonbase[i]) +
|
||||
" expected not to be a base character");
|
||||
break;
|
||||
}
|
||||
if (!UCharacter.isBaseForm(base[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(base[i]) +
|
||||
" expected to be a base character");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for digit characters
|
||||
*/
|
||||
public void TestDigits()
|
||||
{
|
||||
int digits[] = {0x0030, 0x0662, 0x0F23, 0x0ED5, 0x2160};
|
||||
|
||||
//special characters not in the properties table
|
||||
int digits2[] = {0x3007, 0x4e00, 0x4e8c, 0x4e09, 0x56d8, 0x4e94, 0x516d,
|
||||
0x4e03, 0x516b, 0x4e5d};
|
||||
int nondigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
|
||||
|
||||
int digitvalues[] = {0, 2, 3, 5, 1};
|
||||
int digitvalues2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
|
||||
|
||||
int size = digits.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
if (UCharacter.isDigit(digits[i]) &&
|
||||
UCharacter.digit(digits[i]) != digitvalues[i])
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(digits[i]) +
|
||||
" expected digit with value " + digitvalues[i]);
|
||||
break;
|
||||
}
|
||||
|
||||
size = nondigits.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
if (UCharacter.isDigit(nondigits[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nondigits[i]) +
|
||||
" expected nondigit");
|
||||
break;
|
||||
}
|
||||
|
||||
size = digits2.length;
|
||||
for (int i = 0; i < 10; i ++)
|
||||
if (UCharacter.isDigit(digits2[i]) &&
|
||||
UCharacter.digit(digits2[i]) != digitvalues2[i])
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(digits2[i]) +
|
||||
" expected digit with value " + digitvalues2[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for version
|
||||
*/
|
||||
public void TestVersion()
|
||||
{
|
||||
String version = UCharacter.getUnicodeVersion();
|
||||
if (!version.equals(VERSION_))
|
||||
errln("FAIL expected " + VERSION_);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for control characters
|
||||
*/
|
||||
public void TestControl()
|
||||
{
|
||||
int control[] = {0x001b, 0x0097, 0x0082};
|
||||
int noncontrol[] = {0x61, 0x0031, 0x00e2};
|
||||
|
||||
int size = control.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (!UCharacter.isControl(control[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(control[i]) +
|
||||
" expected to be a control character");
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isControl(noncontrol[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(noncontrol[i]) +
|
||||
" expected to be not a control character");
|
||||
break;
|
||||
}
|
||||
|
||||
logln("Ok 0x" + Integer.toHexString(control[i]) + " and 0x" +
|
||||
Integer.toHexString(noncontrol[i]));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for printable characters
|
||||
*/
|
||||
public void TestPrint()
|
||||
{
|
||||
int printable[] = {0x0042, 0x005f, 0x2014};
|
||||
int nonprintable[] = {0x200c, 0x009f, 0x001b};
|
||||
|
||||
int size = printable.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (!UCharacter.isPrintable(printable[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(printable[i]) +
|
||||
" expected to be a printable character");
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isPrintable(nonprintable[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonprintable[i]) +
|
||||
" expected not to be a printable character");
|
||||
break;
|
||||
}
|
||||
logln("Ok 0x" + Integer.toHexString(printable[i]) + " and 0x" +
|
||||
Integer.toHexString(nonprintable[i]));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing for identifier characters
|
||||
*/
|
||||
public void TestIdentifier()
|
||||
{
|
||||
int unicodeidstart[] = {0x0250, 0x00e2, 0x0061};
|
||||
int nonunicodeidstart[] = {0x2000, 0x000a, 0x2019};
|
||||
int unicodeidpart[] = {0x005f, 0x0032, 0x0045};
|
||||
int nonunicodeidpart[] = {0x2030, 0x00a3, 0x0020};
|
||||
int idignore[] = {0x070F, 0x180B, 0x180C};
|
||||
int nonidignore[] = {0x0075, 0x00a3, 0x0061};
|
||||
|
||||
int size = unicodeidstart.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
if (!UCharacter.isUnicodeIdentifierStart(unicodeidstart[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(unicodeidstart[i]) +
|
||||
" expected to be a unicode identifier start character");
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isUnicodeIdentifierStart(nonunicodeidstart[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonunicodeidstart[i]) +
|
||||
" expected not to be a unicode identifier start character");
|
||||
break;
|
||||
}
|
||||
if (!UCharacter.isUnicodeIdentifierPart(unicodeidpart[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(unicodeidpart[i]) +
|
||||
" expected to be a unicode identifier part character");
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isUnicodeIdentifierPart(nonunicodeidpart[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonunicodeidpart[i]) +
|
||||
" expected not to be a unicode identifier part character");
|
||||
break;
|
||||
}
|
||||
if (!UCharacter.isIdentifierIgnorable(idignore[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(idignore[i]) +
|
||||
" expected to be a ignorable unicode character");
|
||||
break;
|
||||
}
|
||||
if (UCharacter.isIdentifierIgnorable(nonidignore[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(nonidignore[i]) +
|
||||
" expected not to be a ignorable unicode character");
|
||||
break;
|
||||
}
|
||||
logln("Ok 0x" + Integer.toHexString(unicodeidstart[i]) + " and 0x" +
|
||||
Integer.toHexString(nonunicodeidstart[i]) + " and 0x" +
|
||||
Integer.toHexString(unicodeidpart[i]) + " and 0x" +
|
||||
Integer.toHexString(nonunicodeidpart[i]) + " and 0x" +
|
||||
Integer.toHexString(idignore[i]) + " and 0x" +
|
||||
Integer.toHexString(nonidignore[i]));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests for the character types, direction
|
||||
*/
|
||||
public void TestCatDir()
|
||||
{
|
||||
// this is the 2 char category types used in the UnicodeData file
|
||||
final String TYPE =
|
||||
"LuLlLtLmLoMnMeMcNdNlNoZsZlZpCcCfCoCsPdPsPePcPoSmScSkSoPiPf";
|
||||
|
||||
// directory types used in the UnicodeData file
|
||||
// padded by spaces to make each type size 4
|
||||
final String DIR =
|
||||
"L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN ";
|
||||
|
||||
StringBuffer file = new StringBuffer("UnicodeData-");
|
||||
file.append(UCharacter.getUnicodeVersion());
|
||||
file.append(".txt");
|
||||
String s;
|
||||
|
||||
final int LASTUNICODECHAR = 0xFFFD;
|
||||
int ch = 0,
|
||||
index = 0,
|
||||
type = 0,
|
||||
dir = 0;
|
||||
|
||||
try
|
||||
{
|
||||
// reading in the UnicodeData file
|
||||
FileReader fr = new FileReader(file.toString());
|
||||
BufferedReader input = new BufferedReader(fr);
|
||||
|
||||
while (ch != LASTUNICODECHAR)
|
||||
{
|
||||
s= input.readLine();
|
||||
|
||||
// geting the unicode character, its type and its direction
|
||||
ch = Integer.parseInt(s.substring(0, 4), 16);
|
||||
index = s.indexOf(';', 5);
|
||||
String t = s.substring(index + 1, index + 3);
|
||||
index = s.indexOf(';', index + 4);
|
||||
String d = s.substring(index + 1, s.indexOf(';', index + 1));
|
||||
|
||||
// testing the category
|
||||
// we override the general category of some control characters
|
||||
if (ch == 9 || ch == 0xb || ch == 0x1f)
|
||||
type = UCharacterCategoryEnum.SPACE_SEPARATOR;
|
||||
else
|
||||
if (ch == 0xc)
|
||||
type = UCharacterCategoryEnum.LINE_SEPARATOR;
|
||||
else
|
||||
if (ch == 0xa || ch == 0xd || ch == 0x1c || ch == 0x1d ||
|
||||
ch == 0x1e || ch == 0x85)
|
||||
type = UCharacterCategoryEnum.PARAGRAPH_SEPARATOR;
|
||||
else
|
||||
{
|
||||
type = TYPE.indexOf(t);
|
||||
if (type < 0)
|
||||
type = 0;
|
||||
else
|
||||
type = (type >> 1) + 1;
|
||||
}
|
||||
|
||||
if (UCharacter.getType(ch) != type)
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(ch) + " expected type " +
|
||||
type);
|
||||
break;
|
||||
}
|
||||
|
||||
// testing the direction
|
||||
if (d.length() == 1)
|
||||
d = d + " ";
|
||||
|
||||
dir = DIR.indexOf(d) >> 2;
|
||||
if (UCharacter.getDirection(ch) != dir)
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(ch) +
|
||||
" expected wrong direction " + dir);
|
||||
break;
|
||||
}
|
||||
}
|
||||
input.close();
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
if (UCharacter.getDirection(0x10001) !=
|
||||
UCharacterDirectionEnum.LEFT_TO_RIGHT)
|
||||
errln("FAIL 0x10001 expected direction " +
|
||||
UCharacterDirectionEnum.toString(UCharacterDirectionEnum.LEFT_TO_RIGHT));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test for the character names
|
||||
*/
|
||||
public void TestNames()
|
||||
{
|
||||
int c[] = {0x0061, 0x0284, 0x3401, 0x7fed, 0xac00, 0xd7a3, 0xff08, 0xffe5};
|
||||
String name[] = {"LATIN SMALL LETTER A",
|
||||
"LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK",
|
||||
"CJK UNIFIED IDEOGRAPH-3401",
|
||||
"CJK UNIFIED IDEOGRAPH-7FED", "HANGUL SYLLABLE GA",
|
||||
"HANGUL SYLLABLE HIH", "FULLWIDTH LEFT PARENTHESIS",
|
||||
"FULLWIDTH YEN SIGN"};
|
||||
String oldname[] = {"", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "", "",
|
||||
"", "", "FULLWIDTH OPENING PARENTHESIS", ""};
|
||||
int size = c.length;
|
||||
String str;
|
||||
int uc;
|
||||
|
||||
for (int i = 0; i < size; i ++)
|
||||
{
|
||||
// modern Unicode character name
|
||||
str = UCharacter.getName(c[i]);
|
||||
if (!str.equalsIgnoreCase(name[i]))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(c[i]) + " expected name " +
|
||||
name[i]);
|
||||
break;
|
||||
}
|
||||
|
||||
// 1.0 Unicode character name
|
||||
str = UCharacter.getName1_0(c[i]);
|
||||
if ((str == null && oldname[i].length() > 0) ||
|
||||
(str != null && !str.equalsIgnoreCase(oldname[i])))
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(c[i]) + " expected 1.0 name " +
|
||||
oldname[i]);
|
||||
break;
|
||||
}
|
||||
|
||||
// retrieving unicode character from modern name
|
||||
uc = UCharacter.getCharFromName(name[i]);
|
||||
if (uc != c[i])
|
||||
{
|
||||
errln("FAIL " + name[i] + " expected character 0x" +
|
||||
Integer.toHexString(c[i]));
|
||||
break;
|
||||
}
|
||||
|
||||
//retrieving unicode character from 1.0 name
|
||||
uc = UCharacter.getCharFromName1_0(oldname[i]);
|
||||
if (uc != c[i] && i != 0 && (i == 1 || i == 6))
|
||||
{
|
||||
errln("FAIL " + name[i] + " expected 1.0 character " +
|
||||
Integer.toHexString(c[i]));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// extra testing different from icu
|
||||
for (int i = UCharacter.MIN_VALUE; i < UCharacter.MAX_VALUE; i ++)
|
||||
{
|
||||
str = UCharacter.getName(i);
|
||||
if (str != null && UCharacter.getCharFromName(str) != i)
|
||||
{
|
||||
errln("FAIL 0x" + Integer.toHexString(i) + " " + str +
|
||||
" retrieval of name and vice versa" );
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing UTF16 class methods append, getCharCount and bounds
|
||||
*/
|
||||
public void TestUTF16AppendBoundCount()
|
||||
{
|
||||
StringBuffer str = new StringBuffer("this is a string ");
|
||||
int length;
|
||||
|
||||
for (int i = UCharacter.MIN_VALUE; i < UCharacter.MAX_VALUE; i ++)
|
||||
{
|
||||
length = str.length();
|
||||
UTF16.append(str, i);
|
||||
if (!UCharacter.isSupplementary(i))
|
||||
{
|
||||
if (UTF16.getCharCount(i) != 1)
|
||||
{
|
||||
errln("FAIL Counting BMP character size error" );
|
||||
break;
|
||||
}
|
||||
if (str.length() != length + 1)
|
||||
{
|
||||
errln("FAIL Adding a BMP character error" );
|
||||
break;
|
||||
}
|
||||
if (!UTF16.isSurrogate((char)i) &&
|
||||
UTF16.bounds(str.toString(), str.length() - 1) !=
|
||||
UTF16.SINGLE_CHAR_BOUNDARY)
|
||||
{
|
||||
errln("FAIL Finding BMP character bounds error" );
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (UTF16.getCharCount(i) != 2)
|
||||
{
|
||||
errln("FAIL Counting Supplementary character size error" );
|
||||
break;
|
||||
}
|
||||
if (str.length() != length + 2)
|
||||
{
|
||||
errln("FAIL Adding a Supplementary character error" );
|
||||
break;
|
||||
}
|
||||
length = str.length();
|
||||
if (UTF16.bounds(str.toString(), str.length() - 2) !=
|
||||
UTF16.LEAD_SURROGATE_BOUNDARY ||
|
||||
UTF16.bounds(str.toString(), str.length() - 1) !=
|
||||
UTF16.TRAIL_SURROGATE_BOUNDARY)
|
||||
{
|
||||
errln("FAIL Finding Supplementary character bounds error" );
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing UTF16 class methods findCPOffset, findOffsetFromCP and charAt
|
||||
*/
|
||||
public void TestUTF16OffsetCharAt()
|
||||
{
|
||||
StringBuffer str = new StringBuffer("12345");
|
||||
UTF16.append(str, 0x10001);
|
||||
str.append("67890");
|
||||
UTF16.append(str, 0x10002);
|
||||
String s = str.toString();
|
||||
if (UTF16.charAt(s, 0) != '1' || UTF16.charAt(s, 2) != '3' ||
|
||||
UTF16.charAt(s, 5) != 0x10001 || UTF16.charAt(s, 6) != 0x10001 ||
|
||||
UTF16.charAt(s, 12) != 0x10002 || UTF16.charAt(s, 13) != 0x10002)
|
||||
errln("FAIL Getting character from string error" );
|
||||
|
||||
if (UTF16.findCPOffset(s, 3) != 3 || UTF16.findCPOffset(s, 5) != 5 ||
|
||||
UTF16.findCPOffset(s, 6) != 6)
|
||||
errln("FAIL Getting codepoint offset from string error" );
|
||||
if (UTF16.findOffsetFromCP(s, 3) != 3 ||
|
||||
UTF16.findOffsetFromCP(s, 5) != 5 ||
|
||||
UTF16.findOffsetFromCP(s, 6) != 7)
|
||||
errln("FAIL Getting UTF16 offset from codepoint in string error" );
|
||||
}
|
||||
|
||||
public static void main(String[] arg)
|
||||
{
|
||||
try
|
||||
{
|
||||
UCharacterTest test = new UCharacterTest();
|
||||
test.run(arg);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
1150
icu4j/src/com/ibm/icu/text/UCharacter.java
Executable file
1150
icu4j/src/com/ibm/icu/text/UCharacter.java
Executable file
File diff suppressed because it is too large
Load diff
248
icu4j/src/com/ibm/icu/text/UCharacterCategoryEnum.java
Executable file
248
icu4j/src/com/ibm/icu/text/UCharacterCategoryEnum.java
Executable file
|
@ -0,0 +1,248 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterCategoryEnum.java $
|
||||
* $Date: 2000/12/26 20:00:56 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.text;
|
||||
|
||||
/**
 * Integer constants for the Unicode general category types found in the
 * UnicodeData.txt file, together with a method giving a descriptive name
 * for each of them.
 * Used as return results from <a href=UCharacter.html>UCharacter</a>.
 * Equivalent to icu's UCharCategory.
 * Refer to <a href=http://www.unicode.org/Public/UNIDATA/UnicodeData.html>
 * Unicode Consortium</a> for more information about UnicodeData.txt.
 * @author Syn Wee Quek
 * @since oct0300
 */

public final class UCharacterCategoryEnum
{
  // private constructor ===================================================

  /**
   * Not instantiable, this class only holds constants and a static method
   */
  private UCharacterCategoryEnum()
  {
  }

  // public variable =======================================================

  /** Unassigned character type */
  public static final int UNASSIGNED = 0;
  /** Character type Lu */
  public static final int UPPERCASE_LETTER = 1;
  /** Character type Ll */
  public static final int LOWERCASE_LETTER = 2;
  /** Character type Lt */
  public static final int TITLECASE_LETTER = 3;
  /** Character type Lm */
  public static final int MODIFIER_LETTER = 4;
  /** Character type Lo */
  public static final int OTHER_LETTER = 5;
  /** Character type Mn */
  public static final int NON_SPACING_MARK = 6;
  /** Character type Me */
  public static final int ENCLOSING_MARK = 7;
  /** Character type Mc */
  public static final int COMBINING_SPACING_MARK = 8;
  /** Character type Nd */
  public static final int DECIMAL_DIGIT_NUMBER = 9;
  /** Character type Nl */
  public static final int LETTER_NUMBER = 10;
  /** Character type No */
  public static final int OTHER_NUMBER = 11;
  /** Character type Zs */
  public static final int SPACE_SEPARATOR = 12;
  /** Character type Zl */
  public static final int LINE_SEPARATOR = 13;
  /** Character type Zp */
  public static final int PARAGRAPH_SEPARATOR = 14;
  /** Character type Cc */
  public static final int CONTROL = 15;
  /** Character type Cf */
  public static final int FORMAT = 16;
  /** Character type Co */
  public static final int PRIVATE_USE = 17;
  /** Character type Cs */
  public static final int SURROGATE = 18;
  /** Character type Pd */
  public static final int DASH_PUNCTUATION = 19;
  /** Character type Ps */
  public static final int START_PUNCTUATION = 20;
  /** Character type Pe */
  public static final int END_PUNCTUATION = 21;
  /** Character type Pc */
  public static final int CONNECTOR_PUNCTUATION = 22;
  /** Character type Po */
  public static final int OTHER_PUNCTUATION = 23;
  /** Character type Sm */
  public static final int MATH_SYMBOL = 24;
  /** Character type Sc */
  public static final int CURRENCY_SYMBOL = 25;
  /** Character type Sk */
  public static final int MODIFIER_SYMBOL = 26;
  /** Character type So */
  public static final int OTHER_SYMBOL = 27;
  /** Character type Pi */
  public static final int INITIAL_PUNCTUATION = 28;
  /** Character type Pf */
  public static final int FINAL_PUNCTUATION = 29;
  /** Character type Cn */
  public static final int GENERAL_OTHER_TYPES = 30;
  /** Character type count */
  public static final int CHAR_CATEGORY_COUNT = 31;

  // private variable ======================================================

  /**
   * Descriptive names indexed by category value, covering UNASSIGNED up to
   * and including FINAL_PUNCTUATION. Values outside of this table are
   * reported as "Unassigned".
   */
  private static final String CATEGORY_NAME_[] = {
    "Unassigned",
    "Letter, Uppercase",
    "Letter, Lowercase",
    "Letter, Titlecase",
    "Letter, Modifier",
    "Letter, Other",
    "Mark, Non-Spacing",
    "Mark, Enclosing",
    "Mark, Spacing Combining",
    "Number, Decimal Digit",
    "Number, Letter",
    "Number, Other",
    "Separator, Space",
    "Separator, Line",
    "Separator, Paragraph",
    "Other, Control",
    "Other, Format",
    "Other, Private Use",
    "Other, Surrogate",
    "Punctuation, Dash",
    "Punctuation, Open",
    "Punctuation, Close",
    "Punctuation, Connector",
    "Punctuation, Other",
    "Symbol, Math",
    "Symbol, Currency",
    "Symbol, Modifier",
    "Symbol, Other",
    "Punctuation, Initial quote ",
    "Punctuation, Final quote "
  };

  // public method =========================================================

  /**
   * Returns the descriptive name of a category.
   * @param category one of the category constants of this class
   * @return name of the category, "Unassigned" for UNASSIGNED,
   *         GENERAL_OTHER_TYPES and every value that is not a category
   *         constant
   */
  public static String toString(int category)
  {
    if (category < 0 || category >= CATEGORY_NAME_.length)
    {
      return "Unassigned";
    }
    return CATEGORY_NAME_[category];
  }
}
|
99
icu4j/src/com/ibm/icu/text/UCharacterDB.java
Executable file
99
icu4j/src/com/ibm/icu/text/UCharacterDB.java
Executable file
|
@ -0,0 +1,99 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Attic/UCharacterDB.java,v $
|
||||
* $Date: 2000/12/26 20:00:56 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
/**
 * Internal base class for all character databases.
 * Database classes store binary data read from uprops.dat and unames for use.
 * They do not parse the data into higher-level information, they only hand
 * out bytes of information when required.
 * Due to the form most commonly used for retrieval, arrays of char are used
 * to store the binary data.
 * Responsibility for molding the binary data into a more meaningful form lies
 * with <a href=UCharacterPpty.html>UCharacterPpty</a> and
 * <a href=UCharacterName.html>UCharacterName</a>.
 * Data populated by <a href=UGenReader.html>UGenReader</a>
 * @author Syn Wee Quek
 * @since oct3100 HALLOWEEN!!
 * @see com.ibm.icu.text.UCharacterPpty
 * @see com.ibm.icu.text.UCharacterName
 */

class UCharacterDB
{
  // package variable ===========================================

  /**
   * Unicode data version as dot separated digits. Stays null until
   * setUnicodeVersion has accepted a version with a non-zero component.
   */
  String m_unicodeversion_;

  // constructor =============================================

  /**
   * Constructor for UCharacterDB
   */
  protected UCharacterDB()
  {
  }

  // public method =============================================

  /**
   * Returns a printable description consisting of the unicode version.
   * @return a line break followed by the label "unicode version number "
   *         and the stored version, or "null" if no version is stored
   */
  public String toString()
  {
    return "\nunicode version number " + m_unicodeversion_;
  }

  // protected method =============================================

  /**
   * Sets the version number for this set of unicode characters.
   * Every element of the argument is one component of the version and has
   * to lie within 0 to 9. The components are stored joined by '.', but
   * only if at least one of them is non-zero; otherwise the stored version
   * is left untouched.
   * @param version components of the version number, may be null
   * @return false if a component lies outside of 0 to 9, in which case the
   *         stored version is left untouched; true in all other cases,
   *         including a null, empty or all-zero argument
   */
  protected boolean setUnicodeVersion(byte[] version)
  {
    if (version == null)
    {
      return true;
    }

    StringBuffer text = new StringBuffer(version.length << 1);
    boolean nonzero = false;
    for (int i = 0; i < version.length; i ++)
    {
      byte component = version[i];
      if (component < 0 || component > 9)
      {
        return false;
      }
      if (i > 0)
      {
        text.append('.');
      }
      text.append((int)component);
      nonzero = nonzero || component != 0;
    }

    if (nonzero)
    {
      m_unicodeversion_ = text.toString();
    }
    return true;
  }
}
|
182
icu4j/src/com/ibm/icu/text/UCharacterDirectionEnum.java
Executable file
182
icu4j/src/com/ibm/icu/text/UCharacterDirectionEnum.java
Executable file
|
@ -0,0 +1,182 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterDirectionEnum.java $
|
||||
* $Date: 2000/12/26 20:00:56 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.text;
|
||||
|
||||
/**
 * Integer constants for the Unicode character linguistic direction types,
 * together with a method giving a descriptive name for each of them.
 * Used as return results from <a href=UCharacter.html>UCharacter</a>
 * @author Syn Wee Quek
 * @since oct0300
 */

public final class UCharacterDirectionEnum
{
  // private constructor =========================================

  /**
   * Not instantiable, this class only holds constants and a static method
   */
  private UCharacterDirectionEnum()
  {
  }

  // public variable =============================================

  /** Directional type L */
  public static final int LEFT_TO_RIGHT = 0;
  /** Directional type R */
  public static final int RIGHT_TO_LEFT = 1;
  /** Directional type EN */
  public static final int EUROPEAN_NUMBER = 2;
  /** Directional type ES */
  public static final int EUROPEAN_NUMBER_SEPARATOR = 3;
  /** Directional type ET */
  public static final int EUROPEAN_NUMBER_TERMINATOR = 4;
  /** Directional type AN */
  public static final int ARABIC_NUMBER = 5;
  /** Directional type CS */
  public static final int COMMON_NUMBER_SEPARATOR = 6;
  /** Directional type B */
  public static final int BLOCK_SEPARATOR = 7;
  /** Directional type S */
  public static final int SEGMENT_SEPARATOR = 8;
  /** Directional type WS */
  public static final int WHITE_SPACE_NEUTRAL = 9;
  /** Directional type ON */
  public static final int OTHER_NEUTRAL = 10;
  /** Directional type LRE */
  public static final int LEFT_TO_RIGHT_EMBEDDING = 11;
  /** Directional type LRO */
  public static final int LEFT_TO_RIGHT_OVERRIDE = 12;
  /** Directional type AL */
  public static final int RIGHT_TO_LEFT_ARABIC = 13;
  /** Directional type RLE */
  public static final int RIGHT_TO_LEFT_EMBEDDING = 14;
  /** Directional type RLO */
  public static final int RIGHT_TO_LEFT_OVERRIDE = 15;
  /** Directional type PDF */
  public static final int POP_DIRECTIONAL_FORMAT = 16;
  /** Directional type NSM */
  public static final int DIR_NON_SPACING_MARK = 17;
  /** Directional type BN */
  public static final int BOUNDARY_NEUTRAL = 18;
  /** Number of directional types */
  public static final int CHAR_DIRECTION_COUNT = 19;

  // private variable ============================================

  /**
   * Descriptive names indexed by direction value, covering LEFT_TO_RIGHT up
   * to and including BOUNDARY_NEUTRAL.
   */
  private static final String DIRECTION_NAME_[] = {
    "Left-to-Right",
    "Right-to-Left",
    "European Number",
    "European Number Separator",
    "European Number Terminator",
    "Arabic Number",
    "Common Number Separator",
    "Paragraph Separator",
    "Segment Separator",
    "Whitespace",
    "Other Neutrals",
    "Left-to-Right Embedding",
    "Left-to-Right Override",
    "Right-to-Left Arabic",
    "Right-to-Left Embedding",
    "Right-to-Left Override",
    "Pop Directional Format",
    "Non-Spacing Mark",
    "Boundary Neutral"
  };

  // public method ===============================================

  /**
   * Returns the descriptive name of a direction type.
   * @param dir one of the direction constants of this class
   * @return name of the direction, "Unassigned" for every value that is not
   *         a direction constant
   */
  public static String toString(int dir)
  {
    if (dir < 0 || dir >= DIRECTION_NAME_.length)
    {
      return "Unassigned";
    }
    return DIRECTION_NAME_[dir];
  }
}
|
179
icu4j/src/com/ibm/icu/text/UCharacterName.java
Executable file
179
icu4j/src/com/ibm/icu/text/UCharacterName.java
Executable file
|
@ -0,0 +1,179 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterName.java $
|
||||
* $Date: 2000/12/26 20:00:56 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
/**
|
||||
* Internal class to manage character names.
|
||||
* <a href=UCharacterNameDB.html>UCharacterNameDB</a> provides the data
|
||||
* required and UCharacterName parses it into meaningful results before
|
||||
* returning value.
|
||||
* Since data in <a href=UCharacterNameDB.html>UCharacterNameDB</a> is stored
|
||||
* in an array of char, by default indexes used in this class is refering to
|
||||
* a 2 byte count, unless otherwise stated. Cases where the index is refering
|
||||
* to a byte count, the index is halved and depending on whether the index is
|
||||
* even or odd, the MSB or LSB of the result char at the halved index is
|
||||
* returned. For indexes to an array of int, the index is multiplied by 2,
|
||||
* result char at the multiplied index and its following char is returned as an
|
||||
* int.
|
||||
* <a href=UCharacter.html>UCharacter</a> acts as a public facade for this class
|
||||
* Note : 0 - 0x1F are control characters without names in Unicode 3.0
|
||||
* For information on parsing of the binary data in
|
||||
* <a href=UCharacterNameDB.html>UCharacterNameDB</a> is located at
|
||||
* <a href=oss.software.ibm.com/icu4j/icu4jhtml/com/ibm/icu/text/readme.html>
|
||||
* ReadMe</a>
|
||||
* @author Syn Wee Quek
|
||||
* @since nov0700
|
||||
*/
|
||||
|
||||
final class UCharacterName
|
||||
{
|
||||
// private variable =============================================
|
||||
|
||||
/**
|
||||
* Database storing the sets of character name
|
||||
*/
|
||||
private static final UCharacterNameDB NAME_DB_;
|
||||
|
||||
// block to initialise name database and unicode 1.0 data indicator
|
||||
static
|
||||
{
|
||||
try
|
||||
{
|
||||
NAME_DB_ = new UCharacterNameDB();
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
throw new RuntimeException(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// protected method =============================================
|
||||
|
||||
/**
|
||||
* Retrieve the name of a Unicode code point.
|
||||
* Depending on <code>choice</code>, the character name written into the
|
||||
* buffer is the "modern" name or the name that was defined in Unicode
|
||||
* version 1.0.
|
||||
* The name contains only "invariant" characters
|
||||
* like A-Z, 0-9, space, and '-'.
|
||||
*
|
||||
* @param ch the code point for which to get the name.
|
||||
* @param choice Selector for which name to get.
|
||||
* @return if code point is above 0x1fff, null is returned
|
||||
*/
|
||||
protected static String getName(int ch, int choice)
|
||||
{
|
||||
if (ch < 0 || ch > 0x1ffff ||
|
||||
choice >= UCharacterNameChoiceEnum.U_CHAR_NAME_CHOICE_COUNT)
|
||||
return null;
|
||||
|
||||
String result = "";
|
||||
|
||||
// Do not write algorithmic Unicode 1.0 names because Unihan names are
|
||||
// the same as the modern ones, extension A was only introduced with
|
||||
// Unicode 3.0, and the Hangul syllable block was moved and changed around
|
||||
// Unicode 1.1.5.
|
||||
if (choice == UCharacterNameChoiceEnum.U_UNICODE_CHAR_NAME)
|
||||
// try getting algorithmic name first
|
||||
result = getAlgName(ch);
|
||||
|
||||
// getting normal character name
|
||||
if (result == null || result.length() == 0)
|
||||
result = NAME_DB_.getGroupName(ch, choice);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find a character by its name and return its code point value
|
||||
* @param character name
|
||||
* @param choice selector to indicate if argument name is a Unicode 1.0
|
||||
* or the most current version
|
||||
* @return code point
|
||||
*/
|
||||
protected static int getCharFromName(int choice, String name)
|
||||
{
|
||||
// checks for illegal arguments
|
||||
if (choice >= UCharacterNameChoiceEnum.U_CHAR_NAME_CHOICE_COUNT ||
|
||||
name == null || name.length() == 0)
|
||||
return -1;
|
||||
|
||||
// try algorithmic names first, if fails then try group names
|
||||
int result = getAlgorithmChar(choice, name);
|
||||
if (result >= 0)
|
||||
return result;
|
||||
return getGroupChar(name, choice);
|
||||
}
|
||||
|
||||
// private method =============================================
|
||||
|
||||
/**
|
||||
* Gets the algorithmic name for the argument character
|
||||
* @param ch character to determine name for
|
||||
* @return the algorithmic name or null if not found
|
||||
*/
|
||||
private static String getAlgName(int ch)
|
||||
{
|
||||
// index in terms integer index
|
||||
StringBuffer s = new StringBuffer();
|
||||
|
||||
int index = NAME_DB_.getAlgorithmIndex(ch);
|
||||
if (index >= 0)
|
||||
{
|
||||
NAME_DB_.appendAlgorithmName(index, ch, s);
|
||||
return s.toString();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the character for the argument algorithmic name
|
||||
* @param choice of either 1.0 or the most current unicode name
|
||||
* @return the algorithmic char or -1 otherwise.
|
||||
*/
|
||||
private static int getAlgorithmChar(int choice, String name)
|
||||
{
|
||||
// 1.0 has no algorithmic names
|
||||
if (choice != UCharacterNameChoiceEnum.U_UNICODE_CHAR_NAME)
|
||||
return -1;
|
||||
int result;
|
||||
for (int count = NAME_DB_.countAlgorithm() - 1; count >= 0; count --)
|
||||
{
|
||||
result = NAME_DB_.getAlgorithmChar(count, name);
|
||||
if (result >= 0)
|
||||
return result;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Getting the character with the tokenized argument name
|
||||
* @param name of the character
|
||||
* @return character with the tokenized argument name or -1 if character is
|
||||
* not found
|
||||
*/
|
||||
private static int getGroupChar(String name, int choice)
|
||||
{
|
||||
int groupcount = NAME_DB_.countGroup();
|
||||
int result = 0;
|
||||
|
||||
for (int i = 0; i < groupcount; i ++)
|
||||
{
|
||||
result = NAME_DB_.getGroupChar(i, name, choice);
|
||||
if (result != -1)
|
||||
return result;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
34
icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java
Executable file
34
icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java
Executable file
|
@ -0,0 +1,34 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterNameChoiceEnum.java $
|
||||
* $Date: 2000/12/26 20:00:56 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.text;
|
||||
|
||||
/**
 * Internal interface holding the selector constants for unicode character
 * names. A selector picks either the "modern" name of a Unicode character or
 * the name that was defined in Unicode version 1.0, before the Unicode
 * standard merged with ISO-10646.
 * Arguments for <a href=UCharacterName.html>UCharacterName</a>
 * @author Syn Wee Quek
 * @since oct0600
 */

interface UCharacterNameChoiceEnum
{
  // public variables =============================================

  /**
   * Selector for the modern Unicode character name
   */
  int U_UNICODE_CHAR_NAME = 0;
  /**
   * Selector for the Unicode 1.0 character name
   */
  int U_UNICODE_10_CHAR_NAME = 1;
  /**
   * Number of name selectors
   */
  int U_CHAR_NAME_CHOICE_COUNT = 2;
}
|
877
icu4j/src/com/ibm/icu/text/UCharacterNameDB.java
Executable file
877
icu4j/src/com/ibm/icu/text/UCharacterNameDB.java
Executable file
|
@ -0,0 +1,877 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Attic/UCharacterNameDB.java,v $
|
||||
* $Date: 2000/12/26 20:00:56 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.BufferedInputStream;
|
||||
|
||||
/**
|
||||
* Internal class used for Unicode character name database.
|
||||
* Database classes store binary data read from uprops.dat and unames for use.
|
||||
* It does not have the capability to parse the data into more high-level
|
||||
* information. It only returns bytes of information when required.
|
||||
* Due to the form most commonly used for retrieval, array of char is used
|
||||
* to store the binary data
|
||||
* UCharacterNameDB also contains indexes to significant points in the binary
|
||||
* data.
|
||||
* Responsibility for molding the binary data into a more meaningful form lies with
|
||||
* <a href=UCharacterPpty.html>UCharacterPpty</a> and
|
||||
* <a href=UCharacterName.html>UCharacterName</a>.
|
||||
* Data populated by <a href=UGenNameReader.html>UGenNameReader</a>
|
||||
* @author Syn Wee Quek
|
||||
* @since oct2700
|
||||
* @see com.ibm.icu.text.UGenReader
|
||||
*/
|
||||
|
||||
final class UCharacterNameDB extends UCharacterDB
|
||||
{
|
||||
// private variable =============================================
|
||||
|
||||
/**
|
||||
* Data used in unames.dat
|
||||
*/
|
||||
private char m_tokentable_[];
|
||||
private byte m_tokenstring_[];
|
||||
private char m_groupinfo_[];
|
||||
private byte m_groupstring_[];
|
||||
private AlgorithmName m_algorithm_[];
|
||||
|
||||
/**
|
||||
* Number of group sets
|
||||
*/
|
||||
private int m_groupcount_ = 0;
|
||||
private int m_groupsize_ = 0;
|
||||
|
||||
/**
|
||||
* Default name of the name datafile
|
||||
*/
|
||||
private static final String NAME_FILE_NAME_ = "unames.dat";
|
||||
|
||||
/**
|
||||
* Default buffer size of datafile
|
||||
*/
|
||||
private static final int NAME_BUFFER_SIZE_ = 100000;
|
||||
|
||||
/**
|
||||
* Shift count to retrieve group information
|
||||
*/
|
||||
private static final int GROUP_SHIFT_ = 5;
|
||||
|
||||
/**
|
||||
* Number of lines per group
|
||||
*/
|
||||
private static final int LINES_PER_GROUP_ = 1 << GROUP_SHIFT_;
|
||||
|
||||
/**
|
||||
* Mask to retrieve the offset for a particular character within a group
|
||||
*/
|
||||
private static final int GROUP_MASK_ = LINES_PER_GROUP_ - 1;
|
||||
|
||||
/**
|
||||
* Position of offsethigh in group information array
|
||||
*/
|
||||
private static final int OFFSET_HIGH_OFFSET_ = 1;
|
||||
|
||||
/**
|
||||
* Position of offsetlow in group information array
|
||||
*/
|
||||
private static final int OFFSET_LOW_OFFSET_ = 2;
|
||||
|
||||
/**
|
||||
* Indicator of if Unicode 1.0 names are available
|
||||
*/
|
||||
private static boolean UNICODE_1_;
|
||||
|
||||
/**
|
||||
* Double nibble indicator, any nibble > this number has to be combined
|
||||
* with its following nibble
|
||||
*/
|
||||
private static final int SINGLE_NIBBLE_MAX_ = 11;
|
||||
|
||||
// constructor ====================================================
|
||||
|
||||
/**
|
||||
* protected constructor
|
||||
* @exception Exception thrown when data reading fails or when data has been corrupted
|
||||
*/
|
||||
protected UCharacterNameDB() throws Exception
|
||||
{
|
||||
UGenNameReader reader = new UGenNameReader();
|
||||
InputStream i = getClass().getResourceAsStream(NAME_FILE_NAME_);
|
||||
BufferedInputStream b = new BufferedInputStream(i, NAME_BUFFER_SIZE_);
|
||||
DataInputStream d = new DataInputStream(b);
|
||||
if (!reader.read(d, this))
|
||||
throw new Exception("Data corrupted in " + NAME_FILE_NAME_);
|
||||
d.close();
|
||||
UNICODE_1_ = (';' >= m_tokentable_.length) ||
|
||||
(m_tokentable_[(int)';'] == 0xFFFF);
|
||||
}
|
||||
|
||||
// public method ==================================================
|
||||
|
||||
/**
|
||||
* toString method for printing
|
||||
*/
|
||||
public String toString()
|
||||
{
|
||||
StringBuffer result = new StringBuffer("names content \n");
|
||||
/*result.append(super.toString());
|
||||
result.append('\n');
|
||||
result.append("token string offset ");
|
||||
result.append(m_tokenstringoffset_);
|
||||
result.append("\n");
|
||||
result.append("group offset ");
|
||||
result.append(m_groupsoffset_);
|
||||
result.append("\n");
|
||||
result.append("group string offset ");
|
||||
result.append(m_groupstringoffset_);
|
||||
result.append("\n");
|
||||
result.append("alg names offset ");
|
||||
result.append(m_algnamesoffset_);
|
||||
result.append("\n");
|
||||
*/
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
// protected methods ===============================================
|
||||
|
||||
/**
|
||||
* Sets the token data
|
||||
* @param token array of tokens
|
||||
* @param tokenstring array of string values of the tokens
|
||||
* @return false if there is a data error
|
||||
*/
|
||||
protected boolean setToken(char token[], byte tokenstring[])
|
||||
{
|
||||
if (token != null && tokenstring != null && token.length > 0 &&
|
||||
tokenstring.length > 0)
|
||||
{
|
||||
m_tokentable_ = token;
|
||||
m_tokenstring_ = tokenstring;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the number of group and size of each group in number of char
|
||||
* @param count number of groups
|
||||
* @param size size of group in char
|
||||
* @return true if group size is set correctly
|
||||
*/
|
||||
protected boolean setGroupCountSize(int count, int size)
|
||||
{
|
||||
if (count <= 0 || size <= 0)
|
||||
return false;
|
||||
m_groupcount_ = count;
|
||||
m_groupsize_ = size;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the group name data
|
||||
* @param group index information array
|
||||
* @param groupstring name information array
|
||||
* @return false if there is a data error
|
||||
*/
|
||||
protected boolean setGroup(char group[], byte groupstring[])
|
||||
{
|
||||
if (group != null && groupstring != null && group.length > 0 &&
|
||||
groupstring.length > 0)
|
||||
{
|
||||
m_groupinfo_ = group;
|
||||
m_groupstring_ = groupstring;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Binary search for the group strings set that contains the argument Unicode
|
||||
* code point's most significant bits.
|
||||
* The return value is always a valid group string set that contain msb.
|
||||
* If group string set is not found, -1 is returned
|
||||
* @param ch the code point to look for
|
||||
* @return group string set index in datatable otherwise -1 is returned if
|
||||
* group string set is not found
|
||||
*/
|
||||
protected int getGroupStringIndex(int ch)
|
||||
{
|
||||
// gets the msb
|
||||
int msb = ch >> GROUP_SHIFT_,
|
||||
end = m_groupcount_,
|
||||
start,
|
||||
gindex = 0;
|
||||
|
||||
// binary search for the group of names that contains the one for code
|
||||
for (start = 0; start < end - 1;)
|
||||
{
|
||||
gindex = (start + end) >> 1;
|
||||
if (msb < getGroupMSB(gindex))
|
||||
end = gindex;
|
||||
else
|
||||
start = gindex;
|
||||
}
|
||||
|
||||
// return this if it is an exact match
|
||||
if (msb == getGroupMSB(start))
|
||||
{
|
||||
start = start * m_groupsize_;
|
||||
return UCharacterUtil.toInt(m_groupinfo_[start + OFFSET_HIGH_OFFSET_],
|
||||
m_groupinfo_[start + OFFSET_LOW_OFFSET_]);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of the group information object
|
||||
* @return number of group information object
|
||||
*/
|
||||
protected int countGroup()
|
||||
{
|
||||
return m_groupcount_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the group name of the character
|
||||
* @param ch character to get the group name
|
||||
* @param choice name choice selector to choose a unicode 1.0 or newer name
|
||||
*/
|
||||
protected String getGroupName(int ch, int choice)
|
||||
{
|
||||
if (choice != UCharacterNameChoiceEnum.U_UNICODE_CHAR_NAME && !UNICODE_1_)
|
||||
// if not modern name requested and semicolon byte value is a character,
|
||||
// not a token number, otherwise since only modern names are stored in
|
||||
// unames.dat and there is no such requested Unicode 1.0 name here
|
||||
return null;
|
||||
|
||||
// gets the msb
|
||||
int msb = ch >> GROUP_SHIFT_,
|
||||
end = m_groupcount_,
|
||||
start,
|
||||
gindex = 0;
|
||||
|
||||
// binary search for the group of names that contains the one for code
|
||||
for (start = 0; start < end - 1;)
|
||||
{
|
||||
gindex = (start + end) >> 1;
|
||||
if (msb < getGroupMSB(gindex))
|
||||
end = gindex;
|
||||
else
|
||||
start = gindex;
|
||||
}
|
||||
|
||||
// return this if it is an exact match
|
||||
if (msb == getGroupMSB(start))
|
||||
{
|
||||
char offsets[] = new char[LINES_PER_GROUP_ + 1];
|
||||
char lengths[] = new char[LINES_PER_GROUP_ + 1];
|
||||
|
||||
int index = getGroupLengths(start, offsets, lengths);
|
||||
int offset = ch & GROUP_MASK_;
|
||||
return getGroupName(index + offsets[offset], lengths[offset], choice);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Getting the character with the tokenized argument name
|
||||
* @param index of the group to check
|
||||
* @param name of the character
|
||||
* @param choice of Unicode version used
|
||||
* @return character with the tokenized argument name or -1 if character is
|
||||
* not found
|
||||
*/
|
||||
protected int getGroupChar(int index, String name, int choice)
|
||||
{
|
||||
if (choice != UCharacterNameChoiceEnum.U_UNICODE_CHAR_NAME &&
|
||||
!UNICODE_1_)
|
||||
// semicolon byte value is a token number , therefore only modern
|
||||
// names are stored in unames.dat and there is no such requested
|
||||
// Unicode 1.0 name here
|
||||
return -1;
|
||||
|
||||
// populating the data set of grouptable
|
||||
char offsets[] = new char[LINES_PER_GROUP_ + 1];
|
||||
char lengths[] = new char[LINES_PER_GROUP_ + 1];
|
||||
int startgpstrindex = getGroupLengths(index, offsets, lengths);
|
||||
|
||||
// shift out to function
|
||||
int result = getGroupChar(startgpstrindex, lengths, name, choice);
|
||||
if (result != -1)
|
||||
return (getGroupMSB(index) << GROUP_SHIFT_) | result;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the algorithm name information array
|
||||
* @param algorithm information array
|
||||
* @return true if the group string offset has been set correctly
|
||||
*/
|
||||
protected boolean setAlgorithm(AlgorithmName alg[])
|
||||
{
|
||||
if (alg != null && alg.length != 0)
|
||||
{
|
||||
m_algorithm_ = alg;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of algorithm name groups
|
||||
* @return number of algorithm name groups
|
||||
*/
|
||||
protected int countAlgorithm()
|
||||
{
|
||||
if (m_algorithm_ == null)
|
||||
return 0;
|
||||
return m_algorithm_.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the index of the Algorithm object the argument code point lies
|
||||
* @param ch code point
|
||||
* @return index of the Algorithm object the argument code point lies,
|
||||
* otherwise -1 if code point is not found in Algorithm objects
|
||||
*/
|
||||
protected int getAlgorithmIndex(int ch)
|
||||
{
|
||||
for (int index = m_algorithm_.length - 1; index >= 0; index --)
|
||||
if (m_algorithm_[index].contains(ch))
|
||||
return index;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends algorithm name of code point into StringBuffer.
|
||||
* Note this method does not check for validity of code point in Algorithm,
|
||||
* result is undefined if code point does not belong in Algorithm.
|
||||
* @param index of Algorithm object in array
|
||||
* @param ch code point
|
||||
* @param str StringBuffer to append to
|
||||
*/
|
||||
protected void appendAlgorithmName(int index, int ch, StringBuffer str)
|
||||
{
|
||||
m_algorithm_[index].appendName(ch, str);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get algorithm code point for the argument name at index. If name is not
|
||||
* found in algorithm, -1 is returned.
|
||||
* @param index algorithm index
|
||||
* @param name code point name
|
||||
* @return code point in algorithm that matches name, -1 otherwise
|
||||
*/
|
||||
protected int getAlgorithmChar(int index, String name)
|
||||
{
|
||||
return m_algorithm_[index].getAlgorithmChar(name);
|
||||
}
|
||||
|
||||
// private methods =================================================
|
||||
|
||||
/**
|
||||
* Gets the most significant bits representation in the argument group
|
||||
* @param index the indexth group in datatable
|
||||
* @return most significant bits representation of group
|
||||
*/
|
||||
private char getGroupMSB(int index)
|
||||
{
|
||||
return m_groupinfo_[index * m_groupsize_];
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a block of compressed lengths of 32 strings and expands them into
|
||||
* offsets and lengths for each string. Lengths are stored with a
|
||||
* variable-width encoding in consecutive nibbles:
|
||||
* If a nibble<0xc, then it is the length itself (0 = empty string).
|
||||
* If a nibble>=0xc, then it forms a length value with the following nibble.
|
||||
* The offsets and lengths arrays must be at least 33 (one more) long because
|
||||
* there is no check here at the end if the last nibble is still used.
|
||||
* @param index of group string object in array
|
||||
* @param offsets array to store the value of the string offsets
|
||||
* @param lengths array to store the value of the string length
|
||||
* @return next index of the data string immediately after the lengths
|
||||
* in terms of byte address
|
||||
*/
|
||||
private int getGroupLengths(int index, char offsets[], char lengths[])
|
||||
{
|
||||
char length = 0xffff;
|
||||
byte b = 0,
|
||||
n = 0;
|
||||
int shift;
|
||||
index = index * m_groupsize_; // byte count offsets of group strings
|
||||
int stringoffset = UCharacterUtil.toInt(
|
||||
m_groupinfo_[index + OFFSET_HIGH_OFFSET_],
|
||||
m_groupinfo_[index + OFFSET_LOW_OFFSET_]);
|
||||
|
||||
offsets[0] = 0;
|
||||
|
||||
// all 32 lengths must be read to get the offset of the first group string
|
||||
for (int i = 0; i < LINES_PER_GROUP_; stringoffset ++)
|
||||
{
|
||||
b = m_groupstring_[stringoffset];
|
||||
shift = 4;
|
||||
|
||||
while (shift >= 0)
|
||||
{
|
||||
// getting nibble
|
||||
n = (byte)((b >> shift) & 0x0F);
|
||||
if (length == 0xffff && n > SINGLE_NIBBLE_MAX_)
|
||||
length = (char)((n - 12) << 4);
|
||||
else
|
||||
{
|
||||
if (length != 0xffff)
|
||||
lengths[i] = (char)((length | n) + 12);
|
||||
else
|
||||
lengths[i] = (char)n;
|
||||
|
||||
if (i < LINES_PER_GROUP_)
|
||||
offsets[i + 1] = (char)(offsets[i] + lengths[i]);
|
||||
|
||||
length = 0xffff;
|
||||
i ++;
|
||||
}
|
||||
|
||||
shift -= 4;
|
||||
}
|
||||
}
|
||||
return stringoffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the name of the argument group index
|
||||
* @param index of the group name string in byte count
|
||||
* @param length of the group name string
|
||||
* @param choice of Unicode 1.0 name or the most current name
|
||||
* @return name of the group
|
||||
*/
|
||||
private String getGroupName(int index, int length, int choice)
|
||||
{
|
||||
if (choice != UCharacterNameChoiceEnum.U_UNICODE_CHAR_NAME)
|
||||
{
|
||||
int oldindex = index;
|
||||
index += UCharacterUtil.skipByteSubString(m_groupstring_, index, length,
|
||||
(byte)';');
|
||||
length -= (index - oldindex);
|
||||
}
|
||||
|
||||
StringBuffer s = new StringBuffer();
|
||||
byte b;
|
||||
char token;
|
||||
for (int i = 0; i < length;)
|
||||
{
|
||||
b = m_groupstring_[index + i];
|
||||
i ++;
|
||||
|
||||
if (b >= m_tokentable_.length)
|
||||
{
|
||||
if (b == ';')
|
||||
break;
|
||||
s.append(b); // implicit letter
|
||||
}
|
||||
else
|
||||
{
|
||||
token = m_tokentable_[b & 0x00ff];
|
||||
if (token == 0xFFFE)
|
||||
{
|
||||
// this is a lead byte for a double-byte token
|
||||
token = m_tokentable_[b << 8 | (m_groupstring_[index + i] & 0x00ff)];
|
||||
i ++;
|
||||
}
|
||||
if (token == 0xFFFF)
|
||||
{
|
||||
if (b == ';')
|
||||
break;
|
||||
s.append((char)(b & 0x00ff)); // explicit letter
|
||||
}
|
||||
else // write token word
|
||||
UCharacterUtil.getNullTermByteSubString(s, m_tokenstring_, token);
|
||||
}
|
||||
}
|
||||
|
||||
if (s.length() == 0)
|
||||
return null;
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares and retrieve character if name is found within the argument
|
||||
* group
|
||||
* @param index index where the set of names reside in the group block
|
||||
* @param length list of lengths of the strings
|
||||
* @param name character name to search for
|
||||
* @param choice of either 1.0 or the most current unicode name
|
||||
* @return relative character in the group which matches name, otherwise if
|
||||
* not found, -1 will be returned
|
||||
*/
|
||||
private int getGroupChar(int index, char length[], String name, int choice)
|
||||
{
|
||||
byte b = 0;
|
||||
char token;
|
||||
int len;
|
||||
int namelen = name.length();
|
||||
int nindex;
|
||||
int count;
|
||||
|
||||
for (int result = 0; result <= LINES_PER_GROUP_; result ++)
|
||||
{
|
||||
nindex = 0;
|
||||
len = length[result];
|
||||
|
||||
if (choice != UCharacterNameChoiceEnum.U_UNICODE_CHAR_NAME)
|
||||
{
|
||||
int oldindex = index;
|
||||
index += UCharacterUtil.skipByteSubString(m_groupstring_, index, len,
|
||||
(byte)';');
|
||||
len -= (index - oldindex);
|
||||
}
|
||||
|
||||
// number of tokens is > the length of the name
|
||||
// write each letter directly, and write a token word per token
|
||||
for (count = 0; count < len && nindex != -1 && nindex < namelen;)
|
||||
{
|
||||
b = m_groupstring_[index + count];
|
||||
count ++;
|
||||
|
||||
if (b >= m_tokentable_.length)
|
||||
{
|
||||
if (name.charAt(nindex ++) != (b & 0xFF))
|
||||
nindex = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
token = m_tokentable_[b & 0xFF];
|
||||
if (token == 0xFFFE)
|
||||
{
|
||||
// this is a lead byte for a double-byte token
|
||||
token = m_tokentable_[b << 8 |
|
||||
(m_groupstring_[index + count] & 0x00ff)];
|
||||
count ++;
|
||||
}
|
||||
if (token == 0xFFFF)
|
||||
{
|
||||
if (name.charAt(nindex ++) != (b & 0xFF))
|
||||
nindex = -1;
|
||||
}
|
||||
else
|
||||
// compare token with name
|
||||
nindex = UCharacterUtil.compareNullTermByteSubString(name,
|
||||
m_tokenstring_, nindex, token);
|
||||
}
|
||||
}
|
||||
|
||||
if (namelen == nindex &&
|
||||
(count == len || m_groupstring_[index + count] == ';'))
|
||||
return result;
|
||||
|
||||
index += len;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// protected inner class ===========================================
|
||||
|
||||
/**
|
||||
* Algorithmic name class
|
||||
*/
|
||||
static final class AlgorithmName
|
||||
{
|
||||
// protected variables ===========================================
|
||||
|
||||
/**
|
||||
* Constant type value of the different AlgorithmName
|
||||
*/
|
||||
protected static final int TYPE_0_ = 0;
|
||||
protected static final int TYPE_1_ = 1;
|
||||
|
||||
// private variables =============================================
|
||||
|
||||
/**
|
||||
* Algorithmic data information
|
||||
*/
|
||||
private int m_rangestart_;
|
||||
private int m_rangeend_;
|
||||
private byte m_type_;
|
||||
private byte m_variant_;
|
||||
private char m_factor_[];
|
||||
private String m_prefix_;
|
||||
private byte m_factorstring_[];
|
||||
|
||||
// constructor ===================================================
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
*/
|
||||
protected AlgorithmName()
|
||||
{
|
||||
}
|
||||
|
||||
// protected methods =============================================
|
||||
|
||||
/**
|
||||
* Sets the information for accessing the algorithmic names
|
||||
* @param rangestart starting code point that lies within this name group
|
||||
* @param rangeend end code point that lies within this name group
|
||||
* @param type algorithm type. There's 2 kinds of algorithmic type. First
|
||||
* which uses code point as part of its name and the other uses
|
||||
* variant postfix strings
|
||||
* @param variant algorithmic variant
|
||||
* @return true if values are valid
|
||||
*/
|
||||
protected boolean setInfo(int rangestart, int rangeend, byte type,
|
||||
byte variant)
|
||||
{
|
||||
if (rangestart >= UCharacter.MIN_VALUE && rangestart <= rangeend &&
|
||||
rangeend <= UCharacter.MAX_VALUE &&
|
||||
(type == TYPE_0_ || type == TYPE_1_))
|
||||
{
|
||||
m_rangestart_ = rangestart;
|
||||
m_rangeend_ = rangeend;
|
||||
m_type_ = type;
|
||||
m_variant_ = variant;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the factor data
|
||||
* @param array of factor
|
||||
* @return true if factors are valid
|
||||
*/
|
||||
protected boolean setFactor(char factor[])
|
||||
{
|
||||
if (factor.length == m_variant_)
|
||||
{
|
||||
m_factor_ = factor;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the name prefix
|
||||
* @param prefix
|
||||
* @return true if prefix is set
|
||||
*/
|
||||
protected boolean setPrefix(String prefix)
|
||||
{
|
||||
if (prefix != null && prefix.length() > 0)
|
||||
{
|
||||
m_prefix_ = prefix;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the variant factorized name data
|
||||
* @param string variant factorized name data
|
||||
* @return true if values are set
|
||||
*/
|
||||
protected boolean setFactorString(byte string[])
|
||||
{
|
||||
// factor and variant string can be empty for things like hanggul code
|
||||
// points
|
||||
m_factorstring_ = string;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if code point lies in Algorithm object at index
|
||||
* @param ch code point
|
||||
*/
|
||||
protected boolean contains(int ch)
|
||||
{
|
||||
return m_rangestart_ <= ch && ch <= m_rangeend_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends algorithm name of code point into StringBuffer.
|
||||
* Note this method does not check for validity of code point in Algorithm,
|
||||
* result is undefined if code point does not belong in Algorithm.
|
||||
* @param ch code point
|
||||
* @param str StringBuffer to append to
|
||||
*/
|
||||
protected void appendName(int ch, StringBuffer str)
|
||||
{
|
||||
str.append(m_prefix_);
|
||||
switch (m_type_)
|
||||
{
|
||||
case TYPE_0_:
|
||||
// prefix followed by hex digits indicating variants
|
||||
str.append(Integer.toHexString(ch));
|
||||
break;
|
||||
case TYPE_1_:
|
||||
// prefix followed by factorized-elements
|
||||
int offset = ch - m_rangestart_;
|
||||
int indexes[] = new int[m_variant_];
|
||||
int factor;
|
||||
|
||||
// write elements according to the factors
|
||||
// the factorized elements are determined by modulo arithmetic
|
||||
for (int i = m_variant_ - 1; i > 0; i --)
|
||||
{
|
||||
factor = m_factor_[i] & 0x00FF;
|
||||
indexes[i] = offset % factor;
|
||||
offset /= factor;
|
||||
}
|
||||
|
||||
// we don't need to calculate the last modulus because
|
||||
// start <= code <= end guarantees here that code <= factors[0]
|
||||
indexes[0] = offset;
|
||||
|
||||
// joining up the factorized strings
|
||||
String s[] = getFactorString(indexes);
|
||||
if (s != null && s.length > 0)
|
||||
{
|
||||
int size = s.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
str.append(s[i]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the character for the argument algorithmic name
|
||||
* @return the algorithmic char or -1 otherwise.
|
||||
*/
|
||||
protected int getAlgorithmChar(String name)
|
||||
{
|
||||
int prefixlen = m_prefix_.length();
|
||||
if (name.length() < prefixlen ||
|
||||
!m_prefix_.equals(name.substring(0, prefixlen)))
|
||||
return -1;
|
||||
|
||||
switch (m_type_)
|
||||
{
|
||||
case TYPE_0_ :
|
||||
try
|
||||
{
|
||||
int result = Integer.parseInt(name.substring(prefixlen), 16);
|
||||
// does it fit into the range?
|
||||
if (m_rangestart_ <= result && result <= m_rangeend_)
|
||||
return result;
|
||||
}
|
||||
catch (NumberFormatException e)
|
||||
{
|
||||
}
|
||||
break;
|
||||
case TYPE_1_ :
|
||||
// repetitative suffix name comparison done here
|
||||
// offset is the character code - start
|
||||
for (int ch = m_rangestart_; ch <= m_rangeend_; ch ++)
|
||||
{
|
||||
int offset = ch - m_rangestart_;
|
||||
int indexes[] = new int[m_variant_];
|
||||
int factor;
|
||||
|
||||
// write elements according to the factors
|
||||
// the factorized elements are determined by modulo arithmetic
|
||||
for (int i = m_variant_ - 1; i > 0; i --)
|
||||
{
|
||||
factor = m_factor_[i] & 0x00FF;
|
||||
indexes[i] = offset % factor;
|
||||
offset /= factor;
|
||||
}
|
||||
|
||||
// we don't need to calculate the last modulus because
|
||||
// start <= code <= end guarantees here that code <= factors[0]
|
||||
indexes[0] = offset;
|
||||
|
||||
// joining up the factorized strings
|
||||
if (compareFactorString(indexes, name.substring(prefixlen)))
|
||||
return ch;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
// private methods ================================================
|
||||
|
||||
/**
|
||||
* Gets the indexth string in each of the argument factor block
|
||||
* @param index array with each index corresponding to each factor block
|
||||
* @return array of indexth factor string in factor block
|
||||
*/
|
||||
private String[] getFactorString(int index[])
|
||||
{
|
||||
int size = m_factor_.length;
|
||||
if (index == null || index.length != size)
|
||||
return null;
|
||||
|
||||
String result[] = new String[size];
|
||||
StringBuffer str = new StringBuffer();
|
||||
int count = 0;
|
||||
int factor;
|
||||
size --;
|
||||
for (int i = 0; i <= size; i ++)
|
||||
{
|
||||
factor = m_factor_[i];
|
||||
count = UCharacterUtil.skipNullTermByteSubString(m_factorstring_,
|
||||
count, index[i]);
|
||||
count = UCharacterUtil.getNullTermByteSubString(str, m_factorstring_,
|
||||
count);
|
||||
if (i != size)
|
||||
count = UCharacterUtil.skipNullTermByteSubString(m_factorstring_,
|
||||
count, factor - index[i] - 1);
|
||||
result[i] = str.toString();
|
||||
str.delete(0, str.length());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares the indexth string in each of the argument factor block with
|
||||
* the argument string
|
||||
* @param index array with each index corresponding to each factor block
|
||||
* @param str string to compare with
|
||||
* @return true if string matches
|
||||
*/
|
||||
private boolean compareFactorString(int index[], String str)
|
||||
{
|
||||
int size = m_factor_.length;
|
||||
if (index == null || index.length != size)
|
||||
return false;
|
||||
|
||||
int count = 0;
|
||||
int strcount = 0;
|
||||
int factor;
|
||||
size --;
|
||||
for (int i = 0; i <= size; i ++)
|
||||
{
|
||||
factor = m_factor_[i];
|
||||
count = UCharacterUtil.skipNullTermByteSubString(m_factorstring_,
|
||||
count, index[i]);
|
||||
strcount = UCharacterUtil.compareNullTermByteSubString(str,
|
||||
m_factorstring_, strcount, count);
|
||||
if (strcount < 0)
|
||||
return false;
|
||||
|
||||
if (i != size)
|
||||
count = UCharacterUtil.skipNullTermByteSubString(m_factorstring_,
|
||||
count, factor - index[i]);
|
||||
}
|
||||
if (strcount != str.length())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
426
icu4j/src/com/ibm/icu/text/UCharacterPropertyDB.java
Executable file
426
icu4j/src/com/ibm/icu/text/UCharacterPropertyDB.java
Executable file
|
@ -0,0 +1,426 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source:
|
||||
* /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterPropertyDB.java $
|
||||
* $Date: 2000/12/26 20:00:56 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.BufferedInputStream;
|
||||
|
||||
/**
|
||||
* Internal class used for Unicode character property database.
|
||||
* Database classes store binary data read from uprops.dat and unames for use.
|
||||
* It does not have the capability to parse the data into more high-level
|
||||
* information. It only returns bytes of information when required.
|
||||
* Due to the form most commonly used for retrieval, array of char is used
|
||||
* to store the binary data
|
||||
* UCharacterPropertyDB also contains information on accessing indexes to
|
||||
* significant points in the binary data.
|
||||
* Responsibility for molding the binary data into a more meaningful form lies on
|
||||
* <a href=UCharacter.html>UCharacter</a> and
|
||||
* <a href=UCharacterName.html>UCharacterName</a>.
|
||||
* Data populated by <a href=UGenPropReader.html>UGenPropReader</a>
|
||||
* @author Syn Wee Quek
|
||||
* @since oct1000
|
||||
* @see com.ibm.icu.text.UGenReader
|
||||
*/
|
||||
|
||||
final class UCharacterPropertyDB extends UCharacterDB
|
||||
{
|
||||
// protected variables ================================================
|
||||
|
||||
/**
|
||||
* Data type indicators
|
||||
*/
|
||||
protected static final int EXC_UPPERCASE_ = 0;
|
||||
protected static final int EXC_LOWERCASE_ = EXC_UPPERCASE_ + 1;
|
||||
protected static final int EXC_TITLECASE_ = EXC_LOWERCASE_ + 1;
|
||||
protected static final int EXC_DIGIT_VALUE_ = EXC_TITLECASE_ + 1;
|
||||
protected static final int EXC_NUMERIC_VALUE_ = EXC_DIGIT_VALUE_ + 1;
|
||||
protected static final int EXC_DENOMINATOR_VALUE_ = EXC_NUMERIC_VALUE_ + 1;
|
||||
protected static final int EXC_MIRROR_MAPPING_ = EXC_DENOMINATOR_VALUE_ + 1;
|
||||
|
||||
|
||||
// private variables ==================================================
|
||||
|
||||
/**
|
||||
* Number of bits to shift right to get the correct segment of bits out for
|
||||
* index to the unicode database
|
||||
*/
|
||||
private int m_stage1shift_;
|
||||
private int m_stage2shift_;
|
||||
|
||||
/**
|
||||
* Mask for performing on the bit segment after shifting to get an index out
|
||||
* of it
|
||||
*/
|
||||
private int m_stage2maskaftershift_;
|
||||
private int m_stage3maskaftershift_;
|
||||
|
||||
/**
|
||||
* Table for stages data block
|
||||
*/
|
||||
private char m_stages_[];
|
||||
|
||||
/**
|
||||
* Character property table
|
||||
*/
|
||||
private int m_property_[];
|
||||
|
||||
/**
|
||||
* Exception property table
|
||||
*/
|
||||
private int m_exception_[];
|
||||
|
||||
/**
|
||||
* Default name of the datafile
|
||||
*/
|
||||
private static final String DATA_FILE_NAME_ = "uprops.dat";
|
||||
|
||||
/**
|
||||
* Default buffer size of datafile
|
||||
*/
|
||||
private static final int DATA_BUFFER_SIZE_ = 25000;
|
||||
|
||||
/**
|
||||
* This, from what i infer is the max size of the indicators used for the
|
||||
* exception values.
|
||||
* Number of bits in an 8-bit integer value
|
||||
*/
|
||||
private static final int EXC_GROUP_ = 8;
|
||||
|
||||
/**
|
||||
* Mask to get the group
|
||||
*/
|
||||
private static final int EXC_GROUP_MASK_ = 255;
|
||||
|
||||
/**
|
||||
* Mask to get the digit value in the exception result
|
||||
*/
|
||||
private static final int EXC_DIGIT_MASK_ = 0xFFFF;
|
||||
|
||||
/**
|
||||
* Offset table for data in exception block.<br>
|
||||
* Table formed by the number of bits used for the index, e.g. 0 = 0 bits,
|
||||
* 1 = 1 bits.
|
||||
*/
|
||||
private static final byte FLAGS_OFFSET_[] =
|
||||
{
|
||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
|
||||
};
|
||||
|
||||
/**
|
||||
* Numeric value shift
|
||||
*/
|
||||
private static final int VALUE_SHIFT_ = 20;
|
||||
|
||||
/**
|
||||
* Since character information data are packed together.
|
||||
* This is the category mask for getting the category information
|
||||
*/
|
||||
private static final int CATEGORY_MASK_ = 0x1F;
|
||||
|
||||
/**
|
||||
* Exception test mask
|
||||
*/
|
||||
private static final int EXCEPTION_MASK_ = 0x20;
|
||||
|
||||
/**
|
||||
* Mask to be applied after shifting to obtain an unsigned numeric value
|
||||
*/
|
||||
private static final int UNSIGNED_VALUE_MASK_AFTER_SHIFT_ = 0x7FF;
|
||||
|
||||
/**
|
||||
* Mirror test mask
|
||||
*/
|
||||
private static final int MIRROR_MASK_ = 0x800;
|
||||
|
||||
/**
|
||||
* Shift to get bidi bits
|
||||
*/
|
||||
private static final int BIDI_SHIFT_ = 6;
|
||||
|
||||
/**
|
||||
* Mask to be applied after shifting to get bidi bits
|
||||
*/
|
||||
private static final int BIDI_MASK_AFTER_SHIFT_ = 0x1F;
|
||||
|
||||
// constructor ======================================================
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* @exception thrown when data reading fails or data corrupted
|
||||
*/
|
||||
protected UCharacterPropertyDB() throws Exception
|
||||
{
|
||||
UGenPropReader reader = new UGenPropReader();
|
||||
|
||||
InputStream i = getClass().getResourceAsStream(DATA_FILE_NAME_);
|
||||
BufferedInputStream b = new BufferedInputStream(i, DATA_BUFFER_SIZE_);
|
||||
DataInputStream d = new DataInputStream(b);
|
||||
if (!reader.read(d, this))
|
||||
throw new Exception("Data corrupted in " + DATA_FILE_NAME_);
|
||||
d.close();
|
||||
}
|
||||
|
||||
// public methods ===================================================
|
||||
|
||||
/**
|
||||
* toString method for printing
|
||||
*/
|
||||
public String toString()
|
||||
{
|
||||
StringBuffer result = new StringBuffer("Property block\n");
|
||||
result.append(super.toString());
|
||||
result.append("\nshift 1 : ");
|
||||
result.append(m_stage1shift_);
|
||||
result.append("\nshift 2 : ");
|
||||
result.append(m_stage2shift_);
|
||||
result.append("\nmask 2 : ");
|
||||
result.append(m_stage2maskaftershift_);
|
||||
result.append("\nmask 3 : ");
|
||||
result.append(m_stage3maskaftershift_);
|
||||
result.append("\nsize of stage data ");
|
||||
result.append(m_stages_.length);
|
||||
result.append("\nsize of property data ");
|
||||
result.append(m_property_.length);
|
||||
result.append("\nsize of exception data ");
|
||||
result.append(m_exception_.length);
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
// protected methods ================================================
|
||||
|
||||
/**
|
||||
* Set stage shift bits, mask and property offset
|
||||
* @param stage1shift count
|
||||
* @param stage2shift count
|
||||
* @param stage2mask count
|
||||
* @param stage3mask count
|
||||
* @param offset property block offset
|
||||
* @return false if there is a data error
|
||||
*/
|
||||
protected boolean setInfo(int stage1shift, int stage2shift, int stage2mask,
|
||||
int stage3mask)
|
||||
{
|
||||
if (stage1shift >= 0 && stage2shift >= 0 && stage2mask != 0 &&
|
||||
stage3mask != 0)
|
||||
{
|
||||
m_stage1shift_ = stage1shift;
|
||||
m_stage2shift_ = stage2shift;
|
||||
m_stage2maskaftershift_ = stage2mask;
|
||||
m_stage3maskaftershift_ = stage3mask;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the stages block data. The first UGenPropReader.INDEX_SIZE char of data
|
||||
* being some other data not used from hence onwards. Note the unused data
|
||||
* resides since all indexes are relative to it.
|
||||
* @param stages array containing the 2 stages of index pointing to property
|
||||
* data
|
||||
* @return true if stages data is set successfully
|
||||
*/
|
||||
protected boolean setStage(char stages[])
|
||||
{
|
||||
if (stages == null || stages.length <= 0)
|
||||
return false;
|
||||
m_stages_ = stages;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the property block data.
|
||||
* @param property array containing data regarding the character properties
|
||||
* @return true if stages data is set successfully
|
||||
*/
|
||||
protected boolean setProperty(int property[])
|
||||
{
|
||||
if (property == null || property.length <= 0)
|
||||
return false;
|
||||
m_property_ = property;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the exception block data.
|
||||
* @param exception array containing extra character properties not found in
|
||||
* property array
|
||||
* @return true if stages data is set successfully
|
||||
*/
|
||||
protected boolean setException(int exception[])
|
||||
{
|
||||
if (exception == null || exception.length <= 0)
|
||||
return false;
|
||||
m_exception_ = exception;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the property value at the index
|
||||
* @param ch code point whose property value is to be retrieved
|
||||
* @return property value of code point
|
||||
*/
|
||||
protected int getProperty(int ch)
|
||||
{
|
||||
// index of the first access to the database
|
||||
int index1 = ch >> m_stage1shift_;
|
||||
// index of the second access to the database
|
||||
int index2 = m_stages_[index1] +
|
||||
((ch >> m_stage2shift_) & m_stage2maskaftershift_);
|
||||
// index of the third access to the database
|
||||
int index3 = m_stages_[index2] + (ch & m_stage3maskaftershift_);
|
||||
int propindex = m_stages_[index3];
|
||||
return m_property_[propindex];
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the exception value passed in has the kind of information
|
||||
* which the indicator wants, e.g if the exception value contains the digit
|
||||
* value of the character
|
||||
* @param index exception index
|
||||
* @param indicator type indicator
|
||||
* @return true if type value exist
|
||||
*/
|
||||
protected boolean hasExceptionValue(int index, int indicator)
|
||||
{
|
||||
return (m_exception_[index] & (1 << indicator)) != 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the exception value at the index, assuming that data type is
|
||||
* available. Result is undefined if data is not available. Use
|
||||
* hasExceptionValue() to determine data's availability.
|
||||
* @param index
|
||||
* @param exception data type
|
||||
* @return exception data type value at index
|
||||
*/
|
||||
protected int getException(int index, int etype)
|
||||
{
|
||||
// contained in exception data
|
||||
int evalue = m_exception_[index];
|
||||
index ++;
|
||||
// contained in the exception digit address
|
||||
index = addExceptionOffset(evalue, etype, index);
|
||||
if (etype == EXC_DIGIT_VALUE_)
|
||||
return m_exception_[index] & EXC_DIGIT_MASK_;
|
||||
return m_exception_[index];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a value indicating a character category from the argument property
|
||||
* value
|
||||
* @param unicode character property
|
||||
* @return category
|
||||
*/
|
||||
protected static int getPropType(int prop)
|
||||
{
|
||||
int result = prop & CATEGORY_MASK_;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the argument props indicates that the exception block has
|
||||
* to be accessed for data
|
||||
* @param props property value
|
||||
* @return true if this is an exception indicator false otherwise
|
||||
*/
|
||||
protected static boolean isExceptionIndicator(int props)
|
||||
{
|
||||
if ((props & EXCEPTION_MASK_) != 0)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Getting the exception index for argument property
|
||||
* @param prop character property
|
||||
*/
|
||||
protected static int getExceptionIndex(int prop)
|
||||
{
|
||||
return getSignedValue(prop) & UNSIGNED_VALUE_MASK_AFTER_SHIFT_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Getting the signed numeric value of a character embedded in the property
|
||||
* argument
|
||||
* @param prop the character
|
||||
* @return signed numberic value
|
||||
*/
|
||||
protected static int getSignedValue(int prop)
|
||||
{
|
||||
return (prop >> VALUE_SHIFT_);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checking if property indicates mirror element
|
||||
* @param prop property value
|
||||
* @return true if mirror indicator is set, false otherwise
|
||||
*/
|
||||
protected static boolean isMirrored(int prop)
|
||||
{
|
||||
return (prop & MIRROR_MASK_) != 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Getting the direction data in the property value
|
||||
* @param prop property value
|
||||
* @return direction value in property
|
||||
*/
|
||||
protected static int getDirection(int prop)
|
||||
{
|
||||
return (prop >> BIDI_SHIFT_) & BIDI_MASK_AFTER_SHIFT_;
|
||||
}
|
||||
|
||||
// private methods ===============================================
|
||||
|
||||
/**
|
||||
* Getting the correct address for data in the exception value
|
||||
* @param evalue exception value
|
||||
* @param indicator type of data to retrieve
|
||||
* @param address current address to move from
|
||||
* @return the correct address
|
||||
*/
|
||||
private int addExceptionOffset(int evalue, int indicator, int address)
|
||||
{
|
||||
int result = address;
|
||||
if (indicator >= EXC_GROUP_)
|
||||
result += (FLAGS_OFFSET_[evalue & EXC_GROUP_MASK_] << 1);
|
||||
// evalue >>= EXC_GROUP_;
|
||||
// indicator -= EXC_GROUP_;
|
||||
else
|
||||
{
|
||||
int mask = (1 << indicator) - 1;
|
||||
result += FLAGS_OFFSET_[evalue & mask];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
265
icu4j/src/com/ibm/icu/text/UCharacterUtil.java
Executable file
265
icu4j/src/com/ibm/icu/text/UCharacterUtil.java
Executable file
|
@ -0,0 +1,265 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Attic/UCharacterUtil.java,v $
|
||||
* $Date: 2000/12/26 20:00:56 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
/**
 * Internal character utility class offering simple data type conversions
 * (bytes, chars and ints) and parsing helpers for strings stored inside
 * byte arrays. Does not have an analog in the JDK.
 * @author Syn Wee Quek
 * @since sep2900
 */

final class UCharacterUtil
{
    // constructor =====================================================

    /**
     * Private constructor, this class only has static members and is never
     * instantiated
     */
    private UCharacterUtil()
    {
    }

    // protected methods ===============================================

    /**
     * Joins 2 chars to form an int
     * @param msc most significant char, becomes the upper 16 bits
     * @param lsc least significant char, becomes the lower 16 bits
     * @return int form
     */
    protected static int toInt(char msc, char lsc)
    {
        int upper = msc << 16;
        return upper | lsc;
    }

    /**
     * Converts the first 2 bytes of a byte array into a char, the first byte
     * being the most significant one. A null or empty array gives 0 and an
     * array of length 1 gives the value of its only byte.
     * @param bytes 2 byte argument
     * @return char form
     */
    protected static char toChar(byte bytes[])
    {
        if (bytes == null || bytes.length == 0)
            return 0;
        if (bytes.length == 1)
            return toChar(bytes[0]);
        return toChar(bytes[0], bytes[1]);
    }

    /**
     * Converts 2 bytes into a char, each byte is treated as unsigned
     * @param msb the most significant byte
     * @param lsb the least significant byte
     * @return char form
     */
    protected static char toChar(byte msb, byte lsb)
    {
        int upper = (msb & 0xFF) << 8;
        int lower = lsb & 0xFF;
        return (char)(upper | lower);
    }

    /**
     * Converts the first 4 bytes of a byte array into an int, the first byte
     * being the most significant one. If the array holds less than 4 bytes,
     * only the available bytes are combined. A null or empty array gives 0.
     * @param bytes 4 byte argument
     * @return int form
     */
    protected static int toInt(byte bytes[])
    {
        if (bytes == null)
            return 0;
        int limit = Math.min(bytes.length, 4);
        int value = 0;
        int pos = 0;
        while (pos < limit)
        {
            value = (value << 8) | (bytes[pos] & 0xFF);
            pos ++;
        }
        return value;
    }

    /**
     * Converts a byte into a char, the byte is treated as unsigned
     * @param onebyte byte to convert
     * @return char form, in the range 0 to 0xFF
     */
    protected static char toChar(byte onebyte)
    {
        return (char)(onebyte & 0xFF);
    }

    /**
     * Converts an integer to an array of 4 characters where each character
     * corresponds to its respective byte, most significant byte first
     * @param i integer to be converted
     * @return array of 4 characters
     */
    protected static char[] to4Char(int i)
    {
        char quad[] = new char[4];
        for (int k = 0; k < 4; k ++)
            quad[k] = (char)((i >>> (24 - (k << 3))) & 0xFF);
        return quad;
    }

    /**
     * Retrieves a null terminated substring from an array of bytes.
     * Every byte from index up to, but not including, the next zero byte is
     * appended to str as one char.
     * @param str stringbuffer to store data in, data will be stored with each
     *        byte as a char
     * @param array byte array
     * @param index to start substring in byte count
     * @return the position just after the terminating zero byte
     */
    protected static int getNullTermByteSubString(StringBuffer str, byte[] array,
                                                  int index)
    {
        int pos = index;
        while (true)
        {
            byte current = array[pos];
            pos ++;
            if (current == 0)
                return pos;
            str.append((char)(current & 0x00FF));
        }
    }

    /**
     * Compares a null terminated substring from an array of bytes with the
     * characters of a string. The bytes from aindex up to the next zero byte
     * must match the characters of str starting at strindex.
     * @param str string to compare
     * @param array byte array
     * @param strindex index within str to start comparing
     * @param aindex array index to start in byte count
     * @return the end position of the substring within str if it matches,
     *         otherwise -1
     */
    protected static int compareNullTermByteSubString(String str, byte[] array,
                                                      int strindex, int aindex)
    {
        final int limit = str.length();
        int spos = strindex;

        for (int apos = aindex; array[apos] != 0; apos ++)
        {
            char expected = (char)(array[apos] & 0xFF);
            // the string ran out before the byte substring did, or the
            // characters differ
            if (spos == limit || str.charAt(spos) != expected)
                return -1;
            spos ++;
        }
        return spos;
    }

    /**
     * Skips null terminated substrings in an array of bytes, i.e. moves
     * forward until skipcount zero bytes have been passed.
     * @param array byte array
     * @param index to start substrings in byte count
     * @param skipcount number of null terminated substrings to skip
     * @return the position just after the last skipped terminating zero byte
     */
    protected static int skipNullTermByteSubString(byte[] array, int index,
                                                   int skipcount)
    {
        int pos = index;
        int remaining = skipcount;
        while (remaining > 0)
        {
            // every zero byte ends one substring
            if (array[pos] == 0)
                remaining --;
            pos ++;
        }
        return pos;
    }

    /**
     * Skips bytes in an array until a byte of the argument value has been
     * passed or the maximum number of bytes has been skipped.
     * @param array byte array to parse
     * @param index to start substrings in byte count
     * @param length the max number of bytes to skip
     * @param skipend value of byte to skip to
     * @return the number of bytes skipped, the byte equal to skipend included
     */
    protected static int skipByteSubString(byte[] array, int index, int length,
                                           byte skipend)
    {
        int skipped = 0;
        while (skipped < length)
        {
            boolean found = array[index + skipped] == skipend;
            skipped ++;
            if (found)
                break;
        }
        return skipped;
    }

    /**
     * Skips bytes in an array until a byte of the argument value has been
     * passed.
     * @param array byte array to parse
     * @param index to start substrings in byte count
     * @param skipend value of byte to skip to
     * @return the number of bytes skipped, the byte equal to skipend included
     */
    protected static int skipByteSubString(byte[] array, int index, byte skipend)
    {
        int pos = index;
        while (array[pos] != skipend)
            pos ++;
        return pos - index + 1;
    }
}
|
||||
|
275
icu4j/src/com/ibm/icu/text/UGenNameReader.java
Executable file
275
icu4j/src/com/ibm/icu/text/UGenNameReader.java
Executable file
|
@ -0,0 +1,275 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Attic/UGenNameReader.java,v $
|
||||
* $Date: 2000/12/26 20:00:56 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
|
||||
/**
|
||||
* Internal reader class reading binary data from unames.dat created by ICU
|
||||
* programs gennames.
|
||||
* It arranges the header and index data apart into meaningful data before
|
||||
* populating <a href=UCharacterNameDB.html>UCharacterNameDB</a>. UGenNameReader
|
||||
* does not have or require the ability to decipher the rest of the data in
|
||||
* unames.dat and hence stores it as a block of data in an array of char in
|
||||
* <a href=UCharacterNameDB.html>UCharacterNameDB</a>. The ability to decipher
|
||||
* the block of data lies in <a href=UCharacterName.html>UCharacterName</a>.
|
||||
* For more information about the format of unames.dat refer to
|
||||
* <a href=oss.software.ibm.com/icu4j/icu4jhtml/com/ibm/icu/text/readme.html>
|
||||
* ReadMe</a>.<br>
|
||||
* unames.dat which is in big-endian format is jared together with this package.
|
||||
* @author Syn Wee Quek
|
||||
* @since oct1000
|
||||
*/
|
||||
|
||||
final class UGenNameReader extends UGenReader
|
||||
{
|
||||
// private variables ===========================================
|
||||
|
||||
/**
|
||||
* Size of the group information block in number of char
|
||||
*/
|
||||
private static final int GROUP_INFO_SIZE_ = 3;
|
||||
|
||||
/**
|
||||
* Index of the offset information
|
||||
*/
|
||||
private int m_tokenstringindex_;
|
||||
private int m_groupindex_;
|
||||
private int m_groupstringindex_;
|
||||
private int m_algnamesindex_;
|
||||
|
||||
/**
|
||||
* Size of an algorithmic name information group
|
||||
* start code point size + end code point size + type size + variant size +
|
||||
* size of data size
|
||||
*/
|
||||
private static final int ALG_INFO_SIZE_ = 12;
|
||||
|
||||
/**
|
||||
* File format version and id that this class understands.
|
||||
* No guarantees are made if a older version is used
|
||||
*/
|
||||
private static final byte DATA_FORMAT_VERSION_[] =
|
||||
{(byte)0x1, (byte)0x0, (byte)0x0, (byte)0x0};
|
||||
private static final byte DATA_FORMAT_ID_[] = {(byte)0x75, (byte)0x6E,
|
||||
(byte)0x61, (byte)0x6D};
|
||||
|
||||
// constructor ==================================================
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
*/
|
||||
protected UGenNameReader()
|
||||
{
|
||||
}
|
||||
|
||||
// protected methods ============================================
|
||||
|
||||
/**
|
||||
* Read and break up the stream of data passed in as arguments
|
||||
* and fills up UCharacterNameDB.
|
||||
* If unsuccessful false will be returned.
|
||||
* @param input data input stream
|
||||
* @param data instance of datablock
|
||||
* @return true if successfully filled UCharacterNameDB
|
||||
* @exception thrown if there is a failure reading file
|
||||
*/
|
||||
protected boolean read(DataInputStream input, UCharacterNameDB data)
|
||||
throws Exception
|
||||
{
|
||||
if (super.read(input, data))
|
||||
{
|
||||
// read the indexes
|
||||
if (readIndex(input) && readToken(input, data) && readGroup(input, data)
|
||||
&& readAlg(input, data))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checking the file for the correct format
|
||||
* @param dataformatid
|
||||
* @param dataformatversion
|
||||
* @return true if the file format version is correct
|
||||
*/
|
||||
protected boolean authenticate(byte dataformatid[],
|
||||
byte dataformatversion[])
|
||||
{
|
||||
int size = DATA_FORMAT_ID_.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
if (DATA_FORMAT_ID_[i] != dataformatid[i])
|
||||
return false;
|
||||
size = DATA_FORMAT_VERSION_.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
if (DATA_FORMAT_VERSION_[i] != dataformatversion[i])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the size of the file id version
|
||||
* @return size of file format version in bytes
|
||||
*/
|
||||
protected int getFileFormatIDSize()
|
||||
{
|
||||
return DATA_FORMAT_ID_.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the size of the file format version
|
||||
* @return size of file format version in bytes
|
||||
*/
|
||||
protected int getFileFormatVersionSize()
|
||||
{
|
||||
return DATA_FORMAT_VERSION_.length;
|
||||
}
|
||||
|
||||
// private methods =========================================
|
||||
|
||||
/**
|
||||
* Read the indexes
|
||||
* @param input data stream
|
||||
* @return true if successfully read
|
||||
* @exception thrown when data reading fails
|
||||
*/
|
||||
private boolean readIndex(DataInputStream input) throws Exception
|
||||
{
|
||||
m_tokenstringindex_ = input.readInt();
|
||||
m_groupindex_ = input.readInt();
|
||||
m_groupstringindex_ = input.readInt();
|
||||
m_algnamesindex_ = input.readInt();
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the tokens
|
||||
* @param input data stream
|
||||
* @param data instance of UCharacterName to populate
|
||||
* @return true if successfully read
|
||||
* @exception thrown when data reading fails
|
||||
*/
|
||||
private boolean readToken(DataInputStream input, UCharacterNameDB data)
|
||||
throws Exception
|
||||
{
|
||||
char count = input.readChar();
|
||||
char token[] = new char[count];
|
||||
for (char i = 0; i < count; i ++)
|
||||
token[i] = input.readChar();
|
||||
|
||||
int size = m_groupindex_ - m_tokenstringindex_;
|
||||
byte tokenstr[] = new byte[size];
|
||||
input.readFully(tokenstr);
|
||||
return data.setToken(token, tokenstr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the groups
|
||||
* @param input data stream
|
||||
* @param data instance of UCharacterName to populate
|
||||
* @return true if successfully read
|
||||
* @exception thrown when data reading fails
|
||||
*/
|
||||
private boolean readGroup(DataInputStream input, UCharacterNameDB data)
|
||||
throws Exception
|
||||
{
|
||||
// reading the group information records
|
||||
int count = input.readChar();
|
||||
data.setGroupCountSize(count, GROUP_INFO_SIZE_);
|
||||
count *= GROUP_INFO_SIZE_;
|
||||
char group[] = new char[count];
|
||||
for (int i = 0; i < count; i ++)
|
||||
group[i] = input.readChar();
|
||||
|
||||
int size = m_algnamesindex_ - m_groupstringindex_;
|
||||
byte groupstring[] = new byte[size];
|
||||
input.readFully(groupstring);
|
||||
return data.setGroup(group, groupstring);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the algorithmic names
|
||||
* @param input data stream
|
||||
* @param data instance of UCharacterName to populate
|
||||
* @return true if successfully read
|
||||
* @exception thrown when data reading fails
|
||||
*/
|
||||
private boolean readAlg(DataInputStream input, UCharacterNameDB data)
|
||||
throws Exception
|
||||
{
|
||||
int count = input.readInt();
|
||||
UCharacterNameDB.AlgorithmName alg[] =
|
||||
new UCharacterNameDB.AlgorithmName[count];
|
||||
|
||||
for (int i = 0; i < count; i ++)
|
||||
{
|
||||
UCharacterNameDB.AlgorithmName an = readAlg(input);
|
||||
if (an == null)
|
||||
return false;
|
||||
alg[i] = an;
|
||||
}
|
||||
data.setAlgorithm(alg);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads an individual record of AlgorithmNames
|
||||
* @param input stream
|
||||
* @return an instance of AlgorithNames if read is successful otherwise null
|
||||
* @exception thrown when file read error occurs or data is corrupted
|
||||
*/
|
||||
private UCharacterNameDB.AlgorithmName readAlg(DataInputStream input)
|
||||
throws Exception
|
||||
{
|
||||
UCharacterNameDB.AlgorithmName result =
|
||||
new UCharacterNameDB.AlgorithmName();
|
||||
int rangestart = input.readInt();
|
||||
int rangeend = input.readInt();
|
||||
byte type = input.readByte();
|
||||
byte variant = input.readByte();
|
||||
if (!result.setInfo(rangestart, rangeend, type, variant))
|
||||
return null;
|
||||
|
||||
int size = input.readChar();
|
||||
if (type == UCharacterNameDB.AlgorithmName.TYPE_1_)
|
||||
{
|
||||
char factor[] = new char[variant];
|
||||
for (int j = 0; j < variant; j ++)
|
||||
factor[j] = input.readChar();
|
||||
|
||||
result.setFactor(factor);
|
||||
size -= (variant << 1);
|
||||
}
|
||||
|
||||
StringBuffer prefix = new StringBuffer();
|
||||
char c = (char)(input.readByte() & 0x00FF);
|
||||
while (c != 0)
|
||||
{
|
||||
prefix.append(c);
|
||||
c = (char)(input.readByte() & 0x00FF);
|
||||
}
|
||||
|
||||
result.setPrefix(prefix.toString());
|
||||
|
||||
size -= (ALG_INFO_SIZE_ + prefix.length() + 1);
|
||||
|
||||
if (size > 0)
|
||||
{
|
||||
byte string[] = new byte[size];
|
||||
input.readFully(string);
|
||||
result.setFactorString(string);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
263
icu4j/src/com/ibm/icu/text/UGenPropReader.java
Executable file
263
icu4j/src/com/ibm/icu/text/UGenPropReader.java
Executable file
|
@ -0,0 +1,263 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Attic/UGenPropReader.java,v $
|
||||
* $Date: 2000/12/26 20:00:56 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
|
||||
/**
|
||||
* Internal reader class reading binary data from uprops.dat created by ICU
|
||||
* programs genprops.
|
||||
* It arranges the header and index data apart into meaningful data before
|
||||
* populating <a href=UCharacterPropDB.html>UCharacterPropDB</a>. UGenPropReader
|
||||
* does not have or require the ability to decipher the rest of the data in
|
||||
* uprops.dat and hence stores it as a block of data in an array of char in
|
||||
* <a href=UCharacterPropDB.html>UCharacterPropDB</a>. The ability to decipher
|
||||
* the block of data lies in <a href=UCharacterProp.html>UCharacterProp</a>.
|
||||
* For more information about the format of uprops.dat refer to
|
||||
* <a href=oss.software.ibm.com/icu4j/icu4jhtml/com/ibm/icu/text/readme.html>
|
||||
* ReadMe</a>.<br>
|
||||
* uprops.dat which is in big-endian format is jared together with this package.
|
||||
* @author Syn Wee Quek
|
||||
* @since oct0200
|
||||
*/
|
||||
|
||||
final class UGenPropReader extends UGenReader
|
||||
{
|
||||
// private variables ===========================================
|
||||
|
||||
/**
|
||||
* Index size
|
||||
*/
|
||||
private static final int INDEX_SIZE_ = 8;
|
||||
|
||||
/**
|
||||
* Elements in the index where addresses are in number of chars.
|
||||
* Size is basically the count and does not depend on the type.
|
||||
*/
|
||||
private char m_stage2indexsize_;
|
||||
private char m_stage3indexsize_;
|
||||
private int m_exception_;
|
||||
private char m_stage3_;
|
||||
private int m_prop_;
|
||||
private char m_end_;
|
||||
|
||||
/**
|
||||
* Size of actual number of bits used in surrogate unicode character
|
||||
*/
|
||||
private static final int USED_SURROGATE_BIT_SIZE_ = 21;
|
||||
|
||||
/**
|
||||
* File format version that this class understands.
|
||||
* No guarantees are made if a older version is used
|
||||
*/
|
||||
private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x50,
|
||||
(byte)0x72, (byte)0x6F};
|
||||
private static final byte DATA_FORMAT_VERSION_[] =
|
||||
{(byte)0x1, (byte)0x1, (byte)0x0, (byte)0x0};
|
||||
|
||||
// constructor =============================================
|
||||
|
||||
/**
 * Constructor, there is nothing to set up before read is called
 */
protected UGenPropReader()
{
}
|
||||
|
||||
// protected methods ==================================================
|
||||
|
||||
/**
|
||||
* Read and fills up UCharacterPptyDB.
|
||||
* If unsuccessful false will be returned
|
||||
* @param input data stream
|
||||
* @param data data instance
|
||||
* @return true if successfully filled
|
||||
* @exception thrown when data reading fails
|
||||
*/
|
||||
protected boolean read(DataInputStream input, UCharacterPropertyDB data)
|
||||
throws Exception
|
||||
{
|
||||
if (super.read(input, data))
|
||||
{
|
||||
// read the indexes
|
||||
if (readIndex(input, data) &&
|
||||
// read the stages block
|
||||
readStage(input, data) &&
|
||||
// read the property data
|
||||
readProperty(input, data) &&
|
||||
// read the exception data
|
||||
readException(input, data))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checking the file for the correct format
|
||||
* @param dataformatid
|
||||
* @param dataformatversion
|
||||
* @return true if the file format version is correct
|
||||
*/
|
||||
protected boolean authenticate(byte dataformatid[],
|
||||
byte dataformatversion[])
|
||||
{
|
||||
int size = DATA_FORMAT_ID_.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
if (DATA_FORMAT_ID_[i] != dataformatid[i])
|
||||
return false;
|
||||
|
||||
size = DATA_FORMAT_VERSION_.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
if (DATA_FORMAT_VERSION_[i] != dataformatversion[i])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the size of the file format version
|
||||
* @return size of file format version in bytes
|
||||
*/
|
||||
protected int getFileFormatVersionSize()
|
||||
{
|
||||
return DATA_FORMAT_VERSION_.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the size of the file format id
|
||||
* @return size of file format id in bytes
|
||||
*/
|
||||
protected int getFileFormatIDSize()
|
||||
{
|
||||
return DATA_FORMAT_ID_.length;
|
||||
}
|
||||
|
||||
// private methods ===================================================
|
||||
|
||||
/**
|
||||
* Read the INDEX_SIZE_ indexes and updates the instance of
|
||||
* UCharacterPropertyDB with the processed shifts and mask
|
||||
* @param input data stream
|
||||
* @param data instance of UCharacterPropertyDB
|
||||
* @return true if successfully read
|
||||
* @exception thrown when data reading fails
|
||||
*/
|
||||
private boolean readIndex(DataInputStream input, UCharacterPropertyDB data)
|
||||
throws Exception
|
||||
{
|
||||
int count = INDEX_SIZE_;
|
||||
m_stage2indexsize_ = input.readChar();
|
||||
count --;
|
||||
m_stage3indexsize_ = input.readChar();
|
||||
count --;
|
||||
m_exception_ = input.readChar();
|
||||
count --;
|
||||
m_stage3_ = input.readChar();
|
||||
count --;
|
||||
m_prop_ = input.readChar();
|
||||
count --;
|
||||
m_end_ = input.readChar();
|
||||
count --;
|
||||
input.skipBytes(count << 1);
|
||||
|
||||
return data.setInfo(m_stage3indexsize_ + m_stage2indexsize_,
|
||||
(int)m_stage3indexsize_,
|
||||
(1 << m_stage2indexsize_) - 1,
|
||||
(1 << m_stage3indexsize_) - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the stages block and updates the instance of UCharacterPropertyDB
|
||||
* with the stages data
|
||||
* @param input data stream
|
||||
* @param data instance of UCharacterPropertyDB
|
||||
* @return true if successfully read
|
||||
* @exception Exception thrown when data reading fails
|
||||
*/
|
||||
private boolean readStage(DataInputStream input, UCharacterPropertyDB data)
|
||||
throws Exception
|
||||
{
|
||||
// size of the 3 stages
|
||||
int stagesize = (m_prop_ << 1) - INDEX_SIZE_;
|
||||
|
||||
char array[] = new char[stagesize];
|
||||
|
||||
int max = 0;
|
||||
int props = m_prop_ - INDEX_SIZE_;
|
||||
// setting up the propery index for stage 1 to 3
|
||||
for (int count = 0; count < stagesize; count ++)
|
||||
{
|
||||
array[count] = (char)(input.readChar() - INDEX_SIZE_);
|
||||
if (max < array[count] && count < 0x448)
|
||||
max = array[count];
|
||||
|
||||
// setting up the property index for stage 3
|
||||
// uprops.dat contain data that includes the address from the top of
|
||||
// index to property data. since the blocks are split up, so now i have
|
||||
// to subtract the excess address from it.
|
||||
if (count >= m_stage3_ - INDEX_SIZE_)
|
||||
array[count] -= props;
|
||||
}
|
||||
|
||||
// synwee : hmm... gaps in stage 2.
|
||||
/*
|
||||
System.out.println("stage 3 " + (int)m_stage3_);
|
||||
System.out.println("stage 2 top " + (max - 0x440 - INDEX_SIZE_));
|
||||
*/
|
||||
|
||||
// setting up the stages block in the instance of UCharacterPropertyDB
|
||||
return data.setStage(array);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the property data block and updates the instance of
|
||||
* UCharacterPropertyDB with the data
|
||||
* @param input data stream
|
||||
* @param data instance of UCharacterPropertyDB
|
||||
* @return true if successfully read
|
||||
* @exception Exception thrown when data reading fails
|
||||
*/
|
||||
private boolean readProperty(DataInputStream input,
|
||||
UCharacterPropertyDB data) throws Exception
|
||||
{
|
||||
// getting size of the property block
|
||||
int size = m_exception_ - m_prop_;
|
||||
int ppty[] = new int[size];
|
||||
for (int i = 0; i < size; i ++)
|
||||
ppty[i] = input.readInt();
|
||||
|
||||
// setting up the property block in the instance of UCharacterPropertyDB
|
||||
return data.setProperty(ppty);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the exception data block and updates the instance of
|
||||
* UCharacterPropertyDB with the data
|
||||
* @param input data stream
|
||||
* @param data instance of UCharacterPropertyDB
|
||||
* @return true if successfully read
|
||||
* @exception Exception thrown when data reading fails
|
||||
*/
|
||||
private boolean readException(DataInputStream input,
|
||||
UCharacterPropertyDB data) throws Exception
|
||||
{
|
||||
int size = m_end_ - m_exception_;
|
||||
int exception[] = new int[size];
|
||||
for (int i = 0; i < size; i ++)
|
||||
exception[i] = input.readInt();
|
||||
|
||||
// setting up the property block in the instance of UCharacterPropertyDB
|
||||
return data.setException(exception);
|
||||
}
|
||||
}
|
192
icu4j/src/com/ibm/icu/text/UGenReader.java
Executable file
192
icu4j/src/com/ibm/icu/text/UGenReader.java
Executable file
|
@ -0,0 +1,192 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2000, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Attic/UGenReader.java,v $
|
||||
* $Date: 2000/12/26 20:00:56 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.EOFException;
|
||||
|
||||
/**
|
||||
* Internal parent reader class reading binary header data from uprops.dat and
|
||||
* unames.dat, created by ICU programs genprops and gennames.
|
||||
* It arranges the header data into meaningful data before
|
||||
* populating <a href=UCharacterDB.html>UCharacterDB</a>. It also authenticates
|
||||
* the data files before proceeding on.
|
||||
* For more information about the format of uprops.dat refer to
|
||||
* <a href=oss.software.ibm.com/icu4j/icu4jhtml/com/ibm/icu/text/readme.html>
|
||||
* ReadMe</a>.<br>
|
||||
* uprops.dat and unames.dat, which are in big-endian format, are jarred together
|
||||
* with this package.
|
||||
* @author Syn Wee Quek
|
||||
* @since oct1000
|
||||
*/
|
||||
|
||||
abstract class UGenReader
|
||||
{
|
||||
// private variables ===========================================
|
||||
|
||||
/**
|
||||
* Magic numbers to authenticate the data file
|
||||
*/
|
||||
private static final byte MAGIC1 = (byte)0xda;
|
||||
private static final byte MAGIC2 = (byte)0x27;
|
||||
|
||||
/**
|
||||
* Size of the field datasize and reservedcharacter
|
||||
*/
|
||||
private static final int SKIP_BYTES_ = 4;
|
||||
|
||||
/**
|
||||
* File format authentication values
|
||||
*/
|
||||
private static final byte BIG_ENDIAN_ = 1;
|
||||
private static final byte CHAR_SET_ = 0;
|
||||
private static final byte CHAR_SIZE_ = 2;
|
||||
private static final byte UNICODE_VERSION_[] = {(byte)0x3, (byte)0x0,
|
||||
(byte)0x0, (byte)0x0};
|
||||
|
||||
// constructor =================================================
|
||||
|
||||
/**
|
||||
* Protected constructor
|
||||
*/
|
||||
protected UGenReader()
|
||||
{
|
||||
}
|
||||
|
||||
// protected methods ===========================================
|
||||
|
||||
/**
|
||||
* Read the data header and fills the relevant information into UCharacterDB.
|
||||
* If unsuccessful false will be returned
|
||||
* @param input data stream
|
||||
* @param data data instance
|
||||
* @return true if successfully filled
|
||||
* @exception thrown when error reading data
|
||||
*/
|
||||
protected boolean read(DataInputStream input, UCharacterDB data)
|
||||
throws Exception
|
||||
{
|
||||
char headersize = input.readChar();
|
||||
headersize -= 2;
|
||||
//reading the header format
|
||||
byte magic1 = input.readByte();
|
||||
headersize --;
|
||||
byte magic2 = input.readByte();
|
||||
headersize --;
|
||||
input.skipBytes(SKIP_BYTES_);
|
||||
headersize -= SKIP_BYTES_;
|
||||
if (authenticate(magic1, magic2))
|
||||
{
|
||||
byte bigendian = input.readByte();
|
||||
headersize --;
|
||||
byte charset = input.readByte();
|
||||
headersize --;
|
||||
byte charsize = input.readByte();
|
||||
headersize --;
|
||||
byte reserved = input.readByte();
|
||||
headersize --;
|
||||
|
||||
byte dataformatid[] = new byte[getFileFormatIDSize()];
|
||||
input.readFully(dataformatid);
|
||||
headersize -= getFileFormatIDSize();
|
||||
byte dataformatversion[] = new byte[getFileFormatVersionSize()];
|
||||
input.readFully(dataformatversion);
|
||||
headersize -= getFileFormatVersionSize();
|
||||
byte unicodeversion[] = new byte[UNICODE_VERSION_.length];
|
||||
input.readFully(unicodeversion);
|
||||
headersize -= UNICODE_VERSION_.length;
|
||||
input.skipBytes(headersize);
|
||||
|
||||
if (authenticate(bigendian, charset, charsize, unicodeversion) &&
|
||||
authenticate(dataformatid, dataformatversion))
|
||||
return setUCharacterDB(data, unicodeversion);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Abstract method for verifying the file format version
|
||||
* @param formatid file format identification
|
||||
* @param formatversion file format version of input file to be verified
|
||||
* @return true if the right file format version is used
|
||||
*/
|
||||
protected abstract boolean authenticate(byte formatid[],
|
||||
byte formatversion[]);
|
||||
|
||||
/**
|
||||
* Abstract method for getting the size of the file format version
|
||||
* @return size of file format version in bytes
|
||||
*/
|
||||
protected abstract int getFileFormatVersionSize();
|
||||
|
||||
/**
|
||||
* Abstract method for getting the size of the file format id
|
||||
* @return size of file format id in bytes
|
||||
*/
|
||||
protected abstract int getFileFormatIDSize();
|
||||
|
||||
// private methods ====================================================
|
||||
|
||||
/**
|
||||
* Checking the file against the magic numbers for authenticity
|
||||
* @param m1 magic number 1
|
||||
* @param m2 magic number 2
|
||||
* @return true if the magic numbers are correct
|
||||
*/
|
||||
private boolean authenticate(byte m1, byte m2)
|
||||
{
|
||||
if (m1 == MAGIC1 && m2 == MAGIC2)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checking the file for the correct format
|
||||
* @param bigendian
|
||||
* @param charset
|
||||
* @param charsize
|
||||
* @param dataformatid
|
||||
* @param dataformatversion
|
||||
* @param unicodeversion
|
||||
* @return true if the file is in bigendian, charset , charsize == 2,
|
||||
* dataformatid 85.80.114.111, dataformatversion dependent on file,
|
||||
* and unicodeversion > 3.0.0.0
|
||||
*/
|
||||
private boolean authenticate(byte bigendian, byte charset, byte charsize,
|
||||
byte unicodeversion[])
|
||||
{
|
||||
if (bigendian != BIG_ENDIAN_ || charset != CHAR_SET_ ||
|
||||
charsize != CHAR_SIZE_)
|
||||
return false;
|
||||
int size = UNICODE_VERSION_.length;
|
||||
for (int i = 0; i < size; i ++)
|
||||
if (UNICODE_VERSION_[i] != unicodeversion[i])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the relevant data into UCharacterDB
|
||||
* @param data UCharacterDB instance to populate
|
||||
* @param unicodeversion version number of the Unicode data information used
|
||||
* @param formatversion icu version number of the uprops.dat and unames.dat
|
||||
* used
|
||||
* @return true if operation is successful, false otherwise
|
||||
*/
|
||||
private boolean setUCharacterDB(UCharacterDB data, byte[] unicodeversion)
|
||||
{
|
||||
boolean result = data.setUnicodeVersion(unicodeversion);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
Loading…
Add table
Reference in a new issue