mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 00:43:32 +00:00
ICU-1897
initial collation commits X-SVN-Rev: 8615
This commit is contained in:
parent
fa460c1481
commit
44672d459f
7 changed files with 6543 additions and 44 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Attic/UCharacterIterator.java,v $
|
||||
* $Date: 2002/04/03 00:00:00 $
|
||||
* $Revision: 1.4 $
|
||||
* $Date: 2002/05/14 16:48:49 $
|
||||
* $Revision: 1.5 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -41,27 +41,62 @@ public final class UCharacterIterator implements CharacterIterator
|
|||
// public constructor ------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Public constructor
|
||||
* Public constructor.
|
||||
* By default the iteration range will be from 0 to the end of the text.
|
||||
* @param replaceable text which the iterator will be based on
|
||||
*/
|
||||
public UCharacterIterator(Replaceable replaceable)
|
||||
{
|
||||
m_replaceable_ = replaceable;
|
||||
m_index_ = 0;
|
||||
m_length_ = replaceable.length();
|
||||
m_start_ = 0;
|
||||
m_limit_ = replaceable.length();
|
||||
}
|
||||
|
||||
/**
|
||||
* Public constructor
|
||||
* By default the iteration range will be from 0 to the end of the text.
|
||||
* @param str text which the iterator will be based on
|
||||
*/
|
||||
public UCharacterIterator(String str)
|
||||
{
|
||||
m_replaceable_ = new ReplaceableString(str);
|
||||
m_index_ = 0;
|
||||
m_length_ = m_replaceable_.length();
|
||||
m_start_ = 0;
|
||||
m_limit_ = m_replaceable_.length();
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs an iterator over the given range of the given string.
|
||||
* @param str text to be iterated over
|
||||
* @param start offset of the first character to iterate
|
||||
* @param limit offset of the character following the last character to
|
||||
* iterate
|
||||
*/
|
||||
public UCharacterIterator(String str, int start, int limit)
{
    m_replaceable_ = new ReplaceableString(str);
    // Reject a range that lies outside the text or is inverted, so that a
    // bad range fails here instead of surfacing later as an out-of-bounds
    // charAt() during iteration. setIndex() reports bad offsets with the
    // same exception type.
    if (start < 0 || start > limit || limit > m_replaceable_.length()) {
        throw new IllegalArgumentException(
                                  "Iteration range start/limit out of bounds");
    }
    m_start_ = start;
    m_limit_ = limit;
    // iteration begins at the start of the range
    m_index_ = m_start_;
}
|
||||
|
||||
/**
|
||||
* Constructs an iterator over the given range of the given replaceable
|
||||
* string.
|
||||
* @param replaceable text to be iterated over
|
||||
* @param start offset of the first character to iterate
|
||||
* @param limit offset of the character following the last character to
|
||||
* iterate
|
||||
*/
|
||||
public UCharacterIterator(Replaceable replaceable, int start, int limit)
{
    // Reject a range that lies outside the text or is inverted, so that a
    // bad range fails here instead of surfacing later as an out-of-bounds
    // charAt() during iteration. setIndex() reports bad offsets with the
    // same exception type.
    if (start < 0 || start > limit || limit > replaceable.length()) {
        throw new IllegalArgumentException(
                                  "Iteration range start/limit out of bounds");
    }
    m_replaceable_ = replaceable;
    m_start_ = start;
    m_limit_ = limit;
    // iteration begins at the start of the range
    m_index_ = m_start_;
}
|
||||
|
||||
// public methods ----------------------------------------------------------
|
||||
|
||||
/**
|
||||
|
@ -87,7 +122,7 @@ public final class UCharacterIterator implements CharacterIterator
|
|||
*/
|
||||
public char current()
|
||||
{
|
||||
if (m_index_ >= 0 && m_index_ < m_length_) {
|
||||
if (m_index_ >= m_start_ && m_index_ < m_limit_) {
|
||||
return m_replaceable_.charAt(m_index_);
|
||||
}
|
||||
return DONE;
|
||||
|
@ -99,7 +134,7 @@ public final class UCharacterIterator implements CharacterIterator
|
|||
*/
|
||||
public int currentCodePoint()
|
||||
{
|
||||
if (m_index_ >= 0 && m_index_ < m_length_) {
|
||||
if (m_index_ >= m_start_ && m_index_ < m_limit_) {
|
||||
return m_replaceable_.char32At(m_index_);
|
||||
}
|
||||
return DONE_CODEPOINT;
|
||||
|
@ -111,26 +146,28 @@ public final class UCharacterIterator implements CharacterIterator
|
|||
*/
|
||||
public char first()
|
||||
{
|
||||
m_index_ = 0;
|
||||
m_index_ = m_start_;
|
||||
return current();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the start of the text.
|
||||
* @return 0
|
||||
* Returns the start of the text to iterate.
|
||||
* @return by default this method will return 0, unless a range for
|
||||
* iteration had been specified during construction.
|
||||
*/
|
||||
public int getBeginIndex()
|
||||
{
|
||||
return 0;
|
||||
return m_start_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the length of the text
|
||||
* @return length of the text
|
||||
* Returns the limit offset of the text to iterate
|
||||
* @return by default this method returns the length of the text, unless a
|
||||
* range for iteration had been specified during construction.
|
||||
*/
|
||||
public int getEndIndex()
|
||||
{
|
||||
return m_length_;
|
||||
return m_limit_;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -143,31 +180,31 @@ public final class UCharacterIterator implements CharacterIterator
|
|||
}
|
||||
|
||||
/**
|
||||
* Gets the last UTF16 character from the text and shifts the index to the
|
||||
* end of the text accordingly.
|
||||
* @return the last UTF16 character
|
||||
* Gets the last UTF16 iterateable character from the text and shifts the
|
||||
* index to the end of the text accordingly.
|
||||
* @return the last UTF16 iterateable character
|
||||
*/
|
||||
public char last()
|
||||
{
|
||||
if (m_length_ != 0) {
|
||||
m_index_ = m_length_ - 1;
|
||||
if (m_limit_ != m_start_) {
|
||||
m_index_ = m_limit_ - 1;
|
||||
return m_replaceable_.charAt(m_index_);
|
||||
}
|
||||
m_index_ = m_length_;
|
||||
m_index_ = m_limit_;
|
||||
return DONE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns next UTF16 character and increments the iterator's index by 1.
|
||||
* If the resulting index is greater or equal to the text length, the
|
||||
* index is reset to the text length and a value of DONE_CODEPOINT is
|
||||
* If the resulting index is greater or equal to the iteration limit, the
|
||||
* index is reset to the text iteration limit and a value of DONE is
|
||||
* returned.
|
||||
* @return next UTF16 character in text or DONE if the new index is off the
|
||||
* end of the text range.
|
||||
* end of the text iteration limit.
|
||||
*/
|
||||
public char next()
|
||||
{
|
||||
if (m_index_ < m_length_) {
|
||||
if (m_index_ < m_limit_) {
|
||||
char result = m_replaceable_.charAt(m_index_);
|
||||
m_index_ ++;
|
||||
return result;
|
||||
|
@ -182,20 +219,20 @@ public final class UCharacterIterator implements CharacterIterator
|
|||
* with surrogate pairs intermixed. If the index of a leading or trailing
|
||||
* code unit of a surrogate pair is given, return the code point after the
|
||||
* surrogate pair.
|
||||
* If the resulting index is greater or equal to the text length, the
|
||||
* current index is reset to the text length and a value of DONE_CODEPOINT
|
||||
* is returned.
|
||||
* If the resulting index is greater or equal to the text iterateable limit,
|
||||
* the current index is reset to the text iterateable limit and a value of
|
||||
* DONE_CODEPOINT is returned.
|
||||
* @return next codepoint in text or DONE_CODEPOINT if the new index is off the
|
||||
* end of the text range.
|
||||
* end of the text iterateable limit.
|
||||
*/
|
||||
public int nextCodePoint()
|
||||
{
|
||||
if (m_index_ < m_length_) {
|
||||
if (m_index_ < m_limit_) {
|
||||
char ch = m_replaceable_.charAt(m_index_);
|
||||
m_index_ ++;
|
||||
if (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
|
||||
ch <= UTF16.LEAD_SURROGATE_MAX_VALUE &&
|
||||
m_index_ < m_length_) {
|
||||
m_index_ < m_limit_) {
|
||||
char trail = m_replaceable_.charAt(m_index_);
|
||||
if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
|
||||
trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
|
||||
|
@ -212,14 +249,15 @@ public final class UCharacterIterator implements CharacterIterator
|
|||
/**
|
||||
* Returns previous UTF16 character and decrements the iterator's index by
|
||||
* 1.
|
||||
* If the resulting index is less than 0, the index is reset to 0 and a
|
||||
* value of DONE_CODEPOINT is returned.
|
||||
* If the resulting index is less than the start of the iteration range, the
|
||||
* index is reset to the start of the text iteration and a value of
|
||||
* DONE is returned.
|
||||
* @return next UTF16 character in text or DONE if the new index is off the
|
||||
* start of the text range.
|
||||
* start of the text iteration range.
|
||||
*/
|
||||
public char previous()
|
||||
{
|
||||
if (m_index_ > 0) {
|
||||
if (m_index_ > m_start_) {
|
||||
m_index_ --;
|
||||
return m_replaceable_.charAt(m_index_);
|
||||
}
|
||||
|
@ -233,19 +271,20 @@ public final class UCharacterIterator implements CharacterIterator
|
|||
* with surrogate pairs intermixed. If the index of a leading or trailing
|
||||
* code unit of a surrogate pair is given, return the code point before the
|
||||
* surrogate pair.
|
||||
* If the resulting index is less than 0, the current index is reset to 0
|
||||
* and a value of DONE_CODEPOINT is returned.
|
||||
* If the resulting index is less than the text iterateable range, the
|
||||
* current index is reset to the start of the range and a value of
|
||||
* DONE_CODEPOINT is returned.
|
||||
* @return previous codepoint in text or DONE_CODEPOINT if the new index is
|
||||
* off the start of the text range.
|
||||
* off the start of the text iteration range.
|
||||
*/
|
||||
public int previousCodePoint()
|
||||
{
|
||||
if (m_index_ > 0) {
|
||||
if (m_index_ > m_start_) {
|
||||
m_index_ --;
|
||||
char ch = m_replaceable_.charAt(m_index_);
|
||||
if (ch >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
|
||||
ch <= UTF16.TRAIL_SURROGATE_MAX_VALUE &&
|
||||
m_index_ > 0) {
|
||||
m_index_ > m_start_) {
|
||||
char lead = m_replaceable_.charAt(m_index_);
|
||||
if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
|
||||
lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
|
||||
|
@ -267,12 +306,11 @@ public final class UCharacterIterator implements CharacterIterator
|
|||
* @exception IllegalArgumentException is thrown if an invalid index is
|
||||
* supplied. i.e. index is out of bounds.
|
||||
* @return the character at the specified index or DONE if the specified
|
||||
* index is equal to the end of the text.
|
||||
* index is equal to the limit of the text iteration range.
|
||||
*/
|
||||
public char setIndex(int index)
|
||||
{
|
||||
int length = m_replaceable_.length();
|
||||
if (index < 0 || index > length) {
|
||||
if (index < m_start_ || index > m_limit_) {
|
||||
throw new IllegalArgumentException("Index index out of bounds");
|
||||
}
|
||||
m_index_ = index;
|
||||
|
@ -290,7 +328,12 @@ public final class UCharacterIterator implements CharacterIterator
|
|||
*/
|
||||
private int m_index_;
|
||||
/**
|
||||
* Replaceable text length
|
||||
* Start offset of iterateable range, by default this is 0
|
||||
*/
|
||||
private int m_length_;
|
||||
private int m_start_;
|
||||
/**
|
||||
* Limit offset of iterateable range, by default this is the length of the
|
||||
* string
|
||||
*/
|
||||
private int m_limit_;
|
||||
}
|
||||
|
|
382
icu4j/src/com/ibm/icu/text/BOSCU.java
Normal file
382
icu4j/src/com/ibm/icu/text/BOSCU.java
Normal file
|
@ -0,0 +1,382 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Attic/BOSCU.java,v $
|
||||
* $Date: 2002/05/14 16:48:48 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import com.ibm.icu.impl.UCharacterIterator;
|
||||
|
||||
/**
|
||||
* <p>Binary Ordered Compression Scheme for Unicode</p>
|
||||
*
|
||||
* <p>Specific application:<br>
|
||||
* Encode a Unicode string for the identical level of a sort key.<br>
|
||||
* Restrictions:
|
||||
* <ul>
|
||||
* <li> byte stream (unsigned 8-bit bytes)
|
||||
* <li> lexical order of the identical-level run must be the same as code
|
||||
* point order for the string
|
||||
* <li> avoid byte values 0, 1, 2
|
||||
* </ul>
|
||||
* </p>
|
||||
*
|
||||
* <p>Method: Slope Detection<br>
|
||||
* Remember the previous code point (initial 0).
|
||||
* For each cp in the string, encode the difference to the previous one.
|
||||
* </p>
|
||||
* <p>With a compact encoding of differences, this yields good results for
|
||||
* small scripts and UTF-like results otherwise.
|
||||
* </p>
|
||||
* <p>Encoding of differences:<br>
|
||||
* <ul>
|
||||
* <li>Similar to a UTF, encoding the length of the byte sequence in the lead
|
||||
* bytes.
|
||||
* <li> Does not need to be friendly for decoding or random access
|
||||
* (trail byte values may overlap with lead/single byte values).
|
||||
* <li> The signedness must be encoded as the most significant part.
|
||||
* </ul>
|
||||
* </p>
|
||||
* <p>We encode differences with few bytes if their absolute values are small.
|
||||
* For correct ordering, we must treat the entire value range -10ffff..+10ffff
|
||||
* in ascending order, which forbids encoding the sign and the absolute value
|
||||
* separately.
|
||||
* Instead, we split the lead byte range in the middle and encode non-negative
|
||||
* values going up and negative values going down.
|
||||
* </p>
|
||||
* <p>For very small absolute values, the difference is added to a middle byte
|
||||
* value for single-byte encoded differences.
|
||||
* For somewhat larger absolute values, the difference is divided by the number
|
||||
* of byte values available, the modulo is used for one trail byte, and the
|
||||
* remainder is added to a lead byte avoiding the single-byte range.
|
||||
* For large absolute values, the difference is similarly encoded in three
|
||||
* bytes.
|
||||
* </p>
|
||||
* <p>This encoding does not use byte values 0, 1, 2, but uses all other byte
|
||||
* values for lead/single bytes so that the middle range of single bytes is as
|
||||
* large as possible.
|
||||
* </p>
|
||||
* <p>Note that the lead byte ranges overlap some, but that the sequences as a
|
||||
* whole are well ordered. I.e., even if the lead byte is the same for
|
||||
* sequences of different lengths, the trail bytes establish correct order.
|
||||
* It would be possible to encode slightly larger ranges for each length (>1)
|
||||
* by subtracting the lower bound of the range. However, that would also slow
|
||||
* down the calculation.
|
||||
* </p>
|
||||
* <p>For the actual string encoding, an optimization moves the previous code
|
||||
* point value to the middle of its Unicode script block to minimize the
|
||||
* differences in same-script text runs.
|
||||
* </p>
|
||||
* @author Syn Wee Quek
|
||||
* @since release 2.2, May 3rd 2002
|
||||
* @draft 2.2
|
||||
*/
|
||||
public class BOSCU
|
||||
{
|
||||
// public constructors --------------------------------------------------
|
||||
|
||||
// public methods -------------------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Encode the code points of a string as a sequence of byte-encoded
|
||||
* differences (slope detection), preserving lexical order.</p>
|
||||
* <p>Optimize the difference-taking for runs of Unicode text within
|
||||
* small scripts:<br>
|
||||
* Most small scripts are allocated within aligned 128-blocks of Unicode
|
||||
* code points. Lexical order is preserved if "prev" is always moved
|
||||
* into the middle of such a block.</p>
|
||||
* <p>Additionally, "prev" is moved from anywhere in the Unihan area into
|
||||
* the middle of that area.</p>
|
||||
* <p>Note that the identical-level run in a sort key is generated from
|
||||
* NFD text - there are never Hangul characters included.</p>
|
||||
* @param source text source
|
||||
* @param buffer output buffer
|
||||
* @param offset to start writing to
|
||||
* @return end offset where the writing stops
|
||||
*/
|
||||
public static int writeIdenticalLevelRun(String source, byte buffer[],
|
||||
int offset)
|
||||
{
|
||||
int prev = 0;
|
||||
UCharacterIterator iterator = new UCharacterIterator(source);
|
||||
int codepoint = iterator.nextCodePoint();
|
||||
while (codepoint != UCharacterIterator.DONE_CODEPOINT) {
|
||||
if (prev < 0x4e00 || prev >= 0xa000) {
|
||||
prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
|
||||
}
|
||||
else {
|
||||
// Unihan U+4e00..U+9fa5:
|
||||
// double-bytes down from the upper end
|
||||
prev = 0x9fff - SLOPE_REACH_POS_2_;
|
||||
}
|
||||
|
||||
offset = writeDiff(codepoint - prev, buffer, offset);
|
||||
prev = codepoint;
|
||||
codepoint = iterator.nextCodePoint();
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* How many bytes would writeIdenticalLevelRun() write?
|
||||
* @param source text source string
|
||||
* @return the length of the BOSCU result
|
||||
*/
|
||||
public static int lengthOfIdenticalLevelRun(String source)
|
||||
{
|
||||
int prev = 0;
|
||||
int result = 0;
|
||||
UCharacterIterator iterator = new UCharacterIterator(source);
|
||||
int codepoint = iterator.nextCodePoint();
|
||||
while (codepoint != UCharacterIterator.DONE_CODEPOINT) {
|
||||
if (prev < 0x4e00 || prev >= 0xa000) {
|
||||
prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
|
||||
}
|
||||
else {
|
||||
// Unihan U+4e00..U+9fa5:
|
||||
// double-bytes down from the upper end
|
||||
prev = 0x9fff - SLOPE_REACH_POS_2_;
|
||||
}
|
||||
|
||||
codepoint = iterator.nextCodePoint();
|
||||
result += lengthOfDiff(codepoint - prev);
|
||||
prev = codepoint;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// public setter methods -------------------------------------------------
|
||||
|
||||
// public getter methods ------------------------------------------------
|
||||
|
||||
// public other methods -------------------------------------------------
|
||||
|
||||
// protected constructor ------------------------------------------------
|
||||
|
||||
// protected data members ------------------------------------------------
|
||||
|
||||
// protected methods -----------------------------------------------------
|
||||
|
||||
// private data members --------------------------------------------------
|
||||
|
||||
/**
|
||||
* Do not use byte values 0, 1, 2 because they are separators in sort keys.
|
||||
*/
|
||||
private static final int SLOPE_MIN_ = 3;
|
||||
private static final int SLOPE_MAX_ = 0xff;
|
||||
private static final int SLOPE_MIDDLE_ = 0x81;
|
||||
private static final int SLOPE_TAIL_COUNT_ = SLOPE_MAX_ - SLOPE_MIN_ + 1;
|
||||
private static final int SLOPE_MAX_BYTES_ = 4;
|
||||
|
||||
/**
|
||||
* Number of lead bytes:
|
||||
* 1 middle byte for 0
|
||||
* 2*80=160 single bytes for !=0
|
||||
* 2*42=84 for double-byte values
|
||||
* 2*3=6 for 3-byte values
|
||||
* 2*1=2 for 4-byte values
|
||||
*
|
||||
* The sum must be <=SLOPE_TAIL_COUNT.
|
||||
*
|
||||
* Why these numbers?
|
||||
* - There should be >=128 single-byte values to cover 128-blocks
|
||||
* with small scripts.
|
||||
* - There should be >=20902 single/double-byte values to cover Unihan.
|
||||
* - It helps CJK Extension B some if there are 3-byte values that cover
|
||||
* the distance between them and Unihan.
|
||||
* This also helps to jump among distant places in the BMP.
|
||||
* - Four-byte values are necessary to cover the rest of Unicode.
|
||||
*
|
||||
* Symmetrical lead byte counts are for convenience.
|
||||
* With an equal distribution of even and odd differences there is also
|
||||
* no advantage to asymmetrical lead byte counts.
|
||||
*/
|
||||
private static final int SLOPE_SINGLE_ = 80;
|
||||
private static final int SLOPE_LEAD_2_ = 42;
|
||||
private static final int SLOPE_LEAD_3_ = 3;
|
||||
private static final int SLOPE_LEAD_4_ = 1;
|
||||
|
||||
/**
|
||||
* The difference value range for single-byters.
|
||||
*/
|
||||
private static final int SLOPE_REACH_POS_1_ = SLOPE_SINGLE_;
|
||||
private static final int SLOPE_REACH_NEG_1_ = (-SLOPE_SINGLE_);
|
||||
|
||||
/**
|
||||
* The difference value range for double-byters.
|
||||
*/
|
||||
private static final int SLOPE_REACH_POS_2_ =
|
||||
SLOPE_LEAD_2_ * SLOPE_TAIL_COUNT_ + SLOPE_LEAD_2_ - 1;
|
||||
private static final int SLOPE_REACH_NEG_2_ = (-SLOPE_REACH_POS_2_ - 1);
|
||||
|
||||
/**
|
||||
* The difference value range for 3-byters.
|
||||
*/
|
||||
private static final int SLOPE_REACH_POS_3_ = SLOPE_LEAD_3_
|
||||
* SLOPE_TAIL_COUNT_
|
||||
* SLOPE_TAIL_COUNT_
|
||||
+ (SLOPE_LEAD_3_ - 1)
|
||||
* SLOPE_TAIL_COUNT_ +
|
||||
(SLOPE_TAIL_COUNT_ - 1);
|
||||
private static final int SLOPE_REACH_NEG_3_ = (-SLOPE_REACH_POS_3_ - 1);
|
||||
|
||||
/**
|
||||
* The lead byte start values.
|
||||
*/
|
||||
private static final int SLOPE_START_POS_2_ = SLOPE_MIDDLE_
|
||||
+ SLOPE_SINGLE_ + 1;
|
||||
private static final int SLOPE_START_POS_3_ = SLOPE_START_POS_2_
|
||||
+ SLOPE_LEAD_2_;
|
||||
private static final int SLOPE_START_NEG_2_ = SLOPE_MIDDLE_ +
|
||||
SLOPE_REACH_NEG_1_;
|
||||
private static final int SLOPE_START_NEG_3_ = SLOPE_START_NEG_2_
|
||||
- SLOPE_LEAD_2_;
|
||||
|
||||
// private constructor ---------------------------------------------------
|
||||
|
||||
/**
|
||||
* Constructor private to prevent initialization
|
||||
*/
|
||||
private BOSCU()
{
    // intentionally empty: private so that the class cannot be instantiated
}
|
||||
|
||||
// private methods -------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Integer division and modulo with negative numerators
|
||||
* yields negative modulo results and quotients that are one more than
|
||||
* what we need here.
|
||||
* @param number which operations are to be performed on
|
||||
* @param factor the factor to use for division
|
||||
* @return (result of division) << 32 | modulo
|
||||
*/
|
||||
private static final long getNegDivMod(int number, int factor)
|
||||
{
|
||||
int modulo = number % factor;
|
||||
long result = number / factor;
|
||||
if (modulo < 0) {
|
||||
-- result;
|
||||
modulo += factor;
|
||||
}
|
||||
return (result << 32) | modulo;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes,
|
||||
* preserving lexical order
|
||||
* @param diff
|
||||
* @param buffer byte buffer to append to
|
||||
* @param offset to the byte buffer to start appending
|
||||
* @return end offset where the appending stops
|
||||
*/
|
||||
private static final int writeDiff(int diff, byte buffer[], int offset)
|
||||
{
|
||||
if (diff >= SLOPE_REACH_NEG_1_) {
|
||||
if (diff <= SLOPE_REACH_POS_1_) {
|
||||
buffer[offset ++] = (byte)(SLOPE_MIDDLE_ + diff);
|
||||
}
|
||||
else if (diff <= SLOPE_REACH_POS_2_) {
|
||||
buffer[offset ++] = (byte)(SLOPE_START_POS_2_
|
||||
+ (diff / SLOPE_TAIL_COUNT_));
|
||||
buffer[offset ++] = (byte)(SLOPE_MIN_ +
|
||||
(diff % SLOPE_TAIL_COUNT_));
|
||||
}
|
||||
else if (diff <= SLOPE_REACH_POS_3_) {
|
||||
buffer[offset + 2] = (byte)(SLOPE_MIN_
|
||||
+ (diff % SLOPE_TAIL_COUNT_));
|
||||
diff /= SLOPE_TAIL_COUNT_;
|
||||
buffer[offset + 1] = (byte)(SLOPE_MIN_
|
||||
+ (diff % SLOPE_TAIL_COUNT_));
|
||||
buffer[offset] = (byte)(SLOPE_START_POS_3_
|
||||
+ (diff / SLOPE_TAIL_COUNT_));
|
||||
offset += 3;
|
||||
}
|
||||
else {
|
||||
buffer[offset + 3] = (byte)(SLOPE_MIN_
|
||||
+ diff % SLOPE_TAIL_COUNT_);
|
||||
diff /= SLOPE_TAIL_COUNT_;
|
||||
buffer[offset] = (byte)(SLOPE_MIN_
|
||||
+ diff % SLOPE_TAIL_COUNT_);
|
||||
diff /= SLOPE_TAIL_COUNT_;
|
||||
buffer[offset + 1] = (byte)(SLOPE_MIN_
|
||||
+ diff % SLOPE_TAIL_COUNT_);
|
||||
buffer[offset] = (byte)SLOPE_MAX_;
|
||||
offset += 4;
|
||||
}
|
||||
}
|
||||
else {
|
||||
long division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
|
||||
int modulo = (int)division;
|
||||
if (diff >= SLOPE_REACH_NEG_2_) {
|
||||
diff = (int)(division >> 32);
|
||||
buffer[offset ++] = (byte)(SLOPE_START_NEG_2_ + diff);
|
||||
buffer[offset ++] = (byte)(SLOPE_MIN_ + modulo);
|
||||
}
|
||||
else if (diff >= SLOPE_REACH_NEG_3_) {
|
||||
buffer[offset + 2] = (byte)(SLOPE_MIN_ + modulo);
|
||||
diff = (int)(division >> 32);
|
||||
division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
|
||||
modulo = (int)division;
|
||||
diff = (int)(division >> 32);
|
||||
buffer[offset + 1] = (byte)(SLOPE_MIN_ + modulo);
|
||||
buffer[offset] = (byte)(SLOPE_START_NEG_3_ + diff);
|
||||
offset += 3;
|
||||
}
|
||||
else {
|
||||
buffer[offset + 3] = (byte)(SLOPE_MIN_ + modulo);
|
||||
diff = (int)(division >> 32);
|
||||
division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
|
||||
modulo = (int)division;
|
||||
diff = (int)(division >> 32);
|
||||
buffer[offset + 2] = (byte)(SLOPE_MIN_ + modulo);
|
||||
division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
|
||||
modulo = (int)division;
|
||||
buffer[offset + 1] = (byte)(SLOPE_MIN_ + modulo);
|
||||
buffer[offset] = SLOPE_MIN_;
|
||||
offset += 4;
|
||||
}
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
|
||||
/**
|
||||
* How many bytes would writeDiff() write?
|
||||
* @param diff
|
||||
*/
|
||||
private static final int lengthOfDiff(int diff)
|
||||
{
|
||||
if (diff >= SLOPE_REACH_NEG_1_) {
|
||||
if (diff <= SLOPE_REACH_POS_1_) {
|
||||
return 1;
|
||||
}
|
||||
else if (diff <= SLOPE_REACH_POS_2_) {
|
||||
return 2;
|
||||
}
|
||||
else if(diff <= SLOPE_REACH_POS_3_) {
|
||||
return 3;
|
||||
}
|
||||
else {
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (diff >= SLOPE_REACH_NEG_2_) {
|
||||
return 2;
|
||||
}
|
||||
else if (diff >= SLOPE_REACH_NEG_3_) {
|
||||
return 3;
|
||||
}
|
||||
else {
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
2116
icu4j/src/com/ibm/icu/text/CollationElementIterator.java
Executable file
2116
icu4j/src/com/ibm/icu/text/CollationElementIterator.java
Executable file
File diff suppressed because it is too large
Load diff
260
icu4j/src/com/ibm/icu/text/CollationKey.java
Executable file
260
icu4j/src/com/ibm/icu/text/CollationKey.java
Executable file
|
@ -0,0 +1,260 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollationKey.java,v $
|
||||
* $Date: 2002/05/14 16:48:49 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* <p>A <code>CollationKey</code> represents a <code>String</code> under the
|
||||
* rules of a specific <code>Collator</code> object. Comparing two
|
||||
* <code>CollationKey</code>s returns the relative order of the
|
||||
* <code>String</code>s they represent. Using <code>CollationKey</code>s to
|
||||
* compare <code>String</code>s is generally faster than using
|
||||
* <code>Collator.compare</code>. Thus, when the <code>String</code>s must be
|
||||
* compared multiple times, for example when sorting a list of
|
||||
* <code>String</code>s, it is more efficient to use <code>CollationKey</code>s.
|
||||
* </p>
|
||||
* <p>You can not create <code>CollationKey</code>s directly. Rather, generate
|
||||
* them by calling <code>Collator.getCollationKey(String)</code>. You can only
|
||||
* compare <code>CollationKey</code>s generated from the same
|
||||
* <code>Collator</code> object.</p>
|
||||
* <p>Generating a <code>CollationKey</code> for a <code>String</code>
|
||||
* involves examining the entire <code>String</code> and converting it to
|
||||
* series of bits that can be compared bitwise. This allows fast comparisons
|
||||
* once the keys are generated. The cost of generating keys is recouped in
|
||||
* faster comparisons when <code>String</code>s need to be compared many
|
||||
* times. On the other hand, the result of a comparison is often determined by
|
||||
* the first couple of characters of each <code>String</code>.
|
||||
* <code>Collator.compare(String, String)</code> examines only as many characters as it needs
|
||||
* which allows it to be faster when doing single comparisons.</p>
|
||||
* <p>The following example shows how <code>CollationKey</code>s might be used
|
||||
* to sort a list of <code>String</code>s.</p>
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* // Create an array of CollationKeys for the Strings to be sorted.
|
||||
* Collator myCollator = Collator.getInstance();
|
||||
* CollationKey[] keys = new CollationKey[3];
|
||||
* keys[0] = myCollator.getCollationKey("Tom");
|
||||
* keys[1] = myCollator.getCollationKey("Dick");
|
||||
* keys[2] = myCollator.getCollationKey("Harry");
|
||||
* sort( keys );
|
||||
* <br>
|
||||
* //...
|
||||
* <br>
|
||||
* // Inside body of sort routine, compare keys this way
|
||||
* if( keys[i].compareTo( keys[j] ) > 0 )
|
||||
* // swap keys[i] and keys[j]
|
||||
* <br>
|
||||
* //...
|
||||
* <br>
|
||||
* // Finally, when we've returned from sort.
|
||||
* System.out.println( keys[0].getSourceString() );
|
||||
* System.out.println( keys[1].getSourceString() );
|
||||
* System.out.println( keys[2].getSourceString() );
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
*
|
||||
* @see Collator
|
||||
* @see RuleBasedCollator
|
||||
* @author Syn Wee Quek
|
||||
* @since release 2.2, April 18 2002
|
||||
* @draft 2.2
|
||||
*/
|
||||
public final class CollationKey implements Comparable
|
||||
{
|
||||
// public methods -------------------------------------------------------
|
||||
|
||||
// public getters -------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns the String that this CollationKey represents.
|
||||
* @return source string that this CollationKey represents
|
||||
* @draft 2.2
|
||||
*/
|
||||
public String getSourceString()
|
||||
{
|
||||
return m_source_;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Duplicates and returns the value of this CollationKey as a sequence
|
||||
* of big-endian bytes.</p>
|
||||
* <p>If two CollationKeys could be legitimately compared, then one could
|
||||
* compare the byte arrays of each to obtain the same result.</p>
|
||||
* @return CollationKey value in a sequence of big-endian bytes.
|
||||
* @draft 2.2
|
||||
*/
|
||||
public byte[] toByteArray()
|
||||
{
|
||||
int length = 0;
|
||||
while (true) {
|
||||
if (m_key_[length] == 0) {
|
||||
break;
|
||||
}
|
||||
length ++;
|
||||
}
|
||||
length ++;
|
||||
byte result[] = new byte[length];
|
||||
System.arraycopy(m_key_, 0, result, 0, length);
|
||||
return result;
|
||||
}
|
||||
|
||||
// public other methods -------------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Compare this CollationKey to the target CollationKey. The collation
|
||||
* rules of the Collator object which created these keys are applied.</p>
|
||||
* <p><strong>Note:</strong> CollationKeys created by different Collators
|
||||
* can not be compared.</p>
|
||||
* @param target target CollationKey
|
||||
* @return an integer value, if value is less than zero this CollationKey
|
||||
* is less than target, value is zero if they are equal
|
||||
* and value is greater than zero if this CollationKey is greater
|
||||
* than target.
|
||||
* @see Collator#compare(String, String)
|
||||
* @draft 2.2
|
||||
*/
|
||||
public int compareTo(CollationKey target)
|
||||
{
|
||||
int i = 0;
|
||||
while (m_key_[i] != 0 && target.m_key_[i] != 0) {
|
||||
int key = m_key_[i] & 0xFF;
|
||||
int targetkey = target.m_key_[i] & 0xFF;
|
||||
if (key < targetkey) {
|
||||
return -1;
|
||||
}
|
||||
if (targetkey < key) {
|
||||
return 1;
|
||||
}
|
||||
i ++;
|
||||
}
|
||||
// last comparison if we encounter a 0
|
||||
int key = m_key_[i] & 0xFF;
|
||||
int targetkey = target.m_key_[i] & 0xFF;
|
||||
if (key < targetkey) {
|
||||
return -1;
|
||||
}
|
||||
if (targetkey < key) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Compares this CollationKey with the specified Object.</p>
|
||||
* @param obj the Object to be compared.
|
||||
* @return Returns a negative integer, zero, or a positive integer
|
||||
* respectively if this CollationKey is less than, equal to, or
|
||||
* greater than the given Object.
|
||||
* @exception ClassCastException thrown when the specified Object is not a
|
||||
* CollationKey.
|
||||
* @see #compareTo(CollationKey)
|
||||
* @draft 2.2
|
||||
*/
|
||||
public int compareTo(Object obj)
|
||||
{
|
||||
return compareTo((CollationKey)obj);
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Compare this CollationKey and the target CollationKey for equality.
|
||||
* </p>
|
||||
* <p>The collation rules of the Collator object which created these keys
|
||||
* are applied.</p>
|
||||
* <p><strong>Note:</strong> CollationKeys created by different Collators
|
||||
* can not be compared.</p>
|
||||
* @param target the CollationKey to compare to.
|
||||
* @return true if two objects are equal, false otherwise.
|
||||
* @draft 2.2
|
||||
*/
|
||||
public boolean equals(Object target)
|
||||
{
|
||||
if (this == target) {
|
||||
return true;
|
||||
}
|
||||
if (target == null || !(target instanceof CollationKey)) {
|
||||
return false;
|
||||
}
|
||||
CollationKey other = (CollationKey)target;
|
||||
int i = 0;
|
||||
while (true) {
|
||||
if (m_key_[i] != other.m_key_[i]) {
|
||||
return false;
|
||||
}
|
||||
if (m_key_[i] == 0) {
|
||||
break;
|
||||
}
|
||||
i ++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Creates a hash code for this CollationKey. The hash value is
|
||||
* calculated on the key itself, not the String from which the key was
|
||||
* created. Thus if x and y are CollationKeys, then
|
||||
* x.hashCode() == y.hashCode() if x.equals(y) is true. This allows
|
||||
* language-sensitive comparison in a hash table.</p>
|
||||
* <p>See the CollationKey class description for an example.</p>
|
||||
* @return the hash value.
|
||||
* @draft 2.2
|
||||
*/
|
||||
public int hashCode()
|
||||
{
|
||||
if (m_hashCode_ == 0) {
|
||||
int size = m_key_.length >> 1;
|
||||
StringBuffer key = new StringBuffer(size);
|
||||
int i = 0;
|
||||
while (m_key_[i] != 0 && m_key_[i + 1] != 0) {
|
||||
key.append((m_key_[i] << 8) | m_key_[i + 1]);
|
||||
i += 2;
|
||||
}
|
||||
if (m_key_[i] != 0) {
|
||||
key.append(m_key_[i] << 8);
|
||||
}
|
||||
m_hashCode_ = key.hashCode();
|
||||
}
|
||||
return m_hashCode_;
|
||||
}
|
||||
|
||||
// protected constructor ------------------------------------------------
|
||||
|
||||
/**
|
||||
* Protected CollationKey can only be generated by Collator objects
|
||||
* @param source string the CollationKey represents
|
||||
* @param key sort key array of bytes
|
||||
* @param size of sort key
|
||||
* @draft 2.2
|
||||
*/
|
||||
CollationKey(String source, byte key[])
{
    // 0 is the value hashCode() treats as "hash not computed yet"
    m_hashCode_ = 0;
    // the key array is stored as passed in, without copying
    m_key_ = key;
    m_source_ = source;
}
|
||||
|
||||
// private data members -------------------------------------------------
|
||||
|
||||
/**
|
||||
* Source string this CollationKey represents
|
||||
*/
|
||||
private String m_source_;
|
||||
/**
|
||||
* Sequence of bytes that represents the sort key
|
||||
*/
|
||||
private byte m_key_[];
|
||||
/**
|
||||
* Hash code for the key
|
||||
*/
|
||||
private int m_hashCode_;
|
||||
}
|
454
icu4j/src/com/ibm/icu/text/Collator.java
Executable file
454
icu4j/src/com/ibm/icu/text/Collator.java
Executable file
|
@ -0,0 +1,454 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Collator.java,v $
|
||||
* $Date: 2002/05/14 16:48:49 $
|
||||
* $Revision: 1.4 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* <p>The Collator class performs locale-sensitive String comparison.
|
||||
* You use this class to build searching and sorting routines for natural
|
||||
* language text.</p>
|
||||
* <p>Collator is an abstract base class. Subclasses implement specific
|
||||
* collation strategies. One subclass, RuleBasedCollator, is currently
|
||||
* provided and is applicable to a wide set of languages. Other subclasses
|
||||
* may be created to handle more specialized needs.</p>
|
||||
* <p>Like other locale-sensitive classes, you can use the static factory
|
||||
* method, getInstance, to obtain the appropriate Collator object for a given
|
||||
* locale. You will only need to look at the subclasses of Collator if you need
|
||||
* to understand the details of a particular collation strategy or if you need
|
||||
* to modify that strategy. </p>
|
||||
* <p>The following example shows how to compare two strings using the Collator
|
||||
* for the default locale.
|
||||
* <pre>
|
||||
* // Compare two strings in the default locale
|
||||
* Collator myCollator = Collator.getInstance();
|
||||
* if (myCollator.compare("abc", "ABC") < 0) {
|
||||
* System.out.println("abc is less than ABC");
|
||||
* }
|
||||
* else {
|
||||
* System.out.println("abc is greater than or equal to ABC");
|
||||
* }
|
||||
* </pre>
|
||||
* <p>You can set a <code>Collator</code>'s <em>strength</em> property to
|
||||
* determine the level of difference considered significant in comparisons.
|
||||
* Four strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>,
|
||||
* <code>TERTIARY</code>, and <code>IDENTICAL</code>. The exact assignment of
|
||||
* strengths to language features is locale dependant. For example, in Czech,
|
||||
* "e" and "f" are considered primary differences, while "e" and "\u00EA" are
|
||||
* secondary differences, "e" and "E" are tertiary differences and "e" and "e"
|
||||
* are identical. The following shows how both case and accents could be
|
||||
* ignored for US English.</p>
|
||||
* <pre>
|
||||
* //Get the Collator for US English and set its strength to PRIMARY
|
||||
* Collator usCollator = Collator.getInstance(Locale.US);
|
||||
* usCollator.setStrength(Collator.PRIMARY);
|
||||
* if (usCollator.compare("abc", "ABC") == 0) {
|
||||
* System.out.println("Strings are equivalent");
|
||||
* }
|
||||
* </pre>
|
||||
* <p>For comparing Strings exactly once, the compare method provides the best
|
||||
* performance. When sorting a list of Strings however, it is generally
|
||||
* necessary to compare each String multiple times. In this case,
|
||||
* CollationKeys provide better performance. The CollationKey class converts a
|
||||
* String to a series of bits that can be compared bitwise against other
|
||||
* CollationKeys. A CollationKey is created by a Collator object for a given
|
||||
* String.</p>
|
||||
* <p>Note: CollationKeys from different Collators can not be compared. See the
|
||||
* class description for CollationKey for an example using CollationKeys.
|
||||
* </p>
|
||||
* @author Syn Wee Quek
|
||||
* @since release 2.2, April 18 2002
|
||||
* @draft 2.2
|
||||
*/
|
||||
|
||||
public abstract class Collator
|
||||
{
|
||||
// public data members ---------------------------------------------------
|
||||
|
||||
/**
|
||||
* Collator strength value. When set, only PRIMARY differences are
|
||||
* considered significant during comparison. The assignment of strengths
|
||||
* to language features is locale dependant. A common example is for
|
||||
* different base letters ("a" vs "b") to be considered a PRIMARY
|
||||
* difference.
|
||||
* @see #setStrength
|
||||
* @see #getStrength
|
||||
* @draft 2.2
|
||||
*/
|
||||
public final static int PRIMARY
|
||||
= RuleBasedCollator.AttributeValue.PRIMARY_;
|
||||
/**
|
||||
* Collator strength value. When set, only SECONDARY and above
|
||||
* differences are considered significant during comparison. The
|
||||
* assignment of strengths to language features is locale dependant. A
|
||||
* common example is for different accented forms of the same base letter
|
||||
* ("a" vs "\u00E4") to be considered a SECONDARY difference.
|
||||
* @see #setStrength
|
||||
* @see #getStrength
|
||||
* @draft 2.2
|
||||
*/
|
||||
public final static int SECONDARY
|
||||
= RuleBasedCollator.AttributeValue.SECONDARY_;
|
||||
/**
|
||||
* Collator strength value. When set, only TERTIARY and above differences
|
||||
* are considered significant during comparison. The assignment of
|
||||
* strengths to language features is locale dependant. A common example is
|
||||
* for case differences ("a" vs "A") to be considered a TERTIARY
|
||||
* difference.
|
||||
* @see #setStrength
|
||||
* @see #getStrength
|
||||
* @draft 2.2
|
||||
*/
|
||||
public final static int TERTIARY
|
||||
= RuleBasedCollator.AttributeValue.TERTIARY_;
|
||||
|
||||
/**
|
||||
* Collator strength value. When set, only QUATERNARY and above differences
|
||||
* are considered significant during comparison. The assignment of
|
||||
* strengths to language features is locale dependant.
|
||||
* difference.
|
||||
* @see #setStrength
|
||||
* @see #getStrength
|
||||
* @draft 2.2
|
||||
*/
|
||||
public final static int QUATERNARY
|
||||
= RuleBasedCollator.AttributeValue.QUATERNARY_;
|
||||
|
||||
/**
|
||||
* <p>Collator strength value. When set, all differences are considered
|
||||
* significant during comparison. The assignment of strengths to language
|
||||
* features is locale dependant. A common example is for control
|
||||
* characters ("\u0001" vs "\u0002") to be considered equal at
|
||||
* the PRIMARY, SECONDARY, and TERTIARY levels but different at the
|
||||
* IDENTICAL level. Additionally, differences between pre-composed
|
||||
* accents such as "\u00C0" (A-grave) and combining accents such as
|
||||
* "A\u0300" (A, combining-grave) will be considered significant at
|
||||
* the tertiary level if decomposition is set to NO_DECOMPOSITION.
|
||||
* </p>
|
||||
* <p>Note this value is different from JDK's</p>
|
||||
* @draft 2.2
|
||||
*/
|
||||
public final static int IDENTICAL
|
||||
= RuleBasedCollator.AttributeValue.IDENTICAL_;
|
||||
|
||||
/**
|
||||
* <p>Decomposition mode value. With NO_DECOMPOSITION set, accented
|
||||
* characters will not be decomposed for collation. This is the default
|
||||
* setting and provides the fastest collation but will only produce
|
||||
* correct results for languages that do not use accents.</p>
|
||||
* <p>Note this value is different from JDK's</p>
|
||||
* @see #getDecomposition
|
||||
* @see #setDecomposition
|
||||
* @draft 2.2
|
||||
*/
|
||||
public final static int NO_DECOMPOSITION
|
||||
= RuleBasedCollator.AttributeValue.OFF_;
|
||||
|
||||
/**
|
||||
* <p>Decomposition mode value. With CANONICAL_DECOMPOSITION set,
|
||||
* characters that are canonical variants according to Unicode 2.0 will be
|
||||
* decomposed for collation. This should be used to get correct collation
|
||||
* of accented characters.</p>
|
||||
* <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
|
||||
* described in <a href="http://www.unicode.org/unicode/reports/tr15/">
|
||||
* Unicode Technical Report #15</a>.</p>
|
||||
* @see #getDecomposition
|
||||
* @see #setDecomposition
|
||||
* @draft 2.2
|
||||
*/
|
||||
public final static int CANONICAL_DECOMPOSITION = 1;
|
||||
|
||||
/**
|
||||
* <p>Decomposition mode value. With FULL_DECOMPOSITION set, both Unicode
|
||||
* canonical variants and Unicode compatibility variants will be
|
||||
* decomposed for collation. This causes not only accented characters to
|
||||
* be collated, but also characters that have special formats to be
|
||||
* collated with their nominal form. For example, the half-width and
|
||||
* full-width ASCII and Katakana characters are then collated together.
|
||||
* FULL_DECOMPOSITION is the most complete and therefore the slowest
|
||||
* decomposition mode.</p>
|
||||
* <p>
|
||||
* FULL_DECOMPOSITION corresponds to Normalization Form KD as described in
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode
|
||||
* Technical Report #15</a>.</p>
|
||||
* @see #getDecomposition
|
||||
* @see #setDecomposition
|
||||
* @draft 2.2
|
||||
*/
|
||||
public final static int FULL_DECOMPOSITION = 2;
|
||||
|
||||
// public methods --------------------------------------------------------
|
||||
|
||||
// public setters --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Sets this Collator's strength property. The strength property
|
||||
* determines the minimum level of difference considered significant
|
||||
* during comparison.</p>
|
||||
* <p>See the Collator class description for an example of use.</p>
|
||||
* @param newStrength the new strength value.
|
||||
* @see #getStrength
|
||||
* @see #PRIMARY
|
||||
* @see #SECONDARY
|
||||
* @see #TERTIARY
|
||||
* @see #IDENTICAL
|
||||
* @exception IllegalArgumentException If the new strength value is not one of
|
||||
* PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
|
||||
* @draft 2.2
|
||||
*/
|
||||
public synchronized void setStrength(int newStrength) {
|
||||
if ((newStrength != PRIMARY) &&
|
||||
(newStrength != SECONDARY) &&
|
||||
(newStrength != TERTIARY) &&
|
||||
(newStrength != QUATERNARY) &&
|
||||
(newStrength != IDENTICAL)) {
|
||||
throw new IllegalArgumentException("Incorrect comparison level.");
|
||||
}
|
||||
m_strength_ = newStrength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the decomposition mode of this Collator. See getDecomposition
|
||||
* for a description of decomposition mode.
|
||||
* @param decomposition the new decomposition mode
|
||||
* @see #getDecomposition
|
||||
* @see #NO_DECOMPOSITION
|
||||
* @see #CANONICAL_DECOMPOSITION
|
||||
* @see #FULL_DECOMPOSITION
|
||||
* @exception IllegalArgumentException If the given value is not a valid decomposition
|
||||
* mode.
|
||||
* @draft 2.2
|
||||
*/
|
||||
public synchronized void setDecomposition(int decomposition) {
|
||||
if ((decomposition != NO_DECOMPOSITION) &&
|
||||
(decomposition != CANONICAL_DECOMPOSITION) &&
|
||||
(decomposition != FULL_DECOMPOSITION)) {
|
||||
throw new IllegalArgumentException("Wrong decomposition mode.");
|
||||
}
|
||||
if (decomposition != NO_DECOMPOSITION) {
|
||||
m_decomposition_ = decomposition;
|
||||
}
|
||||
else {
|
||||
m_decomposition_ = CANONICAL_DECOMPOSITION;
|
||||
}
|
||||
}
|
||||
|
||||
// public getters --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Gets the Collator for the current default locale.
|
||||
* The default locale is determined by java.util.Locale.getDefault().
|
||||
* @return the Collator for the default locale (for example, en_US) if it
|
||||
* is created successfully, otherwise if there is a failure,
|
||||
* the default UCA collator will be returned.
|
||||
* @see java.util.Locale#getDefault
|
||||
* @draft 2.2
|
||||
*/
|
||||
public static final Collator getInstance()
|
||||
{
|
||||
return getInstance(Locale.getDefault());
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the Collator for the desired locale.
|
||||
* @param locale the desired locale.
|
||||
* @return Collator for the desired locale if it is created successfully,
|
||||
* otherwise if there is a failure, the default UCA collator will
|
||||
* be returned.
|
||||
* @see java.util.Locale
|
||||
* @see java.util.ResourceBundle
|
||||
* @draft 2.2
|
||||
*/
|
||||
public static final Collator getInstance(Locale locale)
|
||||
{
|
||||
try {
|
||||
return new RuleBasedCollator(locale);
|
||||
}
|
||||
catch(Exception e) {
|
||||
return RuleBasedCollator.UCA_;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Returns this Collator's strength property. The strength property
|
||||
* determines the minimum level of difference considered significant
|
||||
* during comparison.</p>
|
||||
* <p>See the Collator class description for an example of use.</p>
|
||||
* @return this Collator's current strength property.
|
||||
* @see #setStrength
|
||||
* @see #PRIMARY
|
||||
* @see #SECONDARY
|
||||
* @see #TERTIARY
|
||||
* @see #IDENTICAL
|
||||
* @draft 2.2
|
||||
*/
|
||||
public int getStrength()
|
||||
{
|
||||
return m_strength_;
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Get the decomposition mode of this Collator. Decomposition mode
|
||||
* determines how Unicode composed characters are handled. Adjusting
|
||||
* decomposition mode allows the user to select between faster and more
|
||||
* complete collation behavior.
|
||||
* <p>The three values for decomposition mode are:
|
||||
* <UL>
|
||||
* <LI>NO_DECOMPOSITION,
|
||||
* <LI>CANONICAL_DECOMPOSITION
|
||||
* <LI>FULL_DECOMPOSITION.
|
||||
* </UL>
|
||||
* See the documentation for these three constants for a description
|
||||
* of their meaning.
|
||||
* </p>
|
||||
* @return the decomposition mode
|
||||
* @see #setDecomposition
|
||||
* @see #NO_DECOMPOSITION
|
||||
* @see #CANONICAL_DECOMPOSITION
|
||||
* @see #FULL_DECOMPOSITION
|
||||
* @draft 2.2
|
||||
*/
|
||||
public int getDecomposition()
|
||||
{
|
||||
return m_decomposition_;
|
||||
}
|
||||
|
||||
// public other methods -------------------------------------------------
|
||||
|
||||
/**
|
||||
* Convenience method for comparing the equality of two strings based on
|
||||
* this Collator's collation rules.
|
||||
* @param source the source string to be compared with.
|
||||
* @param target the target string to be compared with.
|
||||
* @return true if the strings are equal according to the collation
|
||||
* rules. false, otherwise.
|
||||
* @see #compare
|
||||
* @draft 2.2
|
||||
*/
|
||||
public boolean equals(String source, String target)
|
||||
{
|
||||
return (compare(source, target) == 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cloning this Collator.
|
||||
* @return a cloned Collator of this object
|
||||
* @draft 2.2
|
||||
*/
|
||||
public Object clone()
|
||||
{
|
||||
try {
|
||||
return (Collator)super.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new InternalError();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares the equality of two Collators.
|
||||
* @param that the Collator to be compared with this.
|
||||
* @return true if this Collator is the same as that Collator;
|
||||
* false otherwise.
|
||||
* @draft 2.2
|
||||
*/
|
||||
public boolean equals(Object that)
|
||||
{
|
||||
if (this == that) {
|
||||
return true;
|
||||
}
|
||||
if (that == null || getClass() != that.getClass()) {
|
||||
return false;
|
||||
}
|
||||
Collator other = (Collator) that;
|
||||
return ((m_strength_ == other.m_strength_) &&
|
||||
(m_decomposition_ == other.m_decomposition_));
|
||||
}
|
||||
|
||||
// public abstract methods -----------------------------------------------
|
||||
|
||||
/**
|
||||
* Generates the hash code for this Collator.
|
||||
* @draft 2.2
|
||||
*/
|
||||
public abstract int hashCode();
|
||||
|
||||
/**
|
||||
* <p>Compares the source string to the target string according to the
|
||||
* collation rules for this Collator. Returns an integer less than, equal
|
||||
* to or greater than zero depending on whether the source String is less
|
||||
* than, equal to or greater than the target string. See the Collator
|
||||
* class description for an example of use.</p>
|
||||
* <p>For a one time comparison, this method has the best performance. If
|
||||
* a given String will be involved in multiple comparisons,
|
||||
* CollationKey.compareTo() has the best performance. See the Collator
|
||||
* class description for an example using CollationKeys.</p>
|
||||
* @param source the source string.
|
||||
* @param target the target string.
|
||||
* @return Returns an integer value. Value is less than zero if source is
|
||||
* less than target, value is zero if source and target are equal,
|
||||
* value is greater than zero if source is greater than target.
|
||||
* @see CollationKey
|
||||
* @see #getCollationKey
|
||||
* @draft 2.2
|
||||
*/
|
||||
public abstract int compare(String source, String target);
|
||||
|
||||
/**
|
||||
* <p>Transforms the String into a series of bits that can be compared
|
||||
* bitwise to other CollationKeys. CollationKeys provide better
|
||||
* performance than Collator.compare() when Strings are involved in
|
||||
* multiple comparisons.</p>
|
||||
* <p>See the Collator class description for an example using
|
||||
* CollationKeys.</p>
|
||||
* @param source the string to be transformed into a collation key.
|
||||
* @return the CollationKey for the given String based on this Collator's
|
||||
* collation rules. If the source String is null, a null
|
||||
* CollationKey is returned.
|
||||
* @see CollationKey
|
||||
* @see #compare(String, String)
|
||||
* @draft 2.2
|
||||
*/
|
||||
public abstract CollationKey getCollationKey(String source);
|
||||
|
||||
// protected data members ------------------------------------------------
|
||||
|
||||
/**
|
||||
* Collation strength
|
||||
*/
|
||||
protected int m_strength_;
|
||||
/**
|
||||
* Decomposition mode
|
||||
*/
|
||||
protected int m_decomposition_;
|
||||
|
||||
// protected constructor -------------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Protected constructor for use by subclasses.
|
||||
* Public access to creating Collators is handled by the API getInstance().
|
||||
* </p>
|
||||
* @draft 2.2
|
||||
*/
|
||||
protected Collator() throws Exception
{
    // defaults: canonical decomposition, tertiary strength
    m_decomposition_ = CANONICAL_DECOMPOSITION;
    m_strength_ = TERTIARY;
}
|
||||
|
||||
// protected methods -----------------------------------------------------
|
||||
|
||||
// private variables -----------------------------------------------------
|
||||
|
||||
// private methods -------------------------------------------------------
|
||||
}
|
||||
|
284
icu4j/src/com/ibm/icu/text/CollatorReader.java
Normal file
284
icu4j/src/com/ibm/icu/text/CollatorReader.java
Normal file
|
@ -0,0 +1,284 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2002, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollatorReader.java,v $
|
||||
* $Date: 2002/05/14 16:48:49 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.IOException;
|
||||
import com.ibm.icu.impl.ICUBinary;
|
||||
import com.ibm.icu.impl.IntTrie;
|
||||
|
||||
/**
|
||||
* <p>Internal reader class for ICU data file uca.dat containing
|
||||
* Unicode Collation Algorithm data.</p>
|
||||
* <p>This class simply reads uca.dat, authenticates that it is a valid
|
||||
* ICU data file and split its contents up into blocks of data for use in
|
||||
* <a href=Collator.html>com.ibm.icu.text.Collator</a>.
|
||||
* </p>
|
||||
* <p>uca.dat which is in big-endian format is jared together with this
|
||||
* package.</p>
|
||||
* @author Syn Wee Quek
|
||||
* @since release 2.2, April 18 2002
|
||||
* @draft 2.2
|
||||
*/
|
||||
|
||||
final class CollatorReader
|
||||
{
|
||||
// protected constructor ---------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>Protected constructor.</p>
|
||||
* @param inputStream ICU uca.dat file input stream
|
||||
* @exception IOException throw if data file fails authentication
|
||||
* @draft 2.1
|
||||
*/
|
||||
protected CollatorReader(InputStream inputStream) throws IOException
{
    // same as the two-argument constructor with the ICU header check on
    this(inputStream, true);
}
|
||||
|
||||
/**
|
||||
* <p>Protected constructor.</p>
|
||||
* @param inputStream ICU uca.dat file input stream
|
||||
* @param readICUHeader flag to indicate if the ICU header has to be read
|
||||
* @exception IOException throw if data file fails authentication
|
||||
* @draft 2.1
|
||||
*/
|
||||
protected CollatorReader(InputStream inputStream, boolean readICUHeader)
                                                        throws IOException
{
    // When requested, the ICU header is read and checked on the raw
    // stream first; the DataInputStream then continues from whatever
    // position the stream is left at.
    if (readICUHeader) {
        ICUBinary.readHeader(inputStream,
                             DATA_FORMAT_ID_,
                             DATA_FORMAT_VERSION_,
                             UNICODE_VERSION_);
    }
    this.m_dataInputStream_ = new DataInputStream(inputStream);
}
|
||||
|
||||
// protected methods -------------------------------------------------
|
||||
|
||||
/**
|
||||
* Read and break up the header stream of data passed in as arguments into
|
||||
* meaningful Collator data.
|
||||
* @param rbc RuleBasedCollator to populate with header information
|
||||
* @exception IOException thrown when there's a data error.
|
||||
*/
|
||||
protected void readHeader(RuleBasedCollator rbc) throws IOException
|
||||
{
|
||||
int size = m_dataInputStream_.readInt();
|
||||
// all the offsets are in bytes
|
||||
// to get the address add to the header address and cast properly
|
||||
// Default options int options
|
||||
m_dataInputStream_.skipBytes(4);
|
||||
// this one is needed only for UCA, to copy the appropriate
|
||||
// contractions
|
||||
m_dataInputStream_.skipBytes(4);
|
||||
// reserved for future use
|
||||
m_dataInputStream_.readInt();
|
||||
// const uint8_t *mappingPosition;
|
||||
int mapping = m_dataInputStream_.readInt();
|
||||
// uint32_t *expansion;
|
||||
rbc.m_expansionOffset_ = m_dataInputStream_.readInt();
|
||||
// UChar *contractionIndex;
|
||||
rbc.m_contractionOffset_ = m_dataInputStream_.readInt();
|
||||
// uint32_t *contractionCEs;
|
||||
int contractionCE = m_dataInputStream_.readInt();
|
||||
// needed for various closures int contractionSize
|
||||
m_dataInputStream_.skipBytes(4);
|
||||
// array of last collation element in expansion
|
||||
int expansionEndCE = m_dataInputStream_.readInt();
|
||||
// array of maximum expansion size corresponding to the expansion
|
||||
// collation elements with last element in expansionEndCE
|
||||
int expansionEndCEMaxSize = m_dataInputStream_.readInt();
|
||||
// size of endExpansionCE int expansionEndCESize
|
||||
m_dataInputStream_.skipBytes(4);
|
||||
// hash table of unsafe code points
|
||||
int unsafe = m_dataInputStream_.readInt();
|
||||
// hash table of final code points in contractions.
|
||||
int contractionEnd = m_dataInputStream_.readInt();
|
||||
// int CEcount = m_dataInputStream_.readInt();
|
||||
m_dataInputStream_.skipBytes(4);
|
||||
// is jamoSpecial
|
||||
rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean();
|
||||
m_dataInputStream_.skipBytes(3);
|
||||
// byte version[] = new byte[4];
|
||||
m_dataInputStream_.skipBytes(4);
|
||||
// byte charsetName[] = new byte[32]; // for charset CEs
|
||||
m_dataInputStream_.skipBytes(32);
|
||||
m_dataInputStream_.skipBytes(64); // for future use
|
||||
if (rbc.m_contractionOffset_ == 0) { // contraction can be null
|
||||
rbc.m_contractionOffset_ = mapping;
|
||||
contractionCE = mapping;
|
||||
}
|
||||
m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;
|
||||
m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;
|
||||
m_contractionCESize_ = mapping - contractionCE;
|
||||
m_trieSize_ = expansionEndCE - mapping;
|
||||
m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
|
||||
m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
|
||||
m_unsafeSize_ = contractionEnd - unsafe;
|
||||
m_contractionEndSize_ = size - contractionEnd;
|
||||
rbc.m_contractionOffset_ >>= 1; // casting to ints
|
||||
rbc.m_expansionOffset_ >>= 2; // casting to chars
|
||||
}
|
||||
|
||||
/**
|
||||
* Read and break up the collation options passed in the stream of data
|
||||
* and update the argument Collator with the results
|
||||
* @param rbc RuleBasedCollator to populate
|
||||
* @exception IOException thrown when there's a data error.
|
||||
* @draft 2.2
|
||||
*/
|
||||
public void readOptions(RuleBasedCollator rbc) throws IOException
|
||||
{
|
||||
rbc.m_variableTopValue_ = m_dataInputStream_.readInt();
|
||||
rbc.setAttributeDefault(RuleBasedCollator.Attribute.FRENCH_COLLATION_,
|
||||
m_dataInputStream_.readInt());
|
||||
rbc.setAttributeDefault(
|
||||
RuleBasedCollator.Attribute.ALTERNATE_HANDLING_,
|
||||
m_dataInputStream_.readInt());
|
||||
rbc.setAttributeDefault(RuleBasedCollator.Attribute.CASE_FIRST_,
|
||||
m_dataInputStream_.readInt());
|
||||
rbc.setAttributeDefault(RuleBasedCollator.Attribute.CASE_LEVEL_,
|
||||
m_dataInputStream_.readInt());
|
||||
rbc.setAttributeDefault(
|
||||
RuleBasedCollator.Attribute.NORMALIZATION_MODE_,
|
||||
m_dataInputStream_.readInt());
|
||||
rbc.setAttributeDefault(RuleBasedCollator.Attribute.STRENGTH_,
|
||||
m_dataInputStream_.readInt());
|
||||
rbc.setAttributeDefault(
|
||||
RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_,
|
||||
m_dataInputStream_.readInt());
|
||||
}
|
||||
|
||||
/**
|
||||
* Read and break up the stream of data passed in as arguments into
|
||||
* meaningful Collator data.
|
||||
* @param rbc RuleBasedCollator to populate
|
||||
* @exception IOException thrown when there's a data error.
|
||||
* @draft 2.2
|
||||
*/
|
||||
public void read(RuleBasedCollator rbc) throws IOException
|
||||
{
|
||||
readHeader(rbc);
|
||||
readOptions(rbc);
|
||||
m_expansionSize_ >>= 2;
|
||||
rbc.m_expansion_ = new int[m_expansionSize_];
|
||||
for (int i = 0; i < m_expansionSize_; i ++) {
|
||||
rbc.m_expansion_[i] = m_dataInputStream_.readInt();
|
||||
}
|
||||
m_contractionIndexSize_ >>= 1;
|
||||
rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
|
||||
for (int i = 0; i < m_contractionIndexSize_; i ++) {
|
||||
rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
|
||||
}
|
||||
m_contractionCESize_ >>= 2;
|
||||
rbc.m_contractionCE_ = new int[m_contractionCESize_];
|
||||
for (int i = 0; i < m_contractionCESize_; i ++) {
|
||||
rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
|
||||
}
|
||||
rbc.m_trie_ = new IntTrie(m_dataInputStream_, rbc);
|
||||
if (!rbc.m_trie_.isLatin1Linear()) {
|
||||
throw new IOException("Data corrupted, "
|
||||
+ "Collator Tries expected to have linear "
|
||||
+ "latin one data arrays");
|
||||
}
|
||||
m_expansionEndCESize_ >>= 2;
|
||||
rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
|
||||
for (int i = 0; i < m_expansionEndCESize_; i ++) {
|
||||
rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
|
||||
}
|
||||
rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
|
||||
for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i ++) {
|
||||
rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();
|
||||
}
|
||||
rbc.m_unsafe_ = new byte[m_unsafeSize_];
|
||||
for (int i = 0; i < m_unsafeSize_; i ++) {
|
||||
rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
|
||||
}
|
||||
rbc.m_contractionEnd_ = new byte[m_contractionEndSize_];
|
||||
for (int i = 0; i < m_contractionEndSize_; i ++) {
|
||||
rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
|
||||
}
|
||||
}
|
||||
|
||||
// private variables -------------------------------------------------
|
||||
|
||||
/**
|
||||
* Data input stream for uca.dat
|
||||
*/
|
||||
private DataInputStream m_dataInputStream_;
|
||||
|
||||
/**
|
||||
* File format version and id that this class understands.
|
||||
* No guarantees are made if an older version is used
|
||||
*/
|
||||
private static final byte DATA_FORMAT_VERSION_[] =
|
||||
{(byte)0x2, (byte)0x0, (byte)0x0, (byte)0x0};
|
||||
private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x43,
|
||||
(byte)0x6f, (byte)0x6c};
|
||||
private static final byte UNICODE_VERSION_[] = {(byte)0x3, (byte)0x0,
|
||||
(byte)0x0, (byte)0x0};
|
||||
/**
|
||||
* Corrupted error string
|
||||
*/
|
||||
private static final String CORRUPTED_DATA_ERROR_ =
|
||||
"Data corrupted in Collation data file";
|
||||
|
||||
/**
|
||||
* Size of expansion table in bytes
|
||||
*/
|
||||
private int m_expansionSize_;
|
||||
/**
|
||||
* Size of contraction index table in bytes
|
||||
*/
|
||||
private int m_contractionIndexSize_;
|
||||
/**
|
||||
* Size of contraction table in bytes
|
||||
*/
|
||||
private int m_contractionCESize_;
|
||||
/**
|
||||
* Size of the Trie in bytes
|
||||
*/
|
||||
private int m_trieSize_;
|
||||
/**
|
||||
* Size of the table that contains information about collation elements
|
||||
* that end with an expansion
|
||||
*/
|
||||
private int m_expansionEndCESize_;
|
||||
/**
|
||||
* Size of the table that contains information about the maximum size of
|
||||
* collation elements that end with a particular expansion CE corresponding
|
||||
* to the ones in expansionEndCE
|
||||
*/
|
||||
private int m_expansionEndCEMaxSizeSize_;
|
||||
/**
|
||||
* Size of the table that contains information about the "Unsafe"
|
||||
* codepoints
|
||||
*/
|
||||
private int m_unsafeSize_;
|
||||
/**
|
||||
* Size of the table that contains information about codepoints that ends
|
||||
* with a contraction
|
||||
*/
|
||||
private int m_contractionEndSize_;
|
||||
/**
|
||||
* Size of the table that contains UCA contraction information
|
||||
*/
|
||||
private int m_UCAContractionSize_;
|
||||
|
||||
// private methods ---------------------------------------------------
|
||||
|
||||
}
|
||||
|
2960
icu4j/src/com/ibm/icu/text/RuleBasedCollator.java
Executable file
2960
icu4j/src/com/ibm/icu/text/RuleBasedCollator.java
Executable file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue