mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-17 18:56:53 +00:00
ICU-2732 adding collation apis and code to cater for RawCollationKeys
X-SVN-Rev: 13153
This commit is contained in:
parent
adfca6a4c7
commit
969599a083
6 changed files with 377 additions and 24 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/collator/CollationTest.java,v $
|
||||
* $Date: 2003/07/29 23:08:06 $
|
||||
* $Revision: 1.13 $
|
||||
* $Date: 2003/09/22 06:24:25 $
|
||||
* $Revision: 1.14 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -17,6 +17,7 @@ import com.ibm.icu.dev.test.TestFmwk;
|
|||
import com.ibm.icu.text.RuleBasedCollator;
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.CollationKey;
|
||||
import com.ibm.icu.text.RawCollationKey;
|
||||
import com.ibm.icu.text.CollationElementIterator;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
|
@ -438,7 +439,22 @@ public class CollationTest extends ModuleTest
|
|||
if (compareResult != result) {
|
||||
printInfo = true;
|
||||
if(!test.isModularBuild()){
|
||||
test.errln("Comparing sortkeys of \"" + Utility.hex(source)
|
||||
test.errln("Comparing CollationKeys of \"" + Utility.hex(source)
|
||||
+ "\" with \"" + Utility.hex(target)
|
||||
+ "\" expected " + result + " but got "
|
||||
+ compareResult);
|
||||
}
|
||||
}
|
||||
RawCollationKey srsk = new RawCollationKey();
|
||||
myCollation.getRawCollationKey(source, srsk);
|
||||
RawCollationKey trsk = new RawCollationKey();
|
||||
myCollation.getRawCollationKey(target, trsk);
|
||||
compareResult = ssk.compareTo(tsk);
|
||||
if (compareResult != result) {
|
||||
printInfo = true;
|
||||
if(!test.isModularBuild()){
|
||||
test.errln("Comparing RawCollationKeys of \""
|
||||
+ Utility.hex(source)
|
||||
+ "\" with \"" + Utility.hex(target)
|
||||
+ "\" expected " + result + " but got "
|
||||
+ compareResult);
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollationKey.java,v $
|
||||
* $Date: 2003/06/11 19:55:18 $
|
||||
* $Revision: 1.17 $
|
||||
* $Date: 2003/09/22 06:24:19 $
|
||||
* $Revision: 1.18 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -598,6 +598,33 @@ public final class CollationKey implements Comparable
|
|||
// trust that neither sort key contained illegally embedded zero bytes
|
||||
return new CollationKey(null, result);
|
||||
}
|
||||
|
||||
// package private constructor ------------------------------------------
|
||||
|
||||
/**
|
||||
* CollationKey constructor.
|
||||
* @param source string this CollationKey is to represent
|
||||
* @param key RawCollationKey object that represents the collation order of
|
||||
* argument source
|
||||
* @see Collator
|
||||
* @see RawCollationKey
|
||||
*/
|
||||
CollationKey(String source, RawCollationKey key)
|
||||
{
|
||||
m_source_ = source;
|
||||
m_key_ = new byte[key.size];
|
||||
byte src[] = key.bytes;
|
||||
if (key.size < 64) { // arraycopy slower for elements size < 64
|
||||
for (int i = key.size - 1; i >= 0; i --) {
|
||||
m_key_[i] = src[i];
|
||||
}
|
||||
}
|
||||
else {
|
||||
System.arraycopy(src, 0, m_key_, 0, key.size);
|
||||
}
|
||||
m_hashCode_ = 0;
|
||||
m_length_ = -1;
|
||||
}
|
||||
|
||||
// private data members -------------------------------------------------
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Collator.java,v $
|
||||
* $Date: 2003/06/09 23:31:10 $
|
||||
* $Revision: 1.33 $
|
||||
* $Date: 2003/09/22 06:24:18 $
|
||||
* $Revision: 1.34 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -658,10 +658,29 @@ public abstract class Collator implements Comparator, Cloneable
|
|||
* CollationKey is returned.
|
||||
* @see CollationKey
|
||||
* @see #compare(String, String)
|
||||
* @see #getRawCollationKey
|
||||
* @draft ICU 2.2
|
||||
*/
|
||||
public abstract CollationKey getCollationKey(String source);
|
||||
|
||||
/**
|
||||
* Gets the simpler form of a CollationKey for the String source following
|
||||
* the rules of this Collator and stores the result into the user provided
|
||||
* argument key.
|
||||
* If key has an internal byte array whose length is too small for the
|
||||
* result, the internal byte array will be grown to the exact required
|
||||
* size.
|
||||
* @param source the text String to be transformed into a RawCollationKey
|
||||
* @return If key is null, a new instance of RawCollationKey will be
|
||||
* created and returned, otherwise the user provided key will be
|
||||
* returned.
|
||||
* @see #compare(String, String)
|
||||
* @see #getCollationKey
|
||||
* @see RawCollationKey
|
||||
*/
|
||||
public abstract RawCollationKey getRawCollationKey(String source,
|
||||
RawCollationKey key);
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Variable top is a two byte primary value which causes all the codepoints
|
||||
|
|
130
icu4j/src/com/ibm/icu/text/RawCollationKey.java
Normal file
130
icu4j/src/com/ibm/icu/text/RawCollationKey.java
Normal file
|
@ -0,0 +1,130 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2003, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RawCollationKey.java,v $
|
||||
* $Date: 2003/09/22 06:24:20 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import com.ibm.icu.util.ByteArrayWrapper;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Simple class wrapper to store the internal byte representation of a
|
||||
* CollationKey. Unlike the CollationKey, this class does not contain information
|
||||
* on the source string the sort order represents. RawCollationKey is mutable
|
||||
* and users can reuse its objects with the method in
|
||||
* RuleBasedCollator.getRawCollationKey(..).
|
||||
* </p>
|
||||
* <p>
|
||||
* Please refer to the documentation on CollationKey for a detailed description
|
||||
* on the internal byte representation.
|
||||
* </p>
|
||||
* <code>
|
||||
* Example of use:<br>
|
||||
* String str[] = {.....};
|
||||
* RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance();
|
||||
* RawCollationKey key = new RawCollationKey(128);
|
||||
* for (int i = 0; i < str.length; i ++) {
|
||||
* collator.getRawCollationKey(str[i], key);
|
||||
* // do something with key.bytes
|
||||
* }
|
||||
* </code>
|
||||
* @draft ICU 2.8
|
||||
* @see RuleBasedCollator
|
||||
* @see CollationKey
|
||||
*/
|
||||
public final class RawCollationKey extends ByteArrayWrapper
|
||||
{
|
||||
// public constructors --------------------------------------------------
|
||||
|
||||
/**
|
||||
* Default constructor, internal byte array is null.
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public RawCollationKey()
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* RawCollationKey created with an empty internal byte array of length
|
||||
* capacity
|
||||
* @param capacity length of internal byte array
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public RawCollationKey(int capacity)
|
||||
{
|
||||
bytes = new byte[capacity];
|
||||
}
|
||||
|
||||
/**
|
||||
* RawCollationKey created taking bytes as the internal byte array
|
||||
* @param bytes
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public RawCollationKey(byte[] bytes)
|
||||
{
|
||||
this.bytes = bytes;
|
||||
}
|
||||
|
||||
// public method --------------------------------------------------------
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Compares this RawCollationKey object to the target RawCollationKey
|
||||
* object. The collation rules of the Collator that created this key are
|
||||
* applied.
|
||||
* </p>
|
||||
* <p><strong>Note:</strong> Comparison between RawCollationKeys created by
|
||||
* different Collators might return incorrect results.
|
||||
* See class documentation.</p>
|
||||
* @param target RawCollationKey to be compared with
|
||||
* @return 0 if the sort order is the same,
|
||||
* < 0 if this RawCollationKey has a smaller sort order than
|
||||
* target,
|
||||
* > 0 if this RawCollationKey has a bigger sort order than
|
||||
* target.
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public int compareTo(RawCollationKey target)
|
||||
{
|
||||
int i = 0;
|
||||
while (bytes[i] != 0 && target.bytes[i] != 0) {
|
||||
byte key = bytes[i];
|
||||
byte targetkey = target.bytes[i];
|
||||
if (key == targetkey) {
|
||||
i ++;
|
||||
continue;
|
||||
}
|
||||
if (key >= 0) {
|
||||
if (targetkey < 0 || key < targetkey) {
|
||||
return -1;
|
||||
}
|
||||
// target key has to be positive and less than key
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
// key is negative
|
||||
if (targetkey >= 0 || key > targetkey) {
|
||||
return 1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
// last comparison if we encounter a 0
|
||||
if (bytes[i] == target.bytes[i]) {
|
||||
return 0;
|
||||
}
|
||||
if (bytes[i] == 0) {
|
||||
return -1;
|
||||
}
|
||||
// target is 0
|
||||
return 1;
|
||||
}
|
||||
}
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $
|
||||
* $Date: 2003/09/19 00:14:37 $
|
||||
* $Revision: 1.46 $
|
||||
* $Date: 2003/09/22 06:24:20 $
|
||||
* $Revision: 1.47 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -729,9 +729,37 @@ public final class RuleBasedCollator extends Collator
|
|||
* null, a null CollationKey is returned.
|
||||
* @see CollationKey
|
||||
* @see #compare(String, String)
|
||||
* @see #getRawCollationKey
|
||||
* @draft ICU 2.2
|
||||
*/
|
||||
public CollationKey getCollationKey(String source)
|
||||
public CollationKey getCollationKey(String source) {
|
||||
if (source == null) {
|
||||
return null;
|
||||
}
|
||||
m_utilRawCollationKey_ = getRawCollationKey(source,
|
||||
m_utilRawCollationKey_);
|
||||
return new CollationKey(source, m_utilRawCollationKey_);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the simpler form of a CollationKey for the String source following
|
||||
* the rules of this Collator and stores the result into the user provided
|
||||
* argument key.
|
||||
* If key has an internal byte array whose length is too small for the
|
||||
* result, the internal byte array will be grown to the exact required
|
||||
* size.
|
||||
* @param source the text String to be transformed into a RawCollationKey
|
||||
* @param key output RawCollationKey to store results
|
||||
* @return If key is null, a new instance of RawCollationKey will be
|
||||
* created and returned, otherwise the user provided key will be
|
||||
* returned.
|
||||
* @see #getCollationKey
|
||||
* @see #compare(String, String)
|
||||
* @see RawCollationKey
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public RawCollationKey getRawCollationKey(String source,
|
||||
RawCollationKey key)
|
||||
{
|
||||
if (source == null) {
|
||||
return null;
|
||||
|
@ -786,9 +814,11 @@ public final class RuleBasedCollator extends Collator
|
|||
}
|
||||
getSortKeyBytes(source, doFrench, hiragana4, commonBottom4,
|
||||
bottomCount4);
|
||||
byte sortkey[] = getSortKey(source, doFrench, commonBottom4,
|
||||
bottomCount4);
|
||||
return new CollationKey(source, sortkey);
|
||||
if (key == null) {
|
||||
key = new RawCollationKey();
|
||||
}
|
||||
getSortKey(source, doFrench, commonBottom4, bottomCount4, key);
|
||||
return key;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1035,6 +1065,13 @@ public final class RuleBasedCollator extends Collator
|
|||
* If comparison are to be done to the same String multiple times, it would
|
||||
* be more efficient to generate CollationKeys for the Strings and use
|
||||
* CollationKey.compareTo(CollationKey) for the comparisons.
|
||||
* If speed performance is critical and object instantiation is to be
|
||||
* reduced, further optimization may be achieved by generating a simpler
|
||||
* key of the form RawCollationKey and reusing this RawCollationKey
|
||||
* object with the method RuleBasedCollator.getRawCollationKey. Internal
|
||||
* byte representation can be directly accessed via RawCollationKey and
|
||||
* stored for future use. Like CollationKey, RawCollationKey provides a
|
||||
* method RawCollationKey.compareTo for key comparisons.
|
||||
* If each String is compared only once, using the method
|
||||
* RuleBasedCollator.compare(String, String) will have a better performance.
|
||||
* </p>
|
||||
|
@ -1951,6 +1988,7 @@ public final class RuleBasedCollator extends Collator
|
|||
private byte m_utilBytes3_[];
|
||||
private byte m_utilBytes4_[];
|
||||
private byte m_utilBytes5_[];
|
||||
private RawCollationKey m_utilRawCollationKey_;
|
||||
|
||||
private int m_utilBytesCount0_;
|
||||
private int m_utilBytesCount1_;
|
||||
|
@ -2567,10 +2605,12 @@ public final class RuleBasedCollator extends Collator
|
|||
* be done
|
||||
* @param commonBottom4 smallest common quaternary byte
|
||||
* @param bottomCount4 smallest quaternary byte
|
||||
* @return the compact sortkey
|
||||
* @param key output RawCollationKey to store results, key cannot be null
|
||||
*/
|
||||
private final byte[] getSortKey(String source, boolean doFrench,
|
||||
int commonBottom4, int bottomCount4)
|
||||
private final void getSortKey(String source, boolean doFrench,
|
||||
int commonBottom4,
|
||||
int bottomCount4,
|
||||
RawCollationKey key)
|
||||
{
|
||||
// we have done all the CE's, now let's put them together to form
|
||||
// a key
|
||||
|
@ -2592,8 +2632,8 @@ public final class RuleBasedCollator extends Collator
|
|||
}
|
||||
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte)0);
|
||||
m_utilBytesCount1_ ++;
|
||||
byte result[] = (byte [])m_utilBytes1_.clone();
|
||||
return result;
|
||||
|
||||
key.set(m_utilBytes1_, 0, m_utilBytesCount1_);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2917,10 +2957,13 @@ public final class RuleBasedCollator extends Collator
|
|||
private static final byte[] append(byte array[], int appendindex,
|
||||
byte value)
|
||||
{
|
||||
if (appendindex + 1 >= array.length) {
|
||||
array = increase(array, appendindex, SORT_BUFFER_INIT_SIZE_);
|
||||
try {
|
||||
array[appendindex] = value;
|
||||
}
|
||||
catch (ArrayIndexOutOfBoundsException e) {
|
||||
array = increase(array, appendindex, SORT_BUFFER_INIT_SIZE_);
|
||||
array[appendindex] = value;
|
||||
}
|
||||
array[appendindex] = value;
|
||||
return array;
|
||||
}
|
||||
|
||||
|
@ -2934,9 +2977,11 @@ public final class RuleBasedCollator extends Collator
|
|||
private final int compareBySortKeys(String source, String target)
|
||||
|
||||
{
|
||||
CollationKey sourcekey = getCollationKey(source);
|
||||
CollationKey targetkey = getCollationKey(target);
|
||||
return sourcekey.compareTo(targetkey);
|
||||
m_utilRawCollationKey_ = getRawCollationKey(source,
|
||||
m_utilRawCollationKey_);
|
||||
// this method is very seldom called
|
||||
RawCollationKey targetkey = getRawCollationKey(target, null);
|
||||
return m_utilRawCollationKey_.compareTo(targetkey);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
116
icu4j/src/com/ibm/icu/util/ByteArrayWrapper.java
Normal file
116
icu4j/src/com/ibm/icu/util/ByteArrayWrapper.java
Normal file
|
@ -0,0 +1,116 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2003, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/util/ByteArrayWrapper.java,v $
|
||||
* $Date: 2003/09/22 06:24:18 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.util;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* A simple utility class to wrap a byte array.
|
||||
* </p>
|
||||
* <p>
|
||||
* Generally passed as an argument object into a method. The method takes
|
||||
* responsibility of writing into the internal byte array and increasing its
|
||||
* size when necessary.
|
||||
* </p>
|
||||
* @author syn wee
|
||||
* @draft ICU 2.8
|
||||
*/
|
||||
public class ByteArrayWrapper
{
    // public data member ------------------------------------------------

    /**
     * Internal byte array.
     * @draft ICU 2.8
     */
    public byte[] bytes;
    /**
     * Size of the internal byte array used.
     * Different from bytes.length, size will be <= bytes.length.
     * Semantics of size is similar to java.util.Vector.size().
     * @draft ICU 2.8
     */
    public int size;

    // public methods ----------------------------------------------------

    /**
     * Ensure that the internal byte array is at least of length capacity.
     * If the byte array is null or its length is less than capacity, a new
     * byte array of length capacity will be allocated.
     * The contents of the array (between 0 and size) remain unchanged.
     * @param capacity minimum length of internal byte array.
     * @draft ICU 2.8
     */
    public void ensureCapacity(int capacity)
    {
        if (bytes == null || bytes.length < capacity) {
            byte[] newbytes = new byte[capacity];
            // carry over the bytes currently in use
            copyBytes(bytes, 0, newbytes, 0, size);
            bytes = newbytes;
        }
    }

    /**
     * Set the internal byte array from offset 0 to (limit - start) with the
     * contents of src from offset start to limit. If the byte array is null
     * or its length is less than (limit - start), a new byte array of length
     * (limit - start) will be allocated.
     * This resets the size of the internal byte array to (limit - start).
     * @param src source byte array to copy from
     * @param start start offset of src to copy from
     * @param limit end + 1 offset of src to copy from
     */
    public final void set(byte[] src, int start, int limit)
    {
        size = 0;
        append(src, start, limit);
    }

    // private methods ---------------------------------------------------

    /**
     * Appends the internal byte array from offset size with the
     * contents of src from offset start to limit. This increases the size of
     * the internal byte array to (size + limit - start), growing the array
     * first when it cannot hold the existing bytes plus the appended ones.
     * @param src source byte array to copy from
     * @param start start offset of src to copy from
     * @param limit end + 1 offset of src to copy from
     */
    private final void append(byte[] src, int start, int limit)
    {
        int len = limit - start;
        // room is needed for the bytes already stored plus the new ones;
        // asking for len alone would overflow the array whenever size > 0
        ensureCapacity(size + len);
        copyBytes(src, start, bytes, size, len);
        size += len;
    }

    /**
     * Copies the contents of src byte array from offset srcoff to the
     * target of tgt byte array at the offset tgtoff.
     * Copies of fewer than 64 bytes use a plain loop, longer ones use
     * System.arraycopy.
     * @param src source byte array to copy from
     * @param srcoff start offset of src to copy from
     * @param tgt target byte array to copy to
     * @param tgtoff start offset of tgt to copy to
     * @param length size of contents to copy
     */
    private static final void copyBytes(byte[] src, int srcoff, byte[] tgt,
                                        int tgtoff, int length) {
        if (length < 64) {
            for (int i = srcoff, n = tgtoff; -- length >= 0; ++ i, ++ n) {
                tgt[n] = src[i];
            }
        } else {
            System.arraycopy(src, srcoff, tgt, tgtoff, length);
        }
    }
}
|
Loading…
Add table
Reference in a new issue