ICU-2732 adding collation apis and code to cater for RawCollationKeys

X-SVN-Rev: 13153
This commit is contained in:
Syn Wee Quek 2003-09-22 06:24:25 +00:00
parent adfca6a4c7
commit 969599a083
6 changed files with 377 additions and 24 deletions

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/collator/CollationTest.java,v $
* $Date: 2003/07/29 23:08:06 $
* $Revision: 1.13 $
* $Date: 2003/09/22 06:24:25 $
* $Revision: 1.14 $
*
*******************************************************************************
*/
@ -17,6 +17,7 @@ import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.CollationKey;
import com.ibm.icu.text.RawCollationKey;
import com.ibm.icu.text.CollationElementIterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.lang.UCharacter;
@ -438,7 +439,22 @@ public class CollationTest extends ModuleTest
if (compareResult != result) {
printInfo = true;
if(!test.isModularBuild()){
test.errln("Comparing sortkeys of \"" + Utility.hex(source)
test.errln("Comparing CollationKeys of \"" + Utility.hex(source)
+ "\" with \"" + Utility.hex(target)
+ "\" expected " + result + " but got "
+ compareResult);
}
}
RawCollationKey srsk = new RawCollationKey();
myCollation.getRawCollationKey(source, srsk);
RawCollationKey trsk = new RawCollationKey();
myCollation.getRawCollationKey(target, trsk);
compareResult = ssk.compareTo(tsk);
if (compareResult != result) {
printInfo = true;
if(!test.isModularBuild()){
test.errln("Comparing RawCollationKeys of \""
+ Utility.hex(source)
+ "\" with \"" + Utility.hex(target)
+ "\" expected " + result + " but got "
+ compareResult);

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollationKey.java,v $
* $Date: 2003/06/11 19:55:18 $
* $Revision: 1.17 $
* $Date: 2003/09/22 06:24:19 $
* $Revision: 1.18 $
*
*******************************************************************************
*/
@ -598,6 +598,33 @@ public final class CollationKey implements Comparable
// trust that neither sort key contained illegally embedded zero bytes
return new CollationKey(null, result);
}
// package private constructor ------------------------------------------
/**
* CollationKey constructor.
* @param source string this CollationKey is to represent
* @param key RawCollationKey object that represents the collation order of
* argument source
* @see Collator
* @see RawCollationKey
*/
CollationKey(String source, RawCollationKey key)
{
m_source_ = source;
m_key_ = new byte[key.size];
byte src[] = key.bytes;
if (key.size < 64) { // arraycopy slower for elements size < 64
for (int i = key.size - 1; i >= 0; i --) {
m_key_[i] = src[i];
}
}
else {
System.arraycopy(src, 0, m_key_, 0, key.size);
}
m_hashCode_ = 0;
m_length_ = -1;
}
// private data members -------------------------------------------------

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Collator.java,v $
* $Date: 2003/06/09 23:31:10 $
* $Revision: 1.33 $
* $Date: 2003/09/22 06:24:18 $
* $Revision: 1.34 $
*
*******************************************************************************
*/
@ -658,10 +658,29 @@ public abstract class Collator implements Comparator, Cloneable
* CollationKey is returned.
* @see CollationKey
* @see #compare(String, String)
* @see #getRawCollationKey
* @draft ICU 2.2
*/
public abstract CollationKey getCollationKey(String source);
/**
* Gets the simpler form of a CollationKey for the String source following
* the rules of this Collator and stores the result into the user provided
* argument key.
* If key has an internal byte array whose length is too small for the
* result, the internal byte array will be grown to the exact required
* size.
* @param source the text String to be transformed into a RawCollationKey
* @param key output RawCollationKey to store results
* @return If key is null, a new instance of RawCollationKey will be
* created and returned, otherwise the user provided key will be
* returned.
* @see #compare(String, String)
* @see #getCollationKey
* @see RawCollationKey
* @draft ICU 2.8
*/
public abstract RawCollationKey getRawCollationKey(String source,
RawCollationKey key);
/**
* <p>
* Variable top is a two byte primary value which causes all the codepoints

View file

@ -0,0 +1,130 @@
/**
*******************************************************************************
* Copyright (C) 1996-2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RawCollationKey.java,v $
* $Date: 2003/09/22 06:24:20 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.text;
import com.ibm.icu.util.ByteArrayWrapper;
/**
 * <p>
 * Simple class wrapper to store the internal byte representation of a
 * CollationKey. Unlike the CollationKey, this class does not contain
 * information on the source string the sort order represents.
 * RawCollationKey is mutable and users can reuse its objects with the
 * method in RuleBasedCollator.getRawCollationKey(..).
 * </p>
 * <p>
 * Please refer to the documentation on CollationKey for a detailed
 * description of the internal byte representation.
 * </p>
 * Example of use:
 * <pre>
 * String str[] = {.....};
 * RuleBasedCollator collator = (RuleBasedCollator)Collator.getInstance();
 * RawCollationKey key = new RawCollationKey(128);
 * for (int i = 0; i &lt; str.length; i ++) {
 *     collator.getRawCollationKey(str[i], key);
 *     // do something with key.bytes
 * }
 * </pre>
 * @draft ICU 2.8
 * @see RuleBasedCollator
 * @see CollationKey
 */
public final class RawCollationKey extends ByteArrayWrapper
{
    // public constructors --------------------------------------------------

    /**
     * Default constructor, internal byte array is null.
     * @draft ICU 2.8
     */
    public RawCollationKey()
    {
    }

    /**
     * RawCollationKey created with an empty internal byte array of length
     * capacity.
     * @param capacity length of internal byte array
     * @draft ICU 2.8
     */
    public RawCollationKey(int capacity)
    {
        bytes = new byte[capacity];
    }

    /**
     * RawCollationKey created taking bytes as the internal byte array.
     * The array is adopted, not copied, and size is not modified by this
     * constructor.
     * @param bytes byte array to use as the internal byte array
     * @draft ICU 2.8
     */
    public RawCollationKey(byte[] bytes)
    {
        this.bytes = bytes;
    }

    // public method --------------------------------------------------------

    /**
     * <p>
     * Compares this RawCollationKey object to the target RawCollationKey
     * object. The collation rules of the Collator that created this key are
     * applied.
     * </p>
     * <p>
     * The internal byte arrays are compared position by position as
     * unsigned values until the bytes differ or a terminating 0 byte is
     * reached. A null internal byte array, or the end of an internal byte
     * array that has no terminating 0 byte, is treated as a terminator, so
     * such keys compare like an empty or a properly terminated key instead
     * of causing a NullPointerException or an
     * ArrayIndexOutOfBoundsException.
     * </p>
     * <p><strong>Note:</strong> Comparison between RawCollationKeys created by
     * different Collators might return incorrect results.
     * See class documentation.</p>
     * @param target RawCollationKey to be compared with, must not be null
     * @return 0 if the sort order is the same,
     *         &lt; 0 if this RawCollationKey has a smaller sort order than
     *         target,
     *         &gt; 0 if this RawCollationKey has a bigger sort order than
     *         target.
     * @draft ICU 2.8
     */
    public int compareTo(RawCollationKey target)
    {
        byte targetbytes[] = target.bytes;
        for (int i = 0; ; i ++) {
            int key = unsignedByteAt(bytes, i);
            int targetkey = unsignedByteAt(targetbytes, i);
            if (key != targetkey) {
                // a terminator (0) is smaller than every other byte value,
                // so the shorter key of a common prefix sorts first
                return key < targetkey ? -1 : 1;
            }
            if (key == 0) {
                // both keys ended at the same position
                return 0;
            }
        }
    }

    // private method -------------------------------------------------------

    /**
     * Reads one byte of a sort key as an unsigned value.
     * @param array sort key bytes, may be null
     * @param index offset to read
     * @return value between 0 and 255 at index, or 0 (the terminator) if
     *         array is null or index is past the end of array
     */
    private static int unsignedByteAt(byte array[], int index)
    {
        if (array == null || index >= array.length) {
            return 0;
        }
        return array[index] & 0xff;
    }
}

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $
* $Date: 2003/09/19 00:14:37 $
* $Revision: 1.46 $
* $Date: 2003/09/22 06:24:20 $
* $Revision: 1.47 $
*
*******************************************************************************
*/
@ -729,9 +729,37 @@ public final class RuleBasedCollator extends Collator
* null, a null CollationKey is returned.
* @see CollationKey
* @see #compare(String, String)
* @see #getRawCollationKey
* @draft ICU 2.2
*/
public CollationKey getCollationKey(String source)
public CollationKey getCollationKey(String source)
{
    if (source != null) {
        // build the sort key into the reusable scratch key, then hand it
        // to the CollationKey constructor
        RawCollationKey rawkey = getRawCollationKey(source,
                                                    m_utilRawCollationKey_);
        m_utilRawCollationKey_ = rawkey;
        return new CollationKey(source, rawkey);
    }
    // a null source yields a null CollationKey
    return null;
}
/**
* Gets the simpler form of a CollationKey for the String source following
* the rules of this Collator and stores the result into the user provided
* argument key.
* If key has an internal byte array whose length is too small for the
* result, the internal byte array will be grown to the exact required
* size.
* @param source the text String to be transformed into a RawCollationKey
* @param key output RawCollationKey to store results
* @return If key is null, a new instance of RawCollationKey will be
* created and returned, otherwise the user provided key will be
* returned.
* @see #getCollationKey
* @see #compare(String, String)
* @see RawCollationKey
* @draft ICU 2.8
*/
public RawCollationKey getRawCollationKey(String source,
RawCollationKey key)
{
if (source == null) {
return null;
@ -786,9 +814,11 @@ public final class RuleBasedCollator extends Collator
}
getSortKeyBytes(source, doFrench, hiragana4, commonBottom4,
bottomCount4);
byte sortkey[] = getSortKey(source, doFrench, commonBottom4,
bottomCount4);
return new CollationKey(source, sortkey);
if (key == null) {
key = new RawCollationKey();
}
getSortKey(source, doFrench, commonBottom4, bottomCount4, key);
return key;
}
/**
@ -1035,6 +1065,13 @@ public final class RuleBasedCollator extends Collator
* If comparisons are to be done on the same String multiple times, it would
* be more efficient to generate CollationKeys for the Strings and use
* CollationKey.compareTo(CollationKey) for the comparisons.
* If speed performance is critical and object instantiation is to be
* reduced, further optimization may be achieved by generating a simpler
* key of the form RawCollationKey and reusing this RawCollationKey
* object with the method RuleBasedCollator.getRawCollationKey. Internal
* byte representation can be directly accessed via RawCollationKey and
* stored for future use. Like CollationKey, RawCollationKey provides a
* method RawCollationKey.compareTo for key comparisons.
* If each String is compared only once, using the method
* RuleBasedCollator.compare(String, String) will give better performance.
* </p>
@ -1951,6 +1988,7 @@ public final class RuleBasedCollator extends Collator
private byte m_utilBytes3_[];
private byte m_utilBytes4_[];
private byte m_utilBytes5_[];
private RawCollationKey m_utilRawCollationKey_;
private int m_utilBytesCount0_;
private int m_utilBytesCount1_;
@ -2567,10 +2605,12 @@ public final class RuleBasedCollator extends Collator
* be done
* @param commonBottom4 smallest common quaternary byte
* @param bottomCount4 smallest quaternary byte
* @return the compact sortkey
* @param key output RawCollationKey to store results, key cannot be null
*/
private final byte[] getSortKey(String source, boolean doFrench,
int commonBottom4, int bottomCount4)
private final void getSortKey(String source, boolean doFrench,
int commonBottom4,
int bottomCount4,
RawCollationKey key)
{
// we have done all the CE's, now let's put them together to form
// a key
@ -2592,8 +2632,8 @@ public final class RuleBasedCollator extends Collator
}
m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte)0);
m_utilBytesCount1_ ++;
byte result[] = (byte [])m_utilBytes1_.clone();
return result;
key.set(m_utilBytes1_, 0, m_utilBytesCount1_);
}
/**
@ -2917,10 +2957,13 @@ public final class RuleBasedCollator extends Collator
private static final byte[] append(byte array[], int appendindex,
byte value)
{
if (appendindex + 1 >= array.length) {
array = increase(array, appendindex, SORT_BUFFER_INIT_SIZE_);
try {
array[appendindex] = value;
}
catch (ArrayIndexOutOfBoundsException e) {
array = increase(array, appendindex, SORT_BUFFER_INIT_SIZE_);
array[appendindex] = value;
}
array[appendindex] = value;
return array;
}
@ -2934,9 +2977,11 @@ public final class RuleBasedCollator extends Collator
private final int compareBySortKeys(String source, String target)
{
CollationKey sourcekey = getCollationKey(source);
CollationKey targetkey = getCollationKey(target);
return sourcekey.compareTo(targetkey);
m_utilRawCollationKey_ = getRawCollationKey(source,
m_utilRawCollationKey_);
// this method is very seldom called
RawCollationKey targetkey = getRawCollationKey(target, null);
return m_utilRawCollationKey_.compareTo(targetkey);
}
/**

View file

@ -0,0 +1,116 @@
/**
*******************************************************************************
* Copyright (C) 1996-2003, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/util/ByteArrayWrapper.java,v $
* $Date: 2003/09/22 06:24:18 $
* $Revision: 1.1 $
*
*******************************************************************************
*/
package com.ibm.icu.util;
/**
 * <p>
 * A simple utility class to wrap a byte array.
 * </p>
 * <p>
 * Generally passed as an argument object into a method. The method takes
 * responsibility of writing into the internal byte array and increasing its
 * size when necessary.
 * </p>
 * @author syn wee
 * @draft ICU 2.8
 */
public class ByteArrayWrapper
{
    // public data member ------------------------------------------------

    /**
     * Internal byte array.
     * @draft ICU 2.8
     */
    public byte[] bytes;

    /**
     * Size of the internal byte array used.
     * Different from bytes.length, size will be &lt;= bytes.length.
     * Semantics of size is similar to java.util.Vector.size().
     * @draft ICU 2.8
     */
    public int size;

    // public methods ----------------------------------------------------

    /**
     * Ensure that the internal byte array is at least of length capacity.
     * If the byte array is null or its length is less than capacity, a new
     * byte array of length capacity will be allocated.
     * The contents of the array (between 0 and size) remain unchanged.
     * If the byte array is null there is nothing to preserve, so the new
     * array is left zero filled whatever the value of size.
     * @param capacity minimum length of internal byte array.
     * @draft ICU 2.8
     */
    public void ensureCapacity(int capacity)
    {
        if (bytes == null || bytes.length < capacity) {
            byte[] newbytes = new byte[capacity];
            if (bytes != null) {
                // only copy when there is an old array; size is a public
                // field and may be non-zero while bytes is still null
                copyBytes(bytes, 0, newbytes, 0, size);
            }
            bytes = newbytes;
        }
    }

    /**
     * Set the internal byte array from offset 0 to (limit - start) with the
     * contents of src from offset start to limit. If the byte array is null
     * or its length is less than (limit - start), a new byte array of
     * length (limit - start) will be allocated.
     * This resets the size of the internal byte array to (limit - start).
     * @param src source byte array to copy from
     * @param start start offset of src to copy from
     * @param limit end + 1 offset of src to copy from
     * @draft ICU 2.8
     */
    public final void set(byte[] src, int start, int limit)
    {
        size = 0;
        append(src, start, limit);
    }

    // private methods ---------------------------------------------------

    /**
     * Appends the internal byte array from offset size with the
     * contents of src from offset start to limit. This increases the size of
     * the internal byte array to (size + limit - start).
     * @param src source byte array to copy from
     * @param start start offset of src to copy from
     * @param limit end + 1 offset of src to copy from
     */
    private final void append(byte[] src, int start, int limit)
    {
        int len = limit - start;
        // room is needed for the bytes already stored plus the new ones
        ensureCapacity(size + len);
        copyBytes(src, start, bytes, size, len);
        size += len;
    }

    /**
     * Copies the contents of src byte array from offset srcoff to the
     * target of tgt byte array at the offset tgtoff.
     * @param src source byte array to copy from
     * @param srcoff start offset of src to copy from
     * @param tgt target byte array to copy to
     * @param tgtoff start offset of tgt to copy to
     * @param length size of contents to copy
     */
    private static final void copyBytes(byte[] src, int srcoff, byte[] tgt,
                                        int tgtoff, int length)
    {
        if (length < 64) {
            // short runs are copied by hand, longer runs go to arraycopy
            for (int i = srcoff, n = tgtoff; -- length >= 0; ++ i, ++ n) {
                tgt[n] = src[i];
            }
        }
        else {
            System.arraycopy(src, srcoff, tgt, tgtoff, length);
        }
    }
}