ICU-1897

initial collation commits X-SVN-Rev: 8615
2025-04-13 00:43:32 +00:00 · 2002-05-14 16:48:49 +00:00 · 2002-05-14 16:48:49 +00:00 · 44672d459f
commit 44672d459f
parent fa460c1481
7 changed files with 6543 additions and 44 deletions
--- a/icu4j/src/com/ibm/icu/impl/UCharacterIterator.java
+++ b/icu4j/src/com/ibm/icu/impl/UCharacterIterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Attic/UCharacterIterator.java,v $ 
- * $Date: 2002/04/03 00:00:00 $ 
- * $Revision: 1.4 $
+ * $Date: 2002/05/14 16:48:49 $ 
+ * $Revision: 1.5 $
 *
 *******************************************************************************
 */
@ -41,27 +41,62 @@ public final class UCharacterIterator implements CharacterIterator
 	// public constructor ------------------------------------------------------
 	
 	/**
-	 * Public constructor
+	 * Public constructor.
+	 * By default the iteration range will be from 0 to the end of the text.
 	 * @param replacable text which the iterator will be based on
 	 */
 	public UCharacterIterator(Replaceable replaceable)
 	{
 		m_replaceable_  = replaceable;
 		m_index_        = 0;
-		m_length_       = replaceable.length();
+		m_start_        = 0;
+		m_limit_        = replaceable.length();
 	}
 	
 	/**
 	 * Public constructor
+	 * By default the iteration range will be from 0 to the end of the text.
 	 * @param str text which the iterator will be based on
 	 */
 	public UCharacterIterator(String str)
 	{
 		m_replaceable_  = new ReplaceableString(str);
 		m_index_        = 0;
-		m_length_       = m_replaceable_.length();
+		m_start_        = 0;
+		m_limit_        = m_replaceable_.length();
 	}
 	
+	/**
+     * Constructs an iterator over the given range of the given string.
+     * @param  str   text to be iterated over
+     * @param  start offset of the first character to iterate
+     * @param  limit offset of the character following the last character to
+     * 					iterate
+     */
+    public UCharacterIterator(String str, int start, int limit) 
+    {
+    	m_replaceable_  = new ReplaceableString(str);
+		m_start_        = start;
+		m_limit_        = limit;
+		m_index_        = m_start_;
+    }   
+    
+    /**
+     * Constructs an iterator over the given range of the given replaceable 
+     * string.
+     * @param  replaceable text to be iterated over
+     * @param  start offset of the first character to iterate
+     * @param  limit offset of the character following the last character to
+     * 					iterate
+     */
+    public UCharacterIterator(Replaceable replaceable, int start, int limit) 
+    {
+    	m_replaceable_  = replaceable;
+		m_start_        = start;
+		m_limit_        = limit;
+		m_index_        = m_start_;
+    }   
+	
 	// public methods ----------------------------------------------------------
 	
 	/**
@ -87,7 +122,7 @@ public final class UCharacterIterator implements CharacterIterator
     */
    public char current()
    {
-        if (m_index_ >= 0 && m_index_ < m_length_) {
+        if (m_index_ >= m_start_ && m_index_ < m_limit_) {
            return m_replaceable_.charAt(m_index_);
        }
        return DONE;
@ -99,7 +134,7 @@ public final class UCharacterIterator implements CharacterIterator
     */
    public int currentCodePoint()
    {
-        if (m_index_ >= 0 && m_index_ < m_length_) {
+        if (m_index_ >= m_start_ && m_index_ < m_limit_) {
            return m_replaceable_.char32At(m_index_);
        }
        return DONE_CODEPOINT;
@ -111,26 +146,28 @@ public final class UCharacterIterator implements CharacterIterator
     */
    public char first()
    {
-        m_index_ = 0;
+        m_index_ = m_start_;
        return current();
    }
    
    /**
-     * Returns the start of the text.
-     * @return 0
+     * Returns the start of the text to iterate.
+     * @return by default this method will return 0, unless a range for 
+     * iteration had been specified during construction.
     */
    public int getBeginIndex()
    {
-        return 0;
+        return m_start_;
    }

    /**
-     * Returns the length of the text
-     * @return length of the text
+     * Returns the limit offset of the text to iterate
+     * @return by default this method returns the length of the text, unless a 
+     * range for iteration had been specified during construction.
     */
    public int getEndIndex()
    {
-        return m_length_;
+        return m_limit_;
    }
    
    /**
@ -143,31 +180,31 @@ public final class UCharacterIterator implements CharacterIterator
    }
    
    /**
-     * Gets the last UTF16 character from the text and shifts the index to the
-     * end of the text accordingly.
-     * @return the last UTF16 character
+     * Gets the last UTF16 iterateable character from the text and shifts the 
+     * index to the end of the text accordingly.
+     * @return the last UTF16 iterateable character
     */
    public char last()
    {
-        if (m_length_ != 0) {
-            m_index_ = m_length_ - 1;
+        if (m_limit_ != m_start_) {
+            m_index_ = m_limit_ - 1;
            return m_replaceable_.charAt(m_index_);
        } 
-		m_index_ = m_length_;
+		m_index_ = m_limit_;
        return DONE;
    }
    
 	/**
     * Returns next UTF16 character and increments the iterator's index by 1. 
-	 * If the resulting index is greater or equal to the text length, the 
-	 * index is reset to the text length and a value of DONE_CODEPOINT is 
+	 * If the resulting index is greater or equal to the iteration limit, the 
+	 * index is reset to the text iteration limit and a value of DONE is 
 	 * returned. 
 	 * @return next UTF16 character in text or DONE if the new index is off the 
-	 *         end of the text range.
+	 *         end of the text iteration limit.
     */
    public char next()
    {
-        if (m_index_ < m_length_) {
+        if (m_index_ < m_limit_) {
        	char result = m_replaceable_.charAt(m_index_);
            m_index_ ++;
            return result;
@ -182,20 +219,20 @@ public final class UCharacterIterator implements CharacterIterator
     * with surrogate pairs intermixed. If the index of a leading or trailing 
     * code unit of a surrogate pair is given, return the code point after the 
     * surrogate pair.
-	 * If the resulting index is greater or equal to the text length, the 
-	 * current index is reset to the text length and a value of DONE_CODEPOINT 
-	 * is returned. 
+	 * If the resulting index is greater or equal to the text iterateable limit,
+	 * the current index is reset to the text iterateable limit and a value of 
+	 * DONE_CODEPOINT is returned. 
 	 * @return next codepoint in text or DONE_CODEPOINT if the new index is off the 
-	 *         end of the text range.
+	 *         end of the text iterateable limit.
 	 */	
 	public int nextCodePoint()
 	{
-		if (m_index_ < m_length_) {
+		if (m_index_ < m_limit_) {
 			char ch = m_replaceable_.charAt(m_index_);
 			m_index_ ++;
 			if (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
 			    ch <= UTF16.LEAD_SURROGATE_MAX_VALUE &&
-			    m_index_ < m_length_) {
+			    m_index_ < m_limit_) {
 			    char trail = m_replaceable_.charAt(m_index_);
 			    if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
 			    	trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
@ -212,14 +249,15 @@ public final class UCharacterIterator implements CharacterIterator
    /**
     * Returns previous UTF16 character and decrements the iterator's index by 
     * 1. 
-	 * If the resulting index is less than 0, the index is reset to 0 and a 
-	 * value of DONE_CODEPOINT is returned. 
+	 * If the resulting index is less than the start of the text iteration 
+	 * range, the index is reset to the start of the text iteration range and 
+	 * a value of DONE is returned. 
 	 * @return next UTF16 character in text or DONE if the new index is off the 
-	 *         start of the text range.
+	 *         start of the text iteration range.
     */
    public char previous()
    {
-        if (m_index_ > 0) {
+        if (m_index_ > m_start_) {
            m_index_ --;
            return m_replaceable_.charAt(m_index_);
        }
@ -233,19 +271,20 @@ public final class UCharacterIterator implements CharacterIterator
     * with surrogate pairs intermixed. If the index of a leading or trailing 
     * code unit of a surrogate pair is given, return the code point before the 
     * surrogate pair.
-	 * If the resulting index is less than 0, the current index is reset to 0
-	 * and a value of DONE_CODEPOINT is returned. 
+	 * If the resulting index is less than the text iterateable range, the 
+	 * current index is reset to the start of the range and a value of 
+	 * DONE_CODEPOINT is returned. 
 	 * @return previous codepoint in text or DONE_CODEPOINT if the new index is 
-	 *         off the start of the text range.
+	 *         off the start of the text iteration range.
     */
    public int previousCodePoint()
    {
-        if (m_index_ > 0) {
+        if (m_index_ > m_start_) {
            m_index_ --;
            char ch = m_replaceable_.charAt(m_index_);
 			if (ch >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
 			    ch <= UTF16.TRAIL_SURROGATE_MAX_VALUE &&
-			    m_index_ > 0) {
+			    m_index_ > m_start_) {
 			    char lead = m_replaceable_.charAt(m_index_);
 			    if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
 			    	lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
@ -267,12 +306,11 @@ public final class UCharacterIterator implements CharacterIterator
 	 * @exception IllegalArgumentException is thrown if an invalid index is 
 	 *            supplied. i.e. index is out of bounds.
 	 * @return the character at the specified index or DONE if the specified 
-	 *         index is equal to the end of the text.
+	 *         index is equal to the limit of the text iteration range.
 	 */
 	public char setIndex(int index)
 	{
-		int length = m_replaceable_.length();
-		if (index < 0 || index > length) {
+		if (index < m_start_ || index > m_limit_) {
 			throw new IllegalArgumentException("Index index out of bounds");
 		}
 		m_index_ = index;
@ -290,7 +328,12 @@ public final class UCharacterIterator implements CharacterIterator
 	 */
 	private int m_index_;
 	/**
-	 * Replaceable text length
+	 * Start offset of iterateable range, by default this is 0
 	 */
-	private int m_length_;
+	private int m_start_;
+	/**
+	 * Limit offset of iterateable range, by default this is the length of the
+	 * string
+	 */
+	private int m_limit_;
 }
--- a/icu4j/src/com/ibm/icu/text/BOSCU.java
+++ b/icu4j/src/com/ibm/icu/text/BOSCU.java
@ -0,0 +1,382 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2002, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Attic/BOSCU.java,v $ 
+* $Date: 2002/05/14 16:48:48 $ 
+* $Revision: 1.1 $
+*
+*******************************************************************************
+*/
+package com.ibm.icu.text;
+
+import com.ibm.icu.impl.UCharacterIterator;
+
+/**
+ * <p>Binary Ordered Compression Scheme for Unicode</p>
+ * 
+ * <p>Specific application:<br>
+ * Encode a Unicode string for the identical level of a sort key.<br>
+ * Restrictions:
+ * <ul>
+ * <li> byte stream (unsigned 8-bit bytes)
+ * <li> lexical order of the identical-level run must be the same as code 
+ * 		point order for the string
+ * <li> avoid byte values 0, 1, 2
+ * </ul>
+ * </p>
+ * 
+ * <p>Method: Slope Detection<br>
+ * Remember the previous code point (initial 0).
+ * For each cp in the string, encode the difference to the previous one.
+ * </p>
+ * <p>With a compact encoding of differences, this yields good results for
+ * small scripts and UTF-like results otherwise.
+ * </p>
+ * <p>Encoding of differences:<br>
+ * <ul> 
+ * <li>Similar to a UTF, encoding the length of the byte sequence in the lead 
+ * 		bytes.
+ * <li> Does not need to be friendly for decoding or random access
+ *     (trail byte values may overlap with lead/single byte values).
+ * <li> The signedness must be encoded as the most significant part.
+ * </ul>
+ * </p>
+ * <p>We encode differences with few bytes if their absolute values are small.
+ * For correct ordering, we must treat the entire value range -10ffff..+10ffff
+ * in ascending order, which forbids encoding the sign and the absolute value 
+ * separately.
+ * Instead, we split the lead byte range in the middle and encode non-negative 
+ * values going up and negative values going down.
+ * </p>
+ * <p>For very small absolute values, the difference is added to a middle byte 
+ * value for single-byte encoded differences.
+ * For somewhat larger absolute values, the difference is divided by the number
+ * of byte values available, the modulo is used for one trail byte, and the 
+ * quotient is added to a lead byte avoiding the single-byte range.
+ * For large absolute values, the difference is similarly encoded in three 
+ * bytes.
+ * </p>
+ * <p>This encoding does not use byte values 0, 1, 2, but uses all other byte 
+ * values for lead/single bytes so that the middle range of single bytes is as 
+ * large as possible.
+ * </p>
+ * <p>Note that the lead byte ranges overlap some, but that the sequences as a 
+ * whole are well ordered. I.e., even if the lead byte is the same for 
+ * sequences of different lengths, the trail bytes establish correct order.
+ * It would be possible to encode slightly larger ranges for each length (>1) 
+ * by subtracting the lower bound of the range. However, that would also slow 
+ * down the calculation.
+ * </p>
+ * <p>For the actual string encoding, an optimization moves the previous code 
+ * point value to the middle of its Unicode script block to minimize the 
+ * differences in same-script text runs.
+ * </p>
+ * @author Syn Wee Quek
+ * @since release 2.2, May 3rd 2002
+ * @draft 2.2
+ */
+public class BOSCU 
+{      
+	// public constructors --------------------------------------------------
+    
+	// public methods -------------------------------------------------------
+	
+	/**
+	 * <p>Encode the code points of a string as a sequence of byte-encoded 
+	 * differences (slope detection), preserving lexical order.</p>
+	 * <p>Optimize the difference-taking for runs of Unicode text within
+	 * small scripts:<br>
+	 * Most small scripts are allocated within aligned 128-blocks of Unicode
+	 * code points. Lexical order is preserved if "prev" is always moved
+	 * into the middle of such a block.</p>
+	 * <p>Additionally, "prev" is moved from anywhere in the Unihan area into 
+	 * the middle of that area.</p>
+	 * <p>Note that the identical-level run in a sort key is generated from
+	 * NFD text - there are never Hangul characters included.</p>
+	 * @param source text source
+	 * @param buffer output buffer; its capacity is not checked here
+	 * @param offset index in buffer at which to start writing
+	 * @return end offset, i.e. the index just past the last byte written
+	 */
+	public static int writeIdenticalLevelRun(String source, byte buffer[], 
+																int offset) 
+	{
+	    int prev = 0; // previous code point, initially 0
+	    UCharacterIterator iterator = new UCharacterIterator(source);
+	    int codepoint = iterator.nextCodePoint();
+	    while (codepoint != UCharacterIterator.DONE_CODEPOINT) {
+	        if (prev < 0x4e00 || prev >= 0xa000) {
+	            prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_; // 128-block base + SLOPE_SINGLE_
+	        } 
+	        else {
+	            // Unihan U+4e00..U+9fa5:
+	            // double-bytes down from the upper end
+	            prev = 0x9fff - SLOPE_REACH_POS_2_;
+	        }
+	
+	        offset = writeDiff(codepoint - prev, buffer, offset);
+	        prev = codepoint;
+	        codepoint = iterator.nextCodePoint();
+	    }
+	    return offset;
+	}
+	
+	/** 
+	 * Computes how many bytes writeIdenticalLevelRun() would write for source.
+	 * @param source text source string
+	 * @return the length of the BOSCU result 
+	 */
+	public static int lengthOfIdenticalLevelRun(String source) 
+	{
+	    int prev = 0; // previous code point, initially 0
+	    int result = 0;
+	    UCharacterIterator iterator = new UCharacterIterator(source);
+	    int codepoint = iterator.nextCodePoint();
+	    while (codepoint != UCharacterIterator.DONE_CODEPOINT) {
+	        if (prev < 0x4e00 || prev >= 0xa000) {
+	            prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
+	        } 
+	        else {
+	            // Unihan U+4e00..U+9fa5:
+	            // double-bytes down from the upper end
+	            prev = 0x9fff - SLOPE_REACH_POS_2_;
+	        }
+	        // measure the current code point first, then advance
+	        result += lengthOfDiff(codepoint - prev);
+	        prev = codepoint;
+	        codepoint = iterator.nextCodePoint();
+	    }
+	    return result;
+	}
+
+	// public setter methods -------------------------------------------------
+	
+    // public getter methods ------------------------------------------------
+	    
+	// public other methods -------------------------------------------------
+    
+    // protected constructor ------------------------------------------------
+      
+  	// protected data members ------------------------------------------------
+    
+    // protected methods -----------------------------------------------------
+ 
+ 	// private data members --------------------------------------------------
+    
+    /** 
+     * Do not use byte values 0, 1, 2 because they are separators in sort keys.
+     */
+	private static final int SLOPE_MIN_ = 3; // lowest byte value written
+	private static final int SLOPE_MAX_ = 0xff; // highest byte value written
+	private static final int SLOPE_MIDDLE_ = 0x81; // byte written for a difference of 0
+	private static final int SLOPE_TAIL_COUNT_ = SLOPE_MAX_ - SLOPE_MIN_ + 1; // 253 usable byte values
+	private static final int SLOPE_MAX_BYTES_ = 4; // longest encoded difference, in bytes
+
+	/**
+ 	 * Number of lead bytes:
+	 * 1        middle byte for 0
+	 * 2*80=160 single bytes for !=0
+	 * 2*42=84  for double-byte values
+	 * 2*3=6    for 3-byte values
+	 * 2*1=2    for 4-byte values
+	 *
+	 * The sum must be <=SLOPE_TAIL_COUNT.
+	 *
+	 * Why these numbers?
+	 * - There should be >=128 single-byte values to cover 128-blocks
+	 *   with small scripts.
+	 * - There should be >=20902 single/double-byte values to cover Unihan.
+	 * - It helps CJK Extension B some if there are 3-byte values that cover
+	 *   the distance between them and Unihan.
+	 *   This also helps to jump among distant places in the BMP.
+	 * - Four-byte values are necessary to cover the rest of Unicode.
+	 *
+ 	 * Symmetrical lead byte counts are for convenience.
+	 * With an equal distribution of even and odd differences there is also
+	 * no advantage to asymmetrical lead byte counts.
+	 */
+	private static final int SLOPE_SINGLE_ = 80;
+	private static final int SLOPE_LEAD_2_ = 42;
+	private static final int SLOPE_LEAD_3_ = 3;
+	private static final int SLOPE_LEAD_4_ = 1;
+
+	/** 
+	 * The difference value range for single-byters.
+	 */
+	private static final int SLOPE_REACH_POS_1_ = SLOPE_SINGLE_;
+	private static final int SLOPE_REACH_NEG_1_ = (-SLOPE_SINGLE_);
+
+	/** 
+	 * The difference value range for double-byters.
+	 */
+	private static final int SLOPE_REACH_POS_2_ = 
+					SLOPE_LEAD_2_ * SLOPE_TAIL_COUNT_ + SLOPE_LEAD_2_ - 1;
+	private static final int SLOPE_REACH_NEG_2_ = (-SLOPE_REACH_POS_2_ - 1);
+
+	/** 
+	 * The difference value range for 3-byters.
+	 */
+	private static final int SLOPE_REACH_POS_3_ = SLOPE_LEAD_3_ 
+	 											  * SLOPE_TAIL_COUNT_ 
+												  * SLOPE_TAIL_COUNT_ 
+												  + (SLOPE_LEAD_3_ - 1)
+												  * SLOPE_TAIL_COUNT_ +
+												  (SLOPE_TAIL_COUNT_ - 1);
+	private static final int SLOPE_REACH_NEG_3_ = (-SLOPE_REACH_POS_3_ - 1);
+
+	/** 
+	 * The lead byte start values.
+	 */
+	private static final int SLOPE_START_POS_2_ = SLOPE_MIDDLE_ 
+													+ SLOPE_SINGLE_ + 1;
+	private static final int SLOPE_START_POS_3_ = SLOPE_START_POS_2_ 
+													+ SLOPE_LEAD_2_;
+	private static final int SLOPE_START_NEG_2_ = SLOPE_MIDDLE_ + 
+													SLOPE_REACH_NEG_1_;
+	private static final int SLOPE_START_NEG_3_ = SLOPE_START_NEG_2_
+													- SLOPE_LEAD_2_;
+													
+	// private constructor ---------------------------------------------------
+	
+	/**
+	 * Private constructor, so that this class cannot be instantiated
+	 */
+	private BOSCU()
+	{
+	}													
+    
+    // private methods -------------------------------------------------------
+    
+    /**
+ 	 * Division and modulo for negative numerators. Java's / and % give a
+ 	 * negative modulo and a quotient one more than needed here; both are
+ 	 * adjusted so that, for a positive factor, the modulo is not negative.
+ 	 * @param number numerator which the operations are to be performed on
+ 	 * @param factor divisor; callers in this class pass SLOPE_TAIL_COUNT_
+ 	 * @return quotient in the upper 32 bits, modulo in the lower 32 bits 
+ 	 */
+	private static final long getNegDivMod(int number, int factor) 
+	{
+    	int modulo = number % factor; 
+    	long result = number / factor;
+    	if (modulo < 0) { 
+        	-- result; 
+        	modulo += factor; 
+    	} 
+    	return (result << 32) | modulo;
+   	}
+   	
+   	/**
+	 * Encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes,
+	 * preserving lexical order
+	 * @param diff signed difference between two code point values
+	 * @param buffer byte buffer to append to
+	 * @param offset to the byte buffer to start appending
+	 * @return end offset where the appending stops
+	 */
+	private static final int writeDiff(int diff, byte buffer[], int offset) 
+	{
+	    if (diff >= SLOPE_REACH_NEG_1_) {
+	        if (diff <= SLOPE_REACH_POS_1_) {
+	            buffer[offset ++] = (byte)(SLOPE_MIDDLE_ + diff);
+	        } 
+	        else if (diff <= SLOPE_REACH_POS_2_) {
+	            buffer[offset ++] = (byte)(SLOPE_START_POS_2_ 
+	            							+ (diff / SLOPE_TAIL_COUNT_));
+	            buffer[offset ++] = (byte)(SLOPE_MIN_ + 
+	            								(diff % SLOPE_TAIL_COUNT_));
+	        } 
+	        else if (diff <= SLOPE_REACH_POS_3_) {
+	            buffer[offset + 2] = (byte)(SLOPE_MIN_ 
+	            							+ (diff % SLOPE_TAIL_COUNT_));
+	            diff /= SLOPE_TAIL_COUNT_;
+	            buffer[offset + 1] = (byte)(SLOPE_MIN_ 
+	            							+ (diff % SLOPE_TAIL_COUNT_));
+	            buffer[offset] = (byte)(SLOPE_START_POS_3_ 
+	            						+ (diff / SLOPE_TAIL_COUNT_));
+	            offset += 3;
+	        } 
+	        else { // four bytes: lead SLOPE_MAX_ followed by three trail bytes
+	            buffer[offset + 3] = (byte)(SLOPE_MIN_ 
+	            							+ diff % SLOPE_TAIL_COUNT_);
+	            diff /= SLOPE_TAIL_COUNT_;
+	            buffer[offset + 2] = (byte)(SLOPE_MIN_ 
+	            						+ diff % SLOPE_TAIL_COUNT_);
+	            diff /= SLOPE_TAIL_COUNT_;
+	            buffer[offset + 1] = (byte)(SLOPE_MIN_ 
+	            							+ diff % SLOPE_TAIL_COUNT_);
+	            buffer[offset] = (byte)SLOPE_MAX_;
+	            offset += 4;
+	        }
+	    } 
+	    else { // diff < SLOPE_REACH_NEG_1_: negative multi-byte forms
+	        long division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
+	        int modulo = (int)division;
+	        if (diff >= SLOPE_REACH_NEG_2_) {
+	            diff = (int)(division >> 32);
+	            buffer[offset ++] = (byte)(SLOPE_START_NEG_2_ + diff);
+	            buffer[offset ++] = (byte)(SLOPE_MIN_ + modulo);
+	        } 
+	        else if (diff >= SLOPE_REACH_NEG_3_) {
+	            buffer[offset + 2] = (byte)(SLOPE_MIN_ + modulo);
+	            diff = (int)(division >> 32);
+	            division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
+	            modulo = (int)division;
+	            diff = (int)(division >> 32);
+	            buffer[offset + 1] = (byte)(SLOPE_MIN_ + modulo);
+	            buffer[offset] = (byte)(SLOPE_START_NEG_3_ + diff);
+	            offset += 3;
+	        } 
+	        else { // four bytes: lead SLOPE_MIN_ followed by three trail bytes
+	            buffer[offset + 3] = (byte)(SLOPE_MIN_ + modulo);
+	            diff = (int)(division >> 32);
+	            division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
+	            modulo = (int)division;
+	            diff = (int)(division >> 32);
+	            buffer[offset + 2] = (byte)(SLOPE_MIN_ + modulo);
+	            division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
+	            modulo = (int)division;
+	            buffer[offset + 1] = (byte)(SLOPE_MIN_ + modulo);
+	            buffer[offset] = SLOPE_MIN_;
+	            offset += 4;
+	        }
+	    }
+	    return offset;
+	}
+	
+	/**
+	 * Returns how many bytes (1 to 4) writeDiff() would write for diff.
+	 * @param diff signed difference between two code point values
+	 */
+	private static final int lengthOfDiff(int diff) 
+	{
+	    if (diff >= SLOPE_REACH_NEG_1_) {
+	        if (diff <= SLOPE_REACH_POS_1_) {
+	            return 1;
+	        } 
+	        else if (diff <= SLOPE_REACH_POS_2_) {
+	            return 2;
+	        } 
+	        else if(diff <= SLOPE_REACH_POS_3_) {
+	            return 3;
+	        } 
+	        else {
+	            return 4;
+	        }
+	    } 
+	    else {
+	        if (diff >= SLOPE_REACH_NEG_2_) {
+	            return 2;
+	        } 
+	        else if (diff >= SLOPE_REACH_NEG_3_) {
+	            return 3;
+	        } 
+	        else {
+	            return 4;
+	        }
+	    }
+	}
+}
--- a/icu4j/src/com/ibm/icu/text/CollationElementIterator.java
+++ b/icu4j/src/com/ibm/icu/text/CollationElementIterator.java
--- a/icu4j/src/com/ibm/icu/text/CollationKey.java
+++ b/icu4j/src/com/ibm/icu/text/CollationKey.java
@ -0,0 +1,260 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2002, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollationKey.java,v $ 
+* $Date: 2002/05/14 16:48:49 $ 
+* $Revision: 1.4 $
+*
+*******************************************************************************
+*/
+package com.ibm.icu.text;
+
+import java.util.Arrays;
+
+/**
+ * <p>A <code>CollationKey</code> represents a <code>String</code> under the
+ * rules of a specific <code>Collator</code> object. Comparing two
+ * <code>CollationKey</code>s returns the relative order of the
+ * <code>String</code>s they represent. Using <code>CollationKey</code>s to 
+ * compare <code>String</code>s is generally faster than using 
+ * <code>Collator.compare</code>. Thus, when the <code>String</code>s must be 
+ * compared multiple times, for example when sorting a list of 
+ * <code>String</code>s. It's more efficient to use <code>CollationKey</code>s.
+ * </p>
+ * <p>You can not create <code>CollationKey</code>s directly. Rather, generate 
+ * them by calling <code>Collator.getCollationKey(String)</code>. You can only 
+ * compare <code>CollationKey</code>s generated from the same 
+ * <code>Collator</code> object.</p>
+ * <p>Generating a <code>CollationKey</code> for a <code>String</code>
+ * involves examining the entire <code>String</code> and converting it to 
+ * series of bits that can be compared bitwise. This allows fast comparisons 
+ * once the keys are generated. The cost of generating keys is recouped in 
+ * faster comparisons when <code>String</code>s need to be compared many 
+ * times. On the other hand, the result of a comparison is often determined by 
+ * the first couple of characters of each <code>String</code>.
+ * <code>Collator.compare(String, String)</code> examines only as many characters as it needs 
+ * which allows it to be faster when doing single comparisons.</p>
+ * <p>The following example shows how <code>CollationKey</code>s might be used
+ * to sort a list of <code>String</code>s.</p>
+ * <blockquote>
+ * <pre>
+ * // Create an array of CollationKeys for the Strings to be sorted.
+ * Collator myCollator = Collator.getInstance();
+ * CollationKey[] keys = new CollationKey[3];
+ * keys[0] = myCollator.getCollationKey("Tom");
+ * keys[1] = myCollator.getCollationKey("Dick");
+ * keys[2] = myCollator.getCollationKey("Harry");
+ * sort( keys );
+ * <br>
+ * //...
+ * <br>
+ * // Inside body of sort routine, compare keys this way
+ * if( keys[i].compareTo( keys[j] ) > 0 )
+ *    // swap keys[i] and keys[j]
+ * <br>
+ * //...
+ * <br>
+ * // Finally, when we've returned from sort.
+ * System.out.println( keys[0].getSourceString() );
+ * System.out.println( keys[1].getSourceString() );
+ * System.out.println( keys[2].getSourceString() );
+ * </pre>
+ * </blockquote>
+ *
+ * @see Collator
+ * @see RuleBasedCollator
+ * @author Syn Wee Quek
+ * @since release 2.2, April 18 2002
+ * @draft 2.2
+ */
+public final class CollationKey implements Comparable 
+{
+	// public methods -------------------------------------------------------
+
+	// public getters -------------------------------------------------------
+	
+    /**
+     * Returns the String that this CollationKey represents.
+     * @return the source string that was supplied when this key was created
+     * @draft 2.2
+     */
+    public String getSourceString() 
+    {
+        return m_source_;
+    }
+
+    /**
+     * <p>Duplicates and returns the value of this CollationKey as a sequence 
+     * of big-endian bytes, up to and including the first 0 byte.</p> 
+     * <p>If two CollationKeys could be legitimately compared, then one could 
+     * compare the byte arrays of each to obtain the same result.</p>  
+     * @return a new array holding the CollationKey value as big-endian bytes.
+     * @draft 2.2
+     */
+    public byte[] toByteArray() 
+    {
+    	int length = 0;
+    	while (true) { // scan for the first 0 byte; no bounds check is made
+    		if (m_key_[length] == 0) {
+    			break;
+    		}
+    		length ++;
+    	}
+    	length ++; // include the 0 byte in the copy
+    	byte result[] = new byte[length];
+    	System.arraycopy(m_key_, 0, result, 0, length);
+        return result;
+    }
+
+ 	// public other methods -------------------------------------------------	
+ 	
+    /**
+     * <p>Compare this CollationKey to the target CollationKey. The collation 
+     * rules of the Collator object which created these keys are applied.</p>
+     * <p><strong>Note:</strong> CollationKeys created by different Collators 
+     * can not be compared.</p>
+     * @param target target CollationKey
+     * @return an integer value: less than zero if this CollationKey is less 
+     *         than target, zero if they are equal, and greater than zero if 
+     *         this CollationKey is greater than target. This implementation 
+     *         returns exactly -1, 0 or 1.
+     * @see Collator#compare(String, String)
+     * @draft 2.2
+     */
+    public int compareTo(CollationKey target)
+    {
+    	int i = 0;
+    	while (m_key_[i] != 0 && target.m_key_[i] != 0) {
+    		int key = m_key_[i] & 0xFF; // compare the bytes as unsigned values
+    		int targetkey = target.m_key_[i] & 0xFF;
+    		if (key < targetkey) {
+    			return -1;
+    		}
+    		if (targetkey < key) {
+    			return 1;
+    		}
+    		i ++;
+    	}
+    	// last comparison if we encounter a 0
+    	int key = m_key_[i] & 0xFF;
+    	int targetkey = target.m_key_[i] & 0xFF;
+        if (key < targetkey) {
+    		return -1;
+    	}
+    	if (targetkey < key) {
+    		return 1;
+    	}
+        return 0;
+    }
+
+    /**
+     * <p>Compares this CollationKey with the specified Object.</p>
+     * @param obj the Object to be compared; it is cast to CollationKey.
+     * @return Returns a negative integer, zero, or a positive integer 
+     *         respectively if this CollationKey is less than, equal to, or 
+     *         greater than the given Object.
+     * @exception ClassCastException thrown when the specified Object is not a
+     *		      CollationKey.
+     * @see #compareTo(CollationKey)
+     * @draft 2.2
+     */
+    public int compareTo(Object obj) 
+    {
+ 		return compareTo((CollationKey)obj);
+    }
+
+    /**
+     * <p>Compare this CollationKey and the target CollationKey for equality.
+     * </p>
+     * <p>The key bytes are compared up to and including the first 0 byte; 
+     * the source strings are not compared.</p>
+     * <p><strong>Note:</strong> CollationKeys created by different Collators 
+     * can not be compared.</p>
+     * @param target the CollationKey to compare to.
+     * @return true if two objects are equal, false otherwise.
+     * @draft 2.2
+     */
+    public boolean equals(Object target) 
+    {
+        if (this == target) {
+        	return true;
+        }
+        if (target == null || !(target instanceof CollationKey)) {
+            return false;
+        }
+        CollationKey other = (CollationKey)target;
+        int i = 0;
+        while (true) {
+        	if (m_key_[i] != other.m_key_[i]) {
+        		return false;
+        	}
+        	if (m_key_[i] == 0) { // both keys ended at the same position
+        		break;
+        	}
+        	i ++;
+        }
+        return true;
+    }
+
+    /**
+     * <p>Creates a hash code for this CollationKey. The hash value is 
+     * calculated on the key bytes up to the terminating 0, not on the String 
+     * from which the key was created. Thus if x and y are CollationKeys, then 
+     * x.hashCode() == y.hashCode() if x.equals(y) is true. The value is 
+     * computed on first use and cached.</p>
+     * @return the hash value.
+     * @draft 2.2
+     */
+    public int hashCode() 
+    {
+    	if (m_hashCode_ == 0) {
+    		int size = m_key_.length >> 1;
+    		StringBuffer key = new StringBuffer(size);
+    		int i = 0;
+    		while (m_key_[i] != 0 && m_key_[i + 1] != 0) {
+    			key.append((char)((m_key_[i] << 8) | (m_key_[i + 1] & 0xFF)));
+    			i += 2;
+    		}
+    		if (m_key_[i] != 0) {
+    			key.append((char)(m_key_[i] << 8));
+    		}
+    		// StringBuffer only has Object's identity hash; hash the text
+    		m_hashCode_ = key.toString().hashCode();
+    	}
+        return m_hashCode_;
+    }
+
+	// protected constructor ------------------------------------------------
+    
+    /**
+     * Package-private constructor: CollationKeys are generated by Collator 
+     * objects only. The key array is stored as is, without being copied.
+     * @param source string the CollationKey represents
+     * @param key sort key array of bytes; presumably 0-terminated - TODO confirm
+     * @draft 2.2
+     */
+    CollationKey(String source, byte key[])
+    {
+    	m_source_ = source;
+    	m_key_ = key;
+    	m_hashCode_ = 0; // 0 means "not computed yet", see hashCode()
+    }
+
+	// private data members -------------------------------------------------
+
+	/**
+	 * Source string this CollationKey represents
+	 */	
+    private String m_source_;
+    /**
+     * Sequence of bytes that represents the sort key
+     */
+    private byte m_key_[];
+    /**
+     * Hash code for the key
+     */
+    private int m_hashCode_;
+}
--- a/icu4j/src/com/ibm/icu/text/Collator.java
+++ b/icu4j/src/com/ibm/icu/text/Collator.java
@ -0,0 +1,454 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2002, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Collator.java,v $ 
+* $Date: 2002/05/14 16:48:49 $ 
+* $Revision: 1.4 $
+*
+*******************************************************************************
+*/
+package com.ibm.icu.text;
+
+import java.util.Locale;
+
+/**
+* <p>The Collator class performs locale-sensitive String comparison. 
+* You use this class to build searching and sorting routines for natural 
+* language text.</p> 
+* <p>Collator is an abstract base class. Subclasses implement specific 
+* collation strategies. One subclass, RuleBasedCollator, is currently 
+* provided and is applicable to a wide set of languages. Other subclasses 
+* may be created to handle more specialized needs.</p>
+* <p>Like other locale-sensitive classes, you can use the static factory 
+* method, getInstance, to obtain the appropriate Collator object for a given 
+* locale. You will only need to look at the subclasses of Collator if you need 
+* to understand the details of a particular collation strategy or if you need 
+* to modify that strategy. </p>
+* <p>The following example shows how to compare two strings using the Collator 
+* for the default locale. 
+* <pre>
+* // Compare two strings in the default locale
+* Collator myCollator = Collator.getInstance();
+* if (myCollator.compare("abc", "ABC") < 0) {
+*     System.out.println("abc is less than ABC");
+* }
+* else {
+*     System.out.println("abc is greater than or equal to ABC");
+* }
+* </pre>
+* <p>You can set a <code>Collator</code>'s <em>strength</em> property to 
+* determine the level of difference considered significant in comparisons. 
+* Four strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>, 
+* <code>TERTIARY</code>, and <code>IDENTICAL</code>. The exact assignment of 
+* strengths to language features is locale dependant. For example, in Czech, 
+* "e" and "f" are considered primary differences, while "e" and "\u00EA" are 
+* secondary differences, "e" and "E" are tertiary differences and "e" and "e" 
+* are identical. The following shows how both case and accents could be 
+* ignored for US English.</p>
+* <pre>
+* //Get the Collator for US English and set its strength to PRIMARY
+* Collator usCollator = Collator.getInstance(Locale.US);
+* usCollator.setStrength(Collator.PRIMARY);
+* if (usCollator.compare("abc", "ABC") == 0) {
+*     System.out.println("Strings are equivalent");
+* }
+* </pre>
+* <p>For comparing Strings exactly once, the compare method provides the best 
+* performance. When sorting a list of Strings however, it is generally 
+* necessary to compare each String multiple times. In this case, 
+* CollationKeys provide better performance. The CollationKey class converts a 
+* String to a series of bits that can be compared bitwise against other 
+* CollationKeys. A CollationKey is created by a Collator object for a given 
+* String.</p> 
+* <p>Note: CollationKeys from different Collators can not be compared. See the 
+* class description for CollationKey for an example using CollationKeys. 
+* </p>
+* @author Syn Wee Quek
+* @since release 2.2, April 18 2002
+* @draft 2.2
+*/
+
+public abstract class Collator
+{     
+	// public data members ---------------------------------------------------
+	
+	/**
+     * Collator strength value. When set, only PRIMARY differences are
+     * considered significant during comparison. The assignment of strengths
+     * to language features is locale dependent. A common example is for
+     * different base letters ("a" vs "b") to be considered a PRIMARY 
+     * difference.
+     * @see #setStrength
+     * @see #getStrength
+     * @draft 2.2
+     */
+    public final static int PRIMARY 
+    							= RuleBasedCollator.AttributeValue.PRIMARY_;
+    /**
+     * Collator strength value. When set, only SECONDARY and above 
+     * differences are considered significant during comparison. The 
+     * assignment of strengths to language features is locale dependent. A 
+     * common example is for different accented forms of the same base letter 
+     * ("a" vs "\u00E4") to be considered a SECONDARY difference.
+     * @see #setStrength
+     * @see #getStrength
+     * @draft 2.2
+     */
+    public final static int SECONDARY 
+    							= RuleBasedCollator.AttributeValue.SECONDARY_;
+    /**
+     * Collator strength value. When set, only TERTIARY and above differences 
+     * are considered significant during comparison. The assignment of 
+     * strengths to language features is locale dependent. A common example is 
+     * for case differences ("a" vs "A") to be considered a TERTIARY 
+     * difference.
+     * @see #setStrength
+     * @see #getStrength
+     * @draft 2.2
+     */
+    public final static int TERTIARY 
+    							= RuleBasedCollator.AttributeValue.TERTIARY_;
+
+    /**
+     * Collator strength value. When set, only QUATERNARY and above 
+     * differences are considered significant during comparison. The 
+     * assignment of strengths to language features is locale dependent.
+     * @see #setStrength
+     * @see #getStrength
+     * @draft 2.2
+     */
+    public final static int QUATERNARY 
+    							= RuleBasedCollator.AttributeValue.QUATERNARY_;
+
+    /**
+     * <p>Collator strength value. When set, all differences are considered 
+     * significant during comparison. The assignment of strengths to language 
+     * features is locale dependent. A common example is for control 
+     * characters ("&#092;u0001" vs "&#092;u0002") to be considered equal at 
+     * the PRIMARY, SECONDARY, and TERTIARY levels but different at the 
+     * IDENTICAL level.  Additionally, differences between pre-composed 
+     * accents such as "&#092;u00C0" (A-grave) and combining accents such as 
+     * "A&#092;u0300" (A, combining-grave) will be considered significant at 
+     * the tertiary level if decomposition is set to NO_DECOMPOSITION.
+     * </p>
+     * <p>Note that this value is different from the JDK's.</p>
+     * @see #setStrength
+     * @see #getStrength
+     * @draft 2.2
+     */
+    public final static int IDENTICAL 
+    							= RuleBasedCollator.AttributeValue.IDENTICAL_;
+
+    /**
+     * <p>Decomposition mode value. With NO_DECOMPOSITION set, accented 
+     * characters will not be decomposed for collation. This provides the 
+     * fastest collation but will only produce correct results for languages 
+     * that do not use accents.</p>
+     * <p>NOTE(review): earlier documentation called this the default 
+     * setting, but the Collator constructor in this file initializes the 
+     * mode to CANONICAL_DECOMPOSITION, and setDecomposition stores 
+     * CANONICAL_DECOMPOSITION when given this value - confirm the intended 
+     * default.</p>
+     * <p>Note that this value is different from the JDK's.</p>
+     * @see #getDecomposition
+     * @see #setDecomposition
+     * @draft 2.2
+     */
+    public final static int NO_DECOMPOSITION 
+    							= RuleBasedCollator.AttributeValue.OFF_;
+
+    /**
+     * <p>Decomposition mode value. With CANONICAL_DECOMPOSITION set, 
+     * characters that are canonical variants according to Unicode 2.0 will be 
+     * decomposed for collation. This should be used to get correct collation 
+     * of accented characters.</p>
+     * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
+     * described in <a href="http://www.unicode.org/unicode/reports/tr15/">
+     * Unicode Technical Report #15</a>.</p>
+     * @see #getDecomposition
+     * @see #setDecomposition
+     * @draft 2.2
+     */
+    public final static int CANONICAL_DECOMPOSITION = 1;
+
+    /**
+     * <p>Decomposition mode value. With FULL_DECOMPOSITION set, both Unicode 
+     * canonical variants and Unicode compatibility variants will be 
+     * decomposed for collation.  This causes not only accented characters to 
+     * be collated, but also characters that have special formats to be 
+     * collated with their nominal form. For example, the half-width and
+     * full-width ASCII and Katakana characters are then collated together.
+     * FULL_DECOMPOSITION is the most complete and therefore the slowest
+     * decomposition mode.</p>
+     * <p>
+     * FULL_DECOMPOSITION corresponds to Normalization Form KD as described in 
+     * <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode 
+     * Technical Report #15</a>.</p>
+     * @see #getDecomposition
+     * @see #setDecomposition
+     * @draft 2.2
+     */
+    public final static int FULL_DECOMPOSITION = 2;
+    
+    // public methods --------------------------------------------------------
+    
+    // public setters --------------------------------------------------------
+    
+    /**
+     * <p>Sets this Collator's strength property. The strength property 
+     * determines the minimum level of difference considered significant 
+     * during comparison.</p>
+     * <p>See the Collator class description for an example of use.</p>
+     * @param the new strength value.
+     * @see #getStrength
+     * @see #PRIMARY
+     * @see #SECONDARY
+     * @see #TERTIARY
+     * @see #IDENTICAL
+     * @exception  IllegalArgumentException If the new strength value is not one of
+     * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
+     * @draft 2.2
+     */
+    public synchronized void setStrength(int newStrength) {
+        if ((newStrength != PRIMARY) &&
+            (newStrength != SECONDARY) &&
+            (newStrength != TERTIARY) &&
+            (newStrength != QUATERNARY) &&
+            (newStrength != IDENTICAL)) {
+            throw new IllegalArgumentException("Incorrect comparison level.");
+        }
+        m_strength_ = newStrength;
+    }
+    
+    /**
+     * Set the decomposition mode of this Collator. See getDecomposition
+     * for a description of decomposition mode.
+     * @param decomposition the new decomposition mode
+     * @see #getDecomposition
+     * @see #NO_DECOMPOSITION
+     * @see #CANONICAL_DECOMPOSITION
+     * @see #FULL_DECOMPOSITION
+     * @exception IllegalArgumentException If the given value is not a valid decomposition
+     * mode.
+     * @draft 2.2
+     */
+    public synchronized void setDecomposition(int decomposition) {
+        if ((decomposition != NO_DECOMPOSITION) &&
+            (decomposition != CANONICAL_DECOMPOSITION) &&
+            (decomposition != FULL_DECOMPOSITION)) {
+            throw new IllegalArgumentException("Wrong decomposition mode.");
+        }
+        if (decomposition != NO_DECOMPOSITION) {
+        	m_decomposition_ = decomposition;
+        }
+        else {
+        	m_decomposition_ = CANONICAL_DECOMPOSITION;
+        }
+    }
+    
+    // public getters --------------------------------------------------------
+    
+    /**
+     * Gets the Collator for the current default locale.
+     * The default locale is determined by java.util.Locale.getDefault(), 
+     * and the lookup is delegated to getInstance(Locale), so the result on 
+     * failure is whatever that method returns.
+     * @return the Collator for the default locale (for example, en_US)
+     * @see java.util.Locale#getDefault
+     * @see #getInstance(Locale)
+     * @draft 2.2
+     */
+    public static final Collator getInstance() 
+    {
+        Locale defaultLocale = Locale.getDefault();
+        return getInstance(defaultLocale);
+    }
+    
+    /**
+     * Gets the Collator for the desired locale.
+     * @param locale the desired locale.
+     * @return a new RuleBasedCollator for the desired locale if it is 
+     *         created successfully; if its construction throws any 
+     *         Exception, RuleBasedCollator.UCA_ is returned instead.
+     * @see java.util.Locale
+     * @see java.util.ResourceBundle
+     * @draft 2.2
+     */
+    public static final Collator getInstance(Locale locale)
+    {
+        Collator result;
+        try {
+            result = new RuleBasedCollator(locale);
+        } 
+        catch (Exception e) {
+            // deliberate best effort: any failure falls back to the UCA 
+            // collator rather than being reported to the caller
+            result = RuleBasedCollator.UCA_;
+        }
+        return result;
+    }
+    
+    /**
+     * <p>Returns this Collator's strength property, i.e. the minimum level 
+     * of difference considered significant during comparison.</p>
+     * <p>See the Collator class description for an example of use.</p>
+     * @return this Collator's current strength property.
+     * @see #setStrength
+     * @see #PRIMARY
+     * @see #SECONDARY
+     * @see #TERTIARY
+     * @see #QUATERNARY
+     * @see #IDENTICAL
+     * @draft 2.2
+     */
+    public int getStrength()
+    {
+        return this.m_strength_;
+    }
+    
+    /**
+     * <p>Returns the decomposition mode of this Collator. Decomposition 
+     * mode determines how Unicode composed characters are handled; adjusting
+     * it lets the user choose between faster and more complete collation 
+     * behavior.</p>
+     * <p>The three values for decomposition mode are:</p>
+     * <UL>
+     * <LI>NO_DECOMPOSITION,
+     * <LI>CANONICAL_DECOMPOSITION,
+     * <LI>FULL_DECOMPOSITION.
+     * </UL>
+     * <p>See the documentation for these three constants for a description
+     * of their meaning.</p>
+     * @return the decomposition mode
+     * @see #setDecomposition
+     * @see #NO_DECOMPOSITION
+     * @see #CANONICAL_DECOMPOSITION
+     * @see #FULL_DECOMPOSITION
+     * @draft 2.2
+     */
+    public int getDecomposition()
+    {
+        return this.m_decomposition_;
+    }
+    
+    // public other methods -------------------------------------------------
+
+    /**
+     * Convenience method for comparing the equality of two strings based on
+     * this Collator's collation rules.
+     * @param source the source string to be compared with.
+     * @param target the target string to be compared with.
+     * @return true if the strings are equal according to the collation
+     *         rules. false, otherwise.
+     * @see #compare
+     * @draft 2.2
+     */
+    public boolean equals(String source, String target)
+    {
+        return (compare(source, target) == 0);
+    }
+	    
+    /**
+     * Clones this Collator using Object.clone().
+     * <p>Object.clone() only succeeds when the runtime class implements 
+     * Cloneable. Collator does not declare Cloneable itself, so cloning 
+     * works only for subclasses that do; otherwise an InternalError is 
+     * thrown.</p>
+     * @return a cloned Collator of this object
+     * @exception InternalError if the runtime class does not support 
+     *            cloning
+     * @draft 2.2
+     */
+    public Object clone()
+    {
+        try {
+            return super.clone();
+        } catch (CloneNotSupportedException e) {
+            // keep the description of the original failure so that the 
+            // error can be diagnosed
+            throw new InternalError(e.toString());
+        }
+    }
+
+    /**
+     * Compares the equality of two Collators. Two Collators are equal when 
+     * they are of exactly the same class and have the same strength and 
+     * decomposition mode.
+     * @param that the Collator to be compared with this.
+     * @return true if this Collator is the same as that Collator;
+     * false otherwise.
+     * @draft 2.2
+     */
+    public boolean equals(Object that)
+    {
+        if (this == that) {
+            return true;
+        }
+        if (that == null) {
+            return false;
+        }
+        if (getClass() != that.getClass()) {
+            return false;
+        }
+        Collator other = (Collator) that;
+        if (m_strength_ != other.m_strength_) {
+            return false;
+        }
+        return m_decomposition_ == other.m_decomposition_;
+    }
+    
+    // public abstract methods -----------------------------------------------
+
+    /**
+     * Generates the hash code for this Collator. Implementations should 
+     * stay consistent with equals(Object), which in this class compares the 
+     * runtime class, the strength and the decomposition mode.
+     * @return the hash code of this Collator
+     * @draft 2.2
+     */
+    public abstract int hashCode();
+    
+    /**
+     * <p>Compares the source string to the target string according to the
+     * collation rules for this Collator. Returns an integer less than, equal 
+     * to or greater than zero depending on whether the source String is less 
+     * than, equal to or greater than the target string. See the Collator
+     * class description for an example of use.</p>
+     * <p>For a one-time comparison, this method has the best performance. If 
+     * a given String will be involved in multiple comparisons, 
+     * CollationKey.compareTo() has the best performance. See the Collator 
+     * class description for an example using CollationKeys.</p>
+     * @param source the source string.
+     * @param target the target string.
+     * @return an integer value. The value is less than zero if source is 
+     *         less than target, zero if source and target are equal, and 
+     *         greater than zero if source is greater than target.
+     * @see CollationKey
+     * @see #getCollationKey
+     * @draft 2.2
+     */
+    public abstract int compare(String source, String target);
+
+    /**
+     * <p>Transforms the String into a series of bits that can be compared 
+     * bitwise to other CollationKeys. CollationKeys provide better 
+     * performance than Collator.compare() when Strings are involved in 
+     * multiple comparisons.</p> 
+     * <p>See the Collator class description for an example using 
+     * CollationKeys.</p>
+     * @param source the string to be transformed into a collation key.
+     * @return the CollationKey for the given String based on this Collator's 
+     *         collation rules. Implementations are expected to return null 
+     *         if the source String is null.
+     * @see CollationKey
+     * @see #compare(String, String)
+     * @draft 2.2
+     */
+    public abstract CollationKey getCollationKey(String source);
+    
+    // protected data members ------------------------------------------------
+    
+    /**
+     * Collation strength. Initialized to TERTIARY by the constructor and 
+     * changed through setStrength.
+     */
+    protected int m_strength_;
+    /**
+     * Decomposition mode. Initialized to CANONICAL_DECOMPOSITION by the 
+     * constructor and changed through setDecomposition.
+     */ 
+    protected int m_decomposition_;
+    
+    // protected constructor -------------------------------------------------
+    
+    /**
+    * <p>Protected constructor for use by subclasses; it sets the strength 
+    * to TERTIARY and the decomposition mode to CANONICAL_DECOMPOSITION.
+    * Public access to creating Collators is handled by the API getInstance().
+    * </p>
+    * @exception Exception declared for the benefit of subclasses; nothing 
+    *            in this constructor's own body throws it
+    * @draft 2.2
+    */
+    protected Collator() throws Exception
+    {
+        this.m_decomposition_ = CANONICAL_DECOMPOSITION;
+        this.m_strength_ = TERTIARY;
+    }
+  
+    // protected methods -----------------------------------------------------
+    
+    // private variables -----------------------------------------------------
+
+    // private methods -------------------------------------------------------
+}
+
--- a/icu4j/src/com/ibm/icu/text/CollatorReader.java
+++ b/icu4j/src/com/ibm/icu/text/CollatorReader.java
@ -0,0 +1,284 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2002, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollatorReader.java,v $ 
+* $Date: 2002/05/14 16:48:49 $ 
+* $Revision: 1.1 $
+*
+*******************************************************************************
+*/
+package com.ibm.icu.text;
+
+import java.io.InputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import com.ibm.icu.impl.ICUBinary;
+import com.ibm.icu.impl.IntTrie;
+
+/**
+* <p>Internal reader class for ICU data file uca.dat containing 
+* Unicode Collation Algorithm data.</p> 
+* <p>This class simply reads uca.dat, authenticates that it is a valid
+* ICU data file and splits its contents up into blocks of data for use in
+* <a href="Collator.html">com.ibm.icu.text.Collator</a>.
+* </p> 
+* <p>uca.dat, which is in big-endian format, is packaged in the same jar as 
+* this package.</p>
+* @author Syn Wee Quek
+* @since release 2.2, April 18 2002
+* @draft 2.2
+*/
+
+final class CollatorReader
+{      
+    // protected constructor ---------------------------------------------
+    
+    /**
+    * <p>Protected constructor that always reads and checks the ICU data 
+    * header before wrapping the stream for reading.</p>
+    * @param inputStream input stream of the ICU collation data file (the 
+    *        class description names uca.dat)
+    * @exception IOException throw if data file fails authentication 
+    * @draft 2.1
+    */
+    protected CollatorReader(InputStream inputStream) throws IOException
+    {
+        // same steps as the two-argument constructor with the header check 
+        // switched on
+        this(inputStream, true);
+    }
+    
+    /**
+    * <p>Protected constructor that optionally reads and checks the ICU data 
+    * header before wrapping the stream for reading.</p>
+    * @param inputStream input stream of the ICU collation data file (the 
+    *        class description names uca.dat)
+    * @param readICUHeader true if the ICU header has to be read from the 
+    *        stream and checked first, false if the stream is already 
+    *        positioned after it
+    * @exception IOException throw if data file fails authentication 
+    * @draft 2.1
+    */
+    protected CollatorReader(InputStream inputStream, boolean readICUHeader) 
+                                                            throws IOException
+    {
+        if (readICUHeader) {
+            ICUBinary.readHeader(inputStream, 
+                                 DATA_FORMAT_ID_, 
+                                 DATA_FORMAT_VERSION_, 
+                                 UNICODE_VERSION_);
+        }
+        m_dataInputStream_ = new DataInputStream(inputStream);
+    }
+  
+    // protected methods -------------------------------------------------
+      
+    /**
+    * Reads the collation header from the data stream, stores the expansion 
+    * and contraction offsets and the jamo flag in the argument collator, and 
+    * derives the byte sizes of the data tables that follow from the 
+    * differences between consecutive header offsets.
+    * <p>Header fields that are not needed are skipped with skipFully, which 
+    * guarantees that the requested number of bytes is really consumed; a 
+    * plain DataInputStream.skipBytes call may skip fewer bytes and would 
+    * leave all later reads misaligned.</p>
+    * @param rbc RuleBasedCollator to populate with header information
+    * @exception IOException thrown when there's a data error.
+    */
+    protected void readHeader(RuleBasedCollator rbc) throws IOException
+    {
+        int size = m_dataInputStream_.readInt();
+        // all the offsets are in bytes
+        // to get the address add to the header address and cast properly 
+        // Default options int options
+        skipFully(4);
+        // this one is needed only for UCA, to copy the appropriate 
+        // contractions  
+        skipFully(4);
+        // reserved for future use
+        m_dataInputStream_.readInt(); 
+        // const uint8_t *mappingPosition; 
+        int mapping = m_dataInputStream_.readInt(); 
+        // uint32_t *expansion; 
+        rbc.m_expansionOffset_ = m_dataInputStream_.readInt(); 
+        // UChar *contractionIndex;     
+        rbc.m_contractionOffset_ = m_dataInputStream_.readInt(); 
+        // uint32_t *contractionCEs;
+        int contractionCE = m_dataInputStream_.readInt();   
+        // needed for various closures int contractionSize 
+        skipFully(4);  
+        // array of last collation element in expansion
+        int expansionEndCE = m_dataInputStream_.readInt();  
+        // array of maximum expansion size corresponding to the expansion
+        // collation elements with last element in expansionEndCE
+        int expansionEndCEMaxSize = m_dataInputStream_.readInt();     
+        // size of endExpansionCE int expansionEndCESize
+        skipFully(4); 
+        // hash table of unsafe code points 
+        int unsafe = m_dataInputStream_.readInt();            
+        // hash table of final code points in contractions.
+        int contractionEnd = m_dataInputStream_.readInt();
+        // int CEcount = m_dataInputStream_.readInt();
+        skipFully(4);
+        // is jamoSpecial
+        rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean(); 
+        // three bytes following the one-byte flag
+        skipFully(3);
+        // byte version[] = new byte[4];
+        skipFully(4);
+        // byte charsetName[] = new byte[32]; // for charset CEs
+        skipFully(32);
+        skipFully(64); // for future use 
+        if (rbc.m_contractionOffset_ == 0) { // contraction can be null
+            // no contraction data: both contraction tables get size 0
+            rbc.m_contractionOffset_ = mapping;
+            contractionCE = mapping;
+        }
+        // each table's size is the distance to the offset of the next one
+        m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_;
+        m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_;
+        m_contractionCESize_ = mapping - contractionCE;
+        m_trieSize_ = expansionEndCE - mapping;
+        m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE;
+        m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize;
+        m_unsafeSize_ = contractionEnd - unsafe;
+        m_contractionEndSize_ = size - contractionEnd;    
+        // byte offset to char units (2 bytes each)
+        rbc.m_contractionOffset_ >>= 1; 
+        // byte offset to int units (4 bytes each)
+        rbc.m_expansionOffset_ >>= 2; 
+    }
+
+    /**
+    * Skips exactly the given number of bytes of the data stream.
+    * @param count number of bytes to skip
+    * @exception IOException thrown when the stream ends before count bytes 
+    *            have been skipped or another read error occurs
+    */
+    private void skipFully(int count) throws IOException
+    {
+        while (count > 0) {
+            int skipped = m_dataInputStream_.skipBytes(count);
+            if (skipped <= 0) {
+                // no progress: consume one byte by reading it, which throws 
+                // an EOFException if the stream has really ended
+                m_dataInputStream_.readByte();
+                skipped = 1;
+            }
+            count -= skipped;
+        }
+    }
+    
+    /**
+     * Read and break up the collation options passed in the stream of data
+     * and update the argument Collator with the results
+     * @param rbc RuleBasedCollator to populate
+     * @exception IOException thrown when there's a data error.
+     * @draft 2.2
+     */
+    public void readOptions(RuleBasedCollator rbc) throws IOException
+    {
+    	rbc.m_variableTopValue_ = m_dataInputStream_.readInt();
+    	rbc.setAttributeDefault(RuleBasedCollator.Attribute.FRENCH_COLLATION_,
+    	                 m_dataInputStream_.readInt());
+    	rbc.setAttributeDefault(
+    	                 RuleBasedCollator.Attribute.ALTERNATE_HANDLING_,
+    	                 m_dataInputStream_.readInt());
+    	rbc.setAttributeDefault(RuleBasedCollator.Attribute.CASE_FIRST_,
+    	                 m_dataInputStream_.readInt());
+      	rbc.setAttributeDefault(RuleBasedCollator.Attribute.CASE_LEVEL_,
+    	                 m_dataInputStream_.readInt());
+      	rbc.setAttributeDefault(
+      	                 RuleBasedCollator.Attribute.NORMALIZATION_MODE_,
+    	                 m_dataInputStream_.readInt());
+      	rbc.setAttributeDefault(RuleBasedCollator.Attribute.STRENGTH_,
+    	                 m_dataInputStream_.readInt());
+		rbc.setAttributeDefault(
+		                 RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_,
+    	                 m_dataInputStream_.readInt());
+    }
+    
+    /**
+    * Read and break up the stream of data passed in as arguments into 
+    * meaningful Collator data.b
+    * @param rbc RuleBasedCollator to populate
+    * @exception IOException thrown when there's a data error.
+    * @draft 2.2
+    */
+    public void read(RuleBasedCollator rbc) throws IOException
+    {
+    	readHeader(rbc);
+    	readOptions(rbc);
+    	m_expansionSize_ >>= 2;
+    	rbc.m_expansion_ = new int[m_expansionSize_];
+    	for (int i = 0; i < m_expansionSize_; i ++) {
+    		rbc.m_expansion_[i] = m_dataInputStream_.readInt();
+    	}
+    	m_contractionIndexSize_ >>= 1;
+    	rbc.m_contractionIndex_ = new char[m_contractionIndexSize_];
+    	for (int i = 0; i < m_contractionIndexSize_; i ++) {
+    		rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar();
+    	}
+    	m_contractionCESize_ >>= 2;
+    	rbc.m_contractionCE_ = new int[m_contractionCESize_];
+    	for (int i = 0; i < m_contractionCESize_; i ++) {
+    		rbc.m_contractionCE_[i] = m_dataInputStream_.readInt();
+    	}
+    	rbc.m_trie_ = new IntTrie(m_dataInputStream_, rbc);
+    	if (!rbc.m_trie_.isLatin1Linear()) {
+    		throw new IOException("Data corrupted, " 
+    		                      + "Collator Tries expected to have linear "
+    		                      + "latin one data arrays");
+    	}
+    	m_expansionEndCESize_ >>= 2;
+    	rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_];
+    	for (int i = 0; i < m_expansionEndCESize_; i ++) {
+    		rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt();
+    	}
+    	rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_];
+    	for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i ++) {
+    		rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte();
+    	}
+    	rbc.m_unsafe_ = new byte[m_unsafeSize_];
+    	for (int i = 0; i < m_unsafeSize_; i ++) {
+    		rbc.m_unsafe_[i] = m_dataInputStream_.readByte();
+    	}
+    	rbc.m_contractionEnd_ = new byte[m_contractionEndSize_];
+    	for (int i = 0; i < m_contractionEndSize_; i ++) {
+    		rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte();
+    	}
+    }
+    
+    // private variables -------------------------------------------------
+  
+    /**
+    * Data input stream wrapping the input stream passed to the 
+    * constructor (the class description names uca.dat as the data file).
+    */
+    private DataInputStream m_dataInputStream_;
+   
+    /**
+    * File format version and id that this class understands, together with 
+    * the Unicode version; all three are passed to ICUBinary.readHeader by 
+    * the constructors. The id bytes are the ASCII characters "UCol".
+    * No guarantees are made if an older version is used.
+    */
+    private static final byte DATA_FORMAT_VERSION_[] = 
+                                   {(byte)0x2, (byte)0x0, (byte)0x0, (byte)0x0};
+    private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x43,  
+                                                    (byte)0x6f, (byte)0x6c};
+    private static final byte UNICODE_VERSION_[] = {(byte)0x3, (byte)0x0, 
+                                                    (byte)0x0, (byte)0x0};
+    /**
+    * Corrupted error string. Not referenced by the code of this class as 
+    * shown here.
+    */
+    private static final String CORRUPTED_DATA_ERROR_ =
+                                "Data corrupted in Collation data file";
+                                
+    /**
+     * Size of expansion table: in bytes after readHeader, converted to a 
+     * count of ints by read
+     */
+    private int m_expansionSize_;
+    /**
+     * Size of contraction index table: in bytes after readHeader, converted 
+     * to a count of chars by read
+     */
+    private int m_contractionIndexSize_;
+    /**
+     * Size of contraction table: in bytes after readHeader, converted to a 
+     * count of ints by read
+     */
+    private int m_contractionCESize_;
+    /**
+     * Size of the Trie in bytes. Computed by readHeader; read does not use 
+     * it.
+     */
+    private int m_trieSize_;
+    /**
+     * Size of the table that contains information about collation elements
+     * that end with an expansion: in bytes after readHeader, converted to a 
+     * count of ints by read
+     */
+    private int m_expansionEndCESize_;
+    /**
+     * Size in bytes of the table that contains information about the 
+     * maximum size of collation elements that end with a particular 
+     * expansion CE corresponding to the ones in expansionEndCE
+     */
+    private int m_expansionEndCEMaxSizeSize_;
+    /**
+     * Size in bytes of the table that contains information about the 
+     * "Unsafe" codepoints
+     */
+    private int m_unsafeSize_;
+    /**
+     * Size in bytes of the table that contains information about codepoints 
+     * that end with a contraction
+     */
+    private int m_contractionEndSize_;
+    /**
+     * Size of the table that contains UCA contraction information. Not 
+     * assigned or read by the code of this class as shown here.
+     */
+    private int m_UCAContractionSize_;
+      
+    // private methods ---------------------------------------------------
+      
+}
+
--- a/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java