diff --git a/icu4j/src/com/ibm/icu/impl/UCharacterIterator.java b/icu4j/src/com/ibm/icu/impl/UCharacterIterator.java index fac65c3690e..c699bffac20 100644 --- a/icu4j/src/com/ibm/icu/impl/UCharacterIterator.java +++ b/icu4j/src/com/ibm/icu/impl/UCharacterIterator.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Attic/UCharacterIterator.java,v $ - * $Date: 2002/04/03 00:00:00 $ - * $Revision: 1.4 $ + * $Date: 2002/05/14 16:48:49 $ + * $Revision: 1.5 $ * ******************************************************************************* */ @@ -41,27 +41,62 @@ public final class UCharacterIterator implements CharacterIterator // public constructor ------------------------------------------------------ /** - * Public constructor + * Public constructor. + * By default the iteration range will be from 0 to the end of the text. * @param replacable text which the iterator will be based on */ public UCharacterIterator(Replaceable replaceable) { m_replaceable_ = replaceable; m_index_ = 0; - m_length_ = replaceable.length(); + m_start_ = 0; + m_limit_ = replaceable.length(); } /** * Public constructor + * By default the iteration range will be from 0 to the end of the text. * @param str text which the iterator will be based on */ public UCharacterIterator(String str) { m_replaceable_ = new ReplaceableString(str); m_index_ = 0; - m_length_ = m_replaceable_.length(); + m_start_ = 0; + m_limit_ = m_replaceable_.length(); } + /** + * Constructs an iterator over the given range of the given string. 
+ * @param text text to be iterated over + * @param start offset of the first character to iterate + * @param limit offset of the character following the last character to + * iterate + */ + public UCharacterIterator(String str, int start, int limit) + { + m_replaceable_ = new ReplaceableString(str); + m_start_ = start; + m_limit_ = limit; + m_index_ = m_start_; + } + + /** + * Constructs an iterator over the given range of the given replaceable + * string. + * @param text text to be iterated over + * @param start offset of the first character to iterate + * @param limit offset of the character following the last character to + * iterate + */ + public UCharacterIterator(Replaceable replaceable, int start, int limit) + { + m_replaceable_ = replaceable; + m_start_ = start; + m_limit_ = limit; + m_index_ = m_start_; + } + // public methods ---------------------------------------------------------- /** @@ -87,7 +122,7 @@ public final class UCharacterIterator implements CharacterIterator */ public char current() { - if (m_index_ >= 0 && m_index_ < m_length_) { + if (m_index_ >= m_start_ && m_index_ < m_limit_) { return m_replaceable_.charAt(m_index_); } return DONE; @@ -99,7 +134,7 @@ public final class UCharacterIterator implements CharacterIterator */ public int currentCodePoint() { - if (m_index_ >= 0 && m_index_ < m_length_) { + if (m_index_ >= m_start_ && m_index_ < m_limit_) { return m_replaceable_.char32At(m_index_); } return DONE_CODEPOINT; @@ -111,26 +146,28 @@ public final class UCharacterIterator implements CharacterIterator */ public char first() { - m_index_ = 0; + m_index_ = m_start_; return current(); } /** - * Returns the start of the text. - * @return 0 + * Returns the start of the text to iterate. + * @return by default this method will return 0, unless a range for + * iteration had been specified during construction. 
*/ public int getBeginIndex() { - return 0; + return m_start_; } /** - * Returns the length of the text - * @return length of the text + * Returns the limit offset of the text to iterate + * @return by default this method returns the length of the text, unless a + * range for iteration had been specified during construction. */ public int getEndIndex() { - return m_length_; + return m_limit_; } /** @@ -143,31 +180,31 @@ public final class UCharacterIterator implements CharacterIterator } /** - * Gets the last UTF16 character from the text and shifts the index to the - * end of the text accordingly. - * @return the last UTF16 character + * Gets the last UTF16 iterateable character from the text and shifts the + * index to the end of the text accordingly. + * @return the last UTF16 iterateable character */ public char last() { - if (m_length_ != 0) { - m_index_ = m_length_ - 1; + if (m_limit_ != m_start_) { + m_index_ = m_limit_ - 1; return m_replaceable_.charAt(m_index_); } - m_index_ = m_length_; + m_index_ = m_limit_; return DONE; } /** * Returns next UTF16 character and increments the iterator's index by 1. - * If the resulting index is greater or equal to the text length, the - * index is reset to the text length and a value of DONE_CODEPOINT is + * If the resulting index is greater or equal to the iteration limit, the + * index is reset to the text iteration limit and a value of DONE_CODEPOINT is * returned. * @return next UTF16 character in text or DONE if the new index is off the - * end of the text range. + * end of the text iteration limit. */ public char next() { - if (m_index_ < m_length_) { + if (m_index_ < m_limit_) { char result = m_replaceable_.charAt(m_index_); m_index_ ++; return result; @@ -182,20 +219,20 @@ public final class UCharacterIterator implements CharacterIterator * with surrogate pairs intermixed. If the index of a leading or trailing * code unit of a surrogate pair is given, return the code point after the * surrogate pair. 
- * If the resulting index is greater or equal to the text length, the - * current index is reset to the text length and a value of DONE_CODEPOINT - * is returned. + * If the resulting index is greater or equal to the text iterateable limit, + * the current index is reset to the text iterateable limit and a value of + * DONE_CODEPOINT is returned. * @return next codepoint in text or DONE_CODEPOINT if the new index is off the - * end of the text range. + * end of the text iterateable limit. */ public int nextCodePoint() { - if (m_index_ < m_length_) { + if (m_index_ < m_limit_) { char ch = m_replaceable_.charAt(m_index_); m_index_ ++; if (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE && ch <= UTF16.LEAD_SURROGATE_MAX_VALUE && - m_index_ < m_length_) { + m_index_ < m_limit_) { char trail = m_replaceable_.charAt(m_index_); if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE && trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) { @@ -212,14 +249,15 @@ public final class UCharacterIterator implements CharacterIterator /** * Returns previous UTF16 character and decrements the iterator's index by * 1. - * If the resulting index is less than 0, the index is reset to 0 and a - * value of DONE_CODEPOINT is returned. + * If the resulting index is less than the text iterateable limit, the + * index is reset to the start of the text iteration and a value of + * DONE_CODEPOINT is returned. * @return next UTF16 character in text or DONE if the new index is off the - * start of the text range. + * start of the text iteration range. */ public char previous() { - if (m_index_ > 0) { + if (m_index_ > m_start_) { m_index_ --; return m_replaceable_.charAt(m_index_); } @@ -233,19 +271,20 @@ public final class UCharacterIterator implements CharacterIterator * with surrogate pairs intermixed. If the index of a leading or trailing * code unit of a surrogate pair is given, return the code point before the * surrogate pair. 
- * If the resulting index is less than 0, the current index is reset to 0 - * and a value of DONE_CODEPOINT is returned. + * If the resulting index is less than the text iterateable range, the + * current index is reset to the start of the range and a value of + * DONE_CODEPOINT is returned. * @return previous codepoint in text or DONE_CODEPOINT if the new index is - * off the start of the text range. + * off the start of the text iteration range. */ public int previousCodePoint() { - if (m_index_ > 0) { + if (m_index_ > m_start_) { m_index_ --; char ch = m_replaceable_.charAt(m_index_); if (ch >= UTF16.TRAIL_SURROGATE_MIN_VALUE && ch <= UTF16.TRAIL_SURROGATE_MAX_VALUE && - m_index_ > 0) { + m_index_ > m_start_) { char lead = m_replaceable_.charAt(m_index_); if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE && lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) { @@ -267,12 +306,11 @@ public final class UCharacterIterator implements CharacterIterator * @exception IllegalArgumentException is thrown if an invalid index is * supplied. i.e. index is out of bounds. * @return the character at the specified index or DONE if the specified - * index is equal to the end of the text. + * index is equal to the limit of the text iteration range. 
*/ public char setIndex(int index) { - int length = m_replaceable_.length(); - if (index < 0 || index > length) { + if (index < m_start_ || index > m_limit_) { throw new IllegalArgumentException("Index index out of bounds"); } m_index_ = index; @@ -290,7 +328,12 @@ public final class UCharacterIterator implements CharacterIterator */ private int m_index_; /** - * Replaceable text length + * Start offset of iterateable range, by default this is 0 */ - private int m_length_; + private int m_start_; + /** + * Limit offset of iterateable range, by default this is the length of the + * string + */ + private int m_limit_; } diff --git a/icu4j/src/com/ibm/icu/text/BOSCU.java b/icu4j/src/com/ibm/icu/text/BOSCU.java new file mode 100644 index 00000000000..b7c0bf38099 --- /dev/null +++ b/icu4j/src/com/ibm/icu/text/BOSCU.java @@ -0,0 +1,382 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2002, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Attic/BOSCU.java,v $ +* $Date: 2002/05/14 16:48:48 $ +* $Revision: 1.1 $ +* +******************************************************************************* +*/ +package com.ibm.icu.text; + +import com.ibm.icu.impl.UCharacterIterator; + +/** + *

/**
 * <p>Binary Ordered Compression Scheme for Unicode.</p>
 *
 * <p>Specific application: encode a Unicode string for the identical level
 * of a sort key.</p>
 *
 * <p>Method: slope detection. Remember the previous code point (initially 0).
 * For each code point in the string, encode the difference to the previous
 * one. With a compact encoding of differences, this yields good results for
 * small scripts and UTF-like results otherwise.</p>
 *
 * <p>Encoding of differences: we encode differences with few bytes if their
 * absolute values are small. For correct ordering, we must treat the entire
 * value range -10ffff..+10ffff in ascending order, which forbids encoding
 * the sign and the absolute value separately. Instead, we split the lead
 * byte range in the middle and encode non-negative values going up and
 * negative values going down.</p>
 *
 * <p>For very small absolute values, the difference is added to a middle
 * byte value for single-byte encoded differences. For somewhat larger
 * absolute values, the difference is divided by the number of byte values
 * available, the modulo is used for one trail byte, and the remainder is
 * added to a lead byte avoiding the single-byte range. For large absolute
 * values, the difference is similarly encoded in three or four bytes.</p>
 *
 * <p>This encoding does not use byte values 0, 1, 2, but uses all other
 * byte values for lead/single bytes so that the middle range of single
 * bytes is as large as possible.</p>
 *
 * <p>Note that the lead byte ranges overlap some, but that the sequences as
 * a whole are well ordered. I.e., even if the lead byte is the same for
 * sequences of different lengths, the trail bytes establish correct order.
 * It would be possible to encode slightly larger ranges for each length
 * (&gt;1) by subtracting the lower bound of the range, but that would also
 * slow down the calculation.</p>
 *
 * <p>For the actual string encoding, an optimization moves the previous
 * code point value to the middle of its Unicode script block to minimize
 * the differences in same-script text runs.</p>
 *
 * @author Syn Wee Quek
 * @since release 2.2, May 3rd 2002
 * @draft 2.2
 */
public class BOSCU
{
    // public methods --------------------------------------------------------

    /**
     * Encodes the code points of a string as a sequence of byte-encoded
     * differences (slope detection), preserving lexical order.
     *
     * <p>Optimizes the difference-taking for runs of Unicode text within
     * small scripts: most small scripts are allocated within aligned
     * 128-blocks of Unicode code points, and lexical order is preserved if
     * "prev" is always moved into the middle of such a block. Additionally,
     * "prev" is moved from anywhere in the Unihan area towards the upper
     * end of that area.</p>
     *
     * <p>Note that the identical-level run in a sort key is generated from
     * NFD text - there are never Hangul characters included.</p>
     *
     * @param source text source
     * @param buffer output buffer receiving the encoded bytes
     * @param offset index in buffer at which to start writing
     * @return end offset just past the last byte written
     */
    public static int writeIdenticalLevelRun(String source, byte buffer[],
                                             int offset)
    {
        int prev = 0;
        int i = 0;
        while (i < source.length()) {
            // iterate by code point; an unpaired surrogate is taken as-is
            int codepoint = source.codePointAt(i);
            i += Character.charCount(codepoint);
            offset = writeDiff(codepoint - middleOfBlock(prev), buffer,
                               offset);
            prev = codepoint;
        }
        return offset;
    }

    /**
     * Computes how many bytes writeIdenticalLevelRun() would write for the
     * same string, without writing them.
     * @param source text source string
     * @return the length in bytes of the encoded result
     */
    public static int lengthOfIdenticalLevelRun(String source)
    {
        int prev = 0;
        int result = 0;
        int i = 0;
        while (i < source.length()) {
            int codepoint = source.codePointAt(i);
            i += Character.charCount(codepoint);
            // measure exactly the difference that writeIdenticalLevelRun
            // encodes for this code point (the previous revision fetched
            // the NEXT code point before taking the difference, skipping
            // the first one and including the end-of-iteration sentinel)
            result += lengthOfDiff(codepoint - middleOfBlock(prev));
            prev = codepoint;
        }
        return result;
    }

    // private data members ---------------------------------------------------

    /**
     * Do not use byte values 0, 1, 2 because they are separators in sort
     * keys.
     */
    private static final int SLOPE_MIN_ = 3;
    private static final int SLOPE_MAX_ = 0xff;
    private static final int SLOPE_MIDDLE_ = 0x81;
    private static final int SLOPE_TAIL_COUNT_ = SLOPE_MAX_ - SLOPE_MIN_ + 1;
    private static final int SLOPE_MAX_BYTES_ = 4;

    /**
     * Number of lead bytes:
     * 1 middle byte for 0
     * 2*80=160 single bytes for !=0
     * 2*42=84 for double-byte values
     * 2*3=6 for 3-byte values
     * 2*1=2 for 4-byte values
     *
     * The sum must be <=SLOPE_TAIL_COUNT.
     *
     * Why these numbers?
     * - There should be >=128 single-byte values to cover 128-blocks
     *   with small scripts.
     * - There should be >=20902 single/double-byte values to cover Unihan.
     * - It helps CJK Extension B some if there are 3-byte values that cover
     *   the distance between them and Unihan.
     *   This also helps to jump among distant places in the BMP.
     * - Four-byte values are necessary to cover the rest of Unicode.
     *
     * Symmetrical lead byte counts are for convenience.
     * With an equal distribution of even and odd differences there is also
     * no advantage to asymmetrical lead byte counts.
     */
    private static final int SLOPE_SINGLE_ = 80;
    private static final int SLOPE_LEAD_2_ = 42;
    private static final int SLOPE_LEAD_3_ = 3;
    private static final int SLOPE_LEAD_4_ = 1;

    /**
     * The difference value range for single-byters.
     */
    private static final int SLOPE_REACH_POS_1_ = SLOPE_SINGLE_;
    private static final int SLOPE_REACH_NEG_1_ = (-SLOPE_SINGLE_);

    /**
     * The difference value range for double-byters.
     */
    private static final int SLOPE_REACH_POS_2_ =
        SLOPE_LEAD_2_ * SLOPE_TAIL_COUNT_ + SLOPE_LEAD_2_ - 1;
    private static final int SLOPE_REACH_NEG_2_ = (-SLOPE_REACH_POS_2_ - 1);

    /**
     * The difference value range for 3-byters.
     */
    private static final int SLOPE_REACH_POS_3_ =
        SLOPE_LEAD_3_ * SLOPE_TAIL_COUNT_ * SLOPE_TAIL_COUNT_
        + (SLOPE_LEAD_3_ - 1) * SLOPE_TAIL_COUNT_
        + (SLOPE_TAIL_COUNT_ - 1);
    private static final int SLOPE_REACH_NEG_3_ = (-SLOPE_REACH_POS_3_ - 1);

    /**
     * The lead byte start values.
     */
    private static final int SLOPE_START_POS_2_ = SLOPE_MIDDLE_
                                                  + SLOPE_SINGLE_ + 1;
    private static final int SLOPE_START_POS_3_ = SLOPE_START_POS_2_
                                                  + SLOPE_LEAD_2_;
    private static final int SLOPE_START_NEG_2_ = SLOPE_MIDDLE_
                                                  + SLOPE_REACH_NEG_1_;
    private static final int SLOPE_START_NEG_3_ = SLOPE_START_NEG_2_
                                                  - SLOPE_LEAD_2_;

    // private constructor ----------------------------------------------------

    /**
     * Private to prevent instantiation; this class has only static members.
     */
    private BOSCU()
    {
    }

    // private methods --------------------------------------------------------

    /**
     * Moves a previous code point to the "middle" of its block so that
     * differences within same-script runs stay small while lexical order is
     * preserved.
     * @param prev previous code point
     * @return adjusted value used as the base for the next difference
     */
    private static int middleOfBlock(int prev)
    {
        if (prev < 0x4e00 || prev >= 0xa000) {
            // small scripts: middle of the aligned 128-block
            return (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
        }
        // Unihan U+4e00..U+9fa5: double-bytes down from the upper end
        return 0x9fff - SLOPE_REACH_POS_2_;
    }

    /**
     * Integer division and modulo with negative numerators yields negative
     * modulo results and quotients that are one more than what we need
     * here; this returns the floored quotient and a non-negative modulo.
     * @param number the number on which the operations are to be performed
     * @param factor the factor to use for division, must be positive
     * @return (floored result of division) << 32 | (non-negative modulo)
     */
    private static final long getNegDivMod(int number, int factor)
    {
        int modulo = number % factor;
        long result = number / factor;
        if (modulo < 0) {
            -- result;
            modulo += factor;
        }
        return (result << 32) | modulo;
    }

    /**
     * Encodes one difference value -0x10ffff..+0x10ffff in 1..4 bytes,
     * preserving lexical order.
     * @param diff difference value to encode
     * @param buffer byte buffer to append to
     * @param offset index in the byte buffer at which to start appending
     * @return end offset where the appending stops
     */
    private static final int writeDiff(int diff, byte buffer[], int offset)
    {
        if (diff >= SLOPE_REACH_NEG_1_) {
            if (diff <= SLOPE_REACH_POS_1_) {
                buffer[offset ++] = (byte)(SLOPE_MIDDLE_ + diff);
            }
            else if (diff <= SLOPE_REACH_POS_2_) {
                buffer[offset ++] = (byte)(SLOPE_START_POS_2_
                                           + (diff / SLOPE_TAIL_COUNT_));
                buffer[offset ++] = (byte)(SLOPE_MIN_
                                           + (diff % SLOPE_TAIL_COUNT_));
            }
            else if (diff <= SLOPE_REACH_POS_3_) {
                buffer[offset + 2] = (byte)(SLOPE_MIN_
                                            + (diff % SLOPE_TAIL_COUNT_));
                diff /= SLOPE_TAIL_COUNT_;
                buffer[offset + 1] = (byte)(SLOPE_MIN_
                                            + (diff % SLOPE_TAIL_COUNT_));
                buffer[offset] = (byte)(SLOPE_START_POS_3_
                                        + (diff / SLOPE_TAIL_COUNT_));
                offset += 3;
            }
            else {
                buffer[offset + 3] = (byte)(SLOPE_MIN_
                                            + diff % SLOPE_TAIL_COUNT_);
                diff /= SLOPE_TAIL_COUNT_;
                // previous revision wrote buffer[offset] here, leaving
                // offset + 2 unwritten and clobbering the lead byte slot
                buffer[offset + 2] = (byte)(SLOPE_MIN_
                                            + diff % SLOPE_TAIL_COUNT_);
                diff /= SLOPE_TAIL_COUNT_;
                buffer[offset + 1] = (byte)(SLOPE_MIN_
                                            + diff % SLOPE_TAIL_COUNT_);
                buffer[offset] = (byte)SLOPE_MAX_;
                offset += 4;
            }
        }
        else {
            long division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
            int modulo = (int)division;
            if (diff >= SLOPE_REACH_NEG_2_) {
                diff = (int)(division >> 32);
                buffer[offset ++] = (byte)(SLOPE_START_NEG_2_ + diff);
                buffer[offset ++] = (byte)(SLOPE_MIN_ + modulo);
            }
            else if (diff >= SLOPE_REACH_NEG_3_) {
                buffer[offset + 2] = (byte)(SLOPE_MIN_ + modulo);
                diff = (int)(division >> 32);
                division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
                modulo = (int)division;
                diff = (int)(division >> 32);
                buffer[offset + 1] = (byte)(SLOPE_MIN_ + modulo);
                buffer[offset] = (byte)(SLOPE_START_NEG_3_ + diff);
                offset += 3;
            }
            else {
                buffer[offset + 3] = (byte)(SLOPE_MIN_ + modulo);
                diff = (int)(division >> 32);
                division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
                modulo = (int)division;
                diff = (int)(division >> 32);
                buffer[offset + 2] = (byte)(SLOPE_MIN_ + modulo);
                division = getNegDivMod(diff, SLOPE_TAIL_COUNT_);
                modulo = (int)division;
                buffer[offset + 1] = (byte)(SLOPE_MIN_ + modulo);
                // 4-byte negative sequences use the lowest lead byte
                buffer[offset] = SLOPE_MIN_;
                offset += 4;
            }
        }
        return offset;
    }

    /**
     * How many bytes would writeDiff() write for this difference?
     * @param diff difference value to measure
     * @return the number of bytes, 1..4
     */
    private static final int lengthOfDiff(int diff)
    {
        if (diff >= SLOPE_REACH_NEG_1_) {
            if (diff <= SLOPE_REACH_POS_1_) {
                return 1;
            }
            else if (diff <= SLOPE_REACH_POS_2_) {
                return 2;
            }
            else if (diff <= SLOPE_REACH_POS_3_) {
                return 3;
            }
            return 4;
        }
        if (diff >= SLOPE_REACH_NEG_2_) {
            return 2;
        }
        else if (diff >= SLOPE_REACH_NEG_3_) {
            return 3;
        }
        return 4;
    }
}

The CollationElementIterator class is used as an iterator + * to walk through each character of an international string. Use the iterator + * to return the ordering priority of the positioned character. The ordering + * priority of a character, which we refer to as a key, defines how a + * character is collated in the given collation object.

+ *

For example, consider the following in Spanish: + *

+ *
+ * "ca" -> the first key is key('c') and second key is key('a').
+ * "cha" -> the first key is key('ch') and second key is key('a').
+ * 
+ *
+ * And in German, + *
+ *
+ * "\u00e4b"-> the first key is key('a'), the second key is key('e'), and
+ * the third key is key('b').
+ * 
+ *
+ *

+ *

The key of a character is an integer composed of primary order(short), + * secondary order(byte), and tertiary order(byte). Java strictly defines + * the size and signedness of its primitive data types. Therefore, the static + * functions primaryOrder, secondaryOrder, and + * tertiaryOrder return int, short, + * and short respectively to ensure the correctness of the key + * value.

+ *

+ * Example of the iterator usage, + *

+ *
+ *  String testString = "This is a test";
+ *  RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)Collator.getInstance();
+ *  CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString);
+ *  int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next());
+ * 
+ *
+ *

+ *

+ * CollationElementIterator.next returns the collation order + * of the next character. A collation order consists of primary order, + * secondary order and tertiary order. The data type of the collation + * order is int. The first 16 bits of a collation order + * is its primary order; the next 8 bits is the secondary order and the + * last 8 bits is the tertiary order.

+ * @see Collator + * @see RuleBasedCollator + * @author Syn Wee Quek + * @since release 2.2, April 18 2002 + * @draft 2.2 + */ +public final class CollationElementIterator +{ + // public data members -------------------------------------------------- + + /** + * Null order which indicates the end of string is reached + * @draft 2.2 + */ + public final static int NULLORDER = 0xffffffff; + /** + * Ignorable collation element order. + */ + public static final int IGNORABLE = 0; + + // public methods ------------------------------------------------------- + + // public getters ------------------------------------------------------- + + /** + *

Returns the character offset in the original text corresponding to + * the next collation element. (That is, getOffset() returns the position + * in the text corresponding to the collation element that will be + * returned by the next call to next().) This value could be either + *

+ *

+ *

Note calling getOffset() immediately after setOffset(offset) may not + * return the value offset.

+ * @return The character offset in the original text corresponding to the + * collation element that will be returned by the next call to + * next(). + * @draft 2.2 + */ + public int getOffset() + { + return m_source_.getIndex(); + } + + + /** + * Return the maximum length of any expansion sequences that end with the + * specified collation element. + * @param ce a collation element returned by previous() or next(). + * @return the maximum length of any expansion sequences ending + * with the specified collation element. + * @draft 2.2 + */ + public int getMaxExpansion(int ce) + { + int start = 0; + int limit = m_collator_.m_expansionEndCE_.length; + while (start < limit - 1) { + int mid = start + ((limit - start) >> 1); + if (ce <= m_collator_.m_expansionEndCE_[mid]) { + limit = mid; + } + else { + start = mid; + } + } + int result = 1; + if (m_collator_.m_expansionEndCE_[start] == ce) { + result = m_collator_.m_expansionEndCEMaxSize_[start]; + } + else if (m_collator_.m_expansionEndCE_[limit] == ce) { + result = m_collator_.m_expansionEndCEMaxSize_[limit]; + } + else if ((ce & 0xFFFF) == 0x00C0) { + result = 2; + } + return result; + } + + // public other methods ------------------------------------------------- + + /** + *

Resets the cursor to the beginning of the string. The next call + * to next() will return the first collation element in the string.

+ * @draft 2.2 + */ + public synchronized void reset() + { + m_source_.setIndex(0); + updateInternalState(); + } + + /** + *

Get the next collation element in the string.

+ *

This iterator iterates over a sequence of collation elements that + * were built from the string. Because there isn't necessarily a + * one-to-one mapping from characters to collation elements, this doesn't + * mean the same thing as "return the collation element [or ordering + * priority] of the next character in the string".

+ *

This function returns the collation element that the iterator is + * currently pointing to and then updates the internal pointer to point to + * the next element. previous() updates the pointer first and then + * returns the element. This means that when you change direction while + * iterating (i.e., call next() and then call previous(), or call + * previous() and then call next()), you'll get back the same element + * twice.

+ * @return the next collation element + * @draft 2.2 + */ + public synchronized int next() + { + m_isForwards_ = true; + if (m_CEBufferSize_ > 0) { + if (m_CEBufferOffset_ < m_CEBufferSize_) { + // if there are expansions left in the buffer, we return it + return m_CEBuffer_[m_CEBufferOffset_ ++]; + } + m_CEBufferSize_ = 0; + m_CEBufferOffset_ = 0; + } + + char ch = nextChar(); + /* System.out.println("ch " + Integer.toHexString(ch) + " " + + Integer.toHexString(m_source_.current()));*/ + if (ch == CharacterIterator.DONE) { + return NULLORDER; + } + if (m_collator_.m_isHiragana4_) { + m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x3094) + || ch == 0x309d || ch == 0x309e; + } + + int result = NULLORDER; + if (ch <= 0xFF) { + // For latin-1 characters we never need to fall back to the UCA + // table because all of the UCA data is replicated in the + // latinOneMapping array + result = m_collator_.m_trie_.getLatin1LinearValue(ch); + if (RuleBasedCollator.isSpecial(result)) { + result = nextSpecial(m_collator_, result, ch); + } + } + else + { + result = m_collator_.m_trie_.getLeadValue(ch); + //System.out.println(Integer.toHexString(result)); + if (RuleBasedCollator.isSpecial(result)) { + // surrogate leads are handled as special ces + result = nextSpecial(m_collator_, result, ch); + } + if (result == CE_NOT_FOUND_) { + // couldn't find a good CE in the tailoring + // if we got here, the codepoint MUST be over 0xFF - so we look + // directly in the UCA + result = m_collator_.UCA_.m_trie_.getLeadValue(ch); + if (RuleBasedCollator.isSpecial(result)) { + // UCA also gives us a special CE + result = nextSpecial(m_collator_.UCA_, result, ch); + } + } + } + return result; + } + + /** + *

Get the previous collation element in the string.

+ *

This iterator iterates over a sequence of collation elements that + * were built from the string. Because there isn't necessarily a + * one-to-one mapping from characters to collation elements, this doesn't + * mean the same thing as "return the collation element [or ordering + * priority] of the previous character in the string".

+ *

This function updates the iterator's internal pointer to point to + * the collation element preceding the one it's currently pointing to and + * then returns that element, while next() returns the current element and + * then updates the pointer. This means that when you change direction + * while iterating (i.e., call next() and then call previous(), or call + * previous() and then call next()), you'll get back the same element + * twice.

+ * @return the previous collation element + * @draft 2.2 + */ + public synchronized int previous() + { + if (m_source_.getIndex() <= 0 && m_isForwards_) { + // if iterator is new or reset, we can immediate perform backwards + // iteration even when the offset is not right. + m_source_.setIndex(m_source_.getEndIndex()); + updateInternalState(); + } + m_isForwards_ = false; + int result = NULLORDER; + if (m_CEBufferSize_ > 0) { + if (m_CEBufferOffset_ > 0) { + return m_CEBuffer_[-- m_CEBufferOffset_]; + } + m_CEBufferSize_ = 0; + m_CEBufferOffset_ = 0; + } + char ch = previousChar(); + if (ch == CharacterIterator.DONE) { + return NULLORDER; + } + if (m_collator_.m_isHiragana4_) { + m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309f); + } + if (m_collator_.isContractionEnd(ch) && !isBackwardsStart()) { + result = previousSpecial(m_collator_, CE_CONTRACTION_, ch); + } + else { + if (ch <= 0xFF) { + result = m_collator_.m_trie_.getLatin1LinearValue(ch); + if (RuleBasedCollator.isSpecial(result)) { + result = previousSpecial(m_collator_, result, ch); + } + } + else { + if (m_bufferOffset_ < 0 && isThaiBaseConsonant(ch) + && m_source_.getIndex() != 0) { + if (isThaiPreVowel(m_source_.previous())) { + result = CE_THAI_; + } + else { + result = m_collator_.m_trie_.getLeadValue(ch); + } + m_source_.next(); + } + else { + result = m_collator_.m_trie_.getLeadValue(ch); + } + if (RuleBasedCollator.isSpecial(result)) { + result = previousSpecial(m_collator_, result, ch); + } + if (result == CE_NOT_FOUND_) { + if (!isBackwardsStart() + && m_collator_.isContractionEnd(ch)) { + result = CE_CONTRACTION_; + } + else { + result = m_collator_.m_trie_.getLeadValue(ch); + } + + if (RuleBasedCollator.isSpecial(result)) { + result = previousSpecial(m_collator_.UCA_, result, ch); + } + } + } + } + return result; + } + + /** + * Return the primary strength of a collation element. 
+ * @param ce the collation element + * @return the element's primary strength + * @draft 2.2 + */ + public final static int primaryOrder(int ce) + { + return (ce & RuleBasedCollator.CE_PRIMARY_MASK_) >> CE_PRIMARY_SHIFT_; + } + /** + * Return the secondary strength of a collation element. + * @param ce the collation element + * @return the element's secondary strength + * @draft 2.2 + */ + public final static short secondaryOrder(int ce) + { + return (short)((ce & RuleBasedCollator.CE_SECONDARY_MASK_) + >> CE_SECONDARY_SHIFT_); + } + + /** + * Return the tertiary strength of a collation element. + * @param colelem the collation element + * @return the element's tertiary strength + * @draft 2.2 + */ + public final static short tertiaryOrder(int ce) + { + return (short)(ce & RuleBasedCollator.CE_TERTIARY_MASK_); + } + + /** + *

Sets the iterator to point to the collation element corresponding to + * the specified character (the parameter is a CHARACTER offset in the + * original string, not an offset into its corresponding sequence of + * collation elements). The value returned by the next call to next() + * will be the collation element corresponding to the specified position + * in the text. If that position is in the middle of a contracting + * character sequence, the result of the next call to next() is the + * collation element for that sequence. This means that getOffset() + * is not guaranteed to return the same value as was passed to a preceding + * call to setOffset().

+ * @param offset new character offset into the original text to set. + * @draft 2.2 + */ + public void setOffset(int offset) + { + m_source_.setIndex(offset); + char ch = m_source_.current(); + if (m_collator_.isUnsafe(ch)) { + // if it is unsafe we need to check if it is part of a contraction + // or a surrogate character + if (UTF16.isTrailSurrogate(ch)) { + // if it is a surrogate pair we move up one character + char prevch = m_source_.previous(); + if (!UTF16.isLeadSurrogate(prevch)) { + m_source_.setIndex(offset); // go back to the same index + } + } + else { + // could be part of a contraction + // backup to a safe point and iterate till we pass offset + while (m_source_.getIndex() > 0) { + if (!m_collator_.isUnsafe(ch)) { + break; + } + ch = m_source_.previous(); + } + updateInternalState(); + int prevoffset = 0; + while (m_source_.getIndex() < offset) { + prevoffset = m_source_.getIndex(); + next(); + } + m_source_.setIndex(prevoffset); + } + } + updateInternalState(); + } + + /** + *

Set a new string over which to iterate.

+ *

Iteration will start from the start of source.

+ * @param source the new source text. + * @draft 2.2 + */ + public synchronized void setText(String source) + { + m_source_ = new StringCharacterIterator(source); + updateInternalState(); + } + + /** + *

Set a new string iterator over which to iterate.

+ *

Iteration will start from the start of source.

+ * @param source the new source text. + * @draft 2.2 + */ + public synchronized void setText(CharacterIterator source) + { + m_source_ = source; + m_source_.setIndex(0); + updateInternalState(); + } + + // protected data members ----------------------------------------------- + + /** + * true if current codepoint was Hiragana + */ + protected boolean m_isCodePointHiragana_; + + // protected constructors ----------------------------------------------- + + /** + *

CollationElementIterator constructor. This takes the source string
+ * and the Collator. The cursor will walk through the source string based
+ * on the predefined collation rules. If the source string is empty,
+ * NULLORDER will be returned on the calls to next().

+ * @param source the source string. + * @param collator the RuleBasedCollator + * @draft 2.2 + */ + CollationElementIterator(String source, RuleBasedCollator collator) + { + m_source_ = new StringCharacterIterator(source); + m_collator_ = collator; + m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_]; + m_buffer_ = new StringBuffer(); + m_backup_ = new Backup(); + updateInternalState(); + } + + /** + *

CollationElementIterator constructor. This takes the source string
+ * and the Collator. The cursor will walk through the source string based
+ * on the predefined collation rules. If the source string is empty,
+ * NULLORDER will be returned on the calls to next().

+ * @param source the source string iterator. + * @param collator the RuleBasedCollator + * @draft 2.2 + */ + CollationElementIterator(CharacterIterator source, + RuleBasedCollator collator) + { + m_source_ = source; + m_collator_ = collator; + m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_]; + m_buffer_ = new StringBuffer(); + m_backup_ = new Backup(); + updateInternalState(); + } + + // private data members ------------------------------------------------- + + // private inner class -------------------------------------------------- + + /** + * Backup data class + */ + private static class Backup + { + // protected data members ------------------------------------------- + + /** + * Backup non FCD sequence limit + */ + protected int m_FCDLimit_; + /** + * Backup non FCD sequence start + */ + protected int m_FCDStart_; + /** + * Backup if previous Codepoint is Hiragana quatenary + */ + protected boolean m_isCodePointHiragana_; + /** + * Backup buffer position + */ + protected int m_bufferOffset_; + /** + * Backup source iterator offset + */ + protected int m_offset_; + /** + * Backup buffer contents + */ + protected StringBuffer m_buffer_; + + // protected constructor -------------------------------------------- + + /** + * Empty constructor + */ + protected Backup() + { + m_buffer_ = new StringBuffer(); + } + } + // end inner class ------------------------------------------------------ + + /** + * Direction of travel + */ + private boolean m_isForwards_; + /** + * Source string iterator + */ + private CharacterIterator m_source_; + /** + * This is position to the m_buffer_, -1 if iterator is not in m_buffer_ + */ + private int m_bufferOffset_; + /** + * This is the CE from CEs buffer that should be returned + */ + private int m_CEBufferOffset_; + /** + * This is the position to which we have stored processed CEs + */ + private int m_CEBufferSize_; + /** + * Buffer for temporary storage of normalized characters, discontiguous + * characters and Thai characters + */ + 
private StringBuffer m_buffer_; + /** + * Position in the original string to continue forward FCD check from. + */ + private int m_FCDLimit_; + /** + * Position in the original string that starts with a non-FCD sequence + */ + private int m_FCDStart_; + /** + * The collator this iterator is based on + */ + private RuleBasedCollator m_collator_; + /** + * true if Hiragana quatenary is on + */ + private boolean m_isHiragana4_; + /** + * CE buffer + */ + private int m_CEBuffer_[]; + /** + * In reality we should not have to deal with expansion sequences longer + * then 16. However this value can be change if a bigger buffer is needed. + * Note, if the size is change to too small a number, BIG trouble. + * Reasonable small value is around 10, if there's no Arabic or other + * funky collations that have long expansion sequence. This is the longest + * expansion sequence this can handle without bombing out. + */ + private static final int CE_BUFFER_INIT_SIZE_ = 512; + /** + * Backup storage + */ + private Backup m_backup_; + /** + * One character before the first non-zero combining class character + */ + private static final int FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0xC0; + /** + * One character before the first character with leading non-zero combining + * class + */ + private static final int LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0x300; + /** + * Mask for the last byte + */ + private static final int LAST_BYTE_MASK_ = 0xFF; + /** + * Shift value for the second last byte + */ + private static final int SECOND_LAST_BYTE_SHIFT_ = 8; + + // special ce values and tags ------------------------------------------- + private static final int CE_NOT_FOUND_ = 0xF0000000; + private static final int CE_EXPANSION_ = 0xF1000000; + private static final int CE_CONTRACTION_ = 0xF2000000; + private static final int CE_THAI_ = 0xF3000000; + /** + * Indicates the last ce has been consumed. Compare with NULLORDER. + * NULLORDER is returned if error occurs. 
+ */ + private static final int CE_NO_MORE_CES_ = 0x00010101; + private static final int CE_NO_MORE_CES_PRIMARY_ = 0x00010000; + private static final int CE_NO_MORE_CES_SECONDARY_ = 0x00000100; + private static final int CE_NO_MORE_CES_TERTIARY_ = 0x00000001; + + private static final int CE_NOT_FOUND_TAG_ = 0; + private static final int CE_EXPANSION_TAG_ = 1; + private static final int CE_CONTRACTION_TAG_ = 2; + private static final int CE_THAI_TAG_ = 3; + /** + * Charset processing, not yet implemented + */ + private static final int CE_CHARSET_TAG_ = 4; + /** + * AC00-D7AF + */ + private static final int CE_HANGUL_SYLLABLE_TAG_ = 6; + /** + * D800-DBFF + */ + private static final int CE_LEAD_SURROGATE_TAG_ = 7; + /** + * DC00-DFFF + */ + private static final int CE_TRAIL_SURROGATE_TAG_ = 8; + /** + * 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D + */ + private static final int CE_CJK_IMPLICIT_TAG_ = 9; + private static final int CE_IMPLICIT_TAG_ = 10; + private static final int CE_SPEC_PROC_TAG_ = 11; + /** + * This is a 3 byte primary with starting secondaries and tertiaries. + * It fits in a single 32 bit CE and is used instead of expansion to save + * space without affecting the performance (hopefully). 
+ */ + private static final int CE_LONG_PRIMARY_TAG_ = 12; + private static final int CE_CE_TAGS_COUNT = 13; + private static final int CE_BYTE_COMMON_ = 0x05; + private static final int CE_PRIMARY_SHIFT_ = 16; + private static final int CE_SECONDARY_SHIFT_ = 8; + + // end special ce values and tags --------------------------------------- + + private static final int IMPLICIT_HAN_START_ = 0x3400; + private static final int IMPLICIT_HAN_LIMIT_ = 0xA000; + private static final int IMPLICIT_SUPPLEMENTARY_COUNT_ = 0x100000; + private static final int IMPLICIT_BYTES_TO_AVOID_ = 3; + private static final int IMPLICIT_OTHER_COUNT_ = + 256 - IMPLICIT_BYTES_TO_AVOID_; + private static final int IMPLICIT_LAST_COUNT_ = IMPLICIT_OTHER_COUNT_ >> 1; + private static final int IMPLICIT_LAST_COUNT2_ = + (IMPLICIT_SUPPLEMENTARY_COUNT_ - 1) / + (IMPLICIT_OTHER_COUNT_ * IMPLICIT_OTHER_COUNT_) + 1; + private static final int IMPLICIT_HAN_SHIFT_ = IMPLICIT_LAST_COUNT_ * + IMPLICIT_OTHER_COUNT_ - IMPLICIT_HAN_START_; + private static final int IMPLICIT_BOUNDARY_ = 2 * IMPLICIT_OTHER_COUNT_ * + IMPLICIT_LAST_COUNT_ + IMPLICIT_HAN_START_; + private static final int IMPLICIT_LAST2_MULTIPLIER_ = + IMPLICIT_OTHER_COUNT_ / IMPLICIT_LAST_COUNT2_; + private static final int HANGUL_SBASE_ = 0xAC00; + private static final int HANGUL_LBASE_ = 0x1100; + private static final int HANGUL_VBASE_ = 0x1161; + private static final int HANGUL_TBASE_ = 0x11A7; + private static final int HANGUL_VCOUNT_ = 21; + private static final int HANGUL_TCOUNT_ = 28; + // private methods ------------------------------------------------------ + + /** + * Reset the iterator internally + */ + private void updateInternalState() + { + m_isCodePointHiragana_ = false; + m_bufferOffset_ = -1; + m_CEBufferOffset_ = 0; + m_CEBufferSize_ = 0; + m_FCDLimit_ = -1; + m_FCDStart_ = m_source_.getEndIndex(); + m_isHiragana4_ = m_collator_.m_isHiragana4_; + m_isForwards_ = true; + } + + /** + * Backup the current internal state + * 
@param backup object to store the data + */ + private void backupInternalState(Backup backup) + { + backup.m_offset_ = m_source_.getIndex(); + backup.m_FCDLimit_ = m_FCDLimit_; + backup.m_FCDStart_ = m_FCDStart_; + backup.m_isCodePointHiragana_ = m_isCodePointHiragana_; + backup.m_bufferOffset_ = m_bufferOffset_; + if (m_bufferOffset_ >= 0) { + backup.m_buffer_.append(m_buffer_); + } + } + + /** + * Update the iterator internally with backed-up state + * @param backup object that stored the data + */ + private void updateInternalState(Backup backup) + { + m_source_.setIndex(backup.m_offset_); + m_isCodePointHiragana_ = backup.m_isCodePointHiragana_; + m_bufferOffset_ = backup.m_bufferOffset_; + m_FCDLimit_ = backup.m_FCDLimit_; + m_FCDStart_ = backup.m_FCDStart_; + m_buffer_.delete(0, m_buffer_.length()); + if (m_bufferOffset_ >= 0) { + m_buffer_.append(backup.m_buffer_); + } + } + + /** + * A fast combining class retrieval system. + * @param ch UTF16 character + * @return combining class of ch + */ + private int getCombiningClass(char ch) + { + if (ch >= LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ && + m_collator_.isUnsafe(ch)) { + return NormalizerImpl.getCombiningClass(ch); + } + return 0; + } + + /** + *

Incremental normalization, this is an essential optimization.
+ * Assuming FCD checks have been done, normalize the non-FCD characters into
+ * the buffer.
+ * The source offset points to the current processing character.
+ *

+ */ + private void normalize() + { + /* synwee todo normalize to 1 before fcd + try { + decompose(m_buffer_, m_source_, m_FCDStart_, m_FCDLimit_, + m_collator_.m_decomposition_); + } + catch (ArrayOutOfBoundsException e) { + // increase the size of the buffer + m_buffer_ = new char[m_buffer_.length << 1]; + decompose(m_buffer_, m_source_, m_FCDStart_, m_FCDLimit_, + m_collator_.m_decomposition_); + } + */ + m_bufferOffset_ = 0; + } + + /** + *

Incremental FCD check and normalization. Gets the next base character
+ * position and determines if the in-between characters need normalization.
+ *

+ *

When entering, the state is known to be this: + *

+ * The incoming source offset points to the next processing character.
+ * On return, the source offset points to the current processing character.
+ *

+ * @return true if FCDCheck passes, false otherwise + */ + private boolean FCDCheck() + { + boolean result = true; + + // srcP = collationSource->pos-1; + + // Get the trailing combining class of the current character. + // If it's zero, we are OK. + char ch = m_source_.previous(); + m_FCDStart_ = m_source_.getIndex(); + // trie access + char fcd = 0; // synwee todo: unorm_getFCD16(ch); + if (fcd != 0 && UTF16.isLeadSurrogate(ch)) { + ch = m_source_.next(); // CharacterIterator.DONE has 0 fcd + if (UTF16.isTrailSurrogate(ch)) { + fcd = 0xFFFF; // unorm_getFCD16FromSurrogatePair(fcd, ch); + } else { + fcd = 0; + } + } + + byte prevTrailCC = (byte)(fcd & LAST_BYTE_MASK_); + + if (prevTrailCC != 0) { + // The current char has a non-zero trailing CC. Scan forward until + // we find a char with a leading cc of zero. + while (true) { + ch = m_source_.next(); + if (ch == CharacterIterator.DONE) { + break; + } + // trie access + fcd = 0; // unorm_getFCD16(ch); + if (fcd != 0 && UTF16.isLeadSurrogate(ch)) { + ch = m_source_.next(); + if (UTF16.isTrailSurrogate(ch)) { + fcd = 0xFFFF; // unorm_getFCD16FromSurrogatePair(fcd, ch); + } else { + fcd = 0; + } + } + byte leadCC = (byte)(fcd >> SECOND_LAST_BYTE_SHIFT_); + if (leadCC == 0) { + // this is a base character, we stop the FCD checks + break; + } + + if (leadCC < prevTrailCC) { + result = false; + } + + prevTrailCC = (byte)(fcd & LAST_BYTE_MASK_); + } + } + m_source_.setIndex(m_FCDStart_); + m_FCDLimit_ = m_source_.getIndex(); + return result; + } + + /** + *

Method tries to fetch the next character that is in fcd form.

+ *

Normalization is done if required.

+ *

Offsets are returned at the next character.

+ * @return next fcd character + */ + private char nextChar() + { + char result; + // loop handles the next character whether it is in the buffer or not. + if (m_bufferOffset_ == -1) { + // we're working on the source and not normalizing. fast path. + // note Thai pre-vowel reordering uses buffer too + result = m_source_.current(); + } + else { + // we are in the buffer, buffer offset will never be 0 here + result = m_buffer_.charAt(m_bufferOffset_ ++); + if (result == 0) { + // Null marked end of buffer, revert to the source string and + // loop back to top to try again to get a character. + m_source_.setIndex(m_FCDLimit_); + m_bufferOffset_ = -1; + m_buffer_.delete(0, m_buffer_.length()); + return nextChar(); + } + } + + if (m_collator_.m_decomposition_ == Collator.NO_DECOMPOSITION + || m_bufferOffset_ != -1 || m_FCDLimit_ > m_source_.getIndex() + // skip the fcd checks + || result < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_ + // Fast fcd safe path. trail combining class == 0. + ) { + m_source_.next(); + return result; + } + + if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) { + // We need to peek at the next character in order to tell if we are + // FCD + char next = m_source_.next(); + if (next == CharacterIterator.DONE + || next == LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) { + return result; // end of source string and if next character + // starts with a base character is always fcd. + } + } + + // Need a more complete FCD check and possible normalization. + if (!FCDCheck()) { + normalize(); + result = m_buffer_.charAt(0); + m_bufferOffset_ = 1; + } + m_source_.next(); + return result; + } + + /** + *

Incremental normalization, this is an essential optimization.
+ * Assuming FCD checks have been done, normalize the non-FCD characters into
+ * the buffer.
+ * The source offset points to the current processing character.

+ */ + public void normalizeBackwards() + { + int start = m_FCDStart_; + int size = 0; + /* synwee todo normalize including fcd + try { + size = decompose(m_buffer_, m_source_, start, m_FCDLimit_); + } + catch (ArrayOutOfBoundsException .) { + m_buffer_ = new char[m_buffer_.length << 1]; + size = decompose(m_buffer_, m_source_, start, m_FCDLimit); + } + */ + m_bufferOffset_ = size - 1; + } + + /** + *

Incremental backwards FCD check and normalization. Gets the previous
+ * base character position and determines if the in-between characters
+ * need normalization.
+ *

+ *

When entering, the state is known to be this: + *

+ * The input source offset points to the previous character.
+ * On return, the source offset points to the current processing character.
+ *

+ * @return true if FCDCheck passes, false otherwise + */ + private boolean FCDCheckBackwards() + { + boolean result = true; + char ch = m_source_.next(); + char fcd = 0; + m_FCDLimit_ = m_source_.getIndex(); + if (!UTF16.isSurrogate(ch)) { + fcd = 0; // synwee todo unorm_getFCD16(fcdTrieIndex, c); + } + else if (UTF16.isTrailSurrogate(ch) && m_FCDLimit_ > 0) { + // note trail surrogate characters gets 0 fcd + ch = m_source_.previous(); + if (UTF16.isLeadSurrogate(ch)) { + fcd = 0; // unorm_getFCD16(fcdTrieIndex, c2); + if (fcd != 0) { + fcd = 0; // unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c); + } + } + else { + fcd = 0; // unpaired surrogate + } + } + + byte leadCC = (byte)(fcd >> SECOND_LAST_BYTE_SHIFT_); + if (leadCC != 0) { + // The current char has a non-zero leading combining class. + // Scan backward until we find a char with a trailing cc of zero. + while (true) { + if (m_source_.getIndex() == 0) { + break; + } + ch = m_source_.previous(); + if (!UTF16.isSurrogate(ch)) { + fcd = 0; //unorm_getFCD16(fcdTrieIndex, c); + } + else { + if (UTF16.isTrailSurrogate(ch) && m_source_.getIndex() > 0) + { + ch = m_source_.previous(); + if (UTF16.isLeadSurrogate(ch)) { + fcd = 0; // unorm_getFCD16(fcdTrieIndex, c2); + } + if (fcd != 0) { + fcd = 0; // unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c); + } + } else { + fcd = 0; // unpaired surrogate + } + byte prevTrailCC = (byte)(fcd & LAST_BYTE_MASK_); + if (prevTrailCC == 0) { + break; + } + + if (leadCC < prevTrailCC) { + result = false; + } + leadCC = (byte)(fcd >> SECOND_LAST_BYTE_SHIFT_); + } + } + } + m_FCDStart_ = m_source_.getIndex(); // character with 0 lead/trail fcd + m_source_.setIndex(m_FCDLimit_); + return result; + } + + /** + *

Method tries to fetch the previous character that is in fcd form.

+ *

Normalization is done if required.

+ *

Offsets are returned at the current character.

+ * @return previous fcd character + */ + private char previousChar() + { + if (m_bufferOffset_ >= 0) { + m_bufferOffset_ --; + if (m_bufferOffset_ >= 0) { + return m_buffer_.charAt(m_bufferOffset_); + } + else { + // At the start of buffer, route back to string. + m_buffer_.delete(0, m_buffer_.length()); + if (m_FCDStart_ == 0) { + m_FCDStart_ = -1; + return CharacterIterator.DONE; + } + else { + m_FCDLimit_ = m_FCDStart_; + return previousChar(); + } + } + } + char result = m_source_.previous(); + if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ + || m_collator_.m_decomposition_ == Collator.NO_DECOMPOSITION + || m_FCDStart_ <= m_source_.getIndex() + || m_source_.getIndex() == 0) { + return result; + } + char ch = m_source_.previous(); + if (ch < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_) { + // if previous character is FCD + m_source_.next(); + return result; + } + // Need a more complete FCD check and possible normalization. + if (!FCDCheckBackwards()) { + normalizeBackwards(); + m_bufferOffset_ --; + result = m_buffer_.charAt(m_bufferOffset_); + } + return result; + } + + /** + * Determines if it is at the start of source iteration + * @return true if iterator at the start, false otherwise + */ + private boolean isBackwardsStart() + { + return (m_bufferOffset_ < 0 && m_source_.getIndex() == 0) + || (m_bufferOffset_ == 0 && m_FCDStart_ <= 0); + } + + /** + * Determine if a character is a Thai vowel, which sorts after its base + * consonant. 
+ * @param ch character to test + * @return true if ch is a Thai prevowel, false otherwise + */ + private boolean isThaiPreVowel(char ch) + { + return (ch >= 0xe40 && ch <= 0xe44) || (ch >= 0xec0 && ch <= 0xec4); + } + + /** + * Determine if a character is a Thai base consonant, which sorts before + * its prevowel + * @param ch character to test + * @return true if ch is a Thai base consonant, false otherwise + */ + private boolean isThaiBaseConsonant(char ch) + { + return ch >= 0xe01 && ch <= 0xe2e; + } + + + /** + * Determine if a character is a Jamo + * @param ch character to test + * @return true if ch is a Jamo, false otherwise + */ + private boolean isJamo(char ch) + { + return (ch - 0x1100 <= 0x1112 - 0x1100) + || (ch - 0x1161 <= 0x1175 - 0x1161) + || (ch - 0x11A8 <= 0x11C2 - 0x11A8); + } + + /** + * Checks if iterator is at the end of its source string. + * @return true if it is at the end, false otherwise + */ + private boolean isEnd() + { + if (m_bufferOffset_ >= 0) { + if (m_bufferOffset_ != m_buffer_.length()) { + return false; + } + else { + // at end of buffer. check if fcd is at the end + return m_FCDLimit_ == m_source_.getEndIndex(); + } + } + return m_source_.getEndIndex() == m_source_.getIndex(); + } + + /** + *

Special CE management for surrogates

+ *

Lead surrogate is encountered. CE to be retrieved by using the + * following code unit. If next character is a trail surrogate, both + * characters will be combined to retrieve the CE, otherwise completely + * ignorable (UCA specification) is returned.

+ * @param collator collator to use + * @param ce current CE + * @param trail character + * @return next CE for the surrogate characters + */ + private int nextSurrogate(RuleBasedCollator collator, int ce, char trail) + { + if (!UTF16.isTrailSurrogate(trail)) { + updateInternalState(m_backup_); + return IGNORABLE; + } + // TODO: CE contain the data from the previous CE + the mask. + // It should at least be unmasked + int result = collator.m_trie_.getTrailValue(ce, trail); + if (result == CE_NOT_FOUND_) { + updateInternalState(m_backup_); + } + return result; + } + + /** + * Gets the CE expansion offset + * @param collator current collator + * @param ce ce to test + * @return expansion offset + */ + private int getExpansionOffset(RuleBasedCollator collator, int ce) + { + return ((ce & 0xFFFFF0) >> 4) - collator.m_expansionOffset_; + } + + /** + * Swaps the Thai and Laos characters and returns the CEs. + * @param collator collator to use + * @param ce current ce + * @param ch current character + * @return next CE for Thai characters + */ + private int nextThai(RuleBasedCollator collator, int ce, char ch) + { + if (m_bufferOffset_ != -1 // already swapped + || isEnd() || !isThaiBaseConsonant(m_source_.current())) { + // next character is also not a thai base consonant + // Treat Thai as a length one expansion + // find the offset to expansion table + return collator.m_expansion_[getExpansionOffset(collator, ce)]; + } + else { + // swap the prevowel and the following base consonant into the + // buffer with their order swapped + // buffer is always clean when we are in the source string + m_buffer_.append(nextChar()); + m_buffer_.append(ch); + m_FCDLimit_ = m_source_.getIndex(); + m_FCDStart_ = m_FCDLimit_ - 2; + m_bufferOffset_ = 0; + return IGNORABLE; + } + } + + /** + * Gets the contraction ce offset + * @param collator current collator + * @param ce current ce + * @return contraction offset + */ + private int getContractionOffset(RuleBasedCollator collator, int 
ce) + { + return (ce & 0xFFFFFF) - collator.m_contractionOffset_; + } + + /** + * Checks if CE is a special tag CE + * @param ce to check + * @return true if CE is a special tag CE, false otherwise + */ + private boolean isSpecialPrefixTag(int ce) + { + return RuleBasedCollator.isSpecial(ce) && + RuleBasedCollator.getTag(ce) == CE_SPEC_PROC_TAG_; + } + + /** + *

Special processing getting a CE that is preceded by a certain + * prefix.

+ *

Used for optimizing Japanese length and iteration marks. When a + * special processing tag is encountered, iterate backwards to see if + * there's a match.

+ *

Contraction tables are used, prefix data is stored backwards in the + * table.

+ * @param collator collator to use + * @param ce current ce + * @param entrybackup entry backup iterator status + * @return next collation element + */ + private int nextSpecialPrefix(RuleBasedCollator collator, int ce, + Backup entrybackup) + { + backupInternalState(m_backup_); + updateInternalState(entrybackup); + previousChar(); + // We want to look at the character where we entered + + while (true) { + // This loop will run once per source string character, for as + // long as we are matching a potential contraction sequence + // First we position ourselves at the begining of contraction + // sequence + int entryoffset = getContractionOffset(collator, ce); + int offset = entryoffset; + if (isBackwardsStart()) { + ce = collator.m_contractionCE_[offset]; + break; + } + int previous = previousChar(); + while (previous > collator.m_contractionIndex_[offset]) { + // contraction characters are ordered, skip smaller characters + offset ++; + } + + if (previous == collator.m_contractionIndex_[offset]) { + // Found the source string char in the table. + // Pick up the corresponding CE from the table. + ce = collator.m_contractionCE_[offset]; + } + else { + // Source string char was not in the table, prefix not found + ce = collator.m_contractionCE_[entryoffset]; + } + + if (!isSpecialPrefixTag(ce)) { + // The source string char was in the contraction table, and + // the corresponding CE is not a prefix CE. We found the + // prefix, break out of loop, this CE will end up being + // returned. This is the normal way out of prefix handling + // when the source actually contained the prefix. 
+ break; + } + } + if (ce != CE_NOT_FOUND_) { + // we found something and we can merilly continue + updateInternalState(m_backup_); + } + else { // prefix search was a failure, we have to backup all the way to + // the start + updateInternalState(entrybackup); + } + return ce; + } + + /** + * Checks if the ce is a contraction tag + * @param ce ce to check + * @return true if ce is a contraction tag, false otherwise + */ + private boolean isContractionTag(int ce) + { + return RuleBasedCollator.isSpecial(ce) && + RuleBasedCollator.getTag(ce) == CE_CONTRACTION_TAG_; + } + + /** + * Method to copy skipped characters into the buffer and sets the fcd + * position. To ensure that the skipped characters are considered later, + * we need to place it in the appropriate position in the buffer and + * reassign the source index. simple case if index reside in string, + * simply copy to buffer and fcdposition = pos, pos = start of buffer. + * if pos in normalization buffer, we'll insert the copy infront of pos + * and point pos to the start of the buffer. why am i doing these copies? + * well, so that the whole chunk of codes in the getNextCE, + * ucol_prv_getSpecialCE does not require any changes, which will be + * really painful. 
+ * @param skipped character buffer + */ + private void setDiscontiguous(StringBuffer skipped) + { + if (m_bufferOffset_ >= 0) { + skipped.append(m_buffer_.substring(m_bufferOffset_)); + } + else { + m_FCDLimit_ = m_source_.getIndex(); + } + + m_bufferOffset_ = 0; + m_buffer_ = skipped; + } + + /** + * Returns the current character for forward iteration + * @return current character + */ + private char currentChar() + { + if (m_bufferOffset_ < 0) { + char result = m_source_.previous(); + m_source_.next(); + return result; + } + + // m_bufferOffset_ is never 0 in normal circumstances except after a + // discontiguous contraction since it is always returned and moved + // by 1 when we do nextChar() + return m_buffer_.charAt(m_bufferOffset_ - 1); + } + + /** + * Method to get the discontiguous collation element within the source. + * Note this function will set the position to the appropriate places. + * Passed in character offset points to the second combining character + * after the start character. 
+ * @param collator current collator used + * @param entryoffset index to the start character in the contraction table + * @return discontiguous collation element offset + */ + private int nextDiscontiguous(RuleBasedCollator collator, int entryoffset) + { + int offset = entryoffset; + boolean multicontraction = false; + StringBuffer skipped = new StringBuffer(); + char ch = currentChar(); + skipped.append(currentChar()); // accent after the first character + Backup backup = new Backup(); + backupInternalState(backup); + char nextch = ch; + while (true) { + ch = nextch; + nextch = nextChar(); + if (nextch == CharacterIterator.DONE + || getCombiningClass(nextch) == 0) { + // if there are no more accents to move around + // we don't have to shift previousChar, since we are resetting + // the offset later + if (multicontraction) { + setDiscontiguous(skipped); + return collator.m_contractionCE_[offset]; + } + break; + } + + offset ++; // skip the combining class offset + while (nextch > collator.m_contractionIndex_[offset]) { + offset ++; + } + + int ce = CE_NOT_FOUND_; + if (nextch != collator.m_contractionIndex_[offset] + || getCombiningClass(nextch) == getCombiningClass(ch)) { + // unmatched or blocked character + skipped.append(nextch); + continue; + } + else { + ce = collator.m_contractionCE_[offset]; + } + + if (ce == CE_NOT_FOUND_) { + break; + } + else if (isContractionTag(ce)) { + // this is a multi-contraction + offset = getContractionOffset(collator, ce); + if (collator.m_contractionCE_[offset] != CE_NOT_FOUND_) { + multicontraction = true; + backupInternalState(backup); + } + } + else { + setDiscontiguous(skipped); + return ce; + } + } + updateInternalState(backup); + return collator.m_contractionCE_[entryoffset]; + } + + /** + * Gets the next contraction ce + * @param collator collator to use + * @param ce current ce + * @param entrybackup entry backup iterator status + */ + private int nextContraction(RuleBasedCollator collator, int ce) + { + Backup backup 
= new Backup(); + backupInternalState(backup); + int entryce = CE_NOT_FOUND_; + while (true) { + int entryoffset = getContractionOffset(collator, ce); + int offset = entryoffset; + + if (isEnd()) { + ce = collator.m_contractionCE_[offset]; + if (ce == CE_NOT_FOUND_) { + // back up the source over all the chars we scanned going + // into this contraction. + ce = entryce; + updateInternalState(backup); + } + break; + } + + // get the discontiguos maximum combining class + byte maxCC = (byte)(collator.m_contractionIndex_[offset] & 0xFF); + // checks if all characters have the same combining class + byte allSame = (byte)(collator.m_contractionIndex_[offset] >> 8); + char ch = nextChar(); + offset ++; + while(ch > collator.m_contractionIndex_[offset]) { + // contraction characters are ordered, skip all smaller + offset ++; + } + + if (ch == collator.m_contractionIndex_[offset]) { + // Found the source string char in the contraction table. + // Pick up the corresponding CE from the table. + ce = collator.m_contractionCE_[offset]; + } + else + { + // Source string char was not in contraction table. + // Unless it is a discontiguous contraction, we are done + byte sCC; + if (maxCC == 0 || (sCC = (byte)getCombiningClass(ch)) == 0 + || sCC > maxCC || (allSame != 0 && sCC == maxCC) || + isEnd()) { + // Contraction can not be discontiguous, back up by one + previousChar(); + ce = collator.m_contractionCE_[entryoffset]; + } + else { + // Contraction is possibly discontiguous. 
+ // find the next character if ch is not a base character + char nextch = nextChar(); + if (nextch != CharacterIterator.DONE) { + previousChar(); + } + if (getCombiningClass(nextch) == 0) { + previousChar(); + // base character not part of discontiguous contraction + ce = collator.m_contractionCE_[entryoffset]; + } + else { + ce = nextDiscontiguous(collator, entryoffset); + } + } + } + + if (ce == CE_NOT_FOUND_) { + // source did not match the contraction, revert back original + updateInternalState(backup); + ce = entryce; + break; + } + + // source was a contraction + if (!isContractionTag(ce)) { + break; + } + + // ccontinue looping to check for the remaining contraction. + if (collator.m_contractionCE_[entryoffset] != CE_NOT_FOUND_) { + // there are further contractions to be performed, so we store + // the so-far completed ce, so that if we fail in the next + // round we just return this one. + entryce = collator.m_contractionCE_[entryoffset]; + backupInternalState(backup); + if (backup.m_bufferOffset_ >= 0) { + backup.m_bufferOffset_ --; + } + else { + backup.m_offset_ --; + } + } + } + return ce; + } + + /** + * Gets the next ce for long primaries, stuffs the rest of the collation + * elements into the ce buffer + * @param ce current ce + * @return next ce + */ + private int nextLongPrimary(int ce) + { + m_CEBuffer_[1] = ((ce & 0xFF) << 24) + | RuleBasedCollator.CE_CONTINUATION_MARKER_; + m_CEBufferOffset_ = 1; + m_CEBufferSize_ = 2; + m_CEBuffer_[0] = ((ce & 0xFFFF00) << 8) | (CE_BYTE_COMMON_ << 8) | + CE_BYTE_COMMON_; + return m_CEBuffer_[0]; + } + + /** + * Gets the number of expansion + * @param ce current ce + * @return number of expansion + */ + private int getExpansionCount(int ce) + { + return ce & 0xF; + } + + /** + * Gets the next expansion ce and stuffs the rest of the collation elements + * into the ce buffer + * @param collator current collator + * @param ce current ce + * @return next expansion ce + */ + private int 
nextExpansion(RuleBasedCollator collator, int ce) + { + // NOTE: we can encounter both continuations and expansions in an + // expansion! + // I have to decide where continuations are going to be dealt with + int offset = getExpansionOffset(collator, ce); + m_CEBufferSize_ = getExpansionCount(ce); + m_CEBufferOffset_ = 1; + m_CEBuffer_[0] = collator.m_expansion_[offset]; + if (m_CEBufferSize_ != 0) { + // if there are less than 16 elements in expansion + for (int i = 1; i < m_CEBufferSize_; i ++) { + m_CEBuffer_[i] = collator.m_expansion_[offset + i]; + } + } + else { + // ce are terminated + m_CEBufferSize_ = 1; + while (collator.m_expansion_[offset] != 0) { + m_CEBuffer_[m_CEBufferSize_ ++] = + collator.m_expansion_[++ offset]; + } + } + return m_CEBuffer_[0]; + } + + /** + * Gets the next implicit ce for codepoints + * @param codepoint current codepoint + * @param fixupoffset an offset to calculate the implicit ce + * @return implicit ce + */ + private int nextImplicit(int codepoint, int fixupoffset) + { + if ((codepoint & 0xFFFE) == 0xFFFE + || (0xD800 <= codepoint && codepoint <= 0xDC00)) { + // illegal code value, use completely ignoreable! + return IGNORABLE; + } + // we must skip all 00, 01, 02 bytes, so most bytes have 253 values + // we must leave a gap of 01 between all values of the last byte, so + // the last byte has 126 values (3 byte case) + // shift so that HAN all has the same first primary, for compression. + // for the 4 byte case, we make the gap as large as we can fit. 
+ // Three byte forms are EC xx xx, ED xx xx, EE xx xx (with a gap of 1) + // Four byte forms (most supplementaries) are EF xx xx xx + // (with a gap of LAST2_MULTIPLIER == 14) + int last0 = codepoint - IMPLICIT_BOUNDARY_; + int result = 0; + if (last0 < 0) { + // shift so HAN shares single block + codepoint += IMPLICIT_HAN_SHIFT_; + int last1 = codepoint / IMPLICIT_LAST_COUNT_; + last0 = codepoint % IMPLICIT_LAST_COUNT_; + int last2 = last1 / IMPLICIT_OTHER_COUNT_; + last1 %= IMPLICIT_OTHER_COUNT_; + result = 0xEC030300 - fixupoffset + (last2 << 24) + (last1 << 16) + + (last0 << 9); + } + else { + int last1 = last0 / IMPLICIT_LAST_COUNT2_; + last0 %= IMPLICIT_LAST_COUNT2_; + int last2 = last1 / IMPLICIT_OTHER_COUNT_; + last1 %= IMPLICIT_OTHER_COUNT_; + result = 0xEF030303 - fixupoffset + (last2 << 16) + (last1 << 8) + + (last0 * IMPLICIT_LAST2_MULTIPLIER_); + } + m_CEBuffer_[0] = (result & RuleBasedCollator.CE_PRIMARY_MASK_) + | 0x00000505; + m_CEBuffer_[1] = ((result & 0x0000FFFF) << 16) | 0x000000C0; + m_CEBufferOffset_ = 1; + m_CEBufferSize_ = 2; + return m_CEBuffer_[0]; + } + + /** + * Returns the next ce associated with the following surrogate characters + * @param ch current character + * @return ce + */ + private int nextSurrogate(char ch) + { + char nextch = nextChar(); + if (nextch != CharacterIterator.DONE && + UTF16.isTrailSurrogate(nextch)) { + int codepoint = UCharacterProperty.getRawSupplementary(ch, nextch); + if ((codepoint >= 0x20000 && codepoint <= 0x2a6d6) + || (codepoint >= 0x2F800 && codepoint <= 0x2FA1D)) { + // this might be a CJK supplementary cp + return nextImplicit(codepoint, 0x04000000); + } + // or a regular one + return nextImplicit(codepoint, 0); + } + if (nextch != CharacterIterator.DONE) { + previousChar(); // reverts back to the original position + } + return IGNORABLE; // completely ignorable + } + + /** + * Returns the next ce for a hangul character, this is an implicit + * calculation + * @param collator current collator + * 
@param ch current character + * @return hangul ce + */ + private int nextHangul(RuleBasedCollator collator, char ch) + { + char L = (char)(ch - HANGUL_SBASE_); + + // divide into pieces + // do it in this order since some compilers can do % and / in one + // operation + char T = (char)(L % HANGUL_TCOUNT_); + L /= HANGUL_TCOUNT_; + char V = (char)(L % HANGUL_VCOUNT_); + L /= HANGUL_VCOUNT_; + + // offset them + L += HANGUL_LBASE_; + V += HANGUL_VBASE_; + T += HANGUL_TBASE_; + + // return the first CE, but first put the rest into the expansion + // buffer + m_CEBufferSize_ = 0; + if (!collator.m_isJamoSpecial_) { // FAST PATH + m_CEBuffer_[m_CEBufferSize_ ++] = + collator.UCA_.m_trie_.getLeadValue(L); + m_CEBuffer_[m_CEBufferSize_ ++] = + collator.UCA_.m_trie_.getLeadValue(V); + + if (T != HANGUL_TBASE_) { + m_CEBuffer_[m_CEBufferSize_ ++] = + collator.UCA_.m_trie_.getLeadValue(T); + } + m_CEBufferOffset_ = 1; + return m_CEBuffer_[0]; + } + else { + // Jamo is Special + // Since Hanguls pass the FCD check, it is guaranteed that we + // won't be in the normalization buffer if something like this + // happens + // Move Jamos into normalization buffer + m_buffer_.append((char)L); + m_buffer_.append((char)V); + if (T != HANGUL_TBASE_) { + m_buffer_.append((char)T); + } + m_FCDLimit_ = m_source_.getIndex(); + m_FCDStart_ = m_FCDLimit_ - 1; + // Indicate where to continue in main input string after + // exhausting the buffer + return IGNORABLE; + } + } + + /** + *

Special CE management. Expansions, contractions etc...

+ * @param collator can be plain UCA + * @param ce current ce + * @param ch current character + * @return next special ce + */ + private int nextSpecial(RuleBasedCollator collator, int ce, char ch) + { + int codepoint = ch; + Backup entrybackup = new Backup(); + backupInternalState(entrybackup); + while (true) { + // This loop will repeat only in the case of contractions, + // surrogate + switch(RuleBasedCollator.getTag(ce)) { + case CE_NOT_FOUND_TAG_: + // impossible case for icu4j + return ce; + case RuleBasedCollator.CE_SURROGATE_TAG_: + if (isEnd()) { + return IGNORABLE; + } + backupInternalState(m_backup_); + char trail = nextChar(); + ce = nextSurrogate(collator, ce, trail); + // calculate the supplementary code point value, + // if surrogate was not tailored we go one more round + codepoint = + UCharacterProperty.getRawSupplementary(ch, trail); + break; + case CE_THAI_TAG_: + ce = nextThai(collator, ce, ch); + break; + case CE_SPEC_PROC_TAG_: + ce = nextSpecialPrefix(collator, ce, entrybackup); + break; + case CE_CONTRACTION_TAG_: + ce = nextContraction(collator, ce); + break; + case CE_LONG_PRIMARY_TAG_: + return nextLongPrimary(ce); + case CE_EXPANSION_TAG_: + return nextExpansion(collator, ce); + // various implicits optimization + case CE_CJK_IMPLICIT_TAG_: + // 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D + return nextImplicit(codepoint, 0x04000000); + case CE_IMPLICIT_TAG_: // everything that is not defined + return nextImplicit(codepoint, 0); + case CE_TRAIL_SURROGATE_TAG_: + return IGNORABLE; // DC00-DFFF broken surrogate + case CE_LEAD_SURROGATE_TAG_: // D800-DBFF + return nextSurrogate(ch); + case CE_HANGUL_SYLLABLE_TAG_: // AC00-D7AF + return nextHangul(collator, ch); + case CE_CHARSET_TAG_: + // not yet implemented probably after 1.8 + return CE_NOT_FOUND_; + default: + ce = IGNORABLE; + // synwee todo, throw exception or something here. 
+ } + if (!RuleBasedCollator.isSpecial(ce)) { + break; + } + } + return ce; + } + + /** + * Getting the previous Thai ce + * @param collator current collator + * @param ch current character + * @return previous Thai ce + */ + private int previousThai(RuleBasedCollator collator, int ce, char ch) + { + char prevch = previousChar(); + if (isBackwardsStart() || !isThaiBaseConsonant(ch) + || !isThaiPreVowel(prevch)) { + if (prevch != CharacterIterator.DONE) { + nextChar(); + } + // Treat Thai as a length one expansion + return collator.m_expansion_[getExpansionOffset(collator, ce)]; + } + else + { + // Move the prevowel and the following base Consonant into the + // normalization buffer with their order swapped + // buffer is always clean when we are in the source string + m_buffer_.append(ch); + m_buffer_.append(prevch); + m_bufferOffset_ = 2; + + if (m_source_.getIndex() == 0) { + m_FCDStart_ = 0; + m_FCDLimit_ = 2; + } + else { + m_FCDStart_ = m_source_.getIndex(); + m_FCDLimit_ = m_FCDStart_ + 2; + } + + return IGNORABLE; + } + } + + /** + * Special processing is getting a CE that is preceded by a certain prefix. + * Currently this is only needed for optimizing Japanese length and + * iteration marks. When we encouter a special processing tag, we go + * backwards and try to see if we have a match. Contraction tables are used + * - so the whole process is not unlike contraction. prefix data is stored + * backwards in the table. 
+ * @param collator current collator + * @param ce current ce + * @return previous ce + */ + private int previousSpecialPrefix(RuleBasedCollator collator, int ce) + { + Backup backup = new Backup(); + backupInternalState(backup); + while (true) { + // position ourselves at the begining of contraction sequence + int offset = getContractionOffset(collator, ce); + int entryoffset = offset; + if (isBackwardsStart()) { + ce = collator.m_contractionCE_[offset]; + break; + } + char prevch = previousChar(); + while (prevch > collator.m_contractionIndex_[offset]) { + // since contraction codepoints are ordered, we skip all that + // are smaller + offset ++; + } + if (prevch == collator.m_contractionIndex_[offset]) { + ce = collator.m_contractionCE_[offset]; + } + else { + // char was not in the table. prefix not found + ce = collator.m_contractionCE_[entryoffset]; + } + + if (!isSpecialPrefixTag(ce)) { + // char was in the contraction table, and the corresponding ce + // is not a prefix ce. We found the prefix, break out of loop, + // this ce will end up being returned. + break; + } + } + updateInternalState(backup); + return ce; + } + + /** + * Retrieves the previous contraction ce. To ensure that the backwards and + * forwards iteration matches, we take the current region of most possible + * match and pass it through the forward iteration. This will ensure that + * the obstinate problem of overlapping contractions will not occur. 
+ * @param collator current collator + * @param ce current ce + * @param ch current character + * @return previous contraction ce + */ + private int previousContraction(RuleBasedCollator collator, int ce, char ch) + { + int entryoffset = getContractionOffset(collator, ce); + if (isBackwardsStart()) { + // start of string or this is not the end of any contraction + return collator.m_contractionCE_[entryoffset]; + } + StringBuffer buffer = new StringBuffer(); + while (collator.isUnsafe(ch)) { + buffer.insert(0, ch); + ch = previousChar(); + if (isBackwardsStart()) { + break; + } + } + // adds the initial base character to the string + buffer.insert(0, ch); + // a new collation element iterator is used to simply things, since + // using the current collation element iterator will mean that the + // forward and backwards iteration will share and change the same + // buffers. it is going to be painful. + CollationElementIterator temp = + new CollationElementIterator(buffer.toString(), collator); + ce = temp.next(); + m_CEBufferSize_ = 0; + while (ce != NULLORDER) { + if (m_CEBufferSize_ == m_CEBuffer_.length) { + try { + int tempbuffer[] = new int[m_CEBuffer_.length + 50]; + System.arraycopy(m_CEBuffer_, 0, tempbuffer, 0, + m_CEBuffer_.length); + m_CEBuffer_ = tempbuffer; + } + catch (Exception e) + { + e.printStackTrace(); + return NULLORDER; + } + } + m_CEBuffer_[m_CEBufferSize_ ++] = ce; + ce = temp.next(); + } + + m_CEBufferOffset_ = m_CEBufferSize_ - 1; + return m_CEBuffer_[m_CEBufferOffset_]; + } + + /** + * Returns the previous long primary ces + * @param ce long primary ce + * @return previous long primary ces + */ + private int previousLongPrimary(int ce) + { + m_CEBufferSize_ = 0; + m_CEBuffer_[m_CEBufferSize_ ++] = + ((ce & 0xFFFF00) << 8) | (CE_BYTE_COMMON_ << 8) | CE_BYTE_COMMON_; + m_CEBuffer_[m_CEBufferSize_ ++] = ((ce & 0xFF) << 24) + | RuleBasedCollator.CE_CONTINUATION_MARKER_; + m_CEBufferOffset_ = m_CEBufferSize_ - 1; + return 
m_CEBuffer_[m_CEBufferOffset_]; + } + + /** + * Returns the previous expansion ces + * @param collator current collator + * @param ce current ce + * @return previous expansion ce + */ + private int previousExpansion(RuleBasedCollator collator, int ce) + { + // find the offset to expansion table + int offset = getExpansionOffset(collator, ce); + m_CEBufferSize_ = getExpansionCount(ce); + if (m_CEBufferSize_ != 0) { + // less than 16 elements in expansion + for (int i = 0; i < m_CEBufferSize_; i ++) { + m_CEBuffer_[i] = collator.m_expansion_[offset + i]; + } + + } + else { + // null terminated ces + while (collator.m_expansion_[offset + m_CEBufferSize_] != 0) { + m_CEBuffer_[m_CEBufferSize_] = + collator.m_expansion_[offset + m_CEBufferSize_]; + m_CEBufferSize_ ++; + } + } + m_CEBufferOffset_ = m_CEBufferSize_ - 1; + return m_CEBuffer_[m_CEBufferOffset_]; + } + + /** + * Returns previous hangul ces + * @param collator current collator + * @param ch current character + * @return previous hangul ce + */ + private int previousHangul(RuleBasedCollator collator, char ch) + { + char L = (char)(ch - HANGUL_SBASE_); + // we do it in this order since some compilers can do % and / in one + // operation + char T = (char)(L % HANGUL_TCOUNT_); + L /= HANGUL_TCOUNT_; + char V = (char)(L % HANGUL_VCOUNT_); + L /= HANGUL_VCOUNT_; + + // offset them + L += HANGUL_LBASE_; + V += HANGUL_VBASE_; + T += HANGUL_TBASE_; + + m_CEBufferSize_ = 0; + if (!collator.m_isJamoSpecial_) { + m_CEBuffer_[m_CEBufferSize_ ++] = + collator.UCA_.m_trie_.getLeadValue(L); + m_CEBuffer_[m_CEBufferSize_ ++] = + collator.UCA_.m_trie_.getLeadValue(V); + if (T != HANGUL_TBASE_) { + m_CEBuffer_[m_CEBufferSize_ ++] = + collator.UCA_.m_trie_.getLeadValue(T); + } + m_CEBufferOffset_ = m_CEBufferSize_ - 1; + return m_CEBuffer_[m_CEBufferOffset_]; + } + else { + // Since Hanguls pass the FCD check, it is guaranteed that we won't + // be in the normalization buffer if something like this happens + // Move Jamos into 
normalization buffer + m_buffer_.append(L); + m_buffer_.append(V); + if (T != HANGUL_TBASE_) { + m_buffer_.append(T); + } + + m_FCDStart_ = m_source_.getIndex(); + m_FCDLimit_ = m_FCDStart_ + 1; + return IGNORABLE; + } + } + + /** + * Gets implicit codepoint ces + * @param codepoint current codepoint + * @param fixupoffset offset to shift ces for han + * @return implicit codepoint ces + */ + private int previousImplicit(int codepoint, int fixupoffset) + { + if ((codepoint & 0xFFFE) == 0xFFFE + || (0xD800 <= codepoint && codepoint <= 0xDC00)) { + return IGNORABLE; // illegal code value, completely ignoreable! + } + // we must skip all 00, 01, 02 bytes, so most bytes have 253 values + // we must leave a gap of 01 between all values of the last byte, so + // the last byte has 126 values (3 byte case) + // we shift so that HAN all has the same first primary, for + // compression. + // for the 4 byte case, we make the gap as large as we can fit. + // Three byte forms are EC xx xx, ED xx xx, EE xx xx (with a gap of 1) + // Four byte forms (most supplementaries) are EF xx xx xx (with a gap + // of LAST2_MULTIPLIER == 14) + int last0 = codepoint - IMPLICIT_BOUNDARY_; + int result = 0; + + if (last0 < 0) { + // shift HAN to share single block + codepoint += IMPLICIT_HAN_SHIFT_; + int last1 = codepoint / IMPLICIT_LAST_COUNT_; + last0 = codepoint % IMPLICIT_LAST_COUNT_; + int last2 = last1 / IMPLICIT_OTHER_COUNT_; + last1 %= IMPLICIT_OTHER_COUNT_; + result = 0xEC030300 - fixupoffset + (last2 << 24) + (last1 << 16) + + (last0 << 9); + } + else { + int last1 = last0 / IMPLICIT_LAST_COUNT2_; + last0 %= IMPLICIT_LAST_COUNT2_; + int last2 = last1 / IMPLICIT_OTHER_COUNT_; + last1 %= IMPLICIT_OTHER_COUNT_; + result = 0xEF030303 - fixupoffset + (last2 << 16) + (last1 << 8) + + (last0 * IMPLICIT_LAST2_MULTIPLIER_); + } + m_CEBufferSize_ = 2; + m_CEBufferOffset_ = 1; + m_CEBuffer_[0] = (result & RuleBasedCollator.CE_PRIMARY_MASK_) + | 0x00000505; + m_CEBuffer_[1] = ((result & 
0x0000FFFF) << 16) | 0x000000C0; + return m_CEBuffer_[1]; + } + + /** + * Gets the previous surrogate ce + * @param ch current character + * @return previous surrogate ce + */ + private int previousSurrogate(char ch) + { + if (isBackwardsStart()) { + // we are at the start of the string, wrong place to be at + return IGNORABLE; + } + char prevch = previousChar(); + // Handles Han and Supplementary characters here. + if (UTF16.isLeadSurrogate(prevch)) { + return previousImplicit( + UCharacterProperty.getRawSupplementary(prevch, ch), 0); + } + if (prevch != CharacterIterator.DONE) { + nextChar(); + } + return IGNORABLE; // completely ignorable + } + + /** + *

Special CE management. Expansions, contractions etc...

+ * @param collator can be plain UCA + * @param ce current ce + * @param ch current character + * @return previous special ce + */ + private int previousSpecial(RuleBasedCollator collator, int ce, char ch) + { + while(true) { + // the only ces that loops are thai, special prefix and + // contractions + switch (RuleBasedCollator.getTag(ce)) { + case CE_NOT_FOUND_TAG_: // this tag always returns + return ce; + case RuleBasedCollator.CE_SURROGATE_TAG_: + // essentialy a disengaged lead surrogate. a broken + // sequence was encountered and this is an error + return IGNORABLE; + case CE_THAI_TAG_: + ce = previousThai(collator, ce, ch); + break; + case CE_SPEC_PROC_TAG_: + ce = previousSpecialPrefix(collator, ce); + break; + case CE_CONTRACTION_TAG_: + return previousContraction(collator, ce, ch); + case CE_LONG_PRIMARY_TAG_: + return previousLongPrimary(ce); + case CE_EXPANSION_TAG_: // always returns + return previousExpansion(collator, ce); + case CE_HANGUL_SYLLABLE_TAG_: // AC00-D7AF + return previousHangul(collator, ch); + case CE_LEAD_SURROGATE_TAG_: // D800-DBFF + return IGNORABLE; // broken surrogate sequence + case CE_TRAIL_SURROGATE_TAG_: // DC00-DFFF + return previousSurrogate(ch); + case CE_CJK_IMPLICIT_TAG_: + // 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D*/ + return previousImplicit(ch, 0x04000000); + case CE_IMPLICIT_TAG_: // everything that is not defined + // UCA is filled with these. Tailorings are NOT_FOUND + return previousImplicit(ch, 0); + case CE_CHARSET_TAG_: // this tag always returns + return CE_NOT_FOUND_; + default: + // this tag always returns + ce = IGNORABLE; + // synwee todo, throw exception or something here. 
+ } + if (!RuleBasedCollator.isSpecial(ce)) { + break; + } + } + return ce; + } +} diff --git a/icu4j/src/com/ibm/icu/text/CollationKey.java b/icu4j/src/com/ibm/icu/text/CollationKey.java new file mode 100755 index 00000000000..1385431f3d2 --- /dev/null +++ b/icu4j/src/com/ibm/icu/text/CollationKey.java @@ -0,0 +1,260 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2002, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollationKey.java,v $ +* $Date: 2002/05/14 16:48:49 $ +* $Revision: 1.4 $ +* +******************************************************************************* +*/ +package com.ibm.icu.text; + +import java.util.Arrays; + +/** + *

A CollationKey represents a String under the + * rules of a specific Collator object. Comparing two + * CollationKeys returns the relative order of the + * Strings they represent. Using CollationKeys to + * compare Strings is generally faster than using + * Collator.compare. Thus, when the Strings must be + * compared multiple times, for example when sorting a list of + * Strings, it is more efficient to use CollationKeys. + *

+ *

You cannot create CollationKeys directly. Rather, generate + * them by calling Collator.getCollationKey(String). You can only + * compare CollationKeys generated from the same + * Collator object.

+ *

Generating a CollationKey for a String + * involves examining the entire String and converting it to + * series of bits that can be compared bitwise. This allows fast comparisons + * once the keys are generated. The cost of generating keys is recouped in + * faster comparisons when Strings need to be compared many + * times. On the other hand, the result of a comparison is often determined by + * the first couple of characters of each String. + * Collator.compare(String, String) examines only as many characters as it needs + * which allows it to be faster when doing single comparisons.

+ *

The following example shows how CollationKeys might be used + * to sort a list of Strings.

+ *
+ *
+ * // Create an array of CollationKeys for the Strings to be sorted.
+ * Collator myCollator = Collator.getInstance();
+ * CollationKey[] keys = new CollationKey[3];
+ * keys[0] = myCollator.getCollationKey("Tom");
+ * keys[1] = myCollator.getCollationKey("Dick");
+ * keys[2] = myCollator.getCollationKey("Harry");
+ * sort( keys );
+ * 
+ * //... + *
+ * // Inside body of sort routine, compare keys this way + * if( keys[i].compareTo( keys[j] ) > 0 ) + * // swap keys[i] and keys[j] + *
+ * //... + *
+ * // Finally, when we've returned from sort. + * System.out.println( keys[0].getSourceString() ); + * System.out.println( keys[1].getSourceString() ); + * System.out.println( keys[2].getSourceString() ); + *
+ *
+ * + * @see Collator + * @see RuleBasedCollator + * @author Syn Wee Quek + * @since release 2.2, April 18 2002 + * @draft 2.2 + */ +public final class CollationKey implements Comparable +{ + // public methods ------------------------------------------------------- + + // public getters ------------------------------------------------------- + + /** + * Returns the String that this CollationKey represents. + * @return source string that this CollationKey represents + * @draft 2.2 + */ + public String getSourceString() + { + return m_source_; + } + + /** + *

Duplicates and returns the value of this CollationKey as a sequence + * of big-endian bytes.

+ *

If two CollationKeys could be legitimately compared, then one could + * compare the byte arrays of each to obtain the same result.

+ * @return CollationKey value in a sequence of big-endian byte bytes. + * @draft 2.2 + */ + public byte[] toByteArray() + { + int length = 0; + while (true) { + if (m_key_[length] == 0) { + break; + } + length ++; + } + length ++; + byte result[] = new byte[length]; + System.arraycopy(m_key_, 0, result, 0, length); + return result; + } + + // public other methods ------------------------------------------------- + + /** + *

Compare this CollationKey to the target CollationKey. The collation + * rules of the Collator object which created these keys are applied.

+ *

Note: CollationKeys created by different Collators + * cannot be compared.

+ * @param target target CollationKey + * @return an integer value, if value is less than zero this CollationKey + * is less than target, if value is zero they are equal + * and value is greater than zero if this CollationKey is greater + * than target. + * @see Collator#compare(String, String) + * @draft 2.2 + */ + public int compareTo(CollationKey target) + { + int i = 0; + while (m_key_[i] != 0 && target.m_key_[i] != 0) { + int key = m_key_[i] & 0xFF; + int targetkey = target.m_key_[i] & 0xFF; + if (key < targetkey) { + return -1; + } + if (targetkey < key) { + return 1; + } + i ++; + } + // last comparison if we encounter a 0 + int key = m_key_[i] & 0xFF; + int targetkey = target.m_key_[i] & 0xFF; + if (key < targetkey) { + return -1; + } + if (targetkey < key) { + return 1; + } + return 0; + } + + /** + *

Compares this CollationKey with the specified Object.

+ * @param obj the Object to be compared. + * @return Returns a negative integer, zero, or a positive integer + * respectively if this CollationKey is less than, equal to, or + * greater than the given Object. + * @exception ClassCastException thrown when the specified Object is not a + * CollationKey. + * @see #compareTo(CollationKey) + * @draft 2.2 + */ + public int compareTo(Object obj) + { + return compareTo((CollationKey)obj); + } + + /** + *

Compare this CollationKey and the target CollationKey for equality. + *

+ *

The collation rules of the Collator object which created these keys + * are applied.

+ *

Note: CollationKeys created by different Collators + * cannot be compared.

+ * @param target the CollationKey to compare to. + * @return true if two objects are equal, false otherwise. + * @draft 2.2 + */ + public boolean equals(Object target) + { + if (this == target) { + return true; + } + if (target == null || !(target instanceof CollationKey)) { + return false; + } + CollationKey other = (CollationKey)target; + int i = 0; + while (true) { + if (m_key_[i] != other.m_key_[i]) { + return false; + } + if (m_key_[i] == 0) { + break; + } + i ++; + } + return true; + } + + /** + *

Creates a hash code for this CollationKey. The hash value is + * calculated on the key itself, not the String from which the key was + * created. Thus if x and y are CollationKeys, then + * x.hashCode() == y.hashCode() if x.equals(y) is true. This allows + * language-sensitive comparison in a hash table.

+ *

See the CollationKey class description for an example.

+ * @return the hash value. + * @draft 2.2 + */ + public int hashCode() + { + if (m_hashCode_ == 0) { + int size = m_key_.length >> 1; + StringBuffer key = new StringBuffer(size); + int i = 0; + while (m_key_[i] != 0 && m_key_[i + 1] != 0) { + key.append((m_key_[i] << 8) | m_key_[i + 1]); + i += 2; + } + if (m_key_[i] != 0) { + key.append(m_key_[i] << 8); + } + m_hashCode_ = key.hashCode(); + } + return m_hashCode_; + } + + // protected constructor ------------------------------------------------ + + /** + * Protected CollationKey can only be generated by Collator objects + * @param source string the CollationKey represents + * @param key sort key array of bytes + * @param size of sort key + * @draft 2v2 + */ + CollationKey(String source, byte key[]) + { + m_source_ = source; + m_key_ = key; + m_hashCode_ = 0; + } + + // private data members ------------------------------------------------- + + /** + * Source string this CollationKey represents + */ + private String m_source_; + /** + * Sequence of bytes that represents the sort key + */ + private byte m_key_[]; + /** + * Hash code for the key + */ + private int m_hashCode_; +} \ No newline at end of file diff --git a/icu4j/src/com/ibm/icu/text/Collator.java b/icu4j/src/com/ibm/icu/text/Collator.java new file mode 100755 index 00000000000..993f0127c29 --- /dev/null +++ b/icu4j/src/com/ibm/icu/text/Collator.java @@ -0,0 +1,454 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2002, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Collator.java,v $ +* $Date: 2002/05/14 16:48:49 $ +* $Revision: 1.4 $ +* +******************************************************************************* +*/ +package com.ibm.icu.text; + +import java.util.Locale; + +/** +*

The Collator class performs locale-sensitive String comparison. +* You use this class to build searching and sorting routines for natural +* language text.

+*

Collator is an abstract base class. Subclasses implement specific +* collation strategies. One subclass, RuleBasedCollator, is currently +* provided and is applicable to a wide set of languages. Other subclasses +* may be created to handle more specialized needs.

+*

Like other locale-sensitive classes, you can use the static factory +* method, getInstance, to obtain the appropriate Collator object for a given +* locale. You will only need to look at the subclasses of Collator if you need +* to understand the details of a particular collation strategy or if you need +* to modify that strategy.

+*

The following example shows how to compare two strings using the Collator +* for the default locale. +*

+* // Compare two strings in the default locale
+* Collator myCollator = Collator.getInstance();
+* if (myCollator.compare("abc", "ABC") < 0) {
+*     System.out.println("abc is less than ABC");
+* }
+* else {
+*     System.out.println("abc is greater than or equal to ABC");
+* }
+* 
+*

You can set a Collator's strength property to +* determine the level of difference considered significant in comparisons. +* Four strengths are provided: PRIMARY, SECONDARY, +* TERTIARY, and IDENTICAL. The exact assignment of +* strengths to language features is locale dependant. For example, in Czech, +* "e" and "f" are considered primary differences, while "e" and "\u00EA" are +* secondary differences, "e" and "E" are tertiary differences and "e" and "e" +* are identical. The following shows how both case and accents could be +* ignored for US English.

+*
+* //Get the Collator for US English and set its strength to PRIMARY
+* Collator usCollator = Collator.getInstance(Locale.US);
+* usCollator.setStrength(Collator.PRIMARY);
+* if (usCollator.compare("abc", "ABC") == 0) {
+*     System.out.println("Strings are equivalent");
+* }
+* 
+*

For comparing Strings exactly once, the compare method provides the best +* performance. When sorting a list of Strings however, it is generally +* necessary to compare each String multiple times. In this case, +* CollationKeys provide better performance. The CollationKey class converts a +* String to a series of bits that can be compared bitwise against other +* CollationKeys. A CollationKey is created by a Collator object for a given +* String.

+*

Note: CollationKeys from different Collators can not be compared. See the +* class description for CollationKey for an example using CollationKeys. +*

+* @author Syn Wee Quek +* @since release 2.2, April 18 2002 +* @draft 2.2 +*/ + +public abstract class Collator +{ + // public data members --------------------------------------------------- + + /** + * Collator strength value. When set, only PRIMARY differences are + * considered significant during comparison. The assignment of strengths + * to language features is locale dependant. A common example is for + * different base letters ("a" vs "b") to be considered a PRIMARY + * difference. + * @see #setStrength + * @see #getStrength + * @draft 2.2 + */ + public final static int PRIMARY + = RuleBasedCollator.AttributeValue.PRIMARY_; + /** + * Collator strength value. When set, only SECONDARY and above + * differences are considered significant during comparison. The + * assignment of strengths to language features is locale dependant. A + * common example is for different accented forms of the same base letter + * ("a" vs "\u00E4") to be considered a SECONDARY difference. + * @see #setStrength + * @see #getStrength + * @draft 2.2 + */ + public final static int SECONDARY + = RuleBasedCollator.AttributeValue.SECONDARY_; + /** + * Collator strength value. When set, only TERTIARY and above differences + * are considered significant during comparison. The assignment of + * strengths to language features is locale dependant. A common example is + * for case differences ("a" vs "A") to be considered a TERTIARY + * difference. + * @see #setStrength + * @see #getStrength + * @draft 2.2 + */ + public final static int TERTIARY + = RuleBasedCollator.AttributeValue.TERTIARY_; + + /** + * Collator strength value. When set, only QUARTENARY and above differences + * are considered significant during comparison. The assignment of + * strengths to language features is locale dependant. + * difference. + * @see #setStrength + * @see #getStrength + * @draft 2.2 + */ + public final static int QUATERNARY + = RuleBasedCollator.AttributeValue.QUATERNARY_; + + /** + *

Collator strength value. When set, all differences are considered + * significant during comparison. The assignment of strengths to language + * features is locale dependant. A common example is for control + * characters ("\u0001" vs "\u0002") to be considered equal at + * the PRIMARY, SECONDARY, and TERTIARY levels but different at the + * IDENTICAL level. Additionally, differences between pre-composed + * accents such as "\u00C0" (A-grave) and combining accents such as + * "A\u0300" (A, combining-grave) will be considered significant at + * the tertiary level if decomposition is set to NO_DECOMPOSITION. + *

+ *

Note that this value is different from the JDK's value.

+ * @draft 2.2 + */ + public final static int IDENTICAL + = RuleBasedCollator.AttributeValue.IDENTICAL_; + + /** + *

Decomposition mode value. With NO_DECOMPOSITION set, accented + * characters will not be decomposed for collation. This is the default + * setting and provides the fastest collation but will only produce + * correct results for languages that do not use accents.

+ *

Note that this value is different from the JDK's value.

+ * @see #getDecomposition + * @see #setDecomposition + * @draft 2.2 + */ + public final static int NO_DECOMPOSITION + = RuleBasedCollator.AttributeValue.OFF_; + + /** + *

Decomposition mode value. With CANONICAL_DECOMPOSITION set, + * characters that are canonical variants according to Unicode 2.0 will be + * decomposed for collation. This should be used to get correct collation + * of accented characters.

+ *

CANONICAL_DECOMPOSITION corresponds to Normalization Form D as + * described in + * Unicode Technical Report #15.

+ * @see #getDecomposition + * @see #setDecomposition + * @draft 2.2 + */ + public final static int CANONICAL_DECOMPOSITION = 1; + + /** + *

Decomposition mode value. With FULL_DECOMPOSITION set, both Unicode + * canonical variants and Unicode compatibility variants will be + * decomposed for collation. This causes not only accented characters to + * be collated, but also characters that have special formats to be + * collated with their nominal form. For example, the half-width and + * full-width ASCII and Katakana characters are then collated together. + * FULL_DECOMPOSITION is the most complete and therefore the slowest + * decomposition mode.

+ *

+ * FULL_DECOMPOSITION corresponds to Normalization Form KD as described in + * Unicode + * Technical Report #15.

+ * @see #getDecomposition + * @see #setDecomposition + * @draft 2.2 + */ + public final static int FULL_DECOMPOSITION = 2; + + // public methods -------------------------------------------------------- + + // public setters -------------------------------------------------------- + + /** + *

Sets this Collator's strength property. The strength property + * determines the minimum level of difference considered significant + * during comparison.

+ *

See the Collator class description for an example of use.

+ * @param the new strength value. + * @see #getStrength + * @see #PRIMARY + * @see #SECONDARY + * @see #TERTIARY + * @see #IDENTICAL + * @exception IllegalArgumentException If the new strength value is not one of + * PRIMARY, SECONDARY, TERTIARY or IDENTICAL. + * @draft 2.2 + */ + public synchronized void setStrength(int newStrength) { + if ((newStrength != PRIMARY) && + (newStrength != SECONDARY) && + (newStrength != TERTIARY) && + (newStrength != QUATERNARY) && + (newStrength != IDENTICAL)) { + throw new IllegalArgumentException("Incorrect comparison level."); + } + m_strength_ = newStrength; + } + + /** + * Set the decomposition mode of this Collator. See getDecomposition + * for a description of decomposition mode. + * @param decomposition the new decomposition mode + * @see #getDecomposition + * @see #NO_DECOMPOSITION + * @see #CANONICAL_DECOMPOSITION + * @see #FULL_DECOMPOSITION + * @exception IllegalArgumentException If the given value is not a valid decomposition + * mode. + * @draft 2.2 + */ + public synchronized void setDecomposition(int decomposition) { + if ((decomposition != NO_DECOMPOSITION) && + (decomposition != CANONICAL_DECOMPOSITION) && + (decomposition != FULL_DECOMPOSITION)) { + throw new IllegalArgumentException("Wrong decomposition mode."); + } + if (decomposition != NO_DECOMPOSITION) { + m_decomposition_ = decomposition; + } + else { + m_decomposition_ = CANONICAL_DECOMPOSITION; + } + } + + // public getters -------------------------------------------------------- + + /** + * Gets the Collator for the current default locale. + * The default locale is determined by java.util.Locale.getDefault(). + * @return the Collator for the default locale (for example, en_US) if it + * is created successfully, otherwise if there is a failure, + * null will be returned. 
+ * @see java.util.Locale#getDefault + * @draft 2.2 + */ + public static final Collator getInstance() + { + return getInstance(Locale.getDefault()); + } + + /** + * Gets the Collator for the desired locale. + * @param locale the desired locale. + * @return Collator for the desired locale if it is created successfully, + * otherwise if there is a failure, the default UCA collator will + * be returned. + * @see java.util.Locale + * @see java.util.ResourceBundle + * @draft 2.2 + */ + public static final Collator getInstance(Locale locale) + { + try { + return new RuleBasedCollator(locale); + } + catch(Exception e) { + return RuleBasedCollator.UCA_; + } + } + + /** + *

Returns this Collator's strength property. The strength property + * determines the minimum level of difference considered significant + * during comparison.

+ *

See the Collator class description for an example of use.

+ * @return this Collator's current strength property. + * @see #setStrength + * @see #PRIMARY + * @see #SECONDARY + * @see #TERTIARY + * @see #IDENTICAL + * @draft 2.2 + */ + public int getStrength() + { + return m_strength_; + } + + /** + *

Get the decomposition mode of this Collator. Decomposition mode + * determines how Unicode composed characters are handled. Adjusting + * decomposition mode allows the user to select between faster and more + * complete collation behavior. + *

The three values for decomposition mode are: + *

+ * See the documentation for these three constants for a description + * of their meaning. + *

+ * @return the decomposition mode + * @see #setDecomposition + * @see #NO_DECOMPOSITION + * @see #CANONICAL_DECOMPOSITION + * @see #FULL_DECOMPOSITION + * @draft 2.2 + */ + public int getDecomposition() + { + return m_decomposition_; + } + + // public other methods ------------------------------------------------- + + /** + * Convenience method for comparing the equality of two strings based on + * this Collator's collation rules. + * @param source the source string to be compared with. + * @param target the target string to be compared with. + * @return true if the strings are equal according to the collation + * rules. false, otherwise. + * @see #compare + * @draft 2.2 + */ + public boolean equals(String source, String target) + { + return (compare(source, target) == 0); + } + + /** + * Cloning this Collator. + * @return a cloned Collator of this object + * @draft 2.2 + */ + public Object clone() + { + try { + return (Collator)super.clone(); + } catch (CloneNotSupportedException e) { + throw new InternalError(); + } + } + + /** + * Compares the equality of two Collators. + * @param that the Collator to be compared with this. + * @return true if this Collator is the same as that Collator; + * false otherwise. + * @draft 2.2 + */ + public boolean equals(Object that) + { + if (this == that) { + return true; + } + if (that == null || getClass() != that.getClass()) { + return false; + } + Collator other = (Collator) that; + return ((m_strength_ == other.m_strength_) && + (m_decomposition_ == other.m_decomposition_)); + } + + // public abstract methods ----------------------------------------------- + + /** + * Generates the hash code for this Collator. + * @draft 2.2 + */ + public abstract int hashCode(); + + /** + *

Compares the source string to the target string according to the + * collation rules for this Collator. Returns an integer less than, equal + * to or greater than zero depending on whether the source String is less + * than, equal to or greater than the target string. See the Collator + * class description for an example of use.

+ *

For a one time comparison, this method has the best performance. If + * a given String will be involved in multiple comparisons, + * CollationKey.compareTo() has the best performance. See the Collator + * class description for an example using CollationKeys.

+ * @param source the source string. + * @param target the target string. + * @return Returns an integer value. Value is less than zero if source is + * less than target, value is zero if source and target are equal, + * value is greater than zero if source is greater than target. + * @see CollationKey + * @see #getCollationKey + * @draft 2.2 + */ + public abstract int compare(String source, String target); + + /** + *

Transforms the String into a series of bits that can be compared + * bitwise to other CollationKeys. CollationKeys provide better + * performance than Collator.compare() when Strings are involved in + * multiple comparisons.

+ *

See the Collator class description for an example using + * CollationKeys.

+ * @param source the string to be transformed into a collation key. + * @return the CollationKey for the given String based on this Collator's + * collation rules. If the source String is null, a null + * CollationKey is returned. + * @see CollationKey + * @see #compare(String, String) + * @draft 2.2 + */ + public abstract CollationKey getCollationKey(String source); + + // protected data members ------------------------------------------------ + + /** + * Collation strength + */ + protected int m_strength_; + /** + * Decomposition mode + */ + protected int m_decomposition_; + + // protected constructor ------------------------------------------------- + + /** + *

Protected constructor for use by subclasses. + * Public access to creating Collators is handled by the API getInstance(). + *

+ * @draft 2.2 + */ + protected Collator() throws Exception + { + m_strength_ = TERTIARY; + m_decomposition_ = CANONICAL_DECOMPOSITION; + } + + // protected methods ----------------------------------------------------- + + // private variables ----------------------------------------------------- + + // private methods ------------------------------------------------------- +} + diff --git a/icu4j/src/com/ibm/icu/text/CollatorReader.java b/icu4j/src/com/ibm/icu/text/CollatorReader.java new file mode 100644 index 00000000000..110ba7bffcd --- /dev/null +++ b/icu4j/src/com/ibm/icu/text/CollatorReader.java @@ -0,0 +1,284 @@ +/** +******************************************************************************* +* Copyright (C) 1996-2002, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CollatorReader.java,v $ +* $Date: 2002/05/14 16:48:49 $ +* $Revision: 1.1 $ +* +******************************************************************************* +*/ +package com.ibm.icu.text; + +import java.io.InputStream; +import java.io.DataInputStream; +import java.io.IOException; +import com.ibm.icu.impl.ICUBinary; +import com.ibm.icu.impl.IntTrie; + +/** +*

Internal reader class for ICU data file uca.dat containing +* Unicode Collation Algorithm data.

+*

This class simply reads uca.dat, authenticates that it is a valid +* ICU data file and splits its contents up into blocks of data for use in +* com.ibm.icu.text.Collator. +*

+*

uca.dat, which is in big-endian format, is jarred together with this +* package.

+* @author Syn Wee Quek +* @since release 2.2, April 18 2002 +* @draft 2.2 +*/ + +final class CollatorReader +{ + // protected constructor --------------------------------------------- + + /** + *

Protected constructor.

+ * @param inputStream ICU uprop.dat file input stream + * @exception IOException throw if data file fails authentication + * @draft 2.1 + */ + protected CollatorReader(InputStream inputStream) throws IOException + { + ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, + DATA_FORMAT_VERSION_, UNICODE_VERSION_); + m_dataInputStream_ = new DataInputStream(inputStream); + } + + /** + *

Protected constructor.

+ * @param inputStream ICU uprop.dat file input stream + * @param readICUHeader flag to indicate if the ICU header has to be read + * @exception IOException throw if data file fails authentication + * @draft 2.1 + */ + protected CollatorReader(InputStream inputStream, boolean readICUHeader) + throws IOException + { + if (readICUHeader) { + ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, + DATA_FORMAT_VERSION_, UNICODE_VERSION_); + } + m_dataInputStream_ = new DataInputStream(inputStream); + } + + // protected methods ------------------------------------------------- + + /** + * Read and break up the header stream of data passed in as arguments into + * meaningful Collator data. + * @param rbc RuleBasedCollator to populate with header information + * @exception IOException thrown when there's a data error. + */ + protected void readHeader(RuleBasedCollator rbc) throws IOException + { + int size = m_dataInputStream_.readInt(); + // all the offsets are in bytes + // to get the address add to the header address and cast properly + // Default options int options + m_dataInputStream_.skipBytes(4); + // this one is needed only for UCA, to copy the appropriate + // contractions + m_dataInputStream_.skipBytes(4); + // reserved for future use + m_dataInputStream_.readInt(); + // const uint8_t *mappingPosition; + int mapping = m_dataInputStream_.readInt(); + // uint32_t *expansion; + rbc.m_expansionOffset_ = m_dataInputStream_.readInt(); + // UChar *contractionIndex; + rbc.m_contractionOffset_ = m_dataInputStream_.readInt(); + // uint32_t *contractionCEs; + int contractionCE = m_dataInputStream_.readInt(); + // needed for various closures int contractionSize + m_dataInputStream_.skipBytes(4); + // array of last collation element in expansion + int expansionEndCE = m_dataInputStream_.readInt(); + // array of maximum expansion size corresponding to the expansion + // collation elements with last element in expansionEndCE + int expansionEndCEMaxSize = 
m_dataInputStream_.readInt(); + // size of endExpansionCE int expansionEndCESize + m_dataInputStream_.skipBytes(4); + // hash table of unsafe code points + int unsafe = m_dataInputStream_.readInt(); + // hash table of final code points in contractions. + int contractionEnd = m_dataInputStream_.readInt(); + // int CEcount = m_dataInputStream_.readInt(); + m_dataInputStream_.skipBytes(4); + // is jamoSpecial + rbc.m_isJamoSpecial_ = m_dataInputStream_.readBoolean(); + m_dataInputStream_.skipBytes(3); + // byte version[] = new byte[4]; + m_dataInputStream_.skipBytes(4); + // byte charsetName[] = new byte[32]; // for charset CEs + m_dataInputStream_.skipBytes(32); + m_dataInputStream_.skipBytes(64); // for future use + if (rbc.m_contractionOffset_ == 0) { // contraction can be null + rbc.m_contractionOffset_ = mapping; + contractionCE = mapping; + } + m_expansionSize_ = rbc.m_contractionOffset_ - rbc.m_expansionOffset_; + m_contractionIndexSize_ = contractionCE - rbc.m_contractionOffset_; + m_contractionCESize_ = mapping - contractionCE; + m_trieSize_ = expansionEndCE - mapping; + m_expansionEndCESize_ = expansionEndCEMaxSize - expansionEndCE; + m_expansionEndCEMaxSizeSize_ = unsafe - expansionEndCEMaxSize; + m_unsafeSize_ = contractionEnd - unsafe; + m_contractionEndSize_ = size - contractionEnd; + rbc.m_contractionOffset_ >>= 1; // casting to ints + rbc.m_expansionOffset_ >>= 2; // casting to chars + } + + /** + * Read and break up the collation options passed in the stream of data + * and update the argument Collator with the results + * @param rbc RuleBasedCollator to populate + * @exception IOException thrown when there's a data error. 
+ * @draft 2.2 + */ + public void readOptions(RuleBasedCollator rbc) throws IOException + { + rbc.m_variableTopValue_ = m_dataInputStream_.readInt(); + rbc.setAttributeDefault(RuleBasedCollator.Attribute.FRENCH_COLLATION_, + m_dataInputStream_.readInt()); + rbc.setAttributeDefault( + RuleBasedCollator.Attribute.ALTERNATE_HANDLING_, + m_dataInputStream_.readInt()); + rbc.setAttributeDefault(RuleBasedCollator.Attribute.CASE_FIRST_, + m_dataInputStream_.readInt()); + rbc.setAttributeDefault(RuleBasedCollator.Attribute.CASE_LEVEL_, + m_dataInputStream_.readInt()); + rbc.setAttributeDefault( + RuleBasedCollator.Attribute.NORMALIZATION_MODE_, + m_dataInputStream_.readInt()); + rbc.setAttributeDefault(RuleBasedCollator.Attribute.STRENGTH_, + m_dataInputStream_.readInt()); + rbc.setAttributeDefault( + RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_, + m_dataInputStream_.readInt()); + } + + /** + * Read and break up the stream of data passed in as arguments into + * meaningful Collator data.b + * @param rbc RuleBasedCollator to populate + * @exception IOException thrown when there's a data error. 
+ * @draft 2.2 + */ + public void read(RuleBasedCollator rbc) throws IOException + { + readHeader(rbc); + readOptions(rbc); + m_expansionSize_ >>= 2; + rbc.m_expansion_ = new int[m_expansionSize_]; + for (int i = 0; i < m_expansionSize_; i ++) { + rbc.m_expansion_[i] = m_dataInputStream_.readInt(); + } + m_contractionIndexSize_ >>= 1; + rbc.m_contractionIndex_ = new char[m_contractionIndexSize_]; + for (int i = 0; i < m_contractionIndexSize_; i ++) { + rbc.m_contractionIndex_[i] = m_dataInputStream_.readChar(); + } + m_contractionCESize_ >>= 2; + rbc.m_contractionCE_ = new int[m_contractionCESize_]; + for (int i = 0; i < m_contractionCESize_; i ++) { + rbc.m_contractionCE_[i] = m_dataInputStream_.readInt(); + } + rbc.m_trie_ = new IntTrie(m_dataInputStream_, rbc); + if (!rbc.m_trie_.isLatin1Linear()) { + throw new IOException("Data corrupted, " + + "Collator Tries expected to have linear " + + "latin one data arrays"); + } + m_expansionEndCESize_ >>= 2; + rbc.m_expansionEndCE_ = new int[m_expansionEndCESize_]; + for (int i = 0; i < m_expansionEndCESize_; i ++) { + rbc.m_expansionEndCE_[i] = m_dataInputStream_.readInt(); + } + rbc.m_expansionEndCEMaxSize_ = new byte[m_expansionEndCEMaxSizeSize_]; + for (int i = 0; i < m_expansionEndCEMaxSizeSize_; i ++) { + rbc.m_expansionEndCEMaxSize_[i] = m_dataInputStream_.readByte(); + } + rbc.m_unsafe_ = new byte[m_unsafeSize_]; + for (int i = 0; i < m_unsafeSize_; i ++) { + rbc.m_unsafe_[i] = m_dataInputStream_.readByte(); + } + rbc.m_contractionEnd_ = new byte[m_contractionEndSize_]; + for (int i = 0; i < m_contractionEndSize_; i ++) { + rbc.m_contractionEnd_[i] = m_dataInputStream_.readByte(); + } + } + + // private variables ------------------------------------------------- + + /** + * Data input stream for uca.dat + */ + private DataInputStream m_dataInputStream_; + + /** + * File format version and id that this class understands. 
+ * No guarantees are made if a older version is used + */ + private static final byte DATA_FORMAT_VERSION_[] = + {(byte)0x2, (byte)0x0, (byte)0x0, (byte)0x0}; + private static final byte DATA_FORMAT_ID_[] = {(byte)0x55, (byte)0x43, + (byte)0x6f, (byte)0x6c}; + private static final byte UNICODE_VERSION_[] = {(byte)0x3, (byte)0x0, + (byte)0x0, (byte)0x0}; + /** + * Corrupted error string + */ + private static final String CORRUPTED_DATA_ERROR_ = + "Data corrupted in Collation data file"; + + /** + * Size of expansion table in bytes + */ + private int m_expansionSize_; + /** + * Size of contraction index table in bytes + */ + private int m_contractionIndexSize_; + /** + * Size of contraction table in bytes + */ + private int m_contractionCESize_; + /** + * Size of the Trie in bytes + */ + private int m_trieSize_; + /** + * Size of the table that contains information about collation elements + * that end with an expansion + */ + private int m_expansionEndCESize_; + /** + * Size of the table that contains information about the maximum size of + * collation elements that end with a particular expansion CE corresponding + * to the ones in expansionEndCE + */ + private int m_expansionEndCEMaxSizeSize_; + /** + * Size of the table that contains information about the "Unsafe" + * codepoints + */ + private int m_unsafeSize_; + /** + * Size of the table that contains information about codepoints that ends + * with a contraction + */ + private int m_contractionEndSize_; + /** + * Size of the table that contains UCA contraction information + */ + private int m_UCAContractionSize_; + + // private methods --------------------------------------------------- + +} + diff --git a/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java b/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java new file mode 100755 index 00000000000..d2737979a49 --- /dev/null +++ b/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java @@ -0,0 +1,2960 @@ +/** 
+******************************************************************************* +* Copyright (C) 1996-2002, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +* +* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java,v $ +* $Date: 2002/05/14 16:48:49 $ +* $Revision: 1.4 $ +* +******************************************************************************* +*/ +package com.ibm.icu.text; + +import java.io.InputStream; +import java.io.DataInputStream; +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.ByteArrayInputStream; +import java.nio.IntBuffer; +import java.util.Locale; +import java.util.ResourceBundle; +import java.util.MissingResourceException; +import java.text.CharacterIterator; +import com.ibm.icu.impl.IntTrie; +import com.ibm.icu.impl.Trie; +import com.ibm.icu.impl.NormalizerImpl; +import com.ibm.icu.impl.ICULocaleData; +import com.ibm.icu.impl.UCharacterIterator; + +/** +*

The RuleBasedCollator class is a concrete subclass of Collator that +* provides a simple, data-driven, table collator. With this class you can +* create a customized table-based Collator. RuleBasedCollator maps characters +* to sort keys.

+*

RuleBasedCollator has the following restrictions for efficiency (other +* subclasses may be used for more complex languages) : +*

    +*
  1. If a special collation rule controlled by a <modifier> is +* specified it applies to the whole collator object. +*
  2. All non-mentioned characters are at the end of the collation order. +*
+*

+*

The collation table is composed of a list of collation rules, where each +* rule is of three forms: +*

+*    <modifier>
+*    <relation> <text-argument>
+*    <reset> <text-argument>
+* 
+*

+*

The definitions of the rule elements is as follows: +*

+*

+*

+* This sounds more complicated than it is in practice. For example, the +* following are equivalent ways of expressing the same thing: +*

+*
+* a < b < c
+* a < b & b < c
+* a < c & a < b
+* 
+*
+* Notice that the order is important, as the subsequent item goes immediately +* after the text-argument. The following are not equivalent: +*
+*
+* a < b & a < c
+* a < c & a < b
+* 
+*
+* Either the text-argument must already be present in the sequence, or some +* initial substring of the text-argument must be present. +* (e.g. "a < b & ae < e" is valid since "a" is present in the +* sequence before "ae" is reset). In this latter case, "ae" is not entered and +* treated as a single character; instead, "e" is sorted as if it were expanded +* to two characters: "a" followed by an "e". This difference appears in +* natural languages: in traditional Spanish "ch" is treated as though it +* contracts to a single character (expressed as "c < ch < d"), while in +* traditional German a-umlaut is treated as though it expanded to two +* characters (expressed as +* "a,A < b,B ... &ae;\u00e3&AE;\u00c3"). +* [\u00e3 and \u00c3 are, of course, the escape sequences for +* a-umlaut.] +*

+*

+* Ignorable Characters +*

+* For ignorable characters, the first rule must start with a relation (the +* examples we have used above are really fragments; "a < b" really should +* be "< a < b"). If, however, the first relation is not "<", then all +* the all text-arguments up to the first "<" are ignorable. For example, +* ", - < a < b" makes "-" an ignorable character, as we saw earlier in +* the word "black-birds". In the samples for different languages, you see that +* most accents are ignorable.

+*

Normalization and Accents +*

RuleBasedCollator automatically processes its rule table to +* include both pre-composed and combining-character versions of accented +* characters. Even if the provided rule string contains only base characters +* and separate combining accent characters, the pre-composed accented +* characters matching all canonical combinations of characters from the rule +* string will be entered in the table.

+*

This allows you to use a RuleBasedCollator to compare accented strings +* even when the collator is set to NO_DECOMPOSITION. There are two caveats, +* however. First, if the strings to be collated contain combining sequences +* that may not be in canonical order, you should set the collator to +* CANONICAL_DECOMPOSITION or FULL_DECOMPOSITION to enable sorting of combining +* sequences. Second, if the strings contain characters with compatibility +* decompositions (such as full-width and half-width forms), you must use +* FULL_DECOMPOSITION, since the rule tables only include canonical mappings. +*

+*

Errors

+*

The following are errors:

+* +*

If you produce one of these errors, a RuleBasedCollator +* throws a ParseException.

+*

Examples

+*

Simple: "< a < b < c < d"

+*

Norwegian: "< a,A< b,B< c,C< d,D< e,E< f,F< " + +* "g,G< h,H< i,I< j,J< k,K< l,L< m,M< " + +* "n,N< o,O< p,P< q,Q< r,R< s,S< t,T< " + +* "u,U< v,V< w,W< x,X< y,Y< z,Z< " + +* "\u00E5=a\ u030A,\u00C5=A\u030A;aa,AA< " + +* "\u00E6,\ u00C6< \u00F8,\u00D8"

+*

Normally, to create a rule-based Collator object, you will use +* Collator's factory method getInstance. However, to +* create a rule-based Collator object with specialized rules tailored to your +* needs, you construct the RuleBasedCollator with the rules +* contained in a String object. For example:

+*
+*
+* String Simple = "< a< b< c< d";
+* RuleBasedCollator mySimple = new RuleBasedCollator(Simple);
+* 
+*
+* Or: +*
+*
+* String Norwegian = "< a,A< b,B< c,C< d,D< e,E< f,F<" +  
+*                    "g,G< h,H< i,I< j,J < k,K< l,L< " +
+*                    "m,M< n,N< o,O< p,P< q,Q< r,R< " +
+*                    "s,S< t,T < u,U< v,V< w,W< x,X< " +
+*                    "y,Y< z,Z < \u00E5=a\u030A," +
+*                    "\u00C5=A\u030A;aa,AA< \u00E6," +
+*                    "\u00C6< \u00F8,\u00D8";
+* RuleBasedCollator myNorwegian = new RuleBasedCollator(Norwegian);
+* 
+*
+*

Combining Collators is as simple as concatenating strings. +* Here's an example that combines two Collators from two +* different locales:

+*
+*
+* // Create an en_US Collator object
+* RuleBasedCollator en_USCollator = (RuleBasedCollator)
+*     Collator.getInstance(new Locale("en", "US", ""));
+* // Create a da_DK Collator object
+* RuleBasedCollator da_DKCollator = (RuleBasedCollator)
+*     Collator.getInstance(new Locale("da", "DK", ""));
+* // Combine the two
+* // First, get the collation rules from en_USCollator
+* String en_USRules = en_USCollator.getRules();
+* // Second, get the collation rules from da_DKCollator
+* String da_DKRules = da_DKCollator.getRules();
+* RuleBasedCollator newCollator =
+*     new RuleBasedCollator(en_USRules + da_DKRules);
+* // newCollator has the combined rules
+* 
+*
+*

Another more interesting example would be to make changes on an existing +* table to create a new Collator object. For example, add +* "&C< ch, cH, Ch, CH" to the en_USCollator object to +* create your own:

+*
+*
+* // Create a new Collator object with additional rules
+* String addRules = "&C< ch, cH, Ch, CH";
+* RuleBasedCollator myCollator =
+*     new RuleBasedCollator(en_USCollator + addRules);
+* // myCollator contains the new rules
+* 
+*
+*

The following example demonstrates how to change the order of +* non-spacing accents, +*

+*
+* // old rule
+* String oldRules = 
+*     "=\u0301;\u0300;\u0302;\u0308"    // main accents
+*     + ";\u0327;\u0303;\u0304;\u0305"    // main accents
+*     + ";\u0306;\u0307;\u0309;\u030A"    // main accents
+*     + ";\u030B;\u030C;\u030D;\u030E"    // main accents
+*     + ";\u030F;\u0310;\u0311;\u0312"    // main accents
+*     + "< a , A ; ae, AE ; \u00e6 , \u00c6"
+*     + "< b , B < c, C < e, E & C < d, D";
+* // change the order of accent characters
+* String addOn = "& \u0300 ; \u0308 ; \u0302";
+* RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn);
+* 
+*
+*

The last example shows how to put new primary ordering in before the +* default setting. For example, in Japanese Collator, you +* can either sort English characters before or after Japanese characters, +*

+*
+* // get en_US Collator rules
+* RuleBasedCollator en_USCollator = (RuleBasedCollator)
+*                                             Collator.getInstance(Locale.US);
+* // add a few Japanese character to sort before English characters
+* // suppose the last character before the first base letter 'a' in
+* // the English collation rule is \u2212
+* String jaString = "& \u2212 < \u3041, \u3042 < \u3043, \u3044";
+* RuleBasedCollator myJapaneseCollator = new
+*     RuleBasedCollator(en_USCollator.getRules() + jaString);
+* 
+* @author Syn Wee Quek +* @since release 2.2, April 18 2002 +* @draft 2.2 +*/ +public class RuleBasedCollator extends Collator implements Trie.DataManipulate +{ + // public data members --------------------------------------------------- + + // public constructors --------------------------------------------------- + + /** + *

RuleBasedCollator constructor that takes the rules. + * Please see RuleBasedCollator class description for more details on the + * collation rule syntax.

+ *

Note different from Java, does not throw a ParseException

+ * @see java.util.Locale + * @param rules the collation rules to build the collation table from. + * @exception Exception thrown when there's an error creating the collator + * @draft 2.2 + */ + public RuleBasedCollator(String rules) throws Exception + { + setStrength(Collator.TERTIARY); + setDecomposition(Collator.CANONICAL_DECOMPOSITION); + m_rules_ = rules; + // tables = new RBCollationTables(rules, decomp); + // init(); + } + + // public methods -------------------------------------------------------- + + /** + * Return a CollationElementIterator for the given String. + * @see CollationElementIterator + * @draft 2.2 + */ + public CollationElementIterator getCollationElementIterator(String source) { + return new CollationElementIterator(source, this); + } + + /** + * Return a CollationElementIterator for the given String. + * @see CollationElementIterator + * @draft 2.2 + */ + public CollationElementIterator getCollationElementIterator( + CharacterIterator source) { + return new CollationElementIterator(source, this); + } + + // public setters -------------------------------------------------------- + + /** + * Sets the Hiragana Quartenary sort to be on or off + * @param flag true if Hiragana Quartenary sort is to be on, false + * otherwise + * @draft 2.2 + */ + public synchronized void setHiraganaQuartenary(boolean flag) + { + m_isHiragana4_ = flag; + } + + /** + * Sets the Hiragana Quartenary sort to be on or off depending on the + * Collator's locale specific default value. 
+ * @draft 2.2 + */ + public synchronized void setHiraganaQuartenaryDefault() + { + m_isHiragana4_ = m_defaultIsHiragana4_; + } + + /** + * Sets the Collator to sort with the indicated casing first + * @param upper true for sorting uppercased characters before lowercased + * characters, false for sorting lowercased characters before + * uppercased characters + * @draft 2.2 + */ + public synchronized void setCaseFirst(boolean upper) + { + if (upper) { + m_caseFirst_ = AttributeValue.UPPER_FIRST_; + } + else { + m_caseFirst_ = AttributeValue.LOWER_FIRST_; + } + updateInternalState(); + } + + /** + * Sets the Collator to ignore any previous setCaseFirst(boolean) calls. + * Ignores case preferences. + * @draft 2.2 + */ + public synchronized void setCaseFirstOff() + { + m_caseFirst_ = AttributeValue.OFF_; + updateInternalState(); + } + + /** + * Sets the case sorting preferences to the Collator's locale specific + * default value. + * @see #setCaseFirst(boolean) + * @see #setCaseFirstOff + * @draft 2.2 + */ + public synchronized final void setCaseFirstDefault() + { + m_caseFirst_ = m_defaultCaseFirst_; + updateInternalState(); + } + + /** + * Sets the alternate handling value for quartenary strength to the + * Collator's locale specific default value. + * @see #setAlternateHandling + * @draft 2.2 + */ + public synchronized void setAlternateHandlingDefault() + { + m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_; + } + + /** + * Sets case level sorting to the Collator's locale specific default value. + * @see #setCaseLevel + * @draft 2.2 + */ + public synchronized void setCaseLevelDefault() + { + m_isCaseLevel_ = m_defaultIsCaseLevel_; + updateInternalState(); + } + + /** + * Set the decomposition mode to the Collator's locale specific default + * value. 
+ * @see #getDecomposition + * @draft 2.2 + */ + public synchronized void setDecompositionDefault() + { + m_decomposition_ = m_defaultDecomposition_; + } + + /** + * Sets French collation to the Collator's locale specific default value. + * @see #getFrenchCollation + * @draft 2.2 + */ + public synchronized void setFrenchCollationDefault() + { + m_isFrenchCollation_ = m_defaultIsFrenchCollation_; + updateInternalState(); + } + + /** + *

+ * Sets strength to the Collator's locale specific default value.

+ * @see #setStrength + * @draft 2.2 + */ + public synchronized void setStrengthDefault() + { + m_strength_ = m_defaultStrength_; + updateInternalState(); + } + + /** + * Sets the French collation + * @param flag true to set the French collation on, false to set it off + * @draft 2.2 + */ + public synchronized void setFrenchCollation(boolean flag) + { + m_isFrenchCollation_ = flag; + updateInternalState(); + } + + /** + * Sets the alternate handling for quartenary strength to be either + * shifted or non-ignorable. This attribute will only be effective with + * a quartenary strength sort. + * @param shifted true if shifted for alternate handling is desired, false + * for the non-ignorable. + * @draft 2.2 + */ + public synchronized void setAlternateHandling(boolean shifted) + { + m_isAlternateHandlingShifted_ = shifted; + updateInternalState(); + } + + /** + * Sets if case level sorting is required. + * @param flag true if case level sorting is required, false otherwise + * @draft 2.2 + */ + public synchronized void setCaseLevel(boolean flag) + { + m_isCaseLevel_ = flag; + updateInternalState(); + } + + + // public getters -------------------------------------------------------- + + /** + * Internal method called to parse a lead surrogate's ce for the offset + * to the next trail surrogate data. + * @param ce collation element of the lead surrogate + * @return data offset or 0 for the next trail surrogate + * @draft 2.2 + */ + public int getFoldingOffset(int ce) + { + if (isSpecial(ce) && getTag(ce) == CE_SURROGATE_TAG_) { + return (ce & 0xFFFFFF); + } + return 0; + } + + /** + * Gets the collation rules for this RuleBasedCollator. * @return returns the collation rules + * @draft 2.2 + */ + public final String getRules() + { + return m_rules_; + } + + /** + *

+ * Transforms the String into a series of bits that can be compared + * bitwise to other CollationKeys. CollationKeys provide better + * performance than Collator.compare() when Strings are involved in + * multiple comparisons.

+ *

+ * Internally CollationKey stores its data in a null-terminated byte + * array.

+ *

+ * See the Collator class description for an example using + * CollationKeys.

+ * @param source the string to be transformed into a collation key. + * @return the CollationKey for the given String based on this Collator's + * collation rules. If the source String is null, a null + * CollationKey is returned. + * @see CollationKey + * @see compare(String, String) + * @draft 2.2 + */ + public CollationKey getCollationKey(String source) + { + boolean compare[] = {m_isCaseLevel_, + true, + m_strength_ >= SECONDARY, + m_strength_ >= TERTIARY, + m_strength_ >= QUATERNARY, + m_strength_ == IDENTICAL + }; + + byte bytes[][] = {new byte[SORT_BUFFER_INIT_SIZE_CASE_], // case + new byte[SORT_BUFFER_INIT_SIZE_1_], // primary + new byte[SORT_BUFFER_INIT_SIZE_2_], // secondary + new byte[SORT_BUFFER_INIT_SIZE_3_], // tertiary + new byte[SORT_BUFFER_INIT_SIZE_4_] // quartenary + }; + int bytescount[] = {0, 0, 0, 0, 0}; + int count[] = {0, 0, 0, 0, 0}; + boolean doFrench = m_isFrenchCollation_ && compare[2]; + // TODO: UCOL_COMMON_BOT4 should be a function of qShifted. + // If we have no qShifted, we don't need to set UCOL_COMMON_BOT4 so + // high. + int commonBottom4 = ((m_variableTopValue_ >> 8) & LAST_BYTE_MASK_) + 1; + byte hiragana4 = 0; + if (m_isHiragana4_ && compare[4]) { + // allocate one more space for hiragana, value for hiragana + hiragana4 = (byte)commonBottom4; + commonBottom4 ++; + } + + int bottomCount4 = 0xFF - commonBottom4; + // If we need to normalize, we'll do it all at once at the beginning! 
+ if ((compare[5] || m_decomposition_ != NO_DECOMPOSITION) + /*&& UNORM_YES != unorm_quickCheck(source, len, normMode, status)*/ + ) { + /* + * len = unorm_internalNormalize(normSource, normSourceLen, + source, len, + normMode, FALSE, + status); + source = normSource;*/ + String norm = source; + getSortKeyBytes(norm, compare, bytes, bytescount, count, + doFrench, hiragana4, commonBottom4, bottomCount4); + } + else { + getSortKeyBytes(source, compare, bytes, bytescount, count, doFrench, + hiragana4, commonBottom4, bottomCount4); + } + byte sortkey[] = getSortKey(source, compare, bytes, bytescount, count, + doFrench, commonBottom4, bottomCount4); + return new CollationKey(source, sortkey); + } + + /** + * Checks if uppercase is sorted before lowercase + * @return true if Collator sorts uppercase before lower, false otherwise + * @draft 2.2 + */ + public boolean isUpperCaseFirst() + { + return (m_caseFirst_ == AttributeValue.UPPER_FIRST_); + } + + /** + * Checks if lowercase is sorted before uppercase + * @return true if Collator sorts lowercase before upper, false otherwise + * @draft 2.2 + */ + public boolean isLowerCaseFirst() + { + return (m_caseFirst_ == AttributeValue.LOWER_FIRST_); + } + + /** + * Checks if case sorting is off. + * @return true if case sorting is off, false otherwise + * @draft 2.2 + */ + public boolean isCaseFirstOff() + { + return (m_caseFirst_ == AttributeValue.OFF_); + } + + /** + * Checks if the alternate handling attribute is shifted or non-ignorable. 
+ * + * @param shifted true if checks are to be done on shifted, false if + * checks are to be done on non-ignorable + * @return true or false + * @draft 2.2 + */ + public boolean isAlternateHandling(boolean shifted) + { + if (shifted) { + return m_isAlternateHandlingShifted_; + } + return !m_isAlternateHandlingShifted_; + } + + /** + * Checks if case level sorting is on + * @return true if case level sorting is on + * @draft 2.2 + */ + public boolean isCaseLevel() + { + return m_isCaseLevel_; + } + + /** + * Checks if French Collation sorting is on + * @return true if French Collation sorting is on + * @draft 2.2 + */ + public boolean isFrenchCollation() + { + return m_isFrenchCollation_; + } + + // public other methods ------------------------------------------------- + + /** + * Compares the equality of two RuleBasedCollators. + * @param obj the RuleBasedCollator to be compared with. + * @return true if this RuleBasedCollator has exactly the same behaviour + * as obj, false otherwise. + * @draft 2.2 + */ + public boolean equals(Object obj) { + if (obj == null || !super.equals(obj)) { + return false; // super does class check + } + RuleBasedCollator other = (RuleBasedCollator)obj; + // all other non-transient information is also contained in rules. + return (m_rules_.equals(other.m_rules_)); + } + + /** + * Standard override; no change in semantics. + * @draft 2.2 + */ + public Object clone() { + // synwee todo: do after all implementation done + return null; + } + + /** + * Generates the hash code for this RuleBasedCollator. + * @return the unique hash code for this Collator + * @draft 2.2 + */ + public final int hashCode() + { + return getRules().hashCode(); + } + + /** + *

+ * Compares the source string to the target string according to the + * collation rules for this Collator. Returns an integer less than, equal + * to or greater than zero depending on whether the source String is less + * than, equal to or greater than the target string. See the Collator + * class description for an example of use.

+ *

+ * For a one time comparison, this method has the best performance. If + * a given String will be involved in multiple comparisons, + * CollationKey.compareTo() has the best performance. See the Collator + * class description for an example using CollationKeys.

+ * @param source the source string. + * @param target the target string. + * @return Returns an integer value. Value is less than zero if source is + * less than target, value is zero if source and target are equal, + * value is greater than zero if source is greater than target. + * @see CollationKey + * @see Collator#getCollationKey + * @draft 2.2 + */ + public final int compare(String source, String target) + { + if (source == target) { + return 0; + } + + // Find the length of any leading portion that is equal + int offset = getFirstUnmatchedOffset(source, target); + if (source.charAt(offset) == 0) { + if (target.charAt(offset) == 0) { + return 0; + } + return 1; + } + else if (target.charAt(offset) == 0) { + return -1; + } + + // setting up the collator parameters + boolean compare[] = {m_isCaseLevel_, + true, + m_strength_ >= SECONDARY, + m_strength_ >= TERTIARY, + m_strength_ >= QUATERNARY, + m_strength_ == IDENTICAL + }; + boolean doFrench = m_isFrenchCollation_ && compare[2]; + boolean doShift4 = m_isAlternateHandlingShifted_ && compare[4]; + boolean doHiragana4 = m_isHiragana4_ && compare[4]; + + if (doHiragana4 && doShift4) { + String sourcesub = source.substring(offset); + String targetsub = target.substring(offset); + return compareBySortKeys(sourcesub, targetsub); + } + + // Preparing the CE buffers. will be filled during the primary phase + int cebuffer[][] = {new int[CE_BUFFER_SIZE_], new int[CE_BUFFER_SIZE_]}; + int cebuffersize[] = {0, 0}; + // This is the lowest primary value that will not be ignored if shifted + int lowestpvalue = m_isAlternateHandlingShifted_ + ? 
m_variableTopValue_ << 16 : 0; + int result = doPrimaryCompare(doHiragana4, lowestpvalue, source, + target, offset, cebuffer, cebuffersize); + if (cebuffer[0] == null && cebuffer[1] == null) { + // since the cebuffer is cleared when we have determined that + // either source is greater than target or vice versa, the return + // result is the comparison result and not the hiragana result + return result; + } + + int hiraganaresult = result; + + if (compare[2]) { + result = doSecondaryCompare(cebuffer, cebuffersize, doFrench); + if (result != 0) { + return result; + } + } + // doing the case bit + if (compare[0]) { + result = doCaseCompare(cebuffer); + if (result != 0) { + return result; + } + } + // Tertiary level + if (compare[3]) { + result = doTertiaryCompare(cebuffer); + if (result != 0) { + return result; + } + } + + if (compare[4]) { // checkQuad + result = doQuaternaryCompare(cebuffer, lowestpvalue); + if (result != 0) { + return result; + } + } + else if (doHiragana4 && hiraganaresult != 0) { + // If we're fine on quaternaries, we might be different + // on Hiragana. This, however, might fail us in shifted. + return hiraganaresult; + } + + // For IDENTICAL comparisons, we use a bitwise character comparison + // as a tiebreaker if all else is equal. + // Getting here should be quite rare - strings are not identical - + // that is checked first, but compared == through all other checks. + if (compare[5]) { + return doIdenticalCompare(source, target, offset, true); + } + return 0; + } + + // public abstract methods ----------------------------------------------- + + // protected inner interfaces -------------------------------------------- + + /** + * Attribute values to be used when setting the Collator options + */ + protected static interface AttributeValue + { + /** + * Indicates that the default attribute value will be used. + * See individual attribute for details on its default value. 
+ */ + static final int DEFAULT_ = -1; + /** + * Primary collation strength + */ + static final int PRIMARY_ = 0; + /** + * Secondary collation strength + */ + static final int SECONDARY_ = 1; + /** + * Tertiary collation strength + */ + static final int TERTIARY_ = 2; + /** + * Default collation strength + */ + static final int DEFAULT_STRENGTH_ = TERTIARY; + /** + * Internal use for strength checks in Collation elements + */ + static final int CE_STRENGTH_LIMIT_ = TERTIARY + 1; + /** + * Quaternary collation strength + */ + static final int QUATERNARY_ = 3; + /** + * Identical collation strength + */ + static final int IDENTICAL_ = 15; + /** + * Internal use for strength checks + */ + static final int STRENGTH_LIMIT_ = IDENTICAL + 1; + /** + * Turn the feature off - works for FRENCH_COLLATION, CASE_LEVEL, + * HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE + */ + static final int OFF_ = 16; + /** + * Turn the feature on - works for FRENCH_COLLATION, CASE_LEVEL, + * HIRAGANA_QUATERNARY_MODE and DECOMPOSITION_MODE + */ + static final int ON_ = 17; + /** + * Valid for ALTERNATE_HANDLING. Alternate handling will be shifted + */ + static final int SHIFTED_ = 20; + /** + * Valid for ALTERNATE_HANDLING. Alternate handling will be non + * ignorable + */ + static final int NON_IGNORABLE_ = 21; + /** + * Valid for CASE_FIRST - lower case sorts before upper case + */ + static final int LOWER_FIRST_ = 24; + /** + * Upper case sorts before lower case + */ + static final int UPPER_FIRST_ = 25; + /** + * Valid for NORMALIZATION_MODE ON and OFF are also allowed for this + * attribute + */ + static final int ON_WITHOUT_HANGUL_ = 28; + /** + * Number of attribute values + */ + static final int LIMIT_ = 29; + } + + /** + * Attributes that collation service understands. All the attributes can + * take DEFAULT value, as well as the values specific to each one. + */ + protected static interface Attribute { + /** + * Attribute for direction of secondary weights - used in French. 
+ * Acceptable values are ON, which results in secondary weights being + * considered backwards and OFF which treats secondary weights in the + * order they appear. + */ + static final int FRENCH_COLLATION_ = 0; + /** + * Attribute for handling variable elements. Acceptable values are + * NON_IGNORABLE (default) which treats all the codepoints with + * non-ignorable primary weights in the same way, and SHIFTED which + * causes codepoints with primary weights that are equal or below the + * variable top value to be ignored on primary level and moved to the + * quaternary level. + */ + static final int ALTERNATE_HANDLING_ = 1; + /** + * Controls the ordering of upper and lower case letters. Acceptable + * values are OFF (default), which orders upper and lower case letters + * in accordance to their tertiary weights, UPPER_FIRST which forces + * upper case letters to sort before lower case letters, and + * LOWER_FIRST which does the opposite. + */ + static final int CASE_FIRST_ = 2; + /** + * Controls whether an extra case level (positioned before the third + * level) is generated or not. Acceptable values are OFF (default), + * when case level is not generated, and ON which causes the case + * level to be generated. Contents of the case level are affected by + * the value of CASE_FIRST attribute. A simple way to ignore accent + * differences in a string is to set the strength to PRIMARY and + * enable case level. + */ + static final int CASE_LEVEL_ = 3; + /** + * Controls whether the normalization check and necessary + * normalizations are performed. When set to OFF (default) no + * normalization check is performed. The correctness of the result is + * guaranteed only if the input data is in so-called FCD form (see + * users manual for more info). When set to ON, an incremental check + * is performed to see whether the input data is in the FCD form. If + * the data is not in the FCD form, incremental NFD normalization is + * performed. 
+ */ + static final int NORMALIZATION_MODE_ = 4; + /** + * The strength attribute. Can be either PRIMARY, SECONDARY, TERTIARY, + * QUATERNARY or IDENTICAL. The usual strength for most locales + * (except Japanese) is tertiary. Quaternary strength is useful when + * combined with shifted setting for alternate handling attribute and + * for JIS x 4061 collation, when it is used to distinguish between + * Katakana and Hiragana (this is achieved by setting the + * HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level is + * affected only by the number of non ignorable code points in the + * string. Identical strength is rarely useful, as it amounts to + * codepoints of the NFD form of the string. + */ + static final int STRENGTH_ = 5; + /** + * When turned on, this attribute positions Hiragana before all + * non-ignorables on quaternary level. This is a sneaky way to produce + * JIS sort order. + */ + static final int HIRAGANA_QUATERNARY_MODE_ = 6; + /** + * Attribute count + */ + static final int LIMIT_ = 7; + } + + // protected data members ------------------------------------------------ + + /** + * Size of collator raw data headers and options before the expansion + * data. This is used when expansion ces are to be retrieved. ICU4C uses + * the expansion offset starting from UCollator.UColHeader, hence ICU4J + * will have to minus that off to get the right expansion ce offset. In + * number of ints. + */ + protected int m_expansionOffset_; + /** + * Size of collator raw data headers, options and expansions before + * contraction data. This is used when contraction ces are to be retrieved. + * ICU4C uses contraction offset starting from UCollator.UColHeader, hence + * ICU4J will have to minus that off to get the right contraction ce + * offset. In number of chars. 
+ */ + protected int m_contractionOffset_; + /** + * Flag indicator if Jamo is special + */ + protected boolean m_isJamoSpecial_; + + // Collator options ------------------------------------------------------ + protected int m_defaultVariableTopValue_; + protected boolean m_defaultIsFrenchCollation_; + protected boolean m_defaultIsAlternateHandlingShifted_; + protected int m_defaultCaseFirst_; + protected boolean m_defaultIsCaseLevel_; + protected int m_defaultDecomposition_; + protected int m_defaultStrength_; + protected boolean m_defaultIsHiragana4_; + /** + * Value of the variable top + */ + protected int m_variableTopValue_; + /** + * Attribute for special Hiragana + */ + protected boolean m_isHiragana4_; + /** + * Case sorting customization + */ + protected int m_caseFirst_; + + // end Collator options -------------------------------------------------- + + /** + * Expansion table + */ + protected int m_expansion_[]; + /** + * Contraction index table + */ + protected char m_contractionIndex_[]; + /** + * Contraction CE table + */ + protected int m_contractionCE_[]; + /** + * Data trie + */ + protected IntTrie m_trie_; + /** + * Table to store all collation elements that are the last element of an + * expansion. This is for use in StringSearch. + */ + protected int m_expansionEndCE_[]; + /** + * Table to store the maximum size of any expansions that end with the + * corresponding collation element in m_expansionEndCE_. For use in + * StringSearch too + */ + protected byte m_expansionEndCEMaxSize_[]; + /** + * Heuristic table to store information on whether a char character is + * considered "unsafe". "Unsafe" character are combining marks or those + * belonging to some contraction sequence from the offset 1 onwards. + * E.g. if "ABC" is the only contraction, then 'B' and 'C' are considered + * unsafe. If we have another contraction "ZA" with the one above, then + * 'A', 'B', 'C' are "unsafe" but 'Z' is not. 
+ */ + protected byte m_unsafe_[]; + /** + * Table to store information on whether a codepoint can occur as the last + * character in a contraction + */ + protected byte m_contractionEnd_[]; + /** + * Table for UCA use, may be removed + */ + protected char m_UCAContraction_[]; + /** + * Original collation rules + */ + protected String m_rules_; + /** + * The smallest "unsafe" codepoint + */ + protected char m_minUnsafe_; + /** + * The smallest codepoint that could be the end of a contraction + */ + protected char m_minContractionEnd_; + + /** + * UnicodeData.txt property object + */ + protected static final RuleBasedCollator UCA_; + + // block to initialise character property database + static + { + try + { + UCA_ = new RuleBasedCollator(); + InputStream i = UCA_.getClass().getResourceAsStream( + "/com/ibm/icu/impl/data/ucadata.dat"); + + BufferedInputStream b = new BufferedInputStream(i, 90000); + CollatorReader reader = new CollatorReader(b); + reader.read(UCA_); + b.close(); + i.close(); + ResourceBundle rb = + ICULocaleData.getLocaleElements(Locale.ENGLISH); + UCA_.m_rules_ = rb.getString("%%UCARULES"); + UCA_.init(); + } + catch (Exception e) + { + e.printStackTrace(); + throw new RuntimeException(e.getMessage()); + } + } + + // protected constants --------------------------------------------------- + + protected static final int CE_SPECIAL_FLAG_ = 0xF0000000; + /** + * Lead surrogate that is tailored and doesn't start a contraction + */ + protected static final int CE_SURROGATE_TAG_ = 5; + + /** + * Minimum size required for the binary collation data in bytes. 
+ * Size of UCA header + size of options to 4 bytes + */ + private static final int MIN_BINARY_DATA_SIZE_ = (41 + 8) << 2; + /** + * Mask to get the primary strength of the collation element + */ + protected static final int CE_PRIMARY_MASK_ = 0xFFFF0000; + /** + * Mask to get the secondary strength of the collation element + */ + protected static final int CE_SECONDARY_MASK_ = 0xFF00; + /** + * Mask to get the tertiary strength of the collation element + */ + protected static final int CE_TERTIARY_MASK_ = 0xFF; + /** + * Primary strength shift + */ + protected static final int CE_PRIMARY_SHIFT_ = 16; + /** + * Secondary strength shift + */ + protected static final int CE_SECONDARY_SHIFT_ = 8; + + /** + * Continuation marker + */ + protected static final int CE_CONTINUATION_MARKER_ = 0xC0; + + // end protected constants ----------------------------------------------- + + // protected constructor ------------------------------------------------- + + /** + * Constructors a RuleBasedCollator from the argument locale. + * If no resource bundle is associated with the locale, UCA is used + * instead. 
+ * @param locale + * @exception Exception thrown when there's an error creating the Collator + */ + protected RuleBasedCollator(Locale locale) throws Exception + { + ResourceBundle rb = ICULocaleData.getLocaleElements(locale); + + if (rb != null) { + byte map[] = (byte [])rb.getObject("%%CollationBin"); + // synwee todo: problem, data in little endian and + // ICUListResourceBundle should not calculate size by + // using .available() that only gives the buffer size + BufferedInputStream input = + new BufferedInputStream(new ByteArrayInputStream(map)); + CollatorReader reader = new CollatorReader(input, false); + if (map.length > MIN_BINARY_DATA_SIZE_) { + // synwee todo: undo when problem solved + reader.read(this); + } + else { + reader.readHeader(this); + reader.readOptions(this); + // duplicating UCA_'s data + m_expansion_ = UCA_.m_expansion_; + m_contractionIndex_ = UCA_.m_contractionIndex_; + m_contractionCE_ = UCA_.m_contractionCE_; + m_trie_ = UCA_.m_trie_; + m_expansionEndCE_ = UCA_.m_expansionEndCE_; + m_expansionEndCEMaxSize_ = UCA_.m_expansionEndCEMaxSize_; + m_unsafe_ = UCA_.m_unsafe_; + m_contractionEnd_ = UCA_.m_contractionEnd_; + m_minUnsafe_ = UCA_.m_minUnsafe_; + m_minContractionEnd_ = UCA_.m_minContractionEnd_; + setStrengthDefault(); + setDecompositionDefault(); + setFrenchCollationDefault(); + setAlternateHandlingDefault(); + setCaseLevelDefault(); + setCaseFirstDefault(); + setHiraganaQuartenaryDefault(); + updateInternalState(); + } + Object rules = rb.getObject("CollationElements"); + if (rules != null) { + m_rules_ = (String)((Object[][])rules)[0][1]; + } + init(); + } + } + + /** + *

+ * Protected constructor for use by subclasses. + * Public access to creating Collators is handled by the API + * Collator.getInstance() or RuleBasedCollator(String rules). + *

+ *

+ * This constructor constructs the UCA collator internally + *

+ * @draft 2.2 + */ + protected RuleBasedCollator() throws Exception + { + } + + // protected methods ----------------------------------------------------- + + /** + * Initializes the RuleBasedCollator + */ + protected synchronized final void init() + { + for (m_minUnsafe_ = 0; m_minUnsafe_ < DEFAULT_MIN_HEURISTIC_; + m_minUnsafe_ ++) { + // Find the smallest unsafe char. + if (isUnsafe(m_minUnsafe_)) { + break; + } + } + + for (m_minContractionEnd_ = 0; + m_minContractionEnd_ < DEFAULT_MIN_HEURISTIC_; + m_minContractionEnd_ ++) { + // Find the smallest contraction-ending char. + if (isContractionEnd(m_minContractionEnd_)) { + break; + } + } + setStrengthDefault(); + setDecompositionDefault(); + setFrenchCollationDefault(); + setAlternateHandlingDefault(); + setCaseLevelDefault(); + setCaseFirstDefault(); + setHiraganaQuartenaryDefault(); + updateInternalState(); + } + + /** + * Test whether a char character is potentially "unsafe" for use as a + * collation starting point. "Unsafe" characters are combining marks or + * those belonging to some contraction sequence from the offset 1 onwards. + * E.g. if "ABC" is the only contraction, then 'B' and + * 'C' are considered unsafe. If we have another contraction "ZA" with + * the one above, then 'A', 'B', 'C' are "unsafe" but 'Z' is not. + * @param ch character to determin + * @return true if ch is unsafe, false otherwise + */ + protected final boolean isUnsafe(char ch) + { + if (ch < m_minUnsafe_) { + return false; + } + + if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) { + if (UTF16.isTrailSurrogate(ch)) { + // Trail surrogate are always considered unsafe. + return true; + } + ch &= HEURISTIC_OVERFLOW_MASK_; + ch += HEURISTIC_OVERFLOW_OFFSET_; + } + int value = m_unsafe_[ch >> HEURISTIC_SHIFT_]; + return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0; + } + + /** + * Approximate determination if a char character is at a contraction end. 
+ * Guaranteed to be true if a character is at the end of a contraction, + * otherwise it is not deterministic. + * @param ch character to be determined + */ + protected final boolean isContractionEnd(char ch) + { + if (UTF16.isTrailSurrogate(ch)) { + return true; + } + + if (ch < m_minContractionEnd_) { + return false; + } + + if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) { + ch &= HEURISTIC_OVERFLOW_MASK_; + ch += HEURISTIC_OVERFLOW_OFFSET_; + } + int value = m_contractionEnd_[ch >> HEURISTIC_SHIFT_]; + return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0; + } + + /** + * Resets the internal case data members and compression values. + */ + protected synchronized void updateInternalState() + { + if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { + m_caseSwitch_ = CASE_SWITCH_; + } + else { + m_caseSwitch_ = NO_CASE_SWITCH_; + } + + if (m_isCaseLevel_ || m_caseFirst_ == AttributeValue.OFF_) { + m_mask3_ = CE_REMOVE_CASE_; + m_common3_ = COMMON_NORMAL_3_; + m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_OFF_; + m_top3_ = COMMON_TOP_CASE_SWITCH_OFF_3_; + m_bottom3_ = COMMON_BOTTOM_3_; + } + else { + m_mask3_ = CE_KEEP_CASE_; + m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_ON_; + if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { + m_common3_ = COMMON_UPPER_FIRST_3_; + m_top3_ = COMMON_TOP_CASE_SWITCH_UPPER_3_; + m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_UPPER_3_; + } else { + m_common3_ = COMMON_NORMAL_3_; + m_top3_ = COMMON_TOP_CASE_SWITCH_LOWER_3_; + m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_LOWER_3_; + } + } + + // Set the compression values + int total3 = m_top3_ - COMMON_BOTTOM_3_ - 1; + // we multilply double with int, but need only int + m_topCount3_ = (int)(PROPORTION_3_ * total3); + m_bottomCount3_ = total3 - m_topCount3_; + + if (!m_isCaseLevel_ && m_strength_ == AttributeValue.TERTIARY_ + && !m_isFrenchCollation_ && !m_isAlternateHandlingShifted_) { + m_isSimple3_ = true; + } + else { + m_isSimple3_ = false; + } + } + + /** + *

+ * Converts the C attribute index and values for use and stores it into + * the relevant default attribute variable.

+ *

+ * Note internal use, no sanity checks are done on the arguments.

+ */ + protected void setAttributeDefault(int attribute, int value) + { + switch (attribute) { + case Attribute.FRENCH_COLLATION_: + m_defaultIsFrenchCollation_ = (value == AttributeValue.ON_); + break; + case Attribute.ALTERNATE_HANDLING_: + m_defaultIsAlternateHandlingShifted_ = + (value == AttributeValue.SHIFTED_); + break; + case Attribute.CASE_FIRST_: + m_defaultCaseFirst_ = value; + break; + case Attribute.CASE_LEVEL_: + m_defaultIsCaseLevel_ = (value == AttributeValue.ON_); + break; + case Attribute.NORMALIZATION_MODE_: + m_defaultDecomposition_ = value; + break; + case Attribute.STRENGTH_: + m_defaultStrength_ = value; + case Attribute.HIRAGANA_QUATERNARY_MODE_: + m_defaultIsHiragana4_ = (value == AttributeValue.ON_); + } + } + + /** + * Retrieve the tag of a special ce + * @param ce ce to test + * @return tag of ce + */ + protected static int getTag(int ce) + { + return (ce & CE_TAG_MASK_) >> CE_TAG_SHIFT_; + } + + /** + * Checking if ce is special + * @param ce to check + * @return true if ce is special + */ + protected static boolean isSpecial(int ce) + { + return (ce & CE_SPECIAL_FLAG_) == CE_SPECIAL_FLAG_; + } + + /** + * Getting the mask for collation strength + * @param strength collation strength + * @return collation element mask + */ + protected static final int getMask(int strength) + { + switch (strength) + { + case Collator.PRIMARY: + return CE_PRIMARY_MASK_; + case Collator.SECONDARY: + return CE_SECONDARY_MASK_ | CE_PRIMARY_MASK_; + default: + return CE_TERTIARY_MASK_ | CE_SECONDARY_MASK_ + | CE_PRIMARY_MASK_; + } + } + + /** + * Gets the primary weights from a CE + * @param ce collation element + * @return the primary weight of the collation element + */ + protected static final int getPrimaryWeight(int ce) + { + return ((ce) & CE_PRIMARY_MASK_) >> CE_PRIMARY_SHIFT_; + } + + /** + * Gets the secondary weights from a CE + * @param ce collation element + * @return the secondary weight of the collation element + */ + protected static final int 
getSecondaryWeight(int ce) + { + return (ce & CE_SECONDARY_MASK_) >> CE_SECONDARY_SHIFT_; + } + + /** + * Gets the tertiary weights from a CE + * @param ce collation element + * @return the tertiary weight of the collation element + */ + protected static final int getTertiaryWeight(int ce) + { + return ce & CE_TERTIARY_MASK_; + } + + // private variables ----------------------------------------------------- + + /** + * The smallest natural unsafe or contraction end char character before + * tailoring. + * This is a combining mark. + */ + private static final int DEFAULT_MIN_HEURISTIC_ = 0x300; + /** + * Heuristic table table size. Size is 32 bytes, 1 bit for each + * latin 1 char, and some power of two for hashing the rest of the chars. + * Size in bytes. + */ + private static final char HEURISTIC_SIZE_ = 1056; + /** + * Mask value down to "some power of two" - 1, + * number of bits, not num of bytes. + */ + private static final char HEURISTIC_OVERFLOW_MASK_ = 0x1fff; + /** + * Unsafe character shift + */ + private static final int HEURISTIC_SHIFT_ = 3; + /** + * Unsafe character addition for character too large, it has to be folded + * then incremented. + */ + private static final char HEURISTIC_OVERFLOW_OFFSET_ = 256; + /** + * Mask value to get offset in heuristic table. + */ + private static final char HEURISTIC_MASK_ = 7; + + private byte m_caseSwitch_; + private int m_common3_; + private byte m_mask3_; + /** + * When switching case, we need to add or subtract different values. 
+ */ + private int m_addition3_; + /** + * Upper range when compressing + */ + private int m_top3_; + /** + * Upper range when compressing + */ + private int m_bottom3_; + private int m_topCount3_; + private int m_bottomCount3_; + /** + * Case first constants + */ + private static final byte CASE_SWITCH_ = (byte)0xC0; + private static final byte NO_CASE_SWITCH_ = 0; + /** + * Case level constants + */ + private static final byte CE_REMOVE_CASE_ = (byte)0x3F; + private static final byte CE_KEEP_CASE_ = (byte)0xFF; + /** + * Case strength mask + */ + private static final byte CE_CASE_BIT_MASK_ = (byte)0xC0; + private static final byte CE_CASE_MASK_3_ = (byte)0xFF; + /** + * Sortkey size factor. Values can be changed. + */ + private static final double PROPORTION_2_ = 0.5; + private static final double PROPORTION_3_ = 0.667; + + // These values come from the UCA ---------------------------------------- + + /** + * This is an enum that lists magic special byte values from the + * fractional UCA + */ + private static final byte BYTE_ZERO_ = 0x0; + private static final byte BYTE_LEVEL_SEPARATOR_ = (byte)0x01; + private static final byte BYTE_SORTKEY_GLUE_ = (byte)0x02; + private static final byte BYTE_SHIFT_PREFIX_ = (byte)0x03; + private static final byte BYTE_UNSHIFTED_MIN_ = BYTE_SHIFT_PREFIX_; + private static final byte BYTE_FIRST_TAILORED_ = (byte)0x04; + private static final byte BYTE_COMMON_ = (byte)0x05; + private static final byte BYTE_FIRST_UCA_ = BYTE_COMMON_; + private static final byte BYTE_LAST_LATIN_PRIMARY_ = (byte)0x4C; + private static final byte BYTE_FIRST_NON_LATIN_PRIMARY_ = (byte)0x4D; + private static final byte BYTE_UNSHIFTED_MAX_ = (byte)0xFF; + private static final int COMMON_BOTTOM_2_ = BYTE_COMMON_; + private static final int COMMON_TOP_2_ = 0x86; // int for unsigness + private static final int TOTAL_2_ = COMMON_TOP_2_ - COMMON_BOTTOM_2_ - 1; + private static final int FLAG_BIT_MASK_CASE_SWITCH_OFF_ = 0x80; + private static final int 
FLAG_BIT_MASK_CASE_SWITCH_ON_ = 0x40; + private static final int COMMON_TOP_CASE_SWITCH_OFF_3_ = 0x85; + private static final int COMMON_TOP_CASE_SWITCH_LOWER_3_ = 0x45; + private static final int COMMON_TOP_CASE_SWITCH_UPPER_3_ = 0xC5; + private static final int COMMON_BOTTOM_3_ = 0x05; + private static final int COMMON_BOTTOM_CASE_SWITCH_UPPER_3_ = 0x86; + private static final int COMMON_BOTTOM_CASE_SWITCH_LOWER_3_ = + COMMON_BOTTOM_3_; + private static final int TOP_COUNT_2_ = (int)(PROPORTION_2_ * TOTAL_2_); + private static final int BOTTOM_COUNT_2_ = TOTAL_2_ - TOP_COUNT_2_; + private static final int COMMON_2_ = COMMON_BOTTOM_2_; + private static final int COMMON_UPPER_FIRST_3_ = 0xC5; + private static final int COMMON_NORMAL_3_ = COMMON_BOTTOM_3_; + private static final int COMMON_4_ = (byte)0xFF; + + /** + * If this collator is to generate only simple tertiaries for fast path + */ + private boolean m_isSimple3_; + + /** + * French collation sorting flag + */ + private boolean m_isFrenchCollation_; + /** + * Flag indicating if shifted is requested for quartenary alternate + * handling. If this is not true, the default for alternate handling will + * be non-ignorable. 
+ */ + private boolean m_isAlternateHandlingShifted_; + /** + * Extra case level for sorting + */ + private boolean m_isCaseLevel_; + + private static final int CE_TAG_SHIFT_ = 24; + private static final int CE_TAG_MASK_ = 0x0F000000; + + private static final int SORT_BUFFER_INIT_SIZE_ = 128; + private static final int SORT_BUFFER_INIT_SIZE_1_ = + SORT_BUFFER_INIT_SIZE_ << 3; + private static final int SORT_BUFFER_INIT_SIZE_2_ = SORT_BUFFER_INIT_SIZE_; + private static final int SORT_BUFFER_INIT_SIZE_3_ = SORT_BUFFER_INIT_SIZE_; + private static final int SORT_BUFFER_INIT_SIZE_CASE_ = + SORT_BUFFER_INIT_SIZE_ >> 2; + private static final int SORT_BUFFER_INIT_SIZE_4_ = SORT_BUFFER_INIT_SIZE_; + + private static final int CE_CONTINUATION_TAG_ = 0xC0; + private static final int CE_REMOVE_CONTINUATION_MASK_ = 0xFFFFFF3F; + + private static final int LAST_BYTE_MASK_ = 0xFF; + + private static final int CE_RESET_TOP_VALUE_ = 0x9F000303; + private static final int CE_NEXT_TOP_VALUE_ = 0xE8960303; + + private static final byte SORT_CASE_BYTE_START_ = (byte)0x80; + private static final byte SORT_CASE_SHIFT_START_ = (byte)7; + + private static final byte SORT_LEVEL_TERMINATOR_ = 1; + + /** + * CE buffer size + */ + private static final int CE_BUFFER_SIZE_ = 512; + + // private methods ------------------------------------------------------- + + /** + * Checks if the argument ce is a continuation + * @param ce collation element to test + * @return true if ce is a continuation + */ + private static final boolean isContinuation(int ce) + { + return (ce & CE_CONTINUATION_TAG_) == CE_CONTINUATION_TAG_; + } + + /** + * Gets the 2 bytes of primary order and adds it to the primary byte array + * @param ce current ce + * @param bytes array of byte arrays for each strength + * @param bytescount array of the size of each strength byte arrays + * @param count array of counters for each of the strength + * @param notIsContinuation flag indicating if the current bytes belong to + * a 
continuation ce + * @param doShift flag indicating if ce is to be shifted + * @param leadPrimary lead primary used for compression + * @param commonBottom4 common byte value for quartenary + * @param bottomCount4 smallest byte value for quartenary + * @return the new lead primary for compression + */ + private final int doPrimaryBytes(int ce, byte bytes[][], int bytescount[], + int count[], boolean notIsContinuation, + boolean doShift, int leadPrimary, + int commonBottom4, int bottomCount4) + { + + int p2 = (ce >>= 16) & LAST_BYTE_MASK_; // in ints for unsigned + int p1 = (ce >> 8) & LAST_BYTE_MASK_; // comparison + if (doShift) { + if (count[4] > 0) { + while (count[4] > bottomCount4) { + append(bytes, bytescount, 4, + (byte)(commonBottom4 + bottomCount4)); + count[4] -= bottomCount4; + } + append(bytes, bytescount, 4, + (byte)(commonBottom4 + (count[4] - 1))); + count[4] = 0; + } + // dealing with a variable and we're treating them as shifted + // This is a shifted ignorable + if (p1 != 0) { + // we need to check this since we could be in continuation + append(bytes, bytescount, 4, (byte)p1); + } + if (p2 != 0) { + append(bytes, bytescount, 4, (byte)p2); + } + } + else { + // Note: This code assumes that the table is well built + // i.e. not having 0 bytes where they are not supposed to be. + // Usually, we'll have non-zero primary1 & primary2, except + // in cases of LatinOne and friends, when primary2 will be + // regular and simple sortkey calc + if (p1 != CollationElementIterator.IGNORABLE) { + if (notIsContinuation) { + if (leadPrimary == p1) { + append(bytes, bytescount, 1, (byte)p2); + } + else { + if (leadPrimary != 0) { + append(bytes, bytescount, 1, + (byte)((p1 > leadPrimary) + ? 
BYTE_UNSHIFTED_MAX_ + : BYTE_UNSHIFTED_MIN_)); + } + if (p2 == CollationElementIterator.IGNORABLE) { + // one byter, not compressed + append(bytes, bytescount, 1, (byte)p1); + leadPrimary = 0; + } + else if (p1 < BYTE_FIRST_NON_LATIN_PRIMARY_ + || (p1 > ((CE_RESET_TOP_VALUE_ >> 24) & 0xFF) + && p1 < ((CE_NEXT_TOP_VALUE_ >> 24) & 0xFF))) { + // not compressible + leadPrimary = 0; + append(bytes, bytescount, 1, (byte)p1); + append(bytes, bytescount, 1, (byte)p2); + } + else { // compress + leadPrimary = p1; + append(bytes, bytescount, 1, (byte)p1); + append(bytes, bytescount, 1, (byte)p2); + } + } + } + else { + // continuation, add primary to the key, no compression + append(bytes, bytescount, 1, (byte)p1); + if (p2 != CollationElementIterator.IGNORABLE) { + append(bytes, bytescount, 1, (byte)p2); // second part + } + } + } + } + return leadPrimary; + } + + /** + * Gets the secondary byte and adds it to the secondary byte array + * @param ce current ce + * @param bytes array of byte arrays for each strength + * @param bytescount array of the size of each strength byte arrays + * @param count array of counters for each of the strength + * @param notIsContinuation flag indicating if the current bytes belong to + * a continuation ce + * @param doFrench flag indicator if french sort is to be performed + * @param frenchOffset start and end offsets to source string for reversing + */ + private final void doSecondaryBytes(int ce, byte bytes[][], + int bytescount[], int count[], + boolean notIsContinuation, + boolean doFrench, int frenchOffset[]) + { + int s = (ce >>= 8) & LAST_BYTE_MASK_; // int for comparison + if (s != 0) { + if (!doFrench) { + // This is compression code. + if (s == COMMON_2_ && notIsContinuation) { + count[2] ++; + } + else { + if (count[2] > 0) { + if (s > COMMON_2_) { // not necessary for 4th level. 
+ while (count[2] > TOP_COUNT_2_) { + append(bytes, bytescount, 2, + (byte)(COMMON_TOP_2_ - TOP_COUNT_2_)); + count[2] -= TOP_COUNT_2_; + } + append(bytes, bytescount, 2, + (byte)(COMMON_TOP_2_ - (count[2] - 1))); + } + else { + while (count[2] > BOTTOM_COUNT_2_) { + append(bytes, bytescount, 2, + (byte)(COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_)); + count[2] -= BOTTOM_COUNT_2_; + } + append(bytes, bytescount, 2, + (byte)(COMMON_BOTTOM_2_ + (count[2] - 1))); + } + count[2] = 0; + } + append(bytes, bytescount, 2, (byte)s); + } + } + else { + append(bytes, bytescount, 2, (byte)s); + // Do the special handling for French secondaries + // We need to get continuation elements and do intermediate + // restore + // abc1c2c3de with french secondaries need to be edc1c2c3ba + // NOT edc3c2c1ba + if (notIsContinuation) { + if (frenchOffset[0] != -1) { + // reverse secondaries from frenchStartPtr up to + // frenchEndPtr + reverseBuffer(bytes[2], frenchOffset); + frenchOffset[0] = -1; + } + } + else { + if (frenchOffset[0] == -1) { + frenchOffset[0] = bytescount[2] - 2; + } + frenchOffset[1] = bytescount[2] - 1; + } + } + } + } + + /** + * Reverse the argument buffer + * @param buffer to reverse + * @param offset start and end offsets to reverse + */ + private void reverseBuffer(byte buffer[], int offset[]) + { + int start = offset[0]; + int end = offset[1]; + while (start < end) { + byte b = buffer[start]; + buffer[start ++] = buffer[end]; + buffer[end --] = b; + } + } + + /** + * Insert the case shifting byte if required + * @param bytes array of byte arrays corresponding to each strength + * @param bytescount array of the size of the byte arrays + * @param caseshift value + * @return new caseshift value + */ + private static final int doCaseShift(byte bytes[][], int bytescount[], + int caseshift) + { + if (caseshift == 0) { + append(bytes, bytescount, 0, SORT_CASE_BYTE_START_); + caseshift = SORT_CASE_SHIFT_START_; + } + return caseshift; + } + + /** + * Performs the casing sort + 
* @param tertiary byte in ints for easy comparison + * @param bytes of byte arrays for each strength + * @param bytescount array of the size of each strength byte arrays + * @param notIsContinuation flag indicating if the current bytes belong to + * a continuation ce + * @param caseshift + * @return the new value of case shift + */ + private final int doCaseBytes(int tertiary, byte bytes[][], + int bytescount[], boolean notIsContinuation, + int caseshift) + { + caseshift = doCaseShift(bytes, bytescount, caseshift); + + if (notIsContinuation && tertiary != 0) { + byte casebits = (byte)(tertiary & 0xC0); + if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { + if (casebits == 0) { + bytes[0][bytescount[0] - 1] |= (1 << (-- caseshift)); + } + else { + // second bit + caseshift = doCaseShift(bytes, bytescount, caseshift); + bytes[0][bytescount[0] - 1] |= ((casebits >> 6) & 1) + << (-- caseshift); + } + } + else { + if (casebits != 0) { + bytes[0][bytescount[0] - 1] |= 1 << (-- caseshift); + // second bit + caseshift = doCaseShift(bytes, bytescount, caseshift); + bytes[0][bytescount[0] - 1] |= ((casebits >> 7) & 1) + << (-- caseshift); + } + } + } + + return caseshift; + } + + /** + * Gets the tertiary byte and adds it to the tertiary byte array + * @param tertiary byte in int for easy comparison + * @param bytes array of byte arrays for each strength + * @param bytescount array of the size of each strength byte arrays + * @param count array of counters for each of the strength + * @param notIsContinuation flag indicating if the current bytes belong to + * a continuation ce + */ + private final void doTertiaryBytes(int tertiary, byte bytes[][], + int bytescount[], int count[], + boolean notIsContinuation) + { + if (tertiary != 0) { + // This is compression code. 
+ // sequence size check is included in the if clause + if (tertiary == m_common3_ && notIsContinuation) { + count[3] ++; + } + else { + int common3 = m_common3_ & LAST_BYTE_MASK_; + if ((tertiary > common3 + && m_common3_ == COMMON_NORMAL_3_) + || (tertiary <= common3 + && m_common3_ == COMMON_UPPER_FIRST_3_)) { + tertiary += m_addition3_; + } + if (count[3] > 0) { + if (tertiary > common3) { + while (count[3] > m_topCount3_) { + append(bytes, bytescount, 3, + (byte)(m_top3_ - m_topCount3_)); + count[3] -= m_topCount3_; + } + append(bytes, bytescount, 3, + (byte)(m_top3_ - (count[3] - 1))); + } + else { + while (count[3] > m_bottomCount3_) { + append(bytes, bytescount, 3, + (byte)(m_bottom3_ + m_bottomCount3_)); + count[3] -= m_bottomCount3_; + } + append(bytes, bytescount, 3, + (byte)(m_bottom3_ + (count[3] - 1))); + } + count[3] = 0; + } + append(bytes, bytescount, 3, (byte)tertiary); + } + } + } + + /** + * Gets the quartenary byte and adds it to the quartenary byte array + * @param bytes array of byte arrays for each strength + * @param bytescount array of the size of each strength byte arrays + * @param count array of counters for each of the strength + * @param isCodePointHiragana flag indicator if the previous codepoint + * we dealt with was Hiragana + * @param commonBottom4 smallest common quartenary byte + * @param bottomCount4 smallest quartenary byte + * @param hiragana4 hiragana quartenary byte + */ + private final void doQuartenaryBytes(byte bytes[][], int bytescount[], + int count[], + boolean isCodePointHiragana, + int commonBottom4, int bottomCount4, + byte hiragana4) + { + if (isCodePointHiragana) { // This was Hiragana, need to note it + if (count[4] > 0) { // Close this part + while (count[4] > bottomCount4) { + append(bytes, bytescount, 4, (byte)(commonBottom4 + + bottomCount4)); + count[4] -= bottomCount4; + } + append(bytes, bytescount, 4, (byte)(commonBottom4 + + (count[4] - 1))); + count[4] = 0; + } + append(bytes, bytescount, 4, 
hiragana4); // Add the Hiragana + } + else { // This wasn't Hiragana, so we can continue adding stuff + count[4] ++; + } + } + + /** + * Iterates through the argument string for all ces. + * Split the ces into their relevant primaries, secondaries etc. + * @param source normalized string + * @param compare array of flags indicating if a particular strength is + * to be processed + * @param bytes an array of byte arrays corresponding to the strengths + * @param bytescount an array of the size of the byte arrays + * @param count array of compression counters for each strength + * @param doFrench flag indicator if special handling of French has to be + * done + * @param hiragana4 offset for Hiragana quaternary + * @param commonBottom4 smallest common quaternary byte + * @param bottomCount4 smallest quaternary byte + */ + private synchronized final void getSortKeyBytes(String source, + boolean compare[], + byte bytes[][], + int bytescount[], + int count[], + boolean doFrench, + byte hiragana4, + int commonBottom4, + int bottomCount4) + { + int backupDecomposition = m_decomposition_; + m_decomposition_ = NO_DECOMPOSITION; // have to revert to backup later + CollationElementIterator coleiter = + new CollationElementIterator(source, this); + + int frenchOffset[] = {-1, -1}; + + // scriptorder not implemented yet + // const uint8_t *scriptOrder = coll->scriptOrder; + + boolean doShift = false; + boolean notIsContinuation = false; + + int leadPrimary = 0; // int for easier comparison + int caseShift = 0; + + while (true) { + int ce = coleiter.next(); + if (ce == CollationElementIterator.NULLORDER) { + break; + } + + if (ce == CollationElementIterator.IGNORABLE) { + continue; + } + + notIsContinuation = !isContinuation(ce); + + /* + * if (notIsContinuation) { + if (scriptOrder != NULL) { + primary1 = scriptOrder[primary1]; + } + }*/ + doShift = (m_isAlternateHandlingShifted_ + && ((notIsContinuation && ce <= m_variableTopValue_ + && (ce >> 24) != 0)) // primary byte not 0 + 
|| (!notIsContinuation && doShift)); + leadPrimary = doPrimaryBytes(ce, bytes, bytescount, count, + notIsContinuation, doShift, leadPrimary, + commonBottom4, bottomCount4); + if (compare[2]) { + doSecondaryBytes(ce, bytes, bytescount, count, + notIsContinuation, doFrench, frenchOffset); + } + + int t = ce & LAST_BYTE_MASK_; + if (!notIsContinuation) { + t = ce & CE_REMOVE_CONTINUATION_MASK_; + } + + if (compare[0]) { + caseShift = doCaseBytes(t, bytes, bytescount, + notIsContinuation, caseShift); + } + else if (notIsContinuation) { + t ^= m_caseSwitch_; + } + + t &= m_mask3_; + + if (compare[3]) { + doTertiaryBytes(t, bytes, bytescount, count, + notIsContinuation); + } + + if (compare[4] && notIsContinuation) { // compare quad + doQuartenaryBytes(bytes, bytescount, count, + coleiter.m_isCodePointHiragana_, + commonBottom4, bottomCount4, hiragana4); + } + } + m_decomposition_ = backupDecomposition; // reverts to original + if (frenchOffset[0] != -1) { + // one last round of checks + reverseBuffer(bytes[2], frenchOffset); + } + } + + /** + * From the individual strength byte results the final compact sortkey + * will be calculated. 
+ * @param source text string + * @param compare array of flags indicating if a particular strength is + * to be processed + * @param bytes an array of byte arrays corresponding to the strengths + * @param bytescount an array of the size of the byte arrays + * @param count array of compression counters for each strength + * @param doFrench flag indicating that special handling of French has to + * be done + * @param commonBottom4 smallest common quaternary byte + * @param bottomCount4 smallest quaternary byte + * @return the compact sortkey + */ + private final byte[] getSortKey(String source, boolean compare[], + byte bytes[][], int bytescount[], + int count[], boolean doFrench, + int commonBottom4, int bottomCount4) + { + // we have done all the CE's, now let's put them together to form + // a key + if (compare[2]) { + doSecondary(bytes, bytescount, count, doFrench); + if (compare[0]) { + doCase(bytes, bytescount); + } + if (compare[3]) { + doTertiary(bytes, bytescount, count); + if (compare[4]) { + doQuaternary(bytes, bytescount, count, commonBottom4, + bottomCount4); + if (compare[5]) { + doIdentical(source, bytes, bytescount); + } + + } + } + } + append(bytes, bytescount, 1, (byte)0); + return bytes[1]; + } + + /** + * Packs the French bytes + * @param bytes array of byte arrays corresponding to strenghts + * @param bytescount array of the size of byte arrays + * @param count array of compression counts + */ + private final void doFrench(byte bytes[][], int bytescount[], int count[]) + { + for (int i = 0; i < bytescount[2]; i ++) { + byte s = bytes[2][bytescount[2] - i - 1]; + // This is compression code. + if (s == COMMON_2_) { + ++ count[2]; + } + else { + if (count[2] > 0) { + if (s > COMMON_2_) { // not necessary for 4th level. 
+ while (count[2] > TOP_COUNT_2_) { + append(bytes, bytescount, 1, + (byte)(COMMON_TOP_2_ - TOP_COUNT_2_)); + count[2] -= TOP_COUNT_2_; + } + append(bytes, bytescount, 1, (byte)(COMMON_TOP_2_ + - (count[2] - 1))); + } + else { + while (count[2] > BOTTOM_COUNT_2_) { + append(bytes, bytescount, 1, + (byte)(COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_)); + count[2] -= BOTTOM_COUNT_2_; + } + append(bytes, bytescount, 1, (byte)(COMMON_BOTTOM_2_ + + (count[2] - 1))); + } + count[2] = 0; + } + append(bytes, bytescount, 1, s); + } + } + if (count[2] > 0) { + while (count[2] > BOTTOM_COUNT_2_) { + append(bytes, bytescount, 1, (byte)(COMMON_BOTTOM_2_ + + BOTTOM_COUNT_2_)); + count[2] -= BOTTOM_COUNT_2_; + } + append(bytes, bytescount, 1, (byte)(COMMON_BOTTOM_2_ + + (count[2] - 1))); + } + } + + /** + * Compacts the secondary bytes and stores them into the primary array + * @param bytes array of byte arrays corresponding to the strengths + * @param bytecount array of the size of the byte arrays + * @param count array of the number of compression counts + * @param doFrench flag indicator that French has to be handled specially + */ + private final void doSecondary(byte bytes[][], int bytescount[], + int count[], boolean doFrench) + { + if (count[2] > 0) { + while (count[2] > BOTTOM_COUNT_2_) { + append(bytes, bytescount, 2, (byte)(COMMON_BOTTOM_2_ + + BOTTOM_COUNT_2_)); + count[2] -= BOTTOM_COUNT_2_; + } + append(bytes, bytescount, 2, (byte)(COMMON_BOTTOM_2_ + + (count[2] - 1))); + } + + append(bytes, bytescount, 1, SORT_LEVEL_TERMINATOR_); + + if (doFrench) { // do the reverse copy + doFrench(bytes, bytescount, count); + } + else { + if (bytes[1].length <= bytescount[1] + bytescount[2]) { + bytes[1] = increase(bytes[1], bytescount[1], bytescount[2]); + } + System.arraycopy(bytes[2], 0, bytes[1], bytescount[1], + bytescount[2]); + bytescount[1] += bytescount[2]; + } + } + + /** + * Increase buffer size + * @param array array of bytes + * @param size of the byte array + * @param 
incrementsize size to increase + * @return the new buffer + */ + private static final byte[] increase(byte buffer[], int size, + int incrementsize) + { + byte result[] = new byte[buffer.length + incrementsize]; + System.arraycopy(buffer, 0, result, 0, size); + return result; + } + + /** + * Increase buffer size + * @param array array of bytes + * @param size of the byte array + * @param incrementsize size to increase + * @return the new buffer + */ + private static final int[] increase(int buffer[], int size, + int incrementsize) + { + int result[] = new int[buffer.length + incrementsize]; + System.arraycopy(buffer, 0, result, 0, size); + return result; + } + + /** + * Compacts the case bytes and stores them into the primary array + * @param bytes array of byte arrays corresponding to the strengths + * @param bytecount array of the size of the byte arrays + */ + private final void doCase(byte bytes[][], int bytescount[]) + { + append(bytes, bytescount, 1, SORT_LEVEL_TERMINATOR_); + if (bytes[1].length <= bytescount[1] + bytescount[0]) { + bytes[1] = increase(bytes[1], bytescount[1], bytescount[0]); + } + if (bytes[1].length <= bytescount[1] + bytescount[0]) { + bytes[1] = increase(bytes[1], bytescount[1], bytescount[0]); + } + System.arraycopy(bytes[0], 0, bytes[1], bytescount[1], bytescount[0]); + bytescount[1] += bytescount[0]; + } + + /** + * Compacts the tertiary bytes and stores them into the primary array + * @param bytes array of byte arrays corresponding to the strengths + * @param bytecount array of the size of the byte arrays + * @param count array of the number of compression counts + */ + private final void doTertiary(byte bytes[][], int bytescount[], + int count[]) + { + if (count[3] > 0) { + if (m_common3_ != COMMON_BOTTOM_3_) { + while (count[3] >= m_topCount3_) { + append(bytes, bytescount, 3, (byte)(m_top3_ + - m_topCount3_)); + count[3] -= m_topCount3_; + } + append(bytes, bytescount, 3, (byte)(m_top3_ - count[3])); + } + else { + while (count[3] 
> m_bottomCount3_) { + append(bytes, bytescount, 3, (byte)(m_bottom3_ + + m_bottomCount3_)); + count[3] -= m_bottomCount3_; + } + append(bytes, bytescount, 3, (byte)(m_bottom3_ + + (count[3] - 1))); + } + } + append(bytes, bytescount, 1, SORT_LEVEL_TERMINATOR_); + if (bytes[1].length <= bytescount[1] + bytescount[3]) { + bytes[1] = increase(bytes[1], bytescount[1], bytescount[3]); + } + System.arraycopy(bytes[3], 0, bytes[1], bytescount[1], bytescount[3]); + bytescount[1] += bytescount[3]; + } + + /** + * Compacts the quaternary bytes and stores them into the primary array + * @param bytes array of byte arrays corresponding to the strengths + * @param bytecount array of the size of the byte arrays + * @param count array of compression counts + */ + private final void doQuaternary(byte bytes[][], int bytescount[], + int count[], int commonbottom4, + int bottomcount4) + { + if (count[4] > 0) { + while (count[4] > bottomcount4) { + append(bytes, bytescount, 4, (byte)(commonbottom4 + + bottomcount4)); + count[4] -= bottomcount4; + } + append(bytes, bytescount, 4, (byte)(commonbottom4 + + (count[4] - 1))); + } + append(bytes, bytescount, 1, SORT_LEVEL_TERMINATOR_); + if (bytes[1].length <= bytescount[1] + bytescount[4]) { + bytes[1] = increase(bytes[1], bytescount[1], bytescount[4]); + } + System.arraycopy(bytes[4], 0, bytes[1], bytescount[1], bytescount[4]); + bytescount[1] += bytescount[4]; + } + + /** + * Deals with the identical sort. + * Appends the BOCSU version of the source string to the ends of the + * byte buffer. 
+ * @param source text string + * @param bytes array of a byte array corresponding to the strengths + * @param bytescount array of the byte array size + */ + private final void doIdentical(String source, byte bytes[][], + int bytescount[]) + { + int isize = BOSCU.lengthOfIdenticalLevelRun(source); + append(bytes, bytescount, 1, SORT_LEVEL_TERMINATOR_); + if (bytes[1].length <= bytescount[1] + isize) { + bytes[1] = increase(bytes[1], bytescount[1], 1 + isize); + } + BOSCU.writeIdenticalLevelRun(source, bytes[1], bytescount[1]); + } + + /** + * Gets the offset of the first unmatched characters in source and target. + * This method returns the offset of the start of a contraction or a + * combining sequence, if the first difference is in the middle of such a + * sequence. + * @param source string + * @param target string + * @return offset of the first unmatched characters in source and target. + */ + private final int getFirstUnmatchedOffset(String source, String target) + { + int result = 0; + while (source.charAt(result) == target.charAt(result) + && source.charAt(result) != 0) { + result ++; + } + if (result > 0) { + // There is an identical portion at the beginning of the two + // strings. If the identical portion ends within a contraction or a + // combining character sequence, back up to the start of that + // sequence. + char schar = source.charAt(result); // first differing chars + char tchar = target.charAt(result); + if (schar != 0 && isUnsafe(schar) || tchar != 0 && isUnsafe(tchar)) + { + // We are stopped in the middle of a contraction or combining + // sequence. + // Look backwards for the part of the string for the start of + // the sequence + // It doesn't matter which string we scan, since they are the + // same in this region. 
+ do { + result --; + } + while (result > 0 && isUnsafe(source.charAt(result))); + } + } + return result; + } + + /** + * Appending an byte to an array of bytes and increases it if we run out of + * space + * @param array of byte arrays + * @param array of the end offsets corresponding to array + * @param appendarrayindex of the int array to append + * @param value to append + */ + private static final void append(byte array[][], int arrayoffset[], + int appendarrayindex, byte value) + { + if (arrayoffset[appendarrayindex] + 1 + >= array[appendarrayindex].length) { + array[appendarrayindex] = increase(array[appendarrayindex], + arrayoffset[appendarrayindex], + SORT_BUFFER_INIT_SIZE_); + } + array[appendarrayindex][arrayoffset[appendarrayindex]] = value; + arrayoffset[appendarrayindex] ++; + } + + /** + * This is a trick string compare function that goes in and uses sortkeys + * to compare. It is used when compare gets in trouble and needs to bail + * out. + * @param source text string + * @param target text string + */ + private final int compareBySortKeys(String source, String target) + { + CollationKey sourcekey = getCollationKey(source); + CollationKey targetkey = getCollationKey(target); + return sourcekey.compareTo(targetkey); + } + + /** + * Performs the primary comparisons, and fills up the CE buffer at the + * same time. + * The return value toggles between the comparison result and the hiragana + * result. If either the source is greater than target or vice versa, the + * return result is the comparison result, ie 1 or -1, furthermore the + * cebuffers will be cleared when that happens. If the primary comparisons + * are equal, we'll have to continue with secondary comparison. In this case + * the cebuffer will not be cleared and the return result will be the + * hiragana result. 
+ * @param doHiragana4 flag indicator that Hiragana Quaternary has to be + * observed + * @param lowestpvalue the lowest primary value that will not be ignored if + * alternate handling is shifted + * @param source text string + * @param target text string + * @param textoffset offset in text to start the comparison + * @param cebuffer array of CE buffers to populate, offset 0 for source, + * 1 for target, cleared when a primary difference is + * found. + * @param cebuffersize array of CE buffer size corresponding to the + * cebuffer, 0 when a primary difference is found. + * @return comparion result if a primary difference is found, otherwise + * hiragana result + */ + private final int doPrimaryCompare(boolean doHiragana4, int lowestpvalue, + String source, String target, + int textoffset, int cebuffer[][], + int cebuffersize[]) + { + // Preparing the context objects for iterating over strings + UCharacterIterator siter = new UCharacterIterator(source, textoffset, + source.length()); + CollationElementIterator scoleiter = new CollationElementIterator( + siter, this); + UCharacterIterator titer = new UCharacterIterator(target, textoffset, + target.length()); + CollationElementIterator tcoleiter = new CollationElementIterator( + titer, this); + + // Non shifted primary processing is quite simple + if (!m_isAlternateHandlingShifted_) { + int hiraganaresult = 0; + while (true) { + int sorder = 0; + // We fetch CEs until we hit a non ignorable primary or end. 
+ do { + sorder = scoleiter.next(); + append(cebuffer, cebuffersize, 0, sorder); + sorder &= CE_PRIMARY_MASK_; + } while (sorder == CollationElementIterator.IGNORABLE); + + int torder = 0; + do { + torder = tcoleiter.next(); + append(cebuffer, cebuffersize, 1, torder); + torder &= CE_PRIMARY_MASK_; + } while (torder == CollationElementIterator.IGNORABLE); + + // if both primaries are the same + if (sorder == torder) { + // and there are no more CEs, we advance to the next level + if (cebuffer[0][cebuffersize[0] - 1] + == CollationElementIterator.NULLORDER) { + break; + } + if (doHiragana4 && hiraganaresult == 0 + && scoleiter.m_isCodePointHiragana_ != + tcoleiter.m_isCodePointHiragana_) { + if (scoleiter.m_isCodePointHiragana_) { + hiraganaresult = -1; + } + else { + hiraganaresult = 1; + } + } + } + else { + // if two primaries are different, we are done + return endCompare(sorder, torder, cebuffer, cebuffersize); + } + } + // no primary difference... do the rest from the buffers + return hiraganaresult; + } + else { // shifted - do a slightly more complicated processing :) + while (true) { + int sorder = getPrimaryShiftedCompareCE(scoleiter, lowestpvalue, + cebuffer, cebuffersize, 0); + int torder = getPrimaryShiftedCompareCE(tcoleiter, lowestpvalue, + cebuffer, cebuffersize, 1); + if (sorder == torder) { + if (cebuffer[0][cebuffersize[0] - 1] + == CollationElementIterator.NULLORDER) { + break; + } + else { + continue; + } + } + else { + return endCompare(sorder, torder, cebuffer, cebuffersize); + } + } // no primary difference... do the rest from the buffers + } + return 0; + } + + /** + * This is used only when we know that sorder is already different from + * torder. + * Compares sorder and torder, returns -1 if sorder is less than torder. + * Clears the cebuffer at the same time. 
+ * @param sorder source strength order + * @param torder target strength order + * @param cebuffer array of buffers containing the ce values + * @param cebuffersize array of cebuffer offsets + * @return the comparison result of sorder and torder + */ + private static final int endCompare(int sorder, int torder, + int cebuffer[][], int cebuffersize[]) + { + cebuffer[0] = null; + cebuffer[1] = null; + cebuffersize[0] = 0; + cebuffersize[1] = 0; + if (sorder < torder) { + return -1; + } + return 1; + } + + /** + * Calculates the next primary shifted value and fills up cebuffer with the + * next non-ignorable ce. + * @param coleiter collation element iterator + * @param doHiragana4 flag indicator if hiragana quaternary is to be + * handled + * @param lowestpvalue lowest primary shifted value that will not be + * ignored + * @param cebuffer array of buffers to append with the next ce + * @param cebuffersize array of offsets corresponding to the cebuffer + * @param cebufferindex index of the buffer to append to + * @return result next modified ce + */ + private final static int getPrimaryShiftedCompareCE( + CollationElementIterator coleiter, + int lowestpvalue, int cebuffer[][], + int cebuffersize[], int cebufferindex) + { + boolean shifted = false; + int result = CollationElementIterator.IGNORABLE; + while (true) { + result = coleiter.next(); + if (result == CollationElementIterator.NULLORDER) { + append(cebuffer, cebuffersize, cebufferindex, result); + break; + } + else if (result == CollationElementIterator.IGNORABLE) { + continue; + } + else if (isContinuation(result)) { + if ((result & CE_PRIMARY_MASK_) + != CollationElementIterator.IGNORABLE) { + // There is primary value + if (shifted) { + result = (result & CE_PRIMARY_MASK_) + | CE_CONTINUATION_MARKER_; + // preserve interesting continuation + append(cebuffer, cebuffersize, cebufferindex, result); + continue; + } + else { + append(cebuffer, cebuffersize, cebufferindex, result); + break; + } + } + else { // Just 
lower level values + if (!shifted) { + append(cebuffer, cebuffersize, cebufferindex, result); + } + } + } + else { // regular + if ((result & CE_PRIMARY_MASK_) > lowestpvalue) { + append(cebuffer, cebuffersize, cebufferindex, result); + break; + } + else { + if ((result & CE_PRIMARY_MASK_) > 0) { + shifted = true; + result &= CE_PRIMARY_MASK_; + append(cebuffer, cebuffersize, cebufferindex, result); + continue; + } + else { + append(cebuffer, cebuffersize, cebufferindex, result); + shifted = false; + continue; + } + } + } + } + result &= CE_PRIMARY_MASK_; + return result; + } + + /** + * Appending an int to an array of ints and increases it if we run out of + * space + * @param array of int arrays + * @param array of the end offsets corresponding to array + * @param appendarrayindex of the int array to append + * @param value to append + */ + private static final void append(int array[][], int arrayoffset[], + int appendarrayindex, int value) + { + if (arrayoffset[appendarrayindex] + 1 + >= array[appendarrayindex].length) { + array[appendarrayindex] = increase(array[appendarrayindex], + arrayoffset[appendarrayindex], + CE_BUFFER_SIZE_); + } + array[appendarrayindex][arrayoffset[appendarrayindex]] = value; + arrayoffset[appendarrayindex] ++; + } + + /** + * Does secondary strength comparison based on the collected ces. 
+ * @param cebuffer array of int arrays that contains the collected ces + * @param cebuffersize array of offsets corresponding to the cebuffer, + * indicates the offset of the last ce in buffer + * @param doFrench flag indicates if French ordering is to be done + * @return the secondary strength comparison result + */ + private static final int doSecondaryCompare(int cebuffer[][], + int cebuffersize[], + boolean doFrench) + { + // now, we're gonna reexamine collected CEs + if (!doFrench) { // normal + int offset = 0; + while (true) { + int sorder = CollationElementIterator.IGNORABLE; + while (sorder == CollationElementIterator.IGNORABLE) { + sorder = cebuffer[0][offset ++] & CE_SECONDARY_MASK_; + } + int torder = CollationElementIterator.IGNORABLE; + while (torder == CollationElementIterator.IGNORABLE) { + torder = cebuffer[1][offset ++] & CE_SECONDARY_MASK_; + } + + if (sorder == torder) { + if (cebuffer[0][offset - 1] + == CollationElementIterator.NULLORDER) { + break; + } + } + else { + return (sorder < torder) ? -1 : 1; + } + } + } + else { // do the French + int continuationoffset[] = {0, 0}; + int offset[] = {cebuffersize[0] - 2, cebuffersize[1] - 2} ; + while (true) { + int sorder = getSecondaryFrenchCE(cebuffer, offset, + continuationoffset, 0); + int torder = getSecondaryFrenchCE(cebuffer, offset, + continuationoffset,1); + if (sorder == torder) { + if (cebuffer[0][offset[0] - 1] + == CollationElementIterator.NULLORDER + || (offset[0] < 0 && offset[1] < 0)) { + break; + } + } + else { + return (sorder < torder) ? -1 : 1; + } + } + } + return 0; + } + + /** + * Calculates the next secondary french CE. 
+ * @param cebuffer array of buffers to append with the next ce + * @param offset array of offsets corresponding to the cebuffer + * @param continuationoffset index of the start of a continuation + * @param index of cebuffer to use + * @return result next modified ce + */ + private static final int getSecondaryFrenchCE(int cebuffer[][], + int offset[], + int continuationoffset[], + int index) + { + int result = CollationElementIterator.IGNORABLE; + while (result == CollationElementIterator.IGNORABLE + && offset[index] >= 0) { + if (continuationoffset[index] == 0) { + while (isContinuation(cebuffer[0][offset[index] --])); + // after this, sorder is at the start of continuation, + // and offset points before that + if (isContinuation(cebuffer[0][offset[index] + 1])) { + // save offset for later + continuationoffset[index] = offset[index]; + offset[index] += 2; + } + } + else { + result = cebuffer[0][offset[index] ++]; + if (!isContinuation(result)) { + // we have finished with this continuation + offset[index] = continuationoffset[index]; + // reset the pointer to before continuation + continuationoffset[index] = 0; + continue; + } + } + result &= CE_SECONDARY_MASK_; // remove continuation bit + } + return result; + } + + /** + * Does case strength comparison based on the collected ces. 
+ * @param cebuffer array of int arrays that contains the collected ces + * @return the case strength comparison result + */ + private final int doCaseCompare(int cebuffer[][]) + { + int sorder = CollationElementIterator.IGNORABLE; + int torder = CollationElementIterator.IGNORABLE; + int soffset = 0; + int toffset = 0; + while (true) { + while ((sorder & CE_REMOVE_CASE_) + == CollationElementIterator.IGNORABLE) { + sorder = cebuffer[0][soffset ++]; + if (!isContinuation(sorder)) { + sorder &= CE_CASE_MASK_3_; + sorder ^= m_caseSwitch_; + } + else { + sorder = CollationElementIterator.IGNORABLE; + } + } + + while ((torder & CE_REMOVE_CASE_) + == CollationElementIterator.IGNORABLE) { + torder = cebuffer[1][toffset ++]; + if (!isContinuation(sorder)) { + torder &= CE_CASE_MASK_3_; + torder ^= m_caseSwitch_; + } + else { + torder = CollationElementIterator.IGNORABLE; + } + } + + if ((sorder & CE_CASE_BIT_MASK_) < (torder & CE_CASE_BIT_MASK_)) { + return -1; + } + else if ((sorder & CE_CASE_BIT_MASK_) + > (torder & CE_CASE_BIT_MASK_)) { + return 1; + } + + if (cebuffer[0][soffset - 1] == CollationElementIterator.NULLORDER) + { + break; + } + else { + sorder = CollationElementIterator.IGNORABLE; + torder = CollationElementIterator.IGNORABLE; + } + } + return 0; + } + + /** + * Does tertiary strength comparison based on the collected ces. 
     * @param cebuffer array of int arrays that contains the collected ces
     * @return the tertiary strength comparison result
     */
    private final int doTertiaryCompare(int cebuffer[][])
    {
        int soffset = 0;
        int toffset = 0;
        while (true) {
            int sorder = CollationElementIterator.IGNORABLE;
            int torder = CollationElementIterator.IGNORABLE;
            // get the next non-ignorable tertiary weight from the source;
            // m_mask3_ selects the tertiary bits relevant under the current
            // case settings, m_caseSwitch_ flips the case ordering
            while ((sorder & CE_REMOVE_CASE_)
                   == CollationElementIterator.IGNORABLE) {
                sorder = cebuffer[0][soffset ++] & m_mask3_;
                if (!isContinuation(sorder)) {
                    sorder ^= m_caseSwitch_;
                }
                else {
                    sorder &= CE_REMOVE_CASE_;
                }
            }

            // get the next non-ignorable tertiary weight from the target
            while ((torder & CE_REMOVE_CASE_)
                   == CollationElementIterator.IGNORABLE) {
                torder = cebuffer[1][toffset ++] & m_mask3_;
                if (!isContinuation(torder)) {
                    torder ^= m_caseSwitch_;
                }
                else {
                    torder &= CE_REMOVE_CASE_;
                }
            }

            if (sorder == torder) {
                // NOTE(review): this compares the raw buffered ce against a
                // masked NULLORDER; if CE_REMOVE_CASE_ clears any bit, the
                // raw NULLORDER sentinel can never equal the masked constant
                // and this loop may not terminate - confirm against the
                // values of NULLORDER and CE_REMOVE_CASE_ (the C original
                // compares the masked weight to its NO_MORE_CES_TERTIARY
                // constant)
                if (cebuffer[0][soffset - 1]
                    == (CollationElementIterator.NULLORDER & CE_REMOVE_CASE_)) {
                    break;
                }
            }
            else {
                return (sorder < torder) ? -1 : 1;
            }
        }
        return 0;
    }

    /**
     * Does quaternary strength comparison based on the collected ces.
+ * @param cebuffer array of int arrays that contains the collected ces + * @param lowestpvalue the lowest primary value that will not be ignored if + * alternate handling is shifted + * @return the quaternary strength comparison result + */ + private final int doQuaternaryCompare(int cebuffer[][], int lowestpvalue) + { + boolean sShifted = true; + boolean tShifted = true; + int soffset = 0; + int toffset = 0; + while (true) { + int sorder = CollationElementIterator.IGNORABLE; + int torder = CollationElementIterator.IGNORABLE; + while (sorder == CollationElementIterator.IGNORABLE + && sorder != CollationElementIterator.NULLORDER + || (isContinuation(sorder) && !sShifted)) { + sorder = cebuffer[0][soffset ++]; + if (isContinuation(sorder)) { + if (!sShifted) { + continue; + } + } + else if (sorder > lowestpvalue + || (sorder & CE_PRIMARY_MASK_) + == CollationElementIterator.IGNORABLE) { + // non continuation + sorder = CE_PRIMARY_MASK_; + sShifted = false; + } + else { + sShifted = true; + } + } + sorder &= CE_PRIMARY_MASK_; + while (torder == CollationElementIterator.IGNORABLE + && torder != CollationElementIterator.NULLORDER + || (isContinuation(torder) && !tShifted)) { + torder = cebuffer[0][toffset ++]; + if (isContinuation(torder)) { + if (!tShifted) { + continue; + } + } + else if (torder > lowestpvalue + || (torder & CE_PRIMARY_MASK_) + == CollationElementIterator.IGNORABLE) { + // non continuation + torder = CE_PRIMARY_MASK_; + tShifted = false; + } + else { + tShifted = true; + } + } + torder &= CE_PRIMARY_MASK_; + + if (sorder == torder) { + if (cebuffer[0][soffset -1] + == CollationElementIterator.NULLORDER) { + break; + } + } + else { + return (sorder < torder) ? -1 : 1; + } + } + return 0; + } + + /** + * Internal function. Does byte level string compare. Used by strcoll if + * strength == identical and strings are otherwise equal. This is a rare + * case. Comparison must be done on NFD normalized strings. FCD is not good + * enough. 
+ * @param source text + * @param target text + * @param offset of the first difference in the text strings + * @param normalize flag indicating if we are to normalize the text before + * comparison + * @return 1 if source is greater than target, -1 less than and 0 if equals + */ + private static final int doIdenticalCompare(String source, String target, + int offset, boolean normalize) + { + if (normalize) { + /* + if (unorm_quickCheck(sColl->string, sLen, UNORM_NFD) != UNORM_YES) { + source = unorm_decompose(sColl->writableBuffer, + sColl->writableBufSize, + sBuf, sLen, FALSE, FALSE); + } + + if (unorm_quickCheck(tColl->string, tLen, UNORM_NFD) != UNORM_YES) { + target = unorm_decompose(tColl->writableBuffer, + tColl->writableBufSize, + tBuf, tLen, FALSE, FALSE); + } + */ + offset = 0; + } + + return doStringCompare(source, target, offset); + } + + /** + * Compares string for their codepoint order. + * This comparison handles surrogate characters and place them after the + * all non surrogate characters. + * @param source text + * @param target text + * @param offset start offset for comparison + * @return 1 if source is greater than target, -1 less than and 0 if equals + */ + private static final int doStringCompare(String source, + String target, + int offset) + { + // compare identical prefixes - they do not need to be fixed up + char schar = 0; + char tchar = 0; + while (true) { + schar = source.charAt(offset); + tchar = target.charAt(offset ++); + if (schar != tchar) { + break; + } + if (schar == 0) { + return 0; + } + } + + // if both values are in or above the surrogate range, Fix them up. + if (schar >= UTF16.LEAD_SURROGATE_MIN_VALUE + && tchar >= UTF16.LEAD_SURROGATE_MIN_VALUE) { + schar = fixupUTF16(schar); + tchar = fixupUTF16(tchar); + } + + // now c1 and c2 are in UTF-32-compatible order + return (schar < tchar) ? 
-1 : 1; // schar and tchar has to be different + } + + /** + * Rotate surrogates to the top to get code point order + */ + private static final char fixupUTF16(char ch) + { + if (ch >= 0xe000) { + ch -= 0x800; + } + else { + ch += 0x2000; + } + return ch; + } +}